Example #1
def andrea_states():
    """
    Infer `T` hardmax state probability grids, one for each timestep.
    """
    T = 5
    N = 20
    R = -1
    beta = 1
    g = GridWorldMDP(N, N, default_reward=R)

    init_state = g.coor_to_state(0, 0)
    goal = g.coor_to_state(N-1, N//2)

    # A numpy.ndarray with dimensions (T+1 x g.rows x g.cols).
    # `state_prob[t]` holds the exact state probabilities at timestep t for
    # a beta-irrational agent that chooses actions softmax over hardmax
    # values.
    state_prob = inf.state.infer_from_start(g, init_state, goal,
            T=T, beta=beta, all_steps=True).reshape(T+1, g.rows, g.cols)
    print(state_prob)

    # Plot each of the T+1 heat maps.
    # Beware: the heat map's color scale changes with each plot.
    for t, p in enumerate(state_prob):
        title = "t={}".format(t)
        plot_heat_maps(g, init_state, [p], [title], stars_grid=[goal],
                auto_logarithm=False)
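
As a minimal follow-up sketch (assuming `numpy` is imported as `np`, as in the other examples), the `state_prob` array can be reduced to the most likely cell at each timestep:

def most_likely_cells(state_prob):
    # For each timestep's grid, return the (row, col) of the cell with the
    # highest probability.
    return [np.unravel_index(p.argmax(), p.shape) for p in state_prob]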
Example #2
    def experiment(self, N=10, iters=1000, mb_size=128, samples=10000):
        g = GridWorldMDP(N, N)
        goals = [g.coor_to_state(N - 1, N - 1), g.coor_to_state(0, 0)]
        assert self.G == len(goals)

        # Synthetic datasets for the two goals: a large one for training and
        # a small held-out one for evaluation.
        data = syn.gen_predict_actions(g,
                                       goals,
                                       self.k,
                                       self.l,
                                       samples=samples,
                                       beta=1e-3)
        test_data = syn.gen_predict_actions(g,
                                            goals,
                                            self.k,
                                            self.l,
                                            samples=100,
                                            beta=1e-3)

        with self.graph.as_default():
            sess = tf.Session()
            sess.run(tf.global_variables_initializer())

            self.train_model(sess,
                             data,
                             mb_size=mb_size,
                             iters=iters,
                             test_data=test_data)
            self.assess_model(sess, test_data)
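
The session above is created but never explicitly closed. A minimal standalone sketch of the same TF1 pattern with a context manager (the `train_model`/`assess_model` calls are omitted since they belong to the class):

import tensorflow as tf

graph = tf.Graph()
with graph.as_default(), tf.Session() as sess:
    # The session is released automatically when the block exits.
    sess.run(tf.global_variables_initializer())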
Example #3
    def test_no_crash(self):
        g = GridWorldMDP(15, 15)
        goals = [
            g.coor_to_state(9, 9),
            g.coor_to_state(1, 1),
            g.coor_to_state(3, 3)
        ]
        data = gen_predict_actions(g, goals=goals, k=3, l=3)
        self.assertEqual(data.N, len(data.Y))
Example #4
    def test_no_crash2(self):
        g = GridWorldMDP(15, 15)
        goals = [
            g.coor_to_state(9, 9),
            g.coor_to_state(1, 1),
            g.coor_to_state(3, 3)
        ]
        data = gen_predict_policy(g, goals=goals, samples=30)
        self.assertEqual(data.N, len(data.Y))

        for y in data.Y:
            self.assertTrue(0 <= y < g.A)
        for z in data.Z:
            self.assertTrue(0 <= z < len(goals))
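
Since the `Data` fields are NumPy arrays (see Example #7), the same bounds checks can also be written in vectorized form. A sketch, assuming `numpy` is available as `np`:

def in_range(values, upper):
    # True iff every entry of `values` lies in the half-open interval [0, upper).
    arr = np.asarray(values)
    return bool(np.all((0 <= arr) & (arr < upper)))

# e.g. self.assertTrue(in_range(data.Y, g.A))
#      self.assertTrue(in_range(data.Z, len(goals)))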
Example #5
def benchmark(traj_mode="diag", mode="tri", T=2, N=90, R=-1):
    # `_occ_starter` returns the grid along with the start state and the list
    # of candidate destinations, so no separate GridWorldMDP is needed here.
    g, _, start, dest_list = _occ_starter(N, R, mode)
    traj = _traj_starter(N, start, traj_mode)[:50]

    def test():
        D = inf.occupancy.infer(g, traj, dest_list, T=T, verbose=False)

    # Run once outside the profiler, then profile a second run with cProfile.
    test()
    import cProfile
    cProfile.runctx('test()', globals(), locals())
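
If only a rough wall-clock number is needed instead of a full profile, the same callable can be timed with `timeit`. A sketch:

import timeit

def time_callable(fn, repeats=3):
    # Average wall-clock seconds per call of `fn` over `repeats` runs.
    return timeit.timeit(fn, number=repeats) / repeats

# e.g. inside benchmark(): print(time_callable(test))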
Example #6
def puddles_world(N=100, p=0.2, puddle_reward=-2):
    """
    Generate a world where some squares have scaled reward -2 or -2*sqrt(2).
    """
    reward_dict = {}
    for x in range(N):
        for y in range(N):
            if random.random() < p:
                reward_dict[(x, y)] = puddle_reward

    g = GridWorldMDP(N, N, reward_dict=reward_dict)
    return g
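
Because the puddle layout is drawn at random, two calls produce different worlds. A seeded variant for reproducibility (a sketch; `GridWorldMDP` and its `reward_dict` keyword are taken from the example above):

import random

def seeded_puddles_world(N=100, p=0.2, puddle_reward=-2, seed=0):
    # A private Random instance keeps the global random state untouched.
    rng = random.Random(seed)
    reward_dict = {(x, y): puddle_reward
                   for x in range(N) for y in range(N)
                   if rng.random() < p}
    return GridWorldMDP(N, N, reward_dict=reward_dict)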
Example #7
def build_deterministic_dataset():
    """
    Dataset which always returns all nine of the (state, action) pairs in
    a 3x3 gridworld where the goal is (2, 2).
    """
    g = GridWorldMDP(3, 3)
    coor = g.coor_to_state
    A = g.Actions
    policy = [((0, 0), A.UP_RIGHT), ((1, 0), A.UP_RIGHT), ((2, 0), A.UP),
              ((0, 1), A.UP_RIGHT), ((1, 1), A.UP_RIGHT), ((2, 1), A.UP),
              ((0, 2), A.RIGHT), ((1, 2), A.RIGHT), ((2, 2), A.ABSORB)]
    X = np.array([coor(*s) for s, _ in policy])
    Y = np.array([a for _, a in policy])
    Z = np.array([coor(2, 2)] * 9)

    return Data(X, Y, Z, name="tiny deterministic")
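
A hypothetical usage sketch, assuming `Data` exposes the `X`, `Y`, and `Z` arrays it was constructed with:

data = build_deterministic_dataset()
for s, a, goal in zip(data.X, data.Y, data.Z):
    # Each triple is (state index, action, goal state index) on the 3x3 grid.
    print(s, a, goal)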