Python GridWorldMDP Examples

Programming Language: Python

Namespace/Package Name: pp.mdp

Class/Type: GridWorldMDP

Examples at hotexamples.com: 7

Python GridWorldMDP - 7 examples found. These are the top-rated real-world Python examples of pp.mdp.GridWorldMDP, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

GridWorldMDP(7)

coor_to_state(3)

Frequently Used Methods

GridWorldMDP (7)

coor_to_state (3)

Example #1

Show file

def andrea_states():
    """
    Infer per-timestep state probability grids and plot each as a heat map.

    A 20x20 grid world with default reward -1 is built, the agent starts at
    coordinate (0, 0) and the goal is at (N-1, N//2). The inference result
    is reshaped to `(T+1, g.rows, g.cols)`, printed, and then every
    per-timestep grid is drawn in its own plot.
    """
    horizon = 5
    side = 20
    step_reward = -1
    rationality = 1
    g = GridWorldMDP(side, side, default_reward=step_reward)

    init_state = g.coor_to_state(0, 0)
    goal = g.coor_to_state(side - 1, side // 2)

    # Per the original author's note: these are the exact state
    # probabilities for a beta-irrational agent that chooses actions by
    # softmax over hardmax values.
    flat_prob = inf.state.infer_from_start(
        g, init_state, goal, T=horizon, beta=rationality, all_steps=True)
    # One (g.rows x g.cols) grid per entry along the first axis; the
    # first axis has length T+1.
    state_prob = flat_prob.reshape(horizon + 1, g.rows, g.cols)
    print(state_prob)

    # One plot per grid. Beware: the heat map's color scale changes from
    # plot to plot.
    for step, grid in enumerate(state_prob):
        plot_heat_maps(g, init_state, [grid], ["t={}".format(step)],
                       stars_grid=[goal], auto_logarithm=False)

Example #2

Show file

    def experiment(self, N=10, iters=1000, mb_size=128, samples=10000):
        """
        Train and assess the model on synthetic action-prediction data.

        Builds an N x N grid world with two goals, at (N-1, N-1) and (0, 0),
        generates a training set of `samples` examples and a test set of 100
        examples with `syn.gen_predict_actions`, then trains and assesses the
        model inside a TensorFlow session on `self.graph`.

        Args:
            N: side length of the square grid world.
            iters: forwarded to `self.train_model` as `iters`.
            mb_size: forwarded to `self.train_model` as `mb_size`.
            samples: number of training samples to generate.

        Raises:
            AssertionError: if `self.G` does not equal the number of goals (2).
        """
        g = GridWorldMDP(N, N)
        goals = [g.coor_to_state(N - 1, N - 1), g.coor_to_state(0, 0)]
        assert self.G == len(goals)

        data = syn.gen_predict_actions(g,
                                       goals,
                                       self.k,
                                       self.l,
                                       samples=samples,
                                       beta=1e-3)
        test_data = syn.gen_predict_actions(g,
                                            goals,
                                            self.k,
                                            self.l,
                                            samples=100,
                                            beta=1e-3)

        with self.graph.as_default():
            sess = tf.Session()
            # Close the session even when training or assessment raises, so
            # the resources it holds are released deterministically.
            try:
                sess.run(tf.global_variables_initializer())

                self.train_model(sess,
                                 data,
                                 mb_size=mb_size,
                                 iters=iters,
                                 test_data=test_data)
                self.assess_model(sess, test_data)
            finally:
                sess.close()

Example #3

Show file

File: test_syn.py Project: shwang/goalstructure

 def test_no_crash(self):
     """Smoke test: `gen_predict_actions` runs on a 15x15 world with three
     goals and reports an `N` equal to the number of labels in `Y`."""
     world = GridWorldMDP(15, 15)
     goal_coords = [(9, 9), (1, 1), (3, 3)]
     goal_states = [world.coor_to_state(x, y) for x, y in goal_coords]
     dataset = gen_predict_actions(world, goals=goal_states, k=3, l=3)
     self.assertEqual(dataset.N, len(dataset.Y))

Example #4

Show file

File: test_syn.py Project: shwang/goalstructure

    def test_no_crash2(self):
        """Smoke test: `gen_predict_policy` runs on a 15x15 world with three
        goals, reports a consistent `N`, and yields in-range labels.

        Every entry of `Y` must lie in `[0, g.A)` and every entry of `Z`
        must be a valid index into the goal list.
        """
        world = GridWorldMDP(15, 15)
        goal_coords = [(9, 9), (1, 1), (3, 3)]
        goal_states = [world.coor_to_state(x, y) for x, y in goal_coords]
        dataset = gen_predict_policy(world, goals=goal_states, samples=30)
        self.assertEqual(dataset.N, len(dataset.Y))

        for action_label in dataset.Y:
            self.assertTrue(0 <= action_label < world.A)
        for goal_label in dataset.Z:
            self.assertTrue(0 <= goal_label < len(goal_states))

Example #5

Show file

def benchmark(traj_mode="diag", mode="tri", T=2, N=90, R=-1):
    """
    Profile one call to `inf.occupancy.infer` with cProfile.

    The world, start state and destination list come from `_occ_starter`,
    and the trajectory comes from `_traj_starter`, truncated to its first
    50 entries. The inference is run once un-profiled, then once more under
    `cProfile.runctx`, which prints the profile.

    Args:
        traj_mode: forwarded to `_traj_starter` (presumably selects the
            trajectory shape -- confirm against that helper).
        mode: forwarded to `_occ_starter` (presumably selects the
            destination layout -- confirm against that helper).
        T: forwarded to `inf.occupancy.infer` as `T`.
        N: forwarded to both starter helpers.
        R: forwarded to `_occ_starter`.
    """
    import cProfile

    # `_occ_starter` supplies the world, so no separate GridWorldMDP is built
    # here; the original built one and overwrote it straight away.
    g, _, start, dest_list = _occ_starter(N, R, mode)
    traj = _traj_starter(N, start, traj_mode)[:50]

    def test():
        inf.occupancy.infer(g, traj, dest_list, T=T, verbose=False)

    # Run once outside the profiler before the profiled run.
    test()
    cProfile.runctx('test()', globals(), locals())

Example #6

Show file

File: syn.py Project: shwang/goalstructure

def puddles_world(N=100, p=0.2, puddle_reward=-2):
    """
    Generate an N x N grid world in which random squares are "puddles".

    Each square independently becomes a puddle with probability `p`; puddle
    squares are given the reward `puddle_reward` through the `reward_dict`
    passed to `GridWorldMDP`.
    NOTE(review): the original docstring mentioned scaled rewards of -2 or
    -2*sqrt(2); any such scaling would happen inside `GridWorldMDP` --
    confirm there.

    Args:
        N: side length of the square grid world.
        p: probability that any given square is a puddle.
        puddle_reward: reward assigned to puddle squares.

    Returns:
        The constructed `GridWorldMDP`.
    """
    # Iterate x-major then y, drawing one random number per square, so the
    # sequence of draws for a given seed matches the original nested loops.
    reward_dict = {
        (x, y): puddle_reward
        for x in range(N)
        for y in range(N)
        if random.random() < p
    }

    return GridWorldMDP(N, N, reward_dict=reward_dict)

Example #7

Show file

def build_deterministic_dataset():
    """
    Build a fixed nine-example dataset for a 3x3 gridworld with goal (2, 2).

    The dataset lists one (coordinate, action) pair per grid cell. `X` holds
    the coordinate pairs, `Y` the matching actions, and `Z` repeats the goal
    state (the state for coordinate (2, 2)) once per example.

    Returns:
        A `Data` object named "tiny deterministic".
    """
    g = GridWorldMDP(3, 3)
    to_state = g.coor_to_state
    A = g.Actions

    # One entry per cell, grouped by the second coordinate.
    policy = [
        ((0, 0), A.UP_RIGHT), ((1, 0), A.UP_RIGHT), ((2, 0), A.UP),
        ((0, 1), A.UP_RIGHT), ((1, 1), A.UP_RIGHT), ((2, 1), A.UP),
        ((0, 2), A.RIGHT), ((1, 2), A.RIGHT), ((2, 2), A.ABSORB),
    ]

    # Split the pairs into parallel coordinate and action sequences.
    coords, actions = zip(*policy)
    X = np.array(coords)
    Y = np.array(actions)
    Z = np.array([to_state(2, 2)] * len(policy))

    return Data(X, Y, Z, name="tiny deterministic")