Code example #1
def test_trajectory_plotting():
    """Tests trajectory plotting"""
    from agents import MyopicAgent, OptimalAgent
    from gridworld.gridworld import GridworldMdp

    # Build a random 12x12 gridworld and extract its numpy representation.
    agent = OptimalAgent()
    mdp = GridworldMdp.generate_random(12, 12, pr_wall=0.1, pr_reward=0.1)
    agent.set_mdp(mdp)
    walls, reward, start = mdp.convert_to_numpy_input()

    # Plot a myopic agent's trajectory next to an optimal agent's on the same reward.
    myopic = MyopicAgent(horizon=10)
    _plot_reward_and_trajectories_helper(
        reward, reward, walls, start, myopic, OptimalAgent(), filename="trajectory.png"
    )
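
The arrays returned by convert_to_numpy_input are plain numpy arrays, so they can be sanity-checked without the project's plotting helpers. A minimal sketch using matplotlib (preview_gridworld and its output filename are hypothetical, and the (row, col) convention for start is assumed from the docstring in example #3):

import matplotlib.pyplot as plt

def preview_gridworld(walls, reward, start, filename="gridworld_preview.png"):
    """Hypothetical helper: show walls and reward as heatmaps and mark the
    start cell, assuming start is a (row, col) tuple."""
    fig, axes = plt.subplots(1, 2, figsize=(8, 4))
    axes[0].imshow(walls, cmap="gray_r")
    axes[0].set_title("walls")
    axes[1].imshow(reward, cmap="viridis")
    axes[1].set_title("reward")
    for ax in axes:
        ax.plot(start[1], start[0], "r*", markersize=12)  # x = col, y = row (assumed)
    fig.savefig(filename)
    plt.close(fig)
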
Code example #2
        def check_model_equivalent(model, query, weights, mdp, num_iters):
            # Run the TF model once to get Q-values for every proxy reward in the query.
            with tf.compat.v1.Session() as sess:
                sess.run(model.initialize_op)
                (qvals, ) = model.compute(['q_values'],
                                          sess,
                                          mdp,
                                          query,
                                          weight_inits=weights)

            # Re-plan exactly with an OptimalAgent for each proxy reward and
            # check that its Q-values match the model's.
            agent = OptimalAgent(gamma=model.gamma, num_iters=num_iters)
            for i, proxy in enumerate(model.proxy_reward_space):
                for idx, val in zip(query, proxy):
                    mdp.rewards[idx] = val
                agent.set_mdp(mdp)
                check_qvals_equivalent(qvals[i], agent, mdp)
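
check_qvals_equivalent is not shown in this snippet. A minimal sketch of the kind of comparison it presumably performs, written with a simplified, hypothetical signature (two pre-extracted Q-value arrays rather than an agent and an mdp):

import numpy as np

def check_qvals_close(model_qvals, planner_qvals, atol=1e-3):
    """Hypothetical check: assert that the TF model's Q-values match the
    exact planner's Q-values elementwise, both shaped (num_states, num_actions)."""
    model_qvals = np.asarray(model_qvals)
    planner_qvals = np.asarray(planner_qvals)
    assert model_qvals.shape == planner_qvals.shape
    assert np.allclose(model_qvals, planner_qvals, atol=atol), \
        "Q-values from the model and the planner disagree"
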
Code example #3
def plot_reward_and_trajectories(
    true_reward,
    inferred_reward,
    walls,
    start,
    config,
    filename="reward_comparison.png",
    animate=False,
):
    """Plots reward vs inferred reward. On the true reward, plot the biased agent's trajectory. On the
    inferred reward, plot the optimal agent's trajectory.

    true_reward(ndarray): shape=(imsize x imsize)
    inferred_reward(ndarray): same as above
    walls(ndarray): shape=(imsize x imsize) of 0s and 1s, where 1s are walls
    start(tuple): containing (row, col)
    config(tf.config): config with agent params set
    filename(string): pathname of saved figure
    """
    import os

    from agents import OptimalAgent
    from gridworld.gridworld_data import create_agents_from_config

    # Only create the output directory if the filename actually contains one;
    # os.makedirs("") raises FileNotFoundError.
    dirs = os.path.dirname(filename)
    if dirs:
        os.makedirs(dirs, exist_ok=True)

    true_agent, other_agent = create_agents_from_config(config)
    inferred_agent = OptimalAgent()

    _plot_reward_and_trajectories_helper(
        true_reward, inferred_reward, walls, start, true_agent, inferred_agent, filename
    )
Code example #4
def run_test(walls, reward):
    """Runs a test on the given walls and rewards.
    walls, reward: 2D numpy arrays (numbers)"""
    # mdp, num_iters, imsize, sess, test_model and tf_value_iter_model come
    # from the enclosing test scope.

    # Ground truth: state values from exact value iteration.
    agent = OptimalAgent(num_iters=num_iters)
    agent.set_mdp(mdp)
    true_values = castAgentValuesToNumpy(agent.values)

    # Predicted: Q-values from the TF value-iteration model, reduced to
    # state values by maxing over the 5 actions.
    wall_tf = tf.placeholder(shape=(imsize, imsize), dtype=tf.float32)
    reward_tf = tf.placeholder(tf.float32, shape=(imsize, imsize))
    q_vals = test_model(wall_tf, reward_tf, tf_value_iter_model)
    out = sess.run(q_vals, feed_dict={wall_tf: walls, reward_tf: reward})
    out = np.reshape(out, (imsize * imsize, 5))
    predicted_values = np.max(out, axis=1).reshape((imsize, imsize))

    compareValues(true_values, predicted_values)
    visualizeValueDiff(true_values, predicted_values)
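
castAgentValuesToNumpy is defined elsewhere in the project. A minimal sketch of what such a conversion could look like, assuming agent.values maps (x, y) states to floats (both the dict layout and the indexing convention are assumptions, and the snake_case name is hypothetical):

import numpy as np

def cast_agent_values_to_numpy(values, imsize):
    """Hypothetical conversion: pack a {(x, y): value} dict into an
    (imsize, imsize) array so it can be compared against the TF output."""
    arr = np.zeros((imsize, imsize))
    for (x, y), v in values.items():
        arr[y, x] = v  # assumed convention: row = y, col = x
    return arr
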
Code example #5
File: maxent.py  Project: wwxFromTju/learning_biases
    # base = [['X','X','X','X','X','X'],
    #         ['X',' ','X',' ',' ','X'],
    #         ['X',' ','X','X',' ','X'],
    #         ['X',' ',' ',' ',' ','X'],
    #         ['X',' ',' ',' ',  1,'X'],
    #         ['X','X','X','X','X','X']]
    # base = [['X','X','X','X'],
    #         ['X',' ',' ','X'],
    #         ['X','1','X','X'],
    #         ['X','X','X','X']]
    # Place the agent ('A') in a copy of the base grid defined above, then run
    # IRL with an OptimalAgent(beta=1.0) demonstrator to infer the reward.
    grid = copy.deepcopy(base)
    grid[3][4] = 'A'
    # grid[1][4] = 'A'
    # grid[1][2] = 'A'
    # grid[1][4] = 'A'
    walls, start_state, inferred, rs = test_irl(grid, OptimalAgent(beta=1.0))

    print("inferred:\n", inferred)
    # Evaluate how much return a planner using the inferred reward achieves.
    almostregret = evaluate_proxy(walls,
                                  start_state,
                                  inferred,
                                  rs,
                                  episode_length=20)
    print('Percent return:', almostregret)

    # print("")
    # walls, start_state, inferred, rs = test_irl(trans, OptimalAgent(beta=1.0))
    # print("inferred:\n",inferred)
    # almostregret = evaluate_proxy(walls,start_state,inferred,rs,episode_length=20)
    # print('Percent return:', almostregret)