def test_trajectory_plotting():
    """Tests trajectory plotting"""
    from agents import MyopicAgent, OptimalAgent
    from gridworld.gridworld import GridworldMdp

    agent = OptimalAgent()
    mdp = GridworldMdp.generate_random(12, 12, pr_wall=0.1, pr_reward=0.1)
    agent.set_mdp(mdp)
    walls, reward, start = mdp.convert_to_numpy_input()
    myopic = MyopicAgent(horizon=10)
    _plot_reward_and_trajectories_helper(
        reward, reward, walls, start, myopic, OptimalAgent(),
        filename="trajectory.png")
def check_model_equivalent(model, query, weights, mdp, num_iters):
    with tf.compat.v1.Session() as sess:
        sess.run(model.initialize_op)
        (qvals,) = model.compute(
            ['q_values'], sess, mdp, query, weight_inits=weights)

    agent = OptimalAgent(gamma=model.gamma, num_iters=num_iters)
    for i, proxy in enumerate(model.proxy_reward_space):
        # Overwrite the queried reward entries with this proxy's values, then
        # compare the model's Q-values against an exact optimal agent.
        for idx, val in zip(query, proxy):
            mdp.rewards[idx] = val
        agent.set_mdp(mdp)
        check_qvals_equivalent(qvals[i], agent, mdp)
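# `check_qvals_equivalent` is defined elsewhere in this module. The sketch
# below shows one way such a check could look; it is an assumption, not the
# actual implementation. It assumes the model's Q-values for a single proxy
# reshape to (num_states, num_actions) and that the agent exposes a dict of
# state values in `agent.values` (as used by run_test below), with states
# enumerated in the same order as the model's flattening.
def _check_qvals_equivalent_sketch(model_qvals, agent, mdp, num_actions=5):
    import numpy as np
    # The state value implied by the model is the max over actions.
    model_values = np.asarray(model_qvals).reshape(-1, num_actions).max(axis=-1)
    # Exact state values from the agent's value iteration, in a fixed order.
    agent_values = np.array([agent.values[s] for s in sorted(agent.values)])
    np.testing.assert_allclose(model_values, agent_values, atol=1e-3)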
def plot_reward_and_trajectories(
        true_reward, inferred_reward, walls, start, config,
        filename="reward_comparison.png", animate=False):
    """Plots the true reward vs the inferred reward.

    On the true reward, plots the biased agent's trajectory. On the inferred
    reward, plots the optimal agent's trajectory.

    true_reward (ndarray): shape (imsize, imsize)
    inferred_reward (ndarray): same shape as true_reward
    walls (ndarray): shape (imsize, imsize) of 0s and 1s, where 1s are walls
    start (tuple): (row, col) start position
    config (tf.config): config with agent params set
    filename (string): pathname of the saved figure
    """
    from agents import OptimalAgent
    from gridworld.gridworld_data import create_agents_from_config

    dirs = os.path.dirname(filename)
    if dirs:
        os.makedirs(dirs, exist_ok=True)
    true_agent, other_agent = create_agents_from_config(config)
    inferred_agent = OptimalAgent()
    _plot_reward_and_trajectories_helper(
        true_reward, inferred_reward, walls, start, true_agent,
        inferred_agent, filename)
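# `_plot_reward_and_trajectories_helper` is defined elsewhere; the sketch
# below shows the kind of side-by-side figure it is expected to produce
# (true reward vs. inferred reward, with the start position marked). It is a
# simplified, assumed version: the trajectory overlays drawn by the real
# helper are omitted here, since they depend on the agents' rollout API.
def _plot_rewards_side_by_side_sketch(true_reward, inferred_reward, walls,
                                      start, filename):
    import numpy as np
    import matplotlib.pyplot as plt
    fig, axes = plt.subplots(1, 2, figsize=(10, 5))
    for ax, reward, title in zip(axes, (true_reward, inferred_reward),
                                 ('True reward', 'Inferred reward')):
        # Mask out walls so they render as blank cells.
        masked = np.ma.masked_where(walls == 1, reward)
        ax.imshow(masked, cmap='viridis')
        # start is assumed to be (row, col), as in the docstring above.
        ax.plot(start[1], start[0], marker='*', color='red', markersize=12)
        ax.set_title(title)
    fig.savefig(filename)
    plt.close(fig)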
def run_test(walls, reward):
    """Runs the test on the given walls and rewards.

    walls, reward: 2D numpy arrays (numbers)
    """
    agent = OptimalAgent(num_iters=num_iters)
    agent.set_mdp(mdp)
    true_values = castAgentValuesToNumpy(agent.values)

    wall_tf = tf.compat.v1.placeholder(tf.float32, shape=(imsize, imsize))
    reward_tf = tf.compat.v1.placeholder(tf.float32, shape=(imsize, imsize))
    q_vals = test_model(wall_tf, reward_tf, tf_value_iter_model)
    out = sess.run(q_vals, feed_dict={wall_tf: walls, reward_tf: reward})

    # Each row of `out` holds the Q-values for one grid cell; the predicted
    # state value is the max over the 5 actions.
    out = np.reshape(out, (imsize * imsize, 5))
    predicted_values = np.max(out, axis=1).reshape((imsize, imsize))
    compareValues(true_values, predicted_values)
    visualizeValueDiff(true_values, predicted_values)
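# `castAgentValuesToNumpy` and `compareValues` are helpers defined elsewhere.
# The sketches below show assumed behaviour consistent with how they are used
# above: the first packs the agent's per-state value dict into an
# (imsize, imsize) array, the second asserts the two value grids agree within
# a tolerance. The (row, col) key convention and the tolerance are
# assumptions, not the repo's actual choices.
def _cast_agent_values_to_numpy_sketch(values, imsize):
    import numpy as np
    arr = np.zeros((imsize, imsize))
    for (row, col), value in values.items():
        arr[row, col] = value
    return arr

def _compare_values_sketch(true_values, predicted_values, atol=1e-2):
    import numpy as np
    np.testing.assert_allclose(true_values, predicted_values, atol=atol)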
# Run IRL on a hand-built grid and check how much of the optimal return the
# inferred reward recovers. Alternative grids and agent positions are kept
# below as commented-out variants.
# base = [['X', 'X', 'X', 'X', 'X', 'X'],
#         ['X', ' ', 'X', ' ', ' ', 'X'],
#         ['X', ' ', 'X', 'X', ' ', 'X'],
#         ['X', ' ', ' ', ' ', ' ', 'X'],
#         ['X', ' ', ' ', ' ',  1 , 'X'],
#         ['X', 'X', 'X', 'X', 'X', 'X']]
# base = [['X', 'X', 'X', 'X'],
#         ['X', ' ', ' ', 'X'],
#         ['X', '1', 'X', 'X'],
#         ['X', 'X', 'X', 'X']]
grid = copy.deepcopy(base)
grid[3][4] = 'A'
# grid[1][4] = 'A'
# grid[1][2] = 'A'

walls, start_state, inferred, rs = test_irl(grid, OptimalAgent(beta=1.0))
print("inferred:\n", inferred)
almostregret = evaluate_proxy(walls, start_state, inferred, rs, episode_length=20)
print('Percent return:', almostregret)

# walls, start_state, inferred, rs = test_irl(trans, OptimalAgent(beta=1.0))
# print("inferred:\n", inferred)
# almostregret = evaluate_proxy(walls, start_state, inferred, rs, episode_length=20)
# print('Percent return:', almostregret)
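# The 'Percent return' printed above presumably measures how well planning
# with the inferred reward does when evaluated under the true reward, as a
# fraction of the optimal return. The helper below is a hedged sketch of that
# normalisation, not the actual evaluate_proxy implementation.
def _percent_return_sketch(inferred_policy_return, optimal_return,
                           baseline_return=0.0):
    # 1.0 means the inferred reward recovers the optimal policy's return;
    # 0.0 means it does no better than the baseline.
    return ((inferred_policy_return - baseline_return)
            / (optimal_return - baseline_return))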