def plot_tdiffs(agent_class, runs=20):
    ''' Plot the TD error, averaged over all runs. '''
    agent = load(agent_class, 1)
    # Accumulate the per-episode TD errors, averaged across runs.
    tdiffs = np.zeros(len(agent.tdiffs))
    for run in range(1, runs + 1):
        agent = load(agent_class, run)
        tdiffs += np.array(agent.tdiffs) / runs
    plt.plot(tdiffs, agent.colour, label=agent.legend)
    plt.xlabel('Episodes')
    plt.title('Average Delta')
    plt.ylabel('Delta')
    plt.savefig('./runs/delta.png', bbox_inches='tight')

def plot_value_function(agent_class, run, i):
    ''' Plot the value function and action-values along state dimension i
        for the given run. '''
    plt.clf()
    agent = load(agent_class, run)
    state0 = simulator.Simulator().get_state()
    values, qval1, qval2 = [], [], []
    min_range = -SHIFT_VECTOR[i]
    max_range = SCALE_VECTOR[i]
    variables = []
    for j in range(VALUE_STEPS):
        # Sweep dimension i from min_range towards min_range + max_range.
        var = max_range * (1. * j / VALUE_STEPS) + min_range
        state0[i] = var
        values.append(agent.value_function(state0))
        feat = agent.action_features[0](state0)
        qval1.append(agent.action_weights[0].dot(feat))
        qval2.append(agent.action_weights[1].dot(feat))
        variables.append(var)
    # Use the true extrema of all three curves for the plot bounds
    # (the original took min(values) for the upper bound).
    max_val = max(max(qval1), max(qval2), max(values))
    min_val = min(min(qval1), min(qval2), min(values))
    plt.plot(variables, values, '-b', label='$V(s)$')
    plt.plot(variables, qval1, '-r', label='$Q(s, a_1)$')
    plt.plot(variables, qval2, '-g', label='$Q(s, a_2)$')
    plt.axis([min_range, max_range, min_val, max_val])
    plt.legend(loc='lower right')
    plt.xlabel(str(i))
    plt.ylabel('$V$')
    plt.savefig('./runs/' + agent.name + '/value_functions/s' + str(i),
                bbox_inches='tight')

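# A sketch of the state normalisation the sweep above appears to assume:
# SHIFT_VECTOR and SCALE_VECTOR map each raw state dimension into [0, 1]
# for the feature basis, so dimension i spans
# [-SHIFT_VECTOR[i], -SHIFT_VECTOR[i] + SCALE_VECTOR[i]] in raw units.
# This is inferred from how min_range and max_range are used; the real
# scaling lives elsewhere in the repo and may differ.
def normalise_state_sketch(state):
    return (np.array(state) + SHIFT_VECTOR) / SCALE_VECTOR
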
def plot_x_dx(agent_class, run):
    ''' Plot the action-value surfaces over the (x, dx) state dimensions. '''
    plt.clf()
    agent = load(agent_class, run)
    fig = plt.figure()
    state = simulator.Simulator().get_state()
    plot = fig.add_subplot(111, projection='3d')
    plot.set_xlabel('x')
    plot.set_ylabel('dx')
    plot.set_zlabel('Action-Value')
    xxrange = np.arange(0, 1000, 10.0)
    yyrange = np.arange(0, 200, 10.0)
    xgrid, ygrid = np.meshgrid(xxrange, yyrange)

    def get_state(x, dx):
        ''' Build a full state with x and dx substituted in. '''
        return np.append(np.array([x, dx]), state[2:])

    def qvalue(action, x, dx):
        ''' Linear action-value at the substituted state. '''
        return agent.action_weights[action].dot(
            fourier_basis(get_state(x, dx)))

    functions = [lambda x, dx: qvalue(0, x, dx),
                 lambda x, dx: qvalue(1, x, dx)]
    # One colour per action: zip() truncates to the shorter list, so the
    # second surface was silently dropped when only one colour was given.
    colours = [[1, 0, 0], [0, 1, 0]]
    for col, func in zip(colours, functions):
        zarray = np.array([func(x, dx)
                           for x, dx in zip(np.ravel(xgrid), np.ravel(ygrid))])
        zgrid = zarray.reshape(xgrid.shape)
        print(col)
        plot.plot_surface(xgrid, ygrid, zgrid, color=col)
    plt.savefig('./runs/' + agent.name + '/value_functions/xdx',
                bbox_inches='tight')

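# A minimal sketch of what fourier_basis() is assumed to compute: the
# standard Fourier basis for linear value function approximation
# (Konidaris et al., 2011), one cosine feature per integer coefficient
# vector, over a state already scaled into [0, 1]. The order here is
# illustrative; the real fourier_basis() in this repo may differ.
def fourier_basis_sketch(scaled_state, order=3):
    import itertools
    coeffs = np.array(list(itertools.product(range(order + 1),
                                             repeat=len(scaled_state))))
    return np.cos(np.pi * coeffs.dot(scaled_state))
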
def plot_episode(agent_class, run):
    ''' Draw example episodes from a trained agent. '''
    agent = load(agent_class, run)
    sims = []
    for _ in range(PLOT_EPISODES):
        sim = simulator.Simulator()
        agent.run_episode(sim)
        sims.append(sim)
    # Deferred import: the interface module is only needed for drawing.
    import interface
    interface.Interface().draw_episode(sims, 'after', SAVE)

def plot_return_agents(agents, max_runs, runs=50):
    ''' Plot all the average returns for all agents. '''
    plt.clf()
    for agent_class in agents:
        returns = np.zeros((max_runs,))
        data = np.zeros((runs, max_runs))
        for run in range(1, runs + 1):
            agent = load(agent_class, run)
            ret = np.load(agent.filename + '.npy')
            ret = average_return(ret[:max_runs])
            returns += ret / runs
            data[run - 1, :] = ret
        plot_return(agent, returns, data)
    plt.legend(loc='upper left')
    plt.savefig('./runs/return', bbox_inches='tight')

def plot_run(agent_class, run):
    ''' Plot a single run. '''
    agent = load(agent_class, run)
    returns = average_return(np.load(agent.filename + '.npy'))
    plot_return(agent, returns)
    plt.savefig(agent.filename + '.png', bbox_inches='tight')

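# Example driver, a sketch only: 'ExampleAgent' is a hypothetical stand-in
# for whichever agent classes this repo defines, and the run/episode counts
# are arbitrary. Runs are indexed from 1, matching the loops above.
if __name__ == '__main__':
    plot_return_agents([ExampleAgent], max_runs=500, runs=50)
    plot_run(ExampleAgent, run=1)
    plot_episode(ExampleAgent, run=1)
    plot_tdiffs(ExampleAgent, runs=20)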