if __name__ == '__main__':
    # create the problem
    problem = MORLGridworld()
    random.seed(2)
    np.random.seed(2)
    # learning rate
    alfacheb = 0.11
    # exploration rate
    eps = 0.9
    ref_points = [[10.0, -1000.0, 10.0], [-1000.0, 10.0, 10.0], [10.0, 10.0, -1000.0]]
    agents = []
    scalarization_weights = [0.0, 0.0]
    interactions = 1000

    log.info('Started reference point experiment')
    payoutslist = []
    # train one HVB agent per reference point and store its payouts
    for ref_p in xrange(len(ref_points)):
        agents.append(MORLHVBAgent(problem, alfacheb, eps, ref_points[ref_p], scalarization_weights))
        payouts, moves, states = morl_interact_multiple_episodic(agents[ref_p], problem, interactions,
                                                                 max_episode_length=300)
        payoutslist.append(payouts)
        policy = PolicyFromAgent(problem, agents[ref_p], mode='greedy')
        # policy_heat_plot(problem, policy, states)
    plot_hypervolume(agents, problem, name='reference point')
    for payouts in payoutslist:
        print 'final average reward: ' + str(np.mean(payouts, axis=0))
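# The experiment above compares how the choice of reference point shapes the
# hypervolume metric that plot_hypervolume tracks over training. As a rough
# illustration of what that metric measures, here is a minimal Monte Carlo
# sketch (hypervolume_mc_sketch is a hypothetical helper, not part of
# morlbench): the volume dominated between the reference point and the best
# observed payoffs, assuming maximisation in every objective and a reference
# point that is dominated by all payoff vectors.
def hypervolume_mc_sketch(points, ref_point, n_samples=100000, rng=None):
    rng = rng if rng is not None else np.random.RandomState(0)
    points = np.asarray(points, dtype=float)
    ref = np.asarray(ref_point, dtype=float)
    upper = points.max(axis=0)
    samples = rng.uniform(ref, upper, size=(n_samples, len(ref)))
    # a sample counts as dominated if some payoff vector is >= it everywhere
    dominated = (points[None, :, :] >= samples[:, None, :]).all(axis=2).any(axis=1)
    # dominated fraction of the bounding box, scaled by the box volume
    return np.prod(upper - ref) * dominated.mean()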
# make the interactions
log.info('Playing %i interactions on chebyagent' % interactions)
payouts, moves, states = morl_interact_multiple_episodic(chebyagent, problem, interactions,
                                                         max_episode_length=300)
# print("TEST(cheby): interactions made: \nP: " + str(payouts[:]) + ",\n M: " + str(moves[:]) + ",\n S: " + str(states[:]) + '\n')

log.info('Playing %i interactions on hvb agent' % interactions)
payouts2, moves2, states2 = morl_interact_multiple_episodic(hvbagent, problem, interactions,
                                                            max_episode_length=300)
# print("TEST(HVB): interactions made: \nP: " + str(payouts2[:]) + ",\n M: " + str(moves2[:]) + ",\n S: " + str(states2[:]) + '\n')

# collect both agents and plot the evolution of their hypervolume metric
agents = [hvbagent, chebyagent]
plot_hypervolume(agents, problem, name='agent')

# truncate both payout series to a common length before plotting them
plt.figure()
length = min(len(payouts), len(payouts2))
x = np.arange(length)
payouts = payouts[:length]
payouts2 = payouts2[:length]
plt.plot(x, payouts, 'r', label='cheb')
plt.plot(x, payouts2, 'b', label='hvb')
plt.legend()
plt.show()

if experiment_2:
    # list of agents with different weights
    agent_group = []
    # list of volumes
    chebyagent, problem, interactions, max_episode_length=300, discounted_eps=False)
# print("TEST(cheby): interactions made: \nP: " + str(payouts[:]) + ",\n M: " + str(moves[:]) + ",\n S: " + str(states[:]) + '\n')
# , moves, states = morl_interact_multiple_average_episodic(chebyagent, problem, 10, 500)
# time = problem.time_token

# run a single demonstration episode and extract its trajectory;
# the result is stored in moves2 so that moves (the training data) is kept
chebyagent._epsilon = 0.9
payouts, moves2, states = morl_interact_multiple_episodic(chebyagent, problem, 1, 300)
velocity = problem.get_velocities(states)
states = problem.create_plottable_states(states)
plot_hypervolume([chebyagent], problem)

# count, per training episode, how often each action was chosen
forward_acc = []
backward_acc = []
nothin = []
for i in xrange(len(moves)):
    counter = list(moves[i])
    nothin.append(counter.count(0))
    forward_acc.append(counter.count(1))
    backward_acc.append(counter.count(2))
x = np.arange(len(nothin))
if show_trend:
    # smooth the raw counts before plotting
    nothin = mean_continued(nothin)
    backward_acc = mean_continued(backward_acc)
    forward_acc = mean_continued(forward_acc)
plt.plot(x, nothin, 'y', label='no acceleration')
plt.plot(x, forward_acc, 'g', label='forward acceleration')
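# mean_continued is imported from elsewhere in morlbench; a minimal sketch of
# the trend smoothing assumed above (the real helper may differ): the running
# mean of the sequence up to each index.
def mean_continued_sketch(data):
    data = np.asarray(data, dtype=float)
    # cumulative sum divided by 1..n gives the mean of all values seen so far
    return np.cumsum(data) / np.arange(1, len(data) + 1)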
# agent = FixedPolicyAgent(problem, exp_policy)
# agent = QMorlAgent(problem, scalarization_weights, alpha=alfa, epsilon=eps)
# agent = PreScalarizedQMorlAgent(problem, scalarization_weights, alpha=alfa, epsilon=eps)
# agent = SARSAMorlAgent(problem, scalarization_weights, alpha=alfa, epsilon=eps)
# agent = SARSALambdaMorlAgent(problem, scalarization_weights, alpha=alfa, epsilon=eps, lmbda=0.9)
agent = MORLScalarizingAgent(problem, scalarization_weights, alfa, eps, 4.0, [-1.0, -1.0, -1.0])

# payouts, moves, states = morl_interact_multiple_average_episodic(agent, problem, runs=runs, interactions=interactions, max_episode_length=150)
payouts, moves, states = morl_interact_multiple_episodic(agent, problem, interactions=interactions,
                                                         max_episode_length=150)

learned_policy = PolicyFromAgent(problem, agent, mode='gibbs')
# learned_policy = PolicyFromAgent(problem, agent, mode='greedy')
# filename = 'figure_' + time.strftime("%Y%m%d-%H%M%S")

## Plotting ##
# plt.ion()
# figure_file_name = 'fig_runs-' + str(interactions) + "-" + agent.name() + ".png"
titlestring = agent.name()
# policy_plot2(problem, learned_policy, title=None, filename=titlestring)
# policy_heat_plot(problem, learned_policy, states)
# pickle_file_name = titlestring + '_' + time.strftime("%H%M%S") + '.p'
# pickle.dump((payouts, moves, states, problem, agent), open(pickle_file_name, "wb"))
# plt.ioff()
plot_hypervolume([agent], problem)
log.info('Average Payout: %s' % str(payouts.mean(axis=0)))
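# PolicyFromAgent with mode='gibbs' derives a stochastic policy from the
# learned Q-values. A minimal sketch of that idea (gibbs_policy_sketch is a
# hypothetical helper; morlbench's internals may differ): a temperature-scaled
# softmax over the actions of each state.
def gibbs_policy_sketch(q_table, temperature=1.0):
    # q_table: array of shape (n_states, n_actions) of scalarized Q-values;
    # subtracting the row maximum keeps the exponentials numerically stable
    prefs = np.exp((q_table - q_table.max(axis=1, keepdims=True)) / temperature)
    return prefs / prefs.sum(axis=1, keepdims=True)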
chebyagent2 = MORLScalarizingAgent(problem2, [1.0, 0.0, 0.0], alpha=alfacheb, epsilon=eps, tau=tau,
                                   ref_point=ref)

# both agents interact (times):
interactions = 1000

# make the interactions
log.info('Playing %i interactions on chebyagent' % interactions)
payouts, moves, states = morl_interact_multiple_episodic(chebyagent, problem, interactions,
                                                         max_episode_length=300)
# print("TEST(cheby): interactions made: \nP: " + str(payouts[:]) + ",\n M: " + str(moves[:]) + ",\n S: " + str(states[:]) + '\n')

log.info('Playing %i interactions on chebyagent2' % interactions)
payouts2, moves2, states2 = morl_interact_multiple_episodic(chebyagent2, problem, interactions,
                                                            max_episode_length=300)
# print("TEST(cheby2): interactions made: \nP: " + str(payouts2[:]) + ",\n M: " + str(moves2[:]) + ",\n S: " + str(states2[:]) + '\n')

# collect both agents and plot the evolution of their hypervolume metric
agents = [chebyagent2, chebyagent]
plot_hypervolume(agents, problem, name='agent')

# truncate both payout series to a common length before plotting them
plt.figure()
length = min(len(payouts), len(payouts2))
x = np.arange(length)
payouts = payouts[:length]
payouts2 = payouts2[:length]
plt.plot(x, payouts, 'r', label='chebyagent')
plt.plot(x, payouts2, 'b', label='chebyagent2')
plt.legend()
plt.show()
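# Both agents above scalarize their multi-objective Q-vectors with the
# weighted Chebyshev metric ('chebishev' in morlbench's spelling). A minimal
# sketch of the standard form (hypothetical helper; tau enters morlbench's
# utopian-point bookkeeping, which is not reproduced here):
def chebyshev_scalarization_sketch(q_vector, weights, utopian):
    # weighted per-objective distance to the utopian point z*, reduced with
    # max; the agent prefers the action that minimises this value
    return np.max(np.asarray(weights) * np.abs(np.asarray(q_vector) - np.asarray(utopian)))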
interactions = 600

if epsilon_experiment:
    log.info('Started epsilon experiment')
    for eps in xrange(len(epsilons)):
        # bind the new agent locally so the interaction always uses the agent
        # appended in this iteration
        agent = MORLScalarizingAgent(problem, epsilon=epsilons[eps], alpha=alfacheb,
                                     scalarization_weights=scalarization_weights, ref_point=ref,
                                     tau=tau, function='chebishev')
        agents.append(agent)
        morl_interact_multiple_episodic(agent, problem, interactions)
    plot_hypervolume(agents, problem, name='epsilon')

if gamma_experiment:
    log.info('Started gamma experiment')
    for gam in xrange(len(gammas)):
        # indexing agents[gam] here would pick the wrong agent whenever the
        # epsilon experiment has already filled the list
        agent = MORLScalarizingAgent(problem, epsilon=0.1, alpha=alfacheb,
                                     scalarization_weights=scalarization_weights, ref_point=ref,
                                     tau=tau, function='chebishev', gamma=gammas[gam])
        agents.append(agent)
        morl_interact_multiple_episodic(agent, problem, interactions)