def testWeightVariation(self):
    """
    This test creates six Chebyshev agents whose weight vectors all
    differ. At the end it compares their hypervolumes.
    :return:
    """
    # list of agents
    self.agents = []
    # list of volumes
    self.vollist = []
    # six agents, each with a different weight vector
    self.agents.append(MORLScalarizingAgent(self.gridworldproblem, [1.0, 0.0, 0.0], alpha=self.alf,
                                            epsilon=self.eps, tau=self.tau, ref_point=self.ref))
    self.agents.append(MORLScalarizingAgent(self.gridworldproblem, [0.0, 1.0, 0.0], alpha=self.alf,
                                            epsilon=self.eps, tau=self.tau, ref_point=self.ref))
    self.agents.append(MORLScalarizingAgent(self.gridworldproblem, [0.5, 0.5, 0.0], alpha=self.alf,
                                            epsilon=self.eps, tau=self.tau, ref_point=self.ref))
    self.agents.append(MORLScalarizingAgent(self.gridworldproblem, [0.0, 0.5, 0.5], alpha=self.alf,
                                            epsilon=self.eps, tau=self.tau, ref_point=self.ref))
    self.agents.append(MORLScalarizingAgent(self.gridworldproblem, [0.5, 0.0, 0.5], alpha=self.alf,
                                            epsilon=self.eps, tau=self.tau, ref_point=self.ref))
    self.agents.append(MORLScalarizingAgent(self.gridworldproblem, [0.33, 0.33, 0.33], alpha=self.alf,
                                            epsilon=self.eps, tau=self.tau, ref_point=self.ref))
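    # Hedged sketch of the comparison step the docstring refers to (assumed,
    # not part of this excerpt): each agent interacts with the problem and the
    # largest hypervolume it reached is collected for comparison.
    # morl_interact_multiple_episodic and max_volumes are used the same way in
    # the other experiment scripts of this repo.
    for agent in self.agents:
        morl_interact_multiple_episodic(agent, self.gridworldproblem,
                                        interactions=self.interactions)
        self.vollist.append(max(agent.max_volumes))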
def setUp(self):
    # create Problem
    self.gridworldproblem = MORLBuridansAssProblem()
    self.problem = Deepsea()
    # create and randomly initialize a weight vector
    self.scalarization_weights = np.zeros(self.problem.reward_dimension)
    self.scalarization_weights = random.sample([i for i in np.linspace(0, 5, 5000)],
                                               len(self.scalarization_weights))
    # tau is for the chebyshev agent
    self.tau = 4.0
    # ref point is used for hypervolume calculation
    self.ref = [-1.0, -1.0, -1.0]
    # learning rates
    self.alf = 0.1
    self.alfacheb = 0.1
    self.alfahvb = 0.1
    # probability of epsilon-greedy selection
    self.eps = 0.1
    # create one agent using the chebyshev scalarization method
    self.chebyagent = MORLScalarizingAgent(self.gridworldproblem, [1.0, 0.0, 0.0], alpha=self.alfacheb,
                                           epsilon=self.eps, tau=self.tau, ref_point=self.ref)
    # create one agent using the hypervolume-based algorithm
    self.hvbagent = MORLHVBAgent(self.gridworldproblem, alpha=self.alfahvb, epsilon=self.eps,
                                 ref=self.ref, scal_weights=[1.0, 10.0])
    self.hagent = MORLHLearningAgent(self.problem, self.eps, self.alf, self.scalarization_weights)
    # number of interactions both agents make:
    self.interactions = 200
    self.convHullAgent = MORLConvexHullValueIteration(self.problem)
    self.data = [[4, 2, 0], [1, 1, 1], [1, 1, 0], [1, 0, 1]]
saved_weights = []
plt.ion()
problem = MORLBuridansAss1DProblem()
scalarization_weights = np.array([1.0, 0.0, 0.0])
eps = 0.9
alfa = 0.08
tau = 2.0
ref_point = [-1.0, ] * problem.reward_dimension
interactions = 1500

chebyagent = MORLScalarizingAgent(problem, epsilon=eps, alpha=alfa,
                                  scalarization_weights=scalarization_weights,
                                  ref_point=ref_point, tau=tau, gamma=0.9,
                                  function='linear')

payouts, moves, states = morl_interact_multiple_episodic(chebyagent, problem,
                                                         interactions=interactions,
                                                         max_episode_length=300)
log.info('Average Payout: %s' % (str(payouts.mean(axis=0))))

volumes = [0]
volumes.extend(chebyagent.max_volumes)
x = np.arange(len(volumes))

##################################
#              PLOT              #
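##################################
# A minimal plotting sketch (assumed; the original plot body is not part of
# this excerpt): draw the hypervolume development over the interactions with
# matplotlib, which is already imported above as plt.
plt.figure()
plt.plot(x, volumes)
plt.xlabel('interaction')
plt.ylabel('hypervolume')
plt.title('hypervolume development')
plt.draw()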
# tau is for the chebyshev agent
tau = 4.0
# ref point is used for hypervolume calculation
ref = [-1.0, -1.0, -1.0]
# learning rates
alf = 0.2
alfacheb = 0.2
alfahvb = 0.1
n_vectors = 5
# probability of epsilon-greedy selection
eps = 0.1
# create one agent using the scalarization method
chebyagent = MORLScalarizingAgent(problem, [1.0, 0.0, 0.0], alpha=alfacheb, epsilon=eps,
                                  tau=tau, ref_point=ref)
# create one agent using the hypervolume-based algorithm
hvbagent = MORLHVBAgent(problem, alpha=alfahvb, epsilon=0.1, ref=ref, scal_weights=[1.0, 10.0])
# number of interactions both agents make:
interactions = 1000

if experiment_1:
    # make the interactions
    log.info('Playing %i interactions on chebyagent' % interactions)
    payouts, moves, states = morl_interact_multiple_episodic(chebyagent, problem, interactions,
                                                             max_episode_length=300)
# ref point is used for hypervolume calculation
ref = [-10.0, ] * problem.reward_dimension
# learning rate
alfacheb = 0.4
# probability of epsilon-greedy selection
eps = 0.9
# should we show the total acceleration count or just the trend:
show_trend = True
# create one agent using the chebyshev scalarization method
chebyagent = MORLScalarizingAgent(problem, epsilon=eps, alpha=alfacheb,
                                  scalarization_weights=scalarization_weights,
                                  ref_point=ref, tau=tau, gamma=0.9)
# hvbagent = MORLHVBAgent(problem, alfacheb, eps, ref, [0.0, 0.0])
# number of interactions both agents make:
interactions = 300
#
payouts, moves, states = morl_interact_multiple_episodic(chebyagent, problem, interactions,
                                                         max_episode_length=300,
                                                         discounted_eps=False)
# print("TEST(cheby): interactions made: \nP: " + str(payouts[:]) + ",\n M: " + str(moves[:]) +
#       ",\n S: " + str(states[:]) + '\n')
# scalarization_weights = np.array([0.0, 1.0])
# scalarization_weights = np.array([0.9, 0.1])
eps = 0.1
alfa = 0.4
runs = 2
interactions = 1000

# exp_policy = PolicyDeepseaExpert(problem, task='T2')
# det_policy = PolicyDeepseaDeterministic(problem, policy='P1')
# agent = FixedPolicyAgent(problem, exp_policy)
# agent = QMorlAgent(problem, scalarization_weights, alpha=alfa, epsilon=eps)
# agent = PreScalarizedQMorlAgent(problem, scalarization_weights, alpha=alfa, epsilon=eps)
# agent = SARSAMorlAgent(problem, scalarization_weights, alpha=alfa, epsilon=eps)
# agent = SARSALambdaMorlAgent(problem, scalarization_weights, alpha=alfa, epsilon=eps, lmbda=0.9)
agent = MORLScalarizingAgent(problem, scalarization_weights, alfa, eps, 4.0, [-1.0, -1.0, -1.0])

# payouts, moves, states = morl_interact_multiple_average_episodic(agent, problem, runs=runs,
#                                                                  interactions=interactions,
#                                                                  max_episode_length=150)
payouts, moves, states = morl_interact_multiple_episodic(agent, problem, interactions=interactions,
                                                         max_episode_length=150)

learned_policy = PolicyFromAgent(problem, agent, mode='gibbs')
# learned_policy = PolicyFromAgent(problem, agent, mode='greedy')
# filename = 'figure_' + time.strftime("%Y%m%d-%H%M%S")

## Plotting ##
# plt.ion()
# figure_file_name = 'fig_runs-' + str(interactions) + "-" + agent.name() + ".png"
titlestring = agent.name()
scalarization_weights = [1.0, 0.0, 0.0]
# tau is for the chebyshev agent
tau = 0.1
# ref point is used for hypervolume calculation
ref = [-0.1, ] * problem.reward_dimension
# learning rate
alfacheb = 0.01
# probability of epsilon-greedy selection
eps = 0.7
# create one agent using the chebyshev scalarization method
chebyagent = MORLScalarizingAgent(problem, epsilon=eps, alpha=alfacheb,
                                  scalarization_weights=scalarization_weights,
                                  ref_point=ref, tau=tau)
# number of interactions both agents make:
interactions = 2000
n_vectors = 2

if hypervolume_experiment:
    # make the interactions
    payouts, moves, states = morl_interact_multiple_episodic(chebyagent, problem, interactions,
                                                             max_episode_length=150)
    print("TEST(cheby): interactions made: \nP: " + str(payouts[:]) + ",\n M: " + str(moves[:]) +
          ",\n S: " + str(states[:]) + '\n')
    plot_hypervolume([chebyagent], problem)
def eps():
    # alternating epsilon schedule: interactions/2 pairs of a greedy phase
    # (0.1) followed by an exploration phase (0.8), one value per interaction
    for i in xrange(interactions / 2):
        yield 0.1
        yield 0.8

alfa = 0.1
runs = 1
interactions = 10000
ref_point = [-1.0, ] * problem.reward_dimension

# agent = QMorlAgent(problem, scalarization_weights, alpha=alfa, epsilon=eps)
# agent = PreScalarizedQMorlAgent(problem, scalarization_weights, alpha=alfa, epsilon=eps)
# agent = SARSAMorlAgent(problem, scalarization_weights, alpha=alfa, epsilon=eps)
# agent = SARSALambdaMorlAgent(problem, scalarization_weights, alpha=alfa, epsilon=eps, lmbda=0.9)
agent = MORLScalarizingAgent(problem, scalarization_weights, alfa, eps(), 4.0, ref_point)

# payouts, moves, states = morl_interact_multiple_average_episodic(agent, problem, runs=runs,
#                                                                  interactions=interactions,
#                                                                  max_episode_length=150)
payouts, moves, states = morl_interact_multiple_episodic(agent, problem, interactions=interactions,
                                                         max_episode_length=150)
log.info('Average Payout: %s' % (str(payouts.mean(axis=0))))

# learned_policy = PolicyFromAgent(problem, agent, mode='gibbs')
learned_policy = PolicyFromAgent(problem, agent, mode=None)
# learned_policy = PolicyFromAgent(problem, agent, mode='greedy')
# learned_policy = PolicyGridworld(problem, policy='DIAGONAL')
# learned_policy = PolicyGridworld(problem, policy='RIGHT')
# learned_policy = PolicyGridworld(problem, policy='DOWN')
# filename = 'figure_' + time.strftime("%Y%m%d-%H%M%S")
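# Hedged sketch of the plotting step that typically follows in these scripts
# (assumed here, not part of this excerpt); policy_plot2 and plot_hypervolume
# are called the same way in the other experiments of this section.
policy_plot2(problem, learned_policy, title=None, filename=None)
plot_hypervolume([agent], problem)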
problem = MORLResourceGatheringProblem()
# scalarization_weights = np.array([0.153, 0.847])
# scalarization_weights = np.array([0.5, 0.5])
scalarization_weights = np.array([0.5, 0.5, 0.0])
# scalarization_weights = np.array([0.0, 1.0])
# scalarization_weights = np.array([0.9, 0.1])
eps = 0.4
alfa = 0.4
runs = 1
interactions = 100
max_steps = 100
tau = 1.0

agent = MORLScalarizingAgent(problem, scalarization_weights, alpha=alfa, epsilon=eps, tau=tau,
                             gamma=1.0, ref_point=[-1.0, -1.0, -1.0])

# payouts, moves, states = morl_interact_multiple_average_episodic(agent, problem, runs=runs,
#                                                                  interactions=interactions,
#                                                                  max_episode_length=150)
payouts, moves, states = morl_interact_multiple_episodic(agent, problem, interactions=interactions)
log.info('Average Payout: %s' % (str(payouts.mean(axis=0))))

learned_policy = PolicyFromAgent(problem, agent, mode='greedy')
# filename = 'figure_' + time.strftime("%Y%m%d-%H%M%S")
states = problem.create_plottable_states(states)

## Plotting ##
# plt.ion()
if __name__ == '__main__':
    # create Problem
    problem = MORLGridworld()
    # create a weight vector
    scalarization_weights = [1.0, 0.0, 0.0]
    # tau is for the chebyshev agent
    tau = 4.0
    # ref point is used for hypervolume calculation
    ref = [-1.0, ] * problem.reward_dimension
    # learning rate
    alfacheb = 0.11
    # probability of epsilon-greedy selection
    eps = 0.1
    hv_calc = HyperVolumeCalculator(ref)
    # create one agent using the chebyshev scalarization method
    chebyagent = MORLScalarizingAgent(problem, epsilon=eps, alpha=alfacheb,
                                      scalarization_weights=scalarization_weights,
                                      ref_point=ref, tau=tau)
    linearagent = MORLScalarizingAgent(problem, epsilon=eps, alpha=alfacheb,
                                       scalarization_weights=scalarization_weights,
                                       ref_point=ref, tau=tau, function='linear')
    # number of interactions both agents make:
    interactions = 1000
    c_payouts, c_moves, c_states = morl_interact_multiple_episodic(chebyagent, problem, interactions,
                                                                   max_episode_length=150)
    l_payouts, l_moves, l_states = morl_interact_multiple_episodic(linearagent, problem, interactions,
                                                                   max_episode_length=150)
    c_rewards = []
    for i in xrange(len(c_payouts)):
        cummulated = np.zeros(problem.reward_dimension)
gammas = np.arange(0, 1, 0.1)
alphas = np.arange(0, 1, 0.1)
taus = np.arange(0.0, 10.0, 1.0)
ref_points = [[-1.0, -1.0, -25.0], [-1.0, -25.0, -1.0], [-25.0, -1.0, -1.0]]
# agents:
agents = []
interactions = 600

if epsilon_experiment:
    log.info('Started epsilon experiment')
    for eps in xrange(len(epsilons)):
        agents.append(MORLScalarizingAgent(problem, epsilon=epsilons[eps], alpha=alfacheb,
                                           scalarization_weights=scalarization_weights,
                                           ref_point=ref, tau=tau, function='chebishev'))
        morl_interact_multiple_episodic(agents[eps], problem, interactions)
    plot_hypervolume(agents, problem, name='epsilon')

if gamma_experiment:
    log.info('Started gamma experiment')
    for gam in xrange(len(gammas)):
        # the remaining arguments of this call are truncated in the excerpt;
        # the completion is assumed to mirror the epsilon experiment, with
        # gamma swept instead of epsilon:
        agents.append(MORLScalarizingAgent(problem, epsilon=0.1, alpha=alfacheb,
                                           scalarization_weights=scalarization_weights,
                                           ref_point=ref, tau=tau, gamma=gammas[gam],
                                           function='chebishev'))
        morl_interact_multiple_episodic(agents[gam], problem, interactions)
    plot_hypervolume(agents, problem, name='gamma')
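# Hedged sketch of the analogous tau sweep: taus is defined above but its loop
# is not part of this excerpt. tau_experiment is an assumed flag mirroring
# epsilon_experiment and gamma_experiment.
if tau_experiment:
    log.info('Started tau experiment')
    for t in xrange(len(taus)):
        agents.append(MORLScalarizingAgent(problem, epsilon=0.1, alpha=alfacheb,
                                           scalarization_weights=scalarization_weights,
                                           ref_point=ref, tau=taus[t], function='chebishev'))
        morl_interact_multiple_episodic(agents[t], problem, interactions)
    plot_hypervolume(agents, problem, name='tau')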
eps = 0.9
alfa = 0.3
runs = 1
interactions = 500
max_steps = 150
tau = 1.0
ref_point = [-3.0, ] * problem.reward_dimension

# hvbagent = MORLHVBAgent(problem, alpha=alfa, epsilon=0.9, ref=ref_point, scal_weights=[1.0, 10.0])
agent = MORLScalarizingAgent(problem, scalarization_weights, alpha=alfa, epsilon=eps, tau=tau,
                             lmbda=0.95, ref_point=ref_point)

payouts, moves, states = morl_interact_multiple_episodic(agent, problem, interactions=interactions)
log.info('Average Payout: %s' % (str(payouts.mean(axis=0))))

learned_policy = PolicyFromAgent(problem, agent, mode='greedy')
states = problem.create_plottable_states(states)

policy_plot2(problem, learned_policy, title=None, filename=None)
policy_heat_plot(problem, learned_policy, states)
plot_hypervolume([agent], problem)
problem = MOPuddleworldProblem(size=20)
scalarization_weights = np.array([1.0, 0.0])
max_episode_l = 200
alfa = 0.1
tau = 1.0
interactions = 50
eps = 0.9

agent = MORLScalarizingAgent(problem, scalarization_weights, alpha=alfa, epsilon=eps, tau=tau,
                             lmbda=1.0, ref_point=[-1.0, -1.0])

payouts, moves, states = morl_interact_multiple_episodic(agent, problem, interactions=interactions,
                                                         max_episode_length=max_episode_l)
agent.create_scalar_Q_table()

x = [w for w in xrange(problem._size)]
y = [d for d in xrange(problem._size)]
x, y = np.meshgrid(x, y)
# the expression below is truncated in this excerpt; the completion over all
# grid states is assumed:
z = np.array([max([agent.Qs[s, a] for a in xrange(problem.n_actions)])
              for s in xrange(problem.n_states)])
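# Hedged sketch (assumed continuation): the meshgrid and z values above suggest
# a 3D surface plot of the best scalarized Q-value per state over the
# puddleworld grid. The reshape assumes n_states == _size * _size.
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401, enables the 3d projection

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.plot_surface(x, y, z.reshape(problem._size, problem._size))
plt.show()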
alfa = 0.1
runs = 3
interactions = 500
episode_length = 150
tau = 4.0  # only for Chebyshev and deepsea
gamma = 0.9
ref = [-1.0, -1.0, -1.0]  # reference point for hypervolume calculation

# Select a learning agent:
# agent1 = QMorlAgent(problem, scalarization_weights, alpha=alfa, epsilon=eps)
# agent = PreScalarizedQMorlAgent(problem, scalarization_weights, alpha=alfa, epsilon=eps)
# agent = SARSAMorlAgent(problem, scalarization_weights, alpha=alfa, epsilon=eps)
# agent = SARSALambdaMorlAgent(problem, scalarization_weights, alpha=alfa, epsilon=eps, lmbda=0.9)
agent = MORLScalarizingAgent(problem, scalarization_weights, alfa, eps, tau, ref_point=ref,
                             gamma=gamma, function='chebishev')
# agent = MORLHVBAgent(problem, alfa, eps, ref, scalarization_weights)

# Run the experiment once for the given number of interactions
payouts, moves, states = morl_interact_multiple_episodic(agent, problem, interactions=interactions,
                                                         max_episode_length=episode_length)
# only for multidimensional state problems (resource gathering, Buridan's ass)
states = problem.create_plottable_states(states)

# Repeat the experiment "runs" times and average the results:
# payouts, moves, states = morl_interact_multiple_average_episodic(agent, problem, runs=runs,
#                                                                  interactions=interactions,
#                                                                  max_episode_length=episode_length)