# pickle.dump((payouts, moves, states, problem, agent), open('test_pickle.p', "wb"))

# Recover scalarization weights from the learned policy via inverse MORL.
i_morl = InverseMORLIRL(problem, learned_policy)
scalarization_weights_alge = i_morl.solvealge()
# log.info("scalarization weights (alge): %s" % (str(scalarization_weights_alge)))

# Train a second agent on a fresh problem instance using the recovered weights.
problem2 = MORLGridworldStatic()
agent2 = PreScalarizedQMorlAgent(problem2, scalarization_weights_alge,
                                 alpha=alfa, epsilon=eps)
payouts2, moves2, states2 = morl_interact_multiple_episodic(
    agent2, problem2, interactions=interactions, max_episode_length=150)
log.info('Average Payout: %s' % (str(payouts2.mean(axis=0))))

# learned_policy2 = PolicyFromAgent(problem2, agent2, mode='gibbs')
learned_policy2 = PolicyFromAgent(problem2, agent2, mode='greedy')

## Plotting ##
plt.ion()
policy_plot2(problem, learned_policy)
# policy_heat_plot(problem, learned_policy, states)
plt.ioff()
# policy_plot2(problem2, learned_policy2)
policy_heat_plot(problem2, learned_policy2, states2)
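# For orientation, a minimal sketch of the linear scalarization a
# pre-scalarized Q-agent applies when acting greedily: the vector-valued
# Q-estimates are collapsed with the (here: recovered) weight vector and the
# argmax action is taken. Illustrative helper only, not the actual
# PreScalarizedQMorlAgent implementation.
def _sketch_scalarized_greedy_action(q_vectors, w):
    # q_vectors: array of shape (n_actions, reward_dimension)
    # w: weight vector of shape (reward_dimension,)
    return int(np.argmax(np.dot(np.asarray(q_vectors), np.asarray(w))))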
                                 ref_point=[-1.0, -1.0, -1.0])

# payouts, moves, states = morl_interact_multiple_average_episodic(agent, problem, runs=runs, interactions=interactions, max_episode_length=150)

# Let the agent interact with the problem and log the mean vector payout.
payouts, moves, states = morl_interact_multiple_episodic(agent, problem,
                                                         interactions=interactions)
log.info('Average Payout: %s' % (str(payouts.mean(axis=0))))

learned_policy = PolicyFromAgent(problem, agent, mode='greedy')
# filename = 'figure_' + time.strftime("%Y%m%d-%H%M%S")
states = problem.create_plottable_states(states)

## Plotting ##
# plt.ion()
# figure_file_name = 'fig_runs-' + str(interactions) + "-" + agent.name() + ".png"
# titlestring = agent.name()
policy_plot2(problem, learned_policy, title=None, filename=None)
policy_heat_plot(problem, learned_policy, states)
# pickle_file_name = titlestring + '_' + time.strftime("%H%M%S") + '.p'
# pickle.dump((payouts, moves, states, problem, agent), open(pickle_file_name, "wb"))
# plt.ioff()

# Plot the hypervolume development over the learning process.
plot_hypervolume([agent], problem)
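# plot_hypervolume above tracks the hypervolume indicator: the volume that the
# agent's non-dominated payouts dominate with respect to a reference point.
# A minimal 2-D sketch for the maximization case; the framework computes this
# with its own hypervolume calculator, so this is illustrative only:
def _sketch_hypervolume_2d(points, ref):
    # Keep only points that strictly dominate the reference point,
    # then sweep them in order of decreasing first objective.
    pts = sorted([p for p in points if p[0] > ref[0] and p[1] > ref[1]],
                 key=lambda p: p[0], reverse=True)
    hv, last_y = 0.0, ref[1]
    for x, y in pts:
        if y > last_y:
            hv += (x - ref[0]) * (y - last_y)
            last_y = y
    return hv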
# Scatter-plot the approximated Pareto front in 3-D.
apx, apy, apz, upx, upy, upz = [], [], [], [], [], []
for point in agent.pareto:
    apx.append(point[0])
    apy.append(point[1])
    apz.append(point[2])
ax.scatter(apx, apy, apz, c='b')
plt.show()
print 'R:' + str(hv_calculator.compute_hv(agent.pareto))

# Now you can choose a specific weight and train on it.
weights = [[1.0, 0.0], [0.0, 1.0], [0.5, 0.5]]
mean_count = np.mean(agent.interactions_per_weight)
interacts = []
for specific_weight in weights:
    agent.train_one_weight(specific_weight)
    interacts.append(agent.interactions_per_weight[-1])
    print 'H-agent needed ' + str(interacts[-1]) + ' interactions for weight ' + \
        str(specific_weight) + '. Average before was: ' + str(mean_count)
    agent.plot_interaction_rhos(specific_weight)
    policy2 = PolicyFromAgent(agent.problem, agent, mode='greedy')
    policy_plot2(problem, policy2)
print 'Average interactions needed for the specific weights: ' + str(np.mean(interacts))
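# Under linear scalarization, training on a specific weight w steers the agent
# toward the Pareto point that maximizes w . p. A small illustrative helper for
# inspecting which front point a weight selects (assumed semantics, not part
# of the framework):
def _sketch_best_pareto_point_for_weight(pareto_points, w):
    pts = np.asarray(pareto_points)
    return pts[np.argmax(pts.dot(np.asarray(w)))]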
agent._Q_sets[agent.s_a_mapping[s, a]] = agent.hull_add(
    agent._Q_sets[agent.s_a_mapping[s, a]], new_hull)

# Back up the state values: collect all Q-set points per state, strip
# duplicates and keep only the convex hull as the new value set V(s).
for s in xrange(problem.n_states):
    candidates = []
    for a in xrange(problem.n_actions):
        for p in xrange(len(agent._Q_sets[agent.s_a_mapping[s, a]])):
            candidates.append(np.array(agent._Q_sets[agent.s_a_mapping[s, a]][p]))
    candidates = remove_duplicates(candidates)
    # candidates = agent.hv_calculator.extract_front(candidates)
    agent._V[s] = agent.get_hull(candidates)
pbar.update(i_count)

print agent._Q_sets
# problem.n_states = 25

# Extract and plot a greedy policy for a set of representative weight vectors.
if problem.reward_dimension == 3:
    weights = [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0],
               [0.5, 0.5, 0.0], [0.0, 0.5, 0.5], [0.5, 0.0, 0.5],
               [0.33, 0.33, 0.33]]
if problem.reward_dimension == 2:
    weights = [[1.0, 0.0], [0.0, 1.0], [0.5, 0.5]]

for weight in weights:
    agent.extract_policy(weight)
    policy = PolicyFromAgent(problem, agent, mode='greedy')
    policy_plot2(problem, policy, str(weight))
plt.show()
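# What extracting a policy for a fixed weight amounts to under convex-hull
# value iteration: for each state, choose the action whose Q-set contains the
# point maximizing w . q. Assumed semantics of extract_policy, shown here only
# for illustration:
def _sketch_extract_policy(agent, problem, w):
    policy = np.zeros(problem.n_states, dtype=int)
    for s in xrange(problem.n_states):
        best_a, best_val = 0, -np.inf
        for a in xrange(problem.n_actions):
            q_set = agent._Q_sets[agent.s_a_mapping[s, a]]
            if len(q_set) == 0:
                continue
            # Best scalarized value achievable from this action's Q-set.
            val = max(np.dot(np.asarray(q), w) for q in q_set)
            if val > best_val:
                best_a, best_val = a, val
        policy[s] = best_a
    return policy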