# pickle.dump((payouts, moves, states, problem, agent), open('test_pickle.p', "wb"))

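    # Recover the scalarization weights from the learned policy via inverse multi-objective RL (algebraic solver).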
    i_morl = InverseMORLIRL(problem, learned_policy)
    scalarization_weights_alge = i_morl.solvealge()
    #
    log.info("scalarization weights (alge): %s" %
             (str(scalarization_weights_alge)))
    #
    #
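    # Train a second agent on a fresh gridworld using the recovered scalarization weights.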
    problem2 = MORLGridworldStatic()
    agent2 = PreScalarizedQMorlAgent(problem2,
                                     scalarization_weights_alge,
                                     alpha=alfa,
                                     epsilon=eps)
    payouts2, moves2, states2 = morl_interact_multiple_episodic(
        agent2, problem2, interactions=interactions, max_episode_length=150)
    log.info('Average Payout: %s' % (str(payouts2.mean(axis=0))))

    #learned_policy2 = PolicyFromAgent(problem2, agent2, mode='gibbs')
    learned_policy2 = PolicyFromAgent(problem2, agent2, mode='greedy')

    ## Plotting ##

    plt.ion()
    policy_plot2(problem, learned_policy)
    # policy_heat_plot(problem, learned_policy, states)
    plt.ioff()
    # policy_plot2(problem2, learned_policy2)
    policy_heat_plot(problem2, learned_policy2, states2)
Example #2
                                 ref_point=[-1.0, -1.0, -1.0])


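    # Let the agent interact with the problem episodically and derive a greedy policy from it.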
    # payouts, moves, states = morl_interact_multiple_average_episodic(agent, problem, runs=runs, interactions=interactions, max_episode_length=150)
    payouts, moves, states = morl_interact_multiple_episodic(agent, problem, interactions=interactions)
    log.info('Average Payout: %s' % (str(payouts.mean(axis=0))))

    learned_policy = PolicyFromAgent(problem, agent, mode='greedy')

    # filename = 'figure_' + time.strftime("%Y%m%d-%H%M%S")
    states = problem.create_plottable_states(states)

    ## Plotting ##

    # plt.ion()

    # figure_file_name = 'fig_runs-' + str(interactions) + "-" + agent.name() + ".png"
    # titlestring = agent.name()
    policy_plot2(problem, learned_policy, title=None, filename=None)
    policy_heat_plot(problem, learned_policy, states)
    # pickle_file_name = titlestring + '_' + time.strftime("%H%M%S") + '.p'
    # pickle.dump((payouts, moves, states, problem, agent), open(pickle_file_name, "wb"))

    # plt.ioff()
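    # Plot the hypervolume indicator for the trained agent.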
    plot_hypervolume([agent], problem)
    log.info('Average Payout: %s' % (str(payouts.mean(axis=0))))



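        # Collect the Pareto points found by the agent (one coordinate per objective) and scatter-plot them.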
        apx, apy, apz, upx, upy, upz = [], [], [], [], [], []
        for point in agent.pareto:
            apx.append(point[0])
            apy.append(point[1])
            apz.append(point[2])
        # plot all collected points at once instead of re-plotting inside the loop
        ax.scatter(apx, apy, apz, c='b')
        plt.show()

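    # Report the hypervolume of the approximated Pareto front.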
    print 'R:' + str(hv_calculator.compute_hv(agent.pareto))
    # now you can choose a specific weight and train on it
    weights = [[1.0, 0.0], [0.0, 1.0], [0.5, 0.5]]
    mean_count = np.mean(agent.interactions_per_weight)
    interacts = []
    for specific_weight in weights:
        agent.train_one_weight(specific_weight)
        # the last entry holds the number of interactions needed for this weight
        interacts.append(agent.interactions_per_weight[-1])
        print 'H-agent needed for weight: ' + str(specific_weight) + ' ' + \
              str(interacts[-1]) + \
              ' interactions. Average before was: ' + str(mean_count)
        agent.plot_interaction_rhos(specific_weight)

        policy2 = PolicyFromAgent(agent.problem, agent, mode='greedy')
        policy_plot2(problem, policy2)

    print 'Average interactions needed for the specific weights: ' + str(
        np.mean(interacts))
Example #4
                agent._Q_sets[agent.s_a_mapping[s, a]] = (agent.hull_add(
                    agent._Q_sets[agent.s_a_mapping[s, a]], new_hull))

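        # For every state, pool the Q-set vectors of all actions, drop duplicates and keep the convex hull as the state's value set.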
        for s in xrange(problem.n_states):
            candidates = []
            for a in xrange(problem.n_actions):
                for p in xrange(len(agent._Q_sets[agent.s_a_mapping[s, a]])):
                    candidates.append(
                        np.array(agent._Q_sets[agent.s_a_mapping[s, a]][p]))

            candidates = remove_duplicates(candidates)
            # candidates = agent.hv_calculator.extract_front(candidates)
            candidates = agent.get_hull(candidates)
            agent._V[s] = candidates
        pbar.update(i_count)
    print agent._Q_sets
    # problem.n_states = 25
    if problem.reward_dimension == 3:
        weights = [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0],
                   [0.5, 0.5, 0.0], [0.0, 0.5, 0.5], [0.5, 0.0, 0.5],
                   [0.33, 0.33, 0.33]]
    if problem.reward_dimension == 2:
        weights = [[1.0, 0.0], [0.0, 1.0], [0.5, 0.5]]

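    # Extract and plot a greedy policy for each chosen weight vector.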
    for weight in weights:
        agent.extract_policy(weight)

        policy = PolicyFromAgent(problem, agent, mode='greedy')
        policy_plot2(problem, policy, str(weight))
        plt.show()