Example #1
    def calc_frontier_scalarized(self,
                                 p: Vector,
                                 q: Vector,
                                 solutions_known: list = None) -> list:
        """
        This is a search method that calculates the Pareto frontier.

        Returns a list of supported solution costs; this method is only valid for two-objective problems.
        Applies a dichotomous search to find all supported solution costs.

        :param solutions_known: If the possible solutions are known, they can be passed to the algorithm to improve
            the training of the agent. If None, this parameter is ignored.
        :param p: 2D point
        :param q: 2D point
        :return:
        """

        # A new list with p and q
        result = [p, q]

        # Create a new stack
        accumulate = list()

        # Push a vector with p and q in the stack
        accumulate.append(tuple(result))

        while len(accumulate) > 0:
            # Pop the next pair of points from the stack.
            a, b = accumulate.pop()

            try:
                # Order the points by Euclidean distance to the origin.
                a, b = tuple(Vector.order_vectors_by_origin_nearest([a, b]))
            except ValueError:
                print('Error unpacking {} and {}'.format(a, b))
                continue

            # Convert to vectors
            a, b = VectorDecimal(a), VectorDecimal(b)

            # Decompose points
            a_x, a_y = a
            b_x, b_y = b

            # Calculate the parameters of the new linear objective function (multiply by -1. to convert into a
            # maximization problem)
            w1 = np.multiply(a_y - b_y, -1.)
            w2 = np.multiply(b_x - a_x, -1.)
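            # (w1, w2) is a normal vector of the segment a-b, so the linear
            # scalarization w1*x + w2*y is constant along the line through a and b.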

            # Solve P to find a new solution and get its cost vector c.
            c = self.find_c_vector(w1, w2, solutions_known=solutions_known)

            # Decompose c vector.
            c_x, c_y = c
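
            # If c scores the same as a (and b) under the scalarization, no new
            # supported solution lies between a and b and this segment is closed;
            # otherwise c is a new supported point and both sub-segments (a, c)
            # and (c, b) are explored.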

            if (w1 * a_x + w2 * a_y) != (w1 * c_x +
                                         w2 * c_y) and c not in result:
                # c is the cost of a new supported solution

                # Push new pair in the stack
                accumulate.append((a, c))

                # Push new pair in the stack
                accumulate.append((c, b))

                # Add c to the result
                result.append(c)

                # Record c in the Pareto frontier found so far
                self.pareto_frontier_found.append(c)

        return result
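
A hypothetical usage sketch (the agent instance, the Vector type and the example cost vectors below are assumptions for illustration, not taken from this snippet): given the costs p and q of the two single-objective optima, the method returns every supported cost vector between them.

p = Vector([1., 9.])   # cost of the solution that is best in the first objective
q = Vector([9., 1.])   # cost of the solution that is best in the second objective
supported_costs = agent.calc_frontier_scalarized(p=p, q=q)
# supported_costs contains p, q and any supported solution costs found between them
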
Example #2
def test_agents(environment: Environment,
                hv_reference: Vector,
                variable: str,
                agents_configuration: dict,
                graph_configuration: dict,
                epsilon: float = None,
                alpha: float = None,
                max_steps: int = None,
                states_to_observe: list = None,
                number_of_agents: int = 30,
                gamma: float = 1.,
                solution: list = None,
                initial_q_value: Vector = None,
                evaluation_mechanism: EvaluationMechanism = None):
    """
    If DATA_PER_STATE is chosen in graph_configuration, the agent trains for `limit` steps and only collects
    train_data in the final steps (`interval` is ignored).

    If MEMORY is chosen in graph_configuration, the agent trains for `limit` steps and collects train_data every
    `interval` steps.

    :param initial_q_value:
    :param graph_configuration:
    :param solution:
    :param environment:
    :param hv_reference:
    :param variable:
    :param agents_configuration:
    :param epsilon:
    :param alpha:
    :param max_steps:
    :param states_to_observe:
    :param number_of_agents:
    :param gamma:
    :param evaluation_mechanism:
    :return:
    """

    # Extract graph_types
    graph_types = set(graph_configuration.keys())

    if len(graph_types) > 2:
        print("Isn't recommended more than 2 graphs")

    # Parameters
    if states_to_observe is None:
        states_to_observe = {environment.initial_state}

    complex_states = isinstance(environment.observation_space[0],
                                gym.spaces.Tuple)

    if not complex_states and GraphType.DATA_PER_STATE in graph_types:
        print(
            "This environment does not have complex states, so the DATA_PER_STATE graph is disabled."
        )
        graph_configuration.pop(GraphType.DATA_PER_STATE)

    # Build environment
    env_name = environment.__class__.__name__
    env_name_snake = str_to_snake_case(env_name)

    # File timestamp
    timestamp = int(time.time())

    # Write all information in configuration path
    write_config_file(timestamp=timestamp,
                      number_of_agents=number_of_agents,
                      env_name_snake=env_name_snake,
                      seed=','.join(map(str, range(number_of_agents))),
                      epsilon=epsilon,
                      alpha=alpha,
                      gamma=gamma,
                      max_steps=max_steps,
                      variable=variable,
                      agents_configuration=agents_configuration,
                      graph_configuration=graph_configuration,
                      evaluation_mechanism=evaluation_mechanism)

    # Create graphs structure
    graphs, graphs_info = initialize_graph_data(
        graph_types=graph_types, agents_configuration=agents_configuration)

    # Show information
    print('Environment: {}'.format(env_name))

    for graph_type in graph_types:

        # Extract interval and limit
        interval = graph_configuration[graph_type].get('interval', 1)
        limit = graph_configuration[graph_type]['limit']

        # Show information
        print(('\t' * 1) +
              "Graph type: {} - [{}/{}]".format(graph_type, limit, interval))

        # Set interval to get train_data
        Agent.interval_to_get_data = interval

        # Execute an iteration with a different seed for each of the indicated agents
        for seed in range(number_of_agents):

            # Show information
            print(('\t' * 2) + "Execution: {}".format(seed + 1))

            # For each configuration
            for agent_type in agents_configuration:

                # Show information
                print(('\t' * 3) + 'Agent: {}'.format(agent_type.value))

                # Extract configuration for that agent
                for configuration in agents_configuration[agent_type].keys():

                    # Show information
                    print(
                        ('\t' * 4) + '{}: {}'.format(variable, configuration),
                        end=' ')

                    # Mark of time
                    t0 = time.time()

                    # Reset environment
                    environment.reset()
                    environment.seed(seed=seed)

                    # Variable parameters
                    parameters = {
                        'epsilon': epsilon,
                        'alpha': alpha,
                        'gamma': gamma,
                        'max_steps': max_steps,
                        'evaluation_mechanism': evaluation_mechanism,
                        'initial_value': initial_q_value
                    }
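
                    # 'decimal_precision' is not an agent parameter; it is set
                    # globally on the Vector class instead of being added to
                    # `parameters` below.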

                    if variable == 'decimal_precision':
                        Vector.set_decimal_precision(
                            decimal_precision=configuration)
                    else:
                        # Modify current configuration
                        parameters.update({variable: configuration})

                    agent, v_s_0 = train_agent_and_get_v_s_0(
                        agent_type=agent_type,
                        environment=environment,
                        graph_type=graph_type,
                        graph_types=graph_types,
                        hv_reference=hv_reference,
                        limit=limit,
                        seed=seed,
                        parameters=parameters,
                        states_to_observe=states_to_observe)

                    print('-> {:.2f}s'.format(time.time() - t0))

                    train_data = dict()

                    if agent_type is AgentType.PQL and graph_type is GraphType.DATA_PER_STATE:
                        train_data.update({
                            'vectors': {
                                state: {
                                    action: agent.q_set(state=state,
                                                        action=action)
                                    for action in agent.nd[state].keys()
                                }
                                for state in agent.nd.keys()
                            }
                        })

                    # Order vectors by distance to the origin Vec(0)
                    train_data.update({
                        'v_s_0':
                        Vector.order_vectors_by_origin_nearest(vectors=v_s_0),
                        # 'q': agent.q,
                        # 'v': agent.v
                    })

                    # Write vectors found into path
                    dumps_train_data(
                        timestamp=timestamp,
                        seed=seed,
                        env_name_snake=env_name_snake,
                        train_data=train_data,
                        variable=variable,
                        agent_type=agent_type,
                        configuration=configuration,
                        evaluation_mechanism=evaluation_mechanism,
                        columns=environment.observation_space[0].n)

                    # Update graphs
                    update_graphs(graphs=graphs,
                                  agent=agent,
                                  graph_type=graph_type,
                                  configuration=str(configuration),
                                  agent_type=agent_type,
                                  states_to_observe=states_to_observe,
                                  graphs_info=graphs_info,
                                  solution=solution)

    prepare_data_and_show_graph(timestamp=timestamp,
                                env_name=env_name,
                                env_name_snake=env_name_snake,
                                graphs=graphs,
                                number_of_agents=number_of_agents,
                                agents_configuration=agents_configuration,
                                alpha=alpha,
                                epsilon=epsilon,
                                gamma=gamma,
                                graph_configuration=graph_configuration,
                                max_steps=max_steps,
                                initial_state=environment.initial_state,
                                variable=variable,
                                graphs_info=graphs_info,
                                evaluation_mechanism=evaluation_mechanism,
                                solution=solution)
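
A hypothetical call sketch: the environment class, the GraphType and AgentType members, and the concrete values below are placeholders assumed for illustration, not taken from this snippet.

graph_configuration = {
    GraphType.MEMORY: {'limit': 1000, 'interval': 10},
}
agents_configuration = {
    # Keys are the values taken by `variable`; the dict values are assumed here
    # to be auxiliary data (e.g. plot labels) used by the graphing helpers.
    AgentType.PQL: {0.1: 'blue', 0.3: 'red'},
}

test_agents(environment=SomeEnvironment(),
            hv_reference=Vector([-25., 0.]),
            variable='alpha',
            agents_configuration=agents_configuration,
            graph_configuration=graph_configuration,
            epsilon=0.1,
            alpha=0.1,
            gamma=1.,
            number_of_agents=5)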