def calc_frontier_scalarized(self, p: Vector, q: Vector, solutions_known: list = None) -> list:
    """
    Collect the costs of the supported solutions lying between two points of a
    two-objective problem by means of a dichotomous search.

    Starting from the pair (p, q), each pending pair of points defines the weights of a
    linear objective; `self.find_c_vector` is asked for the cost vector that optimises it.
    When that vector is a new one, it is stored and the pair is split in two around it.
    Every new cost vector is also appended to `self.pareto_frontier_found`.

    :param p: 2D point, first end of the search.
    :param q: 2D point, second end of the search.
    :param solutions_known: optional list of known solutions; it is forwarded untouched to
        `self.find_c_vector` (None is forwarded as well).
    :return: list that begins with p and q, followed by every new cost vector in the order
        in which it was discovered.
    """

    # Costs collected so far; the two given points are always part of the answer.
    frontier = [p, q]

    # LIFO stack holding the pairs of points that still have to be explored.
    pending = [(p, q)]

    while pending:
        first, second = pending.pop()

        try:
            # Sort the pair so that the point nearest to the origin comes first.
            first, second = tuple(Vector.order_vectors_by_origin_nearest([first, second]))
        except ValueError:
            print('Error to unpack {} and {}'.format(first, second))
            continue

        # Work with decimal vectors from here on.
        first = VectorDecimal(first)
        second = VectorDecimal(second)

        first_x, first_y = first
        second_x, second_y = second

        # Weights of the linear objective defined by the pair (multiplied by -1 to turn it
        # into a maximisation problem).
        w1 = np.multiply(first_y - second_y, -1.)
        w2 = np.multiply(second_x - first_x, -1.)

        # Cost vector of the solution that optimises the weighted objective.
        c = self.find_c_vector(w1, w2, solutions_known=solutions_known)
        c_x, c_y = c

        weighted_first = w1 * first_x + w2 * first_y
        weighted_c = w1 * c_x + w2 * c_y

        if weighted_first != weighted_c and c not in frontier:
            # c belongs to a new supported solution: explore both halves of the pair later.
            pending.extend(((first, c), (c, second)))

            frontier.append(c)
            self.pareto_frontier_found.append(c)

    return frontier
def test_agents(environment: Environment, hv_reference: Vector, variable: str, agents_configuration: dict,
                graph_configuration: dict, epsilon: float = None, alpha: float = None, max_steps: int = None,
                states_to_observe: list = None, number_of_agents: int = 30, gamma: float = 1.,
                solution: list = None, initial_q_value: Vector = None,
                evaluation_mechanism: EvaluationMechanism = None):
    """
    Train every configured agent once per seed and per graph type, dump the data gathered
    and finally build the graphs.

    If we choose DATA_PER_STATE in graph_configuration, the agent trains during `limit` steps,
    and only gets train_data in the last steps (ignoring `interval`).

    If we choose MEMORY in graph_configuration, the agent trains during `limit` steps and
    takes train_data every `interval` steps.

    When the first observation space of the environment is a `gym.spaces.Tuple` (complex
    states), the DATA_PER_STATE graph is disabled: it is dropped both from the set of graph
    types to run and from a private copy of `graph_configuration`, so the dictionary given
    by the caller is never modified.

    :param environment: environment used for training; it is reset and re-seeded before each
        training run.
    :param hv_reference: forwarded to `train_agent_and_get_v_s_0`.
    :param variable: name of the parameter that changes between configurations. With
        'decimal_precision' the value is applied through `Vector.set_decimal_precision`;
        otherwise it overrides the entry of the same name in the agent parameters.
    :param agents_configuration: dictionary indexed by agent type whose values are
        dictionaries indexed by each value that `variable` takes.
    :param graph_configuration: dictionary indexed by graph type; each value must define
        'limit' and may define 'interval' (1 by default).
    :param epsilon: agent parameter, also written in the configuration file.
    :param alpha: agent parameter, also written in the configuration file.
    :param max_steps: agent parameter, also written in the configuration file.
    :param states_to_observe: states to observe; if None, only the initial state of the
        environment is used.
    :param number_of_agents: number of executions; the seeds are 0..number_of_agents - 1.
    :param gamma: agent parameter, also written in the configuration file.
    :param solution: forwarded to `update_graphs` and `prepare_data_and_show_graph`.
    :param initial_q_value: given to the agents as 'initial_value'.
    :param evaluation_mechanism: given to the agents and to the writers of results.
    :return: None
    """

    # Work on a shallow copy: disabling a graph below must not alter the caller's dictionary.
    graph_configuration = dict(graph_configuration)

    # Extract graph_types
    graph_types = set(graph_configuration.keys())

    if len(graph_types) > 2:
        print("Isn't recommended more than 2 graphs")

    # Parameters
    if states_to_observe is None:
        states_to_observe = {environment.initial_state}

    complex_states = isinstance(environment.observation_space[0], gym.spaces.Tuple)

    if complex_states and GraphType.DATA_PER_STATE in graph_types:
        print("This environment has complex states, so DATA_PER_STATE graph is disabled.")

        # Remove the graph from BOTH structures. The loop below iterates over graph_types and
        # indexes graph_configuration, so leaving a stale entry in the set ends in a KeyError.
        graph_configuration.pop(GraphType.DATA_PER_STATE)
        graph_types.discard(GraphType.DATA_PER_STATE)

    # Build environment
    env_name = environment.__class__.__name__
    env_name_snake = str_to_snake_case(env_name)

    # File timestamp
    timestamp = int(time.time())

    # Write all information in configuration path
    write_config_file(timestamp=timestamp, number_of_agents=number_of_agents, env_name_snake=env_name_snake,
                      seed=','.join(map(str, range(number_of_agents))), epsilon=epsilon, alpha=alpha, gamma=gamma,
                      max_steps=max_steps, variable=variable, agents_configuration=agents_configuration,
                      graph_configuration=graph_configuration, evaluation_mechanism=evaluation_mechanism)

    # Create graphs structure
    graphs, graphs_info = initialize_graph_data(graph_types=graph_types, agents_configuration=agents_configuration)

    # Show information
    print('Environment: {}'.format(env_name))

    for graph_type in graph_types:

        # Extract interval and limit
        interval = graph_configuration[graph_type].get('interval', 1)
        limit = graph_configuration[graph_type]['limit']

        # Show information
        print(('\t' * 1) + "Graph type: {} - [{}/{}]".format(graph_type, limit, interval))

        # Set interval to get train_data (class attribute of Agent)
        Agent.interval_to_get_data = interval

        # Execute an iteration with a different seed for each agent indicated
        for seed in range(number_of_agents):

            # Show information
            print(('\t' * 2) + "Execution: {}".format(seed + 1))

            # For each configuration
            for agent_type in agents_configuration:

                # Show information
                print(('\t' * 3) + 'Agent: {}'.format(agent_type.value))

                # Extract configuration for that agent
                for configuration in agents_configuration[agent_type].keys():

                    # Show information (the elapsed time is printed later on the same line)
                    print(('\t' * 4) + '{}: {}'.format(variable, configuration), end=' ')

                    # Mark of time
                    t0 = time.time()

                    # Reset environment
                    environment.reset()
                    environment.seed(seed=seed)

                    # Variable parameters
                    parameters = {
                        'epsilon': epsilon,
                        'alpha': alpha,
                        'gamma': gamma,
                        'max_steps': max_steps,
                        'evaluation_mechanism': evaluation_mechanism,
                        'initial_value': initial_q_value
                    }

                    if variable == 'decimal_precision':
                        # The precision is not an agent parameter, it is set on Vector itself.
                        Vector.set_decimal_precision(decimal_precision=configuration)
                    else:
                        # Modify current configuration
                        parameters.update({variable: configuration})

                    agent, v_s_0 = train_agent_and_get_v_s_0(agent_type=agent_type, environment=environment,
                                                             graph_type=graph_type, graph_types=graph_types,
                                                             hv_reference=hv_reference, limit=limit, seed=seed,
                                                             parameters=parameters,
                                                             states_to_observe=states_to_observe)

                    print('-> {:.2f}s'.format(time.time() - t0))

                    train_data = dict()

                    if agent_type is AgentType.PQL and graph_type is GraphType.DATA_PER_STATE:
                        # Keep the Q-set of every (state, action) pair registered in agent.nd
                        train_data.update({
                            'vectors': {
                                state: {
                                    action: agent.q_set(state=state, action=action)
                                    for action in agent.nd[state].keys()
                                }
                                for state in agent.nd.keys()
                            }
                        })

                    # Order vectors by origin Vec(0) nearest
                    train_data.update({
                        'v_s_0': Vector.order_vectors_by_origin_nearest(vectors=v_s_0),
                    })

                    # Write vectors found into path
                    dumps_train_data(timestamp=timestamp, seed=seed, env_name_snake=env_name_snake,
                                     train_data=train_data, variable=variable, agent_type=agent_type,
                                     configuration=configuration, evaluation_mechanism=evaluation_mechanism,
                                     columns=environment.observation_space[0].n)

                    # Update graphs
                    update_graphs(graphs=graphs, agent=agent, graph_type=graph_type,
                                  configuration=str(configuration), agent_type=agent_type,
                                  states_to_observe=states_to_observe, graphs_info=graphs_info,
                                  solution=solution)

    # Every run has finished: prepare the data gathered and show the graphs.
    prepare_data_and_show_graph(timestamp=timestamp, env_name=env_name, env_name_snake=env_name_snake,
                                graphs=graphs, number_of_agents=number_of_agents,
                                agents_configuration=agents_configuration, alpha=alpha, epsilon=epsilon,
                                gamma=gamma, graph_configuration=graph_configuration, max_steps=max_steps,
                                initial_state=environment.initial_state, variable=variable,
                                graphs_info=graphs_info, evaluation_mechanism=evaluation_mechanism,
                                solution=solution)