def draft_b_lp(columns: int):
    # Create environment
    environment = DeepSeaTreasureRightDownStochastic(columns=columns)

    # Vector precision
    Vector.set_decimal_precision(decimal_precision=0.000001)

    # Create instance of AgentB
    agent = AgentB(environment=environment, limited_precision=True)

    agent.simulate()

def get_trained_agent() -> AgentBN:
    # Environment
    environment = ResourceGatheringEpisodic()

    # Agent
    agent = AgentBN(environment=environment, gamma=.9)

    # Vector precision
    Vector.set_decimal_precision(decimal_precision=0.01)

    # Train agent
    agent.train(graph_type=GraphType.SWEEP, limit=10)

    return agent

def main():
    # Get trained agent
    print('Training agent...')
    agent: AgentBN = get_trained_agent()

    # Set initial state
    initial_state = ((2, 4), (0, 0), False)

    # Initial vectors
    v_s_0 = agent.v[initial_state]
    vectors = Vector.m3_max(set(v_s_0))

    # Show information
    print('Vectors obtained after m3_max algorithm: ')
    print(vectors, end='\n\n')

    # Define a tolerance (also reused as the vector decimal precision)
    decimal_precision = 0.0000001

    # Simulation
    simulation = dict()

    # Set decimal precision
    Vector.set_decimal_precision(decimal_precision=decimal_precision)

    print('Evaluating recovered policies...')

    # For each vector
    for vector in vectors:
        # Specify objective vector
        objective_vector = vector.copy()

        print('Recovering policy for objective vector: {}...'.format(objective_vector))

        # Recover the policy associated with this objective vector
        policy = agent.recover_policy(initial_state=initial_state,
                                      objective_vector=objective_vector,
                                      iterations_limit=agent.total_sweeps)

        print('Evaluating recovered policy...', end='\n\n')

        # Evaluate until convergence within `decimal_precision` tolerance.
        policy_evaluated = agent.evaluate_policy(policy=policy,
                                                 tolerance=decimal_precision)

        # Save the policy and its evaluation.
        simulation.update({objective_vector: (policy, policy_evaluated)})

    print(simulation)

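# `Vector.m3_max` is used above to filter the value vectors at the initial state
# before recovering policies. Its implementation is not shown here; presumably it
# keeps only the maximal (non-dominated) vectors. A minimal sketch of that idea,
# assuming plain component-wise Pareto dominance (hypothetical helper, not the
# library's actual algorithm):
def pareto_non_dominated(vectors: list) -> list:
    """Return the vectors not dominated by any other vector in `vectors`."""
    def dominates(a, b) -> bool:
        # `a` dominates `b` if a >= b in every component and a > b in at least one
        return all(x >= y for x, y in zip(a, b)) and any(x > y for x, y in zip(a, b))

    return [v for v in vectors
            if not any(dominates(w, v) for w in vectors if w is not v)]
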
def train_from_zero():
    # Define variables
    limit = int(3e6)
    epsilon = 0.4
    max_steps = 1000
    alpha = 0.1
    gamma = 1
    graph_type = GraphType.EPISODES
    columns_list = range(1, 6)
    decimals = [0.01, 0.05]

    for decimal_precision in decimals:
        # Set vector decimal precision
        Vector.set_decimal_precision(decimal_precision=decimal_precision)

        for columns in columns_list:
            # Environment
            environment = DeepSeaTreasureRightDownStochastic(columns=columns)

            # Create agent
            agent = AgentMPQ(environment=environment,
                             hv_reference=environment.hv_reference,
                             epsilon=epsilon, alpha=alpha, gamma=gamma,
                             max_steps=max_steps)

            # Time train
            t0 = time.time()

            # Show number of columns
            print('# of columns: {}'.format(columns))

            # Agent training
            agent.train(graph_type=graph_type, limit=limit)

            # Calc total time
            total_time = time.time() - t0

            prepare_for_dumps(agent, columns, decimal_precision, graph_type,
                              limit, total_time)

def main():
    # Get trained agent
    agent: AgentBN = get_trained_agent()

    # Set initial state
    initial_state = ((2, 4), (0, 0))

    # Alternatively, load a previously trained agent from disk:
    # agent: AgentBN = AgentBN.load(
    #     filename='bn/models/rg_1584437328_0.005.bin'
    # )

    v_s_0 = agent.v[initial_state]
    vectors = Vector.m3_max(set(v_s_0))

    # Simulation
    simulation = dict()

    # Set decimal precision
    Vector.set_decimal_precision(decimal_precision=0.0000001)

    for vector in vectors:
        # Recreate the objective index vector.
        # objective_vector = IndexVector(
        #     index=vector, vector=trained_agent.v[initial_state][vector]
        # )
        objective_vector = vector.copy()

        # Recover the policy for this objective vector
        policy = agent.recover_policy(initial_state=initial_state,
                                      objective_vector=objective_vector,
                                      iterations_limit=agent.total_sweeps)

        policy_evaluated = agent.evaluate_policy(policy=policy,
                                                 tolerance=0.0000001)

        simulation.update({objective_vector: (policy, policy_evaluated)})

    print(simulation)

def train_agent() -> Agent:
    # Environment
    # environment = DeepSeaTreasureRightDownStochastic(columns=3)
    # environment = DeepSeaTreasureRightDown(columns=3)
    # environment = PyramidMDPNoBounces(diagonals=3, n_transition=0.95)
    # environment = DeepSeaTreasure()
    environment = ResourceGathering()

    # Agent
    # agent = AgentMPQ(
    #     environment=environment, hv_reference=environment.hv_reference,
    #     alpha=0.1, epsilon=0.4, max_steps=1000
    # )
    # agent = AgentMPQ(environment=environment,
    #                  hv_reference=environment.hv_reference, alpha=0.01)
    agent = AgentBN(environment=environment, gamma=.9)

    # Vector precision
    Vector.set_decimal_precision(decimal_precision=0.01)

    # Train agent
    # agent.train(graph_type=GraphType.SWEEP, tolerance=0.00001)
    agent.train(graph_type=GraphType.SWEEP, limit=13)

    return agent

def train_from_file():
    # Models path
    models_path = 'mpq/models/dstrds_1579869395_1.0_4.bin'
    agent: AgentMPQ = u_models.binary_load(path=dumps_path.joinpath(models_path))

    # Data path
    data_path = dumps_path.joinpath('mpq/train_data/dstrds_1579869395_1.0_4.yml')
    data_file = data_path.open(mode='r', encoding='UTF-8')

    # Load YAML from file
    data = yaml.load(data_file, Loader=yaml.FullLoader)

    # Extract the relevant data from the previous training run
    before_training_execution = float(data['time'])
    decimal_precision = float(data['agent']['decimal_precision'])
    graph_type = GraphType.from_string(data['training']['graph_type'])
    limit = int(data['training']['limit'])
    columns = int(data['environment']['columns'])

    # Set decimal precision
    Vector.set_decimal_precision(decimal_precision=decimal_precision)

    # Time train
    t0 = time.time()

    # Agent training
    agent.train(graph_type=graph_type, limit=limit)

    # Calc total time, including the time spent before this resume
    total_time = (time.time() - t0) + before_training_execution

    prepare_for_dumps(agent, columns, decimal_precision, graph_type, limit,
                      total_time)

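# For reference, `train_from_file` assumes the companion YAML holds at least the
# keys read above. A hypothetical sketch of that layout (values are illustrative,
# and the real file may contain more fields):
#
#   time: 1523.7
#   agent:
#     decimal_precision: 1.0
#   training:
#     graph_type: episodes
#     limit: 3000000
#   environment:
#     columns: 4
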
def draft_w():
    tolerance = 0.00001

    for decimal_precision in [0.05, 0.005, 0.001]:
        # Create environment
        # environment = ResourceGatheringEpisodicSimplified()
        # environment = ResourceGatheringSimplified()
        environment = ResourceGatheringEpisodic()

        # Create agent
        agent_w = AgentW(environment=environment, convergence_graph=True, gamma=.9)

        # Time train
        t0 = time.time()

        # Set number of decimal places allowed
        Vector.set_decimal_precision(decimal_precision=decimal_precision)

        agent_w.train(graph_type=GraphType.SWEEP, limit=1)

        # Calc total time
        total_time = time.time() - t0

        # Convert value vectors to plain lists
        vectors = {
            key: [vector.tolist() for vector in state_vectors]
            for key, state_vectors in agent_w.v.items()
        }

        # Prepare full_data to dump
        data = {
            'time': '{}s'.format(total_time),
            'memory': {
                'v_s_0': len(agent_w.v[environment.initial_state]),
                'full': sum(len(state_vectors) for state_vectors in agent_w.v.values())
            },
            'vectors': vectors
        }

        # Configuration of environment
        environment_info = vars(environment).copy()
        environment_info.pop('_action_space', None)
        environment_info.pop('np_random', None)

        # Configuration of agent
        agent_info = {
            'gamma': agent_w.gamma,
            'initial_q_value': agent_w.initial_q_value,
            'initial_seed': agent_w.initial_seed,
            'interval_to_get_data': agent_w.interval_to_get_data,
            'max_steps': agent_w.max_iterations,
            'total_sweeps': agent_w.total_sweeps,
            'tolerance': tolerance
        }

        # Extra data
        data.update({'environment': environment_info})
        data.update({'agent': agent_info})

        # Dump partial execution
        dumps(data=data, environment=environment)

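# For reference, the `data` dictionary handed to `dumps` above (and reused by the
# drafts below) has the shape sketched here. Values are illustrative; the on-disk
# format depends on the `dumps` helper, which is not shown in this section:
#
#   {
#       'time': '812.3s',
#       'memory': {'v_s_0': 3, 'full': 1742},      # vector counts at s0 / overall
#       'vectors': {state: [[...], [...]], ...},   # V(s) as plain lists per state
#       'environment': {...},                      # vars(environment) minus
#                                                  # '_action_space' and 'np_random'
#       'agent': {'gamma': 0.9, 'total_sweeps': 60, 'tolerance': 0.00001, ...}
#   }
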
def draft_w():
    tolerance = 0.00001
    gamma = 0.95

    # for decimal_precision in [1, 0.5, 0.1, 0.05, 0.005]:
    for decimal_precision in [0.1, 0.05, 0.005]:
        # Set number of decimal places allowed
        Vector.set_decimal_precision(decimal_precision=decimal_precision)

        # for i in range(1, 5):
        for i in [10]:
            # Create environment
            # environment = PyramidMDPNoBounces(diagonals=i, n_transition=0.95)
            # environment = DeepSeaTreasure(columns=i)
            # environment = ResourceGatheringEpisodic()
            environment = ResourceGathering()

            # Create agent
            agent_w = AgentW(environment=environment, convergence_graph=False,
                             gamma=gamma)

            # Time train
            t0 = time.time()

            # # Calc number of sweeps limit
            # limit = (i + 1) * 3

            print('{}-diagonals'.format(i))
            # print('{}-sweeps'.format(limit))

            agent_w.train(graph_type=GraphType.SWEEP, limit=40)
            # agent_w.train(graph_type=GraphType.SWEEP, tolerance=tolerance)

            # x = list(range(1, agent_w.total_sweeps))
            # y = agent_w.convergence_graph_data.copy()
            #
            # plt.title('{} Diagonals'.format(i))
            # plt.ylabel('Hypervolume max difference')
            # plt.xlabel('Sweeps')
            # plt.plot(x, y)
            # plt.show()

            # Calc total time
            total_time = time.time() - t0

            # Convert value vectors to plain lists
            vectors = {key: [vector.tolist() for vector in state_vectors]
                       for key, state_vectors in agent_w.v.items()}

            # Prepare full_data to dump
            data = {
                'time': '{}s'.format(total_time),
                'memory': {
                    'v_s_0': len(agent_w.v[environment.initial_state]),
                    'full': sum(len(state_vectors) for state_vectors in agent_w.v.values())
                },
                'vectors': vectors
            }

            # Configuration of environment
            environment_info = vars(environment).copy()
            environment_info.pop('_action_space', None)
            environment_info.pop('np_random', None)

            # Configuration of agent
            agent_info = {
                'gamma': agent_w.gamma,
                'initial_q_value': agent_w.initial_q_value,
                'initial_seed': agent_w.initial_seed,
                'interval_to_get_data': agent_w.interval_to_get_data,
                'max_steps': agent_w.max_iterations,
                'total_sweeps': agent_w.total_sweeps,
                'tolerance': tolerance
            }

            # Extra data
            data.update({'environment': environment_info})
            data.update({'agent': agent_info})

            # Dump partial execution
            dumps(data=data, columns=i, environment=environment)

def draft_w():
    gamma = .9

    # for decimal_precision in [0.01, 0.005, 1, 0.5, 0.05, 0.1]:
    for decimal_precision in [0.005]:
        # Set number of decimal places allowed
        Vector.set_decimal_precision(decimal_precision=decimal_precision)

        tolerance = decimal_precision

        for i in ['full']:
            # Create environment
            for environment in [ResourceGathering()]:
                # Create agent
                agent_bn = AgentBN(environment=environment,
                                   convergence_graph=False, gamma=gamma)

                # Time train
                t0 = time.time()

                print('{} cols \ntolerance: {}'.format(i, tolerance))

                agent_bn.train(graph_type=GraphType.SWEEP, tolerance=tolerance,
                               sweeps_dump=30)

                # Calc total time
                total_time = time.time() - t0

                # Convert value vectors to plain lists
                vectors = {
                    key: [vector.tolist() for vector in state_vectors]
                    for key, state_vectors in agent_bn.v.items()
                }

                # Prepare full_data to dump
                data = {
                    'time': '{}s'.format(total_time),
                    'memory': {
                        'v_s_0': len(agent_bn.v[environment.initial_state]),
                        'full': sum(len(state_vectors) for state_vectors in agent_bn.v.values())
                    },
                    'vectors': vectors
                }

                # Configuration of environment
                environment_info = vars(environment).copy()
                environment_info.pop('_action_space', None)
                environment_info.pop('np_random', None)

                # Configuration of agent
                agent_info = {
                    'gamma': agent_bn.gamma,
                    'initial_q_value': agent_bn.initial_q_value,
                    'initial_seed': agent_bn.initial_seed,
                    'interval_to_get_data': agent_bn.interval_to_get_data,
                    'total_sweeps': agent_bn.total_sweeps,
                    'tolerance': tolerance
                }

                # Extra data
                data.update({'environment': environment_info})
                data.update({'agent': agent_info})

                # Dump partial execution
                # dumps(data=data, columns=i, environment=environment)
                dumps(data=data, environment=environment)

                # Dump agent
                agent_bn.save()

def test_agents(environment: Environment, hv_reference: Vector, variable: str,
                agents_configuration: dict, graph_configuration: dict,
                epsilon: float = None, alpha: float = None,
                max_steps: int = None, states_to_observe: list = None,
                number_of_agents: int = 30, gamma: float = 1.,
                solution: list = None, initial_q_value: Vector = None,
                evaluation_mechanism: EvaluationMechanism = None):
    """
    If DATA_PER_STATE is chosen in `graph_configuration`, the agent trains for
    `limit` steps and only gathers train_data in the last step (`interval` is
    ignored).

    If MEMORY is chosen in `graph_configuration`, the agent trains for `limit`
    steps and takes train_data every `interval` steps.

    :param initial_q_value:
    :param graph_configuration:
    :param solution:
    :param environment:
    :param hv_reference:
    :param variable:
    :param agents_configuration:
    :param epsilon:
    :param alpha:
    :param max_steps:
    :param states_to_observe:
    :param number_of_agents:
    :param gamma:
    :param evaluation_mechanism:
    :return:
    """
    # Extract graph_types
    graph_types = set(graph_configuration.keys())

    if len(graph_types) > 2:
        print('Using more than two graphs is not recommended.')

    # Parameters
    if states_to_observe is None:
        states_to_observe = {environment.initial_state}

    complex_states = isinstance(environment.observation_space[0], gym.spaces.Tuple)

    # DATA_PER_STATE cannot be plotted for complex (tuple-of-tuples) states
    if complex_states and GraphType.DATA_PER_STATE in graph_types:
        print('This environment has complex states, so the DATA_PER_STATE graph '
              'is disabled.')
        graph_configuration.pop(GraphType.DATA_PER_STATE)

    # Build environment
    env_name = environment.__class__.__name__
    env_name_snake = str_to_snake_case(env_name)

    # File timestamp
    timestamp = int(time.time())

    # Write all information in configuration path
    write_config_file(timestamp=timestamp, number_of_agents=number_of_agents,
                      env_name_snake=env_name_snake,
                      seed=','.join(map(str, range(number_of_agents))),
                      epsilon=epsilon, alpha=alpha, gamma=gamma,
                      max_steps=max_steps, variable=variable,
                      agents_configuration=agents_configuration,
                      graph_configuration=graph_configuration,
                      evaluation_mechanism=evaluation_mechanism)

    # Create graphs structure
    graphs, graphs_info = initialize_graph_data(
        graph_types=graph_types, agents_configuration=agents_configuration)

    # Show information
    print('Environment: {}'.format(env_name))

    for graph_type in graph_types:
        # Extract interval and limit
        interval = graph_configuration[graph_type].get('interval', 1)
        limit = graph_configuration[graph_type]['limit']

        # Show information
        print('\t' + 'Graph type: {} - [{}/{}]'.format(graph_type, limit, interval))

        # Set interval to get train_data
        Agent.interval_to_get_data = interval

        # Run one execution per agent, each with a different initial seed
        for seed in range(number_of_agents):
            # Show information
            print(('\t' * 2) + 'Execution: {}'.format(seed + 1))

            # For each configuration
            for agent_type in agents_configuration:
                # Show information
                print(('\t' * 3) + 'Agent: {}'.format(agent_type.value))

                # Extract configuration for that agent
                for configuration in agents_configuration[agent_type].keys():
                    # Show information
                    print(('\t' * 4) + '{}: {}'.format(variable, configuration),
                          end=' ')

                    # Mark of time
                    t0 = time.time()

                    # Reset environment
                    environment.reset()
                    environment.seed(seed=seed)

                    # Variable parameters
                    parameters = {
                        'epsilon': epsilon, 'alpha': alpha, 'gamma': gamma,
                        'max_steps': max_steps,
                        'evaluation_mechanism': evaluation_mechanism,
                        'initial_value': initial_q_value
                    }

                    if variable == 'decimal_precision':
                        Vector.set_decimal_precision(decimal_precision=configuration)
                    else:
                        # Modify current configuration
                        parameters.update({variable: configuration})

                    agent, v_s_0 = train_agent_and_get_v_s_0(
                        agent_type=agent_type, environment=environment,
                        graph_type=graph_type, graph_types=graph_types,
                        hv_reference=hv_reference, limit=limit, seed=seed,
                        parameters=parameters,
                        states_to_observe=states_to_observe)

                    print('-> {:.2f}s'.format(time.time() - t0))

                    train_data = dict()

                    if agent_type is AgentType.PQL and graph_type is GraphType.DATA_PER_STATE:
                        train_data.update({
                            'vectors': {
                                state: {
                                    action: agent.q_set(state=state, action=action)
                                    for action in agent.nd[state].keys()
                                }
                                for state in agent.nd.keys()
                            }
                        })

                    # Order vectors by proximity to the origin Vec(0)
                    train_data.update({
                        'v_s_0': Vector.order_vectors_by_origin_nearest(vectors=v_s_0),
                        # 'q': agent.q,
                        # 'v': agent.v
                    })

                    # Write vectors found into path
                    dumps_train_data(
                        timestamp=timestamp, seed=seed,
                        env_name_snake=env_name_snake, train_data=train_data,
                        variable=variable, agent_type=agent_type,
                        configuration=configuration,
                        evaluation_mechanism=evaluation_mechanism,
                        columns=environment.observation_space[0].n)

                    # Update graphs
                    update_graphs(graphs=graphs, agent=agent,
                                  graph_type=graph_type,
                                  configuration=str(configuration),
                                  agent_type=agent_type,
                                  states_to_observe=states_to_observe,
                                  graphs_info=graphs_info, solution=solution)

    prepare_data_and_show_graph(timestamp=timestamp, env_name=env_name,
                                env_name_snake=env_name_snake, graphs=graphs,
                                number_of_agents=number_of_agents,
                                agents_configuration=agents_configuration,
                                alpha=alpha, epsilon=epsilon, gamma=gamma,
                                graph_configuration=graph_configuration,
                                max_steps=max_steps,
                                initial_state=environment.initial_state,
                                variable=variable, graphs_info=graphs_info,
                                evaluation_mechanism=evaluation_mechanism,
                                solution=solution)

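# A hypothetical usage sketch for `test_agents`. The concrete environment, the
# AgentType member and the colour values in `agents_configuration` are
# assumptions for illustration, not a verified entry point. It exercises the two
# graph modes described in the docstring: DATA_PER_STATE only honours `limit`,
# while MEMORY also samples train_data every `interval` steps.
def example_test_agents():
    environment = DeepSeaTreasure(columns=3)

    test_agents(
        environment=environment,
        hv_reference=environment.hv_reference,
        variable='decimal_precision',
        # Maps each agent type to the values tried for `variable`
        agents_configuration={AgentType.PQL: {0.01: 'blue', 0.05: 'red'}},
        graph_configuration={
            GraphType.DATA_PER_STATE: {'limit': 1000},           # `interval` ignored
            GraphType.MEMORY: {'limit': 1000, 'interval': 100},  # sample every 100 steps
        },
        epsilon=0.4, alpha=0.1, max_steps=1000, number_of_agents=5)
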
def main():
    # Define gamma
    gamma = .9

    # Make a dump every 30 sweeps
    sweeps_dump = 30

    for decimal_precision in [0.01, 0.005]:
        # Set number of decimal places allowed
        Vector.set_decimal_precision(decimal_precision=decimal_precision)

        # Use the same tolerance as the decimal precision (it can be changed
        # independently)
        tolerance = decimal_precision

        # Create environment
        environment = ResourceGatheringEpisodic()

        # Create agent
        agent_bn = AgentBN(environment=environment, convergence_graph=False,
                           gamma=gamma)

        # Time train
        t0 = time.time()

        print('Training with tolerance: {}...'.format(tolerance))

        agent_bn.train(graph_type=GraphType.SWEEP, tolerance=tolerance,
                       sweeps_dump=sweeps_dump)

        # Calc total time
        total_time = time.time() - t0

        # Convert value vectors to plain lists
        vectors = {key: [vector.tolist() for vector in state_vectors]
                   for key, state_vectors in agent_bn.v.items()}

        # Prepare full_data to dump
        data = {
            'time': '{}s'.format(total_time),
            'memory': {
                'v_s_0': len(agent_bn.v[environment.initial_state]),
                'full': sum(len(state_vectors) for state_vectors in agent_bn.v.values())
            },
            'vectors': vectors
        }

        # Configuration of environment
        environment_info = vars(environment).copy()
        environment_info.pop('_action_space', None)
        environment_info.pop('np_random', None)

        # Configuration of agent
        agent_info = {
            'gamma': agent_bn.gamma,
            'initial_q_value': agent_bn.initial_q_value,
            'initial_seed': agent_bn.initial_seed,
            'interval_to_get_data': agent_bn.interval_to_get_data,
            'total_sweeps': agent_bn.total_sweeps,
            'tolerance': tolerance
        }

        # Extra data
        data.update({'environment': environment_info})
        data.update({'agent': agent_info})

        # Dump partial execution
        dumps(data=data, environment=environment)

        # Dump binary information
        agent_bn.save()