import time

# NOTE: the import paths below are assumptions inferred from how these names
# are used in this module (Agent/AgentBN, the environments, GraphType, Vector
# and the local `dumps` helper); adjust them to the project's actual layout.
from agents import Agent, AgentBN
from environments import ResourceGathering, ResourceGatheringEpisodic
from models import GraphType, Vector
from utils import dumps


def get_trained_agent() -> AgentBN:
    # Environment
    environment = ResourceGatheringEpisodic()

    # Agent
    agent = AgentBN(environment=environment, gamma=.9)

    # Vector precision
    Vector.set_decimal_precision(decimal_precision=0.01)

    # Train agent
    agent.train(graph_type=GraphType.SWEEP, limit=10)

    return agent
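# Illustrative usage (commented out so importing this module has no side
# effects). It assumes AgentBN keeps its value vectors in `agent.v`, keyed by
# state, as draft_w() and main() below rely on:
#
#     agent = get_trained_agent()
#     total = sum(len(vectors) for vectors in agent.v.values())
#     print('non-dominated vectors kept: {}'.format(total))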

def train_agent() -> Agent:
    # Environment
    # environment = DeepSeaTreasureRightDownStochastic(columns=3)
    # environment = DeepSeaTreasureRightDown(columns=3)
    # environment = PyramidMDPNoBounces(diagonals=3, n_transition=0.95)
    # environment = DeepSeaTreasure()
    environment = ResourceGathering()

    # Agent
    # agent = AgentMPQ(
    #     environment=environment, hv_reference=environment.hv_reference, alpha=0.1, epsilon=0.4, max_steps=1000
    # )
    # agent = AgentMPQ(environment=environment, hv_reference=environment.hv_reference, alpha=0.01)
    agent = AgentBN(environment=environment, gamma=.9)

    # Vector precision
    Vector.set_decimal_precision(decimal_precision=0.01)

    # Train agent
    # agent.train(graph_type=GraphType.SWEEP, tolerance=0.00001)
    agent.train(graph_type=GraphType.SWEEP, limit=13)

    return agent

def draft_w():
    gamma = .9

    # for decimal_precision in [0.01, 0.005, 1, 0.5, 0.05, 0.1]:
    for decimal_precision in [0.005]:
        # Set the number of decimals allowed
        Vector.set_decimal_precision(decimal_precision=decimal_precision)

        tolerance = decimal_precision

        for i in ['full']:
            # Create environment
            for environment in [ResourceGathering()]:
                # Create agent
                agent_bn = AgentBN(environment=environment, convergence_graph=False, gamma=gamma)

                # Time the training
                t0 = time.time()

                # Log the configuration of this run
                print('{} cols \ntolerance: {}'.format(i, tolerance))

                agent_bn.train(graph_type=GraphType.SWEEP, tolerance=tolerance, sweeps_dump=30)

                # Calc total time
                total_time = time.time() - t0

                # Convert the value vectors to plain lists
                vectors = {
                    key: [vector.tolist() for vector in vectors]
                    for key, vectors in agent_bn.v.items()
                }

                # Prepare data to dump
                data = {
                    'time': '{}s'.format(total_time),
                    'memory': {
                        'v_s_0': len(agent_bn.v[environment.initial_state]),
                        'full': sum(len(vectors) for vectors in agent_bn.v.values())
                    },
                    'vectors': vectors
                }

                # Configuration of the environment
                environment_info = vars(environment).copy()
                environment_info.pop('_action_space', None)
                environment_info.pop('np_random', None)

                # Configuration of the agent
                agent_info = {
                    'gamma': agent_bn.gamma,
                    'initial_q_value': agent_bn.initial_q_value,
                    'initial_seed': agent_bn.initial_seed,
                    'interval_to_get_data': agent_bn.interval_to_get_data,
                    'total_sweeps': agent_bn.total_sweeps,
                    'tolerance': tolerance
                }

                # Extra data
                data.update({'environment': environment_info})
                data.update({'agent': agent_info})

                # Dump partial execution data
                # dumps(data=data, columns=i, environment=environment)
                dumps(data=data, environment=environment)

                # Dump the agent itself
                agent_bn.save()

def main():
    # Define gamma
    gamma = .9

    # Dump a checkpoint every 30 sweeps
    sweeps_dumps = 30

    for decimal_precision in [0.01, 0.005]:
        # Set the number of decimals allowed
        Vector.set_decimal_precision(decimal_precision=decimal_precision)

        # Use the same tolerance as the decimal precision (it can be changed
        # independently if desired)
        tolerance = decimal_precision

        # Create environment
        environment = ResourceGatheringEpisodic()

        # Create agent
        agent_bn = AgentBN(environment=environment, convergence_graph=False, gamma=gamma)

        # Time the training
        t0 = time.time()

        print('Training with tolerance: {}...'.format(tolerance))

        agent_bn.train(graph_type=GraphType.SWEEP, tolerance=tolerance, sweeps_dump=sweeps_dumps)

        # Calc total time
        total_time = time.time() - t0

        # Convert the value vectors to plain lists
        vectors = {key: [vector.tolist() for vector in vectors] for key, vectors in agent_bn.v.items()}

        # Prepare data to dump
        data = {
            'time': '{}s'.format(total_time),
            'memory': {
                'v_s_0': len(agent_bn.v[environment.initial_state]),
                'full': sum(len(vectors) for vectors in agent_bn.v.values())
            },
            'vectors': vectors
        }

        # Configuration of the environment
        environment_info = vars(environment).copy()
        environment_info.pop('_action_space', None)
        environment_info.pop('np_random', None)

        # Configuration of the agent
        agent_info = {
            'gamma': agent_bn.gamma,
            'initial_q_value': agent_bn.initial_q_value,
            'initial_seed': agent_bn.initial_seed,
            'interval_to_get_data': agent_bn.interval_to_get_data,
            'total_sweeps': agent_bn.total_sweeps,
            'tolerance': tolerance
        }

        # Extra data
        data.update({'environment': environment_info})
        data.update({'agent': agent_info})

        # Dump partial execution data
        dumps(data=data, environment=environment)

        # Dump the agent in binary form
        agent_bn.save()
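
# Entry-point guard (an assumption: this module appears to be an experiment
# script, with main() as the full run and draft_w() as a draft variant), so
# that importing it does not trigger training.
if __name__ == '__main__':
    main()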