def main(): """ Evaluate manually predefined policies :return: """ # Build agent agent: AgentBN = AgentBN(environment=ResourceGatheringEpisodic(), gamma=0.9) # Policies policies = rge_policies.copy() # Simulation simulation = dict() for n, policy in enumerate(policies): # # of policy n += 1 # In this case we need transform to a list of tuples policy = list(map(lambda s: (s, policy[s]), policy)) # Evaluate policy policy_evaluated = agent.evaluate_policy(policy=policy, tolerance=0.000001) # Update simulation simulation.update({n: policy_evaluated}) print(simulation)
def get_trained_agent() -> AgentBN:
    # Environment
    environment = ResourceGatheringEpisodic()

    # Agent
    agent = AgentBN(environment=environment, gamma=.9)

    # Vector precision
    Vector.set_decimal_precision(decimal_precision=0.01)

    # Train agent
    agent.train(graph_type=GraphType.SWEEP, limit=10)

    return agent
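# Hedged usage sketch (not from the source): it assumes AgentBN stores the
# environment on `agent.environment` and keeps its value vectors in a dict
# `agent.v` keyed by state, as the dump scripts in this module suggest.
def inspect_trained_agent() -> None:
    agent = get_trained_agent()

    # Value vectors found for the initial state after the 10 training sweeps
    initial_state = agent.environment.initial_state

    for vector in agent.v[initial_state]:
        print(vector)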
def evaluate_predefined_policies():
    # Build agent
    agent: AgentBN = AgentBN(environment=ResourceGatheringEpisodic(), gamma=0.9)

    # Policies
    policies = rge_policies.copy()

    # Simulation
    simulation = dict()

    for n, policy in enumerate(policies, start=1):
        # Evaluate policy
        policy_evaluated = agent.evaluate_policy(policy=policy, tolerance=0.000001)

        # Update simulation
        simulation.update({n: policy_evaluated})

    print(simulation)
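# Hedged sketch (not from the source) of a friendlier report than the bare
# `print(simulation)` above. It assumes `evaluate_policy` returns a mapping
# from state to the value vector(s) computed under that policy.
def print_simulation(simulation: dict) -> None:
    for n, policy_evaluated in simulation.items():
        print('Policy {}:'.format(n))

        for state, value in policy_evaluated.items():
            print('  {} -> {}'.format(state, value))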
def draft_w():
    tolerance = 0.00001

    for decimal_precision in [0.05, 0.005, 0.001]:
        # Create environment
        # environment = ResourceGatheringEpisodicSimplified()
        # environment = ResourceGatheringSimplified()
        environment = ResourceGatheringEpisodic()

        # Create agent
        agent_w = AgentW(environment=environment, convergence_graph=True, gamma=.9)

        # Time train
        t0 = time.time()

        # Set number of decimals allowed
        Vector.set_decimal_precision(decimal_precision=decimal_precision)

        agent_w.train(graph_type=GraphType.SWEEP, limit=1)

        # Calc total time
        total_time = time.time() - t0

        # Convert to plain lists of vectors
        vectors = {
            key: [vector.tolist() for vector in v_vectors]
            for key, v_vectors in agent_w.v.items()
        }

        # Prepare full_data to dumps
        data = {
            'time': '{}s'.format(total_time),
            'memory': {
                'v_s_0': len(agent_w.v[environment.initial_state]),
                'full': sum(len(v_vectors) for v_vectors in agent_w.v.values())
            },
            'vectors': vectors
        }

        # Configuration of environment
        environment_info = vars(environment).copy()
        environment_info.pop('_action_space', None)
        environment_info.pop('np_random', None)

        # Configuration of agent
        agent_info = {
            'gamma': agent_w.gamma,
            'initial_q_value': agent_w.initial_q_value,
            'initial_seed': agent_w.initial_seed,
            'interval_to_get_data': agent_w.interval_to_get_data,
            'max_steps': agent_w.max_iterations,
            'total_sweeps': agent_w.total_sweeps,
            'tolerance': tolerance
        }

        # Extra data
        data.update({'environment': environment_info})
        data.update({'agent': agent_info})

        # Dumps partial execution
        dumps(data=data, environment=environment)
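# Hedged sketch of reading a dump back. The `dumps` helper is project-specific;
# this assumes it writes `data` as JSON and that the caller knows the output
# path (the `path` argument here is hypothetical).
import json


def load_dump(path: str) -> dict:
    with open(path) as f:
        data = json.load(f)

    # 'memory' summarizes how many vectors were kept; 'vectors' holds them all
    print(data['time'], data['memory']['v_s_0'], data['memory']['full'])

    return data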
from environments import ResourceGatheringEpisodic

nothing = (0, 0)
gold = (1, 0)
gem = (0, 1)
both = (1, 1)

UP = 0
RIGHT = 1
DOWN = 2
LEFT = 3

attacked = False

# Build instance of environment
env = ResourceGatheringEpisodic()

default_policies = {state: UP for state in env.states()}

policies: list = [
    # Green (1)
    {
        **default_policies,
        **{
            ((2, 4), nothing, attacked): RIGHT,
            ((3, 4), nothing, attacked): RIGHT,
            ((4, 4), nothing, attacked): UP,
            ((4, 3), nothing, attacked): UP,
            ((4, 2), nothing, attacked): UP,
            ((4, 1), gem, attacked): DOWN,
            ((4, 2), gem, attacked): DOWN,
def setUp(self):
    # Set initial_seed to 0 for testing.
    self.environment = ResourceGatheringEpisodic(seed=0)
class TestResourceGatheringLimit(TestResourceGathering):

    def setUp(self):
        # Set initial_seed to 0 for testing.
        self.environment = ResourceGatheringEpisodic(seed=0)

    def test_init(self):
        """
        Testing if constructor works.
        :return:
        """
        # This environment must have these additional attributes
        self.assertTrue(hasattr(self.environment, 'gold_positions'))
        self.assertTrue(hasattr(self.environment, 'gem_positions'))
        self.assertTrue(hasattr(self.environment, 'enemies_positions'))
        self.assertTrue(hasattr(self.environment, 'home_position'))

        # Observation space
        self.assertEqual(
            gym.spaces.Tuple(
                (gym.spaces.Tuple(
                    (gym.spaces.Discrete(5), gym.spaces.Discrete(5))),
                 gym.spaces.Tuple(
                     (gym.spaces.Discrete(2), gym.spaces.Discrete(2))),
                 spaces.Boolean())),
            self.environment.observation_space)

        # By default the initial position is (2, 4)
        self.assertEqual(((2, 4), (0, 0), False), self.environment.initial_state)

        # Default reward is (0, 0, 0)
        self.assertEqual((0, 0, 0), self.environment.default_reward)

        self.assertTrue(hasattr(self.environment, 'steps'))
        self.assertTrue(hasattr(self.environment, 'steps_limit'))

    def test_reset(self):
        self.environment.steps = random.randint(0, 1000)

        # Super method call
        super().test_reset()

        self.assertEqual(0, self.environment.steps)

    def test_step(self):
        """
        Testing step method.
        :return:
        """
        # Simple valid step
        # Reward:
        #   [enemy_attack, gold, gems]
        # Complex position:
        #   (position, resources_available)
        # Remember that initial position is (2, 4)

        # Disable enemy attack
        self.environment.p_attack = 0

        next_state, reward, is_final = None, None, None

        # Do 2 steps to RIGHT
        for _ in range(2):
            _ = self.environment.step(action=self.environment.actions['RIGHT'])

        # Do 3 steps to UP (get a gem)
        for _ in range(3):
            next_state, reward, is_final, _ = self.environment.step(
                action=self.environment.actions['UP'])

        self.assertEqual(((4, 1), (0, 1), False), next_state)
        self.assertEqual([0, 0, 0], reward)
        self.assertFalse(is_final)

        _ = self.environment.step(action=self.environment.actions['UP'])

        # Do 2 steps to LEFT
        for _ in range(2):
            next_state, reward, is_final, _ = self.environment.step(
                action=self.environment.actions['LEFT'])

        self.assertEqual(((2, 0), (1, 1), False), next_state)
        self.assertEqual([0, 0, 0], reward)
        self.assertFalse(is_final)

        # Go home: do 4 steps to DOWN
        for _ in range(4):
            next_state, reward, is_final, _ = self.environment.step(
                action=self.environment.actions['DOWN'])

        self.assertEqual(((2, 4), (1, 1), False), next_state)
        self.assertEqual([0, 1, 1], reward)
        self.assertTrue(is_final)

        ################################################################
        # Trying to get gold through the enemy
        ################################################################

        # Reset environment
        self.environment.reset()

        # Do 4 steps to UP
        for _ in range(4):
            next_state, reward, is_final, _ = self.environment.step(
                action=self.environment.actions['UP'])

        self.assertEqual(((2, 0), (1, 0), False), next_state)
        self.assertEqual([0, 0, 0], reward)
        self.assertFalse(is_final)

        # Force an enemy attack
        self.environment.p_attack = 1

        # Go to enemy position
        next_state, reward, is_final, _ = self.environment.step(
            action=self.environment.actions['DOWN'])

        # Reset at home
        self.assertEqual(((2, 4), (0, 0), True), next_state)
        self.assertEqual([-1, 0, 0], reward)
        self.assertTrue(is_final)

        # Set a state with gold and gem
        self.environment.current_state = ((3, 4), (1, 1), False)

        # Waste time
        steps_used = self.environment.steps

        # Do steps until steps_limit
        for _ in range(self.environment.steps_limit - steps_used):
            next_state, reward, is_final, _ = self.environment.step(
                action=self.environment.actions.get('RIGHT'))

        self.assertEqual(((4, 4), (1, 1), False), next_state)
        self.assertEqual([0, 0, 0], reward)
        self.assertTrue(is_final)

    def test_transition_reward(self):
        # In this environment the reward doesn't depend on the initial state
        for state in self.environment.states():

            self.environment.current_state = state

            # Nor does it depend on the action taken.
            for a in self.environment.action_space:

                for reachable_state in self.environment.reachable_states(
                        state=state, action=a):

                    # Decompose next state
                    next_position, next_objects, next_attacked = reachable_state

                    reward = self.environment.transition_reward(
                        state=state, action=a, next_state=reachable_state)

                    # Reach any final state
                    if reachable_state in [((2, 4), (1, 0), False),
                                           ((2, 4), (0, 1), False),
                                           ((2, 4), (1, 1), False)]:
                        expected_reward = list(next_objects)
                        expected_reward.insert(0, 0)
                        self.assertEqual(expected_reward, reward)
                    # The agent was attacked
                    elif next_attacked:
                        self.assertEqual([-1, 0, 0], reward)
                    # Default reward
                    else:
                        self.assertEqual([0, 0, 0], reward)

    def test__next_state(self):
        """
        Testing _next_state method.
        :return:
        """
        ################################################################
        # Begin at position (0, 0) (TOP-LEFT corner)
        ################################################################
        self.environment.reset()
        state = ((0, 0), (0, 0), False)
        self.environment.current_state = state

        # Cannot go UP (keep the same position)
        next_state = self.environment.next_state(
            action=self.environment.actions['UP'])
        self.assertEqual(state, next_state)

        # Go RIGHT (increment x axis)
        next_state = self.environment.next_state(
            action=self.environment.actions['RIGHT'])
        self.assertEqual(((1, 0), (0, 0), False), next_state)

        # Go DOWN (increment y axis)
        next_state = self.environment.next_state(
            action=self.environment.actions['DOWN'])
        self.assertEqual(((0, 1), (0, 0), False), next_state)

        # Cannot go LEFT (keep the same position)
        next_state = self.environment.next_state(
            action=self.environment.actions['LEFT'])
        self.assertEqual(state, next_state)

        ################################################################
        # Set to (4, 0) (TOP-RIGHT corner)
        ################################################################
        self.environment.reset()
        state = ((4, 0), (0, 0), False)
        self.environment.current_state = state

        # Cannot go UP (keep the same position)
        next_state = self.environment.next_state(
            action=self.environment.actions['UP'])
        self.assertEqual(state, next_state)

        # Cannot go RIGHT (keep the same position)
        next_state = self.environment.next_state(
            action=self.environment.actions['RIGHT'])
        self.assertEqual(state, next_state)

        # Go DOWN (increment y axis)
        next_state = self.environment.next_state(
            action=self.environment.actions['DOWN'])
        self.assertEqual(((4, 1), (0, 1), False), next_state)

        # Go LEFT (decrement x axis) (enemy)
        next_state = self.environment.next_state(
            action=self.environment.actions['LEFT'])
        self.assertEqual(((2, 4), (0, 0), True), next_state)

        ################################################################
        # Set to (4, 4) (DOWN-RIGHT corner)
        ################################################################
        self.environment.reset()
        state = ((4, 4), (0, 0), False)
        self.environment.current_state = state

        # Go UP (decrement y axis)
        next_state = self.environment.next_state(
            action=self.environment.actions['UP'])
        self.assertEqual(((4, 3), (0, 0), False), next_state)

        # Cannot go RIGHT (keep the same position)
        next_state = self.environment.next_state(
            action=self.environment.actions['RIGHT'])
        self.assertEqual(state, next_state)

        # Cannot go DOWN (keep the same position)
        next_state = self.environment.next_state(
            action=self.environment.actions['DOWN'])
        self.assertEqual(state, next_state)

        # Go LEFT (decrement x axis)
        next_state = self.environment.next_state(
            action=self.environment.actions['LEFT'])
        self.assertEqual(((3, 4), (0, 0), False), next_state)

        ################################################################
        # Set to (0, 4) (DOWN-LEFT corner)
        ################################################################
        self.environment.reset()
        state = ((0, 4), (0, 0), False)
        self.environment.current_state = state

        # Go UP (decrement y axis)
        next_state = self.environment.next_state(
            action=self.environment.actions['UP'])
        self.assertEqual(((0, 3), (0, 0), False), next_state)

        # Go RIGHT (increment x axis)
        next_state = self.environment.next_state(
            action=self.environment.actions['RIGHT'])
        self.assertEqual(((1, 4), (0, 0), False), next_state)

        # Cannot go DOWN (keep the same position)
        next_state = self.environment.next_state(
            action=self.environment.actions['DOWN'])
        self.assertEqual(state, next_state)

        # Cannot go LEFT (keep the same position)
        next_state = self.environment.next_state(
            action=self.environment.actions['LEFT'])
        self.assertEqual(state, next_state)

        ################################################################
        # Set to (1, 0) and go to get gold
        ################################################################
        self.environment.reset()
        state = ((1, 0), (0, 0), False)
        self.environment.current_state = state

        next_state = self.environment.next_state(
            action=self.environment.actions['RIGHT'])
        self.assertEqual(((2, 0), (1, 0), False), next_state)

        ################################################################
        # Set to (1, 0) and go to get gold, but it is already taken
        ################################################################
        self.environment.reset()
        state = ((1, 0), (1, 0), False)
        self.environment.current_state = state

        next_state = self.environment.next_state(
            action=self.environment.actions['RIGHT'])
        self.assertEqual(((2, 0), (1, 0), False), next_state)

        ################################################################
        # Set to (4, 2) and go to get gem
        ################################################################
        self.environment.reset()
        state = ((4, 2), (0, 0), False)
        self.environment.current_state = state

        next_state = self.environment.next_state(
            action=self.environment.actions['UP'])
        self.assertEqual(((4, 1), (0, 1), False), next_state)

        ################################################################
        # Set to (4, 2) and go to get gem, but it is already taken
        ################################################################
        self.environment.reset()
        state = ((4, 2), (0, 1), False)
        self.environment.current_state = state

        next_state = self.environment.next_state(
            action=self.environment.actions['UP'])
        self.assertEqual(((4, 1), (0, 1), False), next_state)

    def test_reachable_states(self):
        limit_x = (self.environment.observation_space[0][0].n - 1)
        limit_y = (self.environment.observation_space[0][1].n - 1)

        # For any state the following happens
        for state in self.environment.states():
            # Decompose state
            position, objects, attacked = state

            # Decompose elements
            (x, y) = position
            (gold, gems) = objects

            # Go UP
            reachable_states = self.environment.reachable_states(
                state=state, action=self.environment.actions['UP'])
            reachable_states_len = len(reachable_states)

            expected_reachable_states = set()
            expected_reachable_states_len = 1

            if position == (2, 2):
                expected_reachable_states_len = 2
                expected_reachable_states.add(((2, 4), (0, 0), True))
                expected_reachable_states.add(((2, 1), objects, False))
            elif position == (3, 1) or position == (3, 0):
                expected_reachable_states_len = 2
                expected_reachable_states.add(((2, 4), (0, 0), True))
                expected_reachable_states.add(((3, 0), objects, False))
            elif position == (4, 2):
                expected_reachable_states.add(((x, y - 1), (gold, 1), False))
            elif position == (2, 1):
                expected_reachable_states.add(((x, y - 1), (1, gems), False))
            elif y <= 0:
                expected_reachable_states.add(((x, y), objects, False))
            elif y > 0:
                expected_reachable_states.add(((x, y - 1), objects, False))

            self.assertEqual(expected_reachable_states_len, reachable_states_len)
            self.assertTrue(
                all(element in expected_reachable_states
                    for element in reachable_states)
                and all(element in reachable_states
                        for element in expected_reachable_states))

            # Go RIGHT
            reachable_states = self.environment.reachable_states(
                state=state, action=self.environment.actions['RIGHT'])
            reachable_states_len = len(reachable_states)

            expected_reachable_states = set()
            expected_reachable_states_len = 1

            if position == (2, 0):
                expected_reachable_states_len = 2
                expected_reachable_states.add(((2, 4), (0, 0), True))
                expected_reachable_states.add(((3, 0), objects, False))
            elif position == (1, 1):
                expected_reachable_states_len = 2
                expected_reachable_states.add(((2, 4), (0, 0), True))
                expected_reachable_states.add(((2, 1), objects, False))
            elif position == (3, 1):
                expected_reachable_states.add(((x + 1, y), (gold, 1), False))
            elif position == (1, 0):
                expected_reachable_states.add(((x + 1, y), (1, gems), False))
            elif x >= limit_x:
                expected_reachable_states.add(((x, y), objects, False))
            elif x < limit_x:
                expected_reachable_states.add(((x + 1, y), objects, False))

            self.assertEqual(expected_reachable_states_len, reachable_states_len)
            self.assertTrue(
                all(element in expected_reachable_states
                    for element in reachable_states)
                and all(element in reachable_states
                        for element in expected_reachable_states))

            # Go DOWN
            reachable_states = self.environment.reachable_states(
                state=state, action=self.environment.actions['DOWN'])
            reachable_states_len = len(reachable_states)

            expected_reachable_states = set()
            expected_reachable_states_len = 1

            if position == (2, 0):
                expected_reachable_states_len = 2
                expected_reachable_states.add(((2, 4), (0, 0), True))
                expected_reachable_states.add(((2, 1), objects, False))
            elif position == (4, 0):
                expected_reachable_states.add(((x, y + 1), (gold, 1), False))
            elif y >= limit_y:
                expected_reachable_states.add(((x, y), objects, False))
            elif y < limit_y:
                expected_reachable_states.add(((x, y + 1), objects, False))

            self.assertEqual(expected_reachable_states_len, reachable_states_len)
            self.assertTrue(
                all(element in expected_reachable_states
                    for element in reachable_states)
                and all(element in reachable_states
                        for element in expected_reachable_states))

            # Go LEFT
            reachable_states = self.environment.reachable_states(
                state=state, action=self.environment.actions['LEFT'])
            reachable_states_len = len(reachable_states)

            expected_reachable_states = set()
            expected_reachable_states_len = 1

            if position == (4, 0):
                expected_reachable_states_len = 2
                expected_reachable_states.add(((2, 4), (0, 0), True))
                expected_reachable_states.add(((3, 0), objects, False))
            elif position == (3, 1):
                expected_reachable_states_len = 2
                expected_reachable_states.add(((2, 4), (0, 0), True))
                expected_reachable_states.add(((2, 1), objects, False))
            elif position == (3, 0):
                expected_reachable_states.add(((x - 1, y), (1, gems), False))
            elif x <= 0:
                expected_reachable_states.add(((x, y), objects, False))
            elif x > 0:
                expected_reachable_states.add(((x - 1, y), objects, False))

            self.assertEqual(expected_reachable_states_len, reachable_states_len)
            self.assertTrue(
                all(element in expected_reachable_states
                    for element in reachable_states)
                and all(element in reachable_states
                        for element in expected_reachable_states))
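# Editorial refactoring sketch (not in the source): each pair of `all(...)`
# checks above verifies mutual containment, which is exactly set equality. A
# small helper states the intent directly; it assumes `reachable_states` is an
# iterable of hashable states.
def assert_same_states(test_case, expected: set, actual) -> None:
    # assertSetEqual reports missing and extra elements on failure
    test_case.assertSetEqual(expected, set(actual))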
def main():
    # Define gamma
    gamma = .9

    # Dump results every 30 sweeps
    sweeps_dumps = 30

    for decimal_precision in [0.01, 0.005]:
        # Set number of decimals allowed
        Vector.set_decimal_precision(decimal_precision=decimal_precision)

        # Use the same tolerance as the decimal precision (it can be changed independently)
        tolerance = decimal_precision

        # Create environment
        environment = ResourceGatheringEpisodic()

        # Create agent
        agent_bn = AgentBN(environment=environment, convergence_graph=False, gamma=gamma)

        # Time train
        t0 = time.time()

        print('Training with tolerance: {}...'.format(tolerance))

        agent_bn.train(graph_type=GraphType.SWEEP, tolerance=tolerance, sweeps_dump=sweeps_dumps)

        # Calc total time
        total_time = time.time() - t0

        # Convert to plain lists of vectors
        vectors = {
            key: [vector.tolist() for vector in v_vectors]
            for key, v_vectors in agent_bn.v.items()
        }

        # Prepare full_data to dumps
        data = {
            'time': '{}s'.format(total_time),
            'memory': {
                'v_s_0': len(agent_bn.v[environment.initial_state]),
                'full': sum(len(v_vectors) for v_vectors in agent_bn.v.values())
            },
            'vectors': vectors
        }

        # Configuration of environment
        environment_info = vars(environment).copy()
        environment_info.pop('_action_space', None)
        environment_info.pop('np_random', None)

        # Configuration of agent
        agent_info = {
            'gamma': agent_bn.gamma,
            'initial_q_value': agent_bn.initial_q_value,
            'initial_seed': agent_bn.initial_seed,
            'interval_to_get_data': agent_bn.interval_to_get_data,
            'total_sweeps': agent_bn.total_sweeps,
            'tolerance': tolerance
        }

        # Extra data
        data.update({'environment': environment_info})
        data.update({'agent': agent_info})

        # Dumps partial execution
        dumps(data=data, environment=environment)

        # Dump binary information
        agent_bn.save()
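# Standard entry-point guard (a sketch; not shown in the original snippet).
# Running this module directly launches the sweep-training loop above.
if __name__ == '__main__':
    main()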