def main():
    """
    Evaluate each manually predefined policy and print the outcome.

    Every policy is converted to a list of ``(state, action)`` tuples before
    being handed to the agent. Results are collected in a dictionary keyed by
    the 1-based position of the policy and printed once at the end.
    :return:
    """
    # Agent working on the episodic resource gathering environment
    agent: AgentBN = AgentBN(environment=ResourceGatheringEpisodic(),
                             gamma=0.9)

    # Policy number -> evaluation result
    results = {}

    for number, policy in enumerate(rge_policies.copy(), start=1):
        # The agent receives the policy as a list of (state, action) tuples
        state_action_pairs = [(state, policy[state]) for state in policy]

        results[number] = agent.evaluate_policy(policy=state_action_pairs,
                                                tolerance=0.000001)

    print(results)
예제 #2
0
def get_trained_agent() -> AgentBN:
    """
    Build an ``AgentBN`` on ``ResourceGatheringEpisodic`` and train it.

    The vector decimal precision is set to 0.01 after the agent is created
    and before training starts; training runs in SWEEP mode with a limit
    of 10.
    :return: the trained agent
    """
    trained_agent = AgentBN(environment=ResourceGatheringEpisodic(), gamma=.9)

    # Global precision used by Vector from now on
    Vector.set_decimal_precision(decimal_precision=0.01)

    trained_agent.train(graph_type=GraphType.SWEEP, limit=10)

    return trained_agent
예제 #3
0
def evaluate_predefined_policies():
    """
    Evaluate each predefined policy as-is and print the outcome.

    Results are collected in a dictionary keyed by the 1-based position of
    the policy and printed once at the end.
    :return:
    """
    # Agent working on the episodic resource gathering environment
    agent: AgentBN = AgentBN(environment=ResourceGatheringEpisodic(),
                             gamma=0.9)

    # Policy number -> evaluation result
    results = {}

    for number, policy in enumerate(rge_policies.copy(), start=1):
        results[number] = agent.evaluate_policy(policy=policy,
                                                tolerance=0.000001)

    print(results)
예제 #4
0
def draft_w():
    """
    Train an ``AgentW`` once per decimal precision and dump each run.

    For every precision a new environment and agent are created, the agent is
    trained in SWEEP mode with a limit of 1, and the elapsed time, the size
    of the value table, the vectors themselves and the environment/agent
    configuration are passed to ``dumps``.
    :return:
    """
    # Only recorded in the dumped agent configuration; it is not passed to
    # train()
    tolerance = 0.00001

    for decimal_precision in (0.05, 0.005, 0.001):
        environment = ResourceGatheringEpisodic()

        agent_w = AgentW(environment=environment,
                         convergence_graph=True,
                         gamma=.9)

        # The timed span covers setting the precision and the training
        started_at = time.time()

        Vector.set_decimal_precision(decimal_precision=decimal_precision)
        agent_w.train(graph_type=GraphType.SWEEP, limit=1)

        total_time = time.time() - started_at

        # Turn each vector into a plain list through its tolist() method
        vectors_by_state = {}
        for state, state_vectors in agent_w.v.items():
            vectors_by_state[state] = [
                vector.tolist() for vector in state_vectors
            ]

        memory_info = {
            'v_s_0': len(agent_w.v[environment.initial_state]),
            'full': sum(map(len, agent_w.v.values()))
        }

        # Environment attributes, without '_action_space' and 'np_random'
        environment_info = vars(environment).copy()
        for excluded in ('_action_space', 'np_random'):
            environment_info.pop(excluded, None)

        agent_info = {
            'gamma': agent_w.gamma,
            'initial_q_value': agent_w.initial_q_value,
            'initial_seed': agent_w.initial_seed,
            'interval_to_get_data': agent_w.interval_to_get_data,
            'max_steps': agent_w.max_iterations,
            'total_sweeps': agent_w.total_sweeps,
            'tolerance': tolerance
        }

        data = {
            'time': '{}s'.format(total_time),
            'memory': memory_info,
            'vectors': vectors_by_state,
            'environment': environment_info,
            'agent': agent_info
        }

        # Dump this partial execution
        dumps(data=data, environment=environment)
예제 #5
0
from environments import ResourceGatheringEpisodic

# Resource tuples used as the second component of the policy keys below.
# NOTE(review): presumably ordered (gold, gem), as the names suggest -
# confirm against the environment.
nothing = (0, 0)
gold = (1, 0)
gem = (0, 1)
both = (1, 1)

# Integer action codes stored as the values of the policies.
# NOTE(review): assumed to match the environment's own action ids - confirm.
UP = 0
RIGHT = 1
DOWN = 2
LEFT = 3

# Third component of the policy keys below
attacked = False

# Environment instance, used below to enumerate its states
env = ResourceGatheringEpisodic()

# Baseline policy: every state returned by env.states() maps to UP. The
# policies defined afterwards spread this dictionary and override entries.
default_policies = {state: UP for state in env.states()}

policies: list = [
    # Green (1)
    {
        **default_policies,
        **{
            ((2, 4), nothing, attacked): RIGHT,
            ((3, 4), nothing, attacked): RIGHT,
            ((4, 4), nothing, attacked): UP,
            ((4, 3), nothing, attacked): UP,
            ((4, 2), nothing, attacked): UP,
            ((4, 1), gem, attacked): DOWN,
            ((4, 2), gem, attacked): DOWN,
예제 #6
0
 def setUp(self):
     """Create the environment under test and store it on the instance."""
     # The seed is fixed to 0 for testing (presumably so that runs are
     # reproducible).
     self.environment = ResourceGatheringEpisodic(seed=0)
예제 #7
0
class TestResourceGatheringLimit(TestResourceGathering):
    """
    Tests for ``ResourceGatheringEpisodic``.

    Extends ``TestResourceGathering`` and replaces the environment under test
    with the episodic variant, which additionally exposes ``steps`` and
    ``steps_limit``.

    States are handled as ``(position, objects, attacked)`` tuples, where
    ``position`` is ``(x, y)`` and ``objects`` is ``(gold, gems)``.
    """

    def setUp(self):
        # The seed is fixed to 0 for testing.
        self.environment = ResourceGatheringEpisodic(seed=0)

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    def _step_to(self, direction):
        """
        Perform one ``step`` with the action registered under ``direction``.
        :param direction: key of ``self.environment.actions`` ('UP', ...)
        :return: whatever ``step`` returns (unpacked by callers as a 4-tuple)
        """
        return self.environment.step(
            action=self.environment.actions[direction])

    def _restart_at(self, state):
        """
        Reset the environment and force ``state`` as its current state.
        :param state: state tuple to start from
        :return:
        """
        self.environment.reset()
        self.environment.current_state = state

    def _assert_next_state(self, direction, expected):
        """
        Assert that ``next_state`` for ``direction`` returns ``expected``.
        :param direction: key of ``self.environment.actions``
        :param expected: expected state tuple
        :return:
        """
        next_state = self.environment.next_state(
            action=self.environment.actions[direction])
        self.assertEqual(expected, next_state)

    def _assert_reachable(self, state, direction, expected):
        """
        Assert that the states reachable from ``state`` with ``direction``
        are exactly ``expected``.

        The size is checked first, so duplicated entries in the returned
        collection are detected. The content is then compared as a set, so
        that a failure reports which states are missing or unexpected.
        :param state: origin state
        :param direction: key of ``self.environment.actions``
        :param expected: set with the expected reachable states
        :return:
        """
        reachable = self.environment.reachable_states(
            state=state, action=self.environment.actions[direction])

        self.assertEqual(len(expected), len(reachable))
        self.assertEqual(expected, set(reachable))

    # ------------------------------------------------------------------
    # Tests
    # ------------------------------------------------------------------

    def test_init(self):
        """
        The constructor must create the expected attributes and defaults.
        :return:
        """

        # Attributes describing the map
        for attribute in ('gold_positions', 'gem_positions',
                          'enemies_positions', 'home_position'):
            self.assertTrue(hasattr(self.environment, attribute))

        # Observation space: (position, objects, attacked)
        self.assertEqual(
            gym.spaces.Tuple(
                (gym.spaces.Tuple(
                    (gym.spaces.Discrete(5), gym.spaces.Discrete(5))),
                 gym.spaces.Tuple(
                     (gym.spaces.Discrete(2), gym.spaces.Discrete(2))),
                 spaces.Boolean())), self.environment.observation_space)

        # By default initial position is (2, 4)
        self.assertEqual(((2, 4), (0, 0), False),
                         self.environment.initial_state)

        # Default reward is (0, 0, 0)
        self.assertEqual((0, 0, 0), self.environment.default_reward)

        # Attributes specific to the episodic variant
        self.assertTrue(hasattr(self.environment, 'steps'))
        self.assertTrue(hasattr(self.environment, 'steps_limit'))

    def test_reset(self):
        """
        After the inherited reset test has run, the step counter must be 0.
        :return:
        """

        # Start from an arbitrary counter value
        self.environment.steps = random.randint(0, 1000)

        # Inherited checks
        super().test_reset()

        self.assertEqual(0, self.environment.steps)

    def test_step(self):
        """
        Walk the map with ``step`` and check states, rewards and finality.

        Reward layout:   [enemy_attack, gold, gems]
        State layout:    (position, objects, attacked)
        The initial position is (2, 4).
        :return:
        """

        # Disable enemy attack
        self.environment.p_attack = 0

        next_state, reward, is_final = None, None, None

        # 2 steps to RIGHT
        for _ in range(2):
            self._step_to('RIGHT')

        # 3 steps to UP (get a gem)
        for _ in range(3):
            next_state, reward, is_final, _ = self._step_to('UP')

        self.assertEqual(((4, 1), (0, 1), False), next_state)
        self.assertEqual([0, 0, 0], reward)
        self.assertFalse(is_final)

        self._step_to('UP')

        # 2 steps to LEFT (ends at (2, 0) holding gold and gem)
        for _ in range(2):
            next_state, reward, is_final, _ = self._step_to('LEFT')

        self.assertEqual(((2, 0), (1, 1), False), next_state)
        self.assertEqual([0, 0, 0], reward)
        self.assertFalse(is_final)

        # Go home: 4 steps to DOWN
        for _ in range(4):
            next_state, reward, is_final, _ = self._step_to('DOWN')

        self.assertEqual(((2, 4), (1, 1), False), next_state)
        self.assertEqual([0, 1, 1], reward)
        self.assertTrue(is_final)

        ########################################################################
        # Trying to get gold through the enemy
        ########################################################################

        self.environment.reset()

        # 4 steps to UP
        for _ in range(4):
            next_state, reward, is_final, _ = self._step_to('UP')

        self.assertEqual(((2, 0), (1, 0), False), next_state)
        self.assertEqual([0, 0, 0], reward)
        self.assertFalse(is_final)

        # Force the enemy to attack
        self.environment.p_attack = 1

        # Go to enemy position
        next_state, reward, is_final, _ = self._step_to('DOWN')

        # Sent back home, without resources and flagged as attacked
        self.assertEqual(((2, 4), (0, 0), True), next_state)
        self.assertEqual([-1, 0, 0], reward)
        self.assertTrue(is_final)

        ########################################################################
        # Running out of steps
        ########################################################################

        # Set a state with gold and gem
        self.environment.current_state = ((3, 4), (1, 1), False)

        # Consume every remaining step
        remaining_steps = self.environment.steps_limit - self.environment.steps

        for _ in range(remaining_steps):
            next_state, reward, is_final, _ = self._step_to('RIGHT')

        self.assertEqual(((4, 4), (1, 1), False), next_state)
        self.assertEqual([0, 0, 0], reward)
        self.assertTrue(is_final)

    def test_transition_reward(self):
        """
        The reward must be determined by the state that is reached.
        :return:
        """

        # Home position while holding at least one resource
        final_states = (((2, 4), (1, 0), False),
                        ((2, 4), (0, 1), False),
                        ((2, 4), (1, 1), False))

        # Neither the initial state nor the action changes the expectation
        for state in self.environment.states():

            self.environment.current_state = state

            for a in self.environment.action_space:

                for reachable_state in self.environment.reachable_states(
                        state=state, action=a):

                    # Decompose next state
                    _, next_objects, next_attacked = reachable_state

                    reward = self.environment.transition_reward(
                        state=state, action=a, next_state=reachable_state)

                    if reachable_state in final_states:
                        # [0, gold, gems]
                        self.assertEqual([0, *next_objects], reward)

                    elif next_attacked:
                        self.assertEqual([-1, 0, 0], reward)

                    else:
                        # Default reward
                        self.assertEqual([0, 0, 0], reward)

    def test__next_state(self):
        """
        Check ``next_state`` on the four corners and on the resource cells.
        :return:
        """

        ########################################################################
        # Begin at position (0, 0) (TOP-LEFT corner)
        ########################################################################
        state = ((0, 0), (0, 0), False)
        self._restart_at(state)

        # Cannot go to UP (Keep in same position)
        self._assert_next_state('UP', state)
        # Go to RIGHT (increment x axis)
        self._assert_next_state('RIGHT', ((1, 0), (0, 0), False))
        # Go to DOWN (increment y axis)
        self._assert_next_state('DOWN', ((0, 1), (0, 0), False))
        # Cannot go to LEFT (Keep in same position)
        self._assert_next_state('LEFT', state)

        ########################################################################
        # Set to (4, 0) (TOP-RIGHT corner)
        ########################################################################
        state = ((4, 0), (0, 0), False)
        self._restart_at(state)

        # Cannot go to UP (Keep in same position)
        self._assert_next_state('UP', state)
        # Cannot go to RIGHT (Keep in same position)
        self._assert_next_state('RIGHT', state)
        # Go to DOWN (increment y axis) (gem)
        self._assert_next_state('DOWN', ((4, 1), (0, 1), False))
        # Go to LEFT (decrement x axis) (enemy)
        self._assert_next_state('LEFT', ((2, 4), (0, 0), True))

        ########################################################################
        # Set to (4, 4) (DOWN-RIGHT corner)
        ########################################################################
        state = ((4, 4), (0, 0), False)
        self._restart_at(state)

        # Go to UP (decrement y axis)
        self._assert_next_state('UP', ((4, 3), (0, 0), False))
        # Cannot go to RIGHT (Keep in same position)
        self._assert_next_state('RIGHT', state)
        # Cannot go to DOWN (Keep in same position)
        self._assert_next_state('DOWN', state)
        # Go to LEFT (decrement x axis)
        self._assert_next_state('LEFT', ((3, 4), (0, 0), False))

        ########################################################################
        # Set to (0, 4) (DOWN-LEFT corner)
        ########################################################################
        state = ((0, 4), (0, 0), False)
        self._restart_at(state)

        # Go to UP (decrement y axis)
        self._assert_next_state('UP', ((0, 3), (0, 0), False))
        # Go to RIGHT (increment x axis)
        self._assert_next_state('RIGHT', ((1, 4), (0, 0), False))
        # Cannot go to DOWN (Keep in same position)
        self._assert_next_state('DOWN', state)
        # Cannot go to LEFT (Keep in same position)
        self._assert_next_state('LEFT', state)

        ########################################################################
        # Set to (1, 0) and go to get gold
        ########################################################################
        self._restart_at(((1, 0), (0, 0), False))
        self._assert_next_state('RIGHT', ((2, 0), (1, 0), False))

        ########################################################################
        # Set to (1, 0) and go to get gold, but there isn't
        # NOTE(review): this section is identical to the previous one (same
        # start state, same expectation); confirm which scenario was intended.
        ########################################################################
        self._restart_at(((1, 0), (0, 0), False))
        self._assert_next_state('RIGHT', ((2, 0), (1, 0), False))

        ########################################################################
        # Set to (4, 2) and go to get gem
        ########################################################################
        self._restart_at(((4, 2), (0, 0), False))
        self._assert_next_state('UP', ((4, 1), (0, 1), False))

        ########################################################################
        # Set to (4, 2) and go to get gem, but there isn't
        # NOTE(review): this section is identical to the previous one (same
        # start state, same expectation); confirm which scenario was intended.
        ########################################################################
        self._restart_at(((4, 2), (0, 0), False))
        self._assert_next_state('UP', ((4, 1), (0, 1), False))

    def test_reachable_states(self):
        """
        For every state and direction, check the set of reachable states.

        Moving next to an enemy yields two possible outcomes (being sent home
        attacked, or landing on the cell); every other move yields one.
        :return:
        """

        limit_x = (self.environment.observation_space[0][0].n - 1)
        limit_y = (self.environment.observation_space[0][1].n - 1)

        # Outcome of a successful enemy attack
        attacked_at_home = ((2, 4), (0, 0), True)

        for state in self.environment.states():

            # Decompose state (the attacked flag of the origin is not used)
            position, objects, _ = state
            (x, y) = position
            (gold, gems) = objects

            ####################################################################
            # Go to UP
            ####################################################################
            if position == (2, 2):
                expected = {attacked_at_home, ((2, 1), objects, False)}
            elif position == (3, 1) or position == (3, 0):
                expected = {attacked_at_home, ((3, 0), objects, False)}
            elif position == (4, 2):
                expected = {((x, y - 1), (gold, 1), False)}
            elif position == (2, 1):
                expected = {((x, y - 1), (1, gems), False)}
            elif y <= 0:
                expected = {((x, y), objects, False)}
            else:
                expected = {((x, y - 1), objects, False)}

            self._assert_reachable(state, 'UP', expected)

            ####################################################################
            # Go to RIGHT
            ####################################################################
            if position == (2, 0):
                expected = {attacked_at_home, ((3, 0), objects, False)}
            elif position == (1, 1):
                expected = {attacked_at_home, ((2, 1), objects, False)}
            elif position == (3, 1):
                expected = {((x + 1, y), (gold, 1), False)}
            elif position == (1, 0):
                expected = {((x + 1, y), (1, gems), False)}
            elif x >= limit_x:
                expected = {((x, y), objects, False)}
            else:
                expected = {((x + 1, y), objects, False)}

            self._assert_reachable(state, 'RIGHT', expected)

            ####################################################################
            # Go to DOWN
            ####################################################################
            if position == (2, 0):
                expected = {attacked_at_home, ((2, 1), objects, False)}
            elif position == (4, 0):
                expected = {((x, y + 1), (gold, 1), False)}
            elif y >= limit_y:
                expected = {((x, y), objects, False)}
            else:
                expected = {((x, y + 1), objects, False)}

            self._assert_reachable(state, 'DOWN', expected)

            ####################################################################
            # Go to LEFT
            ####################################################################
            if position == (4, 0):
                expected = {attacked_at_home, ((3, 0), objects, False)}
            elif position == (3, 1):
                expected = {attacked_at_home, ((2, 1), objects, False)}
            elif position == (3, 0):
                expected = {((x - 1, y), (1, gems), False)}
            elif x <= 0:
                expected = {((x, y), objects, False)}
            else:
                expected = {((x - 1, y), objects, False)}

            self._assert_reachable(state, 'LEFT', expected)
예제 #8
0
파일: draft_bn.py 프로젝트: Pozas91/tiadas
def main():
    """
    Train an ``AgentBN`` once per decimal precision, dump and save each run.

    For every precision the tolerance is set to the same value, a new
    environment and agent are created, the agent is trained in SWEEP mode,
    and the elapsed time, the size of the value table, the vectors
    themselves and the environment/agent configuration are passed to
    ``dumps``. Finally the agent is saved.
    :return:
    """
    gamma = .9
    # Passed to train() as sweeps_dump (a dump every 30 sweeps)
    sweeps_dumps = 30

    for decimal_precision in (0.01, 0.005):
        Vector.set_decimal_precision(decimal_precision=decimal_precision)

        # Same tolerance as the decimal precision, although it may differ
        tolerance = decimal_precision

        environment = ResourceGatheringEpisodic()
        agent_bn = AgentBN(environment=environment,
                           convergence_graph=False,
                           gamma=gamma)

        # The timed span covers the message below and the training
        started_at = time.time()

        print('Training with tolerance: {}...'.format(tolerance))

        agent_bn.train(graph_type=GraphType.SWEEP,
                       tolerance=tolerance,
                       sweeps_dump=sweeps_dumps)

        total_time = time.time() - started_at

        # Turn each vector into a plain list through its tolist() method
        vectors_by_state = {}
        for state, state_vectors in agent_bn.v.items():
            vectors_by_state[state] = [
                vector.tolist() for vector in state_vectors
            ]

        memory_info = {
            'v_s_0': len(agent_bn.v[environment.initial_state]),
            'full': sum(map(len, agent_bn.v.values()))
        }

        # Environment attributes, without '_action_space' and 'np_random'
        environment_info = vars(environment).copy()
        for excluded in ('_action_space', 'np_random'):
            environment_info.pop(excluded, None)

        agent_info = {
            'gamma': agent_bn.gamma,
            'initial_q_value': agent_bn.initial_q_value,
            'initial_seed': agent_bn.initial_seed,
            'interval_to_get_data': agent_bn.interval_to_get_data,
            'total_sweeps': agent_bn.total_sweeps,
            'tolerance': tolerance
        }

        data = {
            'time': '{}s'.format(total_time),
            'memory': memory_info,
            'vectors': vectors_by_state,
            'environment': environment_info,
            'agent': agent_info
        }

        # Dump this partial execution
        dumps(data=data, environment=environment)

        # Dump binary information
        agent_bn.save()