Example #1
    def evaluate_bounds_are_of_type(self, bounds, type_id):
        total = 0
        position = Vector(0, 0)

        # Bottom horizontal bound
        position.y = bounds.y_min
        for x in range(bounds.x_min, bounds.x_max):
            position.x = x
            total += self.evaluate_tile_type(position, type_id)

        # Top horizontal bound
        position.y = bounds.y_max - 1
        for x in range(bounds.x_min, bounds.x_max):
            position.x = x
            total += self.evaluate_tile_type(position, type_id)

        # Left vertical bound (corners are already counted by the horizontal bounds)
        position.x = bounds.x_min
        for y in range(bounds.y_min + 1, bounds.y_max - 1):
            position.y = y
            total += self.evaluate_tile_type(position, type_id)

        # Right vertical bound
        position.x = bounds.x_max - 1
        for y in range(bounds.y_min + 1, bounds.y_max - 1):
            position.y = y
            total += self.evaluate_tile_type(position, type_id)

        return total
Example #2
    def __init__(self,
                 initial_state: tuple = (0, 0),
                 default_reward: tuple = (-1, -1),
                 seed: int = 0,
                 n_transition: float = 0.95,
                 diagonals: int = 9,
                 action_space: gym.spaces = None):
        """
        :param initial_state: Initial state where the agent starts.
        :param default_reward: (objective 1, objective 2)
        :param seed: Seed used for the np.random.RandomState method.
        :param n_transition: if it is 1, the indicated action is always performed (the original value is about 0.6).
        :param diagonals: Number of diagonals used to build this environment (allows experimenting with an identical
                        environment but considering only the first k diagonals; by default 9, i.e. all of them).
        :param action_space: Specific action space.
        """

        # Mesh shape for the selected diagonals, clamped to the original full-size environment (10 x 10).
        mesh_shape = (min(max(diagonals + 1, 1),
                          10), min(max(diagonals + 1, 1), 10))

        # States lying on the selected diagonal.
        diagonals_states = set(
            zip(range(0, diagonals + 1, 1), range(diagonals, -1, -1)))

        # Generate final states with their rewards
        finals = {
            state: (Vector(state) + 1) * 10
            for state in diagonals_states
        }

        # Pareto optimal
        PyramidMDP.pareto_optimal = {
            Vector(state) + 1
            for state in diagonals_states
        }

        # Generate obstacle states beyond the diagonal
        obstacles = frozenset((x, y) for x, y in finals.keys()
                              for y in range(y, diagonals + 1)
                              if (x, y) not in finals)

        # Default reward (objective_1, objective_2)
        default_reward = Vector(default_reward)

        # Transition probability
        assert 0 <= n_transition <= 1.
        self.n_transition = n_transition

        super().__init__(mesh_shape=mesh_shape,
                         initial_state=initial_state,
                         default_reward=default_reward,
                         finals=finals,
                         obstacles=obstacles,
                         seed=seed,
                         action_space=action_space)
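A minimal instantiation sketch for the constructor above, assuming it belongs to the PyramidMDP class referenced in the body and that the imports used elsewhere in these examples are available:

    # Hypothetical usage: keep only the first 3 diagonals and make transitions deterministic.
    environment = PyramidMDP(diagonals=3, n_transition=1.0)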
Example #3
    def __init__(self, initial_state: tuple = ((0, 0), False), default_reward: tuple = (0, 0), seed: int = 0,
                 action_space: gym.spaces = None):
        """
        :param initial_state: Initial state where the agent starts.
        :param default_reward: (objective 1, objective 2)
        :param seed: Seed used for the np.random.RandomState method.
        :param action_space: Specific action space.
        """

        # Dictionary of all treasures and their rewards.
        finals = {
            (8, 0): Vector([1, 9]),
            (8, 2): Vector([3, 9]),
            (8, 4): Vector([5, 9]),
            (8, 6): Vector([7, 9]),
            (8, 8): Vector([9, 9]),

            (0, 8): Vector([9, 1]),
            (2, 8): Vector([9, 3]),
            (4, 8): Vector([9, 5]),
            (6, 8): Vector([9, 7]),
        }

        # Define mesh shape
        mesh_shape = (9, 9)

        # Set obstacles
        obstacles = frozenset({(2, 2), (2, 3), (3, 2)})

        # Default reward plus time (objective 1, objective 2, time)
        default_reward += (-1,)
        default_reward = Vector(default_reward)

        # Build the observation space (position (x, y), bonus)
        observation_space = gym.spaces.Tuple(
            (
                gym.spaces.Tuple(
                    (gym.spaces.Discrete(mesh_shape[0]), gym.spaces.Discrete(mesh_shape[1]))
                ),
                spaces.Boolean()
            )
        )

        super().__init__(mesh_shape=mesh_shape, default_reward=default_reward, initial_state=initial_state,
                         finals=finals, obstacles=obstacles, observation_space=observation_space, seed=seed,
                         action_space=action_space)

        # Pit marks, which return the agent to the start location.
        self.pits = {
            (7, 1), (7, 3), (7, 5), (1, 7), (3, 7), (5, 7)
        }

        # X2 bonus
        self.bonus = [
            (3, 3)
        ]
Example #4
    def evaluate_tiles_in_bounds_are_of_type(self, bounds, type_id):
        total = 0
        position = Vector(0, 0)

        for x in range(bounds.x_min, bounds.x_max):
            position.x = x
            for y in range(bounds.y_min, bounds.y_max):
                position.y = y
                total += self.evaluate_tile_type(position, type_id)

        return total
Example #5
def draft_b_lp(columns: int):
    # Create environment
    environment = DeepSeaTreasureRightDownStochastic(columns=columns)

    # Vector precision
    Vector.set_decimal_precision(decimal_precision=0.000001)

    # Create instance of AgentB
    agent = AgentB(environment=environment, limited_precision=True)

    agent.simulate()
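A trivial invocation sketch for the helper above; the column count is an assumption (other examples in this listing use between 1 and 5 columns):

    # Hypothetical call: run the limited-precision AgentB simulation on a 3-column environment.
    draft_b_lp(columns=3)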
Example #6
    def is_room(self, start_position, area_bounds):
        found_floor = False
        current_position = Vector(start_position.x, start_position.y + 1)

        # Check vertical for the room bounds
        while (current_position.y <= area_bounds.y_max and not found_floor):
            if not self.is_tile_of_type(current_position, TILE_TYPES["WALL"]):
                found_floor = True
                current_position.y -= 1
            else:
                current_position.y += 1
        room_bounds_y = current_position.y
        next_position = room_bounds_y + 1

        if not found_floor or room_bounds_y == start_position.y:
            return False, next_position, None

        # Check horizontal for the room bounds
        found_floor = False
        current_position.x += 1

        while (current_position.x <= area_bounds.x_max and not found_floor):
            if not self.is_tile_of_type(current_position, TILE_TYPES["WALL"]):
                found_floor = True
                current_position.x -= 1
            else:
                current_position.x += 1

        room_bounds_x = current_position.x
        if not found_floor or room_bounds_x == start_position.x:
            return False, next_position, None

        # Check vertical with the bounds found for y
        for y in range(start_position.y, room_bounds_y + 1):
            current_position.y = y
            if not self.is_tile_of_type(current_position, TILE_TYPES["WALL"]):
                return False, next_position, None

        # Check horizontal with the bounds found for x
        current_position.y = start_position.y
        for x in range(start_position.x, room_bounds_x + 1):
            current_position.x = x
            if not self.is_tile_of_type(current_position, TILE_TYPES["WALL"]):
                return False, next_position, None

        if abs(start_position.x - room_bounds_x) == 1 or \
           abs(start_position.y - room_bounds_y) == 1:
            return False, next_position, None

        room_bounds = Bounds(start_position.x, start_position.y, room_bounds_x,
                             room_bounds_y)
        return True, next_position, room_bounds
Example #7
def main():
    # Get trained agent
    print('Training agent...')
    agent: AgentBN = get_trained_agent()

    # Set initial state
    initial_state = ((2, 4), (0, 0), False)

    # Initial vectors
    v_s_0 = agent.v[initial_state]
    vectors = Vector.m3_max(set(v_s_0))

    # Show information
    print('Vectors obtained after m3_max algorithm: ')
    print(vectors, end='\n\n')

    # Define a tolerance
    decimal_precision = 0.0000001

    # Simulation
    simulation = dict()

    # Set decimal precision
    Vector.set_decimal_precision(decimal_precision=decimal_precision)

    print('Evaluating the recovered policies...')

    # For each vector
    for vector in vectors:
        # Specify objective vector
        objective_vector = vector.copy()

        print('Recovering policy for objective vector: {}...'.format(
            objective_vector))

        # Get simulation from this agent
        policy = agent.recover_policy(initial_state=initial_state,
                                      objective_vector=objective_vector,
                                      iterations_limit=agent.total_sweeps)

        print('Evaluating the recovered policy...', end='\n\n')

        # Evaluate the policy until convergence within `decimal_precision` tolerance.
        policy_evaluated = agent.evaluate_policy(policy=policy,
                                                 tolerance=decimal_precision)

        # Save the policy and its evaluation.
        simulation.update({objective_vector: (policy, policy_evaluated)})

    print(simulation)
Example #8
def get_trained_agent() -> AgentBN:
    # Environment
    environment = ResourceGatheringEpisodic()

    # Agent
    agent = AgentBN(environment=environment, gamma=.9)

    # Vector precision
    Vector.set_decimal_precision(decimal_precision=0.01)

    # Train agent
    agent.train(graph_type=GraphType.SWEEP, limit=10)

    return agent
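A short usage sketch chaining this helper with the query pattern of Example #7; the initial state below is the one used there and is otherwise an assumption:

    # Train the agent, then keep only the non-dominated value vectors of the initial state.
    agent = get_trained_agent()
    initial_state = ((2, 4), (0, 0), False)
    non_dominated = Vector.m3_max(set(agent.v[initial_state]))
    print(non_dominated)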
Example #9
def load():
    # agent_10 = AgentBN.load(
    #     'bn/models/rge_1583857516_0.01.bin'
    # )

    # sorted_v_s_0_10 = sorted(agent_10.v[((2, 4), (0, 0))], key=lambda k: k[0])

    # agent_15 = AgentBN.load(
    #     'bn/models/rge_1583857532_0.01.bin'
    # )

    # sorted_v_s_0_15 = sorted(agent_15.v[((2, 4), (0, 0))], key=lambda k: k[0])

    agent_30: AgentBN = AgentBN.load('bn/models/rge_1583857678_0.01.bin')

    v_s_0_30 = agent_30.v[((2, 4), (0, 0))]
    v_s_0_30_nd = Vector.m3_max(set(v_s_0_30))

    # agent_10625 = AgentBN.load(
    #     filename='bn/models/rge_1583924116_0.01.bin'
    # )

    # sorted_v_s_0_10625 = sorted(agent_10625.v[((2, 4), (0, 0))], key=lambda k: k[0])

    pass
Example #10
    def __init__(self,
                 initial_state: tuple = ((0, 0), False),
                 default_reward: tuple = (0, 0),
                 seed: int = 0):
        """
        :param initial_state: Initial state where the agent starts.
        :param default_reward: (objective 1, objective 2)
        :param seed: Seed used for the np.random.RandomState method.
        """

        # Create a bag action space
        action_space = Bag([])
        action_space.seed(seed)

        super().__init__(seed=seed,
                         initial_state=initial_state,
                         default_reward=default_reward,
                         action_space=action_space)

        # Set obstacles
        self.obstacles = frozenset({(2, 2)})

        # Pits are final states in this variant
        self.finals.update({state: Vector([-50, -50]) for state in self.pits})

        self.pits = list()
Example #11
    def __init__(self, initial_state: tuple = (5, 2), default_reward: tuple = (0, -1), seed: int = 0,
                 action_space: gym.spaces = None):
        """
        :param initial_state: Initial state where the agent starts.
        :param default_reward: (mission_success, radiation)
        :param seed: Seed used for the np.random.RandomState method.
        """

        # All final states and their rewards.
        finals = {}
        finals.update({(0, i): 20 for i in range(5)})
        finals.update({(9, i): 10 for i in range(3)})
        finals.update({(12, i): 30 for i in range(5)})

        obstacles = frozenset()
        mesh_shape = (13, 5)
        default_reward = Vector(default_reward)

        super().__init__(mesh_shape=mesh_shape, seed=seed, initial_state=initial_state, default_reward=default_reward,
                         finals=finals, obstacles=obstacles, action_space=action_space)

        self.asteroids = {
            (5, 0), (4, 1), (6, 1), (3, 2), (7, 2), (4, 3), (6, 3), (5, 4)
        }

        # Define radiation states (if the agent is on any of these, it receives a -11 penalization)
        self.radiations = set()
        self.radiations = self.radiations.union({(1, i) for i in range(5)})
        self.radiations = self.radiations.union({(10, i) for i in range(5)})
        self.radiations = self.radiations.union({(11, i) for i in range(5)})
Example #12
    def find_rooms(self, bounds):
        room_areas = list()

        position = Vector(0, 0)
        # Evaluate the area for rooms
        next_position = 0
        for x in range(bounds.x_min, bounds.x_max):
            position.x = x
            for y in range(bounds.y_min, bounds.y_max):
                position.y = y
                if self.is_tile_of_type(position, TILE_TYPES["WALL"]):
                    # Found wall tile
                    result, next_position, room_bounds = self.is_room(
                        position, bounds)
                    if result:
                        room_areas.append(room_bounds)
        self.rooms = room_areas
        return room_areas
Example #13
def train_from_zero():
    # Define variables
    limit = int(3e6)
    epsilon = 0.4
    max_steps = 1000
    alpha = 0.1
    gamma = 1
    graph_type = GraphType.EPISODES
    columns_list = range(1, 6)
    decimals = [0.01, 0.05]

    for decimal_precision in decimals:

        # Set vector decimal precision
        Vector.set_decimal_precision(decimal_precision=decimal_precision)

        for columns in columns_list:
            # Environment
            environment = DeepSeaTreasureRightDownStochastic(columns=columns)

            # Create agent
            agent = AgentMPQ(environment=environment,
                             hv_reference=environment.hv_reference,
                             epsilon=epsilon,
                             alpha=alpha,
                             gamma=gamma,
                             max_steps=max_steps)

            # Time train
            t0 = time.time()

            # Show number of columns
            print('# of columns: {}'.format(columns))

            # Agent training
            agent.train(graph_type=graph_type, limit=limit)

            # Calc total time
            total_time = time.time() - t0

            prepare_for_dumps(agent, columns, decimal_precision, graph_type,
                              limit, total_time)
Example #14
    def test_transition_reward(self):

        # In this environment the initial state does not matter for the reward
        state = self.environment.observation_space.sample()

        # The action does not matter either.
        action = self.environment.action_space.sample()

        # Asteroid states
        for asteroid_state in self.environment.asteroids:
            self.assertEqual(
                Vector((-100, -1)),
                self.environment.transition_reward(state=state,
                                                   action=action,
                                                   next_state=asteroid_state))

        # Radiation states
        for radiation_state in self.environment.radiations:
            self.assertEqual(
                Vector((0, -11)),
                self.environment.transition_reward(state=state,
                                                   action=action,
                                                   next_state=radiation_state))

        # Final states
        for final_state, final_reward in self.environment.finals.items():
            self.assertEqual(
                Vector((final_reward, -1)),
                self.environment.transition_reward(state=state,
                                                   action=action,
                                                   next_state=final_state))

        simple_states = self.environment.states() - set(
            self.environment.finals.keys()).union(
                self.environment.radiations).union(self.environment.asteroids)

        for simple_state in simple_states:
            self.assertEqual(
                Vector((0, -1)),
                self.environment.transition_reward(state=state,
                                                   action=action,
                                                   next_state=simple_state))
Example #15
    def setUp(self):
        # An observation space
        observation_space = gym.spaces.Discrete(7)

        # Default reward
        default_reward = Vector([1, 2, 1])

        # Set initial_seed to 0 for testing.
        self.environment = Environment(observation_space=observation_space,
                                       default_reward=default_reward,
                                       seed=0)
Example #16
    def __init__(self, initial_state: tuple = (0, 0), default_reward: tuple = (0,), seed: int = 0, columns: int = 0,
                 action_space: gym.spaces = None):
        """
        :param initial_state: Initial state where the agent starts.
        :param default_reward: (treasure_value, )
        :param seed: Seed used for the np.random.RandomState method.
        :param columns: Number of columns used to build this environment (allows experimenting with an identical
                        environment but considering only the first k columns; by default 10, i.e. all of them).
        :param action_space: Specific action space.
        """

        original_mesh_shape = (10, 11)

        # Reduce the number of columns
        if columns < 1 or columns > original_mesh_shape[0]:
            columns = original_mesh_shape[0]

        # Dictionary of all treasures and their rewards.
        finals = {
            (0, 1): 5,
            (1, 2): 80,
            (2, 3): 120,
            (3, 4): 140,
            (4, 4): 145,
            (5, 4): 150,
            (6, 7): 163,
            (7, 7): 166,
            (8, 9): 173,
            (9, 10): 175,
        }

        # Filter final states
        finals = dict(filter(lambda x: x[0][0] < columns, finals.items()))

        obstacles = frozenset()
        obstacles = obstacles.union([(0, y) for y in range(2, 11)])
        obstacles = obstacles.union([(1, y) for y in range(3, 11)])
        obstacles = obstacles.union([(2, y) for y in range(4, 11)])
        obstacles = obstacles.union([(3, y) for y in range(5, 11)])
        obstacles = obstacles.union([(4, y) for y in range(5, 11)])
        obstacles = obstacles.union([(5, y) for y in range(5, 11)])
        obstacles = obstacles.union([(6, y) for y in range(8, 11)])
        obstacles = obstacles.union([(7, y) for y in range(8, 11)])
        obstacles = obstacles.union([(8, y) for y in range(10, 11)])

        # Filter obstacle states
        obstacles = frozenset(filter(lambda x: x[0] < columns, obstacles))

        # Resize mesh_shape
        mesh_shape = (columns, 11)

        # Default reward plus time (time_inverted, treasure_value, water_pressure)
        default_reward = (-1,) + default_reward + (0,)
        default_reward = Vector(default_reward)

        super().__init__(mesh_shape=mesh_shape, seed=seed, default_reward=default_reward, initial_state=initial_state,
                         finals=finals, obstacles=obstacles, action_space=action_space)
Example #17
    def test_transition_reward(self):

        # In this environment the initial state does not matter for the reward
        state = self.environment.observation_space.sample()

        # The action does not matter either.
        action = self.environment.action_space.sample()

        # An intermediate state
        self.assertEqual(
            self.environment.transition_reward(state=state,
                                               action=action,
                                               next_state=(1, 1)),
            Vector((-1, 0, -2)))

        # A final state
        self.assertEqual(
            self.environment.transition_reward(state=state,
                                               action=action,
                                               next_state=(1, 2)),
            Vector([-1, 80, -3]))
Example #18
def main():
    # Get trained agent
    agent: AgentBN = get_trained_agent()

    # Set initial state
    initial_state = ((2, 4), (0, 0))

    # agent: AgentBN = AgentBN.load(
    #     filename='bn/models/rg_1584437328_0.005.bin'
    # )

    v_s_0 = agent.v[initial_state]
    vectors = Vector.m3_max(set(v_s_0))

    # Simulation
    simulation = dict()

    # Set decimal precision
    Vector.set_decimal_precision(decimal_precision=0.0000001)

    for vector in vectors:
        # Recreate the index objective vector.
        # objective_vector = IndexVector(
        #     index=vector, vector=trained_agent.v[initial_state][vector]
        # )

        objective_vector = vector.copy()

        # Get simulation from this agent
        policy = agent.recover_policy(initial_state=initial_state,
                                      objective_vector=objective_vector,
                                      iterations_limit=agent.total_sweeps)

        policy_evaluated = agent.evaluate_policy(policy=policy,
                                                 tolerance=0.0000001)

        simulation.update({objective_vector: (policy, policy_evaluated)})

    print(simulation)
Example #19
def train_agent() -> Agent:
    # Environment
    # environment = DeepSeaTreasureRightDownStochastic(columns=3)
    # environment = DeepSeaTreasureRightDown(columns=3)
    # environment = PyramidMDPNoBounces(diagonals=3, n_transition=0.95)
    # environment = DeepSeaTreasure()
    environment = ResourceGathering()

    # Agent
    # agent = AgentMPQ(
    #     environment=environment, hv_reference=environment.hv_reference, alpha=0.1, epsilon=0.4, max_steps=1000
    # )
    # agent = AgentMPQ(environment=environment, hv_reference=environment.hv_reference, alpha=0.01)
    agent = AgentBN(environment=environment, gamma=.9)

    # Vector precision
    Vector.set_decimal_precision(decimal_precision=0.01)

    # Train agent
    # agent.train(graph_type=GraphType.SWEEP, tolerance=0.00001)
    agent.train(graph_type=GraphType.SWEEP, limit=13)

    return agent
Example #20
    def setUp(self):
        # Mesh shape
        mesh_shape = (7, 7)

        # Default reward
        default_reward = Vector([1, 2, 1])

        # Obstacles
        obstacles = frozenset({
            (0, 0), (1, 1)
        })

        # Set initial_seed to 0 for testing.
        self.environment = EnvMesh(mesh_shape=mesh_shape, default_reward=default_reward, seed=0, obstacles=obstacles)
Example #21
    def __init__(self,
                 initial_state: tuple = ((2, 4), (0, 0)),
                 default_reward: tuple = (0, 0, 0),
                 seed: int = 0,
                 p_attack: float = 0.1,
                 mesh_shape: tuple = (5, 5),
                 gold_positions: frozenset = frozenset({(2, 0)}),
                 gem_positions: frozenset = frozenset({(4, 1)}),
                 observation_space: gym.spaces = None):
        """
        :param initial_state: Initial state where the agent starts.
        :param default_reward: (enemy_attack, gold, gems)
        :param seed: Seed used for the np.random.RandomState method.
        :param p_attack: Probability that an enemy attacks when the agent stays in an enemy position.
        """

        default_reward = Vector(default_reward)

        if observation_space is None:
            # Build the observation space (position(x, y), quantity(gold, gems))
            observation_space = gym.spaces.Tuple(
                (gym.spaces.Tuple((gym.spaces.Discrete(mesh_shape[0]),
                                   gym.spaces.Discrete(mesh_shape[1]))),
                 gym.spaces.Tuple(
                     (gym.spaces.Discrete(2), gym.spaces.Discrete(2)))))

        # Define final states
        finals = frozenset()

        # Super constructor call.
        super().__init__(mesh_shape=mesh_shape,
                         seed=seed,
                         initial_state=initial_state,
                         default_reward=default_reward,
                         observation_space=observation_space,
                         finals=finals)

        # Positions where there is gold.
        self.gold_positions = gold_positions

        # Positions where there is a gem.
        self.gem_positions = gem_positions

        # Positions where there are enemies
        self.enemies_positions = {(3, 0), (2, 1)}
        self.p_attack = p_attack
        self.home_position = (2, 4)

        self.checkpoints_states = self._checkpoints_states()
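A minimal training sketch over this environment, assuming the constructor above belongs to the ResourceGathering class and reusing only the calls shown in Example #19:

    # Hypothetical training run: sweep-based training with AgentBN at 0.01 vector precision.
    environment = ResourceGathering()
    agent = AgentBN(environment=environment, gamma=.9)
    Vector.set_decimal_precision(decimal_precision=0.01)
    agent.train(graph_type=GraphType.SWEEP, limit=13)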
Example #22
def train_from_file():
    # Models Path
    models_path = 'mpq/models/dstrds_1579869395_1.0_4.bin'

    agent: AgentMPQ = u_models.binary_load(
        path=dumps_path.joinpath(models_path))

    # Data Path
    data_path = dumps_path.joinpath(
        'mpq/train_data/dstrds_1579869395_1.0_4.yml')
    data_file = data_path.open(mode='r', encoding='UTF-8')

    # Load yaml from file
    data = yaml.load(data_file, Loader=yaml.FullLoader)

    # Extract relevant data for training
    before_training_execution = float(data['time'])
    decimal_precision = float(data['agent']['decimal_precision'])
    graph_type = GraphType.from_string(data['training']['graph_type'])
    limit = int(data['training']['limit'])
    columns = int(data['environment']['columns'])

    # Set decimal precision
    Vector.set_decimal_precision(decimal_precision=decimal_precision)

    # Time train
    t0 = time.time()

    # Agent training
    agent.train(graph_type=graph_type, limit=limit)

    # Calc total time
    total_time = (time.time() - t0) + before_training_execution

    prepare_for_dumps(agent, columns, decimal_precision, graph_type, limit,
                      total_time)
Example #23
class SpaceExploration(EnvMesh):
    # Possible actions
    _actions = {'UP': 0, 'UP RIGHT': 1, 'RIGHT': 2, 'DOWN RIGHT': 3, 'DOWN': 4, 'DOWN LEFT': 5, 'LEFT': 6, 'UP LEFT': 7}

    # Experiments common hypervolume reference
    hv_reference = Vector([-100, -150])

    def __init__(self, initial_state: tuple = (5, 2), default_reward: tuple = (0, -1), seed: int = 0,
                 action_space: gym.spaces = None):
        """
        :param initial_state: Initial state where the agent starts.
        :param default_reward: (mission_success, radiation)
        :param seed: Seed used for the np.random.RandomState method.
        """

        # All final states and their rewards.
        finals = {}
        finals.update({(0, i): 20 for i in range(5)})
        finals.update({(9, i): 10 for i in range(3)})
        finals.update({(12, i): 30 for i in range(5)})

        obstacles = frozenset()
        mesh_shape = (13, 5)
        default_reward = Vector(default_reward)

        super().__init__(mesh_shape=mesh_shape, seed=seed, initial_state=initial_state, default_reward=default_reward,
                         finals=finals, obstacles=obstacles, action_space=action_space)

        self.asteroids = {
            (5, 0), (4, 1), (6, 1), (3, 2), (7, 2), (4, 3), (6, 3), (5, 4)
        }

        # Define radiation states (if the agent is on any of these, it receives a -11 penalization)
        self.radiations = set()
        self.radiations = self.radiations.union({(1, i) for i in range(5)})
        self.radiations = self.radiations.union({(10, i) for i in range(5)})
        self.radiations = self.radiations.union({(11, i) for i in range(5)})

    def step(self, action: int) -> (tuple, Vector, bool, dict):
        """
        Given an action, do a step
        :param action:
        :return: (position, (mission_success, radiation), final, extra)
        """

        # Initialize reward as vector
        reward = self.default_reward.copy()

        # Update previous state
        self.current_state = self.next_state(action=action)

        # If the ship crashes into an asteroid it is destroyed; otherwise take the mission-success reward.
        reward[0] = -100 if self.current_state in self.asteroids else self.finals.get(
            self.current_state, self.default_reward[0]
        )

        # If the agent is in a radiation position the penalty is -11; otherwise keep the default radiation reward
        reward[1] = -11 if self.current_state in self.radiations else self.default_reward[1]

        # Check if is_final
        final = self.is_final(self.current_state)

        # Set extra
        info = {}

        return self.current_state, reward, final, info

    def next_position(self, action: int, position: tuple) -> (tuple, bool):
        """
        Given an action and a position, return the next position reached.
        :param action:
        :param position:
        :return:
        """

        # Get my position
        x, y = position

        # Get observations spaces
        observation_space_x, observation_space_y = self.observation_space.spaces

        # Do movement in cyclic mesh
        if action == self.actions['UP']:
            y = ue.move_up(y=y, limit=observation_space_y.n)
        elif action == self.actions['RIGHT']:
            x = ue.move_right(x=x, limit=observation_space_x.n)
        elif action == self.actions['DOWN']:
            y = ue.move_down(y=y, limit=observation_space_y.n)
        elif action == self.actions['LEFT']:
            x = ue.move_left(x=x, limit=observation_space_x.n)
        elif action == self.actions['UP RIGHT']:
            y = ue.move_up(y=y, limit=observation_space_y.n)
            x = ue.move_right(x=x, limit=observation_space_x.n)
        elif action == self.actions['DOWN RIGHT']:
            y = ue.move_down(y=y, limit=observation_space_y.n)
            x = ue.move_right(x=x, limit=observation_space_x.n)
        elif action == self.actions['DOWN LEFT']:
            y = ue.move_down(y=y, limit=observation_space_y.n)
            x = ue.move_left(x=x, limit=observation_space_x.n)
        elif action == self.actions['UP LEFT']:
            y = ue.move_up(y=y, limit=observation_space_y.n)
            x = ue.move_left(x=x, limit=observation_space_x.n)

        # Set next position
        next_position = x, y

        return next_position, True

    def next_state(self, action: int, state: tuple = None) -> tuple:
        """
        Calc the next state from the current state and the given action; this environment uses 8-neighbour movement.
        :param state: If a state is given, the action is applied from that state.
        :param action: from action_space
        :return:
        """

        # Get my position
        position = state if state else self.current_state

        next_position, is_valid = self.next_position(action=action, position=position)

        if not self.observation_space.contains(next_position) or not is_valid:
            next_position = position

        # Return (x, y) position
        return next_position

    def is_final(self, state: tuple = None) -> bool:
        """
        A state is final if the agent crashes into an asteroid or is on a final position.
        :param state:
        :return:
        """

        # Check if the agent crashed into an asteroid
        crash = state in self.asteroids

        # Check if agent is in final position
        final = state in self.finals.keys()

        return crash or final

    def transition_reward(self, state: tuple, action: int, next_state: tuple) -> Vector:
        """
        Return the reward for reaching `next_state` from `state` using `action`.

        :param state: initial position
        :param action: action to do
        :param next_state: next position reached
        :return:
        """
        # Initialize reward as vector
        reward = self.default_reward.copy()

        # If the ship crashes into an asteroid it is destroyed; otherwise take the mission-success reward.
        reward[0] = -100 if next_state in self.asteroids else self.finals.get(
            next_state, reward[0]
        )

        # If the agent is in a radiation position the penalty is -11; otherwise keep the default radiation reward
        reward[1] = -11 if next_state in self.radiations else reward[1]

        return reward
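A brief interaction sketch for SpaceExploration; it assumes EnvMesh provides reset() (as MoPuddleWorld shows further below) and the actions accessor that next_position already relies on:

    # Hypothetical single step from the default initial state.
    environment = SpaceExploration()
    state = environment.reset()  # reset() is assumed to be inherited from EnvMesh
    state, reward, final, info = environment.step(action=environment.actions['DOWN RIGHT'])
    print(state, reward, final)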
Example #24
    config = configparser.ConfigParser()
    config.read(config_file)

    # TODO: add errors handling
    # TODO: move all to the new class

    start_point = Point(
        float(config['START POINT']['x']),
        float(config['START POINT']['y']),
        float(config['START POINT']['z']),
    )

    dimensions = Vector(
        float(config['SCENE DIMENSIONS']['dx']),
        float(config['SCENE DIMENSIONS']['dy']),
        float(config['SCENE DIMENSIONS']['dz']),
    )

    stl_file = config['OTHERS']['STL file path']
    condition = float(config['OTHERS']['minimum volume'])
    result_file_path = config['OTHERS']['result file path']

    stl = STL(stl_file)

    print('> Generate octree...')
    root = Node(start_point, dimensions)
    get_grid(root, condition=condition, object=stl)

    ### NP.ARRAY ###
    # arr = array([], dtype=float)
Example #25
    def __init__(self,
                 initial_state: tuple = (0, 0),
                 default_reward: tuple = (0, ),
                 columns: int = 10,
                 seed: int = 0,
                 action_space: gym.spaces = None):
        """
        :param initial_state: Initial state where the agent starts.
        :param default_reward: (time_inverted, treasure_value)
        :param columns: Number of columns used to build this environment (allows experimenting with an identical
                        environment but considering only the first k columns; by default 10, i.e. all of them).
        :param seed: Seed used for the np.random.RandomState method.
        :param action_space: Specific action space
        """

        # Shape of the original full-size environment.
        original_mesh_shape = (10, 11)

        if columns < 1 or columns > original_mesh_shape[0]:
            columns = original_mesh_shape[0]

        # Dictionary with final states as keys, and treasure amounts as values.
        finals = {
            (0, 1): 1,
            (1, 2): 2,
            (2, 3): 3,
            (3, 4): 5,
            (4, 4): 8,
            (5, 4): 16,
            (6, 7): 24,
            (7, 7): 50,
            (8, 9): 74,
            (9, 10): 124,
        }

        # Filter final states
        finals = dict(filter(lambda x: x[0][0] < columns, finals.items()))

        # Filter obstacle states
        obstacles = frozenset()
        obstacles = obstacles.union([(0, y) for y in range(2, 11)])
        obstacles = obstacles.union([(1, y) for y in range(3, 11)])
        obstacles = obstacles.union([(2, y) for y in range(4, 11)])
        obstacles = obstacles.union([(3, y) for y in range(5, 11)])
        obstacles = obstacles.union([(4, y) for y in range(5, 11)])
        obstacles = obstacles.union([(5, y) for y in range(5, 11)])
        obstacles = obstacles.union([(6, y) for y in range(8, 11)])
        obstacles = obstacles.union([(7, y) for y in range(8, 11)])
        obstacles = obstacles.union([(8, y) for y in range(10, 11)])
        obstacles = frozenset(filter(lambda x: x[0] < columns, obstacles))

        # Subspace of the environment to be considered
        mesh_shape = (columns, 11)

        # Default reward plus time (time_inverted, treasure_value)
        default_reward = (-1, ) + default_reward
        default_reward = Vector(default_reward)

        super().__init__(mesh_shape=mesh_shape,
                         initial_state=initial_state,
                         default_reward=default_reward,
                         finals=finals,
                         obstacles=obstacles,
                         seed=seed,
                         action_space=action_space)
Example #26
class DeepSeaTreasure(EnvMesh):
    # Possible actions
    _actions = {'UP': 0, 'RIGHT': 1, 'DOWN': 2, 'LEFT': 3}

    # Pareto optimal policy vector-values
    pareto_optimal = [
        Vector([-1, 1]),
        Vector([-3, 2]),
        Vector([-5, 3]),
        Vector([-7, 5]),
        Vector([-8, 8]),
        Vector([-9, 16]),
        Vector([-13, 24]),
        Vector([-14, 50]),
        Vector([-17, 74]),
        Vector([-19, 124])
    ]

    # Experiments common hypervolume reference
    hv_reference = Vector((-25, 0))

    def __init__(self,
                 initial_state: tuple = (0, 0),
                 default_reward: tuple = (0, ),
                 columns: int = 10,
                 seed: int = 0,
                 action_space: gym.spaces = None):
        """
        :param initial_state: Initial state where the agent starts.
        :param default_reward: (time_inverted, treasure_value)
        :param columns: Number of columns used to build this environment (allows experimenting with an identical
                        environment but considering only the first k columns; by default 10, i.e. all of them).
        :param seed: Seed used for the np.random.RandomState method.
        :param action_space: Specific action space
        """

        # Shape of the original full-size environment.
        original_mesh_shape = (10, 11)

        if columns < 1 or columns > original_mesh_shape[0]:
            columns = original_mesh_shape[0]

        # Dictionary with final states as keys, and treasure amounts as values.
        finals = {
            (0, 1): 1,
            (1, 2): 2,
            (2, 3): 3,
            (3, 4): 5,
            (4, 4): 8,
            (5, 4): 16,
            (6, 7): 24,
            (7, 7): 50,
            (8, 9): 74,
            (9, 10): 124,
        }

        # Filter final states
        finals = dict(filter(lambda x: x[0][0] < columns, finals.items()))

        # Filter obstacle states
        obstacles = frozenset()
        obstacles = obstacles.union([(0, y) for y in range(2, 11)])
        obstacles = obstacles.union([(1, y) for y in range(3, 11)])
        obstacles = obstacles.union([(2, y) for y in range(4, 11)])
        obstacles = obstacles.union([(3, y) for y in range(5, 11)])
        obstacles = obstacles.union([(4, y) for y in range(5, 11)])
        obstacles = obstacles.union([(5, y) for y in range(5, 11)])
        obstacles = obstacles.union([(6, y) for y in range(8, 11)])
        obstacles = obstacles.union([(7, y) for y in range(8, 11)])
        obstacles = obstacles.union([(8, y) for y in range(10, 11)])
        obstacles = frozenset(filter(lambda x: x[0] < columns, obstacles))

        # Subspace of the environment to be considered
        mesh_shape = (columns, 11)

        # Default reward plus time (time_inverted, treasure_value)
        default_reward = (-1, ) + default_reward
        default_reward = Vector(default_reward)

        super().__init__(mesh_shape=mesh_shape,
                         initial_state=initial_state,
                         default_reward=default_reward,
                         finals=finals,
                         obstacles=obstacles,
                         seed=seed,
                         action_space=action_space)

    def step(self, action: int) -> (tuple, Vector, bool, dict):
        """
        Given an action, do a step
        :param action:
        :return: (position, (time_inverted, treasure_value), final, extra)
        """

        # Initialize rewards as vector
        reward = self.default_reward.copy()

        # Update current position
        self.current_state = self.next_state(action=action)

        # Get treasure value
        reward[1] = self.finals.get(self.current_state, self.default_reward[1])

        # Set extra
        info = {}

        # Check is_final
        final = self.is_final(self.current_state)

        return self.current_state, reward, final, info

    def transition_reward(self, state: tuple, action: int,
                          next_state: tuple) -> Vector:
        """
        Given a state, an action and a next state, return the corresponding reward.
        :param state:
        :param action:
        :param next_state:
        :return:
        """

        # Default reward
        reward = self.default_reward.copy()

        # Get treasure reward
        reward[1] = self.finals.get(next_state, self.default_reward[1])

        return reward
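A short random-walk sketch over a reduced DeepSeaTreasure; reset() is assumed to be inherited from EnvMesh (as in MoPuddleWorld below), and the remaining calls all appear elsewhere in these examples:

    # Hypothetical random walk until a treasure (final state) is reached.
    environment = DeepSeaTreasure(columns=3)
    state = environment.reset()  # assumed to come from EnvMesh
    final = False
    while not final:
        action = environment.action_space.sample()
        state, reward, final, info = environment.step(action=action)
    print('Episode finished in {} with reward {}'.format(state, reward))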
Example #27
clock = pygame.time.Clock()
font = pygame.font.SysFont(None, 25)

WHITE = (255, 255, 255)
BLACK = (0, 0, 0)

RED = (255, 0, 0)
GREEN = (0, 255, 0)
BLUE = (0, 0, 255)

FRAME_RATE = 60
WIDTH = 1000
HEIGHT = 800
FRICTION = 0
ELASTICITY = 1.0
GRAVITY = Vector(0, 0)
BALL_SIZE = 50
INITIAL_VELOCITY_SCALAR = 5

GAME_DISPLAY = pygame.display.set_mode((WIDTH, HEIGHT))
GAME_DISPLAY.fill(BLACK)

mouse_pos = None
modify_type = None
modify_up = False
modify_down = False
balls = []


def update_balls(balls):
    updated_balls = []
Example #28
def draft_w():
    tolerance = 0.00001

    for decimal_precision in [0.05, 0.005, 0.001]:
        # Create environment
        # environment = ResourceGatheringEpisodicSimplified()
        # environment = ResourceGatheringSimplified()
        environment = ResourceGatheringEpisodic()

        # Create agent
        agent_w = AgentW(environment=environment,
                         convergence_graph=True,
                         gamma=.9)

        # Time train
        t0 = time.time()

        # Set number of decimals allowed
        Vector.set_decimal_precision(decimal_precision=decimal_precision)

        agent_w.train(graph_type=GraphType.SWEEP, limit=1)

        # Calc total time
        total_time = time.time() - t0

        # Convert to vectors
        vectors = {
            key: [vector.tolist() for vector in vectors]
            for key, vectors in agent_w.v.items()
        }

        # Prepare full_data to dumps
        data = {
            'time': '{}s'.format(total_time),
            'memory': {
                'v_s_0': len(agent_w.v[environment.initial_state]),
                'full': sum(len(vectors) for vectors in agent_w.v.values())
            },
            'vectors': vectors
        }

        # Configuration of environment
        environment_info = vars(environment).copy()
        environment_info.pop('_action_space', None)
        environment_info.pop('np_random', None)

        # Configuration of agent
        agent_info = {
            'gamma': agent_w.gamma,
            'initial_q_value': agent_w.initial_q_value,
            'initial_seed': agent_w.initial_seed,
            'interval_to_get_data': agent_w.interval_to_get_data,
            'max_steps': agent_w.max_iterations,
            'total_sweeps': agent_w.total_sweeps,
            'tolerance': tolerance
        }

        # Extra data
        data.update({'environment': environment_info})
        data.update({'agent': agent_info})

        # Dumps partial execution
        dumps(data=data, environment=environment)
Example #29
class MoPuddleWorld(EnvMesh):
    # Possible actions
    _actions = {'UP': 0, 'RIGHT': 1, 'DOWN': 2, 'LEFT': 3}

    # Experiments common hypervolume reference
    hv_reference = Vector([-50, -150])

    def __init__(self,
                 default_reward: tuple = (10, 0),
                 penalize_non_goal: float = -1,
                 seed: int = 0,
                 final_state: tuple = (19, 0),
                 action_space: gym.spaces = None):
        """
        :param default_reward: (non_goal_reached, puddle_penalize)
        :param penalize_non_goal: Penalization received while the agent has not reached the final position.
        :param seed: Initial seed. The same seed is used for _action_space,
                     observation_space, and the random number generator.
        :param final_state: This environment has only one final position.
        """

        self.final_state = final_state
        mesh_shape = (20, 20)
        default_reward = VectorDecimal(default_reward)

        super().__init__(mesh_shape=mesh_shape,
                         seed=seed,
                         default_reward=default_reward,
                         action_space=action_space)

        self.puddles = frozenset()
        self.puddles = self.puddles.union([(x, y) for x in range(0, 11)
                                           for y in range(3, 7)])
        self.puddles = self.puddles.union([(x, y) for x in range(6, 10)
                                           for y in range(2, 14)])
        self.penalize_non_goal = penalize_non_goal

        self.current_state = self.reset()

        # Get free spaces
        self.free_spaces = set(self.states() - self.puddles)

    def step(self, action: int) -> (tuple, VectorDecimal, bool, dict):
        """
        Given an action, do a step
        :param action:
        :return: (position, (non_goal_reached, puddle_penalize), final, extra)
        """

        # Initialize reward as vector
        reward = self.default_reward.copy()

        # Update previous position
        self.current_state = self.next_state(action=action)

        # Check whether the agent reached the final position
        final = self.is_final(self.current_state)

        # Set final reward
        if not final:
            reward[0] = self.penalize_non_goal

        # If the current position is in a puddle
        if self.current_state in self.puddles:
            # Set penalization per distance
            reward[1] = self.calc_puddle_penalization(state=self.current_state)

        # Set extra
        info = {}

        return self.current_state, reward, final, info

    def calc_puddle_penalization(self, state: tuple) -> float:
        """
        Return a penalization equal to the smallest Manhattan distance between the given state and the nearest
        puddle-free position.
        :param state:
        :return:
        """
        # Minimum Manhattan distance from `state` to a free (non-puddle) space
        min_distance = min(
            cityblock(state, free_space) for free_space in self.free_spaces)

        # Set penalization per distance
        return -min_distance

    def reset(self) -> tuple:
        """
        Set the current state to a random non-goal position.
        :return:
        """

        # Reset to initial seed
        self.seed(seed=self.initial_seed)

        random_space = None

        while random_space is None or random_space == self.final_state:
            random_space = self.observation_space.sample()

        self.current_state = random_space
        return self.current_state

    def is_final(self, state: tuple = None) -> bool:
        """
        The state is final if it is the environment's final position.
        :param state:
        :return:
        """
        return state == self.final_state

    def transition_reward(self, state: tuple, action: int,
                          next_state: tuple) -> Vector:
        """
        Return the reward for reaching `next_state` from `state` using `action`.

        :param state: initial position
        :param action: action to do
        :param next_state: next position reached
        :return:
        """

        # Initialize reward as vector
        reward = self.default_reward.copy()

        # Check whether next_state is the final position
        final = self.is_final(next_state)

        # Set final reward
        if not final:
            reward[0] = self.penalize_non_goal

        # If the next state is in a puddle
        if next_state in self.puddles:
            # Min distance found!
            min_distance = min(
                cityblock(next_state, state) for state in self.free_spaces)

            # Set penalization per distance
            reward[1] = -min_distance

        return reward

    def states(self) -> set:
        """
        Return all possible states of this environment.
        :return:
        """

        # Unpack spaces
        x_position, y_position = self.observation_space.spaces

        return set((x, y) for x in range(x_position.n)
                   for y in range(y_position.n)).difference({self.final_state})
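A capped episode sketch for MoPuddleWorld; action_space.sample() is assumed to behave as in the tests shown above, and the step cap is an arbitrary safeguard:

    # Hypothetical episode: random start (reset) and random actions until the goal or the cap.
    environment = MoPuddleWorld()
    state = environment.reset()
    for _ in range(10000):
        state, reward, final, info = environment.step(action=environment.action_space.sample())
        if final:
            break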
Example #30
    },
    'W_{0.01}': {
        'color': 'c',
        'marker': 's'
    },
    'W_{0.005}': {
        'color': 'b',
        'marker': 'd'
    },
    'W_{0.001}': {
        'color': 'k',
        'marker': 'o'
    }
}

vector_reference = Vector((-25, 0))


def pareto_graph(data: dict):
    # Columns
    columns = list(data.keys())[0]

    # Prepare hypervolume to dumps data
    pareto_file = Path(__file__).parent.joinpath(
        'article/output/pareto_{}.m'.format(columns))

    # If any parent directory doesn't exist, create it.
    pareto_file.parent.mkdir(parents=True, exist_ok=True)

    data = data[columns]