def test_transition_reward(self):
        """
        Check the reward returned by transition_reward for an intermediate and two final next states.
        :return:
        """

        # Origin state and action are sampled at random; only next_state varies between assertions.
        origin = self.environment.observation_space.sample()
        move = self.environment.action_space.sample()

        # An intermediate next state is expected to give the default reward.
        intermediate_reward = self.environment.transition_reward(
            state=origin, action=move, next_state=(1, 0))
        self.assertEqual(intermediate_reward, self.environment.default_reward)

        # Each final next state is expected to give its own single-component reward.
        for final_state, value in (((3, 0), 1), ((3, 1), -1)):
            self.assertEqual(
                self.environment.transition_reward(state=origin,
                                                   action=move,
                                                   next_state=final_state),
                VectorDecimal((value, )))
Exemple #2
0
    def __init__(self,
                 default_reward: tuple = (10, 0),
                 penalize_non_goal: float = -1,
                 seed: int = 0,
                 final_state: tuple = (19, 0),
                 action_space: gym.spaces = None):
        """
        Build a 20x20 mesh environment with two rectangular puddle regions and a single final state.
        :param default_reward: (non_goal_reached, puddle_penalize); wrapped in a VectorDecimal before
                     being handed to the parent constructor.
        :param penalize_non_goal: Penalty stored for the agent while it has not reached the final position.
        :param seed: Seed forwarded to the parent constructor.
        :param final_state: The only final position of this environment.
        :param action_space: Action space forwarded to the parent constructor (None by default).
        """

        # NOTE(review): assigned before super().__init__ - presumably the parent constructor may need it; confirm.
        self.final_state = final_state

        super().__init__(mesh_shape=(20, 20),
                         seed=seed,
                         default_reward=VectorDecimal(default_reward),
                         action_space=action_space)

        # Puddle cells are the union of two rectangles of (x, y) positions.
        first_rectangle = [(x, y) for x in range(0, 11) for y in range(3, 7)]
        second_rectangle = [(x, y) for x in range(6, 10) for y in range(2, 14)]
        self.puddles = frozenset(first_rectangle).union(second_rectangle)
        self.penalize_non_goal = penalize_non_goal

        self.current_state = self.reset()

        # Free spaces are all the states that are not puddles.
        self.free_spaces = set(self.states() - self.puddles)
    def __init__(self, initial_state: tuple = (0, 0), default_reward: tuple = (0,), seed: int = 0):
        """
        Build the environment, register its treasure (final) states and wrap the default reward.
        :param initial_state: State where the agent starts.
        :param default_reward: Described originally as (time_inverted, treasure_value).
                     NOTE(review): the default value has a single component - confirm the intended shape.
        :param seed: Seed forwarded to the parent constructor.
        """

        super().__init__(initial_state=initial_state, default_reward=default_reward, seed=seed)

        # Every treasure position paired with its reward.
        treasures = (
            ((0, 1), 1),
            ((1, 2), 2),
            ((2, 3), 10),
            ((3, 4), 11),
            ((4, 4), 12),
            ((5, 4), 13),
            ((6, 7), 15),
            ((7, 7), 18.5),
            ((8, 9), 19),
            ((9, 10), 20),
        )
        self.finals = dict(treasures)

        # Re-wrap whatever default reward the parent constructor stored as a VectorDecimal.
        self.default_reward = VectorDecimal(self.default_reward)
Exemple #4
0
    def test_m3_max_2_lists_not_duplicates(self):
        """
        Testing m3_max_2_lists_not_duplicates function
        :return:
        """

        def consume(produced, expected):
            # Pop every produced vector and strike it from the expected list.
            while produced:
                expected.remove(produced.pop())

        quadrants = [
            self.first_quadrant, self.second_quadrant, self.third_quadrant,
            self.fourth_quadrant, self.all_quadrants
        ]

        for problem, expected_non_dominated, expected_dominated in quadrants:

            non_dominated, dominated = VectorDecimal.m3_max_2_lists_not_duplicates(
                vectors=problem)

            # Nothing may be left in the expected list once every produced vector has been struck off.
            consume(non_dominated, expected_non_dominated)
            self.assertFalse(expected_non_dominated)

            consume(dominated, expected_dominated)
            self.assertFalse(expected_dominated)
Exemple #5
0
    def __init__(self,
                 transitions: tuple = (0.8, 0.1, 0., 0.1),
                 initial_state: tuple = (0, 2),
                 seed: int = 0,
                 default_reward: tuple = (-0.04, )):
        """
        Build a 4x3 mesh environment with one obstacle, two final states and stored transition probabilities.
        :param transitions:
            Probabilities to change direction of action given.
            [DIR_0, DIR_90, DIR_180, DIR_270]
        :param initial_state: State forwarded to the parent constructor as the initial one.
        :param seed: Seed forwarded to the parent constructor.
        :param default_reward: Reward components, wrapped in a VectorDecimal before being forwarded.
        """

        # Final states with their rewards, and the single obstacle cell.
        final_rewards = {(3, 0): 1, (3, 1): -1}
        blocked_cells = frozenset({(1, 1)})

        super().__init__(mesh_shape=(4, 3),
                         seed=seed,
                         initial_state=initial_state,
                         obstacles=blocked_cells,
                         finals=final_rewards,
                         default_reward=VectorDecimal(default_reward))

        self.transitions = transitions
Exemple #6
0
    def test_init(self):
        """
        Testing that the constructor stores its components as a numpy array
        :return:
        """

        size = rnd.randint(2, 10)
        raw_values = [rnd.uniform(-100., 100.) for _ in range(size)]

        # Built from a plain list
        from_list = VectorDecimal(raw_values)
        self.assertTrue(isinstance(from_list.components, np.ndarray))

        # Built from an ndarray
        from_array = VectorDecimal(np.asarray(raw_values))
        self.assertTrue(isinstance(from_array.components, np.ndarray))
Exemple #7
0
    def __init__(self,
                 initial_state: tuple = ((1, 1), {(0, 0), (2, 2)}, 0),
                 default_reward: tuple = (0., 0., 0.),
                 p_stolen: float = .9,
                 n_appear: int = 10,
                 stolen_penalty: float = -.5,
                 walking_penalty: float = -1,
                 hunger_penalty: float = -1.,
                 last_ate_limit: int = 9,
                 seed: int = 0):
        """
        Build a 3x3 mesh environment whose states are (position, visible food, last ate).
        :param initial_state: Initial state where the agent starts.
        :param default_reward: (hunger, stolen, walking)
        :param p_stolen: Probability that food is stolen while it is not visible.
        :param n_appear: Number of time-steps until food is regenerated.
        :param stolen_penalty: Penalty when the food is stolen.
        :param walking_penalty: Penalty for each step.
        :param hunger_penalty: Penalty for not eating.
        :param last_ate_limit: Limit of steps before the donkey is hungry.
        :param seed: Seed forwarded to the parent constructor.
        """

        # NOTE(review): the default initial_state holds a plain (unhashable) set, while the finals and the
        # observation space below use frozenset - confirm the state is normalised elsewhere.

        mesh_shape = (3, 3)
        size_x, size_y = mesh_shape
        default_reward = VectorDecimal(default_reward)

        # Every position with no visible food is final, whatever the last-ate value.
        finals = {
            ((x, y), frozenset(), last_ate)
            for x in range(size_x)
            for y in range(size_y)
            for last_ate in range(last_ate_limit + 1)
        }

        # Observation space parts: position (x, y), visible food (bag), last ate (discrete).
        position_space = gym.spaces.Tuple(
            (gym.spaces.Discrete(size_x), gym.spaces.Discrete(size_y)))
        visible_food_space = spaces.Bag([
            frozenset(),
            frozenset({(0, 0)}),
            frozenset({(2, 2)}),
            frozenset({(0, 0), (2, 2)})
        ])
        last_ate_space = gym.spaces.Discrete(last_ate_limit + 1)
        observation_space = gym.spaces.Tuple(
            (position_space, visible_food_space, last_ate_space))

        super().__init__(mesh_shape=mesh_shape,
                         default_reward=default_reward,
                         finals=finals,
                         observation_space=observation_space,
                         initial_state=initial_state,
                         seed=seed)

        # Keep the remaining settings on the instance.
        self.p_stolen = p_stolen
        self.n_appear = n_appear
        self.walking_penalty = walking_penalty
        self.stolen_penalty = stolen_penalty
        self.hunger_penalty = hunger_penalty

        # One counter per food position, both starting at zero.
        self.food_counter = {(0, 0): 0, (2, 2): 0}
Exemple #8
0
    def test_str(self):
        """
        Testing that str() of a vector matches numpy's array_str of its components
        :return:
        """

        # Integer, mixed-sign and mixed int/float component lists.
        for components in ([1, 2, 3], [1, -2], [1., -2., 1]):
            vector = VectorDecimal(components)
            self.assertEqual(np.array_str(vector.components), str(vector))
Exemple #9
0
    def test_equal(self):
        """
        Testing if override == operator works!
        :return:
        """

        size = rnd.randint(2, 10)
        original = VectorDecimal([rnd.uniform(-100., 100.) for _ in range(size)])

        # A deep copy must compare equal to the vector it was copied from.
        clone = deepcopy(original)
        self.assertEqual(clone, original)
Exemple #10
0
    def test_length(self):
        """
        Testing if override len() operator works!
        :return:
        """

        # Five vectors of random size; len() must report the number of components given.
        for _ in range(5):
            expected_size = rnd.randint(1, 20)
            values = [rnd.uniform(-100., 100.) for _ in range(expected_size)]
            self.assertEqual(expected_size, len(VectorDecimal(values)))
Exemple #11
0
    def process_reward(self, reward: Vector) -> float:
        """
        Scalarize a reward vector with self.weights.
        :param reward: Vector whose components are re-wrapped as a VectorDecimal.
        :return: Sum of the weighted components, converted to a plain float.
        """

        # Re-wrap the components as a VectorDecimal
        decimal_reward = VectorDecimal(reward.components)

        # Weight the reward, then collapse it into a single number.
        weighted_reward = decimal_reward * self.weights
        return float(np.sum(weighted_reward))
Exemple #12
0
    def test_magnitude(self):
        """
        Testing magnitude method
        :return:
        """

        # 1^2 + 2^2 + 3^2
        vector = VectorDecimal([1, 2, 3.])
        self.assertEqual(math.sqrt(1 + 4 + 9), vector.magnitude())

        # 1^2 + (-2)^2
        vector = VectorDecimal([1., -2.])
        self.assertEqual(math.sqrt(1 + 4), vector.magnitude())

        # Random components: compare against the square root of the sum of squares.
        vector = VectorDecimal([rnd.uniform(-100., 100.) for _ in range(6)])
        sum_of_squares = sum(component**2 for component in vector.components)
        self.assertEqual(math.sqrt(sum_of_squares), vector.magnitude())
Exemple #13
0
    def test_gt(self):
        """
        Testing if override > operator works!
        :return:
        """

        x = VectorDecimal([5 + self.difference, 3 + self.difference])
        y = VectorDecimal([4, 4])
        z = VectorDecimal([5, 3])
        w = VectorDecimal([6, 4])
        t = VectorDecimal([3, 2])

        # (left, right, expected truthiness of left > right)
        expectations = (
            (x, y, False), (x, z, False), (x, w, False), (x, t, True),
            (y, x, False), (y, z, False), (y, w, False), (y, t, True),
            (z, x, False), (z, y, False), (z, w, False), (z, t, True),
            (w, x, True), (w, y, False), (w, z, True), (w, t, True),
            (t, x, False), (t, y, False), (t, z, False), (t, w, False),
        )

        for left, right, expected in expectations:
            check = self.assertTrue if expected else self.assertFalse
            check(left > right)
Exemple #14
0
    def test_ge(self):
        """
        Testing if override >= operator works!
        :return:
        """

        x = VectorDecimal([5 + self.difference, 3 + self.difference])
        y = VectorDecimal([4, 4])
        z = VectorDecimal([5, 3])
        w = VectorDecimal([6, 4])
        t = VectorDecimal([3, 2])

        # (left, right, expected truthiness of left >= right)
        expectations = (
            (x, y, False), (x, z, True), (x, w, False), (x, t, True),
            (y, x, False), (y, z, False), (y, w, False), (y, t, True),
            (z, x, True), (z, y, False), (z, w, False), (z, t, True),
            (w, x, True), (w, y, True), (w, z, True), (w, t, True),
            (t, x, False), (t, y, False), (t, z, False), (t, w, False),
        )

        for left, right, expected in expectations:
            check = self.assertTrue if expected else self.assertFalse
            check(left >= right)
Exemple #15
0
    def test_le(self):
        """
        Testing if override <= operator works!
        :return:
        """

        x = VectorDecimal([5 + self.difference, 3 + self.difference])
        y = VectorDecimal([4, 4])
        z = VectorDecimal([5, 3])
        w = VectorDecimal([6, 4])
        t = VectorDecimal([3, 2])

        # (left, right, expected truthiness of left <= right)
        expectations = (
            (x, y, False), (x, z, True), (x, w, True), (x, t, False),
            (y, x, False), (y, z, False), (y, w, True), (y, t, False),
            (z, x, True), (z, y, False), (z, w, True), (z, t, False),
            (w, x, False), (w, y, False), (w, z, False), (w, t, False),
            (t, x, True), (t, y, True), (t, z, True), (t, w, True),
        )

        for left, right, expected in expectations:
            check = self.assertTrue if expected else self.assertFalse
            check(left <= right)
Exemple #16
0
    def test_lt(self):
        """
        Testing if override < operator works!
        :return:
        """

        x = VectorDecimal([5 + self.difference, 3 + self.difference])
        y = VectorDecimal([4, 4])
        z = VectorDecimal([5, 3])
        w = VectorDecimal([6, 4])
        t = VectorDecimal([3, 2])

        # (left, right, expected truthiness of left < right)
        expectations = (
            (x, y, False), (x, z, False), (x, w, True), (x, t, False),
            (y, x, False), (y, z, False), (y, w, False), (y, t, False),
            (z, x, False), (z, y, False), (z, w, True), (z, t, False),
            (w, x, False), (w, y, False), (w, z, False), (w, t, False),
            (t, x, True), (t, y, True), (t, z, True), (t, w, True),
        )

        for left, right, expected in expectations:
            check = self.assertTrue if expected else self.assertFalse
            check(left < right)
Exemple #17
0
    def test_m3_max(self):
        """
        Testing m3_max function
        :return:
        """

        quadrants = [
            self.first_quadrant, self.second_quadrant, self.third_quadrant,
            self.fourth_quadrant, self.all_quadrants
        ]

        # Only the problem and its expected non-dominated vectors are needed here.
        for problem, expected, _ in quadrants:

            remaining = VectorDecimal.m3_max(vectors=problem)

            # Strike every returned vector off the expected list.
            while remaining:
                candidate = remaining.pop()
                expected.remove(candidate)

            # Anything still expected was not returned, so the assert fails.
            self.assertFalse(expected)
Exemple #18
0
    def do_step(self) -> bool:
        """
        Run one learning step: pick an action, apply it on the environment and update the stored vectors.
        :return: The final-state flag returned by the environment step.
        """

        # Make sure the current state has an entry in each per-state dictionary (Q, S and V).
        for table in (self.q, self.s, self.v):
            if self.state not in table:
                table.update({self.state: dict()})

        # The index counter of a state seen for the first time starts at zero.
        if self.state not in self.indexes_counter:
            self.indexes_counter.update({self.state: 0})

        action = self.select_action()

        # Apply the action; the info element of the step result is not used here.
        next_state, raw_reward, is_final_state, _ = self.environment.step(
            action=action)

        # Convert to decimal vector
        reward = VectorDecimal(raw_reward)

        self.total_steps += 1
        self.steps += 1

        # A final state with a missing (or empty) V entry gets the initial Q value under index zero.
        if is_final_state and not self.v.get(next_state):
            self.v.update({next_state: {0: self.initial_q_value}})

        # S(state) -> known next states grouped by action for the current state.
        known_by_action = self.s.get(self.state)

        # S(state, a) -> known next states for the current state and the chosen action.
        known_next_states = known_by_action.get(action, list())

        # I_s_k
        relevant_indexes = self.relevant_indexes_of_state(state=next_state)

        # S_k in S_{n - 1}
        already_known = next_state in known_next_states

        if already_known:
            # Q_n = U_n(state, a)
            self.update_operation(state=self.state,
                                  action=action,
                                  reward=reward,
                                  next_state=next_state)

        elif relevant_indexes:
            # s_k is not in S and I_s_k is not empty: Q_n = N_n(state, a)
            self.new_operation(state=self.state,
                               action=action,
                               reward=reward,
                               next_state=next_state)

        # Check if it is necessary to update V(state)
        self.check_if_need_update_v()

        # Move to the state reached.
        self.state = next_state

        return is_final_state
Exemple #19
0
    def calc_frontier_scalarized(self,
                                 p: Vector,
                                 q: Vector,
                                 solutions_known: list = None) -> list:
        """
        Search the supported solution costs between two points with a dichotomous search.

        Only valid for two-objective problems. Every new cost found is also appended to
        self.pareto_frontier_found.

        :param solutions_known: Possible solutions, forwarded to find_c_vector. If is None, then is ignored.
        :param p: 2D point
        :param q: 2D point
        :return: List starting with p and q, followed by every new cost found.
        """

        result = [p, q]

        # Stack of point pairs still to explore, starting with (p, q).
        pending = [(p, q)]

        while pending:
            first, second = pending.pop()

            try:
                # Order points nearest to the center using euclidean distance.
                first, second = tuple(
                    Vector.order_vectors_by_origin_nearest([first, second]))
            except ValueError:
                print('Error to unpack {} and {}'.format(first, second))
                continue

            first, second = VectorDecimal(first), VectorDecimal(second)

            first_x, first_y = first
            second_x, second_y = second

            # Weights of the new linear objective function (multiplied by -1. to turn it into a
            # maximize problem).
            w1 = np.multiply(first_y - second_y, -1.)
            w2 = np.multiply(second_x - first_x, -1.)

            # Solve P to find a new solution and get its cost vector c.
            c = self.find_c_vector(w1, w2, solutions_known=solutions_known)
            c_x, c_y = c

            # c is a new supported cost only if it scores differently from the first point and is unseen.
            score_of_first = w1 * first_x + w2 * first_y
            score_of_c = w1 * c_x + w2 * c_y
            is_new_cost = score_of_first != score_of_c and c not in result

            if not is_new_cost:
                continue

            # Explore both halves split by c.
            pending.append((first, c))
            pending.append((c, second))

            result.append(c)

            # Pareto's frontier found
            self.pareto_frontier_found.append(c)

        return result
Exemple #20
0
    def test_m3_max_2_lists_with_repetitions(self):
        """
        Testing m3_max_2_lists_with_repetitions function
        :return:
        """

        diff = self.difference

        def build_case(quadrant, extra_dominated, repeated):
            # (problem, non-dominated uniques, dominated with duplicates included, non-dominated repeated)
            return quadrant[0], quadrant[1], quadrant[2] + extra_dominated, repeated

        def consume(produced, expected):
            # Pop every produced vector and strike it from the expected list.
            while produced:
                expected.remove(produced.pop())

        cases = [
            build_case(
                self.first_quadrant,
                [
                    VectorDecimal([2 + diff, 4 - diff]),
                    VectorDecimal([0, 6]),
                    VectorDecimal([4, 1]),
                ],
                [VectorDecimal([5 + diff, 3 - diff])]),
            build_case(
                self.second_quadrant,
                [
                    VectorDecimal([-6, 6]),
                    VectorDecimal([-4 + diff, 2 + diff]),
                ],
                [
                    VectorDecimal([-4 - diff, 7 + diff]),
                    VectorDecimal([-1, 0]),
                ]),
            build_case(
                self.third_quadrant,
                [
                    VectorDecimal([-7, -1]),
                    VectorDecimal([-4 + diff, -2 + diff]),
                ],
                [
                    VectorDecimal([-2 - diff, -1 - diff]),
                    VectorDecimal([-1, -4]),
                ]),
            build_case(
                self.fourth_quadrant,
                [
                    VectorDecimal([7 + diff, -3 - diff]),
                    VectorDecimal([2, -1]),
                ],
                [
                    VectorDecimal([10 + diff, -1 + diff]),
                    VectorDecimal([10, -1]),
                ]),
            build_case(
                self.all_quadrants,
                [
                    VectorDecimal([7 + diff, -3 - diff]),
                    VectorDecimal([-7, -1]),
                    VectorDecimal([-4 + diff, -2 + diff]),
                    VectorDecimal([-6, 6]),
                    VectorDecimal([-4 + diff, 2 + diff]),
                    VectorDecimal([0, 6]),
                    VectorDecimal([4, 1]),
                    VectorDecimal([-1, 0]),
                    VectorDecimal([2 + diff, 4 - diff]),
                    VectorDecimal([2, -1]),
                    VectorDecimal([-2 - diff, -1 - diff]),
                    VectorDecimal([-1, -4]),
                ],
                [
                    VectorDecimal([10 + diff, -1 + diff]),
                    VectorDecimal([10, -1]),
                    VectorDecimal([-4 - diff, 7 + diff]),
                    VectorDecimal([5 + diff, 3 - diff]),
                ]),
        ]

        for problem, expected_uniques, expected_dominated, expected_repeated in cases:
            uniques, dominated, repeated = VectorDecimal.m3_max_2_lists_with_repetitions(
                vectors=problem)

            # Nothing may be left in an expected list once every produced vector has been struck off.
            consume(uniques, expected_uniques)
            self.assertFalse(expected_uniques)

            consume(dominated, expected_dominated)
            self.assertFalse(expected_dominated)

            consume(repeated, expected_repeated)
            self.assertFalse(expected_repeated)
Exemple #21
0
    def test_transition_reward(self):
        """
        Check the reward returned by transition_reward for several next states.
        :return:
        """

        # Origin state and action are sampled at random; only next_state varies between assertions.
        origin = self.environment.observation_space.sample()
        move = self.environment.action_space.sample()

        # (next state, expected reward components)
        expectations = (
            # Non-puddle states
            ((1, 0), (-1, 0)),
            ((4, 10), (-1, 0)),
            ((15, 12), (-1, 0)),
            # States in a border of puddle
            ((9, 2), (-1, -1)),
            ((9, 10), (-1, -1)),
            # State in a puddle
            ((8, 10), (-1, -2)),
            # State in a corner of puddle
            ((9, 3), (-1, -2)),
            # Final state
            ((19, 0), (10, 0)),
        )

        for next_state, expected in expectations:
            self.assertEqual(
                self.environment.transition_reward(
                    state=origin, action=move, next_state=next_state
                ), VectorDecimal(expected)
            )
Exemple #22
0
    def test_dominance(self):
        """
        Testing dominance function
        :return:
        """

        # Every component of the second vector is greater than in the first one.
        lower = VectorDecimal([1, 2, 3])
        upper = VectorDecimal([4, 5, 6])

        self.assertEqual(Dominance.is_dominated,
                         VectorDecimal.dominance(lower, upper))
        self.assertEqual(Dominance.dominate,
                         VectorDecimal.dominance(upper, lower))

        # One component greater, the other one equal.
        better = VectorDecimal([10, -1])
        worse = VectorDecimal([2, -1])

        self.assertEqual(Dominance.dominate,
                         VectorDecimal.dominance(better, worse))
        self.assertEqual(Dominance.is_dominated,
                         VectorDecimal.dominance(worse, better))

        # Each vector is greater in a different component.
        first = VectorDecimal([1, 2])
        second = VectorDecimal([0, 3])

        self.assertEqual(Dominance.otherwise,
                         VectorDecimal.dominance(first, second))

        # Are similar
        first = VectorDecimal([1.2, 10.00001])
        second = VectorDecimal([1.20001, 10.])

        self.assertEqual(Dominance.equals,
                         VectorDecimal.dominance(first, second))

        # Are equals
        clone = deepcopy(first)

        self.assertEqual(Dominance.equals,
                         VectorDecimal.dominance(first, clone))
Exemple #23
0
    def setUp(self):
        """
        Build the dominance fixtures used by the tests of this case.

        Every ``*_quadrant`` attribute (and ``all_quadrants``) is a 3-tuple of lists of VectorDecimal:
        ``(problem, non_dominated, dominated)``. Each problem list holds the base vectors, followed by
        some repeated vectors and by vectors shifted by ``self.difference``.
        """

        # Vector configuration
        VectorDecimal.set_decimal_precision(decimal_precision=2)

        def vectors(*rows):
            # One VectorDecimal per list of components, keeping the given order.
            return [VectorDecimal(row) for row in rows]

        # Small offset used to build vectors "similar" to others of the problem.
        delta = self.difference

        self.first_quadrant = (
            # Problem (base vectors, then repeats, then similar)
            vectors(
                [0, 6], [1, 6], [2, 5], [2, 4], [2, 2], [3, 4],
                [4, 3], [4, 1], [5, 3], [5, 2], [6, 0],
                [0, 6], [4, 1],
                [5 + delta, 3 - delta], [2 + delta, 4 - delta],
            ),
            # Non-dominated
            vectors([1, 6], [2, 5], [3, 4], [5, 3], [6, 0]),
            # Dominated
            vectors([0, 6], [2, 4], [2, 2], [4, 3], [4, 1], [5, 2]),
        )

        self.second_quadrant = (
            # Problem (base vectors, then repeats, then similar)
            vectors(
                [-1, 0], [-3, 4], [-4, 2], [-4, 7], [-6, 6], [-6, 0], [-8, 2],
                [-1, 0], [-6, 6],
                [-4 + delta, 2 + delta], [-4 - delta, 7 + delta],
            ),
            # Non-dominated
            vectors([-1, 0], [-4, 7], [-3, 4]),
            # Dominated
            vectors([-4, 2], [-6, 6], [-6, 0], [-8, 2]),
        )

        self.third_quadrant = (
            # Problem (base vectors, then repeats, then similar)
            vectors(
                [-1, -4], [-2, -1], [-3, -6], [-4, -2], [-5, -4], [-7, -1],
                [-1, -4], [-7, -1],
                [-2 - delta, -1 - delta], [-4 + delta, -2 + delta],
            ),
            # Non-dominated
            vectors([-2, -1], [-1, -4]),
            # Dominated
            vectors([-3, -6], [-4, -2], [-5, -4], [-7, -1]),
        )

        self.fourth_quadrant = (
            # Problem (base vectors, then repeats, then similar)
            vectors(
                [2, -1], [3, -2], [1, -4], [3, -5], [5, -6], [7, -3], [10, -1],
                [2, -1], [10, -1],
                [7 + delta, -3 - delta], [10 + delta, -1 + delta],
            ),
            # Non-dominated
            vectors([10, -1]),
            # Dominated
            vectors([2, -1], [3, -2], [1, -4], [3, -5], [5, -6], [7, -3]),
        )

        # The global problem is the concatenation of the four quadrant problems, in quadrant order.
        whole_problem = (self.first_quadrant[0] + self.second_quadrant[0] +
                         self.third_quadrant[0] + self.fourth_quadrant[0])

        self.all_quadrants = (
            # Problem
            whole_problem,
            # Non-dominated
            vectors([-4, 7], [1, 6], [2, 5], [3, 4], [5, 3], [6, 0], [10, -1]),
            # Dominated
            vectors(
                [0, 6], [2, 4], [2, 2], [4, 3], [4, 1], [5, 2],
                [-1, 0], [-3, 4], [-4, 2], [-6, 6], [-6, 0], [-8, 2],
                [-1, -4], [-2, -1], [-3, -6], [-4, -2], [-5, -4], [-7, -1],
                [2, -1], [1, -4], [3, -2], [3, -5], [5, -6], [7, -3],
            ),
        )
Exemple #24
0
    def test_all_close(self):
        """
        Check VectorDecimal.all_close on pairs expected to be close and on pairs expected not to be.
        :return:
        """

        delta = self.difference

        # A vector and its deep copy are close.
        original = VectorDecimal([1, 2, 3, 4])
        clone = deepcopy(original)
        self.assertTrue(VectorDecimal.all_close(original, clone))

        # Pairs of components that must be reported as close.
        close_pairs = (
            # Same components
            ([1, .3], [1, .3]),
            # Components that differ only by `self.difference`
            ([1.2, 10 + delta], [1.2 + delta, 10.]),
            ([1.2 + delta, 10], [1.2, 10. + delta]),
            # NOTE(review): different lengths are still expected to be close here; presumably only the
            # shared leading components are compared - confirm against VectorDecimal.all_close.
            ([1, .3], [1]),
        )

        for left, right in close_pairs:
            self.assertTrue(
                VectorDecimal.all_close(VectorDecimal(left),
                                        VectorDecimal(right)))

        # Pairs of components that must NOT be reported as close.
        distinct_pairs = (
            ([1, .3], [.3]),
            ([1, .3], [1, 4]),
            ([1, .3], [2, .3]),
        )

        for left, right in distinct_pairs:
            self.assertFalse(
                VectorDecimal.all_close(VectorDecimal(left),
                                        VectorDecimal(right)))
Exemple #25
0
    def test_pow(self):
        """
        Test the overridden ** operator: vector ** vector, vector ** int, vector ** float, and the
        ValueError expected when both operands are vectors of different length.
        :return:
        """

        # Vector ** vector (component-wise)
        x = VectorDecimal([1, 2, 3.])
        y = VectorDecimal([0., -2., 1.])
        self.assertEqual(VectorDecimal([1, 0.25, 3]), x**y)

        ################################################################################################################

        x = VectorDecimal([-3., 2, 4.])
        y = VectorDecimal([0., -3., 1.])
        self.assertEqual(VectorDecimal([1, 0.125, 4]), x**y)

        ################################################################################################################

        # Vector ** int
        x = VectorDecimal([-3., 2, 4.])
        self.assertEqual(VectorDecimal([9, 4, 16]), x**2)

        ################################################################################################################

        # Vector ** float
        x = VectorDecimal([-3., 2, 4.])
        self.assertEqual(VectorDecimal([9, 4, 16]), x**2.)

        ################################################################################################################

        x = VectorDecimal([1, 2, 3])
        y = VectorDecimal([4, 5, 6, 7])

        # One context per direction: inside a single `assertRaises` block the first raising statement
        # ends the block, so a second statement would never be executed (nor checked).
        with self.assertRaises(ValueError):
            x**y

        with self.assertRaises(ValueError):
            y**x
Exemple #26
0
    def test_mul(self):
        """
        Test the overridden * operator: vector * vector, vector * int, vector * float, and the
        ValueError expected when both operands are vectors of different length.
        :return:
        """

        # Vector * vector (component-wise)
        x = VectorDecimal([1, 2, 3.])
        y = VectorDecimal([0., -2., 1.])
        self.assertEqual(VectorDecimal([0, -4, 3]), x * y)

        ################################################################################################################

        x = VectorDecimal([-3., 2, 4.])
        y = VectorDecimal([0., -3., 1.])
        self.assertEqual(VectorDecimal([0, -6, 4]), x * y)

        ################################################################################################################

        # Vector * int
        x = VectorDecimal([-3., 2, 4.])
        self.assertEqual(VectorDecimal([-6, 4, 8]), x * 2)

        ################################################################################################################

        # Vector * float
        x = VectorDecimal([-3., 2, 4.])
        self.assertEqual(VectorDecimal([-6, 4, 8]), x * 2.)

        ################################################################################################################

        x = VectorDecimal([1, 2, 3])
        y = VectorDecimal([4, 5, 6, 7])

        # One context per direction: inside a single `assertRaises` block the first raising statement
        # ends the block, so a second statement would never be executed (nor checked).
        with self.assertRaises(ValueError):
            x * y

        with self.assertRaises(ValueError):
            y * x
Exemple #27
0
    def test_sub(self):
        """
        Test the overridden - operator: vector - vector, vector - int, vector - float, and the
        ValueError expected when both operands are vectors of different length.
        :return:
        """

        # Vector - vector (component-wise)
        x = VectorDecimal([1, 2, 3.])
        y = VectorDecimal([0., -2., 1.])
        self.assertEqual(VectorDecimal([1, 4., 2.]), x - y)

        ################################################################################################################

        x = VectorDecimal([-3., 0., 4.])
        y = VectorDecimal([0., -3., 5.])
        self.assertEqual(VectorDecimal([-3, 3., -1.]), x - y)

        ################################################################################################################

        # Vector - int
        x = VectorDecimal([1, 2, 3])
        self.assertEqual(VectorDecimal([0, 1, 2]), x - 1)

        ################################################################################################################

        # Vector - float
        x = VectorDecimal([1, 2, 3])
        self.assertEqual(VectorDecimal([0, 1, 2]), x - 1.)

        ################################################################################################################

        x = VectorDecimal([1, 2, 3])
        y = VectorDecimal([4, 5, 6, 7])

        # One context per direction: inside a single `assertRaises` block the first raising statement
        # ends the block, so a second statement would never be executed (nor checked).
        with self.assertRaises(ValueError):
            x - y

        with self.assertRaises(ValueError):
            y - x
Exemple #28
0
    def test_add(self):
        """
        Test the overridden + operator: vector + vector, vector + int, vector + float, and the
        ValueError expected when both operands are vectors of different length.
        :return:
        """

        # Vector + vector (component-wise)
        x = VectorDecimal([1, 2, 3.])
        y = VectorDecimal([0., -2., 1.])
        self.assertEqual(VectorDecimal([1, 0., 4.]), x + y)

        ################################################################################################################

        x = VectorDecimal([-3., 2, 4.])
        y = VectorDecimal([0., -3., 1.])
        self.assertEqual(VectorDecimal([-3, -1., 5.]), x + y)

        ################################################################################################################

        # Vector + int
        x = VectorDecimal([1, 2, 3])
        self.assertEqual(VectorDecimal([2, 3, 4]), x + 1)

        ################################################################################################################

        # Vector + float
        x = VectorDecimal([1, 2, 3])
        self.assertEqual(VectorDecimal([2, 3, 4]), x + 1.)

        ################################################################################################################

        x = VectorDecimal([1, 2, 3])
        y = VectorDecimal([4, 5, 6, 7])

        # One context per direction: inside a single `assertRaises` block the first raising statement
        # ends the block, so a second statement would never be executed (nor checked).
        with self.assertRaises(ValueError):
            x + y

        with self.assertRaises(ValueError):
            y + x
Exemple #29
0
    def __init__(
            self,
            environment: Environment,
            hv_reference: Vector,
            alpha: float = 0.1,
            epsilon: float = 0.1,
            gamma: float = 1.,
            seed: int = 0,
            states_to_observe: set = None,
            max_steps: int = None,
            evaluation_mechanism: EvaluationMechanism = EvaluationMechanism.HV,
            graph_types: set = None,
            initial_value: VectorDecimal = None,
            convergence_graph: bool = False):
        """
        Set up the agent: forward the common options to the parent constructor and create the empty
        learning structures (q, s, v, index counters, pending states).

        :param environment: Environment in which the agent operates.
        :param hv_reference: Reference vector used to calculate the hypervolume (stored as given).
        :param alpha: Learning rate; must satisfy 0 < alpha <= 1 (checked with an assert).
        :param epsilon: Epsilon used in the e-greedy policy, to explore more states.
        :param gamma: Discount factor.
        :param seed: Seed forwarded to the parent constructor.
        :param states_to_observe: States from which we want to get a graphical output.
        :param max_steps: Limit of steps per episode.
        :param evaluation_mechanism: Evaluation mechanism used to choose the best action. Accepted values are
            EvaluationMechanism.HV, EvaluationMechanism.PO and EvaluationMechanism.C.
        :param graph_types: Set of graph types to generate; when None, {GraphType.STEPS, GraphType.MEMORY}
            is used.
        :param initial_value: Vector from which the algorithm begins to learn; when None, `initial_q_value`
            is built from the zero vector of the environment's default reward.
        :param convergence_graph: If True, the algorithm collects data to draw a convergence graph.
        :raises ValueError: If `evaluation_mechanism` is not one of the accepted values.
        """

        # Default graph types when the caller does not give any
        if graph_types is None:
            graph_types = {GraphType.STEPS, GraphType.MEMORY}

        # Super call __init__
        super().__init__(environment=environment,
                         epsilon=epsilon,
                         gamma=gamma,
                         seed=seed,
                         graph_types=graph_types,
                         states_to_observe=states_to_observe,
                         max_steps=max_steps,
                         initial_value=initial_value)

        # Without an explicit initial value, start from the zero vector of the environment's default reward
        if initial_value is None:
            self.initial_q_value = VectorDecimal(
                self.environment.default_reward.zero_vector)

        # Learning factor (NOTE: asserts are stripped when Python runs with -O)
        assert 0 < alpha <= 1
        self.alpha = alpha

        # Dictionary that stores all q values.
        # Key: position; Value: second level dictionary.
        # Second level dictionary: key: action; value: third level dictionary
        # Third level dictionary: key: index vector (element from cartesian product);
        #                         value: q-vector (instance of class IndexVector)
        self.q = dict()

        # States known by each position and action
        self.s = dict()

        # Non-dominated states for a given position
        self.v = dict()

        # Counter of indexes used by each pair (position, action)
        self.indexes_counter = dict()

        # Set of states that need to be updated
        self.states_to_update = set()

        # Evaluation mechanism: only HV, PO and C are accepted
        if evaluation_mechanism in (EvaluationMechanism.HV,
                                    EvaluationMechanism.PO,
                                    EvaluationMechanism.C):
            self.evaluation_mechanism = evaluation_mechanism
        else:
            raise ValueError('Evaluation mechanism does not valid.')

        # Reference vector for the hypervolume calculation
        self.hv_reference = hv_reference

        # Whether convergence-graph data is wanted, and the (initially empty) list for that data
        self.convergence_graph = convergence_graph
        self.convergence_graph_data = list()