def test_transition_reward(self):
    """
    Check the reward returned for an intermediate state and for the two final states.
    :return:
    """
    env = self.environment

    # The origin state and the action are sampled at random; the test treats them as irrelevant to the reward.
    state = env.observation_space.sample()
    action = env.action_space.sample()

    def reward_for(next_state):
        # Reward obtained when the transition ends in `next_state`.
        return env.transition_reward(state=state, action=action, next_state=next_state)

    # An intermediate state yields the default reward.
    self.assertEqual(reward_for((1, 0)), env.default_reward)

    # A final state.
    self.assertEqual(reward_for((3, 0)), VectorDecimal((1, )))

    # Another final state.
    self.assertEqual(reward_for((3, 1)), VectorDecimal((-1, )))
def __init__(self, default_reward: tuple = (10, 0), penalize_non_goal: float = -1, seed: int = 0,
             final_state: tuple = (19, 0), action_space: gym.spaces = None):
    """
    Build a 20x20 mesh environment with two rectangular puddle areas and a single final state.

    :param default_reward: (non_goal_reached, puddle_penalize)
    :param penalize_non_goal: Penalty stored for steps where the agent has not reached the final position.
    :param seed: Seed forwarded to the parent constructor.
    :param final_state: The only final position of this environment.
    :param action_space: Action space forwarded to the parent constructor.
    """

    # Stored before the parent constructor runs.
    self.final_state = final_state

    super().__init__(mesh_shape=(20, 20), seed=seed, default_reward=VectorDecimal(default_reward),
                     action_space=action_space)

    # Puddle cells: x in [0, 10] with y in [3, 6], plus x in [6, 9] with y in [2, 13].
    first_area = {(x, y) for x in range(0, 11) for y in range(3, 7)}
    second_area = {(x, y) for x in range(6, 10) for y in range(2, 14)}
    self.puddles = frozenset(first_area | second_area)

    self.penalize_non_goal = penalize_non_goal
    self.current_state = self.reset()

    # Free spaces: every state that is not a puddle.
    self.free_spaces = set(self.states() - self.puddles)
def __init__(self, initial_state: tuple = (0, 0), default_reward: tuple = (0,), seed: int = 0):
    """
    Initialise the environment and register every treasure position with its value.

    :param initial_state: Initial state where the agent starts.
    :param default_reward: Default reward; converted to a VectorDecimal after the parent constructor runs.
    :param seed: Seed forwarded to the parent constructor.
    """

    super().__init__(initial_state=initial_state, default_reward=default_reward, seed=seed)

    # (position, treasure value) pairs, kept in their original order.
    treasures = (
        ((0, 1), 1),
        ((1, 2), 2),
        ((2, 3), 10),
        ((3, 4), 11),
        ((4, 4), 12),
        ((5, 4), 13),
        ((6, 7), 15),
        ((7, 7), 18.5),
        ((8, 9), 19),
        ((9, 10), 20),
    )
    self.finals = dict(treasures)

    # Re-wrap whatever the parent stored as default reward into a VectorDecimal.
    self.default_reward = VectorDecimal(self.default_reward)
def test_m3_max_2_lists_not_duplicates(self):
    """
    Testing m3_max_2_lists_not_duplicates function against every fixture problem.
    :return:
    """

    fixtures = (self.first_quadrant, self.second_quadrant, self.third_quadrant, self.fourth_quadrant,
                self.all_quadrants)

    for problem, expected_non_dominated, expected_dominated in fixtures:

        non_dominated, dominated = VectorDecimal.m3_max_2_lists_not_duplicates(vectors=problem)

        # Every returned non-dominated vector must be present in the expected list ...
        while non_dominated:
            expected_non_dominated.remove(non_dominated.pop())

        # ... and nothing expected may be left over.
        self.assertFalse(expected_non_dominated)

        # Same check for the dominated vectors.
        while dominated:
            expected_dominated.remove(dominated.pop())

        self.assertFalse(expected_dominated)
def __init__(self, transitions: tuple = (0.8, 0.1, 0., 0.1), initial_state: tuple = (0, 2), seed: int = 0,
             default_reward: tuple = (-0.04, )):
    """
    Build a 4x3 mesh with one obstacle and two final states.

    :param transitions: Probabilities to change direction of the given action.
        [DIR_0, DIR_90, DIR_180, DIR_270]
    :param initial_state: Initial state forwarded to the parent constructor.
    :param seed: Seed forwarded to the parent constructor.
    :param default_reward: Default reward; wrapped in a VectorDecimal before being forwarded.
    """

    super().__init__(
        mesh_shape=(4, 3),
        seed=seed,
        initial_state=initial_state,
        # Single obstacle cell.
        obstacles=frozenset({(1, 1)}),
        # Final states and their rewards.
        finals={(3, 0): 1, (3, 1): -1},
        default_reward=VectorDecimal(default_reward),
    )

    self.transitions = transitions
def test_init(self):
    """
    Testing that the constructor accepts both a list and an ndarray.
    :return:
    """

    size = rnd.randint(2, 10)
    components = [rnd.uniform(-100., 100.) for _ in range(size)]

    # First a plain list, then the same values as an ndarray.
    for source in (components, np.asarray(components)):
        vector = VectorDecimal(source)
        self.assertIsInstance(vector.components, np.ndarray)
def __init__(self, initial_state: tuple = ((1, 1), {(0, 0), (2, 2)}, 0), default_reward: tuple = (0., 0., 0.),
             p_stolen: float = .9, n_appear: int = 10, stolen_penalty: float = -.5, walking_penalty: float = -1,
             hunger_penalty: float = -1., last_ate_limit: int = 9, seed: int = 0):
    """
    Build a 3x3 mesh whose states are (position, visible food, last ate) triples.

    :param initial_state: Initial state where the agent starts.
    :param default_reward: (hunger, stolen, walking)
    :param p_stolen: Probability that food is stolen when it is not visible.
    :param n_appear: Number of time-steps until food is regenerated.
    :param stolen_penalty: Penalty when the food is stolen.
    :param walking_penalty: Penalty for each step.
    :param hunger_penalty: Penalty for not eating.
    :param last_ate_limit: Upper bound (inclusive) of the `last ate` counter.
    :param seed: Seed forwarded to the parent constructor.
    """

    mesh_shape = (3, 3)
    default_reward = VectorDecimal(default_reward)

    # Final states: any position, any `last ate` value, with no food left.
    finals = {
        ((x, y), frozenset(), last_ate)
        for x in range(mesh_shape[0])
        for y in range(mesh_shape[1])
        for last_ate in range(last_ate_limit + 1)
    }

    # Observation space: (position(x, y), visible food (bag), last ate (discrete))
    position_space = gym.spaces.Tuple((gym.spaces.Discrete(mesh_shape[0]), gym.spaces.Discrete(mesh_shape[1])))
    food_space = spaces.Bag([
        frozenset(),
        frozenset({(0, 0)}),
        frozenset({(2, 2)}),
        frozenset({(0, 0), (2, 2)})
    ])
    last_ate_space = gym.spaces.Discrete(last_ate_limit + 1)
    observation_space = gym.spaces.Tuple((position_space, food_space, last_ate_space))

    super().__init__(mesh_shape=mesh_shape, default_reward=default_reward, finals=finals,
                     observation_space=observation_space, initial_state=initial_state, seed=seed)

    self.p_stolen = p_stolen
    self.n_appear = n_appear
    self.walking_penalty = walking_penalty
    self.stolen_penalty = stolen_penalty
    self.hunger_penalty = hunger_penalty

    # One counter per food position, both starting at zero.
    self.food_counter = {(0, 0): 0, (2, 2): 0}
def test_str(self):
    """
    Testing that str() of a vector matches numpy's string form of its components.
    :return:
    """

    samples = (
        [1, 2, 3],
        [1, -2],
        [1., -2., 1],
    )

    for components in samples:
        vector = VectorDecimal(components)
        self.assertEqual(np.array_str(vector.components), str(vector))
def test_equal(self):
    """
    Testing that a deep copy of a vector compares equal to the original.
    :return:
    """

    size = rnd.randint(2, 10)
    original = VectorDecimal([rnd.uniform(-100., 100.) for _ in range(size)])
    clone = deepcopy(original)

    self.assertEqual(clone, original)
def test_length(self):
    """
    Testing that len() of a vector equals the number of components it was built from.
    :return:
    """

    attempts = 5

    for _ in range(attempts):
        expected_length = rnd.randint(1, 20)
        components = [rnd.uniform(-100., 100.) for _ in range(expected_length)]
        vector = VectorDecimal(components)

        self.assertEqual(expected_length, len(vector))
def process_reward(self, reward: Vector) -> float:
    """
    Scalarize a reward vector using the agent weights.

    :param reward: Reward vector; only its `components` are read.
    :return: Sum of the reward multiplied by `self.weights`, as a float.
    """

    # Rebuild the reward as a VectorDecimal from its raw components.
    decimal_reward = VectorDecimal(reward.components)

    # Weight the reward, then collapse everything into a single scalar.
    weighted = decimal_reward * self.weights

    return float(np.sum(weighted))
def test_magnitude(self):
    """
    Testing magnitude() against the euclidean norm computed with math.sqrt.
    :return:
    """

    # 1^2 + 2^2 + 3^2 = 14
    vector = VectorDecimal([1, 2, 3.])
    self.assertEqual(math.sqrt(14), vector.magnitude())

    # 1^2 + (-2)^2 = 5
    vector = VectorDecimal([1., -2.])
    self.assertEqual(math.sqrt(5), vector.magnitude())

    # Random components: the expected value is computed from the stored components.
    vector = VectorDecimal([rnd.uniform(-100., 100.) for _ in range(6)])
    squares = sum(component**2 for component in vector.components)
    self.assertEqual(math.sqrt(squares), vector.magnitude())
def test_gt(self):
    """
    Testing the overridden > operator against a fixed truth table.
    :return:
    """

    operands = {
        'x': VectorDecimal([5 + self.difference, 3 + self.difference]),
        'y': VectorDecimal([4, 4]),
        'z': VectorDecimal([5, 3]),
        'w': VectorDecimal([6, 4]),
        't': VectorDecimal([3, 2]),
    }

    # (left, right, expected truthiness of `left > right`)
    truth_table = (
        ('x', 'y', False), ('x', 'z', False), ('x', 'w', False), ('x', 't', True),
        ('y', 'x', False), ('y', 'z', False), ('y', 'w', False), ('y', 't', True),
        ('z', 'x', False), ('z', 'y', False), ('z', 'w', False), ('z', 't', True),
        ('w', 'x', True), ('w', 'y', False), ('w', 'z', True), ('w', 't', True),
        ('t', 'x', False), ('t', 'y', False), ('t', 'z', False), ('t', 'w', False),
    )

    for left, right, expected in truth_table:
        verify = self.assertTrue if expected else self.assertFalse
        verify(operands[left] > operands[right])
def test_ge(self):
    """
    Testing the overridden >= operator against a fixed truth table.
    :return:
    """

    operands = {
        'x': VectorDecimal([5 + self.difference, 3 + self.difference]),
        'y': VectorDecimal([4, 4]),
        'z': VectorDecimal([5, 3]),
        'w': VectorDecimal([6, 4]),
        't': VectorDecimal([3, 2]),
    }

    # (left, right, expected truthiness of `left >= right`)
    truth_table = (
        ('x', 'y', False), ('x', 'z', True), ('x', 'w', False), ('x', 't', True),
        ('y', 'x', False), ('y', 'z', False), ('y', 'w', False), ('y', 't', True),
        ('z', 'x', True), ('z', 'y', False), ('z', 'w', False), ('z', 't', True),
        ('w', 'x', True), ('w', 'y', True), ('w', 'z', True), ('w', 't', True),
        ('t', 'x', False), ('t', 'y', False), ('t', 'z', False), ('t', 'w', False),
    )

    for left, right, expected in truth_table:
        verify = self.assertTrue if expected else self.assertFalse
        verify(operands[left] >= operands[right])
def test_le(self):
    """
    Testing the overridden <= operator against a fixed truth table.
    :return:
    """

    operands = {
        'x': VectorDecimal([5 + self.difference, 3 + self.difference]),
        'y': VectorDecimal([4, 4]),
        'z': VectorDecimal([5, 3]),
        'w': VectorDecimal([6, 4]),
        't': VectorDecimal([3, 2]),
    }

    # (left, right, expected truthiness of `left <= right`)
    truth_table = (
        ('x', 'y', False), ('x', 'z', True), ('x', 'w', True), ('x', 't', False),
        ('y', 'x', False), ('y', 'z', False), ('y', 'w', True), ('y', 't', False),
        ('z', 'x', True), ('z', 'y', False), ('z', 'w', True), ('z', 't', False),
        ('w', 'x', False), ('w', 'y', False), ('w', 'z', False), ('w', 't', False),
        ('t', 'x', True), ('t', 'y', True), ('t', 'z', True), ('t', 'w', True),
    )

    for left, right, expected in truth_table:
        verify = self.assertTrue if expected else self.assertFalse
        verify(operands[left] <= operands[right])
def test_lt(self):
    """
    Testing the overridden < operator against a fixed truth table.
    :return:
    """

    operands = {
        'x': VectorDecimal([5 + self.difference, 3 + self.difference]),
        'y': VectorDecimal([4, 4]),
        'z': VectorDecimal([5, 3]),
        'w': VectorDecimal([6, 4]),
        't': VectorDecimal([3, 2]),
    }

    # (left, right, expected truthiness of `left < right`)
    truth_table = (
        ('x', 'y', False), ('x', 'z', False), ('x', 'w', True), ('x', 't', False),
        ('y', 'x', False), ('y', 'z', False), ('y', 'w', False), ('y', 't', False),
        ('z', 'x', False), ('z', 'y', False), ('z', 'w', True), ('z', 't', False),
        ('w', 'x', False), ('w', 'y', False), ('w', 'z', False), ('w', 't', False),
        ('t', 'x', True), ('t', 'y', True), ('t', 'z', True), ('t', 'w', True),
    )

    for left, right, expected in truth_table:
        verify = self.assertTrue if expected else self.assertFalse
        verify(operands[left] < operands[right])
def test_m3_max(self):
    """
    Testing m3_max function against every fixture problem.
    :return:
    """

    fixtures = (self.first_quadrant, self.second_quadrant, self.third_quadrant, self.fourth_quadrant,
                self.all_quadrants)

    # The third element of each fixture (dominated vectors) is not needed here.
    for problem, expected, _ in fixtures:

        found = VectorDecimal.m3_max(vectors=problem)

        # Cross out every returned vector from the expected list.
        while found:
            expected.remove(found.pop())

        # Anything left over was expected but not returned.
        self.assertFalse(expected)
def do_step(self) -> bool:
    """
    The agent does one step on the environment and updates its learned vectors.

    :return: True if the state reached after the step is a final state.
    """

    # If the position is unknown, register it in the different information dictionaries.
    for table in (self.q, self.s, self.v):
        if self.state not in table:
            table[self.state] = dict()

    if self.state not in self.indexes_counter:
        # Initialize counters
        self.indexes_counter[self.state] = 0

    # Get an action
    action = self.select_action()

    # Do step on environment
    next_state, reward, is_final_state, info = self.environment.step(action=action)

    # Convert to decimal vector
    reward = VectorDecimal(reward)

    # Increment steps done
    self.total_steps += 1
    self.steps += 1

    # A final state with no (or an empty) entry in V gets the initial q-value under index 0.
    if is_final_state and not self.v.get(next_state):
        self.v[next_state] = {0: self.initial_q_value}

    # S(state) -> All known states with its action for the current position.
    known_by_action = self.s.get(self.state)

    # S(state, a) -> All known states for the position and action given.
    known_states = known_by_action.get(action, list())

    # I_s_k
    relevant_indexes = self.relevant_indexes_of_state(state=next_state)

    # S_k in S_{n - 1}
    already_known = next_state in known_states

    if already_known:
        # Q_n = U_n(state, a)
        self.update_operation(state=self.state, action=action, reward=reward, next_state=next_state)
    elif relevant_indexes:
        # s_k is not in S and I_s_k is not empty: Q_n = N_n(state, a)
        self.new_operation(state=self.state, action=action, reward=reward, next_state=next_state)

    # Check if it is necessary to update V(state)
    self.check_if_need_update_v()

    # Update position
    self.state = next_state

    return is_final_state
def calc_frontier_scalarized(self, p: Vector, q: Vector, solutions_known: list = None) -> list:
    """
    Calc the supported solutions costs of the Pareto frontier between two points.
    Applies a dichotomous search; this method is only valid for two-objective problems.

    :param p: 2D point
    :param q: 2D point
    :param solutions_known: Known candidate solutions, forwarded to `find_c_vector`. May be None.
    :return: List that starts with p and q, followed by every new cost vector found.
    """

    result = [p, q]

    # Stack of pairs of points still to explore.
    pending = [(p, q)]

    while pending:

        first, second = pending.pop()

        try:
            # Order points nearest to the origin first.
            first, second = tuple(Vector.order_vectors_by_origin_nearest([first, second]))
        except ValueError:
            print('Error to unpack {} and {}'.format(first, second))
            continue

        # Convert to vectors
        a, b = VectorDecimal(first), VectorDecimal(second)

        # Decompose points
        a_x, a_y = a
        b_x, b_y = b

        # Parameters of the new linear objective function (multiplied by -1. to turn it into a
        # maximize problem).
        w1 = np.multiply(a_y - b_y, -1.)
        w2 = np.multiply(b_x - a_x, -1.)

        # Solve P to find a new solution and get its cost vector c.
        c = self.find_c_vector(w1, w2, solutions_known=solutions_known)
        c_x, c_y = c

        # c is only kept when its weighted cost differs from that of a and it is not already in result.
        differs = (w1 * a_x + w2 * a_y) != (w1 * c_x + w2 * c_y)
        if not differs or c in result:
            continue

        # c is the cost of a new supported solution: explore both halves.
        pending.append((a, c))
        pending.append((c, b))

        result.append(c)

        # Pareto frontier found
        self.pareto_frontier_found.append(c)

    return result
def test_m3_max_2_lists_with_repetitions(self):
    """
    Testing m3_max_2_lists_with_repetitions function against every fixture problem.
    :return:
    """

    d = self.difference

    def drain(found, expected):
        # Cross out every found vector from `expected`; nothing may be left over afterwards.
        while found:
            expected.remove(found.pop())
        self.assertFalse(expected)

    # Each entry: (problem, non-dominated uniques, dominated (duplicates included), non-dominated repeated)
    problems = [
        (
            self.first_quadrant[0],
            self.first_quadrant[1],
            self.first_quadrant[2] + [
                VectorDecimal([2 + d, 4 - d]),
                VectorDecimal([0, 6]),
                VectorDecimal([4, 1])
            ],
            [VectorDecimal([5 + d, 3 - d])]
        ),
        (
            self.second_quadrant[0],
            self.second_quadrant[1],
            self.second_quadrant[2] + [
                VectorDecimal([-6, 6]),
                VectorDecimal([-4 + d, 2 + d])
            ],
            [
                VectorDecimal([-4 - d, 7 + d]),
                VectorDecimal([-1, 0])
            ]
        ),
        (
            self.third_quadrant[0],
            self.third_quadrant[1],
            self.third_quadrant[2] + [
                VectorDecimal([-7, -1]),
                VectorDecimal([-4 + d, -2 + d])
            ],
            [
                VectorDecimal([-2 - d, -1 - d]),
                VectorDecimal([-1, -4])
            ]
        ),
        (
            self.fourth_quadrant[0],
            self.fourth_quadrant[1],
            self.fourth_quadrant[2] + [
                VectorDecimal([7 + d, -3 - d]),
                VectorDecimal([2, -1])
            ],
            [
                VectorDecimal([10 + d, -1 + d]),
                VectorDecimal([10, -1])
            ]
        ),
        (
            self.all_quadrants[0],
            self.all_quadrants[1],
            self.all_quadrants[2] + [
                VectorDecimal([7 + d, -3 - d]),
                VectorDecimal([-7, -1]),
                VectorDecimal([-4 + d, -2 + d]),
                VectorDecimal([-6, 6]),
                VectorDecimal([-4 + d, 2 + d]),
                VectorDecimal([0, 6]),
                VectorDecimal([4, 1]),
                VectorDecimal([-1, 0]),
                VectorDecimal([2 + d, 4 - d]),
                VectorDecimal([2, -1]),
                VectorDecimal([-2 - d, -1 - d]),
                VectorDecimal([-1, -4]),
            ],
            [
                VectorDecimal([10 + d, -1 + d]),
                VectorDecimal([10, -1]),
                VectorDecimal([-4 - d, 7 + d]),
                VectorDecimal([5 + d, 3 - d])
            ]
        )
    ]

    for problem, expected_uniques, expected_dominated, expected_repeated in problems:

        uniques, dominated, repeated = VectorDecimal.m3_max_2_lists_with_repetitions(vectors=problem)

        drain(uniques, expected_uniques)
        drain(dominated, expected_dominated)
        drain(repeated, expected_repeated)
def test_transition_reward(self):
    """
    Check the reward returned for non-puddle, puddle and final destination states.
    :return:
    """

    # The origin state and the action are sampled at random; the test treats them as irrelevant to the reward.
    state = self.environment.observation_space.sample()
    action = self.environment.action_space.sample()

    # (next state, expected reward components)
    cases = (
        # Non-puddle states
        ((1, 0), (-1, 0)),
        ((4, 10), (-1, 0)),
        ((15, 12), (-1, 0)),
        # States in a border of puddle
        ((9, 2), (-1, -1)),
        ((9, 10), (-1, -1)),
        # State in a puddle
        ((8, 10), (-1, -2)),
        # State in a corner of puddle
        ((9, 3), (-1, -2)),
        # Final state
        ((19, 0), (10, 0)),
    )

    for next_state, expected in cases:
        obtained = self.environment.transition_reward(state=state, action=action, next_state=next_state)
        self.assertEqual(obtained, VectorDecimal(expected))
def test_dominance(self):
    """
    Testing dominance function for the four possible outcomes.
    :return:
    """

    # Every component of `lower` is smaller than the matching one of `higher`.
    lower, higher = VectorDecimal([1, 2, 3]), VectorDecimal([4, 5, 6])
    self.assertEqual(Dominance.is_dominated, VectorDecimal.dominance(lower, higher))
    self.assertEqual(Dominance.dominate, VectorDecimal.dominance(higher, lower))

    # Same second component, larger first component.
    stronger, weaker = VectorDecimal([10, -1]), VectorDecimal([2, -1])
    self.assertEqual(Dominance.dominate, VectorDecimal.dominance(stronger, weaker))
    self.assertEqual(Dominance.is_dominated, VectorDecimal.dominance(weaker, stronger))

    # Each vector is better in a different component.
    left, right = VectorDecimal([1, 2]), VectorDecimal([0, 3])
    self.assertEqual(Dominance.otherwise, VectorDecimal.dominance(left, right))

    # Similar vectors are expected to be reported as equal.
    base = VectorDecimal([1.2, 10.00001])
    similar = VectorDecimal([1.20001, 10.])
    self.assertEqual(Dominance.equals, VectorDecimal.dominance(base, similar))

    # An exact copy is equal too.
    clone = deepcopy(base)
    self.assertEqual(Dominance.equals, VectorDecimal.dominance(base, clone))
def setUp(self):
    """
    Build the fixture problems. Each fixture is a tuple of three lists:
    (problem vectors, expected non-dominated vectors, expected dominated vectors).
    :return:
    """

    # Vector configuration
    VectorDecimal.set_decimal_precision(decimal_precision=2)

    d = self.difference

    def build(pairs):
        # One two-component VectorDecimal per pair, in order.
        return [VectorDecimal([first, second]) for first, second in pairs]

    self.first_quadrant = (
        build([
            # Problem
            (0, 6), (1, 6), (2, 5), (2, 4), (2, 2), (3, 4), (4, 3), (4, 1), (5, 3), (5, 2), (6, 0),
            # Repeats
            (0, 6), (4, 1),
            # Similar
            (5 + d, 3 - d), (2 + d, 4 - d),
        ]),
        # Non-dominated
        build([(1, 6), (2, 5), (3, 4), (5, 3), (6, 0)]),
        # Dominated
        build([(0, 6), (2, 4), (2, 2), (4, 3), (4, 1), (5, 2)]),
    )

    self.second_quadrant = (
        build([
            # Problem
            (-1, 0), (-3, 4), (-4, 2), (-4, 7), (-6, 6), (-6, 0), (-8, 2),
            # Repeats
            (-1, 0), (-6, 6),
            # Similar
            (-4 + d, 2 + d), (-4 - d, 7 + d),
        ]),
        # Non-dominated
        build([(-1, 0), (-4, 7), (-3, 4)]),
        # Dominated
        build([(-4, 2), (-6, 6), (-6, 0), (-8, 2)]),
    )

    self.third_quadrant = (
        build([
            # Problem
            (-1, -4), (-2, -1), (-3, -6), (-4, -2), (-5, -4), (-7, -1),
            # Repeats
            (-1, -4), (-7, -1),
            # Similar
            (-2 - d, -1 - d), (-4 + d, -2 + d),
        ]),
        # Non-dominated
        build([(-2, -1), (-1, -4)]),
        # Dominated
        build([(-3, -6), (-4, -2), (-5, -4), (-7, -1)]),
    )

    self.fourth_quadrant = (
        build([
            # Problem
            (2, -1), (3, -2), (1, -4), (3, -5), (5, -6), (7, -3), (10, -1),
            # Repeats
            (2, -1), (10, -1),
            # Similar
            (7 + d, -3 - d), (10 + d, -1 + d),
        ]),
        # Non-dominated
        build([(10, -1)]),
        # Dominated
        build([(2, -1), (3, -2), (1, -4), (3, -5), (5, -6), (7, -3)]),
    )

    self.all_quadrants = (
        # Problem: concatenation of the four quadrant problems.
        self.first_quadrant[0] + self.second_quadrant[0] + self.third_quadrant[0] + self.fourth_quadrant[0],
        # Non-dominated
        build([(-4, 7), (1, 6), (2, 5), (3, 4), (5, 3), (6, 0), (10, -1)]),
        # Dominated
        build([
            (0, 6), (2, 4), (2, 2), (4, 3), (4, 1), (5, 2),
            (-1, 0), (-3, 4), (-4, 2), (-6, 6), (-6, 0), (-8, 2),
            (-1, -4), (-2, -1), (-3, -6), (-4, -2), (-5, -4), (-7, -1),
            (2, -1), (1, -4), (3, -2), (3, -5), (5, -6), (7, -3),
        ]),
    )
def test_all_close(self):
    """
    Testing if two Vectors are similar according to all_close.
    :return:
    """

    d = self.difference

    # A vector is close to its own deep copy.
    original = VectorDecimal([1, 2, 3, 4])
    self.assertTrue(VectorDecimal.all_close(original, deepcopy(original)))

    # (components of x, components of y, expected truthiness of all_close(x, y))
    cases = (
        ([1, .3], [1, .3], True),
        ([1.2, 10 + d], [1.2 + d, 10.], True),
        ([1.2 + d, 10], [1.2, 10. + d], True),
        # Vectors of different length
        ([1, .3], [1], True),
        ([1, .3], [.3], False),
        # Same length, one component clearly different
        ([1, .3], [1, 4], False),
        ([1, .3], [2, .3], False),
    )

    for left, right, expected in cases:
        x = VectorDecimal(left)
        y = VectorDecimal(right)
        verify = self.assertTrue if expected else self.assertFalse
        verify(VectorDecimal.all_close(x, y))
def test_pow(self):
    """
    Testing if override ** operator works!

    Covers vector ** vector, vector ** int, vector ** float and the ValueError expected when both
    vectors have different lengths (checked in both operand orders).
    :return:
    """

    x = VectorDecimal([1, 2, 3.])
    y = VectorDecimal([0., -2., 1.])

    self.assertEqual(VectorDecimal([1, 0.25, 3]), x**y)

    ################################################################################################################

    x = VectorDecimal([-3., 2, 4.])
    y = VectorDecimal([0., -3., 1.])

    self.assertEqual(VectorDecimal([1, 0.125, 4]), x**y)

    ################################################################################################################

    x = VectorDecimal([-3., 2, 4.])

    self.assertEqual(VectorDecimal([9, 4, 16]), x**2)

    ################################################################################################################

    x = VectorDecimal([-3., 2, 4.])

    self.assertEqual(VectorDecimal([9, 4, 16]), x**2.)

    ################################################################################################################

    x = VectorDecimal([1, 2, 3])
    y = VectorDecimal([4, 5, 6, 7])

    # One context manager per expression: inside a single block the second expression would never be
    # evaluated, because the first one already raises.
    with self.assertRaises(ValueError):
        x**y

    with self.assertRaises(ValueError):
        y**x
def test_mul(self):
    """
    Testing if override * operator works!

    Covers vector * vector, vector * int, vector * float and the ValueError expected when both
    vectors have different lengths (checked in both operand orders).
    :return:
    """

    x = VectorDecimal([1, 2, 3.])
    y = VectorDecimal([0., -2., 1.])

    self.assertEqual(VectorDecimal([0, -4, 3]), x * y)

    ################################################################################################################

    x = VectorDecimal([-3., 2, 4.])
    y = VectorDecimal([0., -3., 1.])

    self.assertEqual(VectorDecimal([0, -6, 4]), x * y)

    ################################################################################################################

    x = VectorDecimal([-3., 2, 4.])

    self.assertEqual(VectorDecimal([-6, 4, 8]), x * 2)

    ################################################################################################################

    x = VectorDecimal([-3., 2, 4.])

    self.assertEqual(VectorDecimal([-6, 4, 8]), x * 2.)

    ################################################################################################################

    x = VectorDecimal([1, 2, 3])
    y = VectorDecimal([4, 5, 6, 7])

    # One context manager per expression: inside a single block the second expression would never be
    # evaluated, because the first one already raises.
    with self.assertRaises(ValueError):
        x * y

    with self.assertRaises(ValueError):
        y * x
def test_sub(self):
    """
    Testing if override - operator works!

    Covers vector - vector, vector - int, vector - float and the ValueError expected when both
    vectors have different lengths (checked in both operand orders).
    :return:
    """

    x = VectorDecimal([1, 2, 3.])
    y = VectorDecimal([0., -2., 1.])

    self.assertEqual(VectorDecimal([1, 4., 2.]), x - y)

    ################################################################################################################

    x = VectorDecimal([-3., 0., 4.])
    y = VectorDecimal([0., -3., 5.])

    self.assertEqual(VectorDecimal([-3, 3., -1.]), x - y)

    ################################################################################################################

    x = VectorDecimal([1, 2, 3])

    self.assertEqual(VectorDecimal([0, 1, 2]), x - 1)

    ################################################################################################################

    x = VectorDecimal([1, 2, 3])

    self.assertEqual(VectorDecimal([0, 1, 2]), x - 1.)

    ################################################################################################################

    x = VectorDecimal([1, 2, 3])
    y = VectorDecimal([4, 5, 6, 7])

    # One context manager per expression: inside a single block the second expression would never be
    # evaluated, because the first one already raises.
    with self.assertRaises(ValueError):
        x - y

    with self.assertRaises(ValueError):
        y - x
def test_add(self):
    """
    Testing if override + operator works!

    Covers vector + vector, vector + int, vector + float and the ValueError expected when both
    vectors have different lengths (checked in both operand orders).
    :return:
    """

    x = VectorDecimal([1, 2, 3.])
    y = VectorDecimal([0., -2., 1.])

    self.assertEqual(VectorDecimal([1, 0., 4.]), x + y)

    ################################################################################################################

    x = VectorDecimal([-3., 2, 4.])
    y = VectorDecimal([0., -3., 1.])

    self.assertEqual(VectorDecimal([-3, -1., 5.]), x + y)

    ################################################################################################################

    x = VectorDecimal([1, 2, 3])

    self.assertEqual(VectorDecimal([2, 3, 4]), x + 1)

    ################################################################################################################

    x = VectorDecimal([1, 2, 3])

    self.assertEqual(VectorDecimal([2, 3, 4]), x + 1.)

    ################################################################################################################

    x = VectorDecimal([1, 2, 3])
    y = VectorDecimal([4, 5, 6, 7])

    # One context manager per expression: inside a single block the second expression would never be
    # evaluated, because the first one already raises.
    with self.assertRaises(ValueError):
        x + y

    with self.assertRaises(ValueError):
        y + x
def __init__(self, environment: Environment, hv_reference: Vector, alpha: float = 0.1, epsilon: float = 0.1,
             gamma: float = 1., seed: int = 0, states_to_observe: set = None, max_steps: int = None,
             evaluation_mechanism: EvaluationMechanism = EvaluationMechanism.HV, graph_types: set = None,
             initial_value: VectorDecimal = None, convergence_graph: bool = False):
    """
    :param environment: An environment where the agent does any operation.
    :param hv_reference: Reference vector to calc hypervolume
    :param alpha: Learning rate; must satisfy 0 < alpha <= 1.
    :param epsilon: Epsilon used in e-greedy policy, to explore more states.
    :param gamma: Discount factor
    :param seed: Seed forwarded to the parent constructor.
    :param states_to_observe: States from which we want to get a graphical output.
    :param max_steps: Limit of steps per episode.
    :param evaluation_mechanism: Evaluation mechanism used to calc the best action to choose. Must be one of
        EvaluationMechanism.HV, EvaluationMechanism.PO or EvaluationMechanism.C.
    :param graph_types: Set of types of graph to generate (STEPS and MEMORY when None).
    :param initial_value: Vector with which the algorithm begins to learn (by default zero vector).
    :param convergence_graph: If True the algorithm collects data to draw a convergence graph.
    :raises ValueError: If `evaluation_mechanism` is not one of the accepted values.
    """

    # Default graph types
    graph_types = {GraphType.STEPS, GraphType.MEMORY} if graph_types is None else graph_types

    super().__init__(environment=environment, epsilon=epsilon, gamma=gamma, seed=seed, graph_types=graph_types,
                     states_to_observe=states_to_observe, max_steps=max_steps, initial_value=initial_value)

    # Without an explicit initial value, start from the zero vector of the environment default reward.
    if initial_value is None:
        self.initial_q_value = VectorDecimal(self.environment.default_reward.zero_vector)

    # Learning factor
    assert 0 < alpha <= 1
    self.alpha = alpha

    # Dictionary that stores all q values.
    # Key: position; Value: second level dictionary.
    # Second level dictionary: key: action; value: third level dictionary
    # Third level dictionary: key: index vector (element from cartesian product);
    #                         value: q-vector (instance of class IndexVector)
    self.q = {}

    # States known by each position and action
    self.s = {}

    # Non-dominated vectors for a given position
    self.v = {}

    # Counter of indexes used by each position
    self.indexes_counter = {}

    # Set of states that need to be updated
    self.states_to_update = set()

    # Evaluation mechanism
    accepted_mechanisms = (EvaluationMechanism.HV, EvaluationMechanism.PO, EvaluationMechanism.C)
    if evaluation_mechanism not in accepted_mechanisms:
        raise ValueError('Evaluation mechanism does not valid.')
    self.evaluation_mechanism = evaluation_mechanism

    self.hv_reference = hv_reference

    # Convergence graph configuration and collected data
    self.convergence_graph = convergence_graph
    self.convergence_graph_data = list()