def objective_training(self, reference_vectors: list, graph_type: GraphType = None):
    """
    Run episodes until the hypervolume of V(s0) is (approximately) equal to
    the hypervolume of the given reference vectors.

    NOTE(review): ``Vector.decimal_precision`` is passed to ``math.isclose``
    as a *relative* tolerance — confirm an absolute tolerance was not intended.

    :param reference_vectors: vectors whose hypervolume is the training target.
    :param graph_type: kind of graph data to collect on each episode.
    :return:
    """
    # Target hypervolume, computed once from the reference front.
    target_hypervolume = uh.calc_hypervolume(
        vectors=reference_vectors, reference=self.hv_reference)

    # Keep running episodes while the initial state's best hypervolume is
    # not yet close enough to the target.
    while not math.isclose(
            a=self._best_hypervolume(self.environment.initial_state),
            b=target_hypervolume,
            rel_tol=Vector.decimal_precision):
        self.episode(graph_type=graph_type)
def calculate_hypervolume(self):
    """
    Compute the hypervolume of the Q-set of every action in the current state.

    :return: tuple ``(maximum_hv, [(action, hv), ...], sum_of_hvs)``
    """
    per_action = list()
    best = float('-inf')
    total = 0

    for action in self.environment.action_space:
        # Hypervolume of the Q-set reachable from the current state via
        # this action, measured against the configured reference point.
        hv = uh.calc_hypervolume(
            vectors=self.q_set(state=self.state, action=action),
            reference=self.hv_reference)

        per_action.append((action, hv))
        best = max(best, hv)
        total += hv

    return best, per_action, total
def _best_hypervolume(self, state: object = None) -> float:
    """
    Return the best per-action hypervolume achievable from the given state.

    The environment's current state is temporarily swapped while the Q-sets
    are gathered, then restored.

    :param state: state to evaluate; defaults to the environment's current state.
    :return: maximum hypervolume over all actions.
    """
    # Fall back to the environment's current state when none is given.
    if not state:
        state = self.environment.current_state

    # Remember where the environment was, then move it to the target state.
    saved_state = self.environment.current_state
    self.environment.current_state = state

    # One hypervolume per action, each from that action's Q-set.
    hypervolumes = [
        uh.calc_hypervolume(
            vectors=self.q_set(state=state, action=action),
            reference=self.hv_reference)
        for action in self.environment.action_space
    ]

    # Put the environment back where it was.
    self.environment.current_state = saved_state

    return max(hypervolumes)
def chv_evaluation(self, state: object) -> int:
    """
    Choose an action by the hypervolume of the vectors that provide
    cardinality for each action (CHV), breaking ties randomly.

    CAUTION: This method assumes actions are integers in a range.

    :param state: state whose Q-sets are evaluated.
    :return: an action with maximum CHV.
    """
    action_space = self.environment.action_space

    # Every Q-vector of every action, tagged with its action index.
    tagged_vectors = [
        IndexVector(index=action, vector=q)
        for action in action_space
        for q in self.q_set(state=state, action=action)
    ]

    # Keep only the non-dominated vectors, grouped per action
    # (returns_vectors=True asks for the vectors themselves, not counts).
    non_dominated = IndexVector.actions_occurrences_based_m3_with_repetitions(
        vectors=tagged_vectors, actions=action_space, returns_vectors=True)

    # Hypervolume per action; actions with no surviving vectors score 0.
    chv_by_action = {
        action: uh.calc_hypervolume(vectors=vectors, reference=self.hv_reference)
        if len(vectors) > 0 else 0.0
        for action, vectors in non_dominated.items()
    }

    # All actions tied at the maximum CHV.
    best = max(chv_by_action.values())
    candidates = [
        action for action in chv_by_action.keys()
        if chv_by_action[action] == best
    ]

    # Break ties randomly.
    return self.generator.choice(candidates)
def hv_graph(data: dict):
    """
    Generate a MATLAB script that plots the hypervolume for each label.

    Writes ``article/output/hv.m`` next to this file. Assumes ``data`` is a
    nested dict ``{columns: {label: information}}`` where
    ``information['vectors']['(0, 0)']`` holds the raw vectors — TODO confirm
    against the caller.

    :param data: nested experiment data as described above.
    :return:
    """
    # Prepare hypervolume to dumps data
    hv_file = Path(__file__).parent.joinpath('article/output/hv.m')

    # If any parents doesn't exist, make it.
    hv_file.parent.mkdir(parents=True, exist_ok=True)

    with hv_file.open(mode='w+', encoding='UTF-8') as file:
        file_data = 'figure;\n'
        file_data += 'hold on;\n\n'
        file_data += "title('Hypervolume');\n\n"

        labels = dict()

        for columns, values in data.items():
            for label, information in values.items():
                # Convert to vectors
                vectors = list(map(Vector, information['vectors']['(0, 0)']))

                # Calculate hypervolume
                element = uh.calc_hypervolume(vectors=vectors,
                                              reference=vector_reference)

                # Get previous data (the first value seen for a column wins).
                previous_data = labels.get(label, {columns: element})

                if columns not in previous_data:
                    previous_data.update({columns: element})

                # Update labels information
                labels.update({label: previous_data})

        for label, information in labels.items():
            file_data += 'X = [{}];\n'.format(', '.join(
                map(str, information.keys())))
            file_data += 'Y = [{}];\n'.format(', '.join(
                map(str, information.values())))
            file_data += "plot(X, Y, 'Color', '{}', 'Marker', '{}');\n\n".format(
                line_config[label]['color'], line_config[label]['marker'])

        # BUG FIX: MATLAB's axis-label functions are `xlabel`/`ylabel`; the
        # previous `x_label`/`y_label` calls made the generated script fail.
        file_data += "xlabel('# of diagonals');\n"
        file_data += "ylabel('Hypervolume');\n"
        file_data += "\n"

        file_data += 'legend({});\n'.format(', '.join(
            "'{}'".format(label) for label in labels.keys()))
        file_data += 'hold off;\n'

        file.write(file_data)
def calculate_chv(self):
    """
    Compute, for each action, the hypervolume of the vectors that provide
    cardinality for that action.

    CAUTION: This method assumes actions are integers in a range.

    :return: tuple ``(maximum_chv, [(action, chv), ...], sum_of_chvs)``
    """
    action_space = self.environment.action_space

    # Every Q-vector of every action, tagged with its action index.
    tagged_vectors = [
        IndexVector(index=action, vector=q)
        for action in action_space
        for q in self.q_set(state=self.state, action=action)
    ]

    # Non-dominated vectors grouped per action (the vectors themselves,
    # not just counts, because returns_vectors=True).
    per_action_vectors = IndexVector.actions_occurrences_based_m3_with_repetitions(
        vectors=tagged_vectors, actions=action_space, returns_vectors=True)

    chv_list = []
    best = -1
    total = 0

    for action in action_space:
        surviving = per_action_vectors[action]

        # Actions with no surviving vectors score 0.
        chv = 0
        if len(surviving) > 0:
            chv = uh.calc_hypervolume(vectors=surviving,
                                      reference=self.hv_reference)

        chv_list.append((action, chv))
        best = max(best, chv)
        total += chv

    return best, chv_list, total
def _best_hypervolume(self, state: object = None) -> float:
    """
    Return the hypervolume of V for the given state.

    :param state: state to evaluate; defaults to the environment's current state.
    :return: hypervolume of V(state); when V(state) is empty, the
        hypervolume of ``[self.initial_q_value]`` is returned instead.
    """
    # Fall back to the environment's current state when none is given.
    if not state:
        state = self.environment.current_state

    # Vectors stored in V for this state (one per action).
    vectors = list(self.v.get(state, {}).values())

    # Nothing learnt yet: evaluate the initial value instead.
    if not vectors:
        vectors = [self.initial_q_value]

    return uh.calc_hypervolume(vectors=vectors, reference=self.hv_reference)
def hypervolume_evaluation(self, state: object) -> int:
    """
    Choose an action by the hypervolume of its Q-set in the given state
    (HV-PQL), breaking (approximate) ties randomly.

    :param state: state whose Q-sets are evaluated.
    :return: an action with (approximately) maximum hypervolume.
    """
    best_actions = list()
    best_value = float('-inf')

    # Getting action_space
    action_space = self.environment.action_space

    for action in action_space:
        # Q-entries for this action; default to a single entry holding the
        # initial Q-value when nothing has been learnt yet.
        entries = self.q.get(state, dict()).get(
            action, {(0, ): IndexVector(index=0, vector=self.initial_q_value)})

        # Strip the index wrappers, keeping only the vectors.
        vectors = [entry.vector for entry in entries.values()]

        value = uh.calc_hypervolume(vectors=vectors,
                                    reference=self.hv_reference)

        if math.isclose(a=value, b=best_value):
            # Approximately tied with the current best: one more candidate.
            best_actions.append(action)
        elif value > best_value:
            # Strictly better: previous candidates are discarded.
            best_actions = [action]

        best_value = max(best_value, value)

    # Choose randomly among the best actions.
    return self.generator.choice(best_actions)
def update_graph(self, graph_type: GraphType):
    """
    Append the current evaluation value to the train data of every state
    tracked for the given graph type.

    :param graph_type: graph whose per-state data series are updated.
    :return:
    """
    # The appended value does not depend on the state, so compute it once
    # instead of recomputing the hypervolume for every tracked state.
    if not self.pareto_frontier_found:
        value = self.initial_q_value
    else:
        value = uh.calc_hypervolume(vectors=self.pareto_frontier_found,
                                    reference=self.hv_reference)

    # `data` lists are mutated in place, so the previous
    # `dict.update({state: data})` re-assignment was redundant.
    for data in self.graph_info[graph_type].values():
        data.append(value)
def hypervolume_evaluation(self, state: object) -> int:
    """
    Choose the action whose Q-set has maximum hypervolume in the given
    state (EvaluationMechanism.HV); (approximate) ties are broken by
    choosing randomly among the (approximately) maximal actions.

    :param state: state whose Q-sets are evaluated.
    :return: an action with (approximately) maximum hypervolume.
    """
    candidates = list()
    best = float('-inf')

    for action in self.environment.action_space:
        # Hypervolume of this action's Q-set, measured against the
        # configured reference point.
        value = uh.calc_hypervolume(
            vectors=self.q_set(state=state, action=action),
            reference=self.hv_reference)

        if math.isclose(a=value, b=best):
            # Approximately tied with the best so far: add a candidate.
            candidates.append(action)
        elif value > best:
            # Strictly better: previous candidates are discarded.
            candidates = [action]

        best = max(best, value)

    # Choose randomly among the best actions.
    return self.generator.choice(candidates)
def has_converged(self, v_k: dict, v_k_1: dict, tolerance: float) -> bool: """ Check if a policy has converged :param v_k: :param v_k_1: :param tolerance: :return: """ # By default converged = False if self.convergence_graph: # List of differences differences = list() for key, vectors_v_k_s in v_k.items(): # Recover vectors from both V'state vectors_v_k_1_s = v_k_1[key] # If the checks get here, we calculate the hypervolume hv_v_k = uh.calc_hypervolume( vectors=vectors_v_k_s, reference=self.environment.hv_reference) hv_v_k_1 = uh.calc_hypervolume( vectors=vectors_v_k_1_s, reference=self.environment.hv_reference) # Check if absolute difference is lower than tolerance differences.append(abs(hv_v_k_1 - hv_v_k)) max_difference = max(differences) converged = max_difference < tolerance self.convergence_graph_data.append(max_difference) else: for key, vectors_v_k_s in v_k.items(): # If all checks are right, convergence will be True, but at the moment... converged = False # Recover vectors from both V'state vectors_v_k_1_s = v_k_1[key] # V_k(state) and V_K_1(state) has different lengths if not (len(vectors_v_k_s) == len(vectors_v_k_1_s)): break # If the checks get here, we calculate the hypervolume hv_v_k = uh.calc_hypervolume( vectors=vectors_v_k_s, reference=self.environment.hv_reference) hv_v_k_1 = uh.calc_hypervolume( vectors=vectors_v_k_1_s, reference=self.environment.hv_reference) # Check if absolute difference is lower than tolerance converged = abs(hv_v_k_1 - hv_v_k) < tolerance # If difference between HV(V_k(state)) and HV(V_k_1(state)) is greater than tolerance, not converged if not converged: break return converged