def load():
    # agent_10 = AgentBN.load(
    #     'bn/models/rge_1583857516_0.01.bin'
    # )
    # sorted_v_s_0_10 = sorted(agent_10.v[((2, 4), (0, 0))], key=lambda k: k[0])

    # agent_15 = AgentBN.load(
    #     'bn/models/rge_1583857532_0.01.bin'
    # )
    # sorted_v_s_0_15 = sorted(agent_15.v[((2, 4), (0, 0))], key=lambda k: k[0])

    # Load the 30-sweep agent and keep only the non-dominated value vectors of
    # the initial state.
    agent_30: AgentBN = AgentBN.load('bn/models/rge_1583857678_0.01.bin')

    v_s_0_30 = agent_30.v[((2, 4), (0, 0))]
    v_s_0_30_nd = Vector.m3_max(set(v_s_0_30))

    # agent_10625 = AgentBN.load(
    #     filename='bn/models/rge_1583924116_0.01.bin'
    # )
    # sorted_v_s_0_10625 = sorted(agent_10625.v[((2, 4), (0, 0))], key=lambda k: k[0])

    pass
def main():
    # Get trained agent
    print('Training agent...')
    agent: AgentBN = get_trained_agent()

    # Set initial state
    initial_state = ((2, 4), (0, 0), False)

    # Initial vectors
    v_s_0 = agent.v[initial_state]
    vectors = Vector.m3_max(set(v_s_0))

    # Show information
    print('Vectors obtained after m3_max algorithm:')
    print(vectors, end='\n\n')

    # Define a tolerance
    decimal_precision = 0.0000001

    # Simulation
    simulation = dict()

    # Set decimal precision
    Vector.set_decimal_precision(decimal_precision=decimal_precision)

    print('Evaluating recovered policies...')

    # For each vector
    for vector in vectors:
        # Specify objective vector
        objective_vector = vector.copy()

        print('Recovering policy for objective vector: {}...'.format(objective_vector))

        # Get the policy for this objective vector from the agent
        policy = agent.recover_policy(
            initial_state=initial_state,
            objective_vector=objective_vector,
            iterations_limit=agent.total_sweeps
        )

        print('Evaluating recovered policy...', end='\n\n')

        # Evaluate until convergence within `decimal_precision` tolerance.
        policy_evaluated = agent.evaluate_policy(policy=policy, tolerance=decimal_precision)

        # Save the policy and its evaluation.
        simulation.update({objective_vector: (policy, policy_evaluated)})

    print(simulation)
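# After `main` finishes, every entry of `simulation` pairs an objective vector
# with the policy recovered for it and that policy's evaluation. A natural
# follow-up is to check that the evaluated value of the initial state matches
# its objective within the chosen tolerance. The helper below is only a sketch
# of that comparison; it assumes value vectors behave like plain sequences of
# floats, which is an assumption and not an API documented in these scripts.

def matches_objective(objective, evaluated_value, tolerance=0.0000001) -> bool:
    # Component-wise comparison of two value vectors within a tolerance.
    return all(abs(o - e) <= tolerance for o, e in zip(objective, evaluated_value))


# Hypothetical usage (assuming the evaluation maps states to value vectors):
# for objective_vector, (policy, policy_evaluated) in simulation.items():
#     print(objective_vector,
#           matches_objective(objective_vector, policy_evaluated[initial_state]))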
def main():
    # Get trained agent
    agent: AgentBN = get_trained_agent()

    # Set initial state
    initial_state = ((2, 4), (0, 0))

    # agent: AgentBN = AgentBN.load(
    #     filename='bn/models/rg_1584437328_0.005.bin'
    # )

    v_s_0 = agent.v[initial_state]
    vectors = Vector.m3_max(set(v_s_0))

    # Simulation
    simulation = dict()

    # Set decimal precision
    Vector.set_decimal_precision(decimal_precision=0.0000001)

    for vector in vectors:
        # Recreate the index objective vector.
        # objective_vector = IndexVector(
        #     index=vector, vector=trained_agent.v[initial_state][vector]
        # )
        objective_vector = vector.copy()

        # Get simulation from this agent
        policy = agent.recover_policy(
            initial_state=initial_state,
            objective_vector=objective_vector,
            iterations_limit=agent.total_sweeps
        )

        policy_evaluated = agent.evaluate_policy(policy=policy, tolerance=0.0000001)

        simulation.update({objective_vector: (policy, policy_evaluated)})

    print(simulation)
from pathlib import Path

from models import Vector

vectors_10 = Vector.m3_max(
    set(
        map(Vector, [
            [-0.33, 0.0, 0.0], [-0.16, 0.39, 0.0], [-0.15, 0.0, 0.32],
            [-0.14, 0.39, 0.0], [-0.1, 0.0, 0.35], [-0.04, 0.35, 0.0],
            [0.0, 0.0, 0.39], [0.0, 0.0, 0.0]
        ])))

vectors_15 = Vector.m3_max(
    set(
        map(Vector, [
            [-0.45, 0.0, 0.0], [-0.44, 0.0, 0.02], [-0.42, 0.06, 0.02],
            [-0.39, 0.14, 0.0], [-0.38, 0.15, 0.02], [-0.35, 0.14, 0.14],
            [-0.34, 0.17, 0.13], [-0.33, 0.0, 0.17], [-0.33, 0.14, 0.17],
            [-0.33, 0.16, 0.16], [-0.3, 0.27, 0.0], [-0.29, 0.23, 0.15],
            [-0.29, 0.22, 0.17], [-0.21, 0.27, 0.24], [-0.21, 0.25, 0.27],
            [-0.2, 0.23, 0.28], [-0.16, 0.44, 0.0], [-0.16, 0.03, 0.33],
            [-0.16, 0.29, 0.28], [-0.16, 0.28, 0.29], [-0.16, 0.43, 0.02],
            [-0.15, 0.44, 0.0], [-0.15, 0.0, 0.34], [-0.14, 0.43, 0.02],
            [-0.14, 0.28, 0.29], [-0.14, 0.29, 0.28], [-0.14, 0.26, 0.3],
            [-0.09, 0.0, 0.37], [-0.05, 0.24, 0.23], [-0.04, 0.23, 0.23],
            [0.0, 0.0, 0.39], [0.0, 0.0, 0.0], [0.0, 0.32, 0.0]
        ])))

vectors_30 = Vector.m3_max(
    set(
        map(Vector, [
            [-0.57, 0.04, 0.04], [-0.57, 0.0, 0.0], [-0.57, 0.0, 0.04],
            [-0.57, 0.04, 0.0], [-0.53, 0.09, 0.04], [-0.52, 0.0, 0.09],
            [-0.52, 0.03, 0.09], [-0.51, 0.07, 0.09], [-0.24, 0.27, 0.26],
def filter_vectors(vectors: set) -> list:
    # ND[vectors]: keep only the non-dominated vectors.
    return Vector.m3_max(vectors=vectors)
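# `Vector.m3_max` is used throughout these scripts to keep only the
# non-dominated (Pareto-optimal) vectors of a set. As a rough, standalone
# illustration of that idea (independent of the project's `Vector` class;
# the helper names `dominates` and `non_dominated` are purely illustrative),
# a maximization-based dominance filter could look like this:

from typing import Iterable, List, Tuple


def dominates(a: Tuple[float, ...], b: Tuple[float, ...]) -> bool:
    # `a` dominates `b` if it is at least as good in every component and
    # strictly better in at least one (maximization setting).
    return all(x >= y for x, y in zip(a, b)) and any(x > y for x, y in zip(a, b))


def non_dominated(vectors: Iterable[Tuple[float, ...]]) -> List[Tuple[float, ...]]:
    # Keep the vectors that no other vector dominates.
    pool = list(set(vectors))
    return [v for v in pool if not any(dominates(w, v) for w in pool if w != v)]


# Example with a few of the 10-sweep vectors listed above:
# sorted(non_dominated([(-0.33, 0.0, 0.0), (-0.16, 0.39, 0.0),
#                       (0.0, 0.0, 0.39), (0.0, 0.0, 0.0)]))
# -> [(-0.16, 0.39, 0.0), (0.0, 0.0, 0.39)]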
def simulate_state(self, state: object) -> set:
    """
    Given a state, iterate over all of its possible actions and the states
    reachable through each of them, calculating the associated rewards and
    returning the non-dominated vectors found.
    :param state:
    :return:
    """
    # Set current state
    self.environment.current_state = state

    # Set of vectors
    vectors = set()

    # For each possible action
    for action in self.environment.action_space:

        # Get all reachable states
        reachable_states = self.environment.reachable_states(state=state, action=action)

        # Set of vectors
        total_vectors = set()

        # Associate states and vectors
        associate_states = list()
        associate_vectors = list()

        for reachable_state in reachable_states:

            # If next_state is unknown, create it with a zero-vector set.
            if reachable_state not in self.states_vectors:
                self.states_vectors.update({
                    reachable_state: {self.environment.default_reward.zero_vector}
                })

            # Calculate reward
            reward = self.environment.transition_reward(
                state=state, action=action, next_state=reachable_state
            )

            # Get previous vectors, shifted by the transition reward
            accumulated_vectors = set(
                map(lambda x: x + reward, self.states_vectors[reachable_state])
            )

            associate_states.append(reachable_state)
            associate_vectors.append(accumulated_vectors)

            # Add current vectors to total vectors
            total_vectors = total_vectors.union(accumulated_vectors)

        self.states_vectors.update({state: total_vectors})

        # For each combination of vectors, one per reachable state
        for product_vectors in itertools.product(*associate_vectors):

            # Start from the zero vector
            vector = self.environment.default_reward.zero_vector

            for i, reward in enumerate(product_vectors):
                # Next state
                reachable_state = associate_states[i]

                # Calculate probability
                probability = self.environment.transition_probability(
                    state=state, action=action, next_state=reachable_state
                )

                # Accumulate the probability-weighted vector
                vector += (reward * probability)

            # Add to set of vectors
            vectors.add(vector)

    if self.limited_precision:
        vectors = map(
            lambda x: un.round_with_precision(x, Vector.decimal_precision),
            vectors
        )

    # Return all non-dominated vectors found
    return set(Vector.m3_max(vectors))
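# The core of `simulate_state` is the cross product over the per-successor
# vector sets: one candidate vector is built for every way of picking one
# (reward-shifted) vector from each reachable state, and each pick is weighted
# by its transition probability. The sketch below shows that step in isolation,
# using plain tuples instead of the project's `Vector` class and made-up
# successor data; it is an illustration, not part of the agent's API.

import itertools


def expected_value_candidates(successor_vectors, probabilities):
    # successor_vectors: one set of value vectors per reachable state.
    # probabilities: the transition probability of each reachable state.
    candidates = set()

    # One candidate per way of choosing a vector from each successor's set.
    for picks in itertools.product(*successor_vectors):
        expected = tuple(0.0 for _ in picks[0])
        for probability, vector in zip(probabilities, picks):
            # Accumulate this successor's probability-weighted contribution.
            expected = tuple(e + probability * v for e, v in zip(expected, vector))
        candidates.add(tuple(round(component, 6) for component in expected))

    return candidates


# Two hypothetical successors: s1 with two candidate vectors, s2 with one.
# sorted(expected_value_candidates([{(1.0, 0.0), (0.0, 1.0)}, {(0.5, 0.5)}],
#                                  [0.8, 0.2]))
# -> [(0.1, 0.9), (0.9, 0.1)]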