def train_agent(
        self,
        env: UnityEnvironment,
        agent: Agent,
        verbose: bool = True,
        exit_when_solved: bool = True,
        number_episodes: int = 1000,
        **kwargs,
) -> None:
    """Train `agent` against a Unity ML-Agents environment.

    Runs up to `number_episodes` episodes, appending each episode's total
    reward to ``self.scores``. When `exit_when_solved` is True, training
    stops early once the rolling average over the last
    ``self.score_window_size`` episodes reaches ``self.score_threshold``.

    env -- UnityEnvironment to interact with (only its first brain is used)
    agent -- agent exposing ``set_train_mode`` / ``act`` / ``step``
    verbose -- print per-episode progress to stdout
    exit_when_solved -- stop as soon as the environment counts as solved
    number_episodes -- maximum number of episodes to run in this call
    kwargs -- unused here; accepted for call-site compatibility
    """
    agent.set_train_mode(True)
    # Only the environment's first brain is driven.
    brain_name = env.brain_names[0]
    for _ in range(number_episodes):
        # Episode numbering continues across calls, based on scores recorded so far.
        episode_number = len(self.scores) + 1
        env_info = env.reset(train_mode=True)[brain_name]
        state = env_info.vector_observations[0]
        score = 0
        # Roll out one episode until the environment signals done.
        while True:
            action = agent.act(state)
            env_info = env.step(action)[brain_name]
            reward = env_info.rewards[0]
            next_state = env_info.vector_observations[0]
            done = env_info.local_done[0]
            # Hand the transition to the agent (presumably where learning
            # happens -- defined by the Agent implementation, confirm there).
            agent.step(state, action, reward, next_state, done)
            score += reward
            state = next_state
            if done:
                break
        self.scores.append(score)
        # Rolling average over the most recent score_window_size episodes.
        average_score_window = np.mean(
            self.scores[-self.score_window_size:])
        if verbose:
            # "\r" keeps progress on one line; every 100 episodes a full
            # line is persisted.
            print(
                f"\rEpisode {episode_number}\tAverage Score: {average_score_window:.2f}",
                end="",
            )
            if episode_number % 100 == 0:
                print(
                    f"\rEpisode {episode_number}\tAverage Score: {average_score_window:.2f}",
                )
        # Only treat the environment as solved once a full window of scores exists.
        if (exit_when_solved
                and episode_number >= self.score_window_size
                and average_score_window >= self.score_threshold):
            if verbose:
                print(
                    f"\rEnvironment solved in {len(self.scores)} episodes! ",
                )
            break
def loop(
    env: gym.Env,
    agent: agents.Agent,
    num_episodes: int,
    epsilon: float,
    should_learn: bool,
):
    """Drive `agent` through `num_episodes` episodes of `env`.

    Actions are epsilon-greedy: with probability `epsilon` a random action
    is sampled from the environment's action space, otherwise the agent
    chooses. When `should_learn` is True the agent is fed every transition.
    Progress (episode count, wins, win ratio) is printed after each step;
    an episode counts as a win when its terminal reward is truthy.
    """
    win_count = 0
    current_episode = 1
    obs = env.reset()
    env.render()
    while current_episode <= num_episodes:
        explore = random() < epsilon
        action = env.action_space.sample() if explore else agent.act(obs)
        next_obs, reward, done, _info = env.step(action)
        if should_learn:
            agent.learn(obs, action, next_obs, reward)
        env.render()
        # Progress is reported with the counters as they stood BEFORE this
        # step's outcome is folded in.
        print("\tEpisodes:", current_episode, "\tWins:", win_count)
        print("\tWin Ratio:", win_count / current_episode)
        if not done:
            obs = next_obs
            continue
        # Episode finished: restart, advance the counter, tally the win.
        obs = env.reset()
        env.render()
        current_episode += 1
        if reward:
            win_count += 1
    env.close()
# Q learning Algorithm profits = np.zeros((ITER_BREAK + 2, NUM_EPISODES + 2)) for ep in range(NUM_EPISODES): print(ep) # 1: initialise Qs env.reset() agent1.reset() agent2.reset() iter_no = 0 s_next = 0 while True: iter_no += 1 eps = 1 - np.exp(-BETA * (iter_no)) # 2: agents choose actions simultanously. action1 = agent1.act(eps) action2 = agent2.act(eps) action = action1 * nA + action2 # 3: outcomes are calculated s = s_next s_next, reward_n, done, prob = env.step(action) # 4: Bellman updates agent1.value_update(s, action1, reward_n[0], s_next) agent2.value_update(s, action2, reward_n[1], s_next) profits[iter_no][ep] = reward_n[0] # 5: repeat until convergence if iter_no > ITER_BREAK or agent1.length_opt_act > CONV: if agent1.length_opt_act > CONV: print("yay") print(iter_no) break
class Environment:
    '''
    Scenario container: builds three pymunk-backed agents (Blue Agent,
    Green Agent / "patient", and the Fireball), sets up a pymunk space and
    an optional pygame display, and can run the scenario forward -- or as
    a noisy counterfactual with the Blue Agent removed. Collision events
    are communicated through module-level flags in `handlers`, and
    per-tick positions are accumulated for later rendering in Blender.
    '''

    def __init__(self, a_params, p_params, f_params, vel, handlers=None,
                 view=True, std_dev=0, frict=0.05):
        '''
        Environment class that contains all necessary components to
        configure and run scenarios.

        a_params::dict -- parameters for the Blue Agent
        p_params::dict -- parameters for the Green Agent
        f_params::dict -- parameters for the Fireball
        vel::tuple -- velocities associated with each agent in the scenario
        handlers::tuple -- optional collision handlers, each entry a
            (collision_type_1, collision_type_2, post_solve_callback) triple
        view::bool -- flag for whether you want to view the scenario or not
        frict::float -- friction value for pymunk physics (applied as
            space damping in configure())
        std_dev::float -- standard deviation value for noisy counterfactual
            simulation
        '''
        self.view = view
        self.std_dev = std_dev
        # Objects in environment: each Agent is built from (x, y) location,
        # color, collision type, and its list of moves.
        self.agent = Agent(a_params['loc'][0], a_params['loc'][1],
                           a_params['color'], a_params['coll'],
                           a_params['moves'])
        self.patient = Agent(p_params['loc'][0], p_params['loc'][1],
                             p_params['color'], p_params['coll'],
                             p_params['moves'])
        self.fireball = Agent(f_params['loc'][0], f_params['loc'][1],
                              f_params['color'], f_params['coll'],
                              f_params['moves'])
        # Initial location of objects in environment
        self.p_loc = p_params['loc']
        self.a_loc = a_params['loc']
        self.f_loc = f_params['loc']
        # Pymunk space friction
        self.friction = frict
        # Agent velocities, unpacked as (agent, patient, fireball) in run()
        self.vel = vel
        # One-shot latches so each collision timestamp is recorded only once
        self.pf_lock = False
        self.af_lock = False
        self.ap_lock = False
        # Engine parameters (populated by configure())
        self.space = None
        self.screen = None
        self.options = None
        self.clock = None
        # Collision handlers (copied into a list so the caller's tuple is
        # not aliased; stays None/falsy if none were given)
        self.coll_handlers = [x for x in handlers] if handlers else handlers
        # Values needed for rendering the scenario in Blender
        self.tick = 0
        self.agent_collision = None
        self.agent_patient_collision = None
        self.agent_fireball_collision = None
        self.patient_fireball_collision = 0
        self.position_dict = {'agent': [], 'patient': [], 'fireball': []}
        self.screen_size = (1000, 600)
        # Configure and run environment
        self.configure()

    def configure(self):
        '''
        Configuration method for Environments.

        Sets up the pymunk space for scenarios: initialises pygame and a
        display surface when viewing, creates the physics space, registers
        any collision handlers, and adds all three agents to the space.
        '''
        # Configure pymunk space and pygame engine parameters (if any)
        if self.view:
            pygame.init()
            self.screen = pygame.display.set_mode((1000, 600))
            self.options = pymunk.pygame_util.DrawOptions(self.screen)
            self.clock = pygame.time.Clock()
        self.space = pymunk.Space()
        # NOTE(review): the friction value is applied as pymunk damping
        # (velocity retained per second), not shape friction -- confirm
        # this is the intended physics.
        self.space.damping = self.friction
        # Configure collision handlers (if any)
        if self.coll_handlers:
            for ob1, ob2, rem in self.coll_handlers:
                ch = self.space.add_collision_handler(ob1, ob2)
                # The handler callback gets the draw surface via its data dict
                # (None when not viewing).
                ch.data["surface"] = self.screen
                ch.post_solve = rem
        # Add agents to the pymunk space
        self.space.add(self.agent.body, self.agent.shape,
                       self.patient.body, self.patient.shape,
                       self.fireball.body, self.fireball.shape)

    def update_blender_values(self):
        '''
        All scenarios are rendered in the physics engine Blender. In order
        to do this, we store relevant values such as object position,
        simulation tick count, and collision in a JSON file. This file is
        passed into a bash script that uses it to render the relevant
        scenario in Blender. This method is used to update the JSON files
        for each scenario.
        '''
        # Append positional information to the dict
        self.position_dict['agent'].append({
            'x': self.agent.body.position[0],
            'y': self.agent.body.position[1]
        })
        self.position_dict['patient'].append({
            'x': self.patient.body.position[0],
            'y': self.patient.body.position[1]
        })
        self.position_dict['fireball'].append({
            'x': self.fireball.body.position[0],
            'y': self.fireball.body.position[1]
        })
        # Record when the Agent collides with someone else
        # NOTE(review): PF_COLLISION (patient-fireball) is stored in
        # agent_collision here -- looks like a naming mismatch with the
        # AP/AF cases below; confirm intent before relying on it.
        if handlers.PF_COLLISION and not self.pf_lock:
            self.agent_collision = self.tick
            self.pf_lock = True
        if handlers.AP_COLLISION and not self.ap_lock:
            self.agent_patient_collision = self.tick
            self.ap_lock = True
        if handlers.AF_COLLISION and not self.af_lock:
            self.agent_fireball_collision = self.tick
            self.af_lock = True

    def run(self, video=False, filename=""):
        '''
        Forward method for Environments. Actually runs the scenarios you
        view on (or off) screen.

        video::bool -- whether you want to record the simulation
        filename::str -- the name of the video file
        '''
        # Agent velocities
        a_vel, p_vel, f_vel = self.vel
        # Agent action generators (yield actions of agents)
        a_generator = self.agent.act(a_vel, self.clock, self.screen,
                                     self.space, self.options, self.view,
                                     self.std_dev)
        p_generator = self.patient.act(p_vel, self.clock, self.screen,
                                       self.space, self.options, self.view,
                                       self.std_dev)
        f_generator = self.fireball.act(f_vel, self.clock, self.screen,
                                        self.space, self.options, self.view,
                                        self.std_dev)
        # Running flag
        running = True
        # Video creation (make_video is presumably a generator advanced
        # once per recorded frame -- see next(save_screen) below)
        save_screen = make_video(self.screen)
        # Main loop. Run simulation until collision between Green Agent
        # and Fireball
        while running and not handlers.PF_COLLISION:
            try:
                # Generate the next tick in the simulation for each object
                next(a_generator)
                next(p_generator)
                next(f_generator)
                # Render space on screen (if requested)
                if self.view:
                    self.screen.fill((255, 255, 255))
                    self.space.debug_draw(self.options)
                    pygame.display.flip()
                    self.clock.tick(50)
                # Advance physics at a fixed 50 Hz timestep
                self.space.step(1 / 50.0)
                # Update the values for the Blender JSON file
                self.update_blender_values()
                # Increment the simulation tick
                self.tick += 1
                if video:
                    next(save_screen)
            except Exception as e:
                # NOTE(review): StopIteration from an exhausted act()
                # generator is the intended loop terminator, but this also
                # silently swallows any other error -- consider catching
                # StopIteration explicitly.
                running = False
        if self.view:
            pygame.quit()
            pygame.display.quit()
        # Record whether Green Agent and Fireball collision occurred
        self.patient_fireball_collision = 1 if handlers.PF_COLLISION else 0
        # Reset collision handler module-level flags for the next run
        handlers.PF_COLLISION = []
        handlers.AP_COLLISION = []
        handlers.AF_COLLISION = []
        if video:
            vid_from_img(filename)

    def counterfactual_run(self, std_dev, video=False, filename=''):
        '''
        Forward method for Environments. Actually runs the scenarios you
        view on (or off) screen -- but as a counterfactual: the Blue Agent
        is removed and the remaining agents replay with noise.

        std_dev::float -- noise parameter for simulation
        video::bool -- whether you want to record the simulation
        filename::str -- file name for video
        '''
        # We remove the agent from the environment
        # NOTE(review): assumes the Blue Agent's shape/body are the first
        # entries in the space (they are added first in configure()) --
        # fragile if anything else is ever added earlier.
        self.space.remove(self.space.shapes[0])
        self.space.remove(self.space.bodies[0])
        # Reinitialize pygame
        pygame.init()
        # If viewing, draw simulation to screen
        if self.view:
            pygame.init()
            self.screen = pygame.display.set_mode((1000, 600))
            self.options = pymunk.pygame_util.DrawOptions(self.screen)
            self.clock = pygame.time.Clock()
        # Set noise parameter
        self.std_dev = std_dev
        save_screen = make_video(self.screen)
        # Agent velocities (Blue Agent's velocity is unused here)
        _, p_vel, f_vel = self.vel
        # Counterfactual ticks for agents: the tick at which each agent's
        # original collision with the Blue Agent happened
        self.patient.counterfactual_tick = self.agent_patient_collision
        self.fireball.counterfactual_tick = self.agent_fireball_collision
        # Agent action generators (yield actions of agents)
        p_generator = self.patient.act(p_vel, self.clock, self.screen,
                                       self.space, self.options, self.view,
                                       self.std_dev)
        f_generator = self.fireball.act(f_vel, self.clock, self.screen,
                                        self.space, self.options, self.view,
                                        self.std_dev)
        # Running flag
        running = True
        # Main loop. Run simulation until collision between Green Agent
        # and Fireball
        while running and not handlers.PF_COLLISION:
            try:
                # Generate the next tick in the simulation for each object
                next(p_generator)
                next(f_generator)
                # Render space on screen (if requested)
                if self.view:
                    self.screen.fill((255, 255, 255))
                    self.space.debug_draw(self.options)
                    pygame.display.flip()
                    self.clock.tick(50)
                self.space.step(1 / 50.0)
                # Update the values for the Blender JSON file
                self.update_blender_values()
                # Increment the simulation tick
                self.tick += 1
                # Increment ticks in agents
                self.patient.tick = self.tick
                self.fireball.tick = self.tick
                if video:
                    next(save_screen)
            except:
                # NOTE(review): bare except -- ends the loop on
                # StopIteration as intended, but hides every other error;
                # prefer `except StopIteration`.
                running = False
        if self.view:
            pygame.quit()
            pygame.display.quit()
        # Record whether Green Agent and Fireball collision occurred
        self.patient_fireball_collision = 1 if handlers.PF_COLLISION else 0
        # Reset collision handler module-level flags for the next run
        handlers.PF_COLLISION = []
        handlers.AP_COLLISION = []
        handlers.AF_COLLISION = []
        if video:
            vid_from_img(filename)