def trace_genomes(self, pop: Population, given_genome: Genome = None, parallel: bool = True):
    """
    Create blueprints that contain the walking-traces for all the requested mazes.

    :param pop: Population object
    :param given_genome: Single genome for which the trace must be made
    :param parallel: Create the traces in parallel
    """
    multi_env = get_multi_env(pop=pop, game_config=self.game_config)
    if len(self.games) > 20 and given_genome is None:
        raise Exception("It is not advised to trace a full population on more than 20 games at once")
    elif len(self.games) > 100:
        raise Exception("It is not advised to evaluate on more than 100 games at once")

    # Set the games for which traces will be made
    multi_env.set_games(self.games, noise=False)

    # Fetch the dictionary of genomes
    genomes = [(given_genome.key, given_genome)] if given_genome else list(iteritems(pop.population))

    if parallel:
        # Initialize the evaluation-pool
        pool = mp.Pool(mp.cpu_count() - self.unused_cpu)
        manager = mp.Manager()
        return_dict = manager.dict()

        # Evaluate the genomes
        for genome in genomes:
            pool.apply_async(func=multi_env.trace_genome, args=(genome, return_dict))
        pool.close()  # Close the pool
        pool.join()  # Postpone continuation until everything is finished
    else:  # Trace sequentially
        return_dict = dict()
        for genome in tqdm(genomes, desc="sequential evaluating"):
            multi_env.trace_genome(genome, return_dict)

    # Create blueprint of final result
    game_objects = [get_game(g, cfg=self.game_config) for g in self.games]
    path = get_subfolder(f"population{'_backup' if pop.use_backup else ''}/storage/{pop.folder_name}/{pop}/", 'images')
    path = get_subfolder(path, 'games')
    create_traces(
            traces=return_dict,
            games=game_objects,
            gen=pop.generation,
            save_path=path,
            save_name=f'trace_{given_genome.key}' if given_genome else 'trace',
    )
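# Hedged usage sketch: the class that owns trace_genomes is not shown in this file,
# so `visualizer` stands for whatever instance exposes it and keeps its game IDs in
# `self.games`; the IDs below are illustrative, not real experiment games.
def _example_trace_usage(visualizer, pop: Population):
    visualizer.games = [10001, 10002]  # Illustrative game IDs
    visualizer.trace_genomes(pop)  # Trace the full population (at most 20 games)
    visualizer.trace_genomes(pop, given_genome=pop.best_genome)  # Single genome (at most 100 games)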
def set_games(self, games: list, noise: bool = False):
    """
    Set the games-set with new games.

    :param games: List of Game-IDs
    :param noise: Add noise to the games
    """
    self.games = np.asarray([get_game(g, cfg=self.game_config, noise=noise) for g in games])
    self.batch_size = len(games)
    if noise:
        for g in self.games:
            g.randomize()
def blueprint_genomes(self, pop: Population, parallel: bool = True):
    """
    Create blueprints for all the requested mazes.

    :param pop: Population object
    :param parallel: Evaluate the population in parallel
    """
    multi_env = get_multi_env(pop=pop, game_config=self.game_config)
    if len(self.games) > 100:
        raise Exception("It is not advised to evaluate on more than 100 games at once")

    multi_env.set_games(self.games, noise=False)

    # Fetch the dictionary of genomes
    genomes = list(iteritems(pop.population))

    if parallel:
        # Initialize the evaluation-pool
        pool = mp.Pool(mp.cpu_count() - self.unused_cpu)
        manager = mp.Manager()
        return_dict = manager.dict()

        # Evaluate the genomes
        for genome in genomes:
            pool.apply_async(func=multi_env.eval_genome, args=(genome, return_dict))
        pool.close()  # Close the pool
        pool.join()  # Postpone continuation until everything is finished
    else:  # Evaluate sequentially
        return_dict = dict()
        for genome in genomes:
            multi_env.eval_genome(genome, return_dict)

    # Create blueprint of final result
    game_objects = [get_game(g, cfg=self.game_config) for g in self.games]
    path = get_subfolder(f"population{'_backup' if pop.use_backup else ''}/storage/{pop.folder_name}/{pop}/", 'images')
    path = get_subfolder(path, 'games')
    create_blueprints(
            final_observations=return_dict,
            games=game_objects,
            gen=pop.generation,
            save_path=path,
    )
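# The parallel pattern shared by trace_genomes and blueprint_genomes — fan the work
# out with apply_async and collect results through a Manager dict proxy — in a
# self-contained form; `_square_worker` is a stand-in for multi_env.eval_genome /
# multi_env.trace_genome.
def _square_worker(key, shared):
    shared[key] = key * key  # Workers write into the shared proxy dict

def _example_parallel_pattern():
    pool = mp.Pool(2)
    manager = mp.Manager()
    return_dict = manager.dict()
    for k in range(8):
        pool.apply_async(func=_square_worker, args=(k, return_dict))
    pool.close()  # No new tasks will be submitted
    pool.join()  # Wait until all workers are finished
    return dict(return_dict)  # E.g. {0: 0, 1: 1, 2: 4, ..., 7: 49}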
if __name__ == '__main__':
    os.chdir("../..")
    cfg = Config()
    games = [0]  # Dummy game

    # Experiment 1
    exp1_train, exp1_eval = get_game_ids(1)
    games += exp1_train[:1]  # Training games repeat themselves, one suffices
    games += exp1_eval

    # Experiment 2
    exp2_train, exp2_eval = get_game_ids(2)
    games += exp2_train[:1]  # Training games repeat themselves, one suffices
    games += exp2_eval

    # Experiment 3
    exp3_train, exp3_eval = get_game_ids(3)
    games += exp3_train[:1]  # Training games repeat themselves, one suffices
    games += exp3_eval

    # Experiment 6
    exp6_train, exp6_eval = get_game_ids(6)
    games += exp6_train[:1]  # Training games repeat themselves, one suffices
    games += exp6_eval

    # Create the visualizations
    for g_id in tqdm(games):
        g = get_game(g_id, cfg=cfg)
        game_blueprint(g, show=False)
def get_positions(genome: Genome, gid: int, debug: bool = False, duration: int = 60):
    """Get the position of the genome at every 0.5 seconds during the given simulation."""
    cfg = Config()
    cfg.game.duration = duration
    cfg.update()

    # Check if valid genome (contains at least one hidden GRU, first GRU is monitored)
    assert len([n for n in genome.get_used_nodes().values() if type(n) == GruNodeGene]) >= 1

    # Get the game
    game = get_game(i=gid, cfg=cfg, noise=False)
    state = game.reset()[D_SENSOR_LIST]
    step_num = 0

    # Create the network
    net = make_net(
            genome=genome,
            genome_config=cfg.genome,
            batch_size=1,
            initial_read=state,
    )

    # Containers to monitor
    position = []
    target_found = []
    score = 0

    # Initialize the containers
    position.append(game.player.pos.get_tuple())
    if debug:
        print(f"Step: {step_num}")
        print(f"\t> Position: {(round(position[-1][0], 2), round(position[-1][1], 2))!r}")
        print(f"\t> Score: {score!r}")

    # Start monitoring
    while True:
        # Check if maximum iterations is reached
        if step_num == duration * cfg.game.fps:
            break

        # Determine the actions made by the agent for each of the states
        action = net(np.asarray([state]))

        # Check if each game received an action
        assert len(action) == 1

        # Proceed the game with one step, based on the predicted action
        obs = game.step(l=action[0][0], r=action[0][1])
        finished = obs[D_DONE]

        # Update the score-count
        if game.score > score:
            target_found.append(step_num)
            score = game.score

        # Update the candidate's current state
        state = obs[D_SENSOR_LIST]

        # Stop if the agent reached the target
        if finished:
            break
        step_num += 1

        # Update the containers
        position.append(game.player.pos.get_tuple())
        if debug:
            print(f"Step: {step_num}")
            print(f"\t> Position: {(round(position[-1][0], 2), round(position[-1][1], 2))!r}")
            print(f"\t> Score: {score!r}")
    return position, game
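# Hedged usage sketch: recover a trace with get_positions and plot it; matplotlib is
# an assumption of this example (the project's own plotting lives in helpers such as
# visualize_position/create_traces), and `game.target` is assumed to expose x/y as it
# does in the pymunk visualizer further below.
def _plot_trace_sketch(genome: Genome, gid: int):
    import matplotlib.pyplot as plt
    positions, game = get_positions(genome, gid, duration=30)
    xs, ys = zip(*positions)
    plt.figure()
    plt.plot(xs, ys, label='trace')
    plt.scatter([game.target.x], [game.target.y], c='g', label='target')
    plt.legend()
    plt.show()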
def main(population: Population,
         game_id: int,
         genome: Genome = None,
         game_cfg: Config = None,
         average: int = 1,
         debug: bool = False):
    """
    Monitor the genome on the following elements:
     * Position
     * Hidden state of SRU (Ht)
     * Actuation of both wheels
     * Distance
     * Delta distance
    """
    # Make sure all parameters are set
    if not genome:
        genome = population.best_genome
    if not game_cfg:
        game_cfg = population.config

    # Check if valid genome (contains at least one hidden SRU, first SRU is monitored) - also possible for fixed RNNs
    a = len([n for n in genome.get_used_nodes().values() if type(n) == SimpleRnnNodeGene]) >= 1
    b = len([n for n in genome.get_used_nodes().values() if type(n) == FixedRnnNodeGene]) >= 1
    assert a or b

    # Get the game
    game = get_game(game_id, cfg=game_cfg, noise=False)
    state = game.reset()[D_SENSOR_LIST]
    step_num = 0

    # Create the network
    net = make_net(
            genome=genome,
            genome_config=population.config.genome,
            batch_size=1,
            initial_read=state,
    )

    # Containers to monitor
    actuation = []
    distance = []
    delta_distance = []
    position = []
    Ht = []
    target_found = []
    score = 0

    # Initialize the containers
    actuation.append([0, 0])
    distance.append(state[0])
    delta_distance.append(0)
    position.append(game.player.pos.get_tuple())
    Ht.append(net.rnn_state[0, 0, 0])
    if debug:
        print(f"Step: {step_num}")
        print(f"\t> Actuation: {(round(actuation[-1][0], 5), round(actuation[-1][1], 5))!r}")
        print(f"\t> Distance: {round(distance[-1], 5)} - Delta distance: {round(delta_distance[-1], 5)}")
        print(f"\t> Position: {(round(position[-1][0], 2), round(position[-1][1], 2))!r}")
        print(f"\t> SRU state: Ht={round(Ht[-1], 5)}")

    # Start monitoring
    while True:
        # Check if maximum iterations is reached
        if step_num == game_cfg.game.duration * game_cfg.game.fps:
            break

        # Determine the actions made by the agent for each of the states
        action = net(np.asarray([state]))

        # Check if each game received an action
        assert len(action) == 1

        # Proceed the game with one step, based on the predicted action
        obs = game.step(l=action[0][0], r=action[0][1])
        finished = obs[D_DONE]

        # Update the score-count
        if game.score > score:
            target_found.append(step_num)
            score = game.score

        # Update the candidate's current state
        state = obs[D_SENSOR_LIST]

        # Stop if the agent reached the target
        if finished:
            break
        step_num += 1

        # Update the containers
        actuation.append(action[0])
        distance.append(state[0])
        delta_distance.append(distance[-2] - distance[-1])
        position.append(game.player.pos.get_tuple())
        Ht.append(net.rnn_state[0, 0, 0])
        if debug:
            print(f"Step: {step_num}")
            print(f"\t> Actuation: {(round(actuation[-1][0], 5), round(actuation[-1][1], 5))!r}")
            print(f"\t> Distance: {round(distance[-1], 5)} - Delta distance: {round(delta_distance[-1], 5)}")
            print(f"\t> Position: {(round(position[-1][0], 2), round(position[-1][1], 2))!r}")
            print(f"\t> SRU state: Ht={round(Ht[-1], 5)}")

    if average > 1:
        # Average out the noise
        x, y = zip(*actuation)
        x = SMA(x, window=average)
        y = SMA(y, window=average)
        actuation = list(zip(x, y))
        distance = SMA(distance, window=average)
        delta_distance = SMA(delta_distance, window=average)
        Ht = SMA(Ht, window=average)

        # Resolve weird artifacts at the beginning
        for i in range(average, 0, -1):
            actuation[i - 1] = actuation[i]
            distance[i - 1] = distance[i]
            delta_distance[i - 1] = delta_distance[i]
            Ht[i - 1] = Ht[i]

    # Visualize the monitored values
    path = get_subfolder(f"population{'_backup' if population.use_backup else ''}/"
                         f"storage/"
                         f"{population.folder_name}/"
                         f"{population}/", "images")
    path = get_subfolder(path, "monitor")
get_subfolder(path, f"{genome.key}") path = get_subfolder(path, f"{game_id}") visualize_actuation(actuation, target_found=target_found, game_cfg=game_cfg.game, save_path=f"{path}actuation.png") visualize_distance(distance, target_found=target_found, game_cfg=game_cfg.game, save_path=f"{path}distance.png") visualize_hidden_state(Ht, target_found=target_found, game_cfg=game_cfg.game, save_path=f"{path}hidden_state.png") visualize_position(position, game=game, save_path=f"{path}trace.png") merge(f"Monitored genome={genome.key} on game={game.id}", path=path)
def monitor_activation(genome: Genome, gid: int, debug: bool = False, duration: int = 60):
    """
    Monitor the activation of the candidate hidden state.

    Note: the game is started again, which is no problem since it is deterministic.
    """
    cfg = Config()
    cfg.game.duration = duration
    cfg.update()

    # Check if valid genome (contains at least one hidden GRU, first GRU is monitored)
    assert len([n for n in genome.get_used_nodes().values() if type(n) == GruNodeGene]) >= 1

    # Get the game
    game = get_game(i=gid, cfg=cfg, noise=False)
    state = game.reset()[D_SENSOR_LIST]
    step_num = 0

    # Create the network
    net = make_net(
            genome=genome,
            genome_config=cfg.genome,
            batch_size=1,
            initial_read=state,
    )

    # Containers to monitor
    Ht = []
    Ht_tilde = []
    target_found = []
    score = 0

    # Initialize the containers
    ht, ht_tilde, _, _ = get_gru_states(gru=net.rnn_array[0], x=np.asarray([state]))
    Ht.append(ht)
    Ht_tilde.append(ht_tilde)
    if debug:
        print(f"Step: {step_num}")
        print(f"\t> Hidden state: {round(Ht[-1], 5)}")
        print(f"\t> Candidate hidden state: {round(Ht_tilde[-1], 5)}")

    # Start monitoring
    while True:
        # Check if maximum iterations is reached
        if step_num == duration * cfg.game.fps:
            break

        # Determine the actions made by the agent for each of the states
        action = net(np.asarray([state]))

        # Check if each game received an action
        assert len(action) == 1

        # Proceed the game with one step, based on the predicted action
        obs = game.step(l=action[0][0], r=action[0][1])
        finished = obs[D_DONE]

        # Update the score-count
        if game.score > score:
            target_found.append(step_num)
            score = game.score

        # Update the candidate's current state
        state = obs[D_SENSOR_LIST]

        # Stop if the agent reached the target
        if finished:
            break
        step_num += 1

        # Update the containers
        ht, ht_tilde, _, _ = get_gru_states(gru=net.rnn_array[0], x=np.asarray([state]))
        Ht.append(ht)
        Ht_tilde.append(ht_tilde)
        if debug:
            print(f"Step: {step_num}")
            print(f"\t> Hidden state: {round(Ht[-1], 5)}")
            print(f"\t> Candidate hidden state: {round(Ht_tilde[-1], 5)}")
    return Ht_tilde, Ht, target_found
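# Hedged usage sketch: compare the hidden state with its candidate over time;
# matplotlib is an assumption of this example, not a dependency of the code above.
def _plot_activation_sketch(genome: Genome, gid: int):
    import matplotlib.pyplot as plt
    ht_tilde, ht, target_found = monitor_activation(genome, gid, duration=30)
    plt.figure()
    plt.plot(ht, label='Ht')
    plt.plot(ht_tilde, label='Ht_tilde')
    for t in target_found:
        plt.axvline(t, color='g', alpha=0.3)  # Steps at which the score increased
    plt.legend()
    plt.show()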
def main(population: Population, game_id: int, genome: Genome = None, game_cfg: Config = None, debug: bool = False):
    """
    Monitor the genome on the following elements:
     * Position
     * Update gate (Zt)
     * Hidden state of GRU (Ht)
     * Actuation of both wheels
     * Distance
    """
    # Make sure all parameters are set
    if not genome:
        genome = population.best_genome
    if not game_cfg:
        game_cfg = population.config

    # Check if valid genome (contains at least one hidden GRU, first GRU is monitored)
    assert len([n for n in genome.get_used_nodes().values() if type(n) == GruNoResetNodeGene]) >= 1

    # Get the game
    game = get_game(game_id, cfg=game_cfg, noise=False)
    state = game.reset()[D_SENSOR_LIST]
    step_num = 0

    # Create the network
    net = make_net(
            genome=genome,
            genome_config=population.config.genome,
            batch_size=1,
            initial_read=state,
    )

    # Containers to monitor
    actuation = []
    distance = []
    position = []
    Ht = []
    Ht_tilde = []
    Zt = []
    target_found = []
    score = 0

    # Initialize the containers
    actuation.append([0, 0])
    distance.append(state[0])
    position.append(game.player.pos.get_tuple())
    ht, ht_tilde, zt = get_gru_states(net=net, x=np.asarray([state]))
    Ht.append(ht)
    Ht_tilde.append(ht_tilde)
    Zt.append(zt)
    if debug:
        print(f"Step: {step_num}")
        print(f"\t> Actuation: {(round(actuation[-1][0], 5), round(actuation[-1][1], 5))!r}")
        print(f"\t> Distance: {round(distance[-1], 5)}")
        print(f"\t> Position: {(round(position[-1][0], 2), round(position[-1][1], 2))!r}")
        print(f"\t> GRU states: "
              f"\t\tHt={round(Ht[-1], 5)}"
              f"\t\tHt_tilde={round(Ht_tilde[-1], 5)}"
              f"\t\tZt={round(Zt[-1], 5)}")

    # Start monitoring
    while True:
        # Check if maximum iterations is reached
        if step_num == game_cfg.game.duration * game_cfg.game.fps:
            break

        # Determine the actions made by the agent for each of the states
        action = net(np.asarray([state]))

        # Check if each game received an action
        assert len(action) == 1

        # Proceed the game with one step, based on the predicted action
        obs = game.step(l=action[0][0], r=action[0][1])
        finished = obs[D_DONE]

        # Update the score-count
        if game.score > score:
            target_found.append(step_num)
            score = game.score

        # Update the candidate's current state
        state = obs[D_SENSOR_LIST]

        # Stop if the agent reached the target
        if finished:
            break
        step_num += 1

        # Update the containers
        actuation.append(action[0])
        distance.append(state[0])
        position.append(game.player.pos.get_tuple())
        ht, ht_tilde, zt = get_gru_states(net=net, x=np.asarray([state]))
        Ht.append(ht)
        Ht_tilde.append(ht_tilde)
        Zt.append(zt)
        if debug:
            print(f"Step: {step_num}")
            print(f"\t> Actuation: {(round(actuation[-1][0], 5), round(actuation[-1][1], 5))!r}")
            print(f"\t> Distance: {round(distance[-1], 5)}")
            print(f"\t> Position: {(round(position[-1][0], 2), round(position[-1][1], 2))!r}")
            print(f"\t> GRU states: "
                  f"\t\tHt={round(Ht[-1], 5)}"
                  f"\t\tHt_tilde={round(Ht_tilde[-1], 5)}"
                  f"\t\tZt={round(Zt[-1], 5)}")

    # Visualize the monitored values
    path = get_subfolder(f"population{'_backup' if population.use_backup else ''}/"
                         f"storage/"
                         f"{population.folder_name}/"
                         f"{population}/", "images")
    path = get_subfolder(path, "monitor")
    path = get_subfolder(path, f"{genome.key}")
    path = get_subfolder(path, f"{game_id}")
    visualize_actuation(actuation,
                        target_found=target_found,
                        game_cfg=game_cfg.game,
                        save_path=f"{path}actuation.png")
    visualize_distance(distance,
                       target_found=target_found,
                       game_cfg=game_cfg.game,
                       save_path=f"{path}distance.png")
    visualize_hidden_state(Ht,
                           target_found=target_found,
                           game_cfg=game_cfg.game,
                           save_path=f"{path}hidden_state.png")
    visualize_candidate_hidden_state(Ht_tilde,
                                     target_found=target_found,
                                     game_cfg=game_cfg.game,
                                     save_path=f"{path}candidate_hidden_state.png")
    visualize_update_gate(Zt,
                          target_found=target_found,
                          game_cfg=game_cfg.game,
                          save_path=f"{path}update_gate.png")
    visualize_position(position, game=game, save_path=f"{path}trace.png")
    merge(f"Monitored genome={genome.key} on game={game.id}", path=path)
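# The monitored quantities Ht, Ht_tilde and Zt follow the standard GRU equations
# without a reset gate (one common convention; the project's get_gru_states reads
# the actual weights from the network, whereas the weights here are plain arguments
# for illustration):
def _gru_no_reset_step(x, h_prev, W_z, U_z, b_z, W_h, U_h, b_h):
    """One step of a reset-free GRU; returns (ht, ht_tilde, zt)."""
    zt = 1.0 / (1.0 + np.exp(-(W_z @ x + U_z @ h_prev + b_z)))  # Update gate
    ht_tilde = np.tanh(W_h @ x + U_h @ h_prev + b_h)  # Candidate hidden state
    ht = (1.0 - zt) * h_prev + zt * ht_tilde  # Interpolate old and candidate state
    return ht, ht_tilde, zt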
def visualize(self, genome, game_id: int):
    """
    Visualize the performance of a single genome.

    :param genome: Tuple (genome_id, genome_class)
    :param game_id: ID of the game that will be used for evaluation
    """
    # Create the requested game
    game: Game = get_game(game_id, cfg=self.game_config)
    self.p2m = game.game_config.p2m

    # Create space in which game will be played
    window = pyglet.window.Window(game.x_axis * self.p2m,
                                  game.y_axis * self.p2m,
                                  f"Robot Simulator - Game {game_id:03d}",
                                  resizable=False,
                                  visible=True)
    window.set_location(100, 100)
    pyglet.gl.glClearColor(1, 1, 1, 1)

    # Setup the requested game
    self.state = game.reset()[D_SENSOR_LIST]
    self.finished = False
    self.score = 0

    # Make the network used during visualization
    net = make_net(
            genome=genome,
            genome_config=self.pop_config.genome,
            batch_size=1,
            initial_read=self.state,
    )

    # Create the visualize-environment
    space = pymunk.Space()
    options = DrawOptions()

    # Draw static objects - walls
    if game.wall_bound:
        x_axis = game.x_axis
        y_axis = game.y_axis
        corners = [(0, 0),
                   (0, y_axis * self.p2m),
                   (x_axis * self.p2m, y_axis * self.p2m),
                   (x_axis * self.p2m, 0)]
        for c in range(4):
            wall_shape = pymunk.Segment(space.static_body,
                                        a=corners[c],
                                        b=corners[(c + 1) % 4],
                                        radius=0.1 * self.p2m)  # Walls with a 10cm radius
            wall_shape.color = (0, 0, 0)
            space.add(wall_shape)

    # Draw static objects - target
    target_body = pymunk.Body(body_type=pymunk.Body.KINEMATIC)
    target_body.position = game.target * self.p2m
    target_shape = pymunk.Circle(body=target_body,
                                 radius=game.bot_config.radius * self.p2m * 3)  # TODO: Radius tripled for visibility
    target_shape.sensor = True
    target_shape.color = (0, 128, 0)
    space.add(target_body, target_shape)

    # Init player
    m = pymunk.moment_for_circle(mass=2, inner_radius=0, outer_radius=game.bot_config.radius * self.p2m)
    player_body = pymunk.Body(mass=1, moment=m)
    player_body.position = game.player.pos * self.p2m
    player_body.angle = game.player.angle
    player_shape = pymunk.Circle(body=player_body,
                                 radius=game.bot_config.radius * self.p2m * 3)  # TODO: Radius tripled for visibility
    player_shape.color = (255, 0, 0)
    space.add(player_body, player_shape)
    label = pyglet.text.Label(f'{self.time}',
                              font_size=16,
                              color=(100, 100, 100, 100),
                              x=window.width - 20,
                              y=window.height - 20,
                              anchor_x='center',
                              anchor_y='center')

    @window.event
    def on_draw():
        window.clear()
        label.draw()
        space.debug_draw(options=options)

    if self.mouse_enabled:
        @window.event
        def on_mouse_press(x, y, *_):
            # Move the target to the mouse-clicked position
            game.target.x = x / game.game_config.p2m
            game.target.y = y / game.game_config.p2m
            target_body.position = game.target * self.p2m

    def update_method(_):  # Input dt ignored
        dt = 1 / game.game_config.fps
        self.time += dt
        label.text = str(int(self.time))

        # Stop when target is reached
        if not self.finished:
            # Query the game for the next action
            action = net(np.asarray([self.state]))
            if self.debug:
                print(f"Passed time: {round(dt, 3)}")
                print(f"Location: x={round(player_body.position.x / self.p2m, 2)}, "
                      f"y={round(player_body.position.y / self.p2m, 2)}")
                print(f"Orientation: {round(player_body.angle, 2)}")
                print("Action: lw={l}, rw={r}".format(l=round(action[0][0], 3), r=round(action[0][1], 3)))
                print("Observation:", [round(s, 3) for s in self.state])

            # Progress game by one step
            obs = game.step_dt(dt=dt, l=action[0][0], r=action[0][1])
            self.finished = obs[D_DONE]
            self.state = obs[D_SENSOR_LIST]

            # Update space's player coordinates and angle
            player_body.position = game.player.pos * self.p2m
            player_body.angle = game.player.angle

            # Check if score has increased
            if game.score > self.score:
                self.score = game.score
                target_body.position = game.target * self.p2m
        space.step(dt)

    # Run the game
    time.sleep(5)  # TODO: Waiting time to start recording
    pyglet.clock.schedule_interval(update_method, 1.0 / (game.game_config.fps * self.speedup))
    pyglet.app.run()
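# Note on the scheduling above: update_method always advances the simulation by a
# fixed 1/fps seconds, while pyglet is asked to call it fps * speedup times per
# wall-clock second, so one simulated second plays back in 1/speedup real seconds.
# A quick sanity check of that relationship with illustrative numbers:
def _playback_speed_check(fps: int = 16, speedup: int = 4):
    sim_step = 1.0 / fps  # Simulated seconds advanced per call
    calls_per_second = fps * speedup  # Scheduled call rate
    assert abs(sim_step * calls_per_second - speedup) < 1e-9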