def eval_genome(
        self,
        genome,
        return_dict=None,
):
    """
    Run a single genome on all games of this environment.

    Every game is reset, after which the genome's network is queried step by step until either `self.max_steps`
    steps have been taken or every game reports that it is done.

    :param genome: Tuple (genome_id, genome_class)
    :param return_dict: Optional dictionary; when given, the list with each game's `close()` result is stored in
     it under the genome's id
    """
    genome_id, genome = genome

    # Reset all the games and collect their initial sensor readings
    states = np.asarray([game.reset()[D_SENSOR_LIST] for game in self.games])

    # None of the games is finished at the start
    finished = np.repeat(False, self.batch_size)

    # The network is initialised on the readings of the first game
    net = make_net(
        genome=genome,
        genome_config=self.pop_config.genome,
        batch_size=self.batch_size,
        initial_read=states[0],
    )

    step_num = 0
    while step_num != self.max_steps:
        # One action per game is expected
        actions = net(states)
        assert len(actions) == len(self.games)

        for idx, (game, action, done) in enumerate(zip(self.games, actions, finished)):
            # Finished games are left untouched
            if done:
                continue

            # Advance the game one step and store its new state
            obs = game.step(l=action[0], r=action[1])
            finished[idx] = obs[D_DONE]
            states[idx] = obs[D_SENSOR_LIST]

        # No need to continue once all the games are done
        if all(finished):
            break
        step_num += 1

    # Hand back the final observations
    if return_dict is not None:
        return_dict[genome_id] = [game.close() for game in self.games]
def get_positions(genome: Genome, gid: int, debug: bool = False, duration: int = 60):
    """
    Simulate the genome on one game (created with noise=False) and record the player's position after each step.

    :param genome: Genome to simulate, must use at least one GruNodeGene
    :param gid: ID of the game the genome is simulated on
    :param debug: Print step, position and score each time a position is recorded
    :param duration: Written to the game-config's duration; the run is capped at duration * fps steps
    :return: Tuple (list of position-tuples, game on which the genome was simulated)
    """
    cfg = Config()
    cfg.game.duration = duration
    cfg.update()

    # The genome must contain at least one used GRU node
    gru_nodes = [n for n in genome.get_used_nodes().values() if type(n) == GruNodeGene]
    assert len(gru_nodes) >= 1

    # Set up the game and read its initial state
    game = get_game(i=gid, cfg=cfg, noise=False)
    state = game.reset()[D_SENSOR_LIST]
    step_num = 0

    # Set up the network
    net = make_net(
        genome=genome,
        genome_config=cfg.genome,
        batch_size=1,
        initial_read=state,
    )

    # Monitored values (target_found is collected but not part of the returned result)
    position = []
    target_found = []
    score = 0

    def report():
        """Print the most recently recorded position together with the current step and score."""
        if not debug:
            return
        print(f"Step: {step_num}")
        print(
            f"\t> Position: {(round(position[-1][0], 2), round(position[-1][1], 2))!r}"
        )
        print(f"\t> Score: {score!r}")

    # Starting position
    position.append(game.player.pos.get_tuple())
    report()

    while step_num != duration * cfg.game.fps:
        # Query the network for the single game
        action = net(np.asarray([state]))
        assert len(action) == 1

        # Advance the game one step with the predicted wheel actuation
        left, right = action[0][0], action[0][1]
        obs = game.step(l=left, r=right)
        finished = obs[D_DONE]

        # Remember the step at which the score increased
        if game.score > score:
            target_found.append(step_num)
            score = game.score

        state = obs[D_SENSOR_LIST]

        # The position of the finishing step is not recorded
        if finished:
            break
        step_num += 1

        position.append(game.player.pos.get_tuple())
        report()
    return position, game
def main(population: Population,
         game_id: int,
         genome: Genome = None,
         game_cfg: Config = None,
         average: int = 1,
         debug: bool = False):
    """
    Monitor the genome on the following elements:
     * Position
     * Hidden state of SRU (Ht)
     * Actuation of both wheels
     * Distance
     * Delta distance

    The genome plays one game (created with noise=False). Afterwards the monitored values are visualized and the
    images are written to nested "images", "monitor", genome-key and game-id subfolders requested via
    get_subfolder.

    :param population: Population that provides the genome-config, the storage path and the default genome/config
    :param game_id: ID of the game on which the genome is monitored
    :param genome: Genome to monitor, the population's best genome if not given
    :param game_cfg: Config used to create the game, the population's config if not given
    :param average: Window handed to SMA to smooth the monitored signals; nothing is smoothed unless it exceeds 1
    :param debug: Print the monitored values each time they are recorded
    """
    # Make sure all parameters are set
    if not genome: genome = population.best_genome
    # Fall back on the population passed as argument (previously read an undefined name `pop`)
    if not game_cfg: game_cfg = population.config

    # Check if valid genome (contains at least one hidden SRU, first SRU is monitored) - also possible for fixed RNNs
    used_nodes = genome.get_used_nodes().values()
    assert any(type(n) in (SimpleRnnNodeGene, FixedRnnNodeGene) for n in used_nodes)

    # Get the game
    game = get_game(game_id, cfg=game_cfg, noise=False)
    state = game.reset()[D_SENSOR_LIST]
    step_num = 0

    # Create the network
    net = make_net(genome=genome,
                   genome_config=population.config.genome,
                   batch_size=1,
                   initial_read=state,
                   )

    # Containers to monitor
    actuation = []
    distance = []
    delta_distance = []
    position = []
    Ht = []
    target_found = []
    score = 0

    def record(wheels):
        """Store the current readings in the containers and print them when debugging."""
        actuation.append(wheels)
        distance.append(state[0])
        # No previous distance exists for the very first reading
        delta_distance.append(distance[-2] - distance[-1] if len(distance) > 1 else 0)
        position.append(game.player.pos.get_tuple())
        Ht.append(net.rnn_state[0, 0, 0])
        if debug:
            print(f"Step: {step_num}")
            print(f"\t> Actuation: {(round(actuation[-1][0], 5), round(actuation[-1][1], 5))!r}")
            print(f"\t> Distance: {round(distance[-1], 5)} - Delta distance: {round(delta_distance[-1], 5)}")
            print(f"\t> Position: {(round(position[-1][0], 2), round(position[-1][1], 2))!r}")
            print(f"\t> SRU state: Ht={round(Ht[-1], 5)}")

    # Initialize the containers
    record([0, 0])

    # Start monitoring, at most duration * fps steps are taken
    max_steps = game_cfg.game.duration * game_cfg.game.fps
    while step_num < max_steps:
        # Determine the action made by the agent for the current state
        action = net(np.asarray([state]))

        # Check if the game received an action
        assert len(action) == 1

        # Proceed the game with one step, based on the predicted action
        obs = game.step(l=action[0][0], r=action[0][1])
        finished = obs[D_DONE]

        # Update the score-count
        if game.score > score:
            target_found.append(step_num)
            score = game.score

        # Update the candidate's current state
        state = obs[D_SENSOR_LIST]

        # Stop if the game is done, the readings of this last step are not recorded
        if finished:
            break
        step_num += 1

        # Update the containers
        record(action[0])

    if average > 1:
        # Average out the noise
        x, y = zip(*actuation)
        x = SMA(x, window=average)
        y = SMA(y, window=average)
        actuation = list(zip(x, y))
        distance = SMA(distance, window=average)
        delta_distance = SMA(delta_distance, window=average)
        Ht = SMA(Ht, window=average)

        # Resolve weird artifacts at the beginning by copying the value at index `average` to all earlier indices
        # NOTE(review): indexing [average] requires more than `average` recorded samples - confirm this always holds
        for i in range(average, 0, -1):
            actuation[i - 1] = actuation[i]
            distance[i - 1] = distance[i]
            delta_distance[i - 1] = delta_distance[i]
            Ht[i - 1] = Ht[i]

    # Visualize the monitored values
    path = get_subfolder(f"population{'_backup' if population.use_backup else ''}/"
                         f"storage/"
                         f"{population.folder_name}/"
                         f"{population}/", "images")
    path = get_subfolder(path, "monitor")
    path = get_subfolder(path, f"{genome.key}")
    path = get_subfolder(path, f"{game_id}")
    visualize_actuation(actuation,
                        target_found=target_found,
                        game_cfg=game_cfg.game,
                        save_path=f"{path}actuation.png")
    visualize_distance(distance,
                       target_found=target_found,
                       game_cfg=game_cfg.game,
                       save_path=f"{path}distance.png")
    visualize_hidden_state(Ht,
                           target_found=target_found,
                           game_cfg=game_cfg.game,
                           save_path=f"{path}hidden_state.png")
    visualize_position(position,
                       game=game,
                       save_path=f"{path}trace.png")
    merge(f"Monitored genome={genome.key} on game={game.id}", path=path)
def monitor_activation(genome: Genome, gid: int, debug: bool = False, duration: int = 60):
    """
    Record the hidden state and the candidate hidden state of the genome's first GRU while it plays one game.

    Note: game is started again, no worries since deterministic.

    :param genome: Genome to monitor, must use at least one GruNodeGene
    :param gid: ID of the game the genome is simulated on
    :param debug: Print both states each time they are recorded
    :param duration: Written to the game-config's duration; the run is capped at duration * fps steps
    :return: Tuple (candidate hidden states, hidden states, steps at which the score increased)
    """
    cfg = Config()
    cfg.game.duration = duration
    cfg.update()

    # The genome must contain at least one used GRU node, the first GRU of the network is the one monitored
    gru_nodes = [n for n in genome.get_used_nodes().values() if type(n) == GruNodeGene]
    assert len(gru_nodes) >= 1

    # Set up the game and read its initial state
    game = get_game(i=gid, cfg=cfg, noise=False)
    state = game.reset()[D_SENSOR_LIST]
    step_num = 0

    # Set up the network
    net = make_net(
        genome=genome,
        genome_config=cfg.genome,
        batch_size=1,
        initial_read=state,
    )

    # Monitored values
    Ht = []
    Ht_tilde = []
    target_found = []
    score = 0

    def record():
        """Query the first GRU on the current state, store both of its states and print them when debugging."""
        ht, ht_tilde, _, _ = get_gru_states(gru=net.rnn_array[0], x=np.asarray([state]))
        Ht.append(ht)
        Ht_tilde.append(ht_tilde)
        if debug:
            print(f"Step: {step_num}")
            print(f"\t> Hidden state: {round(Ht[-1], 5)}")
            print(f"\t> Candidate hidden state: {round(Ht_tilde[-1], 5)}")

    # States belonging to the initial reading
    record()

    while step_num != duration * cfg.game.fps:
        # Query the network for the single game
        action = net(np.asarray([state]))
        assert len(action) == 1

        # Advance the game one step with the predicted wheel actuation
        left, right = action[0][0], action[0][1]
        obs = game.step(l=left, r=right)
        finished = obs[D_DONE]

        # Remember the step at which the score increased
        if game.score > score:
            target_found.append(step_num)
            score = game.score

        state = obs[D_SENSOR_LIST]

        # The states of the finishing step are not recorded
        if finished:
            break
        step_num += 1

        record()
    return Ht_tilde, Ht, target_found
def trace_genome(
        self,
        genome,
        return_dict=None,
):
    """
    Collect the path a single genome follows in each of this environment's games.

    A trace starts with the player's position right after the reset. During the run, a point is added only on
    steps that are a multiple of the game-config's fps; this also holds for games that are already done, whose
    player is no longer advanced.

    :param genome: Tuple (genome_id, genome_class)
    :param return_dict: Optional dictionary; when given, the traces (one list of position-tuples per game) are
     stored in it under the genome's id
    """
    genome_id, genome = genome

    # Reset all the games and collect their initial sensor readings
    states = np.asarray([game.reset()[D_SENSOR_LIST] for game in self.games])

    # Each trace starts at the player's initial position
    traces = [[game.player.pos.get_tuple()] for game in self.games]

    # None of the games is finished at the start
    finished = np.repeat(False, self.batch_size)

    # The network is initialised on the readings of the first game
    net = make_net(
        genome=genome,
        genome_config=self.pop_config.genome,
        batch_size=self.batch_size,
        initial_read=states[0],
    )

    step_num = 0
    while step_num != self.max_steps:
        # One action per game is expected
        actions = net(states)
        assert len(actions) == len(self.games)

        for idx, (game, action, done) in enumerate(zip(self.games, actions, finished)):
            # Only games that are not yet done are advanced
            if not done:
                obs = game.step(l=action[0], r=action[1])
                finished[idx] = obs[D_DONE]
                states[idx] = obs[D_SENSOR_LIST]

            # Extend the trace, regardless of whether the game was advanced
            if step_num % self.game_config.game.fps == 0:
                traces[idx].append(game.player.pos.get_tuple())

        step_num += 1

    # Hand back the traces
    if return_dict is not None:
        return_dict[genome_id] = traces
def main(population: Population,
         game_id: int,
         genome: Genome = None,
         game_cfg: Config = None,
         debug: bool = False):
    """
    Monitor the genome on the following elements:
     * Position
     * Update gate (Zt)
     * Hidden state of GRU (Ht)
     * Actuation of both wheels
     * Distance

    The candidate hidden state (Ht_tilde) returned by get_gru_states is recorded and visualized as well. The
    genome plays one game (created with noise=False). Afterwards the images are written to nested "images",
    "monitor", genome-key and game-id subfolders requested via get_subfolder.

    :param population: Population that provides the genome-config, the storage path and the default genome/config
    :param game_id: ID of the game on which the genome is monitored
    :param genome: Genome to monitor, the population's best genome if not given
    :param game_cfg: Config used to create the game, the population's config if not given
    :param debug: Print the monitored values each time they are recorded
    """
    # Make sure all parameters are set
    if not genome: genome = population.best_genome
    # Fall back on the population passed as argument (previously read an undefined name `pop`)
    if not game_cfg: game_cfg = population.config

    # Check if valid genome (contains at least one hidden GRU, first GRU is monitored)
    assert any(type(n) == GruNoResetNodeGene for n in genome.get_used_nodes().values())

    # Get the game
    game = get_game(game_id, cfg=game_cfg, noise=False)
    state = game.reset()[D_SENSOR_LIST]
    step_num = 0

    # Create the network
    net = make_net(
        genome=genome,
        genome_config=population.config.genome,
        batch_size=1,
        initial_read=state,
    )

    # Containers to monitor
    actuation = []
    distance = []
    position = []
    Ht = []
    Ht_tilde = []
    Zt = []
    target_found = []
    score = 0

    def record(wheels):
        """Store the current readings and GRU states in the containers and print them when debugging."""
        actuation.append(wheels)
        distance.append(state[0])
        position.append(game.player.pos.get_tuple())
        ht, ht_tilde, zt = get_gru_states(net=net, x=np.asarray([state]))
        Ht.append(ht)
        Ht_tilde.append(ht_tilde)
        Zt.append(zt)
        if debug:
            print(f"Step: {step_num}")
            print(
                f"\t> Actuation: {(round(actuation[-1][0], 5), round(actuation[-1][1], 5))!r}"
            )
            print(f"\t> Distance: {round(distance[-1], 5)}")
            print(
                f"\t> Position: {(round(position[-1][0], 2), round(position[-1][1], 2))!r}"
            )
            print(f"\t> GRU states: "
                  f"\t\tHt={round(Ht[-1], 5)}"
                  f"\t\tHt_tilde={round(Ht_tilde[-1], 5)}"
                  f"\t\tZt={round(Zt[-1], 5)}")

    # Initialize the containers
    record([0, 0])

    # Start monitoring, at most duration * fps steps are taken
    max_steps = game_cfg.game.duration * game_cfg.game.fps
    while step_num < max_steps:
        # Determine the action made by the agent for the current state
        action = net(np.asarray([state]))

        # Check if the game received an action
        assert len(action) == 1

        # Proceed the game with one step, based on the predicted action
        obs = game.step(l=action[0][0], r=action[0][1])
        finished = obs[D_DONE]

        # Update the score-count
        if game.score > score:
            target_found.append(step_num)
            score = game.score

        # Update the candidate's current state
        state = obs[D_SENSOR_LIST]

        # Stop if the game is done, the readings of this last step are not recorded
        if finished:
            break
        step_num += 1

        # Update the containers
        record(action[0])

    # Visualize the monitored values
    path = get_subfolder(
        f"population{'_backup' if population.use_backup else ''}/"
        f"storage/"
        f"{population.folder_name}/"
        f"{population}/", "images")
    path = get_subfolder(path, "monitor")
    path = get_subfolder(path, f"{genome.key}")
    path = get_subfolder(path, f"{game_id}")
    visualize_actuation(actuation,
                        target_found=target_found,
                        game_cfg=game_cfg.game,
                        save_path=f"{path}actuation.png")
    visualize_distance(distance,
                       target_found=target_found,
                       game_cfg=game_cfg.game,
                       save_path=f"{path}distance.png")
    visualize_hidden_state(Ht,
                           target_found=target_found,
                           game_cfg=game_cfg.game,
                           save_path=f"{path}hidden_state.png")
    visualize_candidate_hidden_state(
        Ht_tilde,
        target_found=target_found,
        game_cfg=game_cfg.game,
        save_path=f"{path}candidate_hidden_state.png")
    visualize_update_gate(Zt,
                          target_found=target_found,
                          game_cfg=game_cfg.game,
                          save_path=f"{path}update_gate.png")
    visualize_position(position,
                       game=game,
                       save_path=f"{path}trace.png")
    merge(f"Monitored genome={genome.key} on game={game.id}", path=path)
def visualize(self, genome, game_id: int):
    """
    Play one game with the given genome and render it live in a pyglet window.

    The method blocks: after a five second pause it schedules the update-routine and enters pyglet's
    application loop.

    :param genome: Genome used to build the controlling network; it is handed to make_net as-is (not unpacked)
    :param game_id: ID of the game that will be used for evaluation
    """
    # Fetch the game and take over its pixel-to-meter ratio
    game: Game = get_game(game_id, cfg=self.game_config)
    self.p2m = game.game_config.p2m

    # Window in which the game is shown
    window = pyglet.window.Window(
        game.x_axis * self.p2m,
        game.y_axis * self.p2m,
        "Robot Simulator - Game {id:03d}".format(id=game_id),
        resizable=False,
        visible=True,
    )
    window.set_location(100, 100)
    pyglet.gl.glClearColor(1, 1, 1, 1)

    # Initial state of the game
    self.state = game.reset()[D_SENSOR_LIST]
    self.finished = False
    self.score = 0

    # Network that steers the player during the visualization
    net = make_net(
        genome=genome,
        genome_config=self.pop_config.genome,
        batch_size=1,
        initial_read=self.state,
    )

    # Drawing environment
    space = pymunk.Space()
    options = DrawOptions()

    # Static objects - walls, one segment between each pair of neighbouring corners
    if game.wall_bound:
        width = game.x_axis * self.p2m
        height = game.y_axis * self.p2m
        corners = [(0, 0), (0, height), (width, height), (width, 0)]
        for start, end in zip(corners, corners[1:] + corners[:1]):
            wall_shape = pymunk.Segment(space.static_body, a=start, b=end, radius=0.1 * self.p2m)
            wall_shape.color = (0, 0, 0)
            space.add(wall_shape)

    # Static objects - target, drawn at three times the bot's radius (marked TODO in the original code)
    target_body = pymunk.Body(body_type=pymunk.Body.KINEMATIC)
    target_body.position = game.target * self.p2m
    target_shape = pymunk.Circle(body=target_body, radius=game.bot_config.radius * self.p2m * 3)
    target_shape.sensor = True
    target_shape.color = (0, 128, 0)
    space.add(target_body, target_shape)

    # Player, also drawn at three times the bot's radius (marked TODO in the original code)
    # NOTE(review): the moment is computed for mass=2 whereas the body is created with mass=1 - confirm intent
    moment = pymunk.moment_for_circle(mass=2, inner_radius=0, outer_radius=game.bot_config.radius * self.p2m)
    player_body = pymunk.Body(mass=1, moment=moment)
    player_body.position = game.player.pos * self.p2m
    player_body.angle = game.player.angle
    player_shape = pymunk.Circle(body=player_body, radius=game.bot_config.radius * self.p2m * 3)
    player_shape.color = (255, 0, 0)
    space.add(player_body, player_shape)

    # Label showing the elapsed time in the window's top-right corner
    label = pyglet.text.Label(
        f'{self.time}',
        font_size=16,
        color=(100, 100, 100, 100),
        x=window.width - 20,
        y=window.height - 20,
        anchor_x='center',
        anchor_y='center',
    )

    @window.event
    def on_draw():
        """Redraw the window: time-label first, then every object in the space."""
        window.clear()
        label.draw()
        space.debug_draw(options=options)

    if self.mouse_enabled:
        @window.event
        def on_mouse_press(x, y, *_):
            """Relocate the target to the clicked position."""
            game.target.x = x / game.game_config.p2m
            game.target.y = y / game.game_config.p2m
            target_body.position = game.target * self.p2m

    def update_method(_):
        """Advance the visualization one frame; the dt supplied by the clock is ignored."""
        dt = 1 / game.game_config.fps
        self.time += dt
        label.text = str(int(self.time))

        # The game itself is only advanced as long as it is not finished
        if not self.finished:
            # Ask the network for the next action
            action = net(np.asarray([self.state]))
            left, right = action[0][0], action[0][1]
            if self.debug:
                print(f"Passed time: {round(dt, 3)}")
                print(
                    f"Location: x={round(player_body.position.x / self.p2m, 2)}, "
                    f"y={round(player_body.position.y / self.p2m, 2)}")
                print(f"Orientation: {round(player_body.angle, 2)}")
                print("Action: lw={l}, rw={r}".format(
                    l=round(action[0][0], 3), r=round(action[0][1], 3)))
                print("Observation:", [round(s, 3) for s in self.state])

            # Take one step in the game
            obs = game.step_dt(dt=dt, l=left, r=right)
            self.finished = obs[D_DONE]
            self.state = obs[D_SENSOR_LIST]

            # Mirror the player's new position and angle in the space
            player_body.position = game.player.pos * self.p2m
            player_body.angle = game.player.angle

            # An increased score means the target has to be redrawn at its current position
            if game.score > self.score:
                self.score = game.score
                target_body.position = game.target * self.p2m
        space.step(dt)

    # Run the game
    time.sleep(5)  # TODO: Waiting time to start recording
    pyglet.clock.schedule_interval(
        update_method,
        1.0 / (game.game_config.fps * self.speedup),
    )
    pyglet.app.run()