예제 #1
0
    def trace_genomes(self,
                      pop: Population,
                      given_genome: Genome = None,
                      parallel: bool = True):
        """
        Create blueprints that contain the walking-traces for all the requested mazes.

        :param pop: Population object
        :param given_genome: Single genome for which the trace must be made; every genome in pop.population is
         traced when None
        :param parallel: Create the traces in parallel
        :raise Exception: If more than 20 games are requested while tracing a complete population, or more than 100
         games are requested in any case
        """
        multi_env = get_multi_env(pop=pop, game_config=self.game_config)
        single_genome = given_genome is not None
        if len(self.games) > 20 and not single_genome:
            raise Exception(
                "It is not advised to evaluate on more than 20 at once")
        elif len(self.games) > 100:
            raise Exception(
                "It is not advised to evaluate on more than 100 at once")

        # Set the games for which traces will be made
        multi_env.set_games(self.games, noise=False)

        # Fetch the (key, genome) pairs that must be traced
        if single_genome:
            genomes = [(given_genome.key, given_genome)]
        else:
            genomes = list(iteritems(pop.population))

        if parallel:
            # Always request at least one worker; mp.Pool rejects a non-positive number of processes
            n_workers = max(1, mp.cpu_count() - self.unused_cpu)

            # The context managers release the pool and the manager-process, also when an exception is raised
            with mp.Pool(n_workers) as pool, mp.Manager() as manager:
                shared_dict = manager.dict()

                # Evaluate the genomes
                # NOTE: the AsyncResult objects are discarded, hence exceptions raised inside a worker stay unnoticed
                for genome in genomes:
                    pool.apply_async(func=multi_env.trace_genome,
                                     args=(genome, shared_dict))
                pool.close()  # Close the pool
                pool.join()  # Postpone continuation until everything is finished

                # Copy the results into a plain dictionary before the manager shuts down
                return_dict = shared_dict.copy()
        else:  # Trace sequentially
            return_dict = dict()
            for genome in tqdm(genomes, desc="sequential evaluating"):
                multi_env.trace_genome(genome, return_dict)

        # Create blueprint of final result
        game_objects = [get_game(g, cfg=self.game_config) for g in self.games]
        path = get_subfolder(
            f"population{'_backup' if pop.use_backup else ''}/storage/{pop.folder_name}/{pop}/",
            'images')
        path = get_subfolder(path, 'games')
        create_traces(
            traces=return_dict,
            games=game_objects,
            gen=pop.generation,
            save_path=path,
            save_name=f'trace_{given_genome.key}' if single_genome else 'trace',
        )
예제 #2
0
 def set_games(self, games: list, noise: bool = False):
     """
     Replace the current games-set by newly created games.
     
     Also updates the batch-size to the number of requested games.
     
     :param games: List of Game-IDs
     :param noise: Create the games with noise and randomize each of them afterwards
     """
     created = [
         get_game(game_id, cfg=self.game_config, noise=noise)
         for game_id in games
     ]
     self.games = np.asarray(created)
     self.batch_size = len(games)
     
     # Nothing left to do for noise-free games
     if not noise:
         return
     for game in self.games:
         game.randomize()
예제 #3
0
    def blueprint_genomes(self, pop: Population, parallel: bool = True):
        """
        Create blueprints for all the requested mazes.

        Every genome of the population is evaluated on each of the games in self.games, after which the collected
        final observations are handed to create_blueprints.

        :param pop: Population object
        :param parallel: Evaluate the population in parallel
        :raise Exception: If more than 100 games are requested
        """
        multi_env = get_multi_env(pop=pop, game_config=self.game_config)
        if len(self.games) > 100:
            raise Exception(
                "It is not advised to evaluate on more than 100 at once")

        multi_env.set_games(self.games, noise=False)

        # Fetch the (key, genome) pairs of the population
        genomes = list(iteritems(pop.population))

        if parallel:
            # Always request at least one worker; mp.Pool rejects a non-positive number of processes
            n_workers = max(1, mp.cpu_count() - self.unused_cpu)

            # The context managers release the pool and the manager-process, also when an exception is raised
            with mp.Pool(n_workers) as pool, mp.Manager() as manager:
                shared_dict = manager.dict()

                # Evaluate the genomes
                # NOTE: the AsyncResult objects are discarded, hence exceptions raised inside a worker stay unnoticed
                for genome in genomes:
                    pool.apply_async(func=multi_env.eval_genome,
                                     args=(genome, shared_dict))
                pool.close()  # Close the pool
                pool.join()  # Postpone continuation until everything is finished

                # Copy the results into a plain dictionary before the manager shuts down
                return_dict = shared_dict.copy()
        else:  # Evaluate sequentially
            return_dict = dict()
            for genome in genomes:
                multi_env.eval_genome(genome, return_dict)

        # Create blueprint of final result
        game_objects = [get_game(g, cfg=self.game_config) for g in self.games]
        path = get_subfolder(
            f"population{'_backup' if pop.use_backup else ''}/storage/{pop.folder_name}/{pop}/",
            'images')
        path = get_subfolder(path, 'games')
        create_blueprints(
            final_observations=return_dict,
            games=game_objects,
            gen=pop.generation,
            save_path=path,
        )
예제 #4
0
if __name__ == '__main__':
    os.chdir("../..")
    cfg = Config()

    # Start with the dummy game, then add for each experiment its first training game (the training games repeat
    # themselves) followed by all of its evaluation games
    games = [0]
    for experiment_id in (1, 2, 3, 6):
        train_ids, eval_ids = get_game_ids(experiment_id)
        games.extend(train_ids[:1])
        games.extend(eval_ids)

    # Create a blueprint-visualization for each of the collected games
    for game_id in tqdm(games):
        game_blueprint(get_game(game_id, cfg=cfg), show=False)
예제 #5
0
def get_positions(genome: Genome,
                  gid: int,
                  debug: bool = False,
                  duration: int = 60):
    """
    Simulate the genome on the requested game and record the player's position after every simulation step.

    The first entry is the position right after the game's reset. The position reached in the step that finishes
    the game is not recorded, since the simulation stops before the containers are updated.

    :param genome: Genome to simulate, must use at least one GruNodeGene
    :param gid: ID of the game on which the genome is simulated
    :param debug: Print the step-number, position and score after every step
    :param duration: Value assigned to cfg.game.duration, also used to determine the maximum number of steps
    :return: Tuple (list of the results of game.player.pos.get_tuple(), game object used during the simulation)
    :raise AssertionError: If the genome does not use a GruNodeGene
    """
    cfg = Config()
    cfg.game.duration = duration
    cfg.update()

    # Check if valid genome (contains at least one hidden GRU)
    assert len([
        n for n in genome.get_used_nodes().values() if type(n) == GruNodeGene
    ]) >= 1

    # Get the game
    game = get_game(i=gid, cfg=cfg, noise=False)
    state = game.reset()[D_SENSOR_LIST]
    step_num = 0

    # Create the network
    net = make_net(
        genome=genome,
        genome_config=cfg.genome,
        batch_size=1,
        initial_read=state,
    )

    # Containers to monitor
    position = []
    score = 0

    def report():
        """Print the most recently monitored values."""
        print(f"Step: {step_num}")
        print(
            f"\t> Position: {(round(position[-1][0], 2), round(position[-1][1], 2))!r}"
        )
        print(f"\t> Score:    {score!r}")

    # Initialize the containers
    position.append(game.player.pos.get_tuple())
    if debug: report()

    # Start monitoring
    while True:
        # Check if maximum iterations is reached (">=" also terminates if the product is not a whole number)
        if step_num >= duration * cfg.game.fps: break

        # Determine the actions made by the agent for each of the states
        action = net(np.asarray([state]))

        # Check if each game received an action
        assert len(action) == 1

        # Proceed the game with one step, based on the predicted action
        obs = game.step(l=action[0][0], r=action[0][1])
        finished = obs[D_DONE]

        # Update the score-count (only reported when debugging)
        if game.score > score:
            score = game.score

        # Update the candidate's current state
        state = obs[D_SENSOR_LIST]

        # Stop if the game signals that it is done
        if finished: break
        step_num += 1

        # Update the containers
        position.append(game.player.pos.get_tuple())
        if debug: report()
    return position, game
예제 #6
0
def main(population: Population,
         game_id: int,
         genome: Genome = None,
         game_cfg: Config = None,
         average: int = 1,
         debug: bool = False):
    """
    Monitor the genome on the following elements:
        * Position
        * Hidden state of SRU (Ht)
        * Actuation of both wheels
        * Distance
        * Delta distance
    
    The monitored values are visualized and stored in the population's "images/monitor/<genome key>/<game id>"
    subfolder.
    
    :param population: Population object, provides the default genome, the default config and the storage location
    :param game_id: ID of the game on which the genome is monitored
    :param genome: Genome to monitor, population.best_genome if None
    :param game_cfg: Config used for the game, population.config if None
    :param average: Window of the moving average applied on the monitored values, no averaging if not above 1
    :param debug: Print the monitored values after every step
    :raise AssertionError: If the genome uses neither a SimpleRnnNodeGene nor a FixedRnnNodeGene
    """
    # Make sure all parameters are set
    if genome is None: genome = population.best_genome
    if game_cfg is None: game_cfg = population.config
    
    # Check if valid genome (contains at least one hidden SRU, first SRU is monitored) - also possible for fixed RNNs
    used_nodes = genome.get_used_nodes().values()
    a = len([n for n in used_nodes if type(n) == SimpleRnnNodeGene]) >= 1
    b = len([n for n in used_nodes if type(n) == FixedRnnNodeGene]) >= 1
    assert a or b
    
    # Get the game
    game = get_game(game_id, cfg=game_cfg, noise=False)
    state = game.reset()[D_SENSOR_LIST]
    step_num = 0
    
    # Create the network
    net = make_net(genome=genome,
                   genome_config=population.config.genome,
                   batch_size=1,
                   initial_read=state,
                   )
    
    # Containers to monitor
    actuation = []
    distance = []
    delta_distance = []
    position = []
    Ht = []
    target_found = []
    score = 0
    
    def report():
        """Print the most recently monitored values."""
        print(f"Step: {step_num}")
        print(f"\t> Actuation: {(round(actuation[-1][0], 5), round(actuation[-1][1], 5))!r}")
        print(f"\t> Distance: {round(distance[-1], 5)} - Delta distance: {round(delta_distance[-1], 5)}")
        print(f"\t> Position: {(round(position[-1][0], 2), round(position[-1][1], 2))!r}")
        print(f"\t> SRU state: Ht={round(Ht[-1], 5)}")
    
    # Initialize the containers
    actuation.append([0, 0])
    distance.append(state[0])
    delta_distance.append(0)
    position.append(game.player.pos.get_tuple())
    Ht.append(net.rnn_state[0, 0, 0])
    if debug: report()
    
    # Start monitoring
    while True:
        # Check if maximum iterations is reached (">=" also terminates if the product is not a whole number)
        if step_num >= game_cfg.game.duration * game_cfg.game.fps: break
        
        # Determine the actions made by the agent for each of the states
        action = net(np.asarray([state]))
        
        # Check if each game received an action
        assert len(action) == 1
        
        # Proceed the game with one step, based on the predicted action
        obs = game.step(l=action[0][0], r=action[0][1])
        finished = obs[D_DONE]
        
        # Update the score-count
        if game.score > score:
            target_found.append(step_num)
            score = game.score
        
        # Update the candidate's current state
        state = obs[D_SENSOR_LIST]
        
        # Stop if the game signals that it is done
        if finished: break
        step_num += 1
        
        # Update the containers
        actuation.append(action[0])
        distance.append(state[0])
        delta_distance.append(distance[-2] - distance[-1])
        position.append(game.player.pos.get_tuple())
        Ht.append(net.rnn_state[0, 0, 0])
        if debug: report()
    
    if average > 1:
        # Average out the noise
        # NOTE(review): assumes SMA returns a mutable sequence with the same length as its input - confirm
        x, y = zip(*actuation)
        x = SMA(x, window=average)
        y = SMA(y, window=average)
        actuation = list(zip(x, y))
        distance = SMA(distance, window=average)
        delta_distance = SMA(delta_distance, window=average)
        Ht = SMA(Ht, window=average)
        
        # Resolve weird artifacts at the beginning by copying the value at index 'average' backwards; the start-index
        # is clamped such that runs shorter than the window do not index out of range
        for i in range(min(average, len(actuation) - 1), 0, -1):
            actuation[i - 1] = actuation[i]
            distance[i - 1] = distance[i]
            delta_distance[i - 1] = delta_distance[i]
            Ht[i - 1] = Ht[i]
    
    # Visualize the monitored values
    path = get_subfolder(f"population{'_backup' if population.use_backup else ''}/"
                         f"storage/"
                         f"{population.folder_name}/"
                         f"{population}/", "images")
    path = get_subfolder(path, "monitor")
    path = get_subfolder(path, f"{genome.key}")
    path = get_subfolder(path, f"{game_id}")
    visualize_actuation(actuation,
                        target_found=target_found,
                        game_cfg=game_cfg.game,
                        save_path=f"{path}actuation.png")
    visualize_distance(distance,
                       target_found=target_found,
                       game_cfg=game_cfg.game,
                       save_path=f"{path}distance.png")
    visualize_hidden_state(Ht,
                           target_found=target_found,
                           game_cfg=game_cfg.game,
                           save_path=f"{path}hidden_state.png")
    visualize_position(position,
                       game=game,
                       save_path=f"{path}trace.png")
    merge(f"Monitored genome={genome.key} on game={game.id}", path=path)
예제 #7
0
def monitor_activation(genome: Genome,
                       gid: int,
                       debug: bool = False,
                       duration: int = 60):
    """
    Simulate the genome on the requested game and monitor the hidden state and the candidate hidden state of its
    first GRU. Note: the game is created without noise and started from scratch, so the run is deterministic.

    :param genome: Genome to simulate, must use at least one GruNodeGene
    :param gid: ID of the game on which the genome is simulated
    :param debug: Print the monitored states after every step
    :param duration: Value assigned to cfg.game.duration, also used to determine the maximum number of steps
    :return: Tuple (candidate hidden states, hidden states, step-numbers at which the game's score increased)
    """
    cfg = Config()
    cfg.game.duration = duration
    cfg.update()

    # A valid genome contains at least one hidden GRU (the first one is monitored)
    gru_nodes = [
        node for node in genome.get_used_nodes().values()
        if type(node) == GruNodeGene
    ]
    assert len(gru_nodes) >= 1

    # Setup the game and read its initial sensor-values
    game = get_game(i=gid, cfg=cfg, noise=False)
    state = game.reset()[D_SENSOR_LIST]
    step_num = 0

    # Setup the network that controls the agent
    net = make_net(genome=genome,
                   genome_config=cfg.genome,
                   batch_size=1,
                   initial_read=state)

    # Monitored containers
    Ht, Ht_tilde, target_found = [], [], []
    score = 0

    def record(sensor_state, step):
        """Store the GRU's states for the given sensor-values, and print them if requested."""
        hidden, candidate, _, _ = get_gru_states(gru=net.rnn_array[0],
                                                 x=np.asarray([sensor_state]))
        Ht.append(hidden)
        Ht_tilde.append(candidate)
        if not debug:
            return
        print(f"Step: {step}")
        print(f"\t> Hidden state: {round(Ht[-1], 5)}")
        print(f"\t> Candidate hidden state: {round(Ht_tilde[-1], 5)}")

    # States before the first step
    record(state, step_num)

    # Monitor until the maximum number of iterations is reached
    while step_num != duration * cfg.game.fps:
        # Query the network for the action that belongs to the current state
        action = net(np.asarray([state]))

        # Exactly one game is played, hence exactly one action is expected
        assert len(action) == 1

        # Progress the game by one step
        obs = game.step(l=action[0][0], r=action[0][1])
        done = obs[D_DONE]

        # Remember the step at which the score increased
        if game.score > score:
            target_found.append(step_num)
            score = game.score

        # Sensor-values used as input of the next step
        state = obs[D_SENSOR_LIST]

        # Stop as soon as the game signals that it is done
        if done:
            break
        step_num += 1

        record(state, step_num)
    return Ht_tilde, Ht, target_found
def main(population: Population,
         game_id: int,
         genome: Genome = None,
         game_cfg: Config = None,
         debug: bool = False):
    """
    Monitor the genome on the following elements:
        * Position
        * Update gate (Zt)
        * Hidden state of GRU (Ht)
        * Candidate hidden state of GRU (Ht_tilde)
        * Actuation of both wheels
        * Distance

    The monitored values are visualized and stored in the population's "images/monitor/<genome key>/<game id>"
    subfolder.

    :param population: Population object, provides the default genome, the default config and the storage location
    :param game_id: ID of the game on which the genome is monitored
    :param genome: Genome to monitor, population.best_genome if None
    :param game_cfg: Config used for the game, population.config if None
    :param debug: Print the monitored values after every step
    :raise AssertionError: If the genome does not use a GruNoResetNodeGene
    """
    # Make sure all parameters are set
    if genome is None: genome = population.best_genome
    if game_cfg is None: game_cfg = population.config

    # Check if valid genome (contains at least one hidden GRU, first GRU is monitored)
    assert len([
        n for n in genome.get_used_nodes().values()
        if type(n) == GruNoResetNodeGene
    ]) >= 1

    # Get the game
    game = get_game(game_id, cfg=game_cfg, noise=False)
    state = game.reset()[D_SENSOR_LIST]
    step_num = 0

    # Create the network
    net = make_net(
        genome=genome,
        genome_config=population.config.genome,
        batch_size=1,
        initial_read=state,
    )

    # Containers to monitor
    actuation = []
    distance = []
    position = []
    Ht = []
    Ht_tilde = []
    Zt = []
    target_found = []
    score = 0

    def record_gru_states():
        """Store the GRU's states that correspond to the current sensor-values."""
        ht, ht_tilde, zt = get_gru_states(net=net, x=np.asarray([state]))
        Ht.append(ht)
        Ht_tilde.append(ht_tilde)
        Zt.append(zt)

    def report():
        """Print the most recently monitored values."""
        print(f"Step: {step_num}")
        print(
            f"\t> Actuation: {(round(actuation[-1][0], 5), round(actuation[-1][1], 5))!r}"
        )
        print(f"\t> Distance: {round(distance[-1], 5)}")
        print(
            f"\t> Position: {(round(position[-1][0], 2), round(position[-1][1], 2))!r}"
        )
        print(f"\t> GRU states: "
              f"\t\tHt={round(Ht[-1], 5)}"
              f"\t\tHt_tilde={round(Ht_tilde[-1], 5)}"
              f"\t\tZt={round(Zt[-1], 5)}")

    # Initialize the containers
    actuation.append([0, 0])
    distance.append(state[0])
    position.append(game.player.pos.get_tuple())
    record_gru_states()
    if debug: report()

    # Start monitoring
    while True:
        # Check if maximum iterations is reached (">=" also terminates if the product is not a whole number)
        if step_num >= game_cfg.game.duration * game_cfg.game.fps: break

        # Determine the actions made by the agent for each of the states
        action = net(np.asarray([state]))

        # Check if each game received an action
        assert len(action) == 1

        # Proceed the game with one step, based on the predicted action
        obs = game.step(l=action[0][0], r=action[0][1])
        finished = obs[D_DONE]

        # Update the score-count
        if game.score > score:
            target_found.append(step_num)
            score = game.score

        # Update the candidate's current state
        state = obs[D_SENSOR_LIST]

        # Stop if the game signals that it is done
        if finished: break
        step_num += 1

        # Update the containers
        actuation.append(action[0])
        distance.append(state[0])
        position.append(game.player.pos.get_tuple())
        record_gru_states()
        if debug: report()

    # Visualize the monitored values
    path = get_subfolder(
        f"population{'_backup' if population.use_backup else ''}/"
        f"storage/"
        f"{population.folder_name}/"
        f"{population}/", "images")
    path = get_subfolder(path, "monitor")
    path = get_subfolder(path, f"{genome.key}")
    path = get_subfolder(path, f"{game_id}")
    visualize_actuation(actuation,
                        target_found=target_found,
                        game_cfg=game_cfg.game,
                        save_path=f"{path}actuation.png")
    visualize_distance(distance,
                       target_found=target_found,
                       game_cfg=game_cfg.game,
                       save_path=f"{path}distance.png")
    visualize_hidden_state(Ht,
                           target_found=target_found,
                           game_cfg=game_cfg.game,
                           save_path=f"{path}hidden_state.png")
    visualize_candidate_hidden_state(
        Ht_tilde,
        target_found=target_found,
        game_cfg=game_cfg.game,
        save_path=f"{path}candidate_hidden_state.png")
    visualize_update_gate(Zt,
                          target_found=target_found,
                          game_cfg=game_cfg.game,
                          save_path=f"{path}update_gate.png")
    visualize_position(position, game=game, save_path=f"{path}trace.png")
    merge(f"Monitored genome={genome.key} on game={game.id}", path=path)
    def visualize(self, genome, game_id: int):
        """
        Visualize the performance of a single genome in a pyglet window.
        
        The requested game is created and reset, a network is made for the genome, and the game is rendered via a
        pymunk space while the network steers the player. The call blocks in pyglet.app.run().
        
        :param genome: Genome to visualize. NOTE(review): previously documented as a tuple (genome_id, genome_class),
         but the value is passed unchanged to make_net(genome=...) - confirm which of both is expected
        :param game_id: ID of the game that will be used for evaluation
        """
        # Create the requested game
        game: Game = get_game(game_id, cfg=self.game_config)
        # Scaling factor applied to every game-coordinate before it is drawn
        self.p2m = game.game_config.p2m

        # Create space in which game will be played
        window = pyglet.window.Window(
            game.x_axis * self.p2m,
            game.y_axis * self.p2m,
            "Robot Simulator - Game {id:03d}".format(id=game_id),
            resizable=False,
            visible=True)
        window.set_location(100, 100)
        pyglet.gl.glClearColor(1, 1, 1, 1)

        # Setup the requested game
        self.state = game.reset()[D_SENSOR_LIST]
        self.finished = False
        self.score = 0

        # Make the network used during visualization
        net = make_net(
            genome=genome,
            genome_config=self.pop_config.genome,
            batch_size=1,
            initial_read=self.state,
        )

        # Create the visualize-environment
        space = pymunk.Space()
        options = DrawOptions()

        # Draw static objects - walls
        if game.wall_bound:
            x_axis = game.x_axis
            y_axis = game.y_axis
            corners = [(0, 0), (0, y_axis * self.p2m),
                       (x_axis * self.p2m, y_axis * self.p2m),
                       (x_axis * self.p2m, 0)]
            # Connect each corner with the next one, the modulo closes the rectangle
            for c in range(4):
                wall_shape = pymunk.Segment(space.static_body,
                                            a=corners[c],
                                            b=corners[(c + 1) % 4],
                                            radius=0.1 * self.p2m)  # 5cm walls
                wall_shape.color = (0, 0, 0)
                space.add(wall_shape)

        # Draw static objects - target
        target_body = pymunk.Body(body_type=pymunk.Body.KINEMATIC)
        target_body.position = game.target * self.p2m
        target_shape = pymunk.Circle(body=target_body,
                                     radius=game.bot_config.radius * self.p2m *
                                     3)  # TODO: Drawn at three times the bot's radius
        # Marked as a sensor. NOTE(review): presumably to prevent collisions with the player - confirm
        target_shape.sensor = True
        target_shape.color = (0, 128, 0)
        space.add(target_body, target_shape)

        # Init player
        # NOTE(review): the moment is computed for mass=2 and the bot's real radius, whereas the body is created
        #  with mass=1 and the shape with three times that radius - confirm that this is intended
        m = pymunk.moment_for_circle(mass=2,
                                     inner_radius=0,
                                     outer_radius=game.bot_config.radius *
                                     self.p2m)
        player_body = pymunk.Body(mass=1, moment=m)
        player_body.position = game.player.pos * self.p2m
        player_body.angle = game.player.angle
        player_shape = pymunk.Circle(body=player_body,
                                     radius=game.bot_config.radius * self.p2m *
                                     3)  # TODO: Drawn at three times the bot's radius
        player_shape.color = (255, 0, 0)
        space.add(player_body, player_shape)
        # Label that shows the elapsed time; self.time is not set in this method, so it must be initialised elsewhere
        label = pyglet.text.Label(f'{self.time}',
                                  font_size=16,
                                  color=(100, 100, 100, 100),
                                  x=window.width - 20,
                                  y=window.height - 20,
                                  anchor_x='center',
                                  anchor_y='center')

        # Redraw the label and the complete pymunk space on every draw-event
        @window.event
        def on_draw():
            window.clear()
            label.draw()
            space.debug_draw(options=options)

        if self.mouse_enabled:

            @window.event
            def on_mouse_press(x, y, *_):
                # Move the target to the mouse-clicked position (converted back to game-coordinates)
                game.target.x = x / game.game_config.p2m
                game.target.y = y / game.game_config.p2m
                target_body.position = game.target * self.p2m

        def update_method(_):  # Input dt ignored
            # Use a fixed time-step derived from the game's fps instead of the time passed by pyglet
            dt = 1 / game.game_config.fps
            self.time += dt
            label.text = str(int(self.time))

            # Stop when target is reached
            if not self.finished:
                # Query the game for the next action
                action = net(np.asarray([self.state]))
                if self.debug:
                    print(f"Passed time: {round(dt, 3)}")
                    print(
                        f"Location: x={round(player_body.position.x / self.p2m, 2)}, "
                        f"y={round(player_body.position.y / self.p2m, 2)}")
                    print(f"Orientation: {round(player_body.angle, 2)}")
                    print("Action: lw={l}, rw={r}".format(
                        l=round(action[0][0], 3), r=round(action[0][1], 3)))
                    print("Observation:", [round(s, 3) for s in self.state])

                # Progress game by one step
                obs = game.step_dt(dt=dt, l=action[0][0], r=action[0][1])
                self.finished = obs[D_DONE]
                self.state = obs[D_SENSOR_LIST]

                # Update space's player coordinates and angle
                player_body.position = game.player.pos * self.p2m
                player_body.angle = game.player.angle

                # Check if score has increased, if so redraw the target at the game's current target position
                if game.score > self.score:
                    self.score = game.score
                    target_body.position = game.target * self.p2m
            # The space is stepped on every call, also after the game has finished
            space.step(dt)

        # Run the game
        time.sleep(5)  # TODO: Waiting time to start recording
        # Dividing the interval by self.speedup makes the updates run that many times faster than the game's fps
        pyglet.clock.schedule_interval(
            update_method, 1.0 / (game.game_config.fps * self.speedup))
        pyglet.app.run()