コード例 #1
0
ファイル: env_multi.py プロジェクト: RubenPants/EvolvableRNN
    def eval_genome(
        self,
        genome,
        return_dict=None,
    ):
        """
        Run one genome on all games of this environment and report the outcome.

        Every game is reset, one batched network is built for the genome, and the games are advanced in lockstep
        until all of them report done or `self.max_steps` steps have been taken.

        :param genome: Tuple (genome_id, genome_class)
        :param return_dict: Optional dictionary; when given, the list with the `close()` result of every game is
            stored in it under the genome's id
        """
        key, candidate = genome

        # Initial sensor readings, one entry per game
        states = np.asarray([game.reset()[D_SENSOR_LIST] for game in self.games])

        # No game has finished yet
        finished = np.repeat(False, self.batch_size)

        # The network is initialised with the readings of the first game only
        net = make_net(
            genome=candidate,
            genome_config=self.pop_config.genome,
            batch_size=self.batch_size,
            initial_read=states[0],
        )

        step_num = 0
        while step_num != self.max_steps:
            # One batched query yields an action for every game
            actions = net(states)
            assert len(actions) == len(self.games)

            for idx, (game, action, done) in enumerate(zip(self.games, actions, finished)):
                # Finished games are left untouched
                if done:
                    continue

                # Advance this game and remember its new done-flag and sensor readings
                obs = game.step(l=action[0], r=action[1])
                finished[idx] = obs[D_DONE]
                states[idx] = obs[D_SENSOR_LIST]

            # No point in continuing once every game is done
            if all(finished):
                break
            step_num += 1

        # Nothing to report when no result container was handed over
        if return_dict is None:
            return
        return_dict[key] = [game.close() for game in self.games]
コード例 #2
0
ファイル: trace.py プロジェクト: RubenPants/EvolvableRNN
def get_positions(genome: Genome,
                  gid: int,
                  debug: bool = False,
                  duration: int = 60):
    """
    Simulate the genome on game `gid` and record the player's position after every step.

    :param genome: Genome to simulate; must use at least one GruNodeGene
    :param gid: ID of the game to play
    :param debug: Print step, position and score for every recorded sample
    :param duration: Written to `cfg.game.duration`; the run is capped at `duration * cfg.game.fps` steps
    :return: Tuple (list of recorded positions, the game object after the run)
    """
    cfg = Config()
    cfg.game.duration = duration
    cfg.update()

    # The genome must use at least one GRU node
    assert any(type(node) == GruNodeGene for node in genome.get_used_nodes().values())

    # Noise-free game and its initial sensor readings
    game = get_game(i=gid, cfg=cfg, noise=False)
    state = game.reset()[D_SENSOR_LIST]

    net = make_net(
        genome=genome,
        genome_config=cfg.genome,
        batch_size=1,
        initial_read=state,
    )

    step_num = 0
    score = 0
    found_at = []  # Steps at which the score increased (not part of the return value)
    position = [game.player.pos.get_tuple()]

    def report():
        """Print the most recent sample."""
        print(f"Step: {step_num}")
        print(
            f"\t> Position: {(round(position[-1][0], 2), round(position[-1][1], 2))!r}"
        )
        print(f"\t> Score:    {score!r}")

    if debug:
        report()

    while step_num != duration * cfg.game.fps:
        # Single-sample batch, hence exactly one action is expected back
        action = net(np.asarray([state]))
        assert len(action) == 1

        obs = game.step(l=action[0][0], r=action[0][1])
        done = obs[D_DONE]

        # Keep track of score increases
        if game.score > score:
            found_at.append(step_num)
            score = game.score

        state = obs[D_SENSOR_LIST]

        # The step that finishes the game is not sampled
        if done:
            break
        step_num += 1

        position.append(game.player.pos.get_tuple())
        if debug:
            report()
    return position, game
コード例 #3
0
def main(population: Population,
         game_id: int,
         genome: Genome = None,
         game_cfg: Config = None,
         average: int = 1,
         debug: bool = False):
    """
    Monitor the genome on the following elements:
        * Position
        * Hidden state of SRU (Ht)
        * Actuation of both wheels
        * Distance
        * Delta distance

    The monitored values are plotted and stored in the population's "images/monitor/<genome key>/<game id>" folder.

    :param population: Population the genome belongs to; provides the genome-config and the storage location
    :param game_id: ID of the game on which the genome is monitored
    :param genome: Genome to monitor, defaults to the population's best genome
    :param game_cfg: Config used to create the game, defaults to the population's config
    :param average: Window of the moving average applied to the monitored values, only used when larger than 1
    :param debug: Print the monitored values of every step
    """
    # Make sure all parameters are set (the original referenced an undefined name 'pop' for the config default)
    if genome is None: genome = population.best_genome
    if game_cfg is None: game_cfg = population.config

    # Check if valid genome (contains at least one hidden SRU, first SRU is monitored) - also possible for fixed RNNs
    a = len([n for n in genome.get_used_nodes().values() if type(n) == SimpleRnnNodeGene]) >= 1
    b = len([n for n in genome.get_used_nodes().values() if type(n) == FixedRnnNodeGene]) >= 1
    assert a or b

    # Get the game
    game = get_game(game_id, cfg=game_cfg, noise=False)
    state = game.reset()[D_SENSOR_LIST]
    step_num = 0

    # Create the network
    net = make_net(genome=genome,
                   genome_config=population.config.genome,
                   batch_size=1,
                   initial_read=state,
                   )

    # Containers to monitor
    actuation = []
    distance = []
    delta_distance = []
    position = []
    Ht = []
    target_found = []
    score = 0

    def debug_report():
        """Print the most recent entry of every container."""
        print(f"Step: {step_num}")
        print(f"\t> Actuation: {(round(actuation[-1][0], 5), round(actuation[-1][1], 5))!r}")
        print(f"\t> Distance: {round(distance[-1], 5)} - Delta distance: {round(delta_distance[-1], 5)}")
        print(f"\t> Position: {(round(position[-1][0], 2), round(position[-1][1], 2))!r}")
        print(f"\t> SRU state: Ht={round(Ht[-1], 5)}")

    # Initialize the containers (no actuation and no distance-change before the first step)
    actuation.append([0, 0])
    distance.append(state[0])
    delta_distance.append(0)
    position.append(game.player.pos.get_tuple())
    Ht.append(net.rnn_state[0, 0, 0])
    if debug: debug_report()

    # Start monitoring
    while True:
        # Check if maximum iterations is reached
        if step_num == game_cfg.game.duration * game_cfg.game.fps: break

        # Determine the action made by the agent for the current state
        action = net(np.asarray([state]))

        # Batch of one state, hence exactly one action is expected
        assert len(action) == 1

        # Proceed the game with one step, based on the predicted action
        obs = game.step(l=action[0][0], r=action[0][1])
        finished = obs[D_DONE]

        # Update the score-count
        if game.score > score:
            target_found.append(step_num)
            score = game.score

        # Update the candidate's current state
        state = obs[D_SENSOR_LIST]

        # Stop if the game is finished (this last step is not added to the containers)
        if finished: break
        step_num += 1

        # Update the containers
        actuation.append(action[0])
        distance.append(state[0])
        delta_distance.append(distance[-2] - distance[-1])
        position.append(game.player.pos.get_tuple())
        Ht.append(net.rnn_state[0, 0, 0])
        if debug: debug_report()

    if average > 1:
        # Average out the noise
        x, y = zip(*actuation)
        x = SMA(x, window=average)
        y = SMA(y, window=average)
        actuation = list(zip(x, y))
        distance = SMA(distance, window=average)
        delta_distance = SMA(delta_distance, window=average)
        Ht = SMA(Ht, window=average)

        # Resolve weird artifacts at the beginning: overwrite the leading entries with the entry at index 'average'.
        # The start-index is clamped such that runs shorter than the window do not raise an IndexError.
        for i in range(min(average, len(actuation) - 1), 0, -1):
            actuation[i - 1] = actuation[i]
            distance[i - 1] = distance[i]
            delta_distance[i - 1] = delta_distance[i]
            Ht[i - 1] = Ht[i]

    # Visualize the monitored values
    path = get_subfolder(f"population{'_backup' if population.use_backup else ''}/"
                         f"storage/"
                         f"{population.folder_name}/"
                         f"{population}/", "images")
    path = get_subfolder(path, f"monitor")
    path = get_subfolder(path, f"{genome.key}")
    path = get_subfolder(path, f"{game_id}")
    visualize_actuation(actuation,
                        target_found=target_found,
                        game_cfg=game_cfg.game,
                        save_path=f"{path}actuation.png")
    visualize_distance(distance,
                       target_found=target_found,
                       game_cfg=game_cfg.game,
                       save_path=f"{path}distance.png")
    visualize_hidden_state(Ht,
                           target_found=target_found,
                           game_cfg=game_cfg.game,
                           save_path=f"{path}hidden_state.png")
    visualize_position(position,
                       game=game,
                       save_path=f"{path}trace.png")
    merge(f"Monitored genome={genome.key} on game={game.id}", path=path)
コード例 #4
0
def monitor_activation(genome: Genome,
                       gid: int,
                       debug: bool = False,
                       duration: int = 60):
    """
    Simulate the genome on game `gid` and record the hidden state and the candidate hidden state of its first GRU.

    :param genome: Genome to simulate; must use at least one GruNodeGene
    :param gid: ID of the game to play
    :param debug: Print both recorded values for every sample
    :param duration: Written to `cfg.game.duration`; the run is capped at `duration * cfg.game.fps` steps
    :return: Tuple (candidate hidden states, hidden states, steps at which the score increased)
    """
    cfg = Config()
    cfg.game.duration = duration
    cfg.update()

    # The genome must use at least one GRU node
    assert any(type(node) == GruNodeGene for node in genome.get_used_nodes().values())

    # Noise-free game and its initial sensor readings
    game = get_game(i=gid, cfg=cfg, noise=False)
    state = game.reset()[D_SENSOR_LIST]

    net = make_net(
        genome=genome,
        genome_config=cfg.genome,
        batch_size=1,
        initial_read=state,
    )

    step_num = 0
    score = 0
    target_found = []
    Ht, Ht_tilde = [], []

    def sample():
        """Query the first GRU with the current state and store both values."""
        hidden, candidate, _, _ = get_gru_states(gru=net.rnn_array[0],
                                                 x=np.asarray([state]))
        Ht.append(hidden)
        Ht_tilde.append(candidate)
        if debug:
            print(f"Step: {step_num}")
            print(f"\t> Hidden state: {round(Ht[-1], 5)}")
            print(f"\t> Candidate hidden state: {round(Ht_tilde[-1], 5)}")

    # Sample taken before the first step
    sample()

    while step_num != duration * cfg.game.fps:
        # Single-sample batch, hence exactly one action is expected back
        action = net(np.asarray([state]))
        assert len(action) == 1

        obs = game.step(l=action[0][0], r=action[0][1])
        done = obs[D_DONE]

        # Remember at which step the score went up
        if game.score > score:
            target_found.append(step_num)
            score = game.score

        state = obs[D_SENSOR_LIST]

        # The step that finishes the game is not sampled
        if done:
            break
        step_num += 1

        sample()
    return Ht_tilde, Ht, target_found
コード例 #5
0
ファイル: env_multi.py プロジェクト: RubenPants/EvolvableRNN
    def trace_genome(
        self,
        genome,
        return_dict=None,
    ):
        """
        Collect the position-trace of one genome on every game of this environment.

        A trace-point is stored for a game whenever the step counter is a multiple of the configured fps. The loop
        always runs for `self.max_steps` steps; games that are done are no longer advanced but still get their
        (unchanged) position added to the trace.

        :param genome: Tuple (genome_id, genome_class)
        :param return_dict: Optional dictionary; when given, the list of traces (one per game) is stored in it under
            the genome's id
        """
        key, candidate = genome

        # Initial sensor readings, one entry per game
        states = np.asarray([game.reset()[D_SENSOR_LIST] for game in self.games])

        # Each trace starts with the game's initial player position
        traces = [[game.player.pos.get_tuple()] for game in self.games]

        # No game has finished yet
        finished = np.repeat(False, self.batch_size)

        # The network is initialised with the readings of the first game only
        net = make_net(
            genome=candidate,
            genome_config=self.pop_config.genome,
            batch_size=self.batch_size,
            initial_read=states[0],
        )

        step_num = 0
        while step_num != self.max_steps:
            # One batched query yields an action for every game
            actions = net(states)
            assert len(actions) == len(self.games)

            for idx, (game, action, done) in enumerate(zip(self.games, actions, finished)):
                # Only games that are still running are advanced
                if not done:
                    obs = game.step(l=action[0], r=action[1])
                    finished[idx] = obs[D_DONE]
                    states[idx] = obs[D_SENSOR_LIST]

                # Trace-points are stored for running and finished games alike
                if step_num % self.game_config.game.fps == 0:
                    traces[idx].append(game.player.pos.get_tuple())

            step_num += 1

        # Nothing to report when no result container was handed over
        if return_dict is None:
            return
        return_dict[key] = traces
コード例 #6
0
def main(population: Population,
         game_id: int,
         genome: Genome = None,
         game_cfg: Config = None,
         debug: bool = False):
    """
    Monitor the genome on the following elements:
        * Position
        * Update gate (Zt)
        * Hidden state of GRU (Ht)
        * Candidate hidden state of GRU (Ht_tilde)
        * Actuation of both wheels
        * Distance

    The monitored values are plotted and stored in the population's "images/monitor/<genome key>/<game id>" folder.

    :param population: Population the genome belongs to; provides the genome-config and the storage location
    :param game_id: ID of the game on which the genome is monitored
    :param genome: Genome to monitor, defaults to the population's best genome
    :param game_cfg: Config used to create the game, defaults to the population's config
    :param debug: Print the monitored values of every step
    """
    # Make sure all parameters are set (the original referenced an undefined name 'pop' for the config default)
    if genome is None: genome = population.best_genome
    if game_cfg is None: game_cfg = population.config

    # Check if valid genome (contains at least one hidden GRU without reset gate, first one is monitored)
    assert len([
        n for n in genome.get_used_nodes().values()
        if type(n) == GruNoResetNodeGene
    ]) >= 1

    # Get the game
    game = get_game(game_id, cfg=game_cfg, noise=False)
    state = game.reset()[D_SENSOR_LIST]
    step_num = 0

    # Create the network
    net = make_net(
        genome=genome,
        genome_config=population.config.genome,
        batch_size=1,
        initial_read=state,
    )

    # Containers to monitor
    actuation = []
    distance = []
    position = []
    Ht = []
    Ht_tilde = []
    Zt = []
    target_found = []
    score = 0

    def record(act):
        """Append the given actuation together with the current distance, position and GRU states."""
        actuation.append(act)
        distance.append(state[0])
        position.append(game.player.pos.get_tuple())
        ht, ht_tilde, zt = get_gru_states(net=net, x=np.asarray([state]))
        Ht.append(ht)
        Ht_tilde.append(ht_tilde)
        Zt.append(zt)
        if debug:
            print(f"Step: {step_num}")
            print(
                f"\t> Actuation: {(round(actuation[-1][0], 5), round(actuation[-1][1], 5))!r}"
            )
            print(f"\t> Distance: {round(distance[-1], 5)}")
            print(
                f"\t> Position: {(round(position[-1][0], 2), round(position[-1][1], 2))!r}"
            )
            print(f"\t> GRU states: "
                  f"\t\tHt={round(Ht[-1], 5)}"
                  f"\t\tHt_tilde={round(Ht_tilde[-1], 5)}"
                  f"\t\tZt={round(Zt[-1], 5)}")

    # Initialize the containers (no actuation before the first step)
    record([0, 0])

    # Start monitoring
    while True:
        # Check if maximum iterations is reached
        if step_num == game_cfg.game.duration * game_cfg.game.fps: break

        # Determine the action made by the agent for the current state
        action = net(np.asarray([state]))

        # Batch of one state, hence exactly one action is expected
        assert len(action) == 1

        # Proceed the game with one step, based on the predicted action
        obs = game.step(l=action[0][0], r=action[0][1])
        finished = obs[D_DONE]

        # Update the score-count
        if game.score > score:
            target_found.append(step_num)
            score = game.score

        # Update the candidate's current state
        state = obs[D_SENSOR_LIST]

        # Stop if the game is finished (this last step is not added to the containers)
        if finished: break
        step_num += 1

        # Update the containers
        record(action[0])

    # Visualize the monitored values
    path = get_subfolder(
        f"population{'_backup' if population.use_backup else ''}/"
        f"storage/"
        f"{population.folder_name}/"
        f"{population}/", "images")
    path = get_subfolder(path, f"monitor")
    path = get_subfolder(path, f"{genome.key}")
    path = get_subfolder(path, f"{game_id}")
    visualize_actuation(actuation,
                        target_found=target_found,
                        game_cfg=game_cfg.game,
                        save_path=f"{path}actuation.png")
    visualize_distance(distance,
                       target_found=target_found,
                       game_cfg=game_cfg.game,
                       save_path=f"{path}distance.png")
    visualize_hidden_state(Ht,
                           target_found=target_found,
                           game_cfg=game_cfg.game,
                           save_path=f"{path}hidden_state.png")
    visualize_candidate_hidden_state(
        Ht_tilde,
        target_found=target_found,
        game_cfg=game_cfg.game,
        save_path=f"{path}candidate_hidden_state.png")
    visualize_update_gate(Zt,
                          target_found=target_found,
                          game_cfg=game_cfg.game,
                          save_path=f"{path}update_gate.png")
    visualize_position(position, game=game, save_path=f"{path}trace.png")
    merge(f"Monitored genome={genome.key} on game={game.id}", path=path)
コード例 #7
0
    def visualize(self, genome, game_id: int):
        """
        Visualize the performance of a single genome.

        Opens a pyglet window, draws the game's walls, target and player with pymunk, and steps the game with the
        actions predicted by the genome's network on a scheduled interval. The call blocks in `pyglet.app.run()`.
        
        :param genome: Genome that is visualized; it is passed unchanged to `make_net`
            NOTE(review): previously documented as "Tuple (genome_id, genome_class)" but never unpacked here - confirm
        :param game_id: ID of the game that will be used for evaluation
        """
        # Create the requested game
        game: Game = get_game(game_id, cfg=self.game_config)
        # Scale factor applied to all game coordinates before drawing (presumably pixels-to-meters - TODO confirm)
        self.p2m = game.game_config.p2m

        # Create space in which game will be played
        window = pyglet.window.Window(
            game.x_axis * self.p2m,
            game.y_axis * self.p2m,
            "Robot Simulator - Game {id:03d}".format(id=game_id),
            resizable=False,
            visible=True)
        window.set_location(100, 100)
        pyglet.gl.glClearColor(1, 1, 1, 1)

        # Setup the requested game
        self.state = game.reset()[D_SENSOR_LIST]
        self.finished = False
        self.score = 0

        # Make the network used during visualization
        net = make_net(
            genome=genome,
            genome_config=self.pop_config.genome,
            batch_size=1,
            initial_read=self.state,
        )

        # Create the visualize-environment
        space = pymunk.Space()
        options = DrawOptions()

        # Draw static objects - walls
        if game.wall_bound:
            x_axis = game.x_axis
            y_axis = game.y_axis
            corners = [(0, 0), (0, y_axis * self.p2m),
                       (x_axis * self.p2m, y_axis * self.p2m),
                       (x_axis * self.p2m, 0)]
            # Connect every corner with the next one, wrapping around to close the rectangle
            for c in range(4):
                wall_shape = pymunk.Segment(space.static_body,
                                            a=corners[c],
                                            b=corners[(c + 1) % 4],
                                            radius=0.1 * self.p2m)  # 5cm walls
                wall_shape.color = (0, 0, 0)
                space.add(wall_shape)

        # Draw static objects - target
        target_body = pymunk.Body(body_type=pymunk.Body.KINEMATIC)
        target_body.position = game.target * self.p2m
        target_shape = pymunk.Circle(body=target_body,
                                     radius=game.bot_config.radius * self.p2m *
                                     3)  # TODO: Thick boi
        target_shape.sensor = True
        target_shape.color = (0, 128, 0)
        space.add(target_body, target_shape)

        # Init player
        # NOTE(review): the moment is computed for mass=2 while the body is created with mass=1 - confirm intended
        m = pymunk.moment_for_circle(mass=2,
                                     inner_radius=0,
                                     outer_radius=game.bot_config.radius *
                                     self.p2m)
        player_body = pymunk.Body(mass=1, moment=m)
        player_body.position = game.player.pos * self.p2m
        player_body.angle = game.player.angle
        player_shape = pymunk.Circle(body=player_body,
                                     radius=game.bot_config.radius * self.p2m *
                                     3)  # TODO: Thick boi
        player_shape.color = (255, 0, 0)
        space.add(player_body, player_shape)
        # Label in the top-right corner showing the elapsed time; self.time is not reset in this method
        label = pyglet.text.Label(f'{self.time}',
                                  font_size=16,
                                  color=(100, 100, 100, 100),
                                  x=window.width - 20,
                                  y=window.height - 20,
                                  anchor_x='center',
                                  anchor_y='center')

        # pyglet registers the handler under the function's name, so 'on_draw' must not be renamed
        @window.event
        def on_draw():
            window.clear()
            label.draw()
            space.debug_draw(options=options)

        if self.mouse_enabled:

            # Handler name 'on_mouse_press' is what binds it to the mouse-press event
            @window.event
            def on_mouse_press(x, y, *_):
                # Move the target to the mouse-clicked position (window coordinates scaled back to game coordinates)
                game.target.x = x / game.game_config.p2m
                game.target.y = y / game.game_config.p2m
                target_body.position = game.target * self.p2m

        def update_method(_):  # Input dt ignored
            # A fixed time-step derived from the game's fps is used instead of the scheduler's dt
            dt = 1 / game.game_config.fps
            self.time += dt
            label.text = str(int(self.time))

            # Stop when target is reached
            if not self.finished:
                # Query the game for the next action
                action = net(np.asarray([self.state]))
                if self.debug:
                    print(f"Passed time: {round(dt, 3)}")
                    print(
                        f"Location: x={round(player_body.position.x / self.p2m, 2)}, "
                        f"y={round(player_body.position.y / self.p2m, 2)}")
                    print(f"Orientation: {round(player_body.angle, 2)}")
                    print("Action: lw={l}, rw={r}".format(
                        l=round(action[0][0], 3), r=round(action[0][1], 3)))
                    print("Observation:", [round(s, 3) for s in self.state])

                # Progress game by one step
                obs = game.step_dt(dt=dt, l=action[0][0], r=action[0][1])
                self.finished = obs[D_DONE]
                self.state = obs[D_SENSOR_LIST]

                # Update space's player coordinates and angle
                player_body.position = game.player.pos * self.p2m
                player_body.angle = game.player.angle

                # Check if score has increased
                if game.score > self.score:
                    self.score = game.score
                    # Redraw the target at the game's current target position
                    target_body.position = game.target * self.p2m
            # The pymunk space is stepped even after the game has finished
            space.step(dt)

        # Run the game
        time.sleep(5)  # TODO: Waiting time to start recording
        # The update interval is shortened by self.speedup, so the simulation runs faster than real time when > 1
        pyglet.clock.schedule_interval(
            update_method, 1.0 / (game.game_config.fps * self.speedup))
        pyglet.app.run()