def get_reward(self, old_bird: Bird, bird: Bird) -> Tuple[bool, int]:
    """Score the transition from *old_bird* to *bird*.

    Returns:
        Tuple[bool, int]: ``(reset_bird, reward)`` where ``reset_bird`` tells
        the caller the move must be undone (bird stuck at the top or left the
        playable area) and ``reward`` is the enum reward plus a distance-based
        shaping term.
    """
    reset_bird = False
    # Bird is still inside the vertical play area (top <= board height, bottom > 0).
    if bird.get_top() <= self.height and bird.get_bottom() > 0:
        if self.board_controller.is_top(bird):
            # Pinned against the ceiling: punish and ask the caller to reset the flap.
            reward = Reward.REWARD_STUCK
            reset_bird = True
        elif self.board_controller.is_bottom(
                bird) or self.board_controller.is_pipe(bird):
            # Hit the floor or a pipe: the run is lost ("loose" is the project's
            # spelling for "lose" — kept, callers read this attribute).
            self.loose = True
            reward = Reward.REWARD_LOOSE
            # Consume the goal that was just missed, if any remain.
            if len(self.board_controller.goals) != 0:
                self.board_controller.goals.pop(0)
        elif self.board_controller.is_checkpoint(bird):
            # Passed cleanly through a pipe gap: reward and consume the goal.
            # NOTE(review): no emptiness check before pop(0) here, unlike the
            # lose branch — presumably is_checkpoint implies a goal exists; verify.
            self.win_streak += 1
            self.checked = True
            reward = Reward.REWARD_CHECKPOINT
            self.board_controller.goals.pop(0)
        else:
            reward = Reward.REWARD_DEFAULT
    else:
        # Bird left the playable area entirely: impossible move, force a reset.
        reset_bird = True
        reward = Reward.REWARD_IMPOSSIBLE
    penalty = 0
    if not reset_bird:
        # Reward shaping: compare distance to the next pipe before and after the move.
        old_distance = self.board_controller.distance_next_pipe(old_bird)
        distance = self.board_controller.distance_next_pipe(bird)
        if distance == -1:
            # -1 presumably means "no pipe ahead" — TODO confirm; no shaping then.
            penalty = 0
        elif old_distance - distance < 0:
            # Moved away from the pipe: penalty scaled by the new distance
            # (REWARD_PENALTY is presumably negative — verify against the Reward enum).
            penalty = distance * int(Reward.REWARD_PENALTY)
        else:
            # Moved closer (or held distance): bonus equal to the checkpoint reward.
            penalty = int(Reward.REWARD_CHECKPOINT)
    return reset_bird, int(reward) + penalty
def bind_genomes(self):
    """Build one feed-forward network and one Bird per NEAT genome.

    Resets each genome's fitness to 0 and appends the network, a freshly
    spawned Bird at (100, 266), and the genome itself to the parallel
    bookkeeping lists.
    """
    for _genome_id, current_genome in self.__genomes:
        current_genome.fitness = 0
        network = neat.nn.FeedForwardNetwork.create(current_genome, self.__config)
        self.__nets.append(network)
        self.__birds.append(Bird(100, 266))
        self.__ge.append(current_genome)
def update_bird(self, bird: Bird, action: str) -> Tuple[Bird, int]:
    """Apply *action* to *bird*, score the move, and return the bird with its reward.

    A deep copy of the pre-move bird is kept so the reward can compare
    before/after positions. When the move must be undone, an UP action's
    flap is rolled back.
    """
    self.checked = False
    snapshot = copy.deepcopy(bird)
    if action == UP:
        bird.flap()
    elif action == RELEASE:
        bird.fall()
    reset_bird, reward = self.get_reward(snapshot, bird)
    if reset_bird and action == UP:
        bird.reset_flap()
    return bird, reward
def is_top(self, bird: Bird) -> bool:
    """Return True when the bird's top edge has entered the ceiling band of the board."""
    ceiling = self.board.height - self.texture_manager.texture["top"]["height"]
    return bird.get_top() >= ceiling
def is_bottom(self, bird: Bird) -> bool:
    """Return True when the bird's bottom edge has dropped below the floor texture."""
    floor_height = self.texture_manager.texture["bottom"]["height"]
    return bird.get_bottom() < floor_height
def update_policy(self, bird: Bird) -> None:
    """Feed the last transition (previous bird -> *bird*, last action, reward) to the policy."""
    previous = self.previous_bird
    controller = self.environment.board_controller
    self.policy.update(
        previous.get_state(),
        bird.get_state(),
        controller.get_position(previous),
        controller.get_position(bird),
        self.last_action,
        self.reward,
    )
def best_action(self, bird: Bird) -> str:
    """Return the policy's best action for the bird's current state and board position."""
    state = bird.get_state()
    position = self.environment.board_controller.get_position(bird)
    return self.policy.best_action(state, position)
def reset(self, bird: Bird) -> None:
    """Restart an episode: reset the environment and the bird, zero the counters."""
    self.environment.reset()
    # NOTE(review): assumes Bird.reset() returns the reset bird (fluent API) — verify.
    self.previous_bird = bird.reset()
    self.reward = 0
    self.score = 0
def in_pipe(self, pipe: Pipe, bird: Bird) -> bool:
    """Return True when the bird overlaps *pipe* horizontally and collides with either pipe half."""
    pipe_right = pipe.position_x + self.texture["pipe"]["width"]
    overlaps_horizontally = (pipe.position_x <= bird.get_max_x()
                            and pipe_right >= bird.get_min_x())
    if not overlaps_horizontally:
        return False
    # Inside the pipe column: a hit means the bird is below the gap or above it.
    return pipe.bottom >= bird.get_bottom() or bird.get_top() >= pipe.top
def in_checkpoint(self, pipe: Pipe, bird: Bird) -> bool:
    """Return True once the bird has fully cleared *pipe* while staying inside the gap."""
    cleared_pipe = pipe.position_x + self.texture["pipe"]["width"] <= bird.get_min_x()
    inside_gap = pipe.bottom < bird.get_bottom() and bird.get_top() < pipe.top
    return cleared_pipe and inside_gap
import arcade

from resources.env import SCREEN_WIDTH, SCREEN_HEIGHT
from src.controllers.environment import Environment
from src.entities.bird import Bird
from src.learning_engine.agent import Agent
from src.views.flappy_window import FlappyWindow

if __name__ == "__main__":
    # Wire up the learning environment, a single bird, and the agent driving it.
    environment = Environment(SCREEN_WIDTH, SCREEN_HEIGHT)
    bird = Bird()
    agent = Agent(environment, bird)
    # The window owns rendering; setup() prepares it before the arcade loop starts.
    window = FlappyWindow(agent, bird)
    window.setup()
    # Blocks until the window is closed.
    arcade.run()