Example #1
# Assumed module-level context (not shown in this snippet): the MAX_MEMORY,
# BATCH_SIZE and LR constants, plus the Net model and Trainer classes.
from collections import deque
import random

import numpy as np
import torch


class Agent:
    def __init__(self, load_path=''):
        self.n_games = 0  # number of games played
        self.epsilon = 0  # exploration rate (randomness)
        self.gamma = 0.9  # discount rate
        self.load_path = load_path
        self.memory = deque(maxlen=MAX_MEMORY)  # oldest transitions dropped first
        self.model = Net(11, 256, 3)

        # optionally resume from previously saved weights
        if load_path:
            self.model.load_state_dict(torch.load(load_path))
        self.trainer = Trainer(self.model, LR, self.gamma)

    def get_state(self, game):
        # Direction encoding: 0 = up, 1 = left, 2 = right, 3 = down.
        # near_head holds the four grid cells adjacent to the head
        # (10 px step), listed in the same order: up, left, right, down.
        head = game.snake_pos
        near_head = [
            [head[0], head[1] - 10],
            [head[0] - 10, head[1]],
            [head[0] + 10, head[1]],
            [head[0], head[1] + 10],
        ]

        directions = [
            game.direction == 0,
            game.direction == 1,
            game.direction == 2,
            game.direction == 3,
        ]

        state = [
            # Danger straight ahead
            (directions[0] and game.is_colision(near_head[0])) or
            (directions[1] and game.is_colision(near_head[1])) or
            (directions[2] and game.is_colision(near_head[2])) or
            (directions[3] and game.is_colision(near_head[3])),

            # Danger to the left of the current heading
            (directions[0] and game.is_colision(near_head[1])) or
            (directions[1] and game.is_colision(near_head[3])) or
            (directions[2] and game.is_colision(near_head[0])) or
            (directions[3] and game.is_colision(near_head[2])),

            # Danger to the right of the current heading
            (directions[0] and game.is_colision(near_head[2])) or
            (directions[1] and game.is_colision(near_head[0])) or
            (directions[2] and game.is_colision(near_head[3])) or
            (directions[3] and game.is_colision(near_head[1])),

            # Food location relative to the head
            game.food_pos[0] < head[0],  # food left
            game.food_pos[0] > head[0],  # food right
            game.food_pos[1] < head[1],  # food up
            game.food_pos[1] > head[1],  # food down
        ] + directions  # append the current move direction (one-hot)
        return np.array(state, dtype=int)

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def train_long_memory(self):
        # train on a random batch of stored transitions (experience replay)
        if len(self.memory) > BATCH_SIZE:
            mini_sample = random.sample(self.memory, BATCH_SIZE)
        else:
            mini_sample = self.memory

        states, actions, rewards, next_states, dones = zip(*mini_sample)
        self.trainer.train_step(states, actions, rewards, next_states, dones)

    def train_short_memory(self, state, action, reward, next_state, done):
        self.trainer.train_step(state, action, reward, next_state, done)

    def get_action(self, state):
        # epsilon decays with the number of games played; exploration is
        # skipped entirely when a pretrained model was loaded
        if not self.load_path:
            self.epsilon = 80 - self.n_games

        final_move = [0, 0, 0]
        if random.randint(0, 200) < self.epsilon:
            # exploration: random move
            move = random.randint(0, 2)
            final_move[move] = 1
        else:
            # exploitation: pick the action with the highest predicted Q-value
            state0 = torch.tensor(state, dtype=torch.float)
            prediction = self.model(state0)
            move = torch.argmax(prediction).item()
            final_move[move] = 1

        return final_move
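
Neither example shows the loop that drives the agent, so here is a minimal training-loop sketch for the first Agent. The SnakeGame class, its play_step(action) and reset() methods, and the returned (reward, done, score) tuple are assumptions about the surrounding project rather than code taken from the example.

def train():
    agent = Agent()
    game = SnakeGame()  # assumed game class exposing play_step() and reset()
    record = 0

    while True:
        state_old = agent.get_state(game)
        action = agent.get_action(state_old)

        # assumed to return (reward, game_over, score) for the chosen move
        reward, done, score = game.play_step(action)
        state_new = agent.get_state(game)

        # learn from this single transition, then store it for replay
        agent.train_short_memory(state_old, action, reward, state_new, done)
        agent.remember(state_old, action, reward, state_new, done)

        if done:
            game.reset()
            agent.n_games += 1
            agent.train_long_memory()  # replay a batch of stored transitions

            if score > record:
                record = score
                torch.save(agent.model.state_dict(), 'model.pth')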
Example #2
# As above, MAX_MEMORY, BATCH_SIZE, LEARNING_RATE, the Linear Q-network, the
# Trainer, and the game's Point and Direction helpers come from the
# surrounding module.
from collections import deque
import random

import numpy as np
import torch


class Agent:
    def __init__(self):
        self.num_games = 0  # number of games played
        self.epsilon = 0  # randomness
        self.gamma = 0.9  # discount rate
        # old transitions are popped from the left when MAX_MEMORY is exceeded
        self.memory = deque(maxlen=MAX_MEMORY)
        self.model = Linear(11, 256, 3)
        self.trainer = Trainer(self.model, lr=LEARNING_RATE, gamma=self.gamma)

    def get_state(self, game):
        head = game.snake[0]
        point_l = Point(head.x - 20, head.y)
        point_r = Point(head.x + 20, head.y)
        point_u = Point(head.x, head.y - 20)
        point_d = Point(head.x, head.y + 20)

        dir_l = game.direction == Direction.LEFT
        dir_r = game.direction == Direction.RIGHT
        dir_u = game.direction == Direction.UP
        dir_d = game.direction == Direction.DOWN

        state = [
            # Danger straight
            (dir_r and game.is_collision(point_r))
            or (dir_l and game.is_collision(point_l))
            or (dir_u and game.is_collision(point_u))
            or (dir_d and game.is_collision(point_d)),

            # Danger right
            (dir_u and game.is_collision(point_r))
            or (dir_d and game.is_collision(point_l))
            or (dir_l and game.is_collision(point_u))
            or (dir_r and game.is_collision(point_d)),

            # Danger left
            (dir_d and game.is_collision(point_r))
            or (dir_u and game.is_collision(point_l))
            or (dir_r and game.is_collision(point_u))
            or (dir_l and game.is_collision(point_d)),

            # Move direction
            dir_l,
            dir_r,
            dir_u,
            dir_d,

            # Food location
            game.food.x < game.head.x,  # food left
            game.food.x > game.head.x,  # food right
            game.food.y < game.head.y,  # food up
            game.food.y > game.head.y  # food down
        ]

        return np.array(state, dtype=int)

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def train_long_memory(self):
        if len(self.memory) > BATCH_SIZE:
            mini_sample = random.sample(self.memory, BATCH_SIZE)
        else:
            mini_sample = self.memory

        states, actions, rewards, next_states, dones = zip(*mini_sample)
        self.trainer.train_step(states, actions, rewards, next_states, dones)

    def train_short_memory(self, state, action, reward, next_state, done):
        self.trainer.train_step(state, action, reward, next_state, done)

    def get_action(self, state):
        # early in training the agent mostly explores with random moves;
        # epsilon shrinks as the number of games grows
        self.epsilon = 80 - self.num_games
        final_move = [0, 0, 0]

        if random.randint(0, 200) < self.epsilon:
            move = random.randint(0, 2)
            final_move[move] = 1
        else:
            state0 = torch.tensor(state, dtype=torch.float)
            prediction = self.model(state0)
            move = torch.argmax(prediction).item()
            final_move[move] = 1

        return final_move
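
Both examples rely on a Net/Linear model and a Trainer that are defined elsewhere. The sketch below shows what such classes typically look like for this kind of agent: an 11-input, 256-hidden, 3-output feed-forward network and a one-step Q-learning update, Q_new = r + gamma * max_a' Q(s', a'). It is an assumption about the missing code, not the exact classes from either project.

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim


class Net(nn.Module):
    # two-layer feed-forward Q-network, as in Net(11, 256, 3) / Linear(11, 256, 3)
    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.linear2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = F.relu(self.linear1(x))
        return self.linear2(x)  # raw Q-values, one per action


class Trainer:
    # one-step Q-learning update with an MSE loss
    def __init__(self, model, lr, gamma):
        self.model = model
        self.gamma = gamma
        self.optimizer = optim.Adam(model.parameters(), lr=lr)
        self.criterion = nn.MSELoss()

    def train_step(self, state, action, reward, next_state, done):
        state = torch.tensor(np.array(state), dtype=torch.float)
        next_state = torch.tensor(np.array(next_state), dtype=torch.float)
        action = torch.tensor(np.array(action), dtype=torch.long)
        reward = torch.tensor(np.array(reward), dtype=torch.float)

        if state.dim() == 1:
            # single transition from train_short_memory: add a batch dimension
            state = state.unsqueeze(0)
            next_state = next_state.unsqueeze(0)
            action = action.unsqueeze(0)
            reward = reward.unsqueeze(0)
            done = (done,)

        # Q-values predicted for the current states
        pred = self.model(state)

        # targets: keep predictions, but replace the Q-value of the action
        # actually taken with r + gamma * max_a' Q(s', a') (just r if terminal)
        with torch.no_grad():
            target = pred.clone()
            for idx in range(len(done)):
                q_new = reward[idx]
                if not done[idx]:
                    q_new = reward[idx] + self.gamma * torch.max(
                        self.model(next_state[idx]))
                target[idx][torch.argmax(action[idx]).item()] = q_new

        self.optimizer.zero_grad()
        loss = self.criterion(pred, target)
        loss.backward()
        self.optimizer.step()

A single train_step handles both the single-transition call from train_short_memory and the batched call from train_long_memory, which is why it adds a batch dimension when the input is one-dimensional.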