Example no. 1
0
    def train(self, result: Result) -> None:
        """Fit the model on the buffered episode, then decay epsilon.

        The episode's final reward is propagated backwards through the
        buffered (board, move) pairs, discounted by ``self.gamma`` at each
        step; invalid moves are driven towards the disqualification value.
        A disqualification penalizes only the last (illegal) move.
        """
        states = []
        move_targets = []
        q_targets = []

        reward = result.value
        if result is Result.DISQUALIFIED:
            # Only the final, illegal move gets the penalty.
            final_board, final_move = self.buffer[-1]
            states.append(encode_board(final_board, self.mark, as_batch=False))
            move_targets.append(final_move)
            q_targets.append(reward)
        else:
            # Walk the episode from last move to first.
            for board, move_hash in reversed(self.buffer):
                blocked = get_encoded_invalid_moves(board, self.mark)
                blocked_qs = [Result.DISQUALIFIED.value] * len(blocked)

                states.append(encode_board(board, self.mark, as_batch=False))
                move_targets.append([move_hash, *blocked])
                q_targets.append([reward, *blocked_qs])

                # Discount one step further back in time.
                reward *= self.gamma

        states = np.array(states)
        targets = self.model.predict(states)
        for idx, (tgt_moves, tgt_qs) in enumerate(zip(move_targets, q_targets)):
            targets[idx, tgt_moves] = tgt_qs

        self.model.fit(states, targets, epochs=1, verbose=0)

        # Episode consumed; shrink epsilon towards its floor.
        self.buffer = []
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay
Example no. 2
0
    def move(self, board) -> Tuple[int, int, Action]:
        """Pick a move for *board*.

        Outside training, the model's best valid move is played. During
        training, an epsilon-greedy policy is used and the resulting
        (board, move_hash) pair is appended to ``self.buffer``.

        Returns the chosen move (an entry of ``ALL_MOVES``).
        """
        mark = self.mark

        if not self.training:
            _, chosen_move = self._best_valid_move(board, mark)
            return chosen_move

        if random() < self.epsilon:
            # Explore: uniformly random legal move; fall back to move 0
            # when no legal move exists (matches the greedy fallback).
            move_hash = 0
            chosen_move = ALL_MOVES[move_hash]
            possible_moves = get_possible_moves(board, mark)
            if possible_moves:
                chosen_move = choice(possible_moves)
                move_hash = ALL_MOVES.index(chosen_move)
        else:
            # Exploit: play the model's best valid move.
            move_hash, chosen_move = self._best_valid_move(board, mark)

        self.buffer.append((board, move_hash))
        return chosen_move

    def _best_valid_move(self, board, mark):
        """Return ``(move_hash, move)`` for the model's top-ranked valid move.

        Predictions are ranked and the first hash that is actually
        playable on *board* wins; defaults to hash 0 when none match.
        """
        predictions = self.model.predict(encode_board(board, mark))[0]
        possible_moves_hashes = get_encoded_possible_moves(board, mark)
        move_hash = next((x for _, x in sort_predictions(predictions)
                          if x in possible_moves_hashes), 0)
        return move_hash, ALL_MOVES[move_hash]
Example no. 3
0
    def move(self, board) -> Tuple[int, int, Action]:
        """Choose a move via an epsilon-greedy policy.

        With probability ``self.epsilon`` a random legal move is played
        (falling back to move 0 when none exists); otherwise the model's
        highest-ranked valid move is used. In training mode the target
        network is refreshed every ``self.max_tau`` calls and the chosen
        move is memorized with a zero placeholder reward.
        """
        if random() < self.epsilon:
            # Exploration branch.
            legal = get_possible_moves(board, self.mark)
            if not legal:
                move_hash = 0
                chosen_move = ALL_MOVES[move_hash]
            else:
                chosen_move = choice(legal)
                move_hash = ALL_MOVES.index(chosen_move)
        else:
            # Exploitation branch: best valid move by predicted value.
            predictions = self.model.predict(encode_board(board, self.mark))[0]
            valid_hashes = get_encoded_possible_moves(board, self.mark)
            ranked = sort_predictions(predictions)
            move_hash = next((h for _, h in ranked if h in valid_hashes), 0)
            chosen_move = ALL_MOVES[move_hash]

        if self.training:
            # Periodically sync the target network with the online one.
            self.tau += 1
            if self.tau > self.max_tau:
                self.tau = 0
                self.update_target_model()

            self.memorize(board, move_hash, 0, False)

        return chosen_move
Example no. 4
0
    def memorize(self, board, move_hash, reward, done):
        """Append a replay-memory transition and remember the current step.

        Pairs the *previous* board/move with the current board, so the
        first call only records bookkeeping and stores nothing.
        Each memory entry is
        ``(prev_state, prev_move, reward, curr_state, invalid_moves_mask, done)``.
        """
        # NOTE(review): truthiness check kept as-is — prev_board may be
        # deliberately initialized to a falsy sentinel; confirm before
        # tightening to an explicit `is not None`.
        if self.prev_board:
            prev_state = encode_board(self.prev_board,
                                      self.mark,
                                      as_batch=False)

            if not done:
                rival = self.mark.opposite_mark()
                # Mask every board cell already taken by the opponent.
                invalid_moves_mask = [
                    board[r][c] is rival for r, c, _ in ALL_MOVES
                ]
                curr_state = encode_board(board, self.mark, as_batch=False)
            else:
                # Terminal transition: nothing masked, next state reuses
                # the previous encoding.
                invalid_moves_mask = [0] * MOVE_SPACE_SIZE
                curr_state = prev_state

            self.memory.append((prev_state, self.prev_move, reward,
                                curr_state, invalid_moves_mask, done))

        self.prev_board = board
        self.prev_move = move_hash