예제 #1
0
    def add_game(self, player_strategy, rnd, starting_game_position=None):
        """Play one game to completion and enroll its states in the dataset.

        Each turn, `player_strategy.get_move` is asked for a move given the
        current board and score, and that move is applied with
        `game.do_turn_and_retrieve_intermediate`.  The intermediate board of
        every turn whose outcome is OK is vectorized with `Board.as_vector`
        and recorded as an example; play stops at the first GAMEOVER outcome.

        Once the game is over the strategy is told the final board and score
        via `notify_outcome`, the recorded states are scored by
        `Dataset.evaluate_states`, and states and scores are appended to the
        newest batch.  A fresh batch is started first if the newest one would
        otherwise grow beyond MAX_BATCH_SIZE rows.

        Args:
            player_strategy: Object providing `get_move(board, score)` and
                `notify_outcome(board, score)`.
            rnd: Randomness source handed to `Game` when a new game has to be
                created; not used when `starting_game_position` is supplied.
            starting_game_position: Optional game to continue from instead of
                starting a fresh one.  Any falsy value starts a new game.

        Returns:
            The number of state rows appended to the dataset for this game.
            This counts the all-zero seed row, so it is one more than the
            number of OK moves played.
        """
        # NOTE(review): the all-zero seed row is stored as an example and is
        # counted in the return value, but not in self._num_examples.  It is
        # kept here so existing stored data and callers are unaffected --
        # confirm whether an empty (0, EXAMPLE_WIDTH) seed was intended.
        rows = [np.zeros((1, EXAMPLE_WIDTH))]
        game = starting_game_position or Game(rnd=rnd)
        while True:
            move = player_strategy.get_move(game.board(), game.score())
            intermediate_board, turn_outcome = (
                game.do_turn_and_retrieve_intermediate(move))
            if turn_outcome == OK:
                # Only turns that completed normally yield a training example.
                rows.append(Board.as_vector(intermediate_board))
                self._num_examples += 1
            if turn_outcome == GAMEOVER:
                break
        player_strategy.notify_outcome(game.board(), game.score())

        # Build the state matrix once instead of re-copying it every turn.
        states = np.concatenate(rows, axis=0)

        # Pass the final score value (not the bound method), matching how
        # game.score() is used everywhere else in this method.
        scores = Dataset.evaluate_states(states, game.board(), game.score())
        assert len(states) == len(scores)

        batch_size_so_far = self._example_batches[-1].shape[0]
        if len(states) + batch_size_so_far > MAX_BATCH_SIZE:
            # The newest batch would overflow; start a new, empty one.
            self._example_batches.append(np.zeros((0, EXAMPLE_WIDTH)))
            self._score_batches.append(np.zeros((0, 1)))
        self._example_batches[-1] = np.append(
            self._example_batches[-1], states, axis=0)
        # No axis is given, so np.append flattens the score batch to 1-D.
        self._score_batches[-1] = np.append(self._score_batches[-1], scores)
        return len(states)