예제 #1
0
 def do_rollout(self, node: GameState):
     """Make the tree one layer better. (Train for one iteration.)

     Runs a single select -> expand -> evaluate -> backpropagate pass
     starting from a copy of ``node``.

     The selected leaf is evaluated either by ``self._simulate`` (when
     ``self.heuristic`` is ``None``) or by ``self.heuristic.h``. In both
     cases the evaluator receives a copy of the leaf.

     Args:
         node: State to start the iteration from. It is copied first; the
             copy, not the caller's object, is passed to ``self._select``.

     Returns:
         None. The result is recorded through ``self._backpropagate``.
     """
     root = node.__copy__()
     path = self._select(root)
     leaf = path[-1]
     self._expand(leaf)
     if self.heuristic is None:
         reward = self._simulate(leaf.__copy__())
     else:
         # Evaluate the selected leaf, not the root: scoring the root would
         # backpropagate the same value no matter which leaf was chosen.
         # A copy is passed (as in the simulate branch) so the heuristic is
         # never handed the object that is stored in ``path``.
         reward = self.heuristic.h(leaf.__copy__())
     self._backpropagate(path, reward)
예제 #2
0
    def play(self, game: GameState, verbose=False):
        """Play ``game`` until ``game.game_over()`` is true and report the result.

        When ``self.alternate`` is truthy, ``self.actionModel1`` and
        ``self.actionModel2`` are swapped before play starts (the swap
        persists on the instance). On each turn the model is chosen by
        ``game.turn()``: ``actionModel1`` when it is truthy, otherwise
        ``actionModel2``. The chosen model's ``action(game)`` returns a
        ``(move, h)`` pair; the move is applied with ``game.push``, so
        ``game`` is mutated in place.

        Args:
            game: The game state to play out.
            verbose: When truthy, print the game initially and after every
                move, along with the move and its ``h`` value.

        Returns:
            A tuple ``(winner, states, hs)`` where ``winner`` is
            ``game.winner()`` after the loop, ``states`` holds a copy of the
            game taken just before each move was pushed, and ``hs`` holds the
            ``h`` value returned alongside each move.
        """
        if self.alternate:
            first, second = self.actionModel1, self.actionModel2
            self.actionModel1, self.actionModel2 = second, first

        snapshots = []
        evaluations = []

        if verbose:
            print(game)

        while not game.game_over():
            if game.turn():
                mover = self.actionModel1
            else:
                mover = self.actionModel2

            chosen_move, score = mover.action(game)
            # Snapshot the position before the move is applied.
            snapshots.append(game.__copy__())
            evaluations.append(score)
            game.push(chosen_move)

            if verbose:
                print(f"move: {chosen_move!s} h: {score!s}")
                print(game)

        return game.winner(), snapshots, evaluations