def do_rollout(self, node: GameState) -> None:
    """Make the tree one layer better. (Train for one iteration.)

    The search runs on a copy of ``node`` (taken via ``node.__copy__()``),
    so the object passed in by the caller is never handed to the tree
    helpers.

    Steps:
      1. ``_select`` returns a path starting from the copied root.
      2. The last entry of that path (the leaf) is passed to ``_expand``.
      3. The leaf is scored: with no heuristic configured
         (``self.heuristic is None``) a copy of the leaf is passed to
         ``_simulate``; otherwise ``self.heuristic.h`` evaluates the leaf.
      4. The resulting reward is passed to ``_backpropagate`` together
         with the selected path.

    Args:
        node: State to start the rollout from.
    """
    root = node.__copy__()

    path = self._select(root)
    leaf = path[-1]
    self._expand(leaf)

    if self.heuristic is None:
        # _simulate receives a copy of the leaf rather than the object held
        # in the path (presumably because the playout mutates its argument
        # -- confirm against _simulate).
        reward = self._simulate(leaf.__copy__())
    else:
        # Score the leaf that was just selected/expanded, mirroring the
        # simulation branch. Evaluating the root here would backpropagate
        # the same value on every rollout regardless of the path chosen.
        reward = self.heuristic.h(leaf)

    self._backpropagate(path, reward)
def play(self, game: GameState, verbose: bool = False) -> tuple:
    """Play ``game`` to completion using the two action models.

    If ``self.alternate`` is truthy, ``self.actionModel1`` and
    ``self.actionModel2`` are swapped on the instance before play starts
    (the swap persists after this call returns).

    On each turn the acting model is ``self.actionModel1`` when
    ``game.turn()`` is truthy and ``self.actionModel2`` otherwise. Its
    ``action(game)`` call must return a ``(move, h)`` pair. A copy of the
    state *before* the move is recorded together with ``h``, then the move
    is pushed onto ``game``. ``game`` itself is mutated in place.

    Args:
        game: State to play from; advanced in place until
            ``game.game_over()`` is truthy.
        verbose: When true, print the initial state, and after every move
            print the move, its ``h`` value, and the resulting state.

    Returns:
        A tuple ``(winner, states, hs)`` where ``winner`` is
        ``game.winner()`` at the end, ``states`` holds the copied pre-move
        states in order, and ``hs`` holds the matching ``h`` values.
    """
    if self.alternate:
        self.actionModel1, self.actionModel2 = self.actionModel2, self.actionModel1

    states = []
    h_values = []

    if verbose:
        print(game)

    while not game.game_over():
        acting_model = self.actionModel1 if game.turn() else self.actionModel2
        move, h = acting_model.action(game)

        # Snapshot the position before the move is applied, since push()
        # mutates `game` in place.
        states.append(game.__copy__())
        h_values.append(h)
        game.push(move)

        if verbose:
            print(f"move: {move!s} h: {h!s}")
            print(game)

    return game.winner(), states, h_values