def _simulate(self, node: GameState):
    """Roll `node` out to a terminal state and return the resulting reward.

    Moves are drawn from ``node.random_move()`` and applied with
    ``node.push(move)``; the value returned by ``push`` becomes the next
    node, so ``push`` is expected to return the successor state.

    The terminal state's ``reward()`` is returned as ``1 - reward`` when an
    even number of moves (including zero) was played during the rollout,
    and unchanged when an odd number was played.
    NOTE(review): presumably this flips the reward between the two players'
    perspectives on a 0..1 scale -- confirm against ``reward()``.
    """
    plies_played = 0
    while not node.terminal():
        node = node.push(node.random_move())
        plies_played += 1

    outcome = node.reward()
    # Even ply count (0, 2, 4, ...) means the reward must be inverted.
    if plies_played % 2 == 0:
        return 1 - outcome
    return outcome
def play(self, game: GameState, verbose=False):
    """Play `game` to completion with the two action models.

    When ``self.alternate`` is truthy, ``self.actionModel1`` and
    ``self.actionModel2`` are swapped on the instance before play starts
    (the swap persists after the call). On each turn the acting model is
    ``actionModel1`` if ``game.turn()`` is truthy, otherwise
    ``actionModel2``; its ``action(game)`` must return a ``(move, h)`` pair.

    `game` is advanced in place via ``game.push(move)``.

    Args:
        game: The game to play; mutated by this call.
        verbose: If true, print the game before the first move and, after
            every move, the move/h pair followed by the updated game.

    Returns:
        A tuple ``(winner, states, hs)`` where ``winner`` is
        ``game.winner()`` once the game is over, ``states`` holds a copy of
        the game taken just before each move was applied, and ``hs`` holds
        the ``h`` value returned alongside each move.
    """
    if self.alternate:
        # Exchange the two models; the new assignment stays on the instance.
        self.actionModel1, self.actionModel2 = self.actionModel2, self.actionModel1

    snapshots = []
    h_values = []

    if verbose:
        print(game)

    while not game.game_over():
        if game.turn():
            mover = self.actionModel1
        else:
            mover = self.actionModel2

        move, h = mover.action(game)

        # Record the position as it was *before* the move is applied.
        snapshots.append(game.__copy__())
        h_values.append(h)
        game.push(move)

        if verbose:
            print(f"move: {move!s} h: {h!s}")
            print(game)

    return game.winner(), snapshots, h_values