def main():
    """Play a single Othello game and print how it ended.

    Each round the MCTS agent moves first, then a second move is drawn with
    ``sample`` using equal weights over the legal actions (presumably a
    uniformly random opponent -- confirm against ``sample``). Play stops as
    soon as the game reports it is over, or after 32 rounds at the latest.
    The final board, score and winner are printed afterwards, followed by
    ``'game over'`` regardless of which condition ended the loop.
    """
    searcher = MCTS()
    board = Othello()

    for _ in range(32):
        # Agent turn: ask the tree search for a move and play it.
        board.move(searcher.get_action(board))
        if board.game_over():
            break

        # Opponent turn: one equal weight per legal action.
        legal = board.get_actions()
        weights = np.ones(legal.shape[0])
        board.move(sample(weights, legal))
        if board.game_over():
            break

    print(board.get_true_state())
    print(board.get_score())
    print(board.get_winner())
    print('game over')
class ReversiEnv(object):
    """Environment-style wrapper around an ``Othello`` game.

    Typical use: call ``reset()`` to start a game, then alternate between
    ``action_space()`` and ``step(action)``.

    Attributes:
        n: Set to 8; presumably the board side length (``length()``
            returns ``n * n``).
        game: The current ``Othello`` instance, or ``None`` until
            ``reset()`` has been called.
    """

    def __init__(self):
        super(ReversiEnv, self).__init__()
        self.n = 8
        # Declared up front so use-before-reset is detected explicitly
        # instead of surfacing as a missing-attribute error.
        self.game = None

    def _require_game(self):
        """Return the active game, raising if ``reset()`` was never called."""
        if self.game is None:
            raise RuntimeError(
                "ReversiEnv has no active game; call reset() first"
            )
        return self.game

    def length(self):
        """Return ``n * n``."""
        return self.n * self.n

    def reset(self):
        """Start a fresh ``Othello`` game.

        Returns:
            A ``(state, turn)`` tuple taken from the new game's
            ``get_state()`` and ``get_turn()``.
        """
        self.game = Othello()
        return self.game.get_state(), self.game.get_turn()

    def action_space(self):
        """Return the current game's ``get_actions()`` result.

        Raises:
            RuntimeError: If ``reset()`` has not been called yet.
        """
        return self._require_game().get_actions()

    def step(self, action):
        """Apply ``action`` to the current game.

        Returns:
            A ``(state, turn, reward)`` tuple, where ``reward`` is whatever
            the game's ``get_winner()`` returns right after the move.

        Raises:
            RuntimeError: If ``reset()`` has not been called yet.
        """
        game = self._require_game()
        game.move(action)
        # The winner is queried before state/turn, matching the call order
        # the game object has always seen.
        reward = game.get_winner()
        return game.get_state(), game.get_turn(), reward