Example #1
def main():
    args = parse_args()
    agent = load_agent(args)
    board_size = args.board_size
    game = GameState.new_game(board_size)
    if args.playas == 'black':
        human_play = Player.black
    elif args.playas == 'white':
        human_play = Player.white
    else:
        raise ValueError('Unknown option for playas: {}'.format(args.playas))
    while not game.is_over():
        print_board(game.board)
        if game.nplayer == human_play:
            human_move = input('-- ')
            if len(human_move) > 1:
                point = point_from_coord(human_move.strip())
                move = Move.play(point)
            else:
                move = Move.pass_turn()
        else:
            move = agent.select_move(game)
        print_move(game.nplayer, move)
        game = game.apply_move(move)
    winner = game.winner()
    if winner is None:
        print("Tie")
    elif winner == Player.black:
        print("Black win")
    else:
        print("White win")
Example #2
 def select_move(self, gs):
   if self.strategy.should_pass(gs):
     return Move.pass_turn()
   elif self.strategy.should_resign(gs):
     return Move.resign()
   else:
     return self.agent.select_move(gs)
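
The excerpt shows only the method; the class around it and its termination strategy are not named. A minimal self-contained sketch of how such a wrapper could be wired up, where NeverResign and TerminationAgent are hypothetical names and only the should_pass/should_resign interface comes from the snippet:

class NeverResign:
  #a trivial strategy satisfying the should_pass/should_resign interface
  def should_pass(self, gs):
    return False
  def should_resign(self, gs):
    return False

class TerminationAgent:
  #hypothetical wrapper class around the select_move shown above
  def __init__(self, agent, strategy):
    self.agent = agent
    self.strategy = strategy
  def select_move(self, gs):
    if self.strategy.should_pass(gs):
      return Move.pass_turn()
    elif self.strategy.should_resign(gs):
      return Move.resign()
    else:
      return self.agent.select_move(gs)

#wire it up around any inner agent, e.g. the RandomAgent of Example #3
bot = TerminationAgent(RandomAgent(), NeverResign())
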
Example #3
def main():
    args = parse_args()
    board_size = args.size
    game = GameState.new_game(board_size)
    #bot = RandomAgent()
    # the positional arguments are presumably search rounds, an exploration
    # temperature, and rollouts per round (cf. expandmax and mc_trials in Example #21)
    bot = MCTSAgent(100, 1., 64)
    while not game.is_over():
        print_board(game.board)
        if game.nplayer == Player.black:
            human_move = input('-- ')
            if len(human_move) > 1:
                point = point_from_coord(human_move.strip())
                move = Move.play(point)
            else:
                move = Move.pass_turn()
        else:
            move = bot.select_move(game)
        print_move(game.nplayer, move)
        game = game.apply_move(move)
    winner = game.winner()
    if winner is None:
        print("Tie")
    elif winner == Player.black:
        print("Black win")
    else:
        print("White win")
Example #4
 def select_move(self, gs):
     num_moves = self.encoder.num_points()
     board_tensor = self.encoder.encode(gs)
     X = np.array([board_tensor])
     # epsilon-greedy exploration: self.temperature plays the role of epsilon
     if np.random.random() < self.temperature:
         move_probs = np.ones(num_moves) / num_moves
     else:
         move_probs = self.model.predict(X)[0]
         move_probs = self.clip_probs_(move_probs)
     candidates = np.arange(num_moves)
     ranked_moves = np.random.choice(candidates,
                                     num_moves,
                                     replace=False,
                                     p=move_probs)
     for pt_idx in ranked_moves:
         point = self.encoder.decode_point_index(pt_idx)
         move = Move.play(point)
         is_valid = gs.is_valid_move(move)
         is_eye = is_point_an_eye(gs.board, point, gs.nplayer)
         if is_valid and not is_eye:
             if self.collector is not None:
                 self.collector.record_decision(state=board_tensor,
                                                action=pt_idx)
             return move
     #this policy-gradient agent does not learn from passing turns
     return Move.pass_turn()
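
The clip_probs_ helper is not shown in this excerpt. Example #6 inlines what is plausibly the same logic (its TODO even asks to move the clipping into a function), so a sketch under that assumption:

 def clip_probs_(self, probs, eps=1e-6):
     # clip the probabilities away from 0 and 1, then re-normalize so they
     # still sum to 1 (mirrors the inline clipping code in Example #6)
     probs = np.clip(probs, eps, 1 - eps)
     return probs / np.sum(probs)
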
Example #5
File: q.py Project: armandli/rlgames
 def select_move(self, gs):
   board_tensor = self.encoder.encode(gs)
   moves = []
   board_tensors = []
   for move in gs.legal_moves():
     if not move.is_play:
       continue
     moves.append(self.encoder.encode_point(move.pt))
     board_tensors.append(board_tensor)
   if not moves:
     return Move.pass_turn()
   num_moves = len(moves)
   board_tensors = np.array(board_tensors)
   move_vectors = np.zeros((num_moves, self.encoder.num_points()))
   for i, move in enumerate(moves):
     move_vectors[i][move] = 1.
   #Q-learning uses 2 input tensors: the states and the actions
   values = self.model.predict([board_tensors, move_vectors])
   values = values.reshape(len(moves))
   ranked_moves = self.rank_move_eps_greedy(values)
   for move_idx in ranked_moves:
     point = self.encoder.decode_point_index(moves[move_idx])
     if not is_point_an_eye(gs.board, point, gs.nplayer):
       if self.collector is not None:
         self.collector.record_decision(state=board_tensor, action=moves[move_idx])
       return Move.play(point)
   return Move.pass_turn()
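
rank_move_eps_greedy is not shown in the excerpt. A plausible sketch, assuming the same epsilon-greedy convention as Example #4, where self.temperature plays the role of epsilon:

 def rank_move_eps_greedy(self, values):
   #with probability temperature, rank the moves randomly (exploration);
   #otherwise rank them by predicted Q-value, best first (exploitation)
   if np.random.random() < self.temperature:
     values = np.random.random(values.shape)
   ranked = np.argsort(values)  #argsort is ascending
   return ranked[::-1]          #highest value first
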
Example #6
    def select_move(self, gs):
        num_moves = self.encoder.num_points()
        board_tensor = self.encoder.encode(gs)
        X = np.array([board_tensor])
        actions, values = self.model.predict(X)
        move_probs = actions[0]
        estimated_value = values[0][0]
        eps = 1e-6
        #TODO: how to use temperature here?
        #TODO: move this into a function that does the clipping
        move_probs = np.clip(move_probs, eps, 1 - eps)
        move_probs = move_probs / np.sum(move_probs)

        candidates = np.arange(num_moves)
        ranked_moves = np.random.choice(candidates,
                                        num_moves,
                                        replace=False,
                                        p=move_probs)
        for point_idx in ranked_moves:
            point = self.encoder.decode_point_index(point_idx)
            move = Move.play(point)
            is_move_valid = gs.is_valid_move(move)
            is_eye = is_point_an_eye(gs.board, point, gs.nplayer)
            if is_move_valid and not is_eye:
                if self.collector is not None:
                    self.collector.record_decision(
                        state=board_tensor,
                        action=point_idx,
                        estimated_value=estimated_value)
                return move
        return Move.pass_turn()
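
On the temperature TODO above: one conventional choice (an assumption here, not the project's code, and it presumes the agent carries a temperature field like the agents in Examples #4 and #5) is to raise the policy to a power before clipping; Example #11 does the same thing with a fixed cube:

        # hypothetical drop-in for the clipping block above:
        # t < 1 sharpens the policy toward its argmax, t > 1 flattens it
        move_probs = move_probs ** (1.0 / self.temperature)
        move_probs = np.clip(move_probs, eps, 1 - eps)
        move_probs = move_probs / np.sum(move_probs)
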
Example #7
 def test_encode(self):
     encoder = get_encoder_by_name('oneplane', 9)
     gs = GameState.new_game(9)
     gs = gs.apply_move(Move.play(Point(5, 5)))
     gs = gs.apply_move(Move.play(Point(4, 5)))
     code = encoder.encode(gs)
     self.assertEqual(1, code[0][4][4])   # black stone played at Point(5, 5)
     self.assertEqual(-1, code[0][3][4])  # white stone played at Point(4, 5)
Example #8
 def random_move_(self, game_state):
     # try the cached board points in random order; if none is a valid,
     # non-eye-filling move, fall back to passing
     idxes = np.arange(len(self.cache))
     np.random.shuffle(idxes)
     for idx in idxes:
         m = Move.play(self.cache[idx])
         if (game_state.is_valid_move(m) and not is_point_an_eye(
                 game_state.board, m.pt, game_state.nplayer)):
             return m
     return Move.pass_turn()
Example #9
 def handle_play(self, color, move):
     if move.lower() == 'pass':
         self.game_state = self.game_state.apply_move(Move.pass_turn())
     elif move.lower() == 'resign':
         self.game_state = self.game_state.apply_move(Move.resign())
     else:
         self.game_state = self.game_state.apply_move(
             gtp_position_to_coord(move))
     return response.success()
Example #10
File: goboard.py Project: armandli/rlgames
 def legal_moves(self):
   if self.is_over():
     return list()
   ret = [Move.pass_turn(), Move.resign()]
   for ri in range(1, self.board.sz + 1):
     for ci in range(1, self.board.sz + 1):
       m = Move.play(Point(ri, ci))
       if self.is_valid_move(m):
         ret.append(m)
   return ret
Example #11
 def select_move(self, gs):
   eps = 1e-6
   num_actions = self.encoder.num_points()
   move_probs = self.predict(gs)
   #sharpen (cube), clip, and re-normalize the move probabilities to reduce
   #ambiguity, then sample moves from the rescaled distribution
   move_probs = move_probs ** 3
   move_probs = np.clip(move_probs, eps, 1 - eps)
   move_probs = move_probs / np.sum(move_probs)
   candidates = np.arange(num_actions)
   actions = np.random.choice(candidates, num_actions, replace=False, p=move_probs)
   for point_idx in actions:
     point = self.encoder.decode_point_index(point_idx)
     move = Move.play(point)
     if gs.is_valid_move(move) and not is_point_an_eye(gs.board, point, gs.nplayer):
       return move
   return Move.pass_turn()
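
For intuition on the cubing step: raising probabilities to the third power and re-normalizing widens the gap between strong and weak moves. For example, a two-move distribution (0.6, 0.4) becomes (0.216, 0.064) after cubing, which re-normalizes to roughly (0.77, 0.23).
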
Example #12
File: minimax.py Project: armandli/rlgames
 def recursive_minimax_search_(self, game_state, depth, eval_fn):
     # plain depth-limited minimax: black maximizes, white minimizes; depth
     # is decremented whenever black is next to move, i.e. once per full round
     if game_state.is_over():
         winner = game_state.winner()
         if winner == Player.black:
             return (MAX_SCORE, None)
         elif winner is None:
             return (TIE_SCORE, None)
         else:
             return (MIN_SCORE, None)
     if depth == 0:
         return (eval_fn(game_state), None)
     best_moves = list()
     if game_state.nplayer == Player.black:
         best_score = MIN_SCORE
     else:
         best_score = MAX_SCORE
     for m in game_state.legal_moves():
         nstate = game_state.apply_move(m)
         ndepth = depth
         if nstate.nplayer == Player.black:
             ndepth -= 1
         (score,
          _) = self.recursive_minimax_search_(nstate, ndepth, eval_fn)
         if self.is_improvement_(score, best_score, game_state.nplayer):
             best_score = score
             best_moves = [m]
         elif score == best_score:
             best_moves.append(m)
     if len(best_moves) > 0:
         return (best_score, random.choice(best_moves))
     else:
         return (best_score, Move.pass_turn())
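
The search takes an eval_fn for scoring non-terminal positions. A minimal illustrative evaluator, not the project's; it relies only on board.sz (Example #10) and the get_go_string_/color accessors (Examples #18-#19), and scores by stone difference with positive favoring black:

def stone_diff_eval(game_state):
    # count the stones of each color; every occupied point belongs to a go string
    black = white = 0
    for row in range(1, game_state.board.sz + 1):
        for col in range(1, game_state.board.sz + 1):
            string = game_state.board.get_go_string_(Point(row, col))
            if string is None:
                continue
            if string.color == Player.black:
                black += 1
            else:
                white += 1
    return black - white
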
Example #13
 def play_their_move(self):
     their_name = self.their_color.name
     their_letter = their_name[0].upper()
     pos = self.command_and_response('genmove {}\n'.format(their_name))
     if pos.lower() == 'resign':
         self.game_state = self.game_state.apply_move(Move.resign())
         self.stopped = True
     elif pos.lower() == 'pass':
         self.game_state = self.game_state.apply_move(Move.pass_turn())
         self.sgf.append(';{}[]\n'.format(their_letter))
         if self.game_state.pmove.is_pass:
             self.stopped = True
     else:
         move = gtp_position_to_coord(pos)
         self.game_state = self.game_state.apply_move(move)
         self.sgf.append(';{}[{}]\n'.format(their_letter,
                                            self.sgf.coordinates(move)))
Example #14
 def select_move(self, game_state):
   """choose a random valid move that preserves its own eyes."""
   candidates = list()
   for move in game_state.legal_moves():
     if not move.is_pass and not move.is_resign:
       if not is_point_an_eye(game_state.board, move.pt, game_state.nplayer):
         candidates.append(move)
   if not candidates:
     return Move.pass_turn()
   return random.choice(candidates)
Example #15
def main():
    args = parse_args()
    with open(args.file) as fd:
        data = fd.read()
        sgf = sgf_game.from_string(data)
        gs, first_move_done = new_game_from_handicap(sgf)
        print_board(gs.board)
        for item in sgf.main_sequence_iter():
            color, move_tuple = item.get_move()
            point = None
            if color is not None:
                if move_tuple is not None:
                    row, col = move_tuple
                    point = Point(row + 1, col + 1)
                    move = Move.play(point)
                    print('Move ({},{})'.format(row + 1, col + 1))
                else:
                    move = Move.pass_turn()
                gs = gs.apply_move(move)
                print_board(gs.board)
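
Note the coordinate shift: the SGF reader yields zero-indexed (row, col) tuples, while the engine's Point is one-indexed, hence the +1 on both axes.
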
Example #16
File: minimax.py Project: armandli/rlgames
 def recursive_alpha_beta_minimax_search_(self, game_state, depth, eval_fn,
                                          alpha, beta):
     """
 Alpha-Beta Pruning: introduce new parameters alpha and beta.
 alpha is the best value available to the maximizer from the parent to the root
 beta is the best value available to the minimizer from the parent to the root
 we use those 2 values to prune true segments downstream.
 the idea is if the current node is a maximizer, and the beta it has received from its parent (minimizer)
 has a lower value than the maximum value found by the current node, we no longer need to explore downstream.
 if the current node is a minimizer, and the alpha it has received from its parent (maximizer)
 has a higher value than the minimum value found by the current node, we no longer need to explore downstream.
 in this implementation, we don't prune on equality
 """
     if game_state.is_over():
         winner = game_state.winner()
         if winner == Player.black:
             return (MAX_SCORE, None)
         elif winner is None:
             return (TIE_SCORE, None)
         else:
             return (MIN_SCORE, None)
     if depth == 0:
         return (eval_fn(game_state), None)
     best_moves = list()
     if game_state.nplayer == Player.black:
         best_score = MIN_SCORE
     else:
         best_score = MAX_SCORE
     for m in game_state.legal_moves():
         if self.should_prune_(best_score, alpha, beta, game_state.nplayer):
             break
         nstate = game_state.apply_move(m)
         ndepth = depth
         if nstate.nplayer == Player.black:
             ndepth -= 1
         (score, _) = self.recursive_alpha_beta_minimax_search_(
             nstate, ndepth, eval_fn, alpha, beta)
         if self.is_improvement_(score, best_score, game_state.nplayer):
             best_score = score
             best_moves = [m]
             if game_state.nplayer == Player.black:
                 if best_score > alpha:
                     alpha = best_score
             else:
                 if best_score < beta:
                     beta = best_score
         elif score == best_score:
             best_moves.append(m)
     if len(best_moves) > 0:
         return (best_score, random.choice(best_moves))
     else:
         return (best_score, Move.pass_turn())
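
should_prune_ is not shown in this excerpt. A sketch consistent with the docstring above (strict inequalities, so no pruning on equality); the project's actual helper may differ:

 def should_prune_(self, best_score, alpha, beta, player):
     if player == Player.black:
         # maximizer: the parent minimizer will never allow more than beta
         return best_score > beta
     else:
         # minimizer: the parent maximizer will never accept less than alpha
         return best_score < alpha
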
Example #17
 def test_encode(self):
     encoder = get_encoder_by_name('sevenplane', 9)
     gs = GameState.new_game(9)
     gs = gs.apply_move(Move.play(Point(2, 7)))
     gs = gs.apply_move(Move.play(Point(7, 2)))
     gs = gs.apply_move(Move.play(Point(3, 6)))
     gs = gs.apply_move(Move.play(Point(6, 3)))
     gs = gs.apply_move(Move.play(Point(3, 7)))
     gs = gs.apply_move(Move.play(Point(2, 6)))
     gs = gs.apply_move(Move.play(Point(2, 5)))
     code = encoder.encode(gs)
     self.assertEqual(1., code[0][1][5])
Example #18
 def encode(self, game_state):
     board_tensor = np.zeros(self.shape())
     base_plane = {game_state.nplayer: 0, game_state.nplayer.other: 3}
     for row in range(self.sz):
         for col in range(self.sz):
             p = Point(r=row + 1, c=col + 1)
             gostring = game_state.board.get_go_string_(p)
             if gostring is None:
                 if game_state.does_move_violate_ko_(
                         game_state.nplayer, Move.play(p)):
                     board_tensor[6][row][col] = 1
             else:
                 liberty_plane = min(3, gostring.num_liberties) - 1
                 liberty_plane += base_plane[gostring.color]
                 board_tensor[liberty_plane][row][col] = 1
     return board_tensor
Example #19
 def encode(self, game_state):
     board_tensor = np.zeros(self.shape())
     # planes 0-3: current player's stones bucketed by 1, 2, 3, and 4+
     # liberties; planes 4-7: the opponent's. The offset must be 4 here,
     # otherwise both players would share plane 3.
     base_plane = {game_state.nplayer: 0, game_state.nplayer.other: 4}
     next_player = game_state.nplayer
     if next_player == Player.white:
         board_tensor[8] = 1
     else:
         board_tensor[9] = 1
     for r in range(self.sz):
         for c in range(self.sz):
             p = Point(r + 1, c + 1)
             string = game_state.board.get_go_string_(p)
             if string is None:
                 if game_state.does_move_violate_ko_(
                         next_player, Move.play(p)):
                     board_tensor[10][r][c] = 1
             else:
                 liberty_plane = min(4, string.num_liberties) - 1
                 liberty_plane += base_plane[string.color]
                 board_tensor[liberty_plane][r][c] = 1
     return board_tensor
Example #20
def main():
    board_size = 3
    game = GameState.new_game(board_size)
    #bot = MinimaxAgent(5, None)
    bot = MCTSAgent(362000, 0.1, 64)
    while not game.is_over():
        print_board(game.board)
        if game.nplayer == Player.black:
            human_move = input('-- ')
            point = point_from_coord(human_move.strip())
            move = Move.play(point)
        else:
            move = bot.select_move(game)
        print_move(game.nplayer, move)
        game = game.apply_move(move)
    print_board(game.board)
    winner = game.winner()
    if winner is None:
        print("Tie")
    elif winner == Player.black:
        print("Human won")
    else:
        print("Bot won")
Example #21
 def select_move(self, game_state):
     if len(self.cache) != game_state.board.sz * game_state.board.sz:
         self.cache = self.init_cache_(game_state.board.sz)
      root = MCTSNode(game_state)
      # expand the tree with recursive UCT, scoring each new leaf by
      # Monte Carlo rollouts
      for _ in range(self.expandmax):
         new_node = self.recursive_uct_(root)
         if new_node is None:
             break
         qvalue = self.mc_play_(new_node.game_state)
         new_node.update(qvalue, self.mc_trials)
     best_nodes = list()
     best_score = self.init_best_score_(root.game_state.nplayer)
     for child in root.children:
         score = child.qvalue / child.ncount
         if self.is_improvement_(score, best_score,
                                 root.game_state.nplayer):
             best_score = score
             best_nodes = [child]
         elif score == best_score:
             best_nodes.append(child)
     if best_nodes:
         return random.choice(best_nodes).pmove
     else:
         return Move.pass_turn()
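
A plausible implementation of the is_improvement_ helper used here and in the minimax examples, following their black-maximizes/white-minimizes convention (an assumption, not the project's code):

 def is_improvement_(self, score, best_score, player):
     # black prefers higher scores, white prefers lower ones
     if player == Player.black:
         return score > best_score
     return score < best_score
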
Example #22
 def decode_move_index(self, index):
     if index == self.sz * self.sz:
         return Move.pass_turn()
     row = index // self.sz
     col = index % self.sz
     return Move.play(Point(row + 1, col + 1))
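
For reference, the inverse encode_move (called by encode_point in Example #24) would look like this under the same flat, row-major indexing; the r/c field names follow the Point(r=..., c=...) constructor seen in Example #18 and are otherwise an assumption:

 def encode_move(self, move):
     # pass is mapped to the extra index just past the sz * sz board points
     if move.is_pass:
         return self.sz * self.sz
     return self.sz * (move.pt.r - 1) + (move.pt.c - 1)
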
Example #23
 def legal_moves(self):
     if self.is_over():
         return list()
     ret = [Move.play(pt) for pt in self.empty_positions_()]
     ret.append(Move.pass_turn())
     return ret
Example #24
 def encode_point(self, pt):
     return self.encode_move(Move.play(pt))
Example #25
def gtp_position_to_coord(gtp_position):
  col_str, row_str = gtp_position[0], gtp_position[1:]
  point = Point(int(row_str), COLS.find(col_str.upper()) + 1)
  return Move.play(point)
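
COLS here is the string of GTP column letters. Go board coordinates conventionally skip the letter 'I' to avoid confusion with 'l' and '1', so a plausible definition (an assumption about this project) for a 19x19 board is:

COLS = 'ABCDEFGHJKLMNOPQRST'  #note: no 'I', per Go coordinate convention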