Example #1
    def select_move(self, gs):
        num_moves = self.encoder.num_points()
        board_tensor = self.encoder.encode(gs)
        X = np.array([board_tensor])
        actions, values = self.model.predict(X)
        move_probs = actions[0]
        estimated_value = values[0][0]
        eps = 1e-6
        #TODO: how to use temperature here?
        #TODO: move this into a function that does the clipping
        move_probs = np.clip(move_probs, eps, 1 - eps)
        move_probs = move_probs / np.sum(move_probs)

        candidates = np.arange(num_moves)
        ranked_moves = np.random.choice(candidates,
                                        num_moves,
                                        replace=False,
                                        p=move_probs)
        for point_idx in ranked_moves:
            point = self.encoder.decode_point_index(point_idx)
            move = Move.play(point)
            is_move_valid = gs.is_valid_move(move)
            is_eye = is_point_an_eye(gs.board, point, gs.nplayer)
            if is_move_valid and not is_eye:
                if self.collector is not None:
                    self.collector.record_decision(
                        state=board_tensor,
                        action=point_idx,
                        estimated_value=estimated_value)
                return move
        return Move.pass_turn()
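The two TODOs above can be resolved together: Example #7 below shows one way to apply a temperature (a fixed exponent on the probabilities before sampling), and the clipping can move into a small helper such as the clip_probs_ method that Example #4 already calls. A minimal sketch of that helper, inferred from the clipping code above rather than taken from the original source:

    def clip_probs_(self, probs, eps=1e-6):
        # Keep every probability strictly inside (eps, 1 - eps) so that
        # np.random.choice never receives an exact 0 or 1, then renormalize
        # so the probabilities sum to 1 again.
        probs = np.clip(probs, eps, 1 - eps)
        return probs / np.sum(probs)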
Example #2
 def test_encode(self):
     encoder = get_encoder_by_name('oneplane', 9)
     gs = GameState.new_game(9)
     gs = gs.apply_move(Move.play(Point(5, 5)))
     gs = gs.apply_move(Move.play(Point(4, 5)))
     code = encoder.encode(gs)
     self.assertEqual(1, code[0][4][4])
     self.assertEqual(-1, code[0][3][4])
Example #3
def main():
    args = parse_args()
    agent = load_agent(args)
    board_size = args.board_size
    game = GameState.new_game(board_size)
    if args.playas == 'black':
        human_play = Player.black
    elif args.playas == 'white':
        human_play = Player.white
    else:
        raise ValueError('Unknown option for playas: {}'.format(args.playas))
    while not game.is_over():
        print_board(game.board)
        if game.nplayer == human_play:
            human_move = input('-- ')
            if len(human_move) > 1:
                point = point_from_coord(human_move.strip())
                move = Move.play(point)
            else:
                move = Move.pass_turn()
        else:
            move = agent.select_move(game)
        print_move(game.nplayer, move)
        game = game.apply_move(move)
    winner = game.winner()
    if winner is None:
        print("Tie")
    elif winner == Player.black:
        print("Black win")
    else:
        print("White win")
Example #4
 def select_move(self, gs):
     num_moves = self.encoder.num_points()
     board_tensor = self.encoder.encode(gs)
     X = np.array([board_tensor])
     # epsilon greedy exploration
     if np.random.random() < self.temperature:
         move_probs = np.ones(num_moves) / num_moves
     else:
         move_probs = self.model.predict(X)[0]
         move_probs = self.clip_probs_(move_probs)
     candidates = np.arange(num_moves)
     ranked_moves = np.random.choice(candidates,
                                     num_moves,
                                     replace=False,
                                     p=move_probs)
     for pt_idx in ranked_moves:
         point = self.encoder.decode_point_index(pt_idx)
         move = Move.play(point)
         is_valid = gs.is_valid_move(move)
         is_eye = is_point_an_eye(gs.board, point, gs.nplayer)
         if is_valid and not is_eye:
             if self.collector is not None:
                 self.collector.record_decision(state=board_tensor,
                                                action=pt_idx)
             return move
     # This policy-gradient agent does not learn from passing, so we only
     # pass when no other sensible move is available.
     return Move.pass_turn()
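Note the design choice here: self.temperature doubles as the epsilon of epsilon-greedy exploration. With probability temperature the agent samples uniformly over all points; otherwise it samples from the clipped model output, so a higher temperature means more exploration.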
Example #5
def main():
    args = parse_args()
    board_size = args.size
    game = GameState.new_game(board_size)
    #bot = RandomAgent()
    bot = MCTSAgent(100, 1., 64)
    while not game.is_over():
        print_board(game.board)
        if game.nplayer == Player.black:
            human_move = input('-- ')
            if len(human_move) > 1:
                point = point_from_coord(human_move.strip())
                move = Move.play(point)
            else:
                move = Move.pass_turn()
        else:
            move = bot.select_move(game)
        print_move(game.nplayer, move)
        game = game.apply_move(move)
    winner = game.winner()
    if winner is None:
        print("Tie")
    elif winner == Player.black:
        print("Black win")
    else:
        print("White win")
Example #6
 def select_move(self, gs):
   board_tensor = self.encoder.encode(gs)
   moves = []
   board_tensors = []
   for move in gs.legal_moves():
     if not move.is_play:
       continue
     moves.append(self.encoder.encode_point(move.pt))
     board_tensors.append(board_tensor)
   if not moves:
     return Move.pass_turn()
   num_moves = len(moves)
   board_tensors = np.array(board_tensors)
   move_vectors = np.zeros((num_moves, self.encoder.num_points()))
   for i, move in enumerate(moves):
     move_vectors[i][move] = 1.
   #Q-learning uses 2 input tensors: the states and the actions
   values = self.model.predict([board_tensors, move_vectors])
   values = values.reshape(len(moves))
   ranked_moves = self.rank_move_eps_greedy(values)
   for move_idx in ranked_moves:
     point = self.encoder.decode_point_index(moves[move_idx])
     if not is_point_an_eye(gs.board, point, gs.nplayer):
       if self.collector is not None:
         self.collector.record_decision(state=board_tensor, action=moves[move_idx])
       return Move.play(point)
   return Move.pass_turn()
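The predict call above implies a model with two inputs. A minimal sketch of such a network in the Keras functional API; the layer sizes and names here are illustrative assumptions, not the original architecture:

    from keras.layers import Conv2D, Dense, Flatten, Input, concatenate
    from keras.models import Model

    def build_q_model(encoder):
        # Two inputs, matching model.predict([board_tensors, move_vectors]).
        board_input = Input(shape=encoder.shape(), name='board_input')
        action_input = Input(shape=(encoder.num_points(),), name='action_input')
        # Process the board planes, then merge in the one-hot action vector.
        conv = Conv2D(64, (3, 3), padding='same', activation='relu',
                      data_format='channels_first')(board_input)
        flat = Flatten()(conv)
        merged = concatenate([flat, action_input])
        # A single tanh output estimates the action value in [-1, 1].
        value_output = Dense(1, activation='tanh')(merged)
        return Model(inputs=[board_input, action_input], outputs=value_output)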
Example #7
 def select_move(self, gs):
   eps = 1e-6
   num_actions = self.encoder.num_points()
   move_probs = self.predict(gs)
   # Sharpen the distribution by cubing it, clip away exact 0s and 1s,
   # renormalize, and then sample all indices without replacement to get
   # a ranked list of candidate moves.
   move_probs = move_probs ** 3
   move_probs = np.clip(move_probs, eps, 1 - eps)
   move_probs = move_probs / np.sum(move_probs)
   candidates = np.arange(num_actions)
   actions = np.random.choice(candidates, num_actions, replace=False, p=move_probs)
   for point_idx in actions:
     point = self.encoder.decode_point_index(point_idx)
     if gs.is_valid_move(Move.play(point)) and not is_point_an_eye(gs.board, point, gs.nplayer):
       return Move.play(point)
   return Move.pass_turn()
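The fixed exponent 3 acts as an inverse temperature: raising the probabilities to a power above 1 concentrates mass on the strongest moves before renormalizing. A sketch of the same rescaling with a tunable parameter, where self.temperature is an assumed attribute rather than part of this snippet:

    # temperature < 1 sharpens the distribution, temperature > 1 flattens it;
    # temperature == 1/3 reproduces the fixed cube above.
    move_probs = move_probs ** (1.0 / self.temperature)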
Example #8
 def random_move_(self, game_state):
     idxes = np.arange(len(self.cache))
     np.random.shuffle(idxes)
     for idx in idxes:
         m = Move.play(self.cache[idx])
         if (game_state.is_valid_move(m) and not is_point_an_eye(
                 game_state.board, m.pt, game_state.nplayer)):
             return m
     return Move.pass_turn()
Example #9
 def legal_moves(self):
   if self.is_over():
      return []
   ret = [Move.pass_turn(), Move.resign()]
   for ri in range(1, self.board.sz + 1):
     for ci in range(1, self.board.sz + 1):
       m = Move.play(Point(ri, ci))
       if self.is_valid_move(m):
         ret.append(m)
   return ret
Example #10
 def test_encode(self):
     encoder = get_encoder_by_name('sevenplane', 9)
     gs = GameState.new_game(9)
     gs = gs.apply_move(Move.play(Point(2, 7)))
     gs = gs.apply_move(Move.play(Point(7, 2)))
     gs = gs.apply_move(Move.play(Point(3, 6)))
     gs = gs.apply_move(Move.play(Point(6, 3)))
     gs = gs.apply_move(Move.play(Point(3, 7)))
     gs = gs.apply_move(Move.play(Point(2, 6)))
     gs = gs.apply_move(Move.play(Point(2, 5)))
     code = encoder.encode(gs)
     self.assertEqual(1., code[0][1][5])
Example #11
 def encode(self, game_state):
     board_tensor = np.zeros(self.shape())
     base_plane = {game_state.nplayer: 0, game_state.nplayer.other: 3}
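      # Planes 0-2: the next player's stones with 1, 2, and 3+ liberties;
      # planes 3-5: the opponent's stones; plane 6: the move would violate ko.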
     for row in range(self.sz):
         for col in range(self.sz):
             p = Point(r=row + 1, c=col + 1)
             gostring = game_state.board.get_go_string_(p)
             if gostring is None:
                 if game_state.does_move_violate_ko_(
                         game_state.nplayer, Move.play(p)):
                     board_tensor[6][row][col] = 1
             else:
                 liberty_plane = min(3, gostring.num_liberties) - 1
                 liberty_plane += base_plane[gostring.color]
                 board_tensor[liberty_plane][row][col] = 1
     return board_tensor
Example #12
def main():
    args = parse_args()
    with open(args.file) as fd:
        data = fd.read()
        sgf = sgf_game.from_string(data)
        gs, first_move_done = new_game_from_handicap(sgf)
        print_board(gs.board)
        for item in sgf.main_sequence_iter():
            color, move_tuple = item.get_move()
            point = None
            if color is not None:
                if move_tuple is not None:
                    row, col = move_tuple
                    point = Point(row + 1, col + 1)
                    move = Move.play(point)
                    print('Move ({},{})'.format(row + 1, col + 1))
                else:
                    move = Move.pass_turn()
                gs = gs.apply_move(move)
                print_board(gs.board)
Example #13
 def encode(self, game_state):
     board_tensor = np.zeros(self.shape())
      # Planes 0-3: the next player's stones with 1, 2, 3, and 4+ liberties;
      # planes 4-7: the opponent's stones; plane 8: white moves next;
      # plane 9: black moves next; plane 10: the move would violate ko.
      # The opponent's base plane must be 4, not 3, or one-liberty opponent
      # strings would collide with the next player's 4+ liberty plane.
      base_plane = {game_state.nplayer: 0, game_state.nplayer.other: 4}
     next_player = game_state.nplayer
     if next_player == Player.white:
         board_tensor[8] = 1
     else:
         board_tensor[9] = 1
     for r in range(self.sz):
         for c in range(self.sz):
             p = Point(r + 1, c + 1)
             string = game_state.board.get_go_string_(p)
             if string is None:
                 if game_state.does_move_violate_ko_(
                         next_player, Move.play(p)):
                     board_tensor[10][r][c] = 1
             else:
                 liberty_plane = min(4, string.num_liberties) - 1
                 liberty_plane += base_plane[string.color]
                 board_tensor[liberty_plane][r][c] = 1
     return board_tensor
Example #14
def main():
    board_size = 3
    game = GameState.new_game(board_size)
    #bot = MinimaxAgent(5, None)
    bot = MCTSAgent(362000, 0.1, 64)
    while not game.is_over():
        print_board(game.board)
        if game.nplayer == Player.black:
            human_move = input('-- ')
            point = point_from_coord(human_move.strip())
            move = Move.play(point)
        else:
            move = bot.select_move(game)
        print_move(game.nplayer, move)
        game = game.apply_move(move)
    print_board(game.board)
    winner = game.winner()
    if winner is None:
        print("Tie")
    elif winner == Player.black:
        print("Human won")
    else:
        print("Bot won")
Example #15
def gtp_position_to_coord(gtp_position):
  col_str, row_str = gtp_position[0], gtp_position[1:]
  point = Point(int(row_str), COLS.find(col_str.upper()) + 1)
  return Move.play(point)
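A usage sketch, assuming COLS is the string of GTP column letters with 'I' omitted ('ABCDEFGHJKLMNOPQRST'):

    move = gtp_position_to_coord('D4')  # Move.play(Point(4, 4))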
Example #16
 def decode_move_index(self, index):
     if index == self.sz * self.sz:
         return Move.pass_turn()
     row = index // self.sz
     col = index % self.sz
     return Move.play(Point(row + 1, col + 1))
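The mapping is row-major: on a 9x9 board, index 0 decodes to Point(1, 1), index 8 to Point(1, 9), and index 81 (sz * sz) decodes to a pass.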
Example #17
 def legal_moves(self):
     if self.is_over():
          return []
     ret = [Move.play(pt) for pt in self.empty_positions_()]
     ret.append(Move.pass_turn())
     return ret
Example #18
 def encode_point(self, pt):
     return self.encode_move(Move.play(pt))