    def search_my_move(self, env: ChessEnv, is_root_node=False) -> float:
        """
        Q and V are values for this player (always white).
        P is the policy value for the next player to move (black or white).

        This method searches for possible moves, adds them to a search tree, and eventually returns the
        best move that was found during the search.

        :param ChessEnv env: environment in which to search for the move
        :param boolean is_root_node: whether this is the root node of the search.
        :return float: value of the move. This is calculated by getting a prediction
            from the value network.
        """
        if env.done:
            if env.winner == Winner.draw:
                return 0
            # assert env.white_won != env.white_to_move # side to move can't be winner!
            return -1

        state = state_key(env)

        with self.node_lock[state]:
            if state not in self.tree:
                leaf_p, leaf_v = self.expand_and_evaluate(env)
                self.tree[state].p = leaf_p
                return leaf_v # I'm returning everything from the POV of side to move

            # SELECT STEP
            action_t = self.select_action_q_and_u(env, is_root_node)

            virtual_loss = self.play_config.virtual_loss

            my_visit_stats = self.tree[state]
            my_stats = my_visit_stats.a[action_t]

            my_visit_stats.sum_n += virtual_loss
            my_stats.n += virtual_loss
            my_stats.w -= virtual_loss
            my_stats.q = my_stats.w / my_stats.n

        env.step(action_t.uci())
        leaf_v = self.search_my_move(env)  # next move from enemy POV
        leaf_v = -leaf_v

        # BACKUP STEP
        # on returning search path
        # update: N, W, Q
        with self.node_lock[state]:
            my_visit_stats.sum_n += 1 - virtual_loss
            my_stats.n += 1 - virtual_loss
            my_stats.w += virtual_loss + leaf_v
            my_stats.q = my_stats.w / my_stats.n

        return leaf_v
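The search above relies on per-state visit statistics and per-state locks. A minimal sketch of supporting structures consistent with the usage above (field names follow the code; the original project's definitions may differ):

from collections import defaultdict
from threading import Lock

class ActionStats:
    def __init__(self):
        self.n = 0      # visit count for this action
        self.w = 0.0    # accumulated value for this action
        self.q = 0.0    # mean value, w / n
        self.p = 0.0    # prior probability from the policy head

class VisitStats:
    def __init__(self):
        self.a = defaultdict(ActionStats)  # stats per legal move
        self.sum_n = 0                     # total visits over all actions
        self.p = None                      # raw policy output for this state

# defaultdicts let search_my_move create nodes and locks on first touch
tree = defaultdict(VisitStats)
node_lock = defaultdict(Lock)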
Example #2
def get_buffer(config, game) -> (ChessEnv, list):
    """
    Gets data to load into the buffer by playing a game using PGN data.
    :param Config config: config to use to play the game
    :param pgn.Game game: game to play
    :return (ChessEnv, list((str, list(float)))): the final env and the move data from this game for the SupervisedLearningWorker.buffer
    """
    env = ChessEnv().reset()
    white = ChessPlayer(config, dummy=True)
    black = ChessPlayer(config, dummy=True)
    result = game.headers["Result"]
    white_elo, black_elo = int(game.headers["WhiteElo"]), int(game.headers["BlackElo"])
    white_weight = clip_elo_policy(config, white_elo)
    black_weight = clip_elo_policy(config, black_elo)
    
    actions = []
    while not game.is_end():
        game = game.variation(0)
        actions.append(game.move.uci())
    k = 0
    while not env.done and k < len(actions):
        if env.white_to_move:
            action = white.sl_action(env.observation, actions[k], weight=white_weight) #ignore=True
        else:
            action = black.sl_action(env.observation, actions[k], weight=black_weight) #ignore=True
        env.step(action, False)
        k += 1

    if not env.board.is_game_over() and result != '1/2-1/2':
        env.resigned = True
    if result == '1-0':
        env.winner = Winner.white
        black_win = -1
    elif result == '0-1':
        env.winner = Winner.black
        black_win = 1
    else:
        env.winner = Winner.draw
        black_win = 0

    black.finish_game(black_win)
    white.finish_game(-black_win)

    data = []
    for i in range(len(white.moves)):
        data.append(white.moves[i])
        if i < len(black.moves):
            data.append(black.moves[i])

    return env, data
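get_buffer scales each side's policy targets by player strength through clip_elo_policy. A plausible sketch, assuming minimum and maximum Elo thresholds in the trainer config (the attribute names here are assumptions):

def clip_elo_policy(config, elo):
    # hypothetical thresholds, e.g. 500 and 1800; the config names are assumed
    lo = config.trainer.min_elo_policy
    hi = config.trainer.max_elo_policy
    # linear ramp clipped to [0, 1]: weak games get weight 0, strong games 1
    return min(1.0, max(0.0, (elo - lo) / (hi - lo)))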
Example #3
def start(config: Config):
    PlayWithHumanConfig().update_play_config(config.play)

    me_player = None
    env = ChessEnv().reset()

    while True:
        line = input()
        words = line.rstrip().split(" ",1)
        if words[0] == "uci":
            print("id name ChessZero")
            print("id author ChessZero")
            print("uciok")
        elif words[0] == "isready":
            if not me_player:
                me_player = get_player(config)
            print("readyok")
        elif words[0] == "ucinewgame":
            env.reset()
        elif words[0] == "position":
            words = words[1].split(" ",1)
            if words[0] == "startpos":
                env.reset()
            else:
                if words[0] == "fen": # skip extraneous word
                    words = words[1].split(' ',1)
                fen = words[0]
                for _ in range(5):
                    words = words[1].split(' ',1)
                    fen += " " + words[0]
                env.update(fen)
            if len(words) > 1:
                words = words[1].split(" ",1)
                if words[0] == "moves":
                    for w in words[1].split(" "):
                        env.step(w, False)
        elif words[0] == "go":
            if not me_player:
                me_player = get_player(config)
            action = me_player.action(env, False)
            print(f"bestmove {action}")
        elif words[0] == "stop":
            pass
        elif words[0] == "quit":
            break
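For reference, a typical exchange with this loop (GUI commands flush left, engine replies indented; the reply move is illustrative):

#   uci
#       id name ChessZero
#       id author ChessZero
#       uciok
#   isready
#       readyok
#   position startpos moves e2e4
#   go
#       bestmove e7e5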
Example #4
def play_game(config, cur, ng, current_white: bool) -> (float, ChessEnv, bool):
    """
    Plays a game against models cur and ng and reports the results.

    :param Config config: config for how to play the game
    :param ChessModel cur: should be the current model
    :param ChessModel ng: should be the next generation model
    :param bool current_white: whether cur should play white or black
    :return (float, ChessEnv, bool): the score for the ng model
        (0 for loss, .5 for draw, 1 for win), the env after the game is finished, and a bool
        which is true iff cur played as white in that game.
    """
    cur_pipes = cur.pop()
    ng_pipes = ng.pop()
    env = ChessEnv().reset()

    current_player = ChessPlayer(config, pipes=cur_pipes, play_config=config.eval.play_config)
    ng_player = ChessPlayer(config, pipes=ng_pipes, play_config=config.eval.play_config)
    if current_white:
        white, black = current_player, ng_player
    else:
        white, black = ng_player, current_player

    while not env.done:
        if env.white_to_move:
            action = white.action(env)
        else:
            action = black.action(env)
        env.step(action)
        if env.num_halfmoves >= config.eval.max_game_length:
            env.adjudicate()

    if env.winner == Winner.draw:
        ng_score = 0.5
    elif env.white_won == current_white:
        ng_score = 0
    else:
        ng_score = 1
    cur.append(cur_pipes)
    ng.append(ng_pipes)
    return ng_score, env, current_white
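play_game yields one ng_score per game; a caller would typically aggregate scores over many games before deciding whether ng replaces cur. A minimal sketch under that assumption (the game count and the 0.55 threshold are illustrative, not the project's settings):

def evaluate_candidate(config, cur, ng, n_games=100, threshold=0.55):
    # alternate colors so neither model always plays white
    total = 0.0
    for i in range(n_games):
        ng_score, env, cur_was_white = play_game(config, cur, ng, current_white=(i % 2 == 0))
        total += ng_score
    win_rate = total / n_games
    return win_rate >= threshold  # promote ng only on a clear margin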
Example #5
def start(config: Config):
    PlayWithHumanConfig().update_play_config(config.play)
    chess_model = PlayWithHuman(config)

    env = ChessEnv().reset()
    human_is_black = random() < 0.5
    chess_model.start_game(human_is_black)

    while not env.done:
        if (env.board.turn == chess.BLACK) == human_is_black:
            action = chess_model.move_by_human(env)
            print("You move to: " + action)
        else:
            action = chess_model.move_by_ai(env)
            print("AI moves to: " + action)
        board, info = env.step(action)
        env.render()
        print("Board FEN = " + board.fen())

    print("\nEnd of the game.") #spaces after this?
    print("Game result:") #and this?
    print(env.board.result())
    def play_game(self, best_model, ng_model):
        env = ChessEnv().reset()

        best_player = ChessPlayer(self.config,
                                  best_model,
                                  play_config=self.config.eval.play_config)
        ng_player = ChessPlayer(self.config,
                                ng_model,
                                play_config=self.config.eval.play_config)
        best_is_white = random() < 0.5
        if not best_is_white:
            black, white = best_player, ng_player
        else:
            black, white = ng_player, best_player

        observation = env.observation
        while not env.done:
            if env.board.turn == chess.BLACK:
                action = black.action(observation)
            else:
                action = white.action(observation)
            board, info = env.step(action)
            observation = board.fen()

        ng_win = None  # remains None if the game is drawn
        if env.winner == Winner.white:
            if best_is_white:
                ng_win = 0
            else:
                ng_win = 1
        elif env.winner == Winner.black:
            if best_is_white:
                ng_win = 1
            else:
                ng_win = 0
        return ng_win, best_is_white
Example #7
def play_game(config, cur, ng, current_white: bool) -> (float, ChessEnv, bool, float, float):
    """
    Plays a game against models cur and ng and reports the results.

    :param Config config: config for how to play the game
    :param ChessModel cur: should be the current model
    :param ChessModel ng: should be the next generation model
    :param bool current_white: whether cur should play white or black
    :return (float, ChessEnv, bool, float, float): the score for the ng model
        (0 for loss, .5 for draw, 1 for win), the env after the game is finished, a bool
        which is true iff cur played as white in that game, and the total time and number
        of timed steps spent in the action_modify calls.
    """
    cur_pipes = cur.pop()
    ng_pipes = ng.pop()
    env = ChessEnv().reset()

    current_player = ChessPlayer(config,
                                 pipes=cur_pipes,
                                 play_config=config.eval.play_config)
    ng_player = ChessPlayer(config,
                            pipes=ng_pipes,
                            play_config=config.eval.play_config)
    if current_white:
        white, black = current_player, ng_player
    else:
        white, black = ng_player, current_player

    total = 0.0
    total_step = 0.0

    while not env.done:
        if env.white_to_move:
            if current_white:
                action = white.action(env)
            else:
                start_time = time()
                action = white.action_modify(env)
                total += time() - start_time
                total_step += 1
        else:
            if current_white:
                start_time = time()
                action = black.action_modify(env)
                total += time() - start_time
                total_step += 1
            else:
                action = black.action(env)

        env.step(action)
        if env.num_halfmoves >= config.eval.max_game_length:
            env.adjudicate()

    #print(f"time={total:5.1f}s total step = {total_step} average = {total/total_step:5.1f}")
    #exit()

    if env.winner == Winner.draw:
        ng_score = 0.5
    elif env.white_won == current_white:
        ng_score = 0
    else:
        ng_score = 1
    cur.append(cur_pipes)
    ng.append(ng_pipes)
    return ng_score, env, current_white, total, total_step
Example #8
    def search_my_move(self,
                       env: ChessEnv,
                       is_root_node=False,
                       tid=0) -> float:  # DFS to the leaf and back up
        """
        Q and V are values for this player (always white).
        P is the policy value for the next player to move (black or white).
        :return: leaf value
        """
        if env.done:
            if env.winner == Winner.draw:
                return 0
            return -1

        state = state_key(env)

        with self.node_lock[state]:
            if state not in self.tree:
                leaf_p, leaf_v = self.expand_and_evaluate(env)
                self.tree[state].p = leaf_p
                self.tree[state].legal_moves = state_moves(env)
                return leaf_v  # I'm returning everything from the POV of side to move

            if tid in self.tree[state].visit:  # this thread already visited this node: a search loop, scored as a draw
                return 0

            self.tree[state].visit.append(tid)
            # SELECT STEP
            canon_action = self.select_action_q_and_u(state, is_root_node)

            virtual_loss = self.config.play.virtual_loss
            my_visit_stats = self.tree[state]
            my_visit_stats.sum_n += virtual_loss

            my_stats = my_visit_stats.a[canon_action]
            my_stats.n += virtual_loss
            my_stats.w -= virtual_loss
            my_stats.q = my_stats.w / my_stats.n

        if env.white_to_move:
            env.step(canon_action)
        else:
            env.step(flip_move(canon_action))
        leaf_v = self.search_my_move(env, False,
                                     tid)  # next move from enemy POV
        leaf_v = -leaf_v

        # BACKUP STEP
        # on returning search path
        # update: N, W, Q
        with self.node_lock[state]:
            my_visit_stats = self.tree[state]
            my_visit_stats.visit.remove(tid)
            my_visit_stats.sum_n += 1 - virtual_loss

            my_stats = my_visit_stats.a[canon_action]
            my_stats.n += 1 - virtual_loss
            my_stats.w += leaf_v + virtual_loss
            my_stats.q = my_stats.w / my_stats.n

        return leaf_v
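This variant searches a canonical white-to-move representation, so when black is to move the canonical action must be mirrored onto the real board. A sketch of what flip_move might do for UCI strings, assuming a vertical mirror (rank r maps to 9 - r, files unchanged):

def flip_move(uci: str) -> str:
    # e.g. 'e7e5' -> 'e2e4'; a promotion suffix like 'q' is preserved
    def flip_square(sq: str) -> str:
        return sq[0] + str(9 - int(sq[1]))
    return flip_square(uci[0:2]) + flip_square(uci[2:4]) + uci[4:]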
Example #9
def self_play_buffer(config, cur) -> (ChessEnv, list):
    """
    Play one game and add the play data to the buffer
    :param Config config: config for how to play
    :param list(Connection) cur: list of pipes to use to get a pipe to send observations to for getting
        predictions. One will be removed from this list during the game, then added back
    :return (ChessEnv, list((str, list(float)))): a tuple containing the final ChessEnv state and a list
        of data to be appended to the SelfPlayWorker.buffer
    """
    pipes = cur.pop()  # borrow
    env = ChessEnv().reset()
    # EDIT CODE HERE TO CHANGE THE ENVIRONMENT

    white = ChessPlayer(config, pipes=pipes)
    black = ChessPlayer(config, pipes=pipes)
    move = 0
    failed_play = 0
    total_failed_plays = 0
    print("Match Started")
    moves_list = ""
    while not env.done:
        # CHANGES_MADE_HERE
        temp = deepcopy(env)
        black_pieces = set("prnbqk")
        white_pieces = set("PRNBQK")

        if env.white_to_move:
            x = temp.board.piece_map()
            for i in x:
                if str(x[i]) in black_pieces:
                    temp.board.remove_piece_at(i)
            action = white.action(temp)
        else:
            x = temp.board.piece_map()
            for i in x:
                if str(x[i]) in white_pieces:
                    temp.board.remove_piece_at(i)
            action = black.action(temp)
        print("Match in Progress: ",
              move,
              "Moves made in the game, Failed Plays: ",
              total_failed_plays,
              end='\r')
        try:
            env.step(action)
            moves_list += action + ', '
            failed_play = 0
            move += 1
            if env.num_halfmoves >= config.play.max_game_length:
                env.adjudicate()
        except ValueError:
            failed_play += 1
            total_failed_plays += 1
            if failed_play == 50:
                logger.warning("\nEnding the Game due to lack of development")
                env.adjudicate()
            continue

        # END_OF_CHANGES
    with open("result.csv", "a+") as fp:
        result = str(move) + ", " + str(total_failed_plays) + ", " + str(
            env.winner) + ", <" + env.board.fen()
        result += ">, Adjudicated\n" if failed_play == 50 else ">, Game End\n"
        fp.write(result)
        fp.close()

    with open("moves_list.csv", "a+") as fp:
        fp.write(moves_list)
        fp.write("\n")
        fp.close()

    if env.winner == Winner.white:
        black_win = -1
        logger.info("White wins")
    elif env.winner == Winner.black:
        black_win = 1
        logger.info("Black wins")
    else:
        black_win = 0
        logger.info("Draw Match")

    black.finish_game(black_win)
    white.finish_game(-black_win)

    data = []
    for i in range(len(white.moves)):
        data.append(white.moves[i])
        if i < len(black.moves):
            data.append(black.moves[i])

    cur.append(pipes)
    return env, data
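The piece-hiding block above is duplicated for both colors; it can be factored into one helper with the same behavior (a sketch; hidden_env is a name introduced here, not from the project):

def hidden_env(env, hide_black: bool):
    # copy the position and strip the opponent's pieces so the mover
    # searches with imperfect information, exactly as in the loop above
    temp = deepcopy(env)
    hidden = set("prnbqk") if hide_black else set("PRNBQK")
    for square, piece in temp.board.piece_map().items():
        if str(piece) in hidden:
            temp.board.remove_piece_at(square)
    return temp

# usage inside the loop:
#   temp = hidden_env(env, hide_black=env.white_to_move)
#   action = (white if env.white_to_move else black).action(temp)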
Example #10
    def search_my_move_m(self,
                         env: ChessEnv,
                         is_root_node=False,
                         version=0) -> float:
        """
        Q and V are values for this player (always white).
        P is the policy value for the next player to move (black or white).

        This method searches for possible moves, adds them to a search tree, and eventually returns the
        best move that was found during the search.

        :param ChessEnv env: environment in which to search for the move
        :param boolean is_root_node: whether this is the root node of the search.
        :return float: value of the move. This is calculated by getting a prediction
            from the value network.
        """
        if env.done:
            if env.winner == Winner.draw:
                return 0
            # assert env.white_won != env.white_to_move # side to move can't be winner!
            return -1

        state = state_key(env)

        if version == 0:
            with self.node_lock[state]:
                if state not in self.tree:
                    leaf_p, leaf_v = self.expand_and_evaluate(env)
                    self.tree[state].p = leaf_p
                    return leaf_v  # I'm returning everything from the POV of side to move

                # SELECT STEP
                action_t = self.select_action_q_and_u(env, is_root_node)

                virtual_loss = self.play_config.virtual_loss_t

                my_visit_stats = self.tree[state]
                my_stats = my_visit_stats.a[action_t]

                my_visit_stats.sum_n += virtual_loss
                my_stats.n += virtual_loss
                my_stats.w -= virtual_loss
                my_stats.q = my_stats.w / my_stats.n

            env.step(action_t.uci())
            leaf_v = self.search_my_move_m(
                env, version=0)  # next move from enemy POV
            leaf_v = -leaf_v

            # BACKUP STEP
            # on returning search path
            # update: N, W, Q
            with self.node_lock[state]:
                my_visit_stats.sum_n += 1 - virtual_loss
                my_stats.n += 1 - virtual_loss
                my_stats.w += virtual_loss + leaf_v
                my_stats.q = my_stats.w / my_stats.n

        # testing version 1: constant virtual loss
        elif version == 1:
            with self.node_lock[state]:
                if state not in self.tree:
                    leaf_p, leaf_v = self.expand_and_evaluate(env)
                    self.tree[state].p = leaf_p
                    return leaf_v  # I'm returning everything from the POV of side to move

                # SELECT STEP
                action_t = self.select_action_q_and_u(env, is_root_node)

                virtual_loss = self.play_config.virtual_loss_t

                my_visit_stats = self.tree[state]
                my_stats = my_visit_stats.a[action_t]
                my_stats.v -= virtual_loss

            env.step(action_t.uci())
            leaf_v = self.search_my_move_m(
                env, version=1)  # next move from enemy POV
            leaf_v = -leaf_v

            # BACKUP STEP
            # on returning search path
            # update: N, W, Q
            with self.node_lock[state]:
                my_visit_stats.sum_n += 1
                my_stats.n += 1
                my_stats.w += leaf_v
                my_stats.q = my_stats.w / my_stats.n
                my_stats.v += virtual_loss

        # testing version 2: diminishing virtual loss
        elif version == 2:
            with self.node_lock[state]:
                if state not in self.tree:
                    leaf_p, leaf_v = self.expand_and_evaluate(env)
                    self.tree[state].p = leaf_p
                    return leaf_v  # I'm returning everything from the POV of side to move

                # SELECT STEP
                action_t = self.select_action_q_and_u(env, is_root_node)

                my_visit_stats = self.tree[state]
                my_stats = my_visit_stats.a[action_t]

                virtual_loss = self.play_config.virtual_loss_t / (
                    my_stats.n * my_stats.n + 1)

                my_stats.v -= virtual_loss

            env.step(action_t.uci())
            leaf_v = self.search_my_move_m(
                env, version=2)  # next move from enemy POV
            leaf_v = -leaf_v

            # BACKUP STEP
            # on returning search path
            # update: N, W, Q
            with self.node_lock[state]:
                my_visit_stats.sum_n += 1
                my_stats.n += 1
                my_stats.w += leaf_v
                my_stats.q = my_stats.w / my_stats.n
                my_stats.v += virtual_loss

        else:
            raise ValueError(f"unknown search version: {version}")
        return leaf_v
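For intuition, version 2's penalty virtual_loss_t / (n*n + 1) falls off quickly as a move accumulates visits; a quick numeric check (virtual_loss_t = 3 is illustrative):

vl_t = 3.0  # illustrative virtual_loss_t
for n in range(5):
    print(n, vl_t / (n * n + 1))
# prints: 0 3.0, 1 1.5, 2 0.6, 3 0.3, 4 0.17647...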