Beispiel #1
0
def supervised_buffer(config, game) -> (ChessEnv, list):
    env = ChessEnv(config).reset()
    white = ChessPlayer(config, dummy=True)
    black = ChessPlayer(config, dummy=True)
    result = game.headers["Result"]
    env.board = game.board()
    for move in game.main_line():
        ai = white if env.board.turn == chess.WHITE else black
        ai.sl_action(env, move)
        env.step(move)

    if not env.board.is_game_over() and result != '1/2-1/2':
        env.resigned = True
    if result == '1-0':
        env.winner = Winner.WHITE
        white_win = 1
    elif result == '0-1':
        env.winner = Winner.BLACK
        white_win = -1
    else:
        env.winner = Winner.DRAW
        white_win = 0

    white.finish_game(white_win)
    black.finish_game(-white_win)
    return env, merge_data(white, black)
Beispiel #2
0
def self_play_buffer(config, cur) -> (ChessEnv, list):
    pipes = cur.pop()  # borrow
    env = ChessEnv().reset()

    white = ChessPlayer(config, pipes=pipes)
    black = ChessPlayer(config, pipes=pipes)

    while not env.done:
        if env.white_to_move:
            action = white.action(env)
        else:
            action = black.action(env)
        env.step(action)
        if env.num_halfmoves >= config.play.max_game_length:
            env.adjudicate()

    if env.winner == Winner.white:
        black_win = -1
    elif env.winner == Winner.black:
        black_win = 1
    else:
        black_win = 0

    black.finish_game(black_win)
    white.finish_game(-black_win)

    data = []
    for i in range(len(white.moves)):
        data.append(white.moves[i])
        if i < len(black.moves):
            data.append(black.moves[i])

    cur.append(pipes)
    return env, data
Beispiel #3
0
def get_buffer(config, game) -> (ChessEnv, list):
    """
    Gets data to load into the buffer by playing a game using PGN data.
    :param Config config: config to use to play the game
    :param pgn.Game game: game to play
    :return list(str,list(float)): data from this game for the SupervisedLearningWorker.buffer
    """
    env = ChessEnv().reset()
    white = ChessPlayer(config, dummy=True)
    black = ChessPlayer(config, dummy=True)
    result = game.headers["Result"]
    white_elo, black_elo = int(game.headers["WhiteElo"]), int(
        game.headers["BlackElo"])
    white_weight = clip_elo_policy(config, white_elo)
    black_weight = clip_elo_policy(config, black_elo)

    actions = []
    while not game.is_end():
        game = game.variation(0)
        actions.append(game.move.uci())
    k = 0
    while not env.done and k < len(actions):
        if env.white_to_move:
            action = white.sl_action(env.observation,
                                     actions[k],
                                     weight=white_weight)  #ignore=True
        else:
            action = black.sl_action(env.observation,
                                     actions[k],
                                     weight=black_weight)  #ignore=True
        env.step(action, False)
        k += 1

    if not env.board.is_game_over() and result != '1/2-1/2':
        env.resigned = True
    if result == '1-0':
        env.winner = Winner.white
        black_win = -1
    elif result == '0-1':
        env.winner = Winner.black
        black_win = 1
    else:
        env.winner = Winner.draw
        black_win = 0

    black.finish_game(black_win)
    white.finish_game(-black_win)

    data = []
    for i in range(len(white.moves)):
        data.append(white.moves[i])
        if i < len(black.moves):
            data.append(black.moves[i])

    return env, data
Beispiel #4
0
def get_buffer(config, game) -> (ChessEnv, list):
    """
    Gets data to load into the buffer by playing a game using PGN data.
    :param Config config: config to use to play the game
    :param pgn.Game game: game to play
    :return list(str,list(float)): data from this game for the SupervisedLearningWorker.buffer
    """
    env = ChessEnv().reset()
    white = ChessPlayer(config, dummy=True)
    black = ChessPlayer(config, dummy=True)
    result = game.headers["Result"]
    white_elo, black_elo = int(game.headers["WhiteElo"]), int(game.headers["BlackElo"])
    white_weight = clip_elo_policy(config, white_elo)
    black_weight = clip_elo_policy(config, black_elo)
    
    actions = []
    while not game.is_end():
        game = game.variation(0)
        actions.append(game.move.uci())
    k = 0
    while not env.done and k < len(actions):
        if env.white_to_move:
            action = white.sl_action(env.observation, actions[k], weight=white_weight) #ignore=True
        else:
            action = black.sl_action(env.observation, actions[k], weight=black_weight) #ignore=True
        env.step(action, False)
        k += 1

    if not env.board.is_game_over() and result != '1/2-1/2':
        env.resigned = True
    if result == '1-0':
        env.winner = Winner.white
        black_win = -1
    elif result == '0-1':
        env.winner = Winner.black
        black_win = 1
    else:
        env.winner = Winner.draw
        black_win = 0

    black.finish_game(black_win)
    white.finish_game(-black_win)

    data = []
    for i in range(len(white.moves)):
        data.append(white.moves[i])
        if i < len(black.moves):
            data.append(black.moves[i])

    return env, data
def self_play_buffer(config, cur) -> (GoBangEnv, list):
    """
    Play one game and add the play data to the buffer
    :param Config config: config for how to play
    :param list(Connection) cur: list of pipes to use to get a pipe to send observations to for getting
        predictions. One will be removed from this list during the game, then added back
    :return (GoBangEnv,list((str,list(float)): a tuple containing the final GoBangEnv state and then a list
        of data to be appended to the SelfPlayWorker.buffer
    """
    pipes = cur.pop() # borrow
    env = GoBangEnv().reset()

    white = ChessPlayer(config, pipes=pipes)
    black = ChessPlayer(config, pipes=pipes)

    while not env.done:
        if env.white_to_move:
            action = white.action(env)
        else:
            action = black.action(env)
        # pretty_print_panel(env.board.panel)
        # print(f'After action:{action}')
        env.step(action)
        # pretty_print_panel(env.board.panel)
        # print()
        # if env.num_halfmoves >= config.play.max_game_length:
        #     env.adjudicate()

    if env.winner == Winner.white:
        black_score, white_score = -1, 1
    elif env.winner == Winner.black:
        black_score, white_score = 1, -1
    else:
        black_score, white_score = -0.5, -0.5

    black.finish_game(black_score)
    white.finish_game(white_score)

    data = []
    for i in range(len(black.moves)):
        data.append(black.moves[i])
        if i < len(white.moves):
            data.append(white.moves[i])
            
    pretty_print_panel(env.board.panel)
    print()
    #print(data)
    
    cur.append(pipes)
    return env, data
Beispiel #6
0
def get_buffer(config, game) -> (ChessEnv, list):
    env = ChessEnv().reset()
    white = ChessPlayer(config, dummy=True)
    black = ChessPlayer(config, dummy=True)
    result = game.headers["Result"]
    white_elo, black_elo = int(game.headers["WhiteElo"]), int(game.headers["BlackElo"])
    white_weight = clip_elo_policy(config, white_elo)
    black_weight = clip_elo_policy(config, black_elo)
    
    actions = []
    while not game.is_end():
        game = game.variation(0)
        actions.append(game.move.uci())
    k = 0
    while not env.done and k < len(actions):
        if env.white_to_move:
            action = white.sl_action(env.observation, actions[k], weight=white_weight) #ignore=True
        else:
            action = black.sl_action(env.observation, actions[k], weight=black_weight) #ignore=True
        env.step(action, False)
        k += 1

    if not env.board.is_game_over() and result != '1/2-1/2':
        env.resigned = True
    if result == '1-0':
        env.winner = Winner.white
        black_win = -1
    elif result == '0-1':
        env.winner = Winner.black
        black_win = 1
    else:
        env.winner = Winner.draw
        black_win = 0

    black.finish_game(black_win)
    white.finish_game(-black_win)

    data = []
    for i in range(len(white.moves)):
        data.append(white.moves[i])
        if i < len(black.moves):
            data.append(black.moves[i])

    return env, data
Beispiel #7
0
def get_buffer(game, config) -> (ChessEnv, list):
    env = ChessEnv().reset()
    black = ChessPlayer(config, dummy=True)
    white = ChessPlayer(config, dummy=True)
    result = game.headers["Result"]
    actions = []
    while not game.is_end():
        game = game.variation(0)
        actions.append(game.move.uci())
    k = 0
    observation = env.observation
    while not env.done and k < len(actions):
        if env.board.turn == chess.WHITE:
            action = white.sl_action(observation, actions[k])  #ignore=True
        else:
            action = black.sl_action(observation, actions[k])  #ignore=True
        board, info = env.step(action, False)
        observation = board.fen()
        k += 1

    env.done = True
    if not env.board.is_game_over() and result != '1/2-1/2':
        env.resigned = True
    if result == '1-0':
        env.winner = Winner.white
        black_win = -1
    elif result == '0-1':
        env.winner = Winner.black
        black_win = 1
    else:
        env.winner = Winner.draw
        black_win = 0

    black.finish_game(black_win)
    white.finish_game(-black_win)

    data = []
    for i in range(len(white.moves)):
        data.append(white.moves[i])
        if i < len(black.moves):
            data.append(black.moves[i])

    return env, data
Beispiel #8
0
def self_play_buffer(config, cur) -> (ChessEnv, list):
    pipes = cur.pop()  # borrow
    env = ChessEnv().reset()
    search_tree = defaultdict(VisitStats)

    white = ChessPlayer(config, search_tree=search_tree, pipes=pipes)
    black = ChessPlayer(config, search_tree=search_tree, pipes=pipes)

    history = []

    cc = 0
    while not env.done:
        if env.white_to_move:
            action = white.action(env)
        else:
            action = black.action(env)
        env.step(action)
        history.append(action)
        if len(history) > 6 and history[-1] == history[-5]:
            cc = cc + 1
        else:
            cc = 0
        if env.num_halfmoves >= config.play.max_game_length or cc >= 4:
            env.adjudicate()
    if env.winner == Winner.white:
        black_win = -1
    elif env.winner == Winner.black:
        black_win = 1
    else:
        black_win = 0

    black.finish_game(black_win)
    white.finish_game(-black_win)

    data = []
    for i in range(len(white.moves)):
        data.append(white.moves[i])
        if i < len(black.moves):
            data.append(black.moves[i])

    cur.append(pipes)
    return env, data
Beispiel #9
0
class SelfPlayWorker:
    def __init__(self, config: Config, env=None, model=None):
        """

        :param config:
        :param ChessEnv|None env:
        :param chess_zero.agent.model_chess.ChessModel|None model:
        """
        self.config = config
        self.model = model
        self.env = env     # type: ChessEnv
        self.black = None  # type: ChessPlayer
        self.white = None  # type: ChessPlayer
        self.buffer = []

    def start(self):
        if self.model is None:
            self.model = self.load_model()

        self.buffer = []
        idx = 1

        while True:
            start_time = time()
            env = self.start_game(idx)
            end_time = time()
            logger.debug(f"game {idx} time={end_time - start_time} sec, "
                         f"turn={env.turn}:{env.observation} - Winner:{env.winner} - by resignation?:{env.resigned}")
            if (idx % self.config.play_data.nb_game_in_file) == 0:
                reload_best_model_weight_if_changed(self.model)
            idx += 1

    def start_game(self, idx):
        self.env.reset()
        self.black = ChessPlayer(self.config, self.model)
        self.white = ChessPlayer(self.config, self.model)
        observation = self.env.observation
        while not self.env.done:
            if self.env.board.turn == chess.BLACK:
                action = self.black.action(observation)
            else:
                action = self.white.action(observation)
            board, info = self.env.step(action)
            observation = board.fen()
        self.finish_game()
        self.save_play_data(write=idx % self.config.play_data.nb_game_in_file == 0)
        self.remove_play_data()
        return self.env

    def save_play_data(self, write=True):
        data = self.black.moves + self.white.moves
        self.buffer += data

        if not write:
            return

        rc = self.config.resource
        game_id = datetime.now().strftime("%Y%m%d-%H%M%S.%f")
        path = os.path.join(rc.play_data_dir, rc.play_data_filename_tmpl % game_id)
        logger.info(f"save play data to {path}")
        write_game_data_to_file(path, self.buffer)
        self.buffer = []

    def remove_play_data(self):
        files = get_game_data_filenames(self.config.resource)
        if len(files) < self.config.play_data.max_file_num:
            return
        for i in range(len(files) - self.config.play_data.max_file_num):
            os.remove(files[i])

    def finish_game(self):
        if self.env.winner == Winner.black:
            black_win = 1
        elif self.env.winner == Winner.white:
            black_win = -1
        else:
            black_win = 0

        self.black.finish_game(black_win)
        self.white.finish_game(-black_win)

    def load_model(self):
        from chess_zero.agent.model_chess import ChessModel
        model = ChessModel(self.config)
        if self.config.opts.new or not load_best_model_weight(model):
            model.build()
            save_as_best_model(model)
        return model
Beispiel #10
0
class SupervisedLearningWorker:
    def __init__(self, config: Config, env=None, model=None):
        """

        :param config:
        :param ChessEnv|None env:
        :param chess_zero.agent.model_chess.ChessModel|None model:
        """
        self.config = config
        self.model = model
        self.env = env     # type: ChessEnv
        self.black = None  # type: ChessPlayer
        self.white = None  # type: ChessPlayer
        self.buffer = []

    def start(self):
        if self.model is None:
            self.model = self.load_model()

        self.buffer = []
        idx = 1

        while True:
            start_time = time()
            env = self.read_game(idx)
            end_time = time()
            logger.debug(f"game {idx} time={end_time - start_time} sec, "
                         f"turn={int(env.turn/2)}:{env.observation} - Winner:{env.winner} - by resignation?:{env.resigned}")
            if (idx % self.config.play_data.nb_game_in_file) == 0:
                reload_best_model_weight_if_changed(self.model)
            idx += 1

    def read_game(self, idx):
        self.env.reset()
        self.black = ChessPlayer(self.config, self.model)
        self.white = ChessPlayer(self.config, self.model)
        files = find_pgn_files(self.config.resource.play_data_dir)
        if len(files) > 0:
            random.shuffle(files)
            filename = files[0]
            pgn = open(filename, errors='ignore')
            size = os.path.getsize(filename)
            pos = random.randint(0, size)
            pgn.seek(pos)

            line = pgn.readline()
            offset = 0
            # Parse game headers.
            while line:
                if line.isspace() or line.startswith("%"):
                    line = pgn.readline()
                    continue

                # Read header tags.
                tag_match = TAG_REGEX.match(line)
                if tag_match:
                    offset = pgn.tell()
                    break

                line = pgn.readline()

            pgn.seek(offset)
            game = chess.pgn.read_game(pgn)
            node = game
            result = game.headers["Result"]
            actions = []
            while not node.is_end():
                next_node = node.variation(0)
                actions.append(node.board().uci(next_node.move))
                node = next_node
            pgn.close()

            k = 0
            observation = self.env.observation
            while not self.env.done and k < len(actions):
                if self.env.board.turn == chess.BLACK:
                    action = self.black.sl_action(observation, actions[k])
                else:
                    action = self.white.sl_action(observation, actions[k])
                board, info = self.env.step(action)
                observation = board.fen()
                k += 1

            self.env.done = True
            if not self.env.board.is_game_over() and result != '1/2-1/2':
                self.env.resigned = True
            if result == '1-0':
                self.env.winner = Winner.white
            elif result == '0-1':
                self.env.winner = Winner.black
            else:
                self.env.winner = Winner.draw

            self.finish_game()
            self.save_play_data(write=idx % self.config.play_data.nb_game_in_file == 0)
            self.remove_play_data()
        return self.env

    def save_play_data(self, write=True):
        data = self.black.moves + self.white.moves
        self.buffer += data

        if not write:
            return

        rc = self.config.resource
        game_id = datetime.now().strftime("%Y%m%d-%H%M%S.%f")
        path = os.path.join(rc.play_data_dir, rc.play_data_filename_tmpl % game_id)
        logger.info(f"save play data to {path}")
        write_game_data_to_file(path, self.buffer)
        self.buffer = []

    def remove_play_data(self):
        files = get_game_data_filenames(self.config.resource)
        if len(files) < self.config.play_data.max_file_num:
            return
        for i in range(len(files) - self.config.play_data.max_file_num):
            os.remove(files[i])

    def finish_game(self):
        if self.env.winner == Winner.black:
            black_win = 1
        elif self.env.winner == Winner.white:
            black_win = -1
        else:
            black_win = 0

        self.black.finish_game(black_win)
        self.white.finish_game(-black_win)

    def load_model(self):
        from chess_zero.agent.model_chess import ChessModel
        model = ChessModel(self.config)
        if self.config.opts.new or not load_best_model_weight(model):
            model.build()
            save_as_best_model(model)
        return model
Beispiel #11
0
def self_play_buffer(config, cur) -> (ChessEnv, list):
    """
    Play one game and add the play data to the buffer
    :param Config config: config for how to play
    :param list(Connection) cur: list of pipes to use to get a pipe to send observations to for getting
        predictions. One will be removed from this list during the game, then added back
    :return (ChessEnv,list((str,list(float)): a tuple containing the final ChessEnv state and then a list
        of data to be appended to the SelfPlayWorker.buffer
    """
    pipes = cur.pop()  # borrow
    env = ChessEnv().reset()
    # EDIT CODE HERE TO CHANGE THE ENVIRONMENT

    white = ChessPlayer(config, pipes=pipes)
    black = ChessPlayer(config, pipes=pipes)
    move = 0
    failed_play = 0
    total_failed_plays = 0
    print("Match Started")
    moves_list = ""
    while not env.done:
        # CHANGES_MADE_HERE
        temp = deepcopy(env)
        black_pieces = set("prnbqk")
        white_pieces = set("PRNBQK")

        if env.white_to_move:
            x = temp.board.piece_map()
            for i in x:
                if str(x[i]) in black_pieces:
                    temp.board.remove_piece_at(i)
            action = white.action(temp)
        else:
            x = temp.board.piece_map()
            for i in x:
                if str(x[i]) in white_pieces:
                    temp.board.remove_piece_at(i)
            action = black.action(temp)
        print("Match in Progress: ",
              move,
              "Moves made in the game, Failed Plays: ",
              total_failed_plays,
              end='\r')
        try:
            env.step(action)
            moves_list += action + ', '
            failed_play = 0
            move += 1
            if env.num_halfmoves >= config.play.max_game_length:
                env.adjudicate()
        except ValueError:
            failed_play += 1
            total_failed_plays += 1
            if failed_play == 50:
                logger.warning("\nEnding the Game due to lack of development")
                env.adjudicate()
            continue

        # END_OF_CHANGES
    with open("result.csv", "a+") as fp:
        result = str(move) + ", " + str(total_failed_plays) + ", " + str(
            env.winner) + ", <" + env.board.fen()
        result += ">, Adjudicated\n" if failed_play == 50 else ">, Game End\n"
        fp.write(result)
        fp.close()

    with open("moves_list.csv", "a+") as fp:
        fp.write(moves_list)
        fp.write("\n")
        fp.close()

    if env.winner == Winner.white:
        black_win = -1
        logger.info("White wins")
    elif env.winner == Winner.black:
        black_win = 1
        logger.info("Black wins")
    else:
        black_win = 0
        logger.info("Draw Match")

    black.finish_game(black_win)
    white.finish_game(-black_win)

    data = []
    for i in range(len(white.moves)):
        data.append(white.moves[i])
        if i < len(black.moves):
            data.append(black.moves[i])

    cur.append(pipes)
    return env, data