def supervised_buffer(config, game) -> (ChessEnv, list):
    """Replay a PGN game with dummy players and collect supervised-learning data.

    :param Config config: config used to build the environment and players
    :param pgn.Game game: parsed PGN game whose main line is replayed
    :return (ChessEnv, list): the finished environment and the merged move data
    """
    env = ChessEnv(config).reset()
    white = ChessPlayer(config, dummy=True)
    black = ChessPlayer(config, dummy=True)
    result = game.headers["Result"]
    env.board = game.board()

    # Walk the recorded main line; the side to move records the sample.
    for move in game.main_line():
        player = white if env.board.turn == chess.WHITE else black
        player.sl_action(env, move)
        env.step(move)

    # A decisive result on a non-terminal board means someone resigned.
    if result != '1/2-1/2' and not env.board.is_game_over():
        env.resigned = True

    if result == '1-0':
        env.winner = Winner.WHITE
        white_score = 1
    elif result == '0-1':
        env.winner = Winner.BLACK
        white_score = -1
    else:
        env.winner = Winner.DRAW
        white_score = 0

    white.finish_game(white_score)
    black.finish_game(-white_score)
    return env, merge_data(white, black)
def get_buffer(config, game) -> (ChessEnv, list):
    """
    Gets data to load into the buffer by playing a game using PGN data.
    :param Config config: config to use to play the game
    :param pgn.Game game: game to play
    :return list(str,list(float)): data from this game for the SupervisedLearningWorker.buffer
    """
    env = ChessEnv().reset()
    white = ChessPlayer(config, dummy=True)
    black = ChessPlayer(config, dummy=True)
    result = game.headers["Result"]

    # Weight each side's recorded policy by that player's Elo rating.
    white_weight = clip_elo_policy(config, int(game.headers["WhiteElo"]))
    black_weight = clip_elo_policy(config, int(game.headers["BlackElo"]))

    # Flatten the PGN main line into UCI move strings.
    actions = []
    node = game
    while not node.is_end():
        node = node.variation(0)
        actions.append(node.move.uci())

    # Replay the moves; the side to move logs its training sample.
    for uci in actions:
        if env.done:
            break
        if env.white_to_move:
            action = white.sl_action(env.observation, uci, weight=white_weight)  # ignore=True
        else:
            action = black.sl_action(env.observation, uci, weight=black_weight)  # ignore=True
        env.step(action, False)

    # A decisive result on a non-terminal board means someone resigned.
    if not env.board.is_game_over() and result != '1/2-1/2':
        env.resigned = True

    if result == '1-0':
        env.winner = Winner.white
        black_score = -1
    elif result == '0-1':
        env.winner = Winner.black
        black_score = 1
    else:
        env.winner = Winner.draw
        black_score = 0

    black.finish_game(black_score)
    white.finish_game(-black_score)

    # Interleave white/black samples back into game order (white always
    # has at least as many moves as black, so nothing is dropped).
    data = []
    for i, white_move in enumerate(white.moves):
        data.append(white_move)
        if i < len(black.moves):
            data.append(black.moves[i])
    return env, data
def get_buffer(config, game) -> (ChessEnv, list):
    """
    Gets data to load into the buffer by playing a game using PGN data.

    NOTE(review): this definition is byte-for-byte identical to an earlier
    `get_buffer` in this file; at import time the later definition shadows
    the earlier one. Consider deleting one copy.

    :param Config config: config to use to play the game
    :param pgn.Game game: game to play
    :return list(str,list(float)): data from this game for the SupervisedLearningWorker.buffer
    """
    env = ChessEnv().reset()
    white = ChessPlayer(config, dummy=True)
    black = ChessPlayer(config, dummy=True)
    result = game.headers["Result"]
    white_elo = int(game.headers["WhiteElo"])
    black_elo = int(game.headers["BlackElo"])
    # Elo-based weighting of each side's recorded policy.
    white_weight = clip_elo_policy(config, white_elo)
    black_weight = clip_elo_policy(config, black_elo)

    # Collect the main line as UCI strings by walking variation(0) links.
    actions = []
    cursor = game
    while not cursor.is_end():
        cursor = cursor.variation(0)
        actions.append(cursor.move.uci())

    # Replay until the environment finishes or the move list runs out.
    idx = 0
    total = len(actions)
    while idx < total and not env.done:
        mover, weight = (white, white_weight) if env.white_to_move else (black, black_weight)
        action = mover.sl_action(env.observation, actions[idx], weight=weight)  # ignore=True
        env.step(action, False)
        idx += 1

    # Decisive result without a terminal position implies resignation.
    if not env.board.is_game_over() and result != '1/2-1/2':
        env.resigned = True

    if result == '1-0':
        env.winner = Winner.white
        black_score = -1
    elif result == '0-1':
        env.winner = Winner.black
        black_score = 1
    else:
        env.winner = Winner.draw
        black_score = 0

    black.finish_game(black_score)
    white.finish_game(-black_score)

    # Re-interleave per-side samples into move order.
    data = []
    for i in range(len(white.moves)):
        data.append(white.moves[i])
        if i < len(black.moves):
            data.append(black.moves[i])
    return env, data
def get_buffer(config, game) -> (ChessEnv, list):
    """Replay a PGN game and build Elo-weighted supervised-learning samples.

    :param Config config: config to use to play the game
    :param pgn.Game game: game to play
    :return (ChessEnv, list): finished environment plus interleaved move data
    """
    env = ChessEnv().reset()
    white = ChessPlayer(config, dummy=True)
    black = ChessPlayer(config, dummy=True)
    result = game.headers["Result"]

    # Per-side policy weights derived from the players' Elo ratings.
    white_weight = clip_elo_policy(config, int(game.headers["WhiteElo"]))
    black_weight = clip_elo_policy(config, int(game.headers["BlackElo"]))

    # Turn the PGN main line into a flat list of UCI move strings.
    actions = []
    node = game
    while not node.is_end():
        node = node.variation(0)
        actions.append(node.move.uci())

    # Step through the moves; the player on turn records the sample.
    for uci in actions:
        if env.done:
            break
        if env.white_to_move:
            action = white.sl_action(env.observation, uci, weight=white_weight)  # ignore=True
        else:
            action = black.sl_action(env.observation, uci, weight=black_weight)  # ignore=True
        env.step(action, False)

    # Decisive result before a terminal position implies a resignation.
    if not env.board.is_game_over() and result != '1/2-1/2':
        env.resigned = True

    if result == '1-0':
        env.winner = Winner.white
        black_score = -1
    elif result == '0-1':
        env.winner = Winner.black
        black_score = 1
    else:
        env.winner = Winner.draw
        black_score = 0

    black.finish_game(black_score)
    white.finish_game(-black_score)

    # Interleave the two sides' samples back into game order.
    data = []
    for i, sample in enumerate(white.moves):
        data.append(sample)
        if i < len(black.moves):
            data.append(black.moves[i])
    return env, data
def get_buffer(game, config) -> (ChessEnv, list):
    """Replay a PGN game with dummy players and collect training samples.

    NOTE(review): parameter order here is `(game, config)`, the reverse of
    the other `get_buffer` variants in this file — kept as-is for callers.

    :param pgn.Game game: game to play
    :param Config config: config to use to play the game
    :return (ChessEnv, list): finished environment plus interleaved move data
    """
    env = ChessEnv().reset()
    black = ChessPlayer(config, dummy=True)
    white = ChessPlayer(config, dummy=True)
    result = game.headers["Result"]

    # Flatten the PGN main line into UCI move strings.
    actions = []
    node = game
    while not node.is_end():
        node = node.variation(0)
        actions.append(node.move.uci())

    # Replay the moves, feeding each player the current observation.
    k = 0
    observation = env.observation
    while not env.done and k < len(actions):
        mover = white if env.board.turn == chess.WHITE else black
        action = mover.sl_action(observation, actions[k])  # ignore=True
        board, info = env.step(action, False)
        # Track the position via the returned board's FEN.
        observation = board.fen()
        k += 1
    # Mark the game finished even if the PGN ended before a terminal position.
    env.done = True

    # Decisive result without game-over on the board implies resignation.
    if not env.board.is_game_over() and result != '1/2-1/2':
        env.resigned = True

    if result == '1-0':
        env.winner = Winner.white
        black_score = -1
    elif result == '0-1':
        env.winner = Winner.black
        black_score = 1
    else:
        env.winner = Winner.draw
        black_score = 0

    black.finish_game(black_score)
    white.finish_game(-black_score)

    # Interleave white/black samples back into move order.
    data = []
    for i, sample in enumerate(white.moves):
        data.append(sample)
        if i < len(black.moves):
            data.append(black.moves[i])
    return env, data