def convert_to_cheating_data(data):
    """
    :param data: format is SelfPlayWorker.buffer
    :return: tuple of (state, policy, value) numpy arrays, all float32
    """
    state_list = []
    policy_list = []
    value_list = []
    for state_fen, policy, value in data:
        state_planes = canon_input_planes(state_fen)

        if is_black_turn(state_fen):
            policy = Config.flip_policy(policy)

        move_number = int(state_fen.split(' ')[5])
        # reduces the noise of the opening... plz train faster
        value_certainty = min(5, move_number) / 5
        sl_value = value * value_certainty + testeval(state_fen, False) * (1 - value_certainty)

        state_list.append(state_planes)
        policy_list.append(policy)
        value_list.append(sl_value)

    return (np.asarray(state_list, dtype=np.float32),
            np.asarray(policy_list, dtype=np.float32),
            np.asarray(value_list, dtype=np.float32))
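# The value_certainty blend above down-weights the final game result during the first few moves,
# where the outcome is mostly noise, and leans on a static evaluation instead. A minimal,
# self-contained sketch of that idea (blend_value and the 'horizon' parameter are illustrative
# names, not part of the project):
def blend_value(game_value, static_eval, move_number, horizon=5):
    """Linearly interpolate between a static evaluation and the final game result.

    Early in the game (move_number < horizon) the target leans toward static_eval;
    from move `horizon` onward it is the game result alone.
    """
    certainty = min(horizon, move_number) / horizon
    return game_value * certainty + static_eval * (1 - certainty)

# Example: at move 1 the target is 80% static eval, 20% game result.
# blend_value(game_value=1.0, static_eval=0.1, move_number=1) -> 0.28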
def action(self, env: GoBangEnv, can_stop=True) -> str:
    """
    Figures out the next best move within the specified environment
    and returns a string describing the action to take.

    :param GoBangEnv env: environment in which to figure out the action
    :param boolean can_stop: whether we are allowed to take no action (return None)
    :return: None if no action should be taken (indicating a resign). Otherwise, returns
        a string indicating the action to take in uci format
    """
    self.reset()

    # for tl in range(self.play_config.thinking_loop):
    root_value, naked_value = self.search_moves(env)
    policy = self.calc_policy(env)
    my_action = int(np.random.choice(range(self.labels_n),
                                     p=self.apply_temperature(policy, env.num_halfmoves)))

    # if can_stop and self.play_config.resign_threshold is not None and \
    #         root_value <= self.play_config.resign_threshold \
    #         and env.num_halfmoves > self.play_config.min_resign_turn:
    #     # noinspection PyTypeChecker
    #     return None
    # else:

    # store the policy as a plain list so it can be serialized with json.dump()
    self.moves.append([get_state_by_input_planes(env.observation), list(policy)])

    # augment the record with flipped copies of the position
    move = [env.observation, list(policy)]
    for _state, _policy in Config.flip_moves(move):
        self.moves.append([get_state_by_input_planes(np.array(_state)), _policy])

    return self.config.labels[my_action]
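# The Config.flip_moves loop above augments the training record with symmetric copies of the
# position. For a square Gomoku board, all eight dihedral symmetries can be generated with plain
# numpy; this is a generic illustration of the idea, not the project's actual flip_moves code.
import numpy as np

def board_symmetries(state, policy_grid):
    """Yield (state, policy) pairs for the 4 rotations of the board and their mirror images.

    state and policy_grid are 2-D arrays of the same board shape.
    """
    for k in range(4):
        s, p = np.rot90(state, k), np.rot90(policy_grid, k)
        yield s, p
        yield np.fliplr(s), np.fliplr(p)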
def convert_to_cheating_data(data):
    """
    :param data: format is SelfPlayWorker.buffer
    :return: tuple of (state, policy, value) numpy arrays, all float32
    """
    state_list = []
    policy_list = []
    value_list = []
    env = ChessEnv().reset()
    for state_fen, policy, value in data:
        move_number = int(state_fen.split(' ')[5])
        # f2 = maybe_flip_fen(maybe_flip_fen(state_fen, True), True)
        # assert state_fen == f2
        next_move = env.deltamove(state_fen)
        if next_move is None:  # new game!
            assert state_fen == chess.STARTING_FEN
            env.reset()
        else:
            env.step(next_move, False)

        state_planes = env.canonical_input_planes()
        # assert env.check_current_planes(state_planes)

        side_to_move = state_fen.split(" ")[1]
        if side_to_move == 'b':
            # assert np.sum(policy) == 0
            policy = Config.flip_policy(policy)
        else:
            # assert abs(np.sum(policy) - 1) < 1e-8
            pass

        # if np.sum(policy) != 0:
        #     policy /= np.sum(policy)
        # assert abs(np.sum(policy) - 1) < 1e-8
        assert len(policy) == 1968
        assert state_planes.dtype == np.float32

        # reduces the noise of the opening... plz train faster
        value_certainty = min(15, move_number) / 15
        SL_value = value * value_certainty + env.testeval() * (1 - value_certainty)

        state_list.append(state_planes)
        policy_list.append(policy)
        value_list.append(SL_value)

    return (np.array(state_list, dtype=np.float32),
            np.array(policy_list, dtype=np.float32),
            np.array(value_list, dtype=np.float32))
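# env.deltamove(state_fen) above recovers the single move that leads from the environment's
# current position to state_fen (or None at the start of a new game). One plausible way to
# implement that with python-chess, shown only as a sketch (delta_move is an illustrative name):
import chess

def delta_move(board: chess.Board, target_fen: str):
    """Return the UCI string of the legal move that turns `board` into `target_fen`, else None."""
    target_position = target_fen.split(' ')[0]  # compare piece placement only
    for move in list(board.legal_moves):  # materialize before mutating the board
        board.push(move)
        found = board.fen().split(' ')[0] == target_position
        board.pop()
        if found:
            return move.uci()
    return None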
def expand_and_evaluate(self, env) -> (np.ndarray, float):
    """ expand new leaf, this is called only once per state
    this is called with state locked
    insert P(a|s), return leaf_v
    """
    state_planes = env.canonical_input_planes()

    leaf_p, leaf_v = self.predict(state_planes)
    # these are canonical policy and value (i.e. side to move is "white")

    if not env.white_to_move:
        leaf_p = Config.flip_policy(leaf_p)  # get it back to python-chess form

    return leaf_p, leaf_v
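# Config.flip_policy converts a policy vector expressed from the side-to-move's ("canonical")
# point of view back into python-chess / UCI move order. A plausible sketch of how such a flip
# can be built: mirror the rank digits of every UCI label and permute the vector accordingly.
# (flip_uci and build_flip_index are illustrative names, not the project's actual implementation.)
def flip_uci(move: str) -> str:
    # 'e2e4' -> 'e7e5': ranks 1..8 map to 8..1, files and promotion pieces are unchanged
    return ''.join(str(9 - int(c)) if c.isdigit() else c for c in move)

def build_flip_index(labels):
    position = {m: i for i, m in enumerate(labels)}
    return [position[flip_uci(m)] for m in labels]

# flipped_policy = [policy[i] for i in build_flip_index(labels)]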
def calc_policy(self, env):
    """calc π(a|s0)

    :return: the visit-count policy π(a|s0) over all move labels, normalized to sum to 1
        and flipped back to python-chess order when black is to move
    """
    state = state_key(env)
    my_visitstats = self.tree[state]
    policy = np.zeros(self.labels_n)
    for action, a_s in my_visitstats.a.items():
        policy[self.move_lookup[action]] = a_s.n

    policy /= np.sum(policy)
    if not env.white_to_move:
        policy = Config.flip_policy(policy)
    return policy
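# The visit-count distribution returned by calc_policy is usually sharpened or flattened with a
# temperature before sampling (see apply_temperature in the action() method above). A minimal
# sketch of that step, assuming the common pi_i proportional to N_i^(1/tau) formulation:
import numpy as np

def apply_temperature_sketch(policy, tau):
    """tau=1 keeps the visit-count proportions; tau -> 0 collapses to the most-visited move."""
    if tau < 1e-3:  # treat very small temperatures as a greedy argmax
        greedy = np.zeros_like(policy)
        greedy[np.argmax(policy)] = 1.0
        return greedy
    scaled = np.power(policy, 1.0 / tau)
    return scaled / np.sum(scaled)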
def __init__(self):
    # Tensorflow session
    import tensorflow as tf
    log.debug("Initializing Tensorflow session...")
    tf_session_config = tf.ConfigProto()
    tf_session_config.gpu_options.allow_growth = True
    self.sess = tf.Session(config=tf_session_config)

    from chess_zero.config import Config, PlayWithHumanConfig
    from chess_zero.env.chess_env import ChessEnv
    self.chess_env_class = ChessEnv

    default_config = Config()
    PlayWithHumanConfig().update_play_config(default_config.play)
    self.alpha_player = self.get_player_from_model(default_config)
def expand_and_evaluate(self, env) -> (np.ndarray, float):
    """ expand new leaf, this is called only once per state
    this is called with state locked
    insert P(a|s), return leaf_v
    """
    state_planes = env.canonical_input_planes()

    leaf_p, leaf_v = self.predict(state_planes)
    # these are canonical policy and value (i.e. side to move is "white")

    if env.board.turn == chess.BLACK:
        leaf_p = Config.flip_policy(leaf_p)  # get it back to python-chess form
        # np.testing.assert_array_equal(Config.flip_policy(Config.flip_policy(leaf_p)), leaf_p)

    return leaf_p, leaf_v
def expand_and_evaluate(self, env) -> (np.ndarray, float):
    """ expand new leaf, this is called only once per state
    this is called with state locked
    insert P(a|s), return leaf_v

    This gets a prediction for the policy and value of the state within the given env
    :return (np.ndarray, float): the policy and value predictions for this state
    """
    state_planes = env.canonical_input_planes()

    leaf_p, leaf_v = self.predict(state_planes)
    # these are canonical policy and value (i.e. side to move is "white")

    if not env.white_to_move:
        leaf_p = Config.flip_policy(leaf_p)  # get it back to python-chess form

    return leaf_p, leaf_v
def expand_and_evaluate(self, env) -> (np.ndarray, float):
    """expand new leaf

    this is called with state locked
    insert P(a|s), return leaf_v

    :param ChessEnv env:
    :return: (leaf_p, leaf_v), the policy over moves and the value estimate for this state
    """
    if self.play_config.tablebase_access and env.board.num_pieces() <= 5:
        return self.tablebase_and_evaluate(env)

    state = env.board.gather_features(self.config.model.t_history)
    leaf_p, leaf_v = self.predict(state)

    if env.board.turn == chess.BLACK:
        leaf_p = Config.flip_policy(leaf_p)

    return leaf_p, leaf_v
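# The tablebase_access branch above short-circuits the search once few pieces remain. With
# python-chess, a Syzygy probe along those lines could look like the sketch below; the tablebase
# path argument and the value mapping are assumptions, not taken from the project.
import chess
import chess.syzygy

def tablebase_value(board: chess.Board, tb_path: str) -> float:
    """Return roughly +1/0/-1 for a win/draw/loss from the side to move, using Syzygy WDL tables."""
    with chess.syzygy.open_tablebase(tb_path) as tablebase:
        wdl = tablebase.probe_wdl(board)  # 2 win, 0 draw, -2 loss (+/-1 are cursed/blessed results)
        return max(-1.0, min(1.0, wdl / 2.0))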
def load_data_from_file(filename, t_history):
    # necessary to catch an exception here...? if the play data file isn't completely
    # written yet, then some error will be thrown about a "missing delimiter", etc.
    data = read_game_data_from_file(filename)

    state_list = []
    policy_list = []
    value_list = []
    board = MyBoard(None)
    board.fullmove_number = 1000  # an arbitrary large value.

    for state, policy, value in data:
        board.push_fen(state)
        state = board.gather_features(t_history)
        if board.turn == chess.BLACK:
            policy = Config.flip_policy(policy)
        state_list.append(state)
        policy_list.append(policy)
        value_list.append(value)

    return state_list, policy_list, value_list
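# The question in the comment above (partially written play-data files) can be handled by
# catching the decode error and skipping the file. A sketch, assuming the files are plain JSON;
# the function name and skip-on-error policy are illustrative, not taken from the project.
import json

def read_game_data_safely(filename):
    """Return the parsed game data, or None if the file is missing or only partially written."""
    try:
        with open(filename, "rt") as f:
            return json.load(f)
    except (FileNotFoundError, json.JSONDecodeError):
        return None  # caller can skip this file and retry on the next pass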
def start(config=Config(config_type='mini')):
    # note: the default Config is constructed once, when this module is imported
    return ManEvaluateWorker(config).start()
def create(self):
    # Initial Alpha Zero setup
    default_config = Config()
    PlayWithHumanConfig().update_play_config(default_config.play)
    return self.get_player_from_model(default_config)
    # place a stone of the matching color at the corresponding board position
    plot_chess(i + 1, j + 1, screen, no)
    action = f'{i}_{j}_{no}'
    print(action)
    pygame.display.flip()
    env.step(action)
    clock.tick(60)

    if env.white_won:
        put_text('白棋胜利,请重新游戏', screen, 30)  # "White wins, please start a new game"
    else:
        put_text('黑棋胜利,请重新游戏', screen, 30)  # "Black wins, please start a new game"
    sleep(10)


if __name__ == "__main__":
    try:
        pygame.init()
        pygame.mixer.init()

        import chess_zero.lib.tf_util as tu
        os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
        os.environ["CUDA_VISIBLE_DEVICES"] = "0"
        tu.set_session_config(allow_growth=True)

        PvEWorker(Config(config_type='mini')).start()
    except SystemExit:
        pass
    except:
        traceback.print_exc()
    pygame.quit()
    input()
from chess_zero.lib.logger import setup_logger
from chess_zero.config import Config
from chess_zero.manager import create_parser, setup, logger
from chess_zero.worker import sl
import os
import sys
import multiprocessing as mp
from logging import disable  # used to silence all logging in uci mode

_PATH_ = os.path.dirname(os.path.dirname(__file__))
if _PATH_ not in sys.path:
    sys.path.append(_PATH_)

mp.set_start_method('spawn')
sys.setrecursionlimit(10000)

parser = create_parser()
args = parser.parse_args()
config_type = args.type

if args.cmd == 'uci':
    disable(999999)  # plz don't interfere with uci

config = Config(config_type=config_type)
setup(config, args)

logger.info(f"config type: {config_type}")
sl.start(config)
# The gRPC serve function.
#
# Params:
# max_workers: pool of threads to execute calls asynchronously
# port: gRPC server port
#
# Add all your classes to the server here.
# (from generated .py files by protobuf compiler)
def serve(max_workers=10, port=7777):
    server = grpc.server(futures.ThreadPoolExecutor(max_workers=max_workers))
    grpc_bt_grpc.add_AlphaZeroServicer_to_server(AlphaZeroServicer(), server)
    server.add_insecure_port("[::]:{}".format(port))
    return server


if __name__ == "__main__":
    """
    Runs the gRPC server to communicate with the Snet Daemon.
    """
    # Initial Alpha Zero setup
    default_config = Config()
    PlayWithHumanConfig().update_play_config(default_config.play)
    ALPHA_ZERO_PLAYER = get_player_from_model(default_config)

    parser = service.common.common_parser(__file__)
    args = parser.parse_args(sys.argv[1:])
    service.common.main_loop(serve, args)
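# For reference only: serve() returns the server object and leaves starting it to the caller
# (here service.common.main_loop). Run standalone, the usual grpc pattern would be roughly the
# sketch below; wait_for_termination() assumes a reasonably recent grpcio, older code used a
# sleep loop instead. run_standalone is an illustrative name, not part of the project.
def run_standalone(port=7777):
    server = serve(port=port)
    server.start()
    server.wait_for_termination()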