def convert_to_cheating_data(data): """ :param data: format is SelfPlayWorker.buffer :return: """ state_list = [] policy_list = [] value_list = [] for state_fen, policy, value in data: state_planes = canon_input_planes(state_fen) if is_black_turn(state_fen): policy = Config.flip_policy(policy) move_number = int(state_fen.split(' ')[5]) value_certainty = min( 5, move_number ) / 5 # reduces the noise of the opening... plz train faster sl_value = value * value_certainty + testeval( state_fen, False) * (1 - value_certainty) state_list.append(state_planes) policy_list.append(policy) value_list.append(sl_value) return np.asarray(state_list, dtype=np.float32), np.asarray( policy_list, dtype=np.float32), np.asarray(value_list, dtype=np.float32)
def convert_to_cheating_data(data): """ :param data: format is SelfPlayWorker.buffer :return: """ state_list = [] policy_list = [] value_list = [] env = ChessEnv().reset() for state_fen, policy, value in data: move_number = int(state_fen.split(' ')[5]) # f2 = maybe_flip_fen(maybe_flip_fen(state_fen,True),True) # assert state_fen == f2 next_move = env.deltamove(state_fen) if next_move == None: # new game! assert state_fen == chess.STARTING_FEN env.reset() else: env.step(next_move, False) state_planes = env.canonical_input_planes() # assert env.check_current_planes(state_planes) side_to_move = state_fen.split(" ")[1] if side_to_move == 'b': #assert np.sum(policy) == 0 policy = Config.flip_policy(policy) else: #assert abs(np.sum(policy) - 1) < 1e-8 pass # if np.sum(policy) != 0: # policy /= np.sum(policy) #assert abs(np.sum(policy) - 1) < 1e-8 assert len(policy) == 1968 assert state_planes.dtype == np.float32 value_certainty = min( 15, move_number ) / 15 # reduces the noise of the opening... plz train faster SL_value = value * value_certainty + env.testeval() * (1 - value_certainty) state_list.append(state_planes) policy_list.append(policy) value_list.append(SL_value) return np.array(state_list, dtype=np.float32), np.array( policy_list, dtype=np.float32), np.array(value_list, dtype=np.float32)
def expand_and_evaluate(self, env) -> (np.ndarray, float):
    """expand new leaf; this is called only once per state
    this is called with state locked
    insert P(a|s), return leaf_v
    """
    state_planes = env.canonical_input_planes()

    leaf_p, leaf_v = self.predict(state_planes)
    # these are canonical policy and value (i.e. side to move is "white")

    if not env.white_to_move:
        leaf_p = Config.flip_policy(leaf_p)  # get it back to python-chess form

    return leaf_p, leaf_v
def calc_policy(self, env): """calc π(a|s0) :return: """ state = state_key(env) my_visitstats = self.tree[state] policy = np.zeros(self.labels_n) for action, a_s in my_visitstats.a.items(): policy[self.move_lookup[action]] = a_s.n policy /= np.sum(policy) if not env.white_to_move: policy = Config.flip_policy(policy) return policy
def expand_and_evaluate(self, env) -> (np.ndarray, float):
    """expand new leaf; this is called only once per state
    this is called with state locked
    insert P(a|s), return leaf_v
    """
    state_planes = env.canonical_input_planes()

    leaf_p, leaf_v = self.predict(state_planes)
    # these are canonical policy and value (i.e. side to move is "white")

    if env.board.turn == chess.BLACK:
        leaf_p = Config.flip_policy(leaf_p)  # get it back to python-chess form
        # np.testing.assert_array_equal(Config.flip_policy(Config.flip_policy(leaf_p)), leaf_p)

    return leaf_p, leaf_v
def expand_and_evaluate(self, env) -> (np.ndarray, float):
    """expand new leaf; this is called only once per state
    this is called with state locked
    insert P(a|s), return leaf_v

    This gets a prediction for the policy and value of the state within the given env
    :return (np.ndarray, float): the policy and value predictions for this state
    """
    state_planes = env.canonical_input_planes()

    leaf_p, leaf_v = self.predict(state_planes)
    # these are canonical policy and value (i.e. side to move is "white")

    if not env.white_to_move:
        leaf_p = Config.flip_policy(leaf_p)  # get it back to python-chess form

    return leaf_p, leaf_v
def expand_and_evaluate(self, env) -> (np.ndarray, float):
    """expand new leaf
    this is called with state locked
    insert P(a|s), return leaf_v

    :param ChessEnv env:
    :return: (leaf_p, leaf_v)
    """
    # with five or fewer pieces on the board, answer from the endgame tablebase instead
    if self.play_config.tablebase_access and env.board.num_pieces() <= 5:
        return self.tablebase_and_evaluate(env)

    state = env.board.gather_features(self.config.model.t_history)
    leaf_p, leaf_v = self.predict(state)

    if env.board.turn == chess.BLACK:
        leaf_p = Config.flip_policy(leaf_p)

    return leaf_p, leaf_v
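# tablebase_and_evaluate() is not shown in the snippet above. This is a hypothetical
# sketch of what the tablebase branch might do with python-chess's Syzygy support:
# give every legal move an equal prior and take the value from the WDL probe for the
# side to move. The names syzygy_path, move_lookup and labels_n are illustrative
# parameters, not the project's actual attributes.
import chess
import chess.syzygy
import numpy as np

def tablebase_and_evaluate_sketch(board: chess.Board, move_lookup, labels_n, syzygy_path="./syzygy"):
    leaf_p = np.zeros(labels_n, dtype=np.float32)
    legal_moves = list(board.legal_moves)
    for move in legal_moves:
        leaf_p[move_lookup[move.uci()]] = 1.0 / len(legal_moves)

    with chess.syzygy.open_tablebase(syzygy_path) as tablebase:
        wdl = tablebase.probe_wdl(board)  # > 0 win, 0 draw, < 0 loss, from the side to move

    leaf_v = float(np.sign(wdl))
    return leaf_p, leaf_v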
def load_data_from_file(filename, t_history):
    # TODO: is it necessary to catch an exception here? if the play data file isn't
    # completely written yet, an error about a "missing delimiter" (etc.) will be thrown
    data = read_game_data_from_file(filename)

    state_list = []
    policy_list = []
    value_list = []

    board = MyBoard(None)
    board.fullmove_number = 1000  # an arbitrary large value

    for state, policy, value in data:
        board.push_fen(state)
        state = board.gather_features(t_history)
        if board.turn == chess.BLACK:
            policy = Config.flip_policy(policy)
        state_list.append(state)
        policy_list.append(policy)
        value_list.append(value)

    return state_list, policy_list, value_list
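# read_game_data_from_file() comes from elsewhere in the project. Judging from the
# "missing delimiter" note above, the play data is presumably stored as JSON; this is
# a minimal sketch under that assumption, including the exception handling the comment
# asks about, so a half-written file is skipped instead of crashing the loader.
import json

def read_game_data_from_file(filename):
    try:
        with open(filename, "rt") as f:
            return json.load(f)
    except (json.JSONDecodeError, OSError):
        # file is probably still being written by the self-play worker; skip it for now
        return []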