# NOTE(review): a second definition of convert_to_cheating_data follows this
# one in the file and shadows it at import time; keep the two in sync or
# remove one.
def convert_to_cheating_data(data):
    """Turn (FEN, policy, value) records into float32 training arrays.

    For every record the FEN is converted with ``canon_input_planes``, the
    policy is passed through ``Config.flip_policy`` when
    ``is_black_turn(state_fen)`` is true, and the value target is blended
    with ``testeval(state_fen, False)``. The recorded value gets weight
    ``min(5, move_number) / 5`` and the ``testeval`` result gets the
    remainder, where ``move_number`` is the sixth space-separated field of
    the FEN (the fullmove counter in standard FEN).

    :param data: iterable of ``(state_fen, policy, value)`` triples;
        format is SelfPlayWorker.buffer
    :return: tuple ``(states, policies, values)`` of ``np.float32`` arrays
        with one entry per record, in input order
    """
    state_list = []
    policy_list = []
    value_list = []

    for state_fen, policy, value in data:
        state_planes = canon_input_planes(state_fen)

        if is_black_turn(state_fen):
            policy = Config.flip_policy(policy)

        move_number = int(state_fen.split(' ')[5])
        # Reduces the noise of the opening: early positions lean on the
        # testeval estimate, and the recorded value takes over completely
        # once move_number reaches 5.
        value_certainty = min(5, move_number) / 5

        sl_value = value * value_certainty
        if value_certainty < 1:
            # Only evaluate the position while its weight is non-zero; past
            # the opening the term would be multiplied by 0 anyway.
            # NOTE(review): assumes testeval has no side effects callers
            # rely on -- confirm.
            sl_value += testeval(state_fen, False) * (1 - value_certainty)

        state_list.append(state_planes)
        policy_list.append(policy)
        value_list.append(sl_value)

    return (
        np.asarray(state_list, dtype=np.float32),
        np.asarray(policy_list, dtype=np.float32),
        np.asarray(value_list, dtype=np.float32),
    )
# NOTE(review): an earlier definition of convert_to_cheating_data precedes
# this one in the file; this later definition is the one that takes effect.
# Keep the two in sync or remove one.
def convert_to_cheating_data(data):
    """Turn (FEN, policy, value) records into float32 training arrays.

    For every record the FEN is converted with ``canon_input_planes``, the
    policy is passed through ``Config.flip_policy`` when
    ``is_black_turn(state_fen)`` is true, and the value target is blended
    with ``testeval(state_fen, False)``. The recorded value gets weight
    ``min(5, move_number) / 5`` and the ``testeval`` result gets the
    remainder, where ``move_number`` is the sixth space-separated field of
    the FEN (the fullmove counter in standard FEN).

    :param data: iterable of ``(state_fen, policy, value)`` triples;
        format is SelfPlayWorker.buffer
    :return: tuple ``(states, policies, values)`` of ``np.float32`` arrays
        with one entry per record, in input order
    """
    state_list = []
    policy_list = []
    value_list = []

    for state_fen, policy, value in data:
        state_planes = canon_input_planes(state_fen)

        if is_black_turn(state_fen):
            policy = Config.flip_policy(policy)

        move_number = int(state_fen.split(' ')[5])
        # Reduces the noise of the opening: early positions lean on the
        # testeval estimate, and the recorded value takes over completely
        # once move_number reaches 5.
        value_certainty = min(5, move_number) / 5

        sl_value = value * value_certainty
        if value_certainty < 1:
            # Only evaluate the position while its weight is non-zero; past
            # the opening the term would be multiplied by 0 anyway.
            # NOTE(review): assumes testeval has no side effects callers
            # rely on -- confirm.
            sl_value += testeval(state_fen, False) * (1 - value_certainty)

        state_list.append(state_planes)
        policy_list.append(policy)
        value_list.append(sl_value)

    return (
        np.asarray(state_list, dtype=np.float32),
        np.asarray(policy_list, dtype=np.float32),
        np.asarray(value_list, dtype=np.float32),
    )