def comparison_fn(board, data):
    # Mirror the BoardState JitClass's bitboards onto a python-chess Board so that
    # Stockfish can search the position.
    py_board = chess.Board()
    py_board.kings = board.kings
    py_board.queens = board.queens
    py_board.rooks = board.rooks
    py_board.bishops = board.bishops
    py_board.knights = board.knights
    py_board.pawns = board.pawns
    py_board.occupied = board.occupied
    py_board.occupied_co[True] = board.occupied_w
    py_board.occupied_co[False] = board.occupied_b
    py_board.castling_rights = board.castling_rights
    py_board.ep_square = board.ep_square
    py_board.turn = board.turn

    # Only consider legal moves that were never chosen from this position in the collected data.
    move_wasnt_in_computed_data_fn = lambda m: data.moves.get(
        (m.from_square, m.to_square,
         None if m.promotion == 0 else m.promotion)) is None

    moves_to_search = [
        move for move in py_board.generate_legal_moves()
        if move_wasnt_in_computed_data_fn(move)
    ]

    # Have Stockfish pick the best of the remaining moves.
    stockfish_ai.position(py_board)
    chosen_move = stockfish_ai.go(searchmoves=moves_to_search,
                                  movetime=sf_time).bestmove

    yield Move(chosen_move.from_square, chosen_move.to_square,
               0 if chosen_move.promotion is None else chosen_move.promotion)
def writer_fn(dict, filenames):
    """
    The function to be returned by board_eval_data_writer_creator.
    """

    def create_serialized_example(array_to_write):
        """
        NOTES:
        1) I should be doing this like it's done in the move generation database generation,
         where a board is a set of 64 uint8s.
        """
        return tf.train.Example(features=tf.train.Features(
            feature={
                "boards": tf.train.Feature(
                    int64_list=tf.train.Int64List(value=array_to_write))
            })).SerializeToString()

    # TFRecord writers are used for files that will be consumed by the deep-pink style loss,
    # and plain binary files (for pickling) are used for the rest.
    writers = [
        tf.python_io.TFRecordWriter(file) if for_deep_pink else open(file, 'wb')
        for file, for_deep_pink in zip(filenames, for_deep_pink_loss)
    ]

    number_of_boards = len(dict)
    if print_frequency is not None:
        print("Number of board configurations:", number_of_boards)

    start_time = time.time()
    cur_entry_num = 0
    dict_iterator = iter(dict.items())
    for writer, ratio, should_get_deep_pink_loss in zip(writers, file_ratios,
                                                        for_deep_pink_loss):
        if not should_get_deep_pink_loss:
            # Pickle this file's share of the (board, BoardData) pairs directly.
            pickle.dump(
                {
                    next(dict_iterator)
                    for _ in range(int(math.floor(ratio * number_of_boards)))
                }, writer, pickle.HIGHEST_PROTOCOL)

            cur_entry_num += int(math.floor(ratio * number_of_boards))
            if print_frequency is not None:
                print(cur_entry_num,
                      "boards written (just completed a file's pickle.dump)")
        else:
            for _ in range(int(math.floor(ratio * number_of_boards))):
                if print_frequency is not None and cur_entry_num % print_frequency == 0:
                    print(cur_entry_num,
                          "total boards written. The time since the previous print:",
                          time.time() - start_time)
                    start_time = time.time()

                cur_board_data, cur_data = next(dict_iterator)

                temp_board = board_state_from_dict_key(cur_board_data)

                original_board_data_to_write = get_feature_array(cur_board_data)

                # Get the maximum number of times a move was chosen from the current position
                # most_chosen_move = max(cur_data.moves, key=cur_data.moves.get)
                max_move_count = max(cur_data.moves.values())

                # Get the set of moves chosen max_move_count number of times
                most_chosen_moves = [
                    move for move in cur_data.moves.keys()
                    if cur_data.moves.get(move) == max_move_count
                ]

                most_chosen_move_picked = most_chosen_moves[random.randrange(
                    len(most_chosen_moves))]

                most_chosen_data = get_feature_array(
                    vectorized_flip_vertically(
                        get_board_info_tuple(
                            copy_push(
                                temp_board,
                                Move(most_chosen_move_picked[0],
                                     most_chosen_move_picked[1],
                                     0 if most_chosen_move_picked[2] is None
                                     else most_chosen_move_picked[2])))))

                comparison_moves = [
                    move for move in comparison_move_generator(temp_board, cur_data)
                ]

                if len(comparison_moves) != 0:
                    comparison_move = comparison_moves[random.randrange(
                        len(comparison_moves))]

                    for_comparison_board = get_feature_array(
                        vectorized_flip_vertically(
                            get_board_info_tuple(
                                copy_push(temp_board, comparison_move))))

                    writer.write(
                        create_serialized_example(
                            np.concatenate([
                                original_board_data_to_write, most_chosen_data,
                                for_comparison_board
                            ])))

                    # example_triplets = product([original_board_data_to_write], most_chosen_data, for_comparison_boards)
                    # serialized_data_strings = list(map(lambda x: create_serialized_example(np.concatenate(x)), example_triplets))
                    # for cur_string in serialized_data_strings:
                    #     writer.write(cur_string)

                cur_entry_num += 1

        writer.close()
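# The reader below is an illustrative sketch (not part of the original pipeline) showing how the
# TFRecord files produced by writer_fn can be read back.  It only relies on the "boards" feature
# name used in create_serialized_example; the function name and the direct use of the Example proto
# API (rather than a tf input pipeline) are assumptions made for this example.  It assumes the
# module-level `tf` (TensorFlow 1.x) and `np` imports used above.
def example_tfrecord_reader(tfrecord_filename):
    """Yields the concatenated (original, most chosen, comparison) board feature arrays."""
    for serialized in tf.python_io.tf_record_iterator(tfrecord_filename):
        example = tf.train.Example()
        example.ParseFromString(serialized)
        # Each record holds the three feature arrays concatenated into one int64 list
        yield np.array(example.features.feature["boards"].int64_list.value)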
def create_database_from_pgn(filenames,
                             game_filters=[],
                             to_collect_filters=[],
                             post_collection_filters=[],
                             data_writer=None,
                             num_first_moves_to_skip=0,
                             output_filenames=["board_config_database.csv"],
                             print_info=True):
    """
    A function used to generate customized chess databases from a set of pgn files.  It does so using the
    python-chess package for pgn file parsing and error handling (when the error lies within the pgn file
    itself), and uses Batch First's BoardState JitClass for move generation.

    :param filenames: An array of filenames (paths) for the pgn files of chess games to read data from.
    :param game_filters: A list of functions used to filter out games.  Each function must accept a
     python-chess Game object and return a boolean value indicating whether the game should be filtered
     out or not.
    :param to_collect_filters: A list of functions used to filter out (board, move) pairs during the
     parsing of the pgn files.  Each function in the array must accept two parameters, the first being the
     current BoardState object, and the second being the next move to be made, as a python-chess Move
     object.  They should return True if the (board, move) pair should be filtered out, False if not.
    :param post_collection_filters: An array of functions just like to_collect_filters, except that the
     second parameter is a BoardData object instead of a move, and the filters are applied after all pgn
     files have been parsed, as opposed to during parsing.
    :param data_writer: A function that takes two parameters, the first of which is a dictionary mapping
     string representations of boards (as determined by other parameters of this function) to BoardData
     objects, and the second of which is the output_filenames array, which is given as another parameter
     of this function.  If None is given, the dictionary of information is pickled and saved.
    :param num_first_moves_to_skip: The number of halfmoves to omit at the start of every game during the
     collection of board data.
    :param output_filenames: An array of filenames (paths) to be passed to the data_writer as a parameter.
    :param print_info: A boolean value indicating whether updates on what is happening within the function
     should be printed.

    NOTES:
    1) Parsing errors are handled internally by the python-chess package, and the logs are stored in the
     Game object's error array.
""" def game_is_okay(game): for filter in game_filters: if filter(game): return False return True configs = {} for index, filename in enumerate(filenames): if print_info: print("Starting file", str(index + 1), "with", line_counter(filename), "lines") pgn_file = open(filename) cur_game = chess.pgn.read_game(pgn_file) while not cur_game is None and not game_is_okay(cur_game): cur_game = chess.pgn.read_game(pgn_file) while not cur_game is None: the_board = create_board_state_from_fen(INITIAL_BOARD_FEN) for move_num, move in enumerate(cur_game.main_line()): should_save = move_num >= num_first_moves_to_skip if should_save and to_collect_filters != []: for filter in to_collect_filters: if filter(the_board, move): should_save = False break if should_save: if the_board.turn == TURN_WHITE: white_move_info = tuple( get_board_info_tuple(the_board)) else: white_move_info = tuple( vectorized_flip_vertically( get_board_info_tuple(the_board))) if configs.get(white_move_info) is None: configs[white_move_info] = BoardData( (move.from_square, move.to_square, move.promotion)) else: configs[white_move_info].update( (move.from_square, move.to_square, move.promotion)) push_with_hash_update( the_board, Move(move.from_square, move.to_square, 0 if move.promotion is None else move.promotion)) cur_game = chess.pgn.read_game(pgn_file) while not cur_game is None and not game_is_okay(cur_game): cur_game = chess.pgn.read_game(pgn_file) pgn_file.close() if print_info: print("Applying post-collection filters to data.") to_delete = [] if post_collection_filters != []: for board_info, data in configs.items(): for filter in post_collection_filters: if filter(board_info, data): to_delete.append(board_info) break if print_info: print("Number of boards deleted by post-collection filters:", len(to_delete)) for board_info in to_delete: del configs[board_info] if print_info: print("Writing data to new file.") if data_writer is None: with open(output_filenames[0], 'wb') as writer: pickle.dump(configs, writer, pickle.HIGHEST_PROTOCOL) else: data_writer(configs, output_filenames)