Example #1
def main():
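    # Loads the YAML training config, builds the network on the configured CUDA device,
    # runs train_loop, and saves a final checkpoint on KeyboardInterrupt.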
    parser = argparse.ArgumentParser(description='Train a NN predictor from config', formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument('config', help='config file for model / training')
    args = parser.parse_args()

    tstart = time.time()
    with open(args.config) as f:
        config = yaml.safe_load(f.read())

    collection_name = os.path.basename(os.path.dirname(args.config)).replace('configs_', '')
    name = os.path.basename(args.config).split('.')[0]
    outputDir = os.path.join('models', collection_name, name)
    os.makedirs(outputDir, exist_ok = True)
    tensorboard_writer = maia_chess_backend.torch.TB_wrapper(name, log_dir = os.path.join('runs', collection_name))

    with torch.cuda.device(config['device']):
        maia_chess_backend.printWithDate(f"Loading model:{config['model']}")
        net = maia_chess_backend.torch.NetFromConfigNew(config['model'])

        train_loader, test_loader, val_loader = setupLoaders(config)
        try:
            train_loop(net, config, train_loader, test_loader, val_loader, tensorboard_writer, outputDir)
        except KeyboardInterrupt:
            net.save(os.path.join(outputDir, f"net-final.pt"))

    maia_chess_backend.printWithDate(f"Done everything in {humanize.naturaldelta(time.time() - tstart)}, exiting")
def writerWorker(outputFile, inputQueue, num_readers, name):
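    # Drains encoded CSV lines from inputQueue into a bz2-compressed file, stopping once a
    # 'kill' sentinel has arrived from every reader.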
    i = -1
    num_kill_remaining = num_readers
    tstart = time.time()
    maia_chess_backend.printWithDate("Writer created")
    with bz2.open(outputFile, 'wb') as f:
        maia_chess_backend.printWithDate(f"Created: {outputFile}")
        f.write((','.join(maia_chess_backend.full_csv_header) + '\n').encode('utf8'))
        tLast = time.time()
        while True:
            try:
                dat = inputQueue.get()
            except queue.Empty:
                #Should never happen
                break
            try:
                f.write(dat)
            except TypeError:
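                # dat is normally bytes; the 'kill' str sentinel cannot be written to the binary stream and lands here instead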
                if dat == 'kill':
                    num_kill_remaining -= 1
                    if num_kill_remaining <= 0:
                        break
                else:
                    raise
            else:
                i += 1
                if i % 1000 == 0 and time.time() - tLast > logging_delay:
                    tLast = time.time()
                    maia_chess_backend.printWithDate(f"{name} Written {i} games in {humanize.naturaldelta(time.time() - tstart)}, doing {(i + 1) /(time.time() - tstart):.0f} games a second", flush = True)
    maia_chess_backend.printWithDate("Received shutdown signal to writer")
    maia_chess_backend.printWithDate(f"Done a total of {i} games in {humanize.naturaldelta(time.time() - tstart)}")
Example #3
def main():
    parser = argparse.ArgumentParser(description='Make mmapped version of csv', formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument('inputs', nargs = '+', help='input csv files')
    parser.add_argument('outputDir', help='output dir of mmapped files')
    parser.add_argument('--nrows', type=int, help='number of rows to read in, FOR TESTING', default = None)

    parser.add_argument('--min_elo', type=int, help='min active elo', default = 1000)
    parser.add_argument('--max_elo', type=int, help='max active elo', default = 4000)
    # Note: argparse's type=bool is bool(str), which is True for any non-empty string, so use store_true flags instead.
    parser.add_argument('--allow_negative_loss', help='allow winrate losses below 0', default = False, action="store_true")
    parser.add_argument('--allow_low_time', help='Include low time moves', default = False, action="store_true")
    parser.add_argument('--min_ply', type=int, help='min move ply to consider', default = 6)

    parser.add_argument('--nb_to_b_ratio', type=float, help='ratio of non-blunders to blunders in the dataset', default = 1.5)


    #parser.add_argument('split_column', help='what to split the csvs on, i.e. is_blunder')

    #parser.add_argument('y_vals', nargs = '+', help='columns to treat as y vals')

    args = parser.parse_args()

    maia_chess_backend.printWithDate(f"Starting mmap of {', '.join(args.inputs)} writing to {args.outputDir} with {', '.join(mmap_columns)}")

    with multiprocessing.Pool(32) as pool:
        pool.starmap(run_path, [(p, args) for p in args.inputs])
    maia_chess_backend.printWithDate("Done")
Example #4
def run_path(path, args):
    #helper for multiproc
    try:
        mmap_csv(
                path,
                load_csv(path, args.nrows),
                args.outputDir,
                args,
            )
    except EOFError:
        maia_chess_backend.printWithDate(f"EOF error in: {path}")
def gamesConverter(inputQueue, outputQueue, allow_non_sf):
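    # Converts raw PGN game text from inputQueue into CSV lines for outputQueue, forwarding the
    # 'kill' sentinel when it arrives.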
    #maia_chess_backend.printWithDate("Converter created")
    while True:
        try:
            #print('qsize', inputQueue.qsize())
            dat = inputQueue.get()
        except queue.Empty:
            break
        if dat == 'kill':
            outputQueue.put('kill', True, 1000)
            break
        else:
            try:
                s = maia_chess_backend.gameToCSVlines(dat, allow_non_sf = allow_non_sf)
            except maia_chess_backend.NoStockfishEvals:
                pass
            except Exception:
                maia_chess_backend.printWithDate('error:')
                maia_chess_backend.printWithDate(dat)
                maia_chess_backend.printWithDate(traceback.format_exc())
                raise
            else:
                if len(s) > 0:
                    lines = '\n'.join(s) + '\n'
                    outputQueue.put(lines.encode('utf8'), True, 1000)
    maia_chess_backend.printWithDate("Received shutdown signal to Converter", flush = True)
def main():

    parser = argparse.ArgumentParser(description='process PGN file with stockfish annotations into a csv file', formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument('input', help='input PGNs')
    parser.add_argument('outputDir', help='output CSVs dir')

    parser.add_argument('--pool', type=int, help='number of simultaneous jobs running per file', default = 30)
    parser.add_argument('--allow_non_sf', help='Allow games with no stockfish info', default = False, action="store_true")
    #parser.add_argument('--debug', help='DEBUG MODE', default = False, action="store_true")
    #parser.add_argument('--readers', type=int, help='number of simultaneous reader running per inputfile', default = 24)
    parser.add_argument('--queueSize', type=int, help='Max number of games to cache', default = 1000)

    args = parser.parse_args()

    maia_chess_backend.printWithDate(f"Starting CSV conversion of {args.input} writing to {args.outputDir}")

    os.makedirs(args.outputDir, exist_ok=True)

    name = os.path.basename(args.input).split('.')[0]
    outputName = os.path.join(args.outputDir, f"{name}.csv.bz2")
    #names[n] = (name, outputName)

    maia_chess_backend.printWithDate(f"Loading file: {name}")
    maia_chess_backend.printWithDate(f"Starting main loop")

    tstart = time.time()
    with multiprocessing.Manager() as manager:
        with multiprocessing.Pool(args.pool) as workers_pool, multiprocessing.Pool(3) as io_pool:
            pgnReader, gameReader, writer, unproccessedQueue, resultsQueue = processPGN(args.input, name, outputName, args.queueSize, args.pool, args.allow_non_sf, manager, workers_pool, io_pool)

            maia_chess_backend.printWithDate(f"Done loading Queues in {humanize.naturaldelta(time.time() - tstart)}, waiting for reading to finish")

            cleanup(pgnReader, gameReader, writer)
def readerWorker(inputPath, unproccessedQueue, resultsQueue, name, num_readers):
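    # Streams games from the PGN file into unproccessedQueue, then sends one 'kill' sentinel per
    # converter so they can shut down.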
    tstart = time.time()
    gamesFile = maia_chess_backend.LightGamesFile(inputPath, just_games = True)
    try:
        tLast = time.time()
        for i, (_, gs) in enumerate(gamesFile):
            unproccessedQueue.put(gs, True, 1000)
            if i % 1000 == 0 and  time.time() - tLast > logging_delay:
                tLast = time.time()
                maia_chess_backend.printWithDate(f"{name} Loaded {i} games, input queue depth: {unproccessedQueue.qsize()}, ouput queue depth: {resultsQueue.qsize()}", flush = True)
    except (EOFError, StopIteration):
        pass

    maia_chess_backend.printWithDate(f"{name} Done loading Queue in {humanize.naturaldelta(time.time() - tstart)}, sending kills")
    for i in range(num_readers):
        #maia_chess_backend.printWithDate(f"Putting kill number {i} in queue")
        unproccessedQueue.put('kill', True, 100)
def processPGN(gamesPath, inputName, outputName, queueSize, poolSize, allow_non_sf, manager, workers_pool, io_pool):
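    # Wires up the pipeline: poolSize - 1 converter workers plus one reader and one writer, all
    # connected through managed queues.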
    unproccessedQueue = manager.Queue(queueSize)
    resultsQueue = manager.Queue(queueSize)

    readers = []
    for _ in range(poolSize - 1):
        reader = workers_pool.apply_async(gamesConverter, (unproccessedQueue, resultsQueue, allow_non_sf))
        readers.append(reader)
    maia_chess_backend.printWithDate(f"{inputName} Started {len(readers)} readers", flush = True)
    pgnReader = io_pool.apply_async(readerWorker, (gamesPath, unproccessedQueue, resultsQueue, inputName, len(readers)))
    maia_chess_backend.printWithDate(f"{inputName} loader created")

    writer = io_pool.apply_async(writerWorker, (outputName, resultsQueue, len(readers), inputName))
    maia_chess_backend.printWithDate(f"{inputName} Started writer for: {inputName}", flush = True)

    return pgnReader, readers, writer, unproccessedQueue, resultsQueue
Example #9
def make_df_mmaps(df, name, output_dir):
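    # Writes one memory-mapped array per column in mmap_columns, plus game-id, move, and board
    # arrays, under output_dir.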
    os.makedirs(output_dir, exist_ok = True)

    mmaps = {}
    maia_chess_backend.printWithDate(f"Making y_vals mmaps for: {name}", flush = True)
    for y_name in mmap_columns:
        make_var_mmap(y_name, output_dir, mmaps, df)
        #print(y_name, end = ' ', flush = True)

    make_game_id_mmap(output_dir, mmaps, df)

    maia_chess_backend.printWithDate(f"Making move array mmaps for: {name}", flush = True)
    make_move_mmap(output_dir, mmaps, df)

    maia_chess_backend.printWithDate(f"Making boards array mmaps for: {name}", flush = True)
    make_board_mmap(output_dir, mmaps, df)
def cleanup(pgnReaders, gameReaders, writers):
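    # Polls the converter AsyncResults until they have all finished, then waits on the reader
    # and writer results.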

    #time.sleep(10)
    while len(gameReaders) > 0:
        for i in range(len(gameReaders)):
            #maia_chess_backend.printWithDate(f"Checking {i} of {len(gameReaders)}", flush = True)
            try:
                gameReaders[i].get(1)
            except multiprocessing.TimeoutError:
                pass
            else:
                del gameReaders[i]
                break
    maia_chess_backend.printWithDate(f"Done processing")
    pgnReaders.get()
    maia_chess_backend.printWithDate(f"Done reading")
    writers.get()
    maia_chess_backend.printWithDate(f"Done cleanup")
Example #11
def get_latest_chunks(path):
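    # Gathers *.gz chunk files from every directory matching path, sorts them newest-first, and shuffles them.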
    chunks = []
    maia_chess_backend.printWithDate(f"found {glob.glob(path)} chunk dirs")
    for d in glob.glob(path):
        maia_chess_backend.printWithDate(f"found {len(chunks)} chunks",
                                         end='\r')
        chunks += glob.glob(os.path.join(d, '*.gz'))
    maia_chess_backend.printWithDate(f"found {len(chunks)} chunks total")
    if len(chunks) < 10:
        print("Not enough chunks {}".format(len(chunks)))
        sys.exit(1)
    if len(chunks) < 1000:
        print("There are not very many chunks so results may be unstable")

    print("sorting {} chunks...".format(len(chunks)), end='')
    chunks.sort(key=os.path.getmtime, reverse=True)
    print("[done]")
    print("{} - {}".format(os.path.basename(chunks[-1]),
                           os.path.basename(chunks[0])))
    random.shuffle(chunks)
    return chunks
Example #12
def train_loop(net, config, train_loader, test_loader, val_loader, tensorboard_writer, outputDir):
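    # Core optimization loop: Adam with a MultiStepLR schedule, MSE loss for regression heads and
    # cross-entropy for the class head, periodic test/validation passes, TensorBoard logging, and
    # optional early stopping via 'auto_stop'.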
    maia_chess_backend.printWithDate(f"Starting training loop")
    if torch.cuda.is_available():
        net.cuda()

    lastFewAcs = []

    optimizer = torch.optim.Adam(
                    net.parameters(),
                    lr = config['training']['lr_intial'],
                    #momentum = 0.9,
                    weight_decay = 0.0001,
                    betas = (0.9, 0.999),
                    eps = 1e-8,
                    #nesterov = True,
                    )
    scheduler = torch.optim.lr_scheduler.MultiStepLR(
                    optimizer,
                    milestones=config['training']['lr_steps'],
                    gamma=config['training']['lr_gamma'],
                    )
    loss_reg = torch.nn.MSELoss(reduction='mean')
    loss_class = torch.nn.CrossEntropyLoss(ignore_index = -1)

    epoch_losses = {'count' : 0}
    step_durations = []
    tstart = time.time()
    tests = 0
    last_save = time.time()

    for step in range(config['training']['total_steps']):
        delta_t = step_train(net, train_loader, optimizer, loss_reg, loss_class, epoch_losses)

        step_durations.append(delta_t)

        if step % 100 == 0:
            i = step % config['training']['test_steps']

            maia_chess_backend.printWithDate(f"Step {step} {i /config['training']['test_steps']*100:02.0f}% {make_info_str(epoch_losses)} {(i + 1) / (time.time() - tstart):03.2f} steps/second", end = '\r')

        if step > 0 and step % config['training']['validate_steps'] == 0:

            val_results = step_validate(net, val_loader, config['training']['test_size'] * 10)
        else:
            val_results = None

        if step > 0 and step % config['training']['test_steps'] == 0:
            tests += 1
            maia_chess_backend.printWithDate(f"Training step {step} losses: {make_info_str(epoch_losses)}" + ' ' * 10)

            test_losses, accuracies = step_test(net, test_loader, loss_reg, loss_class, config['training']['test_size'])

            maia_chess_backend.printWithDate(f"Testing {tests} step {step}  losses: {make_info_str(test_losses)} accuracy: {make_info_str(accuracies)}")

            last_save, batch_acc = save_results(step, tests, last_save, net, tensorboard_writer, epoch_losses, test_losses, accuracies, val_results, optimizer, step_durations, config['training']['batch_size'], train_loader.num_blunders, train_loader.num_nonblunders, outputDir)

            if tests == 1:
                t_x, t_y = next(train_loader)
                if net.has_extras:
                    tensorboard_writer.add_graph(
                            net,
                            input_to_model = (t_x, t_y),
                            )
                else:
                    tensorboard_writer.add_graph(net, input_to_model = t_x)
            if config['training'].get('auto_stop', None) is not None and batch_acc is not None:
                lastFewAcs.append(batch_acc)
                if len(lastFewAcs) - np.argmax(lastFewAcs) > config['training'].get('auto_stop', None) - 1:
                    break
            epoch_losses = {'count' : 0}
            step_durations = []
            tstart = time.time()

        scheduler.step()

    test_losses, accuracies = step_test(net, test_loader, loss_reg, loss_class, config['training']['test_size'])
    val_results = step_validate(net, val_loader, config['training']['test_size'] * 10)
    net.save(os.path.join(outputDir, f"net-final-{step}.pt"))
    last_save, batch_acc = save_results(step, tests, last_save, net, tensorboard_writer, epoch_losses, test_losses, accuracies, val_results, optimizer, step_durations, config['training']['batch_size'], train_loader.num_blunders, train_loader.num_nonblunders, outputDir)
Example #13
def main(config_path, name, collection_name):
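    # Builds train/test tf.data pipelines from the newest chunk files (ChunkParser or the
    # experimental fixed-length-record path) and runs the TFProcess training loop, saving SWA or
    # regular leelaz weights at the end.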
    output_name = os.path.join('models', collection_name, name + '.txt')

    with open(config_path) as f:
        cfg = yaml.safe_load(f.read())

    maia_chess_backend.printWithDate(yaml.dump(cfg, default_flow_style=False))

    experimental_parser = cfg['dataset'].get('experimental_v4_only_dataset',
                                             False)

    train_chunks = get_latest_chunks(cfg['dataset']['input_train'])
    test_chunks = get_latest_chunks(cfg['dataset']['input_test'])

    shuffle_size = cfg['training']['shuffle_size']
    total_batch_size = cfg['training']['batch_size']
    batch_splits = cfg['training'].get('num_batch_splits', 1)
    if total_batch_size % batch_splits != 0:
        raise ValueError('num_batch_splits must divide batch_size evenly')
    split_batch_size = total_batch_size // batch_splits
    # Load data with split batch size, which will be combined to the total batch size in tfprocess.
    maia_chess_backend.maia.ChunkParser.BATCH_SIZE = split_batch_size

    root_dir = os.path.join('models', collection_name, name)
    if not os.path.exists(root_dir):
        os.makedirs(root_dir)
    tfprocess = maia_chess_backend.maia.TFProcess(cfg, name, collection_name)

    if experimental_parser:
        train_dataset = tf.data.Dataset.from_tensor_slices(train_chunks).shuffle(len(train_chunks)).repeat()\
                         .interleave(lambda x: tf.data.FixedLengthRecordDataset(x, 8292, compression_type='GZIP', num_parallel_reads=1).filter(sample), num_parallel_calls=tf.data.experimental.AUTOTUNE)\
                         .shuffle(shuffle_size)\
                         .batch(split_batch_size).map(extract_inputs_outputs).prefetch(4)
    else:
        train_parser = maia_chess_backend.maia.ChunkParser(
            FileDataSrc(train_chunks.copy()),
            shuffle_size=shuffle_size,
            sample=SKIP,
            batch_size=maia_chess_backend.maia.ChunkParser.BATCH_SIZE)
        train_dataset = tf.data.Dataset.from_generator(
            train_parser.parse,
            output_types=(tf.string, tf.string, tf.string, tf.string))
        train_dataset = train_dataset.map(
            maia_chess_backend.maia.ChunkParser.parse_function)
        train_dataset = train_dataset.prefetch(4)

    shuffle_size = int(shuffle_size)
    if experimental_parser:
        test_dataset = tf.data.Dataset.from_tensor_slices(test_chunks).shuffle(len(test_chunks)).repeat()\
                         .interleave(lambda x: tf.data.FixedLengthRecordDataset(x, 8292, compression_type='GZIP', num_parallel_reads=1).filter(sample), num_parallel_calls=tf.data.experimental.AUTOTUNE)\
                         .shuffle(shuffle_size)\
                         .batch(split_batch_size).map(extract_inputs_outputs).prefetch(4)
    else:
        test_parser = maia_chess_backend.maia.ChunkParser(
            FileDataSrc(test_chunks),
            shuffle_size=shuffle_size,
            sample=SKIP,
            batch_size=maia_chess_backend.maia.ChunkParser.BATCH_SIZE)
        test_dataset = tf.data.Dataset.from_generator(
            test_parser.parse,
            output_types=(tf.string, tf.string, tf.string, tf.string))
        test_dataset = test_dataset.map(
            maia_chess_backend.maia.ChunkParser.parse_function)
        test_dataset = test_dataset.prefetch(4)

    tfprocess.init_v2(train_dataset, test_dataset)

    tfprocess.restore_v2()

    # If number of test positions is not given
    # sweeps through all test chunks statistically
    # Assumes average of 10 samples per test game.
    # For simplicity, testing can use the split batch size instead of total batch size.
    # This does not affect results, because test results are simple averages that are independent of batch size.
    num_evals = cfg['training'].get('num_test_positions',
                                    len(test_chunks) * 10)
    num_evals = max(
        1, num_evals // maia_chess_backend.maia.ChunkParser.BATCH_SIZE)
    print("Using {} evaluation batches".format(num_evals))

    tfprocess.process_loop_v2(total_batch_size,
                              num_evals,
                              batch_splits=batch_splits)

    if cfg['training'].get('swa_output', False):
        tfprocess.save_swa_weights_v2(output_name)
    else:
        tfprocess.save_leelaz_weights_v2(output_name)

    # The ChunkParser objects only exist when the experimental parser is not in use.
    if not experimental_parser:
        train_parser.shutdown()
        test_parser.shutdown()
Example #14
def mmap_csv(target_path, df, outputDir, args):
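    # Filters rows according to the CLI arguments, splits them into blunders and a downsampled set
    # of non-blunders, and mmaps both.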
    maia_chess_backend.printWithDate(f"Loading: {target_path}")
    name = os.path.basename(target_path).split('.')[0]

    maia_chess_backend.printWithDate(f"Filtering data starting at {len(df)} rows")

    df = df[df['move_ply'] >= args.min_ply]

    if not args.allow_low_time:
        df = df[df['low_time'].eq(False)]

    if not args.allow_negative_loss:
        df = df[df['winrate_loss'] > 0]

    df = df[df['active_elo'] > args.min_elo]
    df = df[df['active_elo'] < args.max_elo]

    df = df.dropna()

    maia_chess_backend.printWithDate(f"Filtering down data to {len(df)} rows")

    df_blunder = df[df['is_blunder_wr']]
    maia_chess_backend.printWithDate(f"Found {len(df_blunder)} blunders")

    df_blunder = df_blunder.sample(frac=1).reset_index(drop=True)

    df_non_blunder = df[df['is_blunder_wr'].eq(False)]
    maia_chess_backend.printWithDate(f"Found {len(df_non_blunder)} non blunders")

    df_non_blunder = df_non_blunder.sample(frac=1).reset_index(drop=True).iloc[:int(len(df_blunder) * args.nb_to_b_ratio)]

    del df

    maia_chess_backend.printWithDate(f"Reduced to {len(df_non_blunder)} non blunders")

    maia_chess_backend.printWithDate(f"Starting mmaping")

    os.makedirs(outputDir, exist_ok = True)
    make_df_mmaps(df_blunder, name, os.path.join(outputDir, name, 'blunder'))
    del df_blunder
    make_df_mmaps(df_non_blunder, name, os.path.join(outputDir, name, 'nonblunder'))
Example #15
def load_csv(target_path, nrows):
    maia_chess_backend.printWithDate(f"Loading: {target_path}", flush = True)
    return pandas.read_csv(target_path, usecols=target_columns, nrows = nrows)