def rl_loop_eval():
    """Evaluate the latest model on the benchmark puzzle set.

    This runs only the puzzle-evaluation phase of the reinforcement learning
    loop, with all default parameters.
    """

    (_, new_model) = get_latest_model()
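    # new_model is the name of the most recent checkpoint; everything below
    # evaluates that single model against the benchmark puzzle games.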

    qmeas.start_time('puzzle')
    new_model_path = os.path.join(MODELS_DIR, new_model)
    sgf_files = [
        './benchmark_sgf/9x9_pro_YKSH.sgf',
        './benchmark_sgf/9x9_pro_IYMD.sgf',
        './benchmark_sgf/9x9_pro_YSIY.sgf',
        './benchmark_sgf/9x9_pro_IYHN.sgf',
    ]
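    # report_for_puzzles_parallel() is assumed to replay these professional 9x9
    # games and score how often the model reproduces the pro move; total_pct is
    # reported below as the evaluation accuracy.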
    result, total_pct = predict_games.report_for_puzzles_parallel(
        new_model_path, sgf_files, 2, tries_per_move=1)
    #result, total_pct = predict_games.report_for_puzzles(new_model_path, sgf_files, 2, tries_per_move=1)
    print('accuracy = ', total_pct)
    print('result = ', result)
    mlperf_log.minigo_print(key=mlperf_log.EVAL_ACCURACY,
                            value={
                                "epoch": iteration,
                                "value": total_pct
                            })
    mlperf_log.minigo_print(key=mlperf_log.EVAL_TARGET,
                            value=goparams.TERMINATION_ACCURACY)
    qmeas.record('puzzle_total', total_pct)
    qmeas.record('puzzle_result', repr(result))
    qmeas.record('puzzle_summary', {
        'results': repr(result),
        'total_pct': total_pct,
        'model': new_model
    })
    qmeas._flush()
    with open(os.path.join(BASE_DIR, new_model + '-puzzles.txt'), 'w') as f:
        f.write(repr(result))
        f.write('\n' + str(total_pct) + '\n')
    qmeas.stop_time('puzzle')
    if total_pct >= goparams.TERMINATION_ACCURACY:
        print('Reached termination accuracy: ', goparams.TERMINATION_ACCURACY)
        mlperf_log.minigo_print(key=mlperf_log.RUN_STOP,
                                value={"success": True})
        with open('TERMINATE_FLAG', 'w') as f:
            f.write(repr(result))
            f.write('\n' + str(total_pct) + '\n')
    qmeas.end()
Example 2

def rl_loop():
    """Run the reinforcement learning loop

    This tries to create a realistic way to run the reinforcement learning with
    all default parameters.
    """

    if goparams.DUMMY_MODEL:
        # monkeypatch the hyperparams so that we get a quickly executing network.
        dual_net.get_default_hyperparams = lambda **kwargs: {
            'k': 8,
            'fc_width': 16,
            'num_shared_layers': 1,
            'l2_strength': 1e-4,
            'momentum': 0.9
        }

        dual_net.TRAIN_BATCH_SIZE = 16
        dual_net.EXAMPLES_PER_GENERATION = 64

        # monkeypatch the shuffle buffer size so we don't spin forever shuffling up positions.
        preprocessing.SHUFFLE_BUFFER_SIZE = 1000

    qmeas.stop_time('selfplay_wait')
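    # gather() is assumed to collect the selfplay games produced since the last
    # iteration into training examples for the next generation.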
    print("Gathering game output...")
    gather()

    print("Training on gathered game data...")
    _, model_name = get_latest_model()
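    # train() builds the candidate model from the gathered examples; model_name
    # (the current best) is kept for the head-to-head evaluation below.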
    new_model = train()

    if goparams.EVALUATE_PUZZLES:

        qmeas.start_time('puzzle')
        new_model_path = os.path.join(MODELS_DIR, new_model)
        sgf_files = [
            './benchmark_sgf/9x9_pro_YKSH.sgf',
            './benchmark_sgf/9x9_pro_IYMD.sgf',
            './benchmark_sgf/9x9_pro_YSIY.sgf',
            './benchmark_sgf/9x9_pro_IYHN.sgf',
        ]
        result, total_pct = predict_games.report_for_puzzles(new_model_path,
                                                             sgf_files,
                                                             2,
                                                             tries_per_move=1)
        print('accuracy = ', total_pct)
        mlperf_log.minigo_print(key=mlperf_log.EVAL_ACCURACY,
                                value={
                                    "iteration": iteration,
                                    "value": total_pct
                                })
        mlperf_log.minigo_print(key=mlperf_log.EVAL_TARGET,
                                value=goparams.TERMINATION_ACCURACY)

        qmeas.record('puzzle_total', total_pct)
        qmeas.record('puzzle_result', repr(result))
        qmeas.record('puzzle_summary', {
            'results': repr(result),
            'total_pct': total_pct,
            'model': new_model
        })
        qmeas._flush()
        with open(os.path.join(BASE_DIR, new_model + '-puzzles.txt'),
                  'w') as f:
            f.write(repr(result))
            f.write('\n' + str(total_pct) + '\n')
        qmeas.stop_time('puzzle')
        if total_pct >= goparams.TERMINATION_ACCURACY:
            print('Reached termination accuracy: ',
                  goparams.TERMINATION_ACCURACY)

            mlperf_log.minigo_print(key=mlperf_log.RUN_STOP,
                                    value={"success": True})

            with open('TERMINATE_FLAG', 'w') as f:
                f.write(repr(result))
                f.write('\n' + str(total_pct) + '\n')

    if goparams.EVALUATE_MODELS:
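        # Pit the candidate against the current best model; bury_latest_model()
        # is assumed to shelve the candidate when it loses the match.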
        if not evaluate(model_name, new_model):
            bury_latest_model()
Example 3

if __name__ == '__main__':
    #tf.logging.set_verbosity(tf.logging.INFO)
    seed = int(sys.argv[1])
    iteration = int(sys.argv[2])
    print('Setting random seed, iteration = ', seed, iteration)
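    # Mix the iteration index into the seed so every iteration is reproducible
    # yet distinct, then seed Python, TensorFlow, and NumPy from the same value.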
    seed = hash(seed) + iteration
    print("training seed: ", seed)
    random.seed(seed)
    tf.set_random_seed(seed)
    numpy.random.seed(seed)

    qmeas.start(os.path.join(BASE_DIR, 'stats'))
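    # qmeas is pointed at BASE_DIR/stats for its timing and metric records.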
    # get TF logger
    log = logging.getLogger('tensorflow')
    log.setLevel(logging.DEBUG)

    # create formatter and add it to the handlers
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')

    # create file handler which logs even debug messages
    fh = logging.FileHandler('tensorflow.log')
    fh.setLevel(logging.DEBUG)
    fh.setFormatter(formatter)
    log.addHandler(fh)
    rl_loop()
    qmeas.end()
    mlperf_log.minigo_print(key=mlperf_log.EVAL_STOP, value=iteration)
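The entry point above runs a single iteration: it takes a seed and an iteration index on the command line, trains and evaluates one generation, and signals success by writing a TERMINATE_FLAG file. A minimal sketch of an outer driver that could wrap it is shown next; the script filename (rl_loop.py) and the iteration budget are assumptions, not part of the original example.

import os
import subprocess
import sys


def drive_loop(num_iterations=20, seed=17):
    """Run the per-iteration script until it reports the target accuracy."""
    for iteration in range(num_iterations):
        # Each invocation seeds itself from (seed, iteration), trains one
        # generation, and evaluates it on the benchmark puzzle set.
        subprocess.check_call(
            [sys.executable, 'rl_loop.py', str(seed), str(iteration)])
        # The per-iteration script writes TERMINATE_FLAG once total_pct reaches
        # goparams.TERMINATION_ACCURACY.
        if os.path.exists('TERMINATE_FLAG'):
            print('Termination accuracy reached at iteration', iteration)
            break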
Example 4

def main_fn():
    if goparams.DUMMY_MODEL:
        # monkeypatch the shuffle buffer size so we don't spin forever shuffling up positions.
        preprocessing.SHUFFLE_BUFFER_SIZE = 1000

    print("Creating random initial weights...")
    bootstrap()



if __name__ == '__main__':
    #tf.logging.set_verbosity(tf.logging.INFO)
    qmeas.start(os.path.join(BASE_DIR, 'stats'))

    # get TF logger
    log = logging.getLogger('tensorflow')
    log.setLevel(logging.DEBUG)

    # create formatter and add it to the handlers
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')

    # create file handler which logs even debug messages
    fh = logging.FileHandler('tensorflow.log')
    fh.setLevel(logging.DEBUG)
    fh.setFormatter(formatter)
    log.addHandler(fh)

    # mlperf logging for starting the entire run
    mlperf_log.minigo_print(key=mlperf_log.RUN_START)

    main_fn()
    qmeas.end()