Example 1
def evaluate(prev_model,
             cur_model,
             readouts=200,
             verbose=1,
             resign_threshold=0.95):
    '''Return True if cur_model should be used in future games.'''
    prev_model_save_path = os.path.join(MODELS_DIR, prev_model)
    cur_model_save_path = os.path.join(MODELS_DIR, cur_model)
    game_output_dir = os.path.join(SELFPLAY_DIR, cur_model)
    game_holdout_dir = os.path.join(HOLDOUT_DIR, cur_model)
    sgf_dir = os.path.join(SGF_DIR, cur_model)
    cur_win_pct = main.evaluate_evenly(prev_model_save_path,
                                       cur_model_save_path,
                                       game_output_dir,
                                       readouts=readouts,
                                       games=goparams.EVAL_GAMES_PER_SIDE)

    print('Evaluation win pct = ', cur_win_pct)

    qmeas.record('evaluate_win_pct', cur_win_pct)
    keep = False
    if cur_win_pct >= goparams.EVAL_WIN_PCT_FOR_NEW_MODEL:
        qmeas.record('evaluate_choice', 'new')
        keep = True
    else:
        qmeas.record('evaluate_choice', 'old')
        keep = False
    qmeas.record('eval_summary', {
        'win_pct': cur_win_pct,
        'model': cur_model,
        'keep': keep
    })
    # Note: the gating decision is only recorded above; this version of the
    # function always returns False, so the new model is never promoted here.
    # return keep
    return False
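
For reference, a minimal sketch of how a caller might consume this gating decision, using the get_latest_model, train, and bury_latest_model helpers that appear in the next example; the wrapper function name is hypothetical. With the hard-coded return False above the candidate is always retired, so real gating needs the variant in Example 3, which returns keep.

def gate_candidate_model():
    # Hypothetical wrapper: train a candidate from the latest game data, then
    # retire it unless evaluate() reports that it beat the previous best model.
    _, prev_model = get_latest_model()
    new_model = train()
    if not evaluate(prev_model, new_model):
        bury_latest_model()
    return new_model
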
Example 2
def rl_loop():
    """Run the reinforcement learning loop

    This tries to create a realistic way to run reinforcement learning with
    all default parameters.
    """

    if goparams.DUMMY_MODEL:
        # monkeypatch the hyperparams so that we get a quickly executing network.
        dual_net.get_default_hyperparams = lambda **kwargs: {
            'k': 8, 'fc_width': 16, 'num_shared_layers': 1, 'l2_strength': 1e-4, 'momentum': 0.9}

        dual_net.TRAIN_BATCH_SIZE = 16
        dual_net.EXAMPLES_PER_GENERATION = 64

        # monkeypatch the shuffle buffer size so we don't spin forever shuffling up positions.
        preprocessing.SHUFFLE_BUFFER_SIZE = 1000

    qmeas.stop_time('selfplay_wait')
    print("Gathering game output...")
    gather()

    print("Training on gathered game data...")
    _, model_name = get_latest_model()
    new_model = train()

    if goparams.EVALUATE_PUZZLES:
        qmeas.start_time('puzzle')
        new_model_path = os.path.join(MODELS_DIR, new_model)
        sgf_files = [
            './benchmark_sgf/9x9_pro_YKSH.sgf',
            './benchmark_sgf/9x9_pro_IYMD.sgf',
            './benchmark_sgf/9x9_pro_YSIY.sgf',
            './benchmark_sgf/9x9_pro_IYHN.sgf',
        ]
        result, total_pct = predict_games.report_for_puzzles(
            new_model_path, sgf_files, 2, tries_per_move=1)
        print('accuracy = ', total_pct)
        qmeas.record('puzzle_total', total_pct)
        qmeas.record('puzzle_result', repr(result))
        qmeas.record('puzzle_summary', {
            'results': repr(result),
            'total_pct': total_pct,
            'model': new_model
        })
        qmeas._flush()
        with open(os.path.join(BASE_DIR, new_model + '-puzzles.txt'), 'w') as f:
            f.write(repr(result))
            f.write('\n' + str(total_pct) + '\n')
        qmeas.stop_time('puzzle')
        if total_pct >= goparams.TERMINATION_ACCURACY:
            print('Reached termination accuracy: ', goparams.TERMINATION_ACCURACY)
            with open('TERMINATE_FLAG', 'w') as f:
                f.write(repr(result))
                f.write('\n' + str(total_pct) + '\n')

    if goparams.EVALUATE_MODELS:
        if not evaluate(model_name, new_model):
            bury_latest_model()
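
rl_loop() performs a single gather/train/evaluate pass; the outer driver that repeats it is not shown in these examples. A minimal sketch of such a driver follows, assuming the only stop signal is the TERMINATE_FLAG file written above; the __main__ guard and the iteration cap are illustrative, not the project's actual entry point, and it glosses over the fact that in the real pipeline self-play runs in separate processes and the 'selfplay_wait' qmeas timer is started elsewhere.

if __name__ == '__main__':
    # Illustrative driver: repeat the gather/train/evaluate pass until rl_loop
    # signals success by writing TERMINATE_FLAG, or until a safety cap is hit.
    for i in range(100):  # arbitrary upper bound for the sketch
        rl_loop()
        if os.path.exists('TERMINATE_FLAG'):
            print('Termination accuracy reached after', i + 1, 'iterations.')
            break
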
Example 3
def evaluate(prev_model, cur_model, readouts=200, verbose=1, resign_threshold=0.95):
    '''Return True if cur_model should be used in future games.'''
    prev_model_save_path = os.path.join(MODELS_DIR, prev_model)
    cur_model_save_path = os.path.join(MODELS_DIR, cur_model)
    game_output_dir = os.path.join(SELFPLAY_DIR, cur_model)
    game_holdout_dir = os.path.join(HOLDOUT_DIR, cur_model)
    sgf_dir = os.path.join(SGF_DIR, cur_model)
    cur_win_pct = main.evaluate_evenly(
        prev_model_save_path, cur_model_save_path, game_output_dir,
        readouts=readouts, games=goparams.EVAL_GAMES_PER_SIDE)

    print('Evaluation win pct = ', cur_win_pct)

    qmeas.record('evaluate_win_pct', cur_win_pct)
    keep = False
    if cur_win_pct >= goparams.EVAL_WIN_PCT_FOR_NEW_MODEL:
        qmeas.record('evaluate_choice', 'new')
        keep = True
    else:
        qmeas.record('evaluate_choice', 'old')
        keep = False
    qmeas.record('eval_summary', {
        'win_pct': cur_win_pct,
        'model': cur_model,
        'keep': keep
    })
    return keep


def rl_loop_eval():
    """Run the reinforcement learning loop

    This tries to create a realistic way to run the reinforcement learning with
    all default parameters.
    """

    (_, new_model) = get_latest_model()

    qmeas.start_time('puzzle')
    new_model_path = os.path.join(MODELS_DIR, new_model)
    sgf_files = [
        './benchmark_sgf/9x9_pro_YKSH.sgf',
        './benchmark_sgf/9x9_pro_IYMD.sgf',
        './benchmark_sgf/9x9_pro_YSIY.sgf',
        './benchmark_sgf/9x9_pro_IYHN.sgf',
    ]
    result, total_pct = predict_games.report_for_puzzles_parallel(
        new_model_path, sgf_files, 2, tries_per_move=1)
    #result, total_pct = predict_games.report_for_puzzles(new_model_path, sgf_files, 2, tries_per_move=1)
    print('accuracy = ', total_pct)
    print('result = ', result)
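    # `iteration` below is assumed to be a module-level counter for the current
    # RL loop pass; it is not defined anywhere in this snippet.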
    mlperf_log.minigo_print(key=mlperf_log.EVAL_ACCURACY,
                            value={
                                "epoch": iteration,
                                "value": total_pct
                            })
    mlperf_log.minigo_print(key=mlperf_log.EVAL_TARGET,
                            value=goparams.TERMINATION_ACCURACY)
    qmeas.record('puzzle_total', total_pct)
    qmeas.record('puzzle_result', repr(result))
    qmeas.record('puzzle_summary', {
        'results': repr(result),
        'total_pct': total_pct,
        'model': new_model
    })
    qmeas._flush()
    with open(os.path.join(BASE_DIR, new_model + '-puzzles.txt'), 'w') as f:
        f.write(repr(result))
        f.write('\n' + str(total_pct) + '\n')
    qmeas.stop_time('puzzle')
    if total_pct >= goparams.TERMINATION_ACCURACY:
        print('Reached termination accuracy: ', goparams.TERMINATION_ACCURACY)
        mlperf_log.minigo_print(key=mlperf_log.RUN_STOP,
                                value={"success": True})
        with open('TERMINATE_FLAG', 'w') as f:
            f.write(repr(result))
            f.write('\n' + str(total_pct) + '\n')
    qmeas.end()