def rl_loop():
    """Run the reinforcement learning loop

    This tries to create a realistic way to run the reinforcement learning with
    all default parameters.
    """

    if goparams.DUMMY_MODEL:
        # monkeypatch the hyperparams so that we get a quickly executing network.
        dual_net.get_default_hyperparams = lambda **kwargs: {
            'k': 8, 'fc_width': 16, 'num_shared_layers': 1, 'l2_strength': 1e-4, 'momentum': 0.9}

        dual_net.TRAIN_BATCH_SIZE = 16
        dual_net.EXAMPLES_PER_GENERATION = 64

        # monkeypatch the shuffle buffer size so we don't spin forever shuffling up positions.
        preprocessing.SHUFFLE_BUFFER_SIZE = 1000

    qmeas.stop_time('selfplay_wait')
    print("Gathering game output...")
    gather()

    print("Training on gathered game data...")
    _, model_name = get_latest_model()
    new_model = train()

    if goparams.EVALUATE_PUZZLES:
        qmeas.start_time('puzzle')
        new_model_path = os.path.join(MODELS_DIR, new_model)
        sgf_files = [
            './benchmark_sgf/9x9_pro_YKSH.sgf',
            './benchmark_sgf/9x9_pro_IYMD.sgf',
            './benchmark_sgf/9x9_pro_YSIY.sgf',
            './benchmark_sgf/9x9_pro_IYHN.sgf',
        ]
        result, total_pct = predict_games.report_for_puzzles(
            new_model_path, sgf_files, 2, tries_per_move=1)
        print('accuracy = ', total_pct)
        qmeas.record('puzzle_total', total_pct)
        qmeas.record('puzzle_result', repr(result))
        qmeas.record('puzzle_summary', {'results': repr(result),
                                        'total_pct': total_pct,
                                        'model': new_model})
        qmeas._flush()
        with open(os.path.join(BASE_DIR, new_model + '-puzzles.txt'), 'w') as f:
            f.write(repr(result))
            f.write('\n' + str(total_pct) + '\n')
        qmeas.stop_time('puzzle')
        if total_pct >= goparams.TERMINATION_ACCURACY:
            print('Reached termination accuracy:', goparams.TERMINATION_ACCURACY)
            with open('TERMINATE_FLAG', 'w') as f:
                f.write(repr(result))
                f.write('\n' + str(total_pct) + '\n')

    if goparams.EVALUATE_MODELS:
        if not evaluate(model_name, new_model):
            bury_latest_model()


def rl_loop_eval():
    """Run the reinforcement learning loop

    This tries to create a realistic way to run the reinforcement learning with
    all default parameters.
    """

    (_, new_model) = get_latest_model()

    qmeas.start_time('puzzle')
    new_model_path = os.path.join(MODELS_DIR, new_model)
    sgf_files = [
        './benchmark_sgf/9x9_pro_YKSH.sgf',
        './benchmark_sgf/9x9_pro_IYMD.sgf',
        './benchmark_sgf/9x9_pro_YSIY.sgf',
        './benchmark_sgf/9x9_pro_IYHN.sgf',
    ]
    result, total_pct = predict_games.report_for_puzzles_parallel(
        new_model_path, sgf_files, 2, tries_per_move=1)
    #result, total_pct = predict_games.report_for_puzzles(new_model_path, sgf_files, 2, tries_per_move=1)
    print('accuracy = ', total_pct)
    print('result = ', result)
    mlperf_log.minigo_print(key=mlperf_log.EVAL_ACCURACY,
                            value={
                                "epoch": iteration,
                                "value": total_pct
                            })
    mlperf_log.minigo_print(key=mlperf_log.EVAL_TARGET,
                            value=goparams.TERMINATION_ACCURACY)
    qmeas.record('puzzle_total', total_pct)
    qmeas.record('puzzle_result', repr(result))
    qmeas.record('puzzle_summary', {
        'results': repr(result),
        'total_pct': total_pct,
        'model': new_model
    })
    qmeas._flush()
    with open(os.path.join(BASE_DIR, new_model + '-puzzles.txt'), 'w') as f:
        f.write(repr(result))
        f.write('\n' + str(total_pct) + '\n')
    qmeas.stop_time('puzzle')
    if total_pct >= goparams.TERMINATION_ACCURACY:
        print('Reached termination accuracy:', goparams.TERMINATION_ACCURACY)
        mlperf_log.minigo_print(key=mlperf_log.RUN_STOP,
                                value={"success": True})
        with open('TERMINATE_FLAG', 'w') as f:
            f.write(repr(result))
            f.write('\n' + str(total_pct) + '\n')
    qmeas.end()
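

# A minimal sketch of how these two entry points might be driven from the
# command line. The main() wrapper, the --eval-only flag, and the explicit
# "import sys" are illustrative assumptions, not part of the original module.
import sys


def main(argv):
    """Hypothetical driver: run one RL iteration, or only the puzzle eval."""
    if '--eval-only' in argv:
        rl_loop_eval()
    else:
        rl_loop()


if __name__ == '__main__':
    main(sys.argv[1:])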
Example #3
def rl_loop():
    """Run the reinforcement learning loop

    This is meant to be more of an integration test than a realistic way to run
    the reinforcement learning.
    """
    # monkeypatch the hyperparams so that we get a quickly executing network.
    dual_net.get_default_hyperparams = lambda **kwargs: {
        'k': 8,
        'fc_width': 16,
        'num_shared_layers': 1,
        'l2_strength': 1e-4,
        'momentum': 0.9
    }

    dual_net.TRAIN_BATCH_SIZE = 16
    dual_net.EXAMPLES_PER_GENERATION = 64

    # monkeypatch the shuffle buffer size so we don't spin forever shuffling up positions.
    preprocessing.SHUFFLE_BUFFER_SIZE = 1000

    # with tempfile.TemporaryDirectory() as base_dir:
    base_dir = "/tmp/minigo"
    with open('/tmp/foo', 'w') as fff:
        working_dir = os.path.join(base_dir, 'models_in_training')
        model_save_path = os.path.join(base_dir, 'models', '000000-bootstrap')
        next_model_save_file = os.path.join(base_dir, 'models',
                                            '000001-nextmodel')
        selfplay_dir = os.path.join(base_dir, 'data', 'selfplay')
        model_selfplay_dir = os.path.join(selfplay_dir, '000000-bootstrap')
        gather_dir = os.path.join(base_dir, 'data', 'training_chunks')
        holdout_dir = os.path.join(base_dir, 'data', 'holdout',
                                   '000000-bootstrap')
        sgf_dir = os.path.join(base_dir, 'sgf', '000000-bootstrap')
        os.makedirs(os.path.join(base_dir, 'data'), exist_ok=True)

        print("Creating random initial weights...")
        main.bootstrap(working_dir, model_save_path)
        for i in range(100):
            qmeas.start_time('main-loop')
            print("Playing some games...")
            # Do two selfplay runs to test gather functionality
            qmeas.start_time('main-loop-self-play')
            for j in range(2):
                main.selfplay(load_file=model_save_path,
                              output_dir=model_selfplay_dir,
                              output_sgf=sgf_dir,
                              holdout_pct=0,
                              readouts=10)
            qmeas.stop_time('main-loop-self-play')
            # Do one holdout run to test validation
            qmeas.start_time('main-loop-self-play-holdout')
            main.selfplay(load_file=model_save_path,
                          holdout_dir=holdout_dir,
                          output_dir=model_selfplay_dir,
                          output_sgf=sgf_dir,
                          holdout_pct=100,
                          readouts=10)
            qmeas.stop_time('main-loop-self-play-holdout')

            print("See sgf files here?")
            sgf_listing = subprocess.check_output(
                ["ls", "-l", sgf_dir + "/full"])
            print(sgf_listing.decode("utf-8"))

            print("Gathering game output...")
            qmeas.start_time('main-loop-gather')
            main.gather(input_directory=selfplay_dir,
                        output_directory=gather_dir)
            qmeas.stop_time('main-loop-gather')
            print("Training on gathered game data...")
            qmeas.start_time('main-loop-train')
            main.train(working_dir,
                       gather_dir,
                       next_model_save_file,
                       generation_num=1)
            qmeas.stop_time('main-loop-train')
            print("Trying validate on 'holdout' game...")
            qmeas.start_time('main-loop-validate')
            main.validate(working_dir, holdout_dir)
            qmeas.stop_time('main-loop-validate')
            print("Verifying that new checkpoint is playable...")
            main.selfplay(load_file=next_model_save_file,
                          holdout_dir=holdout_dir,
                          output_dir=model_selfplay_dir,
                          output_sgf=sgf_dir,
                          readouts=10)
            qmeas.stop_time('main-loop')
            qmeas._flush()
Example #4
def rl_loop():
    """Run the reinforcement learning loop

    This is meant to be more of an integration test than a realistic way to run
    the reinforcement learning.
    """
    # monkeypatch the hyperparams so that we get a quickly executing network.
    dual_net.get_default_hyperparams = lambda **kwargs: {
        'k': 8, 'fc_width': 16, 'num_shared_layers': 1, 'l2_strength': 1e-4, 'momentum': 0.9}

    dual_net.TRAIN_BATCH_SIZE = 16
    dual_net.EXAMPLES_PER_GENERATION = 64

    # monkeypatch the shuffle buffer size so we don't spin forever shuffling up positions.
    preprocessing.SHUFFLE_BUFFER_SIZE = 1000

    # with tempfile.TemporaryDirectory() as base_dir:
    base_dir = "/tmp/minigo"
    with open('/tmp/foo', 'w') as fff:
        working_dir = os.path.join(base_dir, 'models_in_training')
        model_save_path = os.path.join(base_dir, 'models', '000000-bootstrap')
        next_model_save_file = os.path.join(base_dir, 'models', '000001-nextmodel')
        selfplay_dir = os.path.join(base_dir, 'data', 'selfplay')
        model_selfplay_dir = os.path.join(selfplay_dir, '000000-bootstrap')
        gather_dir = os.path.join(base_dir, 'data', 'training_chunks')
        holdout_dir = os.path.join(
            base_dir, 'data', 'holdout', '000000-bootstrap')
        sgf_dir = os.path.join(base_dir, 'sgf', '000000-bootstrap')
        os.makedirs(os.path.join(base_dir, 'data'), exist_ok=True)

        print("Creating random initial weights...")
        main.bootstrap(working_dir, model_save_path)
        for i in range(100):
            qmeas.start_time('main-loop')
            print("Playing some games...")
            # Do two selfplay runs to test gather functionality
            qmeas.start_time('main-loop-self-play')
            for j in range(2):
                main.selfplay(
                    load_file=model_save_path,
                    output_dir=model_selfplay_dir,
                    output_sgf=sgf_dir,
                    holdout_pct=0,
                    readouts=10)
            qmeas.stop_time('main-loop-self-play')
            # Do one holdout run to test validation
            qmeas.start_time('main-loop-self-play-holdout')
            main.selfplay(
                load_file=model_save_path,
                holdout_dir=holdout_dir,
                output_dir=model_selfplay_dir,
                output_sgf=sgf_dir,
                holdout_pct=100,
                readouts=10)
            qmeas.stop_time('main-loop-self-play-holdout')

            print("See sgf files here?")
            sgf_listing = subprocess.check_output(["ls", "-l", sgf_dir + "/full"])
            print(sgf_listing.decode("utf-8"))

            print("Gathering game output...")
            qmeas.start_time('main-loop-gather')
            main.gather(input_directory=selfplay_dir, output_directory=gather_dir)
            qmeas.stop_time('main-loop-gather')
            print("Training on gathered game data...")
            qmeas.start_time('main-loop-train')
            main.train(working_dir, gather_dir, next_model_save_file, generation_num=1)
            qmeas.stop_time('main-loop-train')
            print("Trying validate on 'holdout' game...")
            qmeas.start_time('main-loop-validate')
            main.validate(working_dir, holdout_dir)
            qmeas.stop_time('main-loop-validate')
            print("Verifying that new checkpoint is playable...")
            main.selfplay(
                load_file=next_model_save_file,
                holdout_dir=holdout_dir,
                output_dir=model_selfplay_dir,
                output_sgf=sgf_dir,
                readouts=10)
            qmeas.stop_time('main-loop')
            qmeas._flush()