Example #1
def main(unused_argv):
  """Run the reinforcement learning loop."""

  print('Wiping dir %s' % FLAGS.base_dir, flush=True)
  shutil.rmtree(FLAGS.base_dir, ignore_errors=True)
  dirs = [fsdb.models_dir(), fsdb.selfplay_dir(), fsdb.holdout_dir(),
          fsdb.eval_dir(), fsdb.golden_chunk_dir(), fsdb.working_dir()]
  for d in dirs:
    ensure_dir_exists(d)

  # Copy the flag files so there's no chance of them getting accidentally
  # overwritten while the RL loop is running.
  flags_dir = os.path.join(FLAGS.base_dir, 'flags')
  shutil.copytree(FLAGS.flags_dir, flags_dir)
  FLAGS.flags_dir = flags_dir

  # Copy the target model to the models directory so we can find it easily.
  for file_name in [
        "target.pb", "target_raw.ckpt.data-00000-of-00001",
        "target_raw.ckpt.index", "target_raw.ckpt.meta"]:
    shutil.copy(FLAGS.target_path[:-len("target.pb")] + file_name,
                os.path.join(fsdb.models_dir(), file_name))

  logging.getLogger().addHandler(
      logging.FileHandler(os.path.join(FLAGS.base_dir, 'rl_loop.log')))
  formatter = logging.Formatter('[%(asctime)s] %(message)s',
                                '%Y-%m-%d %H:%M:%S')
  for handler in logging.getLogger().handlers:
    handler.setFormatter(formatter)

  with logged_timer('Total time'):
    try:
      rl_loop()
    finally:
      asyncio.get_event_loop().close()
def main(unused_argv):
    """Run the reinforcement learning loop."""

    print('Wiping dir %s' % FLAGS.base_dir, flush=True)
    shutil.rmtree(FLAGS.base_dir, ignore_errors=True)

    utils.ensure_dir_exists(fsdb.models_dir())
    utils.ensure_dir_exists(fsdb.selfplay_dir())
    utils.ensure_dir_exists(fsdb.holdout_dir())
    utils.ensure_dir_exists(fsdb.eval_dir())
    utils.ensure_dir_exists(fsdb.golden_chunk_dir())
    utils.ensure_dir_exists(fsdb.working_dir())

    # Copy the target model to the models directory so we can find it easily.
    shutil.copy('ml_perf/target.pb', fsdb.models_dir())

    logging.getLogger().addHandler(
        logging.FileHandler(os.path.join(FLAGS.base_dir, 'reinforcement.log')))
    formatter = logging.Formatter('[%(asctime)s] %(message)s',
                                  '%Y-%m-%d %H:%M:%S')
    for handler in logging.getLogger().handlers:
        handler.setFormatter(formatter)

    with utils.logged_timer('Total time'):
        for target_win_rate in rl_loop():
            if target_win_rate > 0.5:
                return logging.info('Passed exit criteria.')
        logging.info('Failed to converge.')
async def start_selfplay():
    output_dir = os.path.join(fsdb.selfplay_dir(), "$MODEL")
    holdout_dir = os.path.join(fsdb.holdout_dir(), "$MODEL")
    model_pattern = os.path.join(fsdb.models_dir(), '%d.pb')

    logs = []
    processes = []
    loop = asyncio.get_event_loop()
    for i, device in enumerate(FLAGS.selfplay_devices):
        cmd = [
            'bazel-bin/cc/concurrent_selfplay',
            '--flagfile={}'.format(os.path.join(FLAGS.flags_dir,
                                                'selfplay.flags')),
            '--run_forever=1',
            '--device={}'.format(device),
            '--model={}'.format(model_pattern),
            '--output_dir={}/{}'.format(output_dir, i),
            '--holdout_dir={}/{}'.format(holdout_dir, i)]

        cmd_str = await expand_cmd_str(cmd)
        f = open(os.path.join(FLAGS.base_dir, 'selfplay_%d.log' % i), 'w')
        f.write(cmd_str + '\n\n')
        f.flush()
        logging.info('Running: %s', cmd_str)

        processes.append(await asyncio.create_subprocess_exec(
            *cmd, stdout=f, stderr=asyncio.subprocess.STDOUT))
        logs.append(f)

    return (processes, logs)
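A hedged sketch of how a caller might later tear down the processes and log files returned above; the helper name and cleanup order are assumptions, not part of the original code.

async def stop_selfplay(processes, logs):
    # Ask each selfplay worker to exit, then wait for all of them to finish.
    for p in processes:
        p.terminate()
    await asyncio.gather(*[p.wait() for p in processes])
    # Close the per-process log files opened by start_selfplay().
    for f in logs:
        f.close()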
def smart_rsync(from_model_num=0, source_dir=None, dest_dir=LOCAL_DIR):
    source_dir = source_dir or fsdb.selfplay_dir()
    from_model_num = 0 if from_model_num < 0 else from_model_num
    models = [m for m in fsdb.get_models() if m[0] >= from_model_num]
    for _, model in models:
        _rsync_dir(os.path.join(source_dir, model),
                   os.path.join(dest_dir, model))
async def sample_training_examples(state):
    """Sample training examples from recent selfplay games.

    Args:
        state: the RL loop State instance.

    Returns:
        A list of golden chunks up to num_records in length, sorted by path.
    """

    dirs = [x.path for x in os.scandir(fsdb.selfplay_dir()) if x.is_dir()]
    src_patterns = []
    for d in sorted(dirs, reverse=True)[:FLAGS.window_size]:
        src_patterns.append(os.path.join(d, '*', '*', '*.tfrecord.zz'))

    dst_path = os.path.join(fsdb.golden_chunk_dir(),
                            '{}.tfrecord.zz'.format(state.train_model_name))

    logging.info('Writing training chunks to %s', dst_path)
    lines = await sample_records(src_patterns,
                                 dst_path,
                                 num_read_threads=8,
                                 num_write_threads=8,
                                 sample_frac=FLAGS.train_filter)
    logging.info('\n'.join(lines))

    chunk_pattern = os.path.join(
        fsdb.golden_chunk_dir(),
        '{}-*-of-*.tfrecord.zz'.format(state.train_model_name))
    chunk_paths = sorted(tf.gfile.Glob(chunk_pattern))
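    # Assumption: sample_records shards its output across the write threads,
    # so num_write_threads=8 above should yield exactly 8 chunk files.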
    assert len(chunk_paths) == 8

    return chunk_paths
def main(unused_argv):
    """Run the reinforcement learning loop."""

    print('Wiping dir %s' % FLAGS.base_dir, flush=True)
    shutil.rmtree(FLAGS.base_dir, ignore_errors=True)

    utils.ensure_dir_exists(fsdb.models_dir())
    utils.ensure_dir_exists(fsdb.selfplay_dir())
    utils.ensure_dir_exists(fsdb.holdout_dir())
    utils.ensure_dir_exists(fsdb.eval_dir())
    utils.ensure_dir_exists(fsdb.golden_chunk_dir())
    utils.ensure_dir_exists(fsdb.working_dir())

    # Copy the flag files so there's no chance of them getting accidentally
    # overwritten while the RL loop is running.
    flags_dir = os.path.join(FLAGS.base_dir, 'flags')
    shutil.copytree(FLAGS.flags_dir, flags_dir)
    FLAGS.flags_dir = flags_dir

    # Copy the target model to the models directory so we can find it easily.
    shutil.copy('ml_perf/target.pb', fsdb.models_dir())

    logging.getLogger().addHandler(
        logging.FileHandler(os.path.join(FLAGS.base_dir, 'rl_loop.log')))
    formatter = logging.Formatter('[%(asctime)s] %(message)s',
                                  '%Y-%m-%d %H:%M:%S')
    for handler in logging.getLogger().handlers:
        handler.setFormatter(formatter)

    with utils.logged_timer('Total time'):
        try:
            rl_loop()
        finally:
            asyncio.get_event_loop().close()
def selfplay(state):
    output_dir = os.path.join(fsdb.selfplay_dir(), state.output_model_name)
    holdout_dir = os.path.join(fsdb.holdout_dir(), state.output_model_name)
    model_path = os.path.join(fsdb.models_dir(), state.best_model_name)

    result = checked_run([
        'bazel-bin/cc/selfplay', '--parallel_games=2048', '--num_readouts=100',
        '--model={}.pb'.format(model_path),
        '--output_dir={}'.format(output_dir),
        '--holdout_dir={}'.format(holdout_dir)
    ] + cc_flags(state), 'selfplay')
    logging.info(get_lines(result, make_slice[-2:]))

    # Write examples to a single record.
    pattern = os.path.join(output_dir, '*', '*.zz')
    random.seed(state.seed)
    tf.set_random_seed(state.seed)
    np.random.seed(state.seed)
    # TODO(tommadams): This method of generating one golden chunk per generation
    # is sub-optimal because each chunk gets reused multiple times for training,
    # introducing bias. Instead, a fresh dataset should be uniformly sampled out
    # of *all* games in the training window before the start of each training run.
    buffer = example_buffer.ExampleBuffer(sampling_frac=1.0)

    # TODO(tommadams): parallel_fill is currently non-deterministic. Make it not
    # so.
    logging.info('Writing golden chunk from "{}"'.format(pattern))
    buffer.parallel_fill(tf.gfile.Glob(pattern))
    buffer.flush(
        os.path.join(fsdb.golden_chunk_dir(),
                     state.output_model_name + '.tfrecord.zz'))
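The TODO above suggests drawing a fresh, uniformly sampled dataset from every game in the training window before each training run. A minimal sketch of that idea, assuming the fsdb layout and ExampleBuffer helpers shown elsewhere in these examples (this is not the project's implementation):

def make_window_chunk(state, window_size, sampling_frac=0.02):
    # Collect selfplay games from the most recent `window_size` generations.
    generations = sorted(tf.gfile.ListDirectory(fsdb.selfplay_dir()), reverse=True)
    files = []
    for gen in generations[:window_size]:
        files.extend(tf.gfile.Glob(os.path.join(fsdb.selfplay_dir(), gen, '*', '*.zz')))

    # Sub-sample positions from the whole window into one fresh golden chunk.
    buffer = example_buffer.ExampleBuffer(sampling_frac=sampling_frac)
    buffer.parallel_fill(files)
    buffer.flush(os.path.join(fsdb.golden_chunk_dir(),
                              state.output_model_name + '.tfrecord.zz'))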
Example #8
def run_tpu(no_resign=False):
    os.environ[
        'GRPC_DEFAULT_SSL_ROOTS_FILE_PATH'] = '/etc/ssl/certs/ca-certificates.crt'
    flagset = [
        'bazel-bin/cc/main', '--mode=selfplay', '--engine=tpu',
        '--model={}'.format(
            os.path.join(fsdb.working_dir(), 'model.ckpt-%d.pb')),
        '--output_dir={}'.format(fsdb.selfplay_dir()),
        '--holdout_dir={}'.format(fsdb.holdout_dir()),
        '--sgf_dir={}'.format(fsdb.sgf_dir()), '--run_forever=true',
        '--output_bigtable={}'.format(FLAGS.output_bigtable)
    ]

    if 'KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS' in os.environ:
        flagset.append('--tpu_name={}'.format(
            os.environ['KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS']))

    if no_resign:
        flagset.extend(['--flagfile=rl_loop/distributed_flags_nr'])
    else:
        flagset.extend([
            '--flags_path={}'.format(fsdb.flags_path()),
            '--flagfile=rl_loop/distributed_flags'
        ])

    mask_flags.checked_run(flagset)
def time_rsync(from_date, source_dir=None, dest_dir=LOCAL_DIR):
    source_dir = source_dir or fsdb.selfplay_dir()
    while from_date < dt.datetime.utcnow():
        src = os.path.join(source_dir, from_date.strftime("%Y-%m-%d-%H"))
        if tf.gfile.Exists(src):
            _rsync_dir(
                src, os.path.join(dest_dir, from_date.strftime("%Y-%m-%d-%H")))
        from_date = from_date + dt.timedelta(hours=1)
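A hedged usage sketch for time_rsync; the six-hour lookback is an arbitrary illustrative value.

# Pull down everything selfplay wrote in roughly the last six hours
# (source_dir defaults to fsdb.selfplay_dir(), dest_dir to LOCAL_DIR).
time_rsync(dt.datetime.utcnow() - dt.timedelta(hours=6))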
Example #10
def run_tpu():
    mask_flags.checked_run([
        'bazel-bin/cc/main', '--mode=selfplay', '--engine=tpu',
        '--checkpoint_dir={}'.format(fsdb.working_dir()),
        '--output_dir={}'.format(fsdb.selfplay_dir()),
        '--holdout_dir={}'.format(fsdb.holdout_dir()), '--sgf_dir={}'.format(
            fsdb.sgf_dir()), '--flags_path={}'.format(fsdb.flags_path()),
        '--run_forever=true', '--flagfile=rl_loop/distributed_flags'
    ])
Example #11
async def selfplay(state):
    """Run selfplay and write a training chunk to the fsdb golden_chunk_dir.

    Args:
        state: the RL loop State instance.
    """

    output_dir = os.path.join(fsdb.selfplay_dir(), state.output_model_name)
    holdout_dir = os.path.join(fsdb.holdout_dir(), state.output_model_name)

    commands = []
    num_selfplay_processes = len(FLAGS.selfplay_devices)
    if num_selfplay_processes == 1:
        commands.append([
            'bazel-bin/cc/selfplay', '--flagfile={}'.format(
                os.path.join(FLAGS.flags_dir, 'selfplay.flags')),
            '--num_games={}'.format(FLAGS.selfplay_num_games),
            '--parallel_games={}'.format(FLAGS.selfplay_num_games_per_thread),
            '--model={}:0,{}'.format(FLAGS.engine, state.best_model_path),
            '--output_dir={}/{}'.format(output_dir, 0),
            '--holdout_dir={}/{}'.format(holdout_dir, 0)
        ])
    else:
        for i, device in enumerate(FLAGS.selfplay_devices):
            a = ((i - 1) *
                 FLAGS.selfplay_num_games) // (num_selfplay_processes - 1)
            b = (i * FLAGS.selfplay_num_games) // (num_selfplay_processes - 1)
            num_games = b - a
            parallel_games = (
                (num_games + FLAGS.selfplay_num_games_per_thread - 1) //
                FLAGS.selfplay_num_games_per_thread)

            commands.append([
                'bazel-bin/cc/selfplay', '--flagfile={}'.format(
                    os.path.join(FLAGS.flags_dir, 'selfplay.flags')),
                '--num_games={}'.format(num_games),
                '--parallel_games={}'.format(parallel_games),
                '--model={}:{},{}'.format(FLAGS.engine, device,
                                          state.best_model_path),
                '--output_dir={}/{}'.format(output_dir, i),
                '--holdout_dir={}/{}'.format(holdout_dir, i)
            ])

    all_lines = await run_commands(commands)

    black_wins_total = white_wins_total = num_games = 0
    for lines in all_lines:
        result = '\n'.join(lines[-6:])
        logging.info(result)
        stats = parse_win_stats_table(result, 1)[0]
        num_games += stats.total_wins
        black_wins_total += stats.black_wins.total
        white_wins_total += stats.white_wins.total

    logging.info('Black won %0.3f, white won %0.3f',
                 black_wins_total / num_games, white_wins_total / num_games)
Example #12
async def bootstrap_selfplay(state):
    output_dir = os.path.join(fsdb.selfplay_dir(), state.train_model_name)
    holdout_dir = os.path.join(fsdb.holdout_dir(), state.train_model_name)

    lines = await run(
        'bazel-bin/cc/concurrent_selfplay',
        '--flagfile={}'.format(os.path.join(FLAGS.flags_dir,
                                            'bootstrap.flags')),
        '--output_dir={}/0'.format(output_dir),
        '--holdout_dir={}/0'.format(holdout_dir))
    logging.info('\n'.join(lines[-6:]))
async def bootstrap_selfplay(state):
    output_dir = os.path.join(fsdb.selfplay_dir(), state.train_model_name)
    holdout_dir = os.path.join(fsdb.holdout_dir(), state.train_model_name)

    features = 'extra' if FLAGS.use_extra_features else 'agz'
    lines = await run(
        'bazel-bin/cc/concurrent_selfplay',
        '--flagfile={}'.format(os.path.join(FLAGS.flags_dir,
                                            'bootstrap.flags')),
        '--model={}:0.4:0.4'.format(features),
        '--num_games={}'.format(FLAGS.min_games_per_iteration),
        '--output_dir={}/0'.format(output_dir),
        '--holdout_dir={}/0'.format(holdout_dir))
    logging.info('\n'.join(lines[-6:]))
Example #14
async def selfplay(state, flagfile='selfplay', seed_factor=0):
  """Run selfplay and write a training chunk to the fsdb golden_chunk_dir.

  Args:
    state: the RL loop State instance.
    flagfile: the name of the flagfile to use for selfplay, either 'selfplay'
        (the default) or 'bootstrap'.
    seed_factor: Factor to increase seed.
  """
  output_dir = os.path.join(fsdb.selfplay_dir(), state.output_model_name)
  holdout_dir = os.path.join(fsdb.holdout_dir(), state.output_model_name)

  lines = await run(
      'bazel-bin/cc/selfplay',
      '--flagfile={}.flags'.format(os.path.join(FLAGS.flags_dir, flagfile)),
      '--model={}'.format(get_ckpt_path(state.best_model_path)),
      '--output_dir={}'.format(output_dir),
      '--holdout_dir={}'.format(holdout_dir),
      '--seed={}'.format(state.seed+100*seed_factor))
  result = '\n'.join(lines[-6:])
  logging.info(result)
  result = '\n'.join(lines[-50:])
  try:
    stats = parse_win_stats_table(result, 1)[0]
    num_games = stats.total_wins
    logging.info('Black won %0.3f, white won %0.3f',
                 stats.black_wins.total / num_games,
                 stats.white_wins.total / num_games)
  except AssertionError:
    # Poplar logging may break the line-extraction approach above.
    logging.error("No results to parse: \n %s" % lines[-50:])

  if not MULTI_SP:
    # Write examples to a single record.
    pattern = os.path.join(output_dir, '*', '*.zz')
    random.seed(state.seed)
    tf.set_random_seed(state.seed)
    np.random.seed(state.seed)
    # TODO(tommadams): This method of generating one golden chunk per generation
    # is sub-optimal because each chunk gets reused multiple times for training,
    # introducing bias. Instead, a fresh dataset should be uniformly sampled out
    # of *all* games in the training window before the start of each training run.
    buffer = example_buffer.ExampleBuffer(sampling_frac=1.0)

    # TODO(tommadams): parallel_fill is currently non-deterministic. Make it not
    # so.
    logging.info('Writing golden chunk from "{}"'.format(pattern))
    buffer.parallel_fill(tf.gfile.Glob(pattern))
    buffer.flush(os.path.join(fsdb.golden_chunk_dir(),
                              state.output_model_name + '.tfrecord.zz'))
def main(unused_argv):
    """Run the reinforcement learning loop."""
    logging.getLogger('mlperf_compliance').propagate = False

    # Multi-node setup.
    if FLAGS.use_multinode:
        mpi_comm = MPI.COMM_WORLD
        mpi_rank = mpi_comm.Get_rank()
        mpi_size = mpi_comm.Get_size()
        print('[MPI Init] MPI rank {}, mpi size is {} host is {}'.format(
            mpi_rank, mpi_size, socket.gethostname()))
    else:
        mpi_comm = None
        mpi_rank = 0
        mpi_size = 1

    print('Wiping dir %s' % FLAGS.base_dir, flush=True)
    shutil.rmtree(FLAGS.base_dir, ignore_errors=True)
    dirs = [
        fsdb.models_dir(),
        fsdb.selfplay_dir(),
        fsdb.holdout_dir(),
        fsdb.eval_dir(),
        fsdb.golden_chunk_dir(),
        fsdb.working_dir()
    ]

    # Use a shared filesystem for data exchange (temporary solution, 5/6/2019).
    if FLAGS.use_multinode:
        ensure_dir_exists(FLAGS.shared_dir_exchange)
    for d in dirs:
        ensure_dir_exists(d)

    # Copy the flag files so there's no chance of them getting accidentally
    # overwritten while the RL loop is running.
    flags_dir = os.path.join(FLAGS.base_dir, 'flags')
    shutil.copytree(FLAGS.flags_dir, flags_dir)
    FLAGS.flags_dir = flags_dir

    # Copy the target model to the models directory so we can find it easily.
    shutil.copy(FLAGS.target_path, os.path.join(fsdb.models_dir(),
                                                'target.pb'))
    shutil.copy(FLAGS.target_path + '.og',
                os.path.join(fsdb.models_dir(), 'target.pb.og'))

    with logged_timer('Total time from mpi_rank={}'.format(mpi_rank)):
        try:
            rl_loop(mpi_comm, mpi_rank, mpi_size)
        finally:
            asyncio.get_event_loop().close()
async def bootstrap_selfplay(state):
    output_name = '000000-000000'
    output_dir = os.path.join(fsdb.selfplay_dir(), output_name)
    holdout_dir = os.path.join(fsdb.holdout_dir(), output_name)
    sgf_dir = os.path.join(fsdb.sgf_dir(), output_name)

    lines = await run(
        'bazel-bin/cc/selfplay',
        '--flagfile={}'.format(os.path.join(FLAGS.flags_dir,
                                            'bootstrap.flags')),
        '--num_games={}'.format(FLAGS.selfplay_num_games),
        '--parallel_games=32', '--model=random:0,0.4:0.4',
        '--output_dir={}/0'.format(output_dir),
        '--holdout_dir={}/0'.format(holdout_dir),
        '--sgf_dir={}'.format(sgf_dir))
    logging.info('\n'.join(lines[-6:]))
async def selfplay(state, flagfile='selfplay'):
    """Run selfplay and write a training chunk to the fsdb golden_chunk_dir.

    Args:
        state: the RL loop State instance.
        flagfile: the name of the flagfile to use for selfplay, either
            'selfplay' (the default) or 'bootstrap'.
    """

    output_dir = os.path.join(fsdb.selfplay_dir(), state.output_model_name)
    holdout_dir = os.path.join(fsdb.holdout_dir(), state.output_model_name)

    # Instead of running 2 workers in 1 process per device, run 2 processes with 1 worker each.
    all_tasks = []
    loop = asyncio.get_event_loop()
    for i in range(FLAGS.num_gpus_selfplay * 2):  # 2 workers per device
        all_tasks.append(
            loop.create_task(
                selfplay_sub(state, output_dir, holdout_dir, flagfile, i)))
    all_lines = await asyncio.gather(*all_tasks, return_exceptions=True)

    black_wins_total = white_wins_total = num_games = 0
    for lines in all_lines:
        if type(lines) == RuntimeError or type(lines) == OSError:
            raise lines
        result = '\n'.join(lines[-6:])
        logging.info(result)
        stats = parse_win_stats_table(result, 1)[0]
        num_games += stats.total_wins
        black_wins_total += stats.black_wins.total
        white_wins_total += stats.white_wins.total

    logging.info('Black won %0.3f, white won %0.3f',
                 black_wins_total / num_games, white_wins_total / num_games)

    # Write examples to a single record.
    pattern = os.path.join(output_dir, '*', '*.zz')
    random.seed(state.seed)
    tf.set_random_seed(state.seed)
    np.random.seed(state.seed)
    logging.info('Writing golden chunk from "{}"'.format(pattern))
    if FLAGS.use_multinode:
        mpi_rank = MPI.COMM_WORLD.Get_rank()
        divide_record(state, pattern, FLAGS.num_gpus_train, mpi_rank)
    else:
        divide_record(state, pattern, FLAGS.num_gpus_train, -1)
Example #18
async def selfplay_multi(state, num_ipus):
  """ Start *num_ipu* selfplay processes """
  output_dir = os.path.join(fsdb.selfplay_dir(), state.output_model_name)
  holdout_dir = os.path.join(fsdb.holdout_dir(), state.output_model_name)
  flagfile = 'selfplay'

  all_tasks = []
  loop = asyncio.get_event_loop()
  for i in range(num_ipus):
    all_tasks.append(loop.create_task(selfplay_sub(state, output_dir, holdout_dir, flagfile, i)))
  all_lines = await asyncio.gather(*all_tasks, return_exceptions=True)

  black_wins_total = white_wins_total = num_games = 0
  for lines in all_lines:
    if type(lines) == RuntimeError or type(lines) == OSError:
      raise lines
    result = '\n'.join(lines[-6:])
    logging.info(result)
    stats = parse_win_stats_table(result, 1)[0]
    num_games += stats.total_wins
    black_wins_total += stats.black_wins.total
    white_wins_total += stats.white_wins.total

  logging.info('Black won %0.3f, white won %0.3f',
               black_wins_total / num_games,
               white_wins_total / num_games)

  # Copied from selfplay() to aggregate results.
  # This could potentially be parallelized with training.

  # Write examples to a single record.
  pattern = os.path.join(output_dir, '*', '*.zz')
  random.seed(state.seed)
  tf.set_random_seed(state.seed)
  np.random.seed(state.seed)
  # TODO(tommadams): This method of generating one golden chunk per generation
  # is sub-optimal because each chunk gets reused multiple times for training,
  # introducing bias. Instead, a fresh dataset should be uniformly sampled out
  # of *all* games in the training window before the start of each training run.
  buffer = example_buffer.ExampleBuffer(sampling_frac=1.0)

  # TODO(tommadams): parallel_fill is currently non-deterministic. Make it not
  # so.
  logging.info('Writing golden chunk from "{}"'.format(pattern))
  buffer.parallel_fill(tf.gfile.Glob(pattern))
  buffer.flush(os.path.join(fsdb.golden_chunk_dir(),
                            state.output_model_name + '.tfrecord.zz'))
Example #19
def run_cc():
    _, model_name = fsdb.get_latest_model()
    num_games_finished = len(fsdb.get_games(model_name))
    if num_games_finished > 25000:
        print("{} has enough games! ({})".format(model_name,
                                                 num_games_finished))
        time.sleep(10 * 60)
        sys.exit()

    mask_flags.checked_run([
        'bazel-bin/cc/selfplay', '--model=tf,{}'.format(model_name),
        '--mode=selfplay', '--output_dir={}/{}'.format(fsdb.selfplay_dir(),
                                                       model_name),
        '--holdout_dir={}/{}'.format(fsdb.holdout_dir(), model_name),
        '--sgf_dir={}/{}'.format(fsdb.sgf_dir(), model_name),
        '--flagfile=rl_loop/distributed_flags'
    ])
Example #20
async def sample_training_examples(state):
    """Sample training examples from recent selfplay games.

    Args:
        state: the RL loop State instance.

    Returns:
        A list of golden chunks up to num_records in length, sorted by path.
    """

    # Training examples are written out to the following directory hierarchy:
    #   selfplay_dir/device_id/model_name/timestamp/
    # Read examples from the most recent `window_size` models.
    device_dirs = [
        x.path for x in os.scandir(fsdb.selfplay_dir()) if x.is_dir()
    ]
    models = set()
    for d in device_dirs:
        models.update([x.name for x in os.scandir(d) if x.is_dir()])
    models = sorted(models, reverse=True)[:FLAGS.window_size]

    src_patterns = []
    for d in device_dirs:
        for model in models:
            src_patterns.append(os.path.join(d, model, '*', '*.tfrecord.zz'))

    dst_path = os.path.join(fsdb.golden_chunk_dir(),
                            '{}.tfrecord.zz'.format(state.train_model_name))

    logging.info('Writing training chunks to %s', dst_path)
    lines = await sample_records(src_patterns,
                                 dst_path,
                                 num_read_threads=8,
                                 num_write_threads=8,
                                 sample_frac=FLAGS.train_filter)
    logging.info('\n'.join(lines))

    chunk_pattern = os.path.join(
        fsdb.golden_chunk_dir(),
        '{}-*-of-*.tfrecord.zz'.format(state.train_model_name))
    chunk_paths = sorted(tf.gfile.Glob(chunk_pattern))
    assert len(chunk_paths) == 8

    return chunk_paths
Example #21
def wait_for_training_examples(state, num_games):
    """Wait for training examples to be generated by the latest model.

    Args:
        state: the RL loop State instance.
        num_games: number of games to wait for.
    """

    first_time_around = True
    while True:
        model_dirs = list(os.scandir(fsdb.selfplay_dir()))
        if len(model_dirs) == state.iter_num:
            pattern = os.path.join(model_dirs[-1], '*', '*', '*.tfrecord.zz')
            paths = sorted(tf.gfile.Glob(pattern))
            if len(paths) >= num_games:
                break
        if first_time_around:
            logging.info('Waiting for %d games', num_games)
            first_time_around = False
        time.sleep(1)
def wait_for_training_examples(state, selfplay_processes, num_games):
    """Wait for training examples to be generated by the latest model.

    Args:
        state: the RL loop State instance.
        selfplay_processes: the selfplay processes passed to check_on_selfplay.
        num_games: number of games to wait for.
    """

    first_time_around = True
    while True:
        check_on_selfplay(selfplay_processes)

        model_dir = os.path.join(fsdb.selfplay_dir(), state.selfplay_model_name)
        if os.path.isdir(model_dir):
            pattern = os.path.join(model_dir, '*', '*', '*.tfrecord.zz')
            paths = sorted(tf.gfile.Glob(pattern))
            if len(paths) >= num_games:
                break
        if first_time_around:
            logging.info('Waiting for %d games in %s', num_games, model_dir)
            first_time_around = False
        time.sleep(1)
Example #23
def main(unused_argv):
  """Run the reinforcement learning loop."""

  mll.init_start()
  print('Wiping dir %s' % FLAGS.base_dir, flush=True)
  shutil.rmtree(FLAGS.base_dir, ignore_errors=True)
  dirs = [fsdb.models_dir(), fsdb.selfplay_dir(), fsdb.holdout_dir(),
          fsdb.eval_dir(), fsdb.golden_chunk_dir(), fsdb.working_dir(),
          fsdb.mpi_log_dir()]
  for d in dirs:
    ensure_dir_exists(d)

  # Copy the flag files so there's no chance of them getting accidentally
  # overwritten while the RL loop is running.
  flags_dir = os.path.join(FLAGS.base_dir, 'flags')
  shutil.copytree(FLAGS.flags_dir, flags_dir)
  FLAGS.flags_dir = flags_dir

  # Copy the target model to the models directory so we can find it easily.
  shutil.copy(FLAGS.target_path, os.path.join(fsdb.models_dir(), 'target.pb'))

  logging.getLogger().addHandler(
      logging.FileHandler(os.path.join(FLAGS.base_dir, 'rl_loop.log')))
  formatter = logging.Formatter('[%(asctime)s] %(message)s',
                                '%Y-%m-%d %H:%M:%S')
  for handler in logging.getLogger().handlers:
    handler.setFormatter(formatter)

  logging.info('Selfplay nodes = {}'.format(FLAGS.selfplay_node))
  logging.info('Train nodes = {}'.format(FLAGS.train_node))
  logging.info('Eval nodes = {}'.format(FLAGS.eval_node))

  with logged_timer('Total time'):
    try:
      mll.init_stop()
      mll.run_start()
      rl_loop()
    finally:
      asyncio.get_event_loop().close()
Example #24
def selfplay(state):
  play_output_name = state.play_output_name
  play_output_dir = os.path.join(fsdb.selfplay_dir(), play_output_name)
  play_holdout_dir = os.path.join(fsdb.holdout_dir(), play_output_name)

  result = checked_run([
      'external/minigo/cc/main', '--mode=selfplay', '--parallel_games=2048',
      '--num_readouts=100', '--model={}'.format(
          state.play_model_path), '--output_dir={}'.format(play_output_dir),
      '--holdout_dir={}'.format(play_holdout_dir)
  ] + cc_flags(state), 'selfplay')
  logging.info(get_lines(result, make_slice[-2:]))

  # Write examples to a single record.
  logging.info('Extracting examples')
  random.seed(state.seed)
  tensorflow.set_random_seed(state.seed)
  numpy.random.seed(state.seed)
  buffer = example_buffer.ExampleBuffer(sampling_frac=1.0)
  buffer.parallel_fill(
      tensorflow.gfile.Glob(os.path.join(play_output_dir, '*.zz')))
  buffer.flush(
      os.path.join(fsdb.golden_chunk_dir(), play_output_name + '.tfrecord.zz'))
def make_chunk_for(output_dir=LOCAL_DIR,
                   local_dir=LOCAL_DIR,
                   game_dir=None,
                   model_num=1,
                   positions=EXAMPLES_PER_GENERATION,
                   threads=8,
                   sampling_frac=0.02):
    """
    Explicitly make a golden chunk for a given model `model_num`
    (not necessarily the most recent one).

      While we haven't yet gathered enough samples (EXAMPLES_PER_GENERATION),
      add samples from the games of earlier models.
    """
    game_dir = game_dir or fsdb.selfplay_dir()
    ensure_dir_exists(output_dir)
    models = [model for model in fsdb.get_models() if model[0] < model_num]
    buf = ExampleBuffer(positions, sampling_frac=sampling_frac)
    files = []
    for _, model in sorted(models, reverse=True):
        local_model_dir = os.path.join(local_dir, model)
        if not tf.gfile.Exists(local_model_dir):
            print("Rsyncing", model)
            _rsync_dir(os.path.join(game_dir, model), local_model_dir)
        files.extend(tf.gfile.Glob(os.path.join(local_model_dir, '*.zz')))
        print("{}: {} games".format(model, len(files)))
        if len(files) * 200 * sampling_frac > positions:
            break

    print("Filling from {} files".format(len(files)))

    buf.parallel_fill(files, threads=threads)
    print(buf)
    output = os.path.join(output_dir, str(model_num) + '.tfrecord.zz')
    print("Writing to", output)
    buf.flush(output)
Example #26
async def selfplay(state, flagfile='selfplay'):
  """Run selfplay and write a training chunk to the fsdb golden_chunk_dir.

  Args:
    state: the RL loop State instance.
    flagfile: the name of the flagfile to use for selfplay, either 'selfplay'
        (the default) or 'bootstrap'.
  """

  output_dir = os.path.join(fsdb.selfplay_dir(), state.output_model_name)
  holdout_dir = os.path.join(fsdb.holdout_dir(), state.output_model_name)

  multi_instance, num_instance, flag_list = extract_multi_instance(
      ['--flagfile={}_mi.flags'.format(os.path.join(FLAGS.flags_dir, flagfile))])
  sp_cmd = ['bazel-bin/cc/selfplay',
            '--flagfile={}.flags'.format(os.path.join(FLAGS.flags_dir, flagfile)),
            '--model={}'.format(state.best_model_path),
            '--output_dir={}'.format(output_dir),
            '--holdout_dir={}'.format(holdout_dir)]
  if not multi_instance:
    lines = await run(
        *sp_cmd,
        '--seed={}'.format(state.seed))
  else:
    if FLAGS.selfplay_node == []:
      # run selfplay locally
      lines = await run(
          'python3', 'ml_perf/execute.py',
          '--num_instance={}'.format(num_instance),
          '--',
          *sp_cmd,
          '--seed={}'.format(state.seed))
    else:
      with logged_timer('selfplay mn'):
        # run one selfplay instance per host
        lines = await run_distributed(
            ['LD_LIBRARY_PATH=$LD_LIBRARY_PATH:cc/tensorflow'],
            num_instance, FLAGS.selfplay_node, None, None, state.seed,
            *sp_cmd)

  result = '\n'.join(lines)
  with logged_timer('parse win stats'):
    stats = parse_win_stats_table(result, 1)[0]
    num_games = stats.total_wins
    black_total = stats.black_wins.total
    white_total = stats.white_wins.total

    logging.info('Black won %0.3f, white won %0.3f',
                 black_total / num_games,
                 white_total / num_games)
    bias = abs(white_total - black_total)/num_games
    logging.info('Black total %d, white total %d, total games %d, bias %0.3f.',
                 black_total, white_total, num_games, bias)

  with logged_timer('generate golden chunk'):
    # Write examples to a single record.
    pattern = os.path.join(output_dir, '*', '*.zz')
    files = tf.gfile.Glob(pattern)

    random.seed(state.seed)
    tf.set_random_seed(state.seed)
    np.random.seed(state.seed)

    # TODO(tommadams): This method of generating one golden chunk per generation
    # is sub-optimal because each chunk gets reused multiple times for training,
    # introducing bias. Instead, a fresh dataset should be uniformly sampled out
    # of *all* games in the training window before the start of each training run.

    # TODO(tommadams): parallel_fill is currently non-deterministic. Make it not
    # so.
    logging.info('Writing golden chunk from "{}"'.format(pattern))
    threads = FLAGS.golden_chunk_split
    file_list = []
    files_number = len(files)
    chunk_size = files_number // threads

    # Split the files into N separate parts.
    for i in range(threads):
      if i == threads - 1:
        file_list += [[i, files[chunk_size * i :]]]
      else:
        file_list += [[i, files[chunk_size * i : chunk_size * (i + 1)]]]
    pool = mp.Pool(threads)
    pool.map(functools.partial(gen_golden_chunk, state=state), file_list)

  return bias
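An illustrative trace of the file partitioning above, with hypothetical values (10 files, a golden_chunk_split of 3); the last part absorbs the remainder:

files = ['game_%02d.zz' % i for i in range(10)]
threads = 3
chunk_size = len(files) // threads                          # 3
file_list = []
for i in range(threads):
    if i == threads - 1:
        file_list += [[i, files[chunk_size * i:]]]          # files[6:], 4 files
    else:
        file_list += [[i, files[chunk_size * i: chunk_size * (i + 1)]]]
# Part sizes end up as [3, 3, 4].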
Example #27
def main(unused_argv):
    """Run the reinforcement learning loop."""
    utils.ensure_dir_exists(fsdb.models_dir())
    utils.ensure_dir_exists(fsdb.selfplay_dir())
    utils.ensure_dir_exists(fsdb.holdout_dir())
    utils.ensure_dir_exists(fsdb.sgf_dir())
    utils.ensure_dir_exists(fsdb.eval_dir())
    utils.ensure_dir_exists(fsdb.golden_chunk_dir())
    utils.ensure_dir_exists(fsdb.working_dir())

    bootstrap_name = shipname.generate(0)
    bootstrap_model_path = os.path.join(fsdb.models_dir(), bootstrap_name)
    mask_flags.checked_run([
        'python3', 'bootstrap.py',
        '--export_path={}'.format(bootstrap_model_path),
        '--work_dir={}'.format(fsdb.working_dir()),
        '--flagfile=rl_loop/local_flags'
    ])

    selfplay_cmd = [
        'python3', 'selfplay.py',
        '--load_file={}'.format(bootstrap_model_path),
        '--selfplay_dir={}'.format(
            os.path.join(fsdb.selfplay_dir(),
                         bootstrap_name)), '--holdout_dir={}'.format(
                             os.path.join(fsdb.holdout_dir(), bootstrap_name)),
        '--sgf_dir={}'.format(fsdb.sgf_dir()), '--holdout_pct=0',
        '--flagfile=rl_loop/local_flags'
    ]

    # Selfplay twice
    mask_flags.checked_run(selfplay_cmd)
    mask_flags.checked_run(selfplay_cmd)
    # ... and once more to generate a held-out game for validation.
    # This exploits the flags behavior: if a flag is passed twice, the second one wins.
    mask_flags.checked_run(selfplay_cmd + ['--holdout_pct=100'])

    # Double check that at least one sgf has been generated.
    assert os.listdir(os.path.join(fsdb.sgf_dir(), 'full'))

    print("Making shuffled golden chunk from selfplay data...")
    # TODO(amj): refactor example_buffer so it can be called the same way
    # as everything else.
    eb.make_chunk_for(output_dir=fsdb.golden_chunk_dir(),
                      local_dir=fsdb.working_dir(),
                      game_dir=fsdb.selfplay_dir(),
                      model_num=1,
                      positions=64,
                      threads=8,
                      sampling_frac=1)

    tf_records = sorted(
        gfile.Glob(os.path.join(fsdb.golden_chunk_dir(), '*.tfrecord.zz')))

    trained_model_name = shipname.generate(1)
    trained_model_path = os.path.join(fsdb.models_dir(), trained_model_name)

    # Train on shuffled game data
    mask_flags.checked_run([
        'python3', 'train.py', *tf_records,
        '--work_dir={}'.format(fsdb.working_dir()),
        '--export_path={}'.format(trained_model_path),
        '--flagfile=rl_loop/local_flags'
    ])

    # Validate the trained model on held out game
    mask_flags.checked_run([
        'python3', 'validate.py',
        os.path.join(fsdb.holdout_dir(), bootstrap_name),
        '--work_dir={}'.format(fsdb.working_dir()),
        '--flagfile=rl_loop/local_flags'
    ])

    # Verify that the trained model works for selfplay.
    # This exploits the flags behavior: if a flag is passed twice, the second one wins.
    mask_flags.checked_run(selfplay_cmd +
                           ['--load_file={}'.format(trained_model_path)])

    mask_flags.checked_run([
        'python3', 'evaluate.py', bootstrap_model_path, trained_model_path,
        '--games=1', '--eval_sgf_dir={}'.format(fsdb.eval_dir()),
        '--flagfile=rl_loop/local_flags'
    ])
    print("Completed integration test!")
Example #28
async def selfplay(state, flagfile='selfplay'):
    """Run selfplay and write a training chunk to the fsdb golden_chunk_dir.

    Args:
        state: the RL loop State instance.
        flagfile: the name of the flagfile to use for selfplay, either
            'selfplay' (the default) or 'bootstrap'.
    """
    output_dir = os.path.join(fsdb.selfplay_dir(), state.output_model_name)
    holdout_dir = os.path.join(fsdb.holdout_dir(), state.output_model_name)
    output_dir = '/tmp/minigo' + output_dir

    multi_instance, num_instance, flag_list = extract_multi_instance([
        '--flagfile={}_mi.flags'.format(os.path.join(FLAGS.flags_dir,
                                                     flagfile))
    ])
    sp_cmd = [
        'bazel-bin/cc/selfplay',
        '--flagfile={}.flags'.format(os.path.join(FLAGS.flags_dir, flagfile)),
        '--model={}'.format(state.best_model_path),
        '--output_dir={}'.format(output_dir),
        '--holdout_dir={}'.format(holdout_dir)
    ]
    if not multi_instance:
        lines = await run(*sp_cmd, '--seed={}'.format(state.seed))
    else:
        if FLAGS.selfplay_node == []:
            # run selfplay locally
            lines = await run('python3', 'ml_perf/execute.py',
                              '--num_instance={}'.format(num_instance), '--',
                              *sp_cmd, '--seed={}'.format(state.seed))
        else:
            with logged_timer('selfplay mn'):
                # run one selfplay instance per host
                lines = await run_distributed(
                    ['LD_LIBRARY_PATH=$LD_LIBRARY_PATH:cc/tensorflow'],
                    num_instance, FLAGS.selfplay_node, None, None, state.seed,
                    *sp_cmd)

    #result = '\n'.join(lines)
    #with logged_timer('parse win stats'):
    #  stats = parse_win_stats_table(result, 1)[0]
    #  num_games = stats.total_wins
    #  black_total = stats.black_wins.total
    #  white_total = stats.white_wins.total

    #  logging.info('Black won %0.3f, white won %0.3f',
    #               black_total / num_games,
    #               white_total / num_games)
    #  bias = abs(white_total - black_total)/num_games
    #  logging.info('Black total %d, white total %d, total games %d, bias %0.3f.',
    #               black_total, white_total, num_games, bias)

    with logged_timer('generate golden chunk'):
        # Write examples to a single record.
        hosts = FLAGS.selfplay_node
        if hosts == []:
            hosts = ['localhost']
        num_instance = len(hosts)
        numa_per_node = FLAGS.physical_cores // FLAGS.numa_cores
        train_instance_num = FLAGS.train_instance_per_numa * len(
            FLAGS.train_node) * numa_per_node
        selfplay_node_num = len(hosts)
        selfplay_num = selfplay_node_num
        out_files_number = int(train_instance_num /
                               gcd(train_instance_num, selfplay_num))

        cmd = [
            'python3', 'ml_perf/divide_golden_chunk.py',
            '--read_path={}'.format(output_dir + "/*"),
            '--write_path={}'.format(
                os.path.join(fsdb.golden_chunk_dir(),
                             state.output_model_name + '.tfrecord.zz')),
            '--out_files_number={}'.format(out_files_number),
            '--physical_cores={}'.format(FLAGS.physical_cores),
            '--base_dir={}'.format(FLAGS.base_dir)
        ]
        lines = await run_distributed([], 1, hosts, None, None, state.seed,
                                      *cmd)
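A worked example of the out_files_number arithmetic above; all cluster sizes here are hypothetical.

from math import gcd

# Hypothetical sizing: 4 train instances per NUMA domain, 2 train nodes,
# 2 NUMA domains per node, and 4 selfplay hosts.
train_instance_num = 4 * 2 * 2                                   # 16
selfplay_num = 4
out_files_number = int(train_instance_num /
                       gcd(train_instance_num, selfplay_num))    # 16 / 4 = 4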
def selfplay_noasync(state, flagfile='selfplay'):
    """Run selfplay and write a training chunk to the fsdb golden_chunk_dir.

    Args:
        state: the RL loop State instance.
        flagfile: the name of the flagfile to use for selfplay, either
            'selfplay' (the default) or 'bootstrap'.
    """

    output_dir = os.path.join(fsdb.selfplay_dir(), state.output_model_name)
    holdout_dir = os.path.join(fsdb.holdout_dir(), state.output_model_name)
    base_seed = state.seed * FLAGS.num_gpus_selfplay * 2

    if FLAGS.use_multinode:
        mpi_rank = MPI.COMM_WORLD.Get_rank()
        base_seed = base_seed + (mpi_rank * 1433)

    mpi_info = MPI.Info.Create()
    num_workers = 2 * FLAGS.num_gpus_selfplay
    cores_per_worker = (FLAGS.cores_per_socket *
                        FLAGS.num_socket) // num_workers

    # TODO: set hosts to self play nodes here.
    mpi_info.Set("host", socket.gethostname())
    mpi_info.Set("bind_to", "none")
    icomm = MPI.COMM_SELF.Spawn("ompi_bind_DGX1.sh",
                                maxprocs=num_workers,
                                args=[
                                    'bazel-bin/cc/selfplay_mpi',
                                    '--flagfile={}.flags'.format(
                                        os.path.join(FLAGS.flags_dir,
                                                     flagfile)),
                                    '--model={}'.format(state.best_model_path),
                                    '--output_dir={}'.format(output_dir),
                                    '--holdout_dir={}'.format(holdout_dir),
                                    '--seed={}'.format(base_seed)
                                ],
                                info=mpi_info)

    icomm.barrier()
    icomm.Disconnect()

    black_wins_total = white_wins_total = num_games = 0

    #for lines in all_lines:
    #  if type(lines) == RuntimeError or type(lines) == OSError:
    #    raise lines
    #    continue
    #  result = '\n'.join(lines[-6:])
    #  logging.info(result)
    #  stats = parse_win_stats_table(result, 1)[0]
    #  num_games += stats.total_wins
    #  black_wins_total += stats.black_wins.total
    #  white_wins_total += stats.white_wins.total

    #logging.info('Black won %0.3f, white won %0.3f',
    #             black_wins_total / num_games,
    #             white_wins_total / num_games)

    # Write examples to a single record.
    pattern = os.path.join(output_dir, '*', '*.zz')
    random.seed(state.seed)
    tf.set_random_seed(state.seed)
    np.random.seed(state.seed)

    logging.info('Writing golden chunk from "{}"'.format(pattern))
    if FLAGS.use_multinode:
        mpi_rank = MPI.COMM_WORLD.Get_rank()
        divide_record(state, pattern, FLAGS.num_gpus_train, mpi_rank)
    else:
        divide_record(state, pattern, FLAGS.num_gpus_train, -1)
Example #30
def main(unused_argv):

    for i in range(0, NUM_LOOP):
        if i == 0:
            src_model_name = shipname.generate(0)
            fsdb.switch_base(os.path.join(base_dir, src_model_name))
            src_model_path = os.path.join(fsdb.models_dir(), src_model_name)
            bootstrap_model_path = os.path.join(fsdb.models_dir(),
                                                src_model_name)
            mask_flags.checked_run([
                'python3', 'bootstrap.py',
                '--export_path={}'.format(bootstrap_model_path),
                '--work_dir={}'.format(fsdb.working_dir()),
                '--flagfile=rl_loop/local_flags'
            ])
            dst_model_name = shipname.generate(1)
            fsdb.switch_base(os.path.join(base_dir, dst_model_name))
        else:
            src_model_name = dst_model_name
            src_model_path = os.path.join(fsdb.models_dir(), src_model_name)
            dst_model_name = shipname.generate(i + 1)
            fsdb.switch_base(os.path.join(base_dir, dst_model_name))

        utils.ensure_dir_exists(fsdb.models_dir())
        utils.ensure_dir_exists(fsdb.selfplay_dir())
        utils.ensure_dir_exists(fsdb.holdout_dir())
        utils.ensure_dir_exists(fsdb.sgf_dir())
        utils.ensure_dir_exists(fsdb.eval_dir())
        utils.ensure_dir_exists(fsdb.golden_chunk_dir())
        utils.ensure_dir_exists(fsdb.working_dir())

        #bootstrap_name = shipname.generate(0)
        #bootstrap_model_path = os.path.join(fsdb.models_dir(), bootstrap_name)

        print(src_model_name)
        print(src_model_path)
        selfplay_cmd = [
            'python3', 'selfplay.py', '--load_file={}'.format(src_model_path),
            '--selfplay_dir={}'.format(
                os.path.join(fsdb.selfplay_dir(),
                             dst_model_name)), '--holdout_dir={}'.format(
                                 os.path.join(fsdb.holdout_dir(),
                                              dst_model_name)),
            '--sgf_dir={}'.format(fsdb.sgf_dir()), '--holdout_pct=0',
            '--flagfile=rl_loop/local_flags'
        ]

        # Selfplay twice
        mask_flags.checked_run(selfplay_cmd)
        mask_flags.checked_run(selfplay_cmd)

        # ... and once more to generate a held-out game for validation.
        # This exploits the flags behavior: if a flag is passed twice, the second one wins.
        mask_flags.checked_run(selfplay_cmd + ['--holdout_pct=100'])

        # Double check that at least one sgf has been generated.
        assert os.listdir(os.path.join(fsdb.sgf_dir(), 'full'))

        print("Making shuffled golden chunk from selfplay data...")
        # TODO(amj): refactor example_buffer so it can be called the same way
        # as everything else.
        eb.make_chunk_for(output_dir=fsdb.golden_chunk_dir(),
                          local_dir=fsdb.working_dir(),
                          game_dir=fsdb.selfplay_dir(),
                          model_num=1,
                          positions=64,
                          threads=8,
                          sampling_frac=1)

        tf_records = sorted(
            gfile.Glob(os.path.join(fsdb.golden_chunk_dir(), '*.tfrecord.zz')))

        #trained_model_name = shipname.generate(1)
        trained_model_name = dst_model_name
        trained_model_path = os.path.join(fsdb.models_dir(),
                                          trained_model_name)

        # Train on shuffled game data
        mask_flags.checked_run([
            'python3', 'train.py', *tf_records,
            '--work_dir={}'.format(fsdb.working_dir()),
            '--export_path={}'.format(trained_model_path),
            '--flagfile=rl_loop/local_flags'
        ])

    print("Finished!")