Example 1
def gen_golden_chunk(files, state):
    buffer = example_buffer.ExampleBuffer(sampling_frac=1.0)
    buffer.parallel_fill(files[1], threads=1)
    buffer.flush(
        os.path.join(
            fsdb.golden_chunk_dir(),
            state.output_model_name + '-{}.tfrecord.zz'.format(files[0])))
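gen_golden_chunk expects files to be an (index, file_list) pair: files[0] numbers the output chunk and files[1] holds the game records to sample. A minimal sketch of how it might be driven, assuming a hypothetical list of selfplay record paths split into fixed-size groups (write_golden_chunks, all_games and group_size are illustrative names, not part of this example):

# Hypothetical driver: build one golden chunk per group of selfplay records
# in parallel. enumerate() yields the (index, file_list) pairs that
# gen_golden_chunk unpacks as files[0] and files[1].
import functools
import multiprocessing

def write_golden_chunks(all_games, group_size, state):
    groups = [all_games[i:i + group_size]
              for i in range(0, len(all_games), group_size)]
    with multiprocessing.Pool() as pool:
        pool.map(functools.partial(gen_golden_chunk, state=state),
                 enumerate(groups))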
Example 2
def selfplay(state):
    output_dir = os.path.join(fsdb.selfplay_dir(), state.output_model_name)
    holdout_dir = os.path.join(fsdb.holdout_dir(), state.output_model_name)
    model_path = os.path.join(fsdb.models_dir(), state.best_model_name)

    result = checked_run([
        'bazel-bin/cc/selfplay', '--parallel_games=2048', '--num_readouts=100',
        '--model={}.pb'.format(model_path),
        '--output_dir={}'.format(output_dir),
        '--holdout_dir={}'.format(holdout_dir)
    ] + cc_flags(state), 'selfplay')
    logging.info(get_lines(result, make_slice[-2:]))

    # Write examples to a single record.
    pattern = os.path.join(output_dir, '*', '*.zz')
    random.seed(state.seed)
    tf.set_random_seed(state.seed)
    np.random.seed(state.seed)
    # TODO(tommadams): This method of generating one golden chunk per generation
    # is sub-optimal because each chunk gets reused multiple times for training,
    # introducing bias. Instead, a fresh dataset should be uniformly sampled out
    # of *all* games in the training window before the start of each training run.
    buffer = example_buffer.ExampleBuffer(sampling_frac=1.0)

    # TODO(tommadams): parallel_fill is currently non-deterministic. Make it not
    # so.
    logging.info('Writing golden chunk from "{}"'.format(pattern))
    buffer.parallel_fill(tf.gfile.Glob(pattern))
    buffer.flush(
        os.path.join(fsdb.golden_chunk_dir(),
                     state.output_model_name + '.tfrecord.zz'))
Example 3
def initialize_from_checkpoint(state, out_files_number):
    """Initialize the reinforcement learning loop from a checkpoint."""
    # The checkpoint's work_dir should contain the most recently trained model.
    model_paths = glob.glob(
        os.path.join(FLAGS.checkpoint_dir, 'work_dir/model.ckpt-*.pb'))
    if len(model_paths) != 1:
        raise RuntimeError(
            'Expected exactly one model in the checkpoint work_dir, '
            'got [{}]'.format(', '.join(model_paths)))
    start_model_path = model_paths[0]

    golden_chunks_dir = os.path.join(FLAGS.checkpoint_dir, 'golden_chunks')
    for basename in os.listdir(golden_chunks_dir):
        path = os.path.join(golden_chunks_dir, basename)
        out_path = os.path.join(fsdb.golden_chunk_dir(), basename)
        buffer = example_buffer.ExampleBuffer(sampling_frac=1.0)
        example_num = buffer.parallel_fill(tf.gfile.Glob(path),
                                           FLAGS.physical_cores)
        buffer.flush_new(out_path, example_num, out_files_number, 1)

    # Copy the latest trained model into the models directory and use it on the
    # first round of selfplay.
    state.best_model_name = 'checkpoint'
    best_model_path = os.path.join(fsdb.models_dir(), state.best_model_name)

    dual_net.optimize_graph(start_model_path, best_model_path,
                            FLAGS.quantization,
                            fsdb.golden_chunk_dir() + '/*.zz*',
                            FLAGS.eval_min_max_every_epoch)

    # Copy the training files.
    work_dir = os.path.join(FLAGS.checkpoint_dir, 'work_dir')
    for basename in os.listdir(work_dir):
        path = os.path.join(work_dir, basename)
        shutil.copy(path, fsdb.working_dir())
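flush_new is not shown in this example; from its call sites it appears to take an output path, the number of examples to write, the number of output files, and a thread count, splitting the buffered examples across that many shards. A rough, hypothetical illustration of the sharding idea only (write_shards and its arguments are assumed names, not the actual ExampleBuffer implementation):

# Split serialized tf.Example records evenly across num_out compressed
# TFRecord shards, roughly what flush_new appears to do. Hypothetical code.
import tensorflow as tf

def write_shards(serialized_examples, out_path, num_out):
    opts = tf.io.TFRecordOptions(compression_type='ZLIB')
    shard_size = (len(serialized_examples) + num_out - 1) // num_out
    for i in range(num_out):
        shard = serialized_examples[i * shard_size:(i + 1) * shard_size]
        with tf.io.TFRecordWriter(
                '{}-{}-of-{}'.format(out_path, i, num_out), opts) as writer:
            for example in shard:
                writer.write(example)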
Example 4
def initialize_from_checkpoint(state):
    """Initialize the reinforcement learning loop from a checkpoint."""

    # The checkpoint's work_dir should contain the most recently trained model.
    model_paths = glob.glob(
        os.path.join(FLAGS.checkpoint_dir, 'work_dir/model.ckpt-*.pb'))
    if len(model_paths) != 1:
        raise RuntimeError(
            'Expected exactly one model in the checkpoint work_dir, '
            'got [{}]'.format(', '.join(model_paths)))
    start_model_path = model_paths[0]

    # Copy the latest trained model into the models directory and use it on the
    # first round of selfplay.
    state.best_model_name = 'checkpoint'
    shutil.copy(start_model_path,
                os.path.join(fsdb.models_dir(), state.best_model_name + '.pb'))
    shutil.copy(
        start_model_path + '.og',
        os.path.join(fsdb.models_dir(), state.best_model_name + '.pb.og'))

    # Copy the training chunks.
    golden_chunks_dir = os.path.join(FLAGS.checkpoint_dir, 'golden_chunks')
    for basename in os.listdir(golden_chunks_dir):
        path = os.path.join(golden_chunks_dir, basename)
        out_path = os.path.join(fsdb.golden_chunk_dir(), basename)
        buffer = example_buffer.ExampleBuffer(sampling_frac=1.0)
        buffer.parallel_fill(tf.gfile.Glob(path))
        buffer.flush(out_path, FLAGS.num_gpus_train)

    # Copy the training files.
    work_dir = os.path.join(FLAGS.checkpoint_dir, 'work_dir')
    for basename in os.listdir(work_dir):
        path = os.path.join(work_dir, basename)
        shutil.copy(path, fsdb.working_dir())
Example 5
async def selfplay(state, flagfile='selfplay', seed_factor=0):
  """Run selfplay and write a training chunk to the fsdb golden_chunk_dir.

  Args:
    state: the RL loop State instance.
    flagfile: the name of the flagfile to use for selfplay, either 'selfplay'
        (the default) or 'bootstrap'.
    seed_factor: Factor used to offset the seed (seed + 100 * seed_factor).
  """
  output_dir = os.path.join(fsdb.selfplay_dir(), state.output_model_name)
  holdout_dir = os.path.join(fsdb.holdout_dir(), state.output_model_name)

  lines = await run(
      'bazel-bin/cc/selfplay',
      '--flagfile={}.flags'.format(os.path.join(FLAGS.flags_dir, flagfile)),
      '--model={}'.format(get_ckpt_path(state.best_model_path)),
      '--output_dir={}'.format(output_dir),
      '--holdout_dir={}'.format(holdout_dir),
      '--seed={}'.format(state.seed+100*seed_factor))
  result = '\n'.join(lines[-6:])
  logging.info(result)
  result = '\n'.join(lines[-50:])
  try:
    stats = parse_win_stats_table(result, 1)[0]
    num_games = stats.total_wins
    logging.info('Black won %0.3f, white won %0.3f',
                 stats.black_wins.total / num_games,
                 stats.white_wins.total / num_games)
  except AssertionError:
    # Poplar logging may break the line-extraction approach used above.
    logging.error('No results to parse:\n%s', lines[-50:])

  if not MULTI_SP:
    # Write examples to a single record.
    pattern = os.path.join(output_dir, '*', '*.zz')
    random.seed(state.seed)
    tf.set_random_seed(state.seed)
    np.random.seed(state.seed)
    # TODO(tommadams): This method of generating one golden chunk per generation
    # is sub-optimal because each chunk gets reused multiple times for training,
    # introducing bias. Instead, a fresh dataset should be uniformly sampled out
    # of *all* games in the training window before the start of each training run.
    buffer = example_buffer.ExampleBuffer(sampling_frac=1.0)

    # TODO(tommadams): parallel_fill is currently non-deterministic. Make it not
    # so.
    logging.info('Writing golden chunk from "{}"'.format(pattern))
    buffer.parallel_fill(tf.gfile.Glob(pattern))
    buffer.flush(os.path.join(fsdb.golden_chunk_dir(),
                              state.output_model_name + '.tfrecord.zz'))
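seed_factor only shifts the seed passed to the selfplay binary (seed + 100 * seed_factor), so several workers launched for the same generation do not play identical games. A hypothetical caller, assuming MULTI_SP is set so each worker skips its own golden-chunk write (run_selfplay_workers is an illustrative name):

# Hypothetical: launch several concurrent selfplay workers with decorrelated
# seeds by giving each one a different seed_factor.
import asyncio

async def run_selfplay_workers(state, num_workers):
    tasks = [selfplay(state, seed_factor=i) for i in range(num_workers)]
    await asyncio.gather(*tasks)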
Example 6
def divide_record(state, pattern, num_out, rank):
    if rank < 0:
        rank_str = ''
    else:
        rank_str = '-mpirank-' + str(rank)
    buffer = example_buffer.ExampleBuffer(sampling_frac=1.0)
    buffer.parallel_fill(tf.gfile.Glob(pattern))
    output = os.path.join(fsdb.golden_chunk_dir(),
                          state.output_model_name + rank_str + '.tfrecord.zz')
    buffer.flush(output, num_out)

    if rank >= 0:
        # Put the output files on the exchange.
        output = output + '*'
        put_files_exchange(state, rank, fileout=output)
    return
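divide_record tags its outputs with a '-mpirank-N' suffix so shards written by different ranks do not collide before they are exchanged. A hedged sketch of a per-rank call site (mpi4py assumed; write_rank_chunks is an illustrative name, the real caller is not shown here):

# Hypothetical per-rank call: each MPI rank converts its own selfplay games
# into num_out golden-chunk shards and pushes them to the exchange area.
from mpi4py import MPI

def write_rank_chunks(state, selfplay_glob, num_out):
    rank = MPI.COMM_WORLD.Get_rank()
    divide_record(state, selfplay_glob, num_out, rank)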
Example 7
async def selfplay_multi(state, num_ipus):
  """Start *num_ipus* selfplay processes."""
  output_dir = os.path.join(fsdb.selfplay_dir(), state.output_model_name)
  holdout_dir = os.path.join(fsdb.holdout_dir(), state.output_model_name)
  flagfile = 'selfplay'

  all_tasks = []
  loop = asyncio.get_event_loop()
  for i in range(num_ipus):
    all_tasks.append(loop.create_task(selfplay_sub(state, output_dir, holdout_dir, flagfile, i)))
  all_lines = await asyncio.gather(*all_tasks, return_exceptions=True)

  black_wins_total = white_wins_total = num_games = 0
  for lines in all_lines:
    if isinstance(lines, (RuntimeError, OSError)):
      raise lines
    result = '\n'.join(lines[-6:])
    logging.info(result)
    stats = parse_win_stats_table(result, 1)[0]
    num_games += stats.total_wins
    black_wins_total += stats.black_wins.total
    white_wins_total += stats.white_wins.total

  logging.info('Black won %0.3f, white won %0.3f',
               black_wins_total / num_games,
               white_wins_total / num_games)

  # Copied from selfplay() to aggregate the results; this could potentially
  # be parallelized with training.

  # Write examples to a single record.
  pattern = os.path.join(output_dir, '*', '*.zz')
  random.seed(state.seed)
  tf.set_random_seed(state.seed)
  np.random.seed(state.seed)
  # TODO(tommadams): This method of generating one golden chunk per generation
  # is sub-optimal because each chunk gets reused multiple times for training,
  # introducing bias. Instead, a fresh dataset should be uniformly sampled out
  # of *all* games in the training window before the start of each training run.
  buffer = example_buffer.ExampleBuffer(sampling_frac=1.0)

  # TODO(tommadams): parallel_fill is currently non-deterministic. Make it not
  # so.
  logging.info('Writing golden chunk from "{}"'.format(pattern))
  buffer.parallel_fill(tf.gfile.Glob(pattern))
  buffer.flush(os.path.join(fsdb.golden_chunk_dir(),
                            state.output_model_name + '.tfrecord.zz'))
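Because the sub-tasks are gathered with return_exceptions=True, failures come back as exception objects mixed in with the normal results, which is why the loop above re-raises them explicitly. A minimal standalone illustration of that pattern (gather_or_raise is an illustrative name):

# Check every gathered result and re-raise any exception instance before
# using the rest.
import asyncio

async def gather_or_raise(coros):
    results = await asyncio.gather(*coros, return_exceptions=True)
    for result in results:
        if isinstance(result, Exception):
            raise result
    return results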
Example 8
def main(unused_argv):
    mpi_comm = MPI.COMM_WORLD
    mpi_rank = mpi_comm.Get_rank()
    mpi_size = mpi_comm.Get_size()
    # avoid seed out of range
    random.seed(FLAGS.seed % 1048576)
    tf.set_random_seed(FLAGS.seed % 1048576)
    np.random.seed(FLAGS.seed % 1048576)

    pattern = os.path.join(FLAGS.read_path, '*.zz')
    files = tf.gfile.Glob(pattern)

    buffer = example_buffer.ExampleBuffer(sampling_frac=1.0)
    example_num = buffer.parallel_fill(files, threads=FLAGS.physical_cores)
    # Make sure all nodes generate the same number of examples.
    example_num = int(mpi_comm.allreduce(example_num, op=MPI.MIN))
    buffer.flush_new(FLAGS.write_path + '_{}'.format(mpi_rank),
                     example_num,
                     FLAGS.out_files_number,
                     threads=1)

    shutil.rmtree('/tmp/minigo/home', ignore_errors=True)
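The allreduce with MPI.MIN keeps the ranks in lockstep: every node flushes only as many examples as the smallest local buffer holds, so all output shards end up the same size. A small standalone sketch of that synchronization step (mpi4py; agree_on_example_count is an illustrative name):

# Each rank proposes its local example count; the MIN allreduce makes every
# rank agree on the smallest one before flushing.
from mpi4py import MPI

def agree_on_example_count(local_count):
    return int(MPI.COMM_WORLD.allreduce(local_count, op=MPI.MIN))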
Example 9
def selfplay(state):
  play_output_name = state.play_output_name
  play_output_dir = os.path.join(fsdb.selfplay_dir(), play_output_name)
  play_holdout_dir = os.path.join(fsdb.holdout_dir(), play_output_name)

  result = checked_run([
      'external/minigo/cc/main', '--mode=selfplay', '--parallel_games=2048',
      '--num_readouts=100', '--model={}'.format(
          state.play_model_path), '--output_dir={}'.format(play_output_dir),
      '--holdout_dir={}'.format(play_holdout_dir)
  ] + cc_flags(state), 'selfplay')
  logging.info(get_lines(result, make_slice[-2:]))

  # Write examples to a single record.
  logging.info('Extracting examples')
  random.seed(state.seed)
  tensorflow.set_random_seed(state.seed)
  numpy.random.seed(state.seed)
  buffer = example_buffer.ExampleBuffer(sampling_frac=1.0)
  buffer.parallel_fill(
      tensorflow.gfile.Glob(os.path.join(play_output_dir, '*.zz')))
  buffer.flush(
      os.path.join(fsdb.golden_chunk_dir(), play_output_name + '.tfrecord.zz'))