Example #1
def initialize_from_checkpoint(state, out_files_number):
    """Initialize the reinforcement learning loop from a checkpoint."""
    # The checkpoint's work_dir should contain the most recently trained model.
    model_paths = glob.glob(
        os.path.join(FLAGS.checkpoint_dir, 'work_dir/model.ckpt-*.pb'))
    if len(model_paths) != 1:
        raise RuntimeError(
            'Expected exactly one model in the checkpoint work_dir, '
            'got [{}]'.format(', '.join(model_paths)))
    start_model_path = model_paths[0]

    golden_chunks_dir = os.path.join(FLAGS.checkpoint_dir, 'golden_chunks')
    for basename in os.listdir(golden_chunks_dir):
        path = os.path.join(golden_chunks_dir, basename)
        out_path = os.path.join(fsdb.golden_chunk_dir(), basename)
        buffer = example_buffer.ExampleBuffer(sampling_frac=1.0)
        example_num = buffer.parallel_fill(tf.gfile.Glob(path),
                                           FLAGS.physical_cores)
        buffer.flush_new(out_path, example_num, out_files_number, 1)

    # Copy the latest trained model into the models directory and use it on the
    # first round of selfplay.
    state.best_model_name = 'checkpoint'
    best_model_path = os.path.join(fsdb.models_dir(), state.best_model_name)

    dual_net.optimize_graph(start_model_path, best_model_path,
                            FLAGS.quantization,
                            fsdb.golden_chunk_dir() + '/*.zz*',
                            FLAGS.eval_min_max_every_epoch)

    # Copy the training files.
    work_dir = os.path.join(FLAGS.checkpoint_dir, 'work_dir')
    for basename in os.listdir(work_dir):
        path = os.path.join(work_dir, basename)
        shutil.copy(path, fsdb.working_dir())
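These snippets read their configuration from absl-style FLAGS (checkpoint_dir, physical_cores, and so on) that are declared elsewhere in the Minigo RL-loop scripts. A minimal sketch of how such flags are typically declared; the flag names come from the example above, while the defaults and help strings are illustrative only.

from absl import flags

flags.DEFINE_string('checkpoint_dir', None,
                    'Checkpoint directory to initialize the RL loop from.')
flags.DEFINE_integer('physical_cores', 8,
                     'Number of physical cores used to fill example buffers.')

FLAGS = flags.FLAGS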
Example #2
def main(unused_argv):
  """Run the reinforcement learning loop."""

  print('Wiping dir %s' % FLAGS.base_dir, flush=True)
  shutil.rmtree(FLAGS.base_dir, ignore_errors=True)
  dirs = [fsdb.models_dir(), fsdb.selfplay_dir(), fsdb.holdout_dir(),
          fsdb.eval_dir(), fsdb.golden_chunk_dir(), fsdb.working_dir()]
  for d in dirs:
    ensure_dir_exists(d)

  # Copy the flag files so there's no chance of them getting accidentally
  # overwritten while the RL loop is running.
  flags_dir = os.path.join(FLAGS.base_dir, 'flags')
  shutil.copytree(FLAGS.flags_dir, flags_dir)
  FLAGS.flags_dir = flags_dir

  # Copy the target model to the models directory so we can find it easily.
  for file_name in [
        "target.pb", "target_raw.ckpt.data-00000-of-00001",
        "target_raw.ckpt.index", "target_raw.ckpt.meta"]:
    shutil.copy(FLAGS.target_path[:-len("target.pb")] + file_name,
                os.path.join(fsdb.models_dir(), file_name))

  logging.getLogger().addHandler(
      logging.FileHandler(os.path.join(FLAGS.base_dir, 'rl_loop.log')))
  formatter = logging.Formatter('[%(asctime)s] %(message)s',
                                '%Y-%m-%d %H:%M:%S')
  for handler in logging.getLogger().handlers:
    handler.setFormatter(formatter)

  with logged_timer('Total time'):
    try:
      rl_loop()
    finally:
      asyncio.get_event_loop().close()
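The main(unused_argv) entry points in these examples are normally launched through absl.app, which parses the command-line FLAGS before handing control to main. A minimal sketch of the launcher boilerplate, which is an assumption rather than part of the excerpted code:

from absl import app

if __name__ == '__main__':
    app.run(main)  # main(unused_argv) as defined in the example above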
Example #3
def main(unused_argv):
    """Run the reinforcement learning loop."""

    print('Wiping dir %s' % FLAGS.base_dir, flush=True)
    shutil.rmtree(FLAGS.base_dir, ignore_errors=True)

    utils.ensure_dir_exists(fsdb.models_dir())
    utils.ensure_dir_exists(fsdb.selfplay_dir())
    utils.ensure_dir_exists(fsdb.holdout_dir())
    utils.ensure_dir_exists(fsdb.eval_dir())
    utils.ensure_dir_exists(fsdb.golden_chunk_dir())
    utils.ensure_dir_exists(fsdb.working_dir())

    # Copy the target model to the models directory so we can find it easily.
    shutil.copy('ml_perf/target.pb', fsdb.models_dir())

    logging.getLogger().addHandler(
        logging.FileHandler(os.path.join(FLAGS.base_dir, 'reinforcement.log')))
    formatter = logging.Formatter('[%(asctime)s] %(message)s',
                                  '%Y-%m-%d %H:%M:%S')
    for handler in logging.getLogger().handlers:
        handler.setFormatter(formatter)

    with utils.logged_timer('Total time'):
        for target_win_rate in rl_loop():
            if target_win_rate > 0.5:
                return logging.info('Passed exit criteria.')
        logging.info('Failed to converge.')
Example #4
def main(unused_argv):
    """Run the reinforcement learning loop."""

    print('Wiping dir %s' % FLAGS.base_dir, flush=True)
    shutil.rmtree(FLAGS.base_dir, ignore_errors=True)

    utils.ensure_dir_exists(fsdb.models_dir())
    utils.ensure_dir_exists(fsdb.selfplay_dir())
    utils.ensure_dir_exists(fsdb.holdout_dir())
    utils.ensure_dir_exists(fsdb.eval_dir())
    utils.ensure_dir_exists(fsdb.golden_chunk_dir())
    utils.ensure_dir_exists(fsdb.working_dir())

    # Copy the flag files so there's no chance of them getting accidentally
    # overwritten while the RL loop is running.
    flags_dir = os.path.join(FLAGS.base_dir, 'flags')
    shutil.copytree(FLAGS.flags_dir, flags_dir)
    FLAGS.flags_dir = flags_dir

    # Copy the target model to the models directory so we can find it easily.
    shutil.copy('ml_perf/target.pb', fsdb.models_dir())

    logging.getLogger().addHandler(
        logging.FileHandler(os.path.join(FLAGS.base_dir, 'rl_loop.log')))
    formatter = logging.Formatter('[%(asctime)s] %(message)s',
                                  '%Y-%m-%d %H:%M:%S')
    for handler in logging.getLogger().handlers:
        handler.setFormatter(formatter)

    with utils.logged_timer('Total time'):
        try:
            rl_loop()
        finally:
            asyncio.get_event_loop().close()
Example #5
async def train(state, selfplay_processes):
    """Run training and write a new model to the fsdb models_dir.

    Args:
        state: the RL loop State instance.
        selfplay_processes: the selfplay worker processes to wait on for new
            training examples.
    """

    wait_for_training_examples(state, selfplay_processes,
                               FLAGS.min_games_per_iteration)
    tf_records = await sample_training_examples(state)

    model_path = os.path.join(fsdb.models_dir(), state.train_model_name)

    await run(
        'python3', 'train.py',
        '--gpu_device_list={}'.format(','.join(FLAGS.train_devices)),
        '--flagfile={}'.format(os.path.join(FLAGS.flags_dir, 'train.flags')),
        '--work_dir={}'.format(fsdb.working_dir()),
        '--export_path={}'.format(model_path),
        '--use_extra_features={}'.format(FLAGS.use_extra_features),
        '--freeze=true',
        *tf_records)

    # Append the time elapsed from when the RL was started to when this model
    # was trained.
    elapsed = time.time() - state.start_time
    timestamps_path = os.path.join(fsdb.models_dir(), 'train_times.txt')
    with gfile.Open(timestamps_path, 'a') as f:
        print('{:.3f} {}'.format(elapsed, state.train_model_name), file=f)

    if FLAGS.validate and state.iter_num > 1:
        try:
            await validate(state)
        except Exception as e:
            logging.error(e)
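Several of the async examples await a run(...) helper that is not shown on this page. A hedged sketch of such a helper, built on asyncio subprocesses and returning the captured output lines; the helper in the real scripts does more (logging, flag handling), so treat this as a stand-in only.

import asyncio

async def run_sketch(*cmd):
    """Execute cmd asynchronously, raise on failure, return output lines."""
    proc = await asyncio.create_subprocess_exec(
        *cmd,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.STDOUT)
    stdout, _ = await proc.communicate()
    if proc.returncode != 0:
        raise RuntimeError('Command failed: {}'.format(' '.join(cmd)))
    return stdout.decode().splitlines()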
Example #6
async def train(state, tf_records):
    """Run training and write a new model to the fsdb models_dir.

    Args:
        state: the RL loop State instance.
        tf_records: a list of paths to TensorFlow records to train on.
    """

    model_path = os.path.join(fsdb.models_dir(), state.train_model_name)
    await run(
        'python3', 'train.py',
        '--gpu_device_list={}'.format(','.join(FLAGS.train_devices)),
        '--flagfile={}'.format(os.path.join(FLAGS.flags_dir, 'train.flags')),
        '--work_dir={}'.format(fsdb.working_dir()),
        '--export_path={}'.format(model_path),
        '--use_extra_features={}'.format(FLAGS.use_extra_features),
        '--freeze=true',
        *tf_records)

    # Append the time elapsed from when the RL was started to when this model
    # was trained.
    elapsed = time.time() - state.start_time
    timestamps_path = os.path.join(fsdb.models_dir(), 'train_times.txt')
    with gfile.Open(timestamps_path, 'a') as f:
        print('{:.3f} {}'.format(elapsed, state.train_model_name), file=f)
Example #7
def initialize_from_checkpoint(state):
    """Initialize the reinforcement learning loop from a checkpoint."""

    # The checkpoint's work_dir should contain the most recently trained model.
    model_paths = glob.glob(
        os.path.join(FLAGS.checkpoint_dir, 'work_dir/model.ckpt-*.pb'))
    if len(model_paths) != 1:
        raise RuntimeError(
            'Expected exactly one model in the checkpoint work_dir, '
            'got [{}]'.format(', '.join(model_paths)))
    start_model_path = model_paths[0]

    # Copy the latest trained model into the models directory and use it on the
    # first round of selfplay.
    state.best_model_name = 'checkpoint'
    shutil.copy(start_model_path,
                os.path.join(fsdb.models_dir(), state.best_model_name + '.pb'))

    # Copy the training chunks.
    golden_chunks_dir = os.path.join(FLAGS.checkpoint_dir, 'golden_chunks')
    for basename in os.listdir(golden_chunks_dir):
        path = os.path.join(golden_chunks_dir, basename)
        shutil.copy(path, fsdb.golden_chunk_dir())

    # Copy the training files.
    work_dir = os.path.join(FLAGS.checkpoint_dir, 'work_dir')
    for basename in os.listdir(work_dir):
        path = os.path.join(work_dir, basename)
        shutil.copy(path, fsdb.working_dir())
Example #8
def run_tpu(no_resign=False):
    os.environ[
        'GRPC_DEFAULT_SSL_ROOTS_FILE_PATH'] = '/etc/ssl/certs/ca-certificates.crt'
    flagset = [
        'bazel-bin/cc/main', '--mode=selfplay', '--engine=tpu',
        '--model={}'.format(
            os.path.join(fsdb.working_dir(), 'model.ckpt-%d.pb')),
        '--output_dir={}'.format(fsdb.selfplay_dir()),
        '--holdout_dir={}'.format(fsdb.holdout_dir()),
        '--sgf_dir={}'.format(fsdb.sgf_dir()), '--run_forever=true',
        '--output_bigtable={}'.format(FLAGS.output_bigtable)
    ]

    if 'KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS' in os.environ:
        flagset.append('--tpu_name={}'.format(
            os.environ['KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS']))

    if no_resign:
        flagset.extend(['--flagfile=rl_loop/distributed_flags_nr'])
    else:
        flagset.extend([
            '--flags_path={}'.format(fsdb.flags_path()),
            '--flagfile=rl_loop/distributed_flags'
        ])

    mask_flags.checked_run(flagset)
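mask_flags.checked_run comes from Minigo's rl_loop utilities: it runs the given command and fails loudly on a non-zero exit code (the module is named for the flag filtering it also performs). A simplified stand-in for experimenting outside the repository, ignoring that flag-masking behaviour:

import subprocess

def checked_run_sketch(cmd):
    """Run cmd and raise subprocess.CalledProcessError if it fails."""
    return subprocess.run(cmd, check=True)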
Example #9
def py_flags(state):
    return [
        '--work_dir={}'.format(fsdb.working_dir()),
        '--trunk_layers=10',
        '--conv_width=64',
        '--value_cost_weight=0.25',
        '--training_seed={}'.format(state.seed),
    ]
Example #10
def train(state, tf_records):
    model_path = os.path.join(fsdb.models_dir(), state.train_model_name)
    checked_run(
        'training', 'python3', 'train.py', *tf_records,
        '--flagfile={}'.format(os.path.join(FLAGS.flags_dir, 'train.flags')),
        '--work_dir={}'.format(fsdb.working_dir()),
        '--export_path={}'.format(model_path),
        '--training_seed={}'.format(state.seed), '--freeze=true')
Example #11
def run_tpu():
    mask_flags.checked_run([
        'bazel-bin/cc/main', '--mode=selfplay', '--engine=tpu',
        '--checkpoint_dir={}'.format(fsdb.working_dir()),
        '--output_dir={}'.format(fsdb.selfplay_dir()),
        '--holdout_dir={}'.format(fsdb.holdout_dir()), '--sgf_dir={}'.format(
            fsdb.sgf_dir()), '--flags_path={}'.format(fsdb.flags_path()),
        '--run_forever=true', '--flagfile=rl_loop/distributed_flags'
    ])
Example #12
def validate_pro():
    """Validate on professional data."""
    cmd = [
        'python3', 'validate.py', FLAGS.pro_dataset, '--use_tpu',
        '--tpu_name={}'.format(TPU_NAME),
        '--work_dir={}'.format(fsdb.working_dir()),
        '--flagfile=rl_loop/distributed_flags', '--validate_name=pro'
    ]
    mask_flags.run(cmd)
Example #13
def freeze(save_path, rewrite_tpu=False):
    cmd = [
        'python3', 'freeze_graph.py',
        '--work_dir={}'.format(fsdb.working_dir()),
        '--model_path={}'.format(save_path)
    ]

    if rewrite_tpu:
        cmd.extend(['--use_tpu', '--tpu_name={}'.format(TPU_NAME)])

    return mask_flags.run(cmd)
Example #14
async def validate(state):
    dirs = [x.path for x in os.scandir(fsdb.holdout_dir()) if x.is_dir()]
    src_dirs = sorted(dirs, reverse=True)[:FLAGS.window_size]

    await run(
        'python3', 'validate.py',
        '--gpu_device_list={}'.format(','.join(FLAGS.train_devices)),
        '--flagfile={}'.format(os.path.join(FLAGS.flags_dir,
                                            'validate.flags')),
        '--work_dir={}'.format(fsdb.working_dir()), '--expand_validation_dirs',
        *src_dirs)
Example #15
async def train(state, tf_records):
    """Run training and write a new model to the fsdb models_dir.

    Args:
        state: the RL loop State instance.
        tf_records: a list of paths to TensorFlow records to train on.
    """
    new_env = os.environ.copy()
    model_path = os.path.join(fsdb.models_dir(), state.train_model_name)

    if FLAGS.use_mgpu_horovod:
        # assign leading cores of sockets to train
        await run(
            new_env, 'mpiexec', '--allow-run-as-root', '--map-by',
            'ppr:{}:socket,pe=2'.format(
                str(FLAGS.num_gpus_train // FLAGS.num_socket)), '-np',
            str(FLAGS.num_gpus_train), 'python3', 'train.py', *tf_records,
            '--flagfile={}'.format(os.path.join(FLAGS.flags_dir,
                                                'train.flags')),
            '--work_dir={}'.format(fsdb.working_dir()),
            '--export_path={}'.format(model_path), '--training_seed={}'.format(
                state.seed), '--use_mgpu_horovod=true', '--freeze=true')
    else:
        new_env['CUDA_VISIBLE_DEVICES'] = '0'
        await run(
            new_env, 'python3', 'train.py', *tf_records,
            '--flagfile={}'.format(os.path.join(FLAGS.flags_dir,
                                                'train.flags')),
            '--work_dir={}'.format(fsdb.working_dir()),
            '--export_path={}'.format(model_path),
            '--training_seed={}'.format(state.seed), '--freeze=true')

    minigo_print(key='save_model', value={'iteration': state.iter_num})

    # Append the time elapsed from when the RL was started to when this model
    # was trained.
    elapsed = time.time() - state.start_time
    timestamps_path = os.path.join(fsdb.models_dir(), 'train_times.txt')
    with gfile.Open(timestamps_path, 'a') as f:
        print('{:.3f} {}'.format(elapsed, state.train_model_name), file=f)
Example #16
async def validate(state, holdout_glob):
    """Validate the trained model against holdout games.

    Args:
        state: the RL loop State instance.
        holdout_glob: a glob that matches holdout games.
    """

    await checked_run(
        'python3', 'validate.py', holdout_glob,
        '--flagfile={}'.format(os.path.join(FLAGS.flags_dir,
                                            'validate.flags')),
        '--work_dir={}'.format(fsdb.working_dir()))
Example #17
def train():
    model_num, model_name = fsdb.get_latest_model()
    print("Training on gathered game data, initializing from {}".format(
        model_name))
    new_model_num = model_num + 1
    new_model_name = shipname.generate(new_model_num)
    print("New model will be {}".format(new_model_name))
    save_file = os.path.join(fsdb.models_dir(), new_model_name)

    # TODO(jacksona): Refactor train.py to take the filepath as a flag.
    cmd = [
        'python3', 'train.py', '__unused_file__', '--use_tpu', '--use_bt',
        '--work_dir={}'.format(fsdb.working_dir()),
        '--tpu_name={}'.format(TPU_NAME),
        '--flagfile=rl_loop/distributed_flags',
        '--export_path={}'.format(save_file)
    ]

    completed_process = mask_flags.run(cmd)
    if completed_process.returncode > 0:
        print("Training failed!")
        return completed_process

    # Train.py already copies the {data,index,meta} files to $BUCKET/models
    # Persist the checkpoint two ways:
    # Freeze the .ckpt file in the work_dir for the TPU selfplayers
    # Freeze a non-tpu version of the graph for later GPU use.
    latest_checkpoint = tf.train.latest_checkpoint(fsdb.working_dir())
    p = freeze(latest_checkpoint, rewrite_tpu=True)
    if p.returncode > 0:
        print("== TPU freeze failed!")
        return p

    p = freeze(save_file, rewrite_tpu=False)
    if p.returncode > 0:
        print("== Model freeze failed!")
        return p

    return completed_process
Example #18
async def train(state, tf_records=None):
  """Run training and write a new model to the fsdb models_dir.

  Args:
    state: the RL loop State instance.
    tf_records: a list of paths to TensorFlow records to train on.
  """
  if tf_records is None:
    # Train on shuffled game data from recent selfplay rounds.
    tf_records = get_golden_chunk_records()
  model_path = os.path.join(fsdb.models_dir(), state.train_model_name)
  if DISTRIBUTION_STRATEGY == "even":
    await run(
        'numactl',
        '--cpunodebind={}'.format(1),
        '--membind={}'.format(1),
        'python3', 'train.py', *tf_records,
        '--flagfile={}'.format(os.path.join(FLAGS.flags_dir, 'train.flags')),
        '--work_dir={}'.format(fsdb.working_dir()),
        '--export_path={}'.format(model_path),
        '--training_seed={}'.format(state.seed),
        '--freeze=true')
  elif DISTRIBUTION_STRATEGY == "skewed":
    await run(
        'numactl',
        '--cpunodebind={}'.format(1),
        '--membind={}'.format(1),
        'python3', 'train.py', *tf_records,
        '--flagfile={}'.format(os.path.join(FLAGS.flags_dir, 'train.flags')),
        '--work_dir={}'.format(fsdb.working_dir()),
        '--export_path={}'.format(model_path),
        '--training_seed={}'.format(state.seed),
        '--freeze=true')
  # Append the time elapsed from when the RL was started to when this model
  # was trained.
  elapsed = time.time() - state.start_time
  timestamps_path = os.path.join(fsdb.models_dir(), 'train_times.txt')
  with gfile.Open(timestamps_path, 'a') as f:
    print('{:.3f} {}'.format(elapsed, state.train_model_name), file=f)
Example #19
def main(unused_argv):
    """Run the reinforcement learning loop."""
    logging.getLogger('mlperf_compliance').propagate = False

    ##-->multi-node setup
    if FLAGS.use_multinode:
        mpi_comm = MPI.COMM_WORLD
        mpi_rank = mpi_comm.Get_rank()
        mpi_size = mpi_comm.Get_size()
        print('[MPI Init] MPI rank {}, mpi size is {} host is {}'.format(
            mpi_rank, mpi_size, socket.gethostname()))
    else:
        mpi_comm = None
        mpi_rank = 0
        mpi_size = 1

    print('Wiping dir %s' % FLAGS.base_dir, flush=True)
    shutil.rmtree(FLAGS.base_dir, ignore_errors=True)
    dirs = [
        fsdb.models_dir(),
        fsdb.selfplay_dir(),
        fsdb.holdout_dir(),
        fsdb.eval_dir(),
        fsdb.golden_chunk_dir(),
        fsdb.working_dir()
    ]

    ##-->sharedFS for dataExchange. tmp solution 5/6/2019
    if FLAGS.use_multinode:
        ensure_dir_exists(FLAGS.shared_dir_exchange)
    for d in dirs:
        ensure_dir_exists(d)

    # Copy the flag files so there's no chance of them getting accidentally
    # overwritten while the RL loop is running.
    flags_dir = os.path.join(FLAGS.base_dir, 'flags')
    shutil.copytree(FLAGS.flags_dir, flags_dir)
    FLAGS.flags_dir = flags_dir

    # Copy the target model to the models directory so we can find it easily.
    shutil.copy(FLAGS.target_path, os.path.join(fsdb.models_dir(),
                                                'target.pb'))
    shutil.copy(FLAGS.target_path + '.og',
                os.path.join(fsdb.models_dir(), 'target.pb.og'))

    with logged_timer('Total time from mpi_rank={}'.format(mpi_rank)):
        try:
            rl_loop(mpi_comm, mpi_rank, mpi_size)
        finally:
            asyncio.get_event_loop().close()
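The MPI communicator used in the multi-node branch above comes from mpi4py, imported elsewhere in the script. A minimal sketch of the assumed import and rank query:

import socket
from mpi4py import MPI

comm = MPI.COMM_WORLD
print('MPI rank {} of {} on {}'.format(
    comm.Get_rank(), comm.Get_size(), socket.gethostname()))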
Example #20
async def validate(state, holdout_glob):
  """Validate the trained model against holdout games.

  Args:
    state: the RL loop State instance.
    holdout_glob: a glob that matches holdout games.
  """

  if not glob.glob(holdout_glob):
    print('Glob "{}" didn\'t match any files, skipping validation'.format(
          holdout_glob))
  else:
    await run(
        'python3', 'validate.py', holdout_glob,
        '--flagfile={}'.format(os.path.join(FLAGS.flags_dir, 'validate.flags')),
        '--work_dir={}'.format(fsdb.working_dir()))
Example #21
def main(unused_argv):
  """Run the reinforcement learning loop."""

  mll.init_start()
  print('Wiping dir %s' % FLAGS.base_dir, flush=True)
  shutil.rmtree(FLAGS.base_dir, ignore_errors=True)
  dirs = [fsdb.models_dir(), fsdb.selfplay_dir(), fsdb.holdout_dir(),
          fsdb.eval_dir(), fsdb.golden_chunk_dir(), fsdb.working_dir(),
          fsdb.mpi_log_dir()]
  for d in dirs:
    ensure_dir_exists(d)

  # Copy the flag files so there's no chance of them getting accidentally
  # overwritten while the RL loop is running.
  flags_dir = os.path.join(FLAGS.base_dir, 'flags')
  shutil.copytree(FLAGS.flags_dir, flags_dir)
  FLAGS.flags_dir = flags_dir

  # Copy the target model to the models directory so we can find it easily.
  shutil.copy(FLAGS.target_path, os.path.join(fsdb.models_dir(), 'target.pb'))

  logging.getLogger().addHandler(
      logging.FileHandler(os.path.join(FLAGS.base_dir, 'rl_loop.log')))
  formatter = logging.Formatter('[%(asctime)s] %(message)s',
                                '%Y-%m-%d %H:%M:%S')
  for handler in logging.getLogger().handlers:
    handler.setFormatter(formatter)

  logging.info('Selfplay nodes = {}'.format(FLAGS.selfplay_node))
  logging.info('Train nodes = {}'.format(FLAGS.train_node))
  logging.info('Eval nodes = {}'.format(FLAGS.eval_node))

  with logged_timer('Total time'):
    try:
      mll.init_stop()
      mll.run_start()
      rl_loop()
    finally:
      asyncio.get_event_loop().close()
Example #22
def spawn_train_workers(state):
    # need to be removed
    tf_records = get_golden_chunk_records(state)
    comm_world = MPI.COMM_WORLD

    # spawn one worker process
    print("Spawning worker processes on {}".format(socket.gethostname()))
    mpi_info = MPI.Info.Create()
    num_workers = FLAGS.num_gpus_train
    # subtract 1 core from this value, oversubscription might not work
    cores_per_worker = (FLAGS.cores_per_socket *
                        FLAGS.num_socket) // num_workers - 1

    mpi_info.Set("host", socket.gethostname())
    mpi_info.Set(
        "map_by", "ppr:{}:socket,PE={}".format(num_workers // FLAGS.num_socket,
                                               cores_per_worker))
    icomm = MPI.COMM_SELF.Spawn(
        "python3",
        maxprocs=num_workers,
        args=[
            'train.py', *tf_records,
            '--flagfile={}'.format(os.path.join(FLAGS.flags_dir,
                                                'train.flags')),
            '--work_dir={}'.format(fsdb.working_dir()),
            '--export_path={}'.format(
                os.path.join(fsdb.models_dir(),
                             'new_model')), '--training_seed=13337',
            '--num_selfplays={}'.format(comm_world.size - 1),
            '--window_iters={}'.format(FLAGS.window_size),
            '--total_iters={}'.format(FLAGS.iterations),
            '--golden_chunk_pattern={}'.format(
                os.path.join(fsdb.golden_chunk_dir(), '*.zz*')),
            '--freeze=true', '--use_multinode=true', '--use_mgpu_horovod=true'
        ],
        info=mpi_info)
    return icomm
Example #23
def freeze(save_path, rewrite_tpu=False):
    cmd = ['python3', 'freeze_graph.py',
           '--work_dir={}'.format(fsdb.working_dir()),
           '--model_path={}'.format(save_path)]

    return mask_flags.run(cmd)
Example #24
def main(unused_argv):
    """Run the reinforcement learning loop."""
    utils.ensure_dir_exists(fsdb.models_dir())
    utils.ensure_dir_exists(fsdb.selfplay_dir())
    utils.ensure_dir_exists(fsdb.holdout_dir())
    utils.ensure_dir_exists(fsdb.sgf_dir())
    utils.ensure_dir_exists(fsdb.eval_dir())
    utils.ensure_dir_exists(fsdb.golden_chunk_dir())
    utils.ensure_dir_exists(fsdb.working_dir())

    bootstrap_name = shipname.generate(0)
    bootstrap_model_path = os.path.join(fsdb.models_dir(), bootstrap_name)
    mask_flags.checked_run([
        'python3', 'bootstrap.py',
        '--export_path={}'.format(bootstrap_model_path),
        '--work_dir={}'.format(fsdb.working_dir()),
        '--flagfile=rl_loop/local_flags'
    ])

    selfplay_cmd = [
        'python3', 'selfplay.py',
        '--load_file={}'.format(bootstrap_model_path),
        '--selfplay_dir={}'.format(
            os.path.join(fsdb.selfplay_dir(),
                         bootstrap_name)), '--holdout_dir={}'.format(
                             os.path.join(fsdb.holdout_dir(), bootstrap_name)),
        '--sgf_dir={}'.format(fsdb.sgf_dir()), '--holdout_pct=0',
        '--flagfile=rl_loop/local_flags'
    ]

    # Selfplay twice
    mask_flags.checked_run(selfplay_cmd)
    mask_flags.checked_run(selfplay_cmd)
    # and once more to generate a held out game for validation
    # exploits flags behavior where if you pass flag twice, second one wins.
    mask_flags.checked_run(selfplay_cmd + ['--holdout_pct=100'])

    # Double check that at least one sgf has been generated.
    assert os.listdir(os.path.join(fsdb.sgf_dir(), 'full'))

    print("Making shuffled golden chunk from selfplay data...")
    # TODO(amj): refactor example_buffer so it can be called the same way
    # as everything else.
    eb.make_chunk_for(output_dir=fsdb.golden_chunk_dir(),
                      local_dir=fsdb.working_dir(),
                      game_dir=fsdb.selfplay_dir(),
                      model_num=1,
                      positions=64,
                      threads=8,
                      sampling_frac=1)

    tf_records = sorted(
        gfile.Glob(os.path.join(fsdb.golden_chunk_dir(), '*.tfrecord.zz')))

    trained_model_name = shipname.generate(1)
    trained_model_path = os.path.join(fsdb.models_dir(), trained_model_name)

    # Train on shuffled game data
    mask_flags.checked_run([
        'python3', 'train.py', *tf_records,
        '--work_dir={}'.format(fsdb.working_dir()),
        '--export_path={}'.format(trained_model_path),
        '--flagfile=rl_loop/local_flags'
    ])

    # Validate the trained model on held out game
    mask_flags.checked_run([
        'python3', 'validate.py',
        os.path.join(fsdb.holdout_dir(), bootstrap_name),
        '--work_dir={}'.format(fsdb.working_dir()),
        '--flagfile=rl_loop/local_flags'
    ])

    # Verify that trained model works for selfplay
    # exploits flags behavior where if you pass flag twice, second one wins.
    mask_flags.checked_run(selfplay_cmd +
                           ['--load_file={}'.format(trained_model_path)])

    mask_flags.checked_run([
        'python3', 'evaluate.py', bootstrap_model_path, trained_model_path,
        '--games=1', '--eval_sgf_dir={}'.format(fsdb.eval_dir()),
        '--flagfile=rl_loop/local_flags'
    ])
    print("Completed integration test!")
Example #25
def validate(state, holdout_glob):
  checked_run('validation',
      'python3', 'validate.py', holdout_glob,
      '--flagfile={}'.format(os.path.join(FLAGS.flags_dir, 'train.flags')),
      '--work_dir={}'.format(fsdb.working_dir()))
Example #26
async def train(state, window_size):
    """Run training and write a new model to the fsdb models_dir.

    Args:
        state: the RL loop State instance.
        window_size: the training window size, passed through to train.py.
    """
    train_node = FLAGS.train_node
    num_node = len(train_node)
    if num_node == 0:
        dist_train = False
    else:
        dist_train = True

    if dist_train:
        intra_threads = FLAGS.numa_cores // FLAGS.train_instance_per_numa - 1
        numa_per_node = FLAGS.physical_cores // FLAGS.numa_cores
        instance_per_node = numa_per_node * FLAGS.train_instance_per_numa

        mpi_async_progress = ''
        for i in range(numa_per_node):
            for j in range(FLAGS.train_instance_per_numa):
                if (not i == 0) or (not j == 0):
                    mpi_async_progress += ','
                mpi_async_progress += '{}'.format(i * FLAGS.numa_cores + j)
    else:
        intra_threads = FLAGS.physical_cores

    model_path = os.path.join(fsdb.models_dir(), state.train_model_name)
    cmd = [
        'python3', 'train.py',
        '--flagfile={}'.format(os.path.join(FLAGS.flags_dir, 'train.flags')),
        '--work_dir={}'.format(fsdb.working_dir()),
        '--export_path={}'.format(model_path),
        '--window_size={}'.format(window_size),
        '--data_path={}'.format(fsdb.golden_chunk_dir()),
        '--training_seed={}'.format(state.seed), '--freeze=True',
        '--num_inter_threads=1', '--num_intra_threads={}'.format(intra_threads)
    ]

    if dist_train:
        genvs = [
            'HOROVOD_FUSION_THRESHOLD=134217728', 'KMP_BLOCKTIME=0',
            'KMP_HW_SUBSET=1T', 'OMP_BIND_PROC=true',
            'I_MPI_ASYNC_PROGRESS_PIN=' + mpi_async_progress,
            'OMP_NUM_THREADS={}'.format(intra_threads)
        ]
        hosts = []
        proclists = []
        numa_nodes = []
        for node in range(num_node):
            # add all instance to the list
            for numa in range(numa_per_node):
                for instance in range(FLAGS.train_instance_per_numa):
                    hosts += [train_node[node]]
                    proclist = numa * FLAGS.numa_cores + FLAGS.train_instance_per_numa + instance * intra_threads
                    proclists += ['{}'.format(proclist)]
                    numa_nodes += ['{}'.format(numa)]

        lines = await run_distributed(genvs, 1, hosts, proclists, numa_nodes,
                                      None, *cmd, '--dist_train=True')
    else:
        lines = await run(*cmd)
    print('\n'.join(lines), file=sys.stderr)
Example #27
def main(unused_argv):

    for i in range(0, NUM_LOOP):
        if i == 0:
            src_model_name = shipname.generate(0)
            fsdb.switch_base(os.path.join(base_dir, src_model_name))
            src_model_path = os.path.join(fsdb.models_dir(), src_model_name)
            bootstrap_model_path = os.path.join(fsdb.models_dir(),
                                                src_model_name)
            mask_flags.checked_run([
                'python3', 'bootstrap.py',
                '--export_path={}'.format(bootstrap_model_path),
                '--work_dir={}'.format(fsdb.working_dir()),
                '--flagfile=rl_loop/local_flags'
            ])
            dst_model_name = shipname.generate(1)
            fsdb.switch_base(os.path.join(base_dir, dst_model_name))
        else:
            src_model_name = dst_model_name
            src_model_path = os.path.join(fsdb.models_dir(), src_model_name)
            dst_model_name = shipname.generate(i + 1)
            fsdb.switch_base(os.path.join(base_dir, dst_model_name))

        utils.ensure_dir_exists(fsdb.models_dir())
        utils.ensure_dir_exists(fsdb.selfplay_dir())
        utils.ensure_dir_exists(fsdb.holdout_dir())
        utils.ensure_dir_exists(fsdb.sgf_dir())
        utils.ensure_dir_exists(fsdb.eval_dir())
        utils.ensure_dir_exists(fsdb.golden_chunk_dir())
        utils.ensure_dir_exists(fsdb.working_dir())

        #bootstrap_name = shipname.generate(0)
        #bootstrap_model_path = os.path.join(fsdb.models_dir(), bootstrap_name)

        print(src_model_name)
        print(src_model_path)
        selfplay_cmd = [
            'python3', 'selfplay.py', '--load_file={}'.format(src_model_path),
            '--selfplay_dir={}'.format(
                os.path.join(fsdb.selfplay_dir(),
                             dst_model_name)), '--holdout_dir={}'.format(
                                 os.path.join(fsdb.holdout_dir(),
                                              dst_model_name)),
            '--sgf_dir={}'.format(fsdb.sgf_dir()), '--holdout_pct=0',
            '--flagfile=rl_loop/local_flags'
        ]

        # Selfplay twice
        mask_flags.checked_run(selfplay_cmd)
        mask_flags.checked_run(selfplay_cmd)

        # and once more to generate a held out game for validation
        # exploits flags behavior where if you pass flag twice, second one wins.
        mask_flags.checked_run(selfplay_cmd + ['--holdout_pct=100'])

        # Double check that at least one sgf has been generated.
        assert os.listdir(os.path.join(fsdb.sgf_dir(), 'full'))

        print("Making shuffled golden chunk from selfplay data...")
        # TODO(amj): refactor example_buffer so it can be called the same way
        # as everything else.
        eb.make_chunk_for(output_dir=fsdb.golden_chunk_dir(),
                          local_dir=fsdb.working_dir(),
                          game_dir=fsdb.selfplay_dir(),
                          model_num=1,
                          positions=64,
                          threads=8,
                          sampling_frac=1)

        tf_records = sorted(
            gfile.Glob(os.path.join(fsdb.golden_chunk_dir(), '*.tfrecord.zz')))

        #trained_model_name = shipname.generate(1)
        trained_model_name = dst_model_name
        trained_model_path = os.path.join(fsdb.models_dir(),
                                          trained_model_name)

        # Train on shuffled game data
        mask_flags.checked_run([
            'python3', 'train.py', *tf_records,
            '--work_dir={}'.format(fsdb.working_dir()),
            '--export_path={}'.format(trained_model_path),
            '--flagfile=rl_loop/local_flags'
        ])

    print("Finished!")
Example #28
def py_flags(state):
  return [
      '--work_dir={}'.format(fsdb.working_dir()),
      '--training_seed={}'.format(state.seed),
  ]