Example no. 1
def main(unused_argv):
  """Run the reinforcement learning loop."""

  print('Wiping dir %s' % FLAGS.base_dir, flush=True)
  shutil.rmtree(FLAGS.base_dir, ignore_errors=True)
  dirs = [fsdb.models_dir(), fsdb.selfplay_dir(), fsdb.holdout_dir(),
          fsdb.eval_dir(), fsdb.golden_chunk_dir(), fsdb.working_dir()]
  for d in dirs:
    ensure_dir_exists(d)

  # Copy the flag files so there's no chance of them getting accidentally
  # overwritten while the RL loop is running.
  flags_dir = os.path.join(FLAGS.base_dir, 'flags')
  shutil.copytree(FLAGS.flags_dir, flags_dir)
  FLAGS.flags_dir = flags_dir

  # Copy the target model to the models directory so we can find it easily.
  for file_name in [
        "target.pb", "target_raw.ckpt.data-00000-of-00001",
        "target_raw.ckpt.index", "target_raw.ckpt.meta"]:
    shutil.copy(FLAGS.target_path[:-len("target.pb")] + file_name,
                os.path.join(fsdb.models_dir(), file_name))

  logging.getLogger().addHandler(
      logging.FileHandler(os.path.join(FLAGS.base_dir, 'rl_loop.log')))
  formatter = logging.Formatter('[%(asctime)s] %(message)s',
                                '%Y-%m-%d %H:%M:%S')
  for handler in logging.getLogger().handlers:
    handler.setFormatter(formatter)

  with logged_timer('Total time'):
    try:
      rl_loop()
    finally:
      asyncio.get_event_loop().close()
Example no. 2
def initialize_from_checkpoint(state):
    """Initialize the reinforcement learning loop from a checkpoint."""

    # The checkpoint's work_dir should contain the most recently trained model.
    model_paths = glob.glob(
        os.path.join(FLAGS.checkpoint_dir, 'work_dir/model.ckpt-*.pb'))
    if len(model_paths) != 1:
        raise RuntimeError(
            'Expected exactly one model in the checkpoint work_dir, '
            'got [{}]'.format(', '.join(model_paths)))
    start_model_path = model_paths[0]

    # Copy the latest trained model into the models directory and use it on the
    # first round of selfplay.
    state.best_model_name = 'checkpoint'
    shutil.copy(start_model_path,
                os.path.join(fsdb.models_dir(), state.best_model_name + '.pb'))
    shutil.copy(
        start_model_path + '.og',
        os.path.join(fsdb.models_dir(), state.best_model_name + '.pb.og'))

    # Copy the training chunks.
    golden_chunks_dir = os.path.join(FLAGS.checkpoint_dir, 'golden_chunks')
    for basename in os.listdir(golden_chunks_dir):
        path = os.path.join(golden_chunks_dir, basename)
        out_path = os.path.join(fsdb.golden_chunk_dir(), basename)
        buffer = example_buffer.ExampleBuffer(sampling_frac=1.0)
        buffer.parallel_fill(tf.gfile.Glob(path))
        buffer.flush(out_path, FLAGS.num_gpus_train)

    # Copy the training files.
    work_dir = os.path.join(FLAGS.checkpoint_dir, 'work_dir')
    for basename in os.listdir(work_dir):
        path = os.path.join(work_dir, basename)
        shutil.copy(path, fsdb.working_dir())
def main(unused_argv):
    """Run the reinforcement learning loop."""

    print('Wiping dir %s' % FLAGS.base_dir, flush=True)
    shutil.rmtree(FLAGS.base_dir, ignore_errors=True)

    utils.ensure_dir_exists(fsdb.models_dir())
    utils.ensure_dir_exists(fsdb.selfplay_dir())
    utils.ensure_dir_exists(fsdb.holdout_dir())
    utils.ensure_dir_exists(fsdb.eval_dir())
    utils.ensure_dir_exists(fsdb.golden_chunk_dir())
    utils.ensure_dir_exists(fsdb.working_dir())

    # Copy the flag files so there's no chance of them getting accidentally
    # overwritten while the RL loop is running.
    flags_dir = os.path.join(FLAGS.base_dir, 'flags')
    shutil.copytree(FLAGS.flags_dir, flags_dir)
    FLAGS.flags_dir = flags_dir

    # Copy the target model to the models directory so we can find it easily.
    shutil.copy('ml_perf/target.pb', fsdb.models_dir())

    logging.getLogger().addHandler(
        logging.FileHandler(os.path.join(FLAGS.base_dir, 'rl_loop.log')))
    formatter = logging.Formatter('[%(asctime)s] %(message)s',
                                  '%Y-%m-%d %H:%M:%S')
    for handler in logging.getLogger().handlers:
        handler.setFormatter(formatter)

    with utils.logged_timer('Total time'):
        try:
            rl_loop()
        finally:
            asyncio.get_event_loop().close()
Example no. 4
async def train(state, tf_records):
    """Run training and write a new model to the fsdb models_dir.

    Args:
        state: the RL loop State instance.
        tf_records: a list of paths to TensorFlow records to train on.
    """

    model_path = os.path.join(fsdb.models_dir(), state.train_model_name)
    await run(
        'python3', 'train.py',
        '--gpu_device_list={}'.format(','.join(FLAGS.train_devices)),
        '--flagfile={}'.format(os.path.join(FLAGS.flags_dir, 'train.flags')),
        '--work_dir={}'.format(fsdb.working_dir()),
        '--export_path={}'.format(model_path),
        '--use_extra_features={}'.format(FLAGS.use_extra_features),
        '--freeze=true',
        *tf_records)

    # Append the time elapsed from when the RL was started to when this model
    # was trained.
    elapsed = time.time() - state.start_time
    timestamps_path = os.path.join(fsdb.models_dir(), 'train_times.txt')
    with gfile.Open(timestamps_path, 'a') as f:
        print('{:.3f} {}'.format(elapsed, state.train_model_name), file=f)
def main(unused_argv):
    """Run the reinforcement learning loop."""

    print('Wiping dir %s' % FLAGS.base_dir, flush=True)
    shutil.rmtree(FLAGS.base_dir, ignore_errors=True)

    utils.ensure_dir_exists(fsdb.models_dir())
    utils.ensure_dir_exists(fsdb.selfplay_dir())
    utils.ensure_dir_exists(fsdb.holdout_dir())
    utils.ensure_dir_exists(fsdb.eval_dir())
    utils.ensure_dir_exists(fsdb.golden_chunk_dir())
    utils.ensure_dir_exists(fsdb.working_dir())

    # Copy the target model to the models directory so we can find it easily.
    shutil.copy('ml_perf/target.pb', fsdb.models_dir())

    logging.getLogger().addHandler(
        logging.FileHandler(os.path.join(FLAGS.base_dir, 'reinforcement.log')))
    formatter = logging.Formatter('[%(asctime)s] %(message)s',
                                  '%Y-%m-%d %H:%M:%S')
    for handler in logging.getLogger().handlers:
        handler.setFormatter(formatter)

    with utils.logged_timer('Total time'):
        for target_win_rate in rl_loop():
            if target_win_rate > 0.5:
                return logging.info('Passed exit criteria.')
        logging.info('Failed to converge.')
async def train(state, selfplay_processes):
    """Run training and write a new model to the fsdb models_dir.

    Args:
        state: the RL loop State instance.
        selfplay_processes: the selfplay subprocesses whose output provides the
            training examples.
    """

    wait_for_training_examples(state, selfplay_processes,
                               FLAGS.min_games_per_iteration)
    tf_records = await sample_training_examples(state)

    model_path = os.path.join(fsdb.models_dir(), state.train_model_name)

    await run(
        'python3', 'train.py',
        '--gpu_device_list={}'.format(','.join(FLAGS.train_devices)),
        '--flagfile={}'.format(os.path.join(FLAGS.flags_dir, 'train.flags')),
        '--work_dir={}'.format(fsdb.working_dir()),
        '--export_path={}'.format(model_path),
        '--use_extra_features={}'.format(FLAGS.use_extra_features),
        '--freeze=true',
        *tf_records)

    # Append the time elapsed from when the RL was started to when this model
    # was trained.
    elapsed = time.time() - state.start_time
    timestamps_path = os.path.join(fsdb.models_dir(), 'train_times.txt')
    with gfile.Open(timestamps_path, 'a') as f:
        print('{:.3f} {}'.format(elapsed, state.train_model_name), file=f)

    if FLAGS.validate and state.iter_num > 1:
        try:
            await validate(state)
        except Exception as e:
            logging.error(e)
def load_train_times():
  models = []
  path = os.path.join(fsdb.models_dir(), 'train_times.txt')
  with gfile.Open(path, 'r') as f:
    for line in f.readlines():
      line = line.strip()
      if line:
        timestamp, name = line.split(' ')
        path = 'tf,' + os.path.join(fsdb.models_dir(), name + '.pb')
        models.append((float(timestamp), name, path))
  return models
Example no. 8
def post_train(state):
  model_path = os.path.join(fsdb.models_dir(), state.train_model_name)
  dual_net.optimize_graph(model_path + '.pb', model_path, FLAGS.quantization,
                          fsdb.golden_chunk_dir() + '/*.zz',
                          FLAGS.eval_min_max_every_epoch)
  mll.save_model(state.iter_num - 1)

  # Append the time elapsed from when the RL was started to when this model
  # was trained.
  elapsed = time.time() - state.start_time
  timestamps_path = os.path.join(fsdb.models_dir(), 'train_times.txt')
  with gfile.Open(timestamps_path, 'a') as f:
    print('{:.3f} {}'.format(elapsed, state.train_model_name), file=f)
Example no. 9
def main(unused_argv):
    """Run the reinforcement learning loop."""
    logging.getLogger('mlperf_compliance').propagate = False

    # Multi-node setup.
    if FLAGS.use_multinode:
        mpi_comm = MPI.COMM_WORLD
        mpi_rank = mpi_comm.Get_rank()
        mpi_size = mpi_comm.Get_size()
        print('[MPI Init] MPI rank {}, mpi size is {} host is {}'.format(
            mpi_rank, mpi_size, socket.gethostname()))
    else:
        mpi_comm = None
        mpi_rank = 0
        mpi_size = 1

    print('Wiping dir %s' % FLAGS.base_dir, flush=True)
    shutil.rmtree(FLAGS.base_dir, ignore_errors=True)
    dirs = [
        fsdb.models_dir(),
        fsdb.selfplay_dir(),
        fsdb.holdout_dir(),
        fsdb.eval_dir(),
        fsdb.golden_chunk_dir(),
        fsdb.working_dir()
    ]

    # Use a shared filesystem for data exchange (temporary solution, 5/6/2019).
    if FLAGS.use_multinode:
        ensure_dir_exists(FLAGS.shared_dir_exchange)
    for d in dirs:
        ensure_dir_exists(d)

    # Copy the flag files so there's no chance of them getting accidentally
    # overwritten while the RL loop is running.
    flags_dir = os.path.join(FLAGS.base_dir, 'flags')
    shutil.copytree(FLAGS.flags_dir, flags_dir)
    FLAGS.flags_dir = flags_dir

    # Copy the target model to the models directory so we can find it easily.
    shutil.copy(FLAGS.target_path, os.path.join(fsdb.models_dir(),
                                                'target.pb'))
    shutil.copy(FLAGS.target_path + '.og',
                os.path.join(fsdb.models_dir(), 'target.pb.og'))

    with logged_timer('Total time from mpi_rank={}'.format(mpi_rank)):
        try:
            rl_loop(mpi_comm, mpi_rank, mpi_size)
        finally:
            asyncio.get_event_loop().close()
Example no. 10
def get_files_exchange(state, mpi_rank):
    # Train rank fetches selfplay data; selfplay ranks fetch the eval model.
    if mpi_rank == FLAGS.train_rank:
        selfplay_files = glob.glob(
            os.path.join(FLAGS.shared_dir_exchange,
                         state.output_model_name + '-mpirank-*.zz*'))
        for filename in selfplay_files:
            print('Rank = {}, Getting file={} iter={} from SharedFS'.format(
                mpi_rank, filename, state.iter_num))
            shutil.copy(filename, fsdb.golden_chunk_dir())
    else:
        # Selfplay ranks need the newly trained model for evaluation.
        dst_dir = os.path.join(fsdb.models_dir())

        src_file = os.path.join(FLAGS.shared_dir_exchange,
                                state.train_model_name + '.pb')
        print('Rank = {}, Getting file={} iter={} from SharedFS'.format(
            mpi_rank, src_file, state.iter_num))
        shutil.copy(src_file, dst_dir)

        src_file = os.path.join(FLAGS.shared_dir_exchange,
                                state.train_model_name + '.pb' + '.og')
        print('Rank = {}, Getting file={} iter={} from SharedFS'.format(
            mpi_rank, src_file, state.iter_num))
        shutil.copy(src_file, dst_dir)
Example no. 11
async def convert(state):
    """Freeze the trained model and convert to TRT.

    Args:
        state: the RL loop State instance.
    """
    # Use only the second-from-last GPU.
    new_env = os.environ.copy()
    new_env['CUDA_VISIBLE_DEVICES'] = str(FLAGS.num_gpus_train - 2)

    model_path = os.path.join(fsdb.models_dir(), state.train_model_name)
    if FLAGS.use_multinode:
        # Convert runs in parallel with eval and validation.
        cores_per_worker = (FLAGS.cores_per_socket *
                            FLAGS.num_socket) // FLAGS.num_gpus_selfplay
        start = (FLAGS.num_gpus_train - 2) * cores_per_worker
        end = start + cores_per_worker - 1
        # Build the CPU core range string.
        cpus = str(start) + '-' + str(end)
        await run(new_env, 'taskset', '-c', cpus, 'python3', 'freeze_graph.py',
                  '--model_path={}'.format(model_path),
                  '--trt_batch={}'.format(FLAGS.trt_batch))

    else:
        await run(new_env, 'python3', 'freeze_graph.py',
                  '--model_path={}'.format(model_path),
                  '--trt_batch={}'.format(FLAGS.trt_batch))
Example no. 12
async def start_selfplay():
    output_dir = os.path.join(fsdb.selfplay_dir(), "$MODEL")
    holdout_dir = os.path.join(fsdb.holdout_dir(), "$MODEL")
    model_pattern = os.path.join(fsdb.models_dir(), '%d.pb')

    logs = []
    processes = []
    loop = asyncio.get_event_loop()
    for i, device in enumerate(FLAGS.selfplay_devices):
        cmd = [
            'bazel-bin/cc/concurrent_selfplay',
            '--flagfile={}'.format(os.path.join(FLAGS.flags_dir,
                                                'selfplay.flags')),
            '--run_forever=1',
            '--device={}'.format(device),
            '--model={}'.format(model_pattern),
            '--output_dir={}/{}'.format(output_dir, i),
            '--holdout_dir={}/{}'.format(holdout_dir, i)]

        cmd_str = await expand_cmd_str(cmd)
        f = open(os.path.join(FLAGS.base_dir, 'selfplay_%d.log' % i), 'w')
        f.write(cmd_str + '\n\n')
        f.flush()
        logging.info('Running: %s', cmd_str)

        processes.append(await asyncio.create_subprocess_exec(
            *cmd, stdout=f, stderr=asyncio.subprocess.STDOUT))
        logs.append(f)

    return (processes, logs)
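start_selfplay above returns the subprocess handles together with their open log files. The corresponding shutdown code is not shown in this example; the following is a minimal companion sketch, under the assumption that the caller simply terminates the workers and closes the logs (the function name is illustrative, not from the original code).

async def stop_selfplay(processes, logs):
    # Ask each selfplay worker to exit, then wait for all of them to finish.
    for p in processes:
        p.terminate()
    await asyncio.gather(*[p.wait() for p in processes])
    # Close the per-worker log files opened by start_selfplay.
    for f in logs:
        f.close()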
Example no. 13
def initialize_from_checkpoint(state):
  """Initialize the reinforcement learning loop from a checkpoint."""
  # The checkpoint's work_dir should contain the most recently trained model.
  model_paths = glob.glob(os.path.join(FLAGS.checkpoint_dir,
                                       'work_dir/model.ckpt-*.pb'))
  if len(model_paths) != 1:
    raise RuntimeError('Expected exactly one model in the checkpoint work_dir, '
                       'got [{}]'.format(', '.join(model_paths)))
  start_model_path = model_paths[0]

  # Copy the training chunks.
  golden_chunks_dir = os.path.join(FLAGS.checkpoint_dir, 'golden_chunks')
  for basename in os.listdir(golden_chunks_dir):
    path = os.path.join(golden_chunks_dir, basename)
    shutil.copy(path, fsdb.golden_chunk_dir())

  # Copy the latest trained model into the models directory and use it on the
  # first round of selfplay.
  state.best_model_name = 'checkpoint'
  best_model_path = os.path.join(fsdb.models_dir(), state.best_model_name)

  dual_net.optimize_graph(start_model_path, best_model_path, FLAGS.quantization,
                          fsdb.golden_chunk_dir() + '/*.zz',
                          FLAGS.eval_min_max_every_epoch)

  # Copy the training files.
  work_dir = os.path.join(FLAGS.checkpoint_dir, 'work_dir')
  for basename in os.listdir(work_dir):
    path = os.path.join(work_dir, basename)
    shutil.copy(path, fsdb.working_dir())
Example no. 14
def eval_pv(eval_positions):
    model_paths = oneoff_utils.get_model_paths(fsdb.models_dir())

    idx_start = FLAGS.idx_start
    eval_every = FLAGS.eval_every

    print("Evaluating models {}-{}, eval_every={}".format(
        idx_start, len(model_paths), eval_every))
    for idx in tqdm(range(idx_start, len(model_paths), eval_every)):
        if idx == idx_start:
            player = oneoff_utils.load_player(model_paths[idx])
        else:
            oneoff_utils.restore_params(model_paths[idx], player)

        mcts = strategies.MCTSPlayer(player.network, resign_threshold=-1)

        for name, position in eval_positions:
            mcts.initialize_game(position)
            mcts.suggest_move(position)

            path = []
            node = mcts.root
            while node.children:
                node = node.children.get(node.best_child())
                path.append("{},{}".format(node.fmove, int(node.N)))

            save_file = os.path.join(FLAGS.data_dir,
                                     "pv-{}-{}".format(name, idx))
            with open(save_file, "w") as data:
                data.write("{},  {}\n".format(idx, ",".join(path)))
Example no. 15
def initialize_from_checkpoint(state, out_files_number):
    """Initialize the reinforcement learning loop from a checkpoint."""
    # The checkpoint's work_dir should contain the most recently trained model.
    model_paths = glob.glob(
        os.path.join(FLAGS.checkpoint_dir, 'work_dir/model.ckpt-*.pb'))
    if len(model_paths) != 1:
        raise RuntimeError(
            'Expected exactly one model in the checkpoint work_dir, '
            'got [{}]'.format(', '.join(model_paths)))
    start_model_path = model_paths[0]

    golden_chunks_dir = os.path.join(FLAGS.checkpoint_dir, 'golden_chunks')
    for basename in os.listdir(golden_chunks_dir):
        path = os.path.join(golden_chunks_dir, basename)
        out_path = os.path.join(fsdb.golden_chunk_dir(), basename)
        buffer = example_buffer.ExampleBuffer(sampling_frac=1.0)
        example_num = buffer.parallel_fill(tf.gfile.Glob(path),
                                           FLAGS.physical_cores)
        buffer.flush_new(out_path, example_num, out_files_number, 1)

    # Copy the latest trained model into the models directory and use it on the
    # first round of selfplay.
    state.best_model_name = 'checkpoint'
    best_model_path = os.path.join(fsdb.models_dir(), state.best_model_name)

    dual_net.optimize_graph(start_model_path, best_model_path,
                            FLAGS.quantization,
                            fsdb.golden_chunk_dir() + '/*.zz*',
                            FLAGS.eval_min_max_every_epoch)

    # Copy the training files.
    work_dir = os.path.join(FLAGS.checkpoint_dir, 'work_dir')
    for basename in os.listdir(work_dir):
        path = os.path.join(work_dir, basename)
        shutil.copy(path, fsdb.working_dir())
Example no. 16
def same_run_eval(black_num=0, white_num=0):
    """Shorthand to spawn a job matching up two models from the same run,
    identified by their model number """
    if black_num <= 0 or white_num <= 0:
        print("Need real model numbers")
        return

    b = fsdb.get_model(black_num)
    w = fsdb.get_model(white_num)

    b_model_path = os.path.join(fsdb.models_dir(), b)
    w_model_path = os.path.join(fsdb.models_dir(), w)

    return launch_eval_job(b_model_path + ".pb", w_model_path + ".pb",
                           "{:d}-{:d}".format(black_num, white_num),
                           flags.FLAGS.bucket_name)
Example no. 17
def main(unused_argv):
    logging.getLogger('mlperf_compliance').propagate = False

    sgf_dir = os.path.join(fsdb.eval_dir(), 'target')
    target = 'tf,' + os.path.join(fsdb.models_dir(), 'target.pb.og')
    models = load_train_times()

    timestamp_to_log = 0
    iter_evaluated = 0

    for i, (timestamp, name, path) in enumerate(models):
        minigo_print(key=constants.EVAL_START, metadata={'epoch_num': i + 1})

        iter_evaluated += 1
        winrate = wait(evaluate_model(path + '.og', target, sgf_dir, i + 1))

        minigo_print(key=constants.EVAL_ACCURACY,
                     value=winrate,
                     metadata={'epoch_num': i + 1})
        minigo_print(key=constants.EVAL_STOP, metadata={'epoch_num': i + 1})

        if winrate >= 0.50:
            timestamp_to_log = timestamp
            print('Model {} beat target after {}s'.format(name, timestamp))
            break

    minigo_print(key='eval_result',
                 metadata={
                     'iteration': iter_evaluated,
                     'timestamp': timestamp_to_log
                 })
Example no. 18
def selfplay(state):
    output_dir = os.path.join(fsdb.selfplay_dir(), state.output_model_name)
    holdout_dir = os.path.join(fsdb.holdout_dir(), state.output_model_name)
    model_path = os.path.join(fsdb.models_dir(), state.best_model_name)

    result = checked_run([
        'bazel-bin/cc/selfplay', '--parallel_games=2048', '--num_readouts=100',
        '--model={}.pb'.format(model_path),
        '--output_dir={}'.format(output_dir),
        '--holdout_dir={}'.format(holdout_dir)
    ] + cc_flags(state), 'selfplay')
    logging.info(get_lines(result, make_slice[-2:]))

    # Write examples to a single record.
    pattern = os.path.join(output_dir, '*', '*.zz')
    random.seed(state.seed)
    tf.set_random_seed(state.seed)
    np.random.seed(state.seed)
    # TODO(tommadams): This method of generating one golden chunk per generation
    # is sub-optimal because each chunk gets reused multiple times for training,
    # introducing bias. Instead, a fresh dataset should be uniformly sampled out
    # of *all* games in the training window before the start of each training run.
    buffer = example_buffer.ExampleBuffer(sampling_frac=1.0)

    # TODO(tommadams): parallel_fill is currently non-deterministic. Make it not
    # so.
    logging.info('Writing golden chunk from "{}"'.format(pattern))
    buffer.parallel_fill(tf.gfile.Glob(pattern))
    buffer.flush(
        os.path.join(fsdb.golden_chunk_dir(),
                     state.output_model_name + '.tfrecord.zz'))
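The TODO in the example above suggests drawing a fresh, uniformly sampled chunk from all games in the training window rather than reusing one chunk per generation. Below is a minimal sketch of that idea; the function name, window_size and sampling_frac are illustrative assumptions, while ExampleBuffer, parallel_fill, flush, tf.gfile.Glob and the fsdb helpers are taken from the surrounding examples.

def sample_fresh_chunk(state, window_size=5, sampling_frac=0.05):
    # Selfplay output directories of the most recent generations
    # (assumes directory names sort by generation).
    model_dirs = sorted(tf.gfile.Glob(os.path.join(fsdb.selfplay_dir(), '*')))
    window = model_dirs[-window_size:]

    # Collect every example file written by those generations.
    paths = []
    for d in window:
        paths.extend(tf.gfile.Glob(os.path.join(d, '*', '*.zz')))

    # Uniformly sample a fresh golden chunk for the next training run.
    buffer = example_buffer.ExampleBuffer(sampling_frac=sampling_frac)
    buffer.parallel_fill(paths)
    out_path = os.path.join(fsdb.golden_chunk_dir(),
                            state.output_model_name + '.tfrecord.zz')
    buffer.flush(out_path)
    return out_path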
Example no. 19
def best_model_path(self):
  if self.best_model_name is None:
    # We don't have a good model yet, use a random fake model implementation.
    return 'random:0,0.4:0.4'
  else:
    return '{},{}.pb'.format(
        FLAGS.engine, os.path.join(fsdb.models_dir(), self.best_model_name))
Example no. 20
def bootstrap(unused_argv):
    bootstrap_name = shipname.generate(0)
    bootstrap_model_path = os.path.join(fsdb.models_dir(), bootstrap_name)
    mask_flags.checked_run([
        'python', 'bootstrap.py',
        '--export_path={}'.format(bootstrap_model_path),
        '--flagfile=rl_loop/distributed_flags'
    ])
Example no. 21
def main(unused_argv):
    sgf_dir = os.path.join(fsdb.eval_dir(), 'target')
    target = 'tf,' + os.path.join(fsdb.models_dir(), 'target.pb')
    models = load_train_times()
    for i, (timestamp, name, path) in enumerate(models):
        winrate = wait(evaluate_model(path, name, target, sgf_dir))
        if winrate >= 0.50:
            break
Example no. 22
def train(state, tf_records):
    model_path = os.path.join(fsdb.models_dir(), state.train_model_name)
    checked_run(
        'training', 'python3', 'train.py', *tf_records,
        '--flagfile={}'.format(os.path.join(FLAGS.flags_dir, 'train.flags')),
        '--work_dir={}'.format(fsdb.working_dir()),
        '--export_path={}'.format(model_path),
        '--training_seed={}'.format(state.seed), '--freeze=true')
def get_mg_path(model_run, model_num):
    """
    model_run = integer, e.g. 15, 16, corresponding to the v-number
    model_num = integer, e.g. 939, for the model number in that run
    """
    fsdb.switch_base("minigo-pub/v{:d}-19x19".format(model_run))
    model = fsdb.get_model(model_num)
    return os.path.join(fsdb.models_dir(), model)
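A hypothetical usage of get_mg_path, using the run and model numbers mentioned in its docstring; the values are only illustrative.

# Resolve the on-disk path of model 939 from the public v15 run.
model_939_path = get_mg_path(15, 939)
print(model_939_path)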
Example no. 24
def main(unused_argv):
    sgf_dir = os.path.join(fsdb.eval_dir(), 'target')
    target = 'tf,' + os.path.join(fsdb.models_dir(), 'target.pb')
    models = load_train_times()
    for i, (timestamp, name, path) in enumerate(models):
        winrate = wait(evaluate_model(path, target, sgf_dir, i + 1))
        if winrate >= 0.50:
            print('Model {} beat target after {}s'.format(name, timestamp))
            break
Example no. 25
def evaluate(state):
    eval_model = state.train_model_name
    best_model = state.best_model_name
    eval_model_path = os.path.join(fsdb.models_dir(), eval_model)
    best_model_path = os.path.join(fsdb.models_dir(), best_model)
    sgf_dir = os.path.join(fsdb.eval_dir(), eval_model)
    result = checked_run(
        'evaluation', 'bazel-bin/cc/eval',
        '--flagfile={}'.format(os.path.join(FLAGS.flags_dir, 'eval.flags')),
        '--model={}.pb'.format(eval_model_path),
        '--model_two={}.pb'.format(best_model_path),
        '--sgf_dir={}'.format(sgf_dir), '--seed={}'.format(state.seed))
    result = get_lines(result, make_slice[-7:])
    logging.info(result)
    pattern = r'{}\s+\d+\s+(\d+\.\d+)%'.format(eval_model)
    win_rate = float(re.search(pattern, result).group(1)) * 0.01
    logging.info('Win rate %s vs %s: %.3f', eval_model, best_model, win_rate)
    return win_rate
Example no. 26
def evaluate(state, against_model):
    eval_model = state.train_model_name
    eval_model_path = os.path.join(fsdb.models_dir(), eval_model)
    against_model_path = os.path.join(fsdb.models_dir(), against_model)
    sgf_dir = os.path.join(fsdb.eval_dir(), eval_model)
    result = checked_run([
        'bazel-bin/cc/eval', '--num_readouts=100', '--parallel_games=100',
        '--model={}.pb'.format(eval_model_path),
        '--model_two={}.pb'.format(against_model_path),
        '--sgf_dir={}'.format(sgf_dir)
    ] + cc_flags(state), 'evaluation against ' + against_model)
    result = get_lines(result, make_slice[-7:])
    logging.info(result)
    pattern = r'{}\s+\d+\s+(\d+\.\d+)%'.format(eval_model)
    win_rate = float(re.search(pattern, result).group(1)) * 0.01
    logging.info('Win rate %s vs %s: %.3f', eval_model, against_model,
                 win_rate)
    return win_rate
Example no. 27
def main(unusedargv):
    sgf_files = oneoff_utils.find_and_filter_sgf_files(FLAGS.sgf_dir,
                                                       FLAGS.min_year,
                                                       FLAGS.komi)
    pos_data, move_data, result_data, move_idxs = sample_positions_from_games(
        sgf_files=sgf_files, num_positions=FLAGS.num_positions)
    df = get_training_curve_data(fsdb.models_dir(), pos_data, move_data,
                                 result_data, FLAGS.idx_start,
                                 FLAGS.eval_every)
    save_plots(FLAGS.plot_dir, df)
def main(unusedargv):
    model_paths = oneoff_utils.get_model_paths(fsdb.models_dir())

    # List vars constructed when using dual_net.
    dual_net_list(model_paths[0])

    # Calculate l2 cost over a sequence of our models.
    df = get_l2_cost_data(model_paths, FLAGS.idx_start, FLAGS.eval_every)
    print(df)
    save_plots(FLAGS.plot_dir, df)
Example no. 29
def initialize_from_checkpoint(state):
  """Initialize the reinforcement learning loop from a checkpoint."""
  # The checkpoint's work_dir should contain the most recently trained model.
  model_paths = glob.glob(os.path.join(FLAGS.checkpoint_dir,
                                       'work_dir/model.ckpt-*.pb'))
  print(os.path.join(FLAGS.checkpoint_dir, 'work_dir/model.ckpt-*.pb'))
  print(os.getcwd())
  if len(model_paths) != 1:
    raise RuntimeError(
      'Expected exactly one model in the checkpoint work_dir'
      '({}), got [{}]'.format(
        os.path.join(FLAGS.checkpoint_dir, 'work_dir'), ', '.join(model_paths)))
  start_model_path = model_paths[0]

  # Copy the latest trained model into the models directory and use it on the
  # first round of selfplay.
  state.best_model_name = 'checkpoint'

  shutil.copy(start_model_path,
              os.path.join(fsdb.models_dir(), state.best_model_name + '.pb'))

  start_model_files = glob.glob(os.path.join(
    FLAGS.checkpoint_dir, 'work_dir/model.ckpt-9383_raw.ckpt*'))

  for file_name in start_model_files:
    shutil.copy(file_name,
        os.path.join(fsdb.models_dir(),
                     state.best_model_name +
                     os.path.basename(file_name)[len("model.ckpt-9383"):]))

  # Copy the training chunks.
  golden_chunks_dir = os.path.join(FLAGS.checkpoint_dir, "..", 'golden_chunks')
  for basename in os.listdir(golden_chunks_dir):
    path = os.path.join(golden_chunks_dir, basename)
    shutil.copy(path, fsdb.golden_chunk_dir())

  # Copy the training files.
  work_dir = os.path.join(FLAGS.checkpoint_dir, 'work_dir')
  for basename in os.listdir(work_dir):
    path = os.path.join(work_dir, basename)
    shutil.copy(path, fsdb.working_dir())
Example no. 30
async def train(state, tf_records):
    """Run training and write a new model to the fsdb models_dir.

    Args:
        state: the RL loop State instance.
        tf_records: a list of paths to TensorFlow records to train on.
    """
    new_env = os.environ.copy()
    model_path = os.path.join(fsdb.models_dir(), state.train_model_name)

    if FLAGS.use_mgpu_horovod:
        # assign leading cores of sockets to train
        await run(
            new_env, 'mpiexec', '--allow-run-as-root', '--map-by',
            'ppr:{}:socket,pe=2'.format(
                str(FLAGS.num_gpus_train // FLAGS.num_socket)), '-np',
            str(FLAGS.num_gpus_train), 'python3', 'train.py', *tf_records,
            '--flagfile={}'.format(os.path.join(FLAGS.flags_dir,
                                                'train.flags')),
            '--work_dir={}'.format(fsdb.working_dir()),
            '--export_path={}'.format(model_path), '--training_seed={}'.format(
                state.seed), '--use_mgpu_horovod=true', '--freeze=true')
    else:
        new_env['CUDA_VISIBLE_DEVICES'] = '0'
        await run(
            new_env, 'python3', 'train.py', *tf_records,
            '--flagfile={}'.format(os.path.join(FLAGS.flags_dir,
                                                'train.flags')),
            '--work_dir={}'.format(fsdb.working_dir()),
            '--export_path={}'.format(model_path),
            '--training_seed={}'.format(state.seed), '--freeze=true')

    minigo_print(key='save_model', value={'iteration': state.iter_num})

    # Append the time elapsed from when the RL was started to when this model
    # was trained.
    elapsed = time.time() - state.start_time
    timestamps_path = os.path.join(fsdb.models_dir(), 'train_times.txt')
    with gfile.Open(timestamps_path, 'a') as f:
        print('{:.3f} {}'.format(elapsed, state.train_model_name), file=f)