def validate(*tf_records):
    """Validate a model's performance on a set of holdout data."""
    if FLAGS.use_tpu:

        def _input_fn(params):
            return preprocessing.get_tpu_input_tensors(
                params['train_batch_size'],
                params['input_layout'],
                tf_records,
                shuffle_examples=FLAGS.shuffle_examples,
                shuffle_buffer_size=FLAGS.shuffle_buffer_size,
                filter_amount=1.0)
    else:

        def _input_fn():
            return preprocessing.get_input_tensors(FLAGS.train_batch_size,
                                                   FLAGS.input_layout,
                                                   tf_records,
                                                   filter_amount=1.0,
                                                   shuffle_examples=False)

    steps = FLAGS.examples_to_validate // FLAGS.train_batch_size
    if FLAGS.use_tpu:
        steps //= FLAGS.num_tpu_cores

    estimator = dual_net.get_estimator()
    with utils.logged_timer('Validating'):
        estimator.evaluate(_input_fn, steps=steps, name=FLAGS.validate_name)
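
# Usage sketch (not part of the original pipeline): how validate() above
# might be driven. The GCS pattern and helper name are hypothetical, and
# absl FLAGS are assumed to have been parsed already. Note that
# Estimator.evaluate() receives the _input_fn callable itself and invokes
# it internally, once per evaluation run.
def _example_validate_usage():
    holdout_records = tf.gfile.Glob('gs://my-bucket/data/holdout/*.zz')
    validate(*holdout_records)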
def run_game(load_file,
             selfplay_dir=None,
             holdout_dir=None,
             sgf_dir=None,
             holdout_pct=0.05):
    """Takes a played game and record results and game data."""
    if sgf_dir is not None:
        minimal_sgf_dir = os.path.join(sgf_dir, 'clean')
        full_sgf_dir = os.path.join(sgf_dir, 'full')
        utils.ensure_dir_exists(minimal_sgf_dir)
        utils.ensure_dir_exists(full_sgf_dir)
    if selfplay_dir is not None:
        # A holdout_dir must accompany selfplay_dir, since a fraction of
        # games (holdout_pct) is diverted there for validation.
        utils.ensure_dir_exists(selfplay_dir)
        utils.ensure_dir_exists(holdout_dir)

    with utils.logged_timer('Loading weights from %s ... ' % load_file):
        network = dual_net.DualNetwork(load_file)

    with utils.logged_timer('Playing game'):
        player = play(network)

    output_name = '{}-{}'.format(int(time.time()), socket.gethostname())
    game_data = player.extract_data()
    if sgf_dir is not None:
        with gfile.GFile(
                os.path.join(minimal_sgf_dir, '{}.sgf'.format(output_name)),
                'w') as f:
            f.write(player.to_sgf(use_comments=False))
        with gfile.GFile(
                os.path.join(full_sgf_dir, '{}.sgf'.format(output_name)),
                'w') as f:
            f.write(player.to_sgf())

    tf_examples = preprocessing.make_dataset_from_selfplay(game_data)

    if selfplay_dir is not None:
        # Hold out a holdout_pct fraction of games (5% by default) for
        # validation.
        if random.random() < holdout_pct:
            fname = os.path.join(holdout_dir,
                                 '{}.tfrecord.zz'.format(output_name))
        else:
            fname = os.path.join(selfplay_dir,
                                 '{}.tfrecord.zz'.format(output_name))

        preprocessing.write_tf_examples(fname, tf_examples)
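
# Usage sketch (illustrative only): invoking run_game() above for a single
# selfplay worker. All paths are hypothetical; the directory layout simply
# mirrors the arguments run_game() expects.
def _example_run_game_usage():
    run_game(
        load_file='models/000123-some-model',  # hypothetical checkpoint
        selfplay_dir='data/selfplay',          # full-strength training data
        holdout_dir='data/holdout',            # validation slice
        sgf_dir='sgf',                         # written under sgf/clean and sgf/full
        holdout_pct=0.05)                      # ~5% of games go to holdout_dir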
def main(unused_args):
    """Train on examples and export the updated model weights."""
    tf_records = tf.gfile.Glob(FLAGS.train_data_path)
    logging.info('Training on %s records: %s to %s', len(tf_records),
                 tf_records[0], tf_records[-1])
    with utils.logged_timer('Training'):
        train(*tf_records)
    # Export before freezing: freeze_graph() reads the model that
    # export_model() writes to FLAGS.export_path.
    if FLAGS.export_path:
        dual_net.export_model(FLAGS.export_path)
    if FLAGS.freeze:
        if FLAGS.use_tpu:
            dual_net.freeze_graph_tpu(FLAGS.export_path)
        else:
            dual_net.freeze_graph(FLAGS.export_path, FLAGS.use_trt,
                                  FLAGS.trt_max_batch_size,
                                  FLAGS.trt_precision)
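
# Entry-point sketch: main() above follows the absl convention (module
# level FLAGS, an ignored argv parameter), so a typical module footer
# would hand it to absl.app. This footer is an assumption about the
# surrounding module, not part of the excerpt:
#
#   if __name__ == '__main__':
#       from absl import app
#       app.run(main)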
async def checked_run(cmd, env=None):
    """Run the given subprocess command in a coroutine.

  Args:
    cmd: the command to run and its arguments.
    env: optional OS environment to run the process under.

  Returns:
    The output that the command wrote to stdout & stderr.

  Raises:
    RuntimeError: if the command returns a non-zero result.
  """

    # Start the subprocess.
    logging.info('Running: %s', await expand_cmd_str(cmd))
    with minigo_utils.logged_timer('{} finished'.format(get_cmd_name(cmd))):
        p = await asyncio.create_subprocess_exec(
            *cmd,
            env=env,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.STDOUT)

        # Stream output from the process stdout.
        lines = []
        while True:
            line = await p.stdout.readline()
            if not line:
                break
            # rstrip('\n') rather than [:-1]: the final line before EOF may
            # lack a trailing newline, and slicing would eat a real character.
            line = line.decode().rstrip('\n')
            lines.append(line)
            logging.info(line)

        # Wait for the process to finish, check it was successful & build stdout.
        await p.wait()
        # join() adds no trailing newline, so nothing needs trimming here.
        output = '\n'.join(lines)
        if p.returncode:
            raise RuntimeError('Return code {} from process: {}\n{}'.format(
                p.returncode, await expand_cmd_str(cmd), output))

        return output
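
# Usage sketch: checked_run() is a coroutine, so a caller must drive it
# from an event loop. A hedged example follows; the command is
# hypothetical, and stderr is already merged into stdout inside
# checked_run() itself.
def _example_checked_run_usage():
    loop = asyncio.get_event_loop()
    output = loop.run_until_complete(checked_run(['echo', 'hello']))
    # A non-zero exit status would have raised RuntimeError instead.
    print(output)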
def main(argv):
    """Validate a model's performance on a set of holdout data."""
    validation_paths = tf.gfile.Glob(FLAGS.validate_data_path)
    if FLAGS.expand_validation_dirs:
        tf_records = []
        with utils.logged_timer('Building lists of holdout files'):
            # os.walk() already recurses into subdirectories, so each
            # validation path only needs to be walked once; re-queueing the
            # subdirectories would double-count their files.
            for holdout_dir in validation_paths:
                for path, _, files in os.walk(holdout_dir):
                    tf_records.extend(
                        os.path.join(path, f) for f in files
                        if f.endswith('.zz'))

    else:
        tf_records = validation_paths[:100]

    if not tf_records:
        print('Validation paths:', validation_paths)
        for p in validation_paths:
            print('{}:\n\t{}'.format(p, os.listdir(p)))
        raise RuntimeError('Did not find any holdout files for validating!')
    validate(*tf_records)
def main(unused_argv):
    """Run the reinforcement learning loop."""
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter('[%(asctime)s] %(message)s',
                                  '%Y-%m-%d %H:%M:%S')

    # ML Perf Logging.

    mlp_log.mlperf_print('cache_clear', True)
    mlp_log.mlperf_print('init_start', None)

    mlp_log.mlperf_print(key='train_batch_size',
                         value=FLAGS.training_batch_size)
    mlp_log.mlperf_print(key='filter_amount', value=FLAGS.filter_amount)
    mlp_log.mlperf_print(key='window_size', value=FLAGS.window_size)
    mlp_log.mlperf_print(key='lr_boundaries',
                         value=str(FLAGS.lr_boundaries).strip('[]'))
    mlp_log.mlperf_print(key='lr_rates', value=str(FLAGS.lr_rates).strip('[]'))

    mlp_log.mlperf_print(key='opt_weight_decay', value=FLAGS.l2_strength)
    mlp_log.mlperf_print(key='min_selfplay_games_per_generation',
                         value=FLAGS.mlperf_num_games)
    mlp_log.mlperf_print(key='train_samples', value=FLAGS.mlperf_num_games)
    mlp_log.mlperf_print(key='eval_samples', value=FLAGS.mlperf_num_games)
    mlp_log.mlperf_print(key='num_readouts', value=FLAGS.mlperf_num_readouts)
    mlp_log.mlperf_print(key='value_init_penalty',
                         value=FLAGS.mlperf_value_init_penalty)
    mlp_log.mlperf_print(key='holdout_pct', value=FLAGS.mlperf_holdout_pct)
    mlp_log.mlperf_print(key='disable_resign_pct',
                         value=FLAGS.mlperf_disable_resign_pct)
    mlp_log.mlperf_print(key='resign_threshold',
                         value=(sum(FLAGS.mlperf_resign_threshold) /
                                len(FLAGS.mlperf_resign_threshold)))
    mlp_log.mlperf_print(key='parallel_games',
                         value=FLAGS.mlperf_parallel_games)
    mlp_log.mlperf_print(key='virtual_losses',
                         value=FLAGS.mlperf_virtual_losses)
    mlp_log.mlperf_print(key='gating_win_rate',
                         value=FLAGS.mlperf_gating_win_rate)
    mlp_log.mlperf_print(key='eval_games', value=FLAGS.mlperf_eval_games)

    for handler in logger.handlers:
        handler.setFormatter(formatter)

    # The training loop must be bootstrapped; either by running bootstrap.sh
    # to generate training data from random games, or by running
    # copy_checkpoint.sh to copy an already generated checkpoint.
    model_dirs = list_selfplay_dirs(FLAGS.selfplay_dir)

    iteration_model_names = []
    if not model_dirs:
        raise RuntimeError(
            'Couldn\'t find any selfplay games under {}. Either bootstrap.sh '
            'or init_from_checkpoint.sh must be run before the train loop is '
            'started'.format(FLAGS.selfplay_dir))
    model_num = int(os.path.basename(model_dirs[0]))
    tpu_name = FLAGS.tpu_name.split(':')[0]
    session_config = tf.ConfigProto(allow_soft_placement=True,
                                    log_device_placement=True)
    timeout_run_options = tf.RunOptions(
        timeout_in_ms=FLAGS.worker_reset_timeout_ms)

    mlp_log.mlperf_print('init_stop', None)
    mlp_log.mlperf_print('run_start', None)
    with minigo_utils.logged_timer('Total time'):
        state = State(model_num)
        while state.iter_num < FLAGS.iterations:
            state.iter_num += 1
            iteration_model_names.append(state.train_model_name)
            mlp_log.mlperf_print(key='epoch_start',
                                 value=None,
                                 metadata={'epoch_num': state.iter_num})
            train_once(state)
            mlp_log.mlperf_print(key='epoch_stop',
                                 value=None,
                                 metadata={'epoch_num': state.iter_num})
            mlp_log.mlperf_print(key='save_model',
                                 value='{iteration_num: ' +
                                 str(state.iter_num) + ' }')

            # In the case where iterations are fast, TPUEstimator can deadlock
            # between iterations on TPU Init. We attempt to manually make sure
            # the worker can Init with deadlines so we don't get stuck.
            while True:
                try:
                    tf.logging.info('Attempting to shutdown worker.')
                    gc.collect()
                    with tf.Graph().as_default():
                        with tf.Session(tpu_name,
                                        config=session_config) as sess:
                            sess.run(tf.tpu.shutdown_system(job='tpu_worker'),
                                     options=timeout_run_options)
                    tf.logging.info('Attempting to initialize worker.')
                    with tf.Graph().as_default():
                        with tf.Session(tpu_name,
                                        config=session_config) as sess:
                            init_result = sess.run(
                                tf.tpu.initialize_system(job='tpu_worker'),
                                options=timeout_run_options)
                    if init_result:
                        tf.logging.info('Worker reset.')
                        break
                except tf.errors.DeadlineExceededError:
                    pass
    with tf.gfile.GFile(FLAGS.abort_file_path, 'w') as f:
        f.write('abort')

    total_file_count = 0
    for iteration_model_name in iteration_model_names:
        total_file_count += len(
            tf.io.gfile.glob(
                os.path.join(FLAGS.selfplay_dir, iteration_model_name,
                             '*/*/*')))

    mlp_log.mlperf_print(key='actual_selfplay_games_per_generation',
                         value=int(total_file_count /
                                   len(iteration_model_names)))
def play_match(black_model, white_model, games, sgf_dir):
    """Plays matches between two neural nets.

    Args:
        black_model: Path to the model for black player
        white_model: Path to the model for white player
  """
    with utils.logged_timer('Loading weights'):
        black_net = dual_net.DualNetwork(black_model)
        white_net = dual_net.DualNetwork(white_model)

    readouts = FLAGS.num_readouts

    black = MCTSPlayer(black_net, two_player_mode=True)
    white = MCTSPlayer(white_net, two_player_mode=True)

    black_name = os.path.basename(black_net.save_file)
    white_name = os.path.basename(white_net.save_file)

    for i in range(games):
        num_move = 0  # The move number of the current game

        for player in [black, white]:
            player.initialize_game()
            first_node = player.root.select_leaf()
            prob, val = player.network.run(first_node.position)
            first_node.incorporate_results(prob, val, first_node)

        while True:
            start = time.time()
            active = white if num_move % 2 else black
            inactive = black if num_move % 2 else white

            current_readouts = active.root.N
            while active.root.N < current_readouts + readouts:
                active.tree_search()

            # print some stats on the search
            if FLAGS.verbose >= 3:
                print(active.root.position)

            # First, check the roots for hopeless games.
            if active.should_resign():  # Force resign
                active.set_result(-1 * active.root.position.to_play,
                                  was_resign=True)
                inactive.set_result(active.root.position.to_play,
                                    was_resign=True)

            if active.is_done():
                fname = '{:d}-{:s}-vs-{:s}-{:d}.sgf'.format(
                    int(time.time()), white_name, black_name, i)
                active.set_result(active.root.position.result(),
                                  was_resign=False)
                with gfile.GFile(os.path.join(sgf_dir, fname), 'w') as _file:
                    sgfstr = sgf_wrapper.make_sgf(active.position.recent,
                                                  active.result_string,
                                                  black_name=black_name,
                                                  white_name=white_name)
                    _file.write(sgfstr)
                print('Finished game', i, active.result_string)
                break

            move = active.pick_move()
            active.play_move(move)
            inactive.play_move(move)

            dur = time.time() - start
            num_move += 1

            if (FLAGS.verbose > 1) or (FLAGS.verbose == 1
                                       and num_move % 10 == 9):
                timeper = (dur / readouts) * 100.0  # seconds per 100 readouts
                print(active.root.position)
                print('%d: %d readouts, %.3f s/100. (%.2f sec)' %
                      (num_move, readouts, timeper, dur))
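
# Usage sketch (hypothetical paths): driving play_match() above for a
# short evaluation match. sgf_dir must exist before the loop writes one
# SGF per finished game into it, so the same ensure_dir_exists() helper
# used earlier is reused here.
def _example_play_match_usage():
    utils.ensure_dir_exists('sgf/matches')
    play_match(black_model='models/000123-black',
               white_model='models/000124-white',
               games=2,
               sgf_dir='sgf/matches')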