def validate(*tf_records):
    """Validate a model's performance on a set of holdout data."""
    if FLAGS.use_tpu:
        def _input_fn(params):
            return preprocessing.get_tpu_input_tensors(
                params['train_batch_size'], params['input_layout'], tf_records,
                shuffle_examples=FLAGS.shuffle_examples,
                shuffle_buffer_size=FLAGS.shuffle_buffer_size,
                filter_amount=1.0)
    else:
        def _input_fn():
            return preprocessing.get_input_tensors(
                FLAGS.train_batch_size, FLAGS.input_layout, tf_records,
                filter_amount=1.0, shuffle_examples=False)

    steps = FLAGS.examples_to_validate // FLAGS.train_batch_size
    if FLAGS.use_tpu:
        steps //= FLAGS.num_tpu_cores

    estimator = dual_net.get_estimator()
    with utils.logged_timer('Validating'):
        estimator.evaluate(_input_fn, steps=steps, name=FLAGS.validate_name)
def run_game(load_file, selfplay_dir=None, holdout_dir=None,
             sgf_dir=None, holdout_pct=0.05):
    """Takes a played game and records results and game data."""
    if sgf_dir is not None:
        minimal_sgf_dir = os.path.join(sgf_dir, 'clean')
        full_sgf_dir = os.path.join(sgf_dir, 'full')
        utils.ensure_dir_exists(minimal_sgf_dir)
        utils.ensure_dir_exists(full_sgf_dir)
    if selfplay_dir is not None:
        utils.ensure_dir_exists(selfplay_dir)
        utils.ensure_dir_exists(holdout_dir)

    with utils.logged_timer('Loading weights from %s ... ' % load_file):
        network = dual_net.DualNetwork(load_file)

    with utils.logged_timer('Playing game'):
        player = play(network)

    output_name = '{}-{}'.format(int(time.time()), socket.gethostname())
    game_data = player.extract_data()
    if sgf_dir is not None:
        with gfile.GFile(
                os.path.join(minimal_sgf_dir, '{}.sgf'.format(output_name)),
                'w') as f:
            f.write(player.to_sgf(use_comments=False))
        with gfile.GFile(
                os.path.join(full_sgf_dir, '{}.sgf'.format(output_name)),
                'w') as f:
            f.write(player.to_sgf())

    tf_examples = preprocessing.make_dataset_from_selfplay(game_data)
    if selfplay_dir is not None:
        # Hold out `holdout_pct` of games (5% by default) for validation.
        if random.random() < holdout_pct:
            fname = os.path.join(holdout_dir,
                                 '{}.tfrecord.zz'.format(output_name))
        else:
            fname = os.path.join(selfplay_dir,
                                 '{}.tfrecord.zz'.format(output_name))
        preprocessing.write_tf_examples(fname, tf_examples)
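# A minimal sketch of how run_game might be driven from a flags-based entry
# point. The flag names used here (load_file, selfplay_dir, holdout_dir,
# sgf_dir, holdout_pct) are assumptions for illustration and may not match
# the module's real flag definitions.
def selfplay_main(unused_argv):
    run_game(FLAGS.load_file,
             selfplay_dir=FLAGS.selfplay_dir,
             holdout_dir=FLAGS.holdout_dir,
             sgf_dir=FLAGS.sgf_dir,
             holdout_pct=FLAGS.holdout_pct)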
def main(unused_args):
    """Train on examples and export the updated model weights."""
    tf_records = tf.gfile.Glob(FLAGS.train_data_path)
    logging.info('Training on %s records: %s to %s',
                 len(tf_records), tf_records[0], tf_records[-1])
    with utils.logged_timer('Training'):
        train(*tf_records)
    if FLAGS.freeze:
        if FLAGS.use_tpu:
            dual_net.freeze_graph_tpu(FLAGS.export_path)
        else:
            dual_net.freeze_graph(FLAGS.export_path, FLAGS.use_trt,
                                  FLAGS.trt_max_batch_size,
                                  FLAGS.trt_precision)
    if FLAGS.export_path:
        dual_net.export_model(FLAGS.export_path)
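# How this entry point is typically launched, sketched under the assumption
# that the module uses absl's app.run (the FLAGS usage above is absl-style);
# the actual launcher may differ.
if __name__ == '__main__':
    from absl import app
    app.run(main)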
async def checked_run(cmd, env=None):
    """Run the given subprocess command in a coroutine.

    Args:
        cmd: the command to run and its arguments.
        env: optional OS environment to run the process under.

    Returns:
        The output that the command wrote to stdout & stderr.

    Raises:
        RuntimeError: if the command returns a non-zero result.
    """
    # Start the subprocess.
    logging.info('Running: %s', await expand_cmd_str(cmd))
    with minigo_utils.logged_timer('{} finished'.format(get_cmd_name(cmd))):
        p = await asyncio.create_subprocess_exec(
            *cmd, env=env,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.STDOUT)

        # Stream output from the process stdout.
        lines = []
        while True:
            line = await p.stdout.readline()
            if not line:
                break
            # rstrip rather than [:-1]: the final line may not end with a
            # newline, and [:-1] would chop a real character from it.
            line = line.decode().rstrip('\n')
            lines.append(line)
            logging.info(line)

        # Wait for the process to finish, check it was successful & build
        # stdout. Newlines were already stripped above, so the join needs no
        # further trimming.
        await p.wait()
        output = '\n'.join(lines)
        if p.returncode:
            raise RuntimeError('Return code {} from process: {}\n{}'.format(
                p.returncode, await expand_cmd_str(cmd), output))

        return output
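# A hedged usage sketch: checked_run is a coroutine, so it must be awaited
# inside an event loop. The binary path and flag below are illustrative
# placeholders, not the project's real command line; asyncio, os, and
# logging are assumed to be imported by the surrounding module.
async def _example():
    output = await checked_run(
        ['bazel-bin/cc/selfplay', '--model=saved_model'],
        env=os.environ.copy())
    logging.info('selfplay produced %d bytes of output', len(output))

# asyncio.run drives the coroutine to completion (Python 3.7+):
# asyncio.run(_example())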
def main(argv):
    """Validate a model's performance on a set of holdout data."""
    validation_paths = tf.gfile.Glob(FLAGS.validate_data_path)
    if FLAGS.expand_validation_dirs:
        tf_records = []
        with utils.logged_timer('Building lists of holdout files'):
            # os.walk already recurses into subdirectories, so there is no
            # need for a manual directory queue; re-queueing subdirectories
            # would walk them twice and collect duplicate records.
            for d in validation_paths:
                for path, _, files in os.walk(d):
                    tf_records.extend(
                        os.path.join(path, f) for f in files
                        if f.endswith('.zz'))
    else:
        tf_records = validation_paths[:100]

    if not tf_records:
        print('Validation paths:', validation_paths)
        print(['{}:\n\t{}'.format(p, os.listdir(p))
               for p in validation_paths])
        raise RuntimeError('Did not find any holdout files for validating!')
    validate(*tf_records)
def main(unused_argv):
    """Run the reinforcement learning loop."""
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter('[%(asctime)s] %(message)s',
                                  '%Y-%m-%d %H:%M:%S')

    # ML Perf Logging.
    mlp_log.mlperf_print('cache_clear', True)
    mlp_log.mlperf_print('init_start', None)
    mlp_log.mlperf_print(key='train_batch_size',
                         value=FLAGS.training_batch_size)
    mlp_log.mlperf_print(key='filter_amount', value=FLAGS.filter_amount)
    mlp_log.mlperf_print(key='window_size', value=FLAGS.window_size)
    mlp_log.mlperf_print(key='lr_boundaries',
                         value=str(FLAGS.lr_boundaries).strip('[]'))
    mlp_log.mlperf_print(key='lr_rates',
                         value=str(FLAGS.lr_rates).strip('[]'))
    mlp_log.mlperf_print(key='opt_weight_decay', value=FLAGS.l2_strength)
    mlp_log.mlperf_print(key='min_selfplay_games_per_generation',
                         value=FLAGS.mlperf_num_games)
    mlp_log.mlperf_print(key='train_samples', value=FLAGS.mlperf_num_games)
    mlp_log.mlperf_print(key='eval_samples', value=FLAGS.mlperf_num_games)
    mlp_log.mlperf_print(key='num_readouts', value=FLAGS.mlperf_num_readouts)
    mlp_log.mlperf_print(key='value_init_penalty',
                         value=FLAGS.mlperf_value_init_penalty)
    mlp_log.mlperf_print(key='holdout_pct', value=FLAGS.mlperf_holdout_pct)
    mlp_log.mlperf_print(key='disable_resign_pct',
                         value=FLAGS.mlperf_disable_resign_pct)
    mlp_log.mlperf_print(key='resign_threshold',
                         value=(sum(FLAGS.mlperf_resign_threshold) /
                                len(FLAGS.mlperf_resign_threshold)))
    mlp_log.mlperf_print(key='parallel_games',
                         value=FLAGS.mlperf_parallel_games)
    mlp_log.mlperf_print(key='virtual_losses',
                         value=FLAGS.mlperf_virtual_losses)
    mlp_log.mlperf_print(key='gating_win_rate',
                         value=FLAGS.mlperf_gating_win_rate)
    mlp_log.mlperf_print(key='eval_games', value=FLAGS.mlperf_eval_games)

    for handler in logger.handlers:
        handler.setFormatter(formatter)

    # The training loop must be bootstrapped; either by running bootstrap.sh
    # to generate training data from random games, or by running
    # copy_checkpoint.sh to copy an already generated checkpoint.
    model_dirs = list_selfplay_dirs(FLAGS.selfplay_dir)
    iteration_model_names = []
    if not model_dirs:
        raise RuntimeError(
            'Couldn\'t find any selfplay games under %s. Either bootstrap.sh '
            'or init_from_checkpoint.sh must be run before the train loop is '
            'started' % FLAGS.selfplay_dir)
    model_num = int(os.path.basename(model_dirs[0]))

    tpu_name = FLAGS.tpu_name.split(':')[0]
    session_config = tf.ConfigProto(allow_soft_placement=True,
                                    log_device_placement=True)
    timeout_run_options = tf.RunOptions(
        timeout_in_ms=FLAGS.worker_reset_timeout_ms)

    mlp_log.mlperf_print('init_stop', None)
    mlp_log.mlperf_print('run_start', None)

    with minigo_utils.logged_timer('Total time'):
        state = State(model_num)
        while state.iter_num < FLAGS.iterations:
            state.iter_num += 1
            iteration_model_names.append(state.train_model_name)
            mlp_log.mlperf_print(key='epoch_start', value=None,
                                 metadata={'epoch_num': state.iter_num})
            train_once(state)
            mlp_log.mlperf_print(key='epoch_stop', value=None,
                                 metadata={'epoch_num': state.iter_num})
            mlp_log.mlperf_print(
                key='save_model',
                value='{iteration_num: ' + str(state.iter_num) + ' }')

            # In the case where iterations are fast, TPUEstimator can
            # deadlock between iterations on TPU Init. We attempt to manually
            # make sure the worker can Init with deadlines so we don't get
            # stuck.
            while True:
                try:
                    tf.logging.info('Attempting to shutdown worker.')
                    gc.collect()
                    with tf.Graph().as_default():
                        with tf.Session(tpu_name,
                                        config=session_config) as sess:
                            sess.run(
                                tf.tpu.shutdown_system(job='tpu_worker'),
                                options=timeout_run_options)
                    tf.logging.info('Attempting to initialize worker.')
                    with tf.Graph().as_default():
                        with tf.Session(tpu_name,
                                        config=session_config) as sess:
                            init_result = sess.run(
                                tf.tpu.initialize_system(job='tpu_worker'),
                                options=timeout_run_options)
                    if init_result:
                        tf.logging.info('Worker reset.')
                        break
                except tf.errors.DeadlineExceededError:
                    pass

        # Signal the selfplay workers to stop now that training is done.
        with tf.gfile.GFile(FLAGS.abort_file_path, 'w') as f:
            f.write('abort')

    # Report the average number of selfplay games played per generation.
    total_file_count = 0
    for iteration_model_name in iteration_model_names:
        total_file_count += len(tf.io.gfile.glob(
            FLAGS.selfplay_dir + '/' + iteration_model_name + '/*/*/*'))

    mlp_log.mlperf_print(
        key='actual_selfplay_games_per_generation',
        value=int(total_file_count / len(iteration_model_names)))
def play_match(black_model, white_model, games, sgf_dir):
    """Plays matches between two neural nets.

    Args:
        black_model: Path to the model for the black player.
        white_model: Path to the model for the white player.
        games: Number of games to play.
        sgf_dir: Directory to write the match's SGF records to.
    """
    with utils.logged_timer('Loading weights'):
        black_net = dual_net.DualNetwork(black_model)
        white_net = dual_net.DualNetwork(white_model)

    readouts = FLAGS.num_readouts

    black = MCTSPlayer(black_net, two_player_mode=True)
    white = MCTSPlayer(white_net, two_player_mode=True)

    black_name = os.path.basename(black_net.save_file)
    white_name = os.path.basename(white_net.save_file)

    for i in range(games):
        num_move = 0  # The move number of the current game

        for player in [black, white]:
            player.initialize_game()
            first_node = player.root.select_leaf()
            prob, val = player.network.run(first_node.position)
            first_node.incorporate_results(prob, val, first_node)

        while True:
            start = time.time()
            active = white if num_move % 2 else black
            inactive = black if num_move % 2 else white

            current_readouts = active.root.N
            while active.root.N < current_readouts + readouts:
                active.tree_search()

            # print some stats on the search
            if FLAGS.verbose >= 3:
                print(active.root.position)

            # First, check the roots for hopeless games.
            if active.should_resign():  # Force resign
                active.set_result(-1 * active.root.position.to_play,
                                  was_resign=True)
                inactive.set_result(active.root.position.to_play,
                                    was_resign=True)

            if active.is_done():
                fname = '{:d}-{:s}-vs-{:s}-{:d}.sgf'.format(
                    int(time.time()), white_name, black_name, i)
                active.set_result(active.root.position.result(),
                                  was_resign=False)
                with gfile.GFile(os.path.join(sgf_dir, fname), 'w') as _file:
                    sgfstr = sgf_wrapper.make_sgf(active.position.recent,
                                                  active.result_string,
                                                  black_name=black_name,
                                                  white_name=white_name)
                    _file.write(sgfstr)
                print('Finished game', i, active.result_string)
                break

            move = active.pick_move()
            active.play_move(move)
            inactive.play_move(move)

            dur = time.time() - start
            num_move += 1

            if (FLAGS.verbose > 1) or (FLAGS.verbose == 1
                                       and num_move % 10 == 9):
                timeper = (dur / readouts) * 100.0
                print(active.root.position)
                print('%d: %d readouts, %.3f s/100. (%.2f sec)' % (
                    num_move, readouts, timeper, dur))
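# A minimal sketch of invoking play_match, e.g. from a flags-based entry
# point. The flag names (black_model, white_model, num_evaluation_games,
# eval_sgf_dir) are assumptions for illustration and may not match the
# module's real flags.
def evaluate_main(unused_argv):
    utils.ensure_dir_exists(FLAGS.eval_sgf_dir)
    play_match(FLAGS.black_model, FLAGS.white_model,
               FLAGS.num_evaluation_games, FLAGS.eval_sgf_dir)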