def rl_loop():
  state = State()
  bootstrap(state)
  selfplay(state)

  while state.iter_num < 100:
    holdout_dir = os.path.join(fsdb.holdout_dir(), '%06d-*' % state.iter_num)
    tf_records = os.path.join(fsdb.golden_chunk_dir(), '*.zz')
    tf_records = sorted(tensorflow.gfile.Glob(tf_records), reverse=True)[:5]
    state.iter_num += 1

    # Train on shuffled game data of the last 5 selfplay rounds.
    train(state, tf_records)

    # These could run in parallel.
    validate(state, holdout_dir)
    model_win_rate = evaluate_model(state)
    target_win_rate = evaluate_target(state)

    # This could run in parallel to the rest.
    selfplay(state)

    if model_win_rate >= 0.55:
      # Promote the trained model to the play model.
      state.play_model_num = state.train_model_num
      state.play_model_name = state.train_model_name
      state.train_model_num += 1
    elif model_win_rate < 0.4:
      # Bury the selfplay games which produced a significantly worse model.
      logging.info('Burying %s.', tf_records[0])
      shutil.move(tf_records[0], tf_records[0] + '.bury')

    yield target_win_rate
def main(unused_argv):
  """Run the reinforcement learning loop."""

  print('Wiping dir %s' % FLAGS.base_dir, flush=True)
  shutil.rmtree(FLAGS.base_dir, ignore_errors=True)

  utils.ensure_dir_exists(fsdb.models_dir())
  utils.ensure_dir_exists(fsdb.selfplay_dir())
  utils.ensure_dir_exists(fsdb.holdout_dir())
  utils.ensure_dir_exists(fsdb.eval_dir())
  utils.ensure_dir_exists(fsdb.golden_chunk_dir())
  utils.ensure_dir_exists(fsdb.working_dir())

  # Copy the flag files so there's no chance of them getting accidentally
  # overwritten while the RL loop is running.
  flags_dir = os.path.join(FLAGS.base_dir, 'flags')
  shutil.copytree(FLAGS.flags_dir, flags_dir)
  FLAGS.flags_dir = flags_dir

  # Copy the target model to the models directory so we can find it easily.
  shutil.copy('ml_perf/target.pb', fsdb.models_dir())

  logging.getLogger().addHandler(
      logging.FileHandler(os.path.join(FLAGS.base_dir, 'rl_loop.log')))
  formatter = logging.Formatter('[%(asctime)s] %(message)s',
                                '%Y-%m-%d %H:%M:%S')
  for handler in logging.getLogger().handlers:
    handler.setFormatter(formatter)

  with utils.logged_timer('Total time'):
    try:
      rl_loop()
    finally:
      asyncio.get_event_loop().close()
def main(unused_argv):
  """Run the reinforcement learning loop."""

  print('Wiping dir %s' % FLAGS.base_dir, flush=True)
  shutil.rmtree(FLAGS.base_dir, ignore_errors=True)
  dirs = [fsdb.models_dir(), fsdb.selfplay_dir(), fsdb.holdout_dir(),
          fsdb.eval_dir(), fsdb.golden_chunk_dir(), fsdb.working_dir()]
  for d in dirs:
    ensure_dir_exists(d)

  # Copy the flag files so there's no chance of them getting accidentally
  # overwritten while the RL loop is running.
  flags_dir = os.path.join(FLAGS.base_dir, 'flags')
  shutil.copytree(FLAGS.flags_dir, flags_dir)
  FLAGS.flags_dir = flags_dir

  # Copy the target model to the models directory so we can find it easily.
  for file_name in ["target.pb",
                    "target_raw.ckpt.data-00000-of-00001",
                    "target_raw.ckpt.index",
                    "target_raw.ckpt.meta"]:
    shutil.copy(FLAGS.target_path[:-len("target.pb")] + file_name,
                os.path.join(fsdb.models_dir(), file_name))

  logging.getLogger().addHandler(
      logging.FileHandler(os.path.join(FLAGS.base_dir, 'rl_loop.log')))
  formatter = logging.Formatter('[%(asctime)s] %(message)s',
                                '%Y-%m-%d %H:%M:%S')
  for handler in logging.getLogger().handlers:
    handler.setFormatter(formatter)

  with logged_timer('Total time'):
    try:
      rl_loop()
    finally:
      asyncio.get_event_loop().close()
def selfplay(state):
  output_dir = os.path.join(fsdb.selfplay_dir(), state.output_model_name)
  holdout_dir = os.path.join(fsdb.holdout_dir(), state.output_model_name)
  model_path = os.path.join(fsdb.models_dir(), state.best_model_name)

  result = checked_run([
      'bazel-bin/cc/selfplay',
      '--parallel_games=2048',
      '--num_readouts=100',
      '--model={}.pb'.format(model_path),
      '--output_dir={}'.format(output_dir),
      '--holdout_dir={}'.format(holdout_dir)
  ] + cc_flags(state), 'selfplay')
  logging.info(get_lines(result, make_slice[-2:]))

  # Write examples to a single record.
  pattern = os.path.join(output_dir, '*', '*.zz')
  random.seed(state.seed)
  tf.set_random_seed(state.seed)
  np.random.seed(state.seed)
  # TODO(tommadams): This method of generating one golden chunk per generation
  # is sub-optimal because each chunk gets reused multiple times for training,
  # introducing bias. Instead, a fresh dataset should be uniformly sampled out
  # of *all* games in the training window before the start of each training
  # run.
  buffer = example_buffer.ExampleBuffer(sampling_frac=1.0)

  # TODO(tommadams): parallel_fill is currently non-deterministic. Make it not
  # so.
  logging.info('Writing golden chunk from "{}"'.format(pattern))
  buffer.parallel_fill(tf.gfile.Glob(pattern))
  buffer.flush(os.path.join(fsdb.golden_chunk_dir(),
                            state.output_model_name + '.tfrecord.zz'))
def main(unused_argv):
  """Run the reinforcement learning loop."""

  print('Wiping dir %s' % FLAGS.base_dir, flush=True)
  shutil.rmtree(FLAGS.base_dir, ignore_errors=True)

  utils.ensure_dir_exists(fsdb.models_dir())
  utils.ensure_dir_exists(fsdb.selfplay_dir())
  utils.ensure_dir_exists(fsdb.holdout_dir())
  utils.ensure_dir_exists(fsdb.eval_dir())
  utils.ensure_dir_exists(fsdb.golden_chunk_dir())
  utils.ensure_dir_exists(fsdb.working_dir())

  # Copy the target model to the models directory so we can find it easily.
  shutil.copy('ml_perf/target.pb', fsdb.models_dir())

  logging.getLogger().addHandler(
      logging.FileHandler(os.path.join(FLAGS.base_dir, 'reinforcement.log')))
  formatter = logging.Formatter('[%(asctime)s] %(message)s',
                                '%Y-%m-%d %H:%M:%S')
  for handler in logging.getLogger().handlers:
    handler.setFormatter(formatter)

  with utils.logged_timer('Total time'):
    for target_win_rate in rl_loop():
      if target_win_rate > 0.5:
        return logging.info('Passed exit criteria.')
    logging.info('Failed to converge.')
async def start_selfplay():
  output_dir = os.path.join(fsdb.selfplay_dir(), "$MODEL")
  holdout_dir = os.path.join(fsdb.holdout_dir(), "$MODEL")
  model_pattern = os.path.join(fsdb.models_dir(), '%d.pb')

  logs = []
  processes = []
  loop = asyncio.get_event_loop()
  for i, device in enumerate(FLAGS.selfplay_devices):
    cmd = [
        'bazel-bin/cc/concurrent_selfplay',
        '--flagfile={}'.format(os.path.join(FLAGS.flags_dir,
                                            'selfplay.flags')),
        '--run_forever=1',
        '--device={}'.format(device),
        '--model={}'.format(model_pattern),
        '--output_dir={}/{}'.format(output_dir, i),
        '--holdout_dir={}/{}'.format(holdout_dir, i)]
    cmd_str = await expand_cmd_str(cmd)

    f = open(os.path.join(FLAGS.base_dir, 'selfplay_%d.log' % i), 'w')
    f.write(cmd_str + '\n\n')
    f.flush()

    logging.info('Running: %s', cmd_str)
    processes.append(await asyncio.create_subprocess_exec(
        *cmd, stdout=f, stderr=asyncio.subprocess.STDOUT))
    logs.append(f)

  return (processes, logs)
def run_tpu(no_resign=False):
  os.environ['GRPC_DEFAULT_SSL_ROOTS_FILE_PATH'] = (
      '/etc/ssl/certs/ca-certificates.crt')
  flagset = [
      'bazel-bin/cc/main',
      '--mode=selfplay',
      '--engine=tpu',
      '--model={}'.format(
          os.path.join(fsdb.working_dir(), 'model.ckpt-%d.pb')),
      '--output_dir={}'.format(fsdb.selfplay_dir()),
      '--holdout_dir={}'.format(fsdb.holdout_dir()),
      '--sgf_dir={}'.format(fsdb.sgf_dir()),
      '--run_forever=true',
      '--output_bigtable={}'.format(FLAGS.output_bigtable)
  ]

  if 'KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS' in os.environ:
    flagset.append('--tpu_name={}'.format(
        os.environ['KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS']))

  if no_resign:
    flagset.extend(['--flagfile=rl_loop/distributed_flags_nr'])
  else:
    flagset.extend([
        '--flags_path={}'.format(fsdb.flags_path()),
        '--flagfile=rl_loop/distributed_flags'
    ])

  mask_flags.checked_run(flagset)
def validate_holdout_selfplay():
  """Validate on held-out selfplay data."""
  holdout_dirs = (
      os.path.join(fsdb.holdout_dir(), d)
      for d in reversed(gfile.ListDirectory(fsdb.holdout_dir()))
      if gfile.IsDirectory(os.path.join(fsdb.holdout_dir(), d))
      for f in gfile.ListDirectory(os.path.join(fsdb.holdout_dir(), d)))

  # This is a roundabout way of computing how many hourly directories we need
  # to read in order to encompass 20,000 holdout games.
  holdout_dirs = set(itertools.islice(holdout_dirs, 20000))
  cmd = ['python3', 'validate.py'] + list(holdout_dirs) + [
      '--use_tpu',
      '--tpu_name={}'.format(TPU_NAME),
      '--flagfile=rl_loop/distributed_flags',
      '--expand_validation_dirs'
  ]
  mask_flags.run(cmd)
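The set/islice combination above is easy to misread, so here is a small self-contained sketch (directory names and file counts are invented) of how it works: the generator yields each hourly directory once per file it contains, so slicing off the first 20,000 items and deduplicating selects exactly the most recent directories needed to cover 20,000 games.

import itertools

# Hypothetical hourly holdout directories and their game counts.
files_per_dir = {'2019-07-01-23': 12000, '2019-07-01-22': 12000}

# Mimics the generator above: yield the directory once per contained file.
gen = (d for d, n in files_per_dir.items() for _ in range(n))

# Take 20,000 (directory, repeated) entries, then deduplicate.
dirs = set(itertools.islice(gen, 20000))
assert dirs == {'2019-07-01-23', '2019-07-01-22'}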
def run_tpu():
  mask_flags.checked_run([
      'bazel-bin/cc/main',
      '--mode=selfplay',
      '--engine=tpu',
      '--checkpoint_dir={}'.format(fsdb.working_dir()),
      '--output_dir={}'.format(fsdb.selfplay_dir()),
      '--holdout_dir={}'.format(fsdb.holdout_dir()),
      '--sgf_dir={}'.format(fsdb.sgf_dir()),
      '--flags_path={}'.format(fsdb.flags_path()),
      '--run_forever=true',
      '--flagfile=rl_loop/distributed_flags'
  ])
def rl_loop():
  state = State()

  # Play the first round of selfplay games with a fake model that returns
  # random noise. We do this instead of playing multiple games using a single
  # model bootstrapped with random noise to avoid any initial bias.
  # TODO(tommadams): disable holdout games for first round of selfplay.
  selfplay(state)
  state.engine = FLAGS.engine

  # Train a real model from the random selfplay games.
  tf_records = get_golden_chunk_records(1)
  state.iter_num += 1
  train(state, tf_records)

  # Select the newly trained model as the best.
  state.best_model_name = state.train_model_name
  state.gen_num += 1

  # Run selfplay using the new model.
  selfplay(state)

  # Now start the full training loop.
  while state.iter_num <= 100:
    # Build holdout glob before incrementing the iteration number because we
    # want to run validation on the previous generation.
    holdout_glob = os.path.join(fsdb.holdout_dir(), '%06d-*' % state.iter_num,
                                '*')

    # Train on shuffled game data of the last 5 selfplay rounds, ignoring the
    # random bootstrapping round.
    # TODO(tommadams): potential improvements:
    #   - "slow window": increment number of models in window by 1 every 2
    #     generations.
    #   - uniformly resample the window each iteration (see TODO in selfplay
    #     for more info).
    tf_records = get_golden_chunk_records(min(5, state.iter_num))
    state.iter_num += 1
    train(state, tf_records)

    # These could all run in parallel.
    validate(state, holdout_glob)
    model_win_rate = evaluate(state, state.best_model_name)
    target_win_rate = evaluate(state, 'target')
    selfplay(state)

    # TODO(tommadams): 0.6 is required for 95% confidence at 100 eval games.
    # TODO(tommadams): if a model doesn't get promoted after N iterations,
    # consider deleting the most recent N training checkpoints because
    # training might have got stuck in a local minimum.
    if model_win_rate >= 0.55:
      # Promote the trained model to the best model and increment the
      # generation number.
      state.best_model_name = state.train_model_name
      state.gen_num += 1

    yield target_win_rate
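As a sanity check on the gating TODO in the loop above (this computation is not part of the original code), the exact binomial tail shows why a 0.55 gate is noisy at 100 evaluation games while 0.6 gives roughly 95% confidence: a model whose true strength equals the current best (win probability 0.5) still clears the 0.55 gate about 18% of the time.

from math import comb

def tail(n, k):
  # P(X >= k) for X ~ Binomial(n, 0.5): the chance a non-improved model
  # wins at least k of n evaluation games.
  return sum(comb(n, i) for i in range(k, n + 1)) / 2 ** n

print(tail(100, 55))  # ~0.184: a 0.55 gate falsely promotes ~18% of the time
print(tail(100, 60))  # ~0.028: a 0.60 gate is right around 95%+ confidence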
async def selfplay(state):
  """Run selfplay and write a training chunk to the fsdb golden_chunk_dir.

  Args:
    state: the RL loop State instance.
  """

  output_dir = os.path.join(fsdb.selfplay_dir(), state.output_model_name)
  holdout_dir = os.path.join(fsdb.holdout_dir(), state.output_model_name)

  commands = []
  num_selfplay_processes = len(FLAGS.selfplay_devices)
  if num_selfplay_processes == 1:
    commands.append([
        'bazel-bin/cc/selfplay',
        '--flagfile={}'.format(
            os.path.join(FLAGS.flags_dir, 'selfplay.flags')),
        '--num_games={}'.format(FLAGS.selfplay_num_games),
        '--parallel_games={}'.format(FLAGS.selfplay_num_games_per_thread),
        '--model={}:0,{}'.format(FLAGS.engine, state.best_model_path),
        '--output_dir={}/{}'.format(output_dir, 0),
        '--holdout_dir={}/{}'.format(holdout_dir, 0)
    ])
  else:
    for i, device in enumerate(FLAGS.selfplay_devices):
      a = ((i - 1) * FLAGS.selfplay_num_games) // (num_selfplay_processes - 1)
      b = (i * FLAGS.selfplay_num_games) // (num_selfplay_processes - 1)
      num_games = b - a
      parallel_games = (
          (num_games + FLAGS.selfplay_num_games_per_thread - 1) //
          FLAGS.selfplay_num_games_per_thread)

      commands.append([
          'bazel-bin/cc/selfplay',
          '--flagfile={}'.format(
              os.path.join(FLAGS.flags_dir, 'selfplay.flags')),
          '--num_games={}'.format(num_games),
          '--parallel_games={}'.format(parallel_games),
          '--model={}:{},{}'.format(FLAGS.engine, device,
                                    state.best_model_path),
          '--output_dir={}/{}'.format(output_dir, i),
          '--holdout_dir={}/{}'.format(holdout_dir, i)
      ])

  all_lines = await run_commands(commands)

  black_wins_total = white_wins_total = num_games = 0
  for lines in all_lines:
    result = '\n'.join(lines[-6:])
    logging.info(result)
    stats = parse_win_stats_table(result, 1)[0]
    num_games += stats.total_wins
    black_wins_total += stats.black_wins.total
    white_wins_total += stats.white_wins.total

  logging.info('Black won %0.3f, white won %0.3f',
               black_wins_total / num_games,
               white_wins_total / num_games)
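The integer arithmetic that apportions games across devices deserves a worked example. With hypothetical values (4096 games, 9 devices, 32 games per thread), every device, including i == 0 thanks to floor division of the negative numerator, receives an equal share, and parallel_games is the ceiling division of that share by the per-thread game count:

N, P = 4096, 9         # hypothetical selfplay_num_games and device count
games_per_thread = 32  # hypothetical selfplay_num_games_per_thread
for i in range(P):
  a = ((i - 1) * N) // (P - 1)
  b = (i * N) // (P - 1)
  num_games = b - a  # 512 for every device
  parallel_games = (num_games + games_per_thread - 1) // games_per_thread
  print(i, num_games, parallel_games)  # e.g. '0 512 16'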
def rl_loop():
  """The main reinforcement learning (RL) loop."""

  state = State()

  if FLAGS.checkpoint_dir:
    # Start from a partially trained model.
    initialize_from_checkpoint(state)
  else:
    # Play the first round of selfplay games with a fake model that returns
    # random noise. We do this instead of playing multiple games using a
    # single model bootstrapped with random noise to avoid any initial bias.
    wait(selfplay(state, 'bootstrap'))

    # Train a real model from the random selfplay games.
    tf_records = get_golden_chunk_records()
    state.iter_num += 1
    wait(train(state, tf_records))

    # Select the newly trained model as the best.
    state.best_model_name = state.train_model_name
    state.gen_num += 1

    # Run selfplay using the new model.
    wait(selfplay(state))

  # Now start the full training loop.
  while state.iter_num <= FLAGS.iterations:
    # Build holdout glob before incrementing the iteration number because we
    # want to run validation on the previous generation.
    holdout_glob = os.path.join(fsdb.holdout_dir(), '%06d-*' % state.iter_num,
                                '*')

    # Train on shuffled game data from recent selfplay rounds.
    tf_records = get_golden_chunk_records()
    state.iter_num += 1
    wait(train(state, tf_records))

    if FLAGS.parallel_post_train:
      # Run eval, validation & selfplay in parallel.
      model_win_rate, _, _ = wait([
          evaluate_trained_model(state),
          validate(state, holdout_glob),
          selfplay(state)
      ])
    else:
      # Run eval, validation & selfplay sequentially.
      model_win_rate = wait(evaluate_trained_model(state))
      wait(validate(state, holdout_glob))
      wait(selfplay(state))

    if model_win_rate >= FLAGS.gating_win_rate:
      # Promote the trained model to the best model and increment the
      # generation number.
      state.best_model_name = state.train_model_name
      state.gen_num += 1
async def bootstrap_selfplay(state):
  output_dir = os.path.join(fsdb.selfplay_dir(), state.train_model_name)
  holdout_dir = os.path.join(fsdb.holdout_dir(), state.train_model_name)

  lines = await run(
      'bazel-bin/cc/concurrent_selfplay',
      '--flagfile={}'.format(os.path.join(FLAGS.flags_dir,
                                          'bootstrap.flags')),
      '--output_dir={}/0'.format(output_dir),
      '--holdout_dir={}/0'.format(holdout_dir))
  logging.info('\n'.join(lines[-6:]))
async def validate(state):
  dirs = [x.path for x in os.scandir(fsdb.holdout_dir()) if x.is_dir()]
  src_dirs = sorted(dirs, reverse=True)[:FLAGS.window_size]

  await run(
      'python3', 'validate.py',
      '--gpu_device_list={}'.format(','.join(FLAGS.train_devices)),
      '--flagfile={}'.format(os.path.join(FLAGS.flags_dir,
                                          'validate.flags')),
      '--work_dir={}'.format(fsdb.working_dir()),
      '--expand_validation_dirs',
      *src_dirs)
def rl_loop():
  state = State()

  # Play the first round of selfplay games with a fake model that returns
  # random noise. We do this instead of playing multiple games using a single
  # model bootstrapped with random noise to avoid any initial bias.
  selfplay(state, 'bootstrap')

  # Train a real model from the random selfplay games.
  tf_records = get_golden_chunk_records(1)
  state.iter_num += 1
  train(state, tf_records)

  # Select the newly trained model as the best.
  state.best_model_name = state.train_model_name
  state.gen_num += 1

  # Run selfplay using the new model.
  selfplay(state)

  # Now start the full training loop.
  while state.iter_num <= FLAGS.iterations:
    # Build holdout glob before incrementing the iteration number because we
    # want to run validation on the previous generation.
    holdout_glob = os.path.join(fsdb.holdout_dir(), '%06d-*' % state.iter_num,
                                '*')

    # Calculate the window size from which we'll select training chunks.
    window = 1 + state.iter_num
    if window >= FLAGS.slow_window_size:
      window = (FLAGS.slow_window_size +
                (window - FLAGS.slow_window_size) // FLAGS.slow_window_speed)
    window = min(window, FLAGS.max_window_size)

    # Train on shuffled game data from recent selfplay rounds.
    tf_records = get_golden_chunk_records(window)
    state.iter_num += 1
    train(state, tf_records)

    # These could all run in parallel.
    validate(state, holdout_glob)
    model_win_rate = evaluate(state)
    selfplay(state)

    # TODO(tommadams): if a model doesn't get promoted after N iterations,
    # consider deleting the most recent N training checkpoints because
    # training might have got stuck in a local minimum.
    if model_win_rate >= FLAGS.gating_win_rate:
      # Promote the trained model to the best model and increment the
      # generation number.
      state.best_model_name = state.train_model_name
      state.gen_num += 1
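The slow-window schedule above is easier to see with concrete numbers. Using invented flag values (slow_window_size=5, slow_window_speed=2, max_window_size=10), the window grows by one chunk per iteration at first, then by one every two iterations, then stops growing:

def window_for(iter_num, slow_size=5, slow_speed=2, max_size=10):
  # Mirrors the calculation in rl_loop(), with hypothetical flag values.
  window = 1 + iter_num
  if window >= slow_size:
    window = slow_size + (window - slow_size) // slow_speed
  return min(window, max_size)

print([window_for(i) for i in range(1, 16)])
# [2, 3, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10]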
async def selfplay(state, flagfile='selfplay', seed_factor=0):
  """Run selfplay and write a training chunk to the fsdb golden_chunk_dir.

  Args:
    state: the RL loop State instance.
    flagfile: the name of the flagfile to use for selfplay, either 'selfplay'
        (the default) or 'bootstrap'.
    seed_factor: factor used to offset the selfplay seed.
  """

  output_dir = os.path.join(fsdb.selfplay_dir(), state.output_model_name)
  holdout_dir = os.path.join(fsdb.holdout_dir(), state.output_model_name)

  lines = await run(
      'bazel-bin/cc/selfplay',
      '--flagfile={}.flags'.format(os.path.join(FLAGS.flags_dir, flagfile)),
      '--model={}'.format(get_ckpt_path(state.best_model_path)),
      '--output_dir={}'.format(output_dir),
      '--holdout_dir={}'.format(holdout_dir),
      '--seed={}'.format(state.seed + 100 * seed_factor))

  result = '\n'.join(lines[-6:])
  logging.info(result)

  result = '\n'.join(lines[-50:])
  try:
    stats = parse_win_stats_table(result, 1)[0]
    num_games = stats.total_wins
    logging.info('Black won %0.3f, white won %0.3f',
                 stats.black_wins.total / num_games,
                 stats.white_wins.total / num_games)
  except AssertionError:
    # Poplar logging may interfere with the line-based extraction approach.
    logging.error('No results to parse:\n%s', lines[-50:])

  if not MULTI_SP:
    # Write examples to a single record.
    pattern = os.path.join(output_dir, '*', '*.zz')
    random.seed(state.seed)
    tf.set_random_seed(state.seed)
    np.random.seed(state.seed)
    # TODO(tommadams): This method of generating one golden chunk per
    # generation is sub-optimal because each chunk gets reused multiple times
    # for training, introducing bias. Instead, a fresh dataset should be
    # uniformly sampled out of *all* games in the training window before the
    # start of each training run.
    buffer = example_buffer.ExampleBuffer(sampling_frac=1.0)

    # TODO(tommadams): parallel_fill is currently non-deterministic. Make it
    # not so.
    logging.info('Writing golden chunk from "{}"'.format(pattern))
    buffer.parallel_fill(tf.gfile.Glob(pattern))
    buffer.flush(os.path.join(fsdb.golden_chunk_dir(),
                              state.output_model_name + '.tfrecord.zz'))
def main(unused_argv):
  """Run the reinforcement learning loop."""

  logging.getLogger('mlperf_compliance').propagate = False

  # Multi-node setup.
  if FLAGS.use_multinode:
    mpi_comm = MPI.COMM_WORLD
    mpi_rank = mpi_comm.Get_rank()
    mpi_size = mpi_comm.Get_size()
    print('[MPI Init] MPI rank {}, mpi size is {} host is {}'.format(
        mpi_rank, mpi_size, socket.gethostname()))
  else:
    mpi_comm = None
    mpi_rank = 0
    mpi_size = 1

  print('Wiping dir %s' % FLAGS.base_dir, flush=True)
  shutil.rmtree(FLAGS.base_dir, ignore_errors=True)
  dirs = [
      fsdb.models_dir(), fsdb.selfplay_dir(), fsdb.holdout_dir(),
      fsdb.eval_dir(), fsdb.golden_chunk_dir(), fsdb.working_dir()
  ]

  # Shared filesystem for data exchange; temporary solution (2019-05-06).
  if FLAGS.use_multinode:
    ensure_dir_exists(FLAGS.shared_dir_exchange)
  for d in dirs:
    ensure_dir_exists(d)

  # Copy the flag files so there's no chance of them getting accidentally
  # overwritten while the RL loop is running.
  flags_dir = os.path.join(FLAGS.base_dir, 'flags')
  shutil.copytree(FLAGS.flags_dir, flags_dir)
  FLAGS.flags_dir = flags_dir

  # Copy the target model to the models directory so we can find it easily.
  shutil.copy(FLAGS.target_path,
              os.path.join(fsdb.models_dir(), 'target.pb'))
  shutil.copy(FLAGS.target_path + '.og',
              os.path.join(fsdb.models_dir(), 'target.pb.og'))

  with logged_timer('Total time from mpi_rank={}'.format(mpi_rank)):
    try:
      rl_loop(mpi_comm, mpi_rank, mpi_size)
    finally:
      asyncio.get_event_loop().close()
async def bootstrap_selfplay(state):
  output_dir = os.path.join(fsdb.selfplay_dir(), state.train_model_name)
  holdout_dir = os.path.join(fsdb.holdout_dir(), state.train_model_name)

  features = 'extra' if FLAGS.use_extra_features else 'agz'
  lines = await run(
      'bazel-bin/cc/concurrent_selfplay',
      '--flagfile={}'.format(os.path.join(FLAGS.flags_dir,
                                          'bootstrap.flags')),
      '--model={}:0.4:0.4'.format(features),
      '--num_games={}'.format(FLAGS.min_games_per_iteration),
      '--output_dir={}/0'.format(output_dir),
      '--holdout_dir={}/0'.format(holdout_dir))
  logging.info('\n'.join(lines[-6:]))
async def bootstrap_selfplay(state):
  output_name = '000000-000000'
  output_dir = os.path.join(fsdb.selfplay_dir(), output_name)
  holdout_dir = os.path.join(fsdb.holdout_dir(), output_name)
  sgf_dir = os.path.join(fsdb.sgf_dir(), output_name)

  lines = await run(
      'bazel-bin/cc/selfplay',
      '--flagfile={}'.format(os.path.join(FLAGS.flags_dir,
                                          'bootstrap.flags')),
      '--num_games={}'.format(FLAGS.selfplay_num_games),
      '--parallel_games=32',
      '--model=random:0,0.4:0.4',
      '--output_dir={}/0'.format(output_dir),
      '--holdout_dir={}/0'.format(holdout_dir),
      '--sgf_dir={}'.format(sgf_dir))
  logging.info('\n'.join(lines[-6:]))
async def selfplay(state, flagfile='selfplay'):
  """Run selfplay and write a training chunk to the fsdb golden_chunk_dir.

  Args:
    state: the RL loop State instance.
    flagfile: the name of the flagfile to use for selfplay, either 'selfplay'
        (the default) or 'bootstrap'.
  """

  output_dir = os.path.join(fsdb.selfplay_dir(), state.output_model_name)
  holdout_dir = os.path.join(fsdb.holdout_dir(), state.output_model_name)

  # Instead of two workers in one process per device, run two processes with
  # one worker each.
  all_tasks = []
  loop = asyncio.get_event_loop()
  for i in range(FLAGS.num_gpus_selfplay * 2):  # 2 workers per device
    all_tasks.append(
        loop.create_task(
            selfplay_sub(state, output_dir, holdout_dir, flagfile, i)))
  all_lines = await asyncio.gather(*all_tasks, return_exceptions=True)

  black_wins_total = white_wins_total = num_games = 0
  for lines in all_lines:
    if isinstance(lines, (RuntimeError, OSError)):
      raise lines
    result = '\n'.join(lines[-6:])
    logging.info(result)
    stats = parse_win_stats_table(result, 1)[0]
    num_games += stats.total_wins
    black_wins_total += stats.black_wins.total
    white_wins_total += stats.white_wins.total

  logging.info('Black won %0.3f, white won %0.3f',
               black_wins_total / num_games,
               white_wins_total / num_games)

  # Write examples to a single record.
  pattern = os.path.join(output_dir, '*', '*.zz')
  random.seed(state.seed)
  tf.set_random_seed(state.seed)
  np.random.seed(state.seed)
  logging.info('Writing golden chunk from "{}"'.format(pattern))

  if FLAGS.use_multinode:
    mpi_rank = MPI.COMM_WORLD.Get_rank()
    divide_record(state, pattern, FLAGS.num_gpus_train, mpi_rank)
  else:
    divide_record(state, pattern, FLAGS.num_gpus_train, -1)
async def selfplay_multi(state, num_ipus):
  """Start num_ipus selfplay processes."""

  output_dir = os.path.join(fsdb.selfplay_dir(), state.output_model_name)
  holdout_dir = os.path.join(fsdb.holdout_dir(), state.output_model_name)
  flagfile = 'selfplay'

  all_tasks = []
  loop = asyncio.get_event_loop()
  for i in range(num_ipus):
    all_tasks.append(
        loop.create_task(
            selfplay_sub(state, output_dir, holdout_dir, flagfile, i)))
  all_lines = await asyncio.gather(*all_tasks, return_exceptions=True)

  black_wins_total = white_wins_total = num_games = 0
  for lines in all_lines:
    if isinstance(lines, (RuntimeError, OSError)):
      raise lines
    result = '\n'.join(lines[-6:])
    logging.info(result)
    stats = parse_win_stats_table(result, 1)[0]
    num_games += stats.total_wins
    black_wins_total += stats.black_wins.total
    white_wins_total += stats.white_wins.total

  logging.info('Black won %0.3f, white won %0.3f',
               black_wins_total / num_games,
               white_wins_total / num_games)

  # The aggregation below is copied from selfplay; it could potentially be
  # parallelized with training.
  # Write examples to a single record.
  pattern = os.path.join(output_dir, '*', '*.zz')
  random.seed(state.seed)
  tf.set_random_seed(state.seed)
  np.random.seed(state.seed)
  # TODO(tommadams): This method of generating one golden chunk per generation
  # is sub-optimal because each chunk gets reused multiple times for training,
  # introducing bias. Instead, a fresh dataset should be uniformly sampled out
  # of *all* games in the training window before the start of each training
  # run.
  buffer = example_buffer.ExampleBuffer(sampling_frac=1.0)

  # TODO(tommadams): parallel_fill is currently non-deterministic. Make it not
  # so.
  logging.info('Writing golden chunk from "{}"'.format(pattern))
  buffer.parallel_fill(tf.gfile.Glob(pattern))
  buffer.flush(os.path.join(fsdb.golden_chunk_dir(),
                            state.output_model_name + '.tfrecord.zz'))
def run_cc():
  _, model_name = fsdb.get_latest_model()
  num_games_finished = len(fsdb.get_games(model_name))
  if num_games_finished > 25000:
    print("{} has enough games! ({})".format(model_name,
                                             num_games_finished))
    time.sleep(10 * 60)
    sys.exit()

  mask_flags.checked_run([
      'bazel-bin/cc/selfplay',
      '--model=tf,{}'.format(model_name),
      '--mode=selfplay',
      '--output_dir={}/{}'.format(fsdb.selfplay_dir(), model_name),
      '--holdout_dir={}/{}'.format(fsdb.holdout_dir(), model_name),
      '--sgf_dir={}/{}'.format(fsdb.sgf_dir(), model_name),
      '--flagfile=rl_loop/distributed_flags'
  ])
def main(unused_argv):
  """Run the reinforcement learning loop."""

  mll.init_start()
  print('Wiping dir %s' % FLAGS.base_dir, flush=True)
  shutil.rmtree(FLAGS.base_dir, ignore_errors=True)
  dirs = [fsdb.models_dir(), fsdb.selfplay_dir(), fsdb.holdout_dir(),
          fsdb.eval_dir(), fsdb.golden_chunk_dir(), fsdb.working_dir(),
          fsdb.mpi_log_dir()]
  for d in dirs:
    ensure_dir_exists(d)

  # Copy the flag files so there's no chance of them getting accidentally
  # overwritten while the RL loop is running.
  flags_dir = os.path.join(FLAGS.base_dir, 'flags')
  shutil.copytree(FLAGS.flags_dir, flags_dir)
  FLAGS.flags_dir = flags_dir

  # Copy the target model to the models directory so we can find it easily.
  shutil.copy(FLAGS.target_path,
              os.path.join(fsdb.models_dir(), 'target.pb'))

  logging.getLogger().addHandler(
      logging.FileHandler(os.path.join(FLAGS.base_dir, 'rl_loop.log')))
  formatter = logging.Formatter('[%(asctime)s] %(message)s',
                                '%Y-%m-%d %H:%M:%S')
  for handler in logging.getLogger().handlers:
    handler.setFormatter(formatter)

  logging.info('Selfplay nodes = {}'.format(FLAGS.selfplay_node))
  logging.info('Train nodes = {}'.format(FLAGS.train_node))
  logging.info('Eval nodes = {}'.format(FLAGS.eval_node))

  with logged_timer('Total time'):
    try:
      mll.init_stop()
      mll.run_start()
      rl_loop()
    finally:
      asyncio.get_event_loop().close()
def selfplay(state):
  play_output_name = state.play_output_name
  play_output_dir = os.path.join(fsdb.selfplay_dir(), play_output_name)
  play_holdout_dir = os.path.join(fsdb.holdout_dir(), play_output_name)

  result = checked_run([
      'external/minigo/cc/main',
      '--mode=selfplay',
      '--parallel_games=2048',
      '--num_readouts=100',
      '--model={}'.format(state.play_model_path),
      '--output_dir={}'.format(play_output_dir),
      '--holdout_dir={}'.format(play_holdout_dir)
  ] + cc_flags(state), 'selfplay')
  logging.info(get_lines(result, make_slice[-2:]))

  # Write examples to a single record.
  logging.info('Extracting examples')
  random.seed(state.seed)
  tensorflow.set_random_seed(state.seed)
  numpy.random.seed(state.seed)
  buffer = example_buffer.ExampleBuffer(sampling_frac=1.0)
  buffer.parallel_fill(
      tensorflow.gfile.Glob(os.path.join(play_output_dir, '*.zz')))
  buffer.flush(
      os.path.join(fsdb.golden_chunk_dir(),
                   play_output_name + '.tfrecord.zz'))
async def selfplay(state, flagfile='selfplay'):
  """Run selfplay and write a training chunk to the fsdb golden_chunk_dir.

  Args:
    state: the RL loop State instance.
    flagfile: the name of the flagfile to use for selfplay, either 'selfplay'
        (the default) or 'bootstrap'.
  """

  output_dir = os.path.join(fsdb.selfplay_dir(), state.output_model_name)
  holdout_dir = os.path.join(fsdb.holdout_dir(), state.output_model_name)

  multi_instance, num_instance, flag_list = extract_multi_instance(
      ['--flagfile={}_mi.flags'.format(os.path.join(FLAGS.flags_dir,
                                                    flagfile))])
  sp_cmd = ['bazel-bin/cc/selfplay',
            '--flagfile={}.flags'.format(os.path.join(FLAGS.flags_dir,
                                                      flagfile)),
            '--model={}'.format(state.best_model_path),
            '--output_dir={}'.format(output_dir),
            '--holdout_dir={}'.format(holdout_dir)]

  if not multi_instance:
    lines = await run(*sp_cmd, '--seed={}'.format(state.seed))
  else:
    if FLAGS.selfplay_node == []:
      # Run selfplay locally.
      lines = await run(
          'python3', 'ml_perf/execute.py',
          '--num_instance={}'.format(num_instance),
          '--',
          *sp_cmd, '--seed={}'.format(state.seed))
    else:
      with logged_timer('selfplay mn'):
        # Run one selfplay instance per host.
        lines = await run_distributed(
            ['LD_LIBRARY_PATH=$LD_LIBRARY_PATH:cc/tensorflow'],
            num_instance, FLAGS.selfplay_node, None, None, state.seed,
            *sp_cmd)

  result = '\n'.join(lines)
  with logged_timer('parse win stats'):
    stats = parse_win_stats_table(result, 1)[0]
    num_games = stats.total_wins
    black_total = stats.black_wins.total
    white_total = stats.white_wins.total

    logging.info('Black won %0.3f, white won %0.3f',
                 black_total / num_games,
                 white_total / num_games)
    bias = abs(white_total - black_total) / num_games
    logging.info('Black total %d, white total %d, total games %d, '
                 'bias %0.3f.',
                 black_total, white_total, num_games, bias)

  with logged_timer('generate golden chunk'):
    # Write examples to a single record.
    pattern = os.path.join(output_dir, '*', '*.zz')
    files = tf.gfile.Glob(pattern)

    random.seed(state.seed)
    tf.set_random_seed(state.seed)
    np.random.seed(state.seed)

    # TODO(tommadams): This method of generating one golden chunk per
    # generation is sub-optimal because each chunk gets reused multiple times
    # for training, introducing bias. Instead, a fresh dataset should be
    # uniformly sampled out of *all* games in the training window before the
    # start of each training run.
    # TODO(tommadams): parallel_fill is currently non-deterministic. Make it
    # not so.
    logging.info('Writing golden chunk from "{}"'.format(pattern))
    threads = FLAGS.golden_chunk_split

    # Split the files into N separate parts.
    file_list = []
    files_number = len(files)
    chunk_size = files_number // threads
    for i in range(threads):
      if i == threads - 1:
        file_list += [[i, files[chunk_size * i:]]]
      else:
        file_list += [[i, files[chunk_size * i:chunk_size * (i + 1)]]]

    pool = mp.Pool(threads)
    pool.map(functools.partial(gen_golden_chunk, state=state), file_list)

  return bias
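A quick illustration (file names and counts are invented) of how the loop above splits the globbed files into FLAGS.golden_chunk_split parts, with the final part absorbing any remainder:

files = ['game_{}.zz'.format(i) for i in range(10)]  # hypothetical
threads = 3
chunk_size = len(files) // threads                   # 3
parts = [files[chunk_size * i:] if i == threads - 1
         else files[chunk_size * i:chunk_size * (i + 1)]
         for i in range(threads)]
print([len(p) for p in parts])                       # [3, 3, 4]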
def main(unused_argv):
  """Run the reinforcement learning loop."""

  utils.ensure_dir_exists(fsdb.models_dir())
  utils.ensure_dir_exists(fsdb.selfplay_dir())
  utils.ensure_dir_exists(fsdb.holdout_dir())
  utils.ensure_dir_exists(fsdb.sgf_dir())
  utils.ensure_dir_exists(fsdb.eval_dir())
  utils.ensure_dir_exists(fsdb.golden_chunk_dir())
  utils.ensure_dir_exists(fsdb.working_dir())

  bootstrap_name = shipname.generate(0)
  bootstrap_model_path = os.path.join(fsdb.models_dir(), bootstrap_name)
  mask_flags.checked_run([
      'python3', 'bootstrap.py',
      '--export_path={}'.format(bootstrap_model_path),
      '--work_dir={}'.format(fsdb.working_dir()),
      '--flagfile=rl_loop/local_flags'
  ])

  selfplay_cmd = [
      'python3', 'selfplay.py',
      '--load_file={}'.format(bootstrap_model_path),
      '--selfplay_dir={}'.format(
          os.path.join(fsdb.selfplay_dir(), bootstrap_name)),
      '--holdout_dir={}'.format(
          os.path.join(fsdb.holdout_dir(), bootstrap_name)),
      '--sgf_dir={}'.format(fsdb.sgf_dir()),
      '--holdout_pct=0',
      '--flagfile=rl_loop/local_flags'
  ]

  # Selfplay twice...
  mask_flags.checked_run(selfplay_cmd)
  mask_flags.checked_run(selfplay_cmd)
  # ...and once more to generate a held-out game for validation.
  # Exploits flags behavior: if you pass a flag twice, the second one wins.
  mask_flags.checked_run(selfplay_cmd + ['--holdout_pct=100'])

  # Double check that at least one sgf has been generated.
  assert os.listdir(os.path.join(fsdb.sgf_dir(), 'full'))

  print("Making shuffled golden chunk from selfplay data...")
  # TODO(amj): refactor example_buffer so it can be called the same way
  # as everything else.
  eb.make_chunk_for(output_dir=fsdb.golden_chunk_dir(),
                    local_dir=fsdb.working_dir(),
                    game_dir=fsdb.selfplay_dir(),
                    model_num=1,
                    positions=64,
                    threads=8,
                    sampling_frac=1)

  tf_records = sorted(
      gfile.Glob(os.path.join(fsdb.golden_chunk_dir(), '*.tfrecord.zz')))

  trained_model_name = shipname.generate(1)
  trained_model_path = os.path.join(fsdb.models_dir(), trained_model_name)

  # Train on shuffled game data.
  mask_flags.checked_run([
      'python3', 'train.py', *tf_records,
      '--work_dir={}'.format(fsdb.working_dir()),
      '--export_path={}'.format(trained_model_path),
      '--flagfile=rl_loop/local_flags'
  ])

  # Validate the trained model on the held-out game.
  mask_flags.checked_run([
      'python3', 'validate.py',
      os.path.join(fsdb.holdout_dir(), bootstrap_name),
      '--work_dir={}'.format(fsdb.working_dir()),
      '--flagfile=rl_loop/local_flags'
  ])

  # Verify that the trained model works for selfplay.
  # Exploits flags behavior: if you pass a flag twice, the second one wins.
  mask_flags.checked_run(
      selfplay_cmd + ['--load_file={}'.format(trained_model_path)])

  mask_flags.checked_run([
      'python3', 'evaluate.py',
      bootstrap_model_path, trained_model_path,
      '--games=1',
      '--eval_sgf_dir={}'.format(fsdb.eval_dir()),
      '--flagfile=rl_loop/local_flags'
  ])
  print("Completed integration test!")
def rl_loop():
  """The main reinforcement learning (RL) loop."""

  # 'window_size' reflects the split of golden chunks after selfplay: each
  # selfplay run generates N golden chunks instead of one in order to
  # accelerate writing them (N is determined by FLAGS.golden_chunk_split).
  # This makes the effective window size dynamic: it should grow by N-1 to
  # keep the effective window unchanged, then by N once no big chunk is left,
  # until it reaches FLAGS.window_size * FLAGS.golden_chunk_split.
  window_size = 0
  state = State()

  numa_per_node = FLAGS.physical_cores // FLAGS.numa_cores
  train_instance_num = (FLAGS.train_instance_per_numa *
                        len(FLAGS.train_node) * numa_per_node)
  selfplay_node_num = max(len(FLAGS.selfplay_node), 1)
  selfplay_num = selfplay_node_num
  out_files_number = int(train_instance_num /
                         gcd(train_instance_num, selfplay_num) *
                         selfplay_node_num)

  FLAGS.golden_chunk_split = out_files_number
  window_size = out_files_number * FLAGS.window_size

  if FLAGS.checkpoint_dir is not None:
    # Start from a partially trained model.
    initialize_from_checkpoint(state, out_files_number)
    window_size = len(get_golden_chunk_records(window_size))
    mll.init_stop()
    mll.run_start()
    state.start_time = time.time()
  else:
    # Play the first round of selfplay games with a fake model that returns
    # random noise. We do this instead of playing multiple games using a
    # single model bootstrapped with random noise to avoid any initial bias.
    mll.init_stop()
    mll.run_start()
    state.start_time = time.time()
    mll.epoch_start(state.iter_num)
    wait(selfplay(state, 'bootstrap'))
    window_size += FLAGS.golden_chunk_split

    # Train a real model from the random selfplay games.
    state.iter_num += 1
    wait(train(state, window_size))
    post_train(state)

    # Select the newly trained model as the best.
    state.best_model_name = state.train_model_name
    state.gen_num += 1

    # Run selfplay using the new model.
    wait(selfplay(state))
    window_size += FLAGS.golden_chunk_split
    mll.epoch_stop(state.iter_num - 1)

  first_iter = True
  state_copy = None
  model_win_rate = -1.0

  # Now start the full training loop.
  while state.iter_num <= FLAGS.iterations:
    with logged_timer('iteration time {}'.format(state.iter_num)):
      mll.epoch_start(state.iter_num)
      # Build holdout glob before incrementing the iteration number because
      # we want to run validation on the previous generation.
      holdout_glob = os.path.join(fsdb.holdout_dir(),
                                  '%06d-*' % state.iter_num, '*')

      if FLAGS.parallel_post_train == 0:
        state.iter_num += 1
        wait(train(state, window_size))
        post_train(state)
        # Run eval, validation & selfplay sequentially.
        wait(selfplay(state))
        model_win_rate = wait(evaluate_trained_model(state))
        if model_win_rate >= FLAGS.gating_win_rate:
          # Promote the trained model to the best model and increment the
          # generation number.
          state.best_model_name = state.train_model_name
          state.gen_num += 1
        mll.epoch_stop(state.iter_num - 1)
        # ^ compensate for the iter_num += 1 above.

      if FLAGS.parallel_post_train == 1:
        state.iter_num += 1
        wait([train(state, window_size), selfplay(state)])
        post_train(state)
        # Run eval, validation & selfplay in parallel.
        model_win_rate = wait(evaluate_trained_model(state))
        if model_win_rate >= FLAGS.gating_win_rate:
          # Promote the trained model to the best model and increment the
          # generation number.
          state.best_model_name = state.train_model_name
          state.gen_num += 1
        mll.epoch_stop(state.iter_num - 1)
        # ^ compensate for the iter_num += 1 above.

      if FLAGS.parallel_post_train == 2:
        state_copy = copy.copy(state)
        state.iter_num += 1
        # Run training and evaluation/validation/selfplay in parallel.
        # This is software pipeline-ish parallelism:
        #   start train[iter]
        #     | start validation[iter-1]
        #     | wait for validation
        #     | if not first time, start evaluation[iter-1]
        #     | if not first time, wait for evaluation
        #     | if not first time, check for promotion
        #     | start selfplay[iter]
        #     | wait for selfplay
        #   wait for train
        train_handle = asyncio.gather(train(state, window_size),
                                      return_exceptions=True)
        if not first_iter:
          post_train(state_copy)
          model_win_rate = wait(evaluate_trained_model(state_copy))
          if model_win_rate >= FLAGS.gating_win_rate:
            # Promote the trained model to the best model.
            state.best_model_name = state_copy.train_model_name
          mll.epoch_stop(state.iter_num - 1 - 1)
          # ^---^-- compensate for the iter_num += 1 above;
          #     +-- it is actually the last iteration.
        else:
          first_iter = False
        wait(selfplay(state))
        asyncio.get_event_loop().run_until_complete(train_handle)

        if not first_iter:
          if model_win_rate >= FLAGS.gating_win_rate:
            # Increment the generation number.
            train_model_name_before = state.train_model_name
            state.gen_num += 1

            # Output dependency:
            # In parallel post-train mode 1 there is an output dependence
            # between the evaluation of iteration i (gen_num++) and the
            # training of iteration i+1 (which uses gen_num for the export
            # model path). In parallel post-train mode 2 (this mode), the
            # evaluation of iteration i is postponed until after the training
            # of iteration i+1 has started, so training won't generate the
            # correct model name when a promotion needs to happen. This part
            # fixes up the model name when evaluation decides there's a
            # promotion.
            train_model_name_after = state.train_model_name
            model_paths = glob.glob(os.path.join(
                fsdb.models_dir(), '{}.*'.format(train_model_name_before)))
            for model in model_paths:
              logging.info('moving {} --> {}'.format(
                  model,
                  train_model_name_after.join(
                      model.rsplit(train_model_name_before, 1))))
              shutil.copy(model,
                          train_model_name_after.join(
                              model.rsplit(train_model_name_before, 1)))

  # After the main loop, if parallel_post_train == 2, we still need to print
  # epoch_stop for the last epoch.
  if FLAGS.parallel_post_train == 2:
    mll.epoch_stop(state.iter_num - 1)
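The out_files_number computation above is effectively a least common multiple, which is what makes the per-iteration chunk count divide evenly among both consumers. A sketch with invented cluster sizes:

from math import gcd

train_instance_num = 8  # hypothetical: 2 train nodes * 2 NUMA * 2 per NUMA
selfplay_node_num = 6   # hypothetical number of selfplay hosts
out_files_number = (train_instance_num //
                    gcd(train_instance_num, selfplay_node_num) *
                    selfplay_node_num)
print(out_files_number)  # 24 == lcm(8, 6): divisible by both counts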
def main(unused_argv):
  for i in range(0, NUM_LOOP):
    if i == 0:
      src_model_name = shipname.generate(0)
      fsdb.switch_base(os.path.join(base_dir, src_model_name))
      src_model_path = os.path.join(fsdb.models_dir(), src_model_name)
      bootstrap_model_path = os.path.join(fsdb.models_dir(), src_model_name)
      mask_flags.checked_run([
          'python3', 'bootstrap.py',
          '--export_path={}'.format(bootstrap_model_path),
          '--work_dir={}'.format(fsdb.working_dir()),
          '--flagfile=rl_loop/local_flags'
      ])
      dst_model_name = shipname.generate(1)
      fsdb.switch_base(os.path.join(base_dir, dst_model_name))
    else:
      src_model_name = dst_model_name
      src_model_path = os.path.join(fsdb.models_dir(), src_model_name)
      dst_model_name = shipname.generate(i + 1)
      fsdb.switch_base(os.path.join(base_dir, dst_model_name))

    utils.ensure_dir_exists(fsdb.models_dir())
    utils.ensure_dir_exists(fsdb.selfplay_dir())
    utils.ensure_dir_exists(fsdb.holdout_dir())
    utils.ensure_dir_exists(fsdb.sgf_dir())
    utils.ensure_dir_exists(fsdb.eval_dir())
    utils.ensure_dir_exists(fsdb.golden_chunk_dir())
    utils.ensure_dir_exists(fsdb.working_dir())

    print(src_model_name)
    print(src_model_path)

    selfplay_cmd = [
        'python3', 'selfplay.py',
        '--load_file={}'.format(src_model_path),
        '--selfplay_dir={}'.format(
            os.path.join(fsdb.selfplay_dir(), dst_model_name)),
        '--holdout_dir={}'.format(
            os.path.join(fsdb.holdout_dir(), dst_model_name)),
        '--sgf_dir={}'.format(fsdb.sgf_dir()),
        '--holdout_pct=0',
        '--flagfile=rl_loop/local_flags'
    ]

    # Selfplay twice...
    mask_flags.checked_run(selfplay_cmd)
    mask_flags.checked_run(selfplay_cmd)
    # ...and once more to generate a held-out game for validation.
    # Exploits flags behavior: if you pass a flag twice, the second one wins.
    mask_flags.checked_run(selfplay_cmd + ['--holdout_pct=100'])

    # Double check that at least one sgf has been generated.
    assert os.listdir(os.path.join(fsdb.sgf_dir(), 'full'))

    print("Making shuffled golden chunk from selfplay data...")
    # TODO(amj): refactor example_buffer so it can be called the same way
    # as everything else.
    eb.make_chunk_for(output_dir=fsdb.golden_chunk_dir(),
                      local_dir=fsdb.working_dir(),
                      game_dir=fsdb.selfplay_dir(),
                      model_num=1,
                      positions=64,
                      threads=8,
                      sampling_frac=1)

    tf_records = sorted(
        gfile.Glob(os.path.join(fsdb.golden_chunk_dir(), '*.tfrecord.zz')))

    trained_model_name = dst_model_name
    trained_model_path = os.path.join(fsdb.models_dir(), trained_model_name)

    # Train on shuffled game data.
    mask_flags.checked_run([
        'python3', 'train.py', *tf_records,
        '--work_dir={}'.format(fsdb.working_dir()),
        '--export_path={}'.format(trained_model_path),
        '--flagfile=rl_loop/local_flags'
    ])

  print("Finished!")
def selfplay_noasync(state, flagfile='selfplay'):
  """Run selfplay and write a training chunk to the fsdb golden_chunk_dir.

  Args:
    state: the RL loop State instance.
    flagfile: the name of the flagfile to use for selfplay, either 'selfplay'
        (the default) or 'bootstrap'.
  """

  output_dir = os.path.join(fsdb.selfplay_dir(), state.output_model_name)
  holdout_dir = os.path.join(fsdb.holdout_dir(), state.output_model_name)

  base_seed = state.seed * FLAGS.num_gpus_selfplay * 2
  if FLAGS.use_multinode:
    mpi_rank = MPI.COMM_WORLD.Get_rank()
    base_seed = base_seed + (mpi_rank * 1433)

  mpi_info = MPI.Info.Create()
  num_workers = 2 * FLAGS.num_gpus_selfplay
  cores_per_worker = (FLAGS.cores_per_socket *
                      FLAGS.num_socket) // num_workers
  # TODO: set hosts to selfplay nodes here.
  mpi_info.Set("host", socket.gethostname())
  mpi_info.Set("bind_to", "none")
  icomm = MPI.COMM_SELF.Spawn(
      "ompi_bind_DGX1.sh", maxprocs=num_workers,
      args=[
          'bazel-bin/cc/selfplay_mpi',
          '--flagfile={}.flags'.format(os.path.join(FLAGS.flags_dir,
                                                    flagfile)),
          '--model={}'.format(state.best_model_path),
          '--output_dir={}'.format(output_dir),
          '--holdout_dir={}'.format(holdout_dir),
          '--seed={}'.format(base_seed)
      ],
      info=mpi_info)
  icomm.barrier()
  icomm.Disconnect()

  # Win-stats parsing (as done in the async selfplay variant) is disabled in
  # this version.

  # Write examples to a single record.
  pattern = os.path.join(output_dir, '*', '*.zz')
  random.seed(state.seed)
  tf.set_random_seed(state.seed)
  np.random.seed(state.seed)
  logging.info('Writing golden chunk from "{}"'.format(pattern))

  if FLAGS.use_multinode:
    mpi_rank = MPI.COMM_WORLD.Get_rank()
    divide_record(state, pattern, FLAGS.num_gpus_train, mpi_rank)
  else:
    divide_record(state, pattern, FLAGS.num_gpus_train, -1)
async def selfplay(state, flagfile='selfplay'):
  """Run selfplay and write a training chunk to the fsdb golden_chunk_dir.

  Args:
    state: the RL loop State instance.
    flagfile: the name of the flagfile to use for selfplay, either 'selfplay'
        (the default) or 'bootstrap'.
  """

  output_dir = os.path.join(fsdb.selfplay_dir(), state.output_model_name)
  holdout_dir = os.path.join(fsdb.holdout_dir(), state.output_model_name)
  output_dir = '/tmp/minigo' + output_dir

  multi_instance, num_instance, flag_list = extract_multi_instance([
      '--flagfile={}_mi.flags'.format(os.path.join(FLAGS.flags_dir, flagfile))
  ])
  sp_cmd = [
      'bazel-bin/cc/selfplay',
      '--flagfile={}.flags'.format(os.path.join(FLAGS.flags_dir, flagfile)),
      '--model={}'.format(state.best_model_path),
      '--output_dir={}'.format(output_dir),
      '--holdout_dir={}'.format(holdout_dir)
  ]

  if not multi_instance:
    lines = await run(*sp_cmd, '--seed={}'.format(state.seed))
  else:
    if FLAGS.selfplay_node == []:
      # Run selfplay locally.
      lines = await run('python3', 'ml_perf/execute.py',
                        '--num_instance={}'.format(num_instance),
                        '--',
                        *sp_cmd, '--seed={}'.format(state.seed))
    else:
      with logged_timer('selfplay mn'):
        # Run one selfplay instance per host.
        lines = await run_distributed(
            ['LD_LIBRARY_PATH=$LD_LIBRARY_PATH:cc/tensorflow'],
            num_instance, FLAGS.selfplay_node, None, None, state.seed,
            *sp_cmd)

  # Win-stats parsing (as done in the other selfplay variants) is disabled in
  # this version.

  with logged_timer('generate golden chunk'):
    # Write examples to a single record.
    hosts = FLAGS.selfplay_node
    if hosts == []:
      hosts = ['localhost']
    num_instance = len(hosts)

    numa_per_node = FLAGS.physical_cores // FLAGS.numa_cores
    train_instance_num = (FLAGS.train_instance_per_numa *
                          len(FLAGS.train_node) * numa_per_node)
    selfplay_node_num = len(hosts)
    selfplay_num = selfplay_node_num
    out_files_number = int(train_instance_num /
                           gcd(train_instance_num, selfplay_num))

    cmd = [
        'python3', 'ml_perf/divide_golden_chunk.py',
        '--read_path={}'.format(output_dir + "/*"),
        '--write_path={}'.format(
            os.path.join(fsdb.golden_chunk_dir(),
                         state.output_model_name + '.tfrecord.zz')),
        '--out_files_number={}'.format(out_files_number),
        '--physical_cores={}'.format(FLAGS.physical_cores),
        '--base_dir={}'.format(FLAGS.base_dir)
    ]
    lines = await run_distributed([], 1, hosts, None, None, state.seed, *cmd)