def initialize_from_checkpoint(state, out_files_number):
    """Initialize the reinforcement learning loop from a checkpoint."""

    # The checkpoint's work_dir should contain the most recently trained model.
    model_paths = glob.glob(
        os.path.join(FLAGS.checkpoint_dir, 'work_dir/model.ckpt-*.pb'))
    if len(model_paths) != 1:
        raise RuntimeError(
            'Expected exactly one model in the checkpoint work_dir, '
            'got [{}]'.format(', '.join(model_paths)))
    start_model_path = model_paths[0]

    # Rewrite each of the checkpoint's golden chunks into out_files_number
    # output files.
    golden_chunks_dir = os.path.join(FLAGS.checkpoint_dir, 'golden_chunks')
    for basename in os.listdir(golden_chunks_dir):
        path = os.path.join(golden_chunks_dir, basename)
        out_path = os.path.join(fsdb.golden_chunk_dir(), basename)
        buffer = example_buffer.ExampleBuffer(sampling_frac=1.0)
        example_num = buffer.parallel_fill(tf.gfile.Glob(path),
                                           FLAGS.physical_cores)
        buffer.flush_new(out_path, example_num, out_files_number, 1)

    # Copy the latest trained model into the models directory and use it on
    # the first round of selfplay.
    state.best_model_name = 'checkpoint'
    best_model_path = os.path.join(fsdb.models_dir(), state.best_model_name)
    dual_net.optimize_graph(start_model_path, best_model_path,
                            FLAGS.quantization,
                            fsdb.golden_chunk_dir() + '/*.zz*',
                            FLAGS.eval_min_max_every_epoch)

    # Copy the training files.
    work_dir = os.path.join(FLAGS.checkpoint_dir, 'work_dir')
    for basename in os.listdir(work_dir):
        path = os.path.join(work_dir, basename)
        shutil.copy(path, fsdb.working_dir())
def main(unused_argv):
    """Run the reinforcement learning loop."""

    print('Wiping dir %s' % FLAGS.base_dir, flush=True)
    shutil.rmtree(FLAGS.base_dir, ignore_errors=True)

    dirs = [fsdb.models_dir(), fsdb.selfplay_dir(), fsdb.holdout_dir(),
            fsdb.eval_dir(), fsdb.golden_chunk_dir(), fsdb.working_dir()]
    for d in dirs:
        ensure_dir_exists(d)

    # Copy the flag files so there's no chance of them getting accidentally
    # overwritten while the RL loop is running.
    flags_dir = os.path.join(FLAGS.base_dir, 'flags')
    shutil.copytree(FLAGS.flags_dir, flags_dir)
    FLAGS.flags_dir = flags_dir

    # Copy the target model to the models directory so we can find it easily.
    for file_name in ["target.pb", "target_raw.ckpt.data-00000-of-00001",
                      "target_raw.ckpt.index", "target_raw.ckpt.meta"]:
        shutil.copy(FLAGS.target_path[:-len("target.pb")] + file_name,
                    os.path.join(fsdb.models_dir(), file_name))

    logging.getLogger().addHandler(
        logging.FileHandler(os.path.join(FLAGS.base_dir, 'rl_loop.log')))
    formatter = logging.Formatter('[%(asctime)s] %(message)s',
                                  '%Y-%m-%d %H:%M:%S')
    for handler in logging.getLogger().handlers:
        handler.setFormatter(formatter)

    with logged_timer('Total time'):
        try:
            rl_loop()
        finally:
            asyncio.get_event_loop().close()
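# ensure_dir_exists() is assumed by the mains in this file but not defined
# here. A minimal sketch of what it needs to do, assuming local filesystem
# paths (the real helper may also special-case gs:// bucket paths):
import os


def ensure_dir_exists_sketch(directory):
    """Create directory (and any missing parents) if it doesn't exist."""
    os.makedirs(directory, exist_ok=True)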
def main(unused_argv):
    """Run the reinforcement learning loop."""

    print('Wiping dir %s' % FLAGS.base_dir, flush=True)
    shutil.rmtree(FLAGS.base_dir, ignore_errors=True)

    utils.ensure_dir_exists(fsdb.models_dir())
    utils.ensure_dir_exists(fsdb.selfplay_dir())
    utils.ensure_dir_exists(fsdb.holdout_dir())
    utils.ensure_dir_exists(fsdb.eval_dir())
    utils.ensure_dir_exists(fsdb.golden_chunk_dir())
    utils.ensure_dir_exists(fsdb.working_dir())

    # Copy the target model to the models directory so we can find it easily.
    shutil.copy('ml_perf/target.pb', fsdb.models_dir())

    logging.getLogger().addHandler(
        logging.FileHandler(
            os.path.join(FLAGS.base_dir, 'reinforcement.log')))
    formatter = logging.Formatter('[%(asctime)s] %(message)s',
                                  '%Y-%m-%d %H:%M:%S')
    for handler in logging.getLogger().handlers:
        handler.setFormatter(formatter)

    with utils.logged_timer('Total time'):
        for target_win_rate in rl_loop():
            if target_win_rate > 0.5:
                return logging.info('Passed exit criteria.')
        logging.info('Failed to converge.')
def main(unused_argv):
    """Run the reinforcement learning loop."""

    print('Wiping dir %s' % FLAGS.base_dir, flush=True)
    shutil.rmtree(FLAGS.base_dir, ignore_errors=True)

    utils.ensure_dir_exists(fsdb.models_dir())
    utils.ensure_dir_exists(fsdb.selfplay_dir())
    utils.ensure_dir_exists(fsdb.holdout_dir())
    utils.ensure_dir_exists(fsdb.eval_dir())
    utils.ensure_dir_exists(fsdb.golden_chunk_dir())
    utils.ensure_dir_exists(fsdb.working_dir())

    # Copy the flag files so there's no chance of them getting accidentally
    # overwritten while the RL loop is running.
    flags_dir = os.path.join(FLAGS.base_dir, 'flags')
    shutil.copytree(FLAGS.flags_dir, flags_dir)
    FLAGS.flags_dir = flags_dir

    # Copy the target model to the models directory so we can find it easily.
    shutil.copy('ml_perf/target.pb', fsdb.models_dir())

    logging.getLogger().addHandler(
        logging.FileHandler(os.path.join(FLAGS.base_dir, 'rl_loop.log')))
    formatter = logging.Formatter('[%(asctime)s] %(message)s',
                                  '%Y-%m-%d %H:%M:%S')
    for handler in logging.getLogger().handlers:
        handler.setFormatter(formatter)

    with utils.logged_timer('Total time'):
        try:
            rl_loop()
        finally:
            asyncio.get_event_loop().close()
async def train(state, selfplay_processes):
    """Run training and write a new model to the fsdb models_dir.

    Args:
        state: the RL loop State instance.
        selfplay_processes: the selfplay processes generating the training
            examples to wait for.
    """
    wait_for_training_examples(state, selfplay_processes,
                               FLAGS.min_games_per_iteration)
    tf_records = await sample_training_examples(state)

    model_path = os.path.join(fsdb.models_dir(), state.train_model_name)

    await run(
        'python3', 'train.py',
        '--gpu_device_list={}'.format(','.join(FLAGS.train_devices)),
        '--flagfile={}'.format(os.path.join(FLAGS.flags_dir, 'train.flags')),
        '--work_dir={}'.format(fsdb.working_dir()),
        '--export_path={}'.format(model_path),
        '--use_extra_features={}'.format(FLAGS.use_extra_features),
        '--freeze=true',
        *tf_records)

    # Append the time elapsed from when the RL was started to when this model
    # was trained.
    elapsed = time.time() - state.start_time
    timestamps_path = os.path.join(fsdb.models_dir(), 'train_times.txt')
    with gfile.Open(timestamps_path, 'a') as f:
        print('{:.3f} {}'.format(elapsed, state.train_model_name), file=f)

    if FLAGS.validate and state.iter_num > 1:
        try:
            await validate(state)
        except Exception as e:
            logging.error(e)
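# wait_for_training_examples() and sample_training_examples() are defined
# elsewhere in the loop. A minimal sketch of the gating that the train()
# above relies on, polling until the current generation has produced enough
# selfplay games; the directory layout, file suffix, and the fact that
# selfplay_processes is only monitored (not consumed) are assumptions:
import os
import time

import tensorflow as tf


def wait_for_training_examples_sketch(state, selfplay_processes, num_games):
    """Block until selfplay has written at least num_games games."""
    pattern = os.path.join(fsdb.selfplay_dir(), state.train_model_name,
                           '*', '*.tfrecord.zz')
    while len(tf.gfile.Glob(pattern)) < num_games:
        time.sleep(1)  # The real helper may also check selfplay_processes.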
async def train(state, tf_records):
    """Run training and write a new model to the fsdb models_dir.

    Args:
        state: the RL loop State instance.
        tf_records: a list of paths to TensorFlow records to train on.
    """
    model_path = os.path.join(fsdb.models_dir(), state.train_model_name)

    await run(
        'python3', 'train.py',
        '--gpu_device_list={}'.format(','.join(FLAGS.train_devices)),
        '--flagfile={}'.format(os.path.join(FLAGS.flags_dir, 'train.flags')),
        '--work_dir={}'.format(fsdb.working_dir()),
        '--export_path={}'.format(model_path),
        '--use_extra_features={}'.format(FLAGS.use_extra_features),
        '--freeze=true',
        *tf_records)

    # Append the time elapsed from when the RL was started to when this model
    # was trained.
    elapsed = time.time() - state.start_time
    timestamps_path = os.path.join(fsdb.models_dir(), 'train_times.txt')
    with gfile.Open(timestamps_path, 'a') as f:
        print('{:.3f} {}'.format(elapsed, state.train_model_name), file=f)
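# run() is the async subprocess helper that the train() coroutines above
# await. A minimal sketch, assuming it raises on a non-zero exit status and
# returns the child's stdout lines (the real helper's logging and flag
# expansion are omitted):
import asyncio


async def run_sketch(*cmd):
    """Run cmd as a subprocess and return its stdout lines."""
    proc = await asyncio.create_subprocess_exec(
        *cmd,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.STDOUT)
    stdout, _ = await proc.communicate()
    if proc.returncode != 0:
        raise RuntimeError('Command failed: {}'.format(' '.join(cmd)))
    return stdout.decode().splitlines()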
def initialize_from_checkpoint(state):
    """Initialize the reinforcement learning loop from a checkpoint."""

    # The checkpoint's work_dir should contain the most recently trained model.
    model_paths = glob.glob(
        os.path.join(FLAGS.checkpoint_dir, 'work_dir/model.ckpt-*.pb'))
    if len(model_paths) != 1:
        raise RuntimeError(
            'Expected exactly one model in the checkpoint work_dir, '
            'got [{}]'.format(', '.join(model_paths)))
    start_model_path = model_paths[0]

    # Copy the latest trained model into the models directory and use it on
    # the first round of selfplay.
    state.best_model_name = 'checkpoint'
    shutil.copy(start_model_path,
                os.path.join(fsdb.models_dir(),
                             state.best_model_name + '.pb'))

    # Copy the training chunks.
    golden_chunks_dir = os.path.join(FLAGS.checkpoint_dir, 'golden_chunks')
    for basename in os.listdir(golden_chunks_dir):
        path = os.path.join(golden_chunks_dir, basename)
        shutil.copy(path, fsdb.golden_chunk_dir())

    # Copy the training files.
    work_dir = os.path.join(FLAGS.checkpoint_dir, 'work_dir')
    for basename in os.listdir(work_dir):
        path = os.path.join(work_dir, basename)
        shutil.copy(path, fsdb.working_dir())
def run_tpu(no_resign=False):
    os.environ['GRPC_DEFAULT_SSL_ROOTS_FILE_PATH'] = (
        '/etc/ssl/certs/ca-certificates.crt')
    flagset = [
        'bazel-bin/cc/main',
        '--mode=selfplay',
        '--engine=tpu',
        '--model={}'.format(
            os.path.join(fsdb.working_dir(), 'model.ckpt-%d.pb')),
        '--output_dir={}'.format(fsdb.selfplay_dir()),
        '--holdout_dir={}'.format(fsdb.holdout_dir()),
        '--sgf_dir={}'.format(fsdb.sgf_dir()),
        '--run_forever=true',
        '--output_bigtable={}'.format(FLAGS.output_bigtable),
    ]

    if 'KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS' in os.environ:
        flagset.append('--tpu_name={}'.format(
            os.environ['KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS']))

    if no_resign:
        flagset.extend(['--flagfile=rl_loop/distributed_flags_nr'])
    else:
        flagset.extend([
            '--flags_path={}'.format(fsdb.flags_path()),
            '--flagfile=rl_loop/distributed_flags',
        ])

    mask_flags.checked_run(flagset)
def py_flags(state):
    return [
        '--work_dir={}'.format(fsdb.working_dir()),
        '--trunk_layers=10',
        '--conv_width=64',
        '--value_cost_weight=0.25',
        '--training_seed={}'.format(state.seed),
    ]
def train(state, tf_records):
    model_path = os.path.join(fsdb.models_dir(), state.train_model_name)
    checked_run(
        'training',
        'python3', 'train.py', *tf_records,
        '--flagfile={}'.format(os.path.join(FLAGS.flags_dir, 'train.flags')),
        '--work_dir={}'.format(fsdb.working_dir()),
        '--export_path={}'.format(model_path),
        '--training_seed={}'.format(state.seed),
        '--freeze=true')
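# checked_run() in this variant takes a stage name followed by the command.
# A minimal sketch, assuming it simply echoes the stage and raises when the
# command exits non-zero (the exact signature and logging are assumptions):
import subprocess


def checked_run_sketch(name, *cmd):
    """Run cmd for the named stage, raising on failure."""
    print('Running {}: {}'.format(name, ' '.join(cmd)))
    completed = subprocess.run(
        cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    if completed.returncode != 0:
        raise RuntimeError('{} failed with return code {}'.format(
            name, completed.returncode))
    return completed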
def run_tpu():
    mask_flags.checked_run([
        'bazel-bin/cc/main',
        '--mode=selfplay',
        '--engine=tpu',
        '--checkpoint_dir={}'.format(fsdb.working_dir()),
        '--output_dir={}'.format(fsdb.selfplay_dir()),
        '--holdout_dir={}'.format(fsdb.holdout_dir()),
        '--sgf_dir={}'.format(fsdb.sgf_dir()),
        '--flags_path={}'.format(fsdb.flags_path()),
        '--run_forever=true',
        '--flagfile=rl_loop/distributed_flags'])
def validate_pro():
    """Validate on professional data."""
    cmd = [
        'python3', 'validate.py', FLAGS.pro_dataset,
        '--use_tpu',
        '--tpu_name={}'.format(TPU_NAME),
        '--work_dir={}'.format(fsdb.working_dir()),
        '--flagfile=rl_loop/distributed_flags',
        '--validate_name=pro',
    ]
    mask_flags.run(cmd)
def freeze(save_path, rewrite_tpu=False):
    cmd = [
        'python3', 'freeze_graph.py',
        '--work_dir={}'.format(fsdb.working_dir()),
        '--model_path={}'.format(save_path),
    ]
    if rewrite_tpu:
        cmd.extend(['--use_tpu', '--tpu_name={}'.format(TPU_NAME)])
    return mask_flags.run(cmd)
async def validate(state):
    dirs = [x.path for x in os.scandir(fsdb.holdout_dir()) if x.is_dir()]
    src_dirs = sorted(dirs, reverse=True)[:FLAGS.window_size]

    await run(
        'python3', 'validate.py',
        '--gpu_device_list={}'.format(','.join(FLAGS.train_devices)),
        '--flagfile={}'.format(
            os.path.join(FLAGS.flags_dir, 'validate.flags')),
        '--work_dir={}'.format(fsdb.working_dir()),
        '--expand_validation_dirs',
        *src_dirs)
async def train(state, tf_records):
    """Run training and write a new model to the fsdb models_dir.

    Args:
        state: the RL loop State instance.
        tf_records: a list of paths to TensorFlow records to train on.
    """
    new_env = os.environ.copy()
    model_path = os.path.join(fsdb.models_dir(), state.train_model_name)

    if FLAGS.use_mgpu_horovod:
        # Assign the leading cores of each socket to training.
        await run(
            new_env,
            'mpiexec', '--allow-run-as-root',
            '--map-by', 'ppr:{}:socket,pe=2'.format(
                str(FLAGS.num_gpus_train // FLAGS.num_socket)),
            '-np', str(FLAGS.num_gpus_train),
            'python3', 'train.py', *tf_records,
            '--flagfile={}'.format(
                os.path.join(FLAGS.flags_dir, 'train.flags')),
            '--work_dir={}'.format(fsdb.working_dir()),
            '--export_path={}'.format(model_path),
            '--training_seed={}'.format(state.seed),
            '--use_mgpu_horovod=true',
            '--freeze=true')
    else:
        new_env['CUDA_VISIBLE_DEVICES'] = '0'
        await run(
            new_env,
            'python3', 'train.py', *tf_records,
            '--flagfile={}'.format(
                os.path.join(FLAGS.flags_dir, 'train.flags')),
            '--work_dir={}'.format(fsdb.working_dir()),
            '--export_path={}'.format(model_path),
            '--training_seed={}'.format(state.seed),
            '--freeze=true')

    minigo_print(key='save_model', value={'iteration': state.iter_num})

    # Append the time elapsed from when the RL was started to when this model
    # was trained.
    elapsed = time.time() - state.start_time
    timestamps_path = os.path.join(fsdb.models_dir(), 'train_times.txt')
    with gfile.Open(timestamps_path, 'a') as f:
        print('{:.3f} {}'.format(elapsed, state.train_model_name), file=f)
async def validate(state, holdout_glob):
    """Validate the trained model against holdout games.

    Args:
        state: the RL loop State instance.
        holdout_glob: a glob that matches holdout games.
    """
    await checked_run(
        'python3', 'validate.py', holdout_glob,
        '--flagfile={}'.format(
            os.path.join(FLAGS.flags_dir, 'validate.flags')),
        '--work_dir={}'.format(fsdb.working_dir()))
def train():
    model_num, model_name = fsdb.get_latest_model()
    print("Training on gathered game data, initializing from {}".format(
        model_name))
    new_model_num = model_num + 1
    new_model_name = shipname.generate(new_model_num)
    print("New model will be {}".format(new_model_name))
    save_file = os.path.join(fsdb.models_dir(), new_model_name)

    # TODO(jacksona): Refactor train.py to take the filepath as a flag.
    cmd = [
        'python3', 'train.py', '__unused_file__',
        '--use_tpu',
        '--use_bt',
        '--work_dir={}'.format(fsdb.working_dir()),
        '--tpu_name={}'.format(TPU_NAME),
        '--flagfile=rl_loop/distributed_flags',
        '--export_path={}'.format(save_file),
    ]
    completed_process = mask_flags.run(cmd)
    if completed_process.returncode > 0:
        print("Training failed!")
        return completed_process

    # Train.py already copies the {data,index,meta} files to $BUCKET/models.
    # Persist the checkpoint two ways:
    #  - Freeze the .ckpt file in the work_dir for the TPU selfplayers.
    #  - Freeze a non-TPU version of the graph for later GPU use.
    latest_checkpoint = tf.train.latest_checkpoint(fsdb.working_dir())
    p = freeze(latest_checkpoint, rewrite_tpu=True)
    if p.returncode > 0:
        print("== TPU freeze failed!")
        return p

    p = freeze(save_file, rewrite_tpu=False)
    if p.returncode > 0:
        print("== Model freeze failed!")
        return p

    return completed_process
async def train(state, tf_records=None):
    """Run training and write a new model to the fsdb models_dir.

    Args:
        state: the RL loop State instance.
        tf_records: a list of paths to TensorFlow records to train on.
    """
    if tf_records is None:
        # Train on shuffled game data from recent selfplay rounds.
        tf_records = get_golden_chunk_records()

    model_path = os.path.join(fsdb.models_dir(), state.train_model_name)

    # Note: the "even" and "skewed" strategies currently issue identical
    # commands, both binding training to NUMA node 1.
    if DISTRIBUTION_STRATEGY == "even":
        await run(
            'numactl',
            '--cpunodebind={}'.format(1),
            '--membind={}'.format(1),
            'python3', 'train.py', *tf_records,
            '--flagfile={}'.format(
                os.path.join(FLAGS.flags_dir, 'train.flags')),
            '--work_dir={}'.format(fsdb.working_dir()),
            '--export_path={}'.format(model_path),
            '--training_seed={}'.format(state.seed),
            '--freeze=true')
    elif DISTRIBUTION_STRATEGY == "skewed":
        await run(
            'numactl',
            '--cpunodebind={}'.format(1),
            '--membind={}'.format(1),
            'python3', 'train.py', *tf_records,
            '--flagfile={}'.format(
                os.path.join(FLAGS.flags_dir, 'train.flags')),
            '--work_dir={}'.format(fsdb.working_dir()),
            '--export_path={}'.format(model_path),
            '--training_seed={}'.format(state.seed),
            '--freeze=true')

    # Append the time elapsed from when the RL was started to when this model
    # was trained.
    elapsed = time.time() - state.start_time
    timestamps_path = os.path.join(fsdb.models_dir(), 'train_times.txt')
    with gfile.Open(timestamps_path, 'a') as f:
        print('{:.3f} {}'.format(elapsed, state.train_model_name), file=f)
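# get_golden_chunk_records() selects the training window. A minimal sketch,
# assuming golden chunks are named so a reverse lexicographic sort yields
# newest-first and that FLAGS.window_size bounds the window:
import os

import tensorflow as tf


def get_golden_chunk_records_sketch():
    """Return the most recent window_size golden chunks, newest first."""
    pattern = os.path.join(fsdb.golden_chunk_dir(), '*.zz')
    return sorted(tf.gfile.Glob(pattern), reverse=True)[:FLAGS.window_size]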
def main(unused_argv):
    """Run the reinforcement learning loop."""
    logging.getLogger('mlperf_compliance').propagate = False

    # Multi-node setup.
    if FLAGS.use_multinode:
        mpi_comm = MPI.COMM_WORLD
        mpi_rank = mpi_comm.Get_rank()
        mpi_size = mpi_comm.Get_size()
        print('[MPI Init] MPI rank {}, mpi size is {} host is {}'.format(
            mpi_rank, mpi_size, socket.gethostname()))
    else:
        mpi_comm = None
        mpi_rank = 0
        mpi_size = 1

    print('Wiping dir %s' % FLAGS.base_dir, flush=True)
    shutil.rmtree(FLAGS.base_dir, ignore_errors=True)
    dirs = [
        fsdb.models_dir(), fsdb.selfplay_dir(), fsdb.holdout_dir(),
        fsdb.eval_dir(), fsdb.golden_chunk_dir(), fsdb.working_dir()
    ]

    # Shared filesystem for data exchange between nodes (temporary solution,
    # 5/6/2019).
    if FLAGS.use_multinode:
        ensure_dir_exists(FLAGS.shared_dir_exchange)
    for d in dirs:
        ensure_dir_exists(d)

    # Copy the flag files so there's no chance of them getting accidentally
    # overwritten while the RL loop is running.
    flags_dir = os.path.join(FLAGS.base_dir, 'flags')
    shutil.copytree(FLAGS.flags_dir, flags_dir)
    FLAGS.flags_dir = flags_dir

    # Copy the target model to the models directory so we can find it easily.
    shutil.copy(FLAGS.target_path,
                os.path.join(fsdb.models_dir(), 'target.pb'))
    shutil.copy(FLAGS.target_path + '.og',
                os.path.join(fsdb.models_dir(), 'target.pb.og'))

    with logged_timer('Total time from mpi_rank={}'.format(mpi_rank)):
        try:
            rl_loop(mpi_comm, mpi_rank, mpi_size)
        finally:
            asyncio.get_event_loop().close()
async def validate(state, holdout_glob):
    """Validate the trained model against holdout games.

    Args:
        state: the RL loop State instance.
        holdout_glob: a glob that matches holdout games.
    """
    if not glob.glob(holdout_glob):
        print('Glob "{}" didn\'t match any files, skipping validation'.format(
            holdout_glob))
        return

    await run(
        'python3', 'validate.py', holdout_glob,
        '--flagfile={}'.format(
            os.path.join(FLAGS.flags_dir, 'validate.flags')),
        '--work_dir={}'.format(fsdb.working_dir()))
def main(unused_argv):
    """Run the reinforcement learning loop."""
    mll.init_start()
    print('Wiping dir %s' % FLAGS.base_dir, flush=True)
    shutil.rmtree(FLAGS.base_dir, ignore_errors=True)

    dirs = [fsdb.models_dir(), fsdb.selfplay_dir(), fsdb.holdout_dir(),
            fsdb.eval_dir(), fsdb.golden_chunk_dir(), fsdb.working_dir(),
            fsdb.mpi_log_dir()]
    for d in dirs:
        ensure_dir_exists(d)

    # Copy the flag files so there's no chance of them getting accidentally
    # overwritten while the RL loop is running.
    flags_dir = os.path.join(FLAGS.base_dir, 'flags')
    shutil.copytree(FLAGS.flags_dir, flags_dir)
    FLAGS.flags_dir = flags_dir

    # Copy the target model to the models directory so we can find it easily.
    shutil.copy(FLAGS.target_path,
                os.path.join(fsdb.models_dir(), 'target.pb'))

    logging.getLogger().addHandler(
        logging.FileHandler(os.path.join(FLAGS.base_dir, 'rl_loop.log')))
    formatter = logging.Formatter('[%(asctime)s] %(message)s',
                                  '%Y-%m-%d %H:%M:%S')
    for handler in logging.getLogger().handlers:
        handler.setFormatter(formatter)

    logging.info('Selfplay nodes = {}'.format(FLAGS.selfplay_node))
    logging.info('Train nodes = {}'.format(FLAGS.train_node))
    logging.info('Eval nodes = {}'.format(FLAGS.eval_node))

    with logged_timer('Total time'):
        try:
            mll.init_stop()
            mll.run_start()
            rl_loop()
        finally:
            asyncio.get_event_loop().close()
def spawn_train_workers(state):
    # TODO: this needs to be removed.
    tf_records = get_golden_chunk_records(state)
    comm_world = MPI.COMM_WORLD

    # Spawn one worker process per training GPU.
    print("Spawning worker processes on {}".format(socket.gethostname()))
    mpi_info = MPI.Info.Create()
    num_workers = FLAGS.num_gpus_train
    # Subtract 1 core from this value; oversubscription might not work.
    cores_per_worker = (
        FLAGS.cores_per_socket * FLAGS.num_socket) // num_workers - 1
    mpi_info.Set("host", socket.gethostname())
    mpi_info.Set("map_by", "ppr:{}:socket,PE={}".format(
        num_workers // FLAGS.num_socket, cores_per_worker))
    icomm = MPI.COMM_SELF.Spawn(
        "python3", maxprocs=num_workers,
        args=[
            'train.py', *tf_records,
            '--flagfile={}'.format(
                os.path.join(FLAGS.flags_dir, 'train.flags')),
            '--work_dir={}'.format(fsdb.working_dir()),
            '--export_path={}'.format(
                os.path.join(fsdb.models_dir(), 'new_model')),
            '--training_seed=13337',
            '--num_selfplays={}'.format(comm_world.size - 1),
            '--window_iters={}'.format(FLAGS.window_size),
            '--total_iters={}'.format(FLAGS.iterations),
            '--golden_chunk_pattern={}'.format(
                os.path.join(fsdb.golden_chunk_dir(), '*.zz*')),
            '--freeze=true',
            '--use_multinode=true',
            '--use_mgpu_horovod=true',
        ],
        info=mpi_info)
    return icomm
def freeze(save_path, rewrite_tpu=False):
    # Note: unlike the TPU-aware variant, rewrite_tpu is accepted but unused
    # here.
    cmd = ['python3', 'freeze_graph.py',
           '--work_dir={}'.format(fsdb.working_dir()),
           '--model_path={}'.format(save_path)]
    return mask_flags.run(cmd)
def main(unused_argv):
    """Run the reinforcement learning loop."""
    utils.ensure_dir_exists(fsdb.models_dir())
    utils.ensure_dir_exists(fsdb.selfplay_dir())
    utils.ensure_dir_exists(fsdb.holdout_dir())
    utils.ensure_dir_exists(fsdb.sgf_dir())
    utils.ensure_dir_exists(fsdb.eval_dir())
    utils.ensure_dir_exists(fsdb.golden_chunk_dir())
    utils.ensure_dir_exists(fsdb.working_dir())

    bootstrap_name = shipname.generate(0)
    bootstrap_model_path = os.path.join(fsdb.models_dir(), bootstrap_name)
    mask_flags.checked_run([
        'python3', 'bootstrap.py',
        '--export_path={}'.format(bootstrap_model_path),
        '--work_dir={}'.format(fsdb.working_dir()),
        '--flagfile=rl_loop/local_flags'])

    selfplay_cmd = [
        'python3', 'selfplay.py',
        '--load_file={}'.format(bootstrap_model_path),
        '--selfplay_dir={}'.format(
            os.path.join(fsdb.selfplay_dir(), bootstrap_name)),
        '--holdout_dir={}'.format(
            os.path.join(fsdb.holdout_dir(), bootstrap_name)),
        '--sgf_dir={}'.format(fsdb.sgf_dir()),
        '--holdout_pct=0',
        '--flagfile=rl_loop/local_flags']

    # Selfplay twice.
    mask_flags.checked_run(selfplay_cmd)
    mask_flags.checked_run(selfplay_cmd)
    # And once more to generate a held out game for validation.
    # Exploits flags behavior: if you pass a flag twice, the second one wins.
    mask_flags.checked_run(selfplay_cmd + ['--holdout_pct=100'])

    # Double check that at least one sgf has been generated.
    assert os.listdir(os.path.join(fsdb.sgf_dir(), 'full'))

    print("Making shuffled golden chunk from selfplay data...")
    # TODO(amj): refactor example_buffer so it can be called the same way
    # as everything else.
    eb.make_chunk_for(output_dir=fsdb.golden_chunk_dir(),
                      local_dir=fsdb.working_dir(),
                      game_dir=fsdb.selfplay_dir(),
                      model_num=1,
                      positions=64,
                      threads=8,
                      sampling_frac=1)

    tf_records = sorted(
        gfile.Glob(os.path.join(fsdb.golden_chunk_dir(), '*.tfrecord.zz')))

    trained_model_name = shipname.generate(1)
    trained_model_path = os.path.join(fsdb.models_dir(), trained_model_name)

    # Train on shuffled game data.
    mask_flags.checked_run([
        'python3', 'train.py', *tf_records,
        '--work_dir={}'.format(fsdb.working_dir()),
        '--export_path={}'.format(trained_model_path),
        '--flagfile=rl_loop/local_flags'])

    # Validate the trained model on the held out game.
    mask_flags.checked_run([
        'python3', 'validate.py',
        os.path.join(fsdb.holdout_dir(), bootstrap_name),
        '--work_dir={}'.format(fsdb.working_dir()),
        '--flagfile=rl_loop/local_flags'])

    # Verify that the trained model works for selfplay.
    # Exploits flags behavior: if you pass a flag twice, the second one wins.
    mask_flags.checked_run(
        selfplay_cmd + ['--load_file={}'.format(trained_model_path)])

    mask_flags.checked_run([
        'python3', 'evaluate.py',
        bootstrap_model_path, trained_model_path,
        '--games=1',
        '--eval_sgf_dir={}'.format(fsdb.eval_dir()),
        '--flagfile=rl_loop/local_flags'])
    print("Completed integration test!")
def validate(state, holdout_glob):
    # Note: this variant reuses train.flags rather than a separate
    # validate.flags file.
    checked_run(
        'validation',
        'python3', 'validate.py', holdout_glob,
        '--flagfile={}'.format(os.path.join(FLAGS.flags_dir, 'train.flags')),
        '--work_dir={}'.format(fsdb.working_dir()))
async def train(state, window_size):
    """Run training and write a new model to the fsdb models_dir.

    Args:
        state: the RL loop State instance.
        window_size: the size of the training window, forwarded to train.py
            as --window_size.
    """
    train_node = FLAGS.train_node
    num_node = len(train_node)
    dist_train = num_node > 0

    if dist_train:
        intra_threads = FLAGS.numa_cores // FLAGS.train_instance_per_numa - 1
        numa_per_node = FLAGS.physical_cores // FLAGS.numa_cores
        instance_per_node = numa_per_node * FLAGS.train_instance_per_numa

        # Build the pin list for the MPI async-progress threads, one per
        # training instance.
        mpi_async_progress = ''
        for i in range(numa_per_node):
            for j in range(FLAGS.train_instance_per_numa):
                if (not i == 0) or (not j == 0):
                    mpi_async_progress += ','
                mpi_async_progress += '{}'.format(i * FLAGS.numa_cores + j)
    else:
        intra_threads = FLAGS.physical_cores

    model_path = os.path.join(fsdb.models_dir(), state.train_model_name)
    cmd = [
        'python3', 'train.py',
        '--flagfile={}'.format(os.path.join(FLAGS.flags_dir, 'train.flags')),
        '--work_dir={}'.format(fsdb.working_dir()),
        '--export_path={}'.format(model_path),
        '--window_size={}'.format(window_size),
        '--data_path={}'.format(fsdb.golden_chunk_dir()),
        '--training_seed={}'.format(state.seed),
        '--freeze=True',
        '--num_inter_threads=1',
        '--num_intra_threads={}'.format(intra_threads),
    ]

    if dist_train:
        genvs = [
            'HOROVOD_FUSION_THRESHOLD=134217728',
            'KMP_BLOCKTIME=0',
            'KMP_HW_SUBSET=1T',
            'OMP_BIND_PROC=true',
            'I_MPI_ASYNC_PROGRESS_PIN=' + mpi_async_progress,
            'OMP_NUM_THREADS={}'.format(intra_threads),
        ]
        hosts = []
        proclists = []
        numa_nodes = []
        for node in range(num_node):
            # Add every instance on every node to the placement lists.
            for numa in range(numa_per_node):
                for instance in range(FLAGS.train_instance_per_numa):
                    hosts += [train_node[node]]
                    proclist = (numa * FLAGS.numa_cores +
                                FLAGS.train_instance_per_numa +
                                instance * intra_threads)
                    proclists += ['{}'.format(proclist)]
                    numa_nodes += ['{}'.format(numa)]
        lines = await run_distributed(genvs, 1, hosts, proclists, numa_nodes,
                                      None, *cmd, '--dist_train=True')
    else:
        lines = await run(*cmd)
    print('\n'.join(lines), file=sys.stderr)
def main(unused_argv):
    for i in range(0, NUM_LOOP):
        if i == 0:
            src_model_name = shipname.generate(0)
            fsdb.switch_base(os.path.join(base_dir, src_model_name))
            src_model_path = os.path.join(fsdb.models_dir(), src_model_name)
            bootstrap_model_path = os.path.join(fsdb.models_dir(),
                                                src_model_name)
            mask_flags.checked_run([
                'python3', 'bootstrap.py',
                '--export_path={}'.format(bootstrap_model_path),
                '--work_dir={}'.format(fsdb.working_dir()),
                '--flagfile=rl_loop/local_flags'])
            dst_model_name = shipname.generate(1)
            fsdb.switch_base(os.path.join(base_dir, dst_model_name))
        else:
            src_model_name = dst_model_name
            src_model_path = os.path.join(fsdb.models_dir(), src_model_name)
            dst_model_name = shipname.generate(i + 1)
            fsdb.switch_base(os.path.join(base_dir, dst_model_name))

        utils.ensure_dir_exists(fsdb.models_dir())
        utils.ensure_dir_exists(fsdb.selfplay_dir())
        utils.ensure_dir_exists(fsdb.holdout_dir())
        utils.ensure_dir_exists(fsdb.sgf_dir())
        utils.ensure_dir_exists(fsdb.eval_dir())
        utils.ensure_dir_exists(fsdb.golden_chunk_dir())
        utils.ensure_dir_exists(fsdb.working_dir())

        print(src_model_name)
        print(src_model_path)

        selfplay_cmd = [
            'python3', 'selfplay.py',
            '--load_file={}'.format(src_model_path),
            '--selfplay_dir={}'.format(
                os.path.join(fsdb.selfplay_dir(), dst_model_name)),
            '--holdout_dir={}'.format(
                os.path.join(fsdb.holdout_dir(), dst_model_name)),
            '--sgf_dir={}'.format(fsdb.sgf_dir()),
            '--holdout_pct=0',
            '--flagfile=rl_loop/local_flags']

        # Selfplay twice.
        mask_flags.checked_run(selfplay_cmd)
        mask_flags.checked_run(selfplay_cmd)
        # And once more to generate a held out game for validation.
        # Exploits flags behavior: if you pass a flag twice, the second one
        # wins.
        mask_flags.checked_run(selfplay_cmd + ['--holdout_pct=100'])

        # Double check that at least one sgf has been generated.
        assert os.listdir(os.path.join(fsdb.sgf_dir(), 'full'))

        print("Making shuffled golden chunk from selfplay data...")
        # TODO(amj): refactor example_buffer so it can be called the same way
        # as everything else.
        eb.make_chunk_for(output_dir=fsdb.golden_chunk_dir(),
                          local_dir=fsdb.working_dir(),
                          game_dir=fsdb.selfplay_dir(),
                          model_num=1,
                          positions=64,
                          threads=8,
                          sampling_frac=1)

        tf_records = sorted(
            gfile.Glob(os.path.join(fsdb.golden_chunk_dir(),
                                    '*.tfrecord.zz')))

        trained_model_name = dst_model_name
        trained_model_path = os.path.join(fsdb.models_dir(),
                                          trained_model_name)

        # Train on shuffled game data.
        mask_flags.checked_run([
            'python3', 'train.py', *tf_records,
            '--work_dir={}'.format(fsdb.working_dir()),
            '--export_path={}'.format(trained_model_path),
            '--flagfile=rl_loop/local_flags'])

    print("Finished!")
def py_flags(state):
    return [
        '--work_dir={}'.format(fsdb.working_dir()),
        '--training_seed={}'.format(state.seed),
    ]