def main(unused_argv): """Run the reinforcement learning loop.""" print('Wiping dir %s' % FLAGS.base_dir, flush=True) shutil.rmtree(FLAGS.base_dir, ignore_errors=True) dirs = [fsdb.models_dir(), fsdb.selfplay_dir(), fsdb.holdout_dir(), fsdb.eval_dir(), fsdb.golden_chunk_dir(), fsdb.working_dir()] for d in dirs: ensure_dir_exists(d); # Copy the flag files so there's no chance of them getting accidentally # overwritten while the RL loop is running. flags_dir = os.path.join(FLAGS.base_dir, 'flags') shutil.copytree(FLAGS.flags_dir, flags_dir) FLAGS.flags_dir = flags_dir # Copy the target model to the models directory so we can find it easily. for file_name in [ "target.pb", "target_raw.ckpt.data-00000-of-00001", "target_raw.ckpt.index", "target_raw.ckpt.meta"]: shutil.copy(FLAGS.target_path[:-len("target.pb")] + file_name, os.path.join(fsdb.models_dir(), file_name)) logging.getLogger().addHandler( logging.FileHandler(os.path.join(FLAGS.base_dir, 'rl_loop.log'))) formatter = logging.Formatter('[%(asctime)s] %(message)s', '%Y-%m-%d %H:%M:%S') for handler in logging.getLogger().handlers: handler.setFormatter(formatter) with logged_timer('Total time'): try: rl_loop() finally: asyncio.get_event_loop().close()
def initialize_from_checkpoint(state):
    """Initialize the reinforcement learning loop from a checkpoint."""

    # The checkpoint's work_dir should contain the most recently trained model.
    model_paths = glob.glob(
        os.path.join(FLAGS.checkpoint_dir, 'work_dir/model.ckpt-*.pb'))
    if len(model_paths) != 1:
        raise RuntimeError(
            'Expected exactly one model in the checkpoint work_dir, '
            'got [{}]'.format(', '.join(model_paths)))
    start_model_path = model_paths[0]

    # Copy the latest trained model into the models directory and use it on the
    # first round of selfplay.
    state.best_model_name = 'checkpoint'
    shutil.copy(start_model_path,
                os.path.join(fsdb.models_dir(), state.best_model_name + '.pb'))
    shutil.copy(
        start_model_path + '.og',
        os.path.join(fsdb.models_dir(), state.best_model_name + '.pb.og'))

    # Copy the training chunks.
    golden_chunks_dir = os.path.join(FLAGS.checkpoint_dir, 'golden_chunks')
    for basename in os.listdir(golden_chunks_dir):
        path = os.path.join(golden_chunks_dir, basename)
        out_path = os.path.join(fsdb.golden_chunk_dir(), basename)
        buffer = example_buffer.ExampleBuffer(sampling_frac=1.0)
        buffer.parallel_fill(tf.gfile.Glob(path))
        buffer.flush(out_path, FLAGS.num_gpus_train)

    # Copy the training files.
    work_dir = os.path.join(FLAGS.checkpoint_dir, 'work_dir')
    for basename in os.listdir(work_dir):
        path = os.path.join(work_dir, basename)
        shutil.copy(path, fsdb.working_dir())


def main(unused_argv): """Run the reinforcement learning loop.""" print('Wiping dir %s' % FLAGS.base_dir, flush=True) shutil.rmtree(FLAGS.base_dir, ignore_errors=True) utils.ensure_dir_exists(fsdb.models_dir()) utils.ensure_dir_exists(fsdb.selfplay_dir()) utils.ensure_dir_exists(fsdb.holdout_dir()) utils.ensure_dir_exists(fsdb.eval_dir()) utils.ensure_dir_exists(fsdb.golden_chunk_dir()) utils.ensure_dir_exists(fsdb.working_dir()) # Copy the flag files so there's no chance of them getting accidentally # overwritten while the RL loop is running. flags_dir = os.path.join(FLAGS.base_dir, 'flags') shutil.copytree(FLAGS.flags_dir, flags_dir) FLAGS.flags_dir = flags_dir # Copy the target model to the models directory so we can find it easily. shutil.copy('ml_perf/target.pb', fsdb.models_dir()) logging.getLogger().addHandler( logging.FileHandler(os.path.join(FLAGS.base_dir, 'rl_loop.log'))) formatter = logging.Formatter('[%(asctime)s] %(message)s', '%Y-%m-%d %H:%M:%S') for handler in logging.getLogger().handlers: handler.setFormatter(formatter) with utils.logged_timer('Total time'): try: rl_loop() finally: asyncio.get_event_loop().close()
async def train(state, tf_records):
    """Run training and write a new model to the fsdb models_dir.

    Args:
        state: the RL loop State instance.
        tf_records: a list of paths to TensorFlow records to train on.
    """
    model_path = os.path.join(fsdb.models_dir(), state.train_model_name)
    await run(
        'python3', 'train.py',
        '--gpu_device_list={}'.format(','.join(FLAGS.train_devices)),
        '--flagfile={}'.format(os.path.join(FLAGS.flags_dir, 'train.flags')),
        '--work_dir={}'.format(fsdb.working_dir()),
        '--export_path={}'.format(model_path),
        '--use_extra_features={}'.format(FLAGS.use_extra_features),
        '--freeze=true',
        *tf_records)

    # Append the time elapsed from when the RL was started to when this model
    # was trained.
    elapsed = time.time() - state.start_time
    timestamps_path = os.path.join(fsdb.models_dir(), 'train_times.txt')
    with gfile.Open(timestamps_path, 'a') as f:
        print('{:.3f} {}'.format(elapsed, state.train_model_name), file=f)


def main(unused_argv): """Run the reinforcement learning loop.""" print('Wiping dir %s' % FLAGS.base_dir, flush=True) shutil.rmtree(FLAGS.base_dir, ignore_errors=True) utils.ensure_dir_exists(fsdb.models_dir()) utils.ensure_dir_exists(fsdb.selfplay_dir()) utils.ensure_dir_exists(fsdb.holdout_dir()) utils.ensure_dir_exists(fsdb.eval_dir()) utils.ensure_dir_exists(fsdb.golden_chunk_dir()) utils.ensure_dir_exists(fsdb.working_dir()) # Copy the target model to the models directory so we can find it easily. shutil.copy('ml_perf/target.pb', fsdb.models_dir()) logging.getLogger().addHandler( logging.FileHandler(os.path.join(FLAGS.base_dir, 'reinforcement.log'))) formatter = logging.Formatter('[%(asctime)s] %(message)s', '%Y-%m-%d %H:%M:%S') for handler in logging.getLogger().handlers: handler.setFormatter(formatter) with utils.logged_timer('Total time'): for target_win_rate in rl_loop(): if target_win_rate > 0.5: return logging.info('Passed exit criteria.') logging.info('Failed to converge.')
async def train(state, selfplay_processes):
    """Run training and write a new model to the fsdb models_dir.

    Args:
        state: the RL loop State instance.
        selfplay_processes: the selfplay processes to monitor while waiting
            for enough training examples to be generated.
    """
    wait_for_training_examples(state, selfplay_processes,
                               FLAGS.min_games_per_iteration)
    tf_records = await sample_training_examples(state)

    model_path = os.path.join(fsdb.models_dir(), state.train_model_name)
    await run(
        'python3', 'train.py',
        '--gpu_device_list={}'.format(','.join(FLAGS.train_devices)),
        '--flagfile={}'.format(os.path.join(FLAGS.flags_dir, 'train.flags')),
        '--work_dir={}'.format(fsdb.working_dir()),
        '--export_path={}'.format(model_path),
        '--use_extra_features={}'.format(FLAGS.use_extra_features),
        '--freeze=true',
        *tf_records)

    # Append the time elapsed from when the RL was started to when this model
    # was trained.
    elapsed = time.time() - state.start_time
    timestamps_path = os.path.join(fsdb.models_dir(), 'train_times.txt')
    with gfile.Open(timestamps_path, 'a') as f:
        print('{:.3f} {}'.format(elapsed, state.train_model_name), file=f)

    if FLAGS.validate and state.iter_num > 1:
        try:
            await validate(state)
        except Exception as e:
            logging.error(e)


def load_train_times():
    """Load (elapsed_seconds, model_name, engine_path) tuples from train_times.txt."""
    models = []
    path = os.path.join(fsdb.models_dir(), 'train_times.txt')
    with gfile.Open(path, 'r') as f:
        for line in f.readlines():
            line = line.strip()
            if line:
                timestamp, name = line.split(' ')
                path = 'tf,' + os.path.join(fsdb.models_dir(), name + '.pb')
                models.append((float(timestamp), name, path))
    return models


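# A minimal usage sketch (not from the original source): load_train_times()
# parses lines of the form '<elapsed_seconds> <model_name>' as written by
# train() above. The sample line below is invented for illustration.
line = '1234.567 000001-example'
timestamp, name = line.strip().split(' ')
assert float(timestamp) == 1234.567 and name == '000001-example'

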
def post_train(state):
    model_path = os.path.join(fsdb.models_dir(), state.train_model_name)
    dual_net.optimize_graph(model_path + '.pb', model_path,
                            FLAGS.quantization,
                            fsdb.golden_chunk_dir() + '/*.zz',
                            FLAGS.eval_min_max_every_epoch)
    mll.save_model(state.iter_num - 1)

    # Append the time elapsed from when the RL was started to when this model
    # was trained.
    elapsed = time.time() - state.start_time
    timestamps_path = os.path.join(fsdb.models_dir(), 'train_times.txt')
    with gfile.Open(timestamps_path, 'a') as f:
        print('{:.3f} {}'.format(elapsed, state.train_model_name), file=f)


def main(unused_argv): """Run the reinforcement learning loop.""" logging.getLogger('mlperf_compliance').propagate = False ##-->multi-node setup if FLAGS.use_multinode: mpi_comm = MPI.COMM_WORLD mpi_rank = mpi_comm.Get_rank() mpi_size = mpi_comm.Get_size() print('[MPI Init] MPI rank {}, mpi size is {} host is {}'.format( mpi_rank, mpi_size, socket.gethostname())) else: mpi_comm = None mpi_rank = 0 mpi_size = 1 print('Wiping dir %s' % FLAGS.base_dir, flush=True) shutil.rmtree(FLAGS.base_dir, ignore_errors=True) dirs = [ fsdb.models_dir(), fsdb.selfplay_dir(), fsdb.holdout_dir(), fsdb.eval_dir(), fsdb.golden_chunk_dir(), fsdb.working_dir() ] ##-->sharedFS for dataExchange. tmp solution 5/6/2019 if FLAGS.use_multinode: ensure_dir_exists(FLAGS.shared_dir_exchange) for d in dirs: ensure_dir_exists(d) # Copy the flag files so there's no chance of them getting accidentally # overwritten while the RL loop is running. flags_dir = os.path.join(FLAGS.base_dir, 'flags') shutil.copytree(FLAGS.flags_dir, flags_dir) FLAGS.flags_dir = flags_dir # Copy the target model to the models directory so we can find it easily. shutil.copy(FLAGS.target_path, os.path.join(fsdb.models_dir(), 'target.pb')) shutil.copy(FLAGS.target_path + '.og', os.path.join(fsdb.models_dir(), 'target.pb.og')) with logged_timer('Total time from mpi_rank={}'.format(mpi_rank)): try: rl_loop(mpi_comm, mpi_rank, mpi_size) finally: asyncio.get_event_loop().close()
def get_files_exchange(state, mpi_rank):
    """Fetch files for this rank from the shared exchange directory.

    The train rank pulls selfplay example files; selfplay ranks pull the
    newly trained eval model.
    """
    if mpi_rank == FLAGS.train_rank:
        # Train gets selfplay examples.
        selfplay_files = glob.glob(
            os.path.join(FLAGS.shared_dir_exchange,
                         state.output_model_name + '-mpirank-*.zz*'))
        for filename in selfplay_files:
            print('Rank = {}, Getting file={} iter={} from SharedFS'.format(
                mpi_rank, filename, state.iter_num))
            shutil.copy(filename, fsdb.golden_chunk_dir())
    else:
        # Selfplay needs to get the trained eval model.
        dst_dir = os.path.join(fsdb.models_dir())
        src_file = os.path.join(FLAGS.shared_dir_exchange,
                                state.train_model_name + '.pb')
        print('Rank = {}, Getting file={} iter={} from SharedFS'.format(
            mpi_rank, src_file, state.iter_num))
        shutil.copy(src_file, dst_dir)

        src_file = os.path.join(FLAGS.shared_dir_exchange,
                                state.train_model_name + '.pb' + '.og')
        print('Rank = {}, Getting file={} iter={} from SharedFS'.format(
            mpi_rank, src_file, state.iter_num))
        shutil.copy(src_file, dst_dir)


async def convert(state):
    """Freeze the trained model and convert to TRT.

    Args:
        state: the RL loop State instance.
    """
    # Use only the second-from-last GPU.
    new_env = os.environ.copy()
    new_env['CUDA_VISIBLE_DEVICES'] = str(FLAGS.num_gpus_train - 2)

    model_path = os.path.join(fsdb.models_dir(), state.train_model_name)
    if FLAGS.use_multinode:
        # convert runs in parallel with eval and validation, so pin it to the
        # CPU cores belonging to its GPU worker.
        cores_per_worker = ((FLAGS.cores_per_socket * FLAGS.num_socket) //
                            FLAGS.num_gpus_selfplay)
        start = (FLAGS.num_gpus_train - 2) * cores_per_worker
        end = start + cores_per_worker - 1
        cpus = str(start) + '-' + str(end)
        await run(new_env, 'taskset', '-c', cpus,
                  'python3', 'freeze_graph.py',
                  '--model_path={}'.format(model_path),
                  '--trt_batch={}'.format(FLAGS.trt_batch))
    else:
        await run(new_env, 'python3', 'freeze_graph.py',
                  '--model_path={}'.format(model_path),
                  '--trt_batch={}'.format(FLAGS.trt_batch))


async def start_selfplay():
    """Start a selfplay worker for each device in FLAGS.selfplay_devices."""
    output_dir = os.path.join(fsdb.selfplay_dir(), "$MODEL")
    holdout_dir = os.path.join(fsdb.holdout_dir(), "$MODEL")
    model_pattern = os.path.join(fsdb.models_dir(), '%d.pb')

    logs = []
    processes = []
    loop = asyncio.get_event_loop()
    for i, device in enumerate(FLAGS.selfplay_devices):
        cmd = [
            'bazel-bin/cc/concurrent_selfplay',
            '--flagfile={}'.format(os.path.join(FLAGS.flags_dir,
                                                'selfplay.flags')),
            '--run_forever=1',
            '--device={}'.format(device),
            '--model={}'.format(model_pattern),
            '--output_dir={}/{}'.format(output_dir, i),
            '--holdout_dir={}/{}'.format(holdout_dir, i)]
        cmd_str = await expand_cmd_str(cmd)

        f = open(os.path.join(FLAGS.base_dir, 'selfplay_%d.log' % i), 'w')
        f.write(cmd_str + '\n\n')
        f.flush()

        logging.info('Running: %s', cmd_str)
        processes.append(await asyncio.create_subprocess_exec(
            *cmd, stdout=f, stderr=asyncio.subprocess.STDOUT))
        logs.append(f)

    return (processes, logs)


def initialize_from_checkpoint(state):
    """Initialize the reinforcement learning loop from a checkpoint."""

    # The checkpoint's work_dir should contain the most recently trained model.
    model_paths = glob.glob(
        os.path.join(FLAGS.checkpoint_dir, 'work_dir/model.ckpt-*.pb'))
    if len(model_paths) != 1:
        raise RuntimeError(
            'Expected exactly one model in the checkpoint work_dir, '
            'got [{}]'.format(', '.join(model_paths)))
    start_model_path = model_paths[0]

    # Copy the training chunks.
    golden_chunks_dir = os.path.join(FLAGS.checkpoint_dir, 'golden_chunks')
    for basename in os.listdir(golden_chunks_dir):
        path = os.path.join(golden_chunks_dir, basename)
        shutil.copy(path, fsdb.golden_chunk_dir())

    # Copy the latest trained model into the models directory and use it on the
    # first round of selfplay.
    state.best_model_name = 'checkpoint'
    best_model_path = os.path.join(fsdb.models_dir(), state.best_model_name)
    dual_net.optimize_graph(start_model_path, best_model_path,
                            FLAGS.quantization,
                            fsdb.golden_chunk_dir() + '/*.zz',
                            FLAGS.eval_min_max_every_epoch)

    # Copy the training files.
    work_dir = os.path.join(FLAGS.checkpoint_dir, 'work_dir')
    for basename in os.listdir(work_dir):
        path = os.path.join(work_dir, basename)
        shutil.copy(path, fsdb.working_dir())


def eval_pv(eval_positions):
    model_paths = oneoff_utils.get_model_paths(fsdb.models_dir())
    idx_start = FLAGS.idx_start
    eval_every = FLAGS.eval_every

    print("Evaluating models {}-{}, eval_every={}".format(
        idx_start, len(model_paths), eval_every))
    for idx in tqdm(range(idx_start, len(model_paths), eval_every)):
        if idx == idx_start:
            player = oneoff_utils.load_player(model_paths[idx])
        else:
            oneoff_utils.restore_params(model_paths[idx], player)

        mcts = strategies.MCTSPlayer(player.network, resign_threshold=-1)

        for name, position in eval_positions:
            mcts.initialize_game(position)
            mcts.suggest_move(position)

            path = []
            node = mcts.root
            while node.children:
                node = node.children.get(node.best_child())
                path.append("{},{}".format(node.fmove, int(node.N)))

            save_file = os.path.join(FLAGS.data_dir,
                                     "pv-{}-{}".format(name, idx))
            with open(save_file, "w") as data:
                data.write("{}, {}\n".format(idx, ",".join(path)))


def initialize_from_checkpoint(state, out_files_number):
    """Initialize the reinforcement learning loop from a checkpoint."""

    # The checkpoint's work_dir should contain the most recently trained model.
    model_paths = glob.glob(
        os.path.join(FLAGS.checkpoint_dir, 'work_dir/model.ckpt-*.pb'))
    if len(model_paths) != 1:
        raise RuntimeError(
            'Expected exactly one model in the checkpoint work_dir, '
            'got [{}]'.format(', '.join(model_paths)))
    start_model_path = model_paths[0]

    golden_chunks_dir = os.path.join(FLAGS.checkpoint_dir, 'golden_chunks')
    for basename in os.listdir(golden_chunks_dir):
        path = os.path.join(golden_chunks_dir, basename)
        out_path = os.path.join(fsdb.golden_chunk_dir(), basename)
        buffer = example_buffer.ExampleBuffer(sampling_frac=1.0)
        example_num = buffer.parallel_fill(tf.gfile.Glob(path),
                                           FLAGS.physical_cores)
        buffer.flush_new(out_path, example_num, out_files_number, 1)

    # Copy the latest trained model into the models directory and use it on the
    # first round of selfplay.
    state.best_model_name = 'checkpoint'
    best_model_path = os.path.join(fsdb.models_dir(), state.best_model_name)
    dual_net.optimize_graph(start_model_path, best_model_path,
                            FLAGS.quantization,
                            fsdb.golden_chunk_dir() + '/*.zz*',
                            FLAGS.eval_min_max_every_epoch)

    # Copy the training files.
    work_dir = os.path.join(FLAGS.checkpoint_dir, 'work_dir')
    for basename in os.listdir(work_dir):
        path = os.path.join(work_dir, basename)
        shutil.copy(path, fsdb.working_dir())


def same_run_eval(black_num=0, white_num=0):
    """Shorthand to spawn a job matching up two models from the same run,
    identified by their model number.
    """
    if black_num <= 0 or white_num <= 0:
        print("Need real model numbers")
        return

    b = fsdb.get_model(black_num)
    w = fsdb.get_model(white_num)

    b_model_path = os.path.join(fsdb.models_dir(), b)
    w_model_path = os.path.join(fsdb.models_dir(), w)

    return launch_eval_job(b_model_path + ".pb",
                           w_model_path + ".pb",
                           "{:d}-{:d}".format(black_num, white_num),
                           flags.FLAGS.bucket_name)


def main(unused_argv):
    logging.getLogger('mlperf_compliance').propagate = False

    sgf_dir = os.path.join(fsdb.eval_dir(), 'target')
    target = 'tf,' + os.path.join(fsdb.models_dir(), 'target.pb.og')

    models = load_train_times()
    timestamp_to_log = 0
    iter_evaluated = 0
    for i, (timestamp, name, path) in enumerate(models):
        minigo_print(key=constants.EVAL_START, metadata={'epoch_num': i + 1})
        iter_evaluated += 1
        winrate = wait(evaluate_model(path + '.og', target, sgf_dir, i + 1))
        minigo_print(key=constants.EVAL_ACCURACY, value=winrate,
                     metadata={'epoch_num': i + 1})
        minigo_print(key=constants.EVAL_STOP, metadata={'epoch_num': i + 1})
        if winrate >= 0.50:
            timestamp_to_log = timestamp
            print('Model {} beat target after {}s'.format(name, timestamp))
            break

    minigo_print(key='eval_result',
                 metadata={'iteration': iter_evaluated,
                           'timestamp': timestamp_to_log})


def selfplay(state):
    output_dir = os.path.join(fsdb.selfplay_dir(), state.output_model_name)
    holdout_dir = os.path.join(fsdb.holdout_dir(), state.output_model_name)
    model_path = os.path.join(fsdb.models_dir(), state.best_model_name)

    result = checked_run([
        'bazel-bin/cc/selfplay',
        '--parallel_games=2048',
        '--num_readouts=100',
        '--model={}.pb'.format(model_path),
        '--output_dir={}'.format(output_dir),
        '--holdout_dir={}'.format(holdout_dir)
    ] + cc_flags(state), 'selfplay')
    logging.info(get_lines(result, make_slice[-2:]))

    # Write examples to a single record.
    pattern = os.path.join(output_dir, '*', '*.zz')
    random.seed(state.seed)
    tf.set_random_seed(state.seed)
    np.random.seed(state.seed)

    # TODO(tommadams): This method of generating one golden chunk per
    # generation is sub-optimal because each chunk gets reused multiple times
    # for training, introducing bias. Instead, a fresh dataset should be
    # uniformly sampled out of *all* games in the training window before the
    # start of each training run.
    buffer = example_buffer.ExampleBuffer(sampling_frac=1.0)

    # TODO(tommadams): parallel_fill is currently non-deterministic. Make it
    # not so.
    logging.info('Writing golden chunk from "{}"'.format(pattern))
    buffer.parallel_fill(tf.gfile.Glob(pattern))
    buffer.flush(os.path.join(fsdb.golden_chunk_dir(),
                              state.output_model_name + '.tfrecord.zz'))


def best_model_path(self):
    if self.best_model_name is None:
        # We don't have a good model yet, use a random fake model
        # implementation.
        return 'random:0,0.4:0.4'
    else:
        return '{},{}.pb'.format(
            FLAGS.engine,
            os.path.join(fsdb.models_dir(), self.best_model_name))


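# Illustration only (names invented, not from the original source): with
# FLAGS.engine == 'tf' and best_model_name == '000123-example',
# best_model_path() returns 'tf,<models_dir>/000123-example.pb'; before any
# model has been trained it returns the fake-model spec 'random:0,0.4:0.4'.

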
def bootstrap(unused_argv):
    bootstrap_name = shipname.generate(0)
    bootstrap_model_path = os.path.join(fsdb.models_dir(), bootstrap_name)
    mask_flags.checked_run([
        'python', 'bootstrap.py',
        '--export_path={}'.format(bootstrap_model_path),
        '--flagfile=rl_loop/distributed_flags'
    ])


def main(unused_argv):
    sgf_dir = os.path.join(fsdb.eval_dir(), 'target')
    target = 'tf,' + os.path.join(fsdb.models_dir(), 'target.pb')

    models = load_train_times()
    for i, (timestamp, name, path) in enumerate(models):
        winrate = wait(evaluate_model(path, name, target, sgf_dir))
        if winrate >= 0.50:
            break


def train(state, tf_records):
    model_path = os.path.join(fsdb.models_dir(), state.train_model_name)
    checked_run(
        'training',
        'python3', 'train.py', *tf_records,
        '--flagfile={}'.format(os.path.join(FLAGS.flags_dir, 'train.flags')),
        '--work_dir={}'.format(fsdb.working_dir()),
        '--export_path={}'.format(model_path),
        '--training_seed={}'.format(state.seed),
        '--freeze=true')


def get_mg_path(model_run, model_num):
    """
    model_run = integer, e.g. 15, 16, corresponding to the v-number
    model_num = integer, e.g. 939, for the model number in that run
    """
    fsdb.switch_base("minigo-pub/v{:d}-19x19".format(model_run))
    model = fsdb.get_model(model_num)
    return os.path.join(fsdb.models_dir(), model)


def main(unused_argv):
    sgf_dir = os.path.join(fsdb.eval_dir(), 'target')
    target = 'tf,' + os.path.join(fsdb.models_dir(), 'target.pb')

    models = load_train_times()
    for i, (timestamp, name, path) in enumerate(models):
        winrate = wait(evaluate_model(path, target, sgf_dir, i + 1))
        if winrate >= 0.50:
            print('Model {} beat target after {}s'.format(name, timestamp))
            break


def evaluate(state):
    eval_model = state.train_model_name
    best_model = state.best_model_name
    eval_model_path = os.path.join(fsdb.models_dir(), eval_model)
    best_model_path = os.path.join(fsdb.models_dir(), best_model)
    sgf_dir = os.path.join(fsdb.eval_dir(), eval_model)

    result = checked_run(
        'evaluation',
        'bazel-bin/cc/eval',
        '--flagfile={}'.format(os.path.join(FLAGS.flags_dir, 'eval.flags')),
        '--model={}.pb'.format(eval_model_path),
        '--model_two={}.pb'.format(best_model_path),
        '--sgf_dir={}'.format(sgf_dir),
        '--seed={}'.format(state.seed))

    result = get_lines(result, make_slice[-7:])
    logging.info(result)
    pattern = r'{}\s+\d+\s+(\d+\.\d+)%'.format(eval_model)
    win_rate = float(re.search(pattern, result).group(1)) * 0.01
    logging.info('Win rate %s vs %s: %.3f', eval_model, best_model, win_rate)
    return win_rate


def evaluate(state, against_model):
    eval_model = state.train_model_name
    eval_model_path = os.path.join(fsdb.models_dir(), eval_model)
    against_model_path = os.path.join(fsdb.models_dir(), against_model)
    sgf_dir = os.path.join(fsdb.eval_dir(), eval_model)

    result = checked_run([
        'bazel-bin/cc/eval',
        '--num_readouts=100',
        '--parallel_games=100',
        '--model={}.pb'.format(eval_model_path),
        '--model_two={}.pb'.format(against_model_path),
        '--sgf_dir={}'.format(sgf_dir)
    ] + cc_flags(state), 'evaluation against ' + against_model)

    result = get_lines(result, make_slice[-7:])
    logging.info(result)
    pattern = r'{}\s+\d+\s+(\d+\.\d+)%'.format(eval_model)
    win_rate = float(re.search(pattern, result).group(1)) * 0.01
    logging.info('Win rate %s vs %s: %.3f', eval_model, against_model,
                 win_rate)
    return win_rate


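# A minimal sketch of how the win-rate regex in evaluate() behaves, assuming an
# output line of the form '<model> <games> <win%>'. The sample line below is
# invented for illustration and is not taken from actual cc/eval output.
import re

eval_model = '000123-example'
sample = '000123-example   100   57.00%'
pattern = r'{}\s+\d+\s+(\d+\.\d+)%'.format(eval_model)
win_rate = float(re.search(pattern, sample).group(1)) * 0.01  # -> 0.57

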
def main(unusedargv):
    sgf_files = oneoff_utils.find_and_filter_sgf_files(
        FLAGS.sgf_dir, FLAGS.min_year, FLAGS.komi)
    pos_data, move_data, result_data, move_idxs = sample_positions_from_games(
        sgf_files=sgf_files, num_positions=FLAGS.num_positions)
    df = get_training_curve_data(fsdb.models_dir(), pos_data, move_data,
                                 result_data, FLAGS.idx_start,
                                 FLAGS.eval_every)
    save_plots(FLAGS.plot_dir, df)


def main(unusedargv):
    model_paths = oneoff_utils.get_model_paths(fsdb.models_dir())

    # List vars constructed when using dual_net.
    dual_net_list(model_paths[0])

    # Calculate l2 cost over a sequence of our models.
    df = get_l2_cost_data(model_paths, FLAGS.idx_start, FLAGS.eval_every)
    print(df)
    save_plots(FLAGS.plot_dir, df)


def initialize_from_checkpoint(state):
    """Initialize the reinforcement learning loop from a checkpoint."""

    # The checkpoint's work_dir should contain the most recently trained model.
    model_paths = glob.glob(
        os.path.join(FLAGS.checkpoint_dir, 'work_dir/model.ckpt-*.pb'))
    print(os.path.join(FLAGS.checkpoint_dir, 'work_dir/model.ckpt-*.pb'))
    print(os.getcwd())
    if len(model_paths) != 1:
        raise RuntimeError(
            'Expected exactly one model in the checkpoint work_dir '
            '({}), got [{}]'.format(
                os.path.join(FLAGS.checkpoint_dir, 'work_dir'),
                ', '.join(model_paths)))
    start_model_path = model_paths[0]

    # Copy the latest trained model into the models directory and use it on the
    # first round of selfplay.
    state.best_model_name = 'checkpoint'
    shutil.copy(start_model_path,
                os.path.join(fsdb.models_dir(), state.best_model_name + '.pb'))

    start_model_files = glob.glob(os.path.join(
        FLAGS.checkpoint_dir, 'work_dir/model.ckpt-9383_raw.ckpt*'))
    for file_name in start_model_files:
        shutil.copy(
            file_name,
            os.path.join(
                fsdb.models_dir(),
                state.best_model_name +
                os.path.basename(file_name)[len("model.ckpt-9383"):]))

    # Copy the training chunks.
    golden_chunks_dir = os.path.join(FLAGS.checkpoint_dir, "..",
                                     'golden_chunks')
    for basename in os.listdir(golden_chunks_dir):
        path = os.path.join(golden_chunks_dir, basename)
        shutil.copy(path, fsdb.golden_chunk_dir())

    # Copy the training files.
    work_dir = os.path.join(FLAGS.checkpoint_dir, 'work_dir')
    for basename in os.listdir(work_dir):
        path = os.path.join(work_dir, basename)
        shutil.copy(path, fsdb.working_dir())


async def train(state, tf_records):
    """Run training and write a new model to the fsdb models_dir.

    Args:
        state: the RL loop State instance.
        tf_records: a list of paths to TensorFlow records to train on.
    """
    new_env = os.environ.copy()
    model_path = os.path.join(fsdb.models_dir(), state.train_model_name)

    if FLAGS.use_mgpu_horovod:
        # Assign the leading cores of each socket to training.
        await run(
            new_env,
            'mpiexec', '--allow-run-as-root',
            '--map-by', 'ppr:{}:socket,pe=2'.format(
                str(FLAGS.num_gpus_train // FLAGS.num_socket)),
            '-np', str(FLAGS.num_gpus_train),
            'python3', 'train.py', *tf_records,
            '--flagfile={}'.format(os.path.join(FLAGS.flags_dir,
                                                'train.flags')),
            '--work_dir={}'.format(fsdb.working_dir()),
            '--export_path={}'.format(model_path),
            '--training_seed={}'.format(state.seed),
            '--use_mgpu_horovod=true',
            '--freeze=true')
    else:
        new_env['CUDA_VISIBLE_DEVICES'] = '0'
        await run(
            new_env,
            'python3', 'train.py', *tf_records,
            '--flagfile={}'.format(os.path.join(FLAGS.flags_dir,
                                                'train.flags')),
            '--work_dir={}'.format(fsdb.working_dir()),
            '--export_path={}'.format(model_path),
            '--training_seed={}'.format(state.seed),
            '--freeze=true')

    minigo_print(key='save_model', value={'iteration': state.iter_num})

    # Append the time elapsed from when the RL was started to when this model
    # was trained.
    elapsed = time.time() - state.start_time
    timestamps_path = os.path.join(fsdb.models_dir(), 'train_times.txt')
    with gfile.Open(timestamps_path, 'a') as f:
        print('{:.3f} {}'.format(elapsed, state.train_model_name), file=f)