def test_inference(self):
  with tempfile.TemporaryDirectory() as working_dir, \
      tempfile.TemporaryDirectory() as export_dir:
    dualnet.bootstrap(working_dir, model_params.DummyMiniGoParams())
    exported_model = os.path.join(export_dir, 'bootstrap-model')
    dualnet.export_model(working_dir, exported_model)

    n1 = dualnet.DualNetRunner(
        exported_model, model_params.DummyMiniGoParams())
    n1.run(go.Position(utils_test.BOARD_SIZE))

    n2 = dualnet.DualNetRunner(
        exported_model, model_params.DummyMiniGoParams())
    n2.run(go.Position(utils_test.BOARD_SIZE))
def evaluate(black_model_name, black_net, white_model_name, white_net,
             evaluate_dir, params):
  """Evaluate two models against each other.

  Given two DualNetRunners, play a Go match with one model as black and the
  other as white. The two models play several games, and the model that wins
  by a margin of 55% becomes the winner.

  Args:
    black_model_name: The name of the model playing black.
    black_net: The DualNetRunner for the model playing black.
    white_model_name: The name of the model playing white.
    white_net: The DualNetRunner for the model playing white.
    evaluate_dir: Where to write the evaluation results. Set as
      'base_dir/sgf/evaluate/'.
    params: An object of hyperparameters for the model.

  Returns:
    The model name of the winner.

  Raises:
    ValueError: if the winner is neither `WHITE` nor `BLACK`.
  """
  with utils.logged_timer('{} games'.format(params.eval_games)):
    winner = evaluation.play_match(
        params, black_net, white_net, params.eval_games,
        params.eval_readouts, evaluate_dir, params.eval_verbose)

  if winner != go.WHITE_NAME and winner != go.BLACK_NAME:
    raise ValueError('Winner should be either White or Black!')

  return black_model_name if winner == go.BLACK_NAME else white_model_name
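# A minimal sketch of calling evaluate() directly, assuming two trained
# generations already exist. The model names and the directory layout below
# are hypothetical, not taken from a real run.
def _example_evaluate_usage():
  params = model_params.DummyMiniGoParams()
  models_dir = '/tmp/minigo/models'
  # Load one DualNetRunner per side, mirroring what the RL loop does.
  with utils.logged_timer('Loading weights'):
    black_net = dualnet.DualNetRunner(
        os.path.join(models_dir, '000001-model'), params)
    white_net = dualnet.DualNetRunner(
        os.path.join(models_dir, '000002-model'), params)
  winner = evaluate('000001-model', black_net, '000002-model', white_net,
                    '/tmp/minigo/sgf/evaluate/', params)
  print('Winner of evaluation: {}!'.format(winner))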
def _prepare_selfplay(
    model_name, trained_models_dir, selfplay_dir, holdout_dir, sgf_dir,
    params):
  """Set up the directories and load the network for selfplay.

  Args:
    model_name: The name of the model used for selfplay.
    trained_models_dir: Directory where the completed generations/models are.
    selfplay_dir: Where to write the games. Set as 'base_dir/data/selfplay/'.
    holdout_dir: Where to write the holdout data. Set as
      'base_dir/data/holdout/'.
    sgf_dir: Where to write the sgf (Smart Game Format) files. Set as
      'base_dir/sgf/'.
    params: A MiniGoParams instance of hyperparameters for the model.

  Returns:
    The directories and the network model for selfplay.
  """
  # Set paths for the model with 'model_name'
  model_path = os.path.join(trained_models_dir, model_name)
  output_dir = os.path.join(selfplay_dir, model_name)
  holdout_dir = os.path.join(holdout_dir, model_name)
  # clean_sgf holds the sgf files without comments.
  # full_sgf holds the sgf files with comments.
  clean_sgf = os.path.join(sgf_dir, model_name, 'clean')
  full_sgf = os.path.join(sgf_dir, model_name, 'full')

  _ensure_dir_exists(output_dir)
  _ensure_dir_exists(holdout_dir)
  _ensure_dir_exists(clean_sgf)
  _ensure_dir_exists(full_sgf)
  selfplay_dirs = {
      'output_dir': output_dir,
      'holdout_dir': holdout_dir,
      'clean_sgf': clean_sgf,
      'full_sgf': full_sgf
  }

  # Cache the network model for selfplay.
  with utils.logged_timer('Loading weights from {} ... '.format(model_path)):
    network = dualnet.DualNetRunner(model_path, params)

  return selfplay_dirs, network
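# A minimal sketch of how the helper above is meant to be consumed; the model
# name and directory layout are hypothetical. selfplay_dirs maps 'output_dir',
# 'holdout_dir', 'clean_sgf' and 'full_sgf' to freshly created directories,
# and network is the cached DualNetRunner for the chosen model.
def _example_prepare_selfplay_usage():
  params = model_params.DummyMiniGoParams()
  selfplay_dirs, network = _prepare_selfplay(
      '000001-model', '/tmp/minigo/models', '/tmp/minigo/data/selfplay/',
      '/tmp/minigo/data/holdout/', '/tmp/minigo/sgf/', params)
  print('Selfplay games will be written to {}'.format(
      selfplay_dirs['output_dir']))
  del network  # The DualNetRunner would be handed to selfplay_mcts.play().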
def main(_):
  """Run the reinforcement learning loop."""
  tf.logging.set_verbosity(tf.logging.INFO)

  params = _set_params(FLAGS)

  # A dummy model for debug/testing purposes with fewer games and iterations
  if FLAGS.test:
    params = model_params.DummyMiniGoParams()
    base_dir = FLAGS.base_dir + str(FLAGS.board_size) + '_size_dummy/'
  else:
    # Set directories for models and datasets
    base_dir = FLAGS.base_dir + str(FLAGS.board_size) + '_size/'

  dirs = utils.MiniGoDirectory(base_dir)

  # Run selfplay only if the user specifies the argument.
  if FLAGS.selfplay:
    selfplay_model_name = FLAGS.selfplay_model_name or utils.get_latest_model(
        dirs.trained_models_dir)[1]
    max_games = FLAGS.selfplay_max_games or params.max_games_per_generation
    run_selfplay(selfplay_model_name, max_games, dirs, params)
    return

  # Run the RL pipeline.
  # If no models have been trained, start from the bootstrap model.
  if not os.path.isdir(dirs.trained_models_dir):
    print('No trained model exists! Starting from Bootstrap...')
    print('Creating random initial weights...')
    bootstrap(dirs.estimator_model_dir, dirs.trained_models_dir, params)
  else:
    print('A MiniGo base directory has been found!')
    print('Start from the last checkpoint...')

  _, best_model_so_far = utils.get_latest_model(dirs.trained_models_dir)
  for rl_iter in range(params.max_iters_per_pipeline):
    print('RL_iteration: {}'.format(rl_iter))

    # Self-play with the best model to generate training data.
    run_selfplay(
        best_model_so_far, params.max_games_per_generation, dirs, params)

    # Gather selfplay data for training.
    print('Gathering game output...')
    gather(dirs.selfplay_dir, dirs.training_chunk_dir, params)

    # Train the next generation model.
    model_num, _ = utils.get_latest_model(dirs.trained_models_dir)
    print('Training on gathered game data...')
    train(dirs.trained_models_dir, dirs.estimator_model_dir,
          dirs.training_chunk_dir, model_num + 1, params)

    # Validate the latest model if needed.
    if FLAGS.validation:
      print('Validating on the holdout game data...')
      validate(dirs.trained_models_dir, dirs.holdout_dir,
               dirs.estimator_model_dir, params)

    _, current_model = utils.get_latest_model(dirs.trained_models_dir)

    if FLAGS.evaluation:  # Perform evaluation if needed
      print('Evaluate models between {} and {}'.format(
          best_model_so_far, current_model))
      black_model = os.path.join(dirs.trained_models_dir, best_model_so_far)
      white_model = os.path.join(dirs.trained_models_dir, current_model)
      _ensure_dir_exists(dirs.evaluate_dir)
      with utils.logged_timer('Loading weights'):
        black_net = dualnet.DualNetRunner(black_model, params)
        white_net = dualnet.DualNetRunner(white_model, params)

      best_model_so_far = evaluate(
          best_model_so_far, black_net, current_model, white_net,
          dirs.evaluate_dir, params)
      print('Winner of evaluation: {}!'.format(best_model_so_far))
    else:
      best_model_so_far = current_model
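# A minimal sketch of driving the pipeline from the command line. The script
# name and base directory are hypothetical; the flags follow the FLAGS
# referenced in main() above.
#
#   # Full RL loop with evaluation between generations:
#   python minigo.py --base_dir=/tmp/minigo/ --board_size=9 --evaluation
#
#   # Quick smoke test with the dummy parameters:
#   python minigo.py --base_dir=/tmp/minigo/ --board_size=9 --test
#
#   # Selfplay only, using the latest trained model:
#   python minigo.py --base_dir=/tmp/minigo/ --board_size=9 --selfplay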
def selfplay(model_name, trained_models_dir, selfplay_dir, holdout_dir,
             sgf_dir, params):
  """Perform selfplay with a specific model.

  Args:
    model_name: The name of the model used for selfplay.
    trained_models_dir: The path to the model files.
    selfplay_dir: Where to write the games. Set as 'base_dir/data/selfplay/'.
    holdout_dir: Where to write the holdout data. Set as
      'base_dir/data/holdout/'.
    sgf_dir: Where to write the sgf (Smart Game Format) files. Set as
      'base_dir/sgf/'.
    params: An object of hyperparameters for the model.
  """
  print('Playing a game with model {}'.format(model_name))
  # Reuse _prepare_selfplay to set up the output directories and load the
  # network, rather than duplicating that logic here.
  selfplay_dirs, network = _prepare_selfplay(
      model_name, trained_models_dir, selfplay_dir, holdout_dir, sgf_dir,
      params)

  with utils.logged_timer('Playing game'):
    player = selfplay_mcts.play(
        params.board_size, network, params.selfplay_readouts,
        params.selfplay_resign_threshold, params.simultaneous_leaves,
        params.selfplay_verbose)

  output_name = '{}-{}'.format(int(time.time()), socket.gethostname())

  def _write_sgf_data(dir_sgf, use_comments):
    with tf.gfile.GFile(
        os.path.join(dir_sgf, '{}.sgf'.format(output_name)), 'w') as f:
      f.write(player.to_sgf(use_comments=use_comments))

  # clean_sgf holds the sgf files without comments; full_sgf, with comments.
  _write_sgf_data(selfplay_dirs['clean_sgf'], use_comments=False)
  _write_sgf_data(selfplay_dirs['full_sgf'], use_comments=True)

  game_data = player.extract_data()
  tf_examples = preprocessing.make_dataset_from_selfplay(game_data, params)

  # Hold out a fraction (params.holdout_pct) of the games as validation data.
  if random.random() < params.holdout_pct:
    fname = os.path.join(
        selfplay_dirs['holdout_dir'],
        ('{}' + _TF_RECORD_SUFFIX).format(output_name))
  else:
    fname = os.path.join(
        selfplay_dirs['output_dir'],
        ('{}' + _TF_RECORD_SUFFIX).format(output_name))

  preprocessing.write_tf_examples(fname, tf_examples)
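# A minimal sketch of generating selfplay data outside the RL loop; the model
# name and directory layout are hypothetical. Note that each selfplay() call
# reloads the network weights, which is why the pipeline's batched driver
# caches the DualNetRunner via _prepare_selfplay instead.
def _example_selfplay_usage():
  params = model_params.DummyMiniGoParams()
  # Each call plays one game; repeat to build up a generation of data.
  for _ in range(params.max_games_per_generation):
    selfplay('000001-model', '/tmp/minigo/models',
             '/tmp/minigo/data/selfplay/', '/tmp/minigo/data/holdout/',
             '/tmp/minigo/sgf/', params)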