  def test_inference(self):
    with tempfile.TemporaryDirectory() as working_dir, \
        tempfile.TemporaryDirectory() as export_dir:
      dualnet.bootstrap(working_dir, model_params.DummyMiniGoParams())
      exported_model = os.path.join(export_dir, 'bootstrap-model')
      dualnet.export_model(working_dir, exported_model)

      n1 = dualnet.DualNetRunner(
          exported_model, model_params.DummyMiniGoParams())
      n1.run(go.Position(utils_test.BOARD_SIZE))

      n2 = dualnet.DualNetRunner(
          exported_model, model_params.DummyMiniGoParams())
      n2.run(go.Position(utils_test.BOARD_SIZE))
  def test_train(self):
    with tempfile.TemporaryDirectory() as working_dir, \
        tempfile.NamedTemporaryFile() as tf_record:
      preprocessing.make_dataset_from_sgf(
          utils_test.BOARD_SIZE, 'example_game.sgf', tf_record.name)
      dualnet.train(
          working_dir, [tf_record.name], 1, model_params.DummyMiniGoParams())
  def extract_data(self, tf_record, filter_amount=1):
    pos_tensor, label_tensors = preprocessing.get_input_tensors(
        model_params.DummyMiniGoParams(), 1, [tf_record], num_repeats=1,
        shuffle_records=False, shuffle_examples=False,
        filter_amount=filter_amount)
    recovered_data = []
    with tf.Session() as sess:
      # Drain the single-pass input pipeline until the iterator is exhausted.
      while True:
        try:
          pos_value, label_values = sess.run([pos_tensor, label_tensors])
          recovered_data.append((
              pos_value,
              label_values['pi_tensor'],
              label_values['value_tensor']))
        except tf.errors.OutOfRangeError:
          break
    return recovered_data
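  # A minimal usage sketch for the extract_data helper above. It reuses only
  # calls already shown in this excerpt (make_dataset_from_sgf and
  # extract_data); the test name and the final assertion are illustrative,
  # not the repository's actual test.
  def test_extract_data_from_sgf_sketch(self):
    with tempfile.NamedTemporaryFile() as tf_record:
      # Convert the example SGF game into a TFRecord of training examples.
      preprocessing.make_dataset_from_sgf(
          utils_test.BOARD_SIZE, 'example_game.sgf', tf_record.name)
      # Read the examples back through the input pipeline without filtering
      # and check that at least one position was recovered.
      recovered_data = self.extract_data(tf_record.name, filter_amount=1)
      self.assertGreater(len(recovered_data), 0)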
def main(_):
  """Run the reinforcement learning loop."""
  tf.logging.set_verbosity(tf.logging.INFO)

  params = _set_params(FLAGS)

  # Use a dummy model with fewer games and iterations for debugging/testing.
  if FLAGS.test:
    params = model_params.DummyMiniGoParams()
    base_dir = FLAGS.base_dir + str(FLAGS.board_size) + '_size_dummy/'
  else:
    # Set directories for models and datasets.
    base_dir = FLAGS.base_dir + str(FLAGS.board_size) + '_size/'

  dirs = utils.MiniGoDirectory(base_dir)

  # Run selfplay only if the user specifies the argument.
  if FLAGS.selfplay:
    selfplay_model_name = FLAGS.selfplay_model_name or utils.get_latest_model(
        dirs.trained_models_dir)[1]
    max_games = FLAGS.selfplay_max_games or params.max_games_per_generation
    run_selfplay(selfplay_model_name, max_games, dirs, params)
    return

  # Run the RL pipeline.
  # If no models have been trained yet, start from the bootstrap model.
  if not os.path.isdir(dirs.trained_models_dir):
    print('No trained model exists! Starting from Bootstrap...')
    print('Creating random initial weights...')
    bootstrap(dirs.estimator_model_dir, dirs.trained_models_dir, params)
  else:
    print('A MiniGo base directory has been found! ')
    print('Start from the last checkpoint...')

  _, best_model_so_far = utils.get_latest_model(dirs.trained_models_dir)
  for rl_iter in range(params.max_iters_per_pipeline):
    print('RL_iteration: {}'.format(rl_iter))

    # Self-play with the best model to generate training data.
    run_selfplay(
        best_model_so_far, params.max_games_per_generation, dirs, params)

    # Gather selfplay data for training.
    print('Gathering game output...')
    gather(dirs.selfplay_dir, dirs.training_chunk_dir, params)

    # Train the next generation model.
    model_num, _ = utils.get_latest_model(dirs.trained_models_dir)
    print('Training on gathered game data...')
    train(dirs.trained_models_dir, dirs.estimator_model_dir,
          dirs.training_chunk_dir, model_num + 1, params)

    # Validate the latest model if needed.
    if FLAGS.validation:
      print('Validating on the holdout game data...')
      validate(dirs.trained_models_dir, dirs.holdout_dir,
               dirs.estimator_model_dir, params)

    _, current_model = utils.get_latest_model(dirs.trained_models_dir)

    if FLAGS.evaluation:  # Perform evaluation if needed.
      print('Evaluate models between {} and {}'.format(
          best_model_so_far, current_model))
      black_model = os.path.join(dirs.trained_models_dir, best_model_so_far)
      white_model = os.path.join(dirs.trained_models_dir, current_model)
      _ensure_dir_exists(dirs.evaluate_dir)
      with utils.logged_timer('Loading weights'):
        black_net = dualnet.DualNetRunner(black_model, params)
        white_net = dualnet.DualNetRunner(white_model, params)

      best_model_so_far = evaluate(
          best_model_so_far, black_net, current_model, white_net,
          dirs.evaluate_dir, params)
      print('Winner of evaluation: {}!'.format(best_model_so_far))
    else:
      best_model_so_far = current_model
def main(_):
  """Run the reinforcement learning loop."""
  tf.logging.set_verbosity(tf.logging.INFO)

  params = _set_params_from_board_size(FLAGS.board_size)

  # Use a dummy model with fewer games and iterations for debugging/testing.
  if FLAGS.debug:
    params = model_params.DummyMiniGoParams()

  # Set directories for models and datasets.
  base_dir = FLAGS.base_dir + str(FLAGS.board_size) + '_board_size/'
  dirs = utils.MiniGoDirectory(base_dir)

  # If no models have been trained yet, start from the bootstrap model.
  if not os.path.isdir(base_dir):
    print('No trained model exists! Starting from Bootstrap...')
    print('Creating random initial weights...')
    bootstrap(dirs.estimator_model_dir, dirs.trained_models_dir, params)
  else:
    print('A MiniGo base directory has been found! ')
    print('Start from the last checkpoint...')

  _, best_model_so_far = utils.get_latest_model(dirs.trained_models_dir)
  for rl_iter in range(params.max_iters_per_pipeline):
    print('RL_iteration: {}'.format(rl_iter))

    # Self-play to generate at least params.max_games_per_generation games.
    selfplay(best_model_so_far, dirs.trained_models_dir, dirs.selfplay_dir,
             dirs.holdout_dir, dirs.sgf_dir, params)
    games = tf.gfile.Glob(
        os.path.join(dirs.selfplay_dir, best_model_so_far, '*.zz'))
    while len(games) < params.max_games_per_generation:
      selfplay(best_model_so_far, dirs.trained_models_dir, dirs.selfplay_dir,
               dirs.holdout_dir, dirs.sgf_dir, params)
      if FLAGS.validation:
        params = model_params.DummyValidationParams()
        selfplay(best_model_so_far, dirs.trained_models_dir,
                 dirs.selfplay_dir, dirs.holdout_dir, dirs.sgf_dir, params)
      games = tf.gfile.Glob(
          os.path.join(dirs.selfplay_dir, best_model_so_far, '*.zz'))

    print('Gathering game output...')
    gather(dirs.selfplay_dir, dirs.training_chunk_dir, params)

    print('Training on gathered game data...')
    train(dirs.trained_models_dir, dirs.estimator_model_dir,
          dirs.training_chunk_dir, params)

    if FLAGS.validation:
      print('Validating on the holdout game data...')
      validate(dirs.trained_models_dir, dirs.holdout_dir,
               dirs.estimator_model_dir, params)

    _, current_model = utils.get_latest_model(dirs.trained_models_dir)

    if FLAGS.evaluation:  # Perform evaluation if needed.
      print('Evaluating the latest model...')
      best_model_so_far = evaluate(dirs.trained_models_dir, best_model_so_far,
                                   current_model, dirs.evaluate_dir, params)
      print('Winner: {}!'.format(best_model_so_far))
    else:
      best_model_so_far = current_model
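# A hedged sketch of the standard TF 1.x entry-point wiring for the loop
# above; the excerpt does not show it, so the module name 'minigo.py' and the
# flag values in the sample invocation are assumptions. Only the flag names
# (--base_dir, --board_size, --debug, --validation, --evaluation) come from
# the code above.
if __name__ == '__main__':
  tf.app.run(main)

# Example invocation (illustrative only):
#   python minigo.py --base_dir=/tmp/minigo/ --board_size=9 \
#       --debug --validation --evaluation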