def main(argv):
  # `counter`, `limit`, `args` and `train_not_test` are module-level globals
  # defined elsewhere in this script.
  global counter
  print(argv, '\n', '=' * 20)
  # Disabled query path:
  # if args.query:
  #   t2t_query.main(argv)
  #   exit()
  if train_not_test:
    # Train in increments: raise the train_steps flag by args.increment each
    # pass, so repeated t2t_trainer.main calls continue from the latest
    # checkpoint and stop at the new step budget.
    while counter < limit or args.no_limit:
      tf.flags.FLAGS.set_default('train_steps', counter + args.increment)
      tf.flags.FLAGS.train_steps = counter + args.increment
      print('flag:', tf.flags.FLAGS.get_flag_value('train_steps', 5),
            str(counter + args.increment))
      t2t_trainer.main(argv)
      counter += args.increment
      print('=' * 50, counter, limit, '=' * 50)
  else:
    t2t_decoder.main(argv)
def train_world_model(problem_name, data_dir, output_dir, hparams, epoch,
                      use_autoencoder=False):
  """Train the world model on problem_name."""
  train_steps = hparams.model_train_steps * (epoch + 2)
  with temporary_flags({
      "data_dir": data_dir,
      "output_dir": output_dir,
      "problem": problem_name,
      "model": hparams.generative_model,
      "hparams_set": hparams.generative_model_params,
      "eval_steps": 100,
      "train_steps": train_steps,
      # Hack: if training on autoencoded frames, autoencoder_path needs to be
      # set so that the problem reports the right sizes for frames.
      "autoencoder_path": "dummy" if use_autoencoder else None,
  }):
    t2t_trainer.main([])
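# Many snippets here rely on a `temporary_flags` context manager that is not
# defined in this file. Below is a minimal sketch of what it presumably does:
# override the given TF flag values for the duration of the block and restore
# the previous values afterwards. This sketch is an assumption, not the
# library's actual implementation.
import contextlib

import tensorflow as tf


@contextlib.contextmanager
def temporary_flags(flag_settings):
  """Temporarily override tf.flags.FLAGS entries, restoring them on exit."""
  old_values = {name: getattr(tf.flags.FLAGS, name) for name in flag_settings}
  for name, value in flag_settings.items():
    setattr(tf.flags.FLAGS, name, value)
  try:
    yield
  finally:
    for name, value in old_values.items():
      setattr(tf.flags.FLAGS, name, value)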
def train_world_model(problem_name, data_dir, output_dir, hparams, epoch,
                      use_autoencoder=False):
  """Train the world model on problem_name."""
  train_steps = hparams.model_train_steps * (epoch + 2)
  with temporary_flags({
      "data_dir": data_dir,
      "output_dir": output_dir,
      "problem": problem_name,
      "model": hparams.generative_model,
      "hparams_set": hparams.generative_model_params,
      "eval_steps": 100,
      "train_steps": train_steps,
      "autoencoder_path": "dummy" if use_autoencoder else None,
  }):
    t2t_trainer.main([])
def train(hparams, output_dir):
  prefix = output_dir
  data_dir = os.path.expanduser(prefix + "/data")
  tmp_dir = os.path.expanduser(prefix + "/tmp")
  output_dir = os.path.expanduser(prefix + "/output")
  tf.gfile.MakeDirs(data_dir)
  tf.gfile.MakeDirs(tmp_dir)
  tf.gfile.MakeDirs(output_dir)
  last_model = ""
  start_time = time.time()
  line = ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> "
  for iloop in range(hparams.epochs):
    # 1. Generate data from the current policy.
    time_delta = time.time() - start_time
    print(line + "Step {}.1. - generate data from policy. "
          "Time: {}".format(iloop, str(datetime.timedelta(seconds=time_delta))))
    FLAGS.problems = "gym_discrete_problem"
    FLAGS.agent_policy_path = last_model
    gym_problem = problems.problem(FLAGS.problems)
    gym_problem.num_steps = hparams.true_env_generator_num_steps
    iter_data_dir = os.path.join(data_dir, str(iloop))
    tf.gfile.MakeDirs(iter_data_dir)
    gym_problem.generate_data(iter_data_dir, tmp_dir)

    # 2. Train the environment model.
    time_delta = time.time() - start_time
    print(line + "Step {}.2. - generate env model. "
          "Time: {}".format(iloop, str(datetime.timedelta(seconds=time_delta))))
    FLAGS.data_dir = iter_data_dir
    FLAGS.output_dir = output_dir
    FLAGS.model = hparams.generative_model
    FLAGS.hparams_set = hparams.generative_model_params
    FLAGS.train_steps = hparams.model_train_steps
    FLAGS.eval_steps = 1
    t2t_trainer.main([])

    # 3. Evaluate the environment model.
    time_delta = time.time() - start_time
    print(line + "Step {}.3. - evaluate env model. "
          "Time: {}".format(iloop, str(datetime.timedelta(seconds=time_delta))))
    gym_simulated_problem = problems.problem("gym_simulated_discrete_problem")
    gym_simulated_problem.num_steps = hparams.simulated_env_generator_num_steps
    gym_simulated_problem.generate_data(iter_data_dir, tmp_dir)

    # 4. Train PPO in the model environment.
    time_delta = time.time() - start_time
    print(line + "Step {}.4. - train PPO in model env. "
          "Time: {}".format(iloop, str(datetime.timedelta(seconds=time_delta))))
    ppo_epochs_num = hparams.ppo_epochs_num
    ppo_hparams = trainer_lib.create_hparams(
        "atari_base",
        "epochs_num={},simulated_environment=True,eval_every_epochs=0,"
        "save_models_every_epochs={}".format(ppo_epochs_num + 1,
                                             ppo_epochs_num),
        data_dir=output_dir)
    ppo_hparams.epoch_length = hparams.ppo_epoch_length
    ppo_dir = tempfile.mkdtemp(dir=data_dir, prefix="ppo_")
    in_graph_wrappers = [
        (TimeLimitWrapper, {"timelimit": 150}),
        (PongT2TGeneratorHackWrapper, {"add_value": -2})
    ] + gym_problem.in_graph_wrappers
    ppo_hparams.add_hparam("in_graph_wrappers", in_graph_wrappers)
    rl_trainer_lib.train(ppo_hparams, "PongNoFrameskip-v4", ppo_dir)
    last_model = ppo_dir + "/model{}.ckpt".format(ppo_epochs_num)
def testTrain(self):
  FLAGS.problem = "tiny_algo"
  FLAGS.model = "transformer"
  FLAGS.hparams_set = "transformer_tiny"
  FLAGS.train_steps = 1
  FLAGS.eval_steps = 1
  FLAGS.output_dir = tf.test.get_temp_dir()
  FLAGS.data_dir = tf.test.get_temp_dir()
  t2t_trainer.main(None)
def train(generate_data=True): FLAGS.problem = "fix_grammar_mistakes" FLAGS.model = "transformer" FLAGS.generate_data = True FLAGS.hparams_set = "transformer_base_small_gpu" FLAGS.t2t_usr_dir = "src" FLAGS.output_dir = "t2t_output" FLAGS.data_dir = "t2t_data" t2t_trainer.main(None)
def train(generate_data=True): FLAGS.problem = "english_grammar_error" FLAGS.model = "transformer" FLAGS.generate_data = True FLAGS.hparams_set = "transformer_big_single_gpu" FLAGS.t2t_usr_dir = "src" FLAGS.output_dir = "finetune" FLAGS.data_dir = "t2t_finetune" FLAGS.train_steps = 350000 t2t_trainer.main(None)
def test_e2e_export_and_query(self):
  """Test that we can export and query the model via tf.serving."""
  FLAGS.t2t_usr_dir = _get_t2t_usr_dir()
  FLAGS.problem = "github_function_docstring"
  FLAGS.data_dir = "/mnt/nfs-east1-d/data"
  FLAGS.tmp_dir = "/mnt/nfs-east1-d/tmp"
  FLAGS.output_dir = tempfile.mkdtemp()
  # FLAGS.export_dir = os.path.join(FLAGS.output_dir, "export")
  FLAGS.model = "similarity_transformer_dev"
  FLAGS.hparams_set = "similarity_transformer_tiny"
  FLAGS.train_steps = 1
  FLAGS.schedule = "train"
  timeout_secs = 10

  usr_dir.import_usr_dir(FLAGS.t2t_usr_dir)
  t2t_trainer.main(None)
  export.main(None)

  # Start a tf model server on an unused port; the process is killed on exit.
  _, server, _ = TensorflowModelServer().RunServer(FLAGS.model,
                                                   FLAGS.output_dir)

  # Query the server.
  doc_query = [1, 2, 3]   # Dummy encoded doc query.
  code_query = [1, 2, 3]  # Dummy encoded code query.

  # Alternative query path that does not go through query.main().
  # TODO: Is servable_name the same as the model name?
  request_fn = serving_utils.make_grpc_request_fn(
      servable_name=FLAGS.model, server=server, timeout_secs=timeout_secs)

  # Compute embeddings. `problem_object` was undefined in the original;
  # looking it up in the registry is an assumed fix.
  # TODO: May need to customize how these queries are fed in, potentially
  # side-stepping serving_utils.predict.
  problem_object = registry.problem(FLAGS.problem)
  encoded_string = serving_utils.predict([doc_query], problem_object,
                                         request_fn)
  encoded_code = serving_utils.predict([code_query], problem_object,
                                       request_fn)
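# For reference, the standard query path in tensor2tensor's serving tools
# looks roughly like the sketch below (modeled on how t2t's query script uses
# serving_utils; treat the exact calls as an approximation, not the library's
# definitive API):
#
#   problem = registry.problem(FLAGS.problem)
#   hparams = tf.contrib.training.HParams(
#       data_dir=os.path.expanduser(FLAGS.data_dir))
#   problem.get_hparams(hparams)
#   request_fn = serving_utils.make_grpc_request_fn(
#       servable_name=FLAGS.model, server=server, timeout_secs=timeout_secs)
#   outputs = serving_utils.predict(["some input text"], problem, request_fn)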
def train_autoencoder(problem_name, data_dir, output_dir, hparams, epoch):
  """Train autoencoder on problem_name."""
  train_steps = hparams.autoencoder_train_steps * (epoch + 2)
  with temporary_flags({
      "problem": problem_name,
      "data_dir": data_dir,
      "output_dir": output_dir,
      "model": "autoencoder_ordered_discrete",
      "hparams_set": "autoencoder_discrete_pong",
      "train_steps": train_steps,
      "eval_steps": 100,
  }):
    t2t_trainer.main([])
def train_world_model(problem_name, data_dir, output_dir, hparams, epoch):
  """Train the world model on problem_name."""
  train_steps = hparams.model_train_steps * (epoch + 2)
  with temporary_flags({
      "data_dir": data_dir,
      "output_dir": output_dir,
      "problem": problem_name,
      "model": hparams.generative_model,
      "hparams_set": hparams.generative_model_params,
      "eval_steps": 100,
      "train_steps": train_steps,
  }):
    t2t_trainer.main([])
def train_autoencoder(problem_name, data_dir, output_dir, hparams, epoch):
  """Train autoencoder on problem_name."""
  additional_steps = 1 + hparams.autoencoder_train_steps_initial_multiplier
  train_steps = hparams.autoencoder_train_steps * (epoch + additional_steps)
  with temporary_flags({
      "problem": problem_name,
      "data_dir": data_dir,
      "output_dir": output_dir,
      "model": "autoencoder_ordered_discrete",
      "hparams_set": hparams.autoencoder_hparams_set,
      "train_steps": train_steps,
      "eval_steps": 100,
  }):
    t2t_trainer.main([])
def test_trains(self):
  """Test that the model trains."""
  FLAGS.t2t_usr_dir = _get_t2t_usr_dir()
  FLAGS.problem = "github_function_docstring"
  FLAGS.data_dir = "/mnt/nfs-east1-d/data"
  FLAGS.tmp_dir = "/mnt/nfs-east1-d/tmp"
  FLAGS.output_dir = tempfile.mkdtemp()
  FLAGS.model = "similarity_transformer_dev"
  FLAGS.hparams_set = "similarity_transformer_tiny"
  FLAGS.train_steps = 1000
  FLAGS.schedule = "train"
  # The nested quotes in the original ("'loss_variant=slicenet'") would break
  # hparams parsing; the flag should be the bare key=value string.
  FLAGS.hparams = "loss_variant=slicenet"
  usr_dir.import_usr_dir(FLAGS.t2t_usr_dir)
  t2t_trainer.main(None)
def train_world_model(problem_name, data_dir, output_dir, hparams, epoch):
  """Train the world model on problem_name."""
  train_steps = hparams.model_train_steps * (
      epoch + hparams.inital_epoch_train_steps_multiplier)
  model_hparams = trainer_lib.create_hparams(hparams.generative_model_params)
  learning_rate = model_hparams.learning_rate_constant
  if epoch > 0:
    learning_rate *= hparams.learning_rate_bump
  with temporary_flags({
      "data_dir": data_dir,
      "output_dir": output_dir,
      "problem": problem_name,
      "model": hparams.generative_model,
      "hparams_set": hparams.generative_model_params,
      "hparams": "learning_rate_constant=%.6f" % learning_rate,
      "eval_steps": 100,
      "local_eval_frequency": 2000,
      "train_steps": train_steps,
  }):
    t2t_trainer.main([])
def test_train_and_export(self):  # pylint: disable=no-self-use
  """Test that we can train and export the model."""
  test_data_dir = os.path.join(os.path.dirname(__file__), "test_data")

  # We don't set t2t_usr_dir: t2t_trainer.main would end up importing that
  # directory, which causes an error because the model ends up being
  # registered twice.
  FLAGS.problem = "kf_github_function_docstring"
  FLAGS.data_dir = tempfile.mkdtemp()
  FLAGS.tmp_dir = tempfile.mkdtemp()
  logging.info("Using data_dir %s", FLAGS.data_dir)
  logging.info("Using tmp_dir %s", FLAGS.tmp_dir)
  FLAGS.output_dir = tempfile.mkdtemp()
  logging.info("Using output_dir %s", FLAGS.output_dir)
  FLAGS.model = similarity_transformer.MODEL_NAME
  FLAGS.hparams_set = "transformer_tiny"
  FLAGS.train_steps = 1
  FLAGS.eval_steps = 5
  # We want to trigger eval.
  FLAGS.local_eval_frequency = 1
  FLAGS.schedule = "continuous_train_and_eval"
  problem = registry.problem(FLAGS.problem)

  # Override the data path prefix and number of shards so we use
  # the test data rather than downloading from GCS.
  problem.DATA_PATH_PREFIX = os.path.join(test_data_dir, "raw_data")
  problem.NUM_SHARDS = 1

  # Generating the data can be slow because it uses an iterative process
  # to compute the vocab. During development you can reuse data_dir between
  # runs; if the vocab and processed input files already exist in that
  # directory, they won't be regenerated.
  problem.generate_data(FLAGS.data_dir, FLAGS.tmp_dir)

  t2t_trainer.main(None)
  export.main(None)
def train_world_model(problem_name, data_dir, output_dir, hparams, epoch):
  """Train the world model on problem_name."""
  train_steps = hparams.model_train_steps * (epoch + 2)
  model_hparams = trainer_lib.create_hparams(hparams.generative_model_params)
  learning_rate = model_hparams.learning_rate_constant
  # Bump the learning rate by 3x after the first epoch. We picked 3x because
  # the default learning rate schedule decreases with 1/sqrt(step):
  # 1/sqrt(10k) = 0.01 and 1/sqrt(100k) ~= 0.0032, so bumping it 3x roughly
  # "goes back" from 100k steps to 10k, which is about as much as going back
  # one epoch in the default schedule. In your experiments, you may want to
  # tune this rate for your own schedule.
  if epoch > 0:
    learning_rate *= 3
  with temporary_flags({
      "data_dir": data_dir,
      "output_dir": output_dir,
      "problem": problem_name,
      "model": hparams.generative_model,
      "hparams_set": hparams.generative_model_params,
      "hparams": "learning_rate_constant=%.6f" % learning_rate,
      "eval_steps": 100,
      "train_steps": train_steps,
  }):
    t2t_trainer.main([])
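# Quick numeric check of the 3x heuristic in the comment above, assuming a
# learning rate proportional to 1/sqrt(step):
import math


def rate(step):
  return 1.0 / math.sqrt(step)


print(rate(10000))                 # 0.01
print(rate(100000))                # ~0.00316
print(rate(10000) / rate(100000))  # sqrt(10) ~= 3.16, hence the ~3x bump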
def main(argv):
  argv = common_flags.update_argv(argv)
  return t2t_trainer.main(argv)
def train(hparams, output_dir): """Training function.""" prefix = output_dir data_dir = os.path.expanduser(prefix + "/data") tmp_dir = os.path.expanduser(prefix + "/tmp") output_dir = os.path.expanduser(prefix + "/output") tf.gfile.MakeDirs(data_dir) tf.gfile.MakeDirs(tmp_dir) tf.gfile.MakeDirs(output_dir) last_model = "" start_time = time.time() line = ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> " epoch_metrics = [] for iloop in range(hparams.epochs): # Generate random frames. if iloop == 0: time_delta = time.time() - start_time tf.logging.info("%s Step %d.0 - generate random data. Time: %s", line, iloop, str(datetime.timedelta(seconds=time_delta))) FLAGS.problem = "gym_discrete_problem_with_agent_on_%s" % hparams.game FLAGS.agent_policy_path = "" gym_problem = registry.problem(FLAGS.problem) gym_problem.settable_num_steps = hparams.true_env_generator_num_steps iter_data_dir = os.path.join(data_dir, "0random") tf.gfile.MakeDirs(iter_data_dir) gym_problem.generate_data(iter_data_dir, tmp_dir) mean_reward = gym_problem.sum_of_rewards / max(1.0, gym_problem.dones) tf.logging.info("%s Step 0.0 random reward: %.4f" % (line, mean_reward)) time_delta = time.time() - start_time tf.logging.info("%s Step %d.1 - generate env model. Time: %s", line, iloop, str(datetime.timedelta(seconds=time_delta))) # Train env model FLAGS.data_dir = iter_data_dir FLAGS.output_dir = output_dir FLAGS.model = hparams.generative_model FLAGS.hparams_set = hparams.generative_model_params FLAGS.train_steps = hparams.model_train_steps * (iloop + 2) FLAGS.eval_steps = 10 t2t_trainer.main([]) # Evaluate and dump frames from env model time_delta = time.time() - start_time tf.logging.info("%s Step %d.1a - evaluate env model. Time: %s", line, iloop, str(datetime.timedelta(seconds=time_delta))) gym_simulated_problem = registry.problem( "gym_simulated_discrete_problem_with_agent_on_%s" % hparams.game) sim_steps = hparams.simulated_env_generator_num_steps gym_simulated_problem.settable_num_steps = sim_steps gym_simulated_problem.real_env_problem = gym_problem gym_simulated_problem.generate_data(iter_data_dir, tmp_dir) model_reward_accuracy = 0.0 if gym_simulated_problem.dones != 0: n = float(gym_simulated_problem.dones) model_reward_accuracy = ( gym_simulated_problem.successful_episode_reward_predictions / n) # Train PPO agent time_delta = time.time() - start_time tf.logging.info("%s Step %d.2 - train PPO in model env. Time: %s", line, iloop, str(datetime.timedelta(seconds=time_delta))) # Setup PPO hparams ppo_hparams = trainer_lib.create_hparams(hparams.ppo_params, data_dir=output_dir) ppo_epochs_num = hparams.ppo_epochs_num ppo_hparams.epochs_num = ppo_epochs_num ppo_hparams.simulated_environment = True ppo_hparams.eval_every_epochs = 0 ppo_hparams.save_models_every_epochs = ppo_epochs_num ppo_hparams.epoch_length = hparams.ppo_epoch_length ppo_hparams.num_agents = hparams.ppo_num_agents ppo_hparams.problem = gym_problem in_graph_wrappers = [ (TimeLimitWrapper, {"timelimit": hparams.ppo_time_limit}), (MaxAndSkipWrapper, {"skip": 4})] in_graph_wrappers += gym_problem.in_graph_wrappers ppo_hparams.add_hparam("in_graph_wrappers", in_graph_wrappers) ppo_dir = generator_utils.make_tmp_dir(dir=data_dir, prefix="ppo_") rl_trainer_lib.train(ppo_hparams, gym_simulated_problem.env_name, ppo_dir) last_model = ppo_dir # Generate environment frames. time_delta = time.time() - start_time tf.logging.info("%s Step %d.3 - generate environment data. 
Time: %s", line, iloop, str(datetime.timedelta(seconds=time_delta))) FLAGS.problem = "gym_discrete_problem_with_agent_on_%s" % hparams.game FLAGS.agent_policy_path = last_model gym_problem = registry.problem(FLAGS.problem) gym_problem.settable_num_steps = hparams.true_env_generator_num_steps iter_data_dir = os.path.join(data_dir, str(iloop)) tf.gfile.MakeDirs(iter_data_dir) gym_problem.generate_data(iter_data_dir, tmp_dir) mean_reward = 0.0 if gym_problem.dones != 0: mean_reward = gym_problem.sum_of_rewards / float(gym_problem.dones) tf.logging.info("%s Step %d mean reward: %.4f" % (line, iloop, mean_reward)) # Report metrics. eval_metrics = {"model_reward_accuracy": model_reward_accuracy, "mean_reward": mean_reward} epoch_metrics.append(eval_metrics) # Report the evaluation metrics from the final epoch return epoch_metrics[-1]
import sys

from tensor2tensor import problems
from tensor2tensor.bin import t2t_trainer
from tensor2tensor.utils import registry

# print(problems.available())  # Show all registered problems.

t2t_trainer.main(sys.argv)
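# Example invocation of the wrapper script above; flags are parsed by
# t2t_trainer from sys.argv. The problem/model/hparams_set values and the
# script name are illustrative placeholders, not taken from this script:
#
#   python train.py \
#     --generate_data \
#     --problem=translate_ende_wmt32k \
#     --model=transformer \
#     --hparams_set=transformer_base \
#     --data_dir=~/t2t_data \
#     --output_dir=~/t2t_train/base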
def main(argv):
  t2t_trainer.main(argv)
def train(hparams, output_dir):
  """Training function."""
  prefix = output_dir
  data_dir = os.path.expanduser(prefix + "/data")
  tmp_dir = os.path.expanduser(prefix + "/tmp")
  output_dir = os.path.expanduser(prefix + "/output")
  tf.gfile.MakeDirs(data_dir)
  tf.gfile.MakeDirs(tmp_dir)
  tf.gfile.MakeDirs(output_dir)
  last_model = ""
  start_time = time.time()
  line = ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> "
  for iloop in range(hparams.epochs):
    # 1. Generate data from the current policy.
    time_delta = time.time() - start_time
    print(line + "Step {}.1. - generate data from policy. "
          "Time: {}".format(iloop, str(datetime.timedelta(seconds=time_delta))))
    FLAGS.problem = "gym_discrete_problem_with_agent_on_%s" % hparams.game
    FLAGS.agent_policy_path = last_model
    gym_problem = registry.problem(FLAGS.problem)
    gym_problem.settable_num_steps = hparams.true_env_generator_num_steps
    iter_data_dir = os.path.join(data_dir, str(iloop))
    tf.gfile.MakeDirs(iter_data_dir)
    gym_problem.generate_data(iter_data_dir, tmp_dir)

    # 2. Train the env model.
    time_delta = time.time() - start_time
    print(line + "Step {}.2. - generate env model. "
          "Time: {}".format(iloop, str(datetime.timedelta(seconds=time_delta))))
    FLAGS.data_dir = iter_data_dir
    FLAGS.output_dir = output_dir
    FLAGS.model = hparams.generative_model
    FLAGS.hparams_set = hparams.generative_model_params
    FLAGS.train_steps = hparams.model_train_steps * (iloop + 2)
    FLAGS.eval_steps = 10
    t2t_trainer.main([])

    # 3. Dump frames from the env model.
    time_delta = time.time() - start_time
    print(line + "Step {}.3. - evaluate env model. "
          "Time: {}".format(iloop, str(datetime.timedelta(seconds=time_delta))))
    gym_simulated_problem = registry.problem(
        "gym_simulated_discrete_problem_with_agent_on_%s" % hparams.game)
    sim_steps = hparams.simulated_env_generator_num_steps
    gym_simulated_problem.settable_num_steps = sim_steps
    gym_simulated_problem.generate_data(iter_data_dir, tmp_dir)

    # 4. Train PPO in the model env.
    time_delta = time.time() - start_time
    print(line + "Step {}.4. - train PPO in model env. "
          "Time: {}".format(iloop, str(datetime.timedelta(seconds=time_delta))))
    ppo_epochs_num = hparams.ppo_epochs_num
    ppo_hparams = trainer_lib.create_hparams(
        "atari_base",
        "epochs_num={},simulated_environment=True,eval_every_epochs=0,"
        "save_models_every_epochs={}".format(ppo_epochs_num + 1,
                                             ppo_epochs_num),
        data_dir=output_dir)
    ppo_hparams.epoch_length = hparams.ppo_epoch_length
    ppo_dir = tempfile.mkdtemp(dir=data_dir, prefix="ppo_")
    in_graph_wrappers = [
        (TimeLimitWrapper, {"timelimit": hparams.ppo_time_limit}),
        (MaxAndSkipWrapper, {"skip": 4})]
    in_graph_wrappers += gym_problem.in_graph_wrappers
    ppo_hparams.add_hparam("in_graph_wrappers", in_graph_wrappers)
    ppo_hparams.num_agents = hparams.ppo_num_agents
    rl_trainer_lib.train(ppo_hparams, gym_simulated_problem.env_name, ppo_dir)
    last_model = ppo_dir + "/model{}.ckpt".format(ppo_epochs_num)
def train(hparams, output_dir):
  """Training function."""
  prefix = output_dir
  data_dir = os.path.expanduser(prefix + "/data")
  tmp_dir = os.path.expanduser(prefix + "/tmp")
  output_dir = os.path.expanduser(prefix + "/output")
  autoencoder_dir = os.path.expanduser(prefix + "/autoencoder")
  tf.gfile.MakeDirs(data_dir)
  tf.gfile.MakeDirs(tmp_dir)
  tf.gfile.MakeDirs(output_dir)
  tf.gfile.MakeDirs(autoencoder_dir)
  last_model = ""
  start_time = time.time()
  line = ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> "
  epoch_metrics = []
  iter_data_dirs = []
  ae_data_dirs = []
  orig_autoencoder_path = FLAGS.autoencoder_path
  for iloop in range(hparams.epochs):
    # Train autoencoder if needed.
    if (hparams.autoencoder_train_steps > 0 and iloop == 0 and
        not orig_autoencoder_path):
      time_delta = time.time() - start_time
      tf.logging.info("%s Step AE - train autoencoder. Time: %s",
                      line, str(datetime.timedelta(seconds=time_delta)))
      with tf.Graph().as_default():
        # Generate data.
        FLAGS.autoencoder_path = ""
        FLAGS.problem = "gym_discrete_problem_with_agent_on_%s" % hparams.game
        FLAGS.agent_policy_path = ""
        gym_problem = registry.problem(FLAGS.problem)
        gym_problem.settable_num_steps = hparams.true_env_generator_num_steps
        ae_data_dir = os.path.join(data_dir, "ae%d" % iloop)
        ae_data_dirs.append(ae_data_dir)
        tf.gfile.MakeDirs(ae_data_dir)
        gym_problem.generate_data(ae_data_dir, tmp_dir)
        if ae_data_dirs[:-1]:
          combine_world_model_train_data(gym_problem, ae_data_dir,
                                         ae_data_dirs[:-1])
        # Train AE.
        FLAGS.data_dir = ae_data_dir
        FLAGS.output_dir = autoencoder_dir
        # TODO(lukaszkaiser): make non-hardcoded here and in gym_problems.py.
        FLAGS.model = "autoencoder_ordered_discrete"
        FLAGS.hparams_set = "autoencoder_discrete_pong"
        FLAGS.train_steps = hparams.autoencoder_train_steps * (iloop + 2)
        FLAGS.eval_steps = 100
        t2t_trainer.main([])
      FLAGS.autoencoder_path = autoencoder_dir

    # Generate random frames.
    if iloop == 0:
      time_delta = time.time() - start_time
      tf.logging.info("%s Step %d.0 - generate random data. Time: %s",
                      line, iloop, str(datetime.timedelta(seconds=time_delta)))
      FLAGS.problem = "gym_discrete_problem_with_agent_on_%s" % hparams.game
      FLAGS.agent_policy_path = ""
      gym_problem = registry.problem(FLAGS.problem)
      gym_problem.settable_num_steps = hparams.true_env_generator_num_steps
      iter_data_dir = os.path.join(data_dir, "0random")
      iter_data_dirs.append(iter_data_dir)
      tf.gfile.MakeDirs(iter_data_dir)
      gym_problem.generate_data(iter_data_dir, tmp_dir)
      mean_reward = gym_problem.sum_of_rewards / max(1.0, gym_problem.dones)
      tf.logging.info("%s Step 0.0 random reward: %.4f" % (line, mean_reward))

    # Train env model.
    time_delta = time.time() - start_time
    tf.logging.info("%s Step %d.1 - generate env model. Time: %s",
                    line, iloop, str(datetime.timedelta(seconds=time_delta)))
    FLAGS.data_dir = iter_data_dir
    FLAGS.output_dir = output_dir
    FLAGS.model = hparams.generative_model
    FLAGS.hparams_set = hparams.generative_model_params
    FLAGS.train_steps = hparams.model_train_steps * (iloop + 2)
    FLAGS.eval_steps = 100
    t2t_trainer.main([])

    # Evaluate and dump frames from env model.
    time_delta = time.time() - start_time
    tf.logging.info("%s Step %d.1a - evaluate env model. Time: %s",
                    line, iloop, str(datetime.timedelta(seconds=time_delta)))
    gym_simulated_problem = registry.problem(
        "gym_simulated_discrete_problem_with_agent_on_%s" % hparams.game)
    sim_steps = hparams.simulated_env_generator_num_steps
    gym_simulated_problem.settable_num_steps = sim_steps
    gym_simulated_problem.real_env_problem = gym_problem
    gym_simulated_problem.simulation_random_starts = False
    gym_simulated_problem.intrinsic_reward_scale = 0.
    gym_simulated_problem.generate_data(iter_data_dir, tmp_dir)
    model_reward_accuracy = 0.0
    if gym_simulated_problem.dones != 0:
      n = float(gym_simulated_problem.dones)
      model_reward_accuracy = (
          gym_simulated_problem.successful_episode_reward_predictions / n)
    tf.logging.info("%s Step %d.1a env model reward accuracy: %.4f" % (
        line, iloop, model_reward_accuracy))

    # Train PPO agent.
    time_delta = time.time() - start_time
    tf.logging.info("%s Step %d.2 - train PPO in model env. Time: %s",
                    line, iloop, str(datetime.timedelta(seconds=time_delta)))
    # Setup PPO hparams.
    ppo_hparams = trainer_lib.create_hparams(hparams.ppo_params,
                                             data_dir=output_dir)
    ppo_epochs_num = hparams.ppo_epochs_num
    ppo_hparams.epochs_num = ppo_epochs_num
    ppo_hparams.simulated_environment = True
    ppo_hparams.simulation_random_starts = hparams.simulation_random_starts
    ppo_hparams.intrinsic_reward_scale = hparams.intrinsic_reward_scale
    ppo_hparams.eval_every_epochs = 0
    ppo_hparams.save_models_every_epochs = ppo_epochs_num
    ppo_hparams.epoch_length = hparams.ppo_epoch_length
    ppo_hparams.num_agents = hparams.ppo_num_agents
    ppo_hparams.problem = gym_problem
    in_graph_wrappers = [
        (TimeLimitWrapper, {"timelimit": hparams.ppo_time_limit}),
        (MaxAndSkipWrapper, {"skip": 4})]
    in_graph_wrappers += gym_problem.in_graph_wrappers
    ppo_hparams.add_hparam("in_graph_wrappers", in_graph_wrappers)
    ppo_dir = generator_utils.make_tmp_dir(dir=data_dir, prefix="ppo_")
    rl_trainer_lib.train(ppo_hparams, gym_simulated_problem.env_name, ppo_dir)
    last_model = ppo_dir

    # Evaluate agent.
    time_delta = time.time() - start_time
    tf.logging.info("%s Step %d.3 - evaluate agent. Time: %s",
                    line, iloop, str(datetime.timedelta(seconds=time_delta)))
    FLAGS.problem = "gym_discrete_problem_with_agent_on_%s" % hparams.game
    FLAGS.agent_policy_path = last_model
    eval_gym_problem = registry.problem(FLAGS.problem)
    eval_gym_problem.settable_num_steps = hparams.true_env_generator_num_steps
    eval_gym_problem.eval_runs = 5
    eval_data_dir = os.path.join(data_dir, str(iloop) + "eval")
    iter_data_dirs.append(eval_data_dir)
    tf.gfile.MakeDirs(eval_data_dir)
    eval_gym_problem.generate_data(eval_data_dir, tmp_dir)

    # Generate environment frames.
    time_delta = time.time() - start_time
    tf.logging.info("%s Step %d.4 - generate environment data. Time: %s",
                    line, iloop, str(datetime.timedelta(seconds=time_delta)))
    gym_problem = registry.problem(FLAGS.problem)
    gym_problem.settable_num_steps = hparams.true_env_generator_num_steps
    iter_data_dir = os.path.join(data_dir, str(iloop))
    iter_data_dirs.append(iter_data_dir)
    tf.gfile.MakeDirs(iter_data_dir)
    gym_problem.generate_data(iter_data_dir, tmp_dir)
    combine_world_model_train_data(gym_problem, iter_data_dir,
                                   iter_data_dirs[:-1])
    mean_reward = 0.0
    if eval_gym_problem.dones != 0:
      mean_reward = eval_gym_problem.sum_of_rewards / float(
          eval_gym_problem.dones)
    tf.logging.info("%s Step %d mean reward: %.4f" % (line, iloop, mean_reward))

    # Report metrics.
    eval_metrics = {"model_reward_accuracy": model_reward_accuracy,
                    "mean_reward": mean_reward}
    epoch_metrics.append(eval_metrics)

  # Report the evaluation metrics from the final epoch.
  return epoch_metrics[-1]
FLAGS.worker_job = '/job:master'
FLAGS.ps_gpu = FLAGS.number_ps_gpu
FLAGS.schedule = 'train'
# Other possible schedules: 'continuous_eval_on_train_data',
# 'continuous_train_and_eval', 'train_and_evaluate'.

cluster = {'ps': ps_hosts, 'master': worker_hosts}
os.environ['TF_CONFIG'] = json.dumps({
    'cluster': cluster,
    'task': {
        'type': 'master',
        'index': task_index
    },
    'environment': 'cloud',
})

FLAGS.problems = PROBLEM
FLAGS.model = 'transformer'
FLAGS.hparams_set = 'transformer_librispeech'
FLAGS.hparams = 'batch_size=%s' % FLAGS.batch_size
FLAGS.train_steps = 2000000
FLAGS.eval_steps = 100
FLAGS.save_checkpoints_secs = 100
FLAGS.output_dir = CHECKPOINTS_PATH
FLAGS.data_dir = DATA_PATH
FLAGS.tmp_dir = os.path.expanduser("~/tmp")

t2t_trainer.main(None)
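# For illustration, with ps_hosts = ["10.0.0.1:2222"], worker_hosts =
# ["10.0.0.2:2222"] and task_index = 0 (hypothetical addresses), the
# TF_CONFIG serialized above would be:
#
#   {"cluster": {"ps": ["10.0.0.1:2222"], "master": ["10.0.0.2:2222"]},
#    "task": {"type": "master", "index": 0},
#    "environment": "cloud"}
#
# Every process in the cluster gets the same "cluster" map but its own
# "task" entry; a parameter server would use {"type": "ps", "index": ...}.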
def main(argv):
  if getattr(FLAGS, "brain_jobs", None):
    FLAGS.worker_job = "/job:%s" % FLAGS.brain_job_name
  return t2t_trainer.main(argv)