def testCreateHparams(self):
  # Get json_path
  pkg, _ = os.path.split(__file__)
  pkg, _ = os.path.split(pkg)
  json_path = os.path.join(
      pkg, "test_data", "transformer_test_ckpt", "hparams.json")

  # Create hparams
  hparams = trainer_lib.create_hparams("transformer_big", "hidden_size=1",
                                       hparams_path=json_path)
  self.assertEqual(2, hparams.num_hidden_layers)  # from json
  self.assertEqual(1, hparams.hidden_size)  # from hparams_overrides_str

  # Compare with base hparams
  base_hparams = trainer_lib.create_hparams("transformer_big")
  self.assertEqual(len(base_hparams.values()), len(hparams.values()))
def test_get_vis_data_from_string(self): visualizer = visualization.AttentionVisualizer( hparams_set, model_name, self.data_dir, problem_name, beam_size=8) input_sentence = 'I have two dogs.' with self.test_session() as sess: sess.run(tf.global_variables_initializer()) _, inp_text, out_text, att_mats = ( visualizer.get_vis_data_from_string(sess, input_sentence)) self.assertAllEqual( [u'I_', u'have_', u'two_', u'dogs_', u'._', u'<EOS>'], inp_text) hparams = trainer_lib.create_hparams( hparams_set, data_dir=self.data_dir, problem_name=problem_name) enc_atts, dec_atts, encdec_atts = att_mats self.assertAllEqual(hparams.num_hidden_layers, len(enc_atts)) enc_atts = enc_atts[0] dec_atts = dec_atts[0] encdec_atts = encdec_atts[0] batch_size = 1 num_heads = hparams.num_heads inp_len = len(inp_text) out_len = len(out_text) self.assertAllEqual( (batch_size, num_heads, inp_len, inp_len), enc_atts.shape) self.assertAllEqual( (batch_size, num_heads, out_len, out_len), dec_atts.shape) self.assertAllEqual( (batch_size, num_heads, out_len, inp_len), encdec_atts.shape)
def testModel(self):
  # HParams
  hparams = trainer_lib.create_hparams(
      "transformer_tiny", data_dir=self.data_dir, problem_name="tiny_algo")

  # Dataset
  problem = hparams.problem
  dataset = problem.dataset(tf.estimator.ModeKeys.TRAIN, self.data_dir)
  dataset = dataset.repeat(None).padded_batch(10, dataset.output_shapes)
  features = dataset.make_one_shot_iterator().get_next()
  features = problem_lib.standardize_shapes(features)

  # Model
  model = registry.model("transformer")(hparams, tf.estimator.ModeKeys.TRAIN)
  logits, losses = model(features)
  self.assertTrue("training" in losses)
  loss = losses["training"]

  with self.test_session() as sess:
    sess.run(tf.global_variables_initializer())
    logits_val, loss_val = sess.run([logits, loss])
    logits_shape = list(logits_val.shape)
    logits_shape[1] = None
    self.assertAllEqual(logits_shape, [10, None, 1, 1, 4])
    self.assertEqual(loss_val.shape, tuple())
def test_no_crash_pendulum(self):
  hparams = trainer_lib.create_hparams(
      "ppo_continuous_action_base",
      TrainTest.test_config)
  hparams.add_hparam("environment_spec", simple_gym_spec("Pendulum-v0"))
  rl_trainer_lib.train(hparams)
def main(_): problem_name = FLAGS.problem if "video" not in problem_name and "gym" not in problem_name: print("This tool only works for video problems.") return mode = tf.estimator.ModeKeys.TRAIN hparams = trainer_lib.create_hparams( FLAGS.hparams_set, FLAGS.hparams, data_dir=os.path.expanduser(FLAGS.data_dir), problem_name=problem_name) dataset = hparams.problem.input_fn(mode, hparams) features = dataset.make_one_shot_iterator().get_next() tf.gfile.MakeDirs(FLAGS.output_dir) base_template = os.path.join(FLAGS.output_dir, FLAGS.problem) count = 0 with tf.train.MonitoredTrainingSession() as sess: while not sess.should_stop(): # TODO(mbz): figure out what the second output is. data, _ = sess.run(features) video_batch = np.concatenate((data["inputs"], data["targets"]), axis=1) for video in video_batch: print("Saving {}/{}".format(count, FLAGS.num_samples)) name = "%s_%05d" % (base_template, count) decoding.save_video(video, name + "_{:05d}.png") create_gif(name) count += 1 if count == FLAGS.num_samples: sys.exit(0)
def test_no_crash_cartpole(self):
  hparams = trainer_lib.create_hparams(
      "ppo_discrete_action_base",
      TrainTest.test_config)
  hparams.add_hparam("environment_spec",
                     standard_atari_env_spec("CartPole-v0"))
  rl_trainer_lib.train(hparams)
def create_hparams():
  """Create hyper-parameters object."""
  return trainer_lib.create_hparams(
      FLAGS.hparams_set,
      FLAGS.hparams,
      data_dir=os.path.expanduser(FLAGS.data_dir),
      problem_name=FLAGS.problem,
      hparams_path=_get_hparams_path())
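# `_get_hparams_path` is used above but not defined in this snippet. A
# minimal sketch of what such a helper presumably does, mirroring the
# hparams.json pattern used elsewhere in this file; this is an assumption,
# not the actual implementation:
def _get_hparams_path():
  hparams_path = None
  if FLAGS.output_dir:
    hparams_path = os.path.join(FLAGS.output_dir, "hparams.json")
  else:
    tf.logging.warning(
        "--output_dir not set; hyper-parameters will come only from "
        "--hparams_set and --hparams.")
  return hparams_path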
def __init__(self, config):
  self.translate_problem = problems.problem(config.PROBLEM)
  self.encoder = self.translate_problem.feature_encoders(config.VOCAB_DIR)
  self.hparams = trainer_lib.create_hparams(
      config.HPARAMS,
      data_dir=config.VOCAB_DIR,
      problem_name=config.PROBLEM)
  self.checkpoint_path = config.CHECKPOINT_PATH
  self.translate_model = registry.model(config.MODEL)(
      self.hparams, Modes.PREDICT)
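# A hypothetical companion method for the constructor above, sketching how
# the stored encoder, checkpoint path and model might be combined for a
# single greedy translation. The [1, length, 1, 1] feature layout, the EOS
# id of 1 and eager-mode checkpoint restoration are assumptions based on
# common tensor2tensor usage, not taken from the original class.
def translate(self, text):
  input_ids = self.encoder["inputs"].encode(text) + [1]  # assumed EOS id
  batch = tf.reshape(tf.constant(input_ids, dtype=tf.int32), [1, -1, 1, 1])
  with tfe.restore_variables_on_create(self.checkpoint_path):
    output_ids = self.translate_model.infer({"inputs": batch})["outputs"]
  return self.encoder["targets"].decode(np.squeeze(output_ids.numpy()))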
def main(_): now = datetime.datetime.now() now_tag = now.strftime("%Y_%m_%d_%H_%M") loop_hparams = trainer_lib.create_hparams(FLAGS.loop_hparams_set, FLAGS.loop_hparams) if FLAGS.worker_to_game_map and FLAGS.total_num_workers > 1: loop_hparams.game = get_game_for_worker(FLAGS.worker_to_game_map, FLAGS.worker_id + 1) tf.logging.info("Set game to %s." % loop_hparams.game) loop_hparams.eval_rl_env_max_episode_steps = FLAGS.eval_step_limit loop_hparams.eval_batch_size = FLAGS.eval_batch_size planner_hparams = trainer_lib.create_hparams(FLAGS.planner_hparams_set, FLAGS.planner_hparams) policy_dir = FLAGS.policy_dir model_dir = FLAGS.model_dir eval_metrics_dir = FLAGS.eval_metrics_dir if FLAGS.output_dir: cur_dir = FLAGS.output_dir if FLAGS.total_num_workers > 1: cur_dir = os.path.join(cur_dir, "%d" % (FLAGS.worker_id + 1)) policy_dir = os.path.join(cur_dir, "policy") model_dir = os.path.join(cur_dir, "world_model") eval_dir_basename = "evaluator_" if FLAGS.agent == "planner": eval_dir_basename = "planner_" eval_metrics_dir = os.path.join(cur_dir, eval_dir_basename + now_tag) tf.logging.info("Writing metrics to %s." % eval_metrics_dir) if not tf.gfile.Exists(eval_metrics_dir): tf.gfile.MkDir(eval_metrics_dir) evaluate( loop_hparams, planner_hparams, policy_dir, model_dir, eval_metrics_dir, FLAGS.agent, FLAGS.mode, FLAGS.eval_with_learner, FLAGS.log_every_steps if FLAGS.log_every_steps > 0 else None, debug_video_path=FLAGS.debug_video_path, num_debug_videos=FLAGS.num_debug_videos, random_starts_step_limit=FLAGS.random_starts_step_limit, )
def get_problem_model_hparams(config):
  """Constructs problem, model, and hparams objects from a config."""
  hparams = trainer_lib.create_hparams(
      config.hparams_set,
      config.hparams,
      data_dir=os.path.expanduser(config.data_dir),
      problem_name=config.problem)
  problem = registry.problem(config.problem)
  model = registry.model(config.model)(hparams, tf.estimator.ModeKeys.EVAL)
  return (problem, model, hparams)
def create_hparams():
  hparams_path = None
  if FLAGS.output_dir:
    hparams_path = os.path.join(FLAGS.output_dir, "hparams.json")
  return trainer_lib.create_hparams(
      FLAGS.hparams_set,
      FLAGS.hparams,
      data_dir=os.path.expanduser(FLAGS.data_dir),
      problem_name=FLAGS.problem,
      hparams_path=hparams_path)
def train_agent(problem_name, agent_model_dir, event_dir, world_model_dir, epoch_data_dir, hparams, epoch=0, is_final_epoch=False): """Train the PPO agent in the simulated environment.""" gym_problem = registry.problem(problem_name) ppo_hparams = trainer_lib.create_hparams(hparams.ppo_params) ppo_params_names = ["epochs_num", "epoch_length", "learning_rate", "num_agents", "optimization_epochs"] for param_name in ppo_params_names: ppo_param_name = "ppo_"+ param_name if ppo_param_name in hparams: ppo_hparams.set_hparam(param_name, hparams.get(ppo_param_name)) ppo_epochs_num = hparams.ppo_epochs_num if is_final_epoch: ppo_epochs_num *= 2 ppo_hparams.epoch_length *= 2 ppo_hparams.save_models_every_epochs = ppo_epochs_num ppo_hparams.world_model_dir = world_model_dir ppo_hparams.add_hparam("force_beginning_resets", True) # Adding model hparams for model specific adjustments model_hparams = trainer_lib.create_hparams(hparams.generative_model_params) ppo_hparams.add_hparam("model_hparams", model_hparams) environment_spec = copy.copy(gym_problem.environment_spec) environment_spec.simulation_random_starts = hparams.simulation_random_starts do_flip = hparams.simulation_flip_first_random_for_beginning environment_spec.simulation_flip_first_random_for_beginning = do_flip environment_spec.intrinsic_reward_scale = hparams.intrinsic_reward_scale ppo_hparams.add_hparam("environment_spec", environment_spec) with temporary_flags({ "problem": problem_name, "model": hparams.generative_model, "hparams_set": hparams.generative_model_params, "output_dir": world_model_dir, "data_dir": epoch_data_dir, }): rl_trainer_lib.train(ppo_hparams, event_dir, agent_model_dir, epoch=epoch)
def create_hparams():
  """Create hparams."""
  if FLAGS.use_tpu and "tpu" not in FLAGS.hparams_set:
    tf.logging.warn("Not all hyperparameter sets work on TPU. "
                    "Prefer hparams_sets with a '_tpu' suffix, "
                    "e.g. transformer_tpu, if available for your model.")
  hparams_path = os.path.join(FLAGS.output_dir, "hparams.json")
  return trainer_lib.create_hparams(
      FLAGS.hparams_set, FLAGS.hparams, hparams_path=hparams_path)
def train_agent_real_env(problem_name, agent_model_dir, event_dir, world_model_dir, epoch_data_dir, hparams, epoch=0, is_final_epoch=False): """Train the PPO agent in the real environment.""" global dumper_path, ppo_data_dumper_counter gym_problem = registry.problem(problem_name) ppo_hparams = trainer_lib.create_hparams(hparams.ppo_params) ppo_params_names = [ "epochs_num", "epoch_length", "learning_rate", "num_agents", "eval_every_epochs", "optimization_epochs", "effective_num_agents" ] # This should be overridden. ppo_hparams.add_hparam("effective_num_agents", None) for param_name in ppo_params_names: ppo_param_name = "real_ppo_" + param_name if ppo_param_name in hparams: ppo_hparams.set_hparam(param_name, hparams.get(ppo_param_name)) ppo_hparams.epochs_num = _ppo_training_epochs(hparams, epoch, is_final_epoch, True) # We do not save model, as that resets frames that we need at restarts. # But we need to save at the last step, so we set it very high. ppo_hparams.save_models_every_epochs = 1000000 environment_spec = copy.copy(gym_problem.environment_spec) if hparams.gather_ppo_real_env_data: # TODO(piotrmilos):This should be refactored assert hparams.real_ppo_num_agents == 1, ( "It is required to use collect with pyfunc_wrapper") ppo_data_dumper_counter = 0 dumper_path = os.path.join(epoch_data_dir, "dumper") tf.gfile.MakeDirs(dumper_path) dumper_spec = [PyFuncWrapper, {"process_fun": ppo_data_dumper}] environment_spec.wrappers.insert(2, dumper_spec) ppo_hparams.add_hparam("environment_spec", environment_spec) with temporary_flags({ "problem": problem_name, "output_dir": world_model_dir, "data_dir": epoch_data_dir, }): rl_trainer_lib.train(ppo_hparams, event_dir + "real", agent_model_dir, name_scope="ppo_real%d" % (epoch + 1))
def main(_): decode_hp = decode_hparams(FLAGS.decode_hparams) trainer_lib.set_random_seed(FLAGS.random_seed) if FLAGS.output_dir is None: raise ValueError("Expected output_dir to be set to a valid path.") hparams = trainer_lib.create_hparams( FLAGS.hparams_set, FLAGS.hparams, data_dir=FLAGS.data_dir, problem_name=FLAGS.problem) if hparams.batch_size != 1: raise ValueError("Set batch-size to be equal to 1") # prepare dataset using Predict mode. dataset_split = "test" if FLAGS.eval_use_test_set else None dataset = hparams.problem.dataset( tf.estimator.ModeKeys.PREDICT, shuffle_files=False, hparams=hparams, data_dir=FLAGS.data_dir, dataset_split=dataset_split) dataset = dataset.batch(hparams.batch_size) dataset = dataset.make_one_shot_iterator().get_next() # Obtain frame interpolations. ops = [glow_ops.get_variable_ddi, glow_ops.actnorm, glow_ops.get_dropout] var_scope = tf.variable_scope("next_frame_glow/body", reuse=tf.AUTO_REUSE) with arg_scope(ops, init=False), var_scope: interpolations, first_frame, last_frame = interpolate( dataset, hparams, decode_hp) var_list = tf.global_variables() saver = tf.train.Saver(var_list) # Get latest checkpoints from model_dir. ckpt_path = tf.train.latest_checkpoint(FLAGS.output_dir) final_dir = get_summaries_log_dir(decode_hp, FLAGS.output_dir, dataset_split) summary_writer = tf.summary.FileWriter(final_dir) global_step = decoding.latest_checkpoint_step(FLAGS.output_dir) sample_ind = 0 num_samples = decode_hp.num_samples all_summaries = [] with tf.train.MonitoredTrainingSession() as sess: saver.restore(sess, ckpt_path) while not sess.should_stop() and sample_ind < num_samples: interp_np, first_frame_np, last_frame_np = sess.run( [interpolations, first_frame, last_frame]) interp_summ = interpolations_to_summary(sample_ind, interp_np, first_frame_np[0], last_frame_np[0], hparams, decode_hp) all_summaries.extend(interp_summ) sample_ind += 1 all_summaries = tf.Summary(value=list(all_summaries)) summary_writer.add_summary(all_summaries, global_step)
def train_agent(real_env, agent_model_dir, event_dir, world_model_dir, data_dir, hparams, completed_epochs_num, epoch=0, is_final_epoch=False): """Train the PPO agent in the simulated environment.""" del data_dir frame_stack_size = hparams.frame_stack_size initial_frame_rollouts = real_env.current_epoch_rollouts( split=tf.contrib.learn.ModeKeys.TRAIN, minimal_rollout_frames=frame_stack_size, ) # TODO(koz4k): Move this to a different module. def initial_frame_chooser(batch_size): """Frame chooser.""" deterministic_initial_frames =\ initial_frame_rollouts[0][:frame_stack_size] if not hparams.simulation_random_starts: # Deterministic starts: repeat first frames from the first rollout. initial_frames = [deterministic_initial_frames] * batch_size else: # Random starts: choose random initial frames from random rollouts. initial_frames = random_rollout_subsequences( initial_frame_rollouts, batch_size, frame_stack_size) if hparams.simulation_flip_first_random_for_beginning: # Flip first entry in the batch for deterministic initial frames. initial_frames[0] = deterministic_initial_frames return np.stack( [[frame.observation.decode() for frame in initial_frame_stack] for initial_frame_stack in initial_frames]) env_fn = make_simulated_env_fn(real_env, hparams, hparams.ppo_num_agents, initial_frame_chooser, world_model_dir) base_algo_str = hparams.base_algo train_hparams = trainer_lib.create_hparams(hparams.base_algo_params) _update_hparams_from_hparams(train_hparams, hparams, base_algo_str + "_") completed_epochs_num += sim_ppo_epoch_increment(hparams, is_final_epoch) learner = LEARNERS[base_algo_str](frame_stack_size, event_dir, agent_model_dir) learner.train(env_fn, train_hparams, completed_epochs_num, simulated=True, epoch=epoch) return completed_epochs_num
def __init__(self, environment_spec, length): """Batch of environments inside the TensorFlow graph.""" observ_space = utils.get_observation_space(environment_spec) initial_frames_problem = environment_spec.initial_frames_problem observ_shape = (initial_frames_problem.frame_height, initial_frames_problem.frame_width, initial_frames_problem.num_channels) observ_space.shape = observ_shape action_space = utils.get_action_space(environment_spec) super(SimulatedBatchEnv, self).__init__(observ_space, action_space) self.length = length self._min_reward = initial_frames_problem.min_reward self._num_frames = environment_spec.video_num_input_frames self._intrinsic_reward_scale = environment_spec.intrinsic_reward_scale model_hparams = trainer_lib.create_hparams( FLAGS.hparams_set, problem_name=FLAGS.problem) model_hparams.force_full_predict = True self._model = registry.model(FLAGS.model)( model_hparams, tf.estimator.ModeKeys.PREDICT) hparams = HParams(video_num_input_frames= environment_spec.video_num_input_frames, video_num_target_frames= environment_spec.video_num_target_frames, environment_spec=environment_spec) # TODO(piotrmilos): check if this shouldn't be tf.estimator.ModeKeys.Predict initial_frames_dataset = initial_frames_problem.dataset( tf.estimator.ModeKeys.TRAIN, FLAGS.data_dir, shuffle_files=False, hparams=hparams).take(1) start_frame = None if environment_spec.simulation_random_starts: dataset = initial_frames_problem.dataset(tf.estimator.ModeKeys.TRAIN, FLAGS.data_dir, shuffle_files=True, hparams=hparams) dataset = dataset.shuffle(buffer_size=1000) if environment_spec.simulation_flip_first_random_for_beginning: # Later flip the first random frame in PPO batch for the true beginning. start = initial_frames_dataset.make_one_shot_iterator().get_next() start_frame = tf.expand_dims(start["inputs"], axis=0) else: dataset = initial_frames_dataset dataset = dataset.map(lambda x: x["inputs"]).repeat() self.history_buffer = HistoryBuffer( dataset, self.length, self.observ_dtype, start_frame=start_frame) self._observ = tf.Variable( tf.zeros((len(self),) + observ_shape, self.observ_dtype), trainable=False)
def make_simulated_env_fn_from_hparams(
    real_env, hparams, batch_size, initial_frame_chooser, model_dir,
    sim_video_dir=None):
  """Creates a simulated env_fn."""
  model_hparams = trainer_lib.create_hparams(hparams.generative_model_params)
  if hparams.wm_policy_param_sharing:
    model_hparams.optimizer_zero_grads = True
  return make_simulated_env_fn(
      reward_range=real_env.reward_range,
      observation_space=real_env.observation_space,
      action_space=real_env.action_space,
      frame_stack_size=hparams.frame_stack_size,
      frame_height=real_env.frame_height,
      frame_width=real_env.frame_width,
      initial_frame_chooser=initial_frame_chooser,
      batch_size=batch_size,
      model_name=hparams.generative_model,
      # Reuse the hparams built above; creating a fresh copy here would
      # silently discard the optimizer_zero_grads override.
      model_hparams=model_hparams,
      model_dir=model_dir,
      intrinsic_reward_scale=hparams.intrinsic_reward_scale,
      sim_video_dir=sim_video_dir,
  )
def make_simulated_env_spec(real_env, hparams):
  """Creates a simulated environment_spec."""
  return rl.standard_atari_env_simulated_spec(
      real_env,
      intrinsic_reward_scale=hparams.intrinsic_reward_scale,
      model_name=hparams.generative_model,
      model_hparams=trainer_lib.create_hparams(
          hparams.generative_model_params),
      # Hardcoded for now. TODO(koz4k): Make it a hparam.
      video_num_input_frames=4,
      video_num_target_frames=1)
def __init__(self, model_dir, config): self._signatures = dict() self._graph = tf.Graph() with self._graph.as_default(): tf.set_random_seed(1234) # initialize the hparams, problem and model self._hparams = trainer_lib.create_hparams( config['hparams_set'], config.get('hparams_overrides', ''), os.path.join(model_dir, 'assets.extra'), config['problem']) problem = self._hparams.problem decode_hp = decoding.decode_hparams( config.get('decode_hparams', '')) run_config = trainer_lib.create_run_config(self._hparams, model_dir=model_dir, schedule="decode") model_fn = t2t_model.T2TModel.make_estimator_model_fn( config['model'], self._hparams, decode_hparams=decode_hp) # create the orediction signatures (input/output ops) serving_receiver = problem.direct_serving_input_fn(self._hparams) estimator_spec = model_fn(serving_receiver.features, None, mode=tf.estimator.ModeKeys.PREDICT, params=None, config=run_config) for key, sig_spec in estimator_spec.export_outputs.items(): # only PredictOutputs are supported, ClassificationOutput # and RegressionOutputs are weird artifacts of Google shipping # almost unmodified Tensorflow graphs through their Cloud ML # platform assert isinstance(sig_spec, tf.estimator.export.PredictOutput) sig = Signature(key, serving_receiver.receiver_tensors, sig_spec.outputs) self._signatures[key] = sig # load the model & init the session scaffold = tf.train.Scaffold() checkpoint_filename = os.path.join( model_dir, tf.saved_model.constants.VARIABLES_DIRECTORY, tf.saved_model.constants.VARIABLES_FILENAME) session_creator = tf.train.ChiefSessionCreator( scaffold, config=run_config.session_config, checkpoint_filename_with_path=checkpoint_filename) self._session = tf.train.MonitoredSession( session_creator=session_creator)
def testSparseTransformer(self): """Test sparse transformer decode.""" with self.cached_session() as sess: with tf.variable_scope("sparse_transformer", reuse=tf.AUTO_REUSE): hparams_set = "sparse_transformer_local" problem = "" hparams = trainer_lib.create_hparams(hparams_set, problem_name=problem) hparams.layer_prepostprocess_dropout = 0. hparams.dropout = 0. hparams.num_encoder_layers = 0 hparams.num_decoder_layers = 2 hparams.local_relative = False hparams.query_shape = (20,) hparams.memory_flange = (0,) hparams.max_length = 200 sparse_transformer = sptf.SparseTransformer(hparams) sparse_transformer.set_mode(tf.estimator.ModeKeys.PREDICT) sparse_transformer.vocab_size = 50 features = {} decode_step = 10 cache = {} # Testing that changing target tokens beyond decode_step has no effect # i = 0 or less should have the next cell sum == 0 i = -5 targets_prefix = tf.random.stateless_uniform( [1, decode_step - i], minval=0, maxval=sparse_transformer.vocab_size, dtype=tf.dtypes.int32, seed=(75, 48)) zeros = tf.zeros([1, hparams.max_length - decode_step + i], dtype=tf.int32) features["targets"] = tf.concat([targets_prefix, zeros], axis=-1) output_step1 = sparse_transformer.body(features, decode_step=decode_step, cache=cache) features["targets"] = tf.concat([ targets_prefix, tf.random.stateless_uniform( [1, hparams.max_length - decode_step + i], minval=0, maxval=sparse_transformer.vocab_size, dtype=tf.dtypes.int32, seed=(67, 89))], axis=-1) output_step2 = sparse_transformer.body(features, decode_step=decode_step, cache=cache) initializer = tf.global_variables_initializer() if initializer is not None: initializer.run() output1_np = sess.run(output_step1) output2_np = sess.run(output_step2) self.assertEqual(output1_np.shape, output2_np.shape)
def testCompatibility(self):
  model = "transformer"
  hp_set = "transformer_test"
  problem_name = "translate_ende_wmt8k"

  hp = trainer_lib.create_hparams(
      hp_set, data_dir=_DATA_DIR, problem_name=problem_name)
  run_config = trainer_lib.create_run_config(model_dir=_CKPT_DIR)

  estimator = trainer_lib.create_estimator(model, hp, run_config)
  for prediction in estimator.predict(self.input_fn):
    self.assertEqual(prediction["outputs"].dtype, np.int32)
def encode_env_frames(problem_name, ae_problem_name, ae_hparams_set, autoencoder_path, epoch_data_dir): """Encode all frames from problem_name and write out as ae_problem_name.""" with tf.Graph().as_default(): ae_hparams = trainer_lib.create_hparams(ae_hparams_set, problem_name=problem_name) problem = ae_hparams.problem model = registry.model("autoencoder_ordered_discrete")( ae_hparams, tf.estimator.ModeKeys.EVAL) ae_problem = registry.problem(ae_problem_name) ae_training_paths = ae_problem.training_filepaths( epoch_data_dir, 10, True) ae_eval_paths = ae_problem.dev_filepaths(epoch_data_dir, 1, True) skip_train = False skip_eval = False for path in ae_training_paths: if tf.gfile.Exists(path): skip_train = True break for path in ae_eval_paths: if tf.gfile.Exists(path): skip_eval = True break # Encode train data if not skip_train: dataset = problem.dataset(tf.estimator.ModeKeys.TRAIN, epoch_data_dir, shuffle_files=False, output_buffer_size=100, preprocess=False) encode_dataset(model, dataset, problem=problem, ae_hparams=ae_hparams, autoencoder_path=autoencoder_path, out_files=ae_training_paths) # Encode eval data if not skip_eval: dataset = problem.dataset(tf.estimator.ModeKeys.EVAL, epoch_data_dir, shuffle_files=False, output_buffer_size=100, preprocess=False) encode_dataset(model, dataset, problem=problem, ae_hparams=ae_hparams, autoencoder_path=autoencoder_path, out_files=ae_eval_paths)
def hparams_set_up(problem_name, data_dir, hparam_set=None,
                   hparams_override=None):
  if hparam_set:
    hparams = trainer_lib.create_hparams(
        hparam_set, hparams_overrides_str=hparams_override)
  else:
    hparams = common_hparams.basic_params1()
  hparams.data_dir = data_dir
  hparams_lib.add_problem_hparams(hparams, problem_name)
  return hparams, hparams.problem
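# A minimal usage sketch of the helper above; the problem name, data
# directory, hparams set and override string are illustrative assumptions.
hparams, problem = hparams_set_up(
    problem_name="translate_ende_wmt8k",
    data_dir=os.path.expanduser("~/t2t_data"),
    hparam_set="transformer_base",
    hparams_override="hidden_size=256")
print(problem.name, hparams.hidden_size)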
def build_model(hparams_set, hparamss, model_name, data_dir, problem_name,
                beam_size=1):
  """Build the graph required to fetch the attention weights.

  Args:
    hparams_set: HParams set to build the model with.
    hparamss: String of comma-separated hyperparameter overrides applied on
      top of hparams_set (passed through to trainer_lib.create_hparams).
    model_name: Name of model.
    data_dir: Path to directory containing training data.
    problem_name: Name of problem.
    beam_size: (Optional) Number of beams to use when decoding a translation.
      If set to 1 (default) then greedy decoding is used.

  Returns:
    Tuple of (
        inputs: Input placeholder to feed in ids to be translated.
        targets: Targets placeholder to feed to translation when fetching
            attention weights.
        samples: Tensor representing the ids of the translation.
        att_mats: Tensors representing the attention weights.
    )
  """
  hparams = trainer_lib.create_hparams(
      hparams_set, hparamss, data_dir=data_dir, problem_name=problem_name)
  translate_model = registry.model(model_name)(
      hparams, tf.estimator.ModeKeys.EVAL)

  inputs = tf.placeholder(tf.int32, shape=(1, None, 1, 1), name='inputs')
  targets = tf.placeholder(tf.int32, shape=(1, None, 1, 1), name='targets')
  translate_model({
      'inputs': inputs,
      'targets': targets,
  })

  # Must be called after building the training graph, so that the dict will
  # have been filled with the attention tensors. BUT before creating the
  # inference graph, otherwise the dict will be filled with tensors from
  # inside a tf.while_loop from decoding which are marked unfetchable.
  att_mats = get_att_mats(translate_model)

  with tf.variable_scope(tf.get_variable_scope(), reuse=True):
    samples = translate_model.infer({
        'inputs': inputs,
    }, beam_size=beam_size)['outputs']

  return inputs, targets, samples, att_mats
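# Sketch of how the tensors returned by build_model might be consumed in a
# plain session. The encoded input ids, checkpoint handling and surrounding
# variables (hparams_set, model_name, data_dir, problem_name) are
# illustrative assumptions rather than part of the function above.
inputs_ph, targets_ph, samples, att_mats = build_model(
    hparams_set, "", model_name, data_dir, problem_name)
with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())  # or restore a trained checkpoint
  encoded_ids = np.reshape([2, 3, 4, 1], [1, -1, 1, 1])  # assumed ids + EOS
  translation_ids = sess.run(samples, feed_dict={inputs_ph: encoded_ids})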
def __init__(self, len, observ_shape, observ_dtype, action_shape, action_dtype): """Batch of environments inside the TensorFlow graph. Args: batch_env: Batch environment. """ self.length = len hparams = trainer_lib.create_hparams(FLAGS.hparams_set, problem_name=FLAGS.problems, data_dir="UNUSED") hparams.force_full_predict = True self._model = registry.model(FLAGS.model)( hparams, tf.estimator.ModeKeys.PREDICT) self.action_shape = action_shape self.action_dtype = action_dtype with open( pkg_resources.resource_filename("tensor2tensor.rl.envs", "frame1.png"), "rb") as f: png_frame_1_raw = f.read() with open( pkg_resources.resource_filename("tensor2tensor.rl.envs", "frame2.png"), "rb") as f: png_frame_2_raw = f.read() self.frame_1 = tf.expand_dims( tf.cast(tf.image.decode_png(png_frame_1_raw), tf.float32), 0) self.frame_2 = tf.expand_dims( tf.cast(tf.image.decode_png(png_frame_2_raw), tf.float32), 0) shape = (self.length, ) + observ_shape self._observ = tf.Variable(tf.zeros(shape, observ_dtype), trainable=False) self._prev_observ = tf.Variable(tf.zeros(shape, observ_dtype), trainable=False) self._starting_observ = tf.Variable(tf.zeros(shape, observ_dtype), trainable=False) observ_dtype = tf.int64 self._observ_not_sure_why_we_need_this = tf.Variable(tf.zeros( (self.length, ) + observ_shape, observ_dtype), name='observ_new', trainable=False) self._reward_not_sure_why_we_need_this = tf.Variable(tf.zeros( (self.length, 1), observ_dtype), name='reward_new', trainable=False)
def testCompatibility(self):
  model = "transformer"
  hp_set = "transformer_test"
  problem_name = "translate_ende_wmt8k"

  hp = trainer_lib.create_hparams(
      hp_set, data_dir=_DATA_DIR, problem_name=problem_name)
  run_config = trainer_lib.create_run_config(model, model_dir=_CKPT_DIR)

  estimator = trainer_lib.create_estimator(model, hp, run_config)
  for prediction in estimator.predict(self.input_fn):
    self.assertEqual(prediction["outputs"].dtype, np.int32)
def example_apply_model(ckpt_path, hparams_set="img2img_transformer2d_tiny", problem_name="img2img_allen_brain_dim8to32", model_name="img2img_transformer", data_dir="/mnt/nfs-east1-d/data", input_dim=8, output_dim=32): # HACK: Avoid re-instantiating the model which causes problems... # TODO: Better way to handle this, e.g. delete from globals. if 'model' not in globals(): hp = trainer_lib.create_hparams(hparams_set, data_dir=data_dir, problem_name=problem_name) model = registry.model(model_name)(hp, Modes.TRAIN) problem_object = problems.problem(problem_name) dataset = problem_object.dataset(Modes.TRAIN, data_dir) with tfe.restore_variables_on_create(ckpt_path): for count, example in enumerate(tfe.Iterator(dataset)): if count > 1234: break # Example input fig = plt.figure(figsize=(8, 8)) example["inputs"] = tf.reshape(example["inputs"], [1, input_dim, input_dim, 3]) fig.add_subplot(1, 3, 1) plt.imshow(example["inputs"].numpy()[0]) # Example target fig.add_subplot(1, 3, 2) example["targets"] = tf.reshape(example["targets"], [1, output_dim, output_dim, 3]) plt.imshow(example["targets"].numpy()[0]) # Dummy target (expected by model) example["targets"] = tf.reshape( tf.zeros((1, output_dim, output_dim, 3), dtype=np.uint8), [1, output_dim, output_dim, 3]) # Produce and display prediction predictions, _ = model(example) fig.add_subplot(1, 3, 3) inferred = demo.infer(predictions) plt.imshow(inferred) plt.show() return example, predictions, inferred
def train_agent(problem_name, agent_model_dir, event_dir, world_model_dir, epoch_data_dir, hparams, autoencoder_path=None, epoch=0): """Train the PPO agent in the simulated environment.""" gym_problem = registry.problem(problem_name) ppo_hparams = trainer_lib.create_hparams(hparams.ppo_params) ppo_params_names = ["epochs_num", "epoch_length", "learning_rate", "num_agents", "optimization_epochs"] for param_name in ppo_params_names: ppo_param_name = "ppo_"+ param_name if ppo_param_name in hparams: ppo_hparams.set_hparam(param_name, hparams.get(ppo_param_name)) ppo_epochs_num = hparams.ppo_epochs_num ppo_hparams.save_models_every_epochs = ppo_epochs_num ppo_hparams.world_model_dir = world_model_dir ppo_hparams.add_hparam("force_beginning_resets", True) # Adding model hparams for model specific adjustments model_hparams = trainer_lib.create_hparams(hparams.generative_model_params) ppo_hparams.add_hparam("model_hparams", model_hparams) environment_spec = copy.copy(gym_problem.environment_spec) environment_spec.simulation_random_starts = hparams.simulation_random_starts environment_spec.intrinsic_reward_scale = hparams.intrinsic_reward_scale ppo_hparams.add_hparam("environment_spec", environment_spec) with temporary_flags({ "problem": problem_name, "model": hparams.generative_model, "hparams_set": hparams.generative_model_params, "output_dir": world_model_dir, "data_dir": epoch_data_dir, "autoencoder_path": autoencoder_path, }): rl_trainer_lib.train(ppo_hparams, event_dir, agent_model_dir, epoch=epoch)
def train(hparams, output_dir, report_fn=None):
  hparams = initialize_env_specs(hparams)
  learner = LEARNERS[hparams.base_algo](
      hparams.frame_stack_size, FLAGS.output_dir, output_dir
  )
  policy_hparams = trainer_lib.create_hparams(hparams.base_algo_params)
  update_hparams_from_hparams(
      policy_hparams, hparams, hparams.base_algo + "_"
  )
  learner.train(
      hparams.env_fn, policy_hparams, simulated=False, save_continuously=True,
      epoch=0, eval_env_fn=hparams.eval_env_fn, report_fn=report_fn
  )
def train_agent(real_env, learner, world_model_dir, hparams, epoch): """Train the PPO agent in the simulated environment.""" frame_stack_size = hparams.frame_stack_size initial_frame_rollouts = real_env.current_epoch_rollouts( split=tf.contrib.learn.ModeKeys.TRAIN, minimal_rollout_frames=frame_stack_size, ) # TODO(koz4k): Move this to a different module. def initial_frame_chooser(batch_size): """Frame chooser.""" deterministic_initial_frames =\ initial_frame_rollouts[0][:frame_stack_size] if not hparams.simulation_random_starts: # Deterministic starts: repeat first frames from the first rollout. initial_frames = [deterministic_initial_frames] * batch_size else: # Random starts: choose random initial frames from random rollouts. initial_frames = random_rollout_subsequences( initial_frame_rollouts, batch_size, frame_stack_size) if hparams.simulation_flip_first_random_for_beginning: # Flip first entry in the batch for deterministic initial frames. initial_frames[0] = deterministic_initial_frames return np.stack( [[frame.observation.decode() for frame in initial_frame_stack] for initial_frame_stack in initial_frames]) env_fn = make_simulated_env_fn( real_env, hparams, hparams.simulated_batch_size, initial_frame_chooser, world_model_dir, os.path.join(learner.agent_model_dir, "sim_videos_{}".format(epoch))) base_algo_str = hparams.base_algo train_hparams = trainer_lib.create_hparams(hparams.base_algo_params) if hparams.wm_policy_param_sharing: train_hparams.optimizer_zero_grads = True rl_utils.update_hparams_from_hparams(train_hparams, hparams, base_algo_str + "_") final_epoch = hparams.epochs - 1 is_special_epoch = (epoch + 3) == final_epoch or (epoch + 7) == final_epoch is_final_epoch = epoch == final_epoch env_step_multiplier = 3 if is_final_epoch else 2 if is_special_epoch else 1 learner.train(env_fn, train_hparams, simulated=True, save_continuously=True, epoch=epoch, env_step_multiplier=env_step_multiplier)
def main(_):
  data_dir = os.path.expanduser(FLAGS.data_dir)
  ckpt_dir = FLAGS.ckpt_dir
  percent = float(FLAGS.threshold_percentile) / 100
  new_ckpt = os.path.join(ckpt_dir, 'pruned/pruned_{}'.format(percent))
  hparams = trainer_lib.create_hparams(
      hparams_set=FLAGS.hparams_set,
      data_dir=data_dir,
      problem_name=FLAGS.problem_name)
  convert_lib.prune_checkpoint(
      hparams,
      ckpt_dir=ckpt_dir,
      threshold_percentile=percent,
      new_ckpt=new_ckpt)
def train(hparams, output_dir, report_fn=None):
  hparams = initialize_env_specs(hparams)
  learner = LEARNERS[hparams.base_algo](
      hparams.frame_stack_size, FLAGS.output_dir, output_dir)
  policy_hparams = trainer_lib.create_hparams(hparams.base_algo_params)
  update_hparams_from_hparams(
      policy_hparams, hparams, hparams.base_algo + "_")
  learner.train(
      hparams.env_fn, policy_hparams, simulated=False, save_continuously=True,
      epoch=0, eval_env_fn=hparams.eval_env_fn, report_fn=report_fn)
def make_simulated_env_fn(
    real_env, hparams, batch_size, initial_frame_chooser, model_dir):
  """Creates a simulated env_fn."""
  return rl.make_simulated_env_fn(
      reward_range=real_env.reward_range,
      observation_space=real_env.observation_space,
      action_space=real_env.action_space,
      frame_stack_size=hparams.frame_stack_size,
      initial_frame_chooser=initial_frame_chooser,
      batch_size=batch_size,
      model_name=hparams.generative_model,
      model_hparams=trainer_lib.create_hparams(
          hparams.generative_model_params),
      model_dir=model_dir,
      intrinsic_reward_scale=hparams.intrinsic_reward_scale,
  )
def __init__(self, environment_spec, length): """Batch of environments inside the TensorFlow graph.""" observ_space = utils.get_observation_space(environment_spec) initial_frames_problem = environment_spec.initial_frames_problem observ_shape = (initial_frames_problem.frame_height, initial_frames_problem.frame_width, initial_frames_problem.num_channels) observ_space.shape = observ_shape action_space = utils.get_action_space(environment_spec) super(SimulatedBatchEnv, self).__init__(observ_space, action_space) self.length = length self._min_reward = initial_frames_problem.min_reward self._num_frames = environment_spec.video_num_input_frames self._intrinsic_reward_scale = environment_spec.intrinsic_reward_scale model_hparams = trainer_lib.create_hparams(FLAGS.hparams_set, problem_name=FLAGS.problem) model_hparams.force_full_predict = True self._model = registry.model(FLAGS.model)( model_hparams, tf.estimator.ModeKeys.PREDICT) hparams = HParams( video_num_input_frames=environment_spec.video_num_input_frames, video_num_target_frames=environment_spec.video_num_target_frames, environment_spec=environment_spec) if environment_spec.simulation_random_starts: dataset = initial_frames_problem.dataset( tf.estimator.ModeKeys.TRAIN, FLAGS.data_dir, shuffle_files=True, hparams=hparams) dataset = dataset.shuffle(buffer_size=1000) else: dataset = initial_frames_problem.dataset( tf.estimator.ModeKeys.TRAIN, FLAGS.data_dir, shuffle_files=False, hparams=hparams).take(1) dataset = dataset.map(lambda x: x["inputs"]).repeat() self.history_buffer = HistoryBuffer(dataset, self.length, self.observ_dtype) self._observ = tf.Variable(tf.zeros((len(self), ) + observ_shape, self.observ_dtype), trainable=False)
def __init__(self, environment_lambda, length, problem): """Batch of environments inside the TensorFlow graph.""" self.length = length self._num_frames = problem.num_input_frames # TODO(piotrmilos): For the moment we are fine with that. assert self.length == 1, "Currently SimulatedBatchEnv support only one env" initialization_env = environment_lambda() hparams = trainer_lib.create_hparams(FLAGS.hparams_set, problem_name=FLAGS.problem) hparams.force_full_predict = True self._model = registry.model(FLAGS.model)( hparams, tf.estimator.ModeKeys.PREDICT) self.action_space = initialization_env.action_space self.action_shape = list(initialization_env.action_space.shape) self.action_dtype = tf.int32 obs = [] if hasattr(initialization_env.env, "get_starting_data"): obs, _, _ = initialization_env.env.get_starting_data() else: # TODO(piotrmilos): Ancient method for environments not supporting # get_starting_data. This is probably not compatibile with # self._num_frames != 2 and should be removed at some point. num_frames = self._num_frames initialization_env.reset() skip_frames = 20 for _ in range(skip_frames): initialization_env.step(0) for _ in range(num_frames): obs.append(initialization_env.step(0)[0]) initial_frames = tf.stack(obs) initial_frames = tf.cast(initial_frames, tf.float32) self.history_buffer = HistoryBuffer(initial_frames, problem=problem) height, width, channels = initialization_env.observation_space.shape # TODO(lukaszkaiser): remove this and just use Problem.frame_height. if FLAGS.autoencoder_path: height = problem.frame_height width = problem.frame_width shape = (self.length, height, width, channels) with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE): self._observ = tf.get_variable("observ", shape, initializer=tf.zeros_initializer, trainable=False)
def make_simulated_env_fn(
    real_env, hparams, batch_size, initial_frame_chooser, model_dir):
  """Creates a simulated env_fn."""
  return rl.make_simulated_env_fn(
      reward_range=real_env.reward_range,
      observation_space=real_env.observation_space,
      action_space=real_env.action_space,
      frame_stack_size=hparams.frame_stack_size,
      frame_height=real_env.frame_height,
      frame_width=real_env.frame_width,
      initial_frame_chooser=initial_frame_chooser,
      batch_size=batch_size,
      model_name=hparams.generative_model,
      model_hparams=trainer_lib.create_hparams(
          hparams.generative_model_params),
      model_dir=model_dir,
      intrinsic_reward_scale=hparams.intrinsic_reward_scale,
  )
def testBasicFcRelu(self):
  # np.random.randint replaces the deprecated np.random.random_integers;
  # the sampled ranges (0-255 pixels, 0-9 labels) are unchanged.
  x = np.random.randint(256, size=(1, 28, 28, 1))
  y = np.random.randint(10, size=(1, 1))
  hparams = trainer_lib.create_hparams(
      "basic_fc_small", problem_name="image_mnist", data_dir=".")
  with self.test_session() as session:
    features = {
        "inputs": tf.constant(x, dtype=tf.int32),
        "targets": tf.constant(y, dtype=tf.int32),
    }
    model = basic.BasicFcRelu(hparams, tf.estimator.ModeKeys.TRAIN)
    logits, _ = model(features)
    session.run(tf.global_variables_initializer())
    res = session.run(logits)
  self.assertEqual(res.shape, (1, 1, 1, 1, 10))
def train_agent(problem_name, agent_model_dir, event_dir, world_model_dir, epoch_data_dir, hparams, autoencoder_path=None, epoch=0): """Train the PPO agent in the simulated environment.""" gym_problem = registry.problem(problem_name) ppo_hparams = trainer_lib.create_hparams(hparams.ppo_params) ppo_epochs_num = hparams.ppo_epochs_num ppo_hparams.epochs_num = ppo_epochs_num ppo_hparams.simulated_environment = True ppo_hparams.simulation_random_starts = hparams.simulation_random_starts ppo_hparams.intrinsic_reward_scale = hparams.intrinsic_reward_scale ppo_hparams.eval_every_epochs = 50 ppo_hparams.save_models_every_epochs = ppo_epochs_num ppo_hparams.epoch_length = hparams.ppo_epoch_length ppo_hparams.num_agents = hparams.ppo_num_agents ppo_hparams.problem = gym_problem ppo_hparams.world_model_dir = world_model_dir if hparams.ppo_learning_rate: ppo_hparams.learning_rate = hparams.ppo_learning_rate # 4x for the StackAndSkipWrapper minus one to always finish for reporting. ppo_time_limit = (ppo_hparams.epoch_length - 1) * 4 in_graph_wrappers = [(TimeLimitWrapper, { "timelimit": ppo_time_limit }), (StackAndSkipWrapper, { "skip": 4 })] in_graph_wrappers += gym_problem.in_graph_wrappers ppo_hparams.add_hparam("in_graph_wrappers", in_graph_wrappers) with temporary_flags({ "problem": problem_name, "model": hparams.generative_model, "hparams_set": hparams.generative_model_params, "output_dir": world_model_dir, "data_dir": epoch_data_dir, "autoencoder_path": autoencoder_path, }): rl_trainer_lib.train(ppo_hparams, gym_problem.env_name, event_dir, agent_model_dir, epoch=epoch)
def testBasicFcRelu(self):
  x = np.random.randint(256, size=(1, 28, 28, 1))
  y = np.random.randint(10, size=(1, 1))
  hparams = trainer_lib.create_hparams(
      "basic_fc_small", problem_name="image_mnist", data_dir=".")
  with self.test_session() as session:
    features = {
        "inputs": tf.constant(x, dtype=tf.int32),
        "targets": tf.constant(y, dtype=tf.int32),
    }
    model = basic.BasicFcRelu(hparams, tf_estimator.ModeKeys.TRAIN)
    logits, _ = model(features)
    session.run(tf.global_variables_initializer())
    res = session.run(logits)
  self.assertEqual(res.shape, (1, 1, 1, 1, 10))
def testMultipleTargetModalities(self): # Use existing hparams and override target modality. hparams = trainer_lib.create_hparams( "transformer_tiny", data_dir=algorithmic.TinyAlgo.data_dir, problem_name="tiny_algo") # Manually turn off sharing. It is not currently supported for multitargets. hparams.shared_embedding_and_softmax_weights = 0 # pylint: disable=line-too-long hparams.problem_hparams.modality = { "targets": hparams.problem_hparams.modality["targets"], "targets_A": hparams.problem_hparams.modality["targets"], "targets_B": hparams.problem_hparams.modality["targets"], } hparams.problem_hparams.vocab_size = { "targets": hparams.problem_hparams.vocab_size["targets"], "targets_A": hparams.problem_hparams.vocab_size["targets"], "targets_B": hparams.problem_hparams.vocab_size["targets"], } hparams.problem._hparams = hparams.problem_hparams # Dataset problem = hparams.problem dataset = problem.dataset(tf.estimator.ModeKeys.TRAIN, algorithmic.TinyAlgo.data_dir) dataset = dataset.repeat(None).padded_batch(10, dataset.output_shapes) features = dataset.make_one_shot_iterator().get_next() features = data_reader.standardize_shapes(features) features["targets_A"] = features["targets_B"] = features["targets"] # Model model = registry.model("transformer")(hparams, tf.estimator.ModeKeys.TRAIN) def body(args, mb=model.body): out = mb(args) return {"targets": out, "targets_A": out, "targets_B": out} model.body = body logits, losses = model(features) self.assertTrue("training" in losses) loss = losses["training"] with self.test_session() as sess: sess.run(tf.global_variables_initializer()) sess.run([logits, loss])
def make_simulated_env_kwargs(real_env, hparams, **extra_kwargs):
  """Extracts simulated env kwargs from real_env and loop hparams."""
  objs_and_attrs = [
      (real_env, [
          "reward_range", "observation_space", "action_space", "frame_height",
          "frame_width"
      ]),
      (hparams, ["frame_stack_size", "intrinsic_reward_scale"])
  ]
  kwargs = {
      attr: getattr(obj, attr)  # pylint: disable=g-complex-comprehension
      for (obj, attrs) in objs_and_attrs for attr in attrs
  }
  kwargs["model_name"] = hparams.generative_model
  kwargs["model_hparams"] = trainer_lib.create_hparams(
      hparams.generative_model_params)
  if hparams.wm_policy_param_sharing:
    kwargs["model_hparams"].optimizer_zero_grads = True
  kwargs.update(extra_kwargs)
  return kwargs
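# Sketch: the kwargs assembled above are presumably expanded into the
# simulated-environment constructor. The exact call site and the
# `simulated_batch_size` / `world_model_dir` names are assumptions here.
kwargs = make_simulated_env_kwargs(
    real_env, hparams,
    batch_size=hparams.simulated_batch_size,
    model_dir=world_model_dir)
env_fn = rl.make_simulated_env_fn(**kwargs)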
def train_agent( real_env, learner, world_model_dir, hparams, epoch, is_final_epoch): """Train the PPO agent in the simulated environment.""" frame_stack_size = hparams.frame_stack_size initial_frame_rollouts = real_env.current_epoch_rollouts( split=tf.contrib.learn.ModeKeys.TRAIN, minimal_rollout_frames=frame_stack_size, ) # TODO(koz4k): Move this to a different module. def initial_frame_chooser(batch_size): """Frame chooser.""" deterministic_initial_frames =\ initial_frame_rollouts[0][:frame_stack_size] if not hparams.simulation_random_starts: # Deterministic starts: repeat first frames from the first rollout. initial_frames = [deterministic_initial_frames] * batch_size else: # Random starts: choose random initial frames from random rollouts. initial_frames = random_rollout_subsequences( initial_frame_rollouts, batch_size, frame_stack_size ) if hparams.simulation_flip_first_random_for_beginning: # Flip first entry in the batch for deterministic initial frames. initial_frames[0] = deterministic_initial_frames return np.stack([ [frame.observation.decode() for frame in initial_frame_stack] for initial_frame_stack in initial_frames ]) env_fn = make_simulated_env_fn( real_env, hparams, hparams.simulated_batch_size, initial_frame_chooser, world_model_dir ) base_algo_str = hparams.base_algo train_hparams = trainer_lib.create_hparams(hparams.base_algo_params) update_hparams_from_hparams( train_hparams, hparams, base_algo_str + "_" ) env_step_multiplier = 1 if not is_final_epoch else 2 learner.train( env_fn, train_hparams, simulated=True, save_continuously=True, epoch=epoch, env_step_multiplier=env_step_multiplier )
def train_agent_real_env(env, learner, hparams, epoch):
  """Train the PPO agent in the real environment."""
  base_algo_str = hparams.base_algo

  train_hparams = trainer_lib.create_hparams(hparams.base_algo_params)
  update_hparams_from_hparams(
      train_hparams, hparams, "real_" + base_algo_str + "_"
  )

  env_fn = rl.make_real_env_fn(env)
  num_env_steps = real_env_step_increment(hparams)
  learner.train(
      env_fn, train_hparams, simulated=False, save_continuously=False,
      epoch=epoch, num_env_steps=num_env_steps
  )
  # Save unfinished rollouts to history.
  env.reset()
def get_mnist_random_output(self, model_name, hparams_set=None,
                            mode=tf.estimator.ModeKeys.TRAIN):
  hparams_set = hparams_set or model_name
  # np.random.randint replaces the deprecated np.random.random_integers;
  # the sampled ranges (0-255 pixels, 0-9 labels) are unchanged.
  x = np.random.randint(256, size=(1, 28, 28, 1))
  y = np.random.randint(10, size=(1, 1))
  features = {
      "targets": tf.constant(x, dtype=tf.int32),
      "inputs": tf.constant(y, dtype=tf.int32),
  }
  hparams = trainer_lib.create_hparams(
      hparams_set, problem_name="image_mnist_rev", data_dir=".")
  model = registry.model(model_name)(hparams, mode)
  tf.train.create_global_step()
  logits, _ = model(features)
  with self.test_session() as session:
    session.run(tf.global_variables_initializer())
    res = session.run(logits)
  return res
def build_model(hparams_set, model_name, data_dir, problem_name, beam_size=1): """Build the graph required to fetch the attention weights. Args: hparams_set: HParams set to build the model with. model_name: Name of model. data_dir: Path to directory containing training data. problem_name: Name of problem. beam_size: (Optional) Number of beams to use when decoding a translation. If set to 1 (default) then greedy decoding is used. Returns: Tuple of ( inputs: Input placeholder to feed in ids to be translated. targets: Targets placeholder to feed to translation when fetching attention weights. samples: Tensor representing the ids of the translation. att_mats: Tensors representing the attention weights. ) """ hparams = trainer_lib.create_hparams( hparams_set, data_dir=data_dir, problem_name=problem_name) translate_model = registry.model(model_name)( hparams, tf.estimator.ModeKeys.EVAL) inputs = tf.placeholder(tf.int32, shape=(1, None, 1, 1), name='inputs') targets = tf.placeholder(tf.int32, shape=(1, None, 1, 1), name='targets') translate_model({ 'inputs': inputs, 'targets': targets, }) # Must be called after building the training graph, so that the dict will # have been filled with the attention tensors. BUT before creating the # inference graph otherwise the dict will be filled with tensors from # inside a tf.while_loop from decoding and are marked unfetchable. att_mats = get_att_mats(translate_model) with tf.variable_scope(tf.get_variable_scope(), reuse=True): samples = translate_model.infer({ 'inputs': inputs, }, beam_size=beam_size)['outputs'] return inputs, targets, samples, att_mats
def evaluate_single_config(hparams, stochastic, max_num_noops,
                           agent_model_dir):
  """Evaluate the PPO agent in the real environment."""
  eval_hparams = trainer_lib.create_hparams(hparams.base_algo_params)
  env = setup_env(
      hparams, batch_size=hparams.eval_batch_size,
      max_num_noops=max_num_noops
  )
  env.start_new_epoch(0)
  env_fn = rl.make_real_env_fn(env)
  learner = LEARNERS[hparams.base_algo](
      hparams.frame_stack_size, base_event_dir=None,
      agent_model_dir=agent_model_dir
  )
  learner.evaluate(env_fn, eval_hparams, stochastic)
  rollouts = env.current_epoch_rollouts()
  env.close()

  return tuple(
      compute_mean_reward(rollouts, clipped) for clipped in (True, False)
  )
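# Illustrative call of the evaluation helper above; the directory layout,
# noop count and loop_hparams variable are assumptions.
mean_clipped_reward, mean_unclipped_reward = evaluate_single_config(
    loop_hparams, stochastic=True, max_num_noops=30,
    agent_model_dir=os.path.join(output_dir, "policy"))
tf.logging.info("Mean unclipped reward: %f", mean_unclipped_reward)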
def __init__(self, environment_spec, length, other_hparams): """Batch of environments inside the TensorFlow graph.""" del other_hparams self.length = length initial_frames_problem = environment_spec.initial_frames_problem self._min_reward = initial_frames_problem.min_reward self._num_frames = environment_spec.video_num_input_frames self._intrinsic_reward_scale = environment_spec.intrinsic_reward_scale model_hparams = trainer_lib.create_hparams( FLAGS.hparams_set, problem_name=FLAGS.problem) model_hparams.force_full_predict = True self._model = registry.model(FLAGS.model)( model_hparams, tf.estimator.ModeKeys.PREDICT) _, self.action_shape, self.action_dtype = get_action_space(environment_spec) hparams = HParams(video_num_input_frames= environment_spec.video_num_input_frames, video_num_target_frames= environment_spec.video_num_target_frames, environment_spec=environment_spec) if environment_spec.simulation_random_starts: dataset = initial_frames_problem.dataset(tf.estimator.ModeKeys.TRAIN, FLAGS.data_dir, shuffle_files=True, hparams=hparams) dataset = dataset.shuffle(buffer_size=100) else: dataset = initial_frames_problem.dataset(tf.estimator.ModeKeys.TRAIN, FLAGS.data_dir, shuffle_files=False, hparams=hparams).take(1) dataset = dataset.map(lambda x: x["inputs"]).repeat() self.history_buffer = HistoryBuffer(dataset, self.length) shape = (self.length, initial_frames_problem.frame_height, initial_frames_problem.frame_width, initial_frames_problem.num_channels) self._observ = tf.Variable(tf.zeros(shape, tf.float32), trainable=False)
def encode_env_frames(problem_name, ae_problem_name, autoencoder_path, epoch_data_dir): """Encode all frames from problem_name and write out as ae_problem_name.""" with tf.Graph().as_default(): ae_hparams = trainer_lib.create_hparams("autoencoder_discrete_pong", problem_name=problem_name) problem = ae_hparams.problem model = registry.model("autoencoder_ordered_discrete")( ae_hparams, tf.estimator.ModeKeys.EVAL) ae_problem = registry.problem(ae_problem_name) ae_training_paths = ae_problem.training_filepaths(epoch_data_dir, 10, True) ae_eval_paths = ae_problem.dev_filepaths(epoch_data_dir, 1, True) skip_train = False skip_eval = False for path in ae_training_paths: if tf.gfile.Exists(path): skip_train = True break for path in ae_eval_paths: if tf.gfile.Exists(path): skip_eval = True break # Encode train data if not skip_train: dataset = problem.dataset(tf.estimator.ModeKeys.TRAIN, epoch_data_dir, shuffle_files=False, output_buffer_size=100, preprocess=False) encode_dataset(model, dataset, problem, ae_hparams, autoencoder_path, ae_training_paths) # Encode eval data if not skip_eval: dataset = problem.dataset(tf.estimator.ModeKeys.EVAL, epoch_data_dir, shuffle_files=False, output_buffer_size=100, preprocess=False) encode_dataset(model, dataset, problem, ae_hparams, autoencoder_path, ae_eval_paths)
def __init__(self, processor_configuration): """Creates the Transformer estimator. Args: processor_configuration: A ProcessorConfiguration protobuffer with the transformer fields populated. """ # Do the pre-setup tensor2tensor requires for flags and configurations. transformer_config = processor_configuration["transformer"] FLAGS.output_dir = transformer_config["model_dir"] usr_dir.import_usr_dir(FLAGS.t2t_usr_dir) data_dir = os.path.expanduser(transformer_config["data_dir"]) # Create the basic hyper parameters. self.hparams = trainer_lib.create_hparams( transformer_config["hparams_set"], transformer_config["hparams"], data_dir=data_dir, problem_name=transformer_config["problem"]) decode_hp = decoding.decode_hparams() decode_hp.add_hparam("shards", 1) decode_hp.add_hparam("shard_id", 0) # Create the estimator and final hyper parameters. self.estimator = trainer_lib.create_estimator( transformer_config["model"], self.hparams, t2t_trainer.create_run_config(self.hparams), decode_hparams=decode_hp, use_tpu=False) # Fetch the vocabulary and other helpful variables for decoding. self.source_vocab = self.hparams.problem_hparams.vocabulary["inputs"] self.targets_vocab = self.hparams.problem_hparams.vocabulary["targets"] self.const_array_size = 10000 # Prepare the Transformer's debug data directory. run_dirs = sorted(glob.glob(os.path.join("/tmp/t2t_server_dump", "run_*"))) for run_dir in run_dirs: shutil.rmtree(run_dir)
def testMultipleTargetModalities(self): # Use existing hparams and override target modality. hparams = trainer_lib.create_hparams( "transformer_tiny", data_dir=algorithmic.TinyAlgo.data_dir, problem_name="tiny_algo") # Manually turn off sharing. It is not currently supported for multitargets. hparams.shared_embedding_and_softmax_weights = 0 # pylint: disable=line-too-long hparams.problem_hparams.modality = { "targets": hparams.problem_hparams.modality["targets"], "targets_A": hparams.problem_hparams.modality["targets"], "targets_B": hparams.problem_hparams.modality["targets"], } hparams.problem._hparams = hparams.problem_hparams # Dataset problem = hparams.problem dataset = problem.dataset(tf.estimator.ModeKeys.TRAIN, algorithmic.TinyAlgo.data_dir) dataset = dataset.repeat(None).padded_batch(10, dataset.output_shapes) features = dataset.make_one_shot_iterator().get_next() features = problem_lib.standardize_shapes(features) features["targets_A"] = features["targets_B"] = features["targets"] # Model model = registry.model("transformer")(hparams, tf.estimator.ModeKeys.TRAIN) def body(args, mb=model.body): out = mb(args) return {"targets": out, "targets_A": out, "targets_B": out} model.body = body logits, losses = model(features) self.assertTrue("training" in losses) loss = losses["training"] with self.test_session() as sess: sess.run(tf.global_variables_initializer()) sess.run([logits, loss])
def train_world_model(
    env, data_dir, output_dir, hparams, world_model_steps_num, epoch
):
  """Train the world model on data collected from env."""
  world_model_steps_num += world_model_step_increment(
      hparams, is_initial_epoch=(epoch == 0)
  )
  model_hparams = trainer_lib.create_hparams(hparams.generative_model_params)
  model_hparams.learning_rate = model_hparams.learning_rate_constant
  if epoch > 0:
    model_hparams.learning_rate *= hparams.learning_rate_bump

  train_supervised(
      problem=env,
      model_name=hparams.generative_model,
      hparams=model_hparams,
      data_dir=data_dir,
      output_dir=output_dir,
      train_steps=world_model_steps_num,
      eval_steps=100,
      local_eval_frequency=2000
  )

  return world_model_steps_num
def testMultipleTargetModalities(self): # HParams hparams = trainer_lib.create_hparams( "transformer_tiny", data_dir=self.data_dir, problem_name="tiny_algo") tm = hparams.problem.get_hparams().target_modality hparams.problem.get_hparams().target_modality = { "targets": tm, "A": tm, "B": tm } # Dataset problem = hparams.problem dataset = problem.dataset(tf.estimator.ModeKeys.TRAIN, self.data_dir) dataset = dataset.repeat(None).padded_batch(10, dataset.output_shapes) features = dataset.make_one_shot_iterator().get_next() features = problem_lib.standardize_shapes(features) features["A"] = features["B"] = features["targets"] # Model model = registry.model("transformer")(hparams, tf.estimator.ModeKeys.TRAIN) def body(args, mb=model.body): out = mb(args) return {"targets": out, "A": out, "B": out} model.body = body logits, losses = model(features) self.assertTrue("training" in losses) loss = losses["training"] with self.test_session() as sess: sess.run(tf.global_variables_initializer()) sess.run([logits, loss])
def test_no_crash_cartpole(self):
  hparams = trainer_lib.create_hparams(
      "discrete_action_base", "epochs_num=11,video_during_eval=False")
  rl_trainer_lib.train(hparams, "CartPole-v0")
def test_no_crash_pendulum(self):
  hparams = trainer_lib.create_hparams(
      "continuous_action_base", "epochs_num=11,video_during_eval=False")
  rl_trainer_lib.train(hparams, "Pendulum-v0")
def main(_): hparams = registry.hparams(FLAGS.loop_hparams_set) hparams.parse(FLAGS.loop_hparams) output_dir = FLAGS.output_dir subdirectories = ["data", "tmp", "world_model", "ppo"] using_autoencoder = hparams.autoencoder_train_steps > 0 if using_autoencoder: subdirectories.append("autoencoder") directories = setup_directories(output_dir, subdirectories) if hparams.game in gym_env.ATARI_GAMES: game_with_mode = hparams.game + "_deterministic-v4" else: game_with_mode = hparams.game if using_autoencoder: simulated_problem_name = ( "gym_simulated_discrete_problem_with_agent_on_%s_autoencoded" % game_with_mode) else: simulated_problem_name = ("gym_simulated_discrete_problem_with_agent_on_%s" % game_with_mode) if simulated_problem_name not in registry.list_problems(): tf.logging.info("Game Problem %s not found; dynamically registering", simulated_problem_name) gym_env.register_game(hparams.game, game_mode="Deterministic-v4") epoch = hparams.epochs-1 epoch_data_dir = os.path.join(directories["data"], str(epoch)) ppo_model_dir = directories["ppo"] world_model_dir = directories["world_model"] gym_problem = registry.problem(simulated_problem_name) model_hparams = trainer_lib.create_hparams(hparams.generative_model_params) environment_spec = copy.copy(gym_problem.environment_spec) environment_spec.simulation_random_starts = hparams.simulation_random_starts batch_env_hparams = trainer_lib.create_hparams(hparams.ppo_params) batch_env_hparams.add_hparam("model_hparams", model_hparams) batch_env_hparams.add_hparam("environment_spec", environment_spec) batch_env_hparams.num_agents = 1 with temporary_flags({ "problem": simulated_problem_name, "model": hparams.generative_model, "hparams_set": hparams.generative_model_params, "output_dir": world_model_dir, "data_dir": epoch_data_dir, }): sess = tf.Session() env = DebugBatchEnv(batch_env_hparams, sess) sess.run(tf.global_variables_initializer()) env.initialize() env_model_loader = tf.train.Saver( tf.global_variables("next_frame*")) trainer_lib.restore_checkpoint(world_model_dir, env_model_loader, sess, must_restore=True) model_saver = tf.train.Saver( tf.global_variables(".*network_parameters.*")) trainer_lib.restore_checkpoint(ppo_model_dir, model_saver, sess) key_mapping = gym_problem.env.env.get_keys_to_action() # map special codes key_mapping[()] = 100 key_mapping[(ord("r"),)] = 101 key_mapping[(ord("p"),)] = 102 play.play(env, zoom=2, fps=10, keys_to_action=key_mapping)
def main(_):
  hparams = trainer_lib.create_hparams(FLAGS.hparams_set, FLAGS.hparams)
  train(hparams, FLAGS.output_dir)
def create_t2t_hparams():
  return trainer_lib.create_hparams(
      FLAGS_hparams_set,
      FLAGS_hparams,
      data_dir=os.path.expanduser(FLAGS_data_dir),
      problem_name=FLAGS_problem)
def create_surrogate_hparams():
  return trainer_lib.create_hparams(FLAGS.surrogate_hparams_set, None)
def main(_):
  hparams = trainer_lib.create_hparams(FLAGS.hparams_set, FLAGS.hparams)
  rl_trainer_lib.train(hparams, FLAGS.problem, FLAGS.output_dir)