def setUpClass(cls):
  tmp_dir = tf.test.get_temp_dir()
  shutil.rmtree(tmp_dir)
  os.mkdir(tmp_dir)
  cls.data_dir = tmp_dir

  # Generate a small test dataset
  registry.problem("tiny_algo").generate_data(cls.data_dir, None)
def setUpClass(cls):
  tmp_dir = tf.test.get_temp_dir()
  shutil.rmtree(tmp_dir)
  os.mkdir(tmp_dir)

  # Generate a small test dataset
  FLAGS.problems = "tiny_algo"
  TrainerUtilsTest.data_dir = tmp_dir
  registry.problem(FLAGS.problems).generate_data(TrainerUtilsTest.data_dir,
                                                 None)
def generate_data():
  # Generate data if requested.
  data_dir = os.path.expanduser(FLAGS.data_dir)
  tmp_dir = os.path.expanduser(FLAGS.tmp_dir)
  tf.gfile.MakeDirs(data_dir)
  tf.gfile.MakeDirs(tmp_dir)

  problem_name = get_problem_name()
  tf.logging.info("Generating data for %s" % problem_name)
  registry.problem(problem_name).generate_data(data_dir, tmp_dir)
def TestVideoModel(self, in_frames, out_frames, hparams, model,
                   expected_last_dim):
  x = np.random.random_integers(0, high=255, size=(8, in_frames, 64, 64, 3))
  y = np.random.random_integers(0, high=255, size=(8, out_frames, 64, 64, 3))

  hparams.video_num_input_frames = in_frames
  hparams.video_num_target_frames = out_frames

  problem = registry.problem("video_stochastic_shapes10k")
  p_hparams = problem.get_hparams(hparams)
  hparams.problem = problem
  hparams.problem_hparams = p_hparams

  with self.test_session() as session:
    features = {
        "inputs": tf.constant(x, dtype=tf.int32),
        "targets": tf.constant(y, dtype=tf.int32),
    }
    model = model(hparams, tf.estimator.ModeKeys.TRAIN)
    logits, _ = model(features)
    session.run(tf.global_variables_initializer())
    res = session.run(logits)

  expected_shape = y.shape + (expected_last_dim,)
  self.assertEqual(res.shape, expected_shape)
def main(_):
  tf.gfile.MakeDirs(FLAGS.data_dir)
  tf.gfile.MakeDirs(FLAGS.tmp_dir)

  # Create problem if not already defined
  problem_name = "gym_discrete_problem_with_agent_on_%s" % FLAGS.game
  if problem_name not in registry.list_problems():
    gym_env.register_game(FLAGS.game)

  # Generate
  tf.logging.info("Running %s environment for %d steps for trajectories.",
                  FLAGS.game, FLAGS.num_env_steps)
  problem = registry.problem(problem_name)
  problem.settable_num_steps = FLAGS.num_env_steps
  problem.settable_eval_phase = FLAGS.eval
  problem.generate_data(FLAGS.data_dir, FLAGS.tmp_dir)

  # Log stats
  if problem.statistics.number_of_dones:
    mean_reward = (problem.statistics.sum_of_rewards /
                   problem.statistics.number_of_dones)
    tf.logging.info("Mean reward: %.2f, Num dones: %d",
                    mean_reward, problem.statistics.number_of_dones)
def __init__(self, translate_host, translate_port, source_lang, target_lang,
             model_name, problem, t2t_usr_dir, data_dir, preprocess_cmd,
             postprocess_cmd):
  """Initialize a TransformerTranslator object according to the given
  configuration settings.

  @param translate_host: the host at which the translation server operates
  @param translate_port: the port at which the translation server operates
  @param source_lang: source language (ISO-639-1 ID)
  @param target_lang: target language (ISO-639-1 ID)
  @param model_name: name of the served model
  @param problem: name of the registered T2T problem
  @param t2t_usr_dir: directory with user-defined T2T code
  @param data_dir: directory with the problem's data (vocabularies etc.)
  @param preprocess_cmd: bash command for text preprocessing
  @param postprocess_cmd: bash command for text postprocessing
  """
  # precompile TensorFlow server addresses
  self.server = translate_host + ":" + translate_port
  # initialize text processing tools (can be shared among threads)
  self.tokenizer = Tokenizer({'lowercase': True, 'moses_escape': True})
  self.preprocess = preprocess_cmd
  self.postprocess = postprocess_cmd
  usr_dir.import_usr_dir(t2t_usr_dir)
  self.problem = registry.problem(problem)
  hparams = tf.contrib.training.HParams(
      data_dir=os.path.expanduser(data_dir))
  self.problem.get_hparams(hparams)
  self.request_fn = serving_utils.make_grpc_request_fn(
      servable_name=model_name, server=self.server, timeout_secs=30)
def add_problem_hparams(hparams, problem_name):
  """Add problem hparams for the problems."""
  problem = registry.problem(problem_name)
  p_hparams = problem.get_hparams(hparams)
  hparams.problem = problem
  hparams.problem_hparams = p_hparams
def add_problem_hparams(hparams, problem_name_or_instance):
  """Add problem hparams for the problems."""
  if isinstance(problem_name_or_instance, Problem):
    problem = problem_name_or_instance
  else:
    problem = registry.problem(problem_name_or_instance)
  p_hparams = problem.get_hparams(hparams)
  hparams.problem = problem
  hparams.problem_hparams = p_hparams
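A minimal usage sketch of the variant above; "transformer_base" and "translate_ende_wmt32k" are example names assumed to be registered, and trainer_lib is tensor2tensor's standard hparams helper:

# Hypothetical usage of add_problem_hparams; the hparams-set and problem
# names below are assumptions, not taken from the snippets in this section.
from tensor2tensor.utils import trainer_lib

hparams = trainer_lib.create_hparams("transformer_base")
add_problem_hparams(hparams, "translate_ende_wmt32k")
# After the call, the problem and its hparams hang off the model hparams:
assert hparams.problem is not None
assert hparams.problem_hparams is not None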
def add_problem_hparams(hparams, problems):
  """Add problem hparams for the problems."""
  hparams.problems = []
  hparams.problem_instances = []
  for problem_name in problems.split("-"):
    problem = registry.problem(problem_name)
    p_hparams = problem.get_hparams(hparams)
    hparams.problem_instances.append(problem)
    hparams.problems.append(p_hparams)
def generate_data_for_registered_problem(problem_name):
  tf.logging.info("Generating data for %s.", problem_name)
  if FLAGS.num_shards:
    raise ValueError("--num_shards should not be set for registered Problem.")
  problem = registry.problem(problem_name)
  task_id = None if FLAGS.task_id < 0 else FLAGS.task_id
  problem.generate_data(
      os.path.expanduser(FLAGS.data_dir),
      os.path.expanduser(FLAGS.tmp_dir),
      task_id=task_id)
def init():
  # global input_encoder, output_decoder, fname, problem
  global problem
  tf.logging.set_verbosity(tf.logging.INFO)
  tf.logging.info("importing ghsumm/trainer from {}".format(t2t_usr_dir))
  usr_dir.import_usr_dir(t2t_usr_dir)
  print(t2t_usr_dir)
  problem = registry.problem(problem_name)
  hparams = tf.contrib.training.HParams(data_dir=os.path.expanduser(data_dir))
  problem.get_hparams(hparams)
def score_file(filename):
  """Score each line in a file and return the scores."""
  # Prepare model.
  hparams = create_hparams()
  encoders = registry.problem(FLAGS.problem).feature_encoders(FLAGS.data_dir)
  has_inputs = "inputs" in encoders

  # Prepare features for feeding into the model.
  if has_inputs:
    inputs_ph = tf.placeholder(dtype=tf.int32)  # Just length dimension.
    batch_inputs = tf.reshape(inputs_ph, [1, -1, 1, 1])  # Make it 4D.
  targets_ph = tf.placeholder(dtype=tf.int32)  # Just length dimension.
  batch_targets = tf.reshape(targets_ph, [1, -1, 1, 1])  # Make it 4D.
  features = {
      "inputs": batch_inputs,
      "targets": batch_targets,
  } if has_inputs else {"targets": batch_targets}

  # Prepare the model and the graph when model runs on features.
  model = registry.model(FLAGS.model)(hparams, tf.estimator.ModeKeys.EVAL)
  _, losses = model(features)
  saver = tf.train.Saver()

  with tf.Session() as sess:
    # Load weights from checkpoint.
    ckpts = tf.train.get_checkpoint_state(FLAGS.output_dir)
    ckpt = ckpts.model_checkpoint_path
    saver.restore(sess, ckpt)
    # Run on each line.
    with tf.gfile.Open(filename) as f:
      lines = f.readlines()
    results = []
    for line in lines:
      tab_split = line.split("\t")
      if len(tab_split) > 2:
        raise ValueError("Each line must have at most one tab separator.")
      if len(tab_split) == 1:
        targets = tab_split[0].strip()
      else:
        targets = tab_split[1].strip()
        inputs = tab_split[0].strip()
      # Run encoders and append EOS symbol.
      targets_numpy = encoders["targets"].encode(
          targets) + [text_encoder.EOS_ID]
      if has_inputs:
        inputs_numpy = encoders["inputs"].encode(inputs) + [text_encoder.EOS_ID]
      # Prepare the feed.
      feed = {
          inputs_ph: inputs_numpy,
          targets_ph: targets_numpy
      } if has_inputs else {targets_ph: targets_numpy}
      # Get the score.
      np_loss = sess.run(losses["training"], feed)
      results.append(np_loss)

  return results
def main(_):
  tf.logging.set_verbosity(tf.logging.INFO)
  trainer_lib.set_random_seed(FLAGS.random_seed)
  usr_dir.import_usr_dir(FLAGS.t2t_usr_dir)

  # Create hparams
  hparams = create_hparams()
  hparams.force_full_predict = True
  batch_size = hparams.batch_size

  # Iterating over dev/test partition of the data.
  # Change the data partition if necessary.
  dataset = registry.problem(FLAGS.problem).dataset(
      tf.estimator.ModeKeys.PREDICT, shuffle_files=False, hparams=hparams)
  dataset = dataset.apply(tf.contrib.data.batch_and_drop_remainder(batch_size))
  data = dataset.make_one_shot_iterator().get_next()
  input_data = dict((k, data[k]) for k in data.keys() if k.startswith("input"))

  # Create model
  model_cls = registry.model(FLAGS.model)
  model = model_cls(hparams, tf.estimator.ModeKeys.PREDICT)
  prediction_ops = model.infer(input_data)

  # Confusion Matrix
  nr = hparams.problem.num_rewards
  cm_per_frame = np.zeros((nr, nr), dtype=np.uint64)
  cm_next_frame = np.zeros((nr, nr), dtype=np.uint64)

  saver = tf.train.Saver()
  with tf.train.SingularMonitoredSession() as sess:
    # Load latest checkpoint
    ckpt = tf.train.get_checkpoint_state(FLAGS.output_dir).model_checkpoint_path
    saver.restore(sess.raw_session(), ckpt)

    counter = 0
    while not sess.should_stop():
      counter += 1
      if counter % 1 == 0:
        print(counter)

      # Predict next frames
      rew_pd, rew_gt = sess.run(
          [prediction_ops["target_reward"], data["target_reward"]])

      for i in range(batch_size):
        cm_next_frame[rew_gt[i, 0, 0], rew_pd[i, 0, 0]] += 1
        for gt, pd in zip(rew_gt[i], rew_pd[i]):
          cm_per_frame[gt, pd] += 1

      print_confusion_matrix("Per-frame Confusion Matrix", cm_per_frame)
      print_confusion_matrix("Next-frame Confusion Matrix", cm_next_frame)
def main(argv):
  tf.logging.set_verbosity(tf.logging.INFO)
  trainer_lib.set_random_seed(FLAGS.random_seed)
  usr_dir.import_usr_dir(FLAGS.t2t_usr_dir)
  t2t_trainer.maybe_log_registry_and_exit()

  if FLAGS.generate_data:
    t2t_trainer.generate_data()

  if argv:
    t2t_trainer.set_hparams_from_args(argv[1:])
  hparams = t2t_trainer.create_hparams()
  trainer_lib.add_problem_hparams(hparams, FLAGS.problem)
  pruning_params = create_pruning_params()
  pruning_strategy = create_pruning_strategy(pruning_params.strategy)

  config = t2t_trainer.create_run_config(hparams)
  params = {"batch_size": hparams.batch_size}

  # add "_rev" as a hack to avoid image standardization
  problem = registry.problem(FLAGS.problem)
  input_fn = problem.make_estimator_input_fn(tf.estimator.ModeKeys.EVAL,
                                             hparams)
  dataset = input_fn(params, config).repeat()
  features, labels = dataset.make_one_shot_iterator().get_next()

  sess = tf.Session()
  model_fn = t2t_model.T2TModel.make_estimator_model_fn(
      FLAGS.model, hparams, use_tpu=FLAGS.use_tpu)
  spec = model_fn(
      features, labels, tf.estimator.ModeKeys.EVAL,
      params=hparams, config=config)

  # Restore weights
  saver = tf.train.Saver()
  checkpoint_path = os.path.expanduser(FLAGS.output_dir or
                                       FLAGS.checkpoint_path)
  saver.restore(sess, tf.train.latest_checkpoint(checkpoint_path))

  def eval_model():
    preds = spec.predictions["predictions"]
    preds = tf.argmax(preds, -1, output_type=labels.dtype)
    _, acc_update_op = tf.metrics.accuracy(labels=labels, predictions=preds)
    sess.run(tf.initialize_local_variables())
    for _ in range(FLAGS.eval_steps):
      acc = sess.run(acc_update_op)
    return acc

  pruning_utils.sparsify(sess, eval_model, pruning_strategy, pruning_params)
def train_eval_and_decode(self):
  """Does eval and decode after training every eval_freq_in_steps."""
  eval_steps = self._hparams.eval_freq_in_steps
  packed_dataset = "_packed" in self._hparams.problem.name
  mlperf_log.transformer_print(key=mlperf_log.TRAIN_LOOP)
  for i in range(0, self._train_spec.max_steps, eval_steps):
    mlperf_log.transformer_print(
        key=mlperf_log.TRAIN_EPOCH, value=i // eval_steps)
    if packed_dataset and i > 0:
      problem = registry.problem(self._hparams.problem.name + "_packed")
      p_hparams = problem.get_hparams(self._hparams)
      self._hparams.problem = problem
      self._hparams.problem_hparams = p_hparams
    self._estimator.train(
        self._train_spec.input_fn,
        steps=eval_steps,
        hooks=self._train_spec.hooks)
    self._estimator.evaluate(
        self._eval_spec.input_fn,
        steps=self._eval_spec.steps,
        hooks=self._eval_spec.hooks)
    if packed_dataset:
      problem = registry.problem(
          self._hparams.problem.name.replace("_packed", ""))
      p_hparams = problem.get_hparams(self._hparams)
      self._hparams.problem = problem
      self._hparams.problem_hparams = p_hparams
    mlperf_log.transformer_print(key=mlperf_log.EVAL_START)
    if self._hparams.mlperf_mode:
      self._decode_hparams.mlperf_decode_step = i + eval_steps
    self.decode(dataset_split=tf.estimator.ModeKeys.EVAL)
    d_hparams = self._decode_hparams
    if self._hparams.mlperf_mode and d_hparams.mlperf_success:
      mlperf_log.transformer_print(
          key=mlperf_log.RUN_STOP, value={"success": "true"})
      break

  d_hparams = self._decode_hparams
  if self._hparams.mlperf_mode and not d_hparams.mlperf_success:
    mlperf_log.transformer_print(
        key=mlperf_log.RUN_STOP, value={"success": "false"})
def get_environment_spec(self):
  env_spec = standard_atari_env_spec(self.env_name)
  env_spec.simulated_env = True
  env_spec.add_hparam("simulation_random_starts",
                      self.simulation_random_starts)
  env_spec.add_hparam("intrinsic_reward_scale", self.intrinsic_reward_scale)
  initial_frames_problem = registry.problem(self.initial_frames_problem)
  env_spec.add_hparam("initial_frames_problem", initial_frames_problem)
  env_spec.add_hparam("video_num_input_frames", self.num_input_frames)
  env_spec.add_hparam("video_num_target_frames", self.video_num_target_frames)
  return env_spec
def translate(self, inputs):
  # Look up the registered problem class
  problem = registry.problem(self.problem)
  # Instantiate the HParams object
  hparams = HParams(data_dir=os.path.expanduser(self.data_dir))
  problem.get_hparams(hparams)
  request_fn = self.make_request_fn()
  # Prediction
  outputs = serving_utils.predict([inputs], problem, request_fn)
  outputs, = outputs
  output, score = outputs
  return {'inputs': inputs, 'outputs': output, 'scores': score}
def train_eval_and_decode(self):
  """Does eval and decode after training every eval_freq_in_steps."""
  eval_steps = self._hparams.eval_freq_in_steps
  packed_dataset = "_packed" in self._hparams.problem.name
  mlperf_log.transformer_print(key=mlperf_log.TRAIN_LOOP)
  for i in range(0, self._train_spec.max_steps, eval_steps):
    mlperf_log.transformer_print(
        key=mlperf_log.TRAIN_EPOCH, value=i // eval_steps)
    if packed_dataset and i > 0:
      problem = registry.problem(self._hparams.problem.name + "_packed")
      p_hparams = problem.get_hparams(self._hparams)
      self._hparams.problem = problem
      self._hparams.problem_hparams = p_hparams
    self._estimator.train(
        self._train_spec.input_fn,
        steps=eval_steps,
        hooks=self._train_spec.hooks)
    self._estimator.evaluate(
        self._eval_spec.input_fn,
        steps=self._eval_spec.steps,
        hooks=self._eval_spec.hooks)
    if packed_dataset:
      problem = registry.problem(
          self._hparams.problem.name.replace("_packed", ""))
      p_hparams = problem.get_hparams(self._hparams)
      self._hparams.problem = problem
      self._hparams.problem_hparams = p_hparams
    mlperf_log.transformer_print(key=mlperf_log.EVAL_START)
    self.decode(dataset_split=tf.estimator.ModeKeys.EVAL)
    d_hparams = self._decode_hparams
    if d_hparams.mlperf_mode and d_hparams.mlperf_success:
      mlperf_log.transformer_print(
          key=mlperf_log.RUN_STOP, value={"success": "true"})
      break

  d_hparams = self._decode_hparams
  if d_hparams.mlperf_mode and not d_hparams.mlperf_success:
    mlperf_log.transformer_print(
        key=mlperf_log.RUN_STOP, value={"success": "false"})
def main(_):
  usr_dir.import_usr_dir(FLAGS.t2t_usr_dir)

  # Compute length statistics for the requested problem's data.
  data_dir = os.path.expanduser(FLAGS.data_dir)
  tmp_dir = os.path.expanduser(FLAGS.tmp_dir)
  problem_name = FLAGS.problems
  tf.logging.info("Generating data for %s" % problem_name)
  problem = registry.problem(problem_name)
  length = problem.get_length(data_dir, tmp_dir)
  length_statistics(length)
def __init__(self):
  tf.logging.set_verbosity(tf.logging.INFO)
  validate_flags()
  usr_dir.import_usr_dir(FLAGS.t2t_usr_dir)
  self.problem = registry.problem(FLAGS.problem)
  self.hparams = tf.contrib.training.HParams(
      data_dir=os.path.expanduser(FLAGS.data_dir))
  self.problem.get_hparams(self.hparams)
  self.request_fn = make_request_fn()
  self.tokenizer = MosesTokenizer('en')
  self.moses_detokenizer = MosesDetokenizer('zh')
  # Sentence splitter: split after "." or "?" followed by whitespace,
  # unless preceded by an abbreviation or an initial.
  self.delimiter = re.compile(
      r"(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?)\s")
def __init__(self, batch_env):
  super(AutoencoderWrapper, self).__init__(batch_env)
  self._observ = tf.Variable(
      tf.zeros((len(self),) + self.observ_shape, self.observ_dtype),
      trainable=False)
  with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
    autoencoder_hparams = autoencoders.autoencoder_discrete_pong()
    problem = registry.problem("dummy_autoencoder_problem")
    autoencoder_hparams.problem_hparams = problem.get_hparams(
        autoencoder_hparams)
    autoencoder_hparams.problem = problem
    self.autoencoder_model = autoencoders.AutoencoderOrderedDiscrete(
        autoencoder_hparams, tf.estimator.ModeKeys.EVAL)
def query_t2t(input_txt, data_dir, problem_name, server_name, server_address,
              t2t_usr_dir):
  usr_dir.import_usr_dir(t2t_usr_dir)
  problem = registry.problem(problem_name)
  hparams = tf.contrib.training.HParams(
      data_dir=os.path.expanduser(data_dir))
  problem.get_hparams(hparams)
  request_fn = make_request_fn(server_name, server_address)
  inputs = input_txt
  # predict() returns one (output, score) pair per input, so unpack the
  # single-element list before unpacking the pair.
  outputs = serving_utils.predict([inputs], problem, request_fn)
  outputs, = outputs
  output, score = outputs
  print(output)
  return output, score
def get_environment_spec(self):
  env_spec = standard_atari_env_spec(self.env_name)
  env_spec.wrappers = [[tf_atari_wrappers.IntToBitWrapper, {}]]
  env_spec.simulated_env = True
  env_spec.add_hparam("simulation_random_starts", False)
  env_spec.add_hparam("intrinsic_reward_scale", 0.0)
  initial_frames_problem = registry.problem(self.initial_frames_problem)
  env_spec.add_hparam("initial_frames_problem", initial_frames_problem)
  env_spec.add_hparam("video_num_input_frames", self.num_input_frames)
  env_spec.add_hparam("video_num_target_frames", self.video_num_target_frames)
  return env_spec
def get_data_filepatterns(problems, data_dir, mode):
  """Return the location of a dataset for a given mode."""
  datasets = []
  for problem in problems.split("-"):
    try:
      problem = registry.problem(problem).dataset_filename()
    except ValueError:
      problem, _, _ = problem_hparams.parse_problem_name(problem)
    path = os.path.join(data_dir, problem)
    if mode == tf.estimator.ModeKeys.TRAIN:
      datasets.append("%s-train*" % path)
    else:
      datasets.append("%s-dev*" % path)
  return datasets
def init():
  global input_encoder, output_decoder, fname
  tf.logging.set_verbosity(tf.logging.INFO)
  tf.logging.info(
      "Trying to import poetry/trainer from {}".format(t2t_usr_dir))
  usr_dir.import_usr_dir(t2t_usr_dir)
  print(t2t_usr_dir)
  problem = registry.problem(problem_name)
  hparams = tf.contrib.training.HParams(
      data_dir=os.path.expanduser(data_dir))
  problem.get_hparams(hparams)
  fname = "inputs" if problem.has_inputs else "targets"
  input_encoder = problem.feature_info[fname].encoder
  output_decoder = problem.feature_info["targets"].encoder
def train_agent(problem_name, agent_model_dir,
                event_dir, world_model_dir, epoch_data_dir, hparams,
                epoch=0, is_final_epoch=False):
  """Train the PPO agent in the simulated environment."""
  gym_problem = registry.problem(problem_name)
  ppo_hparams = trainer_lib.create_hparams(hparams.ppo_params)
  ppo_params_names = ["epochs_num", "epoch_length",
                      "learning_rate", "num_agents",
                      "optimization_epochs"]

  for param_name in ppo_params_names:
    ppo_param_name = "ppo_" + param_name
    if ppo_param_name in hparams:
      ppo_hparams.set_hparam(param_name, hparams.get(ppo_param_name))

  ppo_epochs_num = hparams.ppo_epochs_num
  if is_final_epoch:
    ppo_epochs_num *= 2
    ppo_hparams.epoch_length *= 2
  ppo_hparams.save_models_every_epochs = ppo_epochs_num
  ppo_hparams.world_model_dir = world_model_dir
  ppo_hparams.add_hparam("force_beginning_resets", True)

  # Adding model hparams for model specific adjustments
  model_hparams = trainer_lib.create_hparams(hparams.generative_model_params)
  ppo_hparams.add_hparam("model_hparams", model_hparams)

  environment_spec = copy.copy(gym_problem.environment_spec)
  environment_spec.simulation_random_starts = hparams.simulation_random_starts
  environment_spec.intrinsic_reward_scale = hparams.intrinsic_reward_scale

  ppo_hparams.add_hparam("environment_spec", environment_spec)

  with temporary_flags({
      "problem": problem_name,
      "model": hparams.generative_model,
      "hparams_set": hparams.generative_model_params,
      "output_dir": world_model_dir,
      "data_dir": epoch_data_dir,
  }):
    rl_trainer_lib.train(ppo_hparams, event_dir, agent_model_dir, epoch=epoch)
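temporary_flags, used here and in several snippets below, is a helper defined elsewhere in these training scripts; a plausible sketch under the assumption of absl-style module-level FLAGS (not the verbatim tensor2tensor implementation):

import contextlib

@contextlib.contextmanager
def temporary_flags(flag_settings):
  # Remember the current flag values, override them for the duration of the
  # with-block, then restore the originals even if the body raises.
  old_values = {name: getattr(FLAGS, name) for name in flag_settings}
  try:
    for name, value in flag_settings.items():
      setattr(FLAGS, name, value)
    yield
  finally:
    for name, value in old_values.items():
      setattr(FLAGS, name, value)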
def get_environment_spec(self): env_spec = standard_atari_env_spec(self.env_name) env_spec.simulated_env = True env_spec.add_hparam("simulation_random_starts", False) env_spec.add_hparam("simulation_flip_first_random_for_beginning", False) env_spec.add_hparam("intrinsic_reward_scale", 0.0) initial_frames_problem = registry.problem(self.initial_frames_problem) env_spec.add_hparam("initial_frames_problem", initial_frames_problem) env_spec.add_hparam("video_num_input_frames", self.num_input_frames) env_spec.add_hparam("video_num_target_frames", self.video_num_target_frames) return env_spec
def testSingleEvalStepRawSession(self):
  """Illustrate how to run a T2T model in a raw session."""

  # Set model name, hparams, problems as would be set on command line.
  model_name = "transformer"
  FLAGS.hparams_set = "transformer_test"
  FLAGS.problems = "tiny_algo"
  data_dir = "/tmp"  # Used only when a vocab file or such like is needed.

  # Create the problem object, hparams, placeholders, features dict.
  encoders = registry.problem(FLAGS.problems).feature_encoders(data_dir)
  hparams = trainer_utils.create_hparams(FLAGS.hparams_set, data_dir)
  trainer_utils.add_problem_hparams(hparams, FLAGS.problems)
  inputs_ph = tf.placeholder(dtype=tf.int32)  # Just length dimension.
  batch_inputs = tf.reshape(inputs_ph, [1, -1, 1, 1])  # Make it 4D.
  # In INFER mode targets can be None.
  targets_ph = tf.placeholder(dtype=tf.int32)  # Just length dimension.
  batch_targets = tf.reshape(targets_ph, [1, -1, 1, 1])  # Make it 4D.
  features = {
      "inputs": batch_inputs,
      "targets": batch_targets,
      "problem_choice": tf.constant(0),  # We run on the first problem here.
      "input_space_id": tf.constant(hparams.problems[0].input_space_id),
      "target_space_id": tf.constant(hparams.problems[0].target_space_id)
  }

  # Now set a mode and create the graph by invoking model_fn.
  mode = tf.estimator.ModeKeys.EVAL
  estimator_spec = model_builder.model_fn(
      model_name, features, mode, hparams, problem_names=[FLAGS.problems])
  predictions_dict = estimator_spec.predictions
  predictions = tf.squeeze(  # These are not images, axis=2,3 are not needed.
      predictions_dict["predictions"], axis=[2, 3])

  # Having the graph, let's run it on some data.
  with self.test_session() as sess:
    sess.run(tf.global_variables_initializer())
    inputs = "0 1 0"
    targets = "0 1 0"
    # Encode from raw string to numpy input array using problem encoders.
    inputs_numpy = encoders["inputs"].encode(inputs)
    targets_numpy = encoders["targets"].encode(targets)
    # Feed the encoded inputs and targets and run session.
    feed = {inputs_ph: inputs_numpy, targets_ph: targets_numpy}
    np_predictions = sess.run(predictions, feed)
    # Check that the result has the correct shape: batch x length x vocab_size
    # where, for us, batch = 1, length = 3, vocab_size = 4.
    self.assertEqual(np_predictions.shape, (1, 3, 4))
def testSingleTrainStepCall(self):
  """Illustrate how to run a T2T model in a raw session."""

  # Set model name, hparams, problems as would be set on command line.
  model_name = "transformer"
  FLAGS.hparams_set = "transformer_test"
  FLAGS.problems = "tiny_algo"
  data_dir = "/tmp"  # Used only when a vocab file or such like is needed.

  # Create the problem object, hparams, placeholders, features dict.
  encoders = registry.problem(FLAGS.problems).feature_encoders(data_dir)
  hparams = trainer_utils.create_hparams(FLAGS.hparams_set, data_dir)
  trainer_utils.add_problem_hparams(hparams, FLAGS.problems)

  # Now set a mode and create the model.
  mode = tf.estimator.ModeKeys.TRAIN
  model = registry.model(model_name)(hparams, mode)

  # Create placeholder for features and make them batch-sized.
  inputs_ph = tf.placeholder(dtype=tf.int32)  # Just length dimension.
  batch_inputs = tf.reshape(inputs_ph, [1, -1, 1, 1])  # Make it 4D.
  targets_ph = tf.placeholder(dtype=tf.int32)  # Just length dimension.
  batch_targets = tf.reshape(targets_ph, [1, -1, 1, 1])  # Make it 4D.
  features = {
      "inputs": batch_inputs,
      "targets": batch_targets,
      "target_space_id": tf.constant(hparams.problems[0].target_space_id)
  }

  # Call the model.
  predictions, _ = model(features)
  nvars = len(tf.trainable_variables())
  model(features)  # Call again and check that reuse works.
  self.assertEqual(nvars, len(tf.trainable_variables()))

  # Having the graph, let's run it on some data.
  with self.test_session() as sess:
    sess.run(tf.global_variables_initializer())
    inputs = "0 1 0"
    targets = "0 1 0"
    # Encode from raw string to numpy input array using problem encoders.
    inputs_numpy = encoders["inputs"].encode(inputs)
    targets_numpy = encoders["targets"].encode(targets)
    # Feed the encoded inputs and targets and run session.
    feed = {inputs_ph: inputs_numpy, targets_ph: targets_numpy}
    np_predictions = sess.run(predictions, feed)
    # Check that the result has the correct shape: batch x length x vocab_size
    # where, for us, batch = 1, length = 3, vocab_size = 4.
    self.assertEqual(np_predictions.shape, (1, 3, 1, 1, 4))
def decode(estimator, hparams, decode_hp):
  """Decode from estimator. Interactive, from file, or from dataset."""
  if FLAGS.decode_interactive:
    if estimator.config.use_tpu:
      raise ValueError("TPU can only decode from dataset.")
    decoding.decode_interactively(estimator, hparams, decode_hp,
                                  checkpoint_path=FLAGS.checkpoint_path)
  elif FLAGS.decode_from_file:
    if estimator.config.use_tpu:
      raise ValueError("TPU can only decode from dataset.")
    decoding.decode_from_file(estimator, FLAGS.decode_from_file, hparams,
                              decode_hp, FLAGS.decode_to_file,
                              checkpoint_path=FLAGS.checkpoint_path)
    if FLAGS.checkpoint_path and FLAGS.keep_timestamp:
      ckpt_time = os.path.getmtime(FLAGS.checkpoint_path + ".index")
      os.utime(FLAGS.decode_to_file, (ckpt_time, ckpt_time))
  else:
    # Fathom
    predictions = decoding.decode_from_dataset(
        estimator,
        FLAGS.problem,
        hparams,
        decode_hp,
        decode_to_file=FLAGS.decode_to_file,
        dataset_split=dataset_to_t2t_mode(FLAGS.dataset_split),
        return_generator=FLAGS.fathom_output_predictions,
        # Save logs/summaries to a directory with the same name as
        # decode_output_file, for situations where we call decode without
        # write permissions to the model directory.
        output_dir=os.path.splitext(FLAGS.decode_output_file)[0])

    # Fathom
    if FLAGS.fathom_output_predictions:
      print('Assuming only one problem...')
      assert '-' not in FLAGS.problems
      # If a problem instance was already built into hparams, there is no
      # need to create it a second time (creation downloads files from GCS).
      if hasattr(hparams, 'problem'):
        problem = hparams.problem
      else:
        problem = registry.problem(FLAGS.problems)
      problem.output_predictions(predictions=predictions,
                                 num_examples=FLAGS.num_examples)
def main(_):
  tf.logging.set_verbosity(tf.logging.INFO)
  validate_flags()
  usr_dir.import_usr_dir(FLAGS.t2t_usr_dir)
  problem = registry.problem(FLAGS.problem)
  hparams = hparam.HParams(data_dir=os.path.expanduser(FLAGS.data_dir))
  problem.get_hparams(hparams)
  request_fn = make_request_fn()
  while True:
    inputs = FLAGS.inputs_once if FLAGS.inputs_once else input(">> ")
    t1 = datetime.datetime.now()
    outputs = serving_utils.predict([inputs], problem, request_fn)
    t2 = datetime.datetime.now()
    time_taken_for_response = int((t2 - t1).total_seconds() * 1000)
    outputs, = outputs
    output, score = outputs
    if len(score.shape) > 0:  # pylint: disable=g-explicit-length-test
      print_str = """
Input:
{inputs}

Output (Scores [{score}]) (Time [{time}] milliseconds):
{output}
"""
      score_text = ",".join(["{:.3f}".format(s) for s in score])
      print(print_str.format(inputs=inputs, output=output, score=score_text,
                             time=time_taken_for_response))
    else:
      print_str = """
Input:
{inputs}

Output (Score {score:.3f}) (Time {time} milliseconds):
{output}
"""
      print(print_str.format(inputs=inputs, output=output, score=score,
                             time=time_taken_for_response))
    if FLAGS.inputs_once:
      break
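make_request_fn is defined elsewhere in these client scripts; a minimal sketch of the gRPC path, assuming the serving_utils helper shown in the TransformerTranslator example above, with hypothetical --servable_name, --server, and --timeout_secs flags:

def make_request_fn():
  # Build a request function for a TensorFlow Serving gRPC endpoint.
  # The flag names here are assumptions for illustration.
  return serving_utils.make_grpc_request_fn(
      servable_name=FLAGS.servable_name,
      server=FLAGS.server,
      timeout_secs=FLAGS.timeout_secs)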
def testSingleEvalStepRawSession(self):
  """Illustrate how to run a T2T model in a raw session."""

  # Set model name, hparams, problems as would be set on command line.
  model_name = "transformer"
  FLAGS.hparams_set = "transformer_test"
  FLAGS.problems = "tiny_algo"
  data_dir = "/tmp"  # Used only when a vocab file or such like is needed.

  # Create the problem object, hparams, placeholders, features dict.
  encoders = registry.problem(FLAGS.problems).feature_encoders(data_dir)
  hparams = trainer_utils.create_hparams(FLAGS.hparams_set, data_dir)
  trainer_utils.add_problem_hparams(hparams, FLAGS.problems)
  inputs_ph = tf.placeholder(dtype=tf.int32)  # Just length dimension.
  batch_inputs = tf.reshape(inputs_ph, [1, -1, 1, 1])  # Make it 4D.
  # In INFER mode targets can be None.
  targets_ph = tf.placeholder(dtype=tf.int32)  # Just length dimension.
  batch_targets = tf.reshape(targets_ph, [1, -1, 1, 1])  # Make it 4D.
  features = {
      "inputs": batch_inputs,
      "targets": batch_targets,
      "problem_choice": 0,  # We run on the first problem here.
      "input_space_id": hparams.problems[0].input_space_id,
      "target_space_id": hparams.problems[0].target_space_id
  }

  # Now set a mode and create the graph by invoking model_fn.
  mode = tf.estimator.ModeKeys.EVAL
  estimator_spec = model_builder.model_fn(
      model_name, features, mode, hparams, problem_names=[FLAGS.problems])
  predictions_dict = estimator_spec.predictions
  predictions = tf.squeeze(  # These are not images, axis=2,3 are not needed.
      predictions_dict["predictions"], axis=[2, 3])

  # Having the graph, let's run it on some data.
  with self.test_session() as sess:
    sess.run(tf.global_variables_initializer())
    inputs = "0 1 0"
    targets = "0 1 0"
    # Encode from raw string to numpy input array using problem encoders.
    inputs_numpy = encoders["inputs"].encode(inputs)
    targets_numpy = encoders["targets"].encode(targets)
    # Feed the encoded inputs and targets and run session.
    feed = {inputs_ph: inputs_numpy, targets_ph: targets_numpy}
    np_predictions = sess.run(predictions, feed)
    # Check that the result has the correct shape: batch x length x vocab_size
    # where, for us, batch = 1, length = 3, vocab_size = 4.
    self.assertEqual(np_predictions.shape, (1, 3, 4))
def get_predictions(self, num_decodes=2):
  rng = np.random.RandomState(0)
  # num_samples=4
  inputs = rng.randint(0, 255, (4, 2, 64, 64, 3))
  outputs = rng.randint(0, 255, (4, 5, 64, 64, 3))
  targets = rng.randint(0, 255, (4, 5, 64, 64, 3))
  predictions = []
  for input_, output, target in zip(inputs, outputs, targets):
    curr_pred = {"inputs": input_, "outputs": output, "targets": target}
    predictions.append(curr_pred)
  # num_decodes=2
  predictions = [predictions] * num_decodes
  problem = registry.problem("video_stochastic_shapes10k")
  return predictions, problem
def train_eval_and_decode(self):
  """Does eval and decode after training every eval_freq_in_steps."""
  eval_steps = self._hparams.eval_freq_in_steps
  packed_dataset = "_packed" in self._hparams.problem.name
  for i in range(0, self._train_spec.max_steps, eval_steps):
    if packed_dataset and i > 0:
      problem = registry.problem(self._hparams.problem.name + "_packed")
      p_hparams = problem.get_hparams(self._hparams)
      self._hparams.problem = problem
      self._hparams.problem_hparams = p_hparams
    self._estimator.train(self._train_spec.input_fn,
                          steps=eval_steps,
                          hooks=self._train_spec.hooks)
    self._estimator.evaluate(self._eval_spec.input_fn,
                             steps=self._eval_spec.steps,
                             hooks=self._eval_spec.hooks)
    if packed_dataset:
      problem = registry.problem(
          self._hparams.problem.name.replace("_packed", ""))
      p_hparams = problem.get_hparams(self._hparams)
      self._hparams.problem = problem
      self._hparams.problem_hparams = p_hparams
    self.decode(dataset_split=tf.estimator.ModeKeys.EVAL)
def evaluate_world_model(simulated_problem_name, problem_name, hparams,
                         world_model_dir, epoch_data_dir, tmp_dir,
                         autoencoder_path=None):
  """Generate simulated environment data and return reward accuracy."""
  gym_simulated_problem = registry.problem(simulated_problem_name)
  gym_problem = registry.problem(problem_name)
  sim_steps = hparams.simulated_env_generator_num_steps
  gym_simulated_problem.settable_num_steps = sim_steps
  gym_simulated_problem.real_env_problem = gym_problem
  gym_simulated_problem.simulation_random_starts = False
  gym_simulated_problem.intrinsic_reward_scale = 0.
  with temporary_flags({
      "problem": problem_name,
      "model": hparams.generative_model,
      "hparams_set": hparams.generative_model_params,
      "data_dir": epoch_data_dir,
      "output_dir": world_model_dir,
      "autoencoder_path": autoencoder_path,
  }):
    gym_simulated_problem.generate_data(epoch_data_dir, tmp_dir)
  n = max(1., gym_simulated_problem.dones)
  model_reward_accuracy = (
      gym_simulated_problem.successful_episode_reward_predictions / float(n))
  return model_reward_accuracy
def get_environment_spec(self):
  env_spec = standard_atari_env_spec(
      self.env_name,
      simulated=True,
      resize_height_factor=self.resize_height_factor,
      resize_width_factor=self.resize_width_factor)
  env_spec.add_hparam("simulation_random_starts", True)
  env_spec.add_hparam("simulation_flip_first_random_for_beginning", True)
  env_spec.add_hparam("intrinsic_reward_scale", 0.0)
  initial_frames_problem = registry.problem(self.initial_frames_problem)
  env_spec.add_hparam("initial_frames_problem", initial_frames_problem)
  env_spec.add_hparam("video_num_input_frames", self.num_input_frames)
  env_spec.add_hparam("video_num_target_frames", self.video_num_target_frames)
  return env_spec
def train_agent(problem_name, agent_model_dir,
                event_dir, world_model_dir, epoch_data_dir, hparams,
                autoencoder_path=None, epoch=0):
  """Train the PPO agent in the simulated environment."""
  gym_problem = registry.problem(problem_name)
  ppo_hparams = trainer_lib.create_hparams(hparams.ppo_params)
  ppo_epochs_num = hparams.ppo_epochs_num
  ppo_hparams.epochs_num = ppo_epochs_num
  ppo_hparams.simulated_environment = True
  ppo_hparams.simulation_random_starts = hparams.simulation_random_starts
  ppo_hparams.intrinsic_reward_scale = hparams.intrinsic_reward_scale
  ppo_hparams.eval_every_epochs = 50
  ppo_hparams.save_models_every_epochs = ppo_epochs_num
  ppo_hparams.epoch_length = hparams.ppo_epoch_length
  ppo_hparams.num_agents = hparams.ppo_num_agents
  ppo_hparams.problem = gym_problem
  ppo_hparams.world_model_dir = world_model_dir
  if hparams.ppo_learning_rate:
    ppo_hparams.learning_rate = hparams.ppo_learning_rate
  # 4x for the StackAndSkipWrapper minus one to always finish for reporting.
  ppo_time_limit = (ppo_hparams.epoch_length - 1) * 4

  in_graph_wrappers = [
      (TimeLimitWrapper, {"timelimit": ppo_time_limit}),
      (StackAndSkipWrapper, {"skip": 4})]
  in_graph_wrappers += gym_problem.in_graph_wrappers
  ppo_hparams.add_hparam("in_graph_wrappers", in_graph_wrappers)

  with temporary_flags({
      "problem": problem_name,
      "model": hparams.generative_model,
      "hparams_set": hparams.generative_model_params,
      "output_dir": world_model_dir,
      "data_dir": epoch_data_dir,
      "autoencoder_path": autoencoder_path,
  }):
    rl_trainer_lib.train(ppo_hparams, gym_problem.env_name, event_dir,
                         agent_model_dir, epoch=epoch)
def get_environment_spec(self):
  env_spec = rl.standard_atari_env_spec(self.env_name)
  env_spec.wrappers = [
      [tf_atari_wrappers.IntToBitWrapper, {}],
      [tf_atari_wrappers.StackWrapper, {"history": 4}]
  ]
  env_spec.simulated_env = True
  env_spec.add_hparam("simulation_random_starts", True)
  env_spec.add_hparam("simulation_flip_first_random_for_beginning", True)
  env_spec.add_hparam("intrinsic_reward_scale", 0.0)
  initial_frames_problem = registry.problem(self.initial_frames_problem)
  env_spec.add_hparam("initial_frames_problem", initial_frames_problem)
  env_spec.add_hparam("video_num_input_frames", self.num_input_frames)
  env_spec.add_hparam("video_num_target_frames", self.video_num_target_frames)
  return env_spec
def add_problem_hparams(hparams, problems):
  """Add problem hparams for the problems."""
  hparams.problems = []
  hparams.problem_instances = []
  for problem_name in problems.split("-"):
    try:
      problem = registry.problem(problem_name)
      p_hparams = problem.internal_hparams(hparams)
    except ValueError:
      problem = None
      p_hparams = problem_hparams.problem_hparams(problem_name, hparams)
    hparams.problem_instances.append(problem)
    hparams.problems.append(p_hparams)
  return hparams
def __init__(self, FLAGS, server_address='127.0.0.1:9000'):
  print('Initializing up2down_class.......')
  self.FLAGS = FLAGS
  self.server_address = server_address
  tf.logging.set_verbosity(tf.logging.ERROR)
  usr_dir.import_usr_dir(self.FLAGS.t2t_usr_dir)

  # hparams: not important here, but necessary; get_hparams() raises an
  # assertion error without them.
  self.hparams = hparam.HParams(
      data_dir=os.path.expanduser(self.FLAGS.t2t_usr_dir))

  # problem
  self.problem = registry.problem(self.FLAGS.problem)
  self.problem.get_hparams(self.hparams)

  # model request server
  self.request_fn = self.make_request_fn(self.FLAGS.model,
                                         self.server_address)
def add_problem_hparams(hparams, problems):
  """Add problem hparams for the problems."""
  hparams.problems = []
  hparams.problem_instances = []
  for problem_name in problems.split("-"):
    try:
      problem = registry.problem(problem_name)
    except LookupError:
      all_problem_names = sorted(registry.list_problems())
      error_lines = ["%s not in the set of supported problems:" %
                     problem_name] + all_problem_names
      error_msg = "\n * ".join(error_lines)
      raise LookupError(error_msg)
    p_hparams = problem.get_hparams(hparams)
    hparams.problem_instances.append(problem)
    hparams.problems.append(p_hparams)
def _add_problem_hparams(self, hparams, problem_name):
  """Add problem hparams for the problems.

  This method corresponds to create_hparams() in tensor2tensor's
  trainer_lib module, but replaces the feature encoders with
  DummyFeatureEncoder's.

  Args:
    hparams (Hparams): Model hyper parameters.
    problem_name (string): T2T problem name.

  Returns:
    hparams object.

  Raises:
    LookupError if the problem name is not in the registry or uses the
    old style problem_hparams.
  """
  if self.pop_id >= 0:
    try:
      hparams.add_hparam("pop_id", self.pop_id)
    except ValueError:  # add_hparam raises ValueError if the hparam exists.
      if hparams.pop_id != self.pop_id:
        logging.warn("T2T pop_id does not match (%d!=%d)"
                     % (hparams.pop_id, self.pop_id))
  try:
    hparams.add_hparam("max_terminal_id", self.max_terminal_id)
  except ValueError:
    if hparams.max_terminal_id != self.max_terminal_id:
      logging.warn("T2T max_terminal_id does not match (%d!=%d)"
                   % (hparams.max_terminal_id, self.max_terminal_id))
  try:
    hparams.add_hparam("closing_bracket_id", self.pop_id)
  except ValueError:
    if hparams.closing_bracket_id != self.pop_id:
      logging.warn("T2T closing_bracket_id does not match (%d!=%d)"
                   % (hparams.closing_bracket_id, self.pop_id))
  problem = registry.problem(problem_name)
  problem._encoders = {
      "inputs": DummyTextEncoder(vocab_size=self.src_vocab_size),
      "targets": DummyTextEncoder(vocab_size=self.trg_vocab_size)
  }
  p_hparams = problem.get_hparams(hparams)
  hparams.problem = problem
  hparams.problem_hparams = p_hparams
  return hparams
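DummyTextEncoder, used above to bypass vocabulary-file loading, is defined elsewhere in that project; a plausible sketch, assuming only the reported vocab size matters downstream (the real class may differ):

class DummyTextEncoder(text_encoder.TextEncoder):
  """Stand-in encoder that only reports a vocabulary size.

  Hypothetical sketch: it deliberately cannot encode or decode, since the
  surrounding code only needs vocab_size when building the model graph.
  """

  def __init__(self, vocab_size):
    super(DummyTextEncoder, self).__init__(num_reserved_ids=0)
    self._vocab_size = vocab_size

  def encode(self, s):
    raise NotImplementedError("DummyTextEncoder cannot encode text.")

  def decode(self, ids):
    raise NotImplementedError("DummyTextEncoder cannot decode ids.")

  @property
  def vocab_size(self):
    return self._vocab_size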
def generate_real_env_data(problem_name, agent_policy_path, hparams, data_dir,
                           tmp_dir, autoencoder_path=None, eval_phase=False):
  """Run the agent against the real environment and return mean reward."""
  tf.gfile.MakeDirs(data_dir)
  with temporary_flags({
      "problem": problem_name,
      "agent_policy_path": agent_policy_path,
      "autoencoder_path": autoencoder_path,
      "only_use_ae_for_policy": True,
  }):
    gym_problem = registry.problem(problem_name)
    gym_problem.settable_num_steps = hparams.true_env_generator_num_steps
    gym_problem.eval_phase = eval_phase
    gym_problem.generate_data(data_dir, tmp_dir)
    mean_reward = gym_problem.sum_of_rewards / (1.0 + gym_problem.dones)

  return mean_reward
def _create_hparams(self, src_vocab_size, trg_vocab_size,
                    hparams_set_name, problem_name):
  """Creates hparams object.

  This method corresponds to create_hparams() in tensor2tensor's
  trainer_utils module, but replaces the feature encoders with
  DummyFeatureEncoder's.

  Args:
    src_vocab_size (int): Source vocabulary size.
    trg_vocab_size (int): Target vocabulary size.
    hparams_set_name (string): T2T hparams set name.
    problem_name (string): T2T problem name.

  Returns:
    hparams object.

  Raises:
    LookupError if the problem name is not in the registry or uses the
    old style problem_hparams.
  """
  hparams = registry.hparams(hparams_set_name)()
  problem = registry.problem(problem_name)

  # The following hack is necessary to prevent the problem from creating
  # the default TextEncoders, which would fail due to the lack of a
  # vocabulary file.
  problem._encoders = {
      "inputs": DummyTextEncoder(vocab_size=src_vocab_size),
      "targets": DummyTextEncoder(vocab_size=trg_vocab_size)
  }
  try:
    hparams.add_hparam("max_terminal_id", self.max_terminal_id)
  except ValueError:  # add_hparam raises ValueError if the hparam exists.
    if hparams.max_terminal_id != self.max_terminal_id:
      logging.warn("T2T max_terminal_id does not match (%d!=%d)"
                   % (hparams.max_terminal_id, self.max_terminal_id))
  try:
    hparams.add_hparam("closing_bracket_id", self.pop_id)
  except ValueError:
    if hparams.closing_bracket_id != self.pop_id:
      logging.warn("T2T closing_bracket_id does not match (%d!=%d)"
                   % (hparams.closing_bracket_id, self.pop_id))
  p_hparams = problem.get_hparams(hparams)
  hparams.problem_instances = [problem]
  hparams.problems = [p_hparams]
  return hparams
def encode_env_frames(problem_name, ae_problem_name, autoencoder_path,
                      epoch_data_dir):
  """Encode all frames from problem_name and write out as ae_problem_name."""
  with tf.Graph().as_default():
    ae_hparams = trainer_lib.create_hparams("autoencoder_discrete_pong",
                                            problem_name=problem_name)
    problem = ae_hparams.problem
    model = registry.model("autoencoder_ordered_discrete")(
        ae_hparams, tf.estimator.ModeKeys.EVAL)

    ae_problem = registry.problem(ae_problem_name)
    ae_training_paths = ae_problem.training_filepaths(epoch_data_dir, 10, True)
    ae_eval_paths = ae_problem.dev_filepaths(epoch_data_dir, 1, True)

    skip_train = False
    skip_eval = False
    for path in ae_training_paths:
      if tf.gfile.Exists(path):
        skip_train = True
        break
    for path in ae_eval_paths:
      if tf.gfile.Exists(path):
        skip_eval = True
        break

    # Encode train data
    if not skip_train:
      dataset = problem.dataset(tf.estimator.ModeKeys.TRAIN, epoch_data_dir,
                                shuffle_files=False, output_buffer_size=100,
                                preprocess=False)
      encode_dataset(model, dataset, problem, ae_hparams, autoencoder_path,
                     ae_training_paths)

    # Encode eval data
    if not skip_eval:
      dataset = problem.dataset(tf.estimator.ModeKeys.EVAL, epoch_data_dir,
                                shuffle_files=False, output_buffer_size=100,
                                preprocess=False)
      encode_dataset(model, dataset, problem, ae_hparams, autoencoder_path,
                     ae_eval_paths)
def __init__(self, config):
  os.environ["CUDA_VISIBLE_DEVICES"] = config.GPU_DEVICE
  FLAGS.data_dir = config.VOCAB_DIR
  FLAGS.problems = config.PROBLEM_NAME
  FLAGS.model = config.MODEL_NAME
  FLAGS.hparams_set = config.HPARAMS_SET
  FLAGS.output_dir = config.MODEL_DIR
  FLAGS.decode_hparams = config.DECODE_HPARAMS
  batch_size = config.BATCH_SIZE

  self.hparams = create_hparams()
  self.encoders = registry.problem(FLAGS.problems).feature_encoders(
      FLAGS.data_dir)
  self.ckpt = tf.train.get_checkpoint_state(
      FLAGS.output_dir).model_checkpoint_path

  self.inputs_ph = tf.placeholder(
      shape=(batch_size, None), dtype=tf.int32)  # Just length dimension.
  self.batch_inputs = tf.reshape(
      self.inputs_ph, [batch_size, -1, 1, 1])  # Make it 4D.
  self.features = {"inputs": self.batch_inputs}

  # Prepare the model and the graph when model runs on features.
  tf.logging.info(f"[{file_name}] SessFieldPredict: register T2TModel")
  self.model = registry.model(FLAGS.model)(self.hparams,
                                           tf.estimator.ModeKeys.PREDICT)
  self.model_spec = self.model.estimator_spec_predict(self.features)
  self.prediction = self.model_spec.predictions
  self.inputs_vocab = self.hparams.problems[0].vocabulary["inputs"]
  self.targets_vocab = self.hparams.problems[0].vocabulary["targets"]
  self.problem_name = FLAGS.problems

  gpu_options = tf.GPUOptions(
      per_process_gpu_memory_fraction=config.GPU_MEM_FRAC)
  self.sess_config = tf.ConfigProto(gpu_options=gpu_options)
  self.sess_config.gpu_options.allow_growth = config.GPU_MEM_GROWTH
  self.batch_size = batch_size
  tf.logging.info(f"[{file_name}] SessFieldPredict: registered")

  self.sess = tf.Session(config=self.sess_config)
  saver = tf.train.Saver()
  tf.logging.info(f"[{file_name}] Decode: model loading ... ")
  saver.restore(self.sess, self.ckpt)
  tf.logging.info(f"[{file_name}] Decode: model loaded.")
def __init__(self, model_name, problem_name, hparams_set, queries,
             output_dir=None, data_dir=None, model_dir=None, tmp_dir=None,
             export_dir=None, decode_hparams="", default_tmp=None,
             tfms_path=None, mode="train"):
  self.model_name = model_name
  self.model = registry.model(model_name)
  self.problem_name = problem_name
  self.hparams_set = hparams_set
  self.queries = queries
  self.problem = registry.problem(self.problem_name)
  self.problem.mode = mode

  tmp = tempfile.mkdtemp() if default_tmp is None else default_tmp
  self.output_dir = output_dir if output_dir is not None else tmp
  self.data_dir = data_dir if data_dir is not None else tmp
  self.model_dir = model_dir if model_dir is not None else tmp
  self.tmp_dir = tmp_dir if tmp_dir is not None else tmp
  self.export_dir = (export_dir if export_dir is not None
                     else tmp + "/export")
  self.decode_hparams = decode_hparams  # HACK
  self.tf_model_server_path = tfms_path

  self.train_dataset = None
  self.has_run_datagen = False

  self._lookup_hparams()
def generate_real_env_data(problem_name, agent_policy_path, hparams, data_dir,
                           tmp_dir, autoencoder_path=None, eval_phase=False):
  """Run the agent against the real environment and return mean reward."""
  tf.gfile.MakeDirs(data_dir)
  with temporary_flags({
      "problem": problem_name,
      "agent_policy_path": agent_policy_path,
      "autoencoder_path": autoencoder_path,
      "only_use_ae_for_policy": True,
  }):
    gym_problem = registry.problem(problem_name)
    gym_problem.settable_num_steps = hparams.true_env_generator_num_steps
    gym_problem.eval_phase = eval_phase
    gym_problem.generate_data(data_dir, tmp_dir)
    mean_reward = (gym_problem.statistics.sum_of_rewards /
                   (1.0 + gym_problem.statistics.number_of_dones))

  return mean_reward
def _testImg2imgTransformer(self, net):
  batch_size = 3
  hparams = image_transformer_2d.img2img_transformer2d_tiny()
  hparams.data_dir = ""
  p_hparams = registry.problem("image_celeba").get_hparams(hparams)
  inputs = np.random.random_integers(0, high=255, size=(3, 4, 4, 3))
  targets = np.random.random_integers(0, high=255, size=(3, 8, 8, 3))
  with self.test_session() as session:
    features = {
        "inputs": tf.constant(inputs, dtype=tf.int32),
        "targets": tf.constant(targets, dtype=tf.int32),
        "target_space_id": tf.constant(1, dtype=tf.int32),
    }
    model = net(hparams, tf.estimator.ModeKeys.TRAIN, p_hparams)
    logits, _ = model(features)
    session.run(tf.global_variables_initializer())
    res = session.run(logits)
  self.assertEqual(res.shape, (batch_size, 8, 8, 3, 256))
def testMultiModel(self):
  x = np.random.random_integers(0, high=255, size=(3, 5, 5, 3))
  y = np.random.random_integers(0, high=9, size=(3, 5, 1, 1))
  hparams = multimodel.multimodel_tiny()
  hparams.add_hparam("data_dir", "")
  problem = registry.problem("image_cifar10")
  p_hparams = problem.get_hparams(hparams)
  hparams.problems = [p_hparams]
  with self.test_session() as session:
    features = {
        "inputs": tf.constant(x, dtype=tf.int32),
        "targets": tf.constant(y, dtype=tf.int32),
        "target_space_id": tf.constant(1, dtype=tf.int32),
    }
    model = multimodel.MultiModel(
        hparams, tf.estimator.ModeKeys.TRAIN, p_hparams)
    logits, _ = model(features)
    session.run(tf.global_variables_initializer())
    res = session.run(logits)
  self.assertEqual(res.shape, (3, 1, 1, 1, 10))
def train_agent(problem_name, agent_model_dir,
                event_dir, world_model_dir, epoch_data_dir, hparams,
                autoencoder_path=None, epoch=0):
  """Train the PPO agent in the simulated environment."""
  gym_problem = registry.problem(problem_name)
  ppo_hparams = trainer_lib.create_hparams(hparams.ppo_params)
  ppo_params_names = ["epochs_num", "epoch_length",
                      "learning_rate", "num_agents",
                      "optimization_epochs"]

  for param_name in ppo_params_names:
    ppo_param_name = "ppo_" + param_name
    if ppo_param_name in hparams:
      ppo_hparams.set_hparam(param_name, hparams.get(ppo_param_name))

  ppo_epochs_num = hparams.ppo_epochs_num
  ppo_hparams.save_models_every_epochs = ppo_epochs_num
  ppo_hparams.world_model_dir = world_model_dir
  ppo_hparams.add_hparam("force_beginning_resets", True)

  # Adding model hparams for model specific adjustments
  model_hparams = trainer_lib.create_hparams(hparams.generative_model_params)
  ppo_hparams.add_hparam("model_hparams", model_hparams)

  environment_spec = copy.copy(gym_problem.environment_spec)
  environment_spec.simulation_random_starts = hparams.simulation_random_starts
  environment_spec.intrinsic_reward_scale = hparams.intrinsic_reward_scale

  ppo_hparams.add_hparam("environment_spec", environment_spec)

  with temporary_flags({
      "problem": problem_name,
      "model": hparams.generative_model,
      "hparams_set": hparams.generative_model_params,
      "output_dir": world_model_dir,
      "data_dir": epoch_data_dir,
      "autoencoder_path": autoencoder_path,
  }):
    rl_trainer_lib.train(ppo_hparams, event_dir, agent_model_dir, epoch=epoch)
def generate_data_for_registered_problem(problem_name):
  tf.logging.info("Generating data for %s.", problem_name)
  if FLAGS.num_shards:
    raise ValueError("--num_shards should not be set for registered Problem.")
  problem = registry.problem(problem_name)
  task_id = None if FLAGS.task_id < 0 else FLAGS.task_id
  data_dir = os.path.expanduser(FLAGS.data_dir)
  tmp_dir = os.path.expanduser(FLAGS.tmp_dir)
  if task_id is None and problem.multiprocess_generate:
    if FLAGS.task_id_start != -1:
      assert FLAGS.task_id_end != -1
      task_id_start = FLAGS.task_id_start
      task_id_end = FLAGS.task_id_end
    else:
      task_id_start = 0
      task_id_end = problem.num_generate_tasks
    pool = multiprocessing.Pool(processes=FLAGS.num_concurrent_processes)
    problem.prepare_to_generate(data_dir, tmp_dir)
    args = [(problem_name, data_dir, tmp_dir, task_id)
            for task_id in range(task_id_start, task_id_end)]
    pool.map(generate_data_in_process, args)
  else:
    problem.generate_data(data_dir, tmp_dir, task_id)
def main(_):
  tf.logging.set_verbosity(tf.logging.INFO)
  usr_dir.import_usr_dir(FLAGS.t2t_usr_dir)
  problem = registry.problem(FLAGS.problem)
  hparams = tf.contrib.training.HParams(
      data_dir=os.path.expanduser(FLAGS.data_dir))
  problem.get_hparams(hparams)

  fname = "inputs" if problem.has_inputs else "targets"
  input_encoder = problem.feature_info[fname].encoder
  output_decoder = problem.feature_info["targets"].encoder

  stub = create_stub()

  while True:
    prompt = ">> "
    if FLAGS.inputs_once:
      inputs = FLAGS.inputs_once
    else:
      inputs = input(prompt)

    input_ids = encode(inputs, input_encoder)
    output_ids = query(stub, input_ids, feature_name=fname)
    outputs = decode(output_ids, output_decoder)

    print_str = """
Input:
{inputs}

Output:
{outputs}
"""
    print(print_str.format(inputs=inputs, outputs=outputs))
    if FLAGS.inputs_once:
      break
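The encode, decode, query, and create_stub helpers referenced above live elsewhere in that script; a minimal sketch of the encoder-facing pair, assuming EOS termination as in the scoring examples in this section (query and create_stub are serving-specific and omitted):

def encode(input_str, encoder):
  # Encode to token ids and append EOS so the model knows where to stop.
  return encoder.encode(input_str) + [text_encoder.EOS_ID]

def decode(ids, decoder):
  # Drop everything from the first EOS onward, then decode back to text.
  ids = list(ids)
  if text_encoder.EOS_ID in ids:
    ids = ids[:ids.index(text_encoder.EOS_ID)]
  return decoder.decode(ids)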
def main(_):
  tf.logging.set_verbosity(tf.logging.INFO)
  validate_flags()
  usr_dir.import_usr_dir(FLAGS.t2t_usr_dir)
  problem = registry.problem(FLAGS.problem)
  hparams = tf.contrib.training.HParams(
      data_dir=os.path.expanduser(FLAGS.data_dir))
  problem.get_hparams(hparams)
  request_fn = make_request_fn()
  while True:
    inputs = FLAGS.inputs_once if FLAGS.inputs_once else input(">> ")
    outputs = serving_utils.predict([inputs], problem, request_fn)
    outputs, = outputs
    output, score = outputs
    print_str = """
Input:
{inputs}

Output (Score {score:.3f}):
{output}
"""
    print(print_str.format(inputs=inputs, output=output, score=score))
    if FLAGS.inputs_once:
      break
def evaluate_world_model(simulated_problem_name, problem_name, hparams,
                         world_model_dir, epoch_data_dir, tmp_dir,
                         autoencoder_path=None):
  """Generate simulated environment data and return reward accuracy."""
  gym_simulated_problem = registry.problem(simulated_problem_name)
  sim_steps = hparams.simulated_env_generator_num_steps
  gym_simulated_problem.settable_num_steps = sim_steps
  with temporary_flags({
      "problem": problem_name,
      "model": hparams.generative_model,
      "hparams_set": hparams.generative_model_params,
      "data_dir": epoch_data_dir,
      "output_dir": world_model_dir,
      "autoencoder_path": autoencoder_path,
  }):
    gym_simulated_problem.generate_data(epoch_data_dir, tmp_dir)
  n = max(1., gym_simulated_problem.statistics.number_of_dones)
  model_reward_accuracy = (
      gym_simulated_problem.statistics.successful_episode_reward_predictions /
      float(n))
  old_path = os.path.join(epoch_data_dir, "debug_frames_env")
  new_path = os.path.join(epoch_data_dir, "debug_frames_env_eval")
  tf.gfile.Rename(old_path, new_path)
  return model_reward_accuracy
def t2t_score_file(filename):
  """Score each line in a file and return the scores.

  :param str filename: file with tab-separated source/target pairs to score
  """
  # Prepare model.
  hparams = create_t2t_hparams()
  encoders = registry.problem(FLAGS_problem).feature_encoders(FLAGS_data_dir)

  # Prepare features for feeding into the model.
  inputs_ph = tf.placeholder(dtype=tf.int32,
                             shape=(None, None))  # Just length dimension.
  targets_ph = tf.placeholder(dtype=tf.int32,
                              shape=(None, None))  # Just length dimension.

  features = {
      "inputs": inputs_ph,
      "targets": targets_ph,
  }

  # Prepare the model and the graph when model runs on features.
  model = registry.model(FLAGS_model)(hparams, tf.estimator.ModeKeys.EVAL)
  assert isinstance(model, tensor2tensor.models.transformer.Transformer)

  # final_output: tensor of logits with shape
  #   [batch_size, O, P, body_output_size].
  # losses: either a single loss as a scalar, a list, a tensor (to be
  #   averaged), or a dictionary of losses.
  final_output, losses = model(features)
  assert isinstance(losses, dict)
  saver = tf.train.Saver()

  sess = tf.Session()
  # Load weights from checkpoint.
  ckpts = tf.train.get_checkpoint_state(FLAGS_output_dir)
  ckpt = ckpts.model_checkpoint_path
  saver.restore(sess, ckpt)

  # writer = tf.summary.FileWriter('logs', sess.graph)
  # writer.close()

  # Run on each line.
  results = []
  for line in open(filename):
    tab_split = line.split("\t")
    if len(tab_split) > 2:
      raise ValueError("Each line must have at most one tab separator.")
    assert len(tab_split) == 2
    targets = tab_split[1].strip()
    inputs = tab_split[0].strip()

    # Run encoders and append EOS symbol.
    targets_numpy = encoders["targets"].encode(targets) + [text_encoder.EOS_ID]
    inputs_numpy = encoders["inputs"].encode(inputs) + [text_encoder.EOS_ID]

    # Prepare the feed.
    feed = {
        inputs_ph: [inputs_numpy],
        targets_ph: [targets_numpy]
    }

    np_res = sess.run({"losses": losses, "final_output": final_output},
                      feed_dict=feed)
    pprint(np_res)

  tvars = tf.trainable_variables()
  print('t2t inputs_ph:', inputs_ph, inputs_numpy)
  print('t2t targets_ph:', targets_ph, targets_numpy)

  return sess, tvars, inputs_ph, targets_ph, losses