Example 1
def preprocess_example(self, example, mode, hparams):
    """Runtime preprocessing.

    Return a dict, or a tf.data.Dataset built with
    tf.data.Dataset.from_tensor_slices (if you want each example to turn
    into multiple examples).

    Args:
      example: dict, features
      mode: tf.estimator.ModeKeys
      hparams: HParams, model hyperparameters

    Returns:
      dict or Dataset
    """
    global data_alpha, data_beta, desired_alpha, invert_ratio
    example = preprocess_example_common(example, hparams, mode)
    if mode == tf.estimator.ModeKeys.TRAIN:
        # example["inputs"] = usr_utils.print_shape(example["inputs"], "inputs", dtype=tf.int64)
        tf.logging.info("Filter by length!")
        if hparams.lt_data_alpha < hparams.lt_desired_alpha:  # Normal case: make translations longer.
            data_alpha = hparams.lt_data_alpha
            data_beta = hparams.lt_data_beta
            desired_alpha = hparams.lt_desired_alpha
            invert_ratio = False
        else:  # Make translations shorter.
            tf.logging.info("Invert ratios (make translations shorter)")
            data_alpha = 1.0 - hparams.lt_data_alpha
            data_beta = hparams.lt_data_beta
            desired_alpha = 1.0 - hparams.lt_desired_alpha
            invert_ratio = True
        dataset = tf.data.Dataset.from_tensors(example)
        filtered = dataset.filter(filter_rejection_sampling)
        return filtered
    return example
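
The example above delegates the actual accept/reject decision to filter_rejection_sampling, whose body is not part of the snippet. Below is a minimal, purely hypothetical sketch of such a predicate for dataset.filter, assuming the module-level globals set above and an acceptance rule based on the target/source length ratio (the rule itself is an assumption, not the original implementation):

def filter_rejection_sampling(example):
    # Hypothetical predicate: accept with higher probability when the
    # length ratio is close to desired_alpha. Only the function name
    # appears in the original snippet; the body here is illustrative.
    src_len = tf.cast(tf.size(example["inputs"]), tf.float32)
    tgt_len = tf.cast(tf.size(example["targets"]), tf.float32)
    ratio = tgt_len / tf.maximum(src_len, 1.0)
    if invert_ratio:  # Python-level flag set in preprocess_example above.
        ratio = 1.0 - ratio
    accept_prob = tf.exp(-tf.abs(ratio - desired_alpha) / data_beta)
    return tf.random_uniform([]) < accept_prob  # Scalar boolean tensor.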
Example 2
def preprocess_example(self, example, mode, hparams):
    # Create loss mask if necessary.
    if hparams.use_loss_mask and mode == tf.estimator.ModeKeys.TRAIN:
        expanded_target_roots = usr_utils.expand_memory_by_pop_1d(
            tf.equal(example["targets"], hparams.pop_id),
            example["target_roots"])
        expanded_target_roots = expanded_target_roots[:-1]  # Compensate for offset.
        example["loss_mask"] = tf.cast(
            tf.logical_or(
                tf.equal(expanded_target_roots, text_encoder.EOS_ID),
                tf.greater(expanded_target_roots, hparams.max_terminal_id),
            ), tf.int32)
        while len(example["loss_mask"].get_shape()) < 3:
            example["loss_mask"] = tf.expand_dims(example["loss_mask"], axis=-1)
    # Annoyingly this is done for 'inputs' in problem.serving_input_fn,
    # but not for other input modalities.
    while len(example["target_roots"].get_shape()) < 3:
        example["target_roots"] = tf.expand_dims(example["target_roots"], axis=-1)
    return preprocess_example_common(example, hparams, mode)
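
The while loops above pad features to rank 3, mirroring what problem.serving_input_fn already does for 'inputs'. A standalone illustration of the idiom (not taken from the original code):

import tensorflow as tf

t = tf.constant([3, 7, 1])          # shape [3]
while len(t.get_shape()) < 3:
    t = tf.expand_dims(t, axis=-1)  # [3] -> [3, 1] -> [3, 1, 1]
print(t.get_shape())                # (3, 1, 1)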
Example 3
def preprocess_example(self, example, unused_mode, unused_hparams):
  example = problem.preprocess_example_common(example, unused_mode,
                                              unused_hparams)
  example["targets"] = tf.reshape(example["targets"], [1, 1, self.ntasks])
  return example
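
Here the targets are a fixed-size vector of per-task labels rather than a token sequence, so they are reshaped into a rank-3 tensor with the tasks along the last axis. A small illustration, assuming self.ntasks == 4 (the value 4 is made up for the example):

import tensorflow as tf

targets = tf.constant([0, 1, 1, 0])       # shape [4], one label per task
targets = tf.reshape(targets, [1, 1, 4])  # shape [1, 1, 4]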
Example 4
def preprocess_example(self, example, mode, hparams):
    return problem.preprocess_example_common(example, mode, hparams)