Example no. 1
    def _compute_loss(self, outputs, src_ids_out, src_length, tgt_ids_out, tgt_length,
                      mu_src, logvar_src, mu_tgt, logvar_tgt, params, mode):
        
        if mode == "Training":
            mode = tf.estimator.ModeKeys.TRAIN            
        else:
            mode = tf.estimator.ModeKeys.EVAL            
          
        if self.Loss_type == "Cross_Entropy":
            if isinstance(outputs, dict):
                logits_src_from_src = outputs["logits_src_from_src"]
                logits_src_from_tgt = outputs["logits_src_from_tgt"]
                logits_tgt_from_src = outputs["logits_tgt_from_src"]
                logits_tgt_from_tgt = outputs["logits_tgt_from_tgt"]
            else:
                raise ValueError("outputs is expected to be a dict with the four logits tensors")

            loss_src_from_src, loss_normalizer_src_from_src, loss_token_normalizer_src_from_src = \
                    cross_entropy_sequence_loss(logits_src_from_src,
                                                src_ids_out,
                                                src_length + 1,
                                                label_smoothing=params.get("label_smoothing", 0.0),
                                                average_in_time=params.get("average_loss_in_time", True),
                                                mode=mode)

            loss_src_from_tgt, loss_normalizer_src_from_tgt, loss_token_normalizer_src_from_tgt = \
                    cross_entropy_sequence_loss(logits_src_from_tgt,
                                                src_ids_out,
                                                src_length + 1,
                                                label_smoothing=params.get("label_smoothing", 0.0),
                                                average_in_time=params.get("average_loss_in_time", True),
                                                mode=mode)

            loss_tgt_from_src, loss_normalizer_tgt_from_src, loss_token_normalizer_tgt_from_src = \
                    cross_entropy_sequence_loss(logits_tgt_from_src,
                                                tgt_ids_out,
                                                tgt_length + 1,
                                                label_smoothing=params.get("label_smoothing", 0.0),
                                                average_in_time=params.get("average_loss_in_time", True),
                                                mode=mode)

            loss_tgt_from_tgt, loss_normalizer_tgt_from_tgt, loss_token_normalizer_tgt_from_tgt = \
                    cross_entropy_sequence_loss(logits_tgt_from_tgt,
                                                tgt_ids_out,
                                                tgt_length + 1,
                                                label_smoothing=params.get("label_smoothing", 0.0),
                                                average_in_time=params.get("average_loss_in_time", True),
                                                mode=mode)
            
            # ----- KL divergence -----
            # Closed-form KL between the approximate posterior N(mu, exp(logvar)) and the
            # standard normal prior: -0.5 * sum(1 + logvar - mu^2 - exp(logvar)).
            kld_loss_src = -0.5 * tf.reduce_sum(logvar_src - tf.square(mu_src) - tf.exp(logvar_src) + 1, 1)
            kld_loss_tgt = -0.5 * tf.reduce_sum(logvar_tgt - tf.square(mu_tgt) - tf.exp(logvar_tgt) + 1, 1)

            return loss_src_from_src, loss_normalizer_src_from_src, loss_token_normalizer_src_from_src, \
                   loss_src_from_tgt, loss_normalizer_src_from_tgt, loss_token_normalizer_src_from_tgt, \
                   loss_tgt_from_src, loss_normalizer_tgt_from_src, loss_token_normalizer_tgt_from_src, \
                   loss_tgt_from_tgt, loss_normalizer_tgt_from_tgt, loss_token_normalizer_tgt_from_tgt, \
                   kld_loss_src, kld_loss_tgt
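One plausible way to combine the fourteen values returned above into a single training objective is sketched below. This is not part of the original code: the aggregation and the kl_weight coefficient are assumptions (the KL terms are often annealed during training).

# Hypothetical aggregation of the values returned by _compute_loss above (illustrative only).
kl_weight = 1.0  # assumed hyperparameter, often annealed from 0 to 1

(loss_s_s, norm_s_s, _,
 loss_s_t, norm_s_t, _,
 loss_t_s, norm_t_s, _,
 loss_t_t, norm_t_t, _,
 kld_src, kld_tgt) = model._compute_loss(outputs, src_ids_out, src_length,
                                         tgt_ids_out, tgt_length,
                                         mu_src, logvar_src, mu_tgt, logvar_tgt,
                                         params, "Training")

total_loss = (loss_s_s / norm_s_s + loss_s_t / norm_s_t
              + loss_t_s / norm_t_s + loss_t_t / norm_t_t
              + kl_weight * (tf.reduce_mean(kld_src) + tf.reduce_mean(kld_tgt)))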
Example no. 2
    def compute_loss(self, outputs, labels, training=True, params=None):
        outputs, mask = outputs

        if params is None:
            params = {}

        if self.crf_decoding:
            log_likelihood, _ = tf.contrib.crf.crf_log_likelihood(
                outputs,
                tf.cast(labels["tags_id"], tf.int32),
                labels["length"],
                transition_params=self.transition_params)
            loss = tf.reduce_sum(-log_likelihood)

            loss_normalizer = tf.cast(tf.shape(log_likelihood)[0], loss.dtype)
            return loss, loss_normalizer
        else:
            return cross_entropy_sequence_loss(
                outputs,
                labels["tags_id"],
                labels["length"],
                label_smoothing=params.get("label_smoothing", 0.0),
                average_in_time=params.get("average_loss_in_time", False),
                mask=mask,
                training=training)
Example no. 3
 def compute_loss(self, outputs, labels, training=True, params=None):
     if params is None:
         params = {}
     if isinstance(outputs, dict):
         logits = outputs["logits"]
         attention = outputs.get("attention")
     else:
         logits = outputs
         attention = None
     labels_lengths = self.labels_inputter.get_length(labels)
     loss, loss_normalizer, loss_token_normalizer = cross_entropy_sequence_loss(
         logits,
         labels["ids_out"],
         labels_lengths,
         label_smoothing=params.get("label_smoothing", 0.0),
         average_in_time=params.get("average_loss_in_time", False),
         training=training)
     if training:
         gold_alignments = labels.get("alignment")
         guided_alignment_type = params.get("guided_alignment_type")
         if gold_alignments is not None and guided_alignment_type is not None:
             if attention is None:
                 tf.logging.warning(
                     "This model did not return attention vectors; "
                     "guided alignment will not be applied")
             else:
                 loss += guided_alignment_cost(
                     attention[:, :-1],  # Do not constrain last timestep.
                     gold_alignments,
                     labels_lengths - 1,
                     guided_alignment_type,
                     guided_alignment_weight=params.get(
                         "guided_alignment_weight", 1))
     return loss, loss_normalizer, loss_token_normalizer
Example no. 4
    def testWeightedAndMaskedCrossEntropySequenceLoss(self):
        logits = tf.constant(
            [
                [[0.1, 0.2, 0.9], [-1.2, 2.1, 0], [0.6, 0.3, 0.4]],
                [[-2.2, -0.2, -1.2], [2.3, 0.2, -0.1], [0.0, 0.1, 0.7]],
            ]
        )
        labels = tf.constant([[2, 1, 0], [1, 0, 2]], dtype=tf.int32)
        lengths = tf.constant([3, 2], dtype=tf.int32)
        weights = tf.constant([0.6, 1.2])

        loss, train_norm, stats_norm = losses.cross_entropy_sequence_loss(
            logits,
            labels,
            sequence_length=lengths,
            sequence_weight=weights,
            training=True,
        )
        self.assertNear(loss, 1.77306, 1e-5)
        self.assertNear(train_norm, tf.reduce_sum(weights), 1e-5)
        self.assertNear(
            stats_norm,
            tf.reduce_sum(tf.cast(lengths, tf.float32) * weights),
            1e-5,
        )
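The expected value in this test can be reproduced by hand: with training=True and the default average_in_time=False, the returned loss is the weighted, masked sum of per-token cross-entropies. A minimal self-contained sketch, assuming TF2 eager execution (not part of the test file):

# Recompute the expected loss with plain TensorFlow ops.
import tensorflow as tf

logits = tf.constant(
    [
        [[0.1, 0.2, 0.9], [-1.2, 2.1, 0], [0.6, 0.3, 0.4]],
        [[-2.2, -0.2, -1.2], [2.3, 0.2, -0.1], [0.0, 0.1, 0.7]],
    ]
)
labels = tf.constant([[2, 1, 0], [1, 0, 2]], dtype=tf.int32)
lengths = tf.constant([3, 2], dtype=tf.int32)
weights = tf.constant([0.6, 1.2])

xent = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logits)
mask = tf.sequence_mask(lengths, maxlen=3, dtype=xent.dtype)
weighted = xent * mask * tf.expand_dims(weights, 1)
print(float(tf.reduce_sum(weighted)))  # ~1.77306, matching the assertion above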
Example no. 5
 def _compute_loss(self, features, labels, outputs, params, mode):
   return cross_entropy_sequence_loss(
       outputs,
       labels["ids_out"],
       self._get_labels_length(labels),
       label_smoothing=params.get("label_smoothing", 0.0),
       mode=mode)
Example no. 6
def build_model(source, target, mode, reuse=False):
    # Encode the source.
    with tf.variable_scope("encoder", reuse=reuse):
        source_embedding = source_inputter.make_inputs(source, training=True)
        memory, _, _ = encoder.encode(source_embedding,
                                      source["length"],
                                      mode=mode)

    # Decode the target.
    with tf.variable_scope("decoder", reuse=reuse):
        target_embedding = target_inputter.make_inputs(target, training=True)
        logits, _, _ = decoder.decode(
            target_embedding,
            target["length"],
            vocab_size=target_inputter.vocabulary_size,
            mode=mode,
            memory=memory,
            memory_sequence_length=source["length"])
        #logits = tf.Print(logits, [tf.argmax(logits, -1)[0]], summarize=maximum_length)
        #logits = tf.Print(logits, [tr_target['ids_out'][0]], summarize=maximum_length)
    # Compute the loss.
    loss, normalizer, _ = losses.cross_entropy_sequence_loss(
        logits,
        target["ids_out"],
        target["length"],
        label_smoothing=0.1,
        average_in_time=True,
        mode=mode)
    loss /= normalizer

    return loss
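A hypothetical call pattern for build_model above (the train_*/eval_* tensors are illustrative names, assumed to come from two separate input pipelines): build the training graph first, then reuse the same encoder/decoder variables for evaluation.

# Hypothetical usage of build_model; names are illustrative, not from the original script.
train_loss = build_model(train_source, train_target, tf.estimator.ModeKeys.TRAIN)
eval_loss = build_model(eval_source, eval_target, tf.estimator.ModeKeys.EVAL, reuse=True)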
Example no. 7
 def _compute_loss(self, features, labels, outputs, params, mode):
     if isinstance(outputs, dict):
         logits = outputs["logits"]
         attention = outputs.get("attention")
     else:
         logits = outputs
         attention = None
     labels_lengths = self.labels_inputter.get_length(labels)
     loss, loss_normalizer, loss_token_normalizer = cross_entropy_sequence_loss(
         logits,
         labels["ids_out"],
         labels_lengths,
         label_smoothing=params.get("label_smoothing", 0.0),
         average_in_time=params.get("average_loss_in_time", False),
         mode=mode)
     if mode == tf.estimator.ModeKeys.TRAIN:
         gold_alignments = labels.get("alignment")
         guided_alignment_type = params.get("guided_alignment_type")
         if gold_alignments is not None and guided_alignment_type is not None:
             if attention is None:
                 tf.logging.warning(
                     "This model did not return attention vectors; "
                     "guided alignment will not be applied")
             else:
                 # Note: the first decoder input is <s> for which we don't want any alignment.
                 loss += guided_alignment_cost(
                     attention[:, 1:],
                     gold_alignments,
                     labels_lengths - 1,
                     guided_alignment_type,
                     guided_alignment_weight=params.get(
                         "guided_alignment_weight", 1))
     return loss, loss_normalizer, loss_token_normalizer
Example no. 8
 def _compute_loss(self, features, labels, outputs, params, mode):
   return cross_entropy_sequence_loss(
       outputs,
       labels["ids_out"],
       self._get_labels_length(labels),
       label_smoothing=params.get("label_smoothing", 0.0),
       average_in_time=params.get("average_loss_in_time", False),
       mode=mode)
Example no. 9
 def compute_loss(self, outputs, labels, training=True):
     return losses.cross_entropy_sequence_loss(
         outputs["logits"],
         labels["ids_out"],
         labels["length"],
         label_smoothing=self.params.get("label_smoothing", 0.0),
         average_in_time=self.params.get("average_loss_in_time", False),
         training=training)
Example no. 10
    def testCrossEntropySequenceLoss(self):
        logits = tf.constant([
            [[0.1, 0.2, 0.9], [-1.2, 2.1, 0], [0.6, 0.3, 0.4]],
            [[-2.2, -0.2, -1.2], [2.3, 0.2, -0.1], [0.0, 0.1, 0.7]],
        ])
        labels = tf.constant([[2, 1, 0], [1, 0, 2]], dtype=tf.int32)

        loss, training_norm, stats_norm = losses.cross_entropy_sequence_loss(
            logits, labels, training=True)
        self.assertNear(loss, 3.06985, 1e-5)
        self.assertEqual(training_norm, 2)
        self.assertEqual(stats_norm, 6)

        _, training_norm, stats_norm = losses.cross_entropy_sequence_loss(
            logits, labels, average_in_time=True, training=True)
        self.assertEqual(training_norm, 6)
        self.assertEqual(stats_norm, 6)
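The expected values can be checked independently, reusing the logits and labels tensors defined in the test above: without a sequence_length and with average_in_time left at False, the loss is simply the sum of the per-token cross-entropies, the training normalizer is the batch size, and the stats normalizer is the token count. A short sketch assuming TF2 eager execution:

# Recompute the cumulated cross-entropy outside the library.
xent = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logits)
print(float(tf.reduce_sum(xent)))  # ~3.06985, the loss asserted above
print(int(tf.size(xent)))          # 6 tokens = stats_norm; the batch size 2 = training_norm
# With average_in_time=True the training normalizer becomes the token count (6), as asserted.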
Example no. 11
    def testMaskedCrossEntropySequenceLoss(self):
        logits = tf.constant([
            [[0.1, 0.2, 0.9], [-1.2, 2.1, 0], [0.6, 0.3, 0.4]],
            [[-2.2, -0.2, -1.2], [2.3, 0.2, -0.1], [0.0, 0.1, 0.7]],
        ])
        labels = tf.constant([[2, 1, 0], [1, 0, 2]], dtype=tf.int32)
        lengths = tf.constant([2, 1], dtype=tf.int32)

        loss, _, stats_norm = losses.cross_entropy_sequence_loss(
            logits, labels, sequence_length=lengths, training=True)
        self.assertNear(loss, 1.22118, 1e-5)
        self.assertEqual(stats_norm, 3)
Example no. 12
 def compute_loss(self, outputs, labels, training=True):
     params = self.params
     if not isinstance(outputs, dict):
         outputs = dict(logits=outputs)
     logits = outputs["logits"]
     noisy_logits = outputs.get("noisy_logits")
     attention = outputs.get("attention")
     if noisy_logits is not None and params.get("contrastive_learning"):
         return losses.max_margin_loss(
             logits,
             labels["ids_out"],
             labels["length"],
             noisy_logits,
             labels["noisy_ids_out"],
             labels["noisy_length"],
             eta=params.get("max_margin_eta", 0.1),
         )
     (
         loss,
         loss_normalizer,
         loss_token_normalizer,
     ) = losses.cross_entropy_sequence_loss(
         logits,
         labels["ids_out"],
         sequence_length=labels["length"],
         sequence_weight=labels.get("weight"),
         label_smoothing=params.get("label_smoothing", 0.0),
         average_in_time=params.get("average_loss_in_time", False),
         training=training,
     )
     if training:
         gold_alignments = labels.get("alignment")
         guided_alignment_type = params.get("guided_alignment_type")
         if gold_alignments is not None and guided_alignment_type is not None:
             if attention is None:
                 tf.get_logger().warning(
                     "This model did not return attention vectors; "
                     "guided alignment will not be applied"
                 )
             else:
                 loss += losses.guided_alignment_cost(
                     attention[:, :-1],  # Do not constrain last timestep.
                     gold_alignments,
                     sequence_length=self.labels_inputter.get_length(
                         labels, ignore_special_tokens=True
                     ),
                     cost_type=guided_alignment_type,
                     weight=params.get("guided_alignment_weight", 1),
                 )
     return loss, loss_normalizer, loss_token_normalizer
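For reference, a hypothetical params dictionary exercising the options read by compute_loss above; the values are illustrative, and in OpenNMT-tf the guided alignment cost type is either "ce" or "mse".

# Hypothetical training params for the compute_loss method above.
params = {
    "label_smoothing": 0.1,
    "average_loss_in_time": True,
    "guided_alignment_type": "ce",   # cross-entropy alignment cost ("mse" is the alternative)
    "guided_alignment_weight": 1.0,
    # "contrastive_learning": True,  # would switch to max_margin_loss when noisy_logits are present
    # "max_margin_eta": 0.1,
}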
Example no. 13
 def _compute_loss(self, features, labels, outputs, params, mode):
     length = self._get_features_length(features)
     if self.crf_decoding:
         with tf.variable_scope(tf.get_variable_scope(),
                                reuse=mode != tf.estimator.ModeKeys.TRAIN):
             log_likelihood, _ = tf.contrib.crf.crf_log_likelihood(
                 outputs, tf.cast(labels["tags_id"], tf.int32), length)
         return tf.reduce_mean(-log_likelihood)
     else:
         return cross_entropy_sequence_loss(outputs,
                                            labels["tags_id"],
                                            length,
                                            label_smoothing=params.get(
                                                "label_smoothing", 0.0),
                                            mode=mode)
Example no. 14
 def compute_loss(self, outputs, labels, training=True):
     if self.crf_decoding:
         log_likelihood, _ = tfa.text.crf_log_likelihood(
             outputs,
             tf.cast(labels["tags_id"], tf.int32),
             labels["length"],
             transition_params=self.transition_params)
         batch_size = tf.shape(log_likelihood)[0]
         return tf.reduce_sum(-log_likelihood) / tf.cast(
             batch_size, log_likelihood.dtype)
     else:
         return cross_entropy_sequence_loss(
             outputs,
             labels["tags_id"],
             labels["length"],
             label_smoothing=self.params.get("label_smoothing", 0.0),
             average_in_time=self.params.get("average_loss_in_time", False),
             training=training)
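At inference time the CRF branch above is typically paired with Viterbi decoding. A minimal sketch, using the same names as the method above; tfa.text.crf_decode returns the best tag sequence and its score under the same transition parameters.

# Hypothetical inference-side counterpart of the CRF loss above;
# "lengths" is assumed to hold the sequence lengths of the examples to decode.
tags_id, best_score = tfa.text.crf_decode(outputs, self.transition_params, lengths)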
Example no. 15
 def _compute_loss(self, features, labels, outputs, params, mode):
     length = self._get_features_length(features)
     if self.crf_decoding:
         with tf.variable_scope(tf.get_variable_scope(),
                                reuse=mode != tf.estimator.ModeKeys.TRAIN):
             log_likelihood, _ = tf.contrib.crf.crf_log_likelihood(
                 outputs, tf.cast(labels["tags_id"], tf.int32), length)
         loss = tf.reduce_sum(-log_likelihood)
         loss_normalizer = tf.shape(log_likelihood)[0]
         return loss, loss_normalizer
     else:
         return cross_entropy_sequence_loss(
             outputs,
             labels["tags_id"],
             length,
             label_smoothing=params.get("label_smoothing", 0.0),
             average_in_time=params.get("average_loss_in_time", False),
             mode=mode)
Example no. 16
 def _compute_loss(self, features, labels, outputs, params, mode):
   length = self._get_features_length(features)
   if self.crf_decoding:
     with tf.variable_scope(tf.get_variable_scope(), reuse=mode != tf.estimator.ModeKeys.TRAIN):
       log_likelihood, _ = tf.contrib.crf.crf_log_likelihood(
           outputs,
           tf.cast(labels["tags_id"], tf.int32),
           length)
     loss = tf.reduce_sum(-log_likelihood)
     loss_normalizer = tf.shape(log_likelihood)[0]
     return loss, loss_normalizer
   else:
     return cross_entropy_sequence_loss(
         outputs,
         labels["tags_id"],
         length,
         label_smoothing=params.get("label_smoothing", 0.0),
         average_in_time=params.get("average_loss_in_time", False),
         mode=mode)
Example no. 17
def denoise(x, embedding, encoder_outputs, generator, decoder, reuse=None):
    """Denoises from the noisy encoding.

    Args:
      x: The input data from the dataset.
      embedding: The embedding variable.
      encoder_outputs: A tuple with the encoder outputs.
      generator: A tf.layers.Dense instance for projecting the logits.
      decoder: The decoder instance used to reconstruct the sequence.
      reuse: If True, reuse the decoder variables.

    Returns:
      The decoder loss.
    """
    with tf.variable_scope("decoder", reuse=reuse):
        logits, _, _ = decoder.decode(
            tf.nn.embedding_lookup(embedding, x["ids_in"]),
            x["length"] + 1,
            initial_state=encoder_outputs[1],
            output_layer=generator,
            memory=encoder_outputs[0],
            memory_sequence_length=encoder_outputs[2])
    cumulated_loss, _, normalizer = cross_entropy_sequence_loss(
        logits, x["ids_out"], x["length"] + 1)
    return cumulated_loss / normalizer
Example no. 18
 def compute_loss(self, outputs, labels, training=True, params=None):
     if params is None:
         params = {}
     if self.crf_decoding:
         with tf.variable_scope(tf.get_variable_scope(),
                                reuse=not training):
             log_likelihood, _ = tf.contrib.crf.crf_log_likelihood(
                 outputs, tf.cast(labels["tags_id"], tf.int32),
                 labels["length"])
         loss = tf.reduce_sum(-log_likelihood)
         loss_normalizer = tf.cast(tf.shape(log_likelihood)[0], loss.dtype)
         return loss, loss_normalizer
     else:
         return cross_entropy_sequence_loss(
             outputs,
             labels["tags_id"],
             labels["length"],
             label_smoothing=params.get("label_smoothing", 0.0),
             average_in_time=params.get("average_loss_in_time", False),
             mode=tf.estimator.ModeKeys.TRAIN
             if training else tf.estimator.ModeKeys.EVAL)
Example no. 19
def denoise(x, embedding, encoder_outputs, generator, reuse=None):
  """Denoises from the noisy encoding.

  Args:
    x: The input data from the dataset.
    embedding: The embedding variable.
    encoder_outputs: A tuple with the encoder outputs.
    generator: A tf.layers.Dense instance for projecting the logits.
    reuse: If True, reuse the decoder variables.

  Returns:
    The decoder loss.
  """
  with tf.variable_scope("decoder", reuse=reuse):
    logits, _, _ = decoder.decode(
        tf.nn.embedding_lookup(embedding, x["ids_in"]),
        x["length"] + 1,
        initial_state=encoder_outputs[1],
        output_layer=generator,
        memory=encoder_outputs[0],
        memory_sequence_length=encoder_outputs[2])
  cumulated_loss, _, normalizer = cross_entropy_sequence_loss(
      logits, x["ids_out"], x["length"] + 1)
  return cumulated_loss / normalizer
Example no. 20
def train(model_dir,
          example_inputter,
          source_file,
          target_file,
          maximum_length=100,
          shuffle_buffer_size=1000000,
          gradients_accum=8,
          train_steps=100000,
          save_every=1000,
          report_every=50):
    """Runs the training loop.
    Args:
      model_dir: Directory where checkpoints are saved.
      example_inputter: The inputter instance that produces the training examples.
      source_file: The source training file.
      target_file: The target training file.
      maximum_length: Filter sequences longer than this.
      shuffle_buffer_size: How many examples to load for shuffling.
      gradients_accum: Accumulate gradients of this many iterations.
      train_steps: Train for this many iterations.
      save_every: Save a checkpoint every this many iterations.
      report_every: Report training progress every this many iterations.
    """
    mode = tf.estimator.ModeKeys.TRAIN

    # Create the dataset.
    dataset = example_inputter.make_training_dataset(
        source_file,
        target_file,
        batch_size=3072,
        batch_type="tokens",
        shuffle_buffer_size=shuffle_buffer_size,
        bucket_width=1,  # Bucketize sequences by the same length for efficiency.
        maximum_features_length=maximum_length,
        maximum_labels_length=maximum_length)
    iterator = dataset.make_initializable_iterator()
    source, target = iterator.get_next()

    # Encode the source.
    with tf.variable_scope("encoder"):
        source_embedding = source_inputter.make_inputs(source, training=True)
        memory, _, _ = encoder.encode(source_embedding,
                                      source["length"],
                                      mode=mode)

    # Decode the target.
    with tf.variable_scope("decoder"):
        target_embedding = target_inputter.make_inputs(target, training=True)
        logits, _, _ = decoder.decode(
            target_embedding,
            target["length"],
            vocab_size=target_inputter.vocabulary_size,
            mode=mode,
            memory=memory,
            memory_sequence_length=source["length"])

    # Compute the loss.
    loss, normalizer, _ = losses.cross_entropy_sequence_loss(
        logits,
        target["ids_out"],
        target["length"],
        label_smoothing=0.1,
        average_in_time=True,
        mode=mode)
    loss /= normalizer

    # Define the learning rate schedule.
    step = tf.train.create_global_step()
    learning_rate = decay.noam_decay_v2(2.0,
                                        step,
                                        model_dim=512,
                                        warmup_steps=4000)

    # Define the optimization op.
    optimizer = tf.train.AdamOptimizer(learning_rate)
    gradients = optimizer.compute_gradients(loss)
    train_op, optim_variables = optim.delayed_update(
        optimizer, gradients, step, accum_count=gradients_accum)

    # Runs the training loop.
    saver = tf.train.Saver()
    checkpoint_path = None
    if os.path.exists(model_dir):
        checkpoint_path = tf.train.latest_checkpoint(model_dir)
    with tf.Session() as sess:
        if checkpoint_path is not None:
            print("Restoring parameters from %s" % checkpoint_path)
            saver.restore(sess, checkpoint_path)
        else:
            sess.run(tf.global_variables_initializer())
        sess.run(tf.variables_initializer(optim_variables))
        sess.run(tf.tables_initializer())
        sess.run(iterator.initializer)
        last_step = -1
        while True:
            step_, lr_, loss_, _ = sess.run(
                [step, learning_rate, loss, train_op])
            if step_ != last_step:
                if step_ % report_every == 0:
                    print("Step = %d ; Learning rate = %f ; Loss = %f" %
                          (step_, lr_, loss_))
                if step_ % save_every == 0:
                    print("Saving checkpoint for step %d" % step_)
                    saver.save(sess, "%s/model" % model_dir, global_step=step_)
                if step_ == train_steps:
                    break
            last_step = step_
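A hypothetical invocation of the training loop above; the file paths are illustrative, and the inputter, encoder and decoder are expected to be defined at module level in the original script (the function references them as globals).

# Hypothetical call; paths and the example_inputter instance are assumed to exist.
train("checkpoints", example_inputter, "train.src", "train.tgt",
      maximum_length=100, train_steps=100000)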