Example #1
def body(self, features):
    hparams = self._hparams
    targets = features["targets"]
    inputs = features["inputs"]
    target_space = features["target_space_id"]

    inputs = common_layers.flatten4d3d(inputs)
    targets = common_layers.flatten4d3d(targets)

    (encoder_input, encoder_self_attention_bias,
     encoder_decoder_attention_bias) = (transformer.transformer_prepare_encoder(
         inputs, target_space, hparams))
    (decoder_input,
     decoder_self_attention_bias) = transformer.transformer_prepare_decoder(
         targets, hparams)

    encoder_input = tf.nn.dropout(encoder_input,
                                  1.0 - hparams.layer_prepostprocess_dropout)
    decoder_input = tf.nn.dropout(decoder_input,
                                  1.0 - hparams.layer_prepostprocess_dropout)
    encoder_output = transformer_revnet_encoder(
        encoder_input, encoder_self_attention_bias, hparams)

    decoder_output = transformer_revnet_decoder(
        decoder_input, encoder_output, decoder_self_attention_bias,
        encoder_decoder_attention_bias, hparams)
    decoder_output = tf.expand_dims(decoder_output, 2)

    return decoder_output
Example #2
    def body(self, features):
        hparams = self._hparams
        targets = features["targets"]
        inputs = features["inputs"]
        target_space = features["target_space_id"]

        inputs = common_layers.flatten4d3d(inputs)
        targets = common_layers.flatten4d3d(targets)

        (encoder_input, encoder_self_attention_bias,
         encoder_decoder_attention_bias) = (
             transformer.transformer_prepare_encoder(inputs, target_space,
                                                     hparams))
        (decoder_input, decoder_self_attention_bias
         ) = transformer.transformer_prepare_decoder(targets, hparams)

        encoder_input = tf.nn.dropout(
            encoder_input, 1.0 - hparams.layer_prepostprocess_dropout)
        decoder_input = tf.nn.dropout(
            decoder_input, 1.0 - hparams.layer_prepostprocess_dropout)
        encoder_output = transformer_revnet_encoder(
            encoder_input, encoder_self_attention_bias, hparams)

        decoder_output = transformer_revnet_decoder(
            decoder_input, encoder_output, decoder_self_attention_bias,
            encoder_decoder_attention_bias, hparams)
        decoder_output = tf.expand_dims(decoder_output, 2)

        return decoder_output
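The two revnet examples above build on the pair of tensors returned by transformer_prepare_decoder. Below is a minimal sketch of that call, assuming tensor2tensor is installed and TF1-style graph mode; the batch size, length, and transformer_base hparams are illustrative and not taken from the examples above.

import tensorflow as tf
from tensor2tensor.layers import common_layers
from tensor2tensor.models import transformer

hparams = transformer.transformer_base()
targets = tf.zeros([2, 7, 1, hparams.hidden_size])   # [batch, length, 1, hidden_dim]
targets = common_layers.flatten4d3d(targets)         # [batch, length, hidden_dim]

decoder_input, decoder_self_attention_bias = (
    transformer.transformer_prepare_decoder(targets, hparams))
# decoder_input: targets shifted right by one position (plus a timing signal,
#     depending on hparams.pos), shape [batch, length, hidden_dim].
# decoder_self_attention_bias: causal (lower-triangular) additive bias,
#     shape [1, 1, length, length].

# In TF1, tf.nn.dropout takes a keep probability, hence the 1.0 - dropout pattern.
decoder_input = tf.nn.dropout(decoder_input,
                              1.0 - hparams.layer_prepostprocess_dropout)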
Example #3
    def body(self, features):
        """Transformer main model_fn.

        Args:
          features: Map of features to the model. Should contain the following:
              "inputs": Transformer inputs.
                  [batch_size, input_length, 1, hidden_dim].
              "targets": Target decoder outputs.
                  [batch_size, decoder_length, 1, hidden_dim]
              "target_space_id": A scalar int from data_generators.problem.SpaceID.

        Returns:
          Final decoder representation. [batch_size, decoder_length, hidden_dim]
        """
        #self._hparams.add("warm_start_from",True)
        hparams = self._hparams

        losses = []

        #         if self.has_input:
        #             inputs = features["inputs"]
        #             target_space = features["target_space_id"]
        #             encoder_output, encoder_decoder_attention_bias = self.encode(
        #                 inputs, target_space, hparams, features=features, losses=losses)
        #         else:
        encoder_output, encoder_decoder_attention_bias = (None, None)
        if "inputs" in features:
            targets = features["inputs"]
            lekeys = "inputs"
        else:
            targets = features["targets"]
            lekeys = "targets"
        targets_shape = common_layers.shape_list(targets)
        targets = common_layers.flatten4d3d(targets)
        decoder_input, decoder_self_attention_bias = transformer_prepare_decoder(
            targets, hparams, features=features)
        decoder_output = self.decode(decoder_input,
                                     encoder_output,
                                     encoder_decoder_attention_bias,
                                     decoder_self_attention_bias,
                                     hparams,
                                     nonpadding=features_to_nonpadding(
                                         features, lekeys),
                                     losses=losses)

        expected_attentions = features.get("expected_attentions")
        if expected_attentions is not None:
            attention_loss = common_attention.encoder_decoder_attention_loss(
                expected_attentions, self.attention_weights,
                hparams.expected_attention_loss_type,
                hparams.expected_attention_loss_multiplier)
            return decoder_output, {"attention_loss": attention_loss}

        ret = tf.reshape(decoder_output, targets_shape)
        if losses:
            return ret, {"extra_loss": tf.add_n(losses)}
        else:
            return ret
Example #4
  def body(self, features):
    """Transformer main model_fn.

    Args:
      features: Map of features to the model. Should contain the following:
          "inputs": Transformer inputs.
              [batch_size, input_length, 1, hidden_dim].
          "targets": Target decoder outputs.
              [batch_size, decoder_length, 1, hidden_dim]
          "target_space_id": A scalar int from data_generators.problem.SpaceID.

    Returns:
      Final decoder representation. [batch_size, decoder_length, hidden_dim]
    """
    hparams = self._hparams

    losses = []

    if self.has_input:
      raise AttributeError("Context transformer encoder not implemented")
      inputs = features["inputs"]
      target_space = features["target_space_id"]
      encoder_output, encoder_decoder_attention_biases = self.encode(
          inputs, target_space, hparams, features=features, losses=losses)
    else:
      encoder_output, encoder_decoder_attention_biases = (None, None)

    targets = features["targets"]
    targets_shape = common_layers.shape_list(targets)
    targets = common_layers.flatten4d3d(targets)
    decoder_input, decoder_self_attention_bias = transformer_prepare_decoder(
        targets, hparams, features=features)
    decoder_self_attention_biases = expand_bias_modes(
        decoder_self_attention_bias, features["targets_seg"])
    decoder_output = self.decode(
        decoder_input,
        encoder_output,
        encoder_decoder_attention_biases,
        decoder_self_attention_biases,
        hparams,
        nonpadding=features_to_nonpadding(features, "targets"),
        losses=losses)

    expected_attentions = features.get("expected_attentions")
    if expected_attentions is not None:
      attention_loss = common_attention.encoder_decoder_attention_loss(
          expected_attentions, self.attention_weights,
          hparams.expected_attention_loss_type,
          hparams.expected_attention_loss_multiplier)
      return decoder_output, {"attention_loss": attention_loss}

    ret = tf.reshape(decoder_output, targets_shape)
    if losses:
      return ret, {"extra_loss": tf.add_n(losses)}
    else:
      return ret
Example #5
def decode_transformer(encoder_output,
                       encoder_decoder_attention_bias,
                       targets,
                       hparams,
                       name,
                       task=None):
  """Original Transformer decoder."""
  with tf.variable_scope(name):
    if task is None:
      task = hparams.task
    if task == "translate":
      targets = common_layers.flatten4d3d(targets)

      decoder_input, decoder_self_bias = (
          transformer.transformer_prepare_decoder(targets, hparams))

      decoder_input = tf.nn.dropout(decoder_input,
                                    1.0 - hparams.layer_prepostprocess_dropout)

      decoder_output = transformer.transformer_decoder(
          decoder_input,
          encoder_output,
          decoder_self_bias,
          encoder_decoder_attention_bias,
          hparams)
      decoder_output = tf.expand_dims(decoder_output, axis=2)
    else:
      assert task == "image"
      inputs = None
      # Have to reshape targets to [batch, 32, 32, 3 * hidden_size] because
      # otherwise prepare_image will choke.
      targets = tf.reshape(targets, [tf.shape(targets)[0], hparams.img_len,
                                     hparams.img_len,
                                     hparams.num_channels*hparams.hidden_size])

      # Prepare decoder inputs and bias.
      decoder_input, _, _, bias = cia.prepare_decoder(targets, hparams)
      # Add class label to decoder input.
      if not hparams.drop_inputs:
        decoder_input += tf.reshape(
            inputs,
            [common_layers.shape_list(targets)[0], 1, 1, hparams.hidden_size])
      decoder_output = cia.transformer_decoder_layers(
          decoder_input,
          None,
          bias,
          hparams.num_decoder_layers or hparams.num_hidden_layers,
          hparams,
          attention_type=hparams.dec_attention_type,
          name="decoder")
    decoder_output_shape = common_layers.shape_list(decoder_output)
    decoder_output = tf.reshape(decoder_output, [decoder_output_shape[0], -1, 1,
                                                 hparams.hidden_size])
    # Expand since t2t expects 4d tensors.
    return decoder_output
Example #6
def decode_transformer(encoder_output,
                       encoder_decoder_attention_bias,
                       targets,
                       hparams,
                       name,
                       task=None):
    """Original Transformer decoder."""
    with tf.variable_scope(name):
        if task is None:
            task = hparams.task
        if task == "translate":
            targets = common_layers.flatten4d3d(targets)

            decoder_input, decoder_self_bias = (
                transformer.transformer_prepare_decoder(targets, hparams))

            decoder_input = tf.nn.dropout(
                decoder_input, 1.0 - hparams.layer_prepostprocess_dropout)

            decoder_output = transformer.transformer_decoder(
                decoder_input, encoder_output, decoder_self_bias,
                encoder_decoder_attention_bias, hparams)
            decoder_output = tf.expand_dims(decoder_output, axis=2)
        else:
            assert task == "image"
            inputs = None
            # Have to reshape targets to [batch, 32, 32, 3 * hidden_size] because
            # otherwise prepare_image will choke.
            targets = tf.reshape(targets, [
                tf.shape(targets)[0], hparams.img_len, hparams.img_len,
                hparams.num_channels * hparams.hidden_size
            ])

            # Prepare decoder inputs and bias.
            decoder_input, _, _, bias = cia.prepare_decoder(targets, hparams)
            # Add class label to decoder input.
            if not hparams.drop_inputs:
                decoder_input += tf.reshape(inputs, [
                    common_layers.shape_list(targets)[0], 1, 1,
                    hparams.hidden_size
                ])
            decoder_output = cia.transformer_decoder_layers(
                decoder_input,
                None,
                bias,
                hparams.num_decoder_layers or hparams.num_hidden_layers,
                hparams,
                attention_type=hparams.dec_attention_type,
                name="decoder")
        decoder_output_shape = common_layers.shape_list(decoder_output)
        decoder_output = tf.reshape(
            decoder_output,
            [decoder_output_shape[0], -1, 1, hparams.hidden_size])
        # Expand since t2t expects 4d tensors.
        return decoder_output
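Both decode_transformer variants above use common_layers.shape_list rather than tf.shape when reshaping. A short sketch of what that helper returns, assuming tensor2tensor's common_layers; the placeholder shape is illustrative.

import tensorflow as tf
from tensor2tensor.layers import common_layers

x = tf.placeholder(tf.float32, shape=[None, 10, 1, 512])
dims = common_layers.shape_list(x)
# dims mixes static Python ints with dynamic tensors, e.g. [<batch tensor>, 10, 1, 512],
# so it can be fed straight to tf.reshape while keeping the known dimensions static.
y = tf.reshape(x, [dims[0], -1, 1, 512])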
Example #7
    def body(self, features):
        """Transformer main model_fn.

    Args:
      features: Map of features to the model. Should contain the following:
          "inputs": Transformer inputs [batch_size, input_length, hidden_dim]
          "tragets": Target decoder outputs.
              [batch_size, decoder_length, hidden_dim]
          "target_space_id"

    Returns:
      Final decoder representation. [batch_size, decoder_length, hidden_dim]
    """
        hparams = self._hparams

        if self.has_input:
            inputs = features["inputs"]
            target_space = features["target_space_id"]
            encoder_output, encoder_decoder_attention_bias = self.encode(
                inputs, target_space, hparams, features=features)
        else:
            encoder_output, encoder_decoder_attention_bias = (None, None)

        targets = features["targets"]
        targets = common_layers.flatten4d3d(targets)

        decoder_input, decoder_self_attention_bias = transformer_prepare_decoder(
            targets, hparams, features=features)

        decoder_output = self.decode(decoder_input,
                                     encoder_output,
                                     encoder_decoder_attention_bias,
                                     decoder_self_attention_bias,
                                     hparams,
                                     nonpadding=features_to_nonpadding(
                                         features, "targets"))

        self.cache_flag = tf.py_func(
            self.sentence_cache.AddMultipleEntries,
            [features["targets_raw"], decoder_output],
            tf.float32,
        )

        tf.cast(self.cache_flag, tf.float32)  # no-op: cache_flag is already float32 and the result is unused

        expected_attentions = features.get("expected_attentions")
        if expected_attentions is not None:
            attention_loss = common_attention.encoder_decoder_attention_loss(
                expected_attentions, self.attention_weights,
                hparams.expected_attention_loss_type,
                hparams.expected_attention_loss_multiplier)
            return decoder_output, {"attention_loss": attention_loss}
        self.cache_flag.set_shape((1, ))
        return decoder_output + 0 * self.cache_flag
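The `decoder_output + 0 * self.cache_flag` trick above exists only to make the tf.py_func cache update a data dependency of the model output, so it runs whenever the output is evaluated. A self-contained sketch of the same pattern, assuming TF1 graph mode; add_entry is a hypothetical stand-in for sentence_cache.AddMultipleEntries.

import numpy as np
import tensorflow as tf

def add_entry(x):
    # Hypothetical stand-in for a stateful cache update. The return value exists
    # only so the op has an output that other tensors can depend on.
    return np.float32(0.0)

flag = tf.py_func(add_entry, [tf.constant([1.0, 2.0])], tf.float32)
flag.set_shape(())

output = tf.constant([3.0, 4.0])
# Adding 0 * flag leaves the value unchanged but forces add_entry to execute
# whenever output_with_side_effect is evaluated.
output_with_side_effect = output + 0 * flag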
Example #8
def vae_transformer_internal(inputs, targets, target_space, hparams):
    """VAE Transformer, main step used for training."""
    with tf.variable_scope("vae_transformer"):
        is_training = hparams.mode == tf.contrib.learn.ModeKeys.TRAIN
        # Prepare inputs, targets, and k.
        inputs = common_layers.flatten4d3d(inputs)
        input_len = tf.shape(inputs)[1]  # Double input size to cover targets.
        inputs = tf.pad(inputs, [[0, 0], [0, input_len], [0, 0]])
        inputs.set_shape([None, None, hparams.hidden_size])
        targets = common_layers.flatten4d3d(targets)
        k = 2**hparams.num_compress_steps
        inputs, targets = common_layers.pad_to_same_length(
            inputs, targets, final_length_divisible_by=k)
        inputs = encode(inputs, target_space, hparams, "input_enc")

        # Dropout targets or swap for zeros 5% of the time.
        targets_nodrop = targets
        max_prestep = hparams.kl_warmup_steps
        prob_targets = 0.95 if is_training else 1.0
        targets_dropout_max = common_layers.inverse_lin_decay(
            max_prestep) - 0.01
        targets = dropmask(targets, targets_dropout_max * 0.7, is_training)
        targets = tf.cond(tf.less(tf.random_uniform([]), prob_targets),
                          lambda: targets, lambda: tf.zeros_like(targets))
        targets = targets_nodrop  # note: restores the un-dropped targets, overriding the dropout above

        # Compress and vae.
        z = tf.get_variable("z", [hparams.hidden_size])
        z = tf.reshape(z, [1, 1, 1, -1])
        z = tf.tile(z, [tf.shape(inputs)[0], 1, 1, 1])

        z = attend(z, inputs, hparams, "z_attendsi")
        z = ffn(z, hparams, "zff2")
        z = attend(z, targets, hparams, "z_attendst2")
        z = ffn(z, hparams, "zff3")
        z, kl_loss, _, _ = vae(z, hparams, name="vae")
        z = tf.layers.dense(z, hparams.hidden_size, name="z_to_dense")

        # z, kl_loss, _, _ = vae_compress(
        #     tf.expand_dims(targets, axis=2), tf.expand_dims(inputs, axis=2),
        #     hparams, "vae_compress", "vae_decompress")

        decoder_in = tf.squeeze(z, axis=2) + tf.zeros_like(targets)
        (decoder_input, decoder_self_attention_bias) = (
            transformer.transformer_prepare_decoder(decoder_in, hparams))
        ret = transformer.transformer_decoder(decoder_input, inputs,
                                              decoder_self_attention_bias,
                                              None, hparams)

        kl_loss *= common_layers.inverse_exp_decay(int(
            max_prestep * 1.5)) * 5.0
        losses = {"kl": kl_loss}
        return tf.expand_dims(ret, axis=2), losses
Example #9
  def _prepare_decoder(self, targets):
    """Process the transformer decoder input."""
    targets = common_layers.flatten4d3d(targets)

    output = transformer.transformer_prepare_decoder(
        targets, self._hparams, features=None,
    )
    deco_input, deco_self_attention_bias = output

    deco_input = tf.nn.dropout(
        deco_input, 1.0 - self._hparams.layer_prepostprocess_dropout
    )
    return deco_input, deco_self_attention_bias
Example #10
  def _prepare_decoder(self, targets):
    """Process the transformer decoder input."""
    targets = common_layers.flatten4d3d(targets)

    output = transformer.transformer_prepare_decoder(
        targets, self._hparams, features=None,
    )
    deco_input, deco_self_attention_bias = output

    deco_input = tf.nn.dropout(
        deco_input, 1.0 - self._hparams.layer_prepostprocess_dropout
    )
    return deco_input, deco_self_attention_bias
Example #11
def decode(self, decoder_input, encoder_output,
           encoder_decoder_attention_bias, hparams):
    decoder_input, decoder_self_attention_bias = transformer.transformer_prepare_decoder(
        decoder_input, hparams)
    decoder_input = tf.nn.dropout(
        decoder_input, 1.0 - hparams.layer_prepostprocess_dropout)
    decoder_output = transformer.transformer_decoder(
        decoder_input,
        encoder_output,
        decoder_self_attention_bias,
        encoder_decoder_attention_bias,
        hparams,
        cache=None)
    return decoder_output
Example #12
    def body(self, features):

        hparams = self._hparams
        losses = []

        contexts = {}
        for feature_name in features:
            if 'context' in feature_name and 'raw' not in feature_name:
                contexts[feature_name] = features[feature_name]
        inputs = features["inputs"]
        target_space = features["target_space_id"]

        encoder_output, encoder_decoder_attention_bias = self.encode(
            inputs,
            contexts,
            target_space,
            hparams=hparams,
            features=features,
            losses=losses)

        targets = features["targets"]
        targets_shape = common_layers.shape_list(targets)
        targets = common_layers.flatten4d3d(targets)

        decoder_input, decoder_self_attention_bias = transformer_prepare_decoder(
            targets, hparams, features=features)

        decoder_output = self.decode(decoder_input,
                                     encoder_output,
                                     encoder_decoder_attention_bias,
                                     decoder_self_attention_bias,
                                     hparams=hparams,
                                     nonpadding=features_to_nonpadding(
                                         features, "targets"),
                                     losses=losses)

        expected_attentions = features.get("expected_attentions")
        if expected_attentions is not None:
            attention_loss = common_attention.encoder_decoder_attention_loss(
                expected_attentions, self.attention_weights,
                hparams.expected_attention_loss_type,
                hparams.expected_attention_loss_multiplier)
            return decoder_output, {"attention_loss": attention_loss}

        ret = tf.reshape(decoder_output, targets_shape)
        if losses:
            return ret, {"extra_loss": tf.add_n(losses)}
        else:
            return ret
Example #13
def decode_transformer(encoder_output, encoder_decoder_attention_bias, targets,
                       hparams, name):
    """Original Transformer decoder."""
    with tf.variable_scope(name):
        targets = common_layers.flatten4d3d(targets)

        decoder_input, decoder_self_bias = transformer.transformer_prepare_decoder(
            targets, hparams)

        decoder_input = tf.nn.dropout(
            decoder_input, 1.0 - hparams.layer_prepostprocess_dropout)

        decoder_output = transformer.transformer_decoder(
            decoder_input, encoder_output, decoder_self_bias,
            encoder_decoder_attention_bias, hparams)

        # Expand since t2t expects 4d tensors.
        return tf.expand_dims(decoder_output, axis=2)
Example #14
    def decode(self, decoder_input, encoder_output,
               encoder_decoder_attention_bias_slices, hparams):
        dir_path = os.path.dirname(os.path.realpath(__file__))
        config_file = os.path.join(dir_path, "config.yml")
        config = yaml.load(open(config_file))  # note: PyYAML >= 5.1 prefers yaml.safe_load here
        enc_name = config["model_params"].split('_')[0][3:]
        dec_name = "dec1d"
        if enc_name.endswith("2d") and enc_name != "all2d":
            dec_name = "dec2d"

        decoder_input, decoder_self_attention_bias = transformer.transformer_prepare_decoder(
            decoder_input, hparams)
        decoder_input = tf.nn.dropout(
            decoder_input, 1.0 - hparams.layer_prepostprocess_dropout)
        decoder_output = getattr(decode_fn, dec_name)(
            decoder_input, encoder_output, decoder_self_attention_bias,
            encoder_decoder_attention_bias_slices, hparams, "decoder")
        return decoder_output
Example #15
def decode_transformer(encoder_output, encoder_decoder_attention_bias, targets,
                       hparams, name):
  """Original Transformer decoder."""
  with tf.variable_scope(name):
    targets = common_layers.flatten4d3d(targets)

    decoder_input, decoder_self_bias = (
        transformer.transformer_prepare_decoder(targets, hparams))

    decoder_input = tf.nn.dropout(decoder_input,
                                  1.0 - hparams.layer_prepostprocess_dropout)

    decoder_output = transformer.transformer_decoder(
        decoder_input, encoder_output, decoder_self_bias,
        encoder_decoder_attention_bias, hparams)
    decoder_output = tf.expand_dims(decoder_output, axis=2)
    decoder_output_shape = common_layers.shape_list(decoder_output)
    decoder_output = tf.reshape(
        decoder_output, [decoder_output_shape[0], -1, 1, hparams.hidden_size])
    # Expand since t2t expects 4d tensors.
    return decoder_output
Example #16
    def model_fn_body(self, features):
        hparams = self._hparams
        targets = features["targets"]
        inputs = features.get("inputs")
        target_space = features.get("target_space_id")

        inputs = common_layers.flatten4d3d(inputs)
        targets = common_layers.flatten4d3d(targets)

        (encoder_input, encoder_attention_bias,
         _) = transformer.transformer_prepare_encoder(inputs, target_space,
                                                      hparams)
        (decoder_input, decoder_self_attention_bias
         ) = transformer.transformer_prepare_decoder(targets, hparams)

        # We need masks of shape [batch_size, input_length].
        # Biases appear to have shape [batch_size, 1, input_length, vec_dim];
        # squeeze out dim 1 and take the first element of each vector.
        encoder_mask = tf.squeeze(encoder_attention_bias, [1])[:, :, 0]
        decoder_mask = tf.squeeze(decoder_self_attention_bias, [1])[:, :, 0]

        def residual_fn(x, y):
            return common_layers.layer_norm(
                x + tf.nn.dropout(y, 1.0 - hparams.residual_dropout))

        encoder_input = tf.nn.dropout(encoder_input,
                                      1.0 - hparams.residual_dropout)
        decoder_input = tf.nn.dropout(decoder_input,
                                      1.0 - hparams.residual_dropout)
        encoder_output = alt_transformer_encoder(encoder_input, residual_fn,
                                                 encoder_mask, hparams)

        decoder_output = alt_transformer_decoder(decoder_input, encoder_output,
                                                 residual_fn, decoder_mask,
                                                 encoder_attention_bias,
                                                 hparams)

        decoder_output = tf.expand_dims(decoder_output, 2)

        return decoder_output
Example #17
    def model_fn_body(self, features):
        hparams = self._hparams

        encoder_input = features["inputs"]
        print(encoder_input.shape.as_list())  # ==> [None, None, None, 4, 300]
        encoder_input = flatten5d4d(encoder_input)
        print(encoder_input.shape.as_list())  # ==> [None, None, 4, 300]
        target_space = features["target_space_id"]
        print(target_space.shape.as_list())  # ==> []
        # encode_lex
        encoder_output, encoder_decoder_attention_bias = self.encode_lex(
            encoder_input, target_space, hparams)
        targets = features["targets"]
        print(targets.shape.as_list())
        targets = common_layers.flatten4d3d(targets)
        # decode_lex
        decoder_input, decoder_self_attention_bias = transformer.transformer_prepare_decoder(
            targets, hparams)
        decoder_output = self.decode(decoder_input, encoder_output,
                                     encoder_decoder_attention_bias,
                                     decoder_self_attention_bias, hparams)
        return decoder_output
Example #18
    def model_fn_body(self, features):
        hparams = self._hparams
        targets = features["targets"]
        inputs = features.get("inputs")
        target_space = features.get("target_space_id")

        inputs = common_layers.flatten4d3d(inputs)
        targets = common_layers.flatten4d3d(targets)

        (encoder_input, encoder_attention_bias,
         _) = (transformer.transformer_prepare_encoder(inputs, target_space,
                                                       hparams))
        (decoder_input,
         _) = (transformer.transformer_prepare_decoder(targets, hparams))

        encoder_mask = bias_to_mask(encoder_attention_bias)

        def residual_fn(x, y):
            return common_layers.layer_norm(
                x + tf.nn.dropout(y, 1.0 - hparams.residual_dropout))

        encoder_input = tf.nn.dropout(encoder_input,
                                      1.0 - hparams.residual_dropout)
        decoder_input = tf.nn.dropout(decoder_input,
                                      1.0 - hparams.residual_dropout)

        encoder_output = alt_transformer_encoder(encoder_input, residual_fn,
                                                 encoder_mask, hparams)

        decoder_output = alt_transformer_decoder(decoder_input, encoder_output,
                                                 residual_fn,
                                                 encoder_attention_bias,
                                                 hparams)

        decoder_output = tf.expand_dims(decoder_output, 2)

        return decoder_output
Example #19
def decode_transformer(encoder_output, encoder_decoder_attention_bias, targets,
                       hparams, name):
    """Original Transformer decoder."""
    orig_hparams = hparams
    with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
        targets = common_layers.flatten4d3d(targets)

        decoder_input, decoder_self_bias = (
            transformer.transformer_prepare_decoder(targets, hparams))

        decoder_input = tf.nn.dropout(
            decoder_input, 1.0 - hparams.layer_prepostprocess_dropout)

        decoder_output = transformer.transformer_decoder(
            decoder_input, encoder_output, decoder_self_bias,
            encoder_decoder_attention_bias, hparams)
        decoder_output = tf.expand_dims(decoder_output, axis=2)
        decoder_output_shape = common_layers.shape_list(decoder_output)
        decoder_output = tf.reshape(
            decoder_output,
            [decoder_output_shape[0], -1, 1, hparams.hidden_size])
        # Expand since t2t expects 4d tensors.
        hparams = orig_hparams
        return decoder_output
Example #20
    def body(self, features, original_features):
        """Transformer main model_fn.
    Args:
      features: Map of features to the model. Should contain the following:
          "inputs": Transformer inputs [batch_size, input_length, hidden_dim]
          "targets": Target decoder outputs.
              [batch_size, decoder_length, hidden_dim]
          "target_space_id"
    Returns:
      Final decoder representation. [batch_size, decoder_length, hidden_dim]
    """
        hparams = self._hparams

        snippets = features.get(searchqa_problem.FeatureNames.SNIPPETS)
        questions = features.get(searchqa_problem.FeatureNames.QUESTION)
        target_space = features["target_space_id"]

        with tf.variable_scope('input'):
            # [batch_size, search_results_len, embed_sz]
            encoded_snippets = self.inputs_encoding(
                input=snippets,
                original_input=original_features.get(
                    searchqa_problem.FeatureNames.SNIPPETS),
                initializer=tf.constant_initializer(1.0),
                scope='snippets_encoding')

            # [batch_size, 1, embed_sz]
            encoded_question = self.inputs_encoding(
                input=questions,
                original_input=original_features.get(
                    searchqa_problem.FeatureNames.QUESTION),
                initializer=tf.constant_initializer(1.0),
                scope='question_encoding')

        # Concat snippets and questions to create the inputs.
        inputs = tf.concat([encoded_snippets, encoded_question], axis=1)
        # The encode function expects 4D inputs (it squeezes them from 4D to 3D
        # internally), so insert a channel dim to make the concatenated inputs 4D.
        inputs = tf.expand_dims(inputs, axis=2)

        losses = []
        encoder_output, encoder_decoder_attention_bias = self.encode(
            inputs, target_space, hparams, features=features, losses=losses)

        targets = features["targets"]
        targets_shape = common_layers.shape_list(targets)
        targets = common_layers.flatten4d3d(targets)

        decoder_input, decoder_self_attention_bias = transformer.transformer_prepare_decoder(
            targets, hparams, features=features)

        decoder_output = self.decode(decoder_input,
                                     encoder_output,
                                     encoder_decoder_attention_bias,
                                     decoder_self_attention_bias,
                                     hparams,
                                     nonpadding=features_to_nonpadding(
                                         features, "targets"),
                                     losses=losses)

        ret = tf.reshape(decoder_output, targets_shape)
        if losses:
            return ret, {"extra_loss": tf.add_n(losses)}
        else:
            return ret
Example #21
def get_decoder_autoregressive():
    """Decoder input for autoregressive computation."""
    (a, b) = transformer.transformer_prepare_decoder(targets, hparams)
    return (a, b, tf.constant(0.0))
Example #22
    def body(self, features):
        """Universal Transformer main model_fn.

        Args:
        features: Map of features to the model. Should contain the following:
            "inputs": Transformer inputs [batch_size, input_length, hidden_dim]
            "targets": Target decoder outputs.
                [batch_size, decoder_length, hidden_dim]
            "target_space_id"

        Returns:
        Final decoder representation. [batch_size, decoder_length, hidden_dim]
        """
        hparams = self._hparams
        if hparams.add_position_timing_signal:
            # Turning off addition of positional embedding in the encoder/decoder
            # preparation as we do it in the beginning of each step.
            hparams.pos = None

        if self.has_input:
            inputs = features["inputs"]
            target_space = features["target_space_id"]
            (encoder_output, encoder_decoder_attention_bias,
             enc_extra_output) = self.encode(inputs,
                                             target_space,
                                             hparams,
                                             features=features)
        else:
            (encoder_output, encoder_decoder_attention_bias,
             enc_extra_output) = (None, None, (None, None))

        targets = features["targets"]
        targets = common_layers.flatten4d3d(targets)

        (decoder_input, decoder_self_attention_bias
         ) = transformer.transformer_prepare_decoder(targets,
                                                     hparams,
                                                     features=features)

        decoder_output, dec_extra_output = self.decode(
            decoder_input,
            encoder_output,
            encoder_decoder_attention_bias,
            decoder_self_attention_bias,
            hparams,
            nonpadding=transformer.features_to_nonpadding(features, "targets"))

        expected_attentions = features.get("expected_attentions")
        if expected_attentions is not None:
            print('returning attention loss')
            attention_loss = common_attention.encoder_decoder_attention_loss(
                expected_attentions, self.attention_weights,
                hparams.expected_attention_loss_type,
                hparams.expected_attention_loss_multiplier)
            return decoder_output, {"attention_loss": attention_loss}

        if hparams.recurrence_type == "act" and hparams.act_loss_weight != 0:
            print('returning act loss')
            if self.has_input:
                enc_ponder_times, enc_remainders = enc_extra_output
                enc_act_loss = (
                    hparams.act_loss_weight *
                    tf.reduce_mean(enc_ponder_times + enc_remainders))
            else:
                enc_act_loss = 0.0

            (dec_ponder_times, dec_remainders) = dec_extra_output
            dec_act_loss = (hparams.act_loss_weight *
                            tf.reduce_mean(dec_ponder_times + dec_remainders))
            act_loss = enc_act_loss + dec_act_loss
            tf.contrib.summary.scalar("act_loss", act_loss)
            return decoder_output, {"act_loss": act_loss}

        #grads = get_grads_and_vars(attention_loss)
        # dec_out_and_grads = tf.concat([decoder_output, grads], 1)  # ¿0 or 1?
        access_output, access_state = self._access(decoder_output,
                                                   dec_extra_output)

        return decoder_output, DNCState(access_output=access_output,
                                        access_state=access_state,
                                        controller_state=dec_extra_output)
Example #23
  def body(self, features):
    """Universal Transformer main model_fn.


    Args:
      features: Map of features to the model. Should contain the following:
          "inputs": Transformer inputs [batch_size, input_length, hidden_dim]
          "targets": Target decoder outputs.
              [batch_size, decoder_length, hidden_dim]
          "target_space_id"

    Returns:
      Final decoder representation. [batch_size, decoder_length, hidden_dim]
    """
    hparams = self._hparams
    if hparams.add_position_timing_signal:
      # Turning off addition of positional embedding in the encoder/decoder
      # preparation as we do it in the beginning of each step.
      hparams.pos = None

    if self.has_input:
      inputs = features["inputs"]
      target_space = features["target_space_id"]
      (encoder_output, encoder_decoder_attention_bias,
       enc_extra_output) = self.encode(
           inputs, target_space, hparams, features=features)
    else:
      (encoder_output, encoder_decoder_attention_bias,
       enc_extra_output) = (None, None, (None, None))

    targets = features["targets"]
    targets = common_layers.flatten4d3d(targets)

    (decoder_input,
     decoder_self_attention_bias) = transformer.transformer_prepare_decoder(
         targets, hparams, features=features)

    decoder_output, dec_extra_output = self.decode(
        decoder_input,
        encoder_output,
        encoder_decoder_attention_bias,
        decoder_self_attention_bias,
        hparams,
        nonpadding=transformer.features_to_nonpadding(features, "targets"))

    expected_attentions = features.get("expected_attentions")
    if expected_attentions is not None:
      attention_loss = common_attention.encoder_decoder_attention_loss(
          expected_attentions, self.attention_weights,
          hparams.expected_attention_loss_type,
          hparams.expected_attention_loss_multiplier)
      return decoder_output, {"attention_loss": attention_loss}

    if hparams.recurrence_type == "act" and hparams.act_loss_weight != 0:
      if self.has_input:
        enc_ponder_times, enc_remainders = enc_extra_output
        enc_act_loss = (
            hparams.act_loss_weight *
            tf.reduce_mean(enc_ponder_times + enc_remainders))
      else:
        enc_act_loss = 0.0

      (dec_ponder_times, dec_remainders) = dec_extra_output
      dec_act_loss = (
          hparams.act_loss_weight *
          tf.reduce_mean(dec_ponder_times + dec_remainders))
      act_loss = enc_act_loss + dec_act_loss
      tf.contrib.summary.scalar("act_loss", act_loss)
      return decoder_output, {"act_loss": act_loss}

    return decoder_output
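Most body functions on this page pass nonpadding=features_to_nonpadding(features, "targets") into decode. A hedged sketch of that helper as it behaves in tensor2tensor: with packed datasets it builds a float mask from the "*_segmentation" feature, and otherwise it returns None so padding is inferred from the attention bias; the example values are illustrative.

import tensorflow as tf
from tensor2tensor.models import transformer

# Packed examples carry a "targets_segmentation" feature in which 0 marks padding.
features = {"targets_segmentation": tf.constant([[1, 1, 2, 2, 0]])}
nonpadding = transformer.features_to_nonpadding(features, "targets")
# nonpadding evaluates to [[1., 1., 1., 1., 0.]]; without the segmentation feature
# the helper returns None.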
Example #24
def get_decoder_from_vae():
    """Decoder input computed by VAE."""
    # Return decoder stuff.
    (a, b) = transformer.transformer_prepare_decoder(
        tf.squeeze(z, axis=2), hparams)
    return (a, b, kl_loss)
Example #25
    def body(self, features):
        """Transformer main model_fn.

    Args:
      features: Map of features to the model. Should contain the following:
          "inputs": Transformer inputs.
              [batch_size, input_length, 1, hidden_dim].
          "targets": Target decoder outputs.
              [batch_size, decoder_length, 1, hidden_dim]
          "target_space_id": A scalar int from data_generators.problem.SpaceID.

    Returns:
      Final decoder representation. [batch_size, decoder_length, hidden_dim]
    """
        hparams = self._hparams

        losses = []

        if self.has_input:
            inputs = features["inputs"]
            target_space = features["target_space_id"]
            encoder_output, encoder_decoder_attention_bias = self.encode(
                inputs,
                target_space,
                hparams,
                features=features,
                losses=losses)
        else:
            encoder_output, encoder_decoder_attention_bias = (None, None)

        targets = features["targets"]
        targets_shape = common_layers.shape_list(targets)
        targets = common_layers.flatten4d3d(targets)
        left_decoder_input, left_decoder_self_attention_bias = transformer_prepare_decoder(
            targets, hparams, features=features)
        right_decoder_input, right_decoder_self_attention_bias = transformer_prepare_decoder_right(
            targets, hparams, features=features)
        non_pad = features_to_nonpadding(features, "targets")
        with tf.variable_scope("left_decoder"):
            left_decoder_output = self.decode(left_decoder_input,
                                              encoder_output,
                                              encoder_decoder_attention_bias,
                                              left_decoder_self_attention_bias,
                                              hparams,
                                              nonpadding=non_pad,
                                              losses=losses)
        with tf.variable_scope("right_decoder"):
            right_decoder_output = self.decode(
                right_decoder_input,
                encoder_output,
                encoder_decoder_attention_bias,
                right_decoder_self_attention_bias,
                hparams,
                nonpadding=non_pad,
                losses=losses)

        decoder_output = transformer_bidirectional_joint_decoder(
            tf.squeeze(left_decoder_output, axis=2),
            tf.squeeze(right_decoder_output, axis=2),
            encoder_output,
            encoder_decoder_attention_bias,
            hparams,
            nonpadding=non_pad,
            save_weights_to=self.attention_weights,
            losses=losses)
        decoder_output = tf.expand_dims(decoder_output, axis=2)

        expected_attentions = features.get("expected_attentions")
        if expected_attentions is not None:
            attention_loss = common_attention.encoder_decoder_attention_loss(
                expected_attentions, self.attention_weights,
                hparams.expected_attention_loss_type,
                hparams.expected_attention_loss_multiplier)
            return decoder_output, {"attention_loss": attention_loss}

        ret = tf.reshape(decoder_output, targets_shape)
        if losses:
            return ret, {"extra_loss": tf.add_n(losses)}
        else:
            return ret
Example #26
    def body(self, features):
        """R-Transformer main model_fn.


    Args:
      features: Map of features to the model. Should contain the following:
          "inputs": Transformer inputs [batch_size, input_length, hidden_dim]
          "targets": Target decoder outputs.
              [batch_size, decoder_length, hidden_dim]
          "target_space_id"

    Returns:
      Final decoder representation. [batch_size, decoder_length, hidden_dim]
    """
        hparams = self._hparams

        if self.has_input:
            inputs = features["inputs"]
            target_space = features["target_space_id"]
            (encoder_output, encoder_decoder_attention_bias,
             enc_extra_output) = self.encode(inputs,
                                             target_space,
                                             hparams,
                                             features=features)
        else:
            (encoder_output, encoder_decoder_attention_bias,
             enc_extra_output) = (None, None, (None, None))

        targets = features["targets"]
        targets = common_layers.flatten4d3d(targets)

        (decoder_input, decoder_self_attention_bias
         ) = transformer.transformer_prepare_decoder(targets,
                                                     hparams,
                                                     features=features)

        decoder_output, dec_extra_output = self.decode(
            decoder_input,
            encoder_output,
            encoder_decoder_attention_bias,
            decoder_self_attention_bias,
            hparams,
            nonpadding=transformer.features_to_nonpadding(features, "targets"))

        expected_attentions = features.get("expected_attentions")
        if expected_attentions is not None:
            attention_loss = common_attention.encoder_decoder_attention_loss(
                expected_attentions, self.attention_weights,
                hparams.expected_attention_loss_type,
                hparams.expected_attention_loss_multiplier)
            return decoder_output, {"attention_loss": attention_loss}

        if hparams.recurrence_type == "act" and hparams.act_loss_weight != 0:
            if self.has_input:
                enc_ponder_times, enc_remainders = enc_extra_output
                enc_act_loss = (
                    hparams.act_loss_weight *
                    tf.reduce_mean(enc_ponder_times + enc_remainders))
            else:
                enc_act_loss = 0.0

            (dec_ponder_times, dec_remainders) = dec_extra_output
            dec_act_loss = (hparams.act_loss_weight *
                            tf.reduce_mean(dec_ponder_times + dec_remainders))
            act_loss = enc_act_loss + dec_act_loss
            tf.summary.scalar("act_loss", act_loss)
            return decoder_output, {"act_loss": act_loss}

        return decoder_output
Example #27
    def body(self, features):
        """CopyTransformer main model_fn.

        Args:
          features: Map of features to the model. Should contain the following:
              "inputs": Transformer inputs [batch_size, input_length, hidden_dim]
              "targets": Target decoder outputs.
                  [batch_size, decoder_length, hidden_dim]
              "targets_*": Additional decoder outputs to generate, for copying
                  and pointing; [batch_size, decoder_length]
              "target_space_id": A scalar int from data_generators.problem.SpaceID.

        Returns:
          Final decoder representation. [batch_size, decoder_length, hidden_dim]
        """
        hparams = self._hparams

        losses = []

        inputs = features["inputs"]

        target_space = features["target_space_id"]
        encoder_output, encoder_decoder_attention_bias = self.encode(
            inputs, target_space, hparams, features=features, losses=losses)

        if "targets_actions" in features:
            targets = features["targets_actions"]
        else:
            tf.logging.warn(
                "CopyTransformer must be used with a SemanticParsing problem with a ShiftReduceGrammar; bad things will happen otherwise"
            )
            targets = features["targets"]

        targets_shape = common_layers.shape_list(targets)

        targets = common_layers.flatten4d3d(targets)

        decoder_input, decoder_self_attention_bias = transformer_prepare_decoder(
            targets, hparams, features=features)

        decoder_output = self.decode(decoder_input,
                                     encoder_output,
                                     encoder_decoder_attention_bias,
                                     decoder_self_attention_bias,
                                     hparams,
                                     nonpadding=features_to_nonpadding(
                                         features, "targets"),
                                     losses=losses)

        expected_attentions = features.get("expected_attentions")
        if expected_attentions is not None:
            attention_loss = common_attention.encoder_decoder_attention_loss(
                expected_attentions, self.attention_weights,
                hparams.expected_attention_loss_type,
                hparams.expected_attention_loss_multiplier)
            return decoder_output, {"attention_loss": attention_loss}

        decoder_output = tf.reshape(decoder_output, targets_shape)

        body_output = dict()
        target_modality = self._problem_hparams.target_modality \
            if self._problem_hparams else {"targets": None}

        assert hparams.pointer_layer in ("attentive", "decaying_attentive")

        for key, modality in target_modality.items():
            if isinstance(modality, CopyModality):
                with tf.variable_scope("copy_layer/" + key):
                    if hparams.pointer_layer == "decaying_attentive":
                        output_layer = DecayingAttentivePointerLayer(
                            encoder_output)
                    else:
                        output_layer = AttentivePointerLayer(encoder_output)
                    scores = output_layer(decoder_output)
                    scores += encoder_decoder_attention_bias
                    body_output[key] = scores
            else:
                body_output[key] = decoder_output

        if losses:
            return body_output, {"extra_loss": tf.add_n(losses)}
        else:
            return body_output
Example #28
    def body(self, features):
        """Transformer main model_fn.

    Args:
      features: Map of features to the model. Should contain the following:
          "inputs": Transformer inputs [batch_size, input_length, hidden_dim]
          "targets": Target decoder outputs. [batch_size, decoder_length,
            hidden_dim]
          "target_space_id": A scalar int from data_generators.problem.SpaceID.

    Returns:
      Final decoder representation. [batch_size, decoder_length, hidden_dim]
    """
        tf.logging.info("Using PgScratch BODY function.")
        hparams = self._hparams

        losses = {}
        inputs = features["inputs"]
        target_space = features["target_space_id"]
        # encoder_output: <tf.float32>[batch_size, input_length, hidden_dim]
        # encoder_decoder_attention_bias: <tf.float32>[batch_size, input_length]
        encoder_output, encoder_decoder_attention_bias = self.encode(
            inputs, target_space, hparams, features=features, losses=losses)

        with tf.variable_scope("knowledge"):
            with tf.name_scope("knowledge_encoding"):
                # Encode knowledge.
                # <tf.float32>[batch_size, triple_num, emb_dim]
                fact_embedding, fact_lengths = self.encode_knowledge_bottom(
                    features)
                tf.logging.info("Encoded knowledge")

            with tf.name_scope("knowledge_selection_and_loss"):
                # Compute knowledge selection and loss.
                triple_logits, avg_triple_selection_loss, knowledge_encoder_output, transe_loss = self.compute_knowledge_selection_and_loss(
                    features, encoder_output, fact_embedding, fact_lengths,
                    hparams.margin, hparams.num_negative_samples)
                losses["kb_loss"] = avg_triple_selection_loss
                losses["transe_loss"] = transe_loss

        if hparams.attend_kb:
            tf.logging.info("ATTEND_KB is ACTIVE")
            with tf.name_scope("knowledge_attention"):

                knowledge_padding = tf.zeros_like(triple_logits,
                                                  dtype=tf.float32)
                knowledge_attention_bias = common_attention.attention_bias_ignore_padding(
                    knowledge_padding)
                encoder_output = tf.concat(
                    [knowledge_encoder_output, encoder_output], 1)
                encoder_decoder_attention_bias = tf.concat(
                    [knowledge_attention_bias, encoder_decoder_attention_bias],
                    -1)

        else:
            tf.logging.info("ATTEND_KB is INACTIVE")

        targets = features["targets"]
        targets_shape = common_layers.shape_list(targets)
        targets = common_layers.flatten4d3d(targets)

        (decoder_input, decoder_self_attention_bias
         ) = transformer.transformer_prepare_decoder(targets,
                                                     hparams,
                                                     features=features)

        decode_kwargs = {}
        decoder_output = self.decode(
            decoder_input,
            encoder_output,
            encoder_decoder_attention_bias,
            decoder_self_attention_bias,
            hparams,
            nonpadding=transformer.features_to_nonpadding(features, "targets"),
            losses=losses,
            **decode_kwargs)

        expected_attentions = features.get("expected_attentions")
        if expected_attentions is not None:
            attention_loss = common_attention.encoder_decoder_attention_loss(
                expected_attentions, self.attention_weights,
                hparams.expected_attention_loss_type,
                hparams.expected_attention_loss_multiplier)
            return decoder_output, {"attention_loss": attention_loss}

        ret = tf.reshape(decoder_output, targets_shape)
        if losses:
            return ret, losses
        else:
            return ret
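The knowledge-attention branch above can concatenate its bias with the encoder-decoder bias along the last axis because attention_bias_ignore_padding produces an additive bias indexed only by memory position. A small sketch, assuming tensor2tensor's common_attention; the padding values are illustrative.

import tensorflow as tf
from tensor2tensor.layers import common_attention

# 1.0 marks padded memory positions; here the last of three positions is padding.
memory_padding = tf.constant([[0.0, 0.0, 1.0]])
bias = common_attention.attention_bias_ignore_padding(memory_padding)
# bias has shape [batch, 1, 1, memory_length], with a large negative value at padded
# positions, so two such biases can be concatenated on axis -1 as done above.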
Example #29
def transformer_prepare_decoder(targets, features):
    return transformer.transformer_prepare_decoder(
        targets, hparams, features)
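To close, a compact end-to-end sketch of the pattern shared by these examples: prepare the decoder input, then run it through the stock transformer_decoder. Shown here in the decoder-only setting (no encoder), assuming tensor2tensor and TF1 graph mode; the shapes and transformer_base hparams are illustrative.

import tensorflow as tf
from tensor2tensor.layers import common_layers
from tensor2tensor.models import transformer

hparams = transformer.transformer_base()
targets = tf.zeros([2, 7, 1, hparams.hidden_size])
targets = common_layers.flatten4d3d(targets)

decoder_input, decoder_self_attention_bias = (
    transformer.transformer_prepare_decoder(targets, hparams))
decoder_input = tf.nn.dropout(decoder_input,
                              1.0 - hparams.layer_prepostprocess_dropout)

# With no encoder, both encoder_output and the encoder-decoder bias are None.
decoder_output = transformer.transformer_decoder(
    decoder_input,
    None,                            # encoder_output
    decoder_self_attention_bias,
    None,                            # encoder_decoder_attention_bias
    hparams)
decoder_output = tf.expand_dims(decoder_output, 2)  # back to 4-D, as t2t expects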