    def encode(self,
               inputs,
               contexts,
               target_space,
               hparams,
               features=None,
               losses=None):

        inputs = common_layers.flatten4d3d(inputs)
        _contexts = {}
        for context_name in contexts:
            _contexts[context_name] = common_layers.flatten4d3d(
                contexts[context_name])

        encoder_input, self_attention_bias, encoder_decoder_attention_bias = (
            transformer_prepare_encoder(inputs,
                                        target_space,
                                        hparams,
                                        features=features))
        encoder_input = tf.nn.dropout(
            encoder_input, 1.0 - hparams.layer_prepostprocess_dropout)

        context_inputs = {}
        self_ctxt_attention_biases = {}
        encoder_decoder_ctxt_attention_biases = {}

        for context_name in _contexts:
            with tf.variable_scope(tf.get_variable_scope(),
                                   reuse=tf.AUTO_REUSE):
                context_input, self_ctxt_attention_bias, encoder_decoder_ctxt_attention_bias = (
                    transformer_prepare_encoder(_contexts[context_name],
                                                target_space,
                                                hparams,
                                                features=features))
                context_input = tf.nn.dropout(
                    context_input, 1.0 - hparams.layer_prepostprocess_dropout)
                context_inputs[context_name] = context_input
                self_ctxt_attention_biases[
                    context_name] = self_ctxt_attention_bias
                encoder_decoder_ctxt_attention_biases[
                    context_name] = encoder_decoder_ctxt_attention_bias

        encoder_output = discourse_aware_transformer_encoder_with_context(
            encoder_input,
            self_attention_bias,
            context_inputs,
            self_ctxt_attention_biases,
            features,
            hparams,
            save_weights_to=self.attention_weights,
            losses=losses)
        return encoder_output, self_attention_bias
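
Every example on this page follows the same flatten → prepare_encoder → dropout → encode skeleton. The standalone sketch below distills that shared pattern; the function name is illustrative and it assumes a TF1-era tensor2tensor installation.

import tensorflow as tf
from tensor2tensor.layers import common_layers
from tensor2tensor.models import transformer


def basic_transformer_encode(inputs, target_space, hparams, features=None):
    """Minimal encoder pipeline shared by the snippets on this page (sketch)."""
    # [batch, length, 1, hidden] -> [batch, length, hidden]
    inputs = common_layers.flatten4d3d(inputs)
    # Add positional/target-space signals and build the attention biases.
    encoder_input, self_attention_bias, encoder_decoder_attention_bias = (
        transformer.transformer_prepare_encoder(
            inputs, target_space, hparams, features=features))
    # tensor2tensor convention: keep_prob = 1 - layer_prepostprocess_dropout.
    encoder_input = tf.nn.dropout(
        encoder_input, 1.0 - hparams.layer_prepostprocess_dropout)
    encoder_output = transformer.transformer_encoder(
        encoder_input, self_attention_bias, hparams)
    return encoder_output, encoder_decoder_attention_bias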
Example #2
  def encode(self, stories, questions, target_space, hparams,
             unused_features=None):
    """Encode transformer inputs.

    Args:
      inputs: Transformer inputs [batch_size, input_length, input_height,
        hidden_dim] which will be flattened along the two spatial dimensions.
      target_space: scalar, target space ID.
      hparams: hyperparmeters for model.
      unused_features: optionally pass the entire features dictionary as well.
        This is needed now for "packed" datasets.

    Returns:
      Tuple of:
          encoder_output: Encoder representation.
              [batch_size, input_length, hidden_dim]
          encoder_decoder_attention_bias: Bias and mask weights for
              encodre-decoder attention. [batch_size, input_length]
    """

    inputs = tf.concat([stories, questions], axis=1)
    # inputs = common_layers.flatten4d3d(inputs)

    (encoder_input, encoder_self_attention_bias, _) = (
      transformer.transformer_prepare_encoder(inputs, target_space, hparams))

    encoder_input = tf.nn.dropout(encoder_input,
                                  1.0 - hparams.layer_prepostprocess_dropout)

    encoder_output = transformer.transformer_encoder(
        encoder_input,
        encoder_self_attention_bias,
        hparams,
        # nonpadding=features_to_nonpadding(features, "inputs"),
        save_weights_to=self.attention_weights)

    return encoder_output
Example #3
  def encode(self, inputs, target_space, hparams, features=None):
    """Encode transformer inputs.

    Args:
      inputs: Transformer inputs [batch_size, input_length, input_height,
        hidden_dim] which will be flattened along the two spatial dimensions.
      target_space: scalar, target space ID.
      hparams: hyperparameters for model.
      features: optionally pass the entire features dictionary as well.
        This is needed now for "packed" datasets.

    Returns:
      Tuple of:
          encoder_output: Encoder representation.
              [batch_size, input_length, hidden_dim]
          encoder_extra_output: which is extra encoder output used in some
            variants of the model (e.g. in ACT, to pass the ponder-time to body)
    """
    inputs = common_layers.flatten4d3d(inputs)

    (encoder_input, self_attention_bias, _) = (
        transformer.transformer_prepare_encoder(inputs, target_space, hparams))

    encoder_input = tf.nn.dropout(encoder_input,
                                  1.0 - hparams.layer_prepostprocess_dropout)

    (encoder_output,
     encoder_extra_output) = r_transformer_util.r_transformer_encoder(
         encoder_input,
         self_attention_bias,
         hparams,
         nonpadding=transformer.features_to_nonpadding(features, "inputs"),
         save_weights_to=self.attention_weights)

    return encoder_output, encoder_extra_output
Example #4
File: transformers.py  Project: r-mal/sacar
    def encode_no_lookup(self, embedded_inputs, inputs_mask):
        """Encoder step for transformer given already-embedded inputs

      Args:
        embedded_inputs: int tensor with shape [batch_size, input_length, emb_size].
        inputs_mask: tensor with shape [batch_size, input_length]

      Returns:
        float tensor with shape [batch_size, input_length, hidden_size]
      """
        (encoder_input, self_attention_bias,
         _) = (t2t_transformer.transformer_prepare_encoder(
             embedded_inputs, self.target_space, self.hparams))

        encoder_input = tf.nn.dropout(
            encoder_input, 1.0 - self.hparams.layer_prepostprocess_dropout)

        (encoder_output, encoder_extra_output) = (
            universal_transformer_util.universal_transformer_encoder(
                encoder_input,
                self_attention_bias,
                self.hparams,
                nonpadding=inputs_mask,
                save_weights_to=self.model.attention_weights))

        return encoder_output, encoder_extra_output
Example #5
def transformer_text_encoder(inputs, target_space, hparams, name=None):
    """Transformer text encoder over inputs with unmasked full attention.

  Args:
    inputs: Tensor of shape [batch, length, 1, hparams.hidden_size].
    target_space: int. Used for encoding inputs under a target space id.
    hparams: tf.contrib.training.HParams.
    name: string, variable scope.

  Returns:
    encoder_output: Tensor of shape [batch, length, hparams.hidden_size].
    ed: Tensor of shape [batch, 1, 1, length]. Encoder-decoder attention bias
      for any padded tokens.
  """
    with tf.variable_scope(name, default_name="transformer_text_encoder"):
        inputs = common_layers.flatten4d3d(inputs)
        [
            encoder_input,
            encoder_self_attention_bias,
            ed,
        ] = transformer.transformer_prepare_encoder(inputs,
                                                    target_space=target_space,
                                                    hparams=hparams)
        encoder_input = tf.nn.dropout(encoder_input, 1.0 - hparams.dropout)
        encoder_output = transformer.transformer_encoder(
            encoder_input, encoder_self_attention_bias, hparams)
        return encoder_output, ed
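
A hedged usage sketch for transformer_text_encoder above; the placeholder shapes and the choice of transformer.transformer_base() for the hparams are assumptions added for illustration.

import tensorflow as tf
from tensor2tensor.models import transformer

hparams = transformer.transformer_base()  # hidden_size defaults to 512
text = tf.placeholder(tf.float32, [None, None, 1, hparams.hidden_size])
encoder_output, ed = transformer_text_encoder(
    text, target_space=0, hparams=hparams)
# encoder_output: [batch, length, hidden_size]
# ed: [batch, 1, 1, length] encoder-decoder attention bias (large negative
#     values at padded positions).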
Example #6
    def body(self, features):
        hparams = self._hparams
        targets = features["targets"]
        inputs = features["inputs"]
        target_space = features["target_space_id"]

        inputs = common_layers.flatten4d3d(inputs)
        targets = common_layers.flatten4d3d(targets)

        (encoder_input, encoder_self_attention_bias,
         encoder_decoder_attention_bias) = (
             transformer.transformer_prepare_encoder(inputs, target_space,
                                                     hparams))
        (decoder_input, decoder_self_attention_bias
         ) = transformer.transformer_prepare_decoder(targets, hparams)

        encoder_input = tf.nn.dropout(
            encoder_input, 1.0 - hparams.layer_prepostprocess_dropout)
        decoder_input = tf.nn.dropout(
            decoder_input, 1.0 - hparams.layer_prepostprocess_dropout)
        encoder_output = transformer_revnet_encoder(
            encoder_input, encoder_self_attention_bias, hparams)

        decoder_output = transformer_revnet_decoder(
            decoder_input, encoder_output, decoder_self_attention_bias,
            encoder_decoder_attention_bias, hparams)
        decoder_output = tf.expand_dims(decoder_output, 2)

        return decoder_output
Example #7
def transformer_text_encoder(x,
                             space_id,
                             hparams,
                             name="transformer_text_encoder"):
  """Transformer text encoder over inputs with unmasked full attention.

  Args:
    x: Tensor of shape [batch, length, 1, hparams.hidden_size].
    space_id: int, id.
    hparams: tf.contrib.training.HParams.
    name: string, variable scope.

  Returns:
    encoder_output: Tensor of shape [batch, length, hparams.hidden_size].
    ed: Tensor of shape [batch, 1, 1, length]. Encoder-decoder attention bias
      for any padded tokens.
  """
  with tf.variable_scope(name):
    x = common_layers.flatten4d3d(x)
    (encoder_input, encoder_self_attention_bias,
     ed) = transformer.transformer_prepare_encoder(x, space_id, hparams)
    encoder_input = tf.nn.dropout(encoder_input, 1.0 - hparams.dropout)
    encoder_output = transformer.transformer_encoder(
        encoder_input, encoder_self_attention_bias, hparams)
    return encoder_output, ed
Example #8
def transformer_encoder_ht(inputs,
                           target_space,
                           hparams,
                           features=None,
                           losses=None):
    encoder_input, self_attention_bias, encoder_decoder_attention_bias = (
        transformer.transformer_prepare_encoder(inputs,
                                                target_space,
                                                hparams,
                                                features=features))

    # encoder_input = tf.nn.dropout(encoder_input,
    #                               1.0 - hparams.layer_prepostprocess_dropout)

    encoder_output = transformer.transformer_encoder(
        encoder_input,
        self_attention_bias,
        hparams,
        # nonpadding=transformer.features_to_nonpadding(features, "inputs"),
        nonpadding=None,
        save_weights_to=None,
        losses=losses)

    # encoder_output = tf.expand_dims(encoder_output, 2)

    return encoder_output
Example #9
def transformer_text_encoder(x,
                             space_id,
                             hparams,
                             name="transformer_text_encoder"):
    """Transformer text encoder over inputs with unmasked full attention.

  Args:
    x: Tensor of shape [batch, length, 1, hparams.hidden_size].
    space_id: int, id.
    hparams: tf.contrib.training.HParams.
    name: string, variable scope.

  Returns:
    encoder_output: Tensor of shape [batch, length, hparams.hidden_size].
    ed: Tensor of shape [batch, 1, 1, length]. Encoder-decoder attention bias
      for any padded tokens.
  """
    with tf.variable_scope(name):
        x = common_layers.flatten4d3d(x)
        (encoder_input, encoder_self_attention_bias,
         ed) = transformer.transformer_prepare_encoder(x, space_id, hparams)
        encoder_input = tf.nn.dropout(encoder_input, 1.0 - hparams.dropout)
        return transformer.transformer_encoder(encoder_input,
                                               encoder_self_attention_bias,
                                               hparams), ed
Example #10
def universal_transformer_encoder(inputs,
                                  target_space,
                                  hparams,
                                  features=None,
                                  make_image_summary=False):

    encoder_input, self_attention_bias, encoder_decoder_attention_bias = (
        transformer.transformer_prepare_encoder(inputs,
                                                target_space,
                                                hparams,
                                                features=features))

    encoder_input = tf.nn.dropout(encoder_input,
                                  1.0 - hparams.layer_prepostprocess_dropout)

    [encoder_output, encoder_extra_output
     ] = universal_transformer_util.universal_transformer_encoder(
         encoder_input,
         self_attention_bias,
         hparams,
         nonpadding=transformer.features_to_nonpadding(features, "inputs"),
         save_weights_to=None,
         make_image_summary=make_image_summary)

    if hparams.recurrence_type == "act" and hparams.act_loss_weight != 0:
        ponder_times, remainders = encoder_extra_output
        act_loss = hparams.act_loss_weight * tf.reduce_mean(ponder_times +
                                                            remainders)

        return encoder_output, act_loss
    else:
        return encoder_output, tf.constant(0.0, tf.float32)
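
When hparams.recurrence_type is "act", encoder_extra_output carries the (ponder_times, remainders) pair from Adaptive Computation Time, and the weighted mean above becomes an auxiliary loss added to the training objective. Below is a hedged sketch of calling the wrapper above; the universal_transformer_base hparams set and the placeholder shape are assumptions.

import tensorflow as tf
from tensor2tensor.models.research import universal_transformer as ut

hparams = ut.universal_transformer_base()
hparams.recurrence_type = "act"  # enable the ACT halting mechanism
inputs = tf.placeholder(tf.float32, [None, None, hparams.hidden_size])
encoder_output, act_loss = universal_transformer_encoder(
    inputs, target_space=0, hparams=hparams)
# act_loss is a scalar to add to the task loss during training; it is
# tf.constant(0.0) when ACT is disabled or act_loss_weight is zero.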
Example #11
  def body(self, features):
    hparams = self._hparams
    targets = features["targets"]
    inputs = features["inputs"]
    target_space = features["target_space_id"]

    inputs = common_layers.flatten4d3d(inputs)
    targets = common_layers.flatten4d3d(targets)

    (encoder_input, encoder_self_attention_bias,
     encoder_decoder_attention_bias) = (transformer.transformer_prepare_encoder(
         inputs, target_space, hparams))
    (decoder_input,
     decoder_self_attention_bias) = transformer.transformer_prepare_decoder(
         targets, hparams)

    encoder_input = tf.nn.dropout(encoder_input,
                                  1.0 - hparams.layer_prepostprocess_dropout)
    decoder_input = tf.nn.dropout(decoder_input,
                                  1.0 - hparams.layer_prepostprocess_dropout)
    encoder_output = transformer_revnet_encoder(
        encoder_input, encoder_self_attention_bias, hparams)

    decoder_output = transformer_revnet_decoder(
        decoder_input, encoder_output, decoder_self_attention_bias,
        encoder_decoder_attention_bias, hparams)
    decoder_output = tf.expand_dims(decoder_output, 2)

    return decoder_output
Example #12
def encode(x, x_space, hparams, name):
  """Transformer preparations and encoder."""
  with tf.variable_scope(name):
    (encoder_input, encoder_self_attention_bias,
     ed) = transformer.transformer_prepare_encoder(x, x_space, hparams)
    encoder_input = tf.nn.dropout(encoder_input, 1.0 - hparams.dropout)
    return transformer.transformer_encoder(
        encoder_input, encoder_self_attention_bias, hparams), ed
Example #13
def encode(x, x_space, hparams, name):
  """Transformer preparations and encoder."""
  with tf.variable_scope(name):
    (encoder_input, encoder_self_attention_bias,
     ed) = transformer.transformer_prepare_encoder(x, x_space, hparams)
    encoder_input = tf.nn.dropout(encoder_input, 1.0 - hparams.dropout)
    return transformer.transformer_encoder(
        encoder_input, encoder_self_attention_bias, hparams), ed
Example #14
def create_t2t_transformer_encoder(
    x_in: "tf.Tensor",
    mask: "tf.Tensor",
    attention_weights: Dict[Text, "tf.Tensor"],
    hparams: "HParams",
    C2: float,
    is_training: "tf.Tensor",
) -> "tf.Tensor":
    """Create t2t transformer encoder."""

    with tf.variable_scope("transformer", reuse=tf.AUTO_REUSE):
        x = create_tf_fnn(
            x_in,
            [hparams.hidden_size],
            hparams.layer_prepostprocess_dropout,
            C2,
            is_training,
            layer_name_suffix="pre_embed",
            activation=None,
            use_bias=False,
            kernel_initializer=tf.random_normal_initializer(
                0.0, hparams.hidden_size**-0.5),
        )
        if hparams.multiply_embedding_mode == "sqrt_depth":
            x *= hparams.hidden_size**0.5

        x *= tf.expand_dims(mask, -1)
        (
            x,
            self_attention_bias,
            encoder_decoder_attention_bias,
        ) = transformer_prepare_encoder(x, None, hparams)

        x *= tf.expand_dims(mask, -1)

        x = tf.nn.dropout(x, 1.0 - hparams.layer_prepostprocess_dropout)

        attn_bias_for_padding = None
        # Otherwise the encoder will just use encoder_self_attention_bias.
        if hparams.unidirectional_encoder:
            attn_bias_for_padding = encoder_decoder_attention_bias

        x = transformer_encoder(
            x,
            self_attention_bias,
            hparams,
            nonpadding=mask,
            save_weights_to=attention_weights,
            attn_bias_for_padding=attn_bias_for_padding,
        )

        x *= tf.expand_dims(mask, -1)

        return tf.nn.dropout(tf.nn.relu(x),
                             1.0 - hparams.layer_prepostprocess_dropout)
Example #15
    def encode(self,
               inputs,
               target_space,
               hparams,
               features=None,
               losses=None,
               **kwargs):
        """Encode Universal Transformer inputs.

    It is similar to "transformer.encode", but it uses
    "universal_transformer_util.universal_transformer_encoder" instead of
    "transformer.transformer_encoder".

    Args:
      inputs: Transformer inputs [batch_size, input_length, input_height,
        hidden_dim] which will be flattened along the two spatial dimensions.
      target_space: scalar, target space ID.
      hparams: hyperparameters for model.
      features: optionally pass the entire features dictionary as well.
        This is needed now for "packed" datasets.
      losses: Unused.
      **kwargs: additional arguments to pass to encoder_function

    Returns:
      Tuple of:
          encoder_output: Encoder representation.
              [batch_size, input_length, hidden_dim]
          encoder_decoder_attention_bias: Bias and mask weights for
              encoder-decoder attention. [batch_size, input_length]
          encoder_extra_output: which is extra encoder output used in some
            variants of the model (e.g. in ACT, to pass the ponder-time to body)
    """
        del losses

        inputs = common_layers.flatten4d3d(inputs)

        encoder_input, self_attention_bias, encoder_decoder_attention_bias = (
            transformer.transformer_prepare_encoder(inputs,
                                                    target_space,
                                                    hparams,
                                                    features=features))

        encoder_input = tf.nn.dropout(
            encoder_input, 1.0 - hparams.layer_prepostprocess_dropout)

        (encoder_output, encoder_extra_output) = (
            universal_transformer_util.universal_transformer_encoder(
                encoder_input,
                self_attention_bias,
                hparams,
                nonpadding=transformer.features_to_nonpadding(
                    features, "inputs"),
                save_weights_to=self.attention_weights))

        return encoder_output, encoder_decoder_attention_bias, encoder_extra_output
Example #16
def encoder(name, hparams, inputs, target_space):
    """Compute encoder outputs and attention bias."""
    with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
        (encoder_input, encoder_self_attention_bias,
         encoder_decoder_attention_bias) = (transformer_prepare_encoder(
             inputs, target_space, hparams))
        encoder_input = tf.nn.dropout(
            encoder_input, rate=hparams.layer_prepostprocess_dropout)
        encoder_output = transformer_encoder(encoder_input,
                                             encoder_self_attention_bias,
                                             hparams)
        return encoder_output, encoder_decoder_attention_bias
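
Note that this snippet passes the dropout probability through the rate= keyword (newer TF releases), while most other examples on this page pass the legacy keep_prob positional argument; the two calls below are equivalent (illustrative values only).

import tensorflow as tf

p = 0.1  # e.g. hparams.layer_prepostprocess_dropout
x = tf.random.normal([2, 5, 8])
y_rate = tf.nn.dropout(x, rate=p)   # rate = drop probability
y_keep = tf.nn.dropout(x, 1.0 - p)  # legacy keep_prob form used elsewhere on this page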
Example #17
def transformer_text_encoder(inputs,
                             space_id,
                             hparams,
                             name="transformer_text_enc"):
    """Transformer text encoder."""
    with tf.variable_scope(name):
        x = common_layers.flatten4d3d(inputs)
        (encoder_input, encoder_self_attention_bias,
         ed) = transformer.transformer_prepare_encoder(x, space_id, hparams)
        encoder_input = tf.nn.dropout(encoder_input, 1.0 - hparams.dropout)
        return transformer.transformer_encoder(encoder_input,
                                               encoder_self_attention_bias,
                                               hparams), ed
Example #18
    def encode(self,
               inputs,
               target_space,
               hparams,
               features=None,
               losses=None):
        """Encode inputs using _encoder().

    This performs the same way as transformer.Transformer.encode with the
    encoder portion replaced with _encoder().

    Args:
      inputs: Input [batch_size, input_length, input_height, hidden_dim] tensor
        which will be flattened along the two spatial dimensions.
      target_space: scalar, target space ID.
      hparams: Hyperparameters for model.
      features: Optionally pass the entire features dictionary as well. This is
        needed now for "packed" datasets.
      losses: Unused list of losses.

    Returns:
      Tuple of:
          encoder_output: Encoder representation.
              [batch_size, input_length, hidden_dim]
          encoder_decoder_attention_bias: Bias and mask weights for
              encoder-decoder attention. [batch_size, input_length]

    Raises:
      ValueError: If encoder type not found.
    """
        inputs = common_layers.flatten4d3d(inputs)

        encoder_input, self_attention_bias, encoder_decoder_attention_bias = (
            transformer.transformer_prepare_encoder(inputs,
                                                    target_space,
                                                    hparams,
                                                    features=features))

        encoder_input = tf.nn.dropout(
            encoder_input, 1.0 - hparams.layer_prepostprocess_dropout)

        encoder_output = self._encoder(
            encoder_input,
            self_attention_bias,
            hparams,
            nonpadding=transformer.features_to_nonpadding(features, "inputs"),
            save_weights_to=self.attention_weights)

        return encoder_output, encoder_decoder_attention_bias
Example #19
def te_encode(input_seq, hparams, target_space, features, name):
    input_seq = common_layers.flatten4d3d(input_seq)

    (encoder_input, encoder_self_attention_bias, _) = (
        transformer_prepare_encoder(input_seq, target_space, hparams))

    encoder_input = tf.nn.dropout(encoder_input,
                                  1.0 - hparams.layer_prepostprocess_dropout)
    encoder_output = transformer_encoder(
        encoder_input,
        encoder_self_attention_bias,
        hparams,
        nonpadding=features_to_nonpadding(features, "input_seq"))
    encoder_output = tf.expand_dims(encoder_output, 2)
    return encoder_output
Example #20
  def _prepare_encoder(self, inputs, target_space):
    """Process the transformer encoder inputs."""
    inputs = common_layers.flatten4d3d(inputs)

    output = transformer.transformer_prepare_encoder(
        inputs,
        target_space,
        self._hparams,
        features=None,
    )
    enco_input, enco_self_att_bias, enco_deco_att_bias = output

    enco_input = tf.nn.dropout(
        enco_input, 1.0 - self._hparams.layer_prepostprocess_dropout)

    return enco_input, enco_self_att_bias, enco_deco_att_bias
Example #21
  def _prepare_encoder(self, inputs, target_space):
    """Process the transformer encoder inputs."""
    inputs = common_layers.flatten4d3d(inputs)

    output = transformer.transformer_prepare_encoder(
        inputs,
        target_space,
        self._hparams,
        features=None,
    )
    enco_input, enco_self_att_bias, enco_deco_att_bias = output

    enco_input = tf.nn.dropout(
        enco_input, 1.0 - self._hparams.layer_prepostprocess_dropout)

    return enco_input, enco_self_att_bias, enco_deco_att_bias
Example #22
  def encode(self, inputs, target_space, hparams, features=None, losses=None):
    """Encode Universal Transformer inputs.

    It is similar to "transformer.encode", but it uses
    "universal_transformer_util.universal_transformer_encoder" instead of
    "transformer.transformer_encoder".

    Args:
      inputs: Transformer inputs [batch_size, input_length, input_height,
        hidden_dim] which will be flattened along the two spatial dimensions.
      target_space: scalar, target space ID.
      hparams: hyperparameters for model.
      features: optionally pass the entire features dictionary as well.
        This is needed now for "packed" datasets.
      losses: Unused.

    Returns:
      Tuple of:
          encoder_output: Encoder representation.
              [batch_size, input_length, hidden_dim]
          encoder_decoder_attention_bias: Bias and mask weights for
              encoder-decoder attention. [batch_size, input_length]
          encoder_extra_output: which is extra encoder output used in some
            variants of the model (e.g. in ACT, to pass the ponder-time to body)
    """
    del losses

    inputs = common_layers.flatten4d3d(inputs)

    encoder_input, self_attention_bias, encoder_decoder_attention_bias = (
        transformer.transformer_prepare_encoder(
            inputs, target_space, hparams, features=features))

    encoder_input = tf.nn.dropout(encoder_input,
                                  1.0 - hparams.layer_prepostprocess_dropout)

    (encoder_output, encoder_extra_output) = (
        universal_transformer_util.universal_transformer_encoder(
            encoder_input,
            self_attention_bias,
            hparams,
            nonpadding=transformer.features_to_nonpadding(features, "inputs"),
            save_weights_to=self.attention_weights))

    return encoder_output, encoder_decoder_attention_bias, encoder_extra_output
Example #23
  def encode(self, features, input_key):
    hparams = self._hparams
    inputs = common_layers.flatten4d3d(features[input_key])

    (encoder_input, encoder_self_attention_bias, _) = (
        transformer.transformer_prepare_encoder(inputs, problem.SpaceID.EN_TOK,
                                                hparams))

    encoder_input = tf.nn.dropout(encoder_input,
                                  1.0 - hparams.layer_prepostprocess_dropout)
    encoder_output = transformer.transformer_encoder(
        encoder_input,
        encoder_self_attention_bias,
        hparams,
        nonpadding=transformer.features_to_nonpadding(features, input_key))

    encoder_output = tf.reduce_mean(encoder_output, axis=1)

    return encoder_output
Example #24
  def encode(self, features, input_key):
    hparams = self._hparams
    inputs = common_layers.flatten4d3d(features[input_key])

    (encoder_input, encoder_self_attention_bias, _) = (
        transformer.transformer_prepare_encoder(inputs, problem.SpaceID.EN_TOK,
                                                hparams))

    encoder_input = tf.nn.dropout(encoder_input,
                                  1.0 - hparams.layer_prepostprocess_dropout)
    encoder_output = transformer.transformer_encoder(
        encoder_input,
        encoder_self_attention_bias,
        hparams,
        nonpadding=transformer.features_to_nonpadding(features, input_key))

    encoder_output = tf.reduce_mean(encoder_output, axis=1)

    return encoder_output
Example #25
def sim_encode(inputs, target_space, hparams, features):
    # inputs = tf.Print(inputs, [tf.shape(inputs)], "input", summarize=10)
    inputs = common_layers.flatten4d3d(inputs)

    (encoder_input, encoder_self_attention_bias,
     _) = (transformer.transformer_prepare_encoder(inputs, target_space,
                                                   hparams))
    encoder_input = tf.nn.dropout(encoder_input,
                                  1.0 - hparams.layer_prepostprocess_dropout)
    encoder_output = transformer.transformer_encoder(
        encoder_input,
        encoder_self_attention_bias,
        hparams,
        nonpadding=transformer.features_to_nonpadding(features, "inputs"))

    positional_mean = tf.nn.l2_normalize(tf.reduce_mean(encoder_output, 1), 1)
    # out_norm = tf.norm(positional_mean)
    # positional_mean = tf.Print(positional_mean , [out_norm], "enc_out: (should be b_size**0.5) ", summarize=10)
    # positional_mean = tf.Print(positional_mean , [tf.shape(positional_mean)], "enc_out: (should be (b_size, h_size)) ", summarize=10)
    return positional_mean
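
Because sim_encode L2-normalizes its mean-pooled encoder states, cosine similarity between two encoded batches reduces to a dot product. The helper below is an illustrative addition, not part of the original snippet.

import tensorflow as tf


def cosine_similarity(vec_a, vec_b):
    """vec_a, vec_b: [batch, hidden_size] outputs of sim_encode (already unit length)."""
    return tf.reduce_sum(vec_a * vec_b, axis=-1)  # [batch], values in [-1, 1]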
Example #26
    def model_fn_body(self, features):
        hparams = self._hparams
        targets = features["targets"]
        inputs = features.get("inputs")
        target_space = features.get("target_space_id")

        inputs = common_layers.flatten4d3d(inputs)
        targets = common_layers.flatten4d3d(targets)

        (encoder_input, encoder_attention_bias,
         _) = transformer.transformer_prepare_encoder(inputs, target_space,
                                                      hparams)
        (decoder_input, decoder_self_attention_bias
         ) = transformer.transformer_prepare_decoder(targets, hparams)

        # We need masks of the form batch size x input sequences
        # Biases seem to be of the form batch_size x 1 x input sequences x vec dim
        #  Squeeze out dim one, and get the first element of each vector.
        encoder_mask = tf.squeeze(encoder_attention_bias, [1])[:, :, 0]
        decoder_mask = tf.squeeze(decoder_self_attention_bias, [1])[:, :, 0]

        def residual_fn(x, y):
            return common_layers.layer_norm(
                x + tf.nn.dropout(y, 1.0 - hparams.residual_dropout))

        encoder_input = tf.nn.dropout(encoder_input,
                                      1.0 - hparams.residual_dropout)
        decoder_input = tf.nn.dropout(decoder_input,
                                      1.0 - hparams.residual_dropout)
        encoder_output = alt_transformer_encoder(encoder_input, residual_fn,
                                                 encoder_mask, hparams)

        decoder_output = alt_transformer_decoder(decoder_input, encoder_output,
                                                 residual_fn, decoder_mask,
                                                 encoder_attention_bias,
                                                 hparams)

        decoder_output = tf.expand_dims(decoder_output, 2)

        return decoder_output
Example #27
def universal_transformer_encoder(inputs,
                                  target_space,
                                  hparams,
                                  features=None,
                                  make_image_summary=False):

    encoder_input, self_attention_bias, encoder_decoder_attention_bias = (
        transformer.transformer_prepare_encoder(
            inputs, target_space, hparams, features=features))

    encoder_input = tf.nn.dropout(encoder_input,
                                  1.0 - hparams.layer_prepostprocess_dropout)

    [encoder_output, encoder_extra_output
     ] = universal_transformer_util.universal_transformer_encoder(
        encoder_input,
        self_attention_bias,
        hparams,
        nonpadding=transformer.features_to_nonpadding(features, "inputs"),
        save_weights_to=None,
        make_image_summary=make_image_summary)

    # encoder_output = tf.expand_dims(encoder_output, 2)

    return encoder_output
Example #28
    def body(self, features):
        hparams = self._hparams
        inputs = features["inputs"]
        target_space = features["target_space_id"]

        inputs = common_layers.flatten4d3d(inputs)

        (encoder_input, encoder_self_attention_bias,
         _) = (transformer.transformer_prepare_encoder(inputs, target_space,
                                                       hparams))

        encoder_input = tf.nn.dropout(
            encoder_input, 1.0 - hparams.layer_prepostprocess_dropout)
        encoder_output = transformer.transformer_encoder(
            encoder_input,
            encoder_self_attention_bias,
            hparams,
            nonpadding=transformer.features_to_nonpadding(features, "inputs"))

        encoder_output = encoder_output[:, :1, :]
        encoder_output = tf.expand_dims(encoder_output, 2)

        return encoder_output
Example #29
    def model_fn_body(self, features):
        hparams = self._hparams
        targets = features["targets"]
        inputs = features.get("inputs")
        target_space = features.get("target_space_id")

        inputs = common_layers.flatten4d3d(inputs)
        targets = common_layers.flatten4d3d(targets)

        (encoder_input, encoder_attention_bias,
         _) = (transformer.transformer_prepare_encoder(inputs, target_space,
                                                       hparams))
        (decoder_input,
         _) = (transformer.transformer_prepare_decoder(targets, hparams))

        encoder_mask = bias_to_mask(encoder_attention_bias)

        def residual_fn(x, y):
            return common_layers.layer_norm(
                x + tf.nn.dropout(y, 1.0 - hparams.residual_dropout))

        encoder_input = tf.nn.dropout(encoder_input,
                                      1.0 - hparams.residual_dropout)
        decoder_input = tf.nn.dropout(decoder_input,
                                      1.0 - hparams.residual_dropout)

        encoder_output = alt_transformer_encoder(encoder_input, residual_fn,
                                                 encoder_mask, hparams)

        decoder_output = alt_transformer_decoder(decoder_input, encoder_output,
                                                 residual_fn,
                                                 encoder_attention_bias,
                                                 hparams)

        decoder_output = tf.expand_dims(decoder_output, 2)

        return decoder_output
Example #30
def transformer_text_encoder(x,
                             space_id,
                             hparams,
                             name="transformer_text_encoder"):
    """Transformer text encoder over inputs with unmasked full attention.

  Args:
    x: Tensor of shape [batch, length, 1, hidden_dim].
    space_id: int, id.
    hparams: tf.contrib.training.HParams.
    name: string, variable scope.

  Returns:
    x: Tensor of shape [batch, length, hidden_dim].
    ed: Tensor of shape [batch, 1, 1, length]. Encoder-decoder attention bias
      for padded tokens in the input.
  """
    with tf.variable_scope(name):
        x = common_layers.flatten4d3d(x)
        (encoder_input, encoder_self_attention_bias,
         ed) = transformer.transformer_prepare_encoder(x, space_id, hparams)
        encoder_input = tf.nn.dropout(encoder_input, 1.0 - hparams.dropout)
        return transformer.transformer_encoder(encoder_input,
                                               encoder_self_attention_bias,
                                               hparams), ed
Example #31
def main():

    FLAGS = Args()

    # Enable TF Eager execution
    tfe = tf.contrib.eager
    tfe.enable_eager_execution()

    batch_inputs = input_generator()

    # initialize translation model
    hparams_set = FLAGS.hparams_set
    Modes = tf.estimator.ModeKeys
    hparams = trainer_lib.create_hparams(hparams_set,
                                         data_dir=FLAGS.data_dir,
                                         problem_name=FLAGS.problem)
    translate_model = registry.model(FLAGS.model)(hparams, Modes.EVAL)

    # recover parameters and conduct recurrent conduction
    ckpt_dir = tf.train.latest_checkpoint(FLAGS.model_dir)

    with tfe.restore_variables_on_create(ckpt_dir):
        with variable_scope.EagerVariableStore().as_default():
            features = {'inputs': batch_inputs}
            with tf.variable_scope('universal_transformer/body'):
                input_tensor = tf.convert_to_tensor(features['inputs'])
                input_tensor = common_layers.flatten4d3d(input_tensor)
                encoder_input, self_attention_bias, _ = (
                    transformer.transformer_prepare_encoder(
                        input_tensor,
                        tf.convert_to_tensor([0]),
                        translate_model.hparams,
                        features=None))

            with tf.variable_scope('universal_transformer/body/encoder'):

                ffn_unit = functools.partial(
                    universal_transformer_util.transformer_encoder_ffn_unit,
                    hparams=translate_model.hparams)

                attention_unit = functools.partial(
                    universal_transformer_util.
                    transformer_encoder_attention_unit,
                    hparams=translate_model.hparams,
                    encoder_self_attention_bias=None,
                    attention_dropout_broadcast_dims=[],
                    save_weights_to={},
                    make_image_summary=True)

            storing_list = []
            transformed_state = encoder_input
            for step_index in range(1024):
                storing_list.append(transformed_state.numpy())

                with tf.variable_scope(
                        'universal_transformer/body/encoder/universal_transformer_{}'
                        .format(FLAGS.ut_type)):
                    transformed_state = universal_transformer_util.step_preprocess(
                        transformed_state,
                        tf.convert_to_tensor(step_index % FLAGS.step_num),
                        translate_model.hparams)
                with tf.variable_scope(
                        'universal_transformer/body/encoder/universal_transformer_{}/rec_layer_0'
                        .format(FLAGS.ut_type)):
                    transformed_new_state = ffn_unit(
                        attention_unit(transformed_state))
                with tf.variable_scope('universal_transformer/body/encoder'):
                    if (step_index + 1) % FLAGS.step_num == 0:
                        transformed_new_state = common_layers.layer_preprocess(
                            transformed_new_state, translate_model.hparams)

                        if step_index == 5:
                            print(transformed_new_state)

                transformed_state = transformed_new_state
            storing_list = np.asarray(storing_list)
            np.save(FLAGS.save_dir, storing_list)
Example #32
def main():

  FLAGS = Args()

  # Enable TF Eager execution
  tfe = tf.contrib.eager
  tfe.enable_eager_execution()

  # sample sentence
  input_str = 'Twas brillig, and the slithy toves Did gyre and gimble in the wade; All mimsy were the borogoves, And the mome raths outgrabe.'

  # convert sentence into index in vocab
  wmt_problem = problems.problem(FLAGS.problem)
  encoders = wmt_problem.feature_encoders(FLAGS.data_dir)
  inputs = encoders["inputs"].encode(input_str) + [1]  # add EOS id
  batch_inputs = tf.reshape(inputs, [1, -1, 1])  # Make it 3D.
  features = {"inputs": batch_inputs}

  # initialize translation model
  hparams_set = FLAGS.hparams_set
  Modes = tf.estimator.ModeKeys
  hparams = trainer_lib.create_hparams(hparams_set, data_dir=FLAGS.data_dir, problem_name=FLAGS.problem)
  translate_model = registry.model(FLAGS.model)(hparams, Modes.EVAL)

  # recover parameters and conduct recurrent conduction
  ckpt_dir = tf.train.latest_checkpoint(FLAGS.model_dir)

  with tfe.restore_variables_on_create(ckpt_dir):
    with variable_scope.EagerVariableStore().as_default():
      with tf.variable_scope('universal_transformer'):
        # Convert word index to word embedding
        features = translate_model.bottom(features)

      with tf.variable_scope('universal_transformer/body'):
        input_tensor = tf.convert_to_tensor(features['inputs'])
        input_tensor = common_layers.flatten4d3d(input_tensor)
        encoder_input, self_attention_bias, _ = (
          transformer.transformer_prepare_encoder(
            input_tensor, tf.convert_to_tensor([0]), translate_model.hparams, features=None))

      with tf.variable_scope('universal_transformer/body/encoder'):

        ffn_unit = functools.partial(
          universal_transformer_util.transformer_encoder_ffn_unit,
          hparams=translate_model.hparams)

        attention_unit = functools.partial(
          universal_transformer_util.transformer_encoder_attention_unit,
          hparams=translate_model.hparams,
          encoder_self_attention_bias=None,
          attention_dropout_broadcast_dims=[],
          save_weights_to={},
          make_image_summary=True)

      storing_list = []
      transformed_state = encoder_input
      for step_index in range(1024):
        storing_list.append(transformed_state.numpy())

        with tf.variable_scope('universal_transformer/body/encoder/universal_transformer_{}'.format(FLAGS.ut_type)):
          transformed_state = universal_transformer_util.step_preprocess(
            transformed_state,
            tf.convert_to_tensor(step_index % FLAGS.step_num),
            translate_model.hparams
          )
        with tf.variable_scope('universal_transformer/body/encoder/universal_transformer_{}/rec_layer_0'.format(FLAGS.ut_type)):
          transformed_new_state = ffn_unit(attention_unit(transformed_state))
        with tf.variable_scope('universal_transformer/body/encoder'):
          if (step_index + 1) % FLAGS.step_num == 0:
            transformed_new_state = common_layers.layer_preprocess(transformed_new_state, translate_model.hparams)

            if step_index == 5:
              print(transformed_new_state)

        transformed_state = transformed_new_state
      storing_list = np.asarray(storing_list)
      np.save(FLAGS.save_dir, storing_list)
Example #33
    def encode(self,
               inputs,
               target_space,
               hparams,
               features=None,
               losses=None,
               **kwargs):
        """Encode Universal Transformer inputs.
    It is similar to "transformer.encode", but it uses
    "universal_transformer_util.universal_transformer_encoder" instead of
    "transformer.transformer_encoder".
    Args:
      inputs: Transformer inputs [batch_size, input_length, input_height,
        hidden_dim] which will be flattened along the two spatial dimensions.
      target_space: scalar, target space ID.
      hparams: hyperparameters for model.
      features: optionally pass the entire features dictionary as well.
        This is needed now for "packed" datasets.
      losses: Unused.
      **kwargs: additional arguments to pass to encoder_function
    Returns:
      Tuple of:
          encoder_output: Encoder representation.
              [batch_size, input_length, hidden_dim]
          encoder_decoder_attention_bias: Bias and mask weights for
              encoder-decoder attention. [batch_size, input_length]
          encoder_extra_output: which is extra encoder output used in some
            variants of the model (e.g. in ACT, to pass the ponder-time to body)
    """

        ####
        ## DEBUG
        ####
        # with open("invertible_UT_params.json", "w") as f:
        #   json.dump(dict(hparams.__dict__), f, default=lambda o: '<not serializable>', sort_keys=True,
        #             indent=4, separators=(',', ': '))
        # sys.exit()

        del losses

        inputs = common_layers.flatten4d3d(inputs)

        encoder_input, self_attention_bias, encoder_decoder_attention_bias = (
            transformer.transformer_prepare_encoder(inputs,
                                                    target_space,
                                                    hparams,
                                                    features=features))

        encoder_input = tf.nn.dropout(
            encoder_input, 1.0 - hparams.layer_prepostprocess_dropout)

        (encoder_output, encoder_extra_output) = (invertible_UT_encoder(
            encoder_input,
            self_attention_bias,
            hparams,
            nonpadding=transformer.features_to_nonpadding(features, "inputs"),
            save_weights_to=self.attention_weights))

        for var in tf.trainable_variables():
            print(var)

        return encoder_output, encoder_decoder_attention_bias, encoder_extra_output
Example #34
    def encode(self, encoder_input, target_space, hparams):
        dir_path = os.path.dirname(os.path.realpath(__file__))
        config_file = os.path.join(dir_path, "config.yml")
        with open(config_file) as f:
            config = yaml.safe_load(f)
        enc_name = config["model_params"].split('_')[0][3:]

        if enc_name == "simple":
            encoder_input, encoder_self_attention_bias, encoder_decoder_attention_bias = transformer.transformer_prepare_encoder(
                encoder_input, target_space, hparams)
            encoder_input = tf.nn.dropout(
                encoder_input, 1.0 - hparams.layer_prepostprocess_dropout)
            encoder_output = transformer.transformer_encoder(
                encoder_input, encoder_self_attention_bias, hparams)
        else:
            encoder_input, encoder_self_attention_bias_slices, encoder_decoder_attention_bias_slices = parallel_transformer_prepare_encoder(
                encoder_input, target_space, hparams)
            encoder_input = tf.nn.dropout(
                encoder_input, 1.0 - hparams.layer_prepostprocess_dropout)
            encoder_output = getattr(encode_fn, enc_name)(
                encoder_input, encoder_self_attention_bias_slices, hparams,
                "encoder")
            encoder_decoder_attention_bias = tf.stack(
                encoder_decoder_attention_bias_slices)
            encoder_decoder_attention_bias = tf.reduce_mean(
                encoder_decoder_attention_bias, 0)
        return encoder_output, encoder_decoder_attention_bias
Example #35
def vae_transformer_internal(inputs, targets, target_space, hparams):
    """VAE Transformer, main step used for training."""
    with tf.variable_scope("vae_transformer"):
        is_training = hparams.mode == tf.contrib.learn.ModeKeys.TRAIN
        # Prepare inputs, targets, and k.
        inputs = common_layers.flatten4d3d(inputs)
        targets = common_layers.flatten4d3d(targets)
        k = 2**hparams.num_compress_steps
        _, targets = common_layers.pad_to_same_length(
            inputs, targets, final_length_divisible_by=k)

        # Transformer preparations and encoder.
        (encoder_input, encoder_self_attention_bias,
         encoder_decoder_attention_bias
         ) = transformer.transformer_prepare_encoder(inputs, target_space,
                                                     hparams)
        residual_fn = transformer.get_residual_fn(hparams)
        encoder_input = tf.nn.dropout(encoder_input,
                                      1.0 - hparams.residual_dropout)
        encoder_output = transformer.transformer_encoder(
            encoder_input, residual_fn, encoder_self_attention_bias, hparams)

        def get_decoder_autoregressive():
            """Decoder input for autoregressive computation."""
            (a, b) = transformer.transformer_prepare_decoder(targets, hparams)
            return (a, b, tf.constant(0.0))

        # 10% of the time we compress all-zeros, as will be at decoding start.
        prob_targets = 0.9 if is_training else 1.0
        to_compress = tf.cond(tf.less(tf.random_uniform([]), prob_targets),
                              lambda: targets, lambda: tf.zeros_like(targets))
        z, kl_loss = compress_vae(to_compress, hparams, "vae")
        # Decompress.
        for i in xrange(hparams.num_compress_steps):
            j = hparams.num_hidden_layers - i - 1
            z = decompress(z, hparams, "decompress_%d" % j)

        def get_decoder_from_vae():
            """Decoder input computed by VAE."""
            # Return decoder stuff.
            (a, b) = transformer.transformer_prepare_decoder(
                tf.squeeze(z, axis=2), hparams)
            return (a, b, kl_loss)

        # Randomize decoder inputs.
        prob_do_vae = common_layers.inverse_exp_decay(40000) * 0.7
        step = tf.to_float(tf.contrib.framework.get_global_step())
        if not is_training:
            prob_do_vae = tf.cond(tf.less(step,
                                          40000.0), lambda: tf.constant(0.0),
                                  lambda: tf.constant(1.0))
        (decoder_input, decoder_self_attention_bias,
         kl_loss2) = tf.cond(tf.less(tf.random_uniform([]), prob_do_vae),
                             get_decoder_from_vae, get_decoder_autoregressive)

        # Transformer decoder.
        decoder_output = transformer.transformer_decoder(
            decoder_input, encoder_output, residual_fn,
            decoder_self_attention_bias, encoder_decoder_attention_bias,
            hparams)
        decoder_output = tf.expand_dims(decoder_output, 2)

        cond_self = tf.cond(tf.less(step, 30000.0), lambda: tf.constant(1.0),
                            lambda: tf.constant(0.0))
        prob_self = 0.4 if is_training else cond_self
        (ret, kl_loss) = tf.cond(tf.less(tf.random_uniform([]),
                                         prob_self), lambda: (z, kl_loss),
                                 lambda: (decoder_output, kl_loss2))

        kl_loss *= common_layers.inverse_exp_decay(50000) * 2.0
        return ret, kl_loss
Example #36
def encode_decode_task(features, hparams, train, attention_weights=None):
    """Model core graph for the one-shot action.

  Args:
    features: a dictionary that contains "inputs", a tensor of shape
        [batch_size, num_tokens]; "verb_id_seq", of shape
        [batch_size, num_actions]; and "object_spans" and "param_span" tensors
        of shape [batch_size, num_actions, 2]. 0 is used for padding or
        non-existent values.
    hparams: the general hyperparameters for the model.
    train: the train mode.
    attention_weights: the dict to keep attention weights for analysis.
  Returns:
    decoder_output: the decoder output used to predict the action tuples.
    decoder_nonpadding: the nonpadding mask over decoding positions.
    areas: the area encodings of the task.
    scope: the embedding scope.
  """
    del train
    input_embeddings, scope = common_embed.embed_tokens(
        features["task"], hparams.task_vocab_size, hparams.hidden_size,
        hparams)
    with tf.variable_scope("encode_decode", reuse=tf.AUTO_REUSE):
        encoder_nonpadding = tf.minimum(tf.to_float(features["task"]), 1.0)
        input_embeddings = tf.multiply(tf.expand_dims(encoder_nonpadding, 2),
                                       input_embeddings)
        encoder_input, self_attention_bias, encoder_decoder_attention_bias = (
            transformer.transformer_prepare_encoder(input_embeddings,
                                                    None,
                                                    hparams,
                                                    features=None))
        encoder_input = tf.nn.dropout(encoder_input,
                                      keep_prob=1.0 -
                                      hparams.layer_prepostprocess_dropout)
        if hparams.instruction_encoder == "transformer":
            encoder_output = transformer.transformer_encoder(
                encoder_input,
                self_attention_bias,
                hparams,
                save_weights_to=attention_weights,
                make_image_summary=not common_layers.is_xla_compiled())
        else:
            raise ValueError("Unsupported instruction encoder %s" %
                             (hparams.instruction_encoder))
        span_rep = hparams.get("span_rep", "area")
        area_encodings, area_starts, area_ends = area_utils.compute_sum_image(
            encoder_output, max_area_width=hparams.max_span)
        current_shape = tf.shape(area_encodings)
        if span_rep == "area":
            area_encodings, _, _ = area_utils.compute_sum_image(
                encoder_output, max_area_width=hparams.max_span)
        elif span_rep == "basic":
            area_encodings = area_utils.compute_alternative_span_rep(
                encoder_output,
                input_embeddings,
                max_area_width=hparams.max_span,
                hidden_size=hparams.hidden_size,
                advanced=False)
        elif span_rep == "coref":
            area_encodings = area_utils.compute_alternative_span_rep(
                encoder_output,
                input_embeddings,
                max_area_width=hparams.max_span,
                hidden_size=hparams.hidden_size,
                advanced=True)
        else:
            raise ValueError("xyz")
        areas = {}
        areas["encodings"] = area_encodings
        areas["starts"] = area_starts
        areas["ends"] = area_ends
        with tf.control_dependencies([
                tf.print("encoder_output", tf.shape(encoder_output)),
                tf.assert_equal(current_shape,
                                tf.shape(area_encodings),
                                summarize=100)
        ]):
            paddings = tf.cast(tf.less(self_attention_bias, -1), tf.int32)
        padding_sum, _, _ = area_utils.compute_sum_image(
            tf.expand_dims(tf.squeeze(paddings, [1, 2]), 2),
            max_area_width=hparams.max_span)
        num_areas = common_layers.shape_list(area_encodings)[1]
        area_paddings = tf.reshape(tf.minimum(tf.to_float(padding_sum), 1.0),
                                   [-1, num_areas])
        areas["bias"] = area_paddings
        decoder_nonpadding = tf.to_float(
            tf.greater(features["verb_refs"][:, :, 1],
                       features["verb_refs"][:, :, 0]))
        if hparams.instruction_encoder == "lstm":
            hparams_decoder = copy.copy(hparams)
            hparams_decoder.set_hparam("pos", "none")
        else:
            hparams_decoder = hparams
        decoder_input, decoder_self_attention_bias = _prepare_decoder_input(
            area_encodings,
            decoder_nonpadding,
            features,
            hparams_decoder,
            embed_scope=scope)
        decoder_input = tf.nn.dropout(decoder_input,
                                      keep_prob=1.0 -
                                      hparams.layer_prepostprocess_dropout)
        if hparams.instruction_decoder == "transformer":
            decoder_output = transformer.transformer_decoder(
                decoder_input=decoder_input,
                encoder_output=encoder_output,
                decoder_self_attention_bias=decoder_self_attention_bias,
                encoder_decoder_attention_bias=encoder_decoder_attention_bias,
                hparams=hparams_decoder)
        else:
            raise ValueError("Unsupported instruction encoder %s" %
                             (hparams.instruction_encoder))
        return decoder_output, decoder_nonpadding, areas, scope