예제 #1
0
  def transform(self, inputs, mode):
    """Embeds ids, convolves over the last input axis and max-reduces it.

    Args:
      inputs: Ids to embed. NOTE(review): the reshapes below imply a
        [batch, timesteps, last_dim] layout -- confirm against the caller.
      mode: A ``tf.estimator.ModeKeys`` value; dropout is applied only when
        it equals ``TRAIN``.

    Returns:
      A tensor reshaped to ``[-1, tf.shape(inputs)[1], self.num_outputs]``.
    """
    is_training = mode == tf.estimator.ModeKeys.TRAIN

    table = tf.get_variable(
        "w_char_embs",
        shape=[self.vocabulary_size, self.embedding_size],
        dtype=self.dtype)
    embedded = embedding_lookup(table, inputs)
    embedded = tf.layers.dropout(embedded, rate=self.dropout, training=is_training)

    # Fold the leading dimensions together so the convolution sees a rank-3
    # tensor whose axis 1 is the last axis of ``inputs``.
    last_dim = tf.shape(inputs)[-1]
    flat = tf.reshape(embedded, [-1, last_dim, self.embedding_size])

    # Symmetric zero padding of (kernel_size - 1) on axis 1.
    pad_width = self.kernel_size - 1
    padded = tf.pad(flat, [[0, 0], [pad_width, pad_width], [0, 0]])
    # Restore the static depth so conv1d can infer its kernel shape.
    padded.set_shape((None, None, self.embedding_size))

    convolved = tf.layers.conv1d(
        padded,
        filters=self.num_outputs,
        kernel_size=self.kernel_size,
        strides=self.stride)

    # Keep the strongest activation along axis 1 for each output channel.
    pooled = tf.reduce_max(convolved, axis=1)

    # Unfold the leading dimensions again.
    timesteps = tf.shape(inputs)[1]
    return tf.reshape(pooled, [-1, timesteps, self.num_outputs])
예제 #2
0
  def transform(self, inputs, mode):
    """Looks up embeddings for ``inputs``, creating the table on first use.

    The "w_embs" variable is fetched if it already exists. Otherwise it is
    created, either from the pretrained file named by ``self.embedding_file``
    or with shape ``[self.vocabulary_size, self.embedding_size]``.

    Side effect: when pretrained embeddings are loaded,
    ``self.embedding_size`` is overwritten with their last dimension.

    Args:
      inputs: Ids to embed.
      mode: A ``tf.estimator.ModeKeys`` value; dropout is applied only when
        it equals ``TRAIN``.

    Returns:
      The embedded inputs after dropout.
    """
    try:
      table = tf.get_variable("w_embs", dtype=self.dtype, trainable=self.trainable)
    except ValueError:
      # The variable has not been created yet: decide how to build it.
      if not self.embedding_file:
        table_shape = [self.vocabulary_size, self.embedding_size]
        table_init = None
      else:
        pretrained = load_pretrained_embeddings(
            self.embedding_file,
            self.vocabulary_file,
            num_oov_buckets=self.num_oov_buckets,
            with_header=self.embedding_file_with_header,
            case_insensitive_embeddings=self.case_insensitive_embeddings)
        self.embedding_size = pretrained.shape[-1]

        # The shape is carried by the constant initializer itself.
        table_shape = None
        np_values = pretrained.astype(self.dtype.as_numpy_dtype())
        table_init = tf.constant(np_values)

      table = tf.get_variable(
          "w_embs",
          shape=table_shape,
          dtype=self.dtype,
          initializer=table_init,
          trainable=self.trainable)

    is_training = mode == tf.estimator.ModeKeys.TRAIN
    embedded = embedding_lookup(table, inputs)
    return tf.layers.dropout(embedded, rate=self.dropout, training=is_training)
예제 #3
0
 def encode(self, reuse=None):
     """Embeds ``self.input_ids`` and runs ``self.encoder`` on the result.

     Args:
       reuse: Forwarded to ``tf.variable_scope("encoder", ...)``.

     Returns:
       Whatever ``self.encoder.encode`` returns.
     """
     ids = self.input_ids
     lengths = self.input_length
     # The lookup happens outside the "encoder" variable scope.
     embedded_source = embedding_lookup(self.src_emb, ids)
     with tf.variable_scope("encoder", reuse=reuse):
         encoded = self.encoder.encode(
             embedded_source,
             sequence_length=lengths,
             mode=self.mode)
     return encoded
예제 #4
0
 def encode(self, positions, depth, dtype=tf.float32):
   """Returns learned position embeddings whose row 0 receives no gradient.

   Args:
     positions: Position indices; values above ``self.maximum_position`` are
       clipped to it.
     depth: Size of each embedding vector.
     dtype: Type of the embedding variable and of the gradient mask.

   Returns:
     The result of ``embedding_lookup`` on the masked table.
   """
   clipped_positions = tf.minimum(positions, self.maximum_position)

   # Row 0 (the padding position) is masked with zeros, every other row
   # with ones, so that only non-padding rows are updated.
   padding_row = tf.zeros(shape=[1, depth], dtype=dtype)
   other_rows = tf.ones(shape=[self.maximum_position, depth], dtype=dtype)
   grad_mask = tf.concat([padding_row, other_rows], axis=0)

   table = tf.get_variable(
       "w_embs",
       shape=[self.maximum_position + 1, depth],
       dtype=dtype,
       initializer=tf.random_normal_initializer(0, 0.1))
   masked_table = scale_gradient(table, grad_mask)
   return embedding_lookup(masked_table, clipped_positions)
예제 #5
0
  def _embed(self, inputs, mode):
    """Embeds ``inputs`` with the "w_char_embs" table and applies dropout.

    Args:
      inputs: Ids to embed.
      mode: A ``tf.estimator.ModeKeys`` value; dropout is applied only when
        it equals ``TRAIN``.

    Returns:
      The embedded inputs after dropout.
    """
    is_training = mode == tf.estimator.ModeKeys.TRAIN
    table_shape = [self.vocabulary_size, self.embedding_size]
    table = tf.get_variable("w_char_embs", shape=table_shape, dtype=self.dtype)

    embedded = embedding_lookup(table, inputs)
    return tf.layers.dropout(embedded, rate=self.dropout, training=is_training)
예제 #6
0
def get_embedding_fn(embedding):
    """Normalizes ``embedding`` into a callable that maps word ids to vectors.

    Args:
      embedding: Either an embedding tensor or a callable that already takes
        word ids.

    Returns:
      ``embedding`` itself when it is callable; otherwise a function that
      calls ``embedding_lookup(embedding, ids)``.
    """
    if callable(embedding):
        return embedding

    def _lookup(ids):
        # ``embedding_lookup`` is resolved at call time, not at wrap time.
        return embedding_lookup(embedding, ids)

    return _lookup
예제 #7
0
 def encode(self, positions, depth, dtype=tf.float32):
   """Returns learned position embeddings for ``positions``.

   Args:
     positions: Position indices; values above ``self.maximum_position`` are
       clipped to it.
     depth: Size of each embedding vector.
     dtype: Type of the embedding variable.

   Returns:
     The result of ``embedding_lookup`` on the "w_embs" table.
   """
   clipped_positions = tf.minimum(positions, self.maximum_position)
   # One row per position from 0 to maximum_position inclusive.
   table_shape = [self.maximum_position + 1, depth]
   table = tf.get_variable("w_embs", shape=table_shape, dtype=dtype)
   return embedding_lookup(table, clipped_positions)
예제 #8
0
    def decode(self, encoder_outputs_tuple, output_layer=None, reuse=False):
        """Builds the decoder graph for training/evaluation or for inference.

        Outside of inference mode the decoder is run on the embedded target
        inputs and only ``logits`` are produced. In inference mode the
        decoding strategy is selected by ``self.params["decode_type"]``
        (random sampling, beam search or greedy) and only ``predictions``
        are produced.

        Side effect: stores ``encoder_outputs`` on ``self.encoder_outputs``.

        Args:
          encoder_outputs_tuple: A 3-tuple
            ``(encoder_outputs, encoder_state, encoder_sequence_length)``.
          output_layer: Optional projection layer. When ``None`` a
            ``tf.layers.Dense(self.tgt_vocab_size)`` is created. It is built
            here with the last dimension of ``encoder_outputs`` as input depth.
          reuse: Forwarded to ``tf.variable_scope("decoder", ...)``.

        Returns:
          A tuple ``(logits, predictions)``. ``logits`` is ``None`` in
          inference mode; ``predictions`` is ``None`` otherwise. When set,
          ``predictions`` is a dict with keys "ids", "tokens", "length" and
          "log_probs".

        Raises:
          ValueError: In inference mode, if ``decode_type`` is not a known
            strategy and ``decode_width`` is greater than 1. Previously this
            case fell through every branch and failed later with an
            ``UnboundLocalError`` on ``sampled_ids``.
        """
        (encoder_outputs, encoder_state, encoder_sequence_length) = encoder_outputs_tuple
        self.encoder_outputs = encoder_outputs
        target_ids_in, target_length_in = self.target_ids_in, self.target_length_in_or_out

        with tf.variable_scope("decoder", reuse=reuse):
            if output_layer is None:
                output_layer = tf.layers.Dense(self.tgt_vocab_size)
            output_layer.build([None, encoder_outputs.get_shape()[-1]])

            predictions = None
            logits = None
            if self.mode != constants.INFER:
                target_in = embedding_lookup(self.tgt_emb, target_ids_in)
                logits, _, _ = self.decoder.decode(
                    target_in,
                    target_length_in,
                    vocab_size=self.tgt_vocab_size,
                    initial_state=encoder_state,
                    mode=self.mode,
                    memory=encoder_outputs,
                    memory_sequence_length=encoder_sequence_length,
                    output_layer=output_layer)
            else:
                batch_size = tf.shape(encoder_sequence_length)[0]
                maximum_iterations = self.params.get("maximum_iterations", 30)
                start_tokens = tf.fill([batch_size], constants.START_OF_SENTENCE_ID)
                end_token = constants.END_OF_SENTENCE_ID

                decode_type = self.params.get("decode_type", constants.GREEDY)
                decode_width = self.params.get("decode_width", 1)
                if decode_type == constants.RANDOM:
                    print("random decode_width:", decode_width)
                    # Repeat every batch entry decode_width times so that
                    # several samples are drawn per input in one pass.
                    tile_start_tokens = tf.contrib.seq2seq.tile_batch(start_tokens, multiplier=decode_width)
                    tile_encoder_state = tf.contrib.seq2seq.tile_batch(encoder_state, multiplier=decode_width)
                    tile_encoder_outputs = tf.contrib.seq2seq.tile_batch(encoder_outputs, multiplier=decode_width)
                    tile_encoder_sequence_length = tf.contrib.seq2seq.tile_batch(encoder_sequence_length,
                                                                                 multiplier=decode_width)
                    sampled_ids, _, sampled_length, log_probs, _ = self.decoder.dynamic_decode(
                        self.tgt_emb,
                        tile_start_tokens,
                        end_token,
                        vocab_size=self.tgt_vocab_size,
                        initial_state=tile_encoder_state,
                        output_layer=output_layer,
                        maximum_iterations=maximum_iterations,
                        mode=self.mode,
                        memory=tile_encoder_outputs,
                        memory_sequence_length=tile_encoder_sequence_length,
                        return_alignment_history=True,
                        sample_from=0,
                        # penalize_previous_words=True  # True for Transformer
                    )
                    # Regroup the tiled samples per original batch entry.
                    sampled_ids = tf.reshape(sampled_ids, (batch_size, decode_width, -1))
                    sampled_length = tf.reshape(sampled_length, (batch_size, decode_width))
                    log_probs = tf.reshape(log_probs, (batch_size, decode_width))
                elif decode_type == constants.BEAM:
                    sampled_ids, _, sampled_length, log_probs, _ = \
                        self.decoder.dynamic_decode_and_search(
                            self.tgt_emb,
                            start_tokens,
                            end_token,
                            vocab_size=self.tgt_vocab_size,
                            initial_state=encoder_state,
                            output_layer=output_layer,
                            beam_width=decode_width,
                            maximum_iterations=maximum_iterations,
                            mode=self.mode,
                            memory=encoder_outputs,
                            memory_sequence_length=encoder_sequence_length,
                            return_alignment_history=True)
                elif decode_type == constants.GREEDY or decode_width <= 1:
                    sampled_ids, _, sampled_length, log_probs, _ = self.decoder.dynamic_decode(
                        self.tgt_emb,
                        start_tokens,
                        end_token,
                        vocab_size=self.tgt_vocab_size,
                        initial_state=encoder_state,
                        output_layer=output_layer,
                        maximum_iterations=maximum_iterations,
                        mode=self.mode,
                        memory=encoder_outputs,
                        memory_sequence_length=encoder_sequence_length,
                        return_alignment_history=True)
                else:
                    # Fail here with a clear message instead of crashing on
                    # the undefined ``sampled_ids`` below.
                    raise ValueError(
                        "Unsupported decode_type %r with decode_width %r"
                        % (decode_type, decode_width))

                target_tokens = self.tgt_vocab_rev.lookup(tf.cast(sampled_ids, tf.int64))
                predictions = {
                    "ids": sampled_ids,
                    "tokens": target_tokens,
                    "length": sampled_length,
                    "log_probs": log_probs}
        return logits, predictions