Example 1
    def encode(self, inputs):
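        # Resize the input images to the configured height and width before the Inception v3 base.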
        inputs = tf.image.resize_images(
            images=inputs,
            size=[self.params["resize_height"], self.params["resize_width"]],
            method=tf.image.ResizeMethod.BILINEAR)

        outputs, _ = inception_v3_base(tf.to_float(inputs))
        output_shape = outputs.get_shape()  #pylint: disable=E1101
        shape_list = output_shape.as_list()

        # Take attention over output elements in the width and height dimensions:
        # Shape: [B, W*H, ...]
        outputs_flat = tf.reshape(outputs, [shape_list[0], -1, shape_list[-1]])

        # Final state is the pooled output
        # Shape: [B, W*H*...]
        final_state = tf.contrib.slim.avg_pool2d(outputs,
                                                 output_shape[1:3],
                                                 padding="VALID",
                                                 scope="pool")
        final_state = tf.contrib.slim.flatten(final_state, scope="flatten")

        return EncoderOutput(outputs=outputs_flat,
                             final_state=final_state,
                             attention_values=outputs_flat,
                             attention_values_length=tf.shape(outputs_flat)[1])
Example 2
  def encode(self, inputs, sequence_length):
    if self.params["position_embeddings.enable"]:
      positions_embed = _create_position_embedding(
          embedding_dim=inputs.get_shape().as_list()[-1],
          num_positions=self.params["position_embeddings.num_positions"],
          lengths=sequence_length,
          maxlen=tf.shape(inputs)[1])
      inputs = self._combiner_fn(inputs, positions_embed)

    # Apply dropout
    inputs = tf.contrib.layers.dropout(
        inputs=inputs,
        keep_prob=self.params["dropout_keep_prob"],
        is_training=self.mode == tf.contrib.learn.ModeKeys.TRAIN)

    outputs = self._pooling_fn(
        inputs=inputs,
        pool_size=self.params["pool_size"],
        strides=self.params["strides"],
        padding="SAME")

    # Final state is the average representation of the pooled embeddings
    final_state = tf.reduce_mean(outputs, 1)

    return EncoderOutput(
        outputs=outputs,
        final_state=final_state,
        attention_values=inputs,
        attention_values_length=sequence_length)
Example 3
    def encode(self, inputs, sequence_length, **kwargs):
        scope = tf.get_variable_scope()
        scope.set_initializer(
            tf.random_uniform_initializer(-self.params["init_scale"],
                                          self.params["init_scale"]))

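        # Place the forward cell on the first device and the backward cell on a different one.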
        self.params["rnn_cell"]["distributed"] = False
        self.params["rnn_cell"]["device_name"] = training_utils.getDeviceName(
            0)
        cell_fw = training_utils.get_rnn_cell(**self.params["rnn_cell"])

        self.params["rnn_cell"]["device_name"] = training_utils.getDeviceName(
            self.params["rnn_cell"]["num_layers"])
        if self.params["rnn_cell"][
                "device_name"] == training_utils.getDeviceName(0):
            # Ensure the backward cell runs on another GPU.
            self.params["rnn_cell"][
                "device_name"] = training_utils.getDeviceName(1)
        cell_bw = training_utils.get_rnn_cell(**self.params["rnn_cell"])
        outputs, states = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=cell_fw,
            cell_bw=cell_bw,
            inputs=inputs,
            sequence_length=sequence_length,
            dtype=tf.float32,
            **kwargs)

        # Concatenate outputs and states of the forward and backward RNNs
        outputs_concat = tf.concat(outputs, 2)

        return EncoderOutput(outputs=outputs_concat,
                             final_state=states,
                             attention_values=outputs_concat,
                             attention_values_length=sequence_length)
Example 4
    def initialize(self, name=None):

        finished = tf.tile([False], [self.config.beam_width])

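        # Embed the start tokens for every beam and zero-pad the remaining decode steps.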
        start_tokens_batch = tf.fill([self.config.beam_width],
                                     self.start_tokens)
        first_inputs = tf.nn.embedding_lookup(self.target_embedding,
                                              start_tokens_batch)
        first_inputs = tf.expand_dims(first_inputs, 1)
        zeros_padding = tf.zeros([
            self.config.beam_width, self.params['max_decode_length'] - 1,
            self.target_embedding.get_shape().as_list()[-1]
        ])
        first_inputs = tf.concat([first_inputs, zeros_padding], axis=1)

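        # Tile the encoder outputs and attention values across the beam width.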
        outputs = tf.tile(self.initial_state.outputs,
                          [self.config.beam_width, 1, 1])
        attention_values = tf.tile(self.initial_state.attention_values,
                                   [self.config.beam_width, 1, 1])
        enc_output = EncoderOutput(
            outputs=outputs,
            final_state=self.initial_state.final_state,
            attention_values=attention_values,
            attention_values_length=self.initial_state.attention_values_length)

        return finished, first_inputs, enc_output
Example 5
    def encode(self, inputs, sequence_length, **kwargs):

        print("\n--------------------------\ninputs to encode\n" +
              str(inputs) + "\n\n")

        scope = tf.get_variable_scope()
        scope.set_initializer(
            tf.random_uniform_initializer(-self.params["init_scale"],
                                          self.params["init_scale"]))

        cell_fw = training_utils.get_rnn_cell(**self.params["rnn_cell"])
        cell_bw = training_utils.get_rnn_cell(**self.params["rnn_cell"])
        outputs, states = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=cell_fw,
            cell_bw=cell_bw,
            inputs=inputs,
            sequence_length=sequence_length,
            dtype=tf.float32,
            **kwargs)

        # Concatenate outputs and states of the forward and backward RNNs
        outputs_concat = tf.concat(outputs, 2)

        print("\noutput state tensor\n" + str(outputs_concat) + "\n\n")
        print("\nfinal state tensor\n" + str(states) + "\n\n")

        return EncoderOutput(outputs=outputs_concat,
                             final_state=states,
                             attention_values=outputs_concat,
                             attention_values_length=sequence_length)
Example 6
    def encode(self, inputs, sequence_length, **kwargs):
        scope = tf.get_variable_scope()
        scope.set_initializer(
            tf.random_uniform_initializer(-self.params["init_scale"],
                                          self.params["init_scale"]))

        cell_fw = training_utils.get_rnn_cell(**self.params["rnn_cell"])
        cell_bw = training_utils.get_rnn_cell(**self.params["rnn_cell"])

        cells_fw = _unpack_cell(cell_fw)
        cells_bw = _unpack_cell(cell_bw)

        result = rnn.stack_bidirectional_dynamic_rnn(
            cells_fw=cells_fw,
            cells_bw=cells_bw,
            inputs=inputs,
            dtype=tf.float32,
            sequence_length=sequence_length,
            **kwargs)
        outputs_concat, _output_state_fw, _output_state_bw = result
        final_state = (_output_state_fw, _output_state_bw)
        return EncoderOutput(outputs=outputs_concat,
                             final_state=final_state,
                             attention_values=outputs_concat,
                             attention_values_length=sequence_length)
Example 7
    def encode(self, inputs, sequence_length):

        embed_size = inputs.get_shape().as_list()[-1]

        if self.params["position_embeddings.enable"]:
            positions_embed = self._create_position_embedding(
                lengths=sequence_length,  # lengths of each sequence in the batch
                maxlen=tf.shape(inputs)[1])  # maximum sequence length in this batch
            inputs = self._combiner_fn(inputs, positions_embed)

        # Apply dropout to embeddings
        inputs = tf.contrib.layers.dropout(
            inputs=inputs,
            keep_prob=self.params["embedding_dropout_keep_prob"],
            is_training=self.mode == tf.contrib.learn.ModeKeys.TRAIN)

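        # When cnn.layers > 0: project the embeddings to the hidden size, run the
        # convolutional encoder stack, then project back to the embedding size.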
        with tf.variable_scope("encoder_cnn"):
            next_layer = inputs
            if self.params["cnn.layers"] > 0:
                nhids_list = parse_list_or_default(
                    self.params["cnn.nhids"], self.params["cnn.layers"],
                    self.params["cnn.nhid_default"])
                kwidths_list = parse_list_or_default(
                    self.params["cnn.kwidths"], self.params["cnn.layers"],
                    self.params["cnn.kwidth_default"])

                # mapping emb dim to hid dim
                next_layer = linear_mapping_weightnorm(
                    next_layer,
                    nhids_list[0],
                    dropout=self.params["embedding_dropout_keep_prob"],
                    var_scope_name="linear_mapping_before_cnn")
                next_layer = conv_encoder_stack(
                    next_layer,
                    nhids_list,
                    kwidths_list, {
                        'src': self.params["embedding_dropout_keep_prob"],
                        'hid': self.params["nhid_dropout_keep_prob"]
                    },
                    mode=self.mode)

                next_layer = linear_mapping_weightnorm(
                    next_layer,
                    embed_size,
                    var_scope_name="linear_mapping_after_cnn")
            ## The encoder stack will receive gradients *twice* for each attention pass: dot product and weighted sum.
            ##cnn = nn.GradMultiply(cnn, 1 / (2 * nattn))
            cnn_c_output = (next_layer + inputs) * tf.sqrt(0.5)

        final_state = tf.reduce_mean(cnn_c_output, 1)

        return EncoderOutput(outputs=next_layer,
                             final_state=final_state,
                             attention_values=cnn_c_output,
                             attention_values_length=sequence_length)
Example 8
 def encode(self, inputs, sequence_length, **kwargs):
   cell = training_utils.get_rnn_cell(**self.params["rnn_cell"])
   outputs, state = tf.nn.dynamic_rnn(
       cell=cell,
       inputs=inputs,
       sequence_length=sequence_length,
       dtype=tf.float32,
       **kwargs)
   return EncoderOutput(
       outputs=outputs,
       final_state=state,
       attention_values=outputs,
       attention_values_length=sequence_length)
Example 9
    def encode(self, inputs, sequence_length, **kwargs):
        scope = tf.get_variable_scope()
        scope.set_initializer(tf.random_uniform_initializer(
            -self.params["init_scale"],
            self.params["init_scale"]))

        embedding_size = inputs.get_shape().as_list()[-1]  # TODO: Different size for words and context

        self.params["rnn_cell"]["cell_params"]["num_units"] = embedding_size + self.positional_embedding_size

        inner_cell = training_utils.get_rnn_cell(**self.params["rnn_cell"])
        cell = AttentionRNNCell(inner_cell,
                                embedding_size,
                                self.positional_embedding_size,
                                self.attention_num_layers,
                                self.attention_num_units)

        positional_embeddings_var = tf.get_variable("positional_embeddings",
                                                    [self.max_sequence_length, self.positional_embedding_size],
                                                    dtype=tf.float32)  # TODO: Make dtype configurable

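        # Look up a positional embedding for each time step and tile it across the batch.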
        position_sequence = tf.range(tf.shape(inputs)[1])

        positional_embeddings = tf.nn.embedding_lookup(positional_embeddings_var, position_sequence)

        positional_embeddings = tf.expand_dims(positional_embeddings, axis=0)

        positional_embeddings_for_batch = tf.tile(positional_embeddings, [tf.shape(inputs)[0], 1, 1])

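        # Initial state: zeros for the inner cell, plus two copies of the inputs
        # concatenated with the positional embeddings.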
        initial_state_0 = tf.zeros([tf.shape(inputs)[0], inner_cell.state_size])

        initial_state_1 = tf.concat([inputs, positional_embeddings_for_batch], axis=2)

        initial_state_2 = tf.concat([inputs, positional_embeddings_for_batch], axis=2)

        initial_state = (initial_state_0, initial_state_1, initial_state_2)

        outputs, state = tf.nn.dynamic_rnn(
            cell=cell,
            inputs=tf.zeros([tf.shape(inputs)[0], tf.shape(inputs)[1] * 1, 1], tf.float32),
            # TODO: make the "* 1" step multiplier configurable
            initial_state=initial_state,
            sequence_length=sequence_length * 1,  # TODO: make the "* 1" multiplier configurable
            dtype=tf.float32,
            **kwargs)

        return EncoderOutput(
            outputs=state[2],
            final_state=state[0],
            attention_values=state[2],
            attention_values_length=tf.ones([tf.shape(inputs)[0]], dtype=tf.int32) * tf.shape(inputs)[1])
Example 10
  def encode(self, inputs, sequence_length):
    if self.params["position_embeddings.enable"]:
      positions_embed = _create_position_embedding(
          embedding_dim=inputs.get_shape().as_list()[-1],
          num_positions=self.params["position_embeddings.num_positions"],
          lengths=sequence_length,
          maxlen=tf.shape(inputs)[1])
      inputs = self._combiner_fn(inputs, positions_embed)

    # Apply dropout to embeddings
    inputs = tf.contrib.layers.dropout(
        inputs=inputs,
        keep_prob=self.params["embedding_dropout_keep_prob"],
        is_training=self.mode == tf.contrib.learn.ModeKeys.TRAIN)

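    # Two convolutional stacks: cnn_a produces the outputs used for attention scoring,
    # cnn_c produces the attention values.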
    with tf.variable_scope("cnn_a"):
      cnn_a_output = inputs
      for layer_idx in range(self.params["attention_cnn.layers"]):
        next_layer = tf.contrib.layers.conv2d(
            inputs=cnn_a_output,
            num_outputs=self.params["attention_cnn.units"],
            kernel_size=self.params["attention_cnn.kernel_size"],
            padding="SAME",
            activation_fn=None)
        # Add a residual connection, except for the first layer
        if layer_idx > 0:
          next_layer += cnn_a_output
        cnn_a_output = tf.tanh(next_layer)

    with tf.variable_scope("cnn_c"):
      cnn_c_output = inputs
      for layer_idx in range(self.params["output_cnn.layers"]):
        next_layer = tf.contrib.layers.conv2d(
            inputs=cnn_c_output,
            num_outputs=self.params["output_cnn.units"],
            kernel_size=self.params["output_cnn.kernel_size"],
            padding="SAME",
            activation_fn=None)
        # Add a residual connection, except for the first layer
        if layer_idx > 0:
          next_layer += cnn_c_output
        cnn_c_output = tf.tanh(next_layer)

    final_state = tf.reduce_mean(cnn_c_output, 1)

    return EncoderOutput(
        outputs=cnn_a_output,
        final_state=final_state,
        attention_values=cnn_c_output,
        attention_values_length=sequence_length)
Example 11
    def encode(self, inputs, sequence_length, **kwargs):
        scope = tf.get_variable_scope()
        scope.set_initializer(
            tf.random_uniform_initializer(-self.params["init_scale"],
                                          self.params["init_scale"]))

        cell = training_utils.get_rnn_cell(**self.params["rnn_cell"])
        outputs, state = tf.nn.dynamic_rnn(cell=cell,
                                           inputs=inputs,
                                           sequence_length=sequence_length,
                                           dtype=tf.float32,
                                           **kwargs)
        return EncoderOutput(outputs=outputs,
                             final_state=state,
                             attention_values=outputs,
                             attention_values_length=sequence_length)
Example 12
    def encode(self, features, labels):
        # 1. query source encoder sequence output
        query_embedded = tf.nn.embedding_lookup(self.source_embedding,
                                                features["source_ids"])
        query_encoder_fn = self.encoder_class(self.params["encoder.params"],
                                              self.mode)
        query_output = query_encoder_fn(query_embedded, features["source_len"])
        # return query_output
        # 2. candidate source encoder sequence output
        candidate_embedded = tf.nn.embedding_lookup(
            self.source_candidate_embedding, features["source_candidate_ids"])
        candidate_encoder_fn = self.encoder_class(
            self.params["encoder.params"], self.mode)
        candidate_output = candidate_encoder_fn(
            candidate_embedded, features["source_candidate_len"])

        print("query_output:{}".format(query_output))
        print("candidate_output:{}".format(candidate_output))
        # 3. merge two encoder generated output
        # outputs = tf.concat([query_output.outputs, candidate_output.outputs], 0)
        #final_state = tf.reshape(tf.concat([query_output.final_state, candidate_output.final_state], 0), [-1, 128])
        # final_state = tf.concat([query_output.final_state, candidate_output.final_state], 0)
        # final_state = (tf.concat([query_output.final_state[0], candidate_output.final_state[0]], 0),
        #                tf.concat([query_output.final_state[1], candidate_output.final_state[1]], 0))

        # attention_values = tf.concat([query_output.attention_values, candidate_output.attention_values], 0)
        # att_v_len = tf.concat([query_output.attention_values_length, candidate_output.attention_values_length], 0)
        #
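        # Merge the two encoders by element-wise addition of their outputs, final states,
        # attention values, and attention lengths.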
        outputs = query_output.outputs + candidate_output.outputs
        final_state = (query_output.final_state[0] +
                       candidate_output.final_state[0],
                       query_output.final_state[1] +
                       candidate_output.final_state[1])
        attention_values = query_output.attention_values + candidate_output.attention_values
        att_v_len = query_output.attention_values_length + candidate_output.attention_values_length

        # outputs = query_output.outputs
        # final_state = query_output.final_state
        # attention_values = query_output.attention_values
        # att_v_len = query_output.attention_values_length

        encoderOutput = EncoderOutput(outputs=outputs,
                                      final_state=final_state,
                                      attention_values=attention_values,
                                      attention_values_length=att_v_len)
        # print("encoderOutput:{}".format(encoderOutput))
        return encoderOutput
Example 13
    def encode(self, inputs, sequence_length, **kwargs):
        CONTEXT_SIZE = 10
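        # Split the input along the time axis into the first CONTEXT_SIZE steps and the remainder.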
        inputs_c = tf.slice(inputs, [0, 0, 0], [-1, CONTEXT_SIZE, -1])
        inputs_p = tf.slice(inputs, [0, CONTEXT_SIZE, 0], [-1, -1, -1])

        with tf.variable_scope("encoder_c"):
            scope = tf.get_variable_scope()
            scope.set_initializer(
                tf.random_uniform_initializer(-self.params["init_scale"],
                                              self.params["init_scale"]))
            cell_fw_c = training_utils.get_rnn_cell(**self.params["rnn_cell"])
            cell_bw_c = training_utils.get_rnn_cell(**self.params["rnn_cell"])
            outputs_c, states_c = tf.nn.bidirectional_dynamic_rnn(
                cell_fw=cell_fw_c,
                cell_bw=cell_bw_c,
                inputs=inputs_c,
                sequence_length=sequence_length,
                dtype=tf.float32,
                **kwargs)

        with tf.variable_scope("encoder_p"):
            scope = tf.get_variable_scope()
            scope.set_initializer(
                tf.random_uniform_initializer(-self.params["init_scale"],
                                              self.params["init_scale"]))
            cell_fw_p = training_utils.get_rnn_cell(**self.params["rnn_cell"])
            cell_bw_p = training_utils.get_rnn_cell(**self.params["rnn_cell"])
            outputs_p, states_p = tf.nn.bidirectional_dynamic_rnn(
                cell_fw=cell_fw_p,
                cell_bw=cell_bw_p,
                inputs=inputs_p,
                sequence_length=sequence_length,
                dtype=tf.float32,
                **kwargs)

        # Concatenate outputs and states of the forward and backward RNNs
        outputs_concat_c = tf.concat(outputs_c, 2)
        outputs_concat_p = tf.concat(outputs_p, 2)

        final_output = tf.concat([outputs_concat_c, outputs_concat_p], 1)
        final_states = states_c + states_p
        return EncoderOutput(outputs=final_output,
                             final_state=final_states,
                             attention_values=final_output,
                             attention_values_length=sequence_length)
Example 14
  def encode(self, inputs, sequence_length, **kwargs):
    cell_fw = training_utils.get_rnn_cell(**self.params["rnn_cell"])
    cell_bw = training_utils.get_rnn_cell(**self.params["rnn_cell"])
    outputs, states = tf.nn.bidirectional_dynamic_rnn(
        cell_fw=cell_fw,
        cell_bw=cell_bw,
        inputs=inputs,
        sequence_length=sequence_length,
        dtype=tf.float32,
        **kwargs)

    # Concatenate outputs and states of the forward and backward RNNs
    outputs_concat = tf.concat(outputs, 2)

    return EncoderOutput(
        outputs=outputs_concat,
        final_state=states,
        attention_values=outputs_concat,
        attention_values_length=sequence_length)
Example 15
  def encode(self, inputs, sequence_length, **kwargs):
    cell_fw = training_utils.get_rnn_cell(**self.params["rnn_cell"])
    cell_bw = training_utils.get_rnn_cell(**self.params["rnn_cell"])

    cells_fw = _unpack_cell(cell_fw)
    cells_bw = _unpack_cell(cell_bw)

    result = rnn.stack_bidirectional_dynamic_rnn(
        cells_fw=cells_fw,
        cells_bw=cells_bw,
        inputs=inputs,
        dtype=tf.float32,
        sequence_length=sequence_length,
        **kwargs)
    outputs_concat, _output_state_fw, _output_state_bw = result
    final_state = (_output_state_fw, _output_state_bw)
    return EncoderOutput(
        outputs=outputs_concat,
        final_state=final_state,
        attention_values=outputs_concat,
        attention_values_length=sequence_length)
Example 16
    def encode(self, inputs, sequence_length):
        if self.params["position_embeddings.enable"]:
            positions_embed = _create_position_embedding(
                embedding_dim=inputs.get_shape().as_list()[-1],
                num_positions=self.params["position_embeddings.num_positions"],
                lengths=sequence_length,
                maxlen=tf.shape(inputs)[1])
            inputs = self._combiner_fn(inputs, positions_embed)

        outputs = self._pooling_fn(inputs=inputs,
                                   pool_size=self.params["pool_size"],
                                   strides=self.params["strides"],
                                   padding="SAME")

        # Final state is the average representation of the pooled embeddings
        final_state = tf.reduce_mean(outputs, 1)

        return EncoderOutput(outputs=outputs,
                             final_state=final_state,
                             attention_values=inputs,
                             attention_values_length=sequence_length)
Example 17
    def encode(self, inputs, sequence_length, **kwargs):
        scope = tf.get_variable_scope()
        scope.set_initializer(
            tf.random_uniform_initializer(-self.params["init_scale"],
                                          self.params["init_scale"]))

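        # The first half of the batch holds the query inputs, the second half the retrieved inputs.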
        query_inputs, retrieved_inputs = tf.split(inputs, 2)

        query_cell_fw = training_utils.get_rnn_cell(**self.params["rnn_cell"])
        query_cell_bw = training_utils.get_rnn_cell(**self.params["rnn_cell"])
        query_outputs, query_states = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=query_cell_fw,
            cell_bw=query_cell_bw,
            inputs=query_inputs,
            sequence_length=sequence_length,
            dtype=tf.float32,
            **kwargs)

        retrieved_cell_fw = training_utils.get_rnn_cell(
            **self.params["rnn_cell"])
        retrieved_cell_bw = training_utils.get_rnn_cell(
            **self.params["rnn_cell"])
        retrieved_outputs, retrieved_states = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=retrieved_cell_fw,
            cell_bw=retrieved_cell_bw,
            inputs=retrieved_inputs,
            sequence_length=sequence_length,
            dtype=tf.float32,
            **kwargs)

        outputs = tf.concat([query_outputs, retrieved_outputs], 0)
        # Concatenate outputs and states of the forward and backward RNNs
        outputs_concat = tf.concat(outputs, 2)

        states = tf.concat([query_states, retrieved_states], 0)

        return EncoderOutput(outputs=outputs_concat,
                             final_state=states,
                             attention_values=outputs_concat,
                             attention_values_length=sequence_length)
Example 18
 def setUp(self):
     super(BridgeTest, self).setUp()
     self.batch_size = 4
     self.encoder_cell = tf.contrib.rnn.MultiRNNCell(
         [tf.contrib.rnn.GRUCell(4),
          tf.contrib.rnn.GRUCell(8)])
     self.decoder_cell = tf.contrib.rnn.MultiRNNCell(
         [tf.contrib.rnn.LSTMCell(16),
          tf.contrib.rnn.GRUCell(8)])
     final_encoder_state = nest.map_structure(
         lambda x: tf.convert_to_tensor(
             value=np.random.randn(self.batch_size, x), dtype=tf.float32),
         self.encoder_cell.state_size)
     self.encoder_outputs = EncoderOutput(
         outputs=tf.convert_to_tensor(
             value=np.random.randn(self.batch_size, 10, 16), dtype=tf.float32),
         attention_values=tf.convert_to_tensor(
             value=np.random.randn(self.batch_size, 10, 16), dtype=tf.float32),
         attention_values_length=np.full([self.batch_size], 10),
         final_state=final_encoder_state)
Example 19
    def encode(self, inputs, sequence_length, **kwargs):
        scope = tf.get_variable_scope()
        scope.set_initializer(
            tf.random_uniform_initializer(-self.params["init_scale"],
                                          self.params["init_scale"]))

        self.params["rnn_cell"]["distributed"] = False
        self.params["rnn_cell"]["device_name"] = training_utils.getDeviceName(
            0)
        cell_fw = training_utils.get_rnn_cell(**self.params["rnn_cell"])

        self.params["rnn_cell"]["device_name"] = training_utils.getDeviceName(
            self.params["rnn_cell"]["num_layers"])
        if self.params["rnn_cell"][
                "device_name"] == training_utils.getDeviceName(0):
            # Ensure the backward cell runs on another GPU.
            self.params["rnn_cell"][
                "device_name"] = training_utils.getDeviceName(1)
        cell_bw = training_utils.get_rnn_cell(**self.params["rnn_cell"])

        cells_fw = _unpack_cell(cell_fw)
        cells_bw = _unpack_cell(cell_bw)

        result = rnn.stack_bidirectional_dynamic_rnn(
            cells_fw=cells_fw,
            cells_bw=cells_bw,
            inputs=inputs,
            dtype=tf.float32,
            sequence_length=sequence_length,
            **kwargs)
        outputs_concat, _output_state_fw, _output_state_bw = result
        final_state = (_output_state_fw, _output_state_bw)
        return EncoderOutput(outputs=outputs_concat,
                             final_state=final_state,
                             attention_values=outputs_concat,
                             attention_values_length=sequence_length)
Example 20
    def encode(self, inputs, sequence_length, source_images, **kwargs):
        scope = tf.get_variable_scope()
        scope.set_initializer(tf.contrib.layers.xavier_initializer())

        cell_fw = training_utils.get_rnn_cell(**self.params["rnn_cell"])
        cell_bw = training_utils.get_rnn_cell(**self.params["rnn_cell"])
        outputs, states = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=cell_fw,
            cell_bw=cell_bw,
            inputs=inputs,
            sequence_length=sequence_length,
            dtype=tf.float32,
            **kwargs)

        # Concatenate outputs and states of the forward and backward RNNs
        outputs_concat = tf.concat(outputs, 2)

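        # Encode the source images with a ResNet; the pooled features are exposed as image_features.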
        with slim.arg_scope(resnet_utils.resnet_arg_scope()):
            moving_average_decay = self.params["resnet"][
                "moving_average_decay"]
            logits, end_points = _resnet_v1(
                source_images,
                states,
                num_classes=None,
                global_pool=True,
                spatial_squeeze=False,
                is_training=self.is_training,
                moving_average_decay=moving_average_decay)

        logits = tf.reshape(logits, [-1, 2048])

        return EncoderOutput(outputs=outputs_concat,
                             final_state=states,
                             attention_values=outputs_concat,
                             attention_values_length=sequence_length,
                             image_features=logits)
Example 21
    def encode(self, inputs, sequence_length, **kwargs):
        num_layers_2 = self.params['rnn_cell_uni']['num_layers']
        scope = tf.get_variable_scope()
        scope.set_initializer(
            tf.random_uniform_initializer(-self.params["init_scale"],
                                          self.params["init_scale"]))

        bi_encoder_output = self.bi_encoder.encode(inputs, sequence_length,
                                                   **kwargs)
        uni_cell = training_utils.get_rnn_cell(**self.params["rnn_cell_uni"])

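        # Run a unidirectional RNN stack on top of the bidirectional encoder outputs.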
        encoder_outputs, encoder_state = tf.nn.dynamic_rnn(
            uni_cell,
            bi_encoder_output.outputs,
            dtype=tf.float32,
            sequence_length=sequence_length)
        # TODO: check whether time_major=self.time_major should be passed here.
        encoder_state = (bi_encoder_output.final_state[1], ) + (
            (encoder_state, ) if num_layers_2 == 1 else encoder_state)

        return EncoderOutput(outputs=encoder_outputs,
                             final_state=encoder_state,
                             attention_values=encoder_outputs,
                             attention_values_length=sequence_length)