def encode(self, inputs):
    inputs = tf.image.resize_images(
        images=inputs,
        size=[self.params["resize_height"], self.params["resize_width"]],
        method=tf.image.ResizeMethod.BILINEAR)

    outputs, _ = inception_v3_base(tf.to_float(inputs))
    output_shape = outputs.get_shape()  # pylint: disable=E1101
    shape_list = output_shape.as_list()

    # Take attention over output elements in the width and height dimensions:
    # Shape: [B, W*H, ...]
    outputs_flat = tf.reshape(outputs, [shape_list[0], -1, shape_list[-1]])

    # Final state is the pooled output
    # Shape: [B, W*H*...]
    final_state = tf.contrib.slim.avg_pool2d(
        outputs, output_shape[1:3], padding="VALID", scope="pool")
    final_state = tf.contrib.slim.flatten(final_state, scope="flatten")

    return EncoderOutput(
        outputs=outputs_flat,
        final_state=final_state,
        attention_values=outputs_flat,
        attention_values_length=tf.shape(outputs_flat)[1])
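# Every encode() in this section returns an EncoderOutput. A minimal sketch of
# that container, assuming the four-field namedtuple used by google/seq2seq
# (one image-captioning variant further below adds an extra image_features
# field to its own version of the tuple):
import collections

EncoderOutput = collections.namedtuple(
    "EncoderOutput",
    ["outputs", "final_state", "attention_values", "attention_values_length"])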
def encode(self, inputs, sequence_length): if self.params["position_embeddings.enable"]: positions_embed = _create_position_embedding( embedding_dim=inputs.get_shape().as_list()[-1], num_positions=self.params["position_embeddings.num_positions"], lengths=sequence_length, maxlen=tf.shape(inputs)[1]) inputs = self._combiner_fn(inputs, positions_embed) # Apply dropout inputs = tf.contrib.layers.dropout( inputs=inputs, keep_prob=self.params["dropout_keep_prob"], is_training=self.mode == tf.contrib.learn.ModeKeys.TRAIN) outputs = self._pooling_fn( inputs=inputs, pool_size=self.params["pool_size"], strides=self.params["strides"], padding="SAME") # Final state is the average representation of the pooled embeddings final_state = tf.reduce_mean(outputs, 1) return EncoderOutput( outputs=outputs, final_state=final_state, attention_values=inputs, attention_values_length=sequence_length)
def encode(self, inputs, sequence_length, **kwargs): scope = tf.get_variable_scope() scope.set_initializer( tf.random_uniform_initializer(-self.params["init_scale"], self.params["init_scale"])) self.params["rnn_cell"]["distributed"] = False self.params["rnn_cell"]["device_name"] = training_utils.getDeviceName( 0) cell_fw = training_utils.get_rnn_cell(**self.params["rnn_cell"]) self.params["rnn_cell"]["device_name"] = training_utils.getDeviceName( self.params["rnn_cell"]["num_layers"]) if self.params["rnn_cell"][ "device_name"] == training_utils.getDeviceName(0): self.params["rnn_cell"][ "device_name"] = training_utils.getDeviceName( 1 ) # to ensure the backward cell is working on aniother GPU cell_bw = training_utils.get_rnn_cell(**self.params["rnn_cell"]) outputs, states = tf.nn.bidirectional_dynamic_rnn( cell_fw=cell_fw, cell_bw=cell_bw, inputs=inputs, sequence_length=sequence_length, dtype=tf.float32, **kwargs) # Concatenate outputs and states of the forward and backward RNNs outputs_concat = tf.concat(outputs, 2) return EncoderOutput(outputs=outputs_concat, final_state=states, attention_values=outputs_concat, attention_values_length=sequence_length)
def initialize(self, name=None):
    finished = tf.tile([False], [self.config.beam_width])

    start_tokens_batch = tf.fill([self.config.beam_width], self.start_tokens)
    first_inputs = tf.nn.embedding_lookup(self.target_embedding,
                                          start_tokens_batch)
    first_inputs = tf.expand_dims(first_inputs, 1)
    zeros_padding = tf.zeros([
        self.config.beam_width,
        self.params['max_decode_length'] - 1,
        self.target_embedding.get_shape().as_list()[-1]
    ])
    first_inputs = tf.concat([first_inputs, zeros_padding], axis=1)

    outputs = tf.tile(self.initial_state.outputs,
                      [self.config.beam_width, 1, 1])
    attention_values = tf.tile(self.initial_state.attention_values,
                               [self.config.beam_width, 1, 1])
    enc_output = EncoderOutput(
        outputs=outputs,
        final_state=self.initial_state.final_state,
        attention_values=attention_values,
        attention_values_length=self.initial_state.attention_values_length)

    return finished, first_inputs, enc_output
def encode(self, inputs, sequence_length, **kwargs):
    print("\n--------------------------\ninputs to encode\n" + str(inputs) + "\n\n")

    scope = tf.get_variable_scope()
    scope.set_initializer(
        tf.random_uniform_initializer(-self.params["init_scale"],
                                      self.params["init_scale"]))
    cell_fw = training_utils.get_rnn_cell(**self.params["rnn_cell"])
    cell_bw = training_utils.get_rnn_cell(**self.params["rnn_cell"])

    outputs, states = tf.nn.bidirectional_dynamic_rnn(
        cell_fw=cell_fw,
        cell_bw=cell_bw,
        inputs=inputs,
        sequence_length=sequence_length,
        dtype=tf.float32,
        **kwargs)

    # Concatenate outputs and states of the forward and backward RNNs
    outputs_concat = tf.concat(outputs, 2)

    print("\noutput state tensor\n" + str(outputs_concat) + "\n\n")
    print("\nfinal state tensor\n" + str(states) + "\n\n")

    return EncoderOutput(
        outputs=outputs_concat,
        final_state=states,
        attention_values=outputs_concat,
        attention_values_length=sequence_length)
def encode(self, inputs, sequence_length, **kwargs): scope = tf.get_variable_scope() scope.set_initializer( tf.random_uniform_initializer(-self.params["init_scale"], self.params["init_scale"])) cell_fw = training_utils.get_rnn_cell(**self.params["rnn_cell"]) cell_bw = training_utils.get_rnn_cell(**self.params["rnn_cell"]) cells_fw = _unpack_cell(cell_fw) cells_bw = _unpack_cell(cell_bw) result = rnn.stack_bidirectional_dynamic_rnn( cells_fw=cells_fw, cells_bw=cells_bw, inputs=inputs, dtype=tf.float32, sequence_length=sequence_length, **kwargs) outputs_concat, _output_state_fw, _output_state_bw = result final_state = (_output_state_fw, _output_state_bw) return EncoderOutput(outputs=outputs_concat, final_state=final_state, attention_values=outputs_concat, attention_values_length=sequence_length)
def encode(self, inputs, sequence_length):
    embed_size = inputs.get_shape().as_list()[-1]
    if self.params["position_embeddings.enable"]:
        positions_embed = self._create_position_embedding(
            lengths=sequence_length,     # tensor of sequence lengths
            maxlen=tf.shape(inputs)[1])  # max length in this batch
        inputs = self._combiner_fn(inputs, positions_embed)

    # Apply dropout to embeddings
    inputs = tf.contrib.layers.dropout(
        inputs=inputs,
        keep_prob=self.params["embedding_dropout_keep_prob"],
        is_training=self.mode == tf.contrib.learn.ModeKeys.TRAIN)

    with tf.variable_scope("encoder_cnn"):
        next_layer = inputs
        if self.params["cnn.layers"] > 0:
            nhids_list = parse_list_or_default(
                self.params["cnn.nhids"], self.params["cnn.layers"],
                self.params["cnn.nhid_default"])
            kwidths_list = parse_list_or_default(
                self.params["cnn.kwidths"], self.params["cnn.layers"],
                self.params["cnn.kwidth_default"])

            # Map embedding dim to hidden dim
            next_layer = linear_mapping_weightnorm(
                next_layer,
                nhids_list[0],
                dropout=self.params["embedding_dropout_keep_prob"],
                var_scope_name="linear_mapping_before_cnn")
            next_layer = conv_encoder_stack(
                next_layer,
                nhids_list,
                kwidths_list,
                {
                    'src': self.params["embedding_dropout_keep_prob"],
                    'hid': self.params["nhid_dropout_keep_prob"]
                },
                mode=self.mode)
            next_layer = linear_mapping_weightnorm(
                next_layer, embed_size,
                var_scope_name="linear_mapping_after_cnn")

        # The encoder stack will receive gradients *twice* for each attention
        # pass: dot product and weighted sum.
        # cnn = nn.GradMultiply(cnn, 1 / (2 * nattn))
        cnn_c_output = (next_layer + inputs) * tf.sqrt(0.5)

    final_state = tf.reduce_mean(cnn_c_output, 1)

    return EncoderOutput(
        outputs=next_layer,
        final_state=final_state,
        attention_values=cnn_c_output,
        attention_values_length=sequence_length)
def encode(self, inputs, sequence_length, **kwargs):
    cell = training_utils.get_rnn_cell(**self.params["rnn_cell"])
    outputs, state = tf.nn.dynamic_rnn(
        cell=cell,
        inputs=inputs,
        sequence_length=sequence_length,
        dtype=tf.float32,
        **kwargs)
    return EncoderOutput(
        outputs=outputs,
        final_state=state,
        attention_values=outputs,
        attention_values_length=sequence_length)
def encode(self, inputs, sequence_length, **kwargs):
    scope = tf.get_variable_scope()
    scope.set_initializer(
        tf.random_uniform_initializer(-self.params["init_scale"],
                                      self.params["init_scale"]))

    embedding_size = inputs.get_shape().as_list()[-1]
    # TODO: Different size for words and context
    self.params["rnn_cell"]["cell_params"]["num_units"] = (
        embedding_size + self.positional_embedding_size)
    inner_cell = training_utils.get_rnn_cell(**self.params["rnn_cell"])
    cell = AttentionRNNCell(inner_cell, embedding_size,
                            self.positional_embedding_size,
                            self.attention_num_layers,
                            self.attention_num_units)

    positional_embeddings_var = tf.get_variable(
        "positional_embeddings",
        [self.max_sequence_length, self.positional_embedding_size],
        dtype=tf.float32)  # TODO: Make dtype configurable
    position_sequence = tf.range(tf.shape(inputs)[1])
    positional_embeddings = tf.nn.embedding_lookup(positional_embeddings_var,
                                                   position_sequence)
    positional_embeddings = tf.expand_dims(positional_embeddings, axis=0)
    positional_embeddings_for_batch = tf.tile(positional_embeddings,
                                              [tf.shape(inputs)[0], 1, 1])

    initial_state_0 = tf.zeros([tf.shape(inputs)[0], inner_cell.state_size])
    initial_state_1 = tf.concat([inputs, positional_embeddings_for_batch], axis=2)
    initial_state_2 = tf.concat([inputs, positional_embeddings_for_batch], axis=2)
    initial_state = (initial_state_0, initial_state_1, initial_state_2)

    outputs, state = tf.nn.dynamic_rnn(
        cell=cell,
        inputs=tf.zeros([tf.shape(inputs)[0], tf.shape(inputs)[1] * 1, 1],
                        tf.float32),  # TODO: Make this * 1 configurable
        initial_state=initial_state,
        sequence_length=sequence_length * 1,  # TODO: Make this 1 configurable
        dtype=tf.float32,
        **kwargs)

    return EncoderOutput(
        outputs=state[2],
        final_state=state[0],
        attention_values=state[2],
        attention_values_length=tf.ones([tf.shape(inputs)[0]],
                                        dtype=tf.int32) * tf.shape(inputs)[1])
def encode(self, inputs, sequence_length): if self.params["position_embeddings.enable"]: positions_embed = _create_position_embedding( embedding_dim=inputs.get_shape().as_list()[-1], num_positions=self.params["position_embeddings.num_positions"], lengths=sequence_length, maxlen=tf.shape(inputs)[1]) inputs = self._combiner_fn(inputs, positions_embed) # Apply dropout to embeddings inputs = tf.contrib.layers.dropout( inputs=inputs, keep_prob=self.params["embedding_dropout_keep_prob"], is_training=self.mode == tf.contrib.learn.ModeKeys.TRAIN) with tf.variable_scope("cnn_a"): cnn_a_output = inputs for layer_idx in range(self.params["attention_cnn.layers"]): next_layer = tf.contrib.layers.conv2d( inputs=cnn_a_output, num_outputs=self.params["attention_cnn.units"], kernel_size=self.params["attention_cnn.kernel_size"], padding="SAME", activation_fn=None) # Add a residual connection, except for the first layer if layer_idx > 0: next_layer += cnn_a_output cnn_a_output = tf.tanh(next_layer) with tf.variable_scope("cnn_c"): cnn_c_output = inputs for layer_idx in range(self.params["output_cnn.layers"]): next_layer = tf.contrib.layers.conv2d( inputs=cnn_c_output, num_outputs=self.params["output_cnn.units"], kernel_size=self.params["output_cnn.kernel_size"], padding="SAME", activation_fn=None) # Add a residual connection, except for the first layer if layer_idx > 0: next_layer += cnn_c_output cnn_c_output = tf.tanh(next_layer) final_state = tf.reduce_mean(cnn_c_output, 1) return EncoderOutput( outputs=cnn_a_output, final_state=final_state, attention_values=cnn_c_output, attention_values_length=sequence_length)
def encode(self, inputs, sequence_length, **kwargs): scope = tf.get_variable_scope() scope.set_initializer( tf.random_uniform_initializer(-self.params["init_scale"], self.params["init_scale"])) cell = training_utils.get_rnn_cell(**self.params["rnn_cell"]) outputs, state = tf.nn.dynamic_rnn(cell=cell, inputs=inputs, sequence_length=sequence_length, dtype=tf.float32, **kwargs) return EncoderOutput(outputs=outputs, final_state=state, attention_values=outputs, attention_values_length=sequence_length)
def encode(self, features, labels):
    # 1. Query source encoder sequence output
    query_embedded = tf.nn.embedding_lookup(self.source_embedding,
                                            features["source_ids"])
    query_encoder_fn = self.encoder_class(self.params["encoder.params"],
                                          self.mode)
    query_output = query_encoder_fn(query_embedded, features["source_len"])
    # return query_output

    # 2. Candidate source encoder sequence output
    candidate_embedded = tf.nn.embedding_lookup(
        self.source_candidate_embedding, features["source_candidate_ids"])
    candidate_encoder_fn = self.encoder_class(self.params["encoder.params"],
                                              self.mode)
    candidate_output = candidate_encoder_fn(candidate_embedded,
                                            features["source_candidate_len"])

    print("query_output:{}".format(query_output))
    print("candidate_output:{}".format(candidate_output))

    # 3. Merge the two encoder outputs by element-wise addition.
    # Alternative merge strategies (concatenation along the batch axis) are
    # kept below for reference:
    # outputs = tf.concat([query_output.outputs, candidate_output.outputs], 0)
    # final_state = tf.reshape(
    #     tf.concat([query_output.final_state, candidate_output.final_state], 0),
    #     [-1, 128])
    # final_state = tf.concat(
    #     [query_output.final_state, candidate_output.final_state], 0)
    # final_state = (
    #     tf.concat([query_output.final_state[0], candidate_output.final_state[0]], 0),
    #     tf.concat([query_output.final_state[1], candidate_output.final_state[1]], 0))
    # attention_values = tf.concat(
    #     [query_output.attention_values, candidate_output.attention_values], 0)
    # att_v_len = tf.concat(
    #     [query_output.attention_values_length, candidate_output.attention_values_length], 0)
    outputs = query_output.outputs + candidate_output.outputs
    final_state = (query_output.final_state[0] + candidate_output.final_state[0],
                   query_output.final_state[1] + candidate_output.final_state[1])
    attention_values = (query_output.attention_values +
                        candidate_output.attention_values)
    att_v_len = (query_output.attention_values_length +
                 candidate_output.attention_values_length)

    # outputs = query_output.outputs
    # final_state = query_output.final_state
    # attention_values = query_output.attention_values
    # att_v_len = query_output.attention_values_length

    encoderOutput = EncoderOutput(
        outputs=outputs,
        final_state=final_state,
        attention_values=attention_values,
        attention_values_length=att_v_len)
    # print("encoderOutput:{}".format(encoderOutput))
    return encoderOutput
def encode(self, inputs, sequence_length, **kwargs):
    CONTEXT_SIZE = 10
    inputs_c = tf.slice(inputs, [0, 0, 0], [-1, CONTEXT_SIZE, -1])
    inputs_p = tf.slice(inputs, [0, CONTEXT_SIZE, 0], [-1, -1, -1])

    with tf.variable_scope("encoder_c"):
        scope = tf.get_variable_scope()
        scope.set_initializer(
            tf.random_uniform_initializer(-self.params["init_scale"],
                                          self.params["init_scale"]))
        cell_fw_c = training_utils.get_rnn_cell(**self.params["rnn_cell"])
        cell_bw_c = training_utils.get_rnn_cell(**self.params["rnn_cell"])
        outputs_c, states_c = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=cell_fw_c,
            cell_bw=cell_bw_c,
            inputs=inputs_c,
            sequence_length=sequence_length,
            dtype=tf.float32,
            **kwargs)

    with tf.variable_scope("encoder_p"):
        scope = tf.get_variable_scope()
        scope.set_initializer(
            tf.random_uniform_initializer(-self.params["init_scale"],
                                          self.params["init_scale"]))
        cell_fw_p = training_utils.get_rnn_cell(**self.params["rnn_cell"])
        cell_bw_p = training_utils.get_rnn_cell(**self.params["rnn_cell"])
        outputs_p, states_p = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=cell_fw_p,
            cell_bw=cell_bw_p,
            inputs=inputs_p,
            sequence_length=sequence_length,
            dtype=tf.float32,
            **kwargs)

    # Concatenate outputs and states of the forward and backward RNNs
    outputs_concat_c = tf.concat(outputs_c, 2)
    outputs_concat_p = tf.concat(outputs_p, 2)
    final_output = tf.concat([outputs_concat_c, outputs_concat_p], 1)
    final_states = states_c + states_p

    return EncoderOutput(
        outputs=final_output,
        final_state=final_states,
        attention_values=final_output,
        attention_values_length=sequence_length)
def encode(self, inputs, sequence_length, **kwargs): cell_fw = training_utils.get_rnn_cell(**self.params["rnn_cell"]) cell_bw = training_utils.get_rnn_cell(**self.params["rnn_cell"]) outputs, states = tf.nn.bidirectional_dynamic_rnn( cell_fw=cell_fw, cell_bw=cell_bw, inputs=inputs, sequence_length=sequence_length, dtype=tf.float32, **kwargs) # Concatenate outputs and states of the forward and backward RNNs outputs_concat = tf.concat(outputs, 2) return EncoderOutput( outputs=outputs_concat, final_state=states, attention_values=outputs_concat, attention_values_length=sequence_length)
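# The bidirectional encoders above all share the same core pattern. A minimal,
# self-contained sketch of that pattern (TF 1.x; the sizes are hypothetical and
# GRUCell stands in for whatever training_utils.get_rnn_cell builds):
import tensorflow as tf

def bidirectional_encode_sketch(inputs, sequence_length, num_units=128):
    cell_fw = tf.contrib.rnn.GRUCell(num_units)
    cell_bw = tf.contrib.rnn.GRUCell(num_units)
    # outputs is a (fw, bw) pair of [batch, time, num_units] tensors;
    # states is a (fw_state, bw_state) pair.
    outputs, states = tf.nn.bidirectional_dynamic_rnn(
        cell_fw=cell_fw,
        cell_bw=cell_bw,
        inputs=inputs,
        sequence_length=sequence_length,
        dtype=tf.float32)
    # Concatenating along the last axis gives [batch, time, 2 * num_units],
    # which the encoders return as both outputs and attention_values.
    return tf.concat(outputs, 2), states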
def encode(self, inputs, sequence_length, **kwargs): cell_fw = training_utils.get_rnn_cell(**self.params["rnn_cell"]) cell_bw = training_utils.get_rnn_cell(**self.params["rnn_cell"]) cells_fw = _unpack_cell(cell_fw) cells_bw = _unpack_cell(cell_bw) result = rnn.stack_bidirectional_dynamic_rnn( cells_fw=cells_fw, cells_bw=cells_bw, inputs=inputs, dtype=tf.float32, sequence_length=sequence_length, **kwargs) outputs_concat, _output_state_fw, _output_state_bw = result final_state = (_output_state_fw, _output_state_bw) return EncoderOutput( outputs=outputs_concat, final_state=final_state, attention_values=outputs_concat, attention_values_length=sequence_length)
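# _unpack_cell, used by the stacked-bidirectional encoders, is not shown in
# this section. A sketch of what it likely does, assuming it mirrors the helper
# in google/seq2seq's rnn_encoder.py: stack_bidirectional_dynamic_rnn expects
# one cell per layer, so a MultiRNNCell is unpacked into its list of sub-cells.
import tensorflow as tf

def _unpack_cell(cell):
    if isinstance(cell, tf.contrib.rnn.MultiRNNCell):
        return cell._cells  # pylint: disable=protected-access
    return [cell]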
def encode(self, inputs, sequence_length): if self.params["position_embeddings.enable"]: positions_embed = _create_position_embedding( embedding_dim=inputs.get_shape().as_list()[-1], num_positions=self.params["position_embeddings.num_positions"], lengths=sequence_length, maxlen=tf.shape(inputs)[1]) inputs = self._combiner_fn(inputs, positions_embed) outputs = self._pooling_fn(inputs=inputs, pool_size=self.params["pool_size"], strides=self.params["strides"], padding="SAME") # Final state is the average representation of the pooled embeddings final_state = tf.reduce_mean(outputs, 1) return EncoderOutput(outputs=outputs, final_state=final_state, attention_values=inputs, attention_values_length=sequence_length)
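# _pooling_fn in the two pooling encoders above is resolved from the params; a
# plausible concrete choice is average pooling over the time axis via
# tf.layers.average_pooling1d. Minimal sketch with hypothetical pool_size and
# strides on [batch, time, embed_dim] inputs:
import tensorflow as tf

def average_pool_sketch(inputs, pool_size=5, strides=1):
    return tf.layers.average_pooling1d(
        inputs=inputs, pool_size=pool_size, strides=strides, padding="SAME")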
def encode(self, inputs, sequence_length, **kwargs): scope = tf.get_variable_scope() scope.set_initializer( tf.random_uniform_initializer(-self.params["init_scale"], self.params["init_scale"])) query_inputs, retrieved_inputs = tf.split(inputs, 2) query_cell_fw = training_utils.get_rnn_cell(**self.params["rnn_cell"]) query_cell_bw = training_utils.get_rnn_cell(**self.params["rnn_cell"]) query_outputs, query_states = tf.nn.bidirectional_dynamic_rnn( cell_fw=query_cell_fw, cell_bw=query_cell_bw, inputs=query_inputs, sequence_length=sequence_length, dtype=tf.float32, **kwargs) retrieved_cell_fw = training_utils.get_rnn_cell( **self.params["rnn_cell"]) retrieved_cell_bw = training_utils.get_rnn_cell( **self.params["rnn_cell"]) retrieved_outputs, retrieved_states = tf.nn.bidirectional_dynamic_rnn( cell_fw=retrieved_cell_fw, cell_bw=retrieved_cell_bw, inputs=retrieved_inputs, sequence_length=sequence_length, dtype=tf.float32, **kwargs) outputs = tf.concat([query_outputs, retrieved_outputs], 0) # Concatenate outputs and states of the forward and backward RNNs outputs_concat = tf.concat(outputs, 2) states = tf.concat([query_states, retrieved_states], 0) return EncoderOutput(outputs=outputs_concat, final_state=states, attention_values=outputs_concat, attention_values_length=sequence_length)
def setUp(self):
    super(BridgeTest, self).setUp()
    self.batch_size = 4
    self.encoder_cell = tf.contrib.rnn.MultiRNNCell(
        [tf.contrib.rnn.GRUCell(4), tf.contrib.rnn.GRUCell(8)])
    self.decoder_cell = tf.contrib.rnn.MultiRNNCell(
        [tf.contrib.rnn.LSTMCell(16), tf.contrib.rnn.GRUCell(8)])
    final_encoder_state = nest.map_structure(
        lambda x: tf.convert_to_tensor(
            value=np.random.randn(self.batch_size, x), dtype=tf.float32),
        self.encoder_cell.state_size)
    self.encoder_outputs = EncoderOutput(
        outputs=tf.convert_to_tensor(
            value=np.random.randn(self.batch_size, 10, 16), dtype=tf.float32),
        attention_values=tf.convert_to_tensor(
            value=np.random.randn(self.batch_size, 10, 16), dtype=tf.float32),
        attention_values_length=np.full([self.batch_size], 10),
        final_state=final_encoder_state)
def encode(self, inputs, sequence_length, **kwargs): scope = tf.get_variable_scope() scope.set_initializer( tf.random_uniform_initializer(-self.params["init_scale"], self.params["init_scale"])) self.params["rnn_cell"]["distributed"] = False self.params["rnn_cell"]["device_name"] = training_utils.getDeviceName( 0) cell_fw = training_utils.get_rnn_cell(**self.params["rnn_cell"]) self.params["rnn_cell"]["device_name"] = training_utils.getDeviceName( self.params["rnn_cell"]["num_layers"]) if self.params["rnn_cell"][ "device_name"] == training_utils.getDeviceName(0): self.params["rnn_cell"][ "device_name"] = training_utils.getDeviceName( 1 ) # to ensure the backward cell is working on aniother GPU cell_bw = training_utils.get_rnn_cell(**self.params["rnn_cell"]) cells_fw = _unpack_cell(cell_fw) cells_bw = _unpack_cell(cell_bw) result = rnn.stack_bidirectional_dynamic_rnn( cells_fw=cells_fw, cells_bw=cells_bw, inputs=inputs, dtype=tf.float32, sequence_length=sequence_length, **kwargs) outputs_concat, _output_state_fw, _output_state_bw = result final_state = (_output_state_fw, _output_state_bw) return EncoderOutput(outputs=outputs_concat, final_state=final_state, attention_values=outputs_concat, attention_values_length=sequence_length)
def encode(self, inputs, sequence_length, source_images, **kwargs):
    scope = tf.get_variable_scope()
    scope.set_initializer(tf.contrib.layers.xavier_initializer())
    cell_fw = training_utils.get_rnn_cell(**self.params["rnn_cell"])
    cell_bw = training_utils.get_rnn_cell(**self.params["rnn_cell"])
    outputs, states = tf.nn.bidirectional_dynamic_rnn(
        cell_fw=cell_fw,
        cell_bw=cell_bw,
        inputs=inputs,
        sequence_length=sequence_length,
        dtype=tf.float32,
        **kwargs)

    # Concatenate outputs and states of the forward and backward RNNs
    outputs_concat = tf.concat(outputs, 2)

    with slim.arg_scope(resnet_utils.resnet_arg_scope()):
        moving_average_decay = self.params["resnet"]["moving_average_decay"]
        logits, end_points = _resnet_v1(
            source_images,
            states,
            num_classes=None,
            global_pool=True,
            spatial_squeeze=False,
            is_training=self.is_training,
            moving_average_decay=moving_average_decay)
    logits = tf.reshape(logits, [-1, 2048])

    return EncoderOutput(
        outputs=outputs_concat,
        final_state=states,
        attention_values=outputs_concat,
        attention_values_length=sequence_length,
        image_features=logits)
def encode(self, inputs, sequence_length, **kwargs):
    num_layers_2 = self.params['rnn_cell_uni']['num_layers']
    scope = tf.get_variable_scope()
    scope.set_initializer(
        tf.random_uniform_initializer(-self.params["init_scale"],
                                      self.params["init_scale"]))

    bi_encoder_output = self.bi_encoder.encode(inputs, sequence_length,
                                               **kwargs)

    uni_cell = training_utils.get_rnn_cell(**self.params["rnn_cell_uni"])
    encoder_outputs, encoder_state = tf.nn.dynamic_rnn(
        uni_cell,
        bi_encoder_output.outputs,
        dtype=tf.float32,
        sequence_length=sequence_length)
        # time_major=self.time_major  TODO: needs to be checked

    # Prepend the backward state of the bidirectional encoder to the
    # unidirectional encoder's state.
    encoder_state = (bi_encoder_output.final_state[1],) + (
        (encoder_state,) if num_layers_2 == 1 else encoder_state)

    return EncoderOutput(
        outputs=encoder_outputs,
        final_state=encoder_state,
        attention_values=encoder_outputs,
        attention_values_length=sequence_length)