def get_word_chars(table, char_embedding, word_chars, char_lengths, word_size):
    """Encode the characters of every word into one vector per word.

    Args:
        table: lookup table exposing ``lookup``; maps character strings to ids.
        char_embedding: callable applied to the character ids to embed them.
        word_chars: character strings; reshaped here to
            ``[-1, word_size, WORD_CHAR_SIZE]``.
        char_lengths: number of valid characters per word, used to build the
            padding mask (assumed to be ``[batch, word_size]`` -- TODO confirm).
        word_size: number of words per example.

    Returns:
        Tensor reshaped to ``[-1, word_size, CHAR_DIM * 2]``.
    """
    chars_per_word = tf.reshape(word_chars, [-1, word_size, WORD_CHAR_SIZE])
    embedded = char_embedding(table.lookup(chars_per_word))

    # Zero out the embeddings of padding characters. The mask gains a trailing
    # axis of size 1 so it broadcasts over the embedding dimension.
    valid = tf.expand_dims(
        tf.sequence_mask(char_lengths, WORD_CHAR_SIZE, dtype=tf.float32), 3)

    # Fold the word axis into the batch axis: one character sequence per row.
    per_word = tf.reshape(embedded * valid, [-1, WORD_CHAR_SIZE, CHAR_DIM])

    if self.args.word_char_type == 'cnn':
        # NOTE(review): the final reshape assumes the CNN output width equals
        # CHAR_DIM * 2 -- confirm against shallow_and_wide_cnn.
        encoded = shallow_and_wide_cnn(per_word, 16, [1, 2, 3])
    else:
        _, encoded = stack_bidirectional_dynamic_rnn(
            per_word,
            [CHAR_DIM],
            tf.reshape(char_lengths, [-1]),
            dropout_keep_prob=dropout_keep_prob,
            cell_wrapper=self.rnn_cell_wrapper,
            variational_recurrent=self.variational_recurrent,
            base_cell=base_cell,
            is_training=is_training)

    # Restore the word axis.
    return tf.reshape(encoded, [-1, word_size, CHAR_DIM * 2])
def build_graph(self, data_paths, batch_size, graph_mod):
    """Builds generic graph for training or eval.

    Args:
        data_paths: when truthy, examples are read with ``util.read_examples``;
            otherwise a string placeholder named ``'input'`` is created.
        batch_size: batch size forwarded to ``util.read_examples``.
        graph_mod: a ``GraphMod`` value. ``TRAIN`` enables training behaviour
            (shuffling, dropout, batch-norm training mode); ``PREDICT`` feeds
            the model from placeholders and returns right after the
            prediction ops are built.

    Returns:
        A ``GraphReferences`` object. It always carries ``examples`` and
        ``predictions`` (``[argmax, softmax]``). In ``PREDICT`` mode it also
        carries the ``input_*`` placeholders; otherwise it carries ``labels``,
        ``ids``, ``global_step``, ``metric_updates``, ``metric_values`` and,
        when training, ``train``.

    Raises:
        Exception: if ``self.username_type`` is not one of ``'dense'``,
            ``'cnn'``, ``'rnn'`` or ``'none'``.
    """
    tensors = GraphReferences()
    is_training = graph_mod == GraphMod.TRAIN
    tf.keras.backend.set_learning_phase(1 if is_training else 0)
    if data_paths:
        tensors.keys, tensors.examples = util.read_examples(
            data_paths,
            batch_size,
            shuffle=is_training,
            num_epochs=None if is_training else 2)
    else:
        tensors.examples = tf.placeholder(tf.string, name='input', shape=(None,))

    if graph_mod == GraphMod.PREDICT:
        inception_input, inception_embeddings = self.build_inception_graph()
        image_embeddings = inception_embeddings
        title_embeddings = tf.placeholder(tf.float32, shape=[None, TITLE_EMBEDDING_SIZE])
        title_words_count = tf.placeholder(tf.int64, shape=[None])
        content_embeddings = tf.placeholder(tf.float32, shape=[None, CONTENT_EMBEDDING_SIZE])
        content_words_count = tf.placeholder(tf.int64, shape=[None])
        title_word_chars = tf.placeholder(tf.string, shape=[None, TITLE_WORD_CHARS_SIZE])
        content_word_chars = tf.placeholder(tf.string, shape=[None, CONTENT_WORD_CHARS_SIZE])
        title_word_char_lengths = tf.placeholder(tf.int64, shape=[None, TITLE_WORD_SIZE])
        content_word_char_lengths = tf.placeholder(tf.int64, shape=[None, CONTENT_WORD_SIZE])
        category_ids = tf.placeholder(tf.int64, shape=[None])
        price = tf.placeholder(tf.int64, shape=[None])
        images_count = tf.placeholder(tf.int64, shape=[None])
        recent_articles_count = tf.placeholder(tf.int64, shape=[None])
        title_length = tf.placeholder(tf.int64, shape=[None])
        content_length = tf.placeholder(tf.int64, shape=[None])
        blocks_inline = tf.placeholder(tf.string, shape=[None])
        username_chars = tf.placeholder(tf.string, shape=[None, USERNAME_CHAR_SIZE])
        username_length = tf.placeholder(tf.int64, shape=[None])
        created_at_ts = tf.placeholder(tf.int64, shape=[None])
        offerable = tf.placeholder(tf.int64, shape=[None])

        # Expose every placeholder so the caller can feed it at serving time.
        tensors.input_image = inception_input
        tensors.input_title = title_embeddings
        tensors.input_title_words_count = title_words_count
        tensors.input_content = content_embeddings
        tensors.input_content_words_count = content_words_count
        tensors.input_category_id = category_ids
        tensors.input_price = price
        tensors.input_images_count = images_count
        tensors.input_recent_articles_count = recent_articles_count
        tensors.input_title_length = title_length
        tensors.input_content_length = content_length
        tensors.input_blocks_inline = blocks_inline
        tensors.input_username_chars = username_chars
        tensors.input_username_length = username_length
        tensors.input_created_at_ts = created_at_ts
        tensors.input_offerable = offerable
        tensors.input_title_word_chars = title_word_chars
        tensors.input_content_word_chars = content_word_chars
        tensors.input_title_word_char_lengths = title_word_char_lengths
        tensors.input_content_word_char_lengths = content_word_char_lengths

        username_chars = tf.reshape(username_chars, [-1, USERNAME_CHAR_SIZE])
    else:
        # For training and evaluation we assume data is preprocessed, so the
        # inputs are tf-examples.
        # Generate placeholders for examples.
        with tf.name_scope('inputs'):
            feature_map = {
                'id':
                    tf.FixedLenFeature(
                        shape=[], dtype=tf.string, default_value=['']),
                # Some images may have no labels. For those, we assume a default
                # label. So the number of labels is label_count+1 for the default
                # label.
                'label':
                    tf.FixedLenFeature(
                        shape=[1],
                        dtype=tf.int64,
                        default_value=[self.label_count]),
                'embedding':
                    tf.FixedLenFeature(
                        shape=[BOTTLENECK_TENSOR_SIZE], dtype=tf.float32),
                'title_embedding':
                    tf.FixedLenFeature(
                        shape=[TITLE_EMBEDDING_SIZE], dtype=tf.float32),
                'title_words_count': tf.FixedLenFeature(shape=[], dtype=tf.int64),
                'content_embedding':
                    tf.FixedLenFeature(
                        shape=[CONTENT_EMBEDDING_SIZE], dtype=tf.float32),
                'content_words_count': tf.FixedLenFeature(shape=[], dtype=tf.int64),
                'category_id': tf.FixedLenFeature(shape=[], dtype=tf.int64),
                'price': tf.FixedLenFeature(shape=[], dtype=tf.int64),
                'images_count': tf.FixedLenFeature(shape=[], dtype=tf.int64),
                'recent_articles_count': tf.FixedLenFeature(shape=[], dtype=tf.int64),
                'title_length': tf.FixedLenFeature(shape=[], dtype=tf.int64),
                'content_length': tf.FixedLenFeature(shape=[], dtype=tf.int64),
                'blocks_inline': tf.FixedLenFeature(shape=[], dtype=tf.string),
                'username_chars': tf.FixedLenFeature(shape=[USERNAME_CHAR_SIZE], dtype=tf.string),
                'username_length': tf.FixedLenFeature(shape=[], dtype=tf.int64),
                'created_at_ts': tf.FixedLenFeature(shape=[], dtype=tf.int64),
                'offerable': tf.FixedLenFeature(shape=[], dtype=tf.int64),
                'title_word_chars': tf.FixedLenFeature(shape=[TITLE_WORD_CHARS_SIZE], dtype=tf.string),
                'content_word_chars': tf.FixedLenFeature(shape=[CONTENT_WORD_CHARS_SIZE], dtype=tf.string),
                'title_word_char_lengths': tf.FixedLenFeature(shape=[TITLE_WORD_SIZE], dtype=tf.int64),
                'content_word_char_lengths': tf.FixedLenFeature(shape=[CONTENT_WORD_SIZE], dtype=tf.int64),
            }
            parsed = tf.parse_example(tensors.examples, features=feature_map)
            # 'label' is parsed with shape=[1], i.e. [batch, 1]. Squeeze only
            # that axis: an unrestricted squeeze also drops the batch axis
            # whenever a batch holds a single example.
            labels = tf.squeeze(parsed['label'], axis=[1])
            tensors.labels = labels
            # NOTE(review): 'id' is parsed with shape=[], so this squeeze only
            # has an effect at batch size 1 (it yields a scalar). Kept as-is
            # because consumers of `tensors.ids` are outside this function.
            tensors.ids = tf.squeeze(parsed['id'])
            image_embeddings = parsed['embedding']
            title_embeddings = parsed['title_embedding']
            title_words_count = parsed['title_words_count']
            content_embeddings = parsed['content_embedding']
            content_words_count = parsed['content_words_count']
            category_ids = parsed['category_id']
            price = parsed['price']
            images_count = parsed['images_count']
            recent_articles_count = parsed['recent_articles_count']
            title_length = parsed['title_length']
            content_length = parsed['content_length']
            blocks_inline = parsed['blocks_inline']
            username_chars = parsed['username_chars']
            username_length = parsed['username_length']
            created_at_ts = parsed['created_at_ts']
            offerable = parsed['offerable']
            title_word_chars = parsed['title_word_chars']
            content_word_chars = parsed['content_word_chars']
            title_word_char_lengths = parsed['title_word_char_lengths']
            content_word_char_lengths = parsed['content_word_char_lengths']

    # Dropout is only active while training.
    dropout_keep_prob = self.dropout if is_training else None

    # Pick the recurrent cell class; the cuDNN-compatible variants are used
    # when a GPU device is reported.
    if self.rnn_type == 'LSTM':
        if tf.test.gpu_device_name():
            base_cell = tf.contrib.cudnn_rnn.CudnnCompatibleLSTMCell
        else:
            base_cell = tf.contrib.rnn.BasicLSTMCell
    else:
        if tf.test.gpu_device_name():
            base_cell = tf.contrib.cudnn_rnn.CudnnCompatibleGRUCell
        else:
            base_cell = tf.contrib.rnn.GRUCell

    def dropout(x, keep_prob):
        """Apply dropout when `keep_prob` is truthy, otherwise return `x`."""
        if keep_prob:
            return tf.nn.dropout(x, keep_prob)
        return x

    if self.args.l2_reg_scale > 0.:
        regularizer = tf.contrib.layers.l2_regularizer(self.args.l2_reg_scale)
    else:
        regularizer = None

    def dense(x, units):
        """Stack one fully connected layer per entry of `units`."""
        for unit in units:
            if self.activation == 'maxout':
                x = layers.fully_connected(x, unit, activation_fn=None,
                                           weights_regularizer=regularizer)
                x = tf.contrib.layers.maxout(x, unit)
                x = tf.reshape(x, [-1, unit])
            elif self.activation == 'none':
                x = layers.fully_connected(
                    x, unit,
                    weights_regularizer=regularizer,
                    normalizer_fn=tf.contrib.layers.batch_norm,
                    normalizer_params={'is_training': is_training})
            else:
                x = layers.fully_connected(x, unit, weights_regularizer=regularizer)
            x = dropout(x, dropout_keep_prob)
        return x

    def shallow_and_wide_cnn(inputs, filters, kernel_sizes):
        """Run one conv branch per kernel size and concatenate the pooled results."""
        outputs = []
        for kernel_size in kernel_sizes:
            conv = tf.layers.conv1d(inputs, filters, kernel_size, padding="same",
                                    kernel_regularizer=regularizer)
            conv = tf.layers.batch_normalization(conv, training=is_training)
            conv = tf.nn.relu(conv)
            conv = GlobalMaxPooling1D()(conv)
            outputs.append(conv)
        output = tf.concat(outputs, 1)
        return dropout(output, dropout_keep_prob)

    def get_word_chars(table, char_embedding, word_chars, char_lengths, word_size):
        """Encode the characters of each word into one vector per word."""
        word_chars = tf.reshape(word_chars, [-1, word_size, WORD_CHAR_SIZE])
        char_ids = table.lookup(word_chars)
        x = char_embedding(char_ids)
        mask = tf.sequence_mask(char_lengths, WORD_CHAR_SIZE, dtype=tf.float32)
        # Trailing axis of size 1 lets the mask broadcast over the embedding dim.
        mask = tf.expand_dims(mask, 3)
        x = x * mask
        # [batch * word_size, word_char_size, char_dim]
        x = tf.reshape(x, [-1, WORD_CHAR_SIZE, CHAR_DIM])
        if self.args.word_char_type == 'cnn':
            filters = 16
            # NOTE(review): three kernel sizes x 16 filters are concatenated; the
            # reshape below assumes that width equals CHAR_DIM * 2 -- confirm.
            last_states = shallow_and_wide_cnn(x, filters, [1, 2, 3])
        else:
            length = tf.reshape(char_lengths, [-1])
            _, last_states = stack_bidirectional_dynamic_rnn(
                x, [CHAR_DIM], length,
                dropout_keep_prob=dropout_keep_prob,
                cell_wrapper=self.rnn_cell_wrapper,
                variational_recurrent=self.variational_recurrent,
                base_cell=base_cell,
                is_training=is_training)
        # [batch, word_size, char_dim*2]
        return tf.reshape(last_states, [-1, word_size, CHAR_DIM * 2])

    if self.args.word_char_type != 'none':
        with tf.variable_scope("word_chars", reuse=tf.AUTO_REUSE):
            table = tf.contrib.lookup.index_table_from_tensor(
                mapping=tf.constant(self.text_chars),
                default_value=len(self.text_chars))
            char_dict_size = len(self.text_chars) + 1  # add unknown char
            char_embedding = Embedding(char_dict_size, CHAR_DIM)
            title_word_chars = get_word_chars(
                table, char_embedding, title_word_chars,
                title_word_char_lengths, TITLE_WORD_SIZE)
            content_word_chars = get_word_chars(
                table, char_embedding, content_word_chars,
                content_word_char_lengths, CONTENT_WORD_SIZE)

    with tf.variable_scope("username"):
        table = tf.contrib.lookup.index_table_from_tensor(
            mapping=tf.constant(self.username_chars),
            default_value=len(self.username_chars))
        char_ids = table.lookup(username_chars)
        char_dict_size = len(self.username_chars) + 1  # add unknown char
        x = Embedding(char_dict_size, CHAR_DIM)(char_ids)
        mask = tf.sequence_mask(username_length, USERNAME_CHAR_SIZE, dtype=tf.float32)
        x = x * tf.expand_dims(mask, 2)

        if self.username_type == 'dense':
            username = tf.reshape(x, [-1, USERNAME_CHAR_SIZE * CHAR_DIM])
            username = dense(username, [30, 30])
        elif self.username_type == 'cnn':
            def conv_username(x, filters):
                """Three conv/pool branches concatenated and batch-normalised."""
                k3 = tf.layers.conv1d(x, filters, 3)
                k3 = tf.nn.relu(k3)
                k3 = tf.layers.max_pooling1d(k3, 3, 3)
                k3 = tf.layers.conv1d(k3, filters, 3)
                k3 = tf.nn.relu(k3)

                k2 = tf.layers.conv1d(x, filters, 2)
                k2 = tf.nn.relu(k2)
                k2 = tf.layers.max_pooling1d(k2, 2, 2)
                k2 = tf.layers.conv1d(k2, filters, 2, strides=2)
                k2 = tf.nn.relu(k2)
                k2 = tf.layers.max_pooling1d(k2, 2, 2)

                k1 = tf.layers.conv1d(x, filters, 1)
                k1 = tf.nn.relu(k1)
                k1 = tf.layers.max_pooling1d(k1, 3, 3)
                k1 = tf.layers.conv1d(k1, filters, 2, strides=2)
                k1 = tf.nn.relu(k1)
                k1 = tf.layers.max_pooling1d(k1, 2, 2)

                x = tf.concat([k1, k2, k3], 2)
                x = tf.reshape(x, [-1, filters * 3])
                return tf.layers.batch_normalization(x, training=is_training)

            filters = 10
            # Alternative kept for reference:
            # username = shallow_and_wide_cnn(x, filters, [1,2,3])
            username = conv_username(x, filters)
        elif self.username_type == 'rnn':
            _, last_states = stack_bidirectional_dynamic_rnn(
                x, [CHAR_DIM], username_length,
                dropout_keep_prob=dropout_keep_prob,
                cell_wrapper=self.rnn_cell_wrapper,
                variational_recurrent=self.variational_recurrent,
                base_cell=base_cell,
                is_training=is_training)
            username = last_states
        elif self.username_type == 'none':
            username = None
        else:
            raise Exception('Invaild username_type: %s' % self.username_type)

    with tf.variable_scope("user"):
        recent_articles_count = tf.minimum(recent_articles_count, 300)
        recent_articles_count = tf.expand_dims(recent_articles_count, 1)
        recent_articles_count = tf.to_int32(recent_articles_count)
        blocks = blocks_inline_to_matrix(blocks_inline)
        blocks = tf.minimum(blocks, 50)
        # `blocks` is built but currently commented out of the user features.
        user = tf.concat([recent_articles_count], 1)
        user = tf.cast(user, tf.float32)
        user = tf.layers.batch_normalization(user, training=is_training)
        user = dropout(user, dropout_keep_prob)

    with tf.variable_scope("category"):
        category_ids = tf.minimum(category_ids - 1, TOTAL_CATEGORIES_COUNT - 1)
        category = Embedding(TOTAL_CATEGORIES_COUNT, 10)(category_ids)
        category = dropout(category, dropout_keep_prob)

    with tf.variable_scope("continuous"):
        price = tf.minimum(price, 1000000000)
        title_length = tf.minimum(title_length, 100)
        content_length = tf.minimum(content_length, 3000)
        created_time = tf.mod(created_at_ts, DAY_TIME)
        # Floor division keeps the day index an integer tensor; `/` is true
        # division under Python 3 and would yield a fractional float64 value.
        day = tf.mod(created_at_ts // DAY_TIME, 7)
        # `offerable`, `created_time` and `day` are currently commented out of
        # the continuous features.
        continuous = tf.stack(
            [price, images_count, title_length, content_length], 1)
        continuous = tf.cast(continuous, tf.float32)
        continuous = tf.concat([continuous, tf.square(continuous)], 1)
        continuous = tf.layers.batch_normalization(continuous, training=is_training)
        continuous = dropout(continuous, dropout_keep_prob)

    with tf.variable_scope("image"):
        image_embeddings = dense(image_embeddings, [256])

    with tf.variable_scope('bunch'):
        bunch = tf.concat([image_embeddings, category, continuous, user], 1)
        if self.username_type != 'none':
            bunch = tf.concat([bunch, username], 1)

    if self.args.word_char_type != 'none':
        word_dim = CHAR_WORD_DIM
    else:
        word_dim = WORD_DIM

    with tf.variable_scope('title'):
        # The non-text features seed the initial state of the title RNN.
        initial_state = dense(bunch, [word_dim * 2])
        layer_sizes = [word_dim * (2**i)
                       for i in range(max(1, self.rnn_layers_count - 1))]
        title_embeddings = tf.reshape(title_embeddings, [-1, TITLE_WORD_SIZE, WORD_DIM])
        if self.args.word_char_type != 'none':
            title_embeddings = tf.concat([title_embeddings, title_word_chars], -1)
        _, title_last_states = stack_bidirectional_dynamic_rnn(
            title_embeddings, layer_sizes, title_words_count,
            initial_state=initial_state,
            cell_wrapper=self.rnn_cell_wrapper,
            variational_recurrent=self.variational_recurrent,
            base_cell=base_cell,
            dropout_keep_prob=dropout_keep_prob,
            is_training=is_training)

    with tf.variable_scope('content'):
        # The content RNN is additionally conditioned on the title encoding.
        bunch = tf.concat([bunch, title_last_states], 1)
        initial_state = dense(bunch, [192, word_dim * 2])
        layer_sizes = [word_dim * (2**i) for i in range(self.rnn_layers_count)]
        content_embeddings = tf.reshape(content_embeddings, [-1, CONTENT_WORD_SIZE, WORD_DIM])
        if self.args.word_char_type != 'none':
            content_embeddings = tf.concat([content_embeddings, content_word_chars], -1)
        _, content_last_states = stack_bidirectional_dynamic_rnn(
            content_embeddings, layer_sizes, content_words_count,
            initial_state=initial_state,
            cell_wrapper=self.rnn_cell_wrapper,
            variational_recurrent=self.variational_recurrent,
            base_cell=base_cell,
            dropout_keep_prob=dropout_keep_prob,
            is_training=is_training)

    with tf.variable_scope('final_ops'):
        hidden = tf.concat([bunch, content_last_states], 1)
        if self.final_layers_count > 0:
            hidden = dense(hidden, [192] + [64] * (self.final_layers_count - 1))
        softmax, logits = self.add_final_training_ops(hidden, self.label_count)

    # Prediction is the index of the label with the highest score. We are
    # interested only in the top score.
    prediction = tf.argmax(logits, 1)
    tensors.predictions = [prediction, softmax]

    if graph_mod == GraphMod.PREDICT:
        return tensors

    def is_l2_var_name(name):
        """Return False for bias/table/BatchNorm variables (only used by the
        disabled manual L2 loss below)."""
        for token in ['bias', 'table', 'BatchNorm']:
            if token in name:
                return False
        return True

    with tf.name_scope('evaluate'):
        loss_value = loss(logits, labels)
        # Manual L2 loss, currently disabled:
        # l2_loss = tf.add_n([ tf.nn.l2_loss(v) for v in tf.trainable_variables() if is_l2_var_name(v.name) ])
        # loss_value += l2_loss * 0.001

    # Add to the Graph the Ops that calculate and apply gradients.
    if is_training:
        tensors.train, tensors.global_step = training(loss_value)
    else:
        tensors.global_step = tf.Variable(0, name='global_step', trainable=False)

    # Add means across all batches.
    loss_updates, loss_op = util.loss(loss_value)
    accuracy_updates, accuracy_op = util.accuracy(logits, labels)
    all_precision_op, all_precision_update = tf.metrics.precision(labels, prediction)
    all_recall_op, all_recall_update = tf.metrics.recall(labels, prediction)

    # Per-class precision / recall at k=1.
    precision = {'ops': [], 'updates': []}
    recall = {'ops': [], 'updates': []}

    with tf.name_scope('metrics'):
        for i in range(self.label_count):
            op, update = tf.metrics.recall_at_k(labels, logits, 1, class_id=i)
            recall['ops'].append(op)
            recall['updates'].append(update)
            op, update = tf.metrics.precision_at_k(labels, logits, 1, class_id=i)
            precision['ops'].append(op)
            precision['updates'].append(update)

    if not is_training:
        tf.summary.scalar('accuracy', accuracy_op, family='general')
        tf.summary.scalar('loss', loss_op, family='general')
        tf.summary.scalar('precision', all_precision_op, family='general')
        tf.summary.scalar('recall', all_recall_op, family='general')
        for i in range(self.label_count):
            label_name = self.labels[i]
            tf.summary.scalar('%s' % label_name, recall['ops'][i], family='recall')
            tf.summary.scalar('%s' % label_name, precision['ops'][i], family='precision')

    tensors.metric_updates = (loss_updates + accuracy_updates +
                              [all_precision_update, all_recall_update] +
                              recall['updates'] + precision['updates'])
    tensors.metric_values = [loss_op, accuracy_op, all_precision_op, all_recall_op]
    return tensors
def multi_encoder(encoder_inputs, encoders, encoder_input_length, other_inputs=None, **kwargs):
    """
    Build multiple encoders according to the configuration in `encoders`, reading from `encoder_inputs`.
    The result is a list of the outputs produced by those encoders (for each time-step), and their final state.

    :param encoder_inputs: list of tensors of shape (batch_size, input_length), one tensor for each encoder.
    :param encoders: list of encoder configurations
    :param encoder_input_length: list of tensors of shape (batch_size,) (one tensor for each encoder)
    :param other_inputs: optional tensor concatenated to every encoder's inputs along axis 2
    :param kwargs: accepted but not read anywhere in this function
    :return:
      encoder outputs: a list of tensors of shape (batch_size, input_length, encoder_cell_size), hidden states of the
        encoders.
      encoder state: concatenation of the final states of all encoders, tensor of shape (batch_size, sum_of_state_sizes)
      new_encoder_input_length: list of tensors of shape (batch_size,) with the true length of the encoder outputs.
        May be different than `encoder_input_length` because of maxout strides, and time pooling.
        NOTE(review): in the code below the appended value is the unmodified `encoder_input_length[i]` -- confirm
        whether a length adjustment is expected here.
    """
    encoder_states = []
    encoder_outputs = []

    # create embeddings in the global scope (allows sharing between encoder and decoder)
    embedding_variables = []
    for encoder in encoders:
        if encoder.binary:
            # binary encoders get no embedding matrix; their inputs are used as-is
            embedding_variables.append(None)
            continue
        # inputs are token ids, which need to be mapped to vectors (embeddings)
        embedding_shape = [encoder.vocab_size, encoder.embedding_size]

        if encoder.embedding_initializer == 'sqrt3':
            initializer = tf.random_uniform_initializer(-math.sqrt(3), math.sqrt(3))
        else:
            initializer = None

        device = '/cpu:0' if encoder.embeddings_on_cpu else None
        with tf.device(device):
            # embeddings can take a very large amount of memory, so
            # storing them in GPU memory can be impractical
            embedding = get_variable('embedding_{}'.format(encoder.name), shape=embedding_shape,
                                     initializer=initializer)
        embedding_variables.append(embedding)

    new_encoder_input_length = []

    for i, encoder in enumerate(encoders):
        # note: this overwrites `cell_type` on the encoder configuration object itself
        if encoder.use_lstm is False:
            encoder.cell_type = 'GRU'

        with tf.variable_scope('encoder_{}'.format(encoder.name)):
            encoder_inputs_ = encoder_inputs[i]
            encoder_input_length_ = encoder_input_length[i]

            def get_cell(input_size=None, reuse=False):
                # Build one recurrent cell according to `encoder.cell_type`, optionally wrapped with dropout.
                if encoder.cell_type.lower() == 'lstm':
                    cell = CellWrapper(BasicLSTMCell(encoder.cell_size, reuse=reuse))
                elif encoder.cell_type.lower() == 'dropoutgru':
                    cell = DropoutGRUCell(encoder.cell_size, reuse=reuse, layer_norm=encoder.layer_norm,
                                          input_size=input_size, input_keep_prob=encoder.rnn_input_keep_prob,
                                          state_keep_prob=encoder.rnn_state_keep_prob)
                else:
                    cell = GRUCell(encoder.cell_size, reuse=reuse, layer_norm=encoder.layer_norm)

                # 'dropoutgru' cells receive their keep probabilities directly, so they are not wrapped again
                if encoder.use_dropout and encoder.cell_type.lower() != 'dropoutgru':
                    cell = DropoutWrapper(cell, input_keep_prob=encoder.rnn_input_keep_prob,
                                          output_keep_prob=encoder.rnn_output_keep_prob,
                                          state_keep_prob=encoder.rnn_state_keep_prob,
                                          variational_recurrent=encoder.pervasive_dropout,
                                          dtype=tf.float32, input_size=input_size)
                return cell

            embedding = embedding_variables[i]

            batch_size = tf.shape(encoder_inputs_)[0]
            time_steps = tf.shape(encoder_inputs_)[1]

            if embedding is not None:
                # flatten to a vector of ids, look them up, then restore (batch, time, embedding) layout
                flat_inputs = tf.reshape(encoder_inputs_, [tf.multiply(batch_size, time_steps)])
                flat_inputs = tf.nn.embedding_lookup(embedding, flat_inputs)
                encoder_inputs_ = tf.reshape(flat_inputs,
                                             tf.stack([batch_size, time_steps, flat_inputs.get_shape()[1].value]))

            if other_inputs is not None:
                encoder_inputs_ = tf.concat([encoder_inputs_, other_inputs], axis=2)

            if encoder.use_dropout:
                # first dropout: the noise has size 1 on the last axis, so a whole time step is kept or dropped
                noise_shape = [1, time_steps, 1] if encoder.pervasive_dropout else [batch_size, time_steps, 1]
                encoder_inputs_ = tf.nn.dropout(encoder_inputs_, keep_prob=encoder.word_keep_prob,
                                                noise_shape=noise_shape)

                # second dropout: acts on individual features of the input vectors
                size = tf.shape(encoder_inputs_)[2]
                noise_shape = [1, 1, size] if encoder.pervasive_dropout else [batch_size, time_steps, size]
                encoder_inputs_ = tf.nn.dropout(encoder_inputs_, keep_prob=encoder.embedding_keep_prob,
                                                noise_shape=noise_shape)

            if encoder.input_layers:
                # optional stack of dense layers applied to the inputs before the RNN
                for j, layer_size in enumerate(encoder.input_layers):
                    if encoder.input_layer_activation is not None and encoder.input_layer_activation.lower() == 'relu':
                        activation = tf.nn.relu
                    else:
                        activation = tf.tanh

                    encoder_inputs_ = dense(encoder_inputs_, layer_size, activation=activation, use_bias=True,
                                            name='layer_{}'.format(j))
                    if encoder.use_dropout:
                        encoder_inputs_ = tf.nn.dropout(encoder_inputs_, keep_prob=encoder.input_layer_keep_prob)

            # Contrary to Theano's RNN implementation, states after the sequence length are zero
            # (while Theano repeats last state)
            # NOTE(review): `inter_layer_keep_prob` is assigned here but not used again in this function
            inter_layer_keep_prob = None if not encoder.use_dropout else encoder.inter_layer_keep_prob

            # arguments shared by the bidirectional and unidirectional RNN calls below
            parameters = dict(
                inputs=encoder_inputs_, sequence_length=encoder_input_length_,
                dtype=tf.float32, parallel_iterations=encoder.parallel_iterations
            )

            input_size = encoder_inputs_.get_shape()[2].value
            # LSTM initial states are twice the cell size
            state_size = (encoder.cell_size * 2 if encoder.cell_type.lower() == 'lstm' else encoder.cell_size)

            def get_initial_state(name='initial_state'):
                # Trainable initial state (initialized to zeros) tiled over the batch, or None when disabled.
                if encoder.train_initial_states:
                    initial_state = get_variable(name, initializer=tf.zeros(state_size))
                    return tf.tile(tf.expand_dims(initial_state, axis=0), [batch_size, 1])
                else:
                    return None

            if encoder.bidir:
                # `get_initial_state` is called once per direction; the same result is repeated for every layer
                rnn = lambda reuse: stack_bidirectional_dynamic_rnn(
                    cells_fw=[get_cell(input_size if j == 0 else 2 * encoder.cell_size, reuse=reuse)
                              for j in range(encoder.layers)],
                    cells_bw=[get_cell(input_size if j == 0 else 2 * encoder.cell_size, reuse=reuse)
                              for j in range(encoder.layers)],
                    initial_states_fw=[get_initial_state('initial_state_fw')] * encoder.layers,
                    initial_states_bw=[get_initial_state('initial_state_bw')] * encoder.layers,
                    time_pooling=encoder.time_pooling, pooling_avg=encoder.pooling_avg,
                    **parameters)

                initializer = CellInitializer(encoder.cell_size) if encoder.orthogonal_init else None
                with tf.variable_scope(tf.get_variable_scope(), initializer=initializer):
                    # NOTE(review): the second return value is discarded; presumably the forward-direction
                    # states -- confirm against stack_bidirectional_dynamic_rnn
                    try:
                        encoder_outputs_, _, encoder_states_ = rnn(reuse=False)
                    except ValueError:
                        # Multi-task scenario where we're reusing the same RNN parameters
                        encoder_outputs_, _, encoder_states_ = rnn(reuse=True)
            else:
                # time pooling and 'concat_last' are only supported by the bidirectional path
                if encoder.time_pooling or encoder.final_state == 'concat_last':
                    raise NotImplementedError

                if encoder.layers > 1:
                    cell = MultiRNNCell([get_cell(input_size if j == 0 else encoder.cell_size)
                                         for j in range(encoder.layers)])
                    initial_state = (get_initial_state(),) * encoder.layers
                else:
                    cell = get_cell(input_size)
                    initial_state = get_initial_state()

                encoder_outputs_, encoder_states_ = auto_reuse(tf.nn.dynamic_rnn)(cell=cell,
                                                                                  initial_state=initial_state,
                                                                                  **parameters)

            # features from index `cell_size` onward at time step 0
            # (presumably the backward direction's half of the bidirectional outputs -- confirm)
            last_backward = encoder_outputs_[:, 0, encoder.cell_size:]
            # first `cell_size` features at the last valid time step of each sequence
            indices = tf.stack([tf.range(batch_size), encoder_input_length_ - 1], axis=1)
            last_forward = tf.gather_nd(encoder_outputs_[:, :, :encoder.cell_size], indices)
            last_forward.set_shape([None, encoder.cell_size])

            if encoder.final_state == 'concat_last':
                # concats last states of all backward layers (full LSTM states)
                encoder_state_ = tf.concat(encoder_states_, axis=1)
            elif encoder.final_state == 'average':
                # masked mean of the encoder outputs over the valid time steps
                mask = tf.sequence_mask(encoder_input_length_, maxlen=tf.shape(encoder_outputs_)[1], dtype=tf.float32)
                mask = tf.expand_dims(mask, axis=2)
                encoder_state_ = tf.reduce_sum(mask * encoder_outputs_, axis=1) / tf.reduce_sum(mask, axis=1)
            elif encoder.final_state == 'average_inputs':
                # masked mean of the encoder inputs over the valid time steps
                mask = tf.sequence_mask(encoder_input_length_, maxlen=tf.shape(encoder_inputs_)[1], dtype=tf.float32)
                mask = tf.expand_dims(mask, axis=2)
                encoder_state_ = tf.reduce_sum(mask * encoder_inputs_, axis=1) / tf.reduce_sum(mask, axis=1)
            elif encoder.bidir and encoder.final_state == 'last_both':
                encoder_state_ = tf.concat([last_forward, last_backward], axis=1)
            elif encoder.bidir and not encoder.final_state == 'last_forward':
                # last backward hidden state
                encoder_state_ = last_backward
            else:
                # last forward hidden state
                encoder_state_ = last_forward

            if encoder.bidir and encoder.bidir_projection:
                # project the bidirectional outputs down to `cell_size` features
                encoder_outputs_ = dense(encoder_outputs_, encoder.cell_size, use_bias=False, name='bidir_projection')

            encoder_outputs.append(encoder_outputs_)
            encoder_states.append(encoder_state_)
            new_encoder_input_length.append(encoder_input_length_)

    encoder_state = tf.concat(encoder_states, 1)
    return encoder_outputs, encoder_state, new_encoder_input_length