def biGRU(input, input_length, params, dropout=None, layers=None):
    """Run a stacked bidirectional GRU over ``input``.

    Args:
        input: Input tensor. Its last static dimension is used as the
            ``input_size`` of the first layer's dropout wrapper.
        input_length: Forwarded as ``sequence_length`` to
            ``tf.nn.bidirectional_dynamic_rnn``.
        params: Hyper-parameter object providing ``units``, ``layers`` and
            ``dropout``.
        dropout: Drop probability applied to cell inputs and recurrent
            state. ``None`` falls back to ``params.dropout``; an explicit
            ``0.0`` disables dropout.
        layers: Number of stacked layers; a falsy value falls back to
            ``params.layers``.

    Returns:
        ``(output, states)``: the forward and backward outputs concatenated
        on the last axis, and the final states exactly as returned by
        ``tf.nn.bidirectional_dynamic_rnn``.
    """
    # `dropout or params.dropout` treated an explicit 0.0 (used by callers to
    # switch dropout off outside training) as "not given" and silently
    # re-enabled params.dropout. Only None means "use the default".
    if dropout is None:
        dropout = params.dropout
    num_layers = layers or params.layers
    keep_prob = 1.0 - dropout

    def make_stack():
        """Build one direction's stack of dropout-wrapped GRU cells."""
        return MultiRNNCell([
            DropoutWrapper(
                GRUCell(params.units),
                input_keep_prob=keep_prob,
                state_keep_prob=keep_prob,
                variational_recurrent=True,
                dtype=tf.float32,
                # Layer 0 consumes the raw input; deeper layers consume the
                # previous layer's `units`-wide output.
                input_size=(input.get_shape()[-1] if layer == 0
                            else tf.TensorShape(params.units)))
            for layer in range(num_layers)
        ])

    cell_fw = make_stack()
    cell_bw = make_stack()
    output, states = tf.nn.bidirectional_dynamic_rnn(
        cell_fw,
        cell_bw,
        input,
        sequence_length=input_length,
        dtype=tf.float32)
    output = tf.concat(output, -1)
    return output, states
def BiRNN(sequence, num_hidden, sequence_w_len = None, reuse = None, keep_prob = 0.8, scope = None):
    """Run a single-layer bidirectional LSTM and concatenate both directions.

    Args:
        sequence: Input tensor fed to ``tf.nn.bidirectional_dynamic_rnn``.
        num_hidden: Number of units of each directional LSTM cell.
        sequence_w_len: Optional per-example sequence lengths.
        reuse: Forwarded to ``LSTMCell`` as its ``reuse`` argument.
        keep_prob: Output keep probability of each cell's dropout wrapper.
        scope: Optional variable scope forwarded to the RNN call.

    Returns:
        Forward and backward outputs concatenated along axis 2.
    """
    def build_direction():
        # One dropout-wrapped LSTM cell; called once per direction.
        lstm = tf.contrib.rnn.LSTMCell(num_hidden, reuse=reuse)
        return DropoutWrapper(lstm, output_keep_prob=keep_prob,
                              dtype=tf.float32)

    forward_cell = build_direction()
    backward_cell = build_direction()
    directional_outputs, _final_states = tf.nn.bidirectional_dynamic_rnn(
        forward_cell,
        backward_cell,
        sequence,
        sequence_length=sequence_w_len,
        dtype=tf.float32,
        scope=scope)
    return tf.concat(directional_outputs, 2)
def __init__(self, num_symbols, num_embed_units, num_units, num_labels, embed,
             learning_rate=0.001, max_gradient_norm=5.0):
    """Build the graph of a two-level LSTM sentence classifier.

    A bidirectional LSTM ("word_rnn") reads every sentence, its outputs are
    max-pooled over the word axis, and a second LSTM ("sentence_rnn") runs
    over the resulting sequence of sentence vectors (treated as a batch of
    size one) before a dense layer produces the label logits.

    Args:
        num_symbols: Not used in this method.
        num_embed_units: Not used in this method.
        num_units: Number of units of every LSTM cell.
        num_labels: Number of output classes.
        embed: Initial value of the embedding table.
        learning_rate: Initial value of the (non-trainable) learning-rate
            variable handed to Adam.
        max_gradient_norm: Global-norm threshold for gradient clipping.
    """
    self.texts = tf.placeholder(tf.int32, [None, None])  # shape: sentence*max_word
    self.text_length = tf.placeholder(tf.int32, [None])  # shape: sentence
    self.labels = tf.placeholder(tf.int32, [None])  # shape: sentence
    self.keep_prob = tf.placeholder(tf.float32)

    self.learning_rate = tf.Variable(float(learning_rate), trainable=False, dtype=tf.float32)
    self.global_step = tf.Variable(0, trainable=False)
    self.epoch = tf.Variable(0, trainable=False)
    self.epoch_add_op = self.epoch.assign(self.epoch + 1)

    # build the embedding table (index to vector)
    self.embed = tf.get_variable('embed', dtype=tf.float32, initializer=embed)
    self.embed_inputs = tf.nn.embedding_lookup(self.embed, self.texts)  # shape: sentence*max_word*num_embed_units

    # Word level: bidirectional LSTM over the words of each sentence.
    fw_cell = DropoutWrapper(BasicLSTMCell(num_units), output_keep_prob=self.keep_prob)
    bw_cell = DropoutWrapper(BasicLSTMCell(num_units), output_keep_prob=self.keep_prob)
    middle_outputs, middle_states = bidirectional_dynamic_rnn(fw_cell, bw_cell, self.embed_inputs, self.text_length, dtype=tf.float32, scope="word_rnn")
    middle_outputs = tf.concat(middle_outputs, 2)  # shape: sentence*max_word*(2*num_units)
    # Max-pool over the word axis, then add a leading batch axis of size 1 so
    # the sentences form one sequence for the sentence-level RNN.
    middle_inputs = tf.expand_dims(tf.reduce_max(middle_outputs, axis=1), 0)  # shape: 1*sentence*(2*num_units)

    # Sentence level: unidirectional LSTM over the sentence vectors.
    top_cell = DropoutWrapper(BasicLSTMCell(num_units), output_keep_prob=self.keep_prob)
    outputs, states = dynamic_rnn(top_cell, middle_inputs, dtype=tf.float32, scope="sentence_rnn")
    self.outputs = outputs[0]  # shape: sentence*num_units
    logits = tf.layers.dense(self.outputs, num_labels)

    # self.loss is the SUM of the per-sentence cross entropies; the mean is
    # only used for the gradient computation below.
    self.loss = tf.reduce_sum(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.labels, logits=logits), name='loss')
    mean_loss = self.loss / tf.cast(tf.shape(self.labels)[0], dtype=tf.float32)
    self.predict_labels = tf.argmax(logits, 1, 'predict_labels', output_type=tf.int32)
    # Despite its name this is the COUNT of correct predictions, not a ratio.
    self.accuracy = tf.reduce_sum(tf.cast(tf.equal(self.labels, self.predict_labels), tf.int32), name='accuracy')

    self.params = tf.trainable_variables()
    # calculate the gradient of parameters
    opt = tf.train.AdamOptimizer(self.learning_rate)
    gradients = tf.gradients(mean_loss, self.params)
    clipped_gradients, self.gradient_norm = tf.clip_by_global_norm(gradients, max_gradient_norm)
    self.update = opt.apply_gradients(zip(clipped_gradients, self.params), global_step=self.global_step)

    self.saver = tf.train.Saver(max_to_keep=3, pad_step_number=True)
def __init__(self, num_tokens, embeddings, embeddings_size, train_embeddings,
             dropout_input, rnn_hidden_size, id2token, token2id, id2label,
             label2id, mode='teacher', vocab_proj_dim=None):
    """Create the shared LSTM cell, embedding matrix, decoder and lookup tables.

    Args:
        num_tokens: Vocabulary size (number of rows of the embedding matrix).
        embeddings: Pre-trained embeddings handed to
            ``glove_embeddings_initializer``; ``None`` selects random normal
            initialization instead.
        embeddings_size: Width of the embedding matrix.
        train_embeddings: Whether the embedding matrix is trainable.
        dropout_input: Used as the input AND output keep probability of the
            LSTM cell's dropout wrapper.
        rnn_hidden_size: Number of LSTM units; also passed to the decoder.
        id2token: Mapping whose values, in iteration order, populate the
            index-to-token lookup table.
        token2id: Mapping from token string to integer id.
        id2label: Mapping whose values, in iteration order, populate the
            index-to-label lookup table.
        label2id: Not used in this method.
        mode: Either ``'teacher'`` or ``'forloop'``.
        vocab_proj_dim: Forwarded to ``RNN_Decoder``.

    Raises:
        ValueError: If ``mode`` is not one of the supported values.
    """
    super(eVSNLI_net, self).__init__()

    # The former `assert mode == 'teacher' or 'forloop'` was always true
    # because the non-empty string 'forloop' is truthy on its own.
    if mode not in ('teacher', 'forloop'):
        raise ValueError(
            "mode must be 'teacher' or 'forloop', got {!r}".format(mode))
    self.mode = mode
    self.num_tokens = num_tokens

    self.lstm_cell = DropoutWrapper(
        tf.nn.rnn_cell.LSTMCell(rnn_hidden_size),
        input_keep_prob=dropout_input,
        output_keep_prob=dropout_input)

    if embeddings is not None:
        self.embedding_matrix = tf.get_variable(
            "embedding_matrix",
            shape=(num_tokens, embeddings_size),
            initializer=glove_embeddings_initializer(embeddings),
            trainable=train_embeddings)
        print("Loaded GloVe embeddings!")
    else:
        self.embedding_matrix = tf.get_variable(
            "embedding_matrix",
            shape=(num_tokens, embeddings_size),
            initializer=tf.random_normal_initializer(stddev=0.05),
            trainable=train_embeddings)

    # The decoder receives the embedding matrix itself (not just its width);
    # vocab_proj_dim configures its vocabulary projection.
    self.decoder = RNN_Decoder(self.embedding_matrix, rnn_hidden_size,
                               num_tokens, vocab_proj_dim)

    # token string -> id; unknown tokens map to -1.
    keys = list(token2id.keys())
    values = [token2id[k] for k in keys]
    self.token2id_table = tf.contrib.lookup.HashTable(
        tf.contrib.lookup.KeyValueTensorInitializer(
            keys, values, key_dtype=tf.string, value_dtype=tf.int64),
        -1)

    # NOTE(review): both index tables rely on the iteration order of the
    # id2* mappings matching ids 0..n-1 -- confirm how they are built.
    mapping_token = tf.constant(list(id2token.values()), dtype=tf.string)
    self.id2token_table = tf.contrib.lookup.index_to_string_table_from_tensor(
        mapping_token, default_value="#unk#", name=None)

    mapping_label = tf.constant(list(id2label.values()), dtype=tf.string)
    self.id2label_table = tf.contrib.lookup.index_to_string_table_from_tensor(
        mapping_label, default_value="#unk#", name=None)
def __init__(self, cell, input_keep_prob=1.0, output_keep_prob=1.0,
             state_keep_prob=1.0, variational_recurrent=False,
             input_size=None, dtype=None, seed=None,
             dropout_state_filter_visitor=None, is_train=True):
    """Initialise the base ``DropoutWrapper`` and remember ``is_train``.

    Every argument except ``is_train`` is forwarded positionally, in order,
    to ``DropoutWrapper.__init__``; ``is_train`` is stored on the instance.
    """
    base_args = (
        cell,
        input_keep_prob,
        output_keep_prob,
        state_keep_prob,
        variational_recurrent,
        input_size,
        dtype,
        seed,
        dropout_state_filter_visitor,
    )
    DropoutWrapper.__init__(self, *base_args)
    self.is_train = is_train
def _create_single_cell(cell_fn, num_units, is_residual=False, is_dropout=False, keep_prob=None):
    """Build one RNN cell with ``cell_fn`` and apply the optional wrappers.

    Args:
        cell_fn: Callable invoked as ``cell_fn(num_units)`` to create the cell.
        num_units: Passed through to ``cell_fn``.
        is_residual: When true, wrap the result in ``ResidualWrapper``.
        is_dropout: When true, wrap the cell in ``DropoutWrapper`` first.
        keep_prob: Input keep probability used by the dropout wrapper.

    Returns:
        The cell, wrapped with dropout (inner) and residual (outer) as asked.
    """
    wrapped = cell_fn(num_units)
    if is_dropout:
        wrapped = DropoutWrapper(wrapped, input_keep_prob=keep_prob)
    return ResidualWrapper(wrapped) if is_residual else wrapped
def dropout():
    """Create the configured RNN cell and add a dropout layer after it.

    Uses ``lstm_cell()`` when ``self.config.rnn`` is ``'lstm'`` and
    ``gru_cell()`` otherwise; the output keep probability comes from
    ``self.config.dropout_keep_prob``.
    """
    base_cell = lstm_cell() if self.config.rnn == 'lstm' else gru_cell()
    return DropoutWrapper(
        base_cell, output_keep_prob=self.config.dropout_keep_prob)
def build_simple_ic_model(sentence_input, img_features_input, dropout_input,
                          num_tokens, num_labels, embeddings, embeddings_size,
                          train_embeddings, rnn_hidden_size,
                          multimodal_fusion_hidden_size,
                          classification_hidden_size):
    """Build a sentence + image classifier and return its label logits.

    The sentence is embedded and encoded by an LSTM; its final hidden state
    and the L2-normalised image features each go through a gated-tanh layer,
    are fused by element-wise multiplication, and pass through three further
    gated-tanh layers before a linear output layer.

    ``dropout_input`` is used as a KEEP probability everywhere (LSTM wrapper
    and every ``tf.nn.dropout`` call).

    Returns:
        The output of the final fully-connected layer (``num_labels`` wide,
        no activation).
    """
    def gated_dropout(tensor, size):
        # gated_tanh layer followed by dropout, as used by every hidden layer.
        return tf.nn.dropout(gated_tanh(tensor, size), keep_prob=dropout_input)

    # Sentence length = number of token ids that differ from 0 in each row.
    padding_ids = tf.zeros_like(sentence_input, dtype=tf.int32)
    is_real_token = tf.not_equal(sentence_input, padding_ids)
    token_counts = tf.reduce_sum(tf.cast(is_real_token, tf.int64), 1)
    sentence_length = tf.cast(token_counts, tf.int32)

    # Embedding table: pre-trained initializer when given, random otherwise.
    if embeddings is None:
        embedding_initializer = tf.random_normal_initializer(stddev=0.05)
    else:
        embedding_initializer = glove_embeddings_initializer(embeddings)
    embedding_matrix = tf.get_variable(
        "embedding_matrix",
        shape=(num_tokens, embeddings_size),
        initializer=embedding_initializer,
        trainable=train_embeddings)
    if embeddings is not None:
        print("Loaded GloVe embeddings!")

    sentence_embeddings = tf.nn.embedding_lookup(embedding_matrix,
                                                 sentence_input)
    encoder_cell = DropoutWrapper(
        tf.nn.rnn_cell.LSTMCell(rnn_hidden_size),
        input_keep_prob=dropout_input,
        output_keep_prob=dropout_input)
    _, final_state = tf.nn.dynamic_rnn(
        cell=encoder_cell,
        inputs=sentence_embeddings,
        sequence_length=sentence_length,
        dtype=tf.float32)

    image_vectors = tf.nn.l2_normalize(img_features_input, dim=1)

    # Multimodal fusion: element-wise product of the two gated projections.
    sentence_branch = gated_dropout(final_state.h,
                                    multimodal_fusion_hidden_size)
    image_branch = gated_dropout(image_vectors, multimodal_fusion_hidden_size)
    hidden = tf.multiply(sentence_branch, image_branch)

    # Three stacked classification layers.
    for _ in range(3):
        hidden = gated_dropout(hidden, classification_hidden_size)

    return tf.contrib.layers.fully_connected(hidden, num_labels,
                                             activation_fn=None)
def RNN(sequence, num_hidden, sequence_w_len = None, reuse = None, keep_prob = 0.8, scope = None):
    """Run a single-layer unidirectional LSTM and return its per-step outputs.

    Args:
        sequence: Input tensor fed to ``tf.nn.dynamic_rnn``.
        num_hidden: Number of LSTM units.
        sequence_w_len: Optional per-example sequence lengths.
        reuse: Forwarded to ``LSTMCell`` as its ``reuse`` argument.
        keep_prob: Output keep probability of the dropout wrapper.
        scope: Optional variable scope forwarded to the RNN call.

    Returns:
        The outputs tensor of ``tf.nn.dynamic_rnn``; the final state is
        discarded.
    """
    lstm = tf.contrib.rnn.LSTMCell(num_hidden, reuse=reuse)
    dropped_cell = DropoutWrapper(lstm, output_keep_prob=keep_prob,
                                  dtype=tf.float32)
    step_outputs, _final_state = tf.nn.dynamic_rnn(
        dropped_cell,
        sequence,
        sequence_length=sequence_w_len,
        dtype=tf.float32,
        scope=scope)
    return step_outputs
def highway_lstm_cell(size):
    """Return a ``HighwayLSTMCell`` of ``size`` units wrapped with dropout.

    The cell uses highway connections, ``numpy_orthogonal_initializer`` and
    the layer-norm setting from ``config``. Keep probabilities
    (``keep_prob`` for state, ``input_keep_prob``, ``output_keep_prob``)
    are taken from the enclosing scope; the wrapper is created with
    ``variational_recurrent=True``.
    """
    base = HighwayLSTMCell(
        size,
        highway=True,
        initializer=numpy_orthogonal_initializer,
        use_layer_norm=config.layer_norm)
    wrapper_options = dict(
        variational_recurrent=True,
        dtype=tf.float32,
        state_keep_prob=keep_prob,
        input_keep_prob=input_keep_prob,
        output_keep_prob=output_keep_prob)
    return DropoutWrapper(base, **wrapper_options)
def cell(_size, name=None):
    """Return a dropout-wrapped ``LSTMCell`` of ``config.state_size`` units.

    Args:
        _size: Passed to the dropout wrapper as ``input_size``.
        name: Optional name for the underlying ``LSTMCell``.

    Keep probabilities (``keep_prob`` for state, ``input_keep_prob``,
    ``output_keep_prob``) come from the enclosing scope; the wrapper is
    created with ``variational_recurrent=True``.
    """
    lstm = LSTMCell(
        config.state_size,
        name=name,
        initializer=orthogonal_initializer(4),
        forget_bias=config.forget_bias)
    wrapper_options = dict(
        variational_recurrent=True,
        dtype=tf.float32,
        input_size=_size,
        output_keep_prob=output_keep_prob,
        state_keep_prob=keep_prob,
        input_keep_prob=input_keep_prob)
    return DropoutWrapper(lstm, **wrapper_options)
def create_cell():
    """Build the stacked LSTM cell, adding dropout when it is enabled.

    Creates ``num_layer`` ``BasicLSTMCell`` layers of ``hidden_size`` units
    inside a ``MultiRNNCell``. When ``self.dropout_keep_prob`` is below 1.0
    the WHOLE stack (not each layer) is wrapped in a ``DropoutWrapper`` that
    uses that value for both input and output keep probability.

    Returns:
        The (possibly dropout-wrapped) ``MultiRNNCell``.
    """
    # The stack is identical in both cases; only the wrapping is conditional.
    hidden = MultiRNNCell(
        [BasicLSTMCell(hidden_size) for _ in range(num_layer)])
    if self.dropout_keep_prob < 1.0:
        hidden = DropoutWrapper(
            hidden,
            input_keep_prob=self.dropout_keep_prob,
            output_keep_prob=self.dropout_keep_prob)
    return hidden
def build_simple_te_model_h(premise_input, hypothesis_input, dropout_input,
                            num_tokens, num_labels, embeddings,
                            embeddings_size, train_embeddings,
                            rnn_hidden_size, classification_hidden_size):
    """Build a hypothesis-only classifier and return its label logits.

    Only ``hypothesis_input`` is encoded; ``premise_input`` is accepted but
    not used. The hypothesis is embedded, run through an LSTM, and the final
    hidden state passes through three gated-tanh layers before a linear
    output layer.

    ``dropout_input`` is used as a KEEP probability everywhere (LSTM wrapper
    and every ``tf.nn.dropout`` call).

    Returns:
        The output of the final fully-connected layer (``num_labels`` wide,
        no activation).
    """
    # Hypothesis length = number of token ids that differ from 0 in each row.
    padding_ids = tf.zeros_like(hypothesis_input, dtype=tf.int32)
    is_real_token = tf.not_equal(hypothesis_input, padding_ids)
    token_counts = tf.reduce_sum(tf.cast(is_real_token, tf.int64), 1)
    hypothesis_length = tf.cast(token_counts, tf.int32)

    # Embedding table: pre-trained initializer when given, random otherwise.
    if embeddings is None:
        embedding_initializer = tf.random_normal_initializer(stddev=0.05)
    else:
        embedding_initializer = glove_embeddings_initializer(embeddings)
    embedding_matrix = tf.get_variable(
        "embedding_matrix",
        shape=(num_tokens, embeddings_size),
        initializer=embedding_initializer,
        trainable=train_embeddings)
    if embeddings is not None:
        print("Loaded GloVe embeddings!")

    hypothesis_embeddings = tf.nn.embedding_lookup(embedding_matrix,
                                                   hypothesis_input)
    encoder_cell = DropoutWrapper(
        tf.nn.rnn_cell.LSTMCell(rnn_hidden_size),
        input_keep_prob=dropout_input,
        output_keep_prob=dropout_input)
    _, final_state = tf.nn.dynamic_rnn(
        cell=encoder_cell,
        inputs=hypothesis_embeddings,
        sequence_length=hypothesis_length,
        dtype=tf.float32)

    # Three stacked gated-tanh layers, each followed by dropout.
    hidden = final_state.h
    for _ in range(3):
        hidden = tf.nn.dropout(
            gated_tanh(hidden, classification_hidden_size),
            keep_prob=dropout_input)

    return tf.contrib.layers.fully_connected(hidden, num_labels,
                                             activation_fn=None)
def build_tl_mt_model(sentence_input, premise_input, hypothesis_input, img_features_input, dropout_input, num_tokens, num_ic_labels, num_vte_labels, embeddings, embeddings_size, num_img_features, img_features_size, train_embeddings, rnn_hidden_size, multimodal_fusion_hidden_size, classification_hidden_size):
    """Build a two-headed (multi-task) text + image model.

    One head classifies ``sentence_input`` against the image
    (``num_ic_labels`` outputs), the other classifies the premise/hypothesis
    pair against the image (``num_vte_labels`` outputs). Both heads share the
    embedding matrix, the LSTM cell object, and the fully-connected layers
    created under the ``gated_sentence_*``, ``gated_img_features_sentence_*``
    and ``gated_first_layer_*`` variable scopes (re-entered with
    ``reuse=True`` for the premise and hypothesis branches).

    ``dropout_input`` is used as a KEEP probability everywhere, including on
    the two output logit tensors.

    NOTE(review): the extent of each ``with`` block below was reconstructed
    from a whitespace-collapsed source (each holds only the lambda that
    captures its scope) -- confirm against version control.

    Returns:
        Tuple ``(ic_classification, vte_classification)``.
    """
    # Sequence lengths = number of token ids that differ from 0 in each row.
    sentence_length = tf.cast(
        tf.reduce_sum(
            tf.cast(
                tf.not_equal(sentence_input,
                             tf.zeros_like(sentence_input, dtype=tf.int32)),
                tf.int64), 1), tf.int32)
    premise_length = tf.cast(
        tf.reduce_sum(
            tf.cast(
                tf.not_equal(premise_input,
                             tf.zeros_like(premise_input, dtype=tf.int32)),
                tf.int64), 1), tf.int32)
    hypothesis_length = tf.cast(
        tf.reduce_sum(
            tf.cast(
                tf.not_equal(hypothesis_input,
                             tf.zeros_like(hypothesis_input, dtype=tf.int32)),
                tf.int64), 1), tf.int32)

    # Shared embedding table: pre-trained initializer when given, random
    # normal otherwise.
    if embeddings is not None:
        embedding_matrix = tf.get_variable(
            "embedding_matrix",
            shape=(num_tokens, embeddings_size),
            initializer=glove_embeddings_initializer(embeddings),
            trainable=train_embeddings)
        print("Loaded GloVe embeddings!")
    else:
        embedding_matrix = tf.get_variable(
            "embedding_matrix",
            shape=(num_tokens, embeddings_size),
            initializer=tf.random_normal_initializer(stddev=0.05),
            trainable=train_embeddings)

    sentence_embeddings = tf.nn.embedding_lookup(embedding_matrix, sentence_input)
    premise_embeddings = tf.nn.embedding_lookup(embedding_matrix, premise_input)
    hypothesis_embeddings = tf.nn.embedding_lookup(embedding_matrix, hypothesis_input)

    # The same cell object encodes all three text inputs.
    lstm_cell = DropoutWrapper(tf.nn.rnn_cell.LSTMCell(rnn_hidden_size),
                               input_keep_prob=dropout_input,
                               output_keep_prob=dropout_input)
    sentence_outputs, sentence_final_states = tf.nn.dynamic_rnn(
        cell=lstm_cell,
        inputs=sentence_embeddings,
        sequence_length=sentence_length,
        dtype=tf.float32)
    premise_outputs, premise_final_states = tf.nn.dynamic_rnn(
        cell=lstm_cell,
        inputs=premise_embeddings,
        sequence_length=premise_length,
        dtype=tf.float32)
    hypothesis_outputs, hypothesis_final_states = tf.nn.dynamic_rnn(
        cell=lstm_cell,
        inputs=hypothesis_embeddings,
        sequence_length=hypothesis_length,
        dtype=tf.float32)

    normalized_img_features = tf.nn.l2_normalize(img_features_input, dim=2)

    # ---- Image-classification head: attention over image features ----
    # Repeat the sentence state once per image feature so it can be
    # concatenated with every feature vector.
    reshaped_sentence = tf.reshape(
        tf.tile(sentence_final_states.h, [1, num_img_features]),
        [-1, num_img_features, rnn_hidden_size])
    img_sentence_concatenation = tf.concat(
        [normalized_img_features, reshaped_sentence], -1)
    gated_img_sentence_concatenation = tf.nn.dropout(gated_tanh(
        img_sentence_concatenation, rnn_hidden_size),
        keep_prob=dropout_input)
    att_wa_sentence = lambda x: tf.nn.dropout(
        tf.contrib.layers.fully_connected(
            x, 1, activation_fn=None, biases_initializer=None),
        keep_prob=dropout_input)
    a_sentence = att_wa_sentence(gated_img_sentence_concatenation)
    # NOTE(review): tf.squeeze without an axis removes every size-1
    # dimension; presumably this misbehaves for a batch of one -- confirm.
    a_sentence = tf.nn.softmax(tf.squeeze(a_sentence))
    # Attention-weighted sum of the image features.
    v_head_sentence = tf.squeeze(
        tf.matmul(tf.expand_dims(a_sentence, 1), normalized_img_features))

    # The `with` blocks only capture a scope object; the lambdas pass it to
    # fully_connected so later branches can reuse the same variables.
    with tf.variable_scope(
            "gated_sentence_scope_W_plus_b") as gated_sentence_scope_W_plus_b:
        gated_sentence_W_plus_b = lambda x: tf.contrib.layers.fully_connected(
            x,
            multimodal_fusion_hidden_size,
            activation_fn=None,
            scope=gated_sentence_scope_W_plus_b)
    with tf.variable_scope("gated_sentence_scope_W_plus_b_prime"
                           ) as gated_sentence_scope_W_plus_b_prime:
        gated_sentence_W_plus_b_prime = lambda x: tf.contrib.layers.fully_connected(
            x,
            multimodal_fusion_hidden_size,
            activation_fn=None,
            scope=gated_sentence_scope_W_plus_b_prime)
    gated_sentence = tf.nn.dropout(
        gated_tanh(sentence_final_states.h,
                   multimodal_fusion_hidden_size,
                   W_plus_b=gated_sentence_W_plus_b,
                   W_plus_b_prime=gated_sentence_W_plus_b_prime),
        keep_prob=dropout_input,
    )

    # Restore the static shape after the axis-less squeeze above.
    v_head_sentence.set_shape(
        (sentence_embeddings.get_shape()[0], img_features_size))
    with tf.variable_scope("gated_img_features_sentence_scope_W_plus_b"
                           ) as gated_img_features_sentence_scope_W_plus_b:
        gated_img_features_sentence_W_plus_b = lambda x: tf.contrib.layers.fully_connected(
            x,
            multimodal_fusion_hidden_size,
            activation_fn=None,
            scope=gated_img_features_sentence_scope_W_plus_b)
    with tf.variable_scope(
            "gated_img_features_sentence_scope_W_plus_b_prime"
    ) as gated_img_features_sentence_scope_W_plus_b_prime:
        gated_img_features_sentence_W_plus_b_prime = lambda x: tf.contrib.layers.fully_connected(
            x,
            multimodal_fusion_hidden_size,
            activation_fn=None,
            scope=gated_img_features_sentence_scope_W_plus_b_prime)
    gated_img_features_sentence = tf.nn.dropout(gated_tanh(
        v_head_sentence,
        multimodal_fusion_hidden_size,
        W_plus_b=gated_img_features_sentence_W_plus_b,
        W_plus_b_prime=gated_img_features_sentence_W_plus_b_prime),
        keep_prob=dropout_input)

    # Fused sentence/image representation (the name h_premise_img is reused
    # for the premise branch further down).
    h_premise_img = tf.multiply(gated_sentence, gated_img_features_sentence)

    with tf.variable_scope("gated_first_layer_scope_W_plus_b"
                           ) as gated_first_layer_scope_W_plus_b:
        gated_first_layer_W_plus_b = lambda x: tf.contrib.layers.fully_connected(
            x,
            classification_hidden_size,
            activation_fn=None,
            scope=gated_first_layer_scope_W_plus_b)
    with tf.variable_scope("gated_first_layer_scope_W_plus_b_prime"
                           ) as gated_first_layer_scope_W_plus_b_prime:
        gated_first_layer_W_plus_b_prime = lambda x: tf.contrib.layers.fully_connected(
            x,
            classification_hidden_size,
            activation_fn=None,
            scope=gated_first_layer_scope_W_plus_b_prime)
    gated_first_layer = tf.nn.dropout(gated_tanh(
        h_premise_img,
        W_plus_b=gated_first_layer_W_plus_b,
        W_plus_b_prime=gated_first_layer_W_plus_b_prime),
        keep_prob=dropout_input)
    gated_second_layer = tf.nn.dropout(gated_tanh(gated_first_layer,
                                                  classification_hidden_size),
                                       keep_prob=dropout_input)
    gated_third_layer = tf.nn.dropout(gated_tanh(gated_second_layer,
                                                 classification_hidden_size),
                                      keep_prob=dropout_input)
    ic_classification = tf.nn.dropout(tf.contrib.layers.fully_connected(
        gated_third_layer, num_ic_labels, activation_fn=None),
        keep_prob=dropout_input)

    # ---- Premise/hypothesis head: same attention pattern per input ----
    reshaped_premise = tf.reshape(
        tf.tile(premise_final_states.h, [1, num_img_features]),
        [-1, num_img_features, rnn_hidden_size])
    img_premise_concatenation = tf.concat(
        [normalized_img_features, reshaped_premise], -1)
    gated_img_premise_concatenation = tf.nn.dropout(gated_tanh(
        img_premise_concatenation, rnn_hidden_size),
        keep_prob=dropout_input)
    att_wa_premise = lambda x: tf.nn.dropout(tf.contrib.layers.fully_connected(
        x, 1, activation_fn=None, biases_initializer=None),
        keep_prob=dropout_input)
    a_premise = att_wa_premise(gated_img_premise_concatenation)
    a_premise = tf.nn.softmax(tf.squeeze(a_premise))
    v_head_premise = tf.squeeze(
        tf.matmul(tf.expand_dims(a_premise, 1), normalized_img_features))

    reshaped_hypothesis = tf.reshape(
        tf.tile(hypothesis_final_states.h, [1, num_img_features]),
        [-1, num_img_features, rnn_hidden_size])
    img_hypothesis_concatenation = tf.concat(
        [normalized_img_features, reshaped_hypothesis], -1)
    gated_img_hypothesis_concatenation = tf.nn.dropout(gated_tanh(
        img_hypothesis_concatenation, rnn_hidden_size),
        keep_prob=dropout_input)
    att_wa_hypothesis = lambda x: tf.nn.dropout(
        tf.contrib.layers.fully_connected(
            x, 1, activation_fn=None, biases_initializer=None),
        keep_prob=dropout_input)
    a_hypothesis = att_wa_hypothesis(gated_img_hypothesis_concatenation)
    a_hypothesis = tf.nn.softmax(tf.squeeze(a_hypothesis))
    v_head_hypothesis = tf.squeeze(
        tf.matmul(tf.expand_dims(a_hypothesis, 1), normalized_img_features))

    # Premise text projection: reuses the sentence-branch variables.
    with tf.variable_scope(
            "gated_sentence_scope_W_plus_b") as gated_sentence_scope_W_plus_b:
        gated_premise_W_plus_b = lambda x: tf.contrib.layers.fully_connected(
            x,
            multimodal_fusion_hidden_size,
            activation_fn=None,
            scope=gated_sentence_scope_W_plus_b,
            reuse=True)
    with tf.variable_scope("gated_sentence_scope_W_plus_b_prime"
                           ) as gated_sentence_scope_W_plus_b_prime:
        gated_premise_W_plus_b_prime = lambda x: tf.contrib.layers.fully_connected(
            x,
            multimodal_fusion_hidden_size,
            activation_fn=None,
            scope=gated_sentence_scope_W_plus_b_prime,
            reuse=True)
    gated_premise = tf.nn.dropout(
        gated_tanh(premise_final_states.h,
                   multimodal_fusion_hidden_size,
                   W_plus_b=gated_premise_W_plus_b,
                   W_plus_b_prime=gated_premise_W_plus_b_prime),
        keep_prob=dropout_input,
    )

    # Hypothesis text projection: reuses the same variables again.
    with tf.variable_scope(
            "gated_sentence_scope_W_plus_b") as gated_sentence_scope_W_plus_b:
        gated_hypothesis_W_plus_b = lambda x: tf.contrib.layers.fully_connected(
            x,
            multimodal_fusion_hidden_size,
            activation_fn=None,
            scope=gated_sentence_scope_W_plus_b,
            reuse=True)
    with tf.variable_scope("gated_sentence_scope_W_plus_b_prime"
                           ) as gated_sentence_scope_W_plus_b_prime:
        gated_hypothesis_W_plus_b_prime = lambda x: tf.contrib.layers.fully_connected(
            x,
            multimodal_fusion_hidden_size,
            activation_fn=None,
            scope=gated_sentence_scope_W_plus_b_prime,
            reuse=True)
    gated_hypothesis = tf.nn.dropout(
        gated_tanh(hypothesis_final_states.h,
                   multimodal_fusion_hidden_size,
                   W_plus_b=gated_hypothesis_W_plus_b,
                   W_plus_b_prime=gated_hypothesis_W_plus_b_prime),
        keep_prob=dropout_input,
    )

    # Attended image features for the premise (shared image projection).
    v_head_premise.set_shape(
        (premise_embeddings.get_shape()[0], img_features_size))
    with tf.variable_scope("gated_img_features_sentence_scope_W_plus_b"
                           ) as gated_img_features_sentence_scope_W_plus_b:
        gated_img_features_premise_W_plus_b = lambda x: tf.contrib.layers.fully_connected(
            x,
            multimodal_fusion_hidden_size,
            activation_fn=None,
            scope=gated_img_features_sentence_scope_W_plus_b,
            reuse=True)
    with tf.variable_scope(
            "gated_img_features_sentence_scope_W_plus_b_prime"
    ) as gated_img_features_sentence_scope_W_plus_b_prime:
        gated_img_features_premise_W_plus_b_prime = lambda x: tf.contrib.layers.fully_connected(
            x,
            multimodal_fusion_hidden_size,
            activation_fn=None,
            scope=gated_img_features_sentence_scope_W_plus_b_prime,
            reuse=True)
    gated_img_features_premise = tf.nn.dropout(gated_tanh(
        v_head_premise,
        multimodal_fusion_hidden_size,
        W_plus_b=gated_img_features_premise_W_plus_b,
        W_plus_b_prime=gated_img_features_premise_W_plus_b_prime),
        keep_prob=dropout_input)

    # Attended image features for the hypothesis (shared image projection).
    v_head_hypothesis.set_shape(
        (hypothesis_embeddings.get_shape()[0], img_features_size))
    with tf.variable_scope("gated_img_features_sentence_scope_W_plus_b"
                           ) as gated_img_features_sentence_scope_W_plus_b:
        gated_img_features_hypothesis_W_plus_b = lambda x: tf.contrib.layers.fully_connected(
            x,
            multimodal_fusion_hidden_size,
            activation_fn=None,
            scope=gated_img_features_sentence_scope_W_plus_b,
            reuse=True)
    with tf.variable_scope(
            "gated_img_features_sentence_scope_W_plus_b_prime"
    ) as gated_img_features_sentence_scope_W_plus_b_prime:
        gated_img_features_hypothesis_W_plus_b_prime = lambda x: tf.contrib.layers.fully_connected(
            x,
            multimodal_fusion_hidden_size,
            activation_fn=None,
            scope=gated_img_features_sentence_scope_W_plus_b_prime,
            reuse=True)
    gated_img_features_hypothesis = tf.nn.dropout(gated_tanh(
        v_head_hypothesis,
        multimodal_fusion_hidden_size,
        W_plus_b=gated_img_features_hypothesis_W_plus_b,
        W_plus_b_prime=gated_img_features_hypothesis_W_plus_b_prime),
        keep_prob=dropout_input)

    h_premise_img = tf.multiply(gated_premise, gated_img_features_premise)
    h_hypothesis_img = tf.multiply(gated_hypothesis,
                                   gated_img_features_hypothesis)

    # First classification layer, shared with the image-classification head.
    with tf.variable_scope("gated_first_layer_scope_W_plus_b"
                           ) as gated_first_layer_scope_W_plus_b:
        gated_h_premise_img_hidden_layer_W_plus_b = lambda x: tf.contrib.layers.fully_connected(
            x,
            classification_hidden_size,
            activation_fn=None,
            scope=gated_first_layer_scope_W_plus_b,
            reuse=True)
    with tf.variable_scope("gated_first_layer_scope_W_plus_b_prime"
                           ) as gated_first_layer_scope_W_plus_b_prime:
        gated_h_premise_hidden_layer_W_plus_b_prime = lambda x: tf.contrib.layers.fully_connected(
            x,
            classification_hidden_size,
            activation_fn=None,
            scope=gated_first_layer_scope_W_plus_b_prime,
            reuse=True)
    gated_h_premise_img_hidden_layer = tf.nn.dropout(gated_tanh(
        h_premise_img,
        W_plus_b=gated_h_premise_img_hidden_layer_W_plus_b,
        W_plus_b_prime=gated_h_premise_hidden_layer_W_plus_b_prime),
        keep_prob=dropout_input)

    with tf.variable_scope("gated_first_layer_scope_W_plus_b"
                           ) as gated_first_layer_scope_W_plus_b:
        gated_h_hypothesis_img_hidden_layer_W_plus_b = lambda x: tf.contrib.layers.fully_connected(
            x,
            classification_hidden_size,
            activation_fn=None,
            scope=gated_first_layer_scope_W_plus_b,
            reuse=True)
    with tf.variable_scope("gated_first_layer_scope_W_plus_b_prime"
                           ) as gated_first_layer_scope_W_plus_b_prime:
        gated_h_hypothesis_hidden_layer_W_plus_b_prime = lambda x: tf.contrib.layers.fully_connected(
            x,
            classification_hidden_size,
            activation_fn=None,
            scope=gated_first_layer_scope_W_plus_b_prime,
            reuse=True)
    gated_h_hypothesis_img_hidden_layer = tf.nn.dropout(
        gated_tanh(
            h_hypothesis_img,
            W_plus_b=gated_h_hypothesis_img_hidden_layer_W_plus_b,
            W_plus_b_prime=gated_h_hypothesis_hidden_layer_W_plus_b_prime),
        keep_prob=dropout_input)

    # Concatenate the premise and hypothesis representations and classify.
    final_concatenation = tf.concat([
        gated_h_premise_img_hidden_layer, gated_h_hypothesis_img_hidden_layer
    ], 1)
    gated_first_layer = tf.nn.dropout(gated_tanh(final_concatenation,
                                                 classification_hidden_size),
                                      keep_prob=dropout_input)
    gated_second_layer = tf.nn.dropout(gated_tanh(gated_first_layer,
                                                  classification_hidden_size),
                                       keep_prob=dropout_input)
    vte_classification = tf.nn.dropout(tf.contrib.layers.fully_connected(
        gated_second_layer, num_vte_labels, activation_fn=None),
        keep_prob=dropout_input)

    return ic_classification, vte_classification
# 词向量表-随机初始化 train_x, vocab_size = get_vocabulary(train_x) test_x, vocab_size_test = get_vocabulary(test_x) print("datas shape:",train_x.shape) embeddings = tf.get_variable("embeddings", [vocab_size, embedding_size], initializer=tf.truncated_normal_initializer) # 将词索引号转换为词向量[None, max_document_length] => [None, max_document_length, embedding_size] embedded = tf.nn.embedding_lookup(embeddings, datas_placeholder) # 转换为LSTM的输入格式,要求是数组,数组的每个元素代表一个Batch下,一个时序的数据(即一个词) rnn_input = tf.unstack(embedded, max_document_length, axis=1) # 定义LSTM网络结构 lstm_cell = BasicLSTMCell(num_units=num_units, forget_bias=1.0) # cell lstm_cell = DropoutWrapper(cell=lstm_cell, input_keep_prob=1.0, output_keep_prob=keep_prob) rnn_outputs, rnn_states = static_rnn(cell=lstm_cell, inputs=rnn_input, dtype=tf.float32) # network # 最后一层 logits = tf.layers.dense(units=num_classes,inputs=rnn_outputs[-1]) # fully-connected pred_labels = tf.arg_max(input=logits,dimension=1) # 概率最大的类别为预测的类别 # 定义损失函数, logists为网络最后一层输出, labels为真实标签 losses = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=tf.one_hot(labels_placeholder, num_classes)) mean_losses = tf.reduce_mean(losses) optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(mean_losses) with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess: # 初始化变量 print("---init all variables---")
def layer_fn():
    """Create a cell with ``cell_fn`` and wrap it with output dropout.

    The output keep probability is ``keep_probability`` from the enclosing
    scope.
    """
    inner_cell = cell_fn()
    return DropoutWrapper(inner_cell, output_keep_prob=keep_probability)
def model_fn(features, labels, mode, params, word_embeddings_np=None, char_embeddings_np=None):
    """Estimator ``model_fn`` for an extractive question-answering model.

    Pipeline: embed question and passage (words + chars), encode both with
    ``encoder``, run a gated-attention bidirectional GRU over the passage
    with the question as memory, predict start/end logits with
    ``pointer_net``, and (outside PREDICT mode) score each passage for
    ranking.

    Args:
        features: Dict of input tensors; the keys read here are
            ``question_words``, ``question_chars``, ``question_length``,
            ``question_char_length``, ``passage_words``, ``passage_chars``,
            ``passage_length``, ``passage_char_length``, ``partitions`` and
            ``partitions_len``.
        labels: Triple ``(answer_start, answer_end, passage_rank)``; not read
            in PREDICT mode.
        mode: A ``tf.estimator.ModeKeys`` value.
        params: Hyper-parameter object.
        word_embeddings_np: Value fed to the word-embedding placeholder at
            initialization (TRAIN mode only).
        char_embeddings_np: Value fed to the char-embedding placeholder at
            initialization (TRAIN mode only).

    Returns:
        An ``EstimatorSpec`` for TRAIN, EVAL or PREDICT mode.

    NOTE(review): the extent of the ``with tf.device`` / ``with
    tf.variable_scope`` blocks below was reconstructed from a
    whitespace-collapsed source -- confirm against version control.
    """
    attention_fun = partial(BahdanauAttention, num_units=params.units) if params.attention == 'bahdanau' \
        else partial(LuongAttention, num_units=2 * params.units)

    # Dropout is only active while training.
    dropout = params.dropout if mode == tf.estimator.ModeKeys.TRAIN else 0.0
    passage_count = params.passage_count if mode != tf.estimator.ModeKeys.TRAIN \
        else params.train_passage_count

    question_words_length = features['question_length']
    passage_words_length = features['passage_length']

    devices = get_devices()

    with tf.device('/cpu:0'):
        # The embedding tables are initialized from placeholders that are fed
        # through the Scaffold's init_feed_dict in TRAIN mode (see below).
        word_embeddings_placeholder = tf.placeholder(
            shape=[params.vocab_size, params.emb_size], dtype=tf.float32)
        char_embeddings_placeholder = tf.placeholder(
            shape=[params.char_vocab_size, params.char_emb_size],
            dtype=tf.float32)

        # word_embeddings = tf.create_partitioned_variables(shape=[params.vocab_size, params.emb_size],
        #                                                   slicing=[10, 1],
        #                                                   initializer=word_embeddings_placeholder,
        #                                                   trainable=False, name="word_embeddings")
        word_embeddings = tf.Variable(word_embeddings_placeholder,
                                      trainable=False,
                                      name="word_embeddings")
        char_embeddings = tf.Variable(char_embeddings_placeholder,
                                      trainable=False,
                                      name="char_embeddings")

        # noise_shape [vocab, 1] drops whole embedding rows at once.
        word_embeddings = tf.nn.dropout(word_embeddings,
                                        1.0 - dropout,
                                        noise_shape=[params.vocab_size, 1])
        char_embeddings = tf.nn.dropout(
            char_embeddings,
            1.0 - dropout,
            noise_shape=[params.char_vocab_size, 1])

        question_words_emb = tf.nn.embedding_lookup(word_embeddings,
                                                    features['question_words'])
        question_chars_emb = tf.nn.embedding_lookup(char_embeddings,
                                                    features['question_chars'])

        passage_words_emb = tf.nn.embedding_lookup(word_embeddings,
                                                   features['passage_words'])
        passage_chars_emb = tf.nn.embedding_lookup(char_embeddings,
                                                   features['passage_chars'])

    with tf.device(next(devices)):
        with tf.variable_scope('question_encoding'):
            question_enc = encoder(question_words_emb,
                                   question_words_length,
                                   question_chars_emb,
                                   features['question_char_length'],
                                   params,
                                   dropout=dropout)

    with tf.device(next(devices)):
        with tf.variable_scope('passage_encoding'):
            passage_enc = encoder(passage_words_emb,
                                  passage_words_length,
                                  passage_chars_emb,
                                  features['passage_char_length'],
                                  params,
                                  dropout=dropout)
    # question_enc = tf.Print(question_enc, [question_enc], summarize=1000)

    # Question-aware passage representation: bidirectional GRU whose cells
    # attend over the encoded question.
    with tf.variable_scope('attention'):
        attention = attention_fun(
            memory=question_enc, memory_sequence_length=question_words_length)
        cell_fw = GatedAttentionWrapper(
            attention,
            DropoutWrapper(
                GRUCell(params.units, name="attention_gru"),
                # output_keep_prob=1.0 - dropout,
                input_keep_prob=1.0 - dropout,
                # state_keep_prob=1.0 - dropout,
                variational_recurrent=True,
                input_size=4 * params.units,
                dtype=tf.float32),
            dropout=0)

        cell_bw = GatedAttentionWrapper(
            attention,
            DropoutWrapper(
                GRUCell(params.units, name="attention_gru"),
                # output_keep_prob=1.0 - dropout,
                input_keep_prob=1.0 - dropout,
                # state_keep_prob=1.0 - dropout
                variational_recurrent=True,
                input_size=4 * params.units,
                dtype=tf.float32),
            dropout=0)

        passage_repr, _ = tf.nn.bidirectional_dynamic_rnn(
            cell_fw,
            cell_bw,
            passage_enc,
            passage_words_length,
            dtype=tf.float32)
        passage_repr = tf.concat(passage_repr, -1)

    with tf.variable_scope('pointer'):
        question_att = attention_fun(
            memory=question_enc,
            memory_sequence_length=question_words_length,
            name="question_align")
        # A learned vector, tiled once per batch row, is used as the
        # attention query to pool the question into a single vector.
        pool_param = tf.get_variable('pool_param',
                                     shape=(question_att._num_units, ),
                                     initializer=tf.initializers.ones)
        pool_param = tf.reshape(
            tf.tile(pool_param, [tf.shape(question_enc)[0]]),
            (-1, question_att._num_units))

        question_alignments, _ = question_att(pool_param, None)
        question_pool = tf.reduce_sum(
            tf.expand_dims(question_alignments, -1) * question_enc, 1)

        logits1, logits2 = pointer_net(passage_repr,
                                       passage_words_length,
                                       question_pool,
                                       params,
                                       attention_fun=attention_fun,
                                       dropout=dropout)

    # Joint start/end probability matrix; matrix_band_part(…, 0, 15) keeps
    # only pairs with start <= end <= start + 15.
    outer = tf.matmul(tf.expand_dims(tf.nn.softmax(logits1), axis=2),
                      tf.expand_dims(tf.nn.softmax(logits2), axis=1))
    outer = tf.matrix_band_part(outer, 0, 15)
    p1 = tf.argmax(tf.reduce_max(outer, axis=2), axis=1)
    p2 = tf.argmax(tf.reduce_max(outer, axis=1), axis=1)

    # PREDICT returns before `labels` is touched.
    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {'start': p1, 'end': p2}
        export_outputs = {
            'prediction': tf.estimator.export.PredictOutput(predictions)
        }
        return tf.estimator.EstimatorSpec(mode,
                                          predictions=predictions,
                                          export_outputs=export_outputs)

    with tf.variable_scope('passage_ranking'):
        W_g = Dense(params.units, activation=tf.tanh, use_bias=False)
        v_g = Dense(1, use_bias=False)

        # The same memory/query layers are shared by every passage's
        # attention mechanism created in the loop.
        memory_layer = Dense(params.units,
                             name="memory_layer",
                             use_bias=False,
                             dtype=tf.float32)
        query_layer = Dense(params.units,
                            name="query_layer",
                            use_bias=False,
                            dtype=tf.float32)
        g = []

        for i in range(passage_count):
            # Select the rows belonging to passage i, split them per example
            # and pad each piece to params.passage_max_len rows.
            passage_mask = tf.boolean_mask(
                passage_repr, tf.equal(features['partitions'], i))
            passage_i = tf.split(passage_mask,
                                 features['partitions_len'][:, i])
            passage_i = [
                pad_to_shape_2d(
                    p, (tf.Dimension(params.passage_max_len), p.shape[1]))
                for p in passage_i
            ]
            passage_i = tf.stack(passage_i)
            passage_alignment, _ = ReusableBahdanauAttention(
                params.units,
                passage_i,
                features['partitions_len'][:, i],
                memory_layer=memory_layer,
                query_layer=query_layer,
                name="passage_align")(question_pool, None)

            passage_pool = tf.reduce_sum(
                tf.expand_dims(passage_alignment, -1) * passage_i, 1)
            g_i = v_g(W_g(tf.concat([question_pool, passage_pool], -1)))

            # g_i = tf.Print(g_i, [passage_mask, passage_i], message='is_nan_{}'.format(i), summarize=1000)
            g.append(g_i)

        # One ranking score per passage.
        g = tf.concat(g, -1)

    answer_start, answer_end, passage_rank = labels

    loss1 = tf.nn.softmax_cross_entropy_with_logits_v2(
        logits=logits1, labels=tf.stop_gradient(answer_start))
    loss2 = tf.nn.softmax_cross_entropy_with_logits_v2(
        logits=logits2, labels=tf.stop_gradient(answer_end))
    loss3 = tf.nn.softmax_cross_entropy_with_logits_v2(
        logits=g, labels=tf.stop_gradient(passage_rank))

    # loss1 = tf.Print(loss1, [tf.argmax(answer_start, -1), tf.argmax(answer_end, -1),
    #                          tf.reduce_mean(loss1), tf.reduce_mean(loss2), tf.reduce_mean(loss3)],
    #                  message="loss")

    # params.r weights the span loss against the ranking loss; with r >= 1
    # the ranking loss is dropped entirely.
    loss = (params.r * tf.reduce_mean(loss1 + loss2) + (1 - params.r) * tf.reduce_mean(loss3)) \
        if params.r < 1 else tf.reduce_mean(loss1 + loss2)

    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.AdadeltaOptimizer(
            learning_rate=params.learning_rate, epsilon=1e-6)
        global_step = tf.train.get_or_create_global_step()
        grads = optimizer.compute_gradients(loss)
        gradients, variables = zip(*grads)
        capped_grads, _ = tf.clip_by_global_norm(gradients, params.grad_clip)
        train_op = optimizer.apply_gradients(zip(capped_grads, variables),
                                             global_step=global_step)
        return EstimatorSpec(
            mode,
            loss=loss,
            train_op=train_op,
            # Feeds the embedding placeholders when variables are initialized.
            scaffold=tf.train.Scaffold(
                init_feed_dict={
                    word_embeddings_placeholder: word_embeddings_np,
                    char_embeddings_placeholder: char_embeddings_np
                }),
        )

    if mode == tf.estimator.ModeKeys.EVAL:
        table = lookup_ops.index_to_string_table_from_file(
            params.word_vocab_file, value_column_index=0, delimiter=" ")
        return EstimatorSpec(mode,
                             loss=loss,
                             eval_metric_ops={
                                 'rouge-l':
                                 extraction_metric(p1, p2,
                                                   tf.argmax(answer_start, -1),
                                                   tf.argmax(answer_end, -1),
                                                   features['passage_words'],
                                                   params, table),
                                 'f1':
                                 extraction_metric(p1,
                                                   p2,
                                                   tf.argmax(answer_start, -1),
                                                   tf.argmax(answer_end, -1),
                                                   features['passage_words'],
                                                   params,
                                                   table,
                                                   metric='f1')
                             })
def cell(num_units):
    """Return a ``BasicLSTMCell`` with ``num_units`` units wrapped in output dropout.

    The keep probability is read from ``keep_prob`` in the surrounding scope;
    it is not a parameter of this function.
    """
    lstm = tf.nn.rnn_cell.BasicLSTMCell(num_units=num_units)
    return DropoutWrapper(lstm, output_keep_prob=keep_prob)
def build_bottom_up_top_down_ic_model(sentence_input, img_features_input, dropout_input, num_tokens, num_labels,
                                      embeddings, embeddings_size, num_img_features, img_features_size,
                                      train_embeddings, rnn_hidden_size, multimodal_fusion_hidden_size,
                                      classification_hidden_size):
    """Build a sentence-guided image-attention classifier and return its logits.

    The sentence is encoded with an LSTM; its final hidden state scores every
    image region, the regions are averaged with the resulting softmax weights,
    and the attended image vector is fused with the sentence vector by an
    element-wise product before a stack of gated layers and a linear output.

    Args:
        sentence_input: int32 token ids, indexed [batch, time]. Id 0 is
            treated as padding: the sequence length is the count of non-zero
            ids per row (assumes padding only trails the sentence - TODO confirm).
        img_features_input: region features; used as a rank-3 tensor whose
            second dimension is ``num_img_features`` and whose last dimension
            is ``img_features_size``.
        dropout_input: despite the name, this is used as a *keep* probability
            everywhere (LSTM input/output dropout and ``tf.nn.dropout``).
        num_tokens: vocabulary size (rows of the embedding matrix).
        num_labels: size of the output layer.
        embeddings: pre-trained embeddings handed to
            ``glove_embeddings_initializer``, or ``None`` to initialise the
            embedding matrix from a normal distribution (stddev 0.05).
        embeddings_size: embedding dimensionality.
        num_img_features: number of image regions per example.
        img_features_size: dimensionality of each region feature.
        train_embeddings: whether the embedding matrix is trainable.
        rnn_hidden_size: LSTM hidden size (also the attention gate size).
        multimodal_fusion_hidden_size: size of the two fused representations.
        classification_hidden_size: size of the three gated classifier layers.

    Returns:
        The un-activated output of the final fully connected layer
        (``num_labels`` units).
    """
    # Sequence length = number of non-zero token ids in each row.
    is_token = tf.not_equal(sentence_input, tf.zeros_like(sentence_input, dtype=tf.int32))
    sentence_length = tf.cast(tf.reduce_sum(tf.cast(is_token, tf.int64), 1), tf.int32)

    use_pretrained = embeddings is not None
    if use_pretrained:
        embedding_initializer = glove_embeddings_initializer(embeddings)
    else:
        embedding_initializer = tf.random_normal_initializer(stddev=0.05)
    embedding_matrix = tf.get_variable(
        "embedding_matrix",
        shape=(num_tokens, embeddings_size),
        initializer=embedding_initializer,
        trainable=train_embeddings
    )
    if use_pretrained:
        print("Loaded GloVe embeddings!")

    sentence_embeddings = tf.nn.embedding_lookup(embedding_matrix, sentence_input)
    lstm_cell = DropoutWrapper(
        tf.nn.rnn_cell.LSTMCell(rnn_hidden_size),
        input_keep_prob=dropout_input,
        output_keep_prob=dropout_input
    )
    # Only the final state is used; the per-step outputs are discarded.
    _, sentence_final_states = tf.nn.dynamic_rnn(
        cell=lstm_cell,
        inputs=sentence_embeddings,
        sequence_length=sentence_length,
        dtype=tf.float32
    )

    normalized_img_features = tf.nn.l2_normalize(img_features_input, dim=2)

    # Repeat the sentence vector once per image region so it can be
    # concatenated with every region feature.
    reshaped_sentence = tf.reshape(
        tf.tile(sentence_final_states.h, [1, num_img_features]),
        [-1, num_img_features, rnn_hidden_size]
    )
    img_sentence_concatenation = tf.concat([normalized_img_features, reshaped_sentence], -1)
    gated_img_sentence_concatenation = gated_tanh(img_sentence_concatenation, rnn_hidden_size)

    # One un-normalised attention score per region (trailing dimension of 1).
    attention_scores = tf.contrib.layers.fully_connected(
        gated_img_sentence_concatenation, 1, activation_fn=None, biases_initializer=None
    )
    # Squeeze ONLY the trailing singleton axis. A bare tf.squeeze would also
    # drop the batch or region axis whenever it happens to have size 1, which
    # makes the softmax normalise over the wrong axis and breaks the matmul.
    a_sentence = tf.nn.softmax(tf.squeeze(attention_scores, axis=-1))

    # Attention-weighted sum over regions; again drop only the length-1 axis
    # introduced by expand_dims.
    v_head_sentence = tf.squeeze(
        tf.matmul(tf.expand_dims(a_sentence, 1), normalized_img_features),
        axis=1
    )
    v_head_sentence.set_shape((sentence_embeddings.get_shape()[0], img_features_size))

    gated_sentence = tf.nn.dropout(
        gated_tanh(sentence_final_states.h, multimodal_fusion_hidden_size),
        keep_prob=dropout_input
    )
    gated_img_features_sentence = tf.nn.dropout(
        gated_tanh(v_head_sentence, multimodal_fusion_hidden_size),
        keep_prob=dropout_input
    )
    h_sentence_img = tf.multiply(gated_sentence, gated_img_features_sentence)

    # Three gated classifier layers, each followed by dropout.
    hidden = h_sentence_img
    for _ in range(3):
        hidden = tf.nn.dropout(
            gated_tanh(hidden, classification_hidden_size),
            keep_prob=dropout_input
        )

    return tf.contrib.layers.fully_connected(
        hidden,
        num_labels,
        activation_fn=None
    )
# Training hyper-parameters.
learning_rate = 0.002
training_epochs = 10
batch_size = 100
steps_for_print = 5
steps_for_validate = 10

# Keep probability for the dropout applied to each LSTM layer's output;
# fed at run time.
keep_prob = tf.placeholder(tf.float32)

# input place holders
X = tf.placeholder(tf.float32, [None, 784])
# View each flat 784-vector as a sequence of 28 steps with 28 features each.
X_img = tf.reshape(X, [-1, 28, 28])
Y = tf.placeholder(tf.int32, [None, 1])
# one_hot on an [N, 1] tensor yields [N, 1, 10]; flatten it back to [N, 10].
Y_onehot = tf.reshape(tf.one_hot(Y, 10), [-1, 10])

# layers: three stacked 256-unit LSTM cells with output dropout.
cells = tf.nn.rnn_cell.MultiRNNCell([
    DropoutWrapper(tf.nn.rnn_cell.BasicLSTMCell(num_units=256), output_keep_prob=keep_prob)
    for _ in range(3)
])
# Size the zero initial state from the batch that is actually fed rather than
# the Python constant `batch_size`: X declares a `None` batch dimension, and a
# fixed-size state would reject any feed that does not have exactly
# `batch_size` rows (e.g. a final partial batch or a validation set).
h0 = cells.zero_state(tf.shape(X_img)[0], dtype=tf.float32)
output, hs = tf.nn.dynamic_rnn(cells, inputs=X_img, initial_state=h0)

# Classify from the top layer's output at the last time step.
L1 = output[:, -1, :]
W2 = tf.Variable(tf.random_normal([256, 10]))
b2 = tf.Variable(tf.random_normal([10]))
hypothesis = tf.matmul(L1, W2) + b2

# define cost/loss & optimizer
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=hypothesis, labels=Y_onehot))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
def build_lstm_vte_model(premise_input, hypothesis_input, img_features_input, dropout_input, num_tokens, num_labels,
                         embeddings, embeddings_size, train_embeddings, rnn_hidden_size,
                         multimodal_fusion_hidden_size, classification_hidden_size):
    """Build the premise/hypothesis/image classifier graph and return its logits.

    Premise and hypothesis are embedded with one shared embedding matrix and
    each encoded by ``tf.nn.dynamic_rnn``. Both final hidden states are
    projected, multiplied element-wise with a projection of the normalised
    image features, concatenated, and passed through three ReLU layers and a
    linear output layer.

    Args:
        premise_input, hypothesis_input: int32 token ids, indexed
            [batch, time]. The sequence length is the number of non-zero ids
            per row, i.e. id 0 is treated as padding.
        img_features_input: image features, L2-normalised along dimension 1
            (presumably [batch, feature_dim] - TODO confirm against caller).
        dropout_input: used as the *keep* probability of the LSTM's input and
            output dropout.
        num_tokens: vocabulary size (rows of the embedding matrix).
        num_labels: size of the output layer.
        embeddings: pre-trained embeddings handed to
            ``glove_embeddings_initializer``, or ``None`` for a random normal
            initialisation (stddev 0.05).
        embeddings_size: embedding dimensionality.
        train_embeddings: whether the embedding matrix is trainable.
        rnn_hidden_size: LSTM hidden size.
        multimodal_fusion_hidden_size: size of the text and image projections.
        classification_hidden_size: size of the three ReLU classifier layers.

    Returns:
        The un-activated output of the final fully connected layer
        (``num_labels`` units).
    """
    def count_tokens(token_ids):
        # Number of non-zero ids in each row, as int32.
        non_padding = tf.not_equal(token_ids, tf.zeros_like(token_ids, dtype=tf.int32))
        return tf.cast(tf.reduce_sum(tf.cast(non_padding, tf.int64), 1), tf.int32)

    def relu_layer(inputs, size):
        return tf.contrib.layers.fully_connected(inputs, size, activation_fn=tf.nn.relu)

    premise_length = count_tokens(premise_input)
    hypothesis_length = count_tokens(hypothesis_input)

    has_pretrained = embeddings is not None
    if has_pretrained:
        matrix_initializer = glove_embeddings_initializer(embeddings)
    else:
        matrix_initializer = tf.random_normal_initializer(stddev=0.05)
    embedding_matrix = tf.get_variable(
        "embedding_matrix",
        shape=(num_tokens, embeddings_size),
        initializer=matrix_initializer,
        trainable=train_embeddings
    )
    if has_pretrained:
        print("Loaded GloVe embeddings!")

    premise_embeddings = tf.nn.embedding_lookup(embedding_matrix, premise_input)
    hypothesis_embeddings = tf.nn.embedding_lookup(embedding_matrix, hypothesis_input)

    # NOTE(review): the same cell object is handed to both dynamic_rnn calls.
    # Presumably this is meant to share encoder weights between premise and
    # hypothesis; how variable reuse behaves depends on the TF version - confirm.
    lstm_cell = DropoutWrapper(
        tf.nn.rnn_cell.LSTMCell(rnn_hidden_size),
        input_keep_prob=dropout_input,
        output_keep_prob=dropout_input
    )
    # Only the final states are consumed; per-step outputs are ignored.
    _, premise_final_states = tf.nn.dynamic_rnn(
        cell=lstm_cell,
        inputs=premise_embeddings,
        sequence_length=premise_length,
        dtype=tf.float32
    )
    _, hypothesis_final_states = tf.nn.dynamic_rnn(
        cell=lstm_cell,
        inputs=hypothesis_embeddings,
        sequence_length=hypothesis_length,
        dtype=tf.float32
    )

    normalized_img_features = tf.nn.l2_normalize(img_features_input, dim=1)

    # Project the three inputs to a common size before fusing them.
    premise_hidden_features = relu_layer(premise_final_states.h, multimodal_fusion_hidden_size)
    hypothesis_hidden_features = relu_layer(hypothesis_final_states.h, multimodal_fusion_hidden_size)
    img_hidden_features = relu_layer(normalized_img_features, multimodal_fusion_hidden_size)

    premise_img_multimodal_fusion = tf.multiply(premise_hidden_features, img_hidden_features)
    hypothesis_img_multimodal_fusion = tf.multiply(hypothesis_hidden_features, img_hidden_features)
    final_concatenation = tf.concat(
        [premise_img_multimodal_fusion, hypothesis_img_multimodal_fusion],
        axis=1
    )

    # Three ReLU layers followed by a linear output layer.
    hidden = final_concatenation
    for _ in range(3):
        hidden = relu_layer(hidden, classification_hidden_size)

    return tf.contrib.layers.fully_connected(hidden, num_labels, activation_fn=None)