def __build_key_memory(self):
    #print ("memory")
    key_states = []

    with variable_scope.variable_scope("EncoderRNN"):
        for i in xrange(0, self.hps.key_slots):
            if i > 0:
                variable_scope.get_variable_scope().reuse_variables()
            (outputs, state_fw, state_bw) = rnn.static_bidirectional_rnn(
                self.enc_cell_fw, self.enc_cell_bw, self.emb_key_inps[i], dtype=tf.float32)
            key_state = array_ops.concat([state_fw, state_bw], 1)
            key_states.append(key_state)

    with variable_scope.variable_scope("key_memory"):
        key_states = [array_ops.reshape(e, [-1, 1, self.enc_cell_fw.output_size * 2])
                      for e in key_states]
        key_states = array_ops.concat(key_states, 1)
        key_states = tf.multiply(self.key_mask, key_states)

        final_state = math_ops.reduce_mean(key_states, axis=1)
        final_state = linear(final_state, self.hps.hidden_size, True, scope="key_initial")
        final_state = tf.tanh(final_state)

    return final_state, key_states

def __build_encoder(self, step):
    with variable_scope.variable_scope("EncoderRNN", reuse=True):
        (outputs, enc_state_fw, enc_state_bw) = rnn.static_bidirectional_rnn(
            self.enc_cell_fw, self.enc_cell_bw,
            self.emb_enc_inps[step][:self.enc_len], dtype=tf.float32)
        enc_outs = outputs

    with variable_scope.variable_scope("seq2seq_Encoder"):
        enc_state = enc_state_bw
        final_state = linear(enc_state, self.hps.hidden_size, True, scope="enc_initial")
        final_state = tf.tanh(final_state)

        top_states = [array_ops.reshape(e, [-1, 1, self.enc_cell_fw.output_size * 2])
                      for e in enc_outs]
        attention_states = array_ops.concat(top_states, 1)
        final_attn_states = tf.multiply(self.enc_mask[step], attention_states)

    return final_state, final_attn_states, enc_outs

def testTimeReversedFusedRNN(self):
    with self.cached_session() as sess:
        initializer = init_ops.random_uniform_initializer(-0.01, 0.01, seed=19890213)
        fw_cell = rnn_cell.BasicRNNCell(10)
        bw_cell = rnn_cell.BasicRNNCell(10)
        batch_size = 5
        input_size = 20
        timelen = 15
        inputs = constant_op.constant(np.random.randn(timelen, batch_size, input_size))

        # test bi-directional rnn
        with variable_scope.variable_scope("basic", initializer=initializer):
            unpacked_inputs = array_ops.unstack(inputs)
            outputs, fw_state, bw_state = rnn.static_bidirectional_rnn(
                fw_cell, bw_cell, unpacked_inputs, dtype=dtypes.float64)
            packed_outputs = array_ops.stack(outputs)
            basic_vars = [v for v in variables.trainable_variables()
                          if v.name.startswith("basic/")]
            sess.run([variables.global_variables_initializer()])
            basic_outputs, basic_fw_state, basic_bw_state = sess.run(
                [packed_outputs, fw_state, bw_state])
            basic_grads = sess.run(gradients_impl.gradients(packed_outputs, inputs))
            basic_wgrads = sess.run(gradients_impl.gradients(packed_outputs, basic_vars))

        with variable_scope.variable_scope("fused", initializer=initializer):
            fused_cell = fused_rnn_cell.FusedRNNCellAdaptor(rnn_cell.BasicRNNCell(10))
            fused_bw_cell = fused_rnn_cell.TimeReversedFusedRNN(
                fused_rnn_cell.FusedRNNCellAdaptor(rnn_cell.BasicRNNCell(10)))
            fw_outputs, fw_state = fused_cell(inputs, dtype=dtypes.float64, scope="fw")
            bw_outputs, bw_state = fused_bw_cell(inputs, dtype=dtypes.float64, scope="bw")
            outputs = array_ops.concat([fw_outputs, bw_outputs], 2)
            fused_vars = [v for v in variables.trainable_variables()
                          if v.name.startswith("fused/")]
            sess.run([variables.global_variables_initializer()])
            fused_outputs, fused_fw_state, fused_bw_state = sess.run(
                [outputs, fw_state, bw_state])
            fused_grads = sess.run(gradients_impl.gradients(outputs, inputs))
            fused_wgrads = sess.run(gradients_impl.gradients(outputs, fused_vars))

        self.assertAllClose(basic_outputs, fused_outputs)
        self.assertAllClose(basic_fw_state, fused_fw_state)
        self.assertAllClose(basic_bw_state, fused_bw_state)
        self.assertAllClose(basic_grads, fused_grads)
        for basic, fused in zip(basic_wgrads, fused_wgrads):
            self.assertAllClose(basic, fused, rtol=1e-2, atol=1e-2)

def __build_encoder_state_computer(self, emb_encoder_inputs, encoder_mask):
    with variable_scope.variable_scope(variable_scope.get_variable_scope(), reuse=None):
        with variable_scope.variable_scope("seq2seq_Encoder"):
            encoder_cell_fw = tf.nn.rnn_cell.LSTMCell(self.hidden_size)
            encoder_cell_bw = tf.nn.rnn_cell.LSTMCell(self.hidden_size)
            encoder_cell_fw = tf.nn.rnn_cell.DropoutWrapper(
                encoder_cell_fw, output_keep_prob=self.keep_prob)
            encoder_cell_bw = tf.nn.rnn_cell.DropoutWrapper(
                encoder_cell_bw, output_keep_prob=self.keep_prob)

            (outputs, encoder_state_fw, encoder_state_bw) = rnn.static_bidirectional_rnn(
                encoder_cell_fw, encoder_cell_bw, emb_encoder_inputs, dtype=tf.float32)

            encoder_outputs = outputs
            encoder_state_c = encoder_state_bw[0]
            encoder_state_m = encoder_state_bw[1]

            with variable_scope.variable_scope("initial_transfor_c"):
                final_state_c = core_rnn_cell._linear(encoder_state_c, self.hidden_size, True)
                final_state_c = tf.tanh(final_state_c)

            with variable_scope.variable_scope("initial_transfor_m"):
                final_state_m = core_rnn_cell._linear(encoder_state_m, self.hidden_size, True)
                final_state_m = tf.tanh(final_state_m)

            final_state = tf.nn.rnn_cell.LSTMStateTuple(final_state_c, final_state_m)

            # First calculate a concatenation of encoder outputs to put attention on.
            # cell.output_size is embedding_size
            top_states = [array_ops.reshape(e, [-1, 1, encoder_cell_fw.output_size * 2])
                          for e in encoder_outputs]
            attention_states = array_ops.concat(top_states, 1)
            final_attention_states = tf.multiply(encoder_mask, attention_states)

    return final_state, final_attention_states

def __build_encoder(self, step):
    # reuse=True: the "EncoderRNN" variables were already created in __build_key_memory
    with variable_scope.variable_scope("EncoderRNN", reuse=True):
        (outputs, enc_state_fw, enc_state_bw) = rnn.static_bidirectional_rnn(
            self.enc_cell_fw, self.enc_cell_bw,
            self.emb_enc_inps[step][:self.enc_len],
            dtype=tf.float32)  # the input is a list of length bucket[0]; each element is a [batch_size, dim] tensor
        enc_outs = outputs  # list of length time; each element is [batch, cell_fw.output_size + cell_bw.output_size]

    with variable_scope.variable_scope("seq2seq_Encoder"):
        enc_state = enc_state_bw  # backward final state
        final_state = linear(enc_state, self.hps.hidden_size, True, scope="enc_initial")
        final_state = tf.tanh(final_state)

        top_states = [array_ops.reshape(e, [-1, 1, self.enc_cell_fw.output_size * 2])
                      for e in enc_outs]
        attention_states = array_ops.concat(top_states, 1)  # [batch_size, enc_len, self.enc_cell_fw.output_size*2]
        final_attn_states = tf.multiply(self.enc_mask[step], attention_states)  # enc_mask has shape [batch_size, self.enc_len, 1]

    return final_state, final_attn_states, enc_outs

def BiRNN(x, n_input, n_steps, n_hidden):
    # Prepare data shape to match `bidirectional_rnn` function requirements
    # Current data input shape: (batch_size, n_steps, n_input)
    # Required shape: 'n_steps' tensors list of shape (batch_size, n_input)

    # Permuting batch_size and n_steps
    x = tf.transpose(x, [1, 0, 2])
    # Reshape to (n_steps*batch_size, n_input)
    x = tf.reshape(x, [-1, n_input])
    # Split to get a list of 'n_steps' tensors of shape (batch_size, n_input)
    # x = tf.split(0, n_steps, x)  # old code
    x = tf.split(x, n_steps, 0)  # new code

    # Define lstm cells with tensorflow
    # Forward direction cell
    lstm_fw_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
    # Backward direction cell
    lstm_bw_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)

    # Get lstm cell output
    outputs, _, _ = rnn.static_bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, x,
                                                 dtype=tf.float32)
    return outputs

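A minimal sketch of wiring the BiRNN helper above into a classifier, assuming TF 1.x graph mode and the same `rnn`/`rnn_cell` imports the snippet relies on; the sizes and the `weights`/`biases`/loss variables below are illustrative, not part of the original code:

# Hypothetical usage of BiRNN (sizes and output projection are assumptions).
n_steps, n_input, n_hidden, n_classes = 28, 28, 128, 10

x = tf.placeholder(tf.float32, [None, n_steps, n_input])
y = tf.placeholder(tf.float32, [None, n_classes])

outputs = BiRNN(x, n_input, n_steps, n_hidden)  # list of n_steps tensors, each [batch, 2*n_hidden]

# Project the last timestep onto class logits.
weights = tf.Variable(tf.truncated_normal([2 * n_hidden, n_classes], stddev=0.1))
biases = tf.Variable(tf.zeros([n_classes]))
logits = tf.matmul(outputs[-1], weights) + biases

loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits))
train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)
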
def __build_key_memory(self):
    #print ("memory")
    key_states = []

    with variable_scope.variable_scope("EncoderRNN"):
        for i in xrange(0, self.hps.key_slots):
            if i > 0:
                # reuse the existing variables instead of creating new ones
                variable_scope.get_variable_scope().reuse_variables()
            (outputs, state_fw, state_bw) = rnn.static_bidirectional_rnn(
                self.enc_cell_fw, self.enc_cell_bw, self.emb_key_inps[i],
                dtype=tf.float32)  # emb_key_inps[i] is a list of length step; each element is a [batch_size, dim] tensor
            # state_fw and state_bw each have shape [batch_size, hidden_size]
            key_state = array_ops.concat([state_fw, state_bw], 1)
            # key_state has shape [batch_size, 2*hidden_size]
            key_states.append(key_state)

    with variable_scope.variable_scope("key_memory"):
        key_states = [array_ops.reshape(e, [-1, 1, self.enc_cell_fw.output_size * 2])
                      for e in key_states]
        key_states = array_ops.concat(key_states, 1)  # [-1, key_slots, self.enc_cell_fw.output_size*2]
        key_states = tf.multiply(self.key_mask, key_states)  # element-wise, with broadcasting

        final_state = math_ops.reduce_mean(key_states, axis=1)  # [-1, self.enc_cell_fw.output_size*2]
        final_state = linear(final_state, self.hps.hidden_size, True, scope="key_initial")  # [batch_size, hidden_size]
        final_state = tf.tanh(final_state)

    return final_state, key_states

def build_model(self):
    with tf.device('/gpu:0'):
        with tf.variable_scope('deepcas') as scope:
            with tf.variable_scope('embedding'):
                x_vector = tf.nn.dropout(
                    tf.nn.embedding_lookup(self.embedding, self.x), self.dropout_prob)
                # (batch_size, n_sequences, n_steps, n_input)

            with tf.variable_scope('BiGRU'):
                x_vector = tf.transpose(x_vector, [1, 0, 2, 3])
                # (n_sequences, batch_size, n_steps, n_input)
                x_vector = tf.reshape(x_vector, [-1, self.n_steps, self.n_input])
                # (n_sequences*batch_size, n_steps, n_input)
                x_vector = tf.transpose(x_vector, [1, 0, 2])
                # (n_steps, n_sequences*batch_size, n_input)
                x_vector = tf.reshape(x_vector, [-1, self.n_input])
                # (n_steps*n_sequences*batch_size, n_input)
                # Split to get a list of 'n_steps' tensors of shape
                # (n_sequences*batch_size, n_input)
                x_vector = tf.split(x_vector, self.n_steps, 0)

                outputs, _, _ = rnn.static_bidirectional_rnn(
                    self.gru_fw_cell, self.gru_bw_cell, x_vector, dtype=tf.float32)

                hidden_states = tf.transpose(tf.stack(outputs), [1, 0, 2])
                # (n_sequences*batch_size, n_steps, 2*n_hidden_gru)
                hidden_states = tf.transpose(
                    tf.reshape(hidden_states,
                               [self.n_sequences, -1, self.n_steps, 2 * self.n_hidden_gru]),
                    [1, 0, 2, 3])
                # (batch_size, n_sequences, n_steps, 2*n_hidden_gru)

            with tf.variable_scope('attention'):
                # attention over sequence steps
                attention_step = tf.nn.softmax(self.p_step)
                attention_step = tf.transpose(attention_step, [1, 0])
                attention_result = batched_scalar_mul(attention_step, hidden_states)
                # (batch_size, n_sequences, n_steps, 2*n_hidden_gru)

                # attention over sequence batches
                p_geo = tf.sigmoid(self.a_geo)
                attention_batch = tf.pow(
                    tf.multiply(p_geo, tf.ones_like(self.sz)),
                    tf.div(1.0 + tf.log(self.sz), tf.log(2.0)))
                attention_batch_seq = tf.tile(attention_batch, [1, self.sequence_batch_size])
                for i in range(1, int(self.n_sequences / self.sequence_batch_size)):
                    attention_batch_seq = tf.concat([
                        attention_batch_seq,
                        tf.tile(tf.pow(1 - attention_batch, i) * attention_batch,
                                [1, self.sequence_batch_size])
                    ], 1)
                attention_batch_lin = tf.reshape(attention_batch_seq, [-1, 1])

                shape = attention_result.get_shape()
                shape = [-1, int(shape[1]), int(shape[2]), int(shape[3])]
                attention_result_t = tf.multiply(
                    attention_batch_lin,
                    tf.reshape(attention_result, [-1, shape[2] * shape[3]]))
                attention_result = tf.reshape(attention_result_t,
                                              [-1, shape[1], shape[2], shape[3]])

                hidden_graph = tf.reduce_sum(attention_result, reduction_indices=[1, 2])

            with tf.variable_scope('dense'):
                dense1 = self.activation(
                    tf.add(tf.matmul(hidden_graph, self.weights['dense1']),
                           self.biases['dense1']))
                dense2 = self.activation(
                    tf.add(tf.matmul(dense1, self.weights['dense2']),
                           self.biases['dense2']))
                pred = self.activation(
                    tf.add(tf.matmul(dense2, self.weights['out']),
                           self.biases['out']))

    return pred

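The chain of transposes and reshapes in the BiGRU block is easy to get wrong, so here is a small standalone shape check (NumPy, with assumed sizes) of the same layout transformation that turns a (batch, n_sequences, n_steps, n_input) tensor into the length-n_steps list the static RNN expects:

# Shape-only sketch of the layout used above; sizes are illustrative assumptions.
import numpy as np

batch_size, n_sequences, n_steps, n_input = 4, 5, 10, 8
x = np.zeros((batch_size, n_sequences, n_steps, n_input))

x = x.transpose(1, 0, 2, 3)           # (n_sequences, batch, n_steps, n_input)
x = x.reshape(-1, n_steps, n_input)   # (n_sequences*batch, n_steps, n_input)
x = x.transpose(1, 0, 2)              # (n_steps, n_sequences*batch, n_input)
x = x.reshape(-1, n_input)            # (n_steps*n_sequences*batch, n_input)
steps = np.split(x, n_steps, axis=0)  # list of n_steps arrays, each (n_sequences*batch, n_input)

assert steps[0].shape == (n_sequences * batch_size, n_input)
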
def _build(self, incoming, *args, **kwargs):
    """
    Args:
        incoming: `Tensor`. 3-D Tensor Layer [samples, timesteps, input dim].
    """
    assert (self.rnncell_fw.output_size ==
            self.rnncell_bw.output_size), "RNN Cells number of units must match!"

    input_shape = get_shape(incoming)
    # TODO: DropoutWrapper
    inference = incoming
    # If a tensor given, convert it to a per timestep list
    if type(inference) not in [list, np.array] and not self.dynamic:
        ndim = len(input_shape)
        assert ndim >= 3, 'Input dim should be at least 3.'
        axes = [1, 0] + list(xrange(2, ndim))
        inference = tf.transpose(inference, axes)
        inference = tf.unstack(inference)

    sequence_length = None
    if self.dynamic:
        sequence_length = retrieve_seq_length_op(
            incoming if isinstance(incoming, tf.Tensor) else tf.stack(incoming))
        # bidirectional_dynamic_rnn returns (outputs, (state_fw, state_bw))
        outputs, (states_fw, states_bw) = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=self.rnncell_fw,
            cell_bw=self.rnncell_bw,
            inputs=inference,
            initial_state_fw=self.initial_state_fw,
            initial_state_bw=self.initial_state_bw,
            sequence_length=sequence_length,
            dtype=tf.float32)
    else:
        outputs, states_fw, states_bw = rnn.static_bidirectional_rnn(
            cell_fw=self.rnncell_fw,
            cell_bw=self.rnncell_bw,
            inputs=inference,
            initial_state_fw=self.initial_state_fw,
            initial_state_bw=self.initial_state_bw,
            dtype=tf.float32)

    for v in [self.rnncell_fw.w, self.rnncell_fw.b,
              self.rnncell_bw.w, self.rnncell_bw.b]:
        if hasattr(v, '__len__'):
            for var in v:
                track(var, tf.GraphKeys.LAYER_VARIABLES, self.module_name)
        else:
            track(v, tf.GraphKeys.LAYER_VARIABLES, self.module_name)

    tf.add_to_collection(tf.GraphKeys.ACTIVATIONS, outputs[-1])

    if self.dynamic:
        if self.return_seq:
            o = outputs
        else:
            # only the forward outputs are used for the last relevant output
            o = get_sequence_relevant_output(outputs[0], sequence_length)
    else:
        o = outputs if self.return_seq else outputs[-1]

    track(o, tf.GraphKeys.LAYER_TENSOR, self.module_name)

    return (o, states_fw, states_bw) if self.return_states else o

def generate_embedding_RNN_output(encoder_inputs,
                                  cell,
                                  num_encoder_symbols,
                                  word_embedding_size,
                                  num_heads=1,
                                  dtype=dtypes.float32,
                                  scope=None,
                                  initial_state_attention=False,
                                  sequence_length=None,
                                  bidirectional_rnn=False):
    """
    Generate RNN state outputs with word embeddings as inputs.
        - Note that this example code does not include output label dependency modeling.
          One may add a loop function, as in the rnn_decoder function in tf seq2seq.py,
          to feed the emitted label embedding back into the RNN state.
    """
    with variable_scope.variable_scope(scope or "generate_embedding_RNN_output"):
        if bidirectional_rnn:
            encoder_cell_fw = cell
            encoder_cell_bw = cell
            embedding = variable_scope.get_variable(
                "embedding", [num_encoder_symbols, word_embedding_size])
            encoder_embedded_inputs = [
                embedding_ops.embedding_lookup(embedding, encoder_input)
                for encoder_input in encoder_inputs
            ]
            #encoder_outputs, encoder_state_fw, encoder_state_bw = rnn.bidirectional_rnn(
            encoder_outputs, encoder_state_fw, encoder_state_bw = rnn.static_bidirectional_rnn(
                encoder_cell_fw, encoder_cell_bw, encoder_embedded_inputs,
                sequence_length=sequence_length, dtype=dtype)
            encoder_state = array_ops.concat([
                array_ops.concat(encoder_state_fw, 1),
                array_ops.concat(encoder_state_bw, 1)
            ], 1)
            top_states = [array_ops.reshape(e, [-1, 1, cell.output_size * 2])
                          for e in encoder_outputs]
            attention_states = array_ops.concat(top_states, 1)
        else:
            encoder_cell = cell
            embedding = variable_scope.get_variable(
                "embedding", [num_encoder_symbols, word_embedding_size])
            encoder_embedded_inputs = [
                embedding_ops.embedding_lookup(embedding, encoder_input)
                for encoder_input in encoder_inputs
            ]
            encoder_outputs, encoder_state = rnn.static_rnn(
                encoder_cell, encoder_embedded_inputs,
                sequence_length=sequence_length, dtype=dtype)
            encoder_state = array_ops.concat(encoder_state, 1)
            top_states = [array_ops.reshape(e, [-1, 1, cell.output_size])
                          for e in encoder_outputs]
            attention_states = array_ops.concat(top_states, 1)

        return encoder_outputs, encoder_state, attention_states

def _build(self, incoming, *args, **kwargs):
    """
    Args:
        incoming: `Tensor`. 3-D Tensor Layer [samples, timesteps, input dim].
    """
    assert (self.rnncell_fw.output_size ==
            self.rnncell_bw.output_size), "RNN Cells number of units must match!"

    sequence_length = kwargs.get('sequence_length')
    if self.dynamic and sequence_length is None:
        sequence_length = retrieve_seq_length_op(
            incoming if isinstance(incoming, tf.Tensor) else tf.stack(incoming))

    input_shape = get_shape(incoming)
    # TODO: DropoutWrapper
    inference = incoming
    # If a static rnn and tensor given, convert it to a per timestep list
    if type(inference) not in [list, np.array] and not self.dynamic:
        ndim = len(input_shape)
        assert ndim >= 3, 'Input dim should be at least 3.'
        axes = [1, 0] + list(xrange(2, ndim))
        inference = tf.transpose(inference, axes)
        inference = tf.unstack(value=inference)

    if self.dynamic:
        # outputs are a tuple of (fw, bw) outputs
        outputs, (states_fw, states_bw) = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=self.rnncell_fw,
            cell_bw=self.rnncell_bw,
            inputs=inference,
            initial_state_fw=self.initial_state_fw,
            initial_state_bw=self.initial_state_bw,
            sequence_length=sequence_length,
            dtype=tf.float32)
    else:
        # outputs are a concatenation of both bw and fw outputs
        outputs, states_fw, states_bw = rnn.static_bidirectional_rnn(
            cell_fw=self.rnncell_fw,
            cell_bw=self.rnncell_bw,
            inputs=inference,
            initial_state_fw=self.initial_state_fw,
            initial_state_bw=self.initial_state_bw,
            sequence_length=sequence_length,
            dtype=tf.float32)

    for v in [self.rnncell_fw.w, self.rnncell_fw.b,
              self.rnncell_bw.w, self.rnncell_bw.b]:
        if hasattr(v, '__len__'):
            for var in v:
                track(var, tf.GraphKeys.LAYER_VARIABLES, self.module_name)
        else:
            track(v, tf.GraphKeys.LAYER_VARIABLES, self.module_name)

    if self.dynamic:
        tf.add_to_collection(tf.GraphKeys.ACTIVATIONS, outputs[0][-1])
    else:
        tf.add_to_collection(tf.GraphKeys.ACTIVATIONS, outputs[-1])

    if self.dynamic:
        if self.return_seq:
            o = outputs
        else:
            # we are only interested in the fw pass here
            o = get_sequence_relevant_output(outputs[0], sequence_length)
    else:
        o = outputs if self.return_seq else outputs[-1]

    track(o, tf.GraphKeys.LAYER_TENSOR, self.module_name)

    return (o, states_fw, states_bw) if self.return_states else o

def __init__(self, sequence_length, num_classes, text_vocab_size, text_embedding_size,
             pos_vocab_size, pos_embedding_size, filter_sizes, num_filters,
             l2_reg_lambda=0.0):
    # Placeholders for input, output and dropout
    self.input_text = tf.placeholder(tf.int32, shape=[None, sequence_length], name='input_text')
    self.input_p1 = tf.placeholder(tf.int32, shape=[None, sequence_length], name='input_p1')
    self.input_p2 = tf.placeholder(tf.int32, shape=[None, sequence_length], name='input_p2')
    self.input_y = tf.placeholder(tf.float32, shape=[None, num_classes], name='input_y')
    self.dropout_keep_prob = tf.placeholder(tf.float32, name='dropout_keep_prob')

    initializer = tf.keras.initializers.glorot_normal

    # Embedding layer
    with tf.device('/cpu:0'), tf.variable_scope("text-embedding"):
        self.W_text = tf.Variable(
            tf.random_uniform([text_vocab_size, text_embedding_size], -0.25, 0.25),
            name="W_text")
        self.text_embedded_chars = tf.nn.embedding_lookup(self.W_text, self.input_text)
        self.text_embedded_chars_expanded = tf.expand_dims(self.text_embedded_chars, -1)

    with tf.device('/cpu:0'), tf.variable_scope("position-embedding"):
        self.W_pos = tf.get_variable("W_pos", [pos_vocab_size, pos_embedding_size],
                                     initializer=initializer())
        self.p1_embedded_chars = tf.nn.embedding_lookup(self.W_pos, self.input_p1)
        self.p2_embedded_chars = tf.nn.embedding_lookup(self.W_pos, self.input_p2)
        self.p1_embedded_chars_expanded = tf.expand_dims(self.p1_embedded_chars, -1)
        self.p2_embedded_chars_expanded = tf.expand_dims(self.p2_embedded_chars, -1)

    self.embedded_chars_expanded = tf.concat([
        self.text_embedded_chars_expanded,
        self.p1_embedded_chars_expanded,
        self.p2_embedded_chars_expanded
    ], 2)
    _embedding_size = text_embedding_size + 2 * pos_embedding_size

    hidden_size = 128
    num_layer = 1

    with tf.variable_scope('input_encode'):

        def create_cell():
            if self.dropout_keep_prob < 1.0:
                single_cell = lambda: BasicLSTMCell(hidden_size)
                hidden = MultiRNNCell([single_cell() for _ in range(num_layer)])
                hidden = DropoutWrapper(hidden,
                                        input_keep_prob=self.dropout_keep_prob,
                                        output_keep_prob=self.dropout_keep_prob)
            else:
                single_cell = lambda: BasicLSTMCell(hidden_size)
                hidden = MultiRNNCell([single_cell() for _ in range(num_layer)])
            return hidden

        self.init_hidden_fw = create_cell()
        self.init_hidden_bw = create_cell()

        outputs, hidden_fw, hidden_bw = static_bidirectional_rnn(
            self.init_hidden_fw, self.init_hidden_bw, self.embedded_chars_expanded,
            sequence_length=self.seq_length, dtype=tf.float32)
        # outputs [(,256),..,(,256)]

        # get last layer state
        last_hidden_fw = hidden_fw[-1]  # (c, h) ((,128), (,128))
        last_hidden_bw = hidden_bw[-1]  # (c, h)
        self.last_hidden_state = tf.concat(
            [tf.concat(last_hidden_fw, 1), tf.concat(last_hidden_bw, 1)], 1)  # (, 4*128)

        self.all_hidden_state = [
            tf.reshape(o, [-1, 1, self.init_hidden_fw.output_size + self.init_hidden_bw.output_size])
            for o in outputs
        ]  # [(,1,256),...(,1,256)]
        self.all_hidden_state = tf.concat(self.all_hidden_state, 1)  # (,30,256)

    with tf.variable_scope("decode_output"):
        batch_size = self.all_hidden_state.get_shape()[0]
        seq_length = self.all_hidden_state.get_shape()[1]
        att_size = self.all_hidden_state.get_shape()[2]
        source_hidden = tf.reshape(self.all_hidden_state,
                                   [-1, seq_length, 1, att_size])  # (B,30,1,256)

        attn_weight_list = []
        context_vec_list = []  # Results of attention reads will be stored here.
        for i in range(self.num_head):
            k = tf.get_variable("AttnK_%d" % i, [1, 1, att_size, att_size])
            v = tf.get_variable("AttnV_%d" % i, [att_size])
            conv_source_hidden = tf.nn.conv2d(source_hidden, k, [1, 1, 1, 1], "SAME")  # (B,30,1,256)
            with tf.variable_scope("Attention_%d" % i):
                query = tf.layers.dense(self.last_hidden_state, att_size)  # (B, 256)
                query = tf.reshape(query, [-1, 1, 1, att_size])  # (B,1,1,256)
                # Attention mask is a softmax of v^T * tanh(...).
                score = v * tf.tanh(conv_source_hidden + query)
                s = tf.reduce_sum(score, [2, 3])  # (B, 30)
                att_weight = tf.nn.softmax(s)
                attn_weight_list.append(att_weight)
                # Now calculate the attention-weighted context vector.
                context_vec = tf.reduce_sum(
                    tf.reshape(att_weight, [-1, seq_length, 1, 1]) * source_hidden,
                    [1, 2])  # (B,256)
                context_vec_list.append(tf.reshape(context_vec, [-1, att_size]))

        matrix = tf.get_variable("Out_Matrix", [att_size, self.num_class])  # (256,31)
        res = tf.matmul(context_vec_list[0], matrix)  # NOTE: here we temporarily assume num_head = 1
        bias_start = 0.0
        bias_term = tf.get_variable("Out_Bias", [self.num_class],
                                    initializer=tf.constant_initializer(bias_start))
        self.decode_output = [res + bias_term]  # (B,32)
        self.att_weight = attn_weight_list[0]

    # Create a convolution + maxpool layer for each filter size
    pooled_outputs = []
    for i, filter_size in enumerate(filter_sizes):
        with tf.variable_scope("conv-maxpool-%s" % filter_size):
            # Convolution Layer
            conv = tf.layers.conv2d(self.embedded_chars_expanded, num_filters,
                                    [filter_size, _embedding_size],
                                    kernel_initializer=initializer(),
                                    activation=tf.nn.relu, name="conv")
            # Maxpooling over the outputs
            pooled = tf.nn.max_pool(conv,
                                    ksize=[1, sequence_length - filter_size + 1, 1, 1],
                                    strides=[1, 1, 1, 1],
                                    padding='VALID', name="pool")
            pooled_outputs.append(pooled)

    # Combine all the pooled features
    num_filters_total = num_filters * len(filter_sizes)
    self.h_pool = tf.concat(pooled_outputs, 3)
    self.h_pool_flat = tf.reshape(self.h_pool, [-1, num_filters_total])

    # Add dropout
    with tf.variable_scope("dropout"):
        self.h_drop = tf.nn.dropout(self.h_pool_flat, self.dropout_keep_prob)

    # Final scores and predictions
    with tf.variable_scope("output"):
        self.logits = tf.layers.dense(self.h_drop, num_classes,
                                      kernel_initializer=initializer())
        self.predictions = tf.argmax(self.logits, 1, name="predictions")

    # Calculate mean cross-entropy loss
    with tf.variable_scope("loss"):
        losses = tf.nn.softmax_cross_entropy_with_logits_v2(logits=self.logits,
                                                            labels=self.input_y)
        self.l2 = tf.add_n([tf.nn.l2_loss(v) for v in tf.trainable_variables()])
        self.loss = tf.reduce_mean(losses) + l2_reg_lambda * self.l2

    # Accuracy
    with tf.name_scope("accuracy"):
        correct_predictions = tf.equal(self.predictions, tf.argmax(self.input_y, 1))
        self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, tf.float32),
                                       name="accuracy")

#XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
# Define lstm cells with tensorflow
# Forward direction cell
lstm_fw_cell = rnn_cell.BasicLSTMCell(rnn_size, forget_bias=1.0, state_is_tuple=True)
# Backward direction cell
lstm_bw_cell = rnn_cell.BasicLSTMCell(rnn_size, forget_bias=1.0, state_is_tuple=True)
#lstm_cell = rnn_cell.BasicLSTMCell(rnn_size,state_is_tuple=True)

try:
    outputs, _, _ = rnn.static_bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, x_in,
                                                 dtype=tf.float32)
except Exception:
    # Old TensorFlow version only returns outputs not states
    outputs = rnn.static_bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, x_in,
                                           dtype=tf.float32)
#XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

output = tf.matmul(outputs[-1], layer['weights']) + layer['biases']
prediction = output

cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y))
optimizer = tf.train.AdamOptimizer().minimize(cost)

def _testSingleLayerBidirectionalLSTMHelper(self, input_size, num_units,
                                            seq_length, batch_size):
    # Only tests single layer bi-Cudnn LSTM.
    num_layers = 1
    np.random.seed(1234)

    # canonical bidirectional lstm
    param_size = _MinLSTMParamSize(
        num_layers, num_units, input_size,
        direction=cudnn_rnn_ops.CUDNN_RNN_BIDIRECTION)
    # np data
    input_data = np.random.randn(seq_length, batch_size, input_size).astype(np.float32)
    input_h = np.zeros((num_layers * 2, batch_size, num_units)).astype(np.float32)
    input_c = np.zeros((num_layers * 2, batch_size, num_units)).astype(np.float32)
    cudnn_params = np.random.randn(param_size).astype(np.float32)

    with ops.Graph().as_default():
        # cudnn bidirectional lstm graph
        cudnn_params_t = variables.Variable(cudnn_params)
        input_data_t = constant_op.constant(input_data, dtype=dtypes.float32)
        input_h_t = constant_op.constant(input_h, dtype=dtypes.float32)
        input_c_t = constant_op.constant(input_c, dtype=dtypes.float32)
        cudnn_lstm = _CreateModel(
            "lstm", num_layers, num_units, input_size,
            direction=cudnn_rnn_ops.CUDNN_RNN_BIDIRECTION)
        cudnn_output, cudnn_output_h, cudnn_output_c = cudnn_lstm(
            input_data=input_data_t,
            input_h=input_h_t,
            input_c=input_c_t,
            params=cudnn_params_t)

        # canonical bidirectional lstm
        cell_fw = rnn_cell_impl.LSTMCell(num_units, forget_bias=0.)
        cell_bw = rnn_cell_impl.LSTMCell(num_units, forget_bias=0.)
        outputs, output_state_fw, output_state_bw = static_bidirectional_rnn(
            cell_fw, cell_bw, array_ops.unstack(input_data), dtype=dtypes.float32)

        weights_list, biases_list = _TransformBidirectionalCudnnLSTMParams(
            cudnn_lstm, cudnn_params_t)
        assert len(weights_list) == 2
        assert len(biases_list) == 2

        with vs.variable_scope("", reuse=True):
            cell_fw_kernel = vs.get_variable("bidirectional_rnn/fw/lstm_cell/kernel")
            cell_fw_bias = vs.get_variable("bidirectional_rnn/fw/lstm_cell/bias")
            cell_bw_kernel = vs.get_variable("bidirectional_rnn/bw/lstm_cell/kernel")
            cell_bw_bias = vs.get_variable("bidirectional_rnn/bw/lstm_cell/bias")

        assign_fw_kernel = state_ops.assign(cell_fw_kernel, weights_list[0])
        assign_fw_bias = state_ops.assign(cell_fw_bias, biases_list[0])
        assign_bw_kernel = state_ops.assign(cell_bw_kernel, weights_list[1])
        assign_bw_bias = state_ops.assign(cell_bw_bias, biases_list[1])
        assign_ops = control_flow_ops.group(assign_fw_kernel, assign_fw_bias,
                                            assign_bw_kernel, assign_bw_bias)

        with self.test_session(use_gpu=True, graph=ops.get_default_graph()) as sess:
            sess.run(variables.global_variables_initializer())
            cu_out, cu_h, cu_c = sess.run(
                [cudnn_output, cudnn_output_h, cudnn_output_c])

            sess.run(assign_ops)
            out, fwd_s, bak_s = sess.run(
                [outputs, output_state_fw, output_state_bw])

            out = np.stack(out)
            fwd_h, fwd_c = fwd_s.h, fwd_s.c
            bak_h, bak_c = bak_s.h, bak_s.c
            h = np.concatenate((fwd_h, bak_h), axis=1)
            c = np.concatenate((fwd_c, bak_c), axis=1)

            cu_h = [np.array(x) for x in cu_h]
            cu_c = [np.array(x) for x in cu_c]
            cu_h = np.concatenate(cu_h, axis=1)
            cu_c = np.concatenate(cu_c, axis=1)

            self.assertAllClose(out, cu_out)
            self.assertAllClose(h, cu_h)
            self.assertAllClose(c, cu_c)

def stack_bidirectional_rnn(cells_fw,
                            cells_bw,
                            inputs,
                            initial_states_fw=None,
                            initial_states_bw=None,
                            dtype=None,
                            sequence_length=None,
                            scope=None):
    """Creates a bidirectional recurrent neural network.

    Stacks several bidirectional rnn layers. The combined forward and backward
    layer outputs are used as input of the next layer. tf.bidirectional_rnn
    does not allow to share forward and backward information between layers.
    The input_size of the first forward and backward cells must match.
    The initial state for both directions is zero and no intermediate states
    are returned.

    As described in https://arxiv.org/abs/1303.5778

    Args:
      cells_fw: List of instances of RNNCell, one per layer, to be used for
        forward direction.
      cells_bw: List of instances of RNNCell, one per layer, to be used for
        backward direction.
      inputs: A length T list of inputs, each a tensor of shape
        [batch_size, input_size], or a nested tuple of such elements.
      initial_states_fw: (optional) A list of the initial states (one per layer)
        for the forward RNN. Each tensor must have an appropriate type and shape
        `[batch_size, cell_fw.state_size]`.
      initial_states_bw: (optional) Same as for `initial_states_fw`, but using
        the corresponding properties of `cells_bw`.
      dtype: (optional) The data type for the initial state. Required if either
        of the initial states are not provided.
      sequence_length: (optional) An int32/int64 vector, size `[batch_size]`,
        containing the actual lengths for each of the sequences.
      scope: VariableScope for the created subgraph; defaults to None.

    Returns:
      A tuple (outputs, output_state_fw, output_state_bw) where:
        outputs is a length `T` list of outputs (one for each input), which
          are depth-concatenated forward and backward outputs.
        output_states_fw is the final states, one tensor per layer,
          of the forward rnn.
        output_states_bw is the final states, one tensor per layer,
          of the backward rnn.

    Raises:
      TypeError: If `cell_fw` or `cell_bw` is not an instance of `RNNCell`.
      ValueError: If inputs is None, not a list or an empty list.
    """
    if not cells_fw:
        raise ValueError("Must specify at least one fw cell for BidirectionalRNN.")
    if not cells_bw:
        raise ValueError("Must specify at least one bw cell for BidirectionalRNN.")
    if not isinstance(cells_fw, list):
        raise ValueError("cells_fw must be a list of RNNCells (one per layer).")
    if not isinstance(cells_bw, list):
        raise ValueError("cells_bw must be a list of RNNCells (one per layer).")
    if len(cells_fw) != len(cells_bw):
        raise ValueError("Forward and Backward cells must have the same depth.")
    if (initial_states_fw is not None and
            (not isinstance(initial_states_fw, list) or
             len(initial_states_fw) != len(cells_fw))):
        raise ValueError(
            "initial_states_fw must be a list of state tensors (one per layer).")
    if (initial_states_bw is not None and
            (not isinstance(initial_states_bw, list) or
             len(initial_states_bw) != len(cells_bw))):
        raise ValueError(
            "initial_states_bw must be a list of state tensors (one per layer).")

    states_fw = []
    states_bw = []
    prev_layer = inputs

    with vs.variable_scope(scope or "stack_bidirectional_rnn"):
        for i, (cell_fw, cell_bw) in enumerate(zip(cells_fw, cells_bw)):
            initial_state_fw = None
            initial_state_bw = None
            if initial_states_fw:
                initial_state_fw = initial_states_fw[i]
            if initial_states_bw:
                initial_state_bw = initial_states_bw[i]

            with vs.variable_scope("cell_%d" % i) as cell_scope:
                prev_layer, state_fw, state_bw = rnn.static_bidirectional_rnn(
                    cell_fw,
                    cell_bw,
                    prev_layer,
                    initial_state_fw=initial_state_fw,
                    initial_state_bw=initial_state_bw,
                    sequence_length=sequence_length,
                    dtype=dtype,
                    scope=cell_scope)
            states_fw.append(state_fw)
            states_bw.append(state_bw)

    return prev_layer, tuple(states_fw), tuple(states_bw)
