def __call__(self, inputs, seq_len, return_last_state=False):
    with tf.variable_scope(self.scope):
        if return_last_state:
            _, ((_, output_fw), (_, output_bw)) = bidirectional_dynamic_rnn(
                self.cell_fw, self.cell_bw, inputs,
                sequence_length=seq_len, dtype=tf.float32)
        else:
            (output_fw, output_bw), _ = bidirectional_dynamic_rnn(
                self.cell_fw, self.cell_bw, inputs,
                sequence_length=seq_len, dtype=tf.float32)
        output = tf.concat([output_fw, output_bw], axis=-1)
    return output
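# A minimal, self-contained sketch of the pattern above, assuming TensorFlow 1.x.
# bidirectional_dynamic_rnn returns (outputs, output_states); for LSTM cells each
# final state is an LSTMStateTuple (c, h), so unpacking ((_, h_fw), (_, h_bw))
# extracts the last hidden states. The shapes and sizes below are illustrative.
import tensorflow as tf

inputs = tf.placeholder(tf.float32, [None, None, 50])    # [batch, time, dim]
seq_len = tf.placeholder(tf.int32, [None])
cell_fw = tf.nn.rnn_cell.LSTMCell(64)
cell_bw = tf.nn.rnn_cell.LSTMCell(64)

outputs, states = tf.nn.bidirectional_dynamic_rnn(
    cell_fw, cell_bw, inputs, sequence_length=seq_len, dtype=tf.float32)
per_step = tf.concat(outputs, axis=-1)                   # [batch, time, 128]
(_, h_fw), (_, h_bw) = states
last_state = tf.concat([h_fw, h_bw], axis=-1)            # [batch, 128]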
def model_fn():
    inputs = constant_op.constant(2 * [2 * [[0.0, 1.0, 2.0, 3.0, 4.0]]])
    cell_fw = rnn_cell_impl.LSTMCell(300)
    cell_bw = rnn_cell_impl.LSTMCell(300)
    (outputs, _) = rnn.bidirectional_dynamic_rnn(
        cell_fw, cell_bw, inputs, dtype=dtypes.float32)
    return outputs
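# A hedged equivalent of model_fn using only the public TF 1.x API; the
# constant_op / rnn_cell_impl / rnn / dtypes modules above are TensorFlow's
# internal test modules. The function name public_model_fn is hypothetical.
import tensorflow as tf

def public_model_fn():
    inputs = tf.constant(2 * [2 * [[0.0, 1.0, 2.0, 3.0, 4.0]]])  # [2, 2, 5]
    cell_fw = tf.nn.rnn_cell.LSTMCell(300)
    cell_bw = tf.nn.rnn_cell.LSTMCell(300)
    outputs, _ = tf.nn.bidirectional_dynamic_rnn(
        cell_fw, cell_bw, inputs, dtype=tf.float32)
    return outputs

out_fw, out_bw = public_model_fn()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(out_fw).shape)  # (2, 2, 300)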
def apply(self, is_train, inputs, mask=None):
    fw = self.fw(is_train)
    bw_spec = self.fw if self.bw is None else self.bw
    bw = bw_spec(is_train)
    if self.merge is None:
        return tf.concat(
            bidirectional_dynamic_rnn(fw, bw, inputs, mask,
                                      swap_memory=self.swap_memory,
                                      dtype=tf.float32)[0],
            2,
        )
    else:
        fw, bw = bidirectional_dynamic_rnn(fw, bw, inputs, mask,
                                           swap_memory=self.swap_memory,
                                           dtype=tf.float32)[0]
        # TODO this should be in a different scope
        return self.merge.apply(is_train, fw, bw)
def apply(self, is_train, x, mask=None):
    states = bidirectional_dynamic_rnn(self.cell_spec(is_train),
                                       self.cell_spec(is_train),
                                       x, mask, dtype=tf.float32)[1]
    output = []
    for state in states:
        # pick the requested field (e.g. 'c' or 'h') out of each final state;
        # the loop variable is named so it no longer shadows the input `x`
        for i, field in enumerate(state._fields):
            if field == self.output:
                output.append(state[i])
    if self.merge is not None:
        return self.merge.apply(is_train, output[0], output[1])
    else:
        return tf.concat(output, axis=1)
def __call__(self, inputs, seq_len):
    if self.output_dim % 2 != 0:
        raise ValueError(
            "The output dimension of BidirectLSTMLayer should be even.")
    with tf.variable_scope(self.scope) as scope:
        self.check_reuse(scope)
        scope.reuse_variables()
        # integer division: each direction gets half of the output dimension
        cell = LSTMCell(self.output_dim // 2,
                        initializer=self.initializer(dtype=inputs.dtype))
        return rnn.bidirectional_dynamic_rnn(cell, cell, inputs, seq_len,
                                             dtype=inputs.dtype)
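# A small sketch of the constraint enforced above (split_units is a
# hypothetical helper, TF 1.x assumed): the concatenated fw/bw output has
# 2 * num_units features, so an odd output_dim cannot be split evenly
# between the two directions.
import tensorflow as tf

def split_units(output_dim):
    if output_dim % 2 != 0:
        raise ValueError("output_dim must be even, got %d" % output_dim)
    return output_dim // 2  # integer units per direction

cell = tf.nn.rnn_cell.LSTMCell(split_units(128))  # 64 units per direction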
def __call__(self, inputs, sequence_lengths, is_training=False, reuse=False,
             scope=None):
    """
    Create the variables and do the forward computation

    Args:
        inputs: A time minor tensor of shape [batch_size, time, input_size]
        sequence_lengths: the length of the input sequences
        is_training: whether or not the network is in training mode
        reuse: Setting this value to true will cause tensorflow to look for
            variables with the same name in the graph and reuse these instead
            of creating new variables.
        scope: The variable scope sets the namespace under which the
            variables created during this call will be stored.

    Returns:
        the output of the layer: the concatenated outputs of the forward and
        backward pass of shape [batch_size, time, input_size*2], or
        [batch_size, time/2, input_size*2] if self.pyramidal is set to true.
    """
    with tf.variable_scope(scope or type(self).__name__, reuse=reuse) as scope:
        # create the lstm cell that will be used for the forward and backward
        lstm_init = tf.random_uniform_initializer(minval=-0.1, maxval=0.1)
        lstm_cell = rnn_cell.LSTMCell(self.num_units,
                                      state_is_tuple=True,
                                      use_peepholes=True,
                                      initializer=lstm_init)

        if self.pyramidal:
            inputs, sequence_lengths = concatenate(inputs, sequence_lengths,
                                                   scope)

        outputs, _ = bidirectional_dynamic_rnn(
            lstm_cell, lstm_cell, inputs, dtype=tf.float32,
            sequence_length=sequence_lengths)
        outputs = tf.concat(outputs, 2)

    return outputs, sequence_lengths
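# The `concatenate` helper above is external; this is a hypothetical sketch of
# the usual pyramidal (pLSTM) step it implies: pair adjacent frames so time
# halves and the feature dimension doubles, and halve the sequence lengths.
# Assumptions: TF 1.x, an even number of frames, a static feature dimension.
import tensorflow as tf

def pyramid_concat(inputs, sequence_lengths):
    # inputs: [batch, time, dim]; time is assumed even for simplicity
    batch = tf.shape(inputs)[0]
    time = tf.shape(inputs)[1]
    dim = inputs.get_shape()[2].value  # static feature dimension
    outputs = tf.reshape(inputs, [batch, time // 2, 2 * dim])
    return outputs, sequence_lengths // 2

x = tf.placeholder(tf.float32, [None, None, 40])
lens = tf.placeholder(tf.int32, [None])
x2, lens2 = pyramid_concat(x, lens)  # x2: [batch, time/2, 80]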
def build_multi_dynamic_brnn(args, maxTimeSteps, inputX, cell_fn, seqLengths,
                             time_major=True):
    hid_input = inputX
    for i in range(args.num_layer):
        scope = 'DBRNN_' + str(i + 1)
        forward_cell = cell_fn(args.num_hidden, activation=args.activation)
        backward_cell = cell_fn(args.num_hidden, activation=args.activation)
        # tensor of shape: [max_time, batch_size, input_size]
        outputs, output_states = bidirectional_dynamic_rnn(
            forward_cell, backward_cell, inputs=hid_input, dtype=tf.float32,
            sequence_length=seqLengths, time_major=True, scope=scope)
        # forward output, backward output
        # tensor of shape: [max_time, batch_size, input_size]
        output_fw, output_bw = outputs
        # forward states, backward states
        output_state_fw, output_state_bw = output_states
        # output_fb = tf.concat(2, [output_fw, output_bw])
        output_fb = tf.stack([output_fw, output_bw], 2, name='output_fb')
        # output_fb = tf.concat([output_fw, output_bw], 2)
        output_fb = tf.Print(output_fb, [output_fb.get_shape()],
                             message='output_fb:')
        # shape = output_fb.get_shape().as_list()
        # output_fb = tf.reshape(output_fb,
        #                        [shape[0], shape[1], 2, int(shape[2] / 2)])
        # output_fb = tf.Print(output_fb, [output_fb.get_shape()],
        #                      message='after reshape:')
        hidden = tf.reduce_sum(output_fb, 2)
        # hidden = dropout(hidden, args.keep_prob, (args.mode == 'train'))  # Apr 19
        if i != args.num_layer - 1:
            hid_input = hidden
        else:
            outputXrs = tf.reshape(hidden, [-1, args.num_hidden])
            # output_list = tf.split(0, maxTimeSteps, outputXrs)
            output_list = tf.split(outputXrs, maxTimeSteps, 0)
            fbHrs = [tf.reshape(t, [args.batch_size, args.num_hidden])
                     for t in output_list]
    return fbHrs
def encoder_rnn_layer(rnn_inputs, rnn_size, num_layers, keep_prob,
                      sequence_length):
    lstm = tf.nn.rnn_cell.BasicLSTMCell(rnn_size)
    lstm_dropout = tf.nn.rnn_cell.DropoutWrapper(lstm,
                                                 input_keep_prob=keep_prob)
    encoder_cell = tf.nn.rnn_cell.MultiRNNCell([lstm_dropout] * num_layers)
    _, encoder_state = bidirectional_dynamic_rnn(cell_fw=encoder_cell,
                                                 cell_bw=encoder_cell,
                                                 sequence_length=sequence_length,
                                                 inputs=rnn_inputs,
                                                 dtype=tf.float32)
    return encoder_state
def _char_embedding_layer(
        embedder: EmbeddingService,
        chars: tf.Variable,
        num_words: tf.Variable,
        num_chars: tf.Variable,
        char_rnn_size: int,
        dropout_function: Callable[[tf.Variable], tf.Variable]) -> tf.Variable:
    batch_size = int(chars.get_shape()[0])
    embedding_size = embedder.embedding_dim
    with tf.variable_scope('char_embedding_layer'):
        # [batch_size, dim_num_words, dim_num_chars]
        char_embeddings = tf.get_variable(
            name='char_embeddings', trainable=True, dtype=tf.float32,
            initializer=tf.constant(embedder.embedding_matrix,
                                    dtype=tf.float32))
        char_raw_embed = dropout_function(
            tf.nn.embedding_lookup(char_embeddings, chars))

        # we need to unstack instead of reshape as two dimensions are unknown
        # batch_size * [dim_num_words, dim_num_chars, embedding_size]
        char_raw_embed_list = tf.unstack(char_raw_embed, batch_size, axis=0)
        char_raw_embed_length_list = tf.unstack(num_chars, batch_size, axis=0)

        # batch_size * [dim_num_words, layer_size]
        char_embed_list = []
        with tf.variable_scope('encoding') as scope:
            fw_cell = GRUCell(char_rnn_size)
            bw_cell = GRUCell(char_rnn_size)
            for i in range(len(char_raw_embed_list)):
                batch_embed = char_raw_embed_list[i]
                batch_char_length = char_raw_embed_length_list[i]
                (_, _), (fw_final, bw_final) = bidirectional_dynamic_rnn(
                    fw_cell, bw_cell, inputs=batch_embed, dtype=tf.float32,
                    sequence_length=batch_char_length, scope=scope,
                    parallel_iterations=64, swap_memory=True)
                out = tf.concat([fw_final, bw_final], axis=1)
                char_embed_list.append(out)

        return tf.stack(char_embed_list, axis=0)
def build_graph(self, args, maxTimeSteps):
    self.graph = tf.Graph()
    with self.graph.as_default():
        self.inputX = tf.placeholder(
            tf.float32,
            shape=(maxTimeSteps, args.batch_size, args.num_feature))  # [maxL, 32, 39]
        self.inputXX = tf.reshape(
            self.inputX,
            shape=(args.batch_size, maxTimeSteps, args.num_feature))
        inputXrs = tf.reshape(self.inputX, [-1, args.num_feature])
        # self.inputList = tf.split(0, maxTimeSteps, inputXrs)
        # convert inputXrs from [32*maxL, 39] to [32, maxL, 39]
        # self.inputnew = tf.reshape(self.inputX, [1, 0, 2])
        self.targetIxs = tf.placeholder(tf.int64)
        self.targetVals = tf.placeholder(tf.int32)
        self.targetShape = tf.placeholder(tf.int64)
        self.targetY = tf.SparseTensor(self.targetIxs, self.targetVals,
                                       self.targetShape)
        self.seqLengths = tf.placeholder(tf.int32, shape=(args.batch_size))
        self.config = {
            'name': args.model,
            'rnncell': self.cell_fn,
            'num_layer': args.num_layer,
            'num_hidden': args.num_hidden,
            'num_class': args.num_class,
            'activation': args.activation,
            'optimizer': args.optimizer,
            'learning rate': args.learning_rate
        }

        # forward layer
        forwardH1 = self.cell_fn(args.num_hidden, activation=tf.nn.relu)
        # backward layer
        backwardH1 = self.cell_fn(args.num_hidden, activation=tf.nn.relu)
        # bi-directional layer
        fbH1, state = bidirectional_dynamic_rnn(
            forwardH1, backwardH1, self.inputXX,
            sequence_length=self.seqLengths, dtype=tf.float32,
            scope='BDRNN_H1')
        fbH1 = tf.concat(2, fbH1)
        print(fbH1.get_shape())
        shape = fbH1.get_shape().as_list()
        fbH1 = tf.reshape(fbH1, [shape[0] * shape[1], -1])  # seq*batch, feature
        fbH1_list = tf.split(0, shape[1], fbH1)
        logits = [build_forward_layer(t, [shape[2], args.num_class],
                                      kernel='linear') for t in fbH1_list]
        logits3d = tf.pack(logits)
        self.loss = tf.reduce_mean(
            ctc.ctc_loss(logits3d, self.targetY, self.seqLengths))
        self.optimizer = tf.train.AdamOptimizer(
            args.learning_rate).minimize(self.loss)
        self.logitsMaxTest = tf.slice(tf.argmax(logits3d, 2), [0, 0],
                                      [self.seqLengths[0], 1])
        self.predictions = tf.to_int32(
            ctc.ctc_beam_search_decoder(logits3d, self.seqLengths)[0][0])
        self.errorRate = tf.reduce_sum(
            tf.edit_distance(self.predictions, self.targetY,
                             normalize=False)) / tf.to_float(
                                 tf.size(self.targetY.values))
        self.initial_op = tf.initialize_all_variables()
        self.saver = tf.train.Saver(tf.all_variables(), max_to_keep=5,
                                    keep_checkpoint_every_n_hours=1)
        self.logfile = args.log_dir + str(
            datetime.datetime.strftime(datetime.datetime.now(),
                                       '%Y-%m-%d %H:%M:%S') +
            '.txt').replace(' ', '').replace('/', '')
        self.var_op = tf.all_variables()
        self.var_trainable_op = tf.trainable_variables()
def bi_rnn(self, inputs, scope=None):
    with tf.variable_scope(scope or 'BiRNN'):
        fw_cells = tf.nn.rnn_cell.LSTMCell(hidden_units)
        bw_cells = tf.nn.rnn_cell.LSTMCell(hidden_units)
        fw_cells = rnn_cell.DropoutWrapper(
            fw_cells, output_keep_prob=1 - self.dropout_rate)
        bw_cells = rnn_cell.DropoutWrapper(
            bw_cells, output_keep_prob=1 - self.dropout_rate)
        rnn_outputs, _ = rnn.bidirectional_dynamic_rnn(
            cell_fw=fw_cells, cell_bw=bw_cells, inputs=inputs,
            dtype=tf.float32)
        H = tf.concat(rnn_outputs, axis=2)  # 2 * hidden_units
        return H
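# A standalone sketch of the dropout-wrapped BiLSTM above (TF 1.x assumed);
# `hidden_units` is a free variable in the snippet, taken here to be an
# integer hyperparameter, and keep_prob stands in for 1 - dropout_rate.
import tensorflow as tf

hidden_units, keep_prob = 128, 0.8
x = tf.placeholder(tf.float32, [None, None, 300])  # [batch, time, dim]
fw = tf.nn.rnn_cell.DropoutWrapper(tf.nn.rnn_cell.LSTMCell(hidden_units),
                                   output_keep_prob=keep_prob)
bw = tf.nn.rnn_cell.DropoutWrapper(tf.nn.rnn_cell.LSTMCell(hidden_units),
                                   output_keep_prob=keep_prob)
outs, _ = tf.nn.bidirectional_dynamic_rnn(fw, bw, x, dtype=tf.float32)
H = tf.concat(outs, axis=2)  # [batch, time, 2 * hidden_units]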
def BiRNN(x, weights, biases):
    # shape of x will be (batch_size, n_steps, n_channels); transform to
    # (n_steps, batch_size, n_channels) to comply with the time-major bi-rnn
    x = tf.transpose(x, [1, 0, 2])
    # Forward direction cell
    lstm_fw_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
    # Backward direction cell
    lstm_bw_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
    outputs, _ = rnn.bidirectional_dynamic_rnn(lstm_fw_cell, lstm_bw_cell, x,
                                               dtype=tf.float32,
                                               time_major=True)
    # concatenate forward and backward outputs along the feature axis
    outputs = tf.concat(outputs, 2)
    # outputs is time-major, so outputs[-1] is the last timestep
    return tf.matmul(outputs[-1], weights['out']) + biases['out']
def __call__(self, inputs, sequence_length, scope=None):
    '''
    Create the variables and do the forward computation

    Args:
        inputs: the input to the layer as a [batch_size, max_length, dim]
            tensor
        sequence_length: the length of the input sequences as a [batch_size]
            tensor
        scope: The variable scope sets the namespace under which the
            variables created during this call will be stored.

    Returns:
        the output of the layer
    '''
    with tf.variable_scope(scope or type(self).__name__):
        # create the lstm cell that will be used for the forward and backward
        # pass
        lstm_cell_fw = rnn_cell.LayerNormBasicLeakchLSTMCell(
            num_units=self.num_units,
            leak_factor=self.leak_factor,
            layer_norm=self.layer_norm,
            dropout_keep_prob=self.recurrent_dropout,
            reuse=tf.get_variable_scope().reuse)
        lstm_cell_bw = rnn_cell.LayerNormBasicLeakchLSTMCell(
            self.num_units,
            leak_factor=self.leak_factor,
            layer_norm=self.layer_norm,
            dropout_keep_prob=self.recurrent_dropout,
            reuse=tf.get_variable_scope().reuse)

        # do the forward computation
        outputs_tuple, _ = bidirectional_dynamic_rnn(
            lstm_cell_fw, lstm_cell_bw, inputs, dtype=tf.float32,
            sequence_length=sequence_length)

        outputs = tf.concat(outputs_tuple, 2)

        return outputs
def build_multi_dynamic_brnn(maxTimeSteps, inputX, seqLengths,
                             time_major=True):
    hid_input = inputX
    for i in range(num_layers):
        scope = 'DBRNN_' + str(i + 1)
        forward_cell = tf.contrib.rnn.GRUCell(num_hidden, activation=tf.tanh)
        backward_cell = tf.contrib.rnn.GRUCell(num_hidden, activation=tf.tanh)
        # tensor of shape: [max_time, batch_size, input_size]
        outputs, output_states = bidirectional_dynamic_rnn(
            forward_cell, backward_cell, inputs=hid_input, dtype=tf.float32,
            sequence_length=seqLengths, time_major=True, scope=scope)
        # forward output, backward output
        # tensor of shape: [max_time, batch_size, input_size]
        output_fw, output_bw = outputs
        # forward states, backward states
        output_state_fw, output_state_bw = output_states
        # output_fb = tf.concat(2, [output_fw, output_bw])
        output_fb = array_ops.concat(outputs, 2)  # tf.concat([output_fw, output_bw], 2)
        shape = output_fb.get_shape().dims
        output_fb = tf.reshape(
            output_fb, [-1, shape[1].value, 2, int(shape[2].value / 2)])
        hidden = tf.reduce_sum(output_fb, 2)
        hidden = tf.contrib.layers.dropout(hidden, keep_prob=1 - dropout,
                                           is_training=True)
        # hidden = dropout(hidden, args.keep_prob, (args.mode == 'train'))
        if i != num_layers - 1:
            hid_input = hidden
        else:
            outputXrs = tf.reshape(hidden, [-1, num_hidden])
            # output_list = tf.split(0, maxTimeSteps, outputXrs)
            # output_list = tf.split(outputXrs, maxTimeSteps, 0)
            # fbHrs = [tf.reshape(t, [batch_size, num_hidden])
            #          for t in output_list]
    return outputXrs
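# A sketch of the fw/bw merge used in the two multi-layer BRNNs above,
# assuming TF 1.x and time-major tensors: stacking the directions on a new
# axis and reduce-summing keeps the layer width at num_hidden, so each BRNN
# layer can feed the next without doubling its input size.
import tensorflow as tf

num_hidden = 128
out_fw = tf.placeholder(tf.float32, [None, None, num_hidden])  # [time, batch, H]
out_bw = tf.placeholder(tf.float32, [None, None, num_hidden])
merged = tf.reduce_sum(tf.stack([out_fw, out_bw], axis=2), axis=2)
# merged: [time, batch, num_hidden], the element-wise sum of both directions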
def build_content_encoder(self):
    print('Building content_encoder...')
    with tf.variable_scope('pre'):
        self.content_inputs_embedded = tf.nn.embedding_lookup(
            params=self.embedding,
            ids=self.content_inputs)
        self.pre_cell_fw = self.build_rnn_depth_cell()
        self.pre_cell_bw = self.build_rnn_depth_cell()
        self.pre_outputs_fw_bw, self.pre_last_state_fw_bw = \
            bidirectional_dynamic_rnn(
                cell_fw=self.pre_cell_fw,
                cell_bw=self.pre_cell_bw,
                inputs=self.content_inputs_embedded,
                sequence_length=self.content_inputs_length,
                dtype=self.dtype,
                time_major=False,
            )
        self.pre_outputs_fw, self.pre_outputs_bw = self.pre_outputs_fw_bw
        self.content_outputs = tf.concat(
            [self.pre_outputs_fw, self.pre_outputs_bw], 2)
def __call__(self, inputs, sequence_length, scope=None):
    '''
    Create the variables and do the forward computation

    Args:
        inputs: the input to the layer as a [batch_size, max_length, dim]
            tensor
        sequence_length: the length of the input sequences as a [batch_size]
            tensor
        scope: The variable scope sets the namespace under which the
            variables created during this call will be stored.

    Returns:
        the output of the layer
    '''
    with tf.variable_scope(scope or type(self).__name__):
        # create the rnn cell that will be used for the forward and backward
        # pass
        if self.linear_out_flag:
            rnn_cell_type = rnn_cell.RNNCellLinearOut
        else:
            rnn_cell_type = tf.contrib.rnn.BasicRNNCell
        rnn_cell_fw = rnn_cell_type(num_units=self.num_units,
                                    activation=self.activation_fn,
                                    reuse=tf.get_variable_scope().reuse)
        rnn_cell_bw = rnn_cell_type(num_units=self.num_units,
                                    activation=self.activation_fn,
                                    reuse=tf.get_variable_scope().reuse)

        # do the forward computation
        outputs_tuple, _ = bidirectional_dynamic_rnn(
            rnn_cell_fw, rnn_cell_bw, inputs, dtype=tf.float32,
            sequence_length=sequence_length)

        outputs = tf.concat(outputs_tuple, 2)

        return outputs
def __call__(self, inputs, sequence_length, is_training=False, reuse=False,
             scope=None):
    """
    Create the variables and do the forward computation

    Args:
        inputs: the input to the layer as a [batch_size, max_length, dim]
            tensor
        sequence_length: the length of the input sequences
        is_training: whether or not the network is in training mode
        reuse: Setting this value to true will cause tensorflow to look for
            variables with the same name in the graph and reuse these instead
            of creating new variables.
        scope: The variable scope sets the namespace under which the
            variables created during this call will be stored.

    Returns:
        the output of the layer
    """
    with tf.variable_scope(scope or type(self).__name__, reuse=reuse):
        # create the lstm cell that will be used for the forward and backward
        # pass
        lstm_cell = rnn_cell.LSTMCell(self.num_units,
                                      state_is_tuple=True,
                                      use_peepholes=True)

        # do the forward computation
        outputs_tuple, _ = bidirectional_dynamic_rnn(
            lstm_cell, lstm_cell, inputs, dtype=tf.float32,
            sequence_length=sequence_length)

        outputs = tf.concat(outputs_tuple, 2)

        return outputs
def build_multi_dynamic_brnn(args, maxTimeSteps, inputX, cell_fn, seqLength,
                             time_major=True):
    hid_input = inputX
    for i in range(args.num_layer):
        scope = 'DBRNN_' + str(i + 1)
        forward_cell = cell_fn(args.num_hidden, name='basic_lstm_cell')
        backward_cell = cell_fn(args.num_hidden, name='basic_lstm_cell')
        # forward_cell = cell_fn(args.num_hidden, activation=args.activation,
        #                        name='basic_lstm_cell')
        # backward_cell = cell_fn(args.num_hidden, activation=args.activation,
        #                         name='basic_lstm_cell')
        # tensor of shape: [max_time, batch_size, input_size]
        outputs, _ = bidirectional_dynamic_rnn(forward_cell, backward_cell,
                                               inputs=hid_input,
                                               dtype=tf.float32,
                                               sequence_length=seqLength,
                                               time_major=time_major,
                                               scope=scope)
        # forward output, backward output
        # tensor of shape: [max_time, batch_size, input_size]
        output_fw, output_bw = outputs
        output_fb = tf.stack([output_fw, output_bw], 2, name='output_fb')
        hidden = tf.reduce_sum(output_fb, 2)
        if i != args.num_layer - 1:
            hid_input = hidden
        else:
            outputXrs = tf.reshape(hidden, [-1, args.num_hidden])
            outputXrs = tf.Print(outputXrs, [outputXrs.get_shape()],
                                 message='shape of output xrs')
            # output_list = tf.split(0, maxTimeSteps, outputXrs)
            output_list = tf.split(outputXrs, maxTimeSteps, 0)
            fbHrs = [tf.reshape(t, [args.batch_size, args.num_hidden])
                     for t in output_list]
    return fbHrs
def blstm(inputs, sequence_length, num_units, is_training, scope=None):
    '''
    a BLSTM layer

    args:
        inputs: the input to the layer as a [batch_size, max_length, dim]
            tensor
        sequence_length: the length of the input sequences as a [batch_size]
            tensor
        num_units: The number of units in one direction
        is_training: whether or not the network is in training mode
        scope: The variable scope sets the namespace under which the
            variables created during this call will be stored.

    returns:
        the blstm outputs
    '''
    with tf.variable_scope(scope or 'BLSTM'):
        # create the lstm cell that will be used for the forward and backward
        # pass
        lstm_cell_fw = BatchNormBasicLSTMCell(
            num_units=num_units,
            is_training=is_training,
            reuse=tf.get_variable_scope().reuse)
        lstm_cell_bw = BatchNormBasicLSTMCell(
            num_units=num_units,
            is_training=is_training,
            reuse=tf.get_variable_scope().reuse)

        # do the forward computation
        outputs_tuple, _ = bidirectional_dynamic_rnn(
            lstm_cell_fw, lstm_cell_bw, inputs, dtype=tf.float32,
            sequence_length=sequence_length)

        outputs = tf.concat(outputs_tuple, 2)

        return outputs
def _build_rnn_op(self):
    with tf.variable_scope("bi_directional_rnn"):
        cell_fw = self._create_rnn_cell()
        cell_bw = self._create_rnn_cell()
        if self.cfg["use_stack_rnn"]:
            rnn_outs, *_ = stack_bidirectional_dynamic_rnn(
                cell_fw, cell_bw, self.word_emb, dtype=tf.float32)
        else:
            rnn_outs, *_ = bidirectional_dynamic_rnn(
                cell_fw, cell_bw, self.word_emb, dtype=tf.float32)
            rnn_outs = tf.concat(rnn_outs, axis=-1)
        rnn_outs = tf.layers.dropout(rnn_outs, rate=self.drop_rate,
                                     training=self.is_train)
        self.rnn_outs = rnn_outs
        print("rnn output shape: {}".format(rnn_outs.get_shape().as_list()))
def setup_encoder(self):
    with vs.variable_scope("Encoder"):
        inp = tf.nn.dropout(self.encoder_inputs, self.keep_prob)
        self.encoder_fw_cell = rnn_cell.MultiRNNCell(
            [self.lstm_cell() for _ in range(self.num_layers)],
            state_is_tuple=True)
        self.encoder_bw_cell = rnn_cell.MultiRNNCell(
            [self.lstm_cell() for _ in range(self.num_layers)],
            state_is_tuple=True)
        out, _ = rnn.bidirectional_dynamic_rnn(
            self.encoder_fw_cell, self.encoder_bw_cell, inp, self.src_len,
            dtype=tf.float32, time_major=True,
            initial_state_fw=self.encoder_fw_cell.zero_state(
                self.batch_size, dtype=tf.float32),
            initial_state_bw=self.encoder_bw_cell.zero_state(
                self.batch_size, dtype=tf.float32))
        out = tf.concat([out[0], out[1]], axis=2)
        self.encoder_output = out
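# A reduced sketch of the stacked bidirectional encoder above (TF 1.x
# assumed): MultiRNNCell on each direction, time-major inputs, and explicit
# zero initial states. All sizes below are illustrative.
import tensorflow as tf

num_layers, num_units, batch_size = 2, 64, 8
x = tf.placeholder(tf.float32, [None, batch_size, 50])  # [time, batch, dim]
lengths = tf.placeholder(tf.int32, [batch_size])
fw = tf.nn.rnn_cell.MultiRNNCell(
    [tf.nn.rnn_cell.LSTMCell(num_units) for _ in range(num_layers)])
bw = tf.nn.rnn_cell.MultiRNNCell(
    [tf.nn.rnn_cell.LSTMCell(num_units) for _ in range(num_layers)])
out, _ = tf.nn.bidirectional_dynamic_rnn(
    fw, bw, x, lengths, dtype=tf.float32, time_major=True,
    initial_state_fw=fw.zero_state(batch_size, tf.float32),
    initial_state_bw=bw.zero_state(batch_size, tf.float32))
enc = tf.concat([out[0], out[1]], axis=2)  # [time, batch, 2 * num_units]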
def _layer_encoder_input(self, encoder_inputs, seq_len, layer_depth=1):
    """Run a single LSTM on given input.

    Args:
        encoder_inputs: A 3-D Tensor input of shape TxBxE on which to run
            LSTM where T is number of timesteps, B is batch size and E is
            input dimension at each timestep.
        seq_len: A 1-D tensor that contains the actual length of each input
            in the batch. This ensures pad symbols are not processed as
            input.
        layer_depth: An integer denoting the depth at which the current
            layer is constructed. This information is necessary to
            differentiate the parameters of different layers.

    Returns:
        encoder_outputs: Output of LSTM, a 3-D tensor of shape TxBxH.
        final_state: Final hidden state of LSTM.
    """
    with variable_scope.variable_scope(
            "RNNLayer%d" % (layer_depth),
            initializer=tf.random_uniform_initializer(-0.075, 0.075)):
        # Check if the encoder needs to be bidirectional or not.
        if self.bi_dir:
            (encoder_output_fw, encoder_output_bw), (final_state_fw, _) = \
                rnn.bidirectional_dynamic_rnn(
                    self.cell, self.cell, encoder_inputs,
                    sequence_length=seq_len, dtype=tf.float32,
                    time_major=True)
            # Concatenate the output of forward and backward layer
            encoder_outputs = tf.concat(
                [encoder_output_fw, encoder_output_bw], 2)
            # Assume the final state is simply the final state of the
            # forward layer. A combination of hidden states can also be done.
            final_state = final_state_fw
        else:
            encoder_outputs, final_state = rnn.dynamic_rnn(
                self.cell, encoder_inputs, sequence_length=seq_len,
                dtype=tf.float32, time_major=True)
    return encoder_outputs, final_state
def __init__(self, cfg):
    # fed by 'feed_dict'
    self.context = tf.placeholder(name='context', shape=[None, None],
                                  dtype=tf.int32)
    self.seq_len = tf.placeholder(name='sequence_length', shape=[None],
                                  dtype=tf.int32)
    self.labels = tf.placeholder(name='labels',
                                 shape=[None, cfg.num_classes],
                                 dtype=tf.float32)
    self.lr = tf.placeholder(name='learning_rate', dtype=tf.float32)

    with tf.device('/gpu:0'):
        with tf.variable_scope('context_lookup_table'):
            with open(params['default_word_emb_pkl_path'], 'rb') as f:
                word_emb = pickle.load(f)
            word_embeddings = tf.constant(word_emb, dtype=tf.float32)
            # make lookup table for given review context
            context_emb = tf.nn.embedding_lookup(word_embeddings,
                                                 self.context)

        with tf.variable_scope('context_representation'):
            cell_fw = LSTMCell(num_units=cfg.num_units)
            cell_bw = LSTMCell(num_units=cfg.num_units)
            h, _ = bidirectional_dynamic_rnn(cell_fw, cell_bw, context_emb,
                                             sequence_length=self.seq_len,
                                             dtype=tf.float32,
                                             time_major=False)
            # concat forward and backward hidden states
            h = tf.concat(h, axis=-1)
            h = self.self_attention(h)
            weight = tf.get_variable(
                name='weight',
                shape=[2 * cfg.num_units, 2 * cfg.num_units],
                dtype=tf.float32)
            h = tf.nn.tanh(tf.matmul(h, weight))

        with tf.variable_scope('compute_logits'):
            context_logits = self.ffn_layer(h, cfg.hidden_units,
                                            cfg.num_classes,
                                            scope='ffn_layer')

        with tf.variable_scope('compute_loss'):
            self.loss = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits_v2(
                    logits=context_logits, labels=self.labels))
            self.train_op = tf.train.AdamOptimizer(
                learning_rate=self.lr).minimize(self.loss)

        with tf.variable_scope('accuracy'):
            # pred is 0 (neg) or 1 (pos)
            self.pred = tf.argmax(tf.nn.softmax(context_logits), 1,
                                  name='prediction')
            num_correct_pred = tf.equal(self.pred,
                                        tf.argmax(self.labels, 1))
            self.accuracy = tf.reduce_mean(
                tf.cast(num_correct_pred, tf.float32))
def biLSTM_layer(self, lstm_inputs, lstm_dim, lengths, name=None):
    """
    :param lstm_inputs: [batch_size, num_steps, emb_size]
    :return: [batch_size, num_steps, 2*lstm_dim]
    """
    with tf.variable_scope("char_BiLSTM" if not name else name):
        lstm_cell = {}
        for direction in ["forward", "backward"]:
            with tf.variable_scope(direction):
                lstm_cell[direction] = CoupledInputForgetGateLSTMCell(
                    lstm_dim,
                    use_peepholes=True,
                    initializer=self.initializer,
                    state_is_tuple=True)
        outputs, final_states = bidirectional_dynamic_rnn(
            lstm_cell["forward"],
            lstm_cell["backward"],
            lstm_inputs,
            dtype=tf.float32,
            sequence_length=lengths)
    return tf.concat(outputs, axis=2)
def define_one_layer_BLSTM(inputX, seqLengths, nHidden):
    forwardH1 = rnn_cell.LSTMCell(nHidden, use_peepholes=True,
                                  state_is_tuple=True)
    backwardH1 = rnn_cell.LSTMCell(nHidden, use_peepholes=True,
                                   state_is_tuple=True)
    (output_fw, output_bw), _ = bidirectional_dynamic_rnn(
        forwardH1, backwardH1, inputX, dtype=tf.float32,
        sequence_length=seqLengths, time_major=False)
    # both of shape (batch_size, max_time, hidden_size)
    output_combined = tf.concat((output_fw, output_bw), 2)
    # [num batch] x [num max time] x (hidden_size x 2)
    return output_combined, nHidden * 2  # nHidden*2 is the actual output width
def __init__(self, FLAGS, vocab_embed):
    self.n_class = FLAGS.n_class
    self.max_len = FLAGS.max_len
    self.embed_size = FLAGS.embed_size
    self.vocab_embed = tf.convert_to_tensor(vocab_embed, name='vocab_embed')
    self.global_step = tf.Variable(0, trainable=False, name='global_step')

    with tf.name_scope('input'):
        self.X = tf.placeholder(tf.int32, [None, self.max_len], name='X')
        self.y = tf.placeholder(tf.float32, [None, self.n_class], name='y')

    with tf.name_scope('embed'):
        embed_words = tf.nn.embedding_lookup(self.vocab_embed, self.X,
                                             name='embed_words')

    with tf.name_scope('encode-word'):
        # case 1
        # encode_words = embed_words
        # case 2
        (fw_outputs, bw_outputs), _ = bidirectional_dynamic_rnn(
            BasicLSTMCell(self.max_len), BasicLSTMCell(self.max_len),
            inputs=embed_words, dtype=tf.float32)
        encode_words = fw_outputs + bw_outputs

    with tf.name_scope('word-attn'):
        v = self.attention(encode_words)

    with tf.name_scope('output'):
        self.logits = self.output(v)

    # mean loss
    self.loss = tf.nn.softmax_cross_entropy_with_logits(logits=self.logits,
                                                        labels=self.y)
    self.mean_loss = tf.reduce_mean(self.loss, name='mean_loss')
    tf.summary.scalar('mean_loss', self.mean_loss)

    # accuracy
    self.target = tf.argmax(self.y, 1, name='target')
    self.prediction = tf.argmax(self.logits, 1, name='prediction')
    correct_prediction = tf.equal(self.prediction, tf.argmax(self.y, 1))
    self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, 'float'),
                                   name='accuracy')
    tf.summary.scalar('accuracy', self.accuracy)
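# A sketch of the merge used in 'encode-word' above (TF 1.x assumed, sizes
# illustrative): adding the fw and bw outputs keeps the feature width equal
# to the cell size, unlike concatenation, which doubles it.
import tensorflow as tf

units = 32
emb = tf.placeholder(tf.float32, [None, None, units])  # [batch, time, units]
fw_out, bw_out = tf.nn.bidirectional_dynamic_rnn(
    tf.nn.rnn_cell.BasicLSTMCell(units), tf.nn.rnn_cell.BasicLSTMCell(units),
    emb, dtype=tf.float32)[0]
encoded = fw_out + bw_out  # [batch, time, units], not 2 * units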
def _build_model_op(self):
    with tf.variable_scope("bi_directional_rnn"):
        cell_fw = self._create_rnn_cell()
        cell_bw = self._create_rnn_cell()
        if self.cfg["use_stack_rnn"]:
            rnn_outs, *_ = stack_bidirectional_dynamic_rnn(
                cell_fw, cell_bw, self.word_emb, dtype=tf.float32,
                sequence_length=self.seq_len)
        else:
            rnn_outs, *_ = bidirectional_dynamic_rnn(
                cell_fw, cell_bw, self.word_emb,
                sequence_length=self.seq_len, dtype=tf.float32)
            rnn_outs = tf.concat(rnn_outs, axis=-1)
        rnn_outs = tf.layers.dropout(rnn_outs, rate=self.drop_rate,
                                     training=self.is_train)
        if self.cfg["use_residual"]:
            word_project = tf.layers.dense(self.word_emb,
                                           units=2 * self.cfg["num_units"],
                                           use_bias=False)
            rnn_outs = rnn_outs + word_project
        outputs = layer_normalize(rnn_outs) if self.cfg["use_layer_norm"] \
            else rnn_outs
        # print("rnn output shape: {}".format(outputs.get_shape().as_list()))

    if self.cfg["use_attention"] == "self_attention":
        with tf.variable_scope("self_attention"):
            attn_outs = multi_head_attention(outputs, outputs,
                                             self.cfg["num_heads"],
                                             self.cfg["attention_size"],
                                             drop_rate=self.drop_rate,
                                             is_train=self.is_train)
            if self.cfg["use_residual"]:
                attn_outs = attn_outs + outputs
            outputs = layer_normalize(attn_outs) \
                if self.cfg["use_layer_norm"] else attn_outs
            print("self-attention output shape: {}".format(
                outputs.get_shape().as_list()))
    elif self.cfg["use_attention"] == "normal_attention":
        with tf.variable_scope("normal_attention"):
            context = tf.transpose(outputs, [1, 0, 2])
            p_context = tf.layers.dense(outputs,
                                        units=2 * self.cfg["num_units"],
                                        use_bias=False)
            p_context = tf.transpose(p_context, [1, 0, 2])
            attn_cell = AttentionCell(self.cfg["num_units"], context,
                                      p_context)  # time major based
            attn_outs, _ = dynamic_rnn(attn_cell, context,
                                       sequence_length=self.seq_len,
                                       time_major=True, dtype=tf.float32)
            outputs = tf.transpose(attn_outs, [1, 0, 2])
            print("attention output shape: {}".format(
                outputs.get_shape().as_list()))

    with tf.variable_scope("project"):
        self.logits = tf.layers.dense(outputs, units=self.tag_vocab_size,
                                      use_bias=True)
def context_representation(inputs, seq_len, num_units,
                           activation=tf.nn.tanh, use_bias=False,
                           reuse=None, name="context_rep"):
    with tf.variable_scope(name, reuse=reuse, dtype=tf.float32):
        cell_fw = LSTMCell(num_units=num_units)
        cell_bw = LSTMCell(num_units=num_units)
        context_features, _ = bidirectional_dynamic_rnn(
            cell_fw=cell_fw, cell_bw=cell_bw, inputs=inputs,
            sequence_length=seq_len, dtype=tf.float32, time_major=False,
            scope="bidirectional_dynamic_rnn")
        context_features = tf.concat(context_features, axis=-1)
        # self-attention
        context_features = self_attention(context_features,
                                          num_units=num_units,
                                          return_alphas=False,
                                          reuse=reuse,
                                          name="self_attention")
        # dense layer project
        context_features = tf.layers.dense(
            context_features,
            units=num_units,
            use_bias=use_bias,
            kernel_initializer=tf.glorot_uniform_initializer(),
            activation=activation,
            name="context_project")
        return context_features
def rnn_encode(self):
    (self.enc_out, (fw_state, bw_state)) = bidirectional_dynamic_rnn(
        LSTMCell(self.hidden_dim), LSTMCell(self.hidden_dim),
        inputs=self.x_emb, sequence_length=self.x_len, dtype=tf.float32)
    self.enc_out = tf.concat(self.enc_out, 2)
    decoder_init_c = Dense(self.hidden_dim, dtype=tf.float32,
                           name='decoder_c', activation=tf.nn.tanh)
    decoder_init_h = Dense(self.hidden_dim, dtype=tf.float32,
                           name='decoder_h', activation=tf.nn.tanh)
    self.init_state = LSTMStateTuple(
        decoder_init_c(tf.concat([fw_state.c, bw_state.c], 1)),
        decoder_init_h(tf.concat([fw_state.h, bw_state.h], 1)))
def rnn_encode(self):
    # Bi-directional rnn encoder (forward, backward)
    (self.enc_out, (fw_state, bw_state)) = bidirectional_dynamic_rnn(
        LSTMCell(self.hidden_dim), LSTMCell(self.hidden_dim),
        inputs=self.x_emb, sequence_length=self.x_len, dtype=tf.float32)
    self.enc_out = tf.concat(self.enc_out, 2)
    # Init for decoder's first state
    decoder_init_c = Dense(self.hidden_dim, name="decoder_c",
                           activation=tf.nn.tanh,
                           bias_initializer=self.initializer)
    decoder_init_h = Dense(self.hidden_dim, name="decoder_h",
                           activation=tf.nn.tanh,
                           bias_initializer=self.initializer)
    self.init_state = LSTMStateTuple(
        decoder_init_c(tf.concat([fw_state.c, bw_state.c], 1)),
        decoder_init_h(tf.concat([fw_state.h, bw_state.h], 1)))
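# A self-contained sketch of the state bridge used by the two rnn_encode
# variants above, assuming TF 1.x with tf.layers.Dense and
# tf.nn.rnn_cell.LSTMStateTuple: the concatenated fw/bw final states are
# 2 * hidden_dim wide, so dense tanh layers project them back to hidden_dim
# before seeding a unidirectional decoder.
import tensorflow as tf

hidden_dim = 64
fw_state = tf.nn.rnn_cell.LSTMStateTuple(
    tf.placeholder(tf.float32, [None, hidden_dim]),
    tf.placeholder(tf.float32, [None, hidden_dim]))
bw_state = tf.nn.rnn_cell.LSTMStateTuple(
    tf.placeholder(tf.float32, [None, hidden_dim]),
    tf.placeholder(tf.float32, [None, hidden_dim]))
bridge_c = tf.layers.Dense(hidden_dim, activation=tf.nn.tanh, name='decoder_c')
bridge_h = tf.layers.Dense(hidden_dim, activation=tf.nn.tanh, name='decoder_h')
init_state = tf.nn.rnn_cell.LSTMStateTuple(
    bridge_c(tf.concat([fw_state.c, bw_state.c], 1)),   # [batch, hidden_dim]
    bridge_h(tf.concat([fw_state.h, bw_state.h], 1)))   # [batch, hidden_dim]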
def cal_linear_decomposition_representation(passage_representation,
                                            passage_lengths, cosine_matrix,
                                            is_training,
                                            lex_decomposition_dim,
                                            dropout_rate):
    # passage_representation: [batch_size, passage_len, dim]
    # cosine_matrix: [batch_size, passage_len, question_len]
    passage_similarity = tf.reduce_max(cosine_matrix, 2)  # [batch_size, passage_len]
    similar_weights = tf.expand_dims(passage_similarity, -1)  # [batch_size, passage_len, 1]
    dissimilar_weights = tf.subtract(1.0, similar_weights)
    similar_component = tf.multiply(passage_representation, similar_weights)
    dissimilar_component = tf.multiply(passage_representation,
                                       dissimilar_weights)
    all_component = tf.concat([similar_component, dissimilar_component], 2)
    if lex_decomposition_dim == -1:
        return all_component
    with tf.variable_scope('lex_decomposition'):
        lex_lstm_cell_fw = tf.contrib.rnn.BasicLSTMCell(lex_decomposition_dim)
        lex_lstm_cell_bw = tf.contrib.rnn.BasicLSTMCell(lex_decomposition_dim)
        if is_training:
            lex_lstm_cell_fw = tf.contrib.rnn.DropoutWrapper(
                lex_lstm_cell_fw, output_keep_prob=(1 - dropout_rate))
            lex_lstm_cell_bw = tf.contrib.rnn.DropoutWrapper(
                lex_lstm_cell_bw, output_keep_prob=(1 - dropout_rate))
        lex_lstm_cell_fw = tf.contrib.rnn.MultiRNNCell([lex_lstm_cell_fw])
        lex_lstm_cell_bw = tf.contrib.rnn.MultiRNNCell([lex_lstm_cell_bw])

        (lex_features_fw, lex_features_bw), _ = rnn.bidirectional_dynamic_rnn(
            lex_lstm_cell_fw, lex_lstm_cell_bw, all_component,
            dtype=tf.float32, sequence_length=passage_lengths)
        lex_features = tf.concat([lex_features_fw, lex_features_bw], 2)
    return lex_features
def _sentence_encoding(step_inputs, seq_length, cell_size):
    f_rnn_cell = tf.nn.rnn_cell.LSTMCell(cell_size, state_is_tuple=True)
    b_rnn_cell = tf.nn.rnn_cell.LSTMCell(cell_size, state_is_tuple=True)
    _inputs = tf.pack(step_inputs, axis=1)
    outputs, states = rnn.bidirectional_dynamic_rnn(
        f_rnn_cell, b_rnn_cell, _inputs,
        sequence_length=tf.cast(seq_length, tf.int64),
        time_major=False, dtype=tf.float32, scope='birnn')
    output_fw, output_bw = outputs
    states_fw, states_bw = states
    steps_fw = tf.unpack(output_fw, axis=1)
    steps_bw = tf.unpack(output_bw, axis=1)
    # first forward step and first backward step: [batch_size, 2*cell_size]
    sent_encoding = tf.concat(1, [steps_fw[0], steps_bw[0]])
    return sent_encoding
def stack_bidirectional_dynamic_rnn(cells_fw,
                                    cells_bw,
                                    inputs,
                                    initial_states_fw=None,
                                    initial_states_bw=None,
                                    dtype=None,
                                    sequence_length=None,
                                    parallel_iterations=None,
                                    scope=None):
    """Creates a dynamic bidirectional recurrent neural network.

    Stacks several bidirectional rnn layers. The combined forward and
    backward layer outputs are used as input of the next layer.
    tf.bidirectional_rnn does not allow to share forward and backward
    information between layers. The input_size of the first forward and
    backward cells must match. The initial state for both directions is
    zero and no intermediate states are returned.

    Args:
        cells_fw: List of instances of RNNCell, one per layer, to be used
            for forward direction.
        cells_bw: List of instances of RNNCell, one per layer, to be used
            for backward direction.
        inputs: The RNN inputs. This must be a tensor of shape
            `[batch_size, max_time, ...]`, or a nested tuple of such
            elements.
        initial_states_fw: (optional) A list of the initial states (one per
            layer) for the forward RNN. Each tensor must have an appropriate
            type and shape `[batch_size, cell_fw.state_size]`.
        initial_states_bw: (optional) Same as for `initial_states_fw`, but
            using the corresponding properties of `cells_bw`.
        dtype: (optional) The data type for the initial state. Required if
            either of the initial states are not provided.
        sequence_length: (optional) An int32/int64 vector, size
            `[batch_size]`, containing the actual lengths for each of the
            sequences.
        parallel_iterations: (Default: 32). The number of iterations to run
            in parallel. Those operations which do not have any temporal
            dependency and can be run in parallel will be. This parameter
            trades off time for space. Values >> 1 use more memory but take
            less time, while smaller values use less memory but computations
            take longer.
        scope: VariableScope for the created subgraph; defaults to None.

    Returns:
        A tuple (outputs, output_state_fw, output_state_bw) where:
            outputs: Output `Tensor` shaped
                `[batch_size, max_time, layers_output]`, where layers_output
                are depth-concatenated forward and backward outputs.
            output_states_fw is the final states, one tensor per layer, of
                the forward rnn.
            output_states_bw is the final states, one tensor per layer, of
                the backward rnn.

    Raises:
        TypeError: If `cell_fw` or `cell_bw` is not an instance of `RNNCell`.
        ValueError: If inputs is `None`.
    """
    if not cells_fw:
        raise ValueError(
            "Must specify at least one fw cell for BidirectionalRNN.")
    if not cells_bw:
        raise ValueError(
            "Must specify at least one bw cell for BidirectionalRNN.")
    if not isinstance(cells_fw, list):
        raise ValueError(
            "cells_fw must be a list of RNNCells (one per layer).")
    if not isinstance(cells_bw, list):
        raise ValueError(
            "cells_bw must be a list of RNNCells (one per layer).")
    if len(cells_fw) != len(cells_bw):
        raise ValueError(
            "Forward and Backward cells must have the same depth.")
    if (initial_states_fw is not None and
            (not isinstance(initial_states_fw, list) or
             len(initial_states_fw) != len(cells_fw))):
        raise ValueError(
            "initial_states_fw must be a list of state tensors (one per layer).")
    if (initial_states_bw is not None and
            (not isinstance(initial_states_bw, list) or
             len(initial_states_bw) != len(cells_bw))):
        raise ValueError(
            "initial_states_bw must be a list of state tensors (one per layer).")

    states_fw = []
    states_bw = []
    prev_layer = inputs

    with vs.variable_scope(scope or "stack_bidirectional_rnn"):
        for i, (cell_fw, cell_bw) in enumerate(zip(cells_fw, cells_bw)):
            initial_state_fw = None
            initial_state_bw = None
            if initial_states_fw:
                initial_state_fw = initial_states_fw[i]
            if initial_states_bw:
                initial_state_bw = initial_states_bw[i]
            with vs.variable_scope("cell_%d" % i):
                outputs, (state_fw, state_bw) = rnn.bidirectional_dynamic_rnn(
                    cell_fw,
                    cell_bw,
                    prev_layer,
                    initial_state_fw=initial_state_fw,
                    initial_state_bw=initial_state_bw,
                    sequence_length=sequence_length,
                    parallel_iterations=parallel_iterations,
                    dtype=dtype)
                # Concat the outputs to create the new input.
                prev_layer = array_ops.concat(outputs, 2)
            states_fw.append(state_fw)
            states_bw.append(state_bw)

    return prev_layer, tuple(states_fw), tuple(states_bw)
unpacked_seqs_embed = tf.unpack(tf.transpose(seqs_embed, perm=[1, 0, 2]))
with tf.variable_scope("naive",
                       initializer=tf.truncated_normal_initializer(seed=1)) as scope:
    if FLAGS.rnn_type == "fw":
        n_output1, _ = rnn.rnn(rnn_cell.BasicRNNCell(rnn_dim),
                               unpacked_seqs_embed, dtype=tf.float32,
                               sequence_length=seq_len)
    elif FLAGS.rnn_type == "bi":
        n_output1, _, _ = rnn.bidirectional_rnn(
            rnn_cell.BasicRNNCell(rnn_dim // 2),
            rnn_cell.BasicRNNCell(rnn_dim // 2),
            unpacked_seqs_embed, dtype=tf.float32, sequence_length=seq_len)

with tf.variable_scope("dynamic",
                       initializer=tf.truncated_normal_initializer(seed=1)) as scope:
    if FLAGS.rnn_type == "fw":
        n_output2, _ = rnn.dynamic_rnn(rnn_cell.BasicRNNCell(rnn_dim),
                                       seqs_embed, dtype=tf.float32,
                                       time_major=False,
                                       sequence_length=seq_len)
    elif FLAGS.rnn_type == "bi" and tf.__version__.startswith("0.10"):
        n_output2, _ = rnn.bidirectional_dynamic_rnn(
            rnn_cell.BasicRNNCell(rnn_dim // 2),
            rnn_cell.BasicRNNCell(rnn_dim // 2),
            seqs_embed, dtype=tf.float32, time_major=False,
            sequence_length=seq_len)

avgLosses = []
# average pooling loss
if FLAGS.rnn_type == "fw" or (FLAGS.rnn_type == "bi" and
                              tf.__version__.startswith("0.10")):
    outputs = [n_output1, n_output2]
else:
    outputs = [n_output1]
for i, n_output in enumerate(outputs):
    if isinstance(n_output, list):
        # rnn.rnn, rnn.bidirectional_rnn
        neuron = tf.transpose(tf.pack(n_output), perm=[1, 0, 2])
    elif isinstance(n_output, tuple):
        # rnn.bidirectional_dynamic_rnn
        neuron = tf.concat(2, n_output)
    else:
        # rnn.dynamic_rnn
        neuron = n_output
    avgLosses.append(tf.reduce_mean(tf.reduce_mean(neuron, [1, 0]) - label))
n_output5, _, _ = rnn.bidirectional_rnn(rnn_cell.BasicRNNCell(1),
                                        rnn_cell.BasicRNNCell(1),
                                        unpacked_seqs_embeds[5],
                                        dtype=tf.float32,
                                        sequence_length=None)

with tf.variable_scope("batch4",
                       initializer=tf.truncated_normal_initializer(seed=1)) as scope:
    if FLAGS.rnn_type == "fw":
        n_output6, _ = rnn.rnn(rnn_cell.BasicRNNCell(2),
                               unpacked_seqs_embeds[3], dtype=tf.float32,
                               sequence_length=tf.constant([6, 3]))
    elif FLAGS.rnn_type == "bi":
        n_output6, _, _ = rnn.bidirectional_rnn(
            rnn_cell.BasicRNNCell(1), rnn_cell.BasicRNNCell(1),
            unpacked_seqs_embeds[3], dtype=tf.float32,
            sequence_length=tf.constant([6, 3]))

with tf.variable_scope("batch5",
                       initializer=tf.truncated_normal_initializer(seed=1)) as scope:
    if FLAGS.rnn_type == "fw":
        n_output7, _ = rnn.rnn(rnn_cell.BasicRNNCell(2),
                               unpacked_seqs_embeds[5], dtype=tf.float32,
                               sequence_length=tf.constant([6, 3]))
    elif FLAGS.rnn_type == "bi":
        n_output7, _, _ = rnn.bidirectional_rnn(
            rnn_cell.BasicRNNCell(1), rnn_cell.BasicRNNCell(1),
            unpacked_seqs_embeds[5], dtype=tf.float32,
            sequence_length=tf.constant([6, 3]))

with tf.variable_scope("dynamic_batch1",
                       initializer=tf.truncated_normal_initializer(seed=1)) as scope:
    if FLAGS.rnn_type == "fw":
        n_output8, _ = rnn.dynamic_rnn(rnn_cell.BasicRNNCell(2),
                                       seqs_embeds[3], dtype=tf.float32,
                                       time_major=False)
    elif FLAGS.rnn_type == "bi" and tf.__version__.startswith("0.10"):
        n_output8, _ = rnn.bidirectional_dynamic_rnn(
            rnn_cell.BasicRNNCell(1), rnn_cell.BasicRNNCell(1),
            seqs_embeds[3], dtype=tf.float32, time_major=False,
            sequence_length=tf.constant([6, 6], dtype=tf.int64))

with tf.variable_scope("dynamic_batch2",
                       initializer=tf.truncated_normal_initializer(seed=1)) as scope:
    if FLAGS.rnn_type == "fw":
        n_output9, _ = rnn.dynamic_rnn(rnn_cell.BasicRNNCell(2),
                                       seqs_embeds[5], dtype=tf.float32,
                                       time_major=False)
    elif FLAGS.rnn_type == "bi" and tf.__version__.startswith("0.10"):
        n_output9, _ = rnn.bidirectional_dynamic_rnn(
            rnn_cell.BasicRNNCell(1), rnn_cell.BasicRNNCell(1),
            seqs_embeds[5], dtype=tf.float32, time_major=False,
            sequence_length=tf.constant([6, 6], dtype=tf.int64))

with tf.variable_scope("dynamic_batch3",
                       initializer=tf.truncated_normal_initializer(seed=1)) as scope:
    if FLAGS.rnn_type == "fw":
        n_output10, _ = rnn.dynamic_rnn(rnn_cell.BasicRNNCell(2),
                                        seqs_embeds[3], dtype=tf.float32,
                                        sequence_length=tf.constant([6, 3]),
                                        time_major=False)
    elif FLAGS.rnn_type == "bi" and tf.__version__.startswith("0.10"):
        n_output10, _ = rnn.bidirectional_dynamic_rnn(
            rnn_cell.BasicRNNCell(1), rnn_cell.BasicRNNCell(1),
            seqs_embeds[3], dtype=tf.float32,
            sequence_length=tf.constant([6, 3], dtype=tf.int64),
            time_major=False)

with tf.variable_scope("dynamic_batch4",
                       initializer=tf.truncated_normal_initializer(seed=1)) as scope:
    if FLAGS.rnn_type == "fw":
        n_output11, _ = rnn.dynamic_rnn(rnn_cell.BasicRNNCell(2),
                                        seqs_embeds[5], dtype=tf.float32,
                                        sequence_length=tf.constant([6, 3]),
                                        time_major=False)
    elif FLAGS.rnn_type == "bi" and tf.__version__.startswith("0.10"):
        n_output11, _ = rnn.bidirectional_dynamic_rnn(
            rnn_cell.BasicRNNCell(1), rnn_cell.BasicRNNCell(1),
            seqs_embeds[5], dtype=tf.float32,
            sequence_length=tf.constant([6, 3], dtype=tf.int64),
            time_major=False)