def BiRNN(x, weights, biases):
    # Prepare data shape to match `bidirectional_rnn` function requirements
    # Current data input shape: (batch_size, n_steps, n_input)
    # Required shape: 'n_steps' tensors list of shape (batch_size, n_input)

    # Permuting batch_size and n_steps
    x = tf.transpose(x, [1, 0, 2])
    # Reshape to (n_steps*batch_size, n_input)
    x = tf.reshape(x, [-1, n_input])
    # Split to get a list of 'n_steps' tensors of shape (batch_size, n_input)
    x = tf.split(0, n_steps, x)

    # Define lstm cells with tensorflow
    # Forward direction cell
    lstm_fw_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
    # Backward direction cell
    lstm_bw_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)

    # Get lstm cell output
    try:
        outputs, _, _ = rnn.bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, x,
                                              dtype=tf.float32)
    except Exception:  # Old TensorFlow version only returns outputs not states
        outputs = rnn.bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, x,
                                        dtype=tf.float32)

    # Linear activation, using rnn inner loop last output
    return tf.matmul(outputs[-1], weights['out']) + biases['out']
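# The transpose/reshape/split preamble above exists only because the legacy
# rnn.bidirectional_rnn wants a Python list of per-time-step tensors, and the
# try/except covers an API whose return value changed between releases. On newer
# TensorFlow 1.x releases (where tf.nn.rnn_cell and tf.nn.bidirectional_dynamic_rnn
# are available) the same layer can take the batched 3-D tensor directly. A minimal
# sketch, assuming the same n_hidden, weights, and biases as above (not part of the
# original code):
def BiRNN_dynamic(x, weights, biases):
    # x: (batch_size, n_steps, n_input) -- fed directly, no transpose/split dance
    lstm_fw_cell = tf.nn.rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
    lstm_bw_cell = tf.nn.rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)

    # outputs is a (fw, bw) pair of (batch_size, n_steps, n_hidden) tensors
    outputs, _ = tf.nn.bidirectional_dynamic_rnn(lstm_fw_cell, lstm_bw_cell, x,
                                                 dtype=tf.float32)

    outputs = tf.concat(outputs, 2)   # (batch_size, n_steps, 2*n_hidden)
    last = outputs[:, -1, :]          # last time step, like outputs[-1] above
    return tf.matmul(last, weights['out']) + biases['out']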
def blstm_layer(_X, _x_length, batch_s):
    # input shape: (batch_size, n_steps, n_input)
    _X = tf.transpose(_X, [1, 0, 2])  # permute n_steps and batch_size
    # Reshape to prepare input to hidden activation
    _X = tf.reshape(_X, [-1, n_input])  # (n_steps*batch_size, n_input)
    # Linear activation
    _X = tf.matmul(_X, hidden_weights) + hidden_biases

    # Forward direction cell
    lstm_fw_cell = rnn_cell.BasicLSTMCell(n_hidden_layer, forget_bias=1.0)
    # Backward direction cell
    lstm_bw_cell = rnn_cell.BasicLSTMCell(n_hidden_layer, forget_bias=1.0)

    # Split data because rnn cell needs a list of inputs for the RNN inner loop
    _X = tf.split(0, max_input_timesteps, _X)  # n_steps * (batch_size, n_hidden)

    istate_fw = lstm_fw_cell.zero_state(batch_s, tf.float32)
    istate_bw = lstm_bw_cell.zero_state(batch_s, tf.float32)

    # Get lstm cell output
    outputs, output_state_fw, output_state_bw = rnn.bidirectional_rnn(
        lstm_fw_cell, lstm_bw_cell, _X,
        initial_state_fw=istate_fw,
        initial_state_bw=istate_bw,
        sequence_length=_x_length)

    outputs = tf.concat(0, outputs)
    activation = tf.matmul(outputs, output_weights) + output_biases
    return tf.reshape(activation, [max_input_timesteps, batch_s, n_output_classes])
def BiRNN(_X, _istate_fw, _istate_bw, _weights, _biases):
    # input shape: (batch_size, n_steps, n_input)
    _X = tf.transpose(_X, [1, 0, 2])  # permute n_steps and batch_size
    # Reshape to prepare input to hidden activation
    _X = tf.reshape(_X, [-1, n_input])  # (n_steps*batch_size, n_input)
    # Linear activation
    _X = tf.matmul(_X, _weights['hidden']) + _biases['hidden']

    # Define lstm cells with tensorflow
    # Forward direction cell
    lstm_fw_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
    # Backward direction cell
    lstm_bw_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)

    # Split data because rnn cell needs a list of inputs for the RNN inner loop
    _X = tf.split(0, n_steps, _X)  # n_steps * (batch_size, n_hidden)

    # Get lstm cell output
    outputs = rnn.bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, _X,
                                    initial_state_fw=_istate_fw,
                                    initial_state_bw=_istate_bw)

    # Linear activation: apply the output projection at every time step
    output = [tf.matmul(o, _weights['out']) + _biases['out'] for o in outputs]
    return output
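# Because this variant applies the output projection at every time step and
# returns a Python list, callers usually pack the list back into one tensor.
# A short usage sketch (the pred name, istate_* feeds, and shapes are
# illustrative, not from the original; old-style tf.pack API to match the
# surrounding code):
pred = BiRNN(x, istate_fw, istate_bw, weights, biases)  # list of n_steps tensors
pred = tf.pack(pred)                                    # (n_steps, batch_size, n_classes)
pred = tf.transpose(pred, [1, 0, 2])                    # (batch_size, n_steps, n_classes)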
def __init__(self, config): sent_len = config.sent_len batch_size = config.batch_size vocab_size = config.vocab_size embed_size = config.embed_size num_layers = config.num_layers state_size = config.state_size keep_prob = config.keep_prob self.input_data = tf.placeholder(tf.int32, [batch_size, sent_len]) self.lengths = tf.placeholder(tf.int64, [batch_size]) self.targets = tf.placeholder(tf.float32, [batch_size, 1]) # Get embedding layer which requires CPU with tf.device("/cpu:0"): embeding = tf.get_variable("embeding", [vocab_size, embed_size]) inputs = tf.nn.embedding_lookup(embeding, self.input_data) #LSTM 1 -> Encode the characters of every tok into a fixed dense representation with tf.variable_scope("rnn1", reuse=None): cell = rnn_cell.LSTMCell(state_size, input_size=embed_size, initializer=tf.contrib.layers.xavier_initializer()) back_cell = rnn_cell.LSTMCell(state_size, input_size=embed_size, initializer=tf.contrib.layers.xavier_initializer()) cell = rnn_cell.DropoutWrapper( cell, input_keep_prob=keep_prob, output_keep_prob=keep_prob) back_cell = rnn_cell.DropoutWrapper( back_cell, input_keep_prob=keep_prob, output_keep_prob=keep_prob) cell = rnn_cell.MultiRNNCell([cell] * num_layers) backcell = rnn_cell.MultiRNNCell([back_cell] * num_layers) rnn_splits = [tf.squeeze(input_, [1]) for input_ in tf.split(1, sent_len, inputs)] # Run the bidirectional rnn outputs, last_fw_state, last_bw_state = rnn.bidirectional_rnn( cell, backcell, rnn_splits, sequence_length=self.lengths, dtype=tf.float32) sent_out = tf.concat(1, [last_fw_state, last_bw_state]) #sent_out = outputs[-1] #sent_out = tf.add_n(outputs) output_size = state_size*4 with tf.variable_scope("linear", reuse=None): w = tf.get_variable("w", [output_size, 1]) b = tf.get_variable("b", [1], initializer=tf.constant_initializer(0.0)) raw_logits = tf.matmul(sent_out, w) + b self.probabilities = tf.sigmoid(raw_logits) self.cost = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(raw_logits, self.targets)) #Calculate gradients and propagate #Aggregation method 2 is really important for rnn per the tensorflow issues list tvars = tf.trainable_variables() self.lr = tf.Variable(0.0, trainable=False) #Assign to overwrite optimizer = tf.train.AdamOptimizer() grads, _vars = zip(*optimizer.compute_gradients(self.cost, tvars, aggregation_method=2)) grads, self.grad_norm = tf.clip_by_global_norm(grads, config.max_grad_norm) self.train_op = optimizer.apply_gradients(zip(grads, _vars))
def BiRNN(_X, _istate_fw, _istate_bw, _weights, _biases, _batch_size, _seq_len):
    # BiRNN requires sequence_length to be an int64 vector of shape [batch_size]
    # Note: TensorFlow 0.6.0 requires the BiRNN sequence_length parameter to be set
    # For a better implementation with the latest version of TensorFlow, check below
    _seq_len = tf.fill([_batch_size], tf.constant(_seq_len, dtype=tf.int64))

    # input shape: (batch_size, n_steps, n_input)
    _X = tf.transpose(_X, [1, 0, 2])  # permute n_steps and batch_size
    # Reshape to prepare input to hidden activation
    _X = tf.reshape(_X, [-1, n_input])  # (n_steps*batch_size, n_input)
    # Linear activation
    _X = tf.matmul(_X, _weights['hidden']) + _biases['hidden']

    # Define lstm cells with tensorflow
    # Forward direction cell
    lstm_fw_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
    # Backward direction cell
    lstm_bw_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)

    # Split data because rnn cell needs a list of inputs for the RNN inner loop
    _X = tf.split(0, n_steps, _X)  # n_steps * (batch_size, n_hidden)

    # Get lstm cell output
    outputs = rnn.bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, _X,
                                    initial_state_fw=_istate_fw,
                                    initial_state_bw=_istate_bw,
                                    sequence_length=_seq_len)

    # Linear activation
    # Get inner loop last output
    return tf.matmul(outputs[-1], _weights['out']) + _biases['out']
def BiRNN(self, _X, _istate_fw, _istate_bw, _weights, _biases):
    # input shape: (batch_size, n_steps, n_input)
    _X = tf.transpose(_X, [1, 0, 2])  # permute n_steps and batch_size
    # Reshape to prepare input to hidden activation
    # (n_steps*batch_size, n_input)
    _X = tf.reshape(_X, [-1, self.config.num_input])
    # Linear activation
    _X = tf.matmul(_X, _weights['hidden']) + _biases['hidden']

    # Forward direction cell
    rnn_fw_cell = rnn_cell.BasicLSTMCell(self.config.num_hidden)
    # Backward direction cell
    rnn_bw_cell = rnn_cell.BasicLSTMCell(self.config.num_hidden)

    # Split data because rnn cell needs a list of inputs for the RNN inner loop
    # n_steps * (batch_size, n_hidden)
    _X = tf.split(0, self.config.num_steps, _X)

    # Get lstm cell output
    outputs, final_fw, final_bw = rnn.bidirectional_rnn(
        rnn_fw_cell, rnn_bw_cell, _X,
        initial_state_fw=_istate_fw,
        initial_state_bw=_istate_bw)

    # Linear activation
    return [tf.matmul(output, _weights['out']) + _biases['out']
            for output in outputs], final_fw, final_bw
def BiRNN(self, _X, _istate_fw, _istate_bw, _weights, _biases):
    # input shape: (batch_size, n_steps, n_input)
    _X = tf.transpose(_X, [1, 0, 2])  # permute n_steps and batch_size
    # Reshape to prepare input to hidden activation
    # (n_steps*batch_size, n_input)
    _X = tf.reshape(_X, [-1, self.config.num_input])
    # Linear activation
    _X = tf.matmul(_X, _weights['hidden']) + _biases['hidden']

    # Forward direction cell
    single_fw_cell = BasicRNNCellReLu(self.config.num_hidden)
    single_fw_cell = rnn_cell.DropoutWrapper(single_fw_cell,
                                             self.config.input_keep_prob,
                                             self.config.output_keep_prob)
    rnn_fw_cell = rnn_cell.MultiRNNCell([single_fw_cell] * self.config.model_depth)

    # Backward direction cell
    single_bw_cell = BasicRNNCellReLu(self.config.num_hidden)
    single_bw_cell = rnn_cell.DropoutWrapper(single_bw_cell,
                                             self.config.input_keep_prob,
                                             self.config.output_keep_prob)
    rnn_bw_cell = rnn_cell.MultiRNNCell([single_bw_cell] * self.config.model_depth)

    # Split data because rnn cell needs a list of inputs for the RNN inner loop
    # n_steps * (batch_size, n_hidden)
    _X = tf.split(0, self.config.num_steps, _X)

    # Get lstm cell output
    outputs, final_fw, final_bw = rnn.bidirectional_rnn(
        rnn_fw_cell, rnn_bw_cell, _X,
        initial_state_fw=_istate_fw,
        initial_state_bw=_istate_bw)

    # Linear activation
    return [tf.matmul(output, _weights['out']) + _biases['out']
            for output in outputs], final_fw, final_bw
def rnn_estimator(X, y): """RNN estimator with target predictor function on top.""" X = input_op_fn(X) if cell_type == 'rnn': cell_fn = rnn_cell.BasicRNNCell elif cell_type == 'gru': cell_fn = rnn_cell.GRUCell elif cell_type == 'lstm': cell_fn = rnn_cell.BasicLSTMCell else: raise ValueError( "cell_type {} is not supported. ".format(cell_type)) if bidirection: # forward direction cell rnn_fw_cell = rnn_cell.MultiRNNCell([cell_fn(rnn_size)] * num_layers) # backward direction cell rnn_bw_cell = rnn_cell.MultiRNNCell([cell_fn(rnn_size)] * num_layers) # pylint: disable=unexpected-keyword-arg, no-value-for-parameter encoding = rnn.bidirectional_rnn(rnn_fw_cell, rnn_bw_cell, sequence_length=sequence_length, initial_state=initial_state) else: cell = rnn_cell.MultiRNNCell([cell_fn(rnn_size)] * num_layers) _, encoding = rnn.rnn(cell, X, dtype=tf.float32, sequence_length=sequence_length, initial_state=initial_state) return target_predictor_fn(encoding[-1], y)
def BiRNN(x, weights, biases):
    # Prepare data shape to match `bidirectional_rnn` function requirements
    # Current data input shape: (batch_size, n_steps, n_input)

    # Permuting batch_size and n_steps
    x = tf.transpose(x, [1, 0, 2])
    # Reshape to (n_steps*batch_size, n_input)
    x = tf.reshape(x, [-1, n_input])
    # Split to get a list of 'n_steps' tensors of shape (batch_size, n_input)
    x = tf.split(0, n_steps, x)

    # Define lstm cells with tensorflow
    # Forward direction cell
    lstm_fw_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
    # Backward direction cell
    lstm_bw_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)

    # Get lstm cell output
    # The key point is here: two identical lstm_cells are created above, but all
    # of the forward/backward logic is handled inside the bidirectional_rnn
    # function, so you do not need to track which cell runs forward and which
    # runs backward yourself.
    outputs = rnn.bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, x,
                                    dtype=tf.float32)

    # Linear activation, using rnn inner loop last output
    return tf.matmul(outputs[-1], weights['out']) + biases['out']
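# The BiRNN helpers in this collection all leave `weights` and `biases` to the
# surrounding script. Since the bidirectional output at each step is the forward
# and backward activations concatenated, the output projection must have
# 2*n_hidden rows. A minimal sketch of the assumed dictionaries (sizes and
# initializers are illustrative, not from the original sources):
n_input, n_hidden, n_classes = 28, 128, 10   # example sizes only

weights = {
    # optional input projection used by the variants with a 'hidden' layer
    'hidden': tf.Variable(tf.random_normal([n_input, n_hidden])),
    # fw and bw outputs are concatenated, hence 2 * n_hidden rows
    'out': tf.Variable(tf.random_normal([2 * n_hidden, n_classes])),
}
biases = {
    'hidden': tf.Variable(tf.random_normal([n_hidden])),
    'out': tf.Variable(tf.random_normal([n_classes])),
}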
def bidirectional_lstm(inputs, keep_prob, INPUT_SIZE, HIDDEN_SIZE, SEQ_LENGTH):
    initializer = tf.random_uniform_initializer(-0.01, 0.01)
    cell_F = LSTMCell(HIDDEN_SIZE, INPUT_SIZE, initializer=initializer)
    cell_B = LSTMCell(HIDDEN_SIZE, INPUT_SIZE, initializer=initializer)
    inputs_ = [tf.nn.dropout(each, keep_prob) for each in inputs]
    outputs = bidirectional_rnn(cell_F, cell_B, inputs_,
                                initial_state_fw=None, initial_state_bw=None,
                                sequence_length=None, dtype=tf.float32)
    return outputs
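# A usage sketch for the helper above: `inputs` must already be a Python list of
# (batch_size, INPUT_SIZE) tensors, usually produced by splitting a batch-major
# placeholder. The placeholder and sizes below are illustrative, not from the
# original (old tf.split argument order, as in the rest of this file):
INPUT_SIZE, HIDDEN_SIZE, SEQ_LENGTH = 300, 128, 40
x = tf.placeholder(tf.float32, [None, SEQ_LENGTH, INPUT_SIZE])
step_inputs = [tf.squeeze(t, [1]) for t in tf.split(1, SEQ_LENGTH, x)]
outputs = bidirectional_lstm(step_inputs, keep_prob=0.5,
                             INPUT_SIZE=INPUT_SIZE, HIDDEN_SIZE=HIDDEN_SIZE,
                             SEQ_LENGTH=SEQ_LENGTH)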
def BiLSTM(self, x, n_steps, n_input, seq_len):
    # (batch_size, n_steps, n_input) -> n_steps * (batch_size, n_input)
    x = tf.transpose(x, [1, 0, 2])
    x = tf.reshape(x, [-1, n_input])
    x = tf.split(0, n_steps, x)
    lstm_fw_cell = rnn_cell.BasicLSTMCell(self.n_hidden, forget_bias=1.0)
    lstm_bw_cell = rnn_cell.BasicLSTMCell(self.n_hidden, forget_bias=1.0)
    outputs, _, _ = rnn.bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, x,
                                          dtype=tf.float32,
                                          sequence_length=seq_len)
    # Pack the per-step outputs back into a batch-major tensor
    outputs = tf.pack(outputs)                   # (n_steps, batch_size, 2*n_hidden)
    outputs = tf.transpose(outputs, [1, 0, 2])   # (batch_size, n_steps, 2*n_hidden)
    return self.last_relevant(outputs, seq_len)
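# `self.last_relevant` is referenced above but not included in this excerpt. A
# common way to implement it is to gather, for each sequence, the output at its
# last valid time step. A sketch under that assumption (the helper below is
# illustrative, not the original):
def last_relevant(output, length):
    # output: (batch_size, max_steps, out_dim); length: per-example true lengths
    batch_size = tf.shape(output)[0]
    max_steps = tf.shape(output)[1]
    out_dim = int(output.get_shape()[2])
    # Index b * max_steps + (length[b] - 1) selects the last valid step of row b
    index = tf.range(0, batch_size) * max_steps + (tf.cast(length, tf.int32) - 1)
    flat = tf.reshape(output, [-1, out_dim])
    return tf.gather(flat, index)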
def __init__(self, conf): self.conf = conf cell_fw = BasicLSTMCell(self.conf.rnn_size) cell_bw = BasicLSTMCell(self.conf.rnn_size) if conf.keep_prob < 1.0 and not conf.infer: cell_fw = DropoutWrapper(cell_fw, output_keep_prob=conf.keep_prob) cell_bw = DropoutWrapper(cell_bw, output_keep_prob=conf.keep_prob) self.cell_fw = cell_fw = MultiRNNCell([cell_fw] * self.conf.num_layers) self.cell_bw = cell_bw = MultiRNNCell([cell_bw] * self.conf.num_layers) self.input_data = tf.placeholder(tf.int32, [self.conf.batch_size, self.conf.seq_length]) self.targets = tf.placeholder(tf.int32, [self.conf.batch_size, self.conf.seq_length]) self.initial_state_fw = cell_fw.zero_state(self.conf.batch_size, tf.float32) self.initial_state_bw = cell_bw.zero_state(self.conf.batch_size, tf.float32) with tf.variable_scope('rnn'): softmax_w = tf.get_variable("softmax_w", [self.conf.rnn_size*2, self.conf.output_size]) softmax_b = tf.get_variable("softmax_b", [self.conf.output_size]) embedding = tf.get_variable("embedding", [self.conf.nerloader.vocab_size, self.conf.rnn_size]) _inputs = tf.nn.embedding_lookup(embedding, self.input_data) if conf.keep_prob < 1.0 and not conf.infer: _inputs = tf.nn.dropout(_inputs,conf.keep_prob) inputs = tf.split(1, conf.seq_length, _inputs) inputs = [tf.squeeze(input_, [1]) for input_ in inputs] outputs_bi = rnn.bidirectional_rnn(cell_fw, cell_bw, inputs, initial_state_fw=self.initial_state_fw, initial_state_bw=self.initial_state_bw, scope='rnn') output = tf.reshape(tf.concat(1, outputs_bi), [-1, self.conf.rnn_size*2]) self.logits = tf.nn.xw_plus_b(output, softmax_w, softmax_b) self.probs = tf.nn.softmax(self.logits) self.loss_weights = [tf.ones([self.conf.batch_size * self.conf.seq_length])] loss = seq2seq.sequence_loss_by_example([self.logits], [tf.reshape(self.targets, [-1])], self.loss_weights) self.cost = (tf.reduce_sum(loss) / self.conf.batch_size / self.conf.seq_length) tf.scalar_summary("loss",self.cost) self.out = output self.lr = tf.Variable(0.0, trainable=False) tvars = tf.trainable_variables() grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars), self.conf.grad_clip) optimizer = tf.train.AdamOptimizer(self.lr) self.train_op = optimizer.apply_gradients(zip(grads, tvars)) self.merged_summary_op = tf.merge_all_summaries()
def BiRNN(inputs, _seq_length):
    # input shape: (batch_size, seq_width, embedding_size) ==> (seq_width, batch_size, embedding_size)
    inputs = tf.transpose(inputs, [1, 0, 2])
    # Reshape before feeding to hidden activation layers
    inputs = tf.reshape(inputs, [-1, embedding_size])
    # Hidden activation
    #inputs = tf.nn.relu(tf.matmul(inputs, weights['hidden']) + biases['hidden'])
    # Split the inputs to make a list of inputs for the rnn
    inputs = tf.split(0, seq_width, inputs)  # seq_width * (batch_size, n_hidden)

    initializer = tf.random_uniform_initializer(-1, 1)
    with tf.variable_scope('forward'):
        #fw_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
        #lstm1 = rnn_cell.LSTMCell(n_hidden, embedding_size, initializer=initializer)
        #lstm2 = rnn_cell.LSTMCell(n_hidden, embedding_size, initializer=initializer)
        #fw_cell = rnn_cell.MultiRNNCell([lstm1, lstm2])
        fw_cell = rnn_cell.LSTMCell(n_hidden, embedding_size, initializer=initializer)
    with tf.variable_scope('backward'):
        #bw_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
        #lstm3 = rnn_cell.LSTMCell(n_hidden, embedding_size, initializer=initializer)
        #lstm4 = rnn_cell.LSTMCell(n_hidden, embedding_size, initializer=initializer)
        #bw_cell = rnn_cell.MultiRNNCell([lstm3, lstm4])
        bw_cell = rnn_cell.LSTMCell(n_hidden, embedding_size, initializer=initializer)

    # Get lstm cell output
    outputs, _, _ = rnn.bidirectional_rnn(fw_cell, bw_cell, inputs,
                                          dtype="float32",
                                          sequence_length=_seq_length)
    outputs_tensor = tf.reshape(tf.concat(0, outputs), [-1, 2 * n_hidden])

    logits = []
    for i in xrange(len(outputs)):
        final_transformed_val = tf.matmul(outputs[i], weights['out']) + biases['out']
        logits.append(final_transformed_val)
    logits = tf.reshape(tf.concat(0, logits), [-1, n_classes])
    return logits, outputs_tensor
def prediction(self):
    fw_cell = rnn_cell.LSTMCell(self._num_hidden)
    fw_cell = rnn_cell.DropoutWrapper(fw_cell, output_keep_prob=self.dropout)
    bw_cell = rnn_cell.LSTMCell(self._num_hidden)
    bw_cell = rnn_cell.DropoutWrapper(bw_cell, output_keep_prob=self.dropout)
    if self._num_layers > 1:
        fw_cell = rnn_cell.MultiRNNCell([fw_cell] * self._num_layers)
        bw_cell = rnn_cell.MultiRNNCell([bw_cell] * self._num_layers)
    output, _, _ = rnn.bidirectional_rnn(
        fw_cell, bw_cell,
        tf.unpack(tf.transpose(self.data, perm=[1, 0, 2])),
        dtype=tf.float32,
        sequence_length=self.length)
    max_length = int(self.target.get_shape()[1])
    num_classes = int(self.target.get_shape()[2])
    weight, bias = self._weight_and_bias(2 * self._num_hidden, num_classes)
    output = tf.reshape(
        tf.transpose(tf.pack(output), perm=[1, 0, 2]),
        [-1, 2 * self._num_hidden])
    prediction = tf.nn.softmax(tf.matmul(output, weight) + bias)
    prediction = tf.reshape(prediction, [-1, max_length, num_classes])
    return prediction
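# `self._weight_and_bias` is defined elsewhere in that class. A typical
# implementation, sized for the softmax projection over the concatenated
# forward/backward outputs (a sketch, not the original code):
@staticmethod
def _weight_and_bias(in_size, out_size):
    # Small random weights and a small positive bias for the softmax layer.
    weight = tf.truncated_normal([in_size, out_size], stddev=0.01)
    bias = tf.constant(0.1, shape=[out_size])
    return tf.Variable(weight), tf.Variable(bias)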
def build_model (self, is_training=True): batch_size = self.batch_size n_steps = self.num_steps size = self.n_hidden config = self.config lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(size, forget_bias=1.0) # add dropout to output if is_training and config.keep_prob < 1: lstm_cell = tf.nn.rnn_cell.DropoutWrapper( lstm_cell, output_keep_prob=config.keep_prob) #cell = tf.nn.rnn_cell.MultiRNNCell([lstm_cell] * config.num_layers) cell_fw = cell_bw = lstm_cell initial_state = lstm_cell.zero_state(batch_size, tf.float32) with tf.device("/cpu:0"): embedding = tf.get_variable("embedding", [self.vocab_size, size]) inputs = tf.nn.embedding_lookup(embedding, self._inputs) if is_training and config.keep_prob < 1: inputs = tf.nn.dropout(inputs, config.keep_prob) # Build RNN inputs = [tf.squeeze(input_, [1]) for input_ in tf.split(1, n_steps, inputs)] #outputs, state = rnn.rnn(cell, inputs, initial_state=initial_state) outputs_pair, state_fw, state_bw = rnn.bidirectional_rnn(cell_fw, cell_bw, inputs, \ initial_state_fw=initial_state, initial_state_bw=initial_state) # dtype=None, sequence_length=None, scope=None): outputs = [] for out in outputs_pair: out_fw, out_bw = tf.split(1, 2, out) outputs.append (1.0 * out_fw + 0.0 * out_bw) #outputs_fw = [ [0] for out in outputs_pair] self.outputs = outputs return self.outputs
def BiLSTMgraph(self, _X, _C, _T, _istate_fw, _istate_bw, _weights, _biases): # input: a [len_sent,len_seq] (e.g. 7x5) # transform into embeddings if _T: emb_x = tf.nn.embedding_lookup(self._weights['w_emb'], _X) emb_c = tf.nn.embedding_lookup(self._weights['c_emb'], _C) emb_t = tf.nn.embedding_lookup(self._weights['t_emb'], _T) # Linear activation _X = tf.matmul(emb_x, self._weights['hidden_w']) + tf.matmul( emb_c, self._weights['hidden_c']) + tf.matmul( emb_t, self._weights['hidden_t']) + self._biases['hidden_b'] else: emb_x = tf.nn.embedding_lookup(self._weights['w_emb'], _X) emb_c = tf.nn.embedding_lookup(self._weights['c_emb'], _C) # Linear activation _X = tf.matmul(emb_x, self._weights['hidden_w']) + tf.matmul( emb_c, self._weights['hidden_c']) + self._biases['hidden_b'] # Define lstm cells with tensorflow # Forward direction cell lstm_fw_cell = rnn_cell.BasicLSTMCell(self.num_hidden, forget_bias=1.0) lstm_fw_cell = tf.nn.rnn_cell.DropoutWrapper(lstm_fw_cell, output_keep_prob=0.5) # Backward direction cell lstm_bw_cell = rnn_cell.BasicLSTMCell(self.num_hidden, forget_bias=1.0) lstm_bw_cell = tf.nn.rnn_cell.DropoutWrapper(lstm_bw_cell, output_keep_prob=0.5) # Split data because rnn cell needs a list of inputs for the RNN inner loop _X = tf.split(0, self.sent_max_len, _X) # Get lstm cell output outputs = rnn.bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, _X, initial_state_fw=self.istate_fw, initial_state_bw=self.istate_bw, sequence_length=self.seq_len) return outputs
def rnn_estimator(X, y):
    """RNN estimator with target predictor function on top."""
    X = input_op_fn(X)
    if cell_type == 'rnn':
        cell_fn = rnn_cell.BasicRNNCell
    elif cell_type == 'gru':
        cell_fn = rnn_cell.GRUCell
    elif cell_type == 'lstm':
        cell_fn = rnn_cell.BasicLSTMCell
    else:
        raise ValueError("cell_type {} is not supported. ".format(cell_type))
    if bidirection:
        # forward direction cell
        rnn_fw_cell = cell_fn(rnn_size)
        # backward direction cell
        rnn_bw_cell = cell_fn(rnn_size)
        # pass the input list and a dtype; bidirectional_rnn cannot build
        # the graph without them
        encoding = rnn.bidirectional_rnn(rnn_fw_cell, rnn_bw_cell, X,
                                         dtype=tf.float32)
    else:
        cell = cell_fn(rnn_size)
        _, encoding = rnn.rnn(cell, X, dtype=tf.float32)
    return target_predictor_fn(encoding[-1], y)
def BiRNN (inputs, _seq_length): # input shape: (batch_size, seq_width, embedding_size) ==> (seq_width, batch_size, embedding_size) inputs = tf.transpose(inputs, [1, 0, 2]) # Reshape before feeding to hidden activation layers inputs = tf.reshape(inputs, [-1, embedding_size]) # Hidden activation #inputs = tf.nn.relu(tf.matmul(inputs, weights['hidden']) + biases['hidden']) # Split the inputs to make a list of inputs for the rnn inputs = tf.split(0, seq_width, inputs) # seq_width * (batch_size, n_hidden) initializer = tf.random_uniform_initializer(-1,1) with tf.variable_scope('forward'): #fw_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0) fw_cell = rnn_cell.LSTMCell(n_hidden, embedding_size, initializer=initializer) with tf.variable_scope('backward'): #bw_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0) bw_cell = rnn_cell.LSTMCell(n_hidden, embedding_size, initializer=initializer) # Get lstm cell output outputs,_,_ = rnn.bidirectional_rnn(fw_cell, bw_cell, inputs, dtype="float32", sequence_length=_seq_length) outputs_tensor = tf.reshape(tf.concat(0, outputs),[-1, 2*n_hidden]) logits = [] for i in xrange(len(outputs)): final_transformed_val = tf.matmul(outputs[i],weights['out']) + biases['out'] ''' # TODO replace with zeroes where sentences are shorter and biases should not be calculated for length in tf_train_seq_length: tf.shape() if length <= i: final_transformed_val[tf_train_seq_length.index(length)] = empty_pos ''' logits.append(final_transformed_val) logits = tf.reshape(tf.concat(0, logits), [-1, n_classes]) return logits, outputs_tensor
def BiLSTM(_X, _C, _T, _istate_fw, _istate_bw, _weights, _biases):
    # input: a [len_sent,len_seq] (e.g. 7x5)
    # transform into embeddings
    with tf.device("/cpu:0"):
        emb_x = tf.nn.embedding_lookup(_weights['w_emb'], _X)
        emb_t = tf.nn.embedding_lookup(_weights['t_emb'], _T)
        emb_c = tf.nn.embedding_lookup(_weights['c_emb'], _C)

    # Linear activation
    _X = tf.matmul(emb_x, _weights['hidden_w']) + \
         tf.matmul(emb_c, _weights['hidden_c']) + \
         tf.matmul(emb_t, _weights['hidden_t']) + _biases['hidden_b']

    # Define lstm cells with tensorflow
    # Forward direction cell
    lstm_fw_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
    # Backward direction cell
    lstm_bw_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)

    # Split data because rnn cell needs a list of inputs for the RNN inner loop
    _X = tf.split(0, max_sent_len, _X)

    # Get lstm cell output
    outputs = rnn.bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, _X,
                                    initial_state_fw=_istate_fw,
                                    initial_state_bw=_istate_bw,
                                    sequence_length=seq_len)
    return outputs
def BiRNN(self, scope): # input shape: (batch_size, step_size, input_dim) # we need to permute step_size and batch_size(change the position of step and batch size) data = tf.transpose(self.input_data, [1, 0, 2]) # Reshape to prepare input to hidden activation # (step_size*batch_size, n_input), flattens the batch and step #after the above transformation, data is now (step_size*batch_size, input_dim) data = tf.reshape(data, [-1, self.config.input_dim + 1]) # Define lstm cells with tensorflow with tf.variable_scope(str(scope)): # Linear activation data = tf.matmul(data, self.weights['hidden']) + self.biases['hidden'] data = tf.nn.dropout(data, self.config.dropout) # Define a cell if self.config.cell_type == 'GRU': lstm_fw_cell = rnn_cell.GRUCell(self.config.hidden_dim) lstm_bw_cell = rnn_cell.GRUCell(self.config.hidden_dim) else: lstm_fw_cell = rnn_cell.LSTMCell( self.config.hidden_dim, forget_bias=self.config.forget_bias, use_peepholes=self.config.use_peepholes, cell_clip=self.config.cell_clip) lstm_bw_cell = rnn_cell.LSTMCell( self.config.hidden_dim, forget_bias=self.config.forget_bias, use_peepholes=self.config.use_peepholes, cell_clip=self.config.cell_clip) # Split data because rnn cell needs a list of inputs for the RNN inner loop data = tf.split(0, self.config.step_size, data) # step_size * (batch_size, hidden_dim) # Get lstm cell output print 'running single stack Bi-directional RNN.......' outputs = rnn.bidirectional_rnn( lstm_fw_cell, lstm_bw_cell, data, initial_state_fw=self.init_state_fw, initial_state_bw=self.init_state_bw, scope="RNN1") # for basic rnn prediction we really just interested in the last state's output, we need to average them in this case total_outputs = tf.div(tf.add_n([outputs[2], outputs[1]]), 2.0) return [ tf.nn.dropout( tf.matmul(total_outputs, self.weights['out1']) + self.biases['out1'], self.config.dropout), tf.nn.dropout( tf.matmul(total_outputs, self.weights['out2']) + self.biases['out2'], self.config.dropout), tf.nn.dropout( tf.matmul(total_outputs, self.weights['out3']) + self.biases['out3'], self.config.dropout), tf.nn.dropout( tf.matmul(total_outputs, self.weights['out4']) + self.biases['out4'], self.config.dropout), tf.nn.dropout( tf.matmul(total_outputs, self.weights['out5']) + self.biases['out5'], self.config.dropout), ]
def __init__(self, num_chars, num_classes, num_steps=100, num_epochs=100, model_path='models/', \ embedding_matrix=None, emb_dim=100,emb_trainable=False, is_training=True, is_crf=True, weight=False, l2_reg_lambda=0.2): # Parameter self.max_f1 = 0 self.learning_rate = 0.002 self.dropout_rate = 0.5 self.batch_size = 128 self.num_layers = 1 self.hidden_dim = 100 self.num_epochs = num_epochs self.num_steps = num_steps self.num_chars = num_chars #self.num_classes = num_classes self.num_classes = 2 self.model_path = model_path self.char2id, self.id2char = helper.loadMap(os.path.join(model_path, "char2id")) self.label2id, self.id2label = helper.loadMap(os.path.join(model_path, "label2id")) self.evaluate_labels = set() for l in self.label2id.keys(): if l[:2] in ['B-', 'I-', 'E-', 'S-']: self.evaluate_labels.add(l[2:]) elif l not in ['OTHER','<PAD>']: self.evaluate_labels.add(l) self.evaluate_labels = list(self.evaluate_labels) self.emb_dim = emb_dim # placeholder of x, y and weight #self.inputs = tf.placeholder(tf.int32, [None, self.num_steps, 2]) self.inputs = tf.placeholder(tf.int32, [None, self.num_steps]) self.pairs = tf.placeholder(tf.int32, [None, self.num_steps]) self.targets = tf.placeholder(tf.float32, [None, self.num_classes]) self.pair_indices = tf.placeholder(tf.int32, [None]) self.pair_segment_ids = tf.placeholder(tf.int32, [None]) # char embedding if embedding_matrix != None: self.embedding = tf.Variable(embedding_matrix, trainable=emb_trainable, name="emb", dtype=tf.float32) else: self.embedding = tf.get_variable("emb", [self.num_chars, self.emb_dim]) self.inputs_emb = tf.nn.embedding_lookup(self.embedding, self.inputs) # shape: [batch_size, num_steps, emb_dim] self.inputs_emb = tf.transpose(self.inputs_emb, [1, 0, 2]) # shape: [num_steps, batch_size, emb_dim] self.inputs_emb = tf.reshape(self.inputs_emb, [-1, self.emb_dim]) # shape: [(num_steps * batch_size), emb_dim] self.inputs_emb = tf.split(0, self.num_steps, self.inputs_emb) # num_steps tensor,[batch_size, emb_dim] # lstm cell lstm_cell_fw = tf.nn.rnn_cell.BasicLSTMCell(self.hidden_dim) lstm_cell_bw = tf.nn.rnn_cell.BasicLSTMCell(self.hidden_dim) # dropout if is_training: lstm_cell_fw = tf.nn.rnn_cell.DropoutWrapper(lstm_cell_fw, output_keep_prob=(1 - self.dropout_rate)) lstm_cell_bw = tf.nn.rnn_cell.DropoutWrapper(lstm_cell_bw, output_keep_prob=(1 - self.dropout_rate)) lstm_cell_fw = tf.nn.rnn_cell.MultiRNNCell([lstm_cell_fw] * self.num_layers) lstm_cell_bw = tf.nn.rnn_cell.MultiRNNCell([lstm_cell_bw] * self.num_layers) # get the length of each sample, shape [batch_size] self.length = tf.reduce_sum(tf.sign(self.inputs), reduction_indices=1) self.length = tf.cast(self.length, tf.int32) # forward and backward # outputs: total num_steps tensors, each tensor's shape: [batch_size, hidden_dim * 2] self.outputs, _, _ = rnn.bidirectional_rnn( lstm_cell_fw, lstm_cell_bw, self.inputs_emb, dtype=tf.float32, sequence_length=self.length ) # softmax self.outputs = tf.reshape(tf.concat(1, self.outputs), [-1, self.hidden_dim * 2]) # shape: [batch_size*num_steps, hidden_dim*2] self.softmax_w = tf.get_variable("softmax_w", [self.hidden_dim * 2, self.num_classes]) self.softmax_b = tf.get_variable("softmax_b", [self.num_classes]) self.outputs_emb = tf.reshape(self.outputs, [-1, num_steps, self.hidden_dim*2]) # [batch_size, num_steps, hidden_dim*2] self.attention_w = tf.get_variable("attention_w", [self.hidden_dim*2, 1]) self.attentions = tf.reshape(tf.matmul(self.outputs, self.attention_w),[-1, num_steps] ) # batch_size, num_steps] 
self.attentions = tf.nn.softmax(self.attentions) self.attentions = tf.reshape(self.attentions, [self.batch_size, 1, num_steps]) #[batch_size, 1, num_steps] self.outputs_emb = tf.batch_matmul(self.attentions, self.outputs_emb) #[batch_size, 1, hidden_dim*2] self.outputs_emb = tf.tanh(self.outputs_emb) self.outputs = tf.reshape(self.outputs_emb, [-1, self.hidden_dim*2] ) #[batch_size, hidden_dim*2] self.logits = tf.matmul(self.outputs, self.softmax_w) + self.softmax_b #[batch_size, num_classes] self.predictions = tf.argmax(self.logits, 1, name="predictions") correct_predictions = tf.equal(self.predictions, tf.argmax(self.targets, 1)) self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"), name="accuracy") l2_loss = tf.constant(0.0) l2_loss += tf.nn.l2_loss(self.softmax_w) l2_loss += tf.nn.l2_loss(self.softmax_b) losses = tf.nn.softmax_cross_entropy_with_logits(self.logits, self.targets) self.loss = tf.reduce_mean(losses) + l2_reg_lambda * l2_loss # summary self.train_summary = tf.scalar_summary("loss", self.loss) self.val_summary = tf.scalar_summary("loss", self.loss) self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.loss)
def __init__(self, vocab_size, embedding_size, learning_rate, learning_rate_decay_op, memory_hops, dropout_rate, q_depth, a_depth, episodic_m_depth, ep_depth, m_input_size, attention_ff_l1_size, max_gradient_norm, maximum_story_length=100, maximum_question_length=20, use_lstm=False, forward_only=False): # initialization self.vocab_size = vocab_size self.embedding_size = embedding_size self.learning_rate = tf.Variable(float(learning_rate), trainable=False) self.learning_rate_decay_op = tf.Variable(float(learning_rate_decay_op), trainable=False) self.dropout_rate = dropout_rate self.global_step = tf.Variable(0, trainable=False, name='global_step') self.q_depth = q_depth # question RNN depth self.a_depth = a_depth # answer RNN depth self.m_depth = episodic_m_depth # memory cell depth self.ep_depth = ep_depth # episodic depth self.max_gradient_norm = max_gradient_norm self.memory_hops = memory_hops # number of episodic memory pass self.m_input_size = m_input_size self.m_size = embedding_size # memory cell size self.a_size = embedding_size # answer RNN size self.attention_ff_l1_size = attention_ff_l1_size # attention_ff_l2_size print("[*] Creating Dynamic Memory Network ...") # question module def seq2seq_fq(encoder_inputs, cell, mask=None): return seq2seq.sentence_embedding_rnn_q( encoder_inputs, self.vocab_size, cell, self.embedding_size, mask) def seq2seq_fs(encoder_inputs, cell, mask=None): return seq2seq.sentence_embedding_rnn_s( encoder_inputs, self.vocab_size, cell, self.embedding_size, mask) # attention gate in episodic # TODO: force gate logits to be sparse, add L1 norm regularization # Sentence token placeholder self.story = [] for i in range(maximum_story_length): self.story.append(tf.placeholder(tf.int32, shape=[None], name="story{0}".format(i))) self.story_mask = tf.placeholder(tf.int32, shape=[None], name="story_mask") self.story_len = tf.placeholder(tf.int32, shape=[], name="story length") print (self.story_len) self.question = [] for i in range(maximum_question_length): self.question.append(tf.placeholder(tf.int32, shape=[None], name="question{0}".format(i))) self.answer = tf.placeholder(tf.int64, name="answer") # self.story_len = 1#= tf.reshape(tf.shape(self.story_mask), []) # TODO: fixed lens problem #self.story_len = 5 # configuration of attention gate # print (self.story) with tf.variable_scope("answer"): softmax_weights = tf.Variable(tf.truncated_normal([self.a_size, self.vocab_size], -0.1, 0.1), name="softmax_weights") softmax_biases = tf.Variable(tf.zeros([self.vocab_size]), name="softmax_biases") answer_weights = tf.Variable(tf.truncated_normal([self.m_size, self.a_size], -0.1, 0.1), name="answer_weights") answer_biases = tf.Variable(tf.zeros([self.a_size]), name="answer_biases") #------------ question module ------------ single_cell = tf.nn.rnn_cell.GRUCell(self.embedding_size) if use_lstm: single_cell = tf.nn.rnn_cell.BasicLSTMCell(self.embedding_size) if not forward_only and dropout_rate < 1: single_cell = tf.nn.rnn_cell.DropoutWrapper( single_cell, output_keep_prob=dropout_rate) question_cell = single_cell if q_depth > 1: question_cell = tf.nn.rnn_cell.MultiRNNCell([single_cell]*q_depth) question = seq2seq_fq(self.question, question_cell) self.question_state = question[0] #for e in question: #------------ Input module ------------ reader_cell = tf.nn.rnn_cell.GRUCell(self.embedding_size) if use_lstm: reader_cell = tf.nn.rnn_cell.BasicLSTMCell(self.embedding_size) if not forward_only and dropout_rate < 1: reader_cell = tf.nn.rnn_cell.DropoutWrapper( reader_cell, 
output_keep_prob=dropout_rate) # Embedded toekn into vector, feed into rnn cell return cell state fusion_fw_cell = tf.nn.rnn_cell.GRUCell(self.embedding_size) fusion_bw_cell = tf.nn.rnn_cell.GRUCell(self.embedding_size) if use_lstm: fusion_fw_cell = tf.nn.rnn_cell.BasicLSTMCell(self.embedding_size) fusion_bw_cell = tf.nn.rnn_cell.BasicLSTMCell(self.embedding_size) if not forward_only and dropout_rate < 1: fusion_fw_cell = tf.nn.rnn_cell.DropoutWrapper( fusion_fw_cell, output_keep_prob=dropout_rate) fusion_bw_cell = tf.nn.rnn_cell.DropoutWrapper( fusion_bw_cell, output_keep_prob=dropout_rate) (_facts, _, _) = rnn.bidirectional_rnn(fusion_fw_cell,fusion_bw_cell, seq2seq_fs(self.story, reader_cell),dtype=tf.float32) self.facts = _facts[0] #------------ episodic memory module ------------ # TODO: use self.facts to extract ep_size self.ep_size = 2*self.embedding_size# episodic cell size # construct memory cell #single_cell = tf.nn.rnn_cell.BasicLSTMCell(self.m_size) mem_cell = cell.MemCell(self.m_size) #mem_cell = tf.nn.rnn_cell.GRUCell(self.m_size) self.episodic_array = tf.Variable(tf.random_normal([1,1])) # construct episodic_cell # for i in xrange(self.memory_hops): single_cell = cell.MGRUCell(self.ep_size) ep_cell = cell.MultiMGRUCell([single_cell] * ep_depth) e = [] mem_state = self.question_state q_double = tf.concat(1, [self.question_state, self.question_state]) mem_state_double = tf.concat(1, [mem_state, mem_state]) # TODO change z_dim to be z_dim = self.embedding_size * 8 self.attention_ff_size = z_dim self.attention_ff_l2_size = 1 # self._ep_initial_state = [] # for _cell in range(ep_cell) # self._ep_initial_state.append = _cell.zero_state(1, tf.float32) # TODO change batch size # initialize parameters with tf.variable_scope("episodic"): # parameters of attention gate l1_weights = tf.Variable(tf.truncated_normal([self.attention_ff_size, self.attention_ff_l1_size], -0.1, 0.1), name="l1_weights") l1_biases = tf.Variable(tf.zeros([self.attention_ff_l1_size]), name="l1_biases") l2_weights = tf.Variable(tf.truncated_normal([self.attention_ff_l1_size, self.attention_ff_l2_size], -0.1, 0.1), name="l2_weights") l2_biases = tf.Variable(tf.zeros([self.attention_ff_l2_size]), name="l2_biases") # paramters of episodic mem_weights = tf.Variable(tf.truncated_normal([self.m_input_size, self.m_size], -0.1, 0.1), name="mem_weights") mem_biases = tf.Variable(tf.zeros([self.m_size]), name="mem_biases") # initializing variable of feedforward nn seq2seq.def_feedforward_nn(self.attention_ff_size, self.attention_ff_l1_size, self.attention_ff_l2_size) for hops in xrange(self.memory_hops): # gate attention network step = tf.constant(0) tf.while_loop(lambda step, story_len, facts, q_double, mem_state_double: tf.less(step, story_len), lambda step, story_len, facts, q_double, mem_state_double: self.mem_body(step, story_len, facts, q_double, mem_state_double), [step, self.story_len, self.facts, q_double, mem_state_double]) #self.episodic_gate = tf.reshape(tf.nn.softmax(self.episodic_array),[1]) self.episodic_gate = tf.nn.softmax(tf.reshape(self.episodic_array, [1,-1])) print ("episodic_gate",self.episodic_gate) # attention GRU # output, context = cell.rnn(ep_cell[hops], [self.facts], self.episodic_gate, scope="epsodic", dtype=tf.float32) output, context = cell.rnn_ep(ep_cell, [self.facts], self.episodic_gate, dtype=tf.float32, scope="episodic") e.append(output) # memory updates #_, mem_state = mem_cell(context_state, mem_state) # GRU #_, mem_state = cell.rnn_mem(mem_cell, [context], self.question_state, 
mem_state, self.m_input_size, self.m_size, dtype=tf.float32) mem_state = mem_cell(context, self.question_state, mem_state, self.m_input_size, self.m_size) # if the attentioned module is last e, it means the episodic pass is over if np.argmax(np.asarray(e[-1])) == len(e[-1])-1: break #------------ answer ------------ # TODO: use decoder sequence to generate answer answer_steps = 1 single_cell = tf.nn.rnn_cell.GRUCell(self.a_size) answer_cell = single_cell if a_depth > 1: answer_cell =tf.nn.rnn_cell.MultiRNNCell([single_cell] * a_depth) a_state = mem_state for step in range(answer_steps): y = tf.nn.softmax(tf.matmul(a_state, answer_weights)) (answer, a_state) = answer_cell(tf.concat(1, [self.question_state, y]), a_state) #(answer, a_state) = answer_cell(tf.concat(1, [question, mem_state]), a_state) self.logits = tf.nn.softmax(tf.matmul(answer, softmax_weights)+softmax_biases) answer = tf.reshape(tf.one_hot(self.answer, self.vocab_size, 1.0, 0.0), [1,self.vocab_size]) self.loss = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits(self.logits, answer)) params = tf.trainable_variables() # testing for e in params: print(e.get_shape(), e.name, type(e)) if not forward_only: self.gradient_norms = [] self.updates = [] optimizer = tf.train.GradientDescentOptimizer(self.learning_rate) gradients = tf.gradients(self.loss, params) clipped_gradients, norm = tf.clip_by_global_norm(gradients, self.max_gradient_norm) self.gradient_norms = norm self.updates = optimizer.apply_gradients( zip(clipped_gradients, params), global_step=self.global_step) self.saver = tf.train.Saver(tf.all_variables())
def __init__(self, num_chars, num_classes, num_steps=200, num_epochs=100, embedding_matrix=None, is_training=True, is_crf=True, weight=False): # Parameter self.max_f1 = 0 self.learning_rate = 0.002 self.dropout_rate = 0.5 self.batch_size = 128 self.num_layers = 1 self.emb_dim = 100 self.hidden_dim = 100 self.num_epochs = num_epochs self.num_steps = num_steps self.num_chars = num_chars self.num_classes = num_classes # placeholder of x, y and weight self.inputs = tf.placeholder(tf.int32, [None, self.num_steps]) self.targets = tf.placeholder(tf.int32, [None, self.num_steps]) self.targets_weight = tf.placeholder(tf.float32, [None, self.num_steps]) self.targets_transition = tf.placeholder(tf.int32, [None]) # char embedding if embedding_matrix != None: self.embedding = tf.Variable(embedding_matrix, trainable=False, name="emb", dtype=tf.float32) else: self.embedding = tf.get_variable("emb", [self.num_chars, self.emb_dim]) self.inputs_emb = tf.nn.embedding_lookup(self.embedding, self.inputs) self.inputs_emb = tf.transpose(self.inputs_emb, [1, 0, 2]) self.inputs_emb = tf.reshape(self.inputs_emb, [-1, self.emb_dim]) self.inputs_emb = tf.split(0, self.num_steps, self.inputs_emb) # lstm cell lstm_cell_fw = tf.nn.rnn_cell.BasicLSTMCell(self.hidden_dim) lstm_cell_bw = tf.nn.rnn_cell.BasicLSTMCell(self.hidden_dim) # dropout if is_training: lstm_cell_fw = tf.nn.rnn_cell.DropoutWrapper( lstm_cell_fw, output_keep_prob=(1 - self.dropout_rate)) lstm_cell_bw = tf.nn.rnn_cell.DropoutWrapper( lstm_cell_bw, output_keep_prob=(1 - self.dropout_rate)) lstm_cell_fw = tf.nn.rnn_cell.MultiRNNCell([lstm_cell_fw] * self.num_layers) lstm_cell_bw = tf.nn.rnn_cell.MultiRNNCell([lstm_cell_bw] * self.num_layers) # get the length of each sample self.length = tf.reduce_sum(tf.sign(self.inputs), reduction_indices=1) self.length = tf.cast(self.length, tf.int32) # forward and backward self.outputs, _, _ = rnn.bidirectional_rnn(lstm_cell_fw, lstm_cell_bw, self.inputs_emb, dtype=tf.float32, sequence_length=self.length) # softmax self.outputs = tf.reshape(tf.concat(1, self.outputs), [-1, self.hidden_dim * 2]) self.softmax_w = tf.get_variable( "softmax_w", [self.hidden_dim * 2, self.num_classes]) self.softmax_b = tf.get_variable("softmax_b", [self.num_classes]) self.logits = tf.matmul(self.outputs, self.softmax_w) + self.softmax_b if not is_crf: pass else: self.tags_scores = tf.reshape( self.logits, [self.batch_size, self.num_steps, self.num_classes]) self.transitions = tf.get_variable( "transitions", [self.num_classes + 1, self.num_classes + 1]) dummy_val = -1000 class_pad = tf.Variable(dummy_val * np.ones( (self.batch_size, self.num_steps, 1)), dtype=tf.float32) self.observations = tf.concat(2, [self.tags_scores, class_pad]) begin_vec = tf.Variable(np.array( [[dummy_val] * self.num_classes + [0] for _ in range(self.batch_size)]), trainable=False, dtype=tf.float32) end_vec = tf.Variable(np.array([[0] + [dummy_val] * self.num_classes for _ in range(self.batch_size)]), trainable=False, dtype=tf.float32) begin_vec = tf.reshape(begin_vec, [self.batch_size, 1, self.num_classes + 1]) end_vec = tf.reshape(end_vec, [self.batch_size, 1, self.num_classes + 1]) self.observations = tf.concat( 1, [begin_vec, self.observations, end_vec]) self.mask = tf.cast( tf.reshape(tf.sign(self.targets), [self.batch_size * self.num_steps]), tf.float32) # point score self.point_score = tf.gather( tf.reshape(self.tags_scores, [-1]), tf.range(0, self.batch_size * self.num_steps) * self.num_classes + tf.reshape(self.targets, [self.batch_size * self.num_steps])) 
self.point_score *= self.mask # transition score self.trans_score = tf.gather(tf.reshape(self.transitions, [-1]), self.targets_transition) # real score self.target_path_score = tf.reduce_sum( self.point_score) + tf.reduce_sum(self.trans_score) # all path score self.total_path_score, self.max_scores, self.max_scores_pre = self.forward( self.observations, self.transitions, self.length) # loss self.loss = -(self.target_path_score - self.total_path_score) # summary self.train_summary = tf.scalar_summary("loss", self.loss) self.val_summary = tf.scalar_summary("loss", self.loss) self.optimizer = tf.train.AdamOptimizer( learning_rate=self.learning_rate).minimize(self.loss)
def shared_layer(input_data, config, is_training): """Build the model to decoding Args: input_data = size batch_size X num_steps X embedding size Returns: output units """ if config.bidirectional == True: if config.lstm == True: cell_fw = rnn_cell.BasicLSTMCell(config.encoder_size, forget_bias=1.0) cell_bw = rnn_cell.BasicLSTMCell(config.encoder_size, forget_bias=1.0) else: cell_fw = rnn_cell.GRUCell(config.encoder_size) cell_bw = rnn_cell.GRUCell(config.encoder_size) inputs = [ tf.squeeze(input_, [1]) for input_ in tf.split(1, config.num_steps, input_data) ] if is_training and config.keep_prob < 1: cell_fw = rnn_cell.DropoutWrapper( cell_fw, output_keep_prob=config.keep_prob) cell_bw = rnn_cell.DropoutWrapper( cell_bw, output_keep_prob=config.keep_prob) cell_fw = rnn_cell.MultiRNNCell([cell_fw] * config.num_shared_layers) cell_bw = rnn_cell.MultiRNNCell([cell_bw] * config.num_shared_layers) initial_state_fw = cell_fw.zero_state(config.batch_size, tf.float32) initial_state_bw = cell_bw.zero_state(config.batch_size, tf.float32) encoder_outputs, _, _ = rnn.bidirectional_rnn( cell_fw, cell_bw, inputs, initial_state_fw=initial_state_fw, initial_state_bw=initial_state_bw, scope="encoder_rnn") else: if config.lstm == True: cell = rnn_cell.BasicLSTMCell(config.encoder_size) else: cell = rnn_cell.GRUCell(config.encoder_size) inputs = [ tf.squeeze(input_, [1]) for input_ in tf.split(1, config.num_steps, input_data) ] if is_training and config.keep_prob < 1: cell = rnn_cell.DropoutWrapper(cell, output_keep_prob=config.keep_prob) cell = rnn_cell.MultiRNNCell([cell] * config.num_shared_layers) initial_state = cell.zero_state(config.batch_size, tf.float32) encoder_outputs, encoder_states = rnn.rnn(cell, inputs, initial_state=initial_state, scope="encoder_rnn") return encoder_outputs
def __init__(self, label_size, vocab_size, data_x_seq, data_x_ep, data_y, ep_pattern_map, FLAGS): self.ep_pattern_map = ep_pattern_map self.label_size = label_size self.vocab_size = vocab_size self.FLAGS = FLAGS # shuffle data zipped_data = zip(data_x_seq, data_x_ep, data_y) shuffle(zipped_data) data_x_seq, data_x_ep, data_y = zip(*zipped_data) # convert data to numpy arrays - labels must be dense one-hot vectors dense_y = [] for epoch, j in enumerate(data_y): dense_y.append([0] * label_size) dense_y[epoch][j] = 1 data_x_seq, data_x_ep, data_y = np.array(data_x_seq), np.array(data_x_ep), np.array(dense_y) self.train_x, self.dev_x = data_x_seq[:-FLAGS.dev_samples], data_x_seq[-FLAGS.dev_samples:] self.train_x_ep, self.dev_x_ep = data_x_ep[:-FLAGS.dev_samples], data_x_ep[-FLAGS.dev_samples:] self.train_y, self.dev_y = data_y[:-FLAGS.dev_samples], data_y[-FLAGS.dev_samples:] # set up graph with tf.device('/gpu:'+str(FLAGS.gpuid)): self.is_training = tf.placeholder(tf.bool) self.batch_size = tf.placeholder(tf.float32) self.input_x = tf.placeholder(tf.int32, [None, FLAGS.seq_len], name="input_x") self.input_y = tf.placeholder(tf.float32, [None, label_size], name="input_y") self.state = tf.placeholder(tf.float32) with tf.device('/cpu:0'): lookup_table = tf.Variable(tf.random_uniform([vocab_size, FLAGS.word_dim], -1.0, 1.0)) inputs = tf.nn.embedding_lookup(lookup_table, self.input_x) inputs = tf.nn.dropout(inputs, 1 - FLAGS.dropout) inputs = [tf.squeeze(input_, [1]) for input_ in tf.split(1, FLAGS.seq_len, inputs)] lstm_cell = tf.nn.rnn_cell.LSTMCell(num_units=FLAGS.hidden_dim, input_size=FLAGS.word_dim) if self.is_training and 1 - FLAGS.dropout < 1: lstm_cell = tf.nn.rnn_cell.DropoutWrapper(lstm_cell, output_keep_prob=1 - FLAGS.dropout) cell = tf.nn.rnn_cell.MultiRNNCell([lstm_cell] * FLAGS.num_layers) if FLAGS.bi: back_cell = tf.nn.rnn_cell.LSTMCell(num_units=FLAGS.hidden_dim, input_size=FLAGS.word_dim) if self.is_training and 1 - FLAGS.dropout < 1: back_cell = tf.nn.rnn_cell.DropoutWrapper(lstm_cell, output_keep_prob=1 - FLAGS.dropout) back_cell = tf.nn.rnn_cell.MultiRNNCell([back_cell] * FLAGS.num_layers) outputs = rnn.bidirectional_rnn(cell, back_cell, inputs, dtype=tf.float32) state = outputs[-1] + outputs[len(outputs)/2] else: outputs, state = rnn.rnn(cell, inputs, dtype=tf.float32) # lstm returns [hiddenstate+cell] -- extact just the hidden state self._state = tf.slice(state, [0, 0], tf.cast(tf.pack([self.batch_size, FLAGS.hidden_dim]), tf.int32)) softmax_w = tf.get_variable("softmax_w", [FLAGS.hidden_dim, label_size]) softmax_b = tf.get_variable("softmax_b", [label_size]) self._logits = tf.nn.xw_plus_b(self.state, softmax_w, softmax_b, name="logits") # training loss loss = tf.nn.softmax_cross_entropy_with_logits(self._logits, self.input_y) self._cost = tf.reduce_sum(loss) / self.batch_size tvars = tf.trainable_variables() grads, _ = tf.clip_by_global_norm(tf.gradients(self._cost, tvars, aggregation_method=2), FLAGS.max_grad_norm) optimizer = tf.train.AdamOptimizer(FLAGS.lr) self._train_op = optimizer.apply_gradients(zip(grads, tvars)) # eval correct_prediction = tf.equal(tf.argmax(self._logits, 1), tf.argmax(self.input_y, 1)) self._accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
def __init__(self, vocab_size, embedding_size, learning_rate, learning_rate_decay_op, memory_hops, dropout_rate, q_depth, a_depth, episodic_m_depth, ep_depth, m_input_size, attention_ff_l1_size, max_gradient_norm, maximum_story_length=100, maximum_question_length=20, use_lstm=False, forward_only=False): # initialization self.vocab_size = vocab_size self.embedding_size = embedding_size self.learning_rate = tf.Variable(float(learning_rate), trainable=False) self.learning_rate_decay_op = tf.Variable( float(learning_rate_decay_op), trainable=False) self.dropout_rate = dropout_rate self.global_step = tf.Variable(0, trainable=False, name='global_step') self.q_depth = q_depth # question RNN depth self.a_depth = a_depth # answer RNN depth self.m_depth = episodic_m_depth # memory cell depth self.ep_depth = ep_depth # episodic depth self.max_gradient_norm = max_gradient_norm self.memory_hops = memory_hops # number of episodic memory pass self.m_input_size = m_input_size self.m_size = embedding_size # memory cell size self.a_size = embedding_size # answer RNN size self.attention_ff_l1_size = attention_ff_l1_size # attention_ff_l2_size print("[*] Creating Dynamic Memory Network ...") # question module def seq2seq_fq(encoder_inputs, cell, mask=None): return seq2seq.sentence_embedding_rnn_q(encoder_inputs, self.vocab_size, cell, self.embedding_size, mask) def seq2seq_fs(encoder_inputs, cell, mask=None): return seq2seq.sentence_embedding_rnn_s(encoder_inputs, self.vocab_size, cell, self.embedding_size, mask) # attention gate in episodic # TODO: force gate logits to be sparse, add L1 norm regularization # Sentence token placeholder self.story = [] for i in range(maximum_story_length): self.story.append( tf.placeholder(tf.int32, shape=[None], name="story{0}".format(i))) self.story_mask = tf.placeholder(tf.int32, shape=[None], name="story_mask") self.story_len = tf.placeholder(tf.int32, shape=[], name="story length") print(self.story_len) self.question = [] for i in range(maximum_question_length): self.question.append( tf.placeholder(tf.int32, shape=[None], name="question{0}".format(i))) self.answer = tf.placeholder(tf.int64, name="answer") # self.story_len = 1#= tf.reshape(tf.shape(self.story_mask), []) # TODO: fixed lens problem #self.story_len = 5 # configuration of attention gate # print (self.story) with tf.variable_scope("answer"): softmax_weights = tf.Variable(tf.truncated_normal( [self.a_size, self.vocab_size], -0.1, 0.1), name="softmax_weights") softmax_biases = tf.Variable(tf.zeros([self.vocab_size]), name="softmax_biases") answer_weights = tf.Variable(tf.truncated_normal( [self.m_size, self.a_size], -0.1, 0.1), name="answer_weights") answer_biases = tf.Variable(tf.zeros([self.a_size]), name="answer_biases") #------------ question module ------------ single_cell = tf.nn.rnn_cell.GRUCell(self.embedding_size) if use_lstm: single_cell = tf.nn.rnn_cell.BasicLSTMCell(self.embedding_size) if not forward_only and dropout_rate < 1: single_cell = tf.nn.rnn_cell.DropoutWrapper( single_cell, output_keep_prob=dropout_rate) question_cell = single_cell if q_depth > 1: question_cell = tf.nn.rnn_cell.MultiRNNCell([single_cell] * q_depth) question = seq2seq_fq(self.question, question_cell) self.question_state = question[0] #for e in question: #------------ Input module ------------ reader_cell = tf.nn.rnn_cell.GRUCell(self.embedding_size) if use_lstm: reader_cell = tf.nn.rnn_cell.BasicLSTMCell(self.embedding_size) if not forward_only and dropout_rate < 1: reader_cell = tf.nn.rnn_cell.DropoutWrapper( reader_cell, 
output_keep_prob=dropout_rate) # Embedded toekn into vector, feed into rnn cell return cell state fusion_fw_cell = tf.nn.rnn_cell.GRUCell(self.embedding_size) fusion_bw_cell = tf.nn.rnn_cell.GRUCell(self.embedding_size) if use_lstm: fusion_fw_cell = tf.nn.rnn_cell.BasicLSTMCell(self.embedding_size) fusion_bw_cell = tf.nn.rnn_cell.BasicLSTMCell(self.embedding_size) if not forward_only and dropout_rate < 1: fusion_fw_cell = tf.nn.rnn_cell.DropoutWrapper( fusion_fw_cell, output_keep_prob=dropout_rate) fusion_bw_cell = tf.nn.rnn_cell.DropoutWrapper( fusion_bw_cell, output_keep_prob=dropout_rate) (_facts, _, _) = rnn.bidirectional_rnn(fusion_fw_cell, fusion_bw_cell, seq2seq_fs(self.story, reader_cell), dtype=tf.float32) self.facts = _facts[0] #------------ episodic memory module ------------ # TODO: use self.facts to extract ep_size self.ep_size = 2 * self.embedding_size # episodic cell size # construct memory cell #single_cell = tf.nn.rnn_cell.BasicLSTMCell(self.m_size) mem_cell = cell.MemCell(self.m_size) #mem_cell = tf.nn.rnn_cell.GRUCell(self.m_size) self.episodic_array = tf.Variable(tf.random_normal([1, 1])) # construct episodic_cell # for i in xrange(self.memory_hops): single_cell = cell.MGRUCell(self.ep_size) ep_cell = cell.MultiMGRUCell([single_cell] * ep_depth) e = [] mem_state = self.question_state q_double = tf.concat(1, [self.question_state, self.question_state]) mem_state_double = tf.concat(1, [mem_state, mem_state]) # TODO change z_dim to be z_dim = self.embedding_size * 8 self.attention_ff_size = z_dim self.attention_ff_l2_size = 1 # self._ep_initial_state = [] # for _cell in range(ep_cell) # self._ep_initial_state.append = _cell.zero_state(1, tf.float32) # TODO change batch size # initialize parameters with tf.variable_scope("episodic"): # parameters of attention gate l1_weights = tf.Variable(tf.truncated_normal( [self.attention_ff_size, self.attention_ff_l1_size], -0.1, 0.1), name="l1_weights") l1_biases = tf.Variable(tf.zeros([self.attention_ff_l1_size]), name="l1_biases") l2_weights = tf.Variable(tf.truncated_normal( [self.attention_ff_l1_size, self.attention_ff_l2_size], -0.1, 0.1), name="l2_weights") l2_biases = tf.Variable(tf.zeros([self.attention_ff_l2_size]), name="l2_biases") # paramters of episodic mem_weights = tf.Variable(tf.truncated_normal( [self.m_input_size, self.m_size], -0.1, 0.1), name="mem_weights") mem_biases = tf.Variable(tf.zeros([self.m_size]), name="mem_biases") # initializing variable of feedforward nn seq2seq.def_feedforward_nn(self.attention_ff_size, self.attention_ff_l1_size, self.attention_ff_l2_size) for hops in xrange(self.memory_hops): # gate attention network step = tf.constant(0) tf.while_loop( lambda step, story_len, facts, q_double, mem_state_double: tf. 
less(step, story_len), lambda step, story_len, facts, q_double, mem_state_double: self .mem_body(step, story_len, facts, q_double, mem_state_double), [step, self.story_len, self.facts, q_double, mem_state_double]) #self.episodic_gate = tf.reshape(tf.nn.softmax(self.episodic_array),[1]) self.episodic_gate = tf.nn.softmax( tf.reshape(self.episodic_array, [1, -1])) print("episodic_gate", self.episodic_gate) # attention GRU # output, context = cell.rnn(ep_cell[hops], [self.facts], self.episodic_gate, scope="epsodic", dtype=tf.float32) output, context = cell.rnn_ep(ep_cell, [self.facts], self.episodic_gate, dtype=tf.float32, scope="episodic") e.append(output) # memory updates #_, mem_state = mem_cell(context_state, mem_state) # GRU #_, mem_state = cell.rnn_mem(mem_cell, [context], self.question_state, mem_state, self.m_input_size, self.m_size, dtype=tf.float32) mem_state = mem_cell(context, self.question_state, mem_state, self.m_input_size, self.m_size) # if the attentioned module is last e, it means the episodic pass is over if np.argmax(np.asarray(e[-1])) == len(e[-1]) - 1: break #------------ answer ------------ # TODO: use decoder sequence to generate answer answer_steps = 1 single_cell = tf.nn.rnn_cell.GRUCell(self.a_size) answer_cell = single_cell if a_depth > 1: answer_cell = tf.nn.rnn_cell.MultiRNNCell([single_cell] * a_depth) a_state = mem_state for step in range(answer_steps): y = tf.nn.softmax(tf.matmul(a_state, answer_weights)) (answer, a_state) = answer_cell(tf.concat(1, [self.question_state, y]), a_state) #(answer, a_state) = answer_cell(tf.concat(1, [question, mem_state]), a_state) self.logits = tf.nn.softmax( tf.matmul(answer, softmax_weights) + softmax_biases) answer = tf.reshape(tf.one_hot(self.answer, self.vocab_size, 1.0, 0.0), [1, self.vocab_size]) self.loss = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits(self.logits, answer)) params = tf.trainable_variables() # testing for e in params: print(e.get_shape(), e.name, type(e)) if not forward_only: self.gradient_norms = [] self.updates = [] optimizer = tf.train.GradientDescentOptimizer(self.learning_rate) gradients = tf.gradients(self.loss, params) clipped_gradients, norm = tf.clip_by_global_norm( gradients, self.max_gradient_norm) self.gradient_norms = norm self.updates = optimizer.apply_gradients( zip(clipped_gradients, params), global_step=self.global_step) self.saver = tf.train.Saver(tf.all_variables())
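The episodic attention gate above feeds an interaction feature vector through a two-layer network (l1_weights/l2_weights) to produce one logit per fact. A minimal NumPy sketch of that idea, assuming the usual DMN-style feature set between a fact f, the question q, and the current memory m; the sizes and the exact feature layout (the snippet uses z_dim = self.embedding_size * 8) are illustrative, not the author's code.

import numpy as np

def attention_gate_logit(f, q, m, W1, b1, W2, b2):
    # interaction features between fact, question and memory (one common choice)
    z = np.concatenate([f * q, f * m, np.abs(f - q), np.abs(f - m)])
    h = np.tanh(z @ W1 + b1)       # first feed-forward layer (attention_ff_l1)
    return (h @ W2 + b2).item()    # scalar logit; softmax over all facts gives the gate

d, l1 = 4, 8
rng = np.random.RandomState(0)
f, q, m = rng.randn(d), rng.randn(d), rng.randn(d)
W1, b1 = rng.randn(4 * d, l1), np.zeros(l1)
W2, b2 = rng.randn(l1, 1), np.zeros(1)
print(attention_gate_logit(f, q, m, W1, b1, W2, b2))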
def lm_private(encoder_units, pos_prediction, chunk_prediction, config, is_training): """Decode model for lm Args: encoder_units - these are the encoder units: [batch_size X encoder_size] with the one the pos prediction pos_prediction: must be the same size as the encoder_size returns: logits """ # concatenate the encoder_units and the pos_prediction pos_prediction = tf.reshape( pos_prediction, [config.batch_size, config.num_steps, config.pos_embedding_size]) chunk_prediction = tf.reshape( chunk_prediction, [config.batch_size, config.num_steps, config.chunk_embedding_size]) lm_inputs = tf.concat(2, [chunk_prediction, pos_prediction, encoder_units]) with tf.variable_scope("lm_decoder"): if config.bidirectional == True: if config.lstm == True: cell_fw = rnn_cell.BasicLSTMCell(config.lm_decoder_size, forget_bias=1.0) cell_bw = rnn_cell.BasicLSTMCell(config.lm_decoder_size, forget_bias=1.0) else: cell_fw = rnn_cell.GRUCell(config.lm_decoder_size) cell_bw = rnn_cell.GRUCell(config.lm_decoder_size) if is_training and config.keep_prob < 1: cell_fw = rnn_cell.DropoutWrapper( cell_fw, output_keep_prob=config.keep_prob) cell_bw = rnn_cell.DropoutWrapper( cell_bw, output_keep_prob=config.keep_prob) cell_fw = rnn_cell.MultiRNNCell([cell_fw] * config.num_shared_layers) cell_bw = rnn_cell.MultiRNNCell([cell_bw] * config.num_shared_layers) initial_state_fw = cell_fw.zero_state(config.batch_size, tf.float32) initial_state_bw = cell_bw.zero_state(config.batch_size, tf.float32) # this function puts the 3d tensor into a 2d tensor: batch_size x input size inputs = [ tf.squeeze(input_, [1]) for input_ in tf.split(1, config.num_steps, lm_inputs) ] decoder_outputs, _, _ = rnn.bidirectional_rnn( cell_fw, cell_bw, inputs, initial_state_fw=initial_state_fw, initial_state_bw=initial_state_bw, scope="lm_rnn") output = tf.reshape(tf.concat(1, decoder_outputs), [-1, 2 * config.lm_decoder_size]) softmax_w = tf.get_variable( "softmax_w", [2 * config.lm_decoder_size, config.vocab_size]) else: if config.lstm == True: cell = rnn_cell.BasicLSTMCell(config.lm_decoder_size) else: cell = rnn_cell.GRUCell(config.lm_decoder_size) if is_training and config.keep_prob < 1: cell = rnn_cell.DropoutWrapper( cell, output_keep_prob=config.keep_prob) cell = rnn_cell.MultiRNNCell([cell] * config.num_shared_layers) initial_state = cell.zero_state(config.batch_size, tf.float32) # this function puts the 3d tensor into a 2d tensor: batch_size x input size inputs = [ tf.squeeze(input_, [1]) for input_ in tf.split(1, config.num_steps, lm_inputs) ] decoder_outputs, decoder_states = rnn.rnn( cell, inputs, initial_state=initial_state, scope="lm_rnn") output = tf.reshape(tf.concat(1, decoder_outputs), [-1, config.lm_decoder_size]) softmax_w = tf.get_variable( "softmax_w", [config.lm_decoder_size, config.vocab_size]) softmax_b = tf.get_variable("softmax_b", [config.vocab_size]) logits = tf.matmul(output, softmax_w) + softmax_b l2_penalty = tf.reduce_sum(tf.square(output)) return logits, l2_penalty
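For orientation, the bidirectional branch of lm_private is written against the deprecated list-based rnn.bidirectional_rnn API. A rough TensorFlow 2 equivalent of the same decode, with stand-in sizes instead of the config values (a sketch, not the author's code):

import tensorflow as tf

batch_size, num_steps, input_dim = 32, 20, 64    # stand-ins for config values
lm_decoder_size, vocab_size = 128, 10000

bidi = tf.keras.layers.Bidirectional(
    tf.keras.layers.LSTM(lm_decoder_size, return_sequences=True))
proj = tf.keras.layers.Dense(vocab_size)

lm_inputs = tf.zeros([batch_size, num_steps, input_dim])
logits = proj(bidi(lm_inputs))                   # (batch, num_steps, vocab_size)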
def pos_private(encoder_units, config, is_training): """Decode model for pos Args: encoder_units - these are the encoder units num_pos - the number of pos tags there are (output units) returns: logits """ with tf.variable_scope("pos_decoder"): if config.bidirectional == True: if config.lstm == True: cell_fw = rnn_cell.BasicLSTMCell(config.pos_decoder_size, forget_bias=1.0) cell_bw = rnn_cell.BasicLSTMCell(config.pos_decoder_size, forget_bias=1.0) else: cell_fw = rnn_cell.GRUCell(config.pos_decoder_size) cell_bw = rnn_cell.GRUCell(config.pos_decoder_size) if is_training and config.keep_prob < 1: cell_fw = rnn_cell.DropoutWrapper( cell_fw, output_keep_prob=config.keep_prob) cell_bw = rnn_cell.DropoutWrapper( cell_bw, output_keep_prob=config.keep_prob) cell_fw = rnn_cell.MultiRNNCell([cell_fw] * config.num_shared_layers) cell_bw = rnn_cell.MultiRNNCell([cell_bw] * config.num_shared_layers) initial_state_fw = cell_fw.zero_state(config.batch_size, tf.float32) initial_state_bw = cell_bw.zero_state(config.batch_size, tf.float32) # puts it into batch_size X input_size inputs = [tf.squeeze(input_, [1]) for input_ in tf.split(1, config.num_steps, encoder_units)] decoder_outputs, _, _ = rnn.bidirectional_rnn(cell_fw, cell_bw, inputs, initial_state_fw=initial_state_fw, initial_state_bw=initial_state_bw, scope="pos_rnn") output = tf.reshape(tf.concat(1, decoder_outputs), [-1, 2*config.pos_decoder_size]) softmax_w = tf.get_variable("softmax_w", [2*config.pos_decoder_size, config.num_pos_tags]) else: if config.lstm == True: cell = rnn_cell.BasicLSTMCell(config.pos_decoder_size, forget_bias=1.0) else: cell = rnn_cell.GRUCell(config.pos_decoder_size) if is_training and config.keep_prob < 1: cell = rnn_cell.DropoutWrapper( cell, output_keep_prob=config.keep_prob) cell = rnn_cell.MultiRNNCell([cell] * config.num_shared_layers) initial_state = cell.zero_state(config.batch_size, tf.float32) # puts it into batch_size X input_size inputs = [tf.squeeze(input_, [1]) for input_ in tf.split(1, config.num_steps, encoder_units)] decoder_outputs, decoder_states = rnn.rnn(cell, inputs, initial_state=initial_state, scope="pos_rnn") output = tf.reshape(tf.concat(1, decoder_outputs), [-1, config.pos_decoder_size]) softmax_w = tf.get_variable("softmax_w", [config.pos_decoder_size, config.num_pos_tags]) softmax_b = tf.get_variable("softmax_b", [config.num_pos_tags]) logits = tf.matmul(output, softmax_w) + softmax_b l2_penalty = tf.reduce_sum(tf.square(output)) return logits, l2_penalty
def __init__(self, is_training, vocab_size, tag_size, maxlen): self._batch_size = FLAGS.batch_size self._hidden_size = FLAGS.hidden_size self._num_layers = FLAGS.num_layers self._dropout_keep_prob = FLAGS.dropout_keep_prob self._vocab_size = vocab_size self._tag_size = tag_size self._is_training = is_training self._input_data = tf.placeholder(tf.int32, [self._batch_size, maxlen]) self._targets = tf.placeholder(tf.int32, [self._batch_size, maxlen]) self._mask = tf.placeholder(tf.bool, [self._batch_size, maxlen]) lstm_cell = tf.nn.rnn_cell.LSTMCell(self._hidden_size, self._hidden_size) if is_training and self._dropout_keep_prob < 1: lstm_cell = tf.nn.rnn_cell.DropoutWrapper( lstm_cell, output_keep_prob=self._dropout_keep_prob) cell_fw = tf.nn.rnn_cell.MultiRNNCell([lstm_cell] * self._num_layers) cell_bw = tf.nn.rnn_cell.MultiRNNCell([lstm_cell] * self._num_layers) self._initial_state_fw = cell_fw.zero_state(self._batch_size, tf.float32) self._initial_state_bw = cell_bw.zero_state(self._batch_size, tf.float32) with tf.device("/cpu:0"): self._embedding = tf.get_variable("embedding", [self._vocab_size, self._hidden_size]) inputs = tf.nn.embedding_lookup(self._embedding, self._input_data) inputs = [input_ for input_ in tf.unpack(tf.transpose(inputs, [1, 0, 2]))] if is_training and self._dropout_keep_prob < 1: inputs = tf.nn.dropout(tf.pack(inputs), self._dropout_keep_prob) inputs = tf.unpack(inputs) outputs = rnn.bidirectional_rnn(cell_fw, cell_bw, inputs, initial_state_fw=self._initial_state_fw, initial_state_bw=self._initial_state_bw) # output from forward and backward cells. output = tf.reshape(tf.concat(1, outputs), [-1, 2 * self._hidden_size]) softmax_w = tf.get_variable("softmax_w", [2 * self._hidden_size, self._tag_size]) softmax_b = tf.get_variable("softmax_b", [self._tag_size]) logits = tf.matmul(output, softmax_w) + softmax_b loss = tf.nn.seq2seq.sequence_loss_by_example( [logits], [tf.reshape(self._targets, [-1])], [tf.reshape(tf.cast(self._mask, tf.float32), [-1])], self._tag_size) self._cost = cost = tf.reduce_sum(loss) / self._batch_size equality = tf.equal(tf.argmax(logits, 1), tf.cast(tf.reshape(self._targets, [-1]), tf.int64)) masked = tf.boolean_mask(equality, tf.reshape(self.mask, [-1])) self._misclass = 1 - tf.reduce_mean(tf.cast(masked, tf.float32)) if not is_training: return self._lr = tf.Variable(0.0, trainable=False) tvars = tf.trainable_variables() grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars), FLAGS.max_grad_norm) optimizer = tf.train.GradientDescentOptimizer(self.lr) self._train_op = optimizer.apply_gradients(zip(grads, tvars))
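The tagger above turns the boolean mask into per-token weights so padded positions do not contribute to the loss. A toy NumPy sketch of that masking idea (sequence_loss_by_example also normalizes by the weights per example, which this sketch leaves out):

import numpy as np

batch_size, maxlen, tag_size = 2, 3, 5
logits = np.random.randn(batch_size * maxlen, tag_size)
targets = np.random.randint(0, tag_size, size=batch_size * maxlen)
mask = np.array([1, 1, 0, 1, 1, 1], dtype=np.float32)    # 0 marks padding

log_probs = logits - np.log(np.exp(logits).sum(axis=1, keepdims=True))
token_nll = -log_probs[np.arange(len(targets)), targets]
cost = (token_nll * mask).sum() / batch_size             # padded tokens add nothing
print(cost)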
cell_fw = rnn_cell.MultiRNNCell([cell_fw] * num_layers) cell_fw = rnn_cell.DropoutWrapper(cell_fw,output_keep_prob=keep_prob) initial_state_fw = cell_fw.zero_state(batch_size, tf.float32) with tf.name_scope("Cell_bw") as scope: #Define one cell, stack the cell to obtain many layers of cell and wrap a DropOut cell_bw = rnn_cell.BasicLSTMCell(hidden_size) cell_bw = rnn_cell.MultiRNNCell([cell_bw] * num_layers) cell_bw = rnn_cell.DropoutWrapper(cell_bw,output_keep_prob=keep_prob) initial_state_bw = cell_bw.zero_state(batch_size, tf.float32) with tf.name_scope("RNN") as scope: # Thanks to Tensorflow, the entire decoder is just one line of code: #outputs, states = seq2seq.rnn_decoder(inputs, initial_state, cell_fw) outputs, _, _ = rnn.bidirectional_rnn(cell_fw, cell_bw, inputs, initial_state_fw=initial_state_fw, initial_state_bw=initial_state_bw, dtype=tf.float32) outputs_tensor = tf.concat(0, outputs) final = outputs[-1] with tf.name_scope("Mark") as scope: W_m = tf.Variable(tf.random_normal([2*hidden_size,1], stddev=0.01)) b_m = tf.Variable(tf.random_normal([1], stddev=0.01)) h_m = tf.matmul(outputs_tensor, W_m) + b_m h_mark = tf.reshape(h_m,(seq_len,batch_size)) h_markt = tf.transpose(h_mark) sm_mark = tf.nn.softmax(h_markt) cost_mark = tf.nn.sparse_softmax_cross_entropy_with_logits(h_markt,marks) loss_mark = tf.reduce_mean(cost_mark) with tf.name_scope("Output") as scope:
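The Mark head above concatenates the per-step outputs, projects them to one logit per token, and then rearranges the result to (batch, seq_len). A toy NumPy walk-through of exactly that shape flow, with illustrative sizes:

import numpy as np

seq_len, batch_size, hidden_size = 3, 2, 2
outputs = [np.random.randn(batch_size, 2 * hidden_size) for _ in range(seq_len)]
outputs_tensor = np.concatenate(outputs, axis=0)   # (seq_len*batch, 2*hidden), step-major
W_m, b_m = np.random.randn(2 * hidden_size, 1), np.zeros(1)
h_m = outputs_tensor @ W_m + b_m                   # one logit per (step, example)
h_markt = h_m.reshape(seq_len, batch_size).T       # (batch, seq_len), ready for softmax
print(h_markt.shape)                               # (2, 3)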
def __init__(self, args, infer=False): self.args = args if infer: args.batch_size = 1 args.seq_length = 1 if args.model == 'rnn': cell_fn = jzRNNCell elif args.model == 'gru': cell_fn = jzGRUCell elif args.model == 'lstm': cell_fn = jzLSTMCell else: raise Exception("model type not supported: {}".format(args.model)) if args.activation == 'tanh': cell_af = tf.tanh elif args.activation == 'sigmoid': cell_af = tf.sigmoid elif args.activation == 'relu': cell_af = tf.nn.relu else: raise Exception("activation function not supported: {}".format(args.activation)) self.input_data = tf.placeholder(tf.int32, [args.batch_size, args.seq_length]) self.targets = tf.placeholder(tf.int32, [args.batch_size, args.seq_length]) with tf.variable_scope('rnnlm'): if not args.bidirectional: softmax_w = tf.get_variable("softmax_w", [args.rnn_size, args.vocab_size]) else: softmax_w = tf.get_variable("softmax_w", [args.rnn_size*2, args.vocab_size]) softmax_b = tf.get_variable("softmax_b", [args.vocab_size]) with tf.device("/cpu:0"): embedding = tf.get_variable("embedding", [args.vocab_size, args.rnn_size]) inputs = tf.split(1, args.seq_length, tf.nn.embedding_lookup(embedding, self.input_data)) inputs = [tf.nn.dropout(tf.squeeze(input_, [1]),args.dropout) for input_ in inputs] # one-directional RNN (nothing changed here..) if not args.bidirectional: cell = cell_fn(args.rnn_size,activation=cell_af) self.cell = cell = rnn_cell.MultiRNNCell([cell] * args.num_layers) self.initial_state = cell.zero_state(args.batch_size, tf.float32) def loop(prev, _): prev = tf.matmul(prev, softmax_w) + softmax_b prev_symbol = tf.stop_gradient(tf.argmax(prev, 1)) return tf.nn.embedding_lookup(embedding, prev_symbol) outputs, last_state = seq2seq.rnn_decoder(inputs, self.initial_state, cell, loop_function=loop if infer else None, scope='rnnlm') output = tf.reshape(tf.concat(1, outputs), [-1, args.rnn_size]) # bi-directional RNN else: lstm_fw = cell_fn(args.rnn_size,activation=cell_af) lstm_bw = cell_fn(args.rnn_size,activation=cell_af) self.lstm_fw = lstm_fw = rnn_cell.MultiRNNCell([lstm_fw]*args.num_layers) self.lstm_bw = lstm_bw = rnn_cell.MultiRNNCell([lstm_bw]*args.num_layers) self.initial_state_fw = lstm_fw.zero_state(args.batch_size,tf.float32) self.initial_state_bw = lstm_bw.zero_state(args.batch_size,tf.float32) outputs,_,_ = rnn.bidirectional_rnn(lstm_fw, lstm_bw, inputs, initial_state_fw=self.initial_state_fw, initial_state_bw=self.initial_state_bw, sequence_length=args.batch_size) output = tf.reshape(tf.concat(1, outputs), [-1, args.rnn_size*2]) self.logits = tf.matmul(tf.nn.dropout(output,args.dropout), softmax_w) + softmax_b self.probs = tf.nn.softmax(self.logits) loss = seq2seq.sequence_loss_by_example([self.logits], [tf.reshape(self.targets, [-1])], [tf.ones([args.batch_size * args.seq_length])], args.vocab_size) self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length self.final_state = last_state self.lr = tf.Variable(0.0, trainable=False) tvars = tf.trainable_variables() grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars), args.grad_clip) optimizer = tf.train.AdamOptimizer(self.lr) self.train_op = optimizer.apply_gradients(zip(grads, tvars))
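One detail worth flagging in the bidirectional branch above: in the 0.x API, sequence_length is expected to be a 1-D tensor with one length per batch element, whereas the call passes the scalar args.batch_size. A minimal sketch of the usual fixed-length form, with illustrative values:

import tensorflow as tf    # same 0.x-era API assumed as in the snippet above

batch_size, seq_length = 50, 25                    # stand-ins for the args values
seq_len = tf.fill([batch_size], seq_length)        # one length per batch row
# pass sequence_length=seq_len (rather than the batch size) to rnn.bidirectional_rnn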
def __init__(self, num_chars, num_classes, num_steps=200, num_epochs=100, embedding_matrix=None, is_training=True, is_crf=True, weight=False): # Parameter self.max_f1 = 0 self.learning_rate = 0.002 self.dropout_rate = 0.5 self.batch_size = 128 self.num_layers = 1 self.emb_dim = 100 self.hidden_dim = 100 self.num_epochs = num_epochs self.num_steps = num_steps self.num_chars = num_chars self.num_classes = num_classes # placeholder of x, y and weight self.inputs = tf.placeholder(tf.int32, [None, self.num_steps]) self.targets = tf.placeholder(tf.int32, [None, self.num_steps]) self.targets_weight = tf.placeholder(tf.float32, [None, self.num_steps]) self.targets_transition = tf.placeholder(tf.int32, [None]) # char embedding if embedding_matrix != None: self.embedding = tf.Variable(embedding_matrix, trainable=False, name="emb", dtype=tf.float32) else: self.embedding = tf.get_variable("emb", [self.num_chars, self.emb_dim]) self.inputs_emb = tf.nn.embedding_lookup(self.embedding, self.inputs) self.inputs_emb = tf.transpose(self.inputs_emb, [1, 0, 2]) self.inputs_emb = tf.reshape(self.inputs_emb, [-1, self.emb_dim]) self.inputs_emb = tf.split(0, self.num_steps, self.inputs_emb) # lstm cell lstm_cell_fw = tf.nn.rnn_cell.BasicLSTMCell(self.hidden_dim) lstm_cell_bw = tf.nn.rnn_cell.BasicLSTMCell(self.hidden_dim) # dropout if is_training: lstm_cell_fw = tf.nn.rnn_cell.DropoutWrapper(lstm_cell_fw, output_keep_prob=(1 - self.dropout_rate)) lstm_cell_bw = tf.nn.rnn_cell.DropoutWrapper(lstm_cell_bw, output_keep_prob=(1 - self.dropout_rate)) lstm_cell_fw = tf.nn.rnn_cell.MultiRNNCell([lstm_cell_fw] * self.num_layers) lstm_cell_bw = tf.nn.rnn_cell.MultiRNNCell([lstm_cell_bw] * self.num_layers) # get the length of each sample self.length = tf.reduce_sum(tf.sign(self.inputs), reduction_indices=1) self.length = tf.cast(self.length, tf.int32) # forward and backward self.outputs, _, _ = rnn.bidirectional_rnn( lstm_cell_fw, lstm_cell_bw, self.inputs_emb, dtype=tf.float32, sequence_length=self.length ) # softmax self.outputs = tf.reshape(tf.concat(1, self.outputs), [-1, self.hidden_dim * 2]) self.softmax_w = tf.get_variable("softmax_w", [self.hidden_dim * 2, self.num_classes]) self.softmax_b = tf.get_variable("softmax_b", [self.num_classes]) self.logits = tf.matmul(self.outputs, self.softmax_w) + self.softmax_b if not is_crf: pass else: self.tags_scores = tf.reshape(self.logits, [self.batch_size, self.num_steps, self.num_classes]) self.transitions = tf.get_variable("transitions", [self.num_classes + 1, self.num_classes + 1]) dummy_val = -1000 class_pad = tf.Variable(dummy_val * np.ones((self.batch_size, self.num_steps, 1)), dtype=tf.float32) self.observations = tf.concat(2, [self.tags_scores, class_pad]) begin_vec = tf.Variable(np.array([[dummy_val] * self.num_classes + [0] for _ in range(self.batch_size)]), trainable=False, dtype=tf.float32) end_vec = tf.Variable(np.array([[0] + [dummy_val] * self.num_classes for _ in range(self.batch_size)]), trainable=False, dtype=tf.float32) begin_vec = tf.reshape(begin_vec, [self.batch_size, 1, self.num_classes + 1]) end_vec = tf.reshape(end_vec, [self.batch_size, 1, self.num_classes + 1]) self.observations = tf.concat(1, [begin_vec, self.observations, end_vec]) self.mask = tf.cast(tf.reshape(tf.sign(self.targets),[self.batch_size * self.num_steps]), tf.float32) # point score self.point_score = tf.gather(tf.reshape(self.tags_scores, [-1]), tf.range(0, self.batch_size * self.num_steps) * self.num_classes + tf.reshape(self.targets,[self.batch_size * self.num_steps])) 
self.point_score *= self.mask # transition score self.trans_score = tf.gather(tf.reshape(self.transitions, [-1]), self.targets_transition) # real score self.target_path_score = tf.reduce_sum(self.point_score) + tf.reduce_sum(self.trans_score) # tf.initialize_all_variables() # sess = tf.Session() # sess.run(self.transitions.eval()) # all path score self.total_path_score, self.max_scores, self.max_scores_pre = self.forward(self.observations, self.transitions, self.length) # loss self.loss = - (self.target_path_score - self.total_path_score) # summary self.train_summary = tf.scalar_summary("loss", self.loss) self.val_summary = tf.scalar_summary("loss", self.loss) self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.loss)
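The target_path_score above is the standard CRF score of the gold path: per-step emission (point) scores plus transition scores between consecutive gold tags. A toy NumPy version of that score for a single sentence (without the begin/end padding rows and the masking used above):

import numpy as np

num_steps, num_classes = 4, 3
tags_scores = np.random.randn(num_steps, num_classes)    # emission scores for one sentence
transitions = np.random.randn(num_classes, num_classes)  # tag-to-tag transition scores
gold = np.array([0, 2, 1, 1])                            # gold tag sequence

point_score = tags_scores[np.arange(num_steps), gold].sum()
trans_score = transitions[gold[:-1], gold[1:]].sum()
target_path_score = point_score + trans_score
print(target_path_score)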
def __init__(self, config): sent_len = self.sent_len = config.sent_len word_len = config.word_len batch_size = config.batch_size vocab_size = config.vocab_size embed_size = config.embed_size keep_prob1 = config.keep_prob1 keep_prob2 = config.keep_prob2 num_layers1 = config.num_layers1 num_layers2 = config.num_layers2 state_size1 = config.state_size1 state_size2 = config.state_size2 self.input_data = tf.placeholder(tf.int32, [batch_size * sent_len, word_len]) self.lengths = tf.placeholder(tf.int64, [batch_size]) self.wordlengths = tf.placeholder(tf.int64, [batch_size * sent_len]) self.targets = tf.placeholder(tf.float32, [batch_size, 1]) # Get embedding layer which requires CPU with tf.device("/cpu:0"): embedding = tf.get_variable("embedding", [vocab_size, embed_size]) inputs = tf.nn.embedding_lookup(embedding, self.input_data) #LSTM 1 -> Encode the characters of every tok into a fixed dense representation with tf.variable_scope("rnn1", reuse=None): lstm_cell_1 = rnn_cell.LSTMCell(state_size1, input_size=embed_size) lstm_back_cell_1 = rnn_cell.LSTMCell(state_size1, input_size=embed_size) if keep_prob1 < 1: #Only on the inputs for rnn1. That way we don't dropout twice lstm_cell_1 = rnn_cell.DropoutWrapper( lstm_cell_1, input_keep_prob=keep_prob1) lstm_back_cell_1 = rnn_cell.DropoutWrapper( lstm_back_cell_1, input_keep_prob=keep_prob1) cell_1 = rnn_cell.MultiRNNCell([lstm_cell_1] * num_layers1) backcell_1 = rnn_cell.MultiRNNCell([lstm_back_cell_1] * num_layers1) rnn_splits = [ tf.squeeze(input_, [1]) for input_ in tf.split(1, word_len, inputs) ] # Run the bidirectional rnn outputs1, last_fw_state1, last_bw_state1 = rnn.bidirectional_rnn( cell_1, backcell_1, rnn_splits, sequence_length=self.wordlengths, dtype=tf.float32) #tok_embeds = outputs1[-1] tok_embeds = tf.concat(1, [last_fw_state1, last_bw_state1]) with tf.variable_scope("rnn2", reuse=None): lstm_cell_2 = rnn_cell.LSTMCell(state_size2, input_size=state_size1 * 4) lstm_back_cell_2 = rnn_cell.LSTMCell(state_size2, input_size=state_size1 * 4) # Add dropout. NOTE: this adds to the input and output layers. Remember that the input layer # is the output from the conv net, so this also adds dropout to the output of the conv net if keep_prob2 < 1: lstm_cell_2 = rnn_cell.DropoutWrapper( lstm_cell_2, input_keep_prob=keep_prob2, output_keep_prob=keep_prob2) lstm_back_cell_2 = rnn_cell.DropoutWrapper( lstm_back_cell_2, input_keep_prob=keep_prob2, output_keep_prob=keep_prob2) cell_2 = rnn_cell.MultiRNNCell([lstm_cell_2] * num_layers2) backcell_2 = rnn_cell.MultiRNNCell([lstm_back_cell_2] * num_layers2) # The rnn synthesis of the tokens is size [batch_size*sent_len, state_size*2] # we want it to be a list of sent_len of [batch_size, state_size*2] # We partition as [0,1,2,...n,0,1,2,...n...] 
rnn_inputs2 = tf.dynamic_partition( tok_embeds, list(range(sent_len)) * batch_size, sent_len) #Sent level rnn outputs2, last_fw_state2, last_bw_state2 = rnn.bidirectional_rnn( cell_2, backcell_2, rnn_inputs2, sequence_length=self.lengths, dtype=tf.float32) #sent_embed = tf.reshape(tf.concat(1, [last_fw_state2, last_bw_state2]), [batch_size, state_size2*4]) sent_embed = tf.concat(1, [last_fw_state2, last_bw_state2]) with tf.variable_scope("linear", reuse=None): w = tf.get_variable("w", [state_size2 * 4, 1]) b = tf.get_variable("b", [1]) raw_logits = tf.matmul(sent_embed, w) + b self.probabilities = tf.sigmoid(raw_logits) self.cost = tf.reduce_mean( tf.nn.sigmoid_cross_entropy_with_logits(raw_logits, self.targets)) #Calculate gradients and propagate #Aggregation method 2 is really important for rnn per the tensorflow issues list tvars = tf.trainable_variables() self.lr = tf.Variable(0.0, trainable=False) #Assign to overwrite optimizer = tf.train.AdamOptimizer() grads, _vars = zip(*optimizer.compute_gradients( self.cost, tvars, aggregation_method=2)) grads, self.grad_norm = tf.clip_by_global_norm(grads, config.max_grad_norm) self.train_op = optimizer.apply_gradients(zip(grads, _vars))
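The tf.dynamic_partition call above regroups the flat (batch_size*sent_len, ...) token encodings so that partition i holds token i of every sentence, giving the step-major list the sentence-level RNN needs. A toy NumPy equivalent of that regrouping:

import numpy as np

batch_size, sent_len, dim = 2, 3, 2
tok_embeds = np.arange(batch_size * sent_len * dim).reshape(batch_size * sent_len, dim)
partitions = np.tile(np.arange(sent_len), batch_size)     # [0, 1, 2, 0, 1, 2]
rnn_inputs2 = [tok_embeds[partitions == i] for i in range(sent_len)]
print([x.shape for x in rnn_inputs2])                      # sent_len arrays of (batch, dim)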
def __init__(self, sess, params, vocabs_size): NNModel.Model.__init__(self, vocabs_size) self.params = params self.batch_size = self.params.get("batch_size") self.max_length = self.params.get("max_length") self.size = self.params.get("size") self.num_layers = self.params.get("num_layers") # the learning rate could be a float, but this way we can adjust it during training # self.learning_rate = tf.Variable(float(learning_rate), trainable=False) self.learning_rate = self.params.get("learning_rate") self.embedding_size = self.params.get("embedding_size") # self.global_step = tf.Variable(0, trainable=False) self.incorrect = [0] * self.max_length self.global_step = 0 self.corpus_name = self.params.get("corpus_name") logging.info( "BiRNN model created with {0} layers of {1} cells. Embedding = {2}. Vocabulary sizes = {3}, length = {4}, batch = {5}." .format(self.num_layers, self.size, self.embedding_size, vocabs_size, self.max_length, self.batch_size)) # forward RNN with tf.variable_scope('forward'): fcell = rnn_cell.GRUCell(self.size, input_size=self.embedding_size) forward_cell = fcell if self.num_layers > 1: fcell2 = rnn_cell.GRUCell(self.size) forward_cell = rnn_cell.MultiRNNCell([fcell] + ([fcell2] * self.num_layers)) # backward RNN with tf.variable_scope('backward'): bcell = rnn_cell.GRUCell(self.size, input_size=self.embedding_size) backward_cell = bcell if self.num_layers > 1: bcell2 = rnn_cell.GRUCell(self.size) backward_cell = rnn_cell.MultiRNNCell([bcell] + ([bcell2] * self.num_layers)) #seq_len = tf.fill([self.batch_size], constant(self.max_length, dtype=tf.int64)) # self.inputs = tf.placeholder(tf.float32, shape=[self.max_length, self.batch_size, self.vocab_sizes[0]], name="inputs") self.inputs = [ tf.placeholder(tf.int32, shape=[None], name="inputs{0}".format(i)) for i in range(self.max_length) ] self.targets = [ tf.placeholder(tf.int32, shape=[None], name="targets{0}".format(i)) for i in range(self.max_length) ] self.sentence_lengths = tf.placeholder(tf.int64, shape=[None], name="sequence_lengths") self.dropout_placeholder = tf.placeholder(tf.float32, shape=[], name="dropout") self.word_embeddings = tf.Variable( tf.random_uniform([self.vocab_sizes[0], self.embedding_size], -1.0, 1.0)) embedded_inputs = [ tf.nn.embedding_lookup(self.word_embeddings, input_) for input_ in self.inputs ] dropped_embedded_inputs = [ tf.nn.dropout(i, self.dropout_placeholder) for i in embedded_inputs ] # dropout je realny cislo weights = { # Hidden layer weights => 2*n_hidden because of foward + backward cells # 'hidden': tf.Variable(tf.random_uniform([self.vocab_sizes[0], 2 * size]), name="hidden-weight"), 'out': tf.Variable(tf.random_uniform([2 * self.size, self.vocab_sizes[1]]), name="out-weight") } biases = { # 'hidden': tf.Variable(tf.random_uniform([2 * size]), name="hidden-bias"), 'out': tf.Variable(tf.random_uniform([self.vocab_sizes[1]]), name="out-bias") } # hack to omit information from RNN creation logging.getLogger().setLevel(logging.CRITICAL) with tf.variable_scope('BiRNN-net'): # bidi_layer = BidirectionalRNNLayer(forward_cell, backward_cell, dropped_embedded_inputs, self.sentence_lengths) # with tf.variable_scope('forward'): # output_fw, last_state = rnn.rnn(cell=forward_cell, inputs=dropped_embedded_inputs, dtype=tf.float32, sequence_length=self.sentence_lengths) # # with tf.variable_scope('backward'): # outputs_rev_rev, last_state_rev = rnn.rnn(cell=backward_cell, inputs=rnn._reverse_seq(dropped_embedded_inputs, self.sentence_lengths), dtype=tf.float32, # 
sequence_length=self.sentence_lengths) # output_bw = self.rnn._reverse_seq(outputs_rev_rev, self.sentence_lengths) # # outputs = [array_ops.concat(1, [fw, bw]) for fw, bw in zip(output_fw, output_bw)] outputs = rnn.bidirectional_rnn( forward_cell, backward_cell, dropped_embedded_inputs, sequence_length=self.sentence_lengths, dtype=tf.float32) logging.getLogger().setLevel(logging.INFO) self.out = [] self.probs = [] # after the switch to TF 0.8 it started outputting some merges for FC and BC for o in outputs[0]: # TODO ############# add tf.nn.relu(MATMUL+BIAS) ??? intermediate_out = tf.matmul(o, weights['out']) + biases['out'] self.out.append(intermediate_out) self.probs.append(tf.nn.softmax(intermediate_out)) loss = seq2seq.sequence_loss_by_example(self.out, self.targets, [tf.ones([self.batch_size])] * self.max_length, self.vocab_sizes[1]) self.cost = tf.reduce_sum(loss) / self.batch_size tf.scalar_summary("Cost", self.cost) self.updates = tf.train.AdamOptimizer( self.learning_rate).minimize(loss) self.saver = tf.train.Saver(max_to_keep=0) # don't remove old models self.summaries = tf.merge_all_summaries() self.sum_writer = tf.python.training.summary_io.SummaryWriter( "tmp", sess.graph) # Initialize the variables & launch the graph sess.run(tf.initialize_all_variables()) logging.info("BiRNN model initialized.")
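A hedged usage sketch for the per-timestep placeholders above: each inputs{i}/targets{i} placeholder is fed one column of a padded (batch, max_length) id matrix. make_feed is a hypothetical helper, and the model, session, and padded arrays are assumed to exist elsewhere; this is not part of the original code.

def make_feed(model, input_ids, target_ids, lengths, dropout_keep):
    # input_ids / target_ids: (batch, max_length) padded id arrays, lengths: (batch,)
    feed = {model.sentence_lengths: lengths, model.dropout_placeholder: dropout_keep}
    for i in range(model.max_length):
        feed[model.inputs[i]] = input_ids[:, i]    # one time step for the whole batch
        feed[model.targets[i]] = target_ids[:, i]
    return feed

# _, cost = sess.run([model.updates, model.cost], feed_dict=make_feed(model, x, y, lens, 0.5))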