def BiRNN(_X, _istate_fw, _istate_bw, _weights, _biases, _batch_size, _seq_len):
    """Assemble a bidirectional LSTM graph and project its last output.

    Reads the module-level names ``n_input``, ``n_hidden`` and ``n_steps``.

    Args:
        _X: Input tensor; the original comments describe it as
            (batch_size, n_steps, n_input).
        _istate_fw: Initial state passed to the forward cell.
        _istate_bw: Initial state passed to the backward cell.
        _weights: Mapping providing the 'hidden' and 'out' weight matrices.
        _biases: Mapping providing the 'hidden' and 'out' bias vectors.
        _batch_size: Number of sequences per batch.
        _seq_len: Length assigned to every sequence of the batch.

    Returns:
        ``outputs[-1]`` of the bidirectional RNN multiplied by
        ``_weights['out']`` plus ``_biases['out']``.
    """
    # TensorFlow 0.6.0 requires the sequence_length argument of the
    # bidirectional RNN; it is supplied as a [batch_size] int64 tensor in
    # which every entry equals _seq_len.
    lengths = tf.fill([_batch_size], constant(_seq_len, dtype=tf.int64))

    # Swap the batch and step axes, then flatten to
    # (n_steps * batch_size, n_input) for the input-to-hidden projection.
    time_major = tf.transpose(_X, [1, 0, 2])
    flat = tf.reshape(time_major, [-1, n_input])
    hidden = tf.matmul(flat, _weights['hidden']) + _biases['hidden']

    # One LSTM cell per direction.
    fw_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
    bw_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)

    # The RNN inner loop consumes a Python list with one tensor per step.
    step_inputs = tf.split(0, n_steps, hidden)

    rnn_outputs = rnn.bidirectional_rnn(
        fw_cell,
        bw_cell,
        step_inputs,
        initial_state_fw=_istate_fw,
        initial_state_bw=_istate_bw,
        sequence_length=lengths)

    # Linear read-out of the last element produced by the inner loop.
    last_output = rnn_outputs[-1]
    return tf.matmul(last_output, _weights['out']) + _biases['out']
def BiRNN(self, _X, _istate_fw, _istate_bw, _weights, _biases):
    """Build a bidirectional LSTM and project every step's output.

    Sizes are read from ``self.config`` (``num_input``, ``num_hidden``,
    ``num_steps``).

    Args:
        _X: Input tensor; the original comments describe it as
            (batch_size, n_steps, n_input).
        _istate_fw: Initial state of the forward cell.
        _istate_bw: Initial state of the backward cell.
        _weights: Mapping providing the 'hidden' and 'out' weight matrices.
        _biases: Mapping providing the 'hidden' and 'out' bias vectors.

    Returns:
        A 3-tuple ``(projected_outputs, final_fw, final_bw)`` where
        ``projected_outputs`` is a list holding one projected tensor per
        element of the RNN outputs, and the other two items are returned
        unchanged from ``rnn.bidirectional_rnn``.
    """
    cfg = self.config

    # Swap the batch and step axes, flatten to
    # (n_steps * batch_size, n_input) and apply the hidden projection.
    time_major = tf.transpose(_X, [1, 0, 2])
    flat = tf.reshape(time_major, [-1, cfg.num_input])
    hidden = tf.matmul(flat, _weights['hidden']) + _biases['hidden']

    # One LSTM cell per direction.
    fw_cell = rnn_cell.BasicLSTMCell(cfg.num_hidden)
    bw_cell = rnn_cell.BasicLSTMCell(cfg.num_hidden)

    # The RNN inner loop needs a list with one tensor per step.
    step_inputs = tf.split(0, cfg.num_steps, hidden)

    outputs, final_fw, final_bw = rnn.bidirectional_rnn(
        fw_cell,
        bw_cell,
        step_inputs,
        initial_state_fw=_istate_fw,
        initial_state_bw=_istate_bw)

    # Apply the same linear read-out to every step.
    projected = []
    for step_output in outputs:
        projected.append(tf.matmul(step_output, _weights['out']) + _biases['out'])
    return projected, final_fw, final_bw
def BiRNN(x, weights, biases):
    """Run a bidirectional LSTM over ``x`` and project the last output.

    Reads the module-level names ``n_input``, ``n_hidden`` and ``n_steps``.

    Args:
        x: Input tensor; the original comments describe it as
            (batch_size, n_steps, n_input).
        weights: Mapping providing the 'out' weight matrix.
        biases: Mapping providing the 'out' bias vector.

    Returns:
        ``outputs[-1]`` of the bidirectional RNN multiplied by
        ``weights['out']`` plus ``biases['out']``.
    """
    # `bidirectional_rnn` wants a list of per-step tensors: swap the batch
    # and step axes, flatten to (n_steps * batch_size, n_input), then cut the
    # result into n_steps pieces.
    time_major = tf.transpose(x, [1, 0, 2])
    flat = tf.reshape(time_major, [-1, n_input])
    step_inputs = tf.split(0, n_steps, flat)

    # One LSTM cell per direction.
    fw_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
    bw_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)

    rnn_outputs = rnn.bidirectional_rnn(
        fw_cell, bw_cell, step_inputs, dtype=tf.float32)

    # Linear read-out of the inner loop's last output.
    last_output = rnn_outputs[-1]
    return tf.matmul(last_output, weights['out']) + biases['out']
def BiRNN(_X, _istate_fw, _istate_bw, _weights, _biases):
    """Build a bidirectional LSTM and return one projection per output.

    Reads the module-level names ``n_input``, ``n_hidden`` and ``n_steps``.

    Args:
        _X: Input tensor; the original comments describe it as
            (batch_size, n_steps, n_input).
        _istate_fw: Initial state of the forward cell.
        _istate_bw: Initial state of the backward cell.
        _weights: Mapping providing the 'hidden' and 'out' weight matrices.
        _biases: Mapping providing the 'hidden' and 'out' bias vectors.

    Returns:
        A list with ``matmul(o, _weights['out']) + _biases['out']`` for each
        element ``o`` returned by ``rnn.bidirectional_rnn``.
    """
    # Swap the batch and step axes, flatten to
    # (n_steps * batch_size, n_input) and apply the hidden projection.
    time_major = tf.transpose(_X, [1, 0, 2])
    flat = tf.reshape(time_major, [-1, n_input])
    hidden = tf.matmul(flat, _weights['hidden']) + _biases['hidden']

    # One LSTM cell per direction.
    fw_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
    bw_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)

    # The RNN inner loop needs a list with one tensor per step.
    step_inputs = tf.split(0, n_steps, hidden)

    rnn_outputs = rnn.bidirectional_rnn(
        fw_cell,
        bw_cell,
        step_inputs,
        initial_state_fw=_istate_fw,
        initial_state_bw=_istate_bw)

    # Same linear read-out applied to every output.
    return [
        tf.matmul(step_output, _weights['out']) + _biases['out']
        for step_output in rnn_outputs
    ]
def __init__(self, is_training, glove_word_vectors, vocabulary, config):
    """Store the configuration and build the left and right LSTM stacks.

    Args:
        is_training: When true and ``config.keep_prob < 1``, each base cell
            is wrapped with output dropout.
        glove_word_vectors: Stored as ``self.glove_word_vectors``; not used
            further in this constructor.
        vocabulary: Stored as ``self.vocabulary``; not used further in this
            constructor.
        config: Object providing ``hidden_size``, ``word_vec_size``,
            ``vocab_size``, ``keep_prob`` and ``num_layers``.
    """
    self.size = config.hidden_size
    self.config = config
    self.is_training = is_training
    self.word_vec_size = config.word_vec_size
    vocab_size = config.vocab_size  # read but not used in this constructor
    self.glove_word_vectors = glove_word_vectors
    self.vocabulary = vocabulary

    # Original author's note: slightly better results can be obtained with
    # forget gate biases initialized to 1, but the hyperparameters of the
    # model would then need to differ from those reported in the paper.
    # TODO: LSTMCell offers extra features that might improve performance,
    # but using it requires the sentence length.
    with tf.variable_scope("LeftLSTM"):
        left = rnn_cell.BasicLSTMCell(self.size, forget_bias=1.0)
    with tf.variable_scope("RightLSTM"):
        right = rnn_cell.BasicLSTMCell(self.size, forget_bias=1.0)

    # Output dropout is only added while training.
    use_dropout = is_training and config.keep_prob < 1
    if use_dropout:
        with tf.variable_scope("LeftLSTM"):
            left = rnn_cell.DropoutWrapper(
                left, output_keep_prob=config.keep_prob)
        with tf.variable_scope("RightLSTM"):
            right = rnn_cell.DropoutWrapper(
                right, output_keep_prob=config.keep_prob)

    # Stack num_layers references to the (possibly wrapped) cell.
    with tf.variable_scope("LeftLSTM"):
        left = rnn_cell.MultiRNNCell([left] * config.num_layers)
    with tf.variable_scope("RightLSTM"):
        right = rnn_cell.MultiRNNCell([right] * config.num_layers)

    self.left_lstm_cell = left
    self.right_lstm_cell = right
def __init__(self, batch_size, len_question, len_answer, n_answers, n_words, dim_embed, dim_hidden, bias_init_vector=None):
    """Store the model sizes and create the trainable variables and cells.

    Args:
        batch_size: Stored as ``self.batch_size``.
        len_question: Stored as ``self.len_question``.
        len_answer: Stored as ``self.len_answer``.
        n_answers: Stored as ``self.n_answers``.
        n_words: Vocabulary size; first dimension of ``Wemb`` and last
            dimension of ``W_emb_word``.
        dim_embed: Width of the word embedding.
        dim_hidden: Number of units of both LSTM cells.
        bias_init_vector: Optional array used (cast to float32) as the
            initial value of the output word bias; zeros are used when it is
            ``None``.
    """
    self.batch_size = batch_size
    self.len_question = len_question
    self.len_answer = len_answer
    self.n_answers = n_answers
    self.n_words = n_words
    self.dim_embed = dim_embed
    self.dim_hidden = dim_hidden

    with tf.device("/cpu:0"):
        self.Wemb = tf.Variable(
            tf.random_uniform([n_words, dim_embed], -0.1, 0.1), name='Wemb')

    # Embedding -> hidden projections for the question (Q) and answer (A).
    self.W_emb_hid_Q = tf.Variable(
        tf.random_uniform([dim_embed, dim_hidden], -0.1, 0.1),
        name='W_emb_hid_Q')
    self.b_emb_hid_Q = tf.Variable(tf.zeros([dim_hidden]), name='b_emb_hid_Q')
    self.W_emb_hid_A = tf.Variable(
        tf.random_uniform([dim_embed, dim_hidden], -0.1, 0.1),
        name='W_emb_hid_A')
    # NOTE(review): this variable reuses the name 'b_emb_hid_Q'; it looks
    # like a copy-paste slip, but the name is left untouched because
    # renaming it would change the names stored in existing checkpoints.
    self.b_emb_hid_A = tf.Variable(tf.zeros([dim_hidden]), name='b_emb_hid_Q')

    self.lstm1 = rnn_cell.BasicLSTMCell(dim_hidden)
    self.lstm2 = rnn_cell.BasicLSTMCell(dim_hidden)

    # Hidden -> embedding -> word projections.
    self.W_hid_emb = tf.Variable(
        tf.random_uniform([dim_hidden, dim_embed], -0.1, 0.1),
        name='W_hid_emb')
    self.b_hid_emb = tf.Variable(tf.zeros([dim_embed]), name='b_hid_emb')
    self.W_emb_word = tf.Variable(
        tf.random_uniform([dim_embed, n_words], -0.1, 0.1), name='W_emb_word')

    if bias_init_vector is not None:
        word_bias = tf.Variable(
            bias_init_vector.astype(np.float32), name='b_embed_word')
    else:
        word_bias = tf.Variable(tf.zeros([n_words]), name='b_emb_word')

    # The two branches used to store the bias under different attribute
    # names (`b_embed_word` vs `b_emb_word`), so whichever name the rest of
    # the class read was missing in one of the two cases. Both names are now
    # always bound to the same variable.
    self.b_emb_word = word_bias
    self.b_embed_word = word_bias
def __init__(self, batch_size, len_question, len_answer, n_answers, n_words, dim_embed, dim_hidden, bias_init_vector=None):
    """Store the model sizes and create the variables and four LSTM cells.

    Args:
        batch_size: Stored as ``self.batch_size``.
        len_question: Stored as ``self.len_question``.
        len_answer: Stored as ``self.len_answer``.
        n_answers: Stored as ``self.n_answers``.
        n_words: Vocabulary size; first dimension of ``Wemb``.
        dim_embed: Width of the word embedding.
        dim_hidden: Number of units of every LSTM cell.
        bias_init_vector: Accepted but not used by this constructor.
    """
    self.batch_size = batch_size
    self.len_question = len_question
    self.len_answer = len_answer
    self.n_answers = n_answers
    self.n_words = n_words
    self.dim_embed = dim_embed
    self.dim_hidden = dim_hidden

    def uniform(shape):
        # Every weight matrix here starts uniformly in [-0.1, 0.1).
        return tf.random_uniform(shape, -0.1, 0.1)

    with tf.device("/cpu:0"):
        self.Wemb = tf.Variable(uniform([n_words, dim_embed]), name='Wemb')

    # Embedding -> hidden projections for the question (Q) and answer (A).
    self.W_emb_hid_Q = tf.Variable(
        uniform([dim_embed, dim_hidden]), name='W_emb_hid_Q')
    self.b_emb_hid_Q = tf.Variable(tf.zeros([dim_hidden]), name='b_emb_hid_Q')
    self.W_emb_hid_A = tf.Variable(
        uniform([dim_embed, dim_hidden]), name='W_emb_hid_A')
    # NOTE(review): the answer bias reuses the name 'b_emb_hid_Q' - looks
    # like a copy-paste slip; confirm before renaming (checkpoint names).
    self.b_emb_hid_A = tf.Variable(tf.zeros([dim_hidden]), name='b_emb_hid_Q')

    # Forward/backward cells for the question and for the answer.
    self.lstm_fw_Q = rnn_cell.BasicLSTMCell(dim_hidden)
    self.lstm_bw_Q = rnn_cell.BasicLSTMCell(dim_hidden)
    self.lstm_fw_A = rnn_cell.BasicLSTMCell(dim_hidden)
    self.lstm_bw_A = rnn_cell.BasicLSTMCell(dim_hidden)

    # Projections from 2 * dim_hidden inputs back to the embedding width.
    concat_width = dim_hidden * 2
    self.W_Q_emb = tf.Variable(
        uniform([concat_width, dim_embed]), name='W_Q_emb')
    self.b_Q_emb = tf.Variable(tf.zeros([dim_embed]), name='b_Q_emb')
    self.W_A_emb = tf.Variable(
        uniform([concat_width, dim_embed]), name='W_A_emb')
    self.b_A_emb = tf.Variable(tf.zeros([dim_embed]), name='b_A_emb')
def initialize_model(self):
    """Build the placeholders, the two-layer LSTM, the loss and the optimizer.

    Reads ``self.batch_size``, ``self.num_steps``, ``self.n_classes`` and
    ``self.learning_rate`` plus the module-level name ``wv_dim``.

    Sets ``self.keep_prob``, ``self.x``, ``self.y``, ``self.loan_amounts``,
    ``self.initial_state``, ``self.previous_state``, ``self.output``,
    ``self.loss`` and ``self.optimizer``.
    """
    self.keep_prob = tf.placeholder(tf.float32)
    sigma = 1e-3

    self.x = tf.placeholder(
        tf.float32, shape=(self.batch_size, wv_dim, self.num_steps))
    self.y = tf.placeholder(
        tf.int32, shape=(self.batch_size, self.num_steps))
    self.loan_amounts = tf.placeholder(
        tf.float32, shape=(self.batch_size, self.num_steps))

    if self.num_steps > 1:
        # Build a real list (a `map` object has no len() on Python 3) and
        # squeeze only the step axis, so a batch of size 1 keeps its batch
        # dimension.
        inputs = [
            tf.squeeze(step, [2])
            for step in tf.split(2, self.num_steps, self.x)
        ]
        loans = tf.split(1, self.num_steps, self.loan_amounts)
    else:
        inputs = [self.x[:, :, 0]]
        loans = [self.loan_amounts]

    filter_number_1 = 256
    filter_number_2 = 144
    cell1 = rnn_cell.BasicLSTMCell(
        filter_number_1, forget_bias=1.0, input_size=wv_dim)
    cell2 = rnn_cell.BasicLSTMCell(
        filter_number_2, forget_bias=1.0, input_size=filter_number_1)
    cell = rnn_cell.MultiRNNCell([cell1, cell2])

    self.initial_state = cell.zero_state(self.batch_size, tf.float32)
    state = self.initial_state
    self.loss = 0

    rnn_outputs = []
    for idx, batch in enumerate(inputs):
        with tf.variable_scope("RNN") as scope:
            # Variables are created on the first step and shared afterwards.
            if idx > 0:
                scope.reuse_variables()
            # The read-out takes the loan amount concatenated with the LSTM
            # output, hence the extra input row.
            wc3 = tf.get_variable(
                "wc3", (filter_number_2 + 1, self.n_classes),
                initializer=tf.random_normal_initializer(
                    mean=0.0, stddev=sigma, seed=None, dtype=tf.float32))
            bc3 = tf.get_variable(
                "bc3", (self.n_classes,),
                initializer=tf.random_normal_initializer(
                    mean=0.0, stddev=sigma, seed=None, dtype=tf.float32))
            output, state = cell(batch, state)
            pred = bc3 + tf.matmul(tf.concat(1, [loans[idx], output]), wc3)
            rnn_outputs.append(pred)
    self.previous_state = state

    # The prediction is taken from the last step only.
    self.output = tf.argmax(rnn_outputs[-1], 1)

    # The loss sums the cross entropy of every step.
    for step, logits in enumerate(rnn_outputs):
        self.loss += tf.reduce_sum(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits, self.y[:, step]))

    self.optimizer = tf.train.AdamOptimizer(
        learning_rate=self.learning_rate).minimize(self.loss)
def __init__(self, vocab_size, batch_size, sequece_length, embedding_size, num_classes):
    """Build an LSTM sequence tagger with a weighted loss and accuracy ops.

    Args:
        vocab_size: Number of rows of the embedding matrix.
        batch_size: Accepted but not used; the placeholders leave the batch
            dimension unspecified.
        sequece_length: Number of steps of every input sequence.
        embedding_size: Width of the embedding vectors.
        num_classes: Number of output classes per step.
    """
    self.hyperParam = {
        "hidden_num": 20,
        "l2_lamda": 3,
        "dropout_keep_prob": 0.5,
    }
    hidden_num = self.hyperParam["hidden_num"]
    keep_prob = self.hyperParam["dropout_keep_prob"]

    l2_loss = tf.constant(0.0)
    self.dropout_keep_prob = 0.5
    lstm = rnn_cell.BasicLSTMCell(hidden_num, forget_bias=1.0)

    self.input_data = tf.placeholder(
        tf.int32, shape=[None, sequece_length], name="input_data")
    self.weights = tf.placeholder(
        tf.int32, shape=[None, sequece_length], name="weights")
    self.output_data = tf.placeholder(
        tf.int32, [None, sequece_length], name="output_data")
    # First dimension of the targets, resolved at run time.
    dynamic_batch = tf.shape(self.output_data)[0]

    with tf.device("/cpu:0"):
        embedding = tf.get_variable("embedding", [vocab_size, embedding_size])
        embedded = tf.nn.embedding_lookup(embedding, self.input_data)

    # One tensor per step, with the size-1 step axis removed.
    self.inputs = [
        tf.squeeze(step, [1])
        for step in tf.split(1, sequece_length, embedded)
    ]
    self.output, self.states = rnn.rnn(lstm, self.inputs, dtype=tf.float32)

    # Dropout on every step output; the keep probability is a constant.
    with tf.name_scope("dropout"):
        self.h_drop = [tf.nn.dropout(step, keep_prob) for step in self.output]

    with tf.name_scope("result"):
        W = tf.Variable(
            tf.truncated_normal([hidden_num, num_classes], stddev=0.1),
            name="W")
        b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="b")
        l2_loss += tf.nn.l2_loss(W)
        l2_loss += tf.nn.l2_loss(b)
        # Concatenate the steps side by side, then give every step a row.
        flat_hidden = tf.reshape(tf.concat(1, self.h_drop), [-1, hidden_num])
        self.scores = tf.matmul(flat_hidden, W) + b

    with tf.name_scope("loss"):
        flat_targets = tf.reshape(self.output_data, [-1])
        flat_weights = tf.reshape(tf.cast(self.weights, "float32"), [-1])
        per_example = seq2seq.sequence_loss_by_example(
            [self.scores], [flat_targets], [flat_weights], num_classes)
        data_loss = tf.reduce_sum(per_example) / tf.cast(dynamic_batch, "float32")
        self.loss = data_loss + self.hyperParam["l2_lamda"] * l2_loss

    with tf.name_scope("accurcy"):
        self.predictions = tf.argmax(
            tf.reshape(self.scores, [-1, sequece_length, num_classes]), 2)
        # Row vector of 1.0 / 0.0 correctness flags.
        hits = tf.expand_dims(
            tf.reshape(
                tf.cast(
                    tf.equal(self.predictions,
                             tf.cast(self.output_data, "int64")),
                    "float32"),
                [-1]),
            0)
        # Row vector of the per-position weights.
        mask = tf.expand_dims(
            tf.cast(tf.reshape(self.weights, [-1]), "float32"), 0)
        # Weighted accuracy: sum(hits * weights) / sum(weights).
        weighted_hits = tf.squeeze(tf.matmul(hits, mask, transpose_b=True))
        self.kk = weighted_hits / tf.reduce_sum(
            tf.cast(self.weights, "float32"), [0, 1])
        # Unweighted accuracy over every position.
        self.accuracy = tf.reduce_mean(
            tf.cast(
                tf.equal(self.predictions,
                         tf.cast(self.output_data, "int64")),
                "float32"),
            name="accrucy")
def RNN(x, input_size, num_hidden):
    """Create a single-layer LSTM regressor and return its scalar read-out.

    Reads the module-level name ``seq_len`` for the number of steps. New
    weight and bias variables are created on every call.

    Args:
        x: Input tensor; the original comments describe it as
            (batch_size, n_steps, n_input).
        input_size: Size of the last axis of ``x``.
        num_hidden: Number of LSTM units.

    Returns:
        The last RNN output projected to a single column.
    """
    weights = {
        # Input -> hidden projection.
        'hidden': tf.Variable(tf.random_normal([input_size, num_hidden])),
        'out': tf.Variable(tf.random_normal([num_hidden, 1])),
    }
    biases = {
        'hidden': tf.Variable(tf.random_normal([num_hidden])),
        'out': tf.Variable(tf.random_normal([1])),
    }

    # Swap the batch and step axes, flatten to
    # (n_steps * batch_size, n_input), project, then cut into per-step pieces.
    time_major = tf.transpose(x, [1, 0, 2])
    flat = tf.reshape(time_major, [-1, input_size])
    projected = tf.matmul(flat, weights['hidden']) + biases['hidden']
    step_inputs = tf.split(0, seq_len, projected)

    cell = rnn_cell.BasicLSTMCell(num_hidden, forget_bias=1.0)
    # `step_outputs` is a list of seq_len tensors; per the original comment
    # each one is [batch_size, num_units].
    step_outputs, states = rnn.rnn(cell, step_inputs, dtype=tf.float32)

    last_output = step_outputs[-1]
    return tf.matmul(last_output, weights['out']) + biases['out']
def create_model(self):
    """Build the word-level RNN language model graph.

    Reads ``self.batch_size``, ``self.seq_length``, ``self.rnn_size``,
    ``self.vocab_size``, ``self.num_layers``, ``self.is_training``,
    ``self.infer`` and ``self.grad_clip``.

    Sets ``self.input_data``, ``self.target_data``, ``self.keep_prob``,
    ``self.lr``, ``self.initial_state``, ``self.embedding``, ``self.logits``,
    ``self.probs``, ``self.cost``, ``self.final_state`` and
    ``self.train_op``.
    """
    self.input_data = tf.placeholder(
        tf.int32, [self.batch_size, self.seq_length], name="input_data")
    self.target_data = tf.placeholder(
        tf.int32, [self.batch_size, self.seq_length], name="target_data")

    # Hyper-parameters. The Python value of the keep probability is kept
    # next to the variable so that graph-construction decisions never depend
    # on the truth value of a tensor.
    keep_prob_value = 0.3
    self.keep_prob = tf.Variable(
        keep_prob_value, trainable=False, name='keep_prob')
    # NOTE(review): `self.lr` is created here but the optimizer below uses a
    # fixed 0.01 learning rate - confirm which one is intended.
    self.lr = tf.Variable(0.0, trainable=False, name="lr")

    softmax_weights = tf.get_variable(
        "softmax_weights", [self.rnn_size, self.vocab_size])
    softmax_biases = tf.get_variable("softmax_biases", [self.vocab_size])

    lstm_cell = rnn_cell.BasicLSTMCell(self.rnn_size)
    multilayer_cell = rnn_cell.MultiRNNCell([lstm_cell] * self.num_layers)
    self.initial_state = multilayer_cell.zero_state(
        self.batch_size, tf.float32)

    with tf.device("/cpu:0"):
        # Embedding matrix for the whole vocabulary.
        self.embedding = tf.get_variable(
            "embeddings", [self.vocab_size, self.rnn_size])
        # Vector representation of every input word.
        embeds = tf.nn.embedding_lookup(self.embedding, self.input_data)
        # `self.keep_prob` is a tf.Variable, so `self.keep_prob < 1` builds a
        # tensor whose truth value is not a usable Python condition; compare
        # the plain float instead.
        if self.is_training and keep_prob_value < 1:
            embeds = tf.nn.dropout(embeds, self.keep_prob)

    def loop(prev, _):
        # Feed the embedding of the most likely word back as the next input.
        prev = tf.nn.xw_plus_b(prev, softmax_weights, softmax_biases)
        prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
        return tf.nn.embedding_lookup(self.embedding, prev_symbol)

    # Convert the input into a list of seq_length tensors. After the split
    # every piece is (batch_size, 1, rnn_size); squeezing axis 1 turns it
    # into (batch_size, rnn_size).
    inputs = tf.split(1, self.seq_length, embeds)
    inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

    output, states = seq2seq.rnn_decoder(
        inputs, self.initial_state, multilayer_cell,
        loop_function=loop if self.infer else None, scope='rnnlm')
    output = tf.reshape(tf.concat(1, output), [-1, self.rnn_size])

    self.logits = tf.nn.xw_plus_b(output, softmax_weights, softmax_biases)
    self.probs = tf.nn.softmax(self.logits, name="probability")

    # Every position is weighted equally in the loss.
    loss = seq2seq.sequence_loss_by_example(
        [self.logits],
        [tf.reshape(self.target_data, [-1])],
        [tf.ones([self.batch_size * self.seq_length])],
        self.vocab_size)
    self.cost = tf.reduce_sum(loss) / (self.batch_size * self.seq_length)
    self.final_state = states[-1]

    # Clip the global gradient norm before applying the update.
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(
        tf.gradients(self.cost, tvars), self.grad_clip)
    optimizer = tf.train.AdamOptimizer(0.01)
    self.train_op = optimizer.apply_gradients(zip(grads, tvars))
def __init__(self, config):
    """Build a stacked-LSTM regression graph from ``config``.

    Reads ``config.n_hidden``, ``config.num_layers``, ``config.batch_size``,
    ``config.n_features``, ``config.is_training`` and
    ``config.learning_rate``. Sets ``self._train_op``, ``self._input_data``,
    ``self._targets``, ``self._initial_state``, ``self._final_state`` and
    ``self._cost``.

    NOTE(review): ``self.input_data`` and ``self.targets`` are not assigned
    here; presumably they are properties defined elsewhere in the class that
    expose ``_input_data`` / ``_targets`` - confirm.
    """
    lstm_cell = rnn_cell.BasicLSTMCell(config.n_hidden, forget_bias=0.0)
    cell = rnn_cell.MultiRNNCell([lstm_cell] * config.num_layers)
    # Default train op; replaced at the end only when config.is_training.
    self._train_op = tf.no_op()
    self._input_data = tf.placeholder(tf.int32, [config.batch_size])
    # NOTE(review): the placeholder above is int32 with a single dimension,
    # while it is used as the left operand of a matmul here - confirm the
    # intended dtype and shape.
    _X = tf.matmul(self._input_data, tf.get_variable("weights_out", [
        config.n_hidden, 1
    ])) + tf.get_variable("bias_hidden", [config.n_hidden])
    self._targets = tf.placeholder(tf.int32, [config.batch_size])
    self._initial_state = cell.zero_state(config.batch_size, tf.float32)
    state = self._initial_state
    # NOTE(review): `self.input_data` is passed positionally before the split
    # `_X`; verify this call against the rnn.rnn signature in use.
    outputs, states = rnn.rnn(cell,
                              self.input_data,
                              tf.split(0, 1, _X),
                              initial_state=state)
    # NOTE(review): "weights_out" is requested a second time here with shape
    # [1] (it was requested above with shape [n_hidden, 1]) - confirm the
    # intended variable names.
    pred = tf.matmul(
        outputs[-1], tf.get_variable("weights_hidden", [config.n_features,
                                                        config.n_hidden
                                                        ])) + tf.get_variable("weights_out", [1])
    self._final_state = states[-1]
    # Mean squared error between the prediction and the targets.
    self._cost = cost = tf.reduce_mean(tf.square(pred - self.targets))
    #optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)
    # Evaluation-only models stop here and keep the no-op train op.
    if not config.is_training:
        return
    optimizer = tf.train.GradientDescentOptimizer(
        learning_rate=config.learning_rate).minimize(cost)
    self._train_op = optimizer
def __init__(self, n_input=None, n_steps=None, n_output=None, n_char=None, n_train_batch=1, n_validation=1, n_test=None):
    """Create the shared parameters and the train/validation/test models.

    Args:
        n_input: Embedding width; also the number of LSTM units.
        n_steps: Stored as ``self.n_steps``.
        n_output: Width of the output projection.
        n_char: Number of rows of the embedding matrix.
        n_train_batch: First argument given to the training ``Model``.
        n_validation: First argument given to the validation ``Model``.
        n_test: First argument given to the test ``Model``.
    """
    # Model hyperparameters.
    self.n_input = n_input
    self.n_steps = n_steps
    self.n_output = n_output
    self.n_char = n_char

    # Model parameters; the same objects are handed to all three models.
    embedding_init = tf.random_uniform([n_char, n_input], -1.0, 1.0)
    self.embeddings = tf.Variable(embedding_init)
    self.lstm_cell = rnn_cell.BasicLSTMCell(n_input, forget_bias=1.0)
    self.weights = {
        'out': tf.Variable(tf.random_normal([n_input, n_output])),
    }
    self.biases = {
        'out': tf.Variable(tf.random_normal([n_output])),
    }

    # Train, validation and test models.
    shared = (self.weights, self.biases, self.lstm_cell)
    self.model_train = Model(n_train_batch, *shared)
    self.model_validation = Model(n_validation, *shared)
    self.model_test = Model(n_test, *shared)
def build_lstm_inner(H, lstm_input):
    '''
    Build the LSTM decoder.

    The same `lstm_input` is fed to the cell at every one of the
    H['rnn_len'] steps, starting from an all-zero state.

    Args:
        H: mapping providing 'lstm_size', 'num_lstm_layers', 'batch_size',
            'grid_height', 'grid_width' and 'rnn_len'.
        lstm_input: tensor given to the cell at every step.

    Returns:
        List with the cell output of every step.
    '''
    base_cell = rnn_cell.BasicLSTMCell(
        H['lstm_size'], forget_bias=0.0, state_is_tuple=False)
    lstm = base_cell
    if H['num_lstm_layers'] > 1:
        lstm = rnn_cell.MultiRNNCell(
            [base_cell] * H['num_lstm_layers'], state_is_tuple=False)

    # One state row per grid cell of every image in the batch.
    n_rows = H['batch_size'] * H['grid_height'] * H['grid_width']
    state = tf.zeros([n_rows, lstm.state_size])

    step_outputs = []
    uniform_init = tf.random_uniform_initializer(-0.1, 0.1)
    with tf.variable_scope('RNN', initializer=uniform_init):
        for time_step in range(H['rnn_len']):
            # Variables are created at step 0 and shared afterwards.
            if time_step > 0:
                tf.get_variable_scope().reuse_variables()
            step_output, state = lstm(lstm_input, state)
            step_outputs.append(step_output)
    return step_outputs
def predict_next_frame(H, lstm_input):
    """Run an LSTM over nine entries of ``lstm_input`` in reverse order.

    The entries ``lstm_input[8]`` down to ``lstm_input[0]`` are each reshaped
    to [300, 832], fed to the cell, and the cell output is reshaped to
    [1, 15, 20, 832].

    Args:
        H: Mapping providing 'num_lstm_layers', 'batch_size', 'grid_height'
            and 'grid_width'.
        lstm_input: Indexable collection with at least nine entries.

    Returns:
        List with the nine reshaped cell outputs, in processing order.
    """
    base_cell = rnn_cell.BasicLSTMCell(
        832, forget_bias=0.0, state_is_tuple=False)
    lstm = base_cell
    if H['num_lstm_layers'] > 1:
        lstm = rnn_cell.MultiRNNCell(
            [base_cell] * H['num_lstm_layers'], state_is_tuple=False)

    n_rows = H['batch_size'] * H['grid_height'] * H['grid_width']
    state = tf.zeros([n_rows, lstm.state_size])

    frames = []
    uniform_init = tf.random_uniform_initializer(-0.1, 0.1)
    with tf.variable_scope('RNN', initializer=uniform_init):
        # Walk the inputs from index 8 down to index 0.
        for step, source_index in enumerate(reversed(range(9))):
            # Variables are created at step 0 and shared afterwards.
            if step > 0:
                tf.get_variable_scope().reuse_variables()
            flat_input = tf.reshape(lstm_input[source_index], [300, 832])
            cell_output, state = lstm(flat_input, state)
            frames.append(tf.reshape(cell_output, [1, 15, 20, 832]))
    return frames
def RNN(x, weights, biases, init_state):
    """Run a single-layer LSTM from ``init_state`` and project its last output.

    Reads the module-level names ``n_input``, ``n_hidden`` and ``n_steps``.

    Args:
        x: Input tensor; the original comments describe it as
            (batch_size, n_steps, n_input).
        weights: Mapping providing the 'out' weight matrix.
        biases: Mapping providing the 'out' bias vector.
        init_state: Initial state handed to ``rnn.rnn``.

    Returns:
        A 2-tuple: the last RNN output multiplied by ``weights['out']`` plus
        ``biases['out']``, and the ``state_size`` of the LSTM cell.
    """
    # `rnn.rnn` wants a list of per-step tensors: swap the batch and step
    # axes, flatten to (n_steps * batch_size, n_input), then cut the result
    # into n_steps pieces.
    #
    # Original author's remark (translated): personally I find these three
    # steps the hardest part to understand; a concrete reshape demo can be
    # found in basic_op under 1_Introduction. The net effect is that the
    # first row of every image is pulled out to form one matrix, which is
    # exactly the [batch_size, cell.input_size] layout that is required. The
    # actual processing logic lives inside rnn.rnn.
    time_major = tf.transpose(x, [1, 0, 2])
    flat = tf.reshape(time_major, [-1, n_input])
    step_inputs = tf.split(0, n_steps, flat)

    cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
    step_outputs, states = rnn.rnn(
        cell, step_inputs, initial_state=init_state, dtype=tf.float32)

    # Linear read-out of the inner loop's last output.
    logits = tf.matmul(step_outputs[-1], weights['out']) + biases['out']
    return logits, cell.state_size
def _shared_layer(input_data, config):
    """Build the shared encoder layers.

    NOTE(review): ``is_training`` is not a parameter of this function; it is
    resolved from an enclosing or module scope - confirm where it is defined.

    Args:
        input_data: Tensor described by the original docstring as
            batch_size x num_steps x embedding size.
        config: Object providing ``encoder_size``, ``num_steps``,
            ``keep_prob``, ``num_shared_layers`` and ``batch_size``.

    Returns:
        A 2-tuple ``(encoder_outputs, initial_state)``: the list of outputs
        produced by ``rnn.rnn`` and the zero state the encoder started from.
    """
    base_cell = rnn_cell.BasicLSTMCell(config.encoder_size)

    # One tensor per step, with the size-1 step axis removed.
    step_inputs = []
    for step in tf.split(1, config.num_steps, input_data):
        step_inputs.append(tf.squeeze(step, [1]))

    # Output dropout is only added while training.
    layer_cell = base_cell
    if is_training and config.keep_prob < 1:
        layer_cell = rnn_cell.DropoutWrapper(
            layer_cell, output_keep_prob=config.keep_prob)

    stacked = rnn_cell.MultiRNNCell([layer_cell] * config.num_shared_layers)
    initial_state = stacked.zero_state(config.batch_size, tf.float32)

    encoder_outputs, encoder_states = rnn.rnn(
        stacked, step_inputs, initial_state=initial_state,
        scope="encoder_rnn")
    return encoder_outputs, initial_state
def __init__(self, vocab_size, size=256, depth=2, learning_rate=1e-4, batch_size=32, keep_prob=0.1, num_steps=100, checkpoint_dir="checkpoint", forward_only=False):
    """Initialize the parameters for a Deep Bidirectional LSTM model.

    Args:
        vocab_size: int, The dimensionality of the input vocab
        size: int, The dimensionality of the inputs into the Deep LSTM cell
            [32, 64, 256]
        depth: int, The number of stacked LSTM cells
        learning_rate: float, [1e-3, 5e-4, 1e-4, 5e-5]
        batch_size: int, The size of a batch [16, 32]
        keep_prob: unit Tensor or float between 0 and 1 [0.0, 0.1, 0.2];
            passed as ``output_keep_prob`` to the dropout wrapper
        num_steps: int, The max time unit [100]
        checkpoint_dir: not used by this constructor
        forward_only: bool, when true no dropout wrapper is added
    """
    super(DeepBiLSTM, self).__init__()

    self.vocab_size = int(vocab_size)
    self.size = int(size)
    self.depth = int(depth)
    self.learning_rate = float(learning_rate)
    self.batch_size = int(batch_size)
    self.keep_prob = float(keep_prob)
    # Previously read the undefined name `seq_length`.
    self.num_steps = int(num_steps)

    self.inputs = tf.placeholder(
        tf.int32, [self.batch_size, self.num_steps])
    self.input_lengths = tf.placeholder(tf.int64, [self.batch_size])

    with tf.device("/cpu:0"):
        # NOTE(review): the two positional arguments after the shape are the
        # mean and stddev of tf.truncated_normal, not a [-0.1, 0.1] range -
        # confirm that a mean of -0.1 is intended.
        self.emb = tf.Variable(
            tf.truncated_normal([self.vocab_size, self.size], -0.1, 0.1),
            name='emb')
        # Transposing first makes the lookup result time-major, so it can be
        # unpacked into one tensor per step below.
        self.embed_inputs = tf.nn.embedding_lookup(
            self.emb, tf.transpose(self.inputs))

    self.cell = rnn_cell.BasicLSTMCell(self.size, forget_bias=0.0)

    # Output dropout is only added when training. The wrapper used to be
    # applied to an unbound local after the stack was already built; it is
    # now applied to the cell that gets stacked.
    layer_cell = self.cell
    if not forward_only and self.keep_prob < 1:
        layer_cell = rnn_cell.DropoutWrapper(
            layer_cell, output_keep_prob=self.keep_prob)

    self.stacked_cell = rnn_cell.MultiRNNCell([layer_cell] * self.depth)
    self.initial_state = self.stacked_cell.zero_state(
        self.batch_size, tf.float32)

    self.outputs, self.states = rnn.rnn(
        self.stacked_cell,
        tf.unpack(self.embed_inputs),
        dtype=tf.float32,
        sequence_length=self.input_lengths,
        initial_state=self.initial_state)

    # Sum of the outputs over the time axis (previously read the
    # nonexistent attribute `self.output`).
    output = tf.reduce_sum(tf.pack(self.outputs), 0)
def __init__(self, dim_image, dim_embed, dim_hidden, batch_size, n_lstm_steps, n_words, bias_init_vector=None):
    """Store the model sizes and create the caption model parameters.

    Args:
        dim_image: Width of the image feature vector.
        dim_embed: Width of the word embedding.
        dim_hidden: Number of LSTM units.
        batch_size: Stored as ``self.batch_size``.
        n_lstm_steps: Stored as ``self.n_lstm_steps``.
        n_words: Vocabulary size.
        bias_init_vector: Optional array used (cast to float32) as the
            initial value of the word bias; ``self.init_bias`` is used when
            it is ``None``.
    """
    # The builtin int replaces the `np.int` alias, which was deprecated in
    # NumPy 1.20 and removed in 1.24; both perform the same conversion.
    self.dim_image = int(dim_image)
    self.dim_embed = int(dim_embed)
    self.dim_hidden = int(dim_hidden)
    self.batch_size = int(batch_size)
    self.n_lstm_steps = int(n_lstm_steps)
    self.n_words = int(n_words)

    with tf.device("/cpu:0"):
        self.Wemb = tf.Variable(
            tf.random_uniform([n_words, dim_embed], -0.1, 0.1), name='Wemb')

    self.bemb = self.init_bias(dim_embed, name='bemb')

    self.lstm = rnn_cell.BasicLSTMCell(dim_hidden)

    # Image feature -> hidden projection.
    self.encode_img_W = tf.Variable(
        tf.random_uniform([dim_image, dim_hidden], -0.1, 0.1),
        name='encode_img_W')
    self.encode_img_b = self.init_bias(dim_hidden, name='encode_img_b')

    # Hidden -> vocabulary projection.
    self.embed_word_W = tf.Variable(
        tf.random_uniform([dim_hidden, n_words], -0.1, 0.1),
        name='embed_word_W')

    if bias_init_vector is not None:
        self.embed_word_b = tf.Variable(
            bias_init_vector.astype(np.float32), name='embed_word_b')
    else:
        self.embed_word_b = self.init_bias(n_words, name='embed_word_b')
def testEmbeddingTiedRNNSeq2Seq(self):
    """Check result counts and shapes of `embedding_tied_rnn_seq2seq`.

    Covers the plain call, an externally supplied output projection, and the
    `feed_previous` mode given both as a Python bool and as a tensor.
    """
    with self.test_session() as sess:
        with tf.variable_scope("root",
                               initializer=tf.constant_initializer(0.5)):
            encoder_inputs = [
                tf.constant(1, tf.int32, shape=[2]) for i in xrange(2)
            ]
            decoder_inputs = [
                tf.constant(i, tf.int32, shape=[2]) for i in xrange(3)
            ]
            cell = rnn_cell.BasicLSTMCell(2)

            decoded, memory = seq2seq.embedding_tied_rnn_seq2seq(
                encoder_inputs, decoder_inputs, cell, 5)
            sess.run([tf.variables.initialize_all_variables()])

            decoded_value = sess.run(decoded)
            self.assertEqual(len(decoded_value), 3)
            self.assertEqual(decoded_value[0].shape, (2, 5))

            memory_value = sess.run(memory)
            self.assertEqual(len(memory_value), 4)
            self.assertEqual(memory_value[0].shape, (2, 4))

            # Test externally provided output projection.
            proj_w = tf.get_variable("proj_w", [2, 5])
            proj_b = tf.get_variable("proj_b", [5])
            with tf.variable_scope("proj_seq2seq"):
                decoded, _ = seq2seq.embedding_tied_rnn_seq2seq(
                    encoder_inputs, decoder_inputs, cell, 5,
                    output_projection=(proj_w, proj_b))
            sess.run([tf.variables.initialize_all_variables()])

            decoded_value = sess.run(decoded)
            self.assertEqual(len(decoded_value), 3)
            self.assertEqual(decoded_value[0].shape, (2, 2))

            # Test that previous-feeding model ignores inputs after the
            # first.
            zero_decoder_inputs = [
                tf.constant(0, tf.int32, shape=[2]) for _ in xrange(3)
            ]
            tf.get_variable_scope().reuse_variables()
            fed_original, _ = seq2seq.embedding_tied_rnn_seq2seq(
                encoder_inputs, decoder_inputs, cell, 5, feed_previous=True)
            fed_zeros, _ = seq2seq.embedding_tied_rnn_seq2seq(
                encoder_inputs, zero_decoder_inputs, cell, 5,
                feed_previous=True)
            fed_zeros_tensor_flag, _ = seq2seq.embedding_tied_rnn_seq2seq(
                encoder_inputs, zero_decoder_inputs, cell, 5,
                feed_previous=tf.constant(True))

            value_original = sess.run(fed_original)
            value_zeros = sess.run(fed_zeros)
            value_tensor_flag = sess.run(fed_zeros_tensor_flag)
            self.assertAllClose(value_original, value_zeros)
            self.assertAllClose(value_original, value_tensor_flag)
def __init__(self, is_training, config):
    """Build a stacked-LSTM model with a sigmoid read-out of the last step.

    NOTE(review): the ``is_training`` argument is never read; the early
    return below looks at ``config.is_training`` instead - confirm which one
    is intended. ``self.targets`` is not assigned here; presumably it is a
    property exposing ``_targets`` - confirm.

    Args:
        is_training: Accepted but not used.
        config: Object providing ``batch_size``, ``n_hidden``,
            ``num_steps``, ``num_layers``, ``num_features`` and
            ``is_training``.
    """
    self.batch_size = batch_size = config.batch_size
    size = config.n_hidden
    num_steps = config.num_steps

    self._input_data = tf.placeholder(
        tf.float32, (batch_size, config.num_steps))
    self._targets = tf.placeholder(tf.float32, [batch_size, 1])

    base_cell = rnn_cell.BasicLSTMCell(size, forget_bias=2.8)
    cell = rnn_cell.MultiRNNCell([base_cell] * config.num_layers)
    self._initial_state = cell.zero_state(batch_size, tf.float32)

    # Defaults; both are overwritten further down.
    self._train_op = tf.no_op()
    self._result = -1

    # This constant is replaced right away by the variable below; the call
    # is kept so that the constructed graph stays the same.
    weights_hidden = tf.constant(
        1.0, shape=[config.num_features, config.n_hidden])
    weights_hidden = tf.get_variable(
        "weights_hidden", [config.num_features, config.n_hidden])

    # Project every time step's column of the input to the hidden width.
    step_shape = [config.batch_size, config.num_features]
    inputs = [
        tf.matmul(tf.reshape(self._input_data[:, k], step_shape),
                  weights_hidden)
        for k in range(num_steps)
    ]

    outputs, states = rnn.rnn(
        cell, inputs, initial_state=self._initial_state)
    last_output = outputs[-1]

    # Not used afterwards; kept so that the constructed graph stays the same.
    output = tf.reshape(tf.concat(1, last_output), [-1, size])

    weights_out = tf.get_variable("weights_out", [config.n_hidden, 1])
    bias_out = tf.get_variable("bias_out", [1])
    pred = tf.sigmoid(tf.matmul(last_output, weights_out) + bias_out)

    self._pred = pred
    self._final_state = states[-1]
    # Squared error per example (not reduced).
    self._cost = cost = tf.square((pred[:, 0] - self.targets[:, 0]))
    # Absolute error of the first example only.
    self._result = tf.abs(pred[0, 0] - self.targets[0, 0])

    if not config.is_training:
        return

    optimizer = tf.train.AdamOptimizer().minimize(cost)
    self._train_op = optimizer
    print("top ", self._train_op)
def __init__(self, rnn_size, num_layers, vocab_size, grad_clip, batch_size=1, seq_length=1):
    """Build the RNN language model graph.

    When both ``batch_size`` and ``seq_length`` are 1 the decoder feeds its
    own most likely prediction back as the next input.

    Args:
        rnn_size: Number of LSTM units; also the embedding width.
        num_layers: Number of stacked LSTM cells.
        vocab_size: Number of distinct symbols.
        grad_clip: Global norm that the gradients are clipped to.
        batch_size: Number of sequences per batch.
        seq_length: Number of steps per sequence.
    """
    base_cell = rnn_cell.BasicLSTMCell(rnn_size)
    stacked = rnn_cell.MultiRNNCell([base_cell] * num_layers)
    self.cell = stacked

    self.input_data = tf.placeholder(tf.int32, [batch_size, seq_length])
    self.targets = tf.placeholder(tf.int32, [batch_size, seq_length])
    self.initial_state = stacked.zero_state(batch_size, tf.float32)

    with tf.variable_scope('rnnlm'):
        softmax_w = tf.get_variable('softmax_w', [rnn_size, vocab_size])
        softmax_b = tf.get_variable('softmax_b', [vocab_size])
        with tf.device('/cpu:0'):
            embedding = tf.get_variable('embedding', [vocab_size, rnn_size])
            embedded = tf.nn.embedding_lookup(embedding, self.input_data)
            # One tensor per step, with the size-1 step axis removed.
            inputs = [
                tf.squeeze(step, [1])
                for step in tf.split(1, seq_length, embedded)
            ]

    def loop(prev, _):
        # Feed the embedding of the most likely symbol back as next input.
        prev = tf.nn.xw_plus_b(prev, softmax_w, softmax_b)
        prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
        return tf.nn.embedding_lookup(embedding, prev_symbol)

    # The feedback loop is only enabled for the 1 x 1 configuration.
    if batch_size == 1 and seq_length == 1:
        loop_fn = loop
    else:
        loop_fn = None

    outputs, last_state = seq2seq.rnn_decoder(
        inputs, self.initial_state, stacked,
        loop_function=loop_fn, scope='rnnlm')
    flat_output = tf.reshape(tf.concat(1, outputs), [-1, rnn_size])

    self.logits = tf.nn.xw_plus_b(flat_output, softmax_w, softmax_b)
    self.probs = tf.nn.softmax(self.logits)

    # Every position is weighted equally in the loss.
    flat_targets = tf.reshape(self.targets, [-1])
    uniform_weights = tf.ones([batch_size * seq_length])
    loss = seq2seq.sequence_loss_by_example(
        [self.logits], [flat_targets], [uniform_weights], vocab_size)
    self.cost = tf.reduce_sum(loss) / batch_size / seq_length
    self.final_state = last_state

    # The learning rate starts at 0.0 and is not trainable.
    self.lr = tf.Variable(0.0, trainable=False)

    # Clip the global gradient norm before applying the update.
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(
        tf.gradients(self.cost, tvars), grad_clip)
    optimizer = tf.train.AdamOptimizer(self.lr)
    self.train_op = optimizer.apply_gradients(zip(grads, tvars))
def BiLSTMgraph(self, _X, _C, _T, _istate_fw, _istate_bw, _weights, _biases):
    """Embed the inputs, project them and run a bidirectional LSTM.

    NOTE(review): ``_istate_fw``, ``_istate_bw``, ``_weights`` and
    ``_biases`` are accepted but never read; the method uses
    ``self.istate_fw``, ``self.istate_bw``, ``self._weights`` and
    ``self._biases`` instead - confirm this is intended.

    Args:
        _X: Ids looked up in ``self._weights['w_emb']``; the original comment
            describes the input as [len_sent, len_seq] (e.g. 7x5).
        _C: Ids looked up in ``self._weights['c_emb']``.
        _T: Optional ids looked up in ``self._weights['t_emb']``. ``None``
            (or a falsy plain Python value) disables this third input.
        _istate_fw: Unused.
        _istate_bw: Unused.
        _weights: Unused.
        _biases: Unused.

    Returns:
        Whatever ``rnn.bidirectional_rnn`` returns for the projected inputs.
    """
    # Decide whether the optional input is present without asking a tensor
    # for its truth value: the former `if _T:` is not a valid Python
    # condition for a tf.Tensor. Plain Python values keep their old meaning.
    if _T is None or isinstance(_T, (bool, int, float, list, tuple)):
        use_t = bool(_T)
    else:
        use_t = True

    # Transform the ids into embeddings and apply the linear projection.
    emb_x = tf.nn.embedding_lookup(self._weights['w_emb'], _X)
    emb_c = tf.nn.embedding_lookup(self._weights['c_emb'], _C)
    hidden = (tf.matmul(emb_x, self._weights['hidden_w']) +
              tf.matmul(emb_c, self._weights['hidden_c']))
    if use_t:
        emb_t = tf.nn.embedding_lookup(self._weights['t_emb'], _T)
        hidden = hidden + tf.matmul(emb_t, self._weights['hidden_t'])
    _X = hidden + self._biases['hidden_b']

    # One LSTM cell per direction, each with a fixed 0.5 output keep
    # probability.
    lstm_fw_cell = rnn_cell.BasicLSTMCell(self.num_hidden, forget_bias=1.0)
    lstm_fw_cell = tf.nn.rnn_cell.DropoutWrapper(
        lstm_fw_cell, output_keep_prob=0.5)
    lstm_bw_cell = rnn_cell.BasicLSTMCell(self.num_hidden, forget_bias=1.0)
    lstm_bw_cell = tf.nn.rnn_cell.DropoutWrapper(
        lstm_bw_cell, output_keep_prob=0.5)

    # The RNN inner loop needs a list with one tensor per step.
    _X = tf.split(0, self.sent_max_len, _X)

    outputs = rnn.bidirectional_rnn(
        lstm_fw_cell, lstm_bw_cell, _X,
        initial_state_fw=self.istate_fw,
        initial_state_bw=self.istate_bw,
        sequence_length=self.seq_len)
    return outputs
def lstm_model(_weights, _biases, _Wemb, _config):
    """Build the image-captioning training graph and return loss and inputs.

    The image embedding is fed to the LSTM at step 0; every later step gets
    the embedding of the previous word and contributes a masked cross
    entropy on the current word.

    Args:
        _weights: Mapping providing 'encoding_img_W' and 'embed_word_W'.
        _biases: Mapping providing 'encoding_img_b', 'bemb' and
            'embed_word_b'.
        _Wemb: Word embedding matrix.
        _config: Object providing ``batch_size``, ``dim_image``, ``maxlen``,
            ``dim_hidden`` and ``n_words``.

    Returns:
        A 4-tuple ``(loss, image, sentence, mask)``: the masked loss divided
        by the sum of the mask from step 1 onwards, followed by the three
        placeholders created here.
    """
    batch_size = _config.batch_size
    n_steps = _config.maxlen + 2

    _image = tf.placeholder(tf.float32, [batch_size, _config.dim_image])
    _sentence = tf.placeholder(tf.int32, [batch_size, n_steps])
    _mask = tf.placeholder(tf.float32, [batch_size, n_steps])

    lstm = rnn_cell.BasicLSTMCell(_config.dim_hidden)

    # (batch_size, dim_hidden)
    image_emb = (tf.matmul(_image, _weights['encoding_img_W']) +
                 _biases['encoding_img_b'])

    state = tf.zeros([batch_size, lstm.state_size])
    total_loss = 0.0

    with tf.variable_scope("RNN"):
        for i in range(n_steps):
            if i == 0:
                step_input = image_emb
            else:
                with tf.device("/cpu:0"):
                    step_input = tf.nn.embedding_lookup(
                        _Wemb, _sentence[:, i - 1]) + _biases['bemb']
                # Variables are created at step 0 and shared afterwards.
                tf.get_variable_scope().reuse_variables()

            # (batch_size, dim_hidden)
            output, state = lstm(step_input, state)

            # The image step has no target word.
            if i == 0:
                continue

            # Build one-hot targets from (row, word id) index pairs.
            word_ids = tf.expand_dims(_sentence[:, i], 1)
            row_ids = tf.expand_dims(tf.range(0, batch_size, 1), 1)
            sparse_indices = tf.concat(1, [row_ids, word_ids])
            onehot_labels = tf.sparse_to_dense(
                sparse_indices,
                tf.pack([batch_size, _config.n_words]),
                1.0, 0.0)  # (batch_size, n_words)

            # (batch_size, n_words)
            logit_words = (tf.matmul(output, _weights['embed_word_W']) +
                           _biases['embed_word_b'])
            cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
                logit_words, onehot_labels)
            # Positions with a zero mask do not contribute to the loss.
            cross_entropy = cross_entropy * _mask[:, i]
            total_loss = total_loss + tf.reduce_sum(cross_entropy)

    total_loss = total_loss / tf.reduce_sum(_mask[:, 1:])
    return total_loss, _image, _sentence, _mask
def __init__(self, dim_image, n_words, dim_hidden, batch_size, n_lstm_steps, bias_init_vector = None):
    """Store the model dimensions and create the cells and variables.

    Args:
        dim_image: input size of the image encoding matrix.
        n_words: number of rows of the embedding table and output size of
            the word projection.
        dim_hidden: LSTM size; also the embedding width.
        batch_size: stored on the instance as ``self.batch_size``.
        n_lstm_steps: stored on the instance as ``self.n_lstm_steps``.
        bias_init_vector: optional array (must support ``astype``) used as
            the initial value of ``embed_word_b``; zeros are used when it
            is ``None``.
    """
    def uniform_init(shape):
        # Every weight matrix here is drawn from U(-0.1, 0.1).
        return tf.random_uniform(shape, -0.1, 0.1)

    self.dim_image = dim_image
    self.n_words = n_words
    self.dim_hidden = dim_hidden
    self.batch_size = batch_size
    self.n_lstm_steps = n_lstm_steps

    # NOTE(review): the device scope is assumed to cover only the embedding
    # table -- confirm against the original layout.
    with tf.device("/cpu:0"):
        self.Wemb = tf.Variable(uniform_init([n_words, dim_hidden]), name='Wemb')

    self.lstm1 = rnn_cell.BasicLSTMCell(dim_hidden)
    self.lstm2 = rnn_cell.BasicLSTMCell(dim_hidden)

    self.encode_image_W = tf.Variable(
        uniform_init([dim_image, dim_hidden]), name='encode_image_W')
    self.encode_image_b = tf.Variable(
        tf.zeros([dim_hidden]), name='encode_image_b')

    self.embed_word_W = tf.Variable(
        uniform_init([dim_hidden, n_words]), name='embed_word_W')

    # Use the caller-supplied bias initialisation when there is one.
    if bias_init_vector is None:
        word_bias_init = tf.zeros([n_words])
    else:
        word_bias_init = bias_init_vector.astype(np.float32)
    self.embed_word_b = tf.Variable(word_bias_init, name='embed_word_b')
def __init__(self, config, is_training):
    """Build the multi-layer LSTM graph with a sigmoid cross-entropy cost.

    Creates the input/target placeholders, unrolls the cell over
    ``config.num_steps`` steps with ``seq2seq.rnn_decoder``, projects every
    step output to a single logit and averages the sigmoid cross-entropy
    against the targets. When ``is_training`` is true, an Adam training op
    with global-norm gradient clipping is added as well.

    Args:
        config: object providing batch_size, num_steps, hidden_size,
            num_layers, keep_prob and max_grad_norm.
        is_training: enables output dropout (when ``keep_prob < 1``) and
            the creation of ``self.lr`` / ``self.train_op``.
    """
    batch_size = config.batch_size
    num_steps = config.num_steps
    size = config.hidden_size
    self.batch_size = batch_size
    self.num_steps = num_steps

    # Stack num_layers LSTM layers; dropout wraps the whole stack.
    base_cell = rnn_cell.BasicLSTMCell(size, forget_bias=0.0)
    cell = rnn_cell.MultiRNNCell([base_cell] * config.num_layers)
    use_dropout = is_training and config.keep_prob < 1
    if use_dropout:
        cell = rnn_cell.DropoutWrapper(cell, output_keep_prob=config.keep_prob)
    self.cell = cell

    self.input_data = tf.placeholder(dtype=tf.float32,
                                     shape=[None, num_steps, 1])
    self.target_data = tf.placeholder(dtype=tf.float32,
                                      shape=[None, num_steps, 1])
    self.initial_state = cell.zero_state(batch_size=config.batch_size,
                                         dtype=tf.float32)

    # One tensor per step, with the step axis squeezed away.
    inputs = [
        tf.squeeze(step_slice, [1])
        for step_slice in tf.split(1, num_steps, self.input_data)
    ]

    with tf.variable_scope('rnnvm'):
        output_w = tf.get_variable("output_w", [size, 1])
        output_b = tf.get_variable("output_b", [1])

    outputs, states = seq2seq.rnn_decoder(inputs, self.initial_state, cell,
                                          scope='rnnvm')

    # Flatten the step outputs to one row per (example, step) pair.
    flat_outputs = tf.reshape(tf.concat(1, outputs), [-1, size])
    logits = tf.nn.xw_plus_b(flat_outputs, output_w, output_b)
    flat_targets = tf.reshape(self.target_data,
                              shape=[num_steps * batch_size, 1])
    entropy = tf.nn.sigmoid_cross_entropy_with_logits(logits, flat_targets)

    cost = tf.reduce_mean(entropy)
    self.cost = cost
    self.final_state = states[-1]

    if is_training:
        # Learning rate is a non-trainable variable, created at 0.0.
        self.lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),
                                          config.max_grad_norm)
        optimizer = tf.train.AdamOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
def build_lstm_inner(lstm_input, H):
    """Unroll one LSTM cell for ``H['arch']['rnn_len']`` steps.

    The same ``lstm_input`` tensor is fed to the cell at every step; only
    the recurrent state changes between steps.

    Args:
        lstm_input: tensor fed to the cell at each step.
        H: dict whose 'arch' entry provides lstm_size, batch_size,
            grid_height, grid_width and rnn_len.

    Returns:
        List with the cell output of every step, in step order.
    """
    arch = H['arch']

    cell = rnn_cell.BasicLSTMCell(arch['lstm_size'], forget_bias=0.0)

    # The state is sized with one row per grid cell of every batch entry.
    n_rows = arch['batch_size'] * arch['grid_height'] * arch['grid_width']
    state = tf.zeros([n_rows, cell.state_size])

    outputs = []
    with tf.variable_scope('RNN'):
        for step in range(arch['rnn_len']):
            if step > 0:
                # Variables exist after the first call; share them.
                tf.get_variable_scope().reuse_variables()
            step_output, state = cell(lstm_input, state)
            outputs.append(step_output)
    return outputs
def rnn_model(X, init_state, lstm_size, slicing_tensors):
    """Run an LSTM over ``X`` and slice one output vector per batch entry.

    Depends on the names ``n_lstm_steps``, ``batch_size`` and ``dim_hidden``
    defined outside this function.

    Args:
        X: rank-3 input whose first two axes are swapped before use.
            Presumably (batch, time step, feature) with the feature size
            equal to ``lstm_size`` -- TODO confirm against the caller.
        init_state: initial state handed to ``rnn.rnn``.
        lstm_size: number of LSTM units; also the row width that ``X`` is
            flattened to.
        slicing_tensors: tensor split along axis 0 into ``batch_size``
            pieces; each squeezed piece is used as the ``begin`` index of a
            ``[1, 1, dim_hidden]`` slice of the stacked outputs.

    Returns:
        Tuple ``(sliced_outputs, state_size)``: a list with one sliced
        tensor per batch entry, and ``lstm.state_size`` so that callers can
        size the initial state.
    """
    # Swap the first two axes, flatten to rows of width lstm_size, then cut
    # the rows into one chunk per LSTM step.
    flat_rows = tf.reshape(tf.transpose(X, [1, 0, 2]), [-1, lstm_size])
    step_inputs = tf.split(0, n_lstm_steps, flat_rows)

    lstm = rnn_cell.BasicLSTMCell(lstm_size, forget_bias=1.0)
    step_outputs, _states = rnn.rnn(lstm, step_inputs, initial_state=init_state)

    # Stack the per-step outputs into a single 3-D tensor so that arbitrary
    # (step, batch) positions can be sliced out. No output projection is
    # applied.
    stacked = tf.reshape(tf.concat(0, step_outputs),
                         [n_lstm_steps, batch_size, dim_hidden])

    sliced_outputs = [
        tf.slice(stacked, begin=tf.squeeze(piece), size=[1, 1, dim_hidden])
        for piece in tf.split(0, batch_size, slicing_tensors)
    ]

    return sliced_outputs, lstm.state_size
def _chunk_private(encoder_units, pos_prediction, config):
    """Decode chunk-tag logits from encoder units and POS predictions.

    ``pos_prediction`` is reshaped to
    ``[batch_size, num_steps, pos_embedding_size]``, concatenated with
    ``encoder_units`` along the last axis and run through an LSTM decoder
    followed by a linear projection to ``config.num_chunk_tags`` logits.

    NOTE(review): ``batch_size``, ``num_steps``, ``pos_embedding_size`` and
    ``is_training`` are not parameters of this function; they are expected
    to come from an enclosing scope -- confirm they agree with ``config``.

    Args:
        encoder_units: rank-3 tensor joined with the reshaped POS
            predictions along axis 2.
        pos_prediction: tensor that can be reshaped to
            ``[batch_size, num_steps, pos_embedding_size]``.
        config: object providing chunk_decoder_size, keep_prob, batch_size,
            num_steps and num_chunk_tags.

    Returns:
        Tuple ``(logits, decoder_states)``.
    """
    pos_3d = tf.reshape(pos_prediction,
                        [batch_size, num_steps, pos_embedding_size])
    chunk_inputs = tf.concat(2, [pos_3d, encoder_units])

    decoder_size = config.chunk_decoder_size

    with tf.variable_scope("chunk_decoder"):
        cell = rnn_cell.BasicLSTMCell(decoder_size, forget_bias=1.0)
        if is_training and config.keep_prob < 1:
            cell = rnn_cell.DropoutWrapper(
                cell, output_keep_prob=config.keep_prob)

        initial_state = cell.zero_state(config.batch_size, tf.float32)

        # Turn the 3-D tensor into a list of per-step 2-D tensors
        # (batch_size x input size).
        per_step = tf.split(1, config.num_steps, chunk_inputs)
        inputs = [tf.squeeze(step, [1]) for step in per_step]

        decoder_outputs, decoder_states = rnn.rnn(
            cell, inputs, initial_state=initial_state, scope="chunk_rnn")

        # One row per (example, step) pair.
        output = tf.reshape(tf.concat(1, decoder_outputs), [-1, decoder_size])

        softmax_w = tf.get_variable(
            "softmax_w", [decoder_size, config.num_chunk_tags])
        softmax_b = tf.get_variable("softmax_b", [config.num_chunk_tags])
        logits = tf.matmul(output, softmax_w) + softmax_b

    return logits, decoder_states
def __init__(self, is_training, config):
    """Build the multi-layer LSTM regression graph.

    Each scalar input is projected to ``hidden_size`` features, run through
    a stack of ``num_layers`` LSTM layers unrolled over ``num_steps``, and
    projected back to one value per step. The cost is the mean squared
    error between those values and the targets. When ``is_training`` is
    true, an Adam training op with global-norm gradient clipping is added.

    Args:
        is_training: enables dropout (when ``config.keep_prob < 1``) and
            the creation of the learning-rate variable and training op.
        config: object providing batch_size, num_steps, hidden_size,
            keep_prob, num_layers and max_grad_norm.
    """
    self.batch_size = batch_size = config.batch_size
    self.num_steps = num_steps = config.num_steps
    size = config.hidden_size
    use_dropout = is_training and config.keep_prob < 1

    self._input_data = tf.placeholder(tf.float32, [batch_size, num_steps])
    self._targets = tf.placeholder(tf.float32, [batch_size, num_steps])

    # Dropout is applied to each layer's output before stacking.
    lstm_cell = rnn_cell.BasicLSTMCell(size, forget_bias=0.0)
    if use_dropout:
        lstm_cell = rnn_cell.DropoutWrapper(
            lstm_cell, output_keep_prob=config.keep_prob)
    cell = rnn_cell.MultiRNNCell([lstm_cell] * config.num_layers)

    self._initial_state = cell.zero_state(batch_size, tf.float32)

    # Project every [batch_size, 1] step slice up to the hidden size.
    iw = tf.get_variable("input_w", [1, size])
    ib = tf.get_variable("input_b", [size])
    inputs = [
        tf.nn.xw_plus_b(i_, iw, ib)
        for i_ in tf.split(1, num_steps, self._input_data)
    ]
    if use_dropout:
        inputs = [
            tf.nn.dropout(input_, config.keep_prob) for input_ in inputs
        ]

    outputs, states = rnn.rnn(cell, inputs,
                              initial_state=self._initial_state)

    # One row per (example, step) pair: [batch_size * num_steps, size].
    rnn_output = tf.reshape(tf.concat(1, outputs), [-1, size])
    self._output = output = tf.nn.xw_plus_b(
        rnn_output,
        tf.get_variable("out_w", [size, 1]),
        tf.get_variable("out_b", [1]))

    # Targets must be a column like `output`. A flat [N] target would
    # broadcast against the [N, 1] output into an [N, N] matrix, so the mean
    # would run over every output/target pairing rather than matching rows.
    targets = tf.reshape(self._targets, [-1, 1])
    self._cost = cost = tf.reduce_mean(tf.square(output - targets))
    self._final_state = states[-1]

    if not is_training:
        return

    # Learning rate is a non-trainable variable, created at 0.0.
    self._lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),
                                      config.max_grad_norm)
    # Use the attribute assigned above, so this does not depend on a
    # separate `lr` accessor existing on the class.
    optimizer = tf.train.AdamOptimizer(self._lr)
    self._train_op = optimizer.apply_gradients(zip(grads, tvars))