def encoder(self):
    with tf.variable_scope("encoder") as encoder_scope:
        encoder_w_in = self._weight_variable(
            [self.input_dim, self.hidden_size], name='encoder_w_in')
        encoder_b_in = self._bias_variable(
            [self.hidden_size], name='encoder_b_in')
        encoder_inputs_2d = tf.reshape(
            self.encoder_inputs,
            [self.batch_size * self.max_time, self.input_dim])
        encoder_cell_inputs = tf.nn.relu(
            tf.add(tf.matmul(encoder_inputs_2d, encoder_w_in), encoder_b_in))
        encoder_cell_inputs_3d = tf.reshape(
            encoder_cell_inputs,
            [self.batch_size, self.max_time, self.hidden_size])

        encoder_fw_cells = []
        encoder_bw_cells = []
        for i in range(self.num_layers):
            with tf.variable_scope('encoder_lstm_{}'.format(i)):
                encoder_fw_cells.append(
                    rnn_cell.DropoutWrapper(
                        cell=rnn_cell.BasicLSTMCell(
                            self.hidden_size, forget_bias=1.0,
                            state_is_tuple=True),
                        input_keep_prob=1.0,
                        output_keep_prob=self.output_keep_prob))
                encoder_bw_cells.append(
                    rnn_cell.DropoutWrapper(
                        cell=rnn_cell.BasicLSTMCell(
                            self.hidden_size, forget_bias=1.0,
                            state_is_tuple=True),
                        input_keep_prob=1.0,
                        output_keep_prob=self.output_keep_prob))
        encoder_muti_fw_cell = rnn_cell.MultiRNNCell(encoder_fw_cells)
        encoder_muti_bw_cell = rnn_cell.MultiRNNCell(encoder_bw_cells)

        (encoder_fw_outputs, encoder_bw_outputs), \
            (encoder_fw_final_state, encoder_bw_final_state) = \
            tf.nn.bidirectional_dynamic_rnn(
                cell_fw=encoder_muti_fw_cell,
                cell_bw=encoder_muti_bw_cell,
                inputs=encoder_cell_inputs_3d,
                # sequence_length=self.sequence_length,
                dtype=tf.float32,
                time_major=False)

        encoder_outputs = tf.concat(
            (encoder_fw_outputs, encoder_bw_outputs), 2)
        # encoder_final_state_c = tf.concat(
        #     (encoder_fw_final_state.c, encoder_bw_final_state.c), 1)
        # encoder_final_state_h = tf.concat(
        #     (encoder_fw_final_state.h, encoder_bw_final_state.h), 1)
        # encoder_final_state = tf.contrib.rnn.LSTMStateTuple(
        #     c=encoder_final_state_c,
        #     h=encoder_final_state_h)
        return encoder_outputs
def __init__(self, dim_image, dim_embed, dim_hidden, batch_size,
             n_lstm_steps, n_words, enc_timesteps, bias_init_vector=None):
    self.dim_image = np.int(dim_image)
    self.dim_embed = np.int(dim_embed)
    self.dim_hidden = np.int(dim_hidden)
    self.batch_size = np.int(batch_size)
    self.n_lstm_steps = np.int(n_lstm_steps)
    self.n_words = np.int(n_words)
    self.enc_timesteps = np.int(enc_timesteps)

    with tf.device("/cpu:0"):
        self.Wemb = tf.Variable(
            tf.random_uniform([n_words, dim_embed], -0.1, 0.1), name='Wemb')
    self.bemb = self.init_bias(dim_embed, name='bemb')

    self.lstm = rnn_cell.LSTMCell(dim_hidden, state_is_tuple=True)
    self.lstm = rnn_cell.DropoutWrapper(self.lstm, input_keep_prob=1)
    self.lstm = rnn_cell.MultiRNNCell([self.lstm])

    self.back_lstm = rnn_cell.LSTMCell(dim_hidden, state_is_tuple=True)
    self.back_lstm = rnn_cell.DropoutWrapper(self.back_lstm,
                                             input_keep_prob=1)
    self.back_lstm = rnn_cell.MultiRNNCell([self.back_lstm])

    self.encode_img_W = tf.Variable(
        tf.random_uniform([dim_image, dim_hidden], -0.1, 0.1),
        name='encode_img_W')
    self.encode_img_b = self.init_bias(dim_hidden, name='encode_img_b')

    self.embed_word_W = tf.Variable(
        tf.random_uniform([dim_hidden, n_words], -0.1, 0.1),
        name='embed_word_W')
    if bias_init_vector is not None:
        self.embed_word_b = tf.Variable(
            bias_init_vector.astype(np.float32), name='embed_word_b')
    else:
        self.embed_word_b = self.init_bias(n_words, name='embed_word_b')
def model(self):
    cells = []
    for i in range(1, len(self.layers_size) - 1):
        if self.cell_type == 0:
            cell = rnn_cell.BasicLSTMCell(self.layers_size[i])
        elif self.cell_type == 1:
            cell = rnn_cell.BasicRNNCell(self.layers_size[i])
        elif self.cell_type == 2:
            cell = rnn_cell.GRUCell(self.layers_size[i])
        cell = rnn_cell.DropoutWrapper(cell, output_keep_prob=self.keep_prob)
        cells.append(cell)
    multilayer_cell = rnn_cell.MultiRNNCell(cells)
    multilayer_cell = rnn_cell.DropoutWrapper(
        multilayer_cell, output_keep_prob=self.keep_prob)
    output, state = tf.nn.dynamic_rnn(multilayer_cell, self.input_tensor,
                                      dtype=tf.float32)
    output = tf.transpose(output, [1, 0, 2])
    # This may be a bottleneck (memory)
    last = tf.gather(output, int(output.get_shape()[0]) - 1)
    last_weights = tf.Variable(
        tf.random_normal([self.layers_size[-2], self.layers_size[-1]]))
    if self.enable_bias:
        bias = tf.Variable(tf.random_normal([self.layers_size[-1]]))
        return tf.nn.softmax(tf.matmul(last, last_weights) + bias)
    return tf.nn.softmax(tf.matmul(last, last_weights))
def compute_states(self, emb):
    def unpack_sequence(tensor):
        return tf.unpack(tf.transpose(tensor, perm=[1, 0, 2]))

    with tf.variable_scope(
            "Composition",
            initializer=tf.contrib.layers.xavier_initializer(),
            regularizer=tf.contrib.layers.l2_regularizer(self.reg)):
        cell_fw = rnn_cell.LSTMCell(self.hidden_dim)
        cell_bw = rnn_cell.LSTMCell(self.hidden_dim)
        cell_fw = rnn_cell.DropoutWrapper(cell_fw,
                                          output_keep_prob=self.dropout,
                                          input_keep_prob=self.dropout)
        cell_bw = rnn_cell.DropoutWrapper(cell_bw,
                                          output_keep_prob=self.dropout,
                                          input_keep_prob=self.dropout)
        # output, state = rnn.dynamic_rnn(cell, emb,
        #     sequence_length=self.lngths, dtype=tf.float32)
        outputs, _, _ = rnn.bidirectional_rnn(
            cell_fw, cell_bw, unpack_sequence(emb),
            sequence_length=self.lngths, dtype=tf.float32)
        # output = pack_sequence(outputs)
        # Average the per-timestep outputs into a sentence representation.
        sum_out = tf.reduce_sum(tf.stack(outputs), [0])
        sent_rep = tf.div(sum_out,
                          tf.expand_dims(tf.to_float(self.lngths), 1))
        final_state = sent_rep
    return final_state
def setup_encoder(self):
    # Encoder setup.
    with vs.variable_scope("Encoder"):
        # Dropout on the encoder inputs (keep rate keep_prob) to reduce
        # overfitting.
        inp = tf.nn.dropout(self.encoder_inputs, self.keep_prob)
        fw_cell = rnn_cell.GRUCell(self.size)  # GRU cell
        fw_cell = rnn_cell.DropoutWrapper(
            fw_cell, output_keep_prob=self.keep_prob)  # dropout on cell outputs
        # Stack the forward encoder cells into a multi-layer RNN.
        self.encoder_fw_cell = rnn_cell.MultiRNNCell(
            [fw_cell] * self.num_layers, state_is_tuple=True)
        bw_cell = rnn_cell.GRUCell(self.size)  # GRU cell of size `self.size`
        bw_cell = rnn_cell.DropoutWrapper(  # randomly drop GRU outputs per keep_prob
            bw_cell, output_keep_prob=self.keep_prob)
        # Stack the backward encoder cells.
        self.encoder_bw_cell = rnn_cell.MultiRNNCell(
            [bw_cell] * self.num_layers, state_is_tuple=True)
        # Dynamic bidirectional RNN over the (dropped-out) inputs.
        out, _ = rnn.bidirectional_dynamic_rnn(
            self.encoder_fw_cell,
            self.encoder_bw_cell,
            inp,
            self.src_len,
            dtype=tf.float32,
            time_major=True,
            initial_state_fw=self.encoder_fw_cell.zero_state(
                self.batch_size, dtype=tf.float32),  # states start at zero
            initial_state_bw=self.encoder_bw_cell.zero_state(
                self.batch_size, dtype=tf.float32))
        # Concatenate the forward and backward outputs along the feature axis.
        out = tf.concat([out[0], out[1]], axis=2)
        self.encoder_output = out
def build_nmt_multicell_rnn(num_layers_encoder, num_layers_decoder,
                            encoder_size, decoder_size, source_proj_size,
                            use_lstm=True, input_feeding=True, dropout=0.0):
    if use_lstm:
        print("I'm building the model with LSTM cells")
        cell_class = rnn_cell.LSTMCell
    else:
        print("I'm building the model with GRU cells")
        cell_class = GRUCell

    initializer = tf.random_uniform_initializer(minval=-0.1, maxval=0.1,
                                                seed=1234)

    encoder_cell = cell_class(num_units=encoder_size,
                              input_size=source_proj_size,
                              initializer=initializer)
    if input_feeding:
        decoder_cell0 = cell_class(num_units=decoder_size,
                                   input_size=decoder_size * 2,
                                   initializer=initializer)
    else:
        decoder_cell0 = cell_class(num_units=decoder_size,
                                   input_size=decoder_size,
                                   initializer=initializer)

    # if dropout is 0.0, it is turned off
    encoder_cell = rnn_cell.DropoutWrapper(encoder_cell,
                                           output_keep_prob=1.0 - dropout)
    encoder_rnncell = rnn_cell.MultiRNNCell(
        [encoder_cell] * num_layers_encoder)

    decoder_cell0 = rnn_cell.DropoutWrapper(decoder_cell0,
                                            output_keep_prob=1.0 - dropout)
    if num_layers_decoder > 1:
        decoder_cell1 = cell_class(num_units=decoder_size,
                                   input_size=decoder_size,
                                   initializer=initializer)
        decoder_cell1 = rnn_cell.DropoutWrapper(
            decoder_cell1, output_keep_prob=1.0 - dropout)
        decoder_rnncell = rnn_cell.MultiRNNCell(
            [decoder_cell0] + [decoder_cell1] * (num_layers_decoder - 1))
    else:
        decoder_rnncell = rnn_cell.MultiRNNCell([decoder_cell0])

    return encoder_rnncell, decoder_rnncell
def recurrent_neural_network(x, keep_prob):
    # Bidirectional LSTM: the classifier consumes both directions, so the
    # output layer needs 2 * rnn_size input features.
    layer = {
        'weights': tf.Variable(tf.random_normal([2 * rnn_size, n_classes])),
        'biases': tf.Variable(tf.random_normal([n_classes]))
    }

    # Define lstm cells with tensorflow
    # Forward direction cell
    lstm_fw_cell = rnn_cell.BasicLSTMCell(rnn_size, state_is_tuple=True,
                                          forget_bias=1.0)
    lstm_fw_cell = rnn_cell.DropoutWrapper(lstm_fw_cell,
                                           output_keep_prob=keep_prob)
    lstm_fw_cell = rnn_cell.MultiRNNCell([lstm_fw_cell] * num_layers)
    # Backward direction cell
    lstm_bw_cell = rnn_cell.BasicLSTMCell(rnn_size, state_is_tuple=True,
                                          forget_bias=1.0)
    lstm_bw_cell = rnn_cell.DropoutWrapper(lstm_bw_cell,
                                           output_keep_prob=keep_prob)
    lstm_bw_cell = rnn_cell.MultiRNNCell([lstm_bw_cell] * num_layers)

    # Get lstm cell output
    outputs, states = tf.nn.bidirectional_dynamic_rnn(
        lstm_fw_cell, lstm_bw_cell, x, dtype=tf.float32,
        sequence_length=length(x))  # or sequence_length=early_stop
    # (older TensorFlow versions return only outputs, not states)

    output_fw, output_bw = outputs
    last = last_relevant(output_fw, length(x))
    # The original passed output_fw here too, which made the concat below
    # redundant; the backward outputs are the evident intent.
    first = last_relevant(output_bw, length(x))
    return tf.matmul(tf.concat(1, [first, last]),
                     layer['weights']) + layer['biases']
def RNN(x, is_training, weights, biases):
    x = tf.transpose(x, [1, 0, 2])
    x = tf.reshape(x, [-1, n_input])
    x = tf.split(0, n_time_step, x)

    lstm_cell_1 = rnn_cell.LSTMCell(n_hidden_1, forget_bias=0.8)
    lstm_cell_2 = rnn_cell.LSTMCell(n_hidden_2, forget_bias=0.8)
    if is_training and keep_prob < 1:
        lstm_cell_1 = rnn_cell.DropoutWrapper(lstm_cell_1,
                                              output_keep_prob=keep_prob)
        lstm_cell_2 = rnn_cell.DropoutWrapper(lstm_cell_2,
                                              output_keep_prob=keep_prob)
    cell = rnn_cell.MultiRNNCell([lstm_cell_1, lstm_cell_2])

    # if is_training and keep_prob < 1:
    #     x = tf.nn.dropout(x, keep_prob)
    # initial_state = cell.zero_state(batch_size, tf.float32)
    # state = initial_state
    output, states = rnn.rnn(cell, x, dtype=tf.float32)
    # outputs = tf.reshape(tf.concat(1, output), [-1, n_hidden_2])
    # maybe a softmax
    return tf.matmul(output[-1], weights['out']) + biases['out']
def RNN(x, weights, biases):
    x = tf.transpose(x, [1, 0, 2])
    x = tf.reshape(x, [-1, n_input])
    x = tf.split(0, n_steps, x)

    cell = rnn_cell.LSTMCell(n_hidden, forget_bias=1.0, state_is_tuple=True)
    state = cell.zero_state(batch_size, dtype=tf.float32)
    cell = rnn_cell.DropoutWrapper(cell, input_keep_prob=0.7)
    cell = rnn_cell.MultiRNNCell([cell] * 3, state_is_tuple=True)
    cell = rnn_cell.DropoutWrapper(cell, output_keep_prob=0.7)
    outputs, states = rnn.rnn(cell, x, dtype=tf.float32)
    # Output of the final time step (assumes n_steps == 20).
    return tf.matmul(outputs[19], weights['out']) + biases['out']
def __init__(self, rnn_size, rnn_layer, batch_size, input_embedding_size,
             dim_image, dim_hidden, max_words_q, vocabulary_size,
             drop_out_rate):
    self.rnn_size = rnn_size
    self.rnn_layer = rnn_layer
    self.batch_size = batch_size
    self.input_embedding_size = input_embedding_size
    self.dim_image = dim_image
    self.dim_hidden = dim_hidden
    self.max_words_q = max_words_q
    self.vocabulary_size = vocabulary_size
    self.drop_out_rate = drop_out_rate

    # Before-LSTM embeddings
    self.embed_BLSTM_Q_W = tf.Variable(
        tf.random_uniform([self.vocabulary_size, self.input_embedding_size],
                          -0.08, 0.08), name='embed_BLSTM_Q_W')
    self.embed_BLSTM_A_W = tf.Variable(
        tf.random_uniform([self.vocabulary_size, self.input_embedding_size],
                          -0.08, 0.08), name='embed_BLSTM_A_W')

    # encoder: RNN body (question)
    self.lstm_1_q = rnn_cell.LSTMCell(rnn_size, input_embedding_size,
                                      use_peepholes=True,
                                      state_is_tuple=False)
    self.lstm_dropout_1_q = rnn_cell.DropoutWrapper(
        self.lstm_1_q, output_keep_prob=1 - self.drop_out_rate)
    self.lstm_2_q = rnn_cell.LSTMCell(rnn_size, rnn_size,
                                      use_peepholes=True,
                                      state_is_tuple=False)
    self.lstm_dropout_2_q = rnn_cell.DropoutWrapper(
        self.lstm_2_q, output_keep_prob=1 - self.drop_out_rate)
    self.stacked_lstm_q = rnn_cell.MultiRNNCell(
        [self.lstm_dropout_1_q, self.lstm_dropout_2_q], state_is_tuple=False)

    # encoder: RNN body (answer)
    self.lstm_1_a = rnn_cell.LSTMCell(rnn_size, input_embedding_size,
                                      use_peepholes=True,
                                      state_is_tuple=False)
    self.lstm_dropout_1_a = rnn_cell.DropoutWrapper(
        self.lstm_1_a, output_keep_prob=1 - self.drop_out_rate)
    self.lstm_2_a = rnn_cell.LSTMCell(rnn_size, rnn_size,
                                      use_peepholes=True,
                                      state_is_tuple=False)
    self.lstm_dropout_2_a = rnn_cell.DropoutWrapper(
        self.lstm_2_a, output_keep_prob=1 - self.drop_out_rate)
    self.stacked_lstm_a = rnn_cell.MultiRNNCell(
        [self.lstm_dropout_1_a, self.lstm_dropout_2_a], state_is_tuple=False)

    # question-embedding W1
    self.embed_Q_W = tf.Variable(
        tf.random_uniform([2 * rnn_size * rnn_layer, self.dim_hidden],
                          -0.08, 0.08), name='embed_Q_W')
    self.embed_Q_b = tf.Variable(
        tf.random_uniform([self.dim_hidden], -0.08, 0.08), name='embed_Q_b')
    # answer-embedding W3
    self.embed_A_W = tf.Variable(
        tf.random_uniform([2 * rnn_size * rnn_layer, self.dim_hidden],
                          -0.08, 0.08), name='embed_A_W')
    self.embed_A_b = tf.Variable(
        tf.random_uniform([self.dim_hidden], -0.08, 0.08), name='embed_A_b')
    # image-embedding W2
    self.embed_image_W = tf.Variable(
        tf.random_uniform([dim_image, self.dim_hidden], -0.08, 0.08),
        name='embed_image_W')
    self.embed_image_b = tf.Variable(
        tf.random_uniform([dim_hidden], -0.08, 0.08), name='embed_image_b')
    # score-embedding W4
    self.embed_scor_W = tf.Variable(
        tf.random_uniform([dim_hidden, num_output], -0.08, 0.08),
        name='embed_scor_W')
    self.embed_scor_b = tf.Variable(
        tf.random_uniform([num_output], -0.08, 0.08), name='embed_scor_b')
    # QI-embedding W3
    self.embed_QI_W = tf.Variable(
        tf.random_uniform([dim_hidden, dim_hidden], -0.08, 0.08),
        name='embed_QI_W')
    self.embed_QI_b = tf.Variable(
        tf.random_uniform([dim_hidden], -0.08, 0.08), name='embed_QI_b')
def sentence_embedding(self, inputs, keep_prob, w):
    with tf.device('/cpu:0'):
        # batch_size x max_len x word_embedding
        embedding_layer = tf.nn.embedding_lookup(w['word_embedding_w'],
                                                 inputs)
    cell_input = tf.transpose(embedding_layer, [1, 0, 2])
    cell_input = tf.reshape(cell_input, [-1, self.hiddensize])
    cell_input = tf.split(0, self.max_len, cell_input)

    with tf.variable_scope('forward'):
        lstm_fw_cell = rnn_cell.DropoutWrapper(
            rnn_cell.BasicLSTMCell(self.rnnsize, forget_bias=1.0,
                                   state_is_tuple=True),
            input_keep_prob=keep_prob, output_keep_prob=keep_prob)
    with tf.variable_scope('backward'):
        lstm_bw_cell = rnn_cell.DropoutWrapper(
            rnn_cell.BasicLSTMCell(self.rnnsize, forget_bias=1.0,
                                   state_is_tuple=True),
            input_keep_prob=keep_prob, output_keep_prob=keep_prob)
    # outputs shape: seq_len x [batch_size x (fw_cell_size + bw_cell_size)]
    outputs, _, _ = rnn.bidirectional_rnn(lstm_fw_cell, lstm_bw_cell,
                                          cell_input, dtype=tf.float32)
    att = self.attention_layer(outputs, w)
    return att
def __init__(self, params, unigram_probs, context_vocab_sizes,
             use_nce_loss=True):
    super(HyperModel, self).__init__(
        params, unigram_probs, context_vocab_sizes=context_vocab_sizes)

    self.hash_func = None
    if params.use_hash_table:  # setup the hash table
        self.hash_func = self.GetHashFunc(params)

    context_embeds = None
    if params.use_mikolov_adaptation or params.use_hyper_adaptation:
        context_embeds = self.final_context_embed

    self.cell = HyperCell(params.cell_size, context_embeds,
                          mikolov_adapt=params.use_mikolov_adaptation,
                          hyper_adapt=params.use_hyper_adaptation)
    regularized_cell = rnn_cell.DropoutWrapper(
        self.cell, output_keep_prob=self.dropout_keep_prob,
        input_keep_prob=self.dropout_keep_prob)

    self.linear_proj = tf.get_variable(
        'linear_proj', [params.cell_size, params.embedding_dims])

    outputs, self.zz = tf.nn.dynamic_rnn(regularized_cell, self._inputs,
                                         dtype=tf.float32,
                                         sequence_length=self.seq_len)
    self.outputs = outputs
    reshaped_outputs = tf.reshape(outputs, [-1, params.cell_size])
    projected_outputs = tf.matmul(reshaped_outputs, self.linear_proj)

    self.OutputHelper(projected_outputs, params, use_nce_loss=use_nce_loss,
                      hash_func=self.hash_func)
    self.CreateDecodingGraph(params)
def apply_dropout(cell, input_keep_probability, output_keep_probability,
                  random_seed=None):
    """Apply dropout to the outputs and inputs of `cell`.

    Args:
      cell: An `RNNCell`.
      input_keep_probability: Probability to keep inputs to `cell`.
        If `None`, no dropout is applied.
      output_keep_probability: Probability to keep outputs of `cell`.
        If `None`, no dropout is applied.
      random_seed: Seed for random dropout.

    Returns:
      An `RNNCell`, the result of applying the supplied dropouts to `cell`.
    """
    input_prob_none = input_keep_probability is None
    output_prob_none = output_keep_probability is None
    if input_prob_none and output_prob_none:
        return cell
    if input_prob_none:
        input_keep_probability = 1.0
    if output_prob_none:
        output_keep_probability = 1.0
    return rnn_cell.DropoutWrapper(cell, input_keep_probability,
                                   output_keep_probability, random_seed)
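# --- Usage sketch (not from the original source; assumes the TF 1.x
# `rnn_cell` module used throughout these snippets). apply_dropout returns
# the cell unchanged when both keep probabilities are None, and treats a
# single None as "keep everything" on that side by defaulting it to 1.0.
base_cell = rnn_cell.GRUCell(128)

same_cell = apply_dropout(base_cell, None, None)
assert same_cell is base_cell  # no wrapper created

# Only output dropout requested; input_keep_probability defaults to 1.0.
train_cell = apply_dropout(base_cell, None, 0.5, random_seed=42)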
def build_model(self):
    '''build model'''
    self._x = tf.placeholder(tf.int32, [self.batch_size], name='input')
    self._y = tf.placeholder(tf.int32, [self.batch_size], name='output')
    self.state = [
        tf.placeholder(tf.float32, [self.batch_size, self.rnn_size],
                       name='rnn_state') for _ in range(self.layers)
    ]
    self.global_step = tf.Variable(0, name='global_step', trainable=False)

    with tf.variable_scope('gru_layer'):
        sigma = self.sigma if self.sigma != 0 else np.sqrt(
            6.0 / (self.n_items + self.rnn_size))
        if self.init_as_normal:
            initializer = tf.random_normal_initializer(mean=0, stddev=sigma)
        else:
            initializer = tf.random_uniform_initializer(minval=-sigma,
                                                        maxval=sigma)
        embedding = tf.get_variable('embedding',
                                    [self.n_items, self.rnn_size],
                                    initializer=initializer)
        softmax_w = tf.get_variable('softmax_w',
                                    [self.n_items, self.rnn_size],
                                    initializer=initializer)
        softmax_b = tf.get_variable('softmax_b', [self.n_items],
                                    initializer=tf.constant_initializer(0.0))

        cell = rnn_cell.GRUCell(self.rnn_size, activation=self.hidden_act)
        drop_cell = rnn_cell.DropoutWrapper(
            cell, output_keep_prob=self.dropout_p_hidden)
        stacked_cell = rnn_cell.MultiRNNCell([drop_cell] * self.layers)

        inputs = tf.nn.embedding_lookup(embedding, self._x)
        output, state = stacked_cell(inputs, tuple(self.state))
        self.final_state = state

    if self.is_training:
        # Use other examples of the minibatch as negative samples.
        sampled_w = tf.nn.embedding_lookup(softmax_w, self._y)
        sampled_b = tf.nn.embedding_lookup(softmax_b, self._y)
        logits = tf.matmul(output, sampled_w, transpose_b=True) + sampled_b
        self.yhat = self.final_activation(logits)
        self.cost = self.loss_function(self.yhat)
    else:
        logits = tf.matmul(output, softmax_w, transpose_b=True) + softmax_b
        self.yhat = self.final_activation(logits)

    if not self.is_training:
        return

    self._lr = tf.maximum(
        1e-5,
        tf.train.exponential_decay(self.learning_rate, self.global_step,
                                   self.decay_steps, self.decay,
                                   staircase=True))

    # Try different optimizers.
    # optimizer = tf.train.AdagradOptimizer(self._lr)
    optimizer = tf.train.AdamOptimizer(self._lr)
    # optimizer = tf.train.AdadeltaOptimizer(self._lr)
    # optimizer = tf.train.RMSPropOptimizer(self._lr)

    tvars = tf.trainable_variables()
    gvs = optimizer.compute_gradients(self.cost, tvars)
    if self.grad_cap > 0:
        capped_gvs = [(tf.clip_by_norm(grad, self.grad_cap), var)
                      for grad, var in gvs]
    else:
        capped_gvs = gvs
    self.train_op = optimizer.apply_gradients(capped_gvs,
                                              global_step=self.global_step)
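# --- Illustration (not from the original source): why the training-branch
# matmul above yields in-batch negative samples. With toy sizes, each row of
# `logits` scores one example against every target in the minibatch; the
# diagonal entries are the positives.
import numpy as np

batch_size, rnn_size = 3, 4
output = np.random.randn(batch_size, rnn_size)     # GRU output, one row per example
sampled_w = np.random.randn(batch_size, rnn_size)  # softmax_w rows for this batch's targets

logits = output @ sampled_w.T  # shape (batch_size, batch_size)
# logits[i, i]: score of example i against its own target (positive)
# logits[i, j], j != i: scores against other examples' targets (negatives)
print(logits.shape)  # (3, 3)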
def neural_network(model='lstm', rnn_size=128, num_layers=2, keep_prob=0.5):
    if model == 'rnn':
        cell_fun = rnn_cell.BasicRNNCell
    elif model == 'gru':
        cell_fun = rnn_cell.GRUCell
    elif model == 'lstm':
        cell_fun = rnn_cell.BasicLSTMCell

    cell = cell_fun(rnn_size)
    cell = rnn_cell.DropoutWrapper(cell, output_keep_prob=keep_prob)
    cell = rnn_cell.MultiRNNCell([cell] * num_layers)
    initial_state = cell.zero_state(batch_size, tf.float32)

    with tf.variable_scope('rnnlm'):
        softmax_w = tf.get_variable("softmax_w", [rnn_size, datalen + 1])
        softmax_b = tf.get_variable("softmax_b", [datalen + 1])
        with tf.device("/cpu:0"):
            embedding = tf.get_variable("embedding",
                                        [datalen + 1, rnn_size])
            inputs = tf.nn.embedding_lookup(embedding, input_data)

    outputs, last_state = tf.nn.dynamic_rnn(cell, inputs,
                                            initial_state=initial_state,
                                            scope='rnnlm')
    output = tf.reshape(outputs, [-1, rnn_size])
    logits = tf.matmul(output, softmax_w) + softmax_b
    probs = tf.nn.softmax(logits)
    return logits, last_state, probs, cell, initial_state, inputs
def h_rnn(input):
    num_layer = 0
    layer = [input]
    while True:
        print(num_layer)
        layer.append([])
        _input = layer[num_layer]
        length = len(_input)
        with tf.variable_scope("RNN_" + str(num_layer)) as scope:
            cell = rnn_cell.BasicLSTMCell(self.dim)
            cell = rnn_cell.DropoutWrapper(cell,
                                           output_keep_prob=self.keep_prob)
            stacked_cell = rnn_cell.MultiRNNCell(
                [cell] * self.number_of_layers)
            i = 0
            while i < length:
                state = _rnn(stacked_cell,
                             _input[i:min(i + self.seg_len, length)])
                layer[num_layer + 1].append(state)
                scope.reuse_variables()
                i += self.seg_len
        num_layer += 1
        if length <= self.seg_len:
            break
    return layer[num_layer][0]
def model(self):
    print('Building model\n')
    # We don't want to modify the original tensor.
    x = self.x
    # Reshape input into a list of tensors of the correct size.
    x = tf.transpose(x, [1, 0, 2])
    x = tf.reshape(x, [-1, INPUT_SIZE])
    # Since we're using one pixel at a time, transform into a list of
    # 784 one-pixel vectors.
    x = tf.split(0, STEPS, x)

    # Define GRU cells and get outputs list and states.
    gru = rnn_cell.GRUCell(self.num_hid_units)
    gru = rnn_cell.DropoutWrapper(gru, output_keep_prob=1)
    if self.num_hid_layers > 1:
        gru = rnn_cell.MultiRNNCell([gru] * self.num_hid_layers)
    outputs, state = rnn.rnn(gru, x, dtype=tf.float32)

    # Turn result back into [batch_size, steps, hidden_units] format.
    outputs = tf.transpose(outputs, [1, 0, 2])
    # Flatten into [batch_size x steps, hidden_units] to allow matrix
    # multiplication.
    outputs = tf.reshape(outputs, [-1, self.num_hid_units])

    # Apply affine transformation to reshape output to
    # [batch_size x steps, 1].
    y1 = tf.matmul(outputs, self.weights_H2O) + self.bias_H2O
    y1 = tf.reshape(y1, [-1, STEPS])

    # Keep prediction (sigmoid applied) and non-sigmoid version (sigmoid is
    # applied in the cost function).
    y_ns = y1[:, :783]
    y_pred = tf.sigmoid(y1)[:, :783]
    return y_ns, y_pred
def model(self):
    """Builds the TensorFlow graph."""
    print('Building model\n')
    # We don't want to modify the original tensor.
    x = self.x
    # Reshape input into a list of tensors of the correct size.
    x = tf.transpose(x, [1, 0, 2])
    x = tf.reshape(x, [-1, INPUT_SIZE])
    # Since we're using one pixel at a time, transform into a list of
    # 784 one-pixel vectors.
    x = tf.split(0, STEPS, x)

    # Define LSTM cells and get outputs list and states.
    lstm = rnn_cell.LSTMCell(self.num_hid_units)
    lstm = rnn_cell.DropoutWrapper(lstm, output_keep_prob=1)
    outputs, state = rnn.rnn(lstm, x, dtype=tf.float32)

    # First affine transformation - uses the output from the last input.
    y1 = tf.matmul(outputs[-1], self.weights_H2L) + self.bias_H2L
    y2 = tf.nn.relu(y1)
    y_pred = tf.matmul(y2, self.weights_L2O) + self.bias_L2O
    return y_pred
def lstm(self):
    """
    Prepare the input shape for the LSTM. The original shape is
    (batch_size, seq_size, input_dim), but it must be transformed into a
    list of seq_size tensors of shape (batch_size, input_dim).
    Copy self.X to a new tensor X first.
    """
    X = self.X
    # Permute batch_size and seq_size: [1] becomes [0], [0] becomes [1],
    # [2] stays the same.
    X = tf.transpose(X, [1, 0, 2])
    # Reshape to (seq_size * batch_size, input_dim).
    X = tf.reshape(X, [-1, self.input_dim])
    # Split into the list of per-step tensors.
    X = tf.split(X, self.seq_size)

    # Create the LSTM and add dropout.
    lstm_cell = rnn_cell.LSTMCell(self.hidden_dim, use_peepholes=True)
    lstm_cell = rnn_cell.DropoutWrapper(lstm_cell,
                                        input_keep_prob=self.keep_prob,
                                        output_keep_prob=self.keep_prob)
    # static_rnn consumes the list of per-step tensors built above
    # (the original called dynamic_rnn, which expects a single 3-D tensor).
    outputs, states = rnn.static_rnn(lstm_cell, X, dtype=tf.float32)

    # Project the output of the last time step.
    output = tf.matmul(outputs[-1], self.weights) + self.biases
    return output
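# --- Shape walk-through (not from the original source): a toy check of the
# transpose/reshape/split pipeline above, with made-up sizes.
import tensorflow as tf  # assumes TF 1.x

batch_size, seq_size, input_dim = 4, 10, 8
X = tf.zeros([batch_size, seq_size, input_dim])

X = tf.transpose(X, [1, 0, 2])      # -> (seq_size, batch_size, input_dim) = (10, 4, 8)
X = tf.reshape(X, [-1, input_dim])  # -> (seq_size * batch_size, input_dim) = (40, 8)
X = tf.split(X, seq_size)           # -> list of 10 tensors, each (4, 8)

print(len(X), X[0].shape)  # 10 (4, 8)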
def recurrent_neural_network(x, keep_prob):
    layer = {
        'weights': tf.Variable(tf.random_normal([rnn_size, n_classes])),
        'biases': tf.Variable(tf.random_normal([n_classes]))
    }

    x = tf.transpose(x, [1, 0, 2])
    x = tf.reshape(x, [-1, chunk_size])
    x = tf.split(0, feature_dim, x)

    # GRU
    gru_cell = rnn_cell.GRUCell(rnn_size)
    gru_cell = rnn_cell.DropoutWrapper(gru_cell, output_keep_prob=keep_prob)
    gru_cell = rnn_cell.MultiRNNCell([gru_cell] * num_layers)
    outputs, states = rnn.rnn(gru_cell, x, dtype=tf.float32)

    '''
    # Standard LSTM:
    lstm_cell = rnn_cell.BasicLSTMCell(rnn_size, state_is_tuple=True)
    lstm_cell = rnn_cell.DropoutWrapper(lstm_cell, output_keep_prob=keep_prob)
    lstm_cell = rnn_cell.MultiRNNCell([lstm_cell] * num_layers)
    outputs, states = rnn.rnn(lstm_cell, x, dtype=tf.float32)
    '''
    '''
    # Bidirectional LSTM; needs
    layer['weights'] = tf.Variable(tf.random_normal([2 * rnn_size, n_classes]))

    # Define lstm cells with tensorflow
    # Forward direction cell
    lstm_fw_cell = rnn_cell.BasicLSTMCell(rnn_size, forget_bias=1.0)
    # Backward direction cell
    lstm_bw_cell = rnn_cell.BasicLSTMCell(rnn_size, forget_bias=1.0)
    # Get lstm cell output
    try:
        outputs, states, extras = rnn.bidirectional_rnn(
            lstm_fw_cell, lstm_bw_cell, x, dtype=tf.float32)
    except Exception:
        # Old TensorFlow version only returns outputs, not states
        outputs = rnn.bidirectional_rnn(
            lstm_fw_cell, lstm_bw_cell, x, dtype=tf.float32)
    # Linear activation, using rnn inner loop last output
    return tf.matmul(outputs[-1], layer['weights']) + layer['biases']

    # outputs, states = tf.nn.bidirectional_dynamic_rnn(
    #     cell_fw=lstm_cell,
    #     cell_bw=lstm_cell,
    #     dtype=tf.float32,
    #     # sequence_length=X_lengths,
    #     inputs=x)
    # output_fw, output_bw = outputs
    # states_fw, states_bw = states
    '''

    output = tf.matmul(outputs[-1], layer['weights']) + layer['biases']
    return output
def __init__(self, param):
    self.param = param

    # tf Graph input
    self.x = tf.placeholder(
        "float", [None, self.param.n_steps, self.param.n_input])
    self.y = tf.placeholder("float", [None, self.param.n_classes])
    self.xx = self.x

    # Define weights
    weights = {
        'out': tf.Variable(
            tf.random_normal([self.param.n_hidden, self.param.n_classes]))
    }
    biases = {
        'out': tf.Variable(tf.random_normal([self.param.n_classes]))
    }

    # Prepare data shape to match `rnn` function requirements.
    # Current data input shape: (batch_size, n_steps, n_input)
    # Required shape: 'n_steps' tensors list of shape (batch_size, n_input)

    # Permuting batch_size and n_steps
    self.x = tf.transpose(self.x, [1, 0, 2])
    # Reshaping to (n_steps*batch_size, n_input)
    self.x = tf.reshape(self.x, [-1, self.param.n_input])
    # Split to get a list of 'n_steps' tensors of shape (batch_size, n_input)
    self.x = tf.split(0, self.param.n_steps, self.x)

    # Define a lstm cell with tensorflow
    # lstm_cell = rnn_cell.LSTMCell(self.param.n_hidden,
    #                               forget_bias=self.param.forget_bias,
    #                               activation=tf.nn.relu)
    lstm_cell = rnn_cell.BasicLSTMCell(self.param.n_hidden,
                                       forget_bias=self.param.forget_bias)
    lstm_cell = rnn_cell.DropoutWrapper(lstm_cell, input_keep_prob=0.5,
                                        output_keep_prob=0.5)
    # cell = rnn_cell.MultiRNNCell([lstm_cell])

    # Get lstm cell output
    outputs, states = tf.nn.rnn(lstm_cell, self.x, dtype=tf.float32)

    # Logits from the last time step. (The original misplaced a parenthesis,
    # adding the bias to the weight matrix inside tf.matmul.)
    pred = tf.matmul(outputs[-1], weights['out']) + biases['out']

    # Define loss and optimizer
    self.cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(pred, self.y))
    self.optimizer = tf.train.AdadeltaOptimizer(
        learning_rate=self.param.learning_rate).minimize(self.cost)
    # self.optimizer = tf.train.GradientDescentOptimizer(
    #     learning_rate=self.param.learning_rate).minimize(self.cost)

    # Evaluate model
    correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(self.y, 1))
    self.accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    # Probability and classifier collections
    self.proba_collection = tf.nn.softmax(pred)  # tf.argmax(pred, 1)
    self.classifier_collection = tf.argmax(pred, 1)

    # Initializing the variables
    self.init = tf.global_variables_initializer()
def bi_rnn(self, inputs, scope=None):
    with tf.variable_scope(scope or 'BiRNN'):
        fw_cells = tf.nn.rnn_cell.LSTMCell(hidden_units)
        bw_cells = tf.nn.rnn_cell.LSTMCell(hidden_units)
        fw_cells = rnn_cell.DropoutWrapper(
            fw_cells, output_keep_prob=1 - self.dropout_rate)
        bw_cells = rnn_cell.DropoutWrapper(
            bw_cells, output_keep_prob=1 - self.dropout_rate)
        rnn_outputs, _ = rnn.bidirectional_dynamic_rnn(
            cell_fw=fw_cells, cell_bw=bw_cells, inputs=inputs,
            dtype=tf.float32)
        # Concatenate forward and backward outputs: 2 * hidden_units
        H = tf.concat(rnn_outputs, axis=2)
        return H
def __init__(self, is_training, config):
    self.batch_size = batch_size = config.batch_size
    self.num_steps = num_steps = config.num_steps
    size = config.hidden_size
    vocab_size = config.vocab_size

    self._input_data = tf.placeholder(tf.int32, [batch_size, num_steps])
    self._targets = tf.placeholder(tf.int32, [batch_size, num_steps])

    # Slightly better results can be obtained with forget gate biases
    # initialized to 1, but the hyperparameters of the model would need to
    # be different than reported in the paper.
    lstm_cell = rnn_cell.BasicLSTMCell(size, forget_bias=0.0)
    if is_training and config.keep_prob < 1:
        lstm_cell = rnn_cell.DropoutWrapper(
            lstm_cell, output_keep_prob=config.keep_prob)
    cell = rnn_cell.MultiRNNCell([lstm_cell] * config.num_layers)

    self._initial_state = cell.zero_state(batch_size, tf.float32)

    with tf.device("/cpu:0"):
        embedding = tf.get_variable("embedding", [vocab_size, size])
        inputs = tf.nn.embedding_lookup(embedding, self._input_data)

    if is_training and config.keep_prob < 1:
        inputs = tf.nn.dropout(inputs, config.keep_prob)

    # Simplified version of tensorflow.models.rnn.rnn.py's rnn().
    # This builds an unrolled LSTM for tutorial purposes only.
    # In general, use the rnn() or state_saving_rnn() from rnn.py.
    #
    # The alternative version of the code below is:
    #
    # from tensorflow.models.rnn import rnn
    # inputs = [tf.squeeze(input_, [1])
    #           for input_ in tf.split(1, num_steps, inputs)]
    # outputs, states = rnn.rnn(cell, inputs,
    #                           initial_state=self._initial_state)
    outputs = []
    states = []
    state = self._initial_state
    with tf.variable_scope("RNN"):
        for time_step in range(num_steps):
            if time_step > 0:
                tf.get_variable_scope().reuse_variables()
            (cell_output, state) = cell(inputs[:, time_step, :], state)
            outputs.append(cell_output)
            states.append(state)

    output = tf.reshape(tf.concat(1, outputs), [-1, size])
    logits = tf.nn.xw_plus_b(
        output,
        tf.get_variable("softmax_w", [size, vocab_size]),
        tf.get_variable("softmax_b", [vocab_size]))
    loss = seq2seq.sequence_loss_by_example(
        [logits], [tf.reshape(self._targets, [-1])],
        [tf.ones([batch_size * num_steps])], vocab_size)
    self._cost = tf.reduce_sum(loss) / batch_size
    self._final_state = states[-1]
def add_lstm_cell(self, lstm_l_in_y):
    lstm_cell = rnn_cell.BasicLSTMCell(self.lstm_cell_size,
                                       forget_bias=1.0,
                                       state_is_tuple=True)
    lstm_cell = rnn_cell.DropoutWrapper(cell=lstm_cell,
                                        input_keep_prob=1.0,
                                        output_keep_prob=self.keep_prob)
    mlstm_cell = rnn_cell.MultiRNNCell([lstm_cell] * self.lstm_layer_num,
                                       state_is_tuple=True)
    # NOTE: sequence_length expects a per-example vector of lengths, not the
    # batch size; passing self.batch_size only works if it is such a vector.
    lstm_cell_outputs, lstm_cell_final_state = tf.nn.dynamic_rnn(
        mlstm_cell, lstm_l_in_y, dtype=tf.float32,
        sequence_length=self.batch_size, time_major=False)
    return lstm_cell_outputs, lstm_cell_final_state
def RNN(x, weight, bias):
    cell = rnn_cell.BasicLSTMCell(n_hidden, state_is_tuple=True)
    cell = rnn_cell.DropoutWrapper(cell=cell, output_keep_prob=0.75)
    # cell = rnn_cell.MultiRNNCell([cell] * 2)
    output, state = tf.nn.dynamic_rnn(cell, x, dtype=tf.float32)
    # Select the output of the last time step.
    output = tf.transpose(output, [1, 0, 2])
    last = tf.gather(output, int(output.get_shape()[0]) - 1)
    return tf.nn.softmax(tf.matmul(last, weight) + bias)
def RNN(x, weights: dict, biases: dict):
    lstm_cell = rnn.BasicLSTMCell(num_hidden, forget_bias=1.0)
    lstm_cell = rnn_cell.DropoutWrapper(lstm_cell, output_keep_prob=0.5)
    # Initialize the state to all zeros.
    init_state = lstm_cell.zero_state(batch_size, dtype=tf.float32)
    # If inputs are (batches, steps, inputs)  ==> time_major=False
    # If inputs are (steps, batches, inputs)  ==> time_major=True
    outputs, states = tf.nn.dynamic_rnn(lstm_cell, x,
                                        initial_state=init_state,
                                        time_major=False)
    # Turn outputs into a list [(batch, outputs)...] * steps.
    outputs = tf.unstack(tf.transpose(outputs, [1, 0, 2]))
    return tf.matmul(outputs[-1], weights['out']) + biases['out']
def RNN(x, weight, bias):
    # Prepare data shape to match `rnn` function requirements.
    # Current data input shape: (batch_size, timesteps, n_input)
    # Required shape: 'timesteps' tensors list of shape (batch_size, n_input)

    # Unstack to get a list of 'timesteps' tensors of shape
    # (batch_size, n_input).
    x = tf.unstack(x, timesteps, 1)

    cell = rnn.BasicRNNCell(num_hidden)
    cell = rnn_cell.DropoutWrapper(cell, output_keep_prob=0.5)
    # cell = rnn_cell.MultiRNNCell([cell] * 3)
    outputs, states = rnn.static_rnn(cell, x, dtype=tf.float32)
    return tf.matmul(outputs[-1], weight) + bias
def build(self, inputs, keep_prob, n_classes, word_embedding):
    inputs = tf.transpose(inputs, [1, 0, 2])
    inputs = tf.reshape(inputs, [-1, self.max_len])
    inputs = tf.split(0, self.max_sen, inputs)

    variable_dict = {
        "word_embedding_w": tf.get_variable(
            name="word_embedding",
            shape=[self.vocabsize, self.hiddensize],
            initializer=tf.constant_initializer(word_embedding),
            trainable=True),
        "attention_w": tf.get_variable(
            name="word_attention_weights",
            shape=[2 * self.rnnsize, 2 * self.rnnsize]),
        "attention_b": tf.get_variable(
            name="word_attention_bias", shape=[2 * self.rnnsize]),
        "attention_c": tf.get_variable(
            name="word_attention_context", shape=[2 * self.rnnsize, 1]),
    }

    sent_embeddings = []
    with tf.variable_scope("embedding_scope") as scope:
        for x in inputs:
            embedding = self.sentence_embedding(x, keep_prob, variable_dict)
            sent_embeddings.append(embedding)
            scope.reuse_variables()

    with tf.variable_scope('forward'):
        lstm_fw_cell = rnn_cell.DropoutWrapper(
            rnn_cell.BasicLSTMCell(self.docsize, forget_bias=1.0,
                                   state_is_tuple=True),
            input_keep_prob=keep_prob, output_keep_prob=keep_prob)
    with tf.variable_scope('backward'):
        lstm_bw_cell = rnn_cell.DropoutWrapper(
            rnn_cell.BasicLSTMCell(self.docsize, forget_bias=1.0,
                                   state_is_tuple=True),
            input_keep_prob=keep_prob, output_keep_prob=keep_prob)
    outputs, _, _ = rnn.bidirectional_rnn(lstm_fw_cell, lstm_bw_cell,
                                          sent_embeddings, dtype=tf.float32)

    atten_variable_dict = {
        "attention_w": tf.get_variable(
            name="sent_attention_weights",
            shape=[2 * self.docsize, 2 * self.docsize]),
        "attention_b": tf.get_variable(
            name="sent_attention_bias", shape=[2 * self.docsize]),
        "attention_c": tf.get_variable(
            name="sent_attention_context", shape=[2 * self.docsize, 1]),
    }
    att = self.attention_layer(outputs, atten_variable_dict)

    # Fully connected layer
    W = tf.get_variable("fullconnect_weights",
                        shape=[2 * self.docsize, n_classes])
    B = tf.get_variable("fullconnect_bias", shape=[n_classes])
    output = tf.add(tf.matmul(att, W), B, name="output")
    return output
def _rnn(inputs, reverse=False):
    with tf.variable_scope("GRU_RNN") as scope:
        cell = rnn_cell.GRUCell(self.w2v_dim)
        cell = rnn_cell.DropoutWrapper(cell,
                                       output_keep_prob=self.dropout_input)
        stacked_cell = rnn_cell.MultiRNNCell([cell] * self.number_of_layers)
        state = stacked_cell.zero_state(self.batch_size, tf.float32)
        if reverse:
            inputs = reversed(inputs)
        for time, input_ in enumerate(inputs):
            if time > 0:
                scope.reuse_variables()
            output, state = stacked_cell(input_, state)
        return state
def prediction(self):
    # Recurrent network.
    network = rnn_cell.GRUCell(self._num_hidden)
    network = rnn_cell.DropoutWrapper(network,
                                      output_keep_prob=self.dropout)
    network = rnn_cell.MultiRNNCell([network] * self._num_layers)
    # The original referenced a bare `data`; self.data is the evident intent,
    # matching self.target below.
    output, _ = rnn.dynamic_rnn(network, self.data, dtype=tf.float32)
    # Select last output.
    output = tf.transpose(output, [1, 0, 2])
    last = tf.gather(output, int(output.get_shape()[0]) - 1)
    # Softmax layer.
    weight, bias = self._weight_and_bias(
        self._num_hidden, int(self.target.get_shape()[1]))
    prediction = tf.nn.softmax(tf.matmul(last, weight) + bias)
    return prediction