def build_single_cell(self):
    """Build one recurrent cell of the configured type, optionally with dropout.

    The cell class is chosen from ``self.cell_type`` (compared
    case-insensitively): ``'gru'``, ``'indgru'``, ``'indlstm'``,
    ``'grublock'`` or ``'lstm'``.

    Returns:
        The constructed cell with ``self.hidden_units`` units. When
        ``self.use_dropout`` is truthy the cell is wrapped in a
        ``DropoutWrapper`` whose output keep probability is
        ``self.keep_prob_ph``.

    Raises:
        ValueError: if ``self.cell_type`` is not one of the supported names.
    """
    kind = self.cell_type.lower()

    if kind == 'lstm':
        # LSTMCell is given the shared initializer via `initializer`.
        cell = LSTMCell(self.hidden_units, initializer=self.initializer)
    elif kind == 'grublock':
        # GRUBlockCell is constructed without any initializer arguments.
        cell = GRUBlockCell(self.hidden_units)
    elif kind in ('gru', 'indgru', 'indlstm'):
        if kind == 'gru':
            cell_cls = GRUCell
        elif kind == 'indgru':
            cell_cls = IndyGRUCell
        else:
            cell_cls = IndyLSTMCell
        cell = cell_cls(self.hidden_units,
                        kernel_initializer=self.initializer,
                        bias_initializer=tf.zeros_initializer)
    else:
        # Previously an unknown type fell through with the cell class
        # unbound and surfaced as an opaque UnboundLocalError.
        raise ValueError(
            "Unsupported cell_type {!r}; expected one of 'gru', 'indgru', "
            "'indlstm', 'grublock', 'lstm'.".format(self.cell_type))

    if self.use_dropout:
        cell = DropoutWrapper(cell,
                              dtype=self.dtype,
                              output_keep_prob=self.keep_prob_ph)
    return cell
def build_single_cell(self, n_hidden, use_residual):
    """Build a single RNN cell.

    :param n_hidden: number of units in the cell's hidden layer
    :param use_residual: whether to wrap the cell in a ResidualWrapper
    :return: a GRUCell when ``self.cell_type == 'gru'``, otherwise an
        LSTMCell, with the optional dropout and residual wrappers applied
    """
    rnn_cls = GRUCell if self.cell_type == 'gru' else LSTMCell
    unit = rnn_cls(n_hidden)

    # Dropout on the cell output is enabled via self.use_dropout; it is
    # intended to reduce overfitting.
    if self.use_dropout:
        unit = DropoutWrapper(
            unit,
            dtype=tf.float32,
            output_keep_prob=self.keep_prob_placeholder,
            seed=self.seed,  # random seed handed to the dropout wrapper
        )

    # The residual wrapper is intended to mitigate vanishing/exploding
    # gradients.
    return ResidualWrapper(unit) if use_residual else unit
def _apply_droput_wrapper(self):
    """Stack ``self.num_layers`` dropout-wrapped cells and create the state tensors.

    Every layer is a fresh cell from ``self.__new_cell()`` wrapped in a
    ``DropoutWrapper`` using ``self.in_keep_prob`` / ``self.out_keep_prob``.

    Sets:
        self.multi_cell: the ``MultiRNNCell`` built from the wrapped layers.
        self.initial_state: ``rnn_placeholders`` applied to a float32 zero
            state for ``self.batch_size``.
        self.zero_state: a second float32 zero state for ``self.batch_size``.
    """
    wrapped_layers = [
        DropoutWrapper(self.__new_cell(),
                       input_keep_prob=self.in_keep_prob,
                       output_keep_prob=self.out_keep_prob)
        for _ in range(self.num_layers)
    ]
    self.multi_cell = MultiRNNCell(wrapped_layers)

    # zero_state is deliberately requested twice: one result is handed to
    # rnn_placeholders, the other is stored as-is.
    placeholder_template = self.multi_cell.zero_state(self.batch_size,
                                                      tf.float32)
    self.initial_state = rnn_placeholders(placeholder_template)
    self.zero_state = self.multi_cell.zero_state(self.batch_size, tf.float32)
def build(self):
    """Create the LSTM block cell and its variational-dropout wrapper.

    Sets:
        self.lstm_cell: ``LSTMBlockCell`` with ``self.units`` units.
        self.va_lstm_cell: ``DropoutWrapper`` around ``self.lstm_cell`` with
            ``variational_recurrent=True`` and a keep probability of 0.7 for
            inputs, outputs and state.
        self._trainable_weights: ``self.lstm_cell.variables`` as they are
            after the probe run below.
    """
    keep_prob = 0.7

    # NOTE: peephole connections are always switched on here; self.peephole
    # and a custom initializer are not consulted.
    self.lstm_cell = LSTMBlockCell(self.units, use_peephole=True)

    dropout_options = dict(
        variational_recurrent=True,
        input_keep_prob=keep_prob,
        output_keep_prob=keep_prob,
        state_keep_prob=keep_prob,
        dtype=tf.float32,
        input_size=self.inputSize,
    )
    self.va_lstm_cell = DropoutWrapper(self.lstm_cell, **dropout_options)

    # Run the wrapped cell once on a random (1, 1, inputSize) tensor;
    # presumably this makes the cell create its variables before they are
    # read below -- TODO confirm.
    probe_input = tf.random_normal((1, 1, self.inputSize))
    tf.nn.dynamic_rnn(self.va_lstm_cell, probe_input, dtype=tf.float32)

    self._trainable_weights = self.lstm_cell.variables
def build_decoder(self, encoder_outputs, encoder_final_state,
                  decoder_inputs, decoder_targets, decoder_lengths,
                  encoder_input_lengths):
    """Builds an RNN decoder. Can also use dropout and an attention mechanism.

    Args:
        encoder_outputs: per-timestep encoder outputs; only used as the
            attention memory (transposed with perm ``[1, 0, 2]`` first).
        encoder_final_state: encoder final state, used as the decoder's
            initial (cell) state.
        decoder_inputs: integer ids looked up in the ARPA embedding table;
            fed to the decoder in training mode.
        decoder_targets: not used by this method.
        decoder_lengths: sequence lengths given to the training helper.
        encoder_input_lengths: encoder sequence lengths; used as the
            attention memory lengths and to cap decoding at twice the
            longest input.

    Returns:
        ``(logits, predictions_arpa)``: ``logits`` is the decoder's
        ``rnn_output`` (decoded with ``output_time_major=True``);
        ``predictions_arpa`` is the transposed argmax of ``logits`` over
        axis 2.

    Raises:
        ValueError: if ``self.mode`` is not ``"train"``, ``"training"`` or
            ``"inference"``.
    """
    # The dropout gate used to test for "training" while the helper
    # selection tested for "train", so no single mode value got both.
    # Accept either spelling, and fail fast on anything else instead of
    # hitting an unbound `helper` after half of the graph has been built.
    training = self.mode in ("train", "training")
    if not training and self.mode != "inference":
        raise ValueError(
            "Unsupported mode {!r}; expected 'train', 'training' or "
            "'inference'.".format(self.mode))

    with tf.variable_scope("decoder"):
        # Embeddings for ARPA phonetic characters
        arpa_embeddings = tf.Variable(
            tf.random_uniform((self.n_arpa, self.embed_dims), -1.0, 1.0),
            name="arpa_embeddings")
        decoder_input_embeddings = tf.nn.embedding_lookup(
            arpa_embeddings, decoder_inputs)

        # Dense layer that each timestep output is sent to
        with tf.variable_scope("projection"):
            projection_layer = tf.layers.Dense(self.n_arpa, use_bias=False)

        # The decoder is twice as wide when the encoder is bidirectional.
        decoder_dims = self.hidden_dims
        if self.bidir:
            decoder_dims *= 2

        # Cell definition with dropout for training
        decoder_cell = self.cell_class_fn(decoder_dims)
        if training:
            keep_prob = 1.0 - self.dropout
            decoder_cell = DropoutWrapper(
                decoder_cell,
                input_keep_prob=keep_prob,
                output_keep_prob=keep_prob,
                state_keep_prob=keep_prob)

        if self.attention_fn is not None:
            # Attention wrapper; the memory is the transposed encoder output.
            attention_states = tf.transpose(encoder_outputs, [1, 0, 2])
            attention_mechanism = self.attention_fn(
                decoder_dims, attention_states,
                memory_sequence_length=encoder_input_lengths)
            decoder_cell = tf.contrib.seq2seq.AttentionWrapper(
                decoder_cell, attention_mechanism,
                attention_layer_size=decoder_dims)
            # The wrapper has its own state structure: start from its zero
            # state and substitute the encoder's final state as cell state.
            decoder_initial_state = decoder_cell.zero_state(
                self.batch_size, tf.float32).clone(
                    cell_state=encoder_final_state)
        else:
            decoder_initial_state = encoder_final_state

        if training:
            # Input at each timestep is label ARPA phonetic sequence
            helper = tf.contrib.seq2seq.TrainingHelper(
                inputs=decoder_input_embeddings,
                sequence_length=decoder_lengths,
                time_major=True)
        else:
            # Inference argmax predictions are inputs to the next timestep
            start_tokens = tf.fill([self.batch_size], START_CODE)
            helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
                arpa_embeddings, start_tokens, END_CODE)

        my_decoder = tf.contrib.seq2seq.BasicDecoder(
            decoder_cell, helper, decoder_initial_state,
            output_layer=projection_layer)

        # Decoding is limited to 2 times the longest input sequence length
        # (the cap is applied in every mode).
        maximum_iterations = tf.round(
            tf.reduce_max(encoder_input_lengths) * 2)

        # Decoding loop output
        outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(
            my_decoder,
            output_time_major=True,
            impute_finished=True,
            maximum_iterations=maximum_iterations)
        logits = outputs.rnn_output

        # Transposed so that not time major
        predictions_arpa = tf.transpose(tf.argmax(logits, 2))

    return logits, predictions_arpa
def build_encoder(self, encoder_inputs, encoder_input_lengths):
    """
    Builds an RNN encoder. Can be configured to be uni- / bi- directional.
    Can also enable dropout. Returns outputs of the RNN at each timestep and
    also the final state

    Args:
        encoder_inputs: integer ids looked up in the character embedding
            table. The RNNs run with ``time_major=True``, so this is
            presumably laid out as (time, batch) -- TODO confirm.
        encoder_input_lengths: per-example sequence lengths passed to the
            RNN as ``sequence_length``.

    Returns:
        ``(encoder_outputs, encoder_final_state)``. In the bidirectional
        case the forward/backward outputs are concatenated on the last axis
        and the final ``c`` and ``h`` states are concatenated on axis 1 into
        an ``LSTMStateTuple``; that branch reads ``.c`` / ``.h`` and so
        needs cells whose state exposes those attributes.
    """
    # build_decoder selects its training helper with mode == "train" while
    # the dropout gate here only recognised "training"; accept both so that
    # dropout is actually applied whichever spelling the caller uses.
    use_dropout = self.mode in ("train", "training")

    def new_cell():
        # One encoder cell, dropout-wrapped only while training.
        cell = self.cell_class_fn(self.hidden_dims)
        if use_dropout:
            keep_prob = 1.0 - self.dropout
            cell = DropoutWrapper(
                cell,
                input_keep_prob=keep_prob,
                output_keep_prob=keep_prob,
                state_keep_prob=keep_prob)
        return cell

    with tf.variable_scope("encoder"):
        # Embeddings for orthographic characters
        char_embeddings = tf.Variable(
            tf.random_uniform((self.n_chars, self.embed_dims), -1.0, 1.0),
            name="char_embeddings")
        encoder_input_embeddings = tf.nn.embedding_lookup(
            char_embeddings, encoder_inputs)

        if not self.bidir:
            # Unidirectional run. sequence_length is passed (as the
            # bidirectional branch already does) so the final state is the
            # one at each example's true last step, not after its padding.
            encoder_outputs, encoder_final_state = tf.nn.dynamic_rnn(
                new_cell(),
                encoder_input_embeddings,
                sequence_length=encoder_input_lengths,
                dtype=tf.float32,
                time_major=True)
        else:
            # Bidirectional run
            with tf.variable_scope("fw"):
                fw_encoder_cell = new_cell()
            with tf.variable_scope("bw"):
                bw_encoder_cell = new_cell()

            ((encoder_fw_outputs, encoder_bw_outputs),
             (encoder_fw_final_state, encoder_bw_final_state)) = (
                 tf.nn.bidirectional_dynamic_rnn(
                     cell_fw=fw_encoder_cell,
                     cell_bw=bw_encoder_cell,
                     inputs=encoder_input_embeddings,
                     sequence_length=encoder_input_lengths,
                     dtype=tf.float32,
                     time_major=True))

            # Concat final states of forward and backward run
            encoder_final_state_c = tf.concat(
                (encoder_fw_final_state.c, encoder_bw_final_state.c), 1)
            encoder_final_state_h = tf.concat(
                (encoder_fw_final_state.h, encoder_bw_final_state.h), 1)
            encoder_final_state = LSTMStateTuple(c=encoder_final_state_c,
                                                 h=encoder_final_state_h)
            encoder_outputs = tf.concat(
                (encoder_fw_outputs, encoder_bw_outputs), -1)

    return encoder_outputs, encoder_final_state
x = tf.placeholder(dtype=tf.int32, shape=[None, None]) y = tf.placeholder(dtype=tf.int64, shape=[None]) sequence_length = tf.placeholder(dtype=tf.int32, shape=[None]) keep_prob = tf.placeholder(dtype=tf.float32) num_units = 100 n_epoch = 100 with tf.variable_scope('embedding'): rnn_input = tf.contrib.layers.embed_sequence(x, vocab_size=embed_ingred_size, embed_dim=embed_size) with tf.variable_scope('rnn'): cell = GRUCell(num_units) cell = DropoutWrapper(cell, output_keep_prob=keep_prob) cell = MultiRNNCell([cell for _ in range(num_layers)]) outputs, states = tf.nn.dynamic_rnn(cell, rnn_input, dtype=tf.float32, sequence_length=sequence_length) # ★Attention # 'outputs' is a tensor of shape [batch_size, max_time, num_of_units] # 'state' is a N-tuple where N is the number of GRUCells containing a # tf.contrib.rnn.GRUcells for each cell with tf.variable_scope('full_connected'): state = states[-1] fc = tf.contrib.layers.fully_connected(state, num_class,
y = tf.placeholder(dtype=tf.int64, shape=[None]) sequence_length = tf.placeholder(dtype=tf.int32, shape=[None]) keep_prob = tf.placeholder(dtype=tf.float32) num_units = 100 n_epoch = 3000 with tf.variable_scope('embedding'): rnn_input = tf.contrib.layers.embed_sequence(x, vocab_size=embed_ingred_size, embed_dim=embed_size) with tf.variable_scope('rnn'): with tf.variable_scope('forward'): fw_cells = [GRUCell(num_units) for _ in range(num_layers)] fw_cells = [ DropoutWrapper(fw_cell, output_keep_prob=keep_prob) for fw_cell in fw_cells ] fw_cells = MultiRNNCell(fw_cells) with tf.variable_scope('Backward'): bw_cells = [GRUCell(num_units) for _ in range(num_layers)] bw_cells = [ DropoutWrapper(bw_cell, output_keep_prob=keep_prob) for bw_cell in bw_cells ] bw_cells = MultiRNNCell(bw_cells) outputs, states = bidirectional_dynamic_rnn( fw_cells, bw_cells,