def __init__(self, cell, is_training, input_keep_prob=1.0, output_keep_prob=1.0,
             state_keep_prob=1.0, variational_recurrent=False, input_size=None,
             dtype=None, seed=None):
    """Initialise the dropout wrapper and remember the training flag.

    Args:
        cell: the RNN cell to wrap.
        is_training: stored on the instance as ``self.is_training``; how it is
            consumed is not visible from this method.
        input_keep_prob: forwarded to ``DropoutWrapper.__init__``.
        output_keep_prob: forwarded to ``DropoutWrapper.__init__``.
        state_keep_prob: forwarded to ``DropoutWrapper.__init__``.
        variational_recurrent: forwarded to ``DropoutWrapper.__init__``.
        input_size: forwarded to ``DropoutWrapper.__init__``.
        dtype: forwarded to ``DropoutWrapper.__init__``.
        seed: forwarded to ``DropoutWrapper.__init__``.
    """
    # Forward the caller's values. Passing literal defaults here would
    # silently discard every dropout setting given to this constructor.
    DropoutWrapper.__init__(
        self,
        cell,
        input_keep_prob=input_keep_prob,
        output_keep_prob=output_keep_prob,
        state_keep_prob=state_keep_prob,
        variational_recurrent=variational_recurrent,
        input_size=input_size,
        dtype=dtype,
        seed=seed,
    )
    self.is_training = is_training
def _create_loss(self):
    '''
    Risk-estimation loss.

        Loss = -100. * mean(P * (R - c))

    P : self.position, the network output; the planned position to hold
        into the next day
    R : self.y, the change rate of the next day
    c : self.cost

    Sets self.position, self.avg_position and self.loss.
    :return: None
    '''
    with tf.variable_scope("loss", reuse=tf.AUTO_REUSE):
        # self.x is (batch_size, num_step, input_size); unstacking on axis 1
        # yields num_step tensors of (batch_size, input_size).
        step_inputs = tf.unstack(self.x, self.num_step, 1)

        base_cell = rnn.LSTMCell(self.hidden_size,
                                 forget_bias=1.0,
                                 initializer=orthogonal_initializer)
        # The same keep probability is applied to inputs, outputs and state.
        cell = DropoutWrapper(base_cell,
                              input_keep_prob=self.keep_prob,
                              output_keep_prob=self.keep_prob,
                              state_keep_prob=self.keep_prob)

        # One (batch_size, hidden_size) tensor per time step.
        step_outputs, _ = rnn.static_rnn(cell, step_inputs, dtype=tf.float32)

        # Only the last step feeds the output projection.
        last_output = step_outputs[-1]
        signal = tf.matmul(last_output, self.weights['out']) + self.biases['out']
        norm_signal = self.batch_norm_layer(signal, scope="activation_batch_norm")

        # relu6 is bounded to [0, 6]; dividing by 6 maps the position to [0, 1].
        self.position = tf.nn.relu6(norm_signal, name="relu_limit") / 6.
        self.avg_position = tf.reduce_mean(self.position)

        excess_return = self.y - self.cost
        self.loss = -100. * tf.reduce_mean(
            tf.multiply(excess_return, self.position, name="estimated_risk"))
def single_rnn_cell():
    """Return one recurrent cell wrapped with output dropout.

    Reads ``self`` from the enclosing scope: a GRUCell is built when
    ``self.cell_type == 'GRU'``, otherwise an LSTMCell, both sized by
    ``self.rnn_size``.
    """
    if self.cell_type == 'GRU':
        core = GRUCell(self.rnn_size)
    else:
        core = LSTMCell(self.rnn_size)
    return DropoutWrapper(core, output_keep_prob=self.keep_prob)
def decoder_rnn(decoder_embedded_input, decoder_embeddings_matrix, encoder_state,
                num_words, sequence_length, rnn_size, num_layers, word2int,
                keep_prob, batch_size):
    """Build the decoder RNN and return its training and test predictions.

    Args:
        decoder_embedded_input: passed to ``decode_training_set``.
        decoder_embeddings_matrix: passed to ``decode_test_set``.
        encoder_state: initial state handed to both decode helpers.
        num_words: size of the fully connected output layer.
        sequence_length: passed to ``decode_training_set``; the test decoder
            receives ``sequence_length - 1``.
        rnn_size: number of units in each ``BasicLSTMCell``.
        num_layers: number of stacked decoder layers.
        word2int: mapping that must contain the ``'<SOS>'`` and ``'<EOS>'`` keys.
        keep_prob: input keep probability of each layer's dropout wrapper; also
            forwarded to both decode helpers.
        batch_size: forwarded to both decode helpers.

    Returns:
        Tuple ``(training_predictions, test_predictions)``.
    """
    with tf.variable_scope('decoding') as decoding_scope:

        def _make_layer():
            # A fresh cell per layer: repeating one cell object inside
            # MultiRNNCell makes every layer share the same cell instance.
            return DropoutWrapper(BasicLSTMCell(rnn_size),
                                  input_keep_prob=keep_prob)

        decoder_cell = MultiRNNCell([_make_layer() for _ in range(num_layers)])

        weights = tf.truncated_normal_initializer(stddev=0.1)
        biases = tf.zeros_initializer()

        def output_function(x):
            # Linear projection (no activation) onto the vocabulary.
            return tf.contrib.layers.fully_connected(
                x,
                num_words,
                None,
                scope=decoding_scope,
                weights_initializer=weights,
                biases_initializer=biases)

        training_predictions = decode_training_set(
            encoder_state, decoder_cell, decoder_embedded_input,
            sequence_length, decoding_scope, output_function, keep_prob,
            batch_size)

        # The test decoder reuses the variables created for training.
        decoding_scope.reuse_variables()
        test_predictions = decode_test_set(
            encoder_state, decoder_cell, decoder_embeddings_matrix,
            word2int['<SOS>'], word2int['<EOS>'], sequence_length - 1,
            num_words, decoding_scope, output_function, keep_prob, batch_size)
    return training_predictions, test_predictions
def build_single_cell_num(self, rnn_num):
    """Build a single RNN cell.

    Args:
        rnn_num: value passed to the cell constructor (the number of hidden
            units).

    Returns:
        A GRUCell when ``self.config.cell_type == 'gru'``, otherwise an
        LSTMCell, optionally wrapped with output dropout and then with a
        residual wrapper, as selected by ``self.config``.
    """
    make_cell = GRUCell if self.config.cell_type == 'gru' else LSTMCell
    cell = make_cell(rnn_num)

    if self.config.use_dropout:
        cell = DropoutWrapper(cell,
                              dtype=tf.float32,
                              output_keep_prob=self.keep_prob)

    if self.config.use_residual:
        cell = ResidualWrapper(cell)

    return cell
def get_cell(input_size=None, reuse=False):
    """Build the recurrent cell described by the enclosing ``decoder`` config.

    Args:
        input_size: input size of the first layer; later layers use
            ``decoder.cell_size``.
        reuse: forwarded to every cell constructor.

    Returns:
        The single cell when exactly one layer was built, otherwise a
        ``CellWrapper`` around a ``MultiRNNCell`` of all layers.
    """
    stack = []
    for layer_idx in range(decoder.layers):
        # Only the first layer sees the external input size.
        layer_input_size = decoder.cell_size if layer_idx else input_size
        kind = decoder.cell_type.lower()

        if kind == 'lstm':
            cell = CellWrapper(BasicLSTMCell(decoder.cell_size, reuse=reuse))
        elif kind == 'dropoutgru':
            cell = DropoutGRUCell(decoder.cell_size,
                                  reuse=reuse,
                                  layer_norm=decoder.layer_norm,
                                  input_size=layer_input_size,
                                  input_keep_prob=decoder.rnn_input_keep_prob,
                                  state_keep_prob=decoder.rnn_state_keep_prob)
        else:
            cell = GRUCell(decoder.cell_size,
                           reuse=reuse,
                           layer_norm=decoder.layer_norm)

        # DropoutGRUCell already receives its keep probabilities above, so it
        # is not wrapped a second time.
        if decoder.use_dropout and kind != 'dropoutgru':
            cell = DropoutWrapper(cell,
                                  input_keep_prob=decoder.rnn_input_keep_prob,
                                  output_keep_prob=decoder.rnn_output_keep_prob,
                                  state_keep_prob=decoder.rnn_state_keep_prob,
                                  variational_recurrent=decoder.pervasive_dropout,
                                  dtype=tf.float32,
                                  input_size=layer_input_size)
        stack.append(cell)

    if len(stack) != 1:
        return CellWrapper(MultiRNNCell(stack))
    return stack[0]
def build_balancing_representation(self):
    """Run the LSTM over covariates and treatments and project its outputs.

    Sets ``self.rnn_input`` (covariates concatenated with previous treatments
    on the last axis) and ``self.sequence_length``.

    Returns:
        The dense ELU projection of the flattened RNN outputs, with
        ``self.br_size`` units.
    """
    self.rnn_input = tf.concat(
        [self.current_covariates, self.previous_treatments], axis=-1)
    self.sequence_length = self.compute_sequence_length(self.rnn_input)

    lstm = LSTMCell(self.rnn_hidden_units, state_is_tuple=False)
    cell = DropoutWrapper(lstm,
                          output_keep_prob=self.rnn_keep_prob,
                          state_keep_prob=self.rnn_keep_prob,
                          variational_recurrent=True,
                          dtype=tf.float32)

    initial_state = None
    if self.b_train_decoder:
        # state_is_tuple=False: the state is a single tensor, built here by
        # concatenating self.init_state with itself on the last axis.
        initial_state = tf.concat([self.init_state, self.init_state], axis=-1)

    hidden, _ = rnn.dynamic_rnn(cell,
                                self.rnn_input,
                                initial_state=initial_state,
                                dtype=tf.float32,
                                sequence_length=self.sequence_length)

    # Flatten so the same dense weights are applied at every time step.
    flat_hidden = tf.reshape(hidden, [-1, self.rnn_hidden_units])
    return tf.layers.dense(flat_hidden, self.br_size, activation=tf.nn.elu)
def build_single_cell(self, n_hidden, use_residual):
    """Build a single RNN cell.

    Args:
        n_hidden: number of hidden units in the cell.
        use_residual: whether to wrap the cell in a ResidualWrapper.

    Returns:
        A GRUCell when ``self.cell_type == 'gru'``, otherwise an LSTMCell,
        with output dropout added when ``self.use_dropout`` is set.
    """
    make_cell = GRUCell if self.cell_type == 'gru' else LSTMCell
    cell = make_cell(n_hidden)

    if self.use_dropout:
        cell = DropoutWrapper(cell,
                              dtype=tf.float32,
                              output_keep_prob=self.keep_prob_placeholder,
                              seed=self.seed)

    # The residual wrapper goes outside the dropout wrapper.
    if use_residual:
        cell = ResidualWrapper(cell)

    return cell
def _build_rnn(self, name, is_cuda, rnn_dim, inputs, state_dropout_rate, output_dropout_rate):
    """Build a one-layer bidirectional LSTM over ``inputs`` with output dropout.

    NOTE(review): the block nesting below was reconstructed from a flattened
    source; confirm the placement of the trailing ``concat``/``dropout``
    against the original file.

    Args:
        name: variable scope that wraps the whole layer.
        is_cuda: if truthy, use ``CudnnLSTM``; otherwise use
            ``CudnnCompatibleLSTMCell`` cells under the ``'cudnn_lstm'`` scope.
        rnn_dim: number of units per direction.
        inputs: input tensor; the non-CuDNN branch runs with
            ``time_major=True``.
        state_dropout_rate: recurrent-state dropout rate (non-CuDNN branch
            only), applied only while ``self._is_training`` is true.
        output_dropout_rate: dropout rate applied to the layer outputs.

    Returns:
        The output tensor after dropout.
    """
    with tf.variable_scope(name):
        if is_cuda:
            lstm_cell = CudnnLSTM(num_layers=1, num_units=rnn_dim, direction='bidirectional')
            outputs, _ = lstm_cell(inputs)
        else:
            # Keep probability is 1 unless training: the cast turns the
            # training flag into 0.0 or 1.0.
            state_keep_prob = 1. - state_dropout_rate * tf.cast(
                self._is_training, tf.float32)
            with tf.variable_scope('cudnn_lstm'):
                # Factory, so the forward and backward directions each get
                # their own cell instance.
                single_cell = lambda: DropoutWrapper(
                    CudnnCompatibleLSTMCell(rnn_dim),
                    state_keep_prob=state_keep_prob,
                    variational_recurrent=True,
                    input_size=inputs.get_shape()[-1],
                    dtype=tf.float32)
                outputs, _, _ = tf.contrib.rnn.stack_bidirectional_dynamic_rnn(
                    [single_cell()], [single_cell()], inputs, time_major=True, dtype=tf.float32)
            outputs = tf.concat(outputs, axis=-1)
        # noise_shape has size 1 on axis 0, so one dropout mask is shared
        # along that axis (presumably time — TODO confirm for the CuDNN branch).
        outputs = tf.layers.dropout(outputs, output_dropout_rate, training=self._is_training, noise_shape=tf.concat(
            [[1], tf.shape(outputs)[1:]], axis=0))
    return outputs
def build_single_cell(self, n_hidden, use_residual):
    '''
    Build a single RNN cell.

    :param n_hidden: number of hidden units in the cell
    :param use_residual: whether to wrap the cell in a ResidualWrapper
    :return: GRUCell when self.cell_type == 'gru', otherwise LSTMCell,
        optionally wrapped with output dropout and a residual connection
    '''
    make_cell = GRUCell if self.cell_type == 'gru' else LSTMCell
    cell = make_cell(n_hidden)

    if self.use_dropout:
        # NOTE(review): `keep_prob_placeholdle` looks like a misspelling of
        # `keep_prob_placeholder` — confirm against the attribute the class
        # actually defines before renaming.
        cell = DropoutWrapper(cell,
                              dtype=tf.float32,
                              output_keep_prob=self.keep_prob_placeholdle,
                              seed=self.seed)

    if use_residual:
        cell = ResidualWrapper(cell)

    return cell
def _create_loss(self):
    '''
    Risk-estimation loss.

    The network output is the planned position to hold into the next day and
    self.y is the next day's change rate. Two kinds of money are lost:

    - self.y * self.position: the trade loss;
    - cost * self.position: a constant loss (tax, and the missed profit of
      e.g. buying national debt instead).

    Hence:
        100 * (-self.y * self.position + cost * self.position)
            = -100 * ((self.y - cost) * self.position)

    Sets self.position, self.avg_position and self.loss.
    :return: None
    '''
    step_inputs = tf.unstack(self.x, self.step, 1)

    base_cell = rnn.LSTMCell(self.hidden_size,
                             forget_bias=1.0,
                             initializer=orthogonal_initializer())
    # The same keep rate is applied to inputs, outputs and state.
    cell = DropoutWrapper(base_cell,
                          input_keep_prob=self.keep_rate,
                          output_keep_prob=self.keep_rate,
                          state_keep_prob=self.keep_rate)
    step_outputs, _ = rnn.static_rnn(cell, step_inputs, dtype=tf.float32)

    # Only the last step feeds the output projection.
    last_output = step_outputs[-1]
    signal = tf.matmul(last_output, self.weights['out']) + self.biases['out']
    norm_signal = self.batch_norm_layer(signal, scope="activation_batch_norm")

    # relu6 is bounded to [0, 6]; shifting by 3 and dividing by 3 maps the
    # position to [-1, 1].
    self.position = (tf.nn.relu6(norm_signal, name="relu_limit") - 3) / 3.
    self.avg_position = tf.reduce_mean(self.position)

    excess_return = self.y - self.cost
    self.loss = -100. * tf.reduce_mean(
        tf.multiply(excess_return, self.position, name="estimated_risk"))
def get_cell(input_size=None, reuse=False, dropout=True):
    """Build the recurrent cell described by the enclosing ``decoder`` config.

    Args:
        input_size: forwarded to every DropoutWrapper.
        reuse: forwarded to every cell constructor.
        dropout: set to False to skip dropout even when
            ``decoder.use_dropout`` is on.

    Returns:
        The single cell when exactly one layer was built, otherwise a
        ``CellWrapper`` around a ``MultiRNNCell`` of all layers.
    """
    stack = []
    for _ in range(decoder.layers):
        if not decoder.use_lstm:
            cell = GRUCell(decoder.cell_size, reuse=reuse)
        else:
            cell = CellWrapper(BasicLSTMCell(decoder.cell_size, reuse=reuse))

        # Both the call-site flag and the config must enable dropout.
        if dropout and decoder.use_dropout:
            cell = DropoutWrapper(cell,
                                  input_keep_prob=decoder.rnn_input_keep_prob,
                                  output_keep_prob=decoder.rnn_output_keep_prob,
                                  state_keep_prob=decoder.rnn_state_keep_prob,
                                  variational_recurrent=decoder.pervasive_dropout,
                                  dtype=tf.float32,
                                  input_size=input_size)
        stack.append(cell)

    if len(stack) != 1:
        return CellWrapper(MultiRNNCell(stack))
    return stack[0]
def forward_pass(self, x):
    """Run the stacked ConvLSTM layers over ``x`` and return the predictions.

    One Conv2DLSTMCell layer is built per entry of ``self.filters``; each
    layer consumes the batch-normalised outputs of the previous one.

    Args:
        x: input tensor, unstacked into ``self.in_steps`` steps along axis 1.

    Returns:
        The last ``self.out_steps`` outputs, transposed with
        ``perm=[1, 0, 2, 3, 4]``.
    """
    step_inputs = tf.unstack(x, self.in_steps, 1)

    # channels[k - 1] -> channels[k] is the channel change of layer k.
    channels = [self.input_shape[-1]] + self.filters
    for layer_idx in range(1, len(channels)):
        out_channels = channels[layer_idx]
        layer_input_shape = self.input_shape[:-1] + [channels[layer_idx - 1]]

        conv_cell = rnn.Conv2DLSTMCell(
            input_shape=layer_input_shape,
            output_channels=out_channels,
            kernel_shape=[3, 3],
            forget_bias=1.0,
            initializers=orthogonal_initializer(),
            name="conv_lstm_cell_{}".format(layer_idx))
        cell = DropoutWrapper(conv_cell,
                              input_keep_prob=self.keep_rate,
                              output_keep_prob=self.keep_rate,
                              state_keep_prob=self.keep_rate)

        outputs, _ = tf.nn.static_rnn(cell, step_inputs, dtype=tf.float32)
        outputs = self._batch_norm(outputs)
        # Re-split along axis 0 to feed the next layer step by step.
        step_inputs = tf.unstack(outputs, self.in_steps, 0)

    # Keep only the trailing steps that form the forecast.
    tail = outputs[-self.out_steps:]
    return tf.transpose(tail, perm=[1, 0, 2, 3, 4])
def make_RNN_cell(self, fn=tf.nn.relu):
    """
    Return a new cell (for deep recurrent networks) with
    ``self.config.Nhidden`` units and activation function ``fn``.

    Args:
        fn - tensorflow activation function, e.g. tf.nn.relu, tf.nn.tanh

    Return:
        cell - TF RNN cell wrapped with variational input dropout

    Raises:
        Exception - if ``self.config.cell_type`` is not RNN, LSTM or GRU
    """
    supported = (('RNN', BasicRNNCell), ('LSTM', LSTMCell), ('GRU', GRUCell))
    for type_name, cell_class in supported:
        if self.config.cell_type == type_name:
            cell = cell_class(num_units=self.config.Nhidden, activation=fn)
            break
    else:
        msg = "cell_type must be RNN, LSTM or GRU. cell_type was {}".format(
            self.config.cell_type)
        raise Exception(msg)

    # Dropout is always part of the graph; feed keep_prob = 1 to disable it.
    return DropoutWrapper(cell,
                          input_keep_prob=self.keep_prob,
                          variational_recurrent=True,
                          input_size=self.config.Nhidden,
                          dtype=tf.float32)
def attention_alignment(inputs, input_lengths, memory, memory_lengths, n_layers, n_units,
                        dropout_prob, cell_type=GRUCell, attention_mechanism=BahdanauAttention,
                        is_training=True):
    """Align ``inputs`` against ``memory`` with an attention-wrapped RNN.

    Args:
        inputs (tensor): Input sequence, shaped [Batch x seq_length x dimension]
        input_lengths (tensor): Lengths of the input sequences, used for
            dynamic unrolling
        memory (tensor): Sequence to attend over
        memory_lengths (tensor): Lengths of the memory sequences
        n_layers (int): Number of RNN layers
        n_units (int): Number of units in each RNN cell
        dropout_prob (float): Dropout rate of the RNN cell outputs; forced to
            0 when ``is_training`` is false
        cell_type (method): RNN cell constructor, GRU by default
        attention_mechanism (method): Attention constructor, Bahdanau by default
        is_training (bool): Whether the model is training or testing

    Returns:
        (tensor, state, tensor): the padded and reshaped decoder outputs, the
        final decoder state, and the alignment history transposed with
        ``[1, 0, 2]`` (for attention heat maps).
    """
    # Static dimensions of the input.
    batch_size, seq_length, dim = inputs.get_shape().as_list()

    # Attention over the memory.
    attention = attention_mechanism(n_units,
                                    memory,
                                    memory_sequence_length=memory_lengths,
                                    dtype=tf.float32)

    # No dropout outside of training.
    if not is_training:
        dropout_prob = 0

    def _with_dropout(cell):
        return DropoutWrapper(cell, output_keep_prob=1 - dropout_prob)

    # Encoder RNN over the input sequence.
    if n_layers > 1:
        attention_cell = MultiRNNCell(
            [_with_dropout(cell_type(n_units)) for _ in range(n_layers)])
    else:
        attention_cell = _with_dropout(cell_type(n_units))

    # Wire the attention mechanism into every step of the RNN.
    a_cell = AttentionWrapper(attention_cell, attention, alignment_history=True)

    # Initial state.
    # TODO: Do we ever feed an init state?
    attention_state = a_cell.zero_state(batch_size, dtype=tf.float32)

    # Read the input while attending over the memory.
    helper = TrainingHelper(inputs=inputs, sequence_length=input_lengths)
    decoder = BasicDecoder(a_cell, helper, attention_state)

    # The decoder output is a new representation of the input with attention
    # over the memory.
    decoded, states, _ = tf.contrib.seq2seq.dynamic_decode(
        decoder, maximum_iterations=seq_length, impute_finished=True)

    # Pad the time axis back to seq_length, then restore the static shape.
    time_padding = seq_length - tf.reduce_max(input_lengths)
    outputs = tf.pad(decoded.rnn_output, [[0, 0], [0, time_padding], [0, 0]])
    outputs = tf.reshape(outputs, [batch_size, seq_length, dim])

    # Attention matrix for visualising a heat map.
    aligned = tf.transpose(states.alignment_history.stack(), [1, 0, 2])
    return outputs, states, aligned
def __init__(self, vocabulary, sequence_length, batch_size, hidden_dim=650, name='PTBLM'):
    """Create the layers of the language model under variable scope ``name``.

    Two LSTM cells are built once and exposed in two stacks that share them:
    ``self.stack`` (plain cells) and ``self.stack_dropout`` (the same cells
    wrapped with an output keep probability of 0.5).

    Args:
        vocabulary: passed to ``Embed`` and ``Dense`` (vocabulary size,
            presumably — confirm against those classes).
        sequence_length: stored on the instance.
        batch_size: stored on the instance.
        hidden_dim: embedding and LSTM width.
        name: variable scope for all layers.
    """
    self.vocabulary = vocabulary
    self.sequence_length = sequence_length
    self.batch_size = batch_size
    self.hidden_dim = hidden_dim

    with tf.variable_scope(name):
        self.embed = Embed(vocabulary, hidden_dim, 'embed')

        self.lstm0 = LSTMCell(hidden_dim)
        self.lstm1 = LSTMCell(hidden_dim)

        # The dropout variants wrap the very same cell objects as the plain
        # stack.
        self.lstm0_dropout = DropoutWrapper(self.lstm0, output_keep_prob=0.5)
        self.lstm1_dropout = DropoutWrapper(self.lstm1, output_keep_prob=0.5)

        plain_layers = [self.lstm0, self.lstm1]
        dropout_layers = [self.lstm0_dropout, self.lstm1_dropout]
        self.stack = MultiRNNCell(plain_layers)
        self.stack_dropout = MultiRNNCell(dropout_layers)

        self.dense = Dense(hidden_dim, vocabulary, 'dense')

    self.params = None
def _create_rnn_cell(self):
    """
    Create a single RNN cell according to the architecture of this RNN.

    Returns
    -------
    rnn cell
        A GRU or LSTM cell with ``self.num_units`` units, wrapped in a
        DropoutWrapper

    Raises
    ------
    ValueError
        If ``self.cell_type`` is neither ``CellType.GRU`` nor ``CellType.LSTM``
    """
    # A missing keep probability means "no dropout".
    keep_prob = self.keep_prob if self.keep_prob is not None else 1.0

    if self.cell_type == CellType.GRU:
        base_cell = GRUCell(self.num_units)
    elif self.cell_type == CellType.LSTM:
        base_cell = LSTMCell(self.num_units)
    else:
        raise ValueError("unknown cell type: {}".format(self.cell_type))

    # keep_prob fills the wrapper's first two positional options (presumably
    # the input and output keep probabilities — confirm against the imported
    # DropoutWrapper).
    return DropoutWrapper(base_cell, keep_prob, keep_prob)
def create_cell(size, reuse, lstm_init):
    """Return an LSTM cell wrapped in a DropoutWrapper.

    Args:
        size: number of units of the LSTM cell.
        reuse: forwarded to ``LSTMCell``.
        lstm_init: initializer forwarded to ``LSTMCell``.
    """
    base_cell = LSTMCell(size,
                         forget_bias=0.7,
                         state_is_tuple=True,
                         initializer=lstm_init,
                         reuse=reuse)
    # NOTE(review): no keep probabilities are passed, so the wrapper runs with
    # its defaults — presumably a no-op; confirm whether dropout was intended.
    return DropoutWrapper(base_cell)
def decoding_layer(dec_input, encoder_state, target_sequence_length,
                   max_target_sequence_length, rnn_size, num_layers,
                   target_vocab_to_int, target_vocab_size, batch_size, keep_prob,
                   decoding_embedding_size):
    """
    Create the decoding layer.

    :param dec_input: Decoder input
    :param encoder_state: Encoder state
    :param target_sequence_length: The lengths of each sequence in the target batch
    :param max_target_sequence_length: Maximum length of target sequences
    :param rnn_size: RNN Size
    :param num_layers: Number of layers
    :param target_vocab_to_int: Dictionary to go from the target words to an id
    :param target_vocab_size: Size of target vocabulary
    :param batch_size: The size of the batch
    :param keep_prob: Dropout keep probability
    :param decoding_embedding_size: Decoding embedding size
    :return: Tuple of (Training BasicDecoderOutput, Inference BasicDecoderOutput)
    """
    # Embedding table for the target vocabulary, and its lookup for training.
    decode_embeddings = tf.Variable(
        tf.random_uniform([target_vocab_size, decoding_embedding_size]))
    decode_embed_input = tf.nn.embedding_lookup(decode_embeddings, dec_input)

    # One independently constructed, dropout-wrapped LSTM per layer.
    layers = [
        DropoutWrapper(
            LSTMCell(rnn_size,
                     initializer=tf.random_uniform_initializer(-0.1, 0.1, seed=24)),
            output_keep_prob=keep_prob)
        for _ in range(num_layers)
    ]
    decode_cell = MultiRNNCell(layers)

    output_layer = Dense(
        target_vocab_size,
        kernel_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.1))

    with tf.variable_scope("decode"):
        train_decoder_out = decoding_layer_train(
            encoder_state, decode_cell, decode_embed_input,
            target_sequence_length, max_target_sequence_length,
            output_layer, keep_prob)

    # Inference shares the variables created by the training decoder.
    with tf.variable_scope("decode", reuse=True):
        start_id = target_vocab_to_int['<GO>']
        end_id = target_vocab_to_int['<EOS>']
        infer_decoder_out = decoding_layer_infer(
            encoder_state, decode_cell, decode_embeddings, start_id, end_id,
            max_target_sequence_length, target_vocab_size, output_layer,
            batch_size, keep_prob)

    return train_decoder_out, infer_decoder_out
def _lstm_cell(model_opt):
    """Build the basic LSTM cell to which further wrappers can be applied.

    Args:
        model_opt: options object; ``enc_hidden_dims`` gives the cell size
            and ``allow_dropout`` enables the output-dropout wrapper.
    """
    cell = BasicLSTMCell(model_opt.enc_hidden_dims,
                         forget_bias=2.5,
                         state_is_tuple=True)
    if not model_opt.allow_dropout:
        return cell
    # NOTE(review): `self` is not a parameter of this function; presumably it
    # is captured from an enclosing method — confirm.
    return DropoutWrapper(cell, output_keep_prob=self.rnn_keep_prob)
def _initialize_weights(self):
    """Build the GRU network and store its prediction in ``self.output``.

    A dropout-wrapped GRU runs over ``self.x``; its final state feeds a
    hidden ``fc`` layer and then a second ``fc`` layer called with
    ``activation_fn=None``.
    """
    gru = GRUCell(num_units=self.params['n_units'])
    # k_prob is the wrapper's first positional option (presumably the input
    # keep probability — confirm against the imported DropoutWrapper).
    gru_with_dropout = DropoutWrapper(gru, self.params['k_prob'])

    # Only the final state is used; the per-step outputs are discarded.
    _, final_state = tf.nn.dynamic_rnn(gru_with_dropout,
                                       self.x,
                                       dtype=tf.float32,
                                       sequence_length=self.seq_length)

    hidden = fc(final_state, self.params['n_hidden_neurons'])
    self.output = fc(hidden, self.params['n_outputs'], activation_fn=None)
def GRULayer(input_tensor, num_layers=1):
    """Run a (possibly stacked) GRU with output dropout over ``input_tensor``.

    Args:
        input_tensor: tensor reshaped to
            ``[-1, ConfigUtil.seq_length, ConfigUtil.hidden_size]``.
        num_layers: number of stacked GRU layers (default 1).

    Returns:
        The per-step outputs of ``dynamic_rnn``; the final state is discarded.
    """
    input_tensor = tf.reshape(
        input_tensor,
        shape=[-1, ConfigUtil.seq_length, ConfigUtil.hidden_size])

    def _make_cell():
        # Each layer needs its own cell: repeating one cell object inside
        # MultiRNNCell makes all layers share a single cell instance.
        gru = GRUCell(num_units=128, kernel_initializer=create_initializer())
        return DropoutWrapper(gru,
                              output_keep_prob=(1 - ConfigUtil.dropout_prob))

    if num_layers > 1:
        cell = MultiRNNCell([_make_cell() for _ in range(num_layers)])
    else:
        cell = _make_cell()

    outputs, _ = dynamic_rnn(cell, input_tensor, dtype=tf.float32)
    return outputs
def one_cell(self, hidden_size, cell_type, keep_prob=0.9):
    """Build one recurrent cell with output dropout and a residual wrapper.

    Args:
        hidden_size: number of units of the cell.
        cell_type: ``'gru'`` selects GRUCell; anything else selects LSTMCell.
        keep_prob: output keep probability of the dropout wrapper.
    """
    if cell_type == 'gru':
        make_cell = GRUCell
    else:
        make_cell = LSTMCell

    with_dropout = DropoutWrapper(make_cell(hidden_size),
                                  dtype=tf.float32,
                                  output_keep_prob=keep_prob)
    return ResidualWrapper(with_dropout)
def get_cell_type(rnn_dim, cell_type, input_dropout, output_dropout, state_dropout):
    """Build an LSTM or GRU cell wrapped with dropout.

    Args:
        rnn_dim: number of units of the cell.
        cell_type: ``'LSTM'`` or ``'GRU'``.
        input_dropout: input dropout rate (keep probability is ``1 - rate``).
        output_dropout: output dropout rate (keep probability is ``1 - rate``).
        state_dropout: state dropout rate (keep probability is ``1 - rate``).

    Returns:
        The cell wrapped in a DropoutWrapper.

    Raises:
        ValueError: if ``cell_type`` is neither ``'LSTM'`` nor ``'GRU'``.
    """
    if cell_type == 'LSTM':
        cell = BasicLSTMCell(num_units=rnn_dim, name="lstmCell")
    elif cell_type == 'GRU':
        cell = GRUCell(num_units=rnn_dim, name="gruCell")
    else:
        # Fail here with a clear message instead of printing and then
        # crashing on the unbound `cell` below.
        raise ValueError(
            "specify encoder cell type in {{LSTM, GRU}}, got {!r}".format(cell_type))

    return DropoutWrapper(cell,
                          input_keep_prob=1 - input_dropout,
                          output_keep_prob=1 - output_dropout,
                          state_keep_prob=1 - state_dropout)
def make_stacked_cells(numLayers_, numCellUnits_):
    """Return a list of ``numLayers_`` LSTM cells of ``numCellUnits_`` units.

    Reads ``USE_RELU`` and ``outputKeepProb`` from the enclosing scope. Each
    cell is wrapped with output dropout unless ``outputKeepProb == 1.0``.
    """
    def _new_lstm():
        activation = tf.nn.relu if USE_RELU else tf.tanh
        return LSTMCell(numCellUnits_, activation=activation)

    # A keep probability of exactly 1.0 makes the wrapper pointless.
    skip_dropout = outputKeepProb == 1.0

    layers = []
    for _ in range(numLayers_):
        layer = _new_lstm()
        if not skip_dropout:
            layer = DropoutWrapper(layer, output_keep_prob=outputKeepProb)
        layers.append(layer)
    return layers
def _build_single_cell(cell_type, num_units, use_dropout, mode, dropout_probability, dtype, device=None):
    r"""
    Build one recurrent cell, optionally wrapped with dropout and pinned to a device.

    :param cell_type: one of 'lstm', 'layernorm_lstm', 'layernorm_basiclstm',
        'gru', 'ugrnn', 'lstm_block', 'gru_block', 'nas', 'lstm_masked'
    :param num_units: `int`
    :param use_dropout: dropout is added only when this is exactly ``True``
        and ``mode == 'train'``
    :param mode: see ``use_dropout``
    :param dropout_probability: indexable; items 0, 1 and 2 are used as the
        input, state and output keep probabilities respectively
    :param dtype: forwarded to the cells and to the dropout wrapper
    :param device: when not ``None``, the cell is wrapped in a ``DeviceWrapper``
    :return: the constructed cell
    :raises Exception: for an unsupported ``cell_type``
    """
    def _base_cell():
        if cell_type == 'lstm':
            return LSTMCell(num_units=num_units,
                            use_peepholes=False,
                            cell_clip=1.0,
                            initializer=tf.variance_scaling_initializer(),
                            dtype=dtype)
        if cell_type == 'layernorm_lstm':
            return LayerNormLSTMCell(num_units=num_units, cell_clip=1.0)
        if cell_type == 'layernorm_basiclstm':
            return LayerNormBasicLSTMCell(num_units=num_units)
        if cell_type == 'gru':
            return GRUCell(num_units=num_units,
                           kernel_initializer=tf.variance_scaling_initializer(),
                           bias_initializer=tf.variance_scaling_initializer(),
                           dtype=dtype)
        if cell_type == 'ugrnn':
            return UGRNNCell(num_units)
        if cell_type == 'lstm_block':
            return LSTMBlockCell(num_units=num_units, use_peephole=True, cell_clip=None)
        if cell_type == 'gru_block':
            return GRUBlockCellV2(num_units=num_units)
        if cell_type == 'nas':
            return NASCell(num_units=num_units)
        if cell_type == 'lstm_masked':
            # Imported lazily so the pruning package is only needed when used.
            from tensorflow.contrib.model_pruning import MaskedLSTMCell
            return MaskedLSTMCell(num_units=num_units)
        raise Exception('cell type not supported: {}'.format(cell_type))

    cell = _base_cell()

    if use_dropout is True and mode == 'train':
        cell = DropoutWrapper(cell,
                              input_keep_prob=dropout_probability[0],
                              state_keep_prob=dropout_probability[1],
                              output_keep_prob=dropout_probability[2],
                              variational_recurrent=False,
                              dtype=dtype)

    if device is not None:
        cell = DeviceWrapper(cell, device=device)

    return cell
def _build_net(self):
    """Build the BiLSTM-CRF graph: embeddings, BiLSTM, projection, CRF loss.

    Sets ``self.global_step``, ``self.x``, ``self.y``, ``self.outputs``,
    ``self.seq_length``, ``self.log_likelihood``, ``self.transition_params``,
    ``self.loss`` and ``self.train_op``. Reads the module-level ``unit_num``,
    ``DROPOUT_RATE`` and ``TAGS_NUM``.
    """
    self.global_step = tf.Variable(0, trainable=False)
    source = self.iterator.source
    target = self.iterator.target_input

    # Longest sequence of the current batch (shorter ones are padded).
    max_sequence_in_batch = tf.to_int32(
        tf.reduce_max(self.iterator.source_sequence_length))

    # x: [batch_size, time_step, embedding_size], float32
    self.x = tf.nn.embedding_lookup(self.embedding, source)
    # y: [batch_size, time_step]
    self.y = target

    # Graph construction.
    cell_forward = tf.contrib.rnn.BasicLSTMCell(unit_num)
    cell_backward = tf.contrib.rnn.BasicLSTMCell(unit_num)
    if DROPOUT_RATE is not None:
        # NOTE(review): DROPOUT_RATE is used as a *keep* probability here —
        # confirm that is what the constant means.
        cell_forward = DropoutWrapper(cell_forward,
                                      input_keep_prob=1.0,
                                      output_keep_prob=DROPOUT_RATE)
        cell_backward = DropoutWrapper(cell_backward,
                                       input_keep_prob=1.0,
                                       output_keep_prob=DROPOUT_RATE)

    rnn_outputs, bi_state = tf.nn.bidirectional_dynamic_rnn(
        cell_forward, cell_backward, self.x, dtype=tf.float32)
    forward_out, backward_out = rnn_outputs
    merged = tf.concat([forward_out, backward_out], axis=2)

    # Projection onto the tag space.
    proj_w = tf.get_variable("projection_w", [2 * unit_num, TAGS_NUM])
    proj_b = tf.get_variable("projection_b", [TAGS_NUM])
    flat = tf.reshape(merged, [-1, 2 * unit_num])
    projection = tf.matmul(flat, proj_w) + proj_b

    # -1 stands for the time-step axis.
    self.outputs = tf.reshape(projection, [self.batch_size, -1, TAGS_NUM])

    # NOTE(review): every sequence is given the batch maximum as its length,
    # so padded positions are included in the CRF likelihood — confirm this
    # is intended.
    self.seq_length = tf.convert_to_tensor(
        self.batch_size * [max_sequence_in_batch], dtype=tf.int32)
    self.log_likelihood, self.transition_params = tf.contrib.crf.crf_log_likelihood(
        self.outputs, self.y, self.seq_length)

    # Training op that tunes the parameters.
    self.loss = tf.reduce_mean(-self.log_likelihood)
    self.train_op = tf.train.AdamOptimizer().minimize(self.loss)
def deep_lstm():
    """Return a MultiRNNCell of ``num_layers`` LSTM cells of ``state_size`` units.

    Reads ``mode``, ``state_size``, ``dropout_prob`` and ``num_layers`` from
    the enclosing scope. State dropout is added only in TRAIN mode.
    """
    training = mode == tf.estimator.ModeKeys.TRAIN

    layers = []
    for _ in range(num_layers):
        layer = LSTMCell(state_size)
        if training:
            layer = DropoutWrapper(layer, state_keep_prob=1. - dropout_prob)
        layers.append(layer)
    return MultiRNNCell(layers)
def _build_net(self):
    """Build the BiLSTM-CRF graph: embeddings, BiLSTM, projection, CRF loss.

    Sets ``self.x``, ``self.y``, ``self.outputs``, ``self.log_likelihood``,
    ``self.transition_params``, ``self.loss`` and ``self.train_op``. Reads
    ``self.embedding``, ``self.input``, ``self.label``, ``self.seq_length``
    and the module-level ``config``.
    """
    # x: [batch_size, time_step, embedding_size], float32
    self.x = tf.nn.embedding_lookup(self.embedding, self.input)
    # y: [batch_size, time_step]
    self.y = self.label

    cell_forward = tf.contrib.rnn.BasicLSTMCell(config.lstm_size)
    cell_backward = tf.contrib.rnn.BasicLSTMCell(config.lstm_size)
    if config.keep_prob is not None:
        # Dropout on the outputs only; inputs are kept untouched.
        cell_forward = DropoutWrapper(cell_forward,
                                      input_keep_prob=1.0,
                                      output_keep_prob=config.keep_prob)
        cell_backward = DropoutWrapper(cell_backward,
                                       input_keep_prob=1.0,
                                       output_keep_prob=config.keep_prob)

    rnn_outputs, bi_state = tf.nn.bidirectional_dynamic_rnn(
        cell_forward, cell_backward, self.x, dtype=tf.float32)
    forward_out, backward_out = rnn_outputs
    merged = tf.concat([forward_out, backward_out], axis=2)

    # Projection onto the label space.
    proj_w = tf.get_variable("projection_w",
                             [2 * config.lstm_size, config.num_labels])
    proj_b = tf.get_variable("projection_b", [config.num_labels])
    flat = tf.reshape(merged, [-1, 2 * config.lstm_size])
    projection = tf.add(tf.matmul(flat, proj_w), proj_b, name='projection')

    # The time-step count is read dynamically from the BiLSTM output.
    nsteps = tf.shape(merged)[1]
    self.outputs = tf.reshape(projection,
                              [-1, nsteps, config.num_labels],
                              name='output')

    self.log_likelihood, self.transition_params = tf.contrib.crf.crf_log_likelihood(
        self.outputs, self.y, self.seq_length)
    # Adding 0 gives the transition matrix a stable, named graph node.
    self.transition_params = tf.add(self.transition_params, 0,
                                    name='transition_params')

    # Training op that tunes the parameters.
    self.loss = tf.reduce_mean(-self.log_likelihood)
    self.train_op = tf.train.AdamOptimizer(config.learning_rate).minimize(
        self.loss)
def single_rnn_cell():
    """Return one recurrent cell wrapped with output dropout.

    Reads ``self`` from the enclosing scope.
    """
    # self.cell_type selects the cell class: GRU when it equals 'GRU',
    # LSTM otherwise.
    cell_class = GRUCell if self.cell_type == 'GRU' else LSTMCell
    core_cell = cell_class(self.rnn_size)

    # DropoutWrapper adds dropout around the given cell; output_keep_prob is
    # the probability of keeping an output, i.e. of not dropping it.
    return DropoutWrapper(core_cell, output_keep_prob=self.keep_prob)
def RNN(x, weights, biases):
    """Run an LSTM with variational dropout over ``x`` and project the last output.

    Args:
        x: input tensor, unstacked into ``timesteps`` steps along axis 1.
        weights: dict whose ``'out'`` entry is the output projection matrix.
        biases: dict whose ``'out'`` entry is the output bias.

    Returns:
        ``outputs[-1] @ weights['out'] + biases['out']``.
    """
    step_inputs = tf.unstack(x, timesteps, 1)

    # NOTE(review): input_size is taken from axis 0 of x_train, which looks
    # like the sample count rather than a feature depth — confirm.
    cell = DropoutWrapper(LSTMBlockCell(n_hidden, forget_bias=1.0),
                          variational_recurrent=True,
                          input_size=x_train.shape[0],
                          state_keep_prob=.7,
                          output_keep_prob=.7,
                          dtype=tf.float32)

    step_outputs, _ = tf.contrib.rnn.static_rnn(cell, step_inputs, dtype=tf.float32)

    # Only the last step feeds the output projection.
    last_output = step_outputs[-1]
    return tf.matmul(last_output, weights['out']) + biases['out']