예제 #1
0
    def build_single_cell(self):
        """Create one recurrent cell of the configured type.

        The cell class is selected from ``self.cell_type`` (compared
        case-insensitively): ``'gru'``, ``'indgru'``, ``'indlstm'``,
        ``'grublock'`` or ``'lstm'``. The constructor arguments differ per
        class:

        * ``'lstm'`` receives ``initializer=self.initializer``;
        * ``'grublock'`` receives only the number of units;
        * every other type receives ``kernel_initializer=self.initializer``
          and ``bias_initializer=tf.zeros_initializer``.

        When ``self.use_dropout`` is truthy the cell is wrapped in a
        ``DropoutWrapper`` with ``output_keep_prob=self.keep_prob_ph``.

        Returns:
            The constructed cell, dropout-wrapped if requested.

        Raises:
            ValueError: If ``self.cell_type`` is not one of the supported
                names. (Previously this surfaced as an unbound local
                variable error further down.)
        """
        kind = self.cell_type.lower()

        if kind == 'gru':
            cell_class = GRUCell
        elif kind == 'indgru':
            cell_class = IndyGRUCell
        elif kind == 'indlstm':
            cell_class = IndyLSTMCell
        elif kind == 'grublock':
            cell_class = GRUBlockCell
        elif kind == 'lstm':
            cell_class = LSTMCell
        else:
            raise ValueError(
                "Unsupported cell_type %r; expected one of 'gru', 'indgru', "
                "'indlstm', 'grublock' or 'lstm'." % (self.cell_type,))

        # The cell classes do not share a constructor signature.
        if kind == 'lstm':
            cell = cell_class(self.hidden_units, initializer=self.initializer)
        elif kind == 'grublock':
            cell = cell_class(self.hidden_units)
        else:
            cell = cell_class(self.hidden_units,
                              kernel_initializer=self.initializer,
                              bias_initializer=tf.zeros_initializer)

        if self.use_dropout:
            cell = DropoutWrapper(cell,
                                  dtype=self.dtype,
                                  output_keep_prob=self.keep_prob_ph)
        return cell
예제 #2
0
    def build_single_cell(self, n_hidden, use_residual):
        """Build a single RNN cell.

        :param n_hidden: number of hidden units in the cell
        :param use_residual: whether to wrap the cell in a ResidualWrapper
        :return: the cell, wrapped in DropoutWrapper and/or ResidualWrapper
            depending on ``self.use_dropout`` and ``use_residual``
        """
        # Any cell_type other than 'gru' falls back to an LSTM cell.
        make_cell = GRUCell if self.cell_type == 'gru' else LSTMCell
        rnn_cell = make_cell(n_hidden)

        # Output dropout, intended to reduce overfitting.
        if self.use_dropout:
            dropout_options = dict(
                dtype=tf.float32,
                output_keep_prob=self.keep_prob_placeholder,
                seed=self.seed,  # seed for the dropout random ops
            )
            rnn_cell = DropoutWrapper(rnn_cell, **dropout_options)

        # Residual wrapping, intended to mitigate vanishing or exploding
        # gradients.
        if not use_residual:
            return rnn_cell
        return ResidualWrapper(rnn_cell)
예제 #3
0
    def _apply_droput_wrapper(self):
        """Stack dropout-wrapped cells and create the state tensors.

        Builds ``self.num_layers`` cells via ``self.__new_cell()``, wraps each
        in a ``DropoutWrapper`` using ``self.in_keep_prob`` and
        ``self.out_keep_prob``, and combines them into ``self.multi_cell``.

        Also sets:

        * ``self.initial_state`` -- the result of passing a float32 zero
          state through ``rnn_placeholders`` (helper defined elsewhere);
        * ``self.zero_state`` -- a second, separately created float32 zero
          state for ``self.batch_size``.
        """
        wrapped_layers = [
            DropoutWrapper(self.__new_cell(),
                           input_keep_prob=self.in_keep_prob,
                           output_keep_prob=self.out_keep_prob)
            for _ in range(self.num_layers)
        ]
        self.multi_cell = MultiRNNCell(wrapped_layers)

        state_dtype = tf.float32
        placeholder_source = self.multi_cell.zero_state(self.batch_size,
                                                        state_dtype)
        self.initial_state = rnn_placeholders(placeholder_source)

        self.zero_state = self.multi_cell.zero_state(self.batch_size,
                                                     state_dtype)
    def build(self):
        """Create the LSTM block cell and its dropout-wrapped counterpart.

        Sets ``self.lstm_cell`` (an ``LSTMBlockCell`` with ``self.units``
        units), ``self.va_lstm_cell`` (the same cell inside a
        ``DropoutWrapper`` with ``variational_recurrent=True``) and
        ``self._trainable_weights`` (the variables of ``self.lstm_cell``).
        """
        # Peepholes are always enabled here; self.peephole is not consulted.
        # A custom random-uniform initializer (self.minval / self.maxval) was
        # left disabled by the original author.
        self.lstm_cell = LSTMBlockCell(self.units, use_peephole=True)

        # The same keep probability is used for inputs, outputs and state.
        keep = 0.7
        self.va_lstm_cell = DropoutWrapper(
            self.lstm_cell,
            variational_recurrent=True,
            input_keep_prob=keep,
            output_keep_prob=keep,
            state_keep_prob=keep,
            dtype=tf.float32,
            input_size=self.inputSize,
        )

        # Run the wrapped cell once on a random (1, 1, inputSize) input and
        # discard the result -- presumably so that the cell's variables exist
        # before they are read below (TODO confirm).
        probe_input = tf.random_normal((1, 1, self.inputSize))
        tf.nn.dynamic_rnn(self.va_lstm_cell, probe_input, dtype=tf.float32)

        self._trainable_weights = self.lstm_cell.variables
예제 #5
0
    def build_decoder(self, encoder_outputs, encoder_final_state,
                      decoder_inputs, decoder_targets, decoder_lengths,
                      encoder_input_lengths):
        """Builds an RNN decoder.

        Can also use dropout and an attention mechanism.

        The training mode may be spelled ``"train"`` or ``"training"``: this
        method previously checked ``"training"`` for dropout but ``"train"``
        for the decoding helper, so no single mode value enabled both. Both
        spellings now select dropout *and* the training helper.

        Args:
            encoder_outputs: Encoder outputs. They are transposed with
                ``[1, 0, 2]`` before being used as attention memory
                (presumably time-major on input -- confirm with the encoder).
            encoder_final_state: Final encoder state, used as the decoder's
                initial state (as the cell state when attention is enabled).
            decoder_inputs: Ids looked up in the ARPA embedding matrix and fed
                to the training helper with ``time_major=True``.
            decoder_targets: Not used by this method.
            decoder_lengths: Sequence lengths given to the training helper.
            encoder_input_lengths: Lengths used to mask the attention memory
                and to cap decoding at twice the longest input.

        Returns:
            A ``(logits, predictions_arpa)`` tuple. ``logits`` is the decoder's
            ``rnn_output`` (produced with ``output_time_major=True``);
            ``predictions_arpa`` is the argmax over axis 2 of ``logits``,
            transposed.

        Raises:
            ValueError: If ``self.mode`` is not ``"train"``, ``"training"`` or
                ``"inference"``.
        """
        # Fail early and clearly instead of hitting an unbound `helper` later.
        is_training = self.mode in ("train", "training")
        if not is_training and self.mode != "inference":
            raise ValueError(
                "Unknown mode %r; expected 'train', 'training' or "
                "'inference'." % (self.mode,))

        with tf.variable_scope("decoder"):

            # Embeddings for ARPA phonetic characters
            arpa_embeddings = tf.Variable(tf.random_uniform(
                (self.n_arpa, self.embed_dims), -1.0, 1.0),
                                          name="arpa_embeddings")
            decoder_input_embeddings = tf.nn.embedding_lookup(
                arpa_embeddings, decoder_inputs)

            # Dense layer that each timestep output is sent to
            with tf.variable_scope("projection"):
                projection_layer = tf.layers.Dense(self.n_arpa, use_bias=False)

            # Cell definition with dropout for training.
            # A bidirectional encoder concatenates both directions, so the
            # decoder needs twice the width to accept its final state.
            decoder_dims = self.hidden_dims
            if self.bidir:
                decoder_dims *= 2
            decoder_cell = self.cell_class_fn(decoder_dims)
            if is_training:
                keep_prob = 1.0 - self.dropout
                decoder_cell = DropoutWrapper(
                    decoder_cell,
                    input_keep_prob=keep_prob,
                    output_keep_prob=keep_prob,
                    state_keep_prob=keep_prob)

            # Attention wrapper and decoder initial state
            if self.attention_fn is not None:
                attention_states = tf.transpose(encoder_outputs, [1, 0, 2])
                attention_mechanism = self.attention_fn(
                    decoder_dims,
                    attention_states,
                    memory_sequence_length=encoder_input_lengths)

                decoder_cell = tf.contrib.seq2seq.AttentionWrapper(
                    decoder_cell,
                    attention_mechanism,
                    attention_layer_size=decoder_dims)

                # The attention wrapper has its own state structure; start
                # from zeros and substitute the encoder state as cell state.
                decoder_initial_state = decoder_cell.zero_state(
                    self.batch_size,
                    tf.float32).clone(cell_state=encoder_final_state)
            else:
                decoder_initial_state = encoder_final_state

            # Define helper
            if is_training:
                # Input at each timestep is label ARPA phonetic sequence
                helper = tf.contrib.seq2seq.TrainingHelper(
                    inputs=decoder_input_embeddings,
                    sequence_length=decoder_lengths,
                    time_major=True)
            else:
                # Inference argmax predictions are inputs to the next timestep
                start_tokens = tf.fill([self.batch_size], START_CODE)
                helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
                    arpa_embeddings, start_tokens, END_CODE)

            my_decoder = tf.contrib.seq2seq.BasicDecoder(
                decoder_cell,
                helper,
                decoder_initial_state,
                output_layer=projection_layer)

            # Inference predictions are limited to 2 times the input sequence length
            maximum_iterations = tf.round(
                tf.reduce_max(encoder_input_lengths) * 2)

            # Decoding loop output
            outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(
                my_decoder,
                output_time_major=True,
                impute_finished=True,
                maximum_iterations=maximum_iterations)
            logits = outputs.rnn_output

            # Transposed so that not time major
            predictions_arpa = tf.transpose(tf.argmax(logits, 2))

            return logits, predictions_arpa
예제 #6
0
    def build_encoder(self, encoder_inputs, encoder_input_lengths):
        """Builds an RNN encoder.

        Can be configured to be uni- / bi-directional (``self.bidir``) and can
        enable dropout in training mode.

        Training mode is recognised as either ``"train"`` or ``"training"``:
        the decoder of this model selects its training helper with
        ``"train"``, so checking only ``"training"`` here meant encoder
        dropout was never applied in a mode the decoder could run in.

        Both branches pass ``encoder_input_lengths`` to the RNN so that the
        final state is taken at each sequence's last real step; previously the
        unidirectional branch ignored the lengths.

        Args:
            encoder_inputs: Ids looked up in the character embedding matrix;
                the RNNs are run with ``time_major=True``.
            encoder_input_lengths: Per-sequence lengths passed as
                ``sequence_length`` to the RNN.

        Returns:
            A ``(encoder_outputs, encoder_final_state)`` tuple. In the
            bidirectional case the forward and backward outputs are
            concatenated on the last axis, and the final ``c`` / ``h`` states
            are concatenated on axis 1 into a single ``LSTMStateTuple`` (so
            that branch requires cells whose state has ``.c`` and ``.h``).
        """
        is_training = self.mode in ("train", "training")

        def new_cell():
            """Create one encoder cell, dropout-wrapped when training."""
            cell = self.cell_class_fn(self.hidden_dims)
            if is_training:
                keep_prob = 1.0 - self.dropout
                cell = DropoutWrapper(cell,
                                      input_keep_prob=keep_prob,
                                      output_keep_prob=keep_prob,
                                      state_keep_prob=keep_prob)
            return cell

        with tf.variable_scope("encoder"):

            # Embeddings for orthographic characters
            char_embeddings = tf.Variable(tf.random_uniform(
                (self.n_chars, self.embed_dims), -1.0, 1.0),
                                          name="char_embeddings")
            encoder_input_embeddings = tf.nn.embedding_lookup(
                char_embeddings, encoder_inputs)

            # Unidirectional Run
            if not self.bidir:
                encoder_cell = new_cell()
                encoder_outputs, encoder_final_state = tf.nn.dynamic_rnn(
                    encoder_cell,
                    encoder_input_embeddings,
                    sequence_length=encoder_input_lengths,
                    dtype=tf.float32,
                    time_major=True)

            # Bidirectional Run
            else:
                with tf.variable_scope("fw"):
                    fw_encoder_cell = new_cell()
                with tf.variable_scope("bw"):
                    bw_encoder_cell = new_cell()

                ((encoder_fw_outputs, encoder_bw_outputs),
                 (encoder_fw_final_state,
                  encoder_bw_final_state)) = (tf.nn.bidirectional_dynamic_rnn(
                      cell_fw=fw_encoder_cell,
                      cell_bw=bw_encoder_cell,
                      inputs=encoder_input_embeddings,
                      sequence_length=encoder_input_lengths,
                      dtype=tf.float32,
                      time_major=True))

                # Concat final states of forward and backward run
                encoder_final_state_c = tf.concat(
                    (encoder_fw_final_state.c, encoder_bw_final_state.c), 1)
                encoder_final_state_h = tf.concat(
                    (encoder_fw_final_state.h, encoder_bw_final_state.h), 1)
                encoder_final_state = LSTMStateTuple(c=encoder_final_state_c,
                                                     h=encoder_final_state_h)
                encoder_outputs = tf.concat(
                    (encoder_fw_outputs, encoder_bw_outputs), -1)

            return encoder_outputs, encoder_final_state
예제 #7
0
# Graph inputs. `x` is a rank-2 int32 placeholder (presumably
# [batch, time] token ids -- confirm against the feeding code), `y` holds one
# int64 label per example and `sequence_length` one int32 length per example.
x = tf.placeholder(dtype=tf.int32, shape=[None, None])
y = tf.placeholder(dtype=tf.int64, shape=[None])
sequence_length = tf.placeholder(dtype=tf.int32, shape=[None])
keep_prob = tf.placeholder(dtype=tf.float32)
num_units = 100
n_epoch = 100

with tf.variable_scope('embedding'):
    rnn_input = tf.contrib.layers.embed_sequence(x,
                                                 vocab_size=embed_ingred_size,
                                                 embed_dim=embed_size)

with tf.variable_scope('rnn'):
    # Build a separate cell object for every layer. Putting the same cell
    # instance into MultiRNNCell several times makes the layers share one
    # object, which TensorFlow rejects or silently turns into weight sharing.
    cell = MultiRNNCell([
        DropoutWrapper(GRUCell(num_units), output_keep_prob=keep_prob)
        for _ in range(num_layers)
    ])

    outputs, states = tf.nn.dynamic_rnn(cell,
                                        rnn_input,
                                        dtype=tf.float32,
                                        sequence_length=sequence_length)
    # Note on the return values:
    # 'outputs' is a tensor of shape [batch_size, max_time, num_units].
    # 'states' is an N-tuple, where N is the number of stacked GRU cells,
    # holding the final state of each layer.

with tf.variable_scope('full_connected'):
    state = states[-1]
    fc = tf.contrib.layers.fully_connected(state,
                                           num_class,
# Graph inputs and hyper-parameters (`x` is defined outside this excerpt).
y = tf.placeholder(tf.int64, shape=[None])
sequence_length = tf.placeholder(tf.int32, shape=[None])
keep_prob = tf.placeholder(tf.float32)
num_units = 100
n_epoch = 3000

with tf.variable_scope('embedding'):
    rnn_input = tf.contrib.layers.embed_sequence(
        x, vocab_size=embed_ingred_size, embed_dim=embed_size)

with tf.variable_scope('rnn'):
    # Each direction gets its own stack of `num_layers` GRU cells, every cell
    # wrapped with output dropout before the stack is combined.
    with tf.variable_scope('forward'):
        fw_cells = MultiRNNCell([
            DropoutWrapper(GRUCell(num_units), output_keep_prob=keep_prob)
            for _ in range(num_layers)
        ])

    with tf.variable_scope('Backward'):
        bw_cells = MultiRNNCell([
            DropoutWrapper(GRUCell(num_units), output_keep_prob=keep_prob)
            for _ in range(num_layers)
        ])

    outputs, states = bidirectional_dynamic_rnn(
        fw_cells,
        bw_cells,