Example #1
File: listener.py    Project: mangataz/nabu
    def encode(self, inputs, input_seq_length, is_training):
        '''
        Create the variables and do the forward computation

        Args:
            inputs: the inputs to the neural network, this is a dictionary of
                [batch_size x time x ...] tensors
            input_seq_length: The sequence lengths of the input utterances, this
                is a dictionary of [batch_size] vectors
            is_training: whether or not the network is in training mode

        Returns:
            - the outputs of the encoder as a dictionary of
                [batch_size x time x ...] tensors
            - the sequence lengths of the outputs as a dictionary of
                [batch_size] tensors
        '''

        encoded = {}
        encoded_seq_length = {}

        for inp in inputs:
            with tf.variable_scope(inp):
                #add input noise
                std_input_noise = float(self.conf['input_noise'])
                if is_training and std_input_noise > 0:
                    noisy_inputs = inputs[inp] + tf.random_normal(
                        tf.shape(inputs[inp]), stddev=std_input_noise)
                else:
                    noisy_inputs = inputs[inp]

                outputs = noisy_inputs
                output_seq_lengths = input_seq_length[inp]
                for l in range(int(self.conf['num_layers'])):
                    outputs, output_seq_lengths = layer.pblstm(
                        inputs=outputs,
                        sequence_length=output_seq_lengths,
                        num_units=int(self.conf['num_units']),
                        num_steps=int(self.conf['pyramid_steps']),
                        scope='layer%d' % l)

                    if float(self.conf['dropout']) < 1 and is_training:
                        outputs = tf.nn.dropout(outputs,
                                                float(self.conf['dropout']))

                outputs = layer.blstm(inputs=outputs,
                                      sequence_length=output_seq_lengths,
                                      num_units=int(self.conf['num_units']),
                                      scope='layer%d' %
                                      int(self.conf['num_layers']))

                if float(self.conf['dropout']) < 1 and is_training:
                    outputs = tf.nn.dropout(outputs,
                                            float(self.conf['dropout']))

                encoded[inp] = outputs
                encoded_seq_length[inp] = output_seq_lengths

        return encoded, encoded_seq_length
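
In this Listener encoder, num_layers pyramidal BLSTM layers each reduce the time resolution by a factor of pyramid_steps before a final plain BLSTM. layer.pblstm is internal to nabu; the sketch below only illustrates the frame-stacking idea behind it (pyramid_stack is a made-up name, and zero padding of odd-length sequences is an assumption), it is not nabu's implementation.

# Minimal sketch (not nabu's layer.pblstm) of the time reduction a pyramidal
# layer performs: every num_steps consecutive frames are concatenated along
# the feature axis, so the sequence shrinks by that factor.
import numpy as np

def pyramid_stack(inputs, seq_lengths, num_steps=2):
    """inputs: [batch, time, feat] array; returns stacked inputs and new lengths."""
    batch, time, feat = inputs.shape
    # zero-pad the time axis so it is divisible by num_steps (assumed behaviour)
    pad = (-time) % num_steps
    padded = np.pad(inputs, ((0, 0), (0, pad), (0, 0)), mode='constant')
    stacked = padded.reshape(batch, (time + pad) // num_steps, feat * num_steps)
    new_lengths = np.ceil(seq_lengths / float(num_steps)).astype(np.int32)
    return stacked, new_lengths

x = np.random.randn(4, 100, 40).astype(np.float32)
lengths = np.array([100, 73, 50, 91])
y, new_lengths = pyramid_stack(x, lengths, num_steps=2)
print(y.shape, new_lengths)  # (4, 50, 80) [50 37 25 46]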
Example #2
    def encode(self, inputs, input_seq_length, is_training):
        '''
        Create the variables and do the forward computation

        Args:
            inputs: the inputs to the neural network, this is a dictionary of
                [batch_size x time x ...] tensors
            input_seq_length: The sequence lengths of the input utterances, this
                is a dictionary of [batch_size] vectors
            is_training: whether or not the network is in training mode

        Returns:
            - the outputs of the encoder as a dictionary of
                [batch_size x time x ...] tensors
            - the sequence lengths of the outputs as a dictionary of
                [batch_size] tensors
        '''

        #do the forward computation

        encoded = {}
        encoded_seq_length = {}

        for inp in inputs:
            with tf.variable_scope(inp):
                #add gaussian noise to the inputs
                if is_training and float(self.conf['input_noise']) > 0:
                    logits = inputs[inp] + tf.random_normal(
                        tf.shape(inputs[inp]),
                        stddev=float(self.conf['input_noise']))
                else:
                    logits = inputs[inp]

                for l in range(int(self.conf['num_layers'])):

                    logits = layer.blstm(inputs=logits,
                                         sequence_length=input_seq_length[inp],
                                         num_units=int(self.conf['num_units']),
                                         scope='layer' + str(l))

                    if is_training and float(self.conf['dropout']) < 1:
                        logits = tf.nn.dropout(logits,
                                               float(self.conf['dropout']))

                encoded[inp] = logits
                encoded_seq_length[inp] = input_seq_length[inp]

        return encoded, encoded_seq_length
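
Example #2 is the non-pyramidal variant: it stacks plain BLSTM layers, so the sequence lengths are passed through unchanged. All hyperparameters are read as strings from self.conf; the sketch below lists the keys this code reads, with purely illustrative values.

# Illustrative configuration for this encoder; the keys are the ones the code
# reads from self.conf, but the values shown here are only example settings.
conf = {
    'input_noise': '0.6',   # stddev of Gaussian noise added during training
    'num_layers': '2',      # number of stacked BLSTM layers
    'num_units': '128',     # units per direction in each BLSTM
    'dropout': '0.5',       # keep probability passed to tf.nn.dropout
}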
Example #3
    def encode(self, features, len_feas):
        '''
        Create the variables and do the forward computation

        Args:
            features: [batch_size x time x ...] tensor
            len_feas: [batch_size] vector of sequence lengths
                (training mode is read from self.is_train, not from an argument)

        Returns:
            - a [batch_size x time x ...] tensor
            - a [batch_size] tensor
        '''
        num_pblayers = self.args.model.encoder.num_pblayers
        num_blayers = self.args.model.encoder.num_blayers
        num_cell_units = self.args.model.encoder.num_cell_units
        dropout = self.args.model.encoder.dropout

        outputs = features
        output_seq_lengths = len_feas
        # pyramidal BLSTM layers; num_steps=2 halves the time resolution per layer
        for l in range(num_pblayers):
            outputs, output_seq_lengths = layer.pblstm(
                inputs=outputs,
                sequence_length=output_seq_lengths,
                num_units=num_cell_units,
                num_steps=2,
                layer_norm=True,
                scope='en_pblstm_%d' % l)

            if dropout > 0 and self.is_train:
                outputs = tf.nn.dropout(outputs, keep_prob=1.0 - dropout)

        # plain BLSTM layers on top of the pyramidal stack
        for l in range(num_blayers):
            outputs = layer.blstm(inputs=outputs,
                                  sequence_length=output_seq_lengths,
                                  num_units=num_cell_units,
                                  scope='en_blstm_%d' % (l + num_pblayers))

            if dropout > 0 and self.is_train:
                outputs = tf.nn.dropout(outputs, keep_prob=1.0 - dropout)

        return outputs, output_seq_lengths
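
Example #3 is the plain pyramidal Listener: each of the num_pblayers layers halves the time resolution (num_steps=2), so the sequence lengths shrink by roughly a factor of two per layer. A small sketch of that length arithmetic; expected_output_length is a made-up helper, and ceil rounding at every layer is an assumption about layer.pblstm rather than its documented behaviour.

import numpy as np

def expected_output_length(len_feas, num_pblayers, num_steps=2):
    """Rough output lengths after the pyramidal layers (assumes ceil per layer)."""
    lengths = np.asarray(len_feas, dtype=np.int32)
    for _ in range(num_pblayers):
        lengths = np.ceil(lengths / float(num_steps)).astype(np.int32)
    return lengths

print(expected_output_length([1000, 873], num_pblayers=3))  # [125 110]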
Example #4
    def encode(self, features, len_feas):
        '''
        Create the variables and do the forward computation

        Args:
            features: [batch_size x time x ...] tensor
            len_feas: [batch_size] vector of sequence lengths
                (training mode is read from self.is_train, not from an argument)

        Returns:
            - a [batch_size x time x ...] tensor
            - a [batch_size] tensor
        '''
        num_pblayers = self.args.model.encoder.num_pblayers
        num_blayers = self.args.model.encoder.num_blayers
        num_cell_units = self.args.model.encoder.num_cell_units
        dropout = self.args.model.encoder.dropout
        size_feat = self.args.data.dim_input

        # the first cnn layer
        size_batch = tf.shape(features)[0]
        size_length = tf.shape(features)[1]
        x = tf.reshape(features,
                       [size_batch, size_length,
                        int(size_feat / 3), 3])
        x = normal_conv(inputs=x,
                        filter_num=64,
                        kernel=(3, 3),
                        stride=(2, 2),
                        padding='SAME',
                        use_relu=True,
                        name="conv",
                        w_initializer=None,
                        norm_type='layer')
        # conv_output = tf.expand_dims(features, -1)
        # len_sequence = len_feas
        # conv_output, len_sequence, size_feat = conv_layer(
        #     inputs=conv_output,
        #     len_sequence=len_sequence,
        #     size_feat=size_feat,
        #     num_filter=64,
        #     kernel=(3,3),
        #     stride=(2,2),
        #     scope='en_conv_0')

        # the second block: pyramidal BLSTM layers
        size_feat = int(np.ceil(40 / 2)) * 64
        size_length = tf.cast(tf.ceil(tf.cast(size_length, tf.float32) / 2),
                              tf.int32)
        output_seq_lengths = tf.cast(
            tf.ceil(tf.cast(len_feas, tf.float32) / 2), tf.int32)
        outputs = tf.reshape(x, [size_batch, size_length, size_feat])

        for l in range(num_pblayers):
            outputs, output_seq_lengths = layer.pblstm(
                inputs=outputs,
                sequence_length=output_seq_lengths,
                num_units=num_cell_units,
                num_steps=2,
                layer_norm=True,
                scope='en_pblstm_%d' % l)

            if dropout > 0 and self.is_train:
                outputs = tf.nn.dropout(outputs, keep_prob=1.0 - dropout)

        # the third block: plain BLSTM layers
        for l in range(num_blayers):
            outputs = layer.blstm(inputs=outputs,
                                  sequence_length=output_seq_lengths,
                                  num_units=num_cell_units,
                                  scope='en_blstm_%d' % (l + num_pblayers))

            if dropout > 0 and self.is_train:
                outputs = tf.nn.dropout(outputs, keep_prob=1.0 - dropout)

        return outputs, output_seq_lengths
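
Example #4 puts a strided convolutional front end before the pyramidal stack: the size_feat-dimensional input is reshaped into size_feat / 3 frequency bins with 3 channels, a 3x3 convolution with 64 filters and stride (2, 2) halves both the time and frequency axes, and the output is flattened back to ceil(bins / 2) * 64 features per frame. The hard-coded 40 therefore assumes 120-dimensional input features (40 bins x 3 channels, e.g. static plus delta plus delta-delta). A small shape-arithmetic sketch under that assumption; the 537-frame utterance length is just an example.

import math

# Assumed input: 120-dim features = 40 frequency bins x 3 channels,
# matching the hard-coded 40 in the code above.
dim_input = 120
freq_bins = dim_input // 3                    # 40
time_steps = 537                              # example utterance length

# 3x3 conv, 64 filters, stride (2, 2), 'SAME' padding
out_time = int(math.ceil(time_steps / 2.0))   # 269, matches tf.ceil(size_length / 2)
out_freq = int(math.ceil(freq_bins / 2.0))    # 20
size_feat_after_conv = out_freq * 64          # 1280, matches ceil(40 / 2) * 64

print(out_time, size_feat_after_conv)         # 269 1280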