Example #1
    def _get_outputs(self, inputs, input_seq_length, is_training):
        '''
        Create the variables and do the forward computation

        Args:
            inputs: the inputs to the neural network, this is a list of
                [batch_size x time x ...] tensors
            input_seq_length: The sequence lengths of the input utterances, this
                is a [batch_size] vector
            is_training: whether or not the network is in training mode

        Returns:
            - output, which is a [batch_size x time x ...] tensor
        '''

        #the blstm layer
        num_units = int(self.conf['num_units'])
        layer_norm = self.conf['layer_norm'] == 'True'
        recurrent_dropout = float(self.conf['recurrent_dropout'])
        if 'activation_fn' in self.conf:
            if self.conf['activation_fn'] == 'tanh':
                activation_fn = tf.nn.tanh
            elif self.conf['activation_fn'] == 'relu':
                activation_fn = tf.nn.relu
            elif self.conf['activation_fn'] == 'sigmoid':
                activation_fn = tf.nn.sigmoid
            else:
                raise Exception('Undefined activation function: %s' %
                                self.conf['activation_fn'])
        else:
            activation_fn = tf.nn.tanh

        blstm = layer.BLSTMLayer(num_units=num_units,
                                 layer_norm=layer_norm,
                                 recurrent_dropout=recurrent_dropout,
                                 activation_fn=activation_fn)

        #code not available for multiple inputs!!
        if len(inputs) > 1:
            raise Exception(
                'The implementation of DBLSTM expects 1 input and not %d' %
                len(inputs))
        else:
            inputs = inputs[0]

        with tf.variable_scope(self.scope):
            if is_training and float(self.conf['input_noise']) > 0:
                inputs = inputs + tf.random_normal(
                    tf.shape(inputs), stddev=float(self.conf['input_noise']))

            logits = inputs

            for l in range(int(self.conf['num_layers'])):
                logits = blstm(logits, input_seq_length, 'layer' + str(l))

                if is_training and float(self.conf['dropout']) < 1:
                    logits = tf.nn.dropout(logits, float(self.conf['dropout']))

            output = logits

        return output
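
All hyperparameters above are read as strings from self.conf. As a minimal sketch, an illustrative configuration for this example could look as follows; the key names are the ones the code reads, the values are assumptions and not taken from an actual recipe.

# Hypothetical configuration (values are illustrative only).
conf = {
    'num_units': '256',          # units passed to layer.BLSTMLayer
    'num_layers': '3',           # number of stacked BLSTM layers
    'layer_norm': 'False',       # 'True' enables layer normalization
    'recurrent_dropout': '1.0',  # recurrent dropout setting of the BLSTM layer
    'activation_fn': 'tanh',     # one of 'tanh', 'relu', 'sigmoid'
    'input_noise': '0.0',        # stddev of Gaussian noise added during training
    'dropout': '1.0',            # keep probability applied after every layer
}
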
Example #2
    def _get_outputs(self, inputs, input_seq_length, is_training):
        '''
        Create the variables and do the forward computation

        Args:
            inputs: the inputs to the neural network, this is a dictionary of
                [batch_size x time x ...] tensors
            input_seq_length: The sequence lengths of the input utterances, this
                is a dictionary of [batch_size] vectors
            is_training: whether or not the network is in training mode

        Returns:
            - outputs, which is a dictionary of [batch_size x time x ...]
                tensors
        '''

        #the blstm layer
        blstm = layer.BLSTMLayer(num_units=int(self.conf['num_units']),
                                 layer_norm=self.conf['layer_norm'] == 'True',
                                 recurrent_dropout=float(
                                     self.conf['recurrent_dropout']))

        #do the forward computation
        outputs = {}

        with tf.variable_scope(self.scope):
            for inp in inputs:
                if is_training and float(self.conf['input_noise']) > 0:
                    inputs[inp] = inputs[inp] + tf.random_normal(
                        tf.shape(inputs[inp]),
                        stddev=float(self.conf['input_noise']))

            #code not available for multiple inputs
            for o in self.output_dims:
                logits = list(inputs.values())[0]

                for l in range(int(self.conf['num_layers'])):
                    logits = blstm(logits,
                                   list(input_seq_length.values())[0],
                                   'layer' + str(l))

                if is_training and float(self.conf['dropout']) < 1:
                    logits = tf.nn.dropout(logits, float(self.conf['dropout']))

                output = tf.contrib.layers.linear(
                    inputs=logits,
                    num_outputs=self.output_dims[o],
                    scope='outlayer')

                outputs[o] = output

        return outputs
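
This variant takes dictionaries rather than lists and adds one linear output layer per entry in self.output_dims. A sketch of plausible arguments; the feature name, shapes and output dimension are assumptions.

import numpy as np

# Illustrative arguments (names, shapes and dimensions are assumptions).
inputs = {'features': np.zeros([16, 100, 40], dtype=np.float32)}     # [batch_size x time x feat]
input_seq_length = {'features': np.full([16], 100, dtype=np.int32)}  # [batch_size]
output_dims = {'characters': 48}  # one linear output layer of 48 units
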
Example #3
    def _get_outputs(self, inputs, input_seq_length, is_training):
        """
		Create the variables and do the forward computation
		Args:
			inputs: the inputs to the neural network, this is a list of
				[batch_size x time x ...] tensors
			input_seq_length: The sequence lengths of the input utterances, this
				is a [batch_size] vector
			is_training: whether or not the network is in training mode
		Returns:
			- output, which is a [batch_size x time x ...] tensors
		"""

        # the blstm layer
        num_layers = int(self.conf['num_layers'])
        num_units_first_layer = int(self.conf['num_units'])
        if 'fac_per_layer' in self.conf:
            fac_per_layer = float(self.conf['fac_per_layer'])
        else:
            fac_per_layer = 1.0
        num_units = [
            int(math.ceil(num_units_first_layer * (fac_per_layer**l)))
            for l in range(num_layers)
        ]

        layer_norm = self.conf['layer_norm'] == 'True'
        recurrent_dropout = float(self.conf['recurrent_dropout'])
        if 'activation_fn' in self.conf:
            if self.conf['activation_fn'] == 'tanh':
                activation_fn = tf.nn.tanh
            elif self.conf['activation_fn'] == 'relu':
                activation_fn = tf.nn.relu
            elif self.conf['activation_fn'] == 'sigmoid':
                activation_fn = tf.nn.sigmoid
            else:
                raise Exception('Undefined activation function: %s' %
                                self.conf['activation_fn'])
        else:
            activation_fn = tf.nn.tanh

        separate_directions = False
        if 'separate_directions' in self.conf and self.conf[
                'separate_directions'] == 'True':
            separate_directions = True

        blstm_layers = []
        for l in range(num_layers):
            blstm_layers.append(
                layer.BLSTMLayer(num_units=num_units[l],
                                 layer_norm=layer_norm,
                                 recurrent_dropout=recurrent_dropout,
                                 activation_fn=activation_fn,
                                 separate_directions=separate_directions,
                                 fast_version=False))

        # code not available for multiple inputs!!
        if len(inputs) > 1:
            raise Exception(
                'The implementation of DBLSTM expects 1 input and not %d' %
                len(inputs))
        else:
            inputs = inputs[0]
        if num_layers == 0:
            output = inputs
            return output

        with tf.variable_scope(self.scope):
            if is_training and float(self.conf['input_noise']) > 0:
                inputs = inputs + tf.random_normal(
                    tf.shape(inputs), stddev=float(self.conf['input_noise']))

            logits = inputs

            if separate_directions:
                logits = (logits, logits)

            for l in range(num_layers):
                logits = blstm_layers[l](logits, input_seq_length,
                                         'layer' + str(l))

                if is_training and float(self.conf['dropout']) < 1:
                    logits = tf.nn.dropout(logits, float(self.conf['dropout']))

            output = logits
            if separate_directions:
                output = tf.concat(output, 2)

        return output
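
The per-layer unit counts shrink or grow geometrically with fac_per_layer: num_units[l] = ceil(num_units * fac_per_layer**l). A small worked example with illustrative values.

import math

# Worked example of the per-layer unit computation (values are illustrative).
num_units_first_layer = 512
fac_per_layer = 0.5
num_layers = 3
num_units = [int(math.ceil(num_units_first_layer * (fac_per_layer**l)))
             for l in range(num_layers)]
print(num_units)  # [512, 256, 128]
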
Example #4
	def _get_outputs(self, inputs, input_seq_length, is_training):
		"""
		Create the variables and do the forward computation

		Args:
			inputs: the inputs to the neural network, this is a list of
				[batch_size x time x ...] tensors
			input_seq_length: The sequence lengths of the input utterances, this
				is a [batch_size] vector
			is_training: whether or not the network is in training mode

		Returns:
			- output, which is a [batch_size x time x ...] tensor
		"""

		# the blstm layer
		num_layers = int(self.conf['num_layers'])
		num_units_first_layer = int(self.conf['num_units'])
		if 'fac_per_layer' in self.conf:
			fac_per_layer = float(self.conf['fac_per_layer'])
		else:
			fac_per_layer = 1.0
		num_units = [
			int(math.ceil(num_units_first_layer*(fac_per_layer**l)))
			for l in range(num_layers)]

		layer_norm = self.conf['layer_norm'] == 'True'
		recurrent_dropout = float(self.conf['recurrent_dropout'])
		if 'activation_fn' in self.conf:
			if self.conf['activation_fn'] == 'tanh':
				activation_fn = tf.nn.tanh
			elif self.conf['activation_fn'] == 'relu':
				activation_fn = tf.nn.relu
			elif self.conf['activation_fn'] == 'sigmoid':
				activation_fn = tf.nn.sigmoid
			else:
				raise Exception('Undefined activation function: %s' % self.conf['activation_fn'])
		else:
			activation_fn = tf.nn.tanh

		# Taking only the last frame output makes less sense in a bi-directional network
		only_last_frame = 'only_last_frame' in self.conf and self.conf['only_last_frame'] == 'True'

		separate_directions = False
		if 'separate_directions' in self.conf and self.conf['separate_directions'] == 'True':
			separate_directions = True

		allow_more_than_3dim = False
		if 'allow_more_than_3dim' in self.conf and self.conf['allow_more_than_3dim'] == 'True':
			# Assuming time dimension is one to last
			allow_more_than_3dim = True

		blstm_layers = []
		for l in range(num_layers):
			blstm_layers.append(layer.BLSTMLayer(
				num_units=num_units[l],
				layer_norm=layer_norm,
				recurrent_dropout=recurrent_dropout,
				activation_fn=activation_fn,
				separate_directions=separate_directions,
				fast_version=False))
	
		# code not available for multiple inputs!!
		if len(inputs) > 1:
			raise Exception('The implementation of DBLSTM expects 1 input and not %d' % len(inputs))
		else:
			inputs = inputs[0]
		if num_layers == 0:
			output = inputs
			return output

		with tf.variable_scope(self.scope):
			if is_training and float(self.conf['input_noise']) > 0:
				inputs = inputs + tf.random_normal(
					tf.shape(inputs),
					stddev=float(self.conf['input_noise']))

			input_shape = inputs.get_shape()
			input_reshaped = False
			if len(input_shape) > 3:
				if allow_more_than_3dim:
					batch_size = input_shape[0]
					other_dims = input_shape[1:-2]
					num_inp_units = input_shape[-1]
					inputs = tf.reshape(inputs, [batch_size * np.prod(other_dims), -1, num_inp_units])
					input_seq_length = tf.expand_dims(input_seq_length, -1)
					input_seq_length = tf.tile(input_seq_length, [1, np.prod(other_dims)])
					input_seq_length = tf.reshape(input_seq_length, [-1])
					input_reshaped = True
				else:
					raise BaseException('Input has too many dimensions')

			logits = inputs

			if separate_directions:
				logits = (logits, logits)

			for l in range(num_layers):
				logits = blstm_layers[l](logits, input_seq_length, 'layer' + str(l))

				if is_training and float(self.conf['dropout']) < 1:
					logits = tf.nn.dropout(logits, float(self.conf['dropout']))

			output = logits

			if separate_directions:
				output = tf.concat(output, 2)

			if input_reshaped:
				output_shape = output.get_shape()
				num_output_units = output_shape[-1]
				output = tf.reshape(output, tf.stack([batch_size] + other_dims.as_list() + [-1] + [num_output_units], 0))

			if only_last_frame:
				output_rank = len(output.get_shape())
				if output_rank == 3:
					output = output[:, -1, :]
				elif output_rank == 4 and allow_more_than_3dim:
					output = output[:, :, -1, :]
				else:
					raise BaseException('Not yet implemented for rank different from 3 (or 4)')

		return output
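
When allow_more_than_3dim is set, the extra dimensions are folded into the batch dimension before the BLSTM and the sequence lengths are tiled to match. A NumPy sketch of that shape manipulation, with assumed shapes.

import numpy as np

# [batch x other x time x feat] is folded to [batch*other x time x feat],
# and the per-utterance lengths are repeated for every 'other' slice.
batch_size, other_dim, time_dim, feat_dim = 2, 3, 5, 4
inputs = np.zeros([batch_size, other_dim, time_dim, feat_dim])
input_seq_length = np.array([5, 4])

folded = inputs.reshape([batch_size * other_dim, time_dim, feat_dim])
tiled_seq_length = np.tile(input_seq_length[:, None], [1, other_dim]).reshape([-1])
print(folded.shape)      # (6, 5, 4)
print(tiled_seq_length)  # [5 5 5 4 4 4]
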
Example #5
    def _get_outputs(self, inputs):
        """
		Create the variables and do the forward computation

		Args:
			inputs: see NTTstate

		Returns:
			- output, which is a [batch_size x time x ...] tensors
		"""
        print('I can probably just reuse the concat, dblstm and feedforward models')
        num_layers = 2
        num_units = [600, 600]
        layer_norm = False
        recurrent_dropout = 1.0
        activation_fn = tf.nn.tanh

        blstm_layers = []
        for l in range(num_layers):
            blstm_layers.append(
                layer.BLSTMLayer(num_units=num_units[l],
                                 layer_norm=layer_norm,
                                 recurrent_dropout=recurrent_dropout,
                                 activation_fn=activation_fn))

        next_iter_count = inputs.iter_count + 1
        input_spec = inputs.input_spec
        input_seq_length = inputs.input_seq_length
        res_mask = inputs.res_mask
        all_masks = inputs.all_masks

        logits = tf.concat([input_spec, res_mask], -1)

        for l in range(num_layers):
            logits = blstm_layers[l](logits, input_seq_length,
                                     'layer' + str(l))

        mask = tf.contrib.layers.fully_connected(inputs=logits,
                                                 num_outputs=129,
                                                 activation_fn=tf.nn.sigmoid)

        new_res_mask = res_mask - mask
        new_res_mask = tf.clip_by_value(new_res_mask, 0, 1)

        all_masks_shape = all_masks.get_shape()
        all_masks = tf.concat([
            all_masks[:, :, :, :inputs.iter_count],
            tf.expand_dims(mask, -1), all_masks[:, :, :,
                                                inputs.iter_count + 1:]
        ],
                              axis=-1)
        all_masks.set_shape(all_masks_shape)
        # all_masks=tf.scatter_update(
        # 	ref=all_masks,
        # 	indices=inputs.iter_count,
        # 	updates=mask
        # )
        # tf.assign(all_masks[inputs.iter_count], mask)
        # all_masks[inputs.iter_count] = mask

        new_ntt_state = NTTState(iter_count=next_iter_count,
                                 input_spec=input_spec,
                                 input_seq_length=input_seq_length,
                                 res_mask=new_res_mask,
                                 all_masks=all_masks)
        return new_ntt_state
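
The concat above emulates the commented-out tf.scatter_update: the slice at index iter_count along the last axis of all_masks is replaced by the freshly estimated mask. A NumPy sketch of the same update, with assumed shapes.

import numpy as np

# Replace slice i along the last axis by concatenating around it.
all_masks = np.zeros([2, 10, 129, 4])  # [batch x time x freq x num_iterations]
mask = np.ones([2, 10, 129])
i = 1
all_masks = np.concatenate(
    [all_masks[..., :i], mask[..., None], all_masks[..., i + 1:]], axis=-1)
print(all_masks.shape)  # (2, 10, 129, 4)
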
Example #6
    def _get_outputs(self, inputs, input_seq_length, is_training):
        """
		Create the variables and do the forward computation

		Args:
			inputs: the inputs to the neural network, this is a list of
				[batch_size x time x ...] tensors
			input_seq_length: The sequence lengths of the input utterances, this
				is a [batch_size] vector
			is_training: whether or not the network is in training mode

		Returns:
			- output, which is a [batch_size x time x ...] tensors
		"""

        # CNN hyper params
        if 'filters' in self.conf:
            kernel_size_lay1 = list(map(int, self.conf['filters'].split(' ')))
        elif 'filter_size_t' in self.conf and 'filter_size_f' in self.conf:
            kernel_size_t_lay1 = int(self.conf['filter_size_t'])
            kernel_size_f_lay1 = int(self.conf['filter_size_f'])
            kernel_size_lay1 = [kernel_size_t_lay1, kernel_size_f_lay1]
        else:
            raise ValueError('Kernel convolution size not specified.')
        if 'filter_size_t' in self.conf and 'filter_size_f' in self.conf:
            kernel_size_t_fac_after_pool = float(
                self.conf['filter_size_t_fac_after_pool'])
            kernel_size_f_fac_after_pool = float(
                self.conf['filter_size_f_fac_after_pool'])
            kernel_fac_after_pool = [
                kernel_size_t_fac_after_pool, kernel_size_f_fac_after_pool
            ]
        else:
            kernel_fac_after_pool = [1, 1]

        f_pool_rate = int(self.conf['f_pool_rate'])
        t_pool_rate = int(self.conf['t_pool_rate'])
        num_encoder_layers = int(self.conf['num_encoder_layers'])
        if t_pool_rate <= num_encoder_layers:
            raise BaseException(
                'Expecting that no time pooling takes place. The input sequence length tensor for the LSTM part '
                'needs to be adapted if time pooling is wanted')
        num_decoder_layers = num_encoder_layers
        num_filters_1st_layer = int(self.conf['num_filters_1st_layer'])
        fac_per_layer = float(self.conf['fac_per_layer'])
        num_filters_enc = [
            int(math.ceil(num_filters_1st_layer * (fac_per_layer**l)))
            for l in range(num_encoder_layers)
        ]
        num_filters_dec = num_filters_enc[::-1]
        num_filters_dec = num_filters_dec[1:] + [(int(
            self.conf['num_output_filters']))]

        kernel_size_enc = []
        ideal_kernel_size_enc = [kernel_size_lay1]

        bypass = self.conf['bypass']

        layer_norm = self.conf['layer_norm'] == 'True'

        if 'activation_fn' in self.conf:
            if self.conf['activation_fn'] == 'tanh':
                activation_fn = tf.nn.tanh
            elif self.conf['activation_fn'] == 'relu':
                activation_fn = tf.nn.relu
            elif self.conf['activation_fn'] == 'sigmoid':
                activation_fn = tf.nn.sigmoid
            else:
                raise Exception('Undefined activation function: %s' %
                                self.conf['activation_fn'])
        else:
            activation_fn = tf.nn.relu

        # LSTM hyper parameters
        lstm_num_layers = int(self.conf['lstm_num_layers'])
        lstm_num_units_first_layer = int(self.conf['lstm_num_units'])
        if 'lstm_fac_per_layer' in self.conf:
            lstm_fac_per_layer = float(self.conf['lstm_fac_per_layer'])
        else:
            lstm_fac_per_layer = 1.0
        lstm_num_units = [
            int(math.ceil(lstm_num_units_first_layer *
                          (lstm_fac_per_layer**l)))
            for l in range(lstm_num_layers)
        ]

        recurrent_dropout = float(self.conf['recurrent_dropout'])
        if 'lstm_activation_fn' in self.conf:
            if self.conf['lstm_activation_fn'] == 'tanh':
                lstm_activation_fn = tf.nn.tanh
            elif self.conf['lstm_activation_fn'] == 'relu':
                lstm_activation_fn = tf.nn.relu
            elif self.conf['lstm_activation_fn'] == 'sigmoid':
                lstm_activation_fn = tf.nn.sigmoid
            else:
                raise Exception('Undefined LSTM activation function: %s' %
                                self.conf['lstm_activation_fn'])
        else:
            lstm_activation_fn = tf.nn.tanh

        separate_directions = False
        if 'separate_directions' in self.conf and self.conf[
                'separate_directions'] == 'True':
            separate_directions = True

        # the encoder layers
        encoder_layers = []
        for l in range(num_encoder_layers):
            kernel_size_l = copy.deepcopy(ideal_kernel_size_enc[l])
            kernel_size_l_plus_1 = kernel_size_l
            kernel_size_l = [int(math.ceil(k)) for k in kernel_size_l]
            kernel_size_enc.append(kernel_size_l)

            num_filters_l = num_filters_enc[l]

            max_pool_filter = [1, 1]
            if np.mod(l + 1, t_pool_rate) == 0:
                max_pool_filter[0] = 2
                kernel_size_l_plus_1[
                    0] = kernel_size_l_plus_1[0] * kernel_fac_after_pool[0]
            if np.mod(l + 1, f_pool_rate) == 0:
                max_pool_filter[1] = 2
                kernel_size_l_plus_1[
                    1] = kernel_size_l_plus_1[1] * kernel_fac_after_pool[1]
            ideal_kernel_size_enc.append(kernel_size_l_plus_1)

            encoder_layers.append(
                layer.Conv2D(num_filters=num_filters_l,
                             kernel_size=kernel_size_l,
                             strides=(1, 1),
                             padding='same',
                             activation_fn=activation_fn,
                             layer_norm=layer_norm,
                             max_pool_filter=max_pool_filter))

        # the centre LSTM layers
        blstm_layers = []
        for l in range(lstm_num_layers):
            blstm_layers.append(
                layer.BLSTMLayer(num_units=lstm_num_units[l],
                                 layer_norm=layer_norm,
                                 recurrent_dropout=recurrent_dropout,
                                 activation_fn=lstm_activation_fn,
                                 separate_directions=separate_directions,
                                 fast_version=False))

        # the decoder layers
        decoder_layers = []
        for l in range(num_decoder_layers):
            corresponding_encoder_l = num_encoder_layers - 1 - l
            num_filters_l = num_filters_dec[l]
            kernel_size_l = kernel_size_enc[corresponding_encoder_l]
            if bypass == 'unpool':
                strides = [1, 1]
            else:
                strides = encoder_layers[
                    corresponding_encoder_l].max_pool_filter

            decoder_layers.append(
                layer.Conv2D(num_filters=num_filters_l,
                             kernel_size=kernel_size_l,
                             strides=strides,
                             padding='same',
                             activation_fn=activation_fn,
                             layer_norm=layer_norm,
                             max_pool_filter=(1, 1),
                             transpose=True))

        # code not available for multiple inputs!!
        if len(inputs) > 1:
            raise Exception(
                'The implementation of DCNN expects 1 input and not %d' %
                len(inputs))
        else:
            inputs = inputs[0]
        if (num_encoder_layers + lstm_num_layers + num_decoder_layers) == 0:
            output = inputs
            return output

        # Convolutional layers expect a channel dimension, so add a singleton channel here.
        inputs = tf.expand_dims(inputs, -1)
        with tf.variable_scope(self.scope):
            if is_training and float(self.conf['input_noise']) > 0:
                inputs = inputs + tf.random_normal(
                    tf.shape(inputs), stddev=float(self.conf['input_noise']))

            logits = inputs

            with tf.variable_scope('encoder'):
                encoder_outputs = []
                encoder_outputs_before_pool = []
                for l in range(num_encoder_layers):
                    with tf.variable_scope('layer_%s' % l):

                        logits, outputs_before_pool = encoder_layers[l](logits)

                        encoder_outputs.append(logits)
                        encoder_outputs_before_pool.append(outputs_before_pool)

                        if is_training and float(self.conf['dropout']) < 1:
                            raise Exception(
                                'have to check whether dropout is implemented correctly'
                            )
                            # logits = tf.nn.dropout(logits, float(self.conf['dropout']))

            with tf.variable_scope('lstm_centre'):
                [batch_size, _, new_freq_dim, num_chan] = logits.get_shape()
                logits = tf.transpose(logits, [0, 2, 1, 3])
                logits = tf.reshape(logits,
                                    [batch_size * new_freq_dim, -1, num_chan])
                tmp_input_seq_length = tf.expand_dims(input_seq_length, 1)
                tmp_input_seq_length = tf.tile(tmp_input_seq_length,
                                               [1, new_freq_dim])
                tmp_input_seq_length = tf.reshape(tmp_input_seq_length,
                                                  [batch_size * new_freq_dim])
                for l in range(lstm_num_layers):
                    with tf.variable_scope('layer_%s' % l):

                        logits = blstm_layers[l](logits, tmp_input_seq_length)

                        if is_training and float(self.conf['dropout']) < 1:
                            raise Exception(
                                'have to check whether dropout is implemented correctly'
                            )
                            # logits = tf.nn.dropout(logits, float(self.conf['dropout']))

                logits = tf.reshape(
                    logits,
                    [batch_size, new_freq_dim, -1, 2 * lstm_num_units[-1]])
                logits = tf.transpose(logits, [0, 2, 1, 3])

            with tf.variable_scope('decoder'):
                for l in range(num_decoder_layers):
                    with tf.variable_scope('layer_%s' % l):
                        corresponding_encoder_l = num_encoder_layers - 1 - l
                        corresponding_encoder_output = encoder_outputs[
                            corresponding_encoder_l]
                        corresponding_encoder_output_before_pool = encoder_outputs_before_pool[
                            corresponding_encoder_l]
                        corresponding_encoder_max_pool_filter = encoder_layers[
                            corresponding_encoder_l].max_pool_filter
                        if bypass == 'True' and (lstm_num_layers > 0 or l > 0):
                            # don't use bypass for layer 0 if no centre layers
                            decoder_input = tf.concat(
                                [logits, corresponding_encoder_output], -1)
                        else:
                            decoder_input = logits

                        if bypass == 'unpool' and corresponding_encoder_max_pool_filter != [
                                1, 1
                        ]:
                            decoder_input = layer.unpool(
                                pool_input=
                                corresponding_encoder_output_before_pool,
                                pool_output=corresponding_encoder_output,
                                unpool_input=decoder_input,
                                pool_kernel_size=
                                corresponding_encoder_max_pool_filter,
                                pool_stride=
                                corresponding_encoder_max_pool_filter,
                                padding='VALID')

                        logits, _ = decoder_layers[l](decoder_input)

                        if is_training and float(self.conf['dropout']) < 1:
                            raise Exception(
                                'have to check whether dropout is implemented correctly'
                            )
                            # logits = tf.nn.dropout(logits, float(self.conf['dropout']))

                        # get wanted output size
                        if corresponding_encoder_l == 0:
                            wanted_size = tf.shape(inputs)
                        else:
                            wanted_size = tf.shape(
                                encoder_outputs[corresponding_encoder_l - 1])
                        wanted_t_size = wanted_size[1]
                        wanted_f_size = wanted_size[2]

                        # get actual output size
                        output_size = tf.shape(logits)
                        output_t_size = output_size[1]
                        output_f_size = output_size[2]

                        # compensate for potential mismatch, by adding duplicates
                        missing_t_size = wanted_t_size - output_t_size
                        missing_f_size = wanted_f_size - output_f_size

                        last_t_slice = tf.expand_dims(logits[:, -1, :, :], 1)
                        duplicate_logits = tf.tile(last_t_slice,
                                                   [1, missing_t_size, 1, 1])
                        logits = tf.concat([logits, duplicate_logits], 1)
                        last_f_slice = tf.expand_dims(logits[:, :, -1, :], 2)
                        duplicate_logits = tf.tile(last_f_slice,
                                                   [1, 1, missing_f_size, 1])
                        logits = tf.concat([logits, duplicate_logits], 2)

            # set the static shape of the logits where it is known
            dyn_shape = logits.get_shape().as_list()
            dyn_shape[-2] = inputs.get_shape()[-2]
            logits.set_shape(dyn_shape)
            output = logits

        return output
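
The encoder filter counts grow geometrically with fac_per_layer, and the decoder mirrors them: the first entry is dropped and num_output_filters is appended. A small worked example with illustrative values.

import math

# Worked example of the encoder/decoder filter counts (values are illustrative).
num_filters_1st_layer = 32
fac_per_layer = 2.0
num_encoder_layers = 3
num_output_filters = 1

num_filters_enc = [int(math.ceil(num_filters_1st_layer * (fac_per_layer**l)))
                   for l in range(num_encoder_layers)]
num_filters_dec = num_filters_enc[::-1][1:] + [num_output_filters]
print(num_filters_enc)  # [32, 64, 128]
print(num_filters_dec)  # [64, 32, 1]
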
Example #7
	def _get_outputs(self, inputs, input_seq_length, is_training):
		"""
		Create the variables and do the forward computation
		
		Args:
			inputs: the inputs to the neural network, this is a list of
				[batch_size x time x ...] tensors
			input_seq_length: The sequence lengths of the input utterances, this
				is a [batch_size] vector
			is_training: whether or not the network is in training mode
		
		Returns:
			- output, which is a [batch_size x time x ...] tensor
		"""

		num_capsules = int(self.conf['num_capsules'])
		capsule_dim = int(self.conf['capsule_dim'])
		routing_iters = int(self.conf['routing_iters'])
		
		if 'recurrent_probability_fn' in self.conf:
			if self.conf['recurrent_probability_fn'] == 'sigmoid':
				recurrent_probability_fn = tf.nn.sigmoid
			elif self.conf['recurrent_probability_fn'] == 'unit':
				recurrent_probability_fn = ops.unit_activation
			else:
				raise Exception('Undefined recurrent probability function: %s' % self.conf['recurrent_probability_fn'])
		else:
			recurrent_probability_fn = None

		if 'accumulate_input_logits' in self.conf and self.conf['accumulate_input_logits']=='False':
			accumulate_input_logits = False
		else:
			accumulate_input_logits = True

		if 'accumulate_state_logits' in self.conf and self.conf['accumulate_state_logits']=='False':
			accumulate_state_logits = False
		else:
			accumulate_state_logits = True

		if 'logits_prior' in self.conf and self.conf['logits_prior']=='True':
			logits_prior = True
		else:
			logits_prior = False

		gates_fc = self.conf['gates_fc'] == 'True'
		use_output_matrix = self.conf['use_output_matrix'] == 'True'

		# code not available for multiple inputs!!
		if len(inputs) > 1:
			raise Exception('The implementation of CapsNet expects 1 input and not %d' % len(inputs))
		else:
			inputs = inputs[0]
			
		with tf.variable_scope(self.scope):
			if is_training and float(self.conf['input_noise']) > 0:
				inputs = inputs + tf.random_normal(
					tf.shape(inputs),
					stddev=float(self.conf['input_noise']))

			# Primary capsule.
			with tf.variable_scope('primary_capsule'):
				output = tf.identity(inputs, 'inputs')
				input_seq_length = tf.identity(input_seq_length, 'input_seq_length')
				
				# The first layer is a simple bidirectional RNN layer without activation (the squash
				# activation will be applied later)
				primary_output_dim = num_capsules*capsule_dim
				primary_capsules_layer = layer.BLSTMLayer(num_units=primary_output_dim, linear_out_flag=True)
				
				primary_capsules = primary_capsules_layer(output, input_seq_length)
				primary_capsules = tf.reshape(primary_capsules, [output.shape[0].value, tf.shape(output)[1],
																 num_capsules*2, capsule_dim])
		
				primary_capsules = ops.squash(primary_capsules)
		
				output = tf.identity(primary_capsules, 'primary_capsules')

			# non-primary capsules
			for l in range(1, int(self.conf['num_layers'])):
				with tf.variable_scope('layer%d' % l):
					# a capsule layer
					caps_blstm_layer = layer.BLSTMCapsuleLayer(num_capsules=num_capsules, capsule_dim=capsule_dim,
															   routing_iters=routing_iters,
															   recurrent_probability_fn=recurrent_probability_fn,
															   logits_prior=logits_prior,
															   accumulate_input_logits=accumulate_input_logits,
															   accumulate_state_logits=accumulate_state_logits,
															   gates_fc=gates_fc,
															   use_output_matrix=use_output_matrix)
					
					output = caps_blstm_layer(output, input_seq_length)
		
					if is_training and float(self.conf['dropout']) < 1:
						output = tf.nn.dropout(output, float(self.conf['dropout']))
			
			output_dim = num_capsules*2*capsule_dim
			output = tf.reshape(output, [output.shape[0].value, tf.shape(output)[1], output_dim])

		return output
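
As in the earlier examples, every hyperparameter is read as a string from self.conf. A hypothetical configuration for this capsule variant; the key names come from the code, the values are illustrative only.

# Hypothetical configuration (values are illustrative only).
conf = {
    'num_layers': '2',
    'num_capsules': '20',
    'capsule_dim': '8',
    'routing_iters': '3',
    'recurrent_probability_fn': 'sigmoid',  # or 'unit'
    'accumulate_input_logits': 'True',
    'accumulate_state_logits': 'True',
    'logits_prior': 'False',
    'gates_fc': 'False',
    'use_output_matrix': 'False',
    'input_noise': '0.0',
    'dropout': '1.0',
}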