Example #1
    def call(self, x):
        # Normalize the kernel to unit Frobenius norm, then rescale by a
        # layer-specific coefficient.
        W_bar = self.kernel / tf.norm(self.kernel) * self._get_coef()
        # Temporarily swap in the normalized kernel, run the standard Dense
        # forward pass, then restore the trainable variable.
        kernel = self.kernel
        self.kernel = W_bar
        outputs = Dense.call(self, x)
        self.kernel = kernel
        return outputs
Example #2
    def call(self, x):
        # Normalize the kernel along `self.axis` (one norm per slice), then
        # rescale by a layer-specific coefficient.
        norms = tf.norm(self.kernel, axis=self.axis, keepdims=True)
        W_bar = self.kernel / norms * self._get_coef()
        # Same kernel-swap trick as in Example #1.
        kernel = self.kernel
        self.kernel = W_bar
        outputs = Dense.call(self, x)
        self.kernel = kernel
        return outputs
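
Examples #1 and #2 share the same kernel-swap trick: temporarily rebind self.kernel to the normalized tensor, delegate the forward pass to Dense.call, then restore the trainable variable. Below is a minimal self-contained sketch of a layer built this way; the class name NormConstrainedDense and the scalar returned by _get_coef are assumptions for illustration, not taken from the original code.

import tensorflow as tf
from tensorflow.keras.layers import Dense

class NormConstrainedDense(Dense):
    """Hypothetical sketch: Dense with a Frobenius-normalized kernel."""

    def __init__(self, units, coef=1.0, **kwargs):
        super(NormConstrainedDense, self).__init__(units, **kwargs)
        self.coef = coef  # assumed fixed scaling constant

    def _get_coef(self):
        # Layer-specific in the snippets above; a plain scalar here.
        return self.coef

    def call(self, x):
        W_bar = self.kernel / tf.norm(self.kernel) * self._get_coef()
        kernel = self.kernel
        self.kernel = W_bar  # swap in the normalized tensor
        outputs = Dense.call(self, x)
        self.kernel = kernel  # restore the trainable variable
        return outputs

layer = NormConstrainedDense(8)
print(layer(tf.random.normal((2, 4))).shape)  # (2, 8)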
Example #3
    def call(self, x, training=None):
        if training:
            # Re-estimate the spectral norm with a few power iterations and
            # persist the running estimates (the u vector and sigma).
            W_bar, _u, sigma = spectral_normalization(
                self.kernel, self.u, niter=self.niter_spectral)
            self.sig.assign(sigma)
            self.u.assign(_u)
        else:
            # At inference time, reuse the stored sigma instead of iterating.
            W_reshaped = K.reshape(self.kernel, [-1, self.kernel.shape[-1]])
            W_bar = W_reshaped / self.sig

        # Push the spectrally normalized kernel towards the closest
        # orthonormal matrix, then rescale.
        W_bar = bjorck_normalization(W_bar, niter=self.niter_bjorck)
        W_bar = W_bar * self._get_coef()

        # Kernel-swap trick (see Examples #1 and #2): run the Dense forward
        # pass with W_bar, then restore the trainable variable.
        W_bar = K.reshape(W_bar, self.kernel.shape)
        kernel = self.kernel
        self.kernel = W_bar
        outputs = Dense.call(self, x)
        self.kernel = kernel
        return outputs
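
Example #3 depends on two helpers that are not shown in the snippet. The sketches below are assumptions about what they might look like (standard power iteration and the Björck orthonormalization step), matching only the call signatures and the (W_bar, _u, sigma) return triple used above; the original implementations may differ.

import tensorflow as tf

def spectral_normalization(kernel, u, niter=3):
    # Power iteration on the 2-D reshaped kernel; `u` is assumed to have
    # shape (1, kernel.shape[-1]).
    W = tf.reshape(kernel, [-1, kernel.shape[-1]])
    _u = u
    for _ in range(niter):
        _v = tf.math.l2_normalize(tf.matmul(_u, W, transpose_b=True))  # (1, rows)
        _u = tf.math.l2_normalize(tf.matmul(_v, W))                    # (1, cols)
    sigma = tf.matmul(tf.matmul(_v, W), _u, transpose_b=True)  # largest-singular-value estimate
    return W / sigma, _u, sigma

def bjorck_normalization(W, niter=15):
    # Björck iteration W <- 1.5*W - 0.5*W(W^T W): converges to the closest
    # orthonormal matrix when the spectral norm of W is at most 1, which the
    # division by sigma above ensures.
    for _ in range(niter):
        W = 1.5 * W - 0.5 * tf.matmul(W, tf.matmul(W, W, transpose_a=True))
    return W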
Example #4
    def call(self, x, **kwargs):
        # Plain passthrough: delegate directly to the standard Dense forward pass.
        return Dense.call(self, x)
Example #5
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Concatenate, Conv1D, Dense, InputSpec
from tensorflow.keras.regularizers import l1_l2


class CNNEncoder(tf.keras.layers.Layer):
    """CNNEncoder is a combination of multiple convolutional layers and max
    pooling layers. It is defined as a single layer to be consistent with
    the other encoders in terms of input and output specifications.

    Input shape: (batch_size, sequence_length, input_dim).
    Output shape: (batch_size, output_dim).

    The CNN has one convolution layer per ngram filter size. Each convolution
    operation produces a vector of size `filters`. The number of times a
    convolution layer is applied depends on the ngram size:
    input_len - ngram_size + 1. The corresponding max pooling layer aggregates
    all these outputs from the convolution layer and outputs the max.

    This operation is repeated for every ngram size passed, so the
    dimensionality of the output after max pooling is
    len(kernel_sizes) * filters.

    We then use a fully connected layer to project it back to the desired
    output_dim.

    References: "A Sensitivity Analysis of (and Practitioners' Guide to)
    Convolutional Neural Networks for Sentence Classification",
    Zhang and Wallace 2016, particularly Figure 1.

    Args:
        filters: Integer, the output dim for each convolutional layer.
        kernel_sizes: A tuple or list of integers, the kernel sizes of the
            convolutional layers.
        units: After doing convolutions, the collected features are projected
            into a vector of this size. If this value is `None`, the result
            of the max pooling is returned directly.
        conv_layer_activation: String, the activation of the convolutional
            layers.
        l1_regularization: Float, optional L1 regularization factor.
        l2_regularization: Float, optional L2 regularization factor.
    """

    def __init__(self, filters=100, kernel_sizes=(2, 3, 4, 5),
                 conv_layer_activation='relu',
                 l1_regularization=None, l2_regularization=None,
                 units=None,
                 **kwargs):
        self.filters = filters
        self.kernel_sizes = kernel_sizes
        self.units = units
        self.conv_layer_activation = conv_layer_activation
        self.l1_regularization = l1_regularization
        self.l2_regularization = l2_regularization
        self.regularizer = l1_l2(
            l1=l1_regularization if l1_regularization is not None else 0.0,
            l2=l2_regularization if l2_regularization is not None else 0.0)
        self.conv_layers = None
        self.projection_layer = None
        self.trainable_layers = None
        self.output_dim = None

        self.input_spec = [InputSpec(ndim=3)]
        super(CNNEncoder, self).__init__(**kwargs)

    def build(self, input_shape):
        self.conv_layers = [Conv1D(filters=self.filters,
                                   kernel_size=kernel_size,
                                   activation=self.conv_layer_activation,
                                   kernel_regularizer=self.regularizer,
                                   bias_regularizer=self.regularizer)
                            for kernel_size in self.kernel_sizes]
        for conv_layer in self.conv_layers:
            with K.name_scope(conv_layer.name):
                conv_layer.build(input_shape)
        maxpool_output_dim = self.filters * len(self.kernel_sizes)
        if self.units is not None:
            self.projection_layer = Dense(self.units)
            projection_input_shape = (input_shape[0], maxpool_output_dim)
            with K.name_scope(self.projection_layer.name):
                self.projection_layer.build(projection_input_shape)
            self.output_dim = self.units
            self.trainable_layers = self.conv_layers + [self.projection_layer]
        else:
            self.projection_layer = None
            self.output_dim = maxpool_output_dim
            self.trainable_layers = self.conv_layers

        super(CNNEncoder, self).build(input_shape)

    def call(self, inputs, mask=None):
        # Each convolution layer returns an output of shape
        # (batch_size, conv_length, filters), where
        # `conv_length = num_words - kernel_size + 1`. We then max-pool over
        # the whole sequence for each filter (via `K.max`), giving a tensor of
        # shape (batch_size, filters), which is concatenated across kernel
        # sizes and projected by the projection layer, if any.
        filter_outputs = [K.max(conv_layer.call(inputs), axis=1)
                          for conv_layer in self.conv_layers]
        maxpool_output = Concatenate()(filter_outputs) \
            if len(filter_outputs) > 1 else filter_outputs[0]
        if self.projection_layer:
            result = self.projection_layer.call(maxpool_output)
        else:
            result = maxpool_output
        return result

    def compute_output_shape(self, input_shape):
        return (input_shape[0], self.output_dim)

    def compute_mask(self, inputs, mask=None):
        # By default, Keras propagates the mask from a layer that supports
        # masking. We don't need it downstream, so drop it here.
        return None

    def get_config(self):
        config = {"filters": self.filters,
                  "kernel_sizes": self.kernel_sizes,
                  "units": self.units,
                  "conv_layer_activation": self.conv_layer_activation,
                  "l1_regularization": self.l1_regularization,
                  "l2_regularization": self.l2_regularization
                  }
        base_config = super(CNNEncoder, self).get_config()
        config.update(base_config)
        return config

    @property
    def trainable_weights(self):
        trainable_weights = []
        for layer in self.trainable_layers:
            trainable_weights.extend(layer.trainable_weights)
        return trainable_weights
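
A short usage sketch for CNNEncoder; the toy shapes and numbers are assumptions, and the output shape follows the docstring:

import tensorflow as tf

# Toy batch: 2 sequences of 7 tokens, each embedded into 16 dimensions.
x = tf.random.normal((2, 7, 16))
encoder = CNNEncoder(filters=100, kernel_sizes=(2, 3), units=32)
y = encoder(x)
print(y.shape)  # (2, 32): 2 * 100 pooled features, projected down to `units`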