Beispiel #1
0
class ResidualBlock(Layer):
    def __init__(self,
                 dilation_rate,
                 nb_filters,
                 kernel_size,
                 padding,
                 activation='relu',
                 dropout_rate=0,
                 kernel_initializer='he_normal',
                 use_batch_norm=False,
                 last_block=True,
                 **kwargs):

        # type: (int, int, int, str, str, float, str, bool, dict) -> None
        """Defines the residual block for the WaveNet TCN

        Args:
            x: The previous layer in the model
            training: boolean indicating whether the layer should behave in training mode or in inference mode
            dilation_rate: The dilation power of 2 we are using for this residual block
            nb_filters: The number of convolutional filters to use in this block
            kernel_size: The size of the convolutional kernel
            padding: The padding used in the convolutional layers, 'same' or 'causal'.
            activation: The final activation used in o = Activation(x + F(x))
            dropout_rate: Float between 0 and 1. Fraction of the input units to drop.
            kernel_initializer: Initializer for the kernel weights matrix (Conv1D).
            use_batch_norm: Whether to use batch normalization in the residual layers or not.
            kwargs: Any initializers for Layer class.
        """

        self.dilation_rate = dilation_rate
        self.nb_filters = nb_filters
        self.kernel_size = kernel_size
        self.padding = padding
        self.activation = activation
        self.dropout_rate = dropout_rate
        self.use_batch_norm = use_batch_norm
        self.kernel_initializer = kernel_initializer
        self.last_block = last_block

        super(ResidualBlock, self).__init__(**kwargs)

    def _add_and_activate_layer(self, layer):
        """Helper function for building layer

        Args:
            layer: Appends layer to internal layer list and builds it based on the current output
                   shape of ResidualBlock. Updates current output shape.

        """
        self.residual_layers.append(layer)
        self.residual_layers[-1].build(self.res_output_shape)
        self.res_output_shape = self.residual_layers[-1].compute_output_shape(
            self.res_output_shape)

    def build(self, input_shape):

        with K.name_scope(
                self.name
        ):  # name scope used to make sure weights get unique names
            self.residual_layers = list()
            self.res_output_shape = input_shape

            for k in range(2):
                name = 'conv1D_{}'.format(k)
                with K.name_scope(
                        name
                ):  # name scope used to make sure weights get unique names
                    self._add_and_activate_layer(
                        Conv1D(filters=self.nb_filters,
                               kernel_size=self.kernel_size,
                               dilation_rate=self.dilation_rate,
                               padding=self.padding,
                               name=name,
                               kernel_initializer=self.kernel_initializer))

                if self.use_batch_norm:
                    # TODO should be WeightNorm here, but using batchNorm instead
                    self._add_and_activate_layer(BatchNormalization())

                self._add_and_activate_layer(Activation('relu'))
                self._add_and_activate_layer(
                    SpatialDropout1D(rate=self.dropout_rate))

            if not self.last_block:
                # 1x1 conv to match the shapes (channel dimension).
                name = 'conv1D_{}'.format(k + 1)
                with K.name_scope(name):
                    # make and build this layer separately because it directly uses input_shape
                    self.shape_match_conv = Conv1D(
                        filters=self.nb_filters,
                        kernel_size=1,
                        padding='same',
                        name=name,
                        kernel_initializer=self.kernel_initializer)

            else:
                self.shape_match_conv = Lambda(lambda x: x, name='identity')

            self.shape_match_conv.build(input_shape)
            self.res_output_shape = self.shape_match_conv.compute_output_shape(
                input_shape)

            self.final_activation = Activation(self.activation)
            self.final_activation.build(
                self.res_output_shape)  # probably isn't necessary

            # this is done to force keras to add the layers in the list to self._layers
            for layer in self.residual_layers:
                self.__setattr__(layer.name, layer)

            super(ResidualBlock, self).build(
                input_shape)  # done to make sure self.built is set True

    def call(self, inputs, training=None):
        """

        Returns: A tuple where the first element is the residual model tensor, and the second
                 is the skip connection tensor.
        """
        x = inputs
        for layer in self.residual_layers:
            if isinstance(layer, SpatialDropout1D):
                x = layer(x, training=training)
            else:
                x = layer(x)

        x2 = self.shape_match_conv(inputs)
        res_x = add([x2, x])
        return [self.final_activation(res_x), x]

    def compute_output_shape(self, input_shape):
        return [self.res_output_shape, self.res_output_shape]
Beispiel #2
0
class TCN(Layer):
    """Creates a TCN layer.

        Input shape:
            A tensor of shape (batch_size, timesteps, input_dim).

        Args:
            nb_filters: The number of filters to use in the convolutional layers.
            kernel_size: The size of the kernel to use in each convolutional layer.
            dilations: The list of the dilations. Example is: [1, 2, 4, 8, 16, 32, 64].
            nb_stacks : The number of stacks of residual blocks to use.
            padding: The padding to use in the convolutional layers, 'causal' or 'same'.
            use_skip_connections: Boolean. If we want to add skip connections from input to each residual block.
            return_sequences: Boolean. Whether to return the last output in the output sequence, or the full sequence.
            activation: The activation used in the residual blocks o = Activation(x + F(x)).
            dropout_rate: Float between 0 and 1. Fraction of the input units to drop.
            kernel_initializer: Initializer for the kernel weights matrix (Conv1D).
            use_batch_norm: Whether to use batch normalization in the residual layers or not.
            kwargs: Any other arguments for configuring parent class Layer. For example "name=str", Name of the model.
                    Use unique names when using multiple TCN.

        Returns:
            A TCN layer.
        """
    def __init__(self,
                 nb_filters=64,
                 kernel_size=2,
                 nb_stacks=1,
                 dilations=(1, 2, 4, 8, 16, 32),
                 padding='causal',
                 use_skip_connections=True,
                 dropout_rate=0.0,
                 return_sequences=False,
                 activation='linear',
                 kernel_initializer='he_normal',
                 use_batch_norm=False,
                 **kwargs):

        self.return_sequences = return_sequences
        self.dropout_rate = dropout_rate
        self.use_skip_connections = use_skip_connections
        self.dilations = dilations
        self.nb_stacks = nb_stacks
        self.kernel_size = kernel_size
        self.nb_filters = nb_filters
        self.activation = activation
        self.padding = padding
        self.kernel_initializer = kernel_initializer
        self.use_batch_norm = use_batch_norm

        if padding != 'causal' and padding != 'same':
            raise ValueError(
                "Only 'causal' or 'same' padding are compatible for this layer."
            )

        if not isinstance(nb_filters, int):
            print('An interface change occurred after the version 2.1.2.')
            print('Before: tcn.TCN(x, return_sequences=False, ...)')
            print('Now should be: tcn.TCN(return_sequences=False, ...)(x)')
            print(
                'The alternative is to downgrade to 2.1.2 (pip install keras-tcn==2.1.2).'
            )
            raise Exception()

        # initialize parent class
        super(TCN, self).__init__(**kwargs)

    def build(self, input_shape):
        self.main_conv1D = Conv1D(filters=self.nb_filters,
                                  kernel_size=1,
                                  padding=self.padding,
                                  kernel_initializer=self.kernel_initializer)
        self.main_conv1D.build(input_shape)

        # member to hold current output shape of the layer for building purposes
        self.build_output_shape = self.main_conv1D.compute_output_shape(
            input_shape)

        # list to hold all the member ResidualBlocks
        self.residual_blocks = list()
        total_num_blocks = self.nb_stacks * len(self.dilations)
        if not self.use_skip_connections:
            total_num_blocks += 1  # cheap way to do a false case for below

        for s in range(self.nb_stacks):
            for d in self.dilations:
                self.residual_blocks.append(
                    ResidualBlock(dilation_rate=d,
                                  nb_filters=self.nb_filters,
                                  kernel_size=self.kernel_size,
                                  padding=self.padding,
                                  activation=self.activation,
                                  dropout_rate=self.dropout_rate,
                                  use_batch_norm=self.use_batch_norm,
                                  kernel_initializer=self.kernel_initializer,
                                  last_block=len(self.residual_blocks) +
                                  1 == total_num_blocks,
                                  name='residual_block_{}'.format(
                                      len(self.residual_blocks))))
                # build newest residual block
                self.residual_blocks[-1].build(self.build_output_shape)
                self.build_output_shape = self.residual_blocks[
                    -1].res_output_shape

        # this is done to force keras to add the layers in the list to self._layers
        for layer in self.residual_blocks:
            self.__setattr__(layer.name, layer)

        self.lambda_layer = Lambda(lambda tt: tt[:, -1, :])
        self.lambda_ouput_shape = self.lambda_layer.compute_output_shape(
            self.build_output_shape)

    def compute_output_shape(self, input_shape):
        """
        Overridden in case keras uses it somewhere... no idea. Just trying to avoid future errors.
        """
        if not self.built:
            self.build(input_shape)
        if not self.return_sequences:
            return self.lambda_ouput_shape
        else:
            return self.build_output_shape

    def call(self, inputs, training=None):
        x = inputs
        x = self.main_conv1D(x)
        skip_connections = list()
        for layer in self.residual_blocks:
            x, skip_out = layer(x, training=training)
            skip_connections.append(skip_out)

        if self.use_skip_connections:
            x = add(skip_connections)
        if not self.return_sequences:
            x = self.lambda_layer(x)
        return x

    def get_config(self):
        """
        Returns the config of a the layer. This is used for saving and loading from a model
        :return: python dictionary with specs to rebuild layer
        """
        config = super(TCN, self).get_config()
        config['nb_filters'] = self.nb_filters
        config['kernel_size'] = self.kernel_size
        config['nb_stacks'] = self.nb_stacks
        config['dilations'] = self.dilations
        config['padding'] = self.padding
        config['use_skip_connections'] = self.use_skip_connections
        config['dropout_rate'] = self.dropout_rate
        config['return_sequences'] = self.return_sequences
        config['activation'] = self.activation
        config['use_batch_norm'] = self.use_batch_norm
        config['kernel_initializer'] = self.kernel_initializer

        return config
Beispiel #3
0
class ClockworkRNN(Layer):
    """Clockwork RNN ([Koutnik et al., 2014](https://arxiv.org/abs/1402.3511)).

    Constructs a CW-RNN from RNNs of a given type.

    # Arguments
        periods: List of positive integers. The periods of each internal RNN. 
        units_per_period: Positive integer or list of positive integers.
            Number of units for each internal RNN. If list, it must have the
            same length as `periods`.
        input_shape: Shape of the input data.
        output_units: Positive integer. Dimensionality of the output space.
        output_activation: String or callable. Activation function to use. If
            you don't specify anything, no activation is applied (i.e.,
            "linear" activation: `a(x) = x`). 
        return_sequences: Boolean (default False). Whether to return the last
            output in the output sequence, or the full sequence.
        sort_ascending: Boolean (default False). Whether to sort the periods
            in ascending or descending order (default, as in the original
            paper).
        include_top: Whether to include the fully-connected layer at the top
            of the network.
        dense_kwargs: Dictionary. Optional arguments for the trailing Dense 
            unit (`activation` and `units` keys will be ignored).
        rnn_dtype: The type of RNN to use as clockwork layer. Can be a string
            ("SimpleRNN", "GRU", "LSTM", "CuDNNGRU", "CuDNNLSTM") or any RNN 
            subclass.
        rnn_kwargs: Dictionary. Optional arguments for the internal RNNs 
            (`return_sequences` and `return_state` will be ignored).
    
    """
    def __init__(self, periods, 
                 units_per_period, 
                 output_units,
                 output_activtion='linear',
                 return_sequences=False,
                 sort_ascending=False,
                 include_top=True,
                 dense_kwargs=None,
                 rnn_dtype="SimpleRNN",
                 rnn_kwargs=None,
                 **kwargs):
        if type(rnn_dtype) is str:
            self.rnn_dtype = getattr(layers, rnn_dtype) 
        else:
            self.rnn_dtype = rnn_dtype
        
        ClockworkRNN.__name__ = "Clockwork" + self.rnn_dtype.__name__
        super(ClockworkRNN, self).__init__(**kwargs)

        if type(units_per_period) is list:
            self.units_per_period = units_per_period
        else:
            self.units_per_period = [units_per_period] * len(periods)

        self.periods = periods
        self.rnn_kwargs = rnn_kwargs or {}
        self.rnn_kwargs['return_sequences'] = True
        self.rnn_kwargs['return_state'] = False
        self.rnn_kwargs.pop("units", True)
        self.dense_kwargs = dense_kwargs or {}
        self.dense_kwargs['activation'] = output_activtion
        self.dense_kwargs['units'] = output_units
        self.include_top = include_top
        self.return_sequences = return_sequences
        self.sort_ascending = sort_ascending
        self.blocks = []

    def build(self, input_shape):
        last_shape = input_shape
        output_shapes = []
        
        for period, units in sorted(zip(self.periods, self.units_per_period),
                                    reverse=not self.sort_ascending):
            block, output_shape, last_shape = self._build_clockwork_block(
                units, period, last_shape)
            output_shapes.append(output_shape)
            self.blocks.append(block)

        self.concat_all = Concatenate()
        self.concat_all.build(output_shapes)
        last_shape = self.concat_all.compute_output_shape(output_shapes)

        if not self.return_sequences:
            self.lambda_last = Lambda(lambda x: x[:, -1])
            self.lambda_last.build(last_shape)
            last_shape = self.lambda_last.compute_output_shape(last_shape)

        if self.include_top:
            if self.return_sequences:
                self.dense = TimeDistributed(Dense(**self.dense_kwargs))
            else:
                self.dense = Dense(**self.dense_kwargs)
                
            self.dense.build(last_shape)
            self._trainable_weights.extend(self.dense.trainable_weights)
            last_shape = self.dense.compute_output_shape(last_shape)
                
        super(ClockworkRNN, self).build(input_shape)

    def call(self, x):
        rnns = []
        to_next_block = x

        for block in self.blocks:
            to_dense, to_next_block = self._call_clockwork_block(
                to_next_block, *block)
            rnns.append(to_dense)

        out = self.concat_all(rnns)

        if not self.return_sequences:
            out = self.lambda_last(out)

        if self.include_top:
            out = self.dense(out)
    
        return out

    def compute_output_shape(self, input_shape):
        if self.include_top:
            out_dim = self.dense_kwargs['units']
        else:
            out_dim = np.sum(self.units_per_period)

        if self.return_sequences:
            return input_shape[:-1] + (out_dim,)
        else:
            return input_shape[:-2] + (out_dim,)

    def _delay(self, x):
        return K.temporal_padding(x, (1, 0))[:, :-1]
    
    def _crop(self, x, timesteps):
        return x[:, :K.cast(timesteps, "int32")]

    def _build_clockwork_block(self, units, period, input_shape):
        output_shape = input_shape[:-1] + (units,)
        pool = MaxPooling1D(1, period)
        rnn = self.rnn_dtype(units=units, **self.rnn_kwargs)
        unpool = UpSampling1D(period)
        crop = Lambda(lambda x: self._crop(x[0], x[1]), 
                      output_shape=output_shape[1:])
        delay = Lambda(lambda x: self._delay(x), 
                       output_shape=output_shape[1:])
        concat = Concatenate()
        
        block = (pool, rnn, unpool, crop, delay, concat)
        
        pool.build(input_shape)
        pool_output_shape = pool.compute_output_shape(input_shape)
        rnn.build(pool_output_shape)
        self._trainable_weights.extend(rnn.trainable_weights)
        rnn_output_shape = rnn.compute_output_shape(pool_output_shape)
        unpool.build(rnn_output_shape)
        crop.build([unpool.compute_output_shape(rnn_output_shape), ()])
        delay.build(output_shape)
        concat.build([input_shape, output_shape])

        return block, output_shape, \
            concat.compute_output_shape([input_shape, output_shape])

    def _call_clockwork_block(self, x, pool, rnn, unpool, crop, delay, concat):
        pooled = pool(x)
        rnn_out = rnn(pooled)
        unpooled = unpool(rnn_out)
        to_dense = crop([unpooled, K.shape(x)[1]])
        delayed = delay(to_dense)
        to_next_block = concat([x, delayed])

        return to_dense, to_next_block

    def get_config(self):
        config = super(ClockworkRNN, self).get_config()
        
        config['units_per_period'] = self.units_per_period
        config['periods'] = self.periods
        config['rnn_dtype'] = self.rnn_dtype.__name__
        config['rnn_kwargs'] = self.rnn_kwargs
        config['dense_kwargs'] = self.dense_kwargs
        config['include_top'] = self.include_top
        config['return_sequences'] = self.return_sequences
        config['sort_ascending'] = self.sort_ascending

        return config