Example #1
    def __init__(
        self,
        input_shape,
        inner_dim,
        activation=torch.nn.Sigmoid,
        norm=BatchNorm1d,
    ):
        super().__init__()
        self.inner_dim = inner_dim
        self.norm = norm
        self.activation = activation

        bz, t, chn = input_shape
        self.conv = Sequential(input_shape=input_shape)
        self.conv.append(
            DepthwiseSeparableConv1d,
            out_channels=chn,
            kernel_size=1,
            stride=1,
        )
        self.conv.append(self.norm)
        self.conv.append(self.activation())

        self.avg_pool = AdaptivePool(1)
        self.bottleneck = Sequential(
            Linear(input_size=input_shape[-1], n_neurons=self.inner_dim),
            self.activation(),
            Linear(input_size=self.inner_dim, n_neurons=chn),
            self.activation(),
        )
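For reference, SpeechBrain's Linear operates on (batch, time, features) tensors and takes the output dimension as n_neurons. A minimal usage sketch, assuming SpeechBrain is installed; the shapes are illustrative and not taken from the example above:

import torch
from speechbrain.nnet.linear import Linear

x = torch.rand(8, 120, 40)                 # (batch, time, features)
lin = Linear(input_size=40, n_neurons=64)  # affine map over the last dimension
y = lin(x)
print(y.shape)                             # torch.Size([8, 120, 64])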
Example #2
    def __init__(
        self,
        vocab,
        d_model=512,
        nhead=8,
        num_encoder_layers=12,
        num_decoder_layers=0,
        d_ffn=2048,
        dropout=0.1,
        activation=nn.ReLU,
        positional_encoding="fixed_abs_sine",
        normalize_before=False,
        d_embedding=None,
        max_length=2500,
        causal=True,
        attention_type="regularMHA",
    ):
        super().__init__(
            d_model=d_model,
            nhead=nhead,
            num_encoder_layers=num_encoder_layers,
            num_decoder_layers=num_decoder_layers,
            d_ffn=d_ffn,
            dropout=dropout,
            activation=activation,
            positional_encoding=positional_encoding,
            normalize_before=normalize_before,
            max_length=max_length,
            causal=causal,
            attention_type=attention_type,
        )

        self.d_embedding = d_embedding
        if d_embedding is None:
            self.d_embedding = d_model

        self.custom_src_module = NormalizedEmbedding(self.d_embedding, vocab)

        self.embedding_proj = None
        if d_embedding is not None:
            self.embedding_proj = Linear(input_size=self.d_embedding,
                                         n_neurons=d_model)

        self.output_proj = ModuleList(
            Linear(input_size=d_model, n_neurons=d_model),
            LayerNorm(d_model, eps=1e-6),
            Linear(input_size=d_model, n_neurons=vocab),
        )

        self.num_encoder_layers = num_encoder_layers
        self.num_decoder_layers = num_decoder_layers

        # reset the params of the transformer model
        self._reset_params()
Example #3
    def __init__(self,
                 d_model,
                 nhead,
                 dim_feedforward=256,
                 dropout=0,
                 activation="relu"):

        from torch.nn.modules.activation import MultiheadAttention
        from torch.nn.modules.normalization import LayerNorm
        from torch.nn.modules.dropout import Dropout
        from torch.nn.modules.rnn import LSTM
        from torch.nn.modules.linear import Linear

        super(DPTNetBlock, self).__init__()
        self.self_attn = MultiheadAttention(d_model, nhead, dropout=dropout)
        # Implementation of Feedforward model
        # self.linear1 = Linear(d_model, dim_feedforward)
        self.rnn = LSTM(d_model, d_model * 2, 1, bidirectional=True)
        self.dropout = Dropout(dropout)
        # self.linear2 = Linear(dim_feedforward, d_model)
        self.linear2 = Linear(d_model * 2 * 2, d_model)

        self.norm1 = LayerNorm(d_model)
        self.norm2 = LayerNorm(d_model)
        self.dropout1 = Dropout(dropout)
        self.dropout2 = Dropout(dropout)

        self.activation = _get_activation_fn(activation)
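The d_model * 2 * 2 input size of linear2 follows from the RNN configuration: the LSTM hidden size is d_model * 2, and bidirectional=True doubles the output feature dimension again. A small shape check with plain torch modules (d_model chosen arbitrarily for illustration):

import torch
from torch.nn import LSTM, Linear

d_model = 16
rnn = LSTM(d_model, d_model * 2, 1, bidirectional=True)
x = torch.rand(100, 4, d_model)             # (seq_len, batch, features)
out, _ = rnn(x)
print(out.shape)                            # torch.Size([100, 4, 64]), i.e. 2 * (d_model * 2)
linear2 = Linear(d_model * 2 * 2, d_model)  # projects back to d_model
print(linear2(out).shape)                   # torch.Size([100, 4, 16])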
Example #4
    def __init__(
        self,
        d_model,
        output_size,
        output_activation=nn.ReLU,
        nhead=8,
        num_layers=8,
        d_ffn=512,
        dropout=0.1,
        activation=nn.LeakyReLU,
        causal=True,
        custom_emb_module=None,
        normalize_before=False,
    ):
        super().__init__(
            d_model=d_model,
            nhead=nhead,
            num_encoder_layers=num_layers,
            num_decoder_layers=0,
            d_ffn=d_ffn,
            dropout=dropout,
            activation=activation,
            positional_encoding=None,
            normalize_before=normalize_before,
            causal=causal,
        )

        self.custom_emb_module = custom_emb_module
        self.output_layer = Linear(output_size, input_size=d_model, bias=False)
        self.output_activation = output_activation()
Example #5
    def __init__(
        self,
        input_size,
        device="cpu",
        lin_blocks=0,
        lin_neurons=192,
        out_neurons=1211,
    ):

        super().__init__()
        self.blocks = nn.ModuleList()

        for block_index in range(lin_blocks):
            self.blocks.extend(
                [
                    _BatchNorm1d(input_size),
                    Linear(input_size=input_size, n_neurons=lin_neurons),
                ]
            )
            input_size = lin_neurons

        # Final Layer
        self.weight = nn.Parameter(
            torch.FloatTensor(out_neurons, input_size, device=device)
        )
        nn.init.xavier_uniform_(self.weight)
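The weight parameter has shape (out_neurons, input_size), the layout expected by torch.nn.functional.linear. A hedged sketch of how such a weight is commonly scored against embeddings (cosine similarity, as used with AAM-softmax style losses); this illustrates the pattern and is not necessarily this class's forward:

import torch
import torch.nn.functional as F

emb = torch.rand(8, 192)                    # (batch, lin_neurons) speaker embeddings
weight = torch.nn.Parameter(torch.FloatTensor(1211, 192))
torch.nn.init.xavier_uniform_(weight)

scores = F.linear(F.normalize(emb), F.normalize(weight))  # cosine similarities
print(scores.shape)                         # torch.Size([8, 1211])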
Example #6
def test_linear():

    import torch
    from speechbrain.nnet.linear import Linear

    inputs = torch.rand(1, 2, 4)
    lin_t = Linear(n_neurons=4, input_size=inputs.shape[-1], bias=False)
    lin_t.w.weight = torch.nn.Parameter(torch.eye(inputs.shape[-1]))
    outputs = lin_t(inputs)
    assert torch.all(torch.eq(inputs, outputs))
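With bias=False and an identity weight, the layer is a no-op, which is exactly what the test asserts. For 4D inputs, combine_dims=True flattens the last two dimensions before the affine transform; a short sketch, with shapes chosen for illustration:

import torch
from speechbrain.nnet.linear import Linear

x = torch.rand(1, 2, 3, 4)                  # (batch, time, channel, features)
lin = Linear(n_neurons=5, input_size=3 * 4, combine_dims=True)
y = lin(x)
print(y.shape)                              # torch.Size([1, 2, 5])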
Example #7
    def append(self, layer, *args, **kwargs):
        """Appends the specified module to the shortcut model.

        Arguments
        ---------
        layer : torch.nn.Module class
            This layer will get initialized with *args and **kwargs. Also,
            the argument ``input_shape`` will be passed if the layer takes it.
        *args, **kwargs
            Passed unchanged to the layer **EXCEPT** the kwarg ``end_of_block``
            which is used to indicate that the shortcut should be added in.
        """
        if self.new_block:
            self.blocks.append(Sequential(input_shape=self.block_input_shape))
            self.new_block = False

        end_of_block = False
        if "end_of_block" in kwargs:
            end_of_block = kwargs["end_of_block"]
            del kwargs["end_of_block"]

        self.blocks[-1].append(layer, *args, **kwargs)

        # When we reach the end of the block, prepare to add shortcut
        if end_of_block:

            # Use dummy input to find shape of next block
            dummy_input = torch.zeros(self.block_input_shape)
            dummy_output = self.blocks[-1](dummy_input)

            # Initialize projection if necessary
            if self.shortcut_projection:
                projection_size = functools.reduce(
                    operator.mul, dummy_output.shape[2:], 1
                )

                if self.shortcut_type == "residual":
                    shape = self.first_input_shape
                    dummy_input = torch.zeros(self.first_input_shape)
                else:
                    shape = self.block_input_shape

                self.projections.append(
                    Linear(
                        n_neurons=projection_size,
                        input_shape=shape,
                        bias=False,
                        combine_dims=True,
                    )
                )

            # Prepare for next block
            self.new_block = True
            dummy_output = self._combine(dummy_input, dummy_output, -1)
            self.block_input_shape = dummy_output.shape
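In the projection above, passing input_shape together with combine_dims=True lets Linear infer its input size from a (possibly 4D) block shape and flatten the trailing dimensions, so the projected tensor can be combined with the flattened block output. A minimal sketch of that flattening projection, with dummy shapes not taken from a real model:

import torch
from speechbrain.nnet.linear import Linear

block_out = torch.zeros(10, 50, 8, 16)      # dummy (batch, time, channel, features)
projection_size = 8 * 16                    # product of the trailing dimensions
proj = Linear(
    n_neurons=projection_size,
    input_shape=block_out.shape,
    bias=False,
    combine_dims=True,
)
print(proj(block_out).shape)                # torch.Size([10, 50, 128])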
Example #8
    def __init__(
        self,
        tgt_vocab,
        input_size,
        d_model=512,
        nhead=8,
        num_encoder_layers=6,
        num_decoder_layers=6,
        d_ffn=2048,
        dropout=0.1,
        activation=nn.ReLU,
        positional_encoding="fixed_abs_sine",
        normalize_before=False,
        kernel_size: Optional[int] = 31,
        bias: Optional[bool] = True,
        encoder_module: Optional[str] = "transformer",
        conformer_activation: Optional[nn.Module] = Swish,
        attention_type: Optional[str] = "regularMHA",
        max_length: Optional[int] = 2500,
        causal: Optional[bool] = True,
    ):
        super().__init__(
            d_model=d_model,
            nhead=nhead,
            num_encoder_layers=num_encoder_layers,
            num_decoder_layers=num_decoder_layers,
            d_ffn=d_ffn,
            dropout=dropout,
            activation=activation,
            positional_encoding=positional_encoding,
            normalize_before=normalize_before,
            kernel_size=kernel_size,
            bias=bias,
            encoder_module=encoder_module,
            conformer_activation=conformer_activation,
            attention_type=attention_type,
            max_length=max_length,
            causal=causal,
        )

        self.custom_src_module = ModuleList(
            Linear(
                input_size=input_size,
                n_neurons=d_model,
                bias=True,
                combine_dims=False,
            ),
            torch.nn.Dropout(dropout),
        )
        self.custom_tgt_module = ModuleList(
            NormalizedEmbedding(d_model, tgt_vocab))

        # reset parameters using xavier_normal_
        self._init_params()
Example #9
    def __init__(
        self,
        intra_mdl,
        inter_mdl,
        out_channels,
        norm="ln",
        skip_around_intra=True,
        linear_layer_after_inter_intra=True,
    ):
        super(Dual_Computation_Block, self).__init__()

        self.intra_mdl = intra_mdl
        self.inter_mdl = inter_mdl
        self.skip_around_intra = skip_around_intra
        self.linear_layer_after_inter_intra = linear_layer_after_inter_intra

        # Norm
        self.norm = norm
        if norm is not None:
            self.intra_norm = select_norm(norm, out_channels, 4)
            self.inter_norm = select_norm(norm, out_channels, 4)

        # Linear
        if linear_layer_after_inter_intra:
            if isinstance(intra_mdl, SBRNNBlock):
                self.intra_linear = Linear(
                    out_channels, input_size=2 * intra_mdl.mdl.rnn.hidden_size
                )
            else:
                self.intra_linear = Linear(
                    out_channels, input_size=out_channels
                )

            if isinstance(inter_mdl, SBRNNBlock):
                self.inter_linear = Linear(
                    out_channels, input_size=2 * intra_mdl.mdl.rnn.hidden_size
                )
            else:
                self.inter_linear = Linear(
                    out_channels, input_size=out_channels
                )
Example #10
    def __init__(
        self,
        device="cpu",
        activation=torch.nn.LeakyReLU,
        tdnn_blocks=5,
        tdnn_channels=[512, 512, 512, 512, 1500],
        tdnn_kernel_sizes=[5, 3, 3, 1, 1],
        tdnn_dilations=[1, 2, 3, 1, 1],
        lin_neurons=512,
        in_channels=40,
    ):

        super().__init__()
        self.blocks = nn.ModuleList()

        # The TDNN consists of convolutional layers with the given dilation
        # factors and kernel sizes. Here we loop over all the convolutional
        # layers to add. Note that batch normalization is applied after the
        # activation function in this case, which slightly improves sound
        # classification performance.
        for block_index in range(tdnn_blocks):
            out_channels = tdnn_channels[block_index]
            self.blocks.extend([
                Conv1d(
                    in_channels=in_channels,
                    out_channels=out_channels,
                    kernel_size=tdnn_kernel_sizes[block_index],
                    dilation=tdnn_dilations[block_index],
                ),
                activation(),
                BatchNorm1d(input_size=out_channels),
            ])
            in_channels = tdnn_channels[block_index]

        # Statistical pooling. It converts a tensor of variable length
        # into a fixed-length tensor. The statistical pooling returns the
        # mean and the standard deviation.
        self.blocks.append(StatisticsPooling())

        # Final linear transformation.
        self.blocks.append(
            Linear(
                input_size=out_channels * 2,  # mean + std,
                n_neurons=lin_neurons,
                bias=True,
                combine_dims=False,
            ))
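The input_size=out_channels * 2 of the final Linear follows from StatisticsPooling, which concatenates the per-channel mean and standard deviation computed over time. A hedged shape sketch, assuming SpeechBrain's StatisticsPooling and Linear:

import torch
from speechbrain.nnet.pooling import StatisticsPooling
from speechbrain.nnet.linear import Linear

feats = torch.rand(4, 200, 1500)            # (batch, time, channels) from the last TDNN block
pooled = StatisticsPooling()(feats)         # mean and std concatenated -> (4, 1, 3000)
emb = Linear(input_size=1500 * 2, n_neurons=512)(pooled)
print(emb.shape)                            # torch.Size([4, 1, 512])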