Example #1
def test_activations(activation_tuple):
    torch_act, asteroid_act = activation_tuple
    torch_act = torch_act()
    asteroid_act = activations.get(asteroid_act)()

    inp = torch.randn(10, 11, 12)
    assert_allclose(torch_act(inp), asteroid_act(inp))
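
Example #1 relies on a parametrized activation_tuple fixture that is not shown; the sketch below is one plausible way to supply it. The specific (torch class, identifier string) pairs, the assert_allclose import, and the module paths are assumptions, not taken from the original test file.

import pytest
import torch
from torch import nn
from torch.testing import assert_allclose  # assumed; newer torch prefers torch.testing.assert_close
from asteroid.masknn import activations    # assumed import path


# Hypothetical (torch activation class, asteroid identifier) pairs.
@pytest.fixture(params=[(nn.ReLU, "relu"), (nn.Tanh, "tanh"), (nn.Sigmoid, "sigmoid")])
def activation_tuple(request):
    return request.param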
Example #2
    def __init__(
        self, kernel_size, stride, in_chan, n_src, bn_chan, chunk_size, hop_size, mask_act
    ):
        super(SingleDecoder, self).__init__()
        self.kernel_size = kernel_size
        self.stride = stride
        self.in_chan = in_chan
        self.bn_chan = bn_chan
        self.chunk_size = chunk_size
        self.hop_size = hop_size
        self.n_src = n_src
        self.mask_act = mask_act

        # Masking in 3D space
        net_out_conv = nn.Conv2d(bn_chan, n_src * bn_chan, 1)
        self.first_out = nn.Sequential(nn.PReLU(), net_out_conv)
        # Gating and masking in 2D space (after fold)
        self.net_out = nn.Sequential(nn.Conv1d(bn_chan, bn_chan, 1), nn.Tanh())
        self.net_gate = nn.Sequential(nn.Conv1d(bn_chan, bn_chan, 1), nn.Sigmoid())
        self.mask_net = nn.Conv1d(bn_chan, in_chan, 1, bias=False)

        # Get activation function.
        mask_nl_class = activations.get(mask_act)
        # For softmax, feed the source dimension.
        if has_arg(mask_nl_class, "dim"):
            self.output_act = mask_nl_class(dim=1)
        else:
            self.output_act = mask_nl_class()

        _, self.trans_conv = make_enc_dec(
            "free", kernel_size=kernel_size, stride=stride, n_filters=in_chan
        )
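
Several of the examples above and below repeat the same lookup pattern: resolve a string identifier to an activation class with activations.get, then use has_arg to decide whether the class takes a dim argument. A minimal standalone sketch of that pattern, with import paths assumed from how the snippets use these helpers:

import torch
from asteroid.masknn import activations  # assumed import path
from asteroid.utils import has_arg       # assumed import path

mask_nl_class = activations.get("softmax")  # string identifier -> activation class
if has_arg(mask_nl_class, "dim"):
    # Softmax must know which axis to normalize over; use the source axis.
    output_act = mask_nl_class(dim=1)
else:
    # Most activations ("relu", "sigmoid", ...) take no arguments.
    output_act = mask_nl_class()

masks = output_act(torch.randn(2, 3, 64, 100))  # normalizes over dim=1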
Example #3
    def __init__(
        self,
        n_srcs,
        bn_chan=128,
        hid_size=128,
        chunk_size=100,
        hop_size=None,
        n_repeats=6,
        norm_type="gLN",
        mask_act="sigmoid",
        bidirectional=True,
        rnn_type="LSTM",
        num_layers=1,
        dropout=0,
        kernel_size=16,
        n_filters=64,
        stride=8,
        encoder_activation=None,
        use_mulcat=False,
        sample_rate=8000,
    ):
        super().__init__(sample_rate=sample_rate)
        self.encoder_activation = encoder_activation
        self.enc_activation = activations.get(encoder_activation or "linear")()
        hop_size = hop_size if hop_size is not None else chunk_size // 2
        self.encoder, _ = make_enc_dec(
            "free",
            kernel_size=kernel_size,
            n_filters=n_filters,
            stride=stride,
        )
        # Update in_chan
        self.masker = DPRNN_MultiStage(
            in_chan=n_filters,
            bn_chan=bn_chan,
            hid_size=hid_size,
            chunk_size=chunk_size,
            hop_size=hop_size,
            n_repeats=n_repeats,
            norm_type=norm_type,
            bidirectional=bidirectional,
            rnn_type=rnn_type,
            use_mulcat=use_mulcat,
            num_layers=num_layers,
            dropout=dropout,
        )
        self.decoder_select = Decoder_Select(
            kernel_size=kernel_size,
            stride=stride,
            in_chan=n_filters,
            n_srcs=n_srcs,
            bn_chan=bn_chan,
            chunk_size=chunk_size,
            hop_size=hop_size,
            mask_act=mask_act,
        )

        """
    def __init__(
        self,
        embed_dim,
        n_heads,
        dim_ff,
        dropout=0.0,
        activation="relu",
        norm="gLN",
    ):
        super(PreLNTransformerLayer, self).__init__()

        self.mha = MultiheadAttention(embed_dim, n_heads, dropout=dropout)
        self.dropout = nn.Dropout(dropout)
        self.linear1 = nn.Linear(embed_dim, dim_ff)
        self.linear2 = nn.Linear(dim_ff, embed_dim)
        self.activation = activations.get(activation)()
        self.norm_mha = norms.get(norm)(embed_dim)
        self.norm_ff = norms.get(norm)(embed_dim)
Example #5
    def __init__(
        self,
        embed_dim,
        n_heads,
        dim_ff,
        dropout=0.0,
        activation="relu",
        bidirectional=True,
        norm="gLN",
    ):
        super(ImprovedTransformedLayer, self).__init__()

        self.mha = MultiheadAttention(embed_dim, n_heads, dropout=dropout)
        self.recurrent = nn.LSTM(embed_dim, dim_ff, bidirectional=bidirectional)
        self.dropout = nn.Dropout(dropout)
        ff_inner_dim = 2 * dim_ff if bidirectional else dim_ff
        self.linear = nn.Linear(ff_inner_dim, embed_dim)
        self.activation = activations.get(activation)()
        self.norm_mha = norms.get(norm)(embed_dim)
        self.norm_ff = norms.get(norm)(embed_dim)
Example #6
    def __init__(
        self,
        in_chan,
        n_src,
        n_heads=4,
        ff_hid=256,
        chunk_size=100,
        hop_size=None,
        n_repeats=6,
        norm_type="gLN",
        ff_activation="relu",
        mask_act="relu",
        bidirectional=True,
        dropout=0,
    ):
        super(DPTransformer, self).__init__()
        self.in_chan = in_chan
        self.n_src = n_src
        self.n_heads = n_heads
        self.ff_hid = ff_hid
        self.chunk_size = chunk_size
        hop_size = hop_size if hop_size is not None else chunk_size // 2
        self.hop_size = hop_size
        self.n_repeats = n_repeats
        self.n_src = n_src
        self.norm_type = norm_type
        self.ff_activation = ff_activation
        self.mask_act = mask_act
        self.bidirectional = bidirectional
        self.dropout = dropout

        self.mha_in_dim = ceil(self.in_chan / self.n_heads) * self.n_heads
        if self.in_chan % self.n_heads != 0:
            warnings.warn(
                f"DPTransformer input dim ({self.in_chan}) is not a multiple of the number of "
                f"heads ({self.n_heads}). Adding extra linear layer at input to accomodate "
                f"(size [{self.in_chan} x {self.mha_in_dim}])")
            self.input_layer = nn.Linear(self.in_chan, self.mha_in_dim)
        else:
            self.input_layer = None

        self.in_norm = norms.get(norm_type)(self.mha_in_dim)
        self.ola = DualPathProcessing(self.chunk_size, self.hop_size)

        # Succession of DPRNNBlocks.
        self.layers = nn.ModuleList([])
        for x in range(self.n_repeats):
            self.layers.append(
                nn.ModuleList([
                    ImprovedTransformedLayer(
                        self.mha_in_dim,
                        self.n_heads,
                        self.ff_hid,
                        self.dropout,
                        self.ff_activation,
                        True,
                        self.norm_type,
                    ),
                    ImprovedTransformedLayer(
                        self.mha_in_dim,
                        self.n_heads,
                        self.ff_hid,
                        self.dropout,
                        self.ff_activation,
                        self.bidirectional,
                        self.norm_type,
                    ),
                ]))
        net_out_conv = nn.Conv2d(self.mha_in_dim, n_src * self.in_chan, 1)
        self.first_out = nn.Sequential(nn.PReLU(), net_out_conv)
        # Gating and masking in 2D space (after fold)
        self.net_out = nn.Sequential(nn.Conv1d(self.in_chan, self.in_chan, 1),
                                     nn.Tanh())
        self.net_gate = nn.Sequential(nn.Conv1d(self.in_chan, self.in_chan, 1),
                                      nn.Sigmoid())

        # Get activation function.
        mask_nl_class = activations.get(mask_act)
        # For softmax, feed the source dimension.
        if has_arg(mask_nl_class, "dim"):
            self.output_act = mask_nl_class(dim=1)
        else:
            self.output_act = mask_nl_class()
Example #7
    def __init__(
        self,
        in_chan,
        n_src,
        n_heads=4,
        ff_hid=256,
        chunk_size=100,
        hop_size=None,
        n_repeats=6,
        norm_type="gLN",
        ff_activation="relu",
        mask_act="relu",
        bidirectional=True,
        dropout=0,
    ):
        super(DPTransformer, self).__init__()
        self.in_chan = in_chan
        self.n_src = n_src
        self.n_heads = n_heads
        self.ff_hid = ff_hid
        self.chunk_size = chunk_size
        hop_size = hop_size if hop_size is not None else chunk_size // 2
        self.hop_size = hop_size
        self.n_repeats = n_repeats
        self.n_src = n_src
        self.norm_type = norm_type
        self.ff_activation = ff_activation
        self.mask_act = mask_act
        self.bidirectional = bidirectional
        self.dropout = dropout

        self.in_norm = norms.get(norm_type)(in_chan)

        # Succession of DPRNNBlocks.
        self.layers = nn.ModuleList([])
        for x in range(self.n_repeats):
            self.layers.append(
                nn.ModuleList([
                    ImprovedTransformedLayer(
                        self.in_chan,
                        self.n_heads,
                        self.ff_hid,
                        self.dropout,
                        self.ff_activation,
                        True,
                        self.norm_type,
                    ),
                    ImprovedTransformedLayer(
                        self.in_chan,
                        self.n_heads,
                        self.ff_hid,
                        self.dropout,
                        self.ff_activation,
                        self.bidirectional,
                        self.norm_type,
                    ),
                ]))
        net_out_conv = nn.Conv2d(self.in_chan, n_src * self.in_chan, 1)
        self.first_out = nn.Sequential(nn.PReLU(), net_out_conv)
        # Gating and masking in 2D space (after fold)
        self.net_out = nn.Sequential(nn.Conv1d(self.in_chan, self.in_chan, 1),
                                     nn.Tanh())
        self.net_gate = nn.Sequential(nn.Conv1d(self.in_chan, self.in_chan, 1),
                                      nn.Sigmoid())

        # Get activation function.
        mask_nl_class = activations.get(mask_act)
        # For softmax, feed the source dimension.
        if has_arg(mask_nl_class, "dim"):
            self.output_act = mask_nl_class(dim=1)
        else:
            self.output_act = mask_nl_class()
Example #8
    def __init__(
        self,
        embed_dim,  # in_chan
        n_heads,
        dim_ff,
        dropout=0.0,
        activation="relu",  # ff activation
        bidirectional=True,
        norm="gLN",
        n_blocks=3,
        bn_chan=128,
        hid_chan=512,
        skip_chan=128,
        conv_kernel_size=3,
    ):
        super(DualTransformedLayer, self).__init__()

        # query,key,value dim
        self.mha = MultiheadAttention(embed_dim, n_heads, dropout=dropout)

        # ------1------

        # self.recurrent = nn.LSTM(embed_dim, dim_ff, bidirectional=bidirectional)
        # ff_inner_dim = 2 * dim_ff if bidirectional else dim_ff
        # self.linear = nn.Linear(ff_inner_dim, embed_dim)

        # ------2------
        # input dim, hidden dim
        self.ff = nn.Sequential(
            norms.get(norm)(embed_dim),
            activations.get(activation)(),
            nn.Conv1d(embed_dim,
                      dim_ff,
                      kernel_size=1,
                      stride=1,
                      padding=0,
                      bias=False),
            norms.get(norm)(dim_ff),
            activations.get(activation)(),
            nn.Conv1d(dim_ff,
                      embed_dim,
                      kernel_size=1,
                      stride=1,
                      padding=0,
                      bias=False),
        )

        # # ------3------
        # self.skip_chan = skip_chan
        # self.ff = nn.ModuleList()
        # for x in range(n_blocks):
        #     padding = (conv_kernel_size - 1) * (2 ** x - 1) // 2
        #     self.ff.append(
        #         Conv1DBlock(
        #             bn_chan,
        #             hid_chan,
        #             skip_chan,
        #             conv_kernel_size,
        #             padding=padding,
        #             dilation=(2 ** x - 1),
        #             norm_type=norm,
        #         )
        #     )

        self.dropout = nn.Dropout(dropout)
        self.activation = activations.get(activation)()
        self.norm_mha = norms.get(norm)(embed_dim)
        self.norm_ff = norms.get(norm)(embed_dim)
Example #9
    def __init__(
        self,
        in_chan,  # encoder out channel 64
        n_src,
        out_chan=None,
        bn_chan=64,
        n_heads=4,
        ff_hid=256,
        rnn_hid=128,
        rnn_layers=1,
        pe_conv_k=3,
        chunk_size=100,
        hop_size=None,  # 50
        n_repeats=6,  # 2
        norm_type="gLN",
        ff_activation="relu",
        mask_act="relu",  # sigmoid
        bidirectional=True,
        dropout=0,
    ):
        super(DualTransformer, self).__init__()
        self.in_chan = in_chan
        out_chan = out_chan if out_chan is not None else in_chan
        self.out_chan = out_chan
        self.bn_chan = bn_chan
        self.n_src = n_src
        self.n_heads = n_heads
        self.ff_hid = ff_hid
        self.rnn_hid = rnn_hid
        self.rnn_layers = rnn_layers
        self.chunk_size = chunk_size
        hop_size = hop_size if hop_size is not None else chunk_size // 2
        self.hop_size = hop_size
        self.n_repeats = n_repeats
        self.n_src = n_src
        self.norm_type = norm_type
        self.ff_activation = ff_activation
        self.mask_act = mask_act
        self.bidirectional = bidirectional
        self.dropout = dropout

        # mean, var for the whole sequence and channel, but gamma beta only for channel size
        # gln vs cln: on whole sequence or separately
        # self.in_norm = norms.get(norm_type)(in_chan)
        layer_norm = norms.get(norm_type)(in_chan)
        bottleneck_conv = nn.Conv1d(in_chan, bn_chan, 1)
        self.bottleneck = nn.Sequential(layer_norm, bottleneck_conv)

        pe_conv_list = []
        for i in range(pe_conv_k):
            pe_conv_list.append(
                nn.Conv2d(bn_chan,
                          bn_chan,
                          kernel_size=3,
                          stride=1,
                          padding=1,
                          bias=False))
            pe_conv_list.append(norms.get(norm_type)(bn_chan))
            pe_conv_list.append(activations.get(ff_activation)())
        self.pe_conv = nn.Sequential(*pe_conv_list)
        d_model = self.bn_chan

        # # *2 for PE
        # self.pe = PositionalEmbedding(in_chan)
        # d_model = self.in_chan * 2

        # Succession of DPRNNBlocks.
        self.layers = nn.ModuleList([])
        for x in range(self.n_repeats):
            self.layers.append(
                nn.ModuleList([
                    # ImprovedTransformedLayer(
                    #     d_model,
                    #     self.n_heads,
                    #     self.ff_hid,
                    #     self.dropout,
                    #     self.ff_activation,
                    #     True,
                    #     self.norm_type,
                    # ),
                    # ImprovedTransformedLayer(
                    #     d_model,
                    #     self.n_heads,
                    #     self.ff_hid,
                    #     self.dropout,
                    #     self.ff_activation,
                    #     self.bidirectional,
                    #     self.norm_type,
                    # ),
                    SingleRNNBlock(
                        in_chan=d_model,
                        hid_size=self.rnn_hid,
                        norm_type=self.norm_type,
                        bidirectional=self.bidirectional,
                        rnn_type='LSTM',
                        num_layers=1,
                        dropout=self.dropout,
                    ),

                    # DualTransformedLayer(
                    #     d_model,
                    #     self.n_heads,
                    #     self.ff_hid,
                    #     self.dropout,
                    #     self.ff_activation,
                    #     self.norm_type,
                    # ),
                    AcousticTransformerLayer(
                        d_model,
                        self.n_heads,
                        self.ff_hid,
                        self.dropout,
                        self.ff_activation,
                        self.norm_type,
                    ),
                ]))
            # self.layers.append(
            #     nn.ModuleList(
            #         [
            #             DualTransformedLayer(
            #                 d_model,
            #                 self.n_heads,
            #                 self.ff_hid,
            #                 self.dropout,
            #                 self.ff_activation,
            #                 self.norm_type,
            #             ),
            #             DualTransformedLayer(
            #                 d_model,
            #                 self.n_heads,
            #                 self.ff_hid,
            #                 self.dropout,
            #                 self.ff_activation,
            #                 self.norm_type,
            #             ),
            #         ]
            #     )
            # )
        # 1x1 conv
        # *2 for PE
        self.strnn_norm_out = norms.get(norm_type)(self.bn_chan)
        net_out_conv = nn.Conv2d(d_model, n_src * self.bn_chan, 1)
        self.first_out = nn.Sequential(nn.PReLU(), net_out_conv)
        # Gating and masking in 2D space (after fold)
        self.net_out = nn.Sequential(nn.Conv1d(self.bn_chan, self.bn_chan, 1),
                                     nn.Tanh())
        self.net_gate = nn.Sequential(nn.Conv1d(self.bn_chan, self.bn_chan, 1),
                                      nn.Sigmoid())
        self.mask_net = nn.Conv1d(bn_chan, out_chan, 1, bias=False)

        # Get activation function.
        mask_nl_class = activations.get(mask_act)
        # For softmax, feed the source dimension.
        if has_arg(mask_nl_class, "dim"):
            self.output_act = mask_nl_class(dim=1)
        else:
            self.output_act = mask_nl_class()
Example #10
    def __init__(
            self,
            embed_dim,  # in_chan
            n_heads,
            dim_ff,
            dropout=0.0,
            activation="relu",  # ff activation
            bidirectional=True,
            norm="gLN",
            n_blocks=3,
            bn_chan=128,
            hid_chan=512,
            skip_chan=128,
            conv_kernel_size=3,
            use_sdu=False,
            use_mem=False,
            num_mem_token=2):
        super(AcousticTransformerLayer, self).__init__()

        self.use_sdu = use_sdu
        self.use_mem = use_mem
        self.num_mem_token = num_mem_token

        if use_mem:
            w = torch.empty(num_mem_token, embed_dim)
            nn.init.xavier_uniform_(w, gain=nn.init.calculate_gain('relu'))
            # Unsqueeze before wrapping so the memory tokens stay a registered nn.Parameter.
            self.mem = nn.Parameter(w.unsqueeze(0), requires_grad=True)

        # query,key,value dim
        self.mha = MultiheadAttention(embed_dim, n_heads, dropout=dropout)

        # input dim, hidden dim
        # self.ff1 = nn.Sequential(
        #     norms.get(norm)(embed_dim),
        #     nn.Conv1d(embed_dim, dim_ff, kernel_size=1, stride=1, padding=0, bias=True),
        #     activations.get(activation)(),
        # )
        self.ff1 = nn.Conv1d(embed_dim,
                             dim_ff,
                             kernel_size=1,
                             stride=1,
                             padding=0,
                             bias=True)
        self.ff2 = nn.Conv1d(dim_ff,
                             embed_dim,
                             kernel_size=1,
                             stride=1,
                             padding=0,
                             bias=True)

        self.dropout = nn.Dropout(dropout)
        self.activation = activations.get(activation)()
        self.norm_mha = norms.get(norm)(embed_dim)
        self.norm_ff = norms.get(norm)(embed_dim)
        self.norm_out = norms.get(norm)(embed_dim)

        if use_sdu:
            self.mha_out = nn.Sequential(nn.Conv1d(embed_dim, embed_dim, 1),
                                         nn.Tanh())
            self.mha_gate = nn.Sequential(nn.Conv1d(embed_dim, embed_dim, 1),
                                          nn.Sigmoid())
            self.ff_out = nn.Sequential(nn.Conv1d(embed_dim, embed_dim, 1),
                                        nn.Tanh())
            self.ff_gate = nn.Sequential(nn.Conv1d(embed_dim, embed_dim, 1),
                                         nn.Sigmoid())
Example #11
def test_get_none():
    assert activations.get(None) is None
Example #12
def test_get_errors(wrong):
    with pytest.raises(ValueError):
        # Should raise for unknown strings and anything that is not a valid activation identifier
        activations.get(wrong)
Example #13
def test_softmax():
    torch_softmax = nn.Softmax(dim=-1)
    asteroid_softmax = activations.get("softmax")(dim=-1)
    inp = torch.randn(10, 11, 12)
    assert_allclose(torch_softmax(inp), asteroid_softmax(inp))
    assert torch_softmax == activations.get(torch_softmax)
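
Taken together, the test snippets (Examples #1, #11, #12, and #13) pin down the behaviour of activations.get: None passes through unchanged, a known string resolves to an activation class, an already-instantiated callable is returned as-is, and anything else raises ValueError. A compact sketch, with the import path assumed:

from asteroid.masknn import activations  # assumed import path

assert activations.get(None) is None          # None passes through (Example #11)
softmax_cls = activations.get("softmax")      # known string -> activation class
softmax = softmax_cls(dim=-1)
assert activations.get(softmax) == softmax    # instances are returned as-is (Example #13)
try:
    activations.get(42)                       # neither a string nor a callable
except ValueError:
    pass                                      # unknown identifiers raise (Example #12)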