Example #1
def test_norms(norm_str, channel_size):
    norm_layer = norms.get(norm_str)
    # Use get on the class
    out_from_get = norms.get(norm_layer)
    assert out_from_get == norm_layer
    # Use get on the instance
    norm_layer = norm_layer(channel_size)
    out_from_get = norms.get(norm_layer)
    assert out_from_get == norm_layer

    # Test forward
    inp = torch.randn(4, channel_size, 12)
    _ = norm_layer(inp)
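For context, a minimal standalone sketch of the same lookup done by string name. It assumes the registry exercised by this test is asteroid.masknn.norms, and uses "gLN", one of the built-in aliases that appears throughout the examples below:

import torch
from asteroid.masknn import norms

norm_cls = norms.get("gLN")               # string name -> norm class
norm_layer = norm_cls(8)                  # instantiate with the channel size
out = norm_layer(torch.randn(4, 8, 12))   # (batch, channels, time)
assert out.shape == (4, 8, 12)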
Example #2
    def __init__(self, in_chan, n_src, out_chan=None, n_blocks=8, n_repeats=3,
                 bn_chan=128, hid_chan=512,  kernel_size=3,
                 norm_type="gLN"):
        super(TCN, self).__init__()
        self.in_chan = in_chan
        self.n_src = n_src
        out_chan = out_chan if out_chan else in_chan
        self.out_chan = out_chan
        self.n_blocks = n_blocks
        self.n_repeats = n_repeats
        self.bn_chan = bn_chan
        self.hid_chan = hid_chan
        self.kernel_size = kernel_size
        self.norm_type = norm_type

        layer_norm = norms.get(norm_type)(in_chan)
        bottleneck_conv = nn.Conv1d(in_chan, bn_chan, 1)
        self.bottleneck = nn.Sequential(layer_norm, bottleneck_conv)
        # Succession of Conv1DBlock with exponentially increasing dilation.
        self.TCN = nn.ModuleList()
        for r in range(n_repeats):
            for x in range(n_blocks):
                padding = (kernel_size - 1) * 2**x // 2
                self.TCN.append(Conv1DBlock(bn_chan, hid_chan,
                                            kernel_size, padding=padding,
                                            dilation=2**x, norm_type=norm_type))
        out_conv = nn.Conv1d(bn_chan, n_src*out_chan, 1)
        self.out = nn.Sequential(nn.PReLU(), out_conv)
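The padding = (kernel_size - 1) * 2**x // 2 line is what keeps every dilated block length-preserving. A quick standalone check with a plain nn.Conv1d, no asteroid dependency, values chosen only for illustration:

import torch
import torch.nn as nn

kernel_size, x = 3, 4
padding = (kernel_size - 1) * 2**x // 2                   # 16 for dilation 2**4
conv = nn.Conv1d(16, 16, kernel_size, padding=padding, dilation=2**x)
out = conv(torch.randn(2, 16, 100))
assert out.shape == (2, 16, 100)                          # time dimension unchanged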
Example #3
File: TCN.py, Project: popcornell/MicRank
    def __init__(self, in_chan=256, out_chan_tcn=1, n_blocks=5, n_repeats=3,
                 bn_chan=64, hid_chan=128,  kernel_size=3,
                 norm_type="gLN",
                 chunk=200,
                 stride=40,
                 ):

        super(TCN, self).__init__()
        self.in_chan = in_chan
        self.out_chan_tcn = out_chan_tcn
        self.n_blocks = n_blocks
        self.n_repeats = n_repeats
        self.bn_chan = bn_chan
        self.hid_chan = hid_chan
        self.kernel_size = kernel_size
        self.norm_type = norm_type
        self.chunk = chunk
        self.stride = stride

        self.in_norm = norms.get(norm_type)(in_chan)
        self.bottleneck = nn.Sequential(nn.Conv1d(in_chan, bn_chan, 1))
        # Succession of Conv1DBlock with exponentially increasing dilation.
        self.TCN = nn.ModuleList()
        for r in range(n_repeats):
            res_blocks = nn.ModuleList()
            for x in range(n_blocks):
                padding = (kernel_size - 1) * 2**x // 2
                res_blocks.append(Conv1DBlock(bn_chan, hid_chan,
                                            kernel_size, padding=padding,
                                            dilation=2**x, norm_type=norm_type))

            self.TCN.append(res_blocks)

        self.out = nn.Sequential(nn.PReLU(), nn.Conv1d(bn_chan, 1, 1))
Example #4
    def __init__(
        self,
        embed_dim,
        n_heads,
        dim_ff,
        dropout=0.0,
        activation="relu",
        norm="gLN",
    ):
        super(PreLNTransformerLayer, self).__init__()

        self.mha = MultiheadAttention(embed_dim, n_heads, dropout=dropout)
        self.dropout = nn.Dropout(dropout)
        self.linear1 = nn.Linear(embed_dim, dim_ff)
        self.linear2 = nn.Linear(dim_ff, embed_dim)
        self.activation = activations.get(activation)()
        self.norm_mha = norms.get(norm)(embed_dim)
        self.norm_ff = norms.get(norm)(embed_dim)
Example #5
    def __init__(self, in_chan, hid_chan, kernel_size, padding,
                 dilation, norm_type="bN", delta=False):
        super(Conv1DBlock, self).__init__()
        conv_norm = norms.get(norm_type)
        self.delta = delta
        if delta:
            self.linear = nn.Linear(in_chan, in_chan)
            self.linear_norm = norms.get(norm_type)(in_chan*2)

        in_bottle = in_chan if not delta else in_chan*2
        in_conv1d = nn.Conv1d(in_bottle, hid_chan, 1)
        depth_conv1d = nn.Conv1d(hid_chan, hid_chan, kernel_size,
                                 padding=padding, dilation=dilation,
                                 groups=hid_chan)
        self.shared_block = nn.Sequential(in_conv1d, nn.PReLU(),
                                          conv_norm(hid_chan), depth_conv1d,
                                          nn.PReLU(), conv_norm(hid_chan))
        self.res_conv = nn.Conv1d(hid_chan, in_chan, 1)
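The groups=hid_chan argument makes the dilated convolution depthwise (one filter per channel), which is where most of the parameter saving in this block comes from. A standalone comparison, not part of the original snippet:

import torch.nn as nn

depthwise = nn.Conv1d(512, 512, 3, padding=1, groups=512)
full = nn.Conv1d(512, 512, 3, padding=1)
print(sum(p.numel() for p in depthwise.parameters()))  # 512*3 + 512 bias = 2048
print(sum(p.numel() for p in full.parameters()))       # 512*512*3 + 512 bias = 786944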
Example #6
    def __init__(
        self,
        embed_dim,
        n_heads,
        dim_ff,
        dropout=0.0,
        activation="relu",
        bidirectional=True,
        norm="gLN",
    ):
        super(ImprovedTransformedLayer, self).__init__()

        self.mha = MultiheadAttention(embed_dim, n_heads, dropout=dropout)
        self.recurrent = nn.LSTM(embed_dim, dim_ff, bidirectional=bidirectional)
        self.dropout = nn.Dropout(dropout)
        ff_inner_dim = 2 * dim_ff if bidirectional else dim_ff
        self.linear = nn.Linear(ff_inner_dim, embed_dim)
        self.activation = activations.get(activation)()
        self.norm_mha = norms.get(norm)(embed_dim)
        self.norm_ff = norms.get(norm)(embed_dim)
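The ff_inner_dim = 2 * dim_ff if bidirectional else dim_ff line accounts for a bidirectional LSTM concatenating both directions in its output; a minimal check:

import torch
import torch.nn as nn

lstm = nn.LSTM(64, 128, bidirectional=True)
out, _ = lstm(torch.randn(10, 4, 64))    # (seq_len, batch, input_size)
assert out.shape == (10, 4, 256)         # 2 * hidden_size when bidirectional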
Example #7
def test_register():
    class Custom(nn.Module):
        def __init__(self):
            super().__init__()

    norms.register_norm(Custom)
    cls = norms.get("Custom")
    assert cls == Custom

    with pytest.raises(ValueError):
        norms.register_norm(norms.CumLN)
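A sketch mirroring this test: register a hypothetical custom normalization class once, then resolve it by its class name like the built-ins. As the test asserts, re-registering an already known norm such as norms.CumLN raises ValueError.

import torch
import torch.nn as nn
from asteroid.masknn import norms

class ChannelScale(nn.Module):       # hypothetical example norm
    def __init__(self, channel_size):
        super().__init__()
        self.gamma = nn.Parameter(torch.ones(1, channel_size, 1))

    def forward(self, x):
        return self.gamma * x

norms.register_norm(ChannelScale)
layer = norms.get("ChannelScale")(8)
assert layer(torch.randn(4, 8, 12)).shape == (4, 8, 12)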
Example #8
    def __init__(
        self,
        in_chan,
        bn_chan,
        hid_size,
        chunk_size,
        hop_size=None,
        n_repeats=6,
        norm_type="gLN",
        bidirectional=True,
        rnn_type="LSTM",
        use_mulcat=True,
        num_layers=1,
        dropout=0,
    ):
        super(DPRNN_Multistage, self).__init__()
        self.in_chan = in_chan
        self.bn_chan = bn_chan
        self.hid_size = hid_size
        self.chunk_size = chunk_size
        hop_size = hop_size if hop_size is not None else chunk_size // 2
        self.hop_size = hop_size
        self.n_repeats = n_repeats
        self.norm_type = norm_type
        self.bidirectional = bidirectional
        self.rnn_type = rnn_type
        self.num_layers = num_layers
        self.dropout = dropout
        self.use_mulcat = use_mulcat

        layer_norm = norms.get(norm_type)(in_chan)
        bottleneck_conv = nn.Conv1d(in_chan, bn_chan, 1)
        self.bottleneck = nn.Sequential(layer_norm, bottleneck_conv)

        # Succession of DPRNNBlocks.
        self.net = nn.ModuleList([])
        for i in range(self.n_repeats):
            self.net.append(
                DPRNNBlock(
                    bn_chan,
                    hid_size,
                    norm_type=norm_type,
                    bidirectional=bidirectional,
                    rnn_type=rnn_type,
                    use_mulcat=use_mulcat,
                    num_layers=num_layers,
                    dropout=dropout,
                ))
Example #9
    def __init__(self,
                 input_size,
                 n_outs=5,
                 hidden_sizes=(512, 1024, 512, 256),
                 bidirectional=False):
        super(LSTMDense, self).__init__()
        self.norm = norms.get("gLN")(input_size)
        self.lstm = nn.LSTM(input_size,
                            hidden_sizes[0],
                            bidirectional=bidirectional)

        out_feats = hidden_sizes[0] if not bidirectional else hidden_sizes[0] * 2

        self.denses = nn.Sequential(
            nn.Linear(out_feats, hidden_sizes[1]), nn.ReLU(),
            nn.Linear(hidden_sizes[1], hidden_sizes[2]), nn.ReLU(),
            nn.Linear(hidden_sizes[2], hidden_sizes[3]), nn.ReLU())

        self.out = nn.Sequential(nn.Linear(hidden_sizes[-1], n_outs),
                                 nn.Softmax())
Example #10
File: crnn.py, Project: popcornell/OSDC
    def __init__(self,
                 in_size,
                 n_outs=3,
                 repeats=2,
                 blocks=2,
                 channels=(64, 32, 128, 64),
                 hidden_size=40,
                 bidirectional=False,
                 ksz=(3, 3),
                 dropout=0,
                 activation=nn.ReLU()):
        super(C2DRNN, self).__init__()
        self.in_size = in_size

        assert len(channels) == repeats * blocks
        self.norm = norms.get("gLN")(in_size)
        net = []
        for i in range(repeats):
            for j in range(blocks):

                if i == 0 and j == 0:
                    conv_in = 1
                else:
                    conv_in = channels[i * 2 + j - 1]

                net.extend([
                    nn.Conv2d(conv_in, channels[i * 2 + j], ksz, 1), activation
                ])

            net.append(nn.MaxPool2d(ksz))

        net.append(nn.Dropout(dropout))

        self.feats = nn.Sequential(*net)
        self.lstm = nn.LSTM(64 * 5, hidden_size, bidirectional=bidirectional)
        feats_in = hidden_size if not bidirectional else hidden_size * 2
        self.max1d = nn.MaxPool1d(2, stride=2)
        self.out = nn.Sequential(nn.Linear(1040, n_outs), nn.Softmax(-1))
Example #11
    def __init__(
        self,
        in_chan,
        n_src,
        n_heads=4,
        ff_hid=256,
        chunk_size=100,
        hop_size=None,
        n_repeats=6,
        norm_type="gLN",
        ff_activation="relu",
        mask_act="relu",
        bidirectional=True,
        dropout=0,
    ):
        super(DPTransformer, self).__init__()
        self.in_chan = in_chan
        self.n_src = n_src
        self.n_heads = n_heads
        self.ff_hid = ff_hid
        self.chunk_size = chunk_size
        hop_size = hop_size if hop_size is not None else chunk_size // 2
        self.hop_size = hop_size
        self.n_repeats = n_repeats
        self.norm_type = norm_type
        self.ff_activation = ff_activation
        self.mask_act = mask_act
        self.bidirectional = bidirectional
        self.dropout = dropout

        self.in_norm = norms.get(norm_type)(in_chan)

        # Succession of DPRNNBlocks.
        self.layers = nn.ModuleList([])
        for x in range(self.n_repeats):
            self.layers.append(
                nn.ModuleList([
                    ImprovedTransformedLayer(
                        self.in_chan,
                        self.n_heads,
                        self.ff_hid,
                        self.dropout,
                        self.ff_activation,
                        True,
                        self.norm_type,
                    ),
                    ImprovedTransformedLayer(
                        self.in_chan,
                        self.n_heads,
                        self.ff_hid,
                        self.dropout,
                        self.ff_activation,
                        self.bidirectional,
                        self.norm_type,
                    ),
                ]))
        net_out_conv = nn.Conv2d(self.in_chan, n_src * self.in_chan, 1)
        self.first_out = nn.Sequential(nn.PReLU(), net_out_conv)
        # Gating and masking in 2D space (after fold)
        self.net_out = nn.Sequential(nn.Conv1d(self.in_chan, self.in_chan, 1),
                                     nn.Tanh())
        self.net_gate = nn.Sequential(nn.Conv1d(self.in_chan, self.in_chan, 1),
                                      nn.Sigmoid())

        # Get activation function.
        mask_nl_class = activations.get(mask_act)
        # For softmax, feed the source dimension.
        if has_arg(mask_nl_class, "dim"):
            self.output_act = mask_nl_class(dim=1)
        else:
            self.output_act = mask_nl_class()
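The has_arg(mask_nl_class, "dim") check at the end decides whether the mask activation needs an explicit dimension (Softmax does, over the source axis dim=1; PReLU and most others do not). A minimal stand-in using inspect, under the assumption that has_arg simply inspects the constructor signature:

import inspect
import torch.nn as nn

def has_arg(fn, arg_name):
    return arg_name in inspect.signature(fn).parameters

for mask_nl_class in (nn.Softmax, nn.PReLU):
    if has_arg(mask_nl_class, "dim"):
        output_act = mask_nl_class(dim=1)   # softmax over the source dimension
    else:
        output_act = mask_nl_class()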
Example #12
def test_get_none():
    assert norms.get(None) is None
Example #13
def test_get_errors(wrong):
    with pytest.raises(ValueError):
        # Should raise for anything that is not a norm class/instance or a known string
        norms.get(wrong)
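The exact parametrization of `wrong` is not shown here; an assumed illustration of the two failure modes the comment describes, an unknown string name and an object that is neither a class nor an instance:

import pytest
from asteroid.masknn import norms

for wrong in ("not_a_registered_norm", 42):
    with pytest.raises(ValueError):
        norms.get(wrong)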
Example #14
    def __init__(
        self,
        in_chan,
        n_src,
        n_heads=4,
        ff_hid=256,
        chunk_size=100,
        hop_size=None,
        n_repeats=6,
        norm_type="gLN",
        ff_activation="relu",
        mask_act="relu",
        bidirectional=True,
        dropout=0,
    ):
        super(DPTransformer, self).__init__()
        self.in_chan = in_chan
        self.n_src = n_src
        self.n_heads = n_heads
        self.ff_hid = ff_hid
        self.chunk_size = chunk_size
        hop_size = hop_size if hop_size is not None else chunk_size // 2
        self.hop_size = hop_size
        self.n_repeats = n_repeats
        self.norm_type = norm_type
        self.ff_activation = ff_activation
        self.mask_act = mask_act
        self.bidirectional = bidirectional
        self.dropout = dropout

        self.mha_in_dim = ceil(self.in_chan / self.n_heads) * self.n_heads
        if self.in_chan % self.n_heads != 0:
            warnings.warn(
                f"DPTransformer input dim ({self.in_chan}) is not a multiple of the number of "
                f"heads ({self.n_heads}). Adding extra linear layer at input to accomodate "
                f"(size [{self.in_chan} x {self.mha_in_dim}])")
            self.input_layer = nn.Linear(self.in_chan, self.mha_in_dim)
        else:
            self.input_layer = None

        self.in_norm = norms.get(norm_type)(self.mha_in_dim)
        self.ola = DualPathProcessing(self.chunk_size, self.hop_size)

        # Succession of DPRNNBlocks.
        self.layers = nn.ModuleList([])
        for x in range(self.n_repeats):
            self.layers.append(
                nn.ModuleList([
                    ImprovedTransformedLayer(
                        self.mha_in_dim,
                        self.n_heads,
                        self.ff_hid,
                        self.dropout,
                        self.ff_activation,
                        True,
                        self.norm_type,
                    ),
                    ImprovedTransformedLayer(
                        self.mha_in_dim,
                        self.n_heads,
                        self.ff_hid,
                        self.dropout,
                        self.ff_activation,
                        self.bidirectional,
                        self.norm_type,
                    ),
                ]))
        net_out_conv = nn.Conv2d(self.mha_in_dim, n_src * self.in_chan, 1)
        self.first_out = nn.Sequential(nn.PReLU(), net_out_conv)
        # Gating and masking in 2D space (after fold)
        self.net_out = nn.Sequential(nn.Conv1d(self.in_chan, self.in_chan, 1),
                                     nn.Tanh())
        self.net_gate = nn.Sequential(nn.Conv1d(self.in_chan, self.in_chan, 1),
                                      nn.Sigmoid())

        # Get activation function.
        mask_nl_class = activations.get(mask_act)
        # For softmax, feed the source dimension.
        if has_arg(mask_nl_class, "dim"):
            self.output_act = mask_nl_class(dim=1)
        else:
            self.output_act = mask_nl_class()
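The mha_in_dim computation rounds the input dimension up to the nearest multiple of n_heads so MultiheadAttention can split the embedding evenly; a worked check of the arithmetic, with numbers chosen only for illustration:

from math import ceil

in_chan, n_heads = 70, 4
mha_in_dim = ceil(in_chan / n_heads) * n_heads
assert mha_in_dim == 72          # 70 is padded up to 72, hence nn.Linear(70, 72)
assert mha_in_dim % n_heads == 0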
Example #15
    def __init__(
            self,
            embed_dim,  # in_chan
            n_heads,
            dim_ff,
            dropout=0.0,
            activation="relu",  # ff activation
            bidirectional=True,
            norm="gLN",
            n_blocks=3,
            bn_chan=128,
            hid_chan=512,
            skip_chan=128,
            conv_kernel_size=3,
            use_sdu=False,
            use_mem=False,
            num_mem_token=2):
        super(AcousticTransformerLayer, self).__init__()

        self.use_sdu = use_sdu
        self.use_mem = use_mem
        self.num_mem_token = num_mem_token

        if use_mem:
            w = torch.empty(num_mem_token, embed_dim)
            nn.init.xavier_uniform_(w, gain=nn.init.calculate_gain('relu'))
            # Unsqueeze before wrapping so the memory tokens stay a registered Parameter.
            self.mem = nn.Parameter(w.unsqueeze(0), requires_grad=True)

        # query,key,value dim
        self.mha = MultiheadAttention(embed_dim, n_heads, dropout=dropout)

        # input dim, hidden dim
        # self.ff1 = nn.Sequential(
        #     norms.get(norm)(embed_dim),
        #     nn.Conv1d(embed_dim, dim_ff, kernel_size=1, stride=1, padding=0, bias=True),
        #     activations.get(activation)(),
        # )
        self.ff1 = nn.Conv1d(embed_dim,
                             dim_ff,
                             kernel_size=1,
                             stride=1,
                             padding=0,
                             bias=True)
        self.ff2 = nn.Conv1d(dim_ff,
                             embed_dim,
                             kernel_size=1,
                             stride=1,
                             padding=0,
                             bias=True)

        self.dropout = nn.Dropout(dropout)
        self.activation = activations.get(activation)()
        self.norm_mha = norms.get(norm)(embed_dim)
        self.norm_ff = norms.get(norm)(embed_dim)
        self.norm_out = norms.get(norm)(embed_dim)

        if use_sdu:
            self.mha_out = nn.Sequential(nn.Conv1d(embed_dim, embed_dim, 1),
                                         nn.Tanh())
            self.mha_gate = nn.Sequential(nn.Conv1d(embed_dim, embed_dim, 1),
                                          nn.Sigmoid())
            self.ff_out = nn.Sequential(nn.Conv1d(embed_dim, embed_dim, 1),
                                        nn.Tanh())
            self.ff_gate = nn.Sequential(nn.Conv1d(embed_dim, embed_dim, 1),
                                         nn.Sigmoid())
Example #16
    def __init__(
        self,
        in_chan,  # encoder out channel 64
        n_src,
        out_chan=None,
        bn_chan=64,
        n_heads=4,
        ff_hid=256,
        rnn_hid=128,
        rnn_layers=1,
        pe_conv_k=3,
        chunk_size=100,
        hop_size=None,  # 50
        n_repeats=6,  # 2
        norm_type="gLN",
        ff_activation="relu",
        mask_act="relu",  # sigmoid
        bidirectional=True,
        dropout=0,
    ):
        super(DualTransformer, self).__init__()
        self.in_chan = in_chan
        out_chan = out_chan if out_chan is not None else in_chan
        self.out_chan = out_chan
        self.bn_chan = bn_chan
        self.n_src = n_src
        self.n_heads = n_heads
        self.ff_hid = ff_hid
        self.rnn_hid = rnn_hid
        self.rnn_layers = rnn_layers
        self.chunk_size = chunk_size
        hop_size = hop_size if hop_size is not None else chunk_size // 2
        self.hop_size = hop_size
        self.n_repeats = n_repeats
        self.norm_type = norm_type
        self.ff_activation = ff_activation
        self.mask_act = mask_act
        self.bidirectional = bidirectional
        self.dropout = dropout

        # mean, var for the whole sequence and channel, but gamma beta only for channel size
        # gln vs cln: on whole sequence or separately
        # self.in_norm = norms.get(norm_type)(in_chan)
        layer_norm = norms.get(norm_type)(in_chan)
        bottleneck_conv = nn.Conv1d(in_chan, bn_chan, 1)
        self.bottleneck = nn.Sequential(layer_norm, bottleneck_conv)

        pe_conv_list = []
        for i in range(pe_conv_k):
            pe_conv_list.append(
                nn.Conv2d(bn_chan,
                          bn_chan,
                          kernel_size=3,
                          stride=1,
                          padding=1,
                          bias=False))
            pe_conv_list.append(norms.get(norm_type)(bn_chan))
            pe_conv_list.append(activations.get(ff_activation)())
        self.pe_conv = nn.Sequential(*pe_conv_list)
        d_model = self.bn_chan

        # # *2 for PE
        # self.pe = PositionalEmbedding(in_chan)
        # d_model = self.in_chan * 2

        # Succession of DPRNNBlocks.
        self.layers = nn.ModuleList([])
        for x in range(self.n_repeats):
            self.layers.append(
                nn.ModuleList([
                    # ImprovedTransformedLayer(
                    #     d_model,
                    #     self.n_heads,
                    #     self.ff_hid,
                    #     self.dropout,
                    #     self.ff_activation,
                    #     True,
                    #     self.norm_type,
                    # ),
                    # ImprovedTransformedLayer(
                    #     d_model,
                    #     self.n_heads,
                    #     self.ff_hid,
                    #     self.dropout,
                    #     self.ff_activation,
                    #     self.bidirectional,
                    #     self.norm_type,
                    # ),
                    SingleRNNBlock(
                        in_chan=d_model,
                        hid_size=self.rnn_hid,
                        norm_type=self.norm_type,
                        bidirectional=self.bidirectional,
                        rnn_type='LSTM',
                        num_layers=1,
                        dropout=self.dropout,
                    ),

                    # DualTransformedLayer(
                    #     d_model,
                    #     self.n_heads,
                    #     self.ff_hid,
                    #     self.dropout,
                    #     self.ff_activation,
                    #     self.norm_type,
                    # ),
                    AcousticTransformerLayer(
                        d_model,
                        self.n_heads,
                        self.ff_hid,
                        self.dropout,
                        self.ff_activation,
                        self.norm_type,
                    ),
                ]))
            # self.layers.append(
            #     nn.ModuleList(
            #         [
            #             DualTransformedLayer(
            #                 d_model,
            #                 self.n_heads,
            #                 self.ff_hid,
            #                 self.dropout,
            #                 self.ff_activation,
            #                 self.norm_type,
            #             ),
            #             DualTransformedLayer(
            #                 d_model,
            #                 self.n_heads,
            #                 self.ff_hid,
            #                 self.dropout,
            #                 self.ff_activation,
            #                 self.norm_type,
            #             ),
            #         ]
            #     )
            # )
        # 1x1 conv
        # *2 for PE
        self.strnn_norm_out = norms.get(norm_type)(self.bn_chan)
        net_out_conv = nn.Conv2d(d_model, n_src * self.bn_chan, 1)
        self.first_out = nn.Sequential(nn.PReLU(), net_out_conv)
        # Gating and masking in 2D space (after fold)
        self.net_out = nn.Sequential(nn.Conv1d(self.bn_chan, self.bn_chan, 1),
                                     nn.Tanh())
        self.net_gate = nn.Sequential(nn.Conv1d(self.bn_chan, self.bn_chan, 1),
                                      nn.Sigmoid())
        self.mask_net = nn.Conv1d(bn_chan, out_chan, 1, bias=False)

        # Get activation function.
        mask_nl_class = activations.get(mask_act)
        # For softmax, feed the source dimension.
        if has_arg(mask_nl_class, "dim"):
            self.output_act = mask_nl_class(dim=1)
        else:
            self.output_act = mask_nl_class()
Example #17
    def __init__(
        self,
        embed_dim,  # in_chan
        n_heads,
        dim_ff,
        dropout=0.0,
        activation="relu",  # ff activation
        bidirectional=True,
        norm="gLN",
        n_blocks=3,
        bn_chan=128,
        hid_chan=512,
        skip_chan=128,
        conv_kernel_size=3,
    ):
        super(DualTransformedLayer, self).__init__()

        # query,key,value dim
        self.mha = MultiheadAttention(embed_dim, n_heads, dropout=dropout)

        # ------1------

        # self.recurrent = nn.LSTM(embed_dim, dim_ff, bidirectional=bidirectional)
        # ff_inner_dim = 2 * dim_ff if bidirectional else dim_ff
        # self.linear = nn.Linear(ff_inner_dim, embed_dim)

        # ------2------
        # input dim, hidden dim
        self.ff = nn.Sequential(
            norms.get(norm)(embed_dim),
            activations.get(activation)(),
            nn.Conv1d(embed_dim,
                      dim_ff,
                      kernel_size=1,
                      stride=1,
                      padding=0,
                      bias=False),
            norms.get(norm)(dim_ff),
            activations.get(activation)(),
            nn.Conv1d(dim_ff,
                      embed_dim,
                      kernel_size=1,
                      stride=1,
                      padding=0,
                      bias=False),
        )

        # # ------3------
        # self.skip_chan = skip_chan
        # self.ff = nn.ModuleList()
        # for x in range(n_blocks):
        #     padding = (conv_kernel_size - 1) * (2 ** x - 1) // 2
        #     self.ff.append(
        #         Conv1DBlock(
        #             bn_chan,
        #             hid_chan,
        #             skip_chan,
        #             conv_kernel_size,
        #             padding=padding,
        #             dilation=(2 ** x - 1),
        #             norm_type=norm,
        #         )
        #     )

        self.dropout = nn.Dropout(dropout)
        self.activation = activations.get(activation)()
        self.norm_mha = norms.get(norm)(embed_dim)
        self.norm_ff = norms.get(norm)(embed_dim)