def __init__(self,
             rnn_layers=2,
             rnn_units=128,
             win_len=400,
             win_inc=100,
             fft_len=512,
             win_type='hanning',
             mode='TCS'):
        """Construct the complex-feature FSMN enhancement net.

        Front-end is a convolutional STFT/iSTFT pair producing 'complex'
        features; the body is a complex linear projection + batch norm
        followed by a stack of ten DFSMN layers and a deep filter.
        """
        super(Net, self).__init__()

        # STFT / iSTFT configuration.
        self.win_len = win_len
        self.win_inc = win_inc
        self.fft_len = fft_len
        self.win_type = win_type

        self.rnn_units = rnn_units
        self.input_dim = win_len
        self.output_dim = win_len
        self.hidden_layers = rnn_layers

        # Analysis/synthesis windows are fixed (not trainable).
        self.fix = True
        self.stft = ConvSTFT(win_len, win_inc, fft_len, win_type,
                             'complex', fix=self.fix)
        self.istft = ConviSTFT(win_len, win_inc, fft_len, win_type,
                               'complex', fix=self.fix)

        feat_dim = fft_len // 2

        self.clp = ComplexLinearProjection(feat_dim)
        self.bn = nn.BatchNorm1d(feat_dim, affine=False)

        # One spec per DFSMN layer: (out_dim, then the four positional
        # context/stride args). NOTE(review): positional DFSMN argument
        # meanings assumed from usage — confirm against the DFSMN definition.
        layer_specs = [
            (feat_dim, 3, 3, 1, 1),
            (feat_dim, 7, 3, 1, 1),
            (feat_dim, 7, 3, 1, 1),
            (feat_dim, 7, 3, 1, 1),
            (feat_dim, 7, 3, 1, 1),
            (feat_dim, 7, 3, 1, 1),
            (feat_dim, 7, 3, 0, 1),
            (feat_dim, 7, 3, 0, 1),
            (feat_dim, 7, 3, 0, 1),
            (feat_dim * 2, 3, 3, 0, 1),
        ]
        for num, (odim, a, b, c, d) in enumerate(layer_specs, start=1):
            # setattr goes through nn.Module.__setattr__, so each layer is
            # registered exactly as the original fsmn1..fsmn10 attributes.
            setattr(self, 'fsmn%d' % num,
                    DFSMN(feat_dim, rnn_units, odim, a, b, c, d))

        self.df = DeepFilter(1, 2)
        show_params(self)
    def __init__(self,
                 spk_layers=14,
                 sep_stack_size=10,
                 sep_stack_num=4,
                 latent_dim=512,
                 numspks=2,
                 overallspks=3,
                 with_filter=True):
        """Build the WaveSplit model: speaker stack + separation stack,
        optionally wrapped in a learned conv encoder/decoder filterbank.
        """
        super(WaveSplit, self).__init__()
        self.with_filter = with_filter

        if with_filter:
            # Learned analysis/synthesis filterbank around the stacks;
            # weights are drawn from a standard normal.
            self.encoder = nn.Conv1d(1, latent_dim, kernel_size=16,
                                     stride=8, bias=False)
            nn.init.normal_(self.encoder.weight)
            self.decoder = nn.ConvTranspose1d(latent_dim, 1, kernel_size=16,
                                              stride=8, bias=False)
            nn.init.normal_(self.decoder.weight)

        # With the filterbank the stacks work in the latent space;
        # without it they consume (and the separator emits) raw waveform.
        stack_in = latent_dim if with_filter else 1
        sep_out = latent_dim if with_filter else 1

        self.speaker = SpeakerStack(in_dim=stack_in,
                                    out_dim=latent_dim,
                                    num_layers=spk_layers,
                                    numspks=numspks,
                                    latent_dim=latent_dim)
        self.separation = SeparationStack(in_dim=stack_in,
                                          out_dim=sep_out,
                                          num_stack=sep_stack_num,
                                          stack_size=sep_stack_size,
                                          latent_dim=latent_dim)

        self.kmean = KMeans(1, numspks, latent_dim, iter_nums=80)
        self.numspks = numspks
        self.loss_func = SpeakerLoss(latent_dim,
                                     numspks=numspks,
                                     overallspks=overallspks)
        self.latent_dim = latent_dim
        show_params(self)
# --- Ejemplo n.º 3 ---
    def __init__(self,
                 rnn_layers=2,
                 rnn_units=128,
                 win_len=400,
                 win_inc=100,
                 fft_len=512,
                 win_type='hanning',
                 mode='TCS'):
        """Construct the real-feature FSMN enhancement net.

        Front-end is a convolutional STFT/iSTFT pair producing 'real'
        features; the body is a stack of twelve DFSMN layers.
        """
        super(Net, self).__init__()

        # STFT / iSTFT configuration.
        self.win_len = win_len
        self.win_inc = win_inc
        self.fft_len = fft_len
        self.win_type = win_type

        self.rnn_units = rnn_units
        self.input_dim = win_len
        self.output_dim = win_len
        self.hidden_layers = rnn_layers

        # Analysis/synthesis windows are fixed (not trainable).
        self.fix = True
        self.stft = ConvSTFT(win_len, win_inc, fft_len, win_type,
                             'real', fix=self.fix)
        self.istft = ConviSTFT(win_len, win_inc, fft_len, win_type,
                               'real', fix=self.fix)

        feat_dim = fft_len // 2

        # One spec per DFSMN layer (the four positional context/stride
        # args). NOTE(review): argument meanings assumed from usage —
        # confirm against the DFSMN definition.
        layer_specs = [
            (3, 3, 1, 1),
            (7, 3, 1, 1),
            (7, 3, 1, 1),
            (7, 3, 1, 1),
            (7, 3, 1, 1),
            (7, 3, 1, 1),
            (7, 3, 0, 1),
            (7, 3, 0, 1),
            (7, 3, 0, 1),
            (7, 3, 0, 1),
            (7, 3, 0, 1),
            (3, 3, 0, 1),
        ]
        for num, (a, b, c, d) in enumerate(layer_specs, start=1):
            # setattr goes through nn.Module.__setattr__, so each layer is
            # registered exactly as the original fsmn1..fsmn12 attributes.
            setattr(self, 'fsmn%d' % num,
                    DFSMN(feat_dim, rnn_units, feat_dim, a, b, c, d))

        show_params(self)
# --- Ejemplo n.º 4 ---
    def __init__(self,
                 input_dim=257,
                 output_dim=257,
                 hidden_layers=2,
                 hidden_units=512,
                 left_context=1,
                 right_context=1,
                 kernel_size=6,
                 kernel_num=9,
                 dropout=0.2):
        """CLDNN mask-estimation model: linear input layer -> GRU stack ->
        2-D conv + max-pool -> sigmoid output layer.

        Args:
            input_dim: features per frame of the input spectrogram.
            output_dim: features per frame of the predicted mask.
            hidden_layers: number of stacked GRU layers.
            hidden_units: width of the input linear layer and the GRU.
            left_context, right_context: frames spliced around the current
                frame, so the net consumes
                (left_context + 1 + right_context) * input_dim values.
            kernel_size: square conv kernel edge length.
            kernel_num: number of conv output channels.
            dropout: inter-layer GRU dropout.
        """
        # BUG FIX: super().__init__() was called twice (before and after
        # the attribute assignments); once is sufficient.
        super(CLDNN, self).__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.hidden_layers = hidden_layers
        self.hidden_units = hidden_units
        self.left_context = left_context
        self.right_context = right_context
        self.kernel_size = kernel_size
        # Historical typo kept for backward compatibility; prefer kernel_num.
        self.kernel_sum = kernel_num
        self.kernel_num = kernel_num

        context_width = left_context + 1 + right_context

        self.input_layer = nn.Sequential(
            nn.Linear(context_width * input_dim, hidden_units), nn.Tanh())

        self.rnn_layer = nn.GRU(
            input_size=hidden_units,
            hidden_size=hidden_units,
            num_layers=hidden_layers,
            dropout=dropout,
        )

        # NOTE(review): bare `Conv2d` appears to be a project-local layer
        # (not nn.Conv2d) — confirm before changing its arguments.
        self.conv2d_layer = nn.Sequential(
            Conv2d(in_channels=1,
                   out_channels=kernel_num,
                   kernel_size=(kernel_size, kernel_size)), nn.Tanh(),
            nn.MaxPool2d(3, stride=1, padding=(1, 1)))

        self.output_layer = nn.Sequential(
            nn.Linear(hidden_units * kernel_num,
                      context_width * self.output_dim),
            nn.Sigmoid())

        # 'sum' reduction: loss magnitude scales with batch/sequence size.
        self.loss_func = nn.MSELoss(reduction='sum')
        show_model(self)
        show_params(self)
# --- Ejemplo n.º 5 ---
    def __init__(
                    self,
                    rnn_layers=2,
                    rnn_units=128,
                    win_len=400,
                    win_inc=100,
                    fft_len=512,
                    win_type='hanning',
                    masking_mode='E',
                    use_clstm=False,
                    use_cbn = False,
                    kernel_size=5,
                    kernel_num=None
                ):
        '''
        Deep Complex Convolution Recurrent Network (DCCRN): complex conv
        encoder -> (complex) LSTM bottleneck -> complex transposed-conv
        decoder with skip connections.

            rnn_layers: the number of lstm layers in the crn,
            rnn_units: for clstm, rnn_units = real+imag
            win_len/win_inc/fft_len/win_type: STFT analysis parameters
            masking_mode: how the predicted mask is applied (e.g. 'E')
            use_clstm: use a complex LSTM bottleneck instead of nn.LSTM
            use_cbn: use ComplexBatchNorm in place of nn.BatchNorm2d
            kernel_size: frequency-axis kernel size of the complex convs
            kernel_num: per-stage channel counts; defaults to
                [16, 32, 64, 128, 256, 256]
        '''

        super(DCCRN, self).__init__()

        # BUG FIX: the default was a mutable list shared across calls;
        # use a None sentinel instead (same effective default).
        if kernel_num is None:
            kernel_num = [16, 32, 64, 128, 256, 256]

        # for fft
        self.win_len = win_len
        self.win_inc = win_inc
        self.fft_len = fft_len
        self.win_type = win_type

        input_dim = win_len
        output_dim = win_len

        self.rnn_units = rnn_units
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.hidden_layers = rnn_layers
        self.kernel_size = kernel_size
        # Prepend 2 input channels (real + imag of the complex spectrogram).
        self.kernel_num = [2] + kernel_num
        self.masking_mode = masking_mode
        self.use_clstm = use_clstm

        bidirectional = False
        fac = 2 if bidirectional else 1  # LSTM output-width factor

        # Freeze the (i)STFT analysis/synthesis windows.
        fix = True
        self.fix = fix
        self.stft = ConvSTFT(self.win_len, self.win_inc, fft_len, self.win_type, 'complex', fix=fix)
        self.istft = ConviSTFT(self.win_len, self.win_inc, fft_len, self.win_type, 'complex', fix=fix)

        self.encoder = nn.ModuleList()
        self.decoder = nn.ModuleList()
        # Encoder: each stage halves the frequency axis (stride 2, time kept).
        for idx in range(len(self.kernel_num) - 1):
            self.encoder.append(
                nn.Sequential(
                    ComplexConv2d(
                        self.kernel_num[idx],
                        self.kernel_num[idx + 1],
                        kernel_size=(self.kernel_size, 2),
                        stride=(2, 1),
                        padding=(2, 1)
                    ),
                    nn.BatchNorm2d(self.kernel_num[idx + 1]) if not use_cbn else ComplexBatchNorm(self.kernel_num[idx + 1]),
                    nn.PReLU()
                )
            )
        # Frequency bins remaining after all encoder stages
        # (fft_len/2 input bins halved len(kernel_num)-1 times).
        hidden_dim = self.fft_len // (2 ** (len(self.kernel_num)))

        if self.use_clstm:
            rnns = []
            for idx in range(rnn_layers):
                rnns.append(
                    NavieComplexLSTM(
                        input_size=hidden_dim * self.kernel_num[-1] if idx == 0 else self.rnn_units,
                        hidden_size=self.rnn_units,
                        bidirectional=bidirectional,
                        batch_first=False,
                        # Last layer projects back to the encoder feature size.
                        projection_dim=hidden_dim * self.kernel_num[-1] if idx == rnn_layers - 1 else None,
                    )
                )
            # BUG FIX: was rebuilt inside the loop on every iteration;
            # building it once after the loop is equivalent and clearer.
            self.enhance = nn.Sequential(*rnns)
        else:
            self.enhance = nn.LSTM(
                    input_size=hidden_dim * self.kernel_num[-1],
                    hidden_size=self.rnn_units,
                    # BUG FIX: was hard-coded to 2, silently ignoring the
                    # rnn_layers argument (default is still 2).
                    num_layers=rnn_layers,
                    dropout=0.0,
                    bidirectional=bidirectional,
                    batch_first=False
            )
            # NOTE(review): attribute name 'tranform' (sic) kept — callers
            # elsewhere in the project reference it by this spelling.
            self.tranform = nn.Linear(self.rnn_units * fac, hidden_dim * self.kernel_num[-1])

        # Decoder mirrors the encoder; input channels are doubled by the
        # skip-connection concatenation. The last stage (idx == 1) has no
        # norm/activation so raw mask values can be produced.
        for idx in range(len(self.kernel_num) - 1, 0, -1):
            if idx != 1:
                self.decoder.append(
                    nn.Sequential(
                        ComplexConvTranspose2d(
                            self.kernel_num[idx] * 2,
                            self.kernel_num[idx - 1],
                            kernel_size=(self.kernel_size, 2),
                            stride=(2, 1),
                            padding=(2, 0),
                            output_padding=(1, 0)
                        ),
                        nn.BatchNorm2d(self.kernel_num[idx - 1]) if not use_cbn else ComplexBatchNorm(self.kernel_num[idx - 1]),
                        nn.PReLU()
                    )
                )
            else:
                self.decoder.append(
                    nn.Sequential(
                        ComplexConvTranspose2d(
                            self.kernel_num[idx] * 2,
                            self.kernel_num[idx - 1],
                            kernel_size=(self.kernel_size, 2),
                            stride=(2, 1),
                            padding=(2, 0),
                            output_padding=(1, 0)
                        ),
                    )
                )

        show_model(self)
        show_params(self)
        self.flatten_parameters()
# --- Ejemplo n.º 6 ---
    def __init__(self,
                 win_len=400,
                 win_inc=100,
                 fft_len=512,
                 win_type='hanning',
                 num_blocks=3,
                 channel_amp=9,
                 channel_phase=8,
                 rnn_nums=300):
        """PHASEN: two-stream (amplitude + phase) enhancement network.

        Args:
            win_len/win_inc/fft_len/win_type: STFT analysis parameters.
            num_blocks: number of stacked two-stream blocks (TSB).
            channel_amp: channel width of the amplitude stream.
            channel_phase: channel width of the phase stream.
            rnn_nums: hidden size of the bidirectional GRU.
        """
        super(PHASEN, self).__init__()
        # BUG FIX: was hard-coded to 3, silently ignoring the num_blocks
        # argument (default is still 3).
        self.num_blocks = num_blocks
        self.feat_dim = fft_len // 2 + 1

        self.win_len = win_len
        self.win_inc = win_inc
        self.fft_len = fft_len
        self.win_type = win_type

        # Freeze the (i)STFT analysis/synthesis windows.
        fix = True
        self.stft = ConvSTFT(self.win_len,
                             self.win_inc,
                             self.fft_len,
                             self.win_type,
                             feature_type='complex',
                             fix=fix)
        self.istft = ConviSTFT(self.win_len,
                               self.win_inc,
                               self.fft_len,
                               self.win_type,
                               feature_type='complex',
                               fix=fix)

        # Amplitude stream entry: 7x1 then 1x7 convs with BN + ReLU.
        self.amp_conv1 = nn.Sequential(
            nn.Conv2d(2, channel_amp, kernel_size=[7, 1], padding=(3, 0)),
            nn.BatchNorm2d(channel_amp),
            nn.ReLU(),
            nn.Conv2d(channel_amp,
                      channel_amp,
                      kernel_size=[1, 7],
                      padding=(0, 3)),
            nn.BatchNorm2d(channel_amp),
            nn.ReLU(),
        )
        # Phase stream entry: wide-in-frequency convs, no norm/activation.
        self.phase_conv1 = nn.Sequential(
            nn.Conv2d(2, channel_phase, kernel_size=[3, 5], padding=(1, 2)),
            nn.Conv2d(channel_phase,
                      channel_phase,
                      kernel_size=[3, 25],
                      padding=(1, 12)),
        )

        self.tsbs = nn.ModuleList()
        for idx in range(self.num_blocks):
            self.tsbs.append(
                TSB(input_dim=self.feat_dim,
                    channel_amp=channel_amp,
                    channel_phase=channel_phase))

        self.amp_conv2 = nn.Sequential(
            nn.Conv2d(channel_amp, 8, kernel_size=[1, 1]),
            nn.BatchNorm2d(8),
            nn.ReLU(),
        )
        # BUG FIX: was nn.Conv1d with a 2-D kernel [1, 1], which cannot
        # consume the 4-D (N, C, T, F) phase-stream tensor; a 1x1 Conv2d
        # (as used for amp_conv2) is intended.
        self.phase_conv2 = nn.Sequential(
            nn.Conv2d(channel_phase, 2, kernel_size=[1, 1]))
        self.rnn = nn.GRU(self.feat_dim * 8, rnn_nums, bidirectional=True)
        # Final width generalized: (fft_len // 2 + 1) * 2 == 514 for the
        # default fft_len=512 (the original hard-coded 514).
        self.fcs = nn.Sequential(nn.Linear(rnn_nums * 2, 600), nn.ReLU(),
                                 nn.Linear(600, 600), nn.ReLU(),
                                 nn.Linear(600, self.feat_dim * 2),
                                 nn.Sigmoid())
        show_params(self)