def __init__(self, device: str = 'cuda'):
        self.device = device

        # make mel converter
        self.mel_func = LogMelSpectrogram(settings.SAMPLE_RATE,
                                          settings.MEL_SIZE, settings.N_FFT,
                                          settings.WIN_LENGTH,
                                          settings.HOP_LENGTH,
                                          float(settings.MEL_MIN),
                                          float(settings.MEL_MAX)).to(device)

        # PQMF module
        self.pqmf_func = PQMF().to(device)

        # load model
        self.gen = Generator().to(device)
        chk = torch.load(VCTK_BASE_CHK_PATH, map_location=torch.device(device))
        self.gen.load_state_dict(chk)
        self.gen.eval()

        # STFT used by the denoising step below
        self.stft = STFT(settings.WIN_LENGTH, settings.HOP_LENGTH).to(device)

        # denoise - reference https://github.com/NVIDIA/waveglow/blob/master/denoiser.py
        # all-zero mel input (batch=1, n_mels=80, frames=88) used to probe the generator's bias
        mel_input = torch.zeros((1, 80, 88)).float().to(device)
        with torch.no_grad():
            bias_audio = self.decode(mel_input, is_denoise=False).squeeze(1)
            bias_spec, _ = self.stft.transform(bias_audio)

        # keep only the first frame as a per-bin bias template for later subtraction
        self.bias_spec = bias_spec[:, :, 0][:, :, None]
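
For reference, a hypothetical `denoise` method showing how `bias_spec` is typically applied in the WaveGlow-style denoiser linked above; the method name and the `strength` default are illustrative, not taken from the source.

def denoise(self, audio: torch.Tensor, strength: float = 0.01) -> torch.Tensor:
        # subtract a scaled copy of the bias spectrum from the magnitudes,
        # clamp at zero, and resynthesize with the original phases
        spec, angles = self.stft.transform(audio)
        spec = (spec - self.bias_spec * strength).clamp(min=0.0)
        return self.stft.inverse(spec, angles)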
Example #2
def build_stft_functions(*params: Tuple[int, int, int]):
    """
    Make stft modules by given parameters
    :param params: arguments of tuples (n_fft, window size, hop size)
    :return: STFT modules
    """
    print('Build STFT Functions ...')
    return [
        # argument order follows the other snippets' STFT calls:
        # (filter_length, hop_length, win_length)
        STFT(fft, hop, win).cuda() for fft, win, hop in params
    ]
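
An illustrative call, with placeholder (n_fft, window size, hop size) tuples in the order the docstring describes:

stft_a, stft_b = build_stft_functions((1024, 1024, 256), (512, 512, 128))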
Example #3
def build_stft_functions():
    print('Build Mel Functions ...')
    mel_funcs_for_loss = [
        STFT(fft, hop, win).cuda()
        for fft, win, hop in FB_STFT_PARAMS + MB_STFT_PARAMS
    ]

    mel_func = LogMelSpectrogram(settings.SAMPLE_RATE,
                                 settings.MEL_SIZE,
                                 settings.N_FFT,
                                 settings.WIN_LENGTH,
                                 settings.HOP_LENGTH,
                                 mel_min=float(settings.MEL_MIN),
                                 mel_max=float(settings.MEL_MAX)).cuda()
    return mel_func, mel_funcs_for_loss
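
For context, a hypothetical usage sketch of the returned modules in a multi-resolution STFT loss; `pred` and `target` are placeholder waveforms and the loss shape is an assumption, not code from the source.

import torch.nn.functional as F

mel_func, stft_funcs = build_stft_functions()
pred = torch.randn(4, 22050).cuda()    # placeholder batch of waveforms
target = torch.randn(4, 22050).cuda()

stft_loss = 0.
for stft in stft_funcs:
    pred_mag, _ = stft.transform(pred)
    target_mag, _ = stft.transform(target)
    stft_loss = stft_loss + F.l1_loss(pred_mag, target_mag)
# assumes LogMelSpectrogram is callable on waveforms and returns log-mels
mel_loss = F.l1_loss(mel_func(pred), mel_func(target))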
Example #4
    def __init__(self,
                 spec_dim: int,
                 hidden_dim: int,
                 filter_len: int,
                 hop_len: int,
                 layers: int = 3,
                 block_layers: int = 3,
                 kernel_size: int = 5,
                 is_mask: bool = False,
                 norm: str = 'bn',
                 act: str = 'tanh'):
        super().__init__()
        self.layers = layers
        self.is_mask = is_mask

        # stft modules
        self.stft = STFT(filter_len, hop_len)

        if norm == 'bn':
            self.bn_func = nn.BatchNorm1d
        elif norm == 'ins':
            self.bn_func = lambda x: nn.InstanceNorm1d(x, affine=True)
        else:
            raise NotImplementedError('{} is not implemented!'.format(norm))

        if act == 'tanh':
            self.act_func = nn.Tanh
            self.act_out = nn.Tanh
        elif act == 'comp':
            self.act_func = ComplexActLayer
            self.act_out = lambda: ComplexActLayer(is_out=True)
        else:
            raise NotImplementedError('{} is not implemented!'.format(act))

        # prev conv
        self.prev_conv = ComplexConv1d(spec_dim * 2, hidden_dim, 1)

        # down
        self.down = nn.ModuleList()
        self.down_pool = nn.MaxPool1d(3, stride=2, padding=1)
        for idx in range(self.layers):
            block = ComplexConvBlock(hidden_dim,
                                     hidden_dim,
                                     kernel_size=kernel_size,
                                     padding=kernel_size // 2,
                                     bn_func=self.bn_func,
                                     act_func=self.act_func,
                                     layers=block_layers)
            self.down.append(block)

        # up
        self.up = nn.ModuleList()
        for idx in range(self.layers):
            in_c = hidden_dim if idx == 0 else hidden_dim * 2
            self.up.append(
                nn.Sequential(
                    ComplexConvBlock(in_c,
                                     hidden_dim,
                                     kernel_size=kernel_size,
                                     padding=kernel_size // 2,
                                     bn_func=self.bn_func,
                                     act_func=self.act_func,
                                     layers=block_layers),
                    self.bn_func(hidden_dim),
                    self.act_func(),
                    ComplexTransposedConv1d(hidden_dim,
                                            hidden_dim,
                                            kernel_size=2,
                                            stride=2),
                ))

        # out_conv
        self.out_conv = nn.Sequential(
            ComplexConvBlock(hidden_dim * 2,
                             spec_dim * 2,
                             kernel_size=kernel_size,
                             padding=kernel_size // 2,
                             bn_func=self.bn_func,
                             act_func=self.act_func),
            self.bn_func(spec_dim * 2), self.act_func())

        # refine conv
        self.refine_conv = nn.Sequential(
            ComplexConvBlock(spec_dim * 4,
                             spec_dim * 2,
                             kernel_size=kernel_size,
                             padding=kernel_size // 2,
                             bn_func=self.bn_func,
                             act_func=self.act_func),
            self.bn_func(spec_dim * 2), self.act_func())
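
The snippet cuts off after the constructor. As a rough, unverified illustration only, here is one way a forward pass could wire these modules as a U-Net with skip connections (a sketch under assumed shapes and STFT semantics; the repository's actual forward may differ):

    def forward(self, wav: torch.Tensor) -> torch.Tensor:
        # to spectrogram; the two components (e.g. magnitude/phase) are stacked
        # on the channel axis to form the "complex" input of size spec_dim * 2
        comp1, comp2 = self.stft.transform(wav)
        x = torch.cat([comp1, comp2], dim=1)
        residual = x

        # 1x1 complex conv into the hidden width
        x = self.prev_conv(x)

        # encoder: record each block output as a skip, then halve the time axis
        skips = []
        for block in self.down:
            x = block(x)
            skips.append(x)
            x = self.down_pool(x)

        # decoder: the first block runs on the bottleneck alone (in_c == hidden_dim);
        # later blocks take the upsampled features concatenated with a skip
        for idx, block in enumerate(self.up):
            if idx > 0:
                skip = skips[-idx]
                x = torch.cat([x[..., :skip.size(-1)], skip], dim=1)
            x = block(x)

        # out_conv consumes the earliest skip (hidden_dim * 2 -> spec_dim * 2)
        skip = skips[0]
        x = self.out_conv(torch.cat([x[..., :skip.size(-1)], skip], dim=1))

        # refinement over the raw spectrogram (spec_dim * 4 -> spec_dim * 2),
        # optionally applied as a multiplicative mask
        x = self.refine_conv(torch.cat([x[..., :residual.size(-1)], residual], dim=1))
        if self.is_mask:
            x = x * residual

        # back to a waveform
        comp1, comp2 = x.chunk(2, dim=1)
        return self.stft.inverse(comp1, comp2)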