def __init__(self, device: str = 'cuda'):
    self.device = device

    # make mel converter
    self.mel_func = LogMelSpectrogram(
        settings.SAMPLE_RATE, settings.MEL_SIZE, settings.N_FFT,
        settings.WIN_LENGTH, settings.HOP_LENGTH,
        float(settings.MEL_MIN), float(settings.MEL_MAX)
    ).to(device)

    # PQMF module
    self.pqmf_func = PQMF().to(device)

    # load model
    self.gen = Generator().to(device)
    chk = torch.load(VCTK_BASE_CHK_PATH, map_location=torch.device(device))
    self.gen.load_state_dict(chk)
    self.gen.eval()

    self.stft = STFT(settings.WIN_LENGTH, settings.HOP_LENGTH).to(device)

    # denoise - reference https://github.com/NVIDIA/waveglow/blob/master/denoiser.py
    # run the generator on an all-zero mel input to measure its inherent noise
    mel_input = torch.zeros((1, 80, 88)).float().to(device)
    with torch.no_grad():
        bias_audio = self.decode(mel_input, is_denoise=False).squeeze(1)
        bias_spec, _ = self.stft.transform(bias_audio)
    # keep only the first frame as a constant bias spectrum to subtract later
    self.bias_spec = bias_spec[:, :, 0][:, :, None]
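# Hedged sketch (an assumption, not shown in this class): how the cached
# `bias_spec` is typically applied at inference time, following the referenced
# WaveGlow denoiser. The method name `denoise`, the `strength` default, and
# the availability of `STFT.inverse` are assumed here for illustration.
def denoise(self, audio: torch.Tensor, strength: float = 0.01) -> torch.Tensor:
    # move the generated audio into the STFT domain
    audio_spec, audio_angles = self.stft.transform(audio)
    # subtract a scaled copy of the model's constant bias spectrum,
    # clamping magnitudes at zero
    audio_spec_denoised = torch.clamp(audio_spec - self.bias_spec * strength, 0.0)
    # resynthesize the waveform from cleaned magnitudes and the original phases
    return self.stft.inverse(audio_spec_denoised, audio_angles)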
def build_stft_functions(*params: Tuple[int, int, int]):
    """
    Make STFT modules from the given parameters
    :param params: tuples of (n_fft, window size, hop size)
    :return: list of STFT modules
    """
    print('Build STFT Functions ...')
    # argument order (n_fft, hop size, window size), consistent with the
    # other STFT calls in this module
    return [
        STFT(fft, hop, win).cuda() for fft, win, hop in params
    ]
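# Hedged usage sketch: the parameter triples below are illustrative
# (n_fft, window size, hop size) values, not taken from this project's settings.
stft_funcs = build_stft_functions((1024, 512, 256), (2048, 1024, 512), (512, 256, 128))
# each returned module maps a batch of waveforms to magnitude / phase tensors:
# magnitude, phase = stft_funcs[0].transform(wav_batch)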
def build_stft_functions():
    print('Build Mel Functions ...')
    # note: despite the name, these are STFT modules used by the spectral loss,
    # built at both full-band and multi-band resolutions
    mel_funcs_for_loss = [
        STFT(fft, hop, win).cuda() for fft, win, hop in FB_STFT_PARAMS + MB_STFT_PARAMS
    ]
    mel_func = LogMelSpectrogram(
        settings.SAMPLE_RATE, settings.MEL_SIZE, settings.N_FFT,
        settings.WIN_LENGTH, settings.HOP_LENGTH,
        mel_min=float(settings.MEL_MIN), mel_max=float(settings.MEL_MAX)
    ).cuda()
    return mel_func, mel_funcs_for_loss
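# Hedged sketch of how the two return values are commonly consumed in training:
# `mel_func` produces the generator's conditioning input, while the STFT stack
# drives a multi-resolution magnitude loss. The loss form below is an assumption
# about this setup, not code from this repository.
def multi_resolution_stft_loss(pred_wav: torch.Tensor, target_wav: torch.Tensor,
                               stft_funcs) -> torch.Tensor:
    loss = 0.
    for stft in stft_funcs:
        pred_mag, _ = stft.transform(pred_wav)
        target_mag, _ = stft.transform(target_wav)
        # L1 distance between magnitude spectrograms at each analysis resolution
        loss = loss + torch.nn.functional.l1_loss(pred_mag, target_mag)
    return loss / len(stft_funcs)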
def __init__(self, spec_dim: int, hidden_dim: int, filter_len: int, hop_len: int,
             layers: int = 3, block_layers: int = 3, kernel_size: int = 5,
             is_mask: bool = False, norm: str = 'bn', act: str = 'tanh'):
    super().__init__()
    self.layers = layers
    self.is_mask = is_mask

    # stft modules
    self.stft = STFT(filter_len, hop_len)

    if norm == 'bn':
        self.bn_func = nn.BatchNorm1d
    elif norm == 'ins':
        self.bn_func = lambda x: nn.InstanceNorm1d(x, affine=True)
    else:
        raise NotImplementedError('{} is not implemented !'.format(norm))

    if act == 'tanh':
        self.act_func = nn.Tanh
        self.act_out = nn.Tanh
    elif act == 'comp':
        self.act_func = ComplexActLayer
        self.act_out = lambda: ComplexActLayer(is_out=True)
    else:
        raise NotImplementedError('{} is not implemented !'.format(act))

    # prev conv
    self.prev_conv = ComplexConv1d(spec_dim * 2, hidden_dim, 1)

    # down
    self.down = nn.ModuleList()
    self.down_pool = nn.MaxPool1d(3, stride=2, padding=1)
    for idx in range(self.layers):
        block = ComplexConvBlock(hidden_dim, hidden_dim, kernel_size=kernel_size,
                                 padding=kernel_size // 2, bn_func=self.bn_func,
                                 act_func=self.act_func, layers=block_layers)
        self.down.append(block)

    # up
    self.up = nn.ModuleList()
    for idx in range(self.layers):
        in_c = hidden_dim if idx == 0 else hidden_dim * 2
        self.up.append(nn.Sequential(
            ComplexConvBlock(in_c, hidden_dim, kernel_size=kernel_size,
                             padding=kernel_size // 2, bn_func=self.bn_func,
                             act_func=self.act_func, layers=block_layers),
            self.bn_func(hidden_dim),
            self.act_func(),
            ComplexTransposedConv1d(hidden_dim, hidden_dim, kernel_size=2, stride=2),
        ))

    # out_conv
    self.out_conv = nn.Sequential(
        ComplexConvBlock(hidden_dim * 2, spec_dim * 2, kernel_size=kernel_size,
                         padding=kernel_size // 2, bn_func=self.bn_func,
                         act_func=self.act_func),
        self.bn_func(spec_dim * 2),
        self.act_func()
    )

    # refine conv
    self.refine_conv = nn.Sequential(
        ComplexConvBlock(spec_dim * 4, spec_dim * 2, kernel_size=kernel_size,
                         padding=kernel_size // 2, bn_func=self.bn_func,
                         act_func=self.act_func),
        self.bn_func(spec_dim * 2),
        self.act_func()
    )
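# Hedged sketch: constructing the complex U-Net above. The class name
# `SpectrogramUnet` and all dimension values are assumptions for illustration;
# in practice they would come from this project's settings (e.g. spec_dim is
# typically filter_len // 2 + 1 for a one-sided STFT).
model = SpectrogramUnet(spec_dim=513, hidden_dim=768, filter_len=1024, hop_len=256,
                        layers=3, block_layers=3, kernel_size=5,
                        is_mask=False, norm='bn', act='tanh')
# channel plan implied by the constructor: (spec_dim * 2) stacked real/imag
# channels enter `prev_conv`, decoder blocks after the first take doubled
# channels from skip concatenation, and `refine_conv` maps (spec_dim * 4)
# back down to (spec_dim * 2).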