コード例 #1
0
 def __init__(self, num_srcs, n_fft, hop_length, win_length, window,
              center):
     """Store the STFT settings and build the permutation-invariant loss.

     Args:
         num_srcs: Stored unchanged as ``self.num_srcs``.
         n_fft: Stored unchanged as ``self.n_fft``.
         hop_length: Stored unchanged as ``self.hop_length``.
         win_length: Stored unchanged; also the length of the Hann window.
         window: Name of the window function. Only ``'hann'`` is supported.
         center: Stored unchanged as ``self.center``.

     Raises:
         ValueError: If ``window`` is anything other than ``'hann'``.
     """
     if window != 'hann':
         # An unknown name used to leave ``self.window`` undefined, which
         # only surfaced later as an AttributeError far from the cause.
         raise ValueError(
             "Unsupported window {!r}; only 'hann' is available".format(
                 window))
     self.num_srcs = num_srcs
     self.n_fft = n_fft
     self.hop_length = hop_length
     self.win_length = win_length
     # NOTE: the window is created on the default CUDA device, so a GPU is
     # required to construct this object.
     self.window = torch.hann_window(win_length).cuda()
     self.center = center
     self.loss = PITLossWrapper(PairwiseNegSDR("sisdr"), pit_from="pw_mtx")
コード例 #2
0
def kernel_downsample2(zeros=56):
    """Build a Hann-windowed sinc kernel (presumably for 2x downsampling).

    Args:
        zeros: Controls the kernel size; the kernel has ``2 * zeros`` taps.

    Returns:
        Tensor of shape ``(1, 1, 2 * zeros)``.
    """
    # Symmetric Hann window of odd length; only its odd-indexed samples are
    # kept, which leaves exactly ``2 * zeros`` values.
    hann = th.hann_window(4 * zeros + 1, periodic=False)
    odd_taps = hann[1::2]
    # Half-integer sample positions, scaled by pi before the sinc.
    positions = th.linspace(-zeros + 0.5, zeros - 0.5, 2 * zeros) * math.pi
    return (sinc(positions) * odd_taps).view(1, 1, -1)
コード例 #3
0
 def __init__(self, filter_size, block_size):
     """Set up the generator's sizes and its frozen, rolled Hann window.

     Args:
         filter_size: Length of the filter window.
         block_size: Stored unchanged as ``self.block_size``.
     """
     super(Generator, self).__init__()
     # Runs the initialiser over the module tree as it exists right now.
     self.apply(self.init_parameters)

     self.filter_size = filter_size
     self.block_size = block_size
     self.noise_att = 1e-4
     self.filter_coef = None

     # Hann window circularly shifted by half its length along the last
     # axis; registered as a parameter but excluded from gradient updates.
     rolled_window = torch.roll(torch.hann_window(filter_size),
                                filter_size // 2, -1)
     self.filter_window = nn.Parameter(rolled_window, requires_grad=False)
コード例 #4
0
ファイル: filters.py プロジェクト: photosynthesis-team/piq
def hann_filter(kernel_size: int) -> torch.Tensor:
    r"""Build a normalized 2D Hann kernel.

    Args:
        kernel_size: Side length of the square kernel.

    Returns:
        kernel: Tensor with shape (1, kernel_size, kernel_size) whose
        entries sum to one.
    """
    # A symmetric Hann window is zero at both ends, so build one two samples
    # longer and discard the borders to keep every tap non-zero.
    taps = torch.hann_window(kernel_size + 2, periodic=False)[1:-1]
    # Separable 2D kernel: outer product of the 1D window with itself.
    kernel_2d = taps.unsqueeze(1) * taps.unsqueeze(0)
    total = kernel_2d.sum()
    return kernel_2d.view(1, kernel_size, kernel_size) / total
コード例 #5
0
 def stft(self, audio):
     '''
     Wrapper around th.stft using a Hann window of length self.win_length.

     The window is created directly on the device that holds ``audio`` so
     the call also works when the input lives on a non-default GPU.

     audio: wave signal as th.Tensor
     returns: contiguous result of th.stft
     '''
     hann = th.hann_window(self.win_length, device=audio.device)
     spec = th.stft(audio, n_fft=self.fft_bins, hop_length=self.hop_length, win_length=self.win_length,
                    window=hann, center=not self.causal, normalized=self.normalized)
     return spec.contiguous()
コード例 #6
0
    def __init__(self, size):
        """Create the analysis window and its companion window as parameters.

        Args:
            size: Window length; the hop is a quarter of it.
        """
        super(InstantaneousFrequency, self).__init__()

        self.size = size
        self.hop = size // 4
        # Registered with nn.Parameter defaults, i.e. it receives gradients.
        self.window = nn.Parameter(torch.hann_window(size))

        # Second window: sin(-theta) * pi / size sampled at ``size`` evenly
        # spaced angles in [0, 2*pi) -- presumably related to the derivative
        # of the Hann window; confirm against the forward pass.
        theta = np.linspace(0, 2 * np.pi, size, endpoint=False)
        companion = np.sin(-theta) * np.pi / size
        self.d_window = nn.Parameter(
            torch.as_tensor(companion, dtype=torch.float32))
コード例 #7
0
 def istft(self, x):
     """Invert a spectrogram with a Hann window and a hop of ``n_fft // 4``.

     Args:
         x: Spectrogram whose dimension 2 indexes the time frames.

     Returns:
         The signal returned by ``torch.istft``, ``(frames - 1) * hop``
         samples long.
     """
     # Use one hop value for both the transform and the output length. The
     # previous ``(frames - 1) * n_fft // 4`` differs from
     # ``(frames - 1) * (n_fft // 4)`` whenever n_fft is not a multiple of
     # four, which padded the output with spurious trailing zeros.
     hop_length = self.n_fft // 4
     return torch.istft(x,
                        n_fft=self.n_fft,
                        hop_length=hop_length,
                        win_length=self.n_fft,
                        center=True,
                        normalized=False,
                        onesided=True,
                        window=torch.hann_window(self.n_fft).to(x.device),
                        length=(x.size(2) - 1) * hop_length)
コード例 #8
0
 def stft(self, x):
     """Compute the STFT of ``x`` with a Hann window and a quarter-frame hop.

     Args:
         x: Input signal tensor; the window is moved to its device.

     Returns:
         The result of ``torch.stft`` (centered, reflect-padded, one-sided,
         not normalized).
     """
     frame_size = self.n_fft
     analysis_window = torch.hann_window(frame_size).to(x.device)
     return torch.stft(x,
                       n_fft=frame_size,
                       hop_length=frame_size // 4,
                       win_length=frame_size,
                       window=analysis_window,
                       center=True,
                       pad_mode='reflect',
                       normalized=False,
                       onesided=True)
コード例 #9
0
def file_log_spectrogram(sound, segment_time=20, overlap_time=10):
    r"""Load a sound file and return its log power spectrogram.

    Args:
        sound: Path (or whatever ``torchaudio.load`` accepts) of the file.
        segment_time: Frame length in milliseconds.
        overlap_time: Overlap between consecutive frames in milliseconds.

    Returns:
        The log spectrogram with its last two axes swapped and all
        singleton dimensions removed.
    """
    waveform, sample_rate = torchaudio.load(sound)
    # TODO: do not hardcode these
    frame_len = int(segment_time * sample_rate / 1000)
    overlap_len = int(overlap_time * sample_rate / 1000)
    hop_len = frame_len - overlap_len
    # Positional arguments: pad, window, n_fft, hop, win_length, power,
    # normalized.
    power_spec = F.spectrogram(waveform, 0, torch.hann_window(frame_len),
                               frame_len, hop_len, frame_len, 2, 0)
    # Small offset keeps the logarithm finite for silent bins.
    log_spec = torch.log(power_spec + 1e-10)
    return log_spec.transpose(1, 2).squeeze()
コード例 #10
0
 def __init__(self, n_fft=4096, n_hop=1024, center=False, window=None):
     """Configure the STFT module.

     Args:
         n_fft: FFT size; also the length of the default Hann window.
         n_hop: Hop size, stored as ``self.n_hop``.
         center: Centering flag, stored as ``self.center``.
         window: Optional custom window. When omitted, a frozen Hann
             window of length ``n_fft`` is used.
     """
     super(TorchSTFT, self).__init__()
     # The check used to be inverted: the default call ended up with
     # ``self.window = None`` while a caller-supplied window was silently
     # replaced by a Hann window.
     if window is None:
         self.window = nn.Parameter(torch.hann_window(n_fft),
                                    requires_grad=False)
     else:
         self.window = window
     self.n_fft = n_fft
     self.n_hop = n_hop
     self.center = center
コード例 #11
0
def main():
    """Separate a mixture .wav file with a trained U-Net and save the result.

    Command-line arguments: input path, output path, ``--model`` (required
    checkpoint path) and ``--gpu`` (device id; negative selects the CPU).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('input',
                        help='Input mixture .wav file\nIf input contains '
                        'more than one channel, ch0 will be used',
                        type=str)
    parser.add_argument('output',
                        help='Output path of separated .wav file',
                        type=str)
    parser.add_argument('--model',
                        '-m',
                        help='Trained model',
                        type=str,
                        metavar='PATH',
                        required=True)
    parser.add_argument('--gpu',
                        '-g',
                        help='GPU id (Negative number indicates CPU)',
                        type=int,
                        metavar='ID',
                        default=-1)
    args = parser.parse_args()

    if_use_cuda = torch.cuda.is_available() and args.gpu >= 0
    device = torch.device(f'cuda:{args.gpu}' if if_use_cuda else 'cpu')

    with torch.no_grad():
        sound, _ = torchaudio.load(args.input)
        # Keep only channel 0, preserving the channel dimension.
        sound = sound[[0], :].to(device)

        window = torch.hann_window(N_FFT, device=device)

        # Convert it to a magnitude spectrogram, and pad it to make the
        # number of time frames a multiple of 64 to be fed into U-NET
        sound_stft = torch.stft(sound, N_FFT, window=window)
        sound_spec = sound_stft.pow(2).sum(-1).sqrt()
        sound_spec, (left, right) = padding(sound_spec)

        # Load the model. ``map_location`` remaps the stored tensors onto
        # the selected device, so a checkpoint saved on a GPU also loads on
        # a CPU-only machine.
        # NOTE: torch.load unpickles the file; only load trusted checkpoints.
        model = UNet(N_PART)
        model.load_state_dict(torch.load(args.model, map_location=device))
        model.to(device)
        model.eval()

        # Strip the padded frames again before applying the mask.
        right = sound_spec.size(2) - right
        mask = model(sound_spec).squeeze(0)[:, :, left:right]
        separated = mask.unsqueeze(3) * sound_stft
        separated = torch.istft(separated,
                                N_FFT,
                                window=window,
                                length=sound.size(-1))
        separated = separated.cpu().numpy()

    # Save the separated signals
    sf.write(args.output, separated.T, SAMPLING_RATE)
コード例 #12
0
ファイル: dsp_xt.py プロジェクト: ishine/DurIAN4S-1
def istft(magnitude, phase, config):
    """Rebuild a waveform from magnitude and phase spectrograms.

    Args:
        magnitude: Magnitude spectrogram tensor.
        phase: Phase tensor (radians), same shape as ``magnitude``.
        config: Object providing ``win_size``, ``fft_size``, ``hop_size``
            and ``device``.

    Returns:
        The signal produced by ``torchaudio.functional.istft``.
    """
    # Polar -> rectangular, with real/imag stacked on a trailing axis.
    real_part = magnitude * torch.cos(phase)
    imag_part = magnitude * torch.sin(phase)
    spec = torch.stack((real_part, imag_part), dim=-1)

    hann = torch.hann_window(config.win_size)
    spec, hann = set_device((spec, hann), config.device)

    return torchaudio.functional.istft(spec,
                                       n_fft=config.fft_size,
                                       hop_length=config.hop_size,
                                       win_length=config.win_size,
                                       window=hann)
コード例 #13
0
 def __init__(self, n_fft, hop_length, center=True):
     """Store the spectrogram settings and create the Hann window.

     Args:
         n_fft: FFT size, i.e. the resolution on the frequency axis; also
             the length of the Hann window.
         hop_length: Step between frames, i.e. the resolution on the time
             axis (the original note suggests a quarter of the window
             width so that the overlaps add up to one).
         center: When true, frame ``t`` is meant to be centered at sample
             ``t * hop_length``, achieved through reflect padding on both
             sides (the original note mentions orcaspot uses
             ``center=False``).
     """
     self.n_fft, self.hop_length = n_fft, hop_length
     self.center = center
     # The Hann window puts more weight on the middle of each frame and
     # limits signal leakage between FFT frequency bins.
     self.window = torch.hann_window(n_fft)
コード例 #14
0
def spectrogram(wav, hparams):
    """Compute a peak-normalized log-power spectrogram in decibels.

    Args:
        wav: Input waveform tensor (CPU or GPU).
        hparams: Object providing ``n_fft``, ``hop_size``, ``win_size`` and
            ``top_db``.

    Returns:
        ``10 * log10`` of the power relative to its maximum, floored at
        ``top_db`` below the loudest value.
    """
    # Create the window on the input's device rather than forcing CUDA, so
    # CPU tensors and tensors on any GPU index work.
    window = torch.hann_window(hparams.win_size, device=wav.device)
    stft = torch.stft(
        wav,
        n_fft=hparams.n_fft,
        hop_length=hparams.hop_size,
        win_length=hparams.win_size,
        window=window
    )
    # Sum of squared real and imaginary parts (trailing axis).
    power = (stft ** 2).sum(dim=-1)
    # The clamp keeps log10 finite for silent bins.
    log_spec = 10. * torch.log10(torch.clamp(power / power.max(), 1e-10))
    return torch.max(log_spec, log_spec.max() - hparams.top_db)
コード例 #15
0
def get_power_loss(y, y1, frame_length=1024, hop_length=256):
    """Squared log-magnitude spectral distance between two signal batches.

    Args:
        y: First batch of signals; flattened to ``(batch, -1)``.
        y1: Second batch of signals; flattened the same way.
        frame_length: STFT frame (and Hann window) length.
        hop_length: STFT hop.

    Returns:
        Sum of squared log-magnitude differences divided by the batch size.
    """
    batch = y.size(0)
    flat_a = y.view(batch, -1)
    flat_b = y1.view(batch, -1)

    hann = torch.hann_window(frame_length, periodic=True)
    if use_cuda:
        hann = hann.cuda()

    def _log_magnitude(spec):
        # The epsilon under the square root keeps the log finite.
        return torch.log(torch.sqrt(torch.sum(spec ** 2, -1) + 1e-5))

    spec_a = torch.stft(flat_a, frame_length=frame_length, hop=hop_length, window=hann)
    spec_b = torch.stft(flat_b, frame_length=frame_length, hop=hop_length, window=hann)
    log_diff = _log_magnitude(spec_a) - _log_magnitude(spec_b)
    return torch.sum(log_diff ** 2) / batch
コード例 #16
0
 def func(tensor):
     """Run ``F.griffinlim`` on ``tensor`` with a fixed configuration."""
     fft_size = 400
     win_length = 400
     hop_length = 200
     power = 2.
     n_iter = 32
     momentum = 0.99
     length = 1000
     rand_init = False
     # Window matches the input's device and dtype.
     hann = torch.hann_window(win_length, dtype=tensor.dtype, device=tensor.device)
     return F.griffinlim(tensor, hann, fft_size, hop_length, win_length, power, n_iter, momentum, length, rand_init)
コード例 #17
0
ファイル: modules.py プロジェクト: zebiak/TTS-Cube
def stft(y, scale='linear'):
    """Magnitude spectrogram of ``y`` (1024-point FFT, hop 256, Hann window).

    Args:
        y: Input signal tensor (CPU or GPU).
        scale: ``'linear'`` returns the magnitude, ``'log'`` returns twice
            its natural logarithm.

    Returns:
        The spectrogram in the requested scale, or ``None`` for any other
        ``scale`` value (kept for backward compatibility).
    """
    # Build the window on the input's device instead of forcing CUDA.
    window = torch.hann_window(1024, device=y.device)
    D = torch.stft(y, n_fft=1024, hop_length=256, win_length=1024, window=window)
    # The epsilon keeps the square root differentiable at zero.
    D = torch.sqrt(D.pow(2).sum(-1) + 1e-10)
    if scale == 'linear':
        return D
    if scale == 'log':
        return 2 * torch.log(torch.clamp(D, 1e-10, float("inf")))
    # Unknown scale: same result as the previous silent fall-through.
    return None
コード例 #18
0
    def __getitem__(self, index):
        """Compute spectrograms for one target/noisy pair and pickle them.

        The result is written to ``<save_path>/<index>.pkl``.

        Args:
            index: Position of the pair in ``self.tgt_paths`` /
                ``self.noi_paths``.

        Returns:
            ``index`` unchanged; the data itself lives in the pickle file.
        """
        # ``exist_ok`` avoids the check-then-create race when several
        # workers reach this point at the same time.
        os.makedirs(self.save_path, exist_ok=True)

        # presumably ``fs`` is the sampling rate in kHz -- TODO confirm
        win_len = int(1024 * (fs / 16))
        window = torch.hann_window(window_length=win_len,
                                   periodic=True,
                                   dtype=None,
                                   layout=torch.strided,
                                   device=None,
                                   requires_grad=False)

        # NOTE(review): a missing path list yields ``None`` here, which
        # torchaudio.load cannot open.
        tgt_item = self.tgt_paths[index] if self.tgt_paths is not None else None
        tgt_wav, _ = torchaudio.load(tgt_item)

        noi_item = self.noi_paths[index] if self.noi_paths is not None else None
        noi_wav, _ = torchaudio.load(noi_item)

        tgt_wav_len = tgt_wav.shape[1]

        def _complex_spectrogram(wav):
            # power=None requests the complex spectrogram, not a power one.
            return torchaudio.functional.spectrogram(waveform=wav,
                                                     pad=0,
                                                     window=window,
                                                     n_fft=win_len,
                                                     hop_length=int(win_len /
                                                                    4),
                                                     win_length=win_len,
                                                     power=None,
                                                     normalized=False)

        spec_tgt = _complex_spectrogram(tgt_wav)
        spec_noi = _complex_spectrogram(noi_wav)

        # First channel only; the trailing axis holds real (0) / imag (1).
        tgt_wav_real = spec_tgt[0, :, :, 0]
        tgt_wav_imag = spec_tgt[0, :, :, 1]
        input_wav_real = spec_noi[0, :, :, 0]
        input_wav_imag = spec_noi[0, :, :, 1]

        batch_dict = {
            "id": index,
            "tgt_wav_len": tgt_wav_len,
            "audio_wav": [noi_wav, tgt_wav],
            "audio_data_Real": [input_wav_real, tgt_wav_real],
            "audio_data_Imagine": [input_wav_imag, tgt_wav_imag]
        }

        with open(os.path.join(self.save_path, str(index) + '.pkl'), 'wb') as f:
            pickle.dump(batch_dict, f)

        return index
コード例 #19
0
    def __init__(self, sampling_rate: int = 22050, n_fft: int = 1024, window_size: int = 1024, hop_size: int = 256,
                 num_mels: int = 80, fmin: float = 0., fmax: float = 8000.):
        """Store the STFT sizes and register the mel filter and Hann window.

        Args:
            sampling_rate: Passed to ``mel`` when building the filter bank.
            n_fft: FFT size.
            window_size: Length of the Hann window buffer.
            hop_size: Hop between frames.
            num_mels: Passed to ``mel`` (number of mel bands).
            fmin: Passed to ``mel`` (lower frequency bound).
            fmax: Passed to ``mel`` (upper frequency bound).
        """
        super().__init__()
        self.n_fft, self.hop_size, self.window_size = n_fft, hop_size, window_size
        self.pad_size = (n_fft - hop_size) // 2

        # Buffers are part of the module state but are not trained.
        filter_bank = mel(sampling_rate, n_fft, num_mels, fmin, fmax)
        self.register_buffer('mel_filter', torch.FloatTensor(filter_bank))
        self.register_buffer('window', torch.hann_window(window_size))
コード例 #20
0
 def func(tensor):
     """Run ``F.spectral_centroid`` on ``tensor`` with fixed settings."""
     sample_rate = 44100
     pad = 0
     fft_size = 400
     hop_length = 200
     win_length = 400
     # Window matches the input's device and dtype.
     hann = torch.hann_window(win_length,
                              dtype=tensor.dtype,
                              device=tensor.device)
     return F.spectral_centroid(tensor, sample_rate, pad, hann, fft_size,
                                hop_length, win_length)
コード例 #21
0
def _feature_window_function(window_type, window_size, blackman_coeff):
    r"""Return the window tensor for ``window_type`` with ``window_size`` points.

    Args:
        window_type: One of HANNING, HAMMING, POVEY, RECTANGULAR, BLACKMAN.
        window_size: Number of samples in the window.
        blackman_coeff: Constant term of the Blackman window; only used
            for BLACKMAN.

    Raises:
        Exception: If ``window_type`` is not one of the known types.
    """
    if window_type == HANNING:
        return torch.hann_window(window_size, periodic=False)
    if window_type == HAMMING:
        return torch.hamming_window(window_size, periodic=False, alpha=0.54, beta=0.46)
    if window_type == POVEY:
        # like hanning but goes to zero at edges
        return torch.hann_window(window_size, periodic=False).pow(0.85)
    if window_type == RECTANGULAR:
        return torch.ones(window_size)
    if window_type == BLACKMAN:
        # can't use torch.blackman_window as they use different coefficients
        step = 2 * math.pi / (window_size - 1)
        sample_idx = torch.arange(window_size)
        first_harmonic = 0.5 * torch.cos(step * sample_idx)
        second_harmonic = (0.5 - blackman_coeff) * torch.cos(2 * step * sample_idx)
        return blackman_coeff - first_harmonic + second_harmonic
    raise Exception('Invalid window type ' + window_type)
コード例 #22
0
    def test_spectrogram(self):
        """Check ``F.spectrogram`` through the TorchScript test helper."""
        waveform = torch.rand((1, 1000))
        pad = 0
        fft_size = 400
        hop_length = 200
        win_length = 400
        hann = torch.hann_window(win_length)
        power = 2
        normalize = False

        _test_torchscript_functional(F.spectrogram, waveform, pad, hann,
                                     fft_size, hop_length, win_length, power,
                                     normalize)
コード例 #23
0
 def func(tensor):
     """Run ``F.spectrogram`` on ``tensor`` with ``power=None``."""
     pad = 0
     fft_size = 400
     hop_length = 200
     win_length = 400
     power = None
     normalize = False
     # Window matches the input's device and dtype.
     hann = torch.hann_window(win_length,
                              dtype=tensor.dtype,
                              device=tensor.device)
     return F.spectrogram(tensor, pad, hann, fft_size, hop_length,
                          win_length, power, normalize)
コード例 #24
0
 def test_linearity_of_istft2(self):
     """Linearity check: hann_window, centered, not normalized, not onesided."""
     istft_kwargs = dict(
         n_fft=12,
         window=torch.hann_window(12),
         center=True,
         pad_mode='reflect',
         normalized=False,
         onesided=False,
     )
     input_shape = (2, 12, 7, 2)
     self._test_linearity_of_istft(input_shape, istft_kwargs)
コード例 #25
0
 def __init__(self,
              fft_size=32,
              win_size=20,
              hop_size=10,
              logratio=0.0,
              device="cuda"):
     """Store the STFT loss settings and create the Hann window.

     Args:
         fft_size: FFT size.
         win_size: Length of the Hann window.
         hop_size: Hop between frames.
         logratio: Stored unchanged as ``self.logratio``.
         device: Device the window is moved to.
     """
     super(STFTLoss, self).__init__()
     self.fft_size, self.win_size, self.hop_size = fft_size, win_size, hop_size
     self.logratio = logratio
     # Plain attribute (not a buffer), created on CPU and moved to ``device``.
     hann = torch.hann_window(win_size)
     self.window = hann.to(device)
コード例 #26
0
ファイル: math_ops.py プロジェクト: malfet/pytorch
 def spectral_ops(self):
     """Exercise the spectral operators and window functions once each.

     Returns:
         Tuple of the stft, istft, bartlett, blackman, hamming, hann and
         kaiser results, in that order.
     """
     signal = torch.randn(10)
     spectrum = torch.randn(10, 8, 4, 2)
     forward_result = torch.stft(signal, 8)
     inverse_result = torch.istft(spectrum, 8)
     bartlett = torch.bartlett_window(2, dtype=torch.float)
     blackman = torch.blackman_window(2, dtype=torch.float)
     hamming = torch.hamming_window(4, dtype=torch.float)
     hann = torch.hann_window(4, dtype=torch.float)
     kaiser = torch.kaiser_window(4, dtype=torch.float)
     return (
         forward_result,
         inverse_result,
         bartlett,
         blackman,
         hamming,
         hann,
         kaiser,
     )
コード例 #27
0
ファイル: train_student.py プロジェクト: botmatic/tacotron2
    def forward(self, student_hat, y):
        """Two-resolution spectral magnitude loss between student and target.

        Bins below the 3000 Hz cut-off are compared with a 2048-point FFT
        and a 1200-sample Hann window; bins from the cut-off upwards are
        compared with a 512-point FFT and the central 512 samples of that
        same window. The second term is weighted by 10.

        Args:
            student_hat: Student output; flattened to ``(batch, -1)``.
            y: Reference signal; flattened the same way.

        Returns:
            ``mean(low_loss, dim=1) + 10 * mean(high_loss, dim=1)``.
        """
        n_batch = student_hat.size(0)
        student_hat = student_hat.view(n_batch, -1)
        y = y.view(n_batch, -1)

        long_len, short_len, hop = 1200, 512, 300
        long_window = torch.hann_window(long_len, periodic=True).to(self.device)
        # The short window is the middle slice of the long one.
        offset = int((long_len - short_len) / 2)
        short_window = long_window[offset:offset + short_len]

        def _band_loss(window, n_fft, band):
            # Squared L2 norm (over dim 1) of the magnitude difference,
            # restricted to the frequency bins selected by ``band``.
            stft_args = dict(win_length=window.size(0),
                             hop_length=hop,
                             n_fft=n_fft,
                             window=window)
            student_spec = torch.stft(student_hat, **stft_args)[:, band, :, :]
            target_spec = torch.stft(y, **stft_args)[:, band, :, :]
            student_mag = self.get_magnitude(student_spec)
            target_mag = self.get_magnitude(target_spec)
            gap = torch.abs(student_mag) - torch.abs(target_mag)
            return torch.pow(torch.norm(gap, p=2, dim=1), 2)

        # we use fft size 2048 for frequencies lower than 3000hz
        low_cut = int(3000 / (self.sample_rate * 0.5) * 1025)
        low_loss = _band_loss(long_window, 2048, slice(None, low_cut))

        high_cut = int(3000 / (self.sample_rate * 0.5) * 257)
        high_loss = _band_loss(short_window, 512, slice(high_cut, None))

        return torch.mean(low_loss, dim=1) + 10 * torch.mean(high_loss, dim=1)
コード例 #28
0
    def inv_f(self, input, phase):
        """Invert a magnitude/phase pair back to a signal via ``istft``.

        Args:
            input: Magnitude tensor.
            phase: Phase tensor (radians), same shape as ``input``.

        Returns:
            The result of ``istft`` with a Hann window of ``self.win_length``.
        """
        # Polar -> rectangular, real/imag stacked on a trailing axis.
        real_part = input * torch.cos(phase)
        imag_part = input * torch.sin(phase)
        spec = torch.stack([real_part, imag_part], dim=-1)

        hann = torch.hann_window(self.win_length, device=spec.device)
        return istft(
            spec,
            n_fft=self.num_fft,
            hop_length=self.hop_length,
            win_length=self.win_length,
            window=hann,
        )
コード例 #29
0
 def __init__(self, stft_params, device):
     """Read the STFT settings and prepare the float32 Hann window.

     Args:
         stft_params: Mapping with the keys ``'n_fft'``, ``'hop_length'``
             and ``'win_length'``.
         device: Device on which the window is stored.
     """
     self.device, self.dtype = device, torch.float32
     self.n_fft = stft_params['n_fft']
     self.hop_length = stft_params['hop_length']
     self.win_length = stft_params['win_length']
     hann = torch.hann_window(self.win_length)
     self.window = hann.to(self.dtype).to(self.device)
     self.freq_num = self._cal_freq_num()
     # Left unset here; assigned elsewhere.
     self.pad = None
     self.pad_len = None
     self.sample_len = None
コード例 #30
0
ファイル: loss.py プロジェクト: macroustc/FB-MelGAN
 def __init__(self,
              fft_size,
              hop_size,
              win_size):
     """Store the STFT sizes and create the window and the two sub-losses.

     Args:
         fft_size: FFT size.
         hop_size: Hop between frames.
         win_size: Length of the Hann window.
     """
     super(STFTLoss, self).__init__()

     self.fft_size, self.hop_size, self.win_size = fft_size, hop_size, win_size
     # Plain tensor attribute: it is not registered as a buffer, so it does
     # not follow the module when the module is moved between devices.
     self.window = torch.hann_window(win_size)
     self.sc_loss = SpectralConvergence()
     self.mag_loss = LogSTFTMagnitude()