Exemple #1
0
class RondomStretchMelSpectrogram(nn.Module):
    """Waveform -> fixed-width dB mel spectrogram with optional random time stretch.

    Pipeline (see ``forward``): complex STFT -> random time stretch (training
    only) -> ``ComplexNorm(power=2.)`` -> mel projection -> ``AmplitudeToDB``
    -> crop / zero-pad the last axis to ``n_frames`` columns.

    Args:
        sample_rate: Sample rate handed to ``MelSpectrogram``.
        n_fft: FFT size used for the STFT and for the mel filter bank.
        top_db: Forwarded to ``AmplitudeToDB`` as ``top_db``.
        max_perc: Half-width of the stretch-rate range; the rate is drawn
            from ``Uniform(1 - max_perc, 1 + max_perc)``.
        n_frames: Size of the last axis of the output. Defaults to 157, the
            value that used to be hard-coded in ``forward``.
    """

    # Class-level fallback so instances created without running __init__
    # (e.g. restored from an older pickle) keep the historical width.
    n_frames = 157

    def __init__(self, sample_rate, n_fft, top_db, max_perc, n_frames=157):
        super().__init__()
        self.time_stretch = TimeStretch(hop_length=None, n_freq=n_fft // 2 + 1)
        # power=None: keep the complex STFT, which time_stretch operates on.
        self.stft = Spectrogram(n_fft=n_fft, power=None)
        self.com_norm = ComplexNorm(power=2.)
        # Only the ``mel_scale`` sub-transform of this object is used in
        # forward(); the STFT is computed separately by ``self.stft``.
        self.mel_specgram = MelSpectrogram(sample_rate,
                                           n_fft=n_fft,
                                           f_max=8000)
        self.AtoDB = AmplitudeToDB(top_db=top_db)
        self.dist = Uniform(1. - max_perc, 1 + max_perc)
        self.n_frames = n_frames

    def forward(self, x, train):
        """Compute the dB mel spectrogram of ``x``.

        Args:
            x: Input passed straight to ``self.stft`` (presumably a waveform
                batch of shape (batch, channel, time) -- confirm with caller).
            train: When truthy, apply a random time stretch before the
                magnitude is taken.

        Returns:
            Tensor whose last axis has exactly ``n_frames`` entries, on the
            same device and with the same dtype as the computed spectrogram.
        """
        x = self.stft(x)
        if train:
            x = self.time_stretch(x, self.dist.sample().item())
        x = self.com_norm(x)
        x = self.mel_specgram.mel_scale(x)
        x = self.AtoDB(x)
        return self._fit_frames(x)

    def _fit_frames(self, x):
        """Crop or right-pad (with zeros) the last axis of ``x`` to ``n_frames``."""
        frames = x.size(-1)
        if frames > self.n_frames:
            return x[..., :self.n_frames]
        # F.pad allocates on x's own device/dtype, unlike the previous
        # torch.cuda.FloatTensor padding which broke on CPU inputs.
        return torch.nn.functional.pad(x, (0, self.n_frames - frames))
Exemple #2
0
class RondomStretchMelSpectrogram(nn.Module):
    """Waveform -> fixed-width dB mel spectrogram with random augmentation.

    Pipeline (see ``forward``): random resampling -> complex STFT ->
    (training only) random time stretch -> ``ComplexNorm(power=2.)`` ->
    (training only) frequency and time masking -> mel projection ->
    ``AmplitudeToDB`` -> crop / zero-pad the last axis to ``n_frames``.

    Args:
        sample_rate: Source sample rate; also handed to ``MelSpectrogram``.
        n_fft: FFT size used for the STFT and for the mel filter bank.
        top_db: Forwarded to ``AmplitudeToDB`` as ``top_db``.
        max_perc: Half-width of the stretch-rate range; the rate is drawn
            from ``Uniform(1 - max_perc, 1 + max_perc)``.
        n_frames: Size of the last axis of the output. Defaults to 157, the
            value that used to be hard-coded in ``forward``.
    """

    # Class-level fallback so instances created without running __init__
    # (e.g. restored from an older pickle) keep the historical width.
    n_frames = 157

    def __init__(self, sample_rate, n_fft, top_db, max_perc, n_frames=157):
        super().__init__()
        self.time_stretch = TimeStretch(hop_length=None, n_freq=n_fft // 2 + 1)
        # power=None: keep the complex STFT, which time_stretch operates on.
        self.stft = Spectrogram(n_fft=n_fft, power=None)
        self.com_norm = ComplexNorm(power=2.)
        self.fm = FrequencyMasking(100)
        self.tm = TimeMasking(100)
        # Only the ``mel_scale`` sub-transform of this object is used in
        # forward(); the STFT is computed separately by ``self.stft``.
        self.mel_specgram = MelSpectrogram(sample_rate,
                                           n_fft=n_fft,
                                           f_max=8000)
        self.AtoDB = AmplitudeToDB(top_db=top_db)
        self.max_perc = max_perc
        self.sample_rate = sample_rate
        self.n_frames = n_frames
        # One resampler per target rate, 0.6x .. 1.4x of the source rate.
        # NOTE(review): kept as a plain list, so these transforms are not
        # registered as submodules (``.to()`` / ``state_dict()`` skip them).
        # Switching to nn.ModuleList may change checkpoint keys -- confirm
        # before changing.
        self.resamples = [
            Resample(sample_rate, sample_rate * factor)
            for factor in (0.6, 0.7, 0.8, 0.9, 1, 1.1, 1.2, 1.3, 1.4)
        ]

    def forward(self, x, train):
        """Compute the augmented dB mel spectrogram of ``x``.

        Args:
            x: Input passed to the chosen resampler and then to ``self.stft``
                (presumably a waveform batch of shape (batch, channel, time)
                -- confirm with caller).
            train: When truthy, additionally apply random time stretch and
                frequency/time masking.

        Returns:
            Tensor whose last axis has exactly ``n_frames`` entries, on the
            same device and with the same dtype as the computed spectrogram.
        """
        # NOTE(review): the random resampling runs regardless of ``train``,
        # so evaluation is non-deterministic too -- confirm this is intended.
        x = random.choice(self.resamples)(x)

        x = self.stft(x)

        if train:
            dist = Uniform(1. - self.max_perc, 1 + self.max_perc)
            x = self.time_stretch(x, dist.sample().item())
            x = self.com_norm(x)
            # Masking is applied to the power spectrogram, before the mel
            # projection; the second argument (0) is the mask fill value.
            x = self.fm(x, 0)
            x = self.tm(x, 0)
        else:
            x = self.com_norm(x)

        x = self.mel_specgram.mel_scale(x)
        x = self.AtoDB(x)
        return self._fit_frames(x)

    def _fit_frames(self, x):
        """Crop or right-pad (with zeros) the last axis of ``x`` to ``n_frames``."""
        frames = x.size(-1)
        if frames > self.n_frames:
            return x[..., :self.n_frames]
        # F.pad allocates on x's own device/dtype, unlike the previous
        # torch.cuda.FloatTensor padding which broke on CPU inputs.
        return torch.nn.functional.pad(x, (0, self.n_frames - frames))
Exemple #3
0
class MelspectrogramStretch(object):
    """Convert one audio input into a standardized mel spectrogram (NumPy).

    The transforms are configured with fixed settings: 44100 Hz sample rate,
    2048-point FFT, hop of 1024 samples and 128 mel bands.
    """

    def __init__(self):

        sample_rate = 44100
        num_mels = 128
        fft_length = 2048
        hop_length = fft_length // 2

        # power=None keeps the complex STFT; the magnitude is taken later by
        # ``complex_norm``.
        self.stft = Spectrogram(n_fft=fft_length,
                                win_length=fft_length,
                                hop_length=None,
                                pad=0,
                                power=None,
                                normalized=False)

        # Only the ``mel_scale`` sub-transform of this object is used in
        # forward().
        self.mst = MelSpectrogram(sample_rate=sample_rate,
                                  n_fft=fft_length,
                                  hop_length=hop_length,
                                  n_mels=num_mels)

        # Normalization (post spectrogram processing)
        self.complex_norm = ComplexNorm(power=2.)

    def forward(self, data):
        """Compute the standardized mel spectrogram for ``data``.

        Args:
            data: Whatever ``AudioTransforms().apply`` accepts as its first
                argument (defined elsewhere -- presumably raw audio plus its
                sample rate; confirm against that class).

        Returns:
            tuple: ``(x, lengths)`` where ``x`` is a NumPy array of the
            normalized mel spectrogram with a leading batch axis of size 1,
            and ``lengths`` is a one-element list holding ``x.shape[3]``.
        """
        tsf = AudioTransforms()
        sig_t, _, _ = tsf.apply(data, None)

        # Add the batch axis: x -> (batch, time, channel)
        # (layout as stated by the original author -- TODO confirm).
        x = sig_t.unsqueeze(0).to("cpu")
        # xt -> (batch, channel, time)
        xt = x.float().transpose(1, 2)
        # Complex STFT; the original debug print showed [1, 1, 1025, 173, 2].
        x = self.stft(xt)
        # Power spectrogram; debug print showed [1, 1, 1025, 173].
        x = self.complex_norm(x)
        # Mel projection; debug print showed [1, 1, 128, 173].
        x = self.mst.mel_scale(x)

        # Normalize melspectrogram
        # Independent mean, std per batch element
        non_batch_inds = [1, 2, 3]
        mean = x.mean(non_batch_inds, keepdim=True)
        std = x.std(non_batch_inds, keepdim=True)
        # A constant (e.g. silent) input has zero std; clamp the divisor so
        # the result is all zeros instead of NaN/inf.
        std = std.clamp_min(torch.finfo(x.dtype).eps)
        x = (x - mean) / std

        x = x.detach().to('cpu').numpy().copy()

        lengths = [x.shape[3]]
        return x, lengths