Example 1
def test_log_spectrogram_small_range():
    x = np.random.rand(1024 * 1024).reshape([1024, 1024])

    log_x = log_spectrogram(x, dynamic_range_dB=30)
    inv_log_x = inv_log_spectrogram(log_x)

    assert (np.linalg.norm(inv_log_x - x) < 0.08)
Example 2
def test_log_mel_spectrogram_small_range():
    x = np.random.rand(1024 * 513).reshape([513, 1024])

    x_mel = mel_spectrogram(x)

    log_x = log_mel_spectrogram(x, dynamic_range_dB=30)
    inv_log_x = inv_log_spectrogram(log_x)

    assert (np.linalg.norm(inv_log_x - x_mel) < 0.08)
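Both tests exercise the same round-trip property: compressing a spectrogram to a limited dynamic range in dB and expanding it back should only alter the values that fall below the dB floor. A self-contained numpy sketch of that property (the helpers below are stand-ins, not the project's log_spectrogram / inv_log_spectrogram):

import numpy as np

def to_dB(power_spec, dynamic_range_dB=30):
    # Clip everything more than dynamic_range_dB below the peak, then convert to dB.
    floor = np.max(power_spec) / 10 ** (dynamic_range_dB / 10)
    return 10 * np.log10(np.clip(power_spec, floor, None))

def from_dB(log_spec):
    # Undo the 10 * log10 compression.
    return 10 ** (log_spec / 10)

x = np.random.rand(1024, 1024)
round_trip = from_dB(to_dB(x, dynamic_range_dB=30))
print(np.linalg.norm(round_trip - x))  # small: only values below the floor were changed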
Example 3
def mel_spectrogram(self, spectrogram, dynamic_range_dB=50):
    # The input is assumed to be a log spectrogram normalized to [-1, 1] over a
    # 50 dB dynamic range, so 25 * (spectrogram - 1) recovers the dB values
    # before the log compression is inverted and the result is projected onto
    # the mel basis.
    melspectrogram = torch.matmul(
        self.mel_basis[:spectrogram.shape[0], :, :, :-1],
        inv_log_spectrogram(25 * (spectrogram - 1)))
    melspectrogram = torch.abs(melspectrogram)  # for safety
    # Clamp everything more than dynamic_range_dB below the peak before taking the log.
    minimum_relative_amplitude = torch.max(melspectrogram) / 10**(
        dynamic_range_dB / 10)
    logMelSpectrogram = 10 * torch.log10(
        torch.clamp(
            melspectrogram, min=minimum_relative_amplitude.data, max=None))
    # Rescale the dB values back to roughly [-1, 1].
    logMelSpectrogram = logMelSpectrogram / (dynamic_range_dB / 2) + 1
    return logMelSpectrogram
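The last line maps the dB values onto roughly [-1, 1] (exactly [-1, 1] when the mel spectrogram peaks at 1); with the default of 50 dB the inverse of that mapping is 25 * (x - 1), which is the rescaling applied to spectrogram at the top of this method and again in Example 6. A small numpy check of that pair of mappings (the helper names are hypothetical):

import numpy as np

def normalize_dB(log_spec_dB, dynamic_range_dB=50):
    # [-dynamic_range_dB, 0] dB -> [-1, 1]
    return log_spec_dB / (dynamic_range_dB / 2) + 1

def denormalize_dB(normalized, dynamic_range_dB=50):
    # [-1, 1] -> [-dynamic_range_dB, 0] dB; with 50 dB this is 25 * (x - 1)
    return (dynamic_range_dB / 2) * (normalized - 1)

dB = -np.linspace(0, 50, 6)              # 0, -10, ..., -50 dB
print(normalize_dB(dB))                  # 1.0, 0.6, ..., -1.0
print(denormalize_dB(normalize_dB(dB)))  # recovers the original dB values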
Example 4
def consistency(log10_spectrogram):
    # Work with the natural log of the linear-magnitude spectrogram.
    log_spectrogram = np.log(inv_log_spectrogram(log10_spectrogram))

    # Second-order finite differences along time (axis 1) and frequency (axis 0),
    # each offset by pi / 4.
    ttderiv = log_spectrogram[1:-1, :-2] - 2 * log_spectrogram[
        1:-1, 1:-1] + log_spectrogram[1:-1, 2:] + np.pi / 4
    ffderiv = log_spectrogram[:-2, 1:-1] - 2 * log_spectrogram[
        1:-1, 1:-1] + log_spectrogram[2:, 1:-1] + np.pi / 4

    # Standardize the absolute derivatives and measure their correlation.
    absttderiv = substractMeanAndDivideByStd(np.abs(ttderiv))
    absffderiv = substractMeanAndDivideByStd(np.abs(ffderiv))

    consistencies = np.sum(absttderiv * absffderiv)
    return consistencies
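substractMeanAndDivideByStd is not shown in this excerpt; a minimal sketch of a standardization helper along those lines (only the name comes from the source, the implementation here is an assumption):

import numpy as np

def substractMeanAndDivideByStd(x):
    # Standardize to zero mean and unit variance; the epsilon guards against a constant input.
    return (x - np.mean(x)) / (np.std(x) + 1e-12)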
Example 5
        def pghi_istft(x):
            # Choose between a truncated and a full Gaussian window for the STFT system.
            use_truncated_window = True
            if use_truncated_window:
                stft_system = GaussTruncTF(
                    hop_size=getattr(self, 'hop_size', 256),
                    stft_channels=getattr(self, 'stft_channels', 512))
            else:
                stft_system = GaussTF(hop_size=getattr(self, 'hop_size', 256),
                                      stft_channels=getattr(
                                          self, 'stft_channels', 512))

            # Drop the leading singleton dimension, undo the log compression and
            # invert the magnitude spectrogram back to a waveform.
            x = np.squeeze(x.numpy(), axis=0)
            new_Y = inv_log_spectrogram(x)
            new_y = stft_system.invert_spectrogram(new_Y)
            return new_y
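A hypothetical call, assuming x arrives as a torch tensor with a leading singleton dimension over a [frequency_bins, frames] log-magnitude spectrogram (both the shape convention and the 257 = 512 // 2 + 1 bin count are assumptions):

import torch

log_spec = torch.randn(1, 257, 128)  # dummy log-magnitude spectrogram in dB-like units
audio = pghi_istft(log_spec)         # waveform reconstructed from the magnitudes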
Example 6
    def train(self, train_loader, epoch, batch_idx=0):
        self.summarizer = TensorboardSummarizer(
            self.args['save_path'] + self.args['experiment_name'] + '_summary',
            self.args['tensorboard_interval'])

        self.consoleSummarizer = ConsoleSummarizer(
            self.args['log_interval'], self.args['optimizer']['batch_size'],
            len(train_loader))

        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        if batch_idx == 0 and epoch == 0:
            self.initModel()
        else:
            self.loadModel(batch_idx, epoch - 1)

        print('try')

        try:
            should_restart = True
            for batch_idx, data in enumerate(train_loader, batch_idx):
                data = data.to(device).float()
                data = data.view(self.args['optimizer']['batch_size'],
                                 *self.args['spectrogram_shape'])
                real_spectrograms = data[::2]
                fake_left_borders = data[1::2, :, :, :self.args['split'][0]]
                fake_right_borders = data[1::2, :, :, self.args['split'][0] +
                                          self.args['split'][1]:]

                # optimize stft_D
                for _ in range(self.args['optimizer']['n_critic']):
                    for index, (discriminator, optim_d) in enumerate(
                            zip(self.stft_discriminators, self.stft_optims_d)):
                        optim_d.zero_grad()
                        generated_spectrograms = self.generateGap(
                            [fake_left_borders, fake_right_borders])
                        fake_spectrograms = torch.cat(
                            (fake_left_borders, generated_spectrograms,
                             fake_right_borders), 3)

                        scale = 2**index
                        start, end = self.start_end_for_scale(scale)
                        x_fake = self.time_average(
                            fake_spectrograms[:, :, :, start:end],
                            scale).detach()
                        x_real = self.time_average(
                            real_spectrograms[:, :, :, start:end],
                            scale).detach()

                        d_loss_f = discriminator(x_fake).mean()
                        d_loss_r = discriminator(x_real).mean()

                        grad_pen = calc_gradient_penalty_bayes(
                            discriminator, x_real, x_fake,
                            self.args['gamma_gp'])
                        d_loss_gp = grad_pen.mean()
                        disc_loss = d_loss_f - d_loss_r + d_loss_gp

                        self.summarizer.trackScalar(
                            "Disc{:1d}/Loss".format(int(index)), disc_loss)
                        self.summarizer.trackScalar(
                            "Disc{:1d}/GradPen".format(int(index)), d_loss_gp)
                        self.summarizer.trackScalar(
                            "Disc{:1d}/Loss_f".format(int(index)), d_loss_f)
                        self.summarizer.trackScalar(
                            "Disc{:1d}/Loss_r".format(int(index)), d_loss_r)

                        disc_loss.backward()
                        optim_d.step()

                    # optimize mel_D
                    for index, (discriminator, optim_d) in enumerate(
                            zip(self.mel_discriminators, self.mel_optims_d),
                            self.args['mel_discriminator_start_powscale']):
                        optim_d.zero_grad()
                        generated_spectrograms = self.generateGap(
                            [fake_left_borders, fake_right_borders])
                        fake_spectrograms = torch.cat(
                            (fake_left_borders, generated_spectrograms,
                             fake_right_borders), 3)

                        scale = 2**index
                        start, end = self.start_end_for_scale(scale)
                        x_fake = self.time_average(
                            self.mel_spectrogram(fake_spectrograms[:, :, :,
                                                                   start:end]),
                            scale).detach()
                        x_real = self.time_average(
                            self.mel_spectrogram(real_spectrograms[:, :, :,
                                                                   start:end]),
                            scale).detach()

                        d_loss_f = discriminator(x_fake).mean()
                        d_loss_r = discriminator(x_real).mean()

                        grad_pen = calc_gradient_penalty_bayes(
                            discriminator, x_real, x_fake,
                            self.args['gamma_gp'])
                        d_loss_gp = grad_pen.mean()
                        disc_loss = d_loss_f - d_loss_r + d_loss_gp

                        self.summarizer.trackScalar(
                            "Disc{:1d}/Loss".format(int(index)), disc_loss)
                        self.summarizer.trackScalar(
                            "Disc{:1d}/GradPen".format(int(index)), d_loss_gp)
                        self.summarizer.trackScalar(
                            "Disc{:1d}/Loss_f".format(int(index)), d_loss_f)
                        self.summarizer.trackScalar(
                            "Disc{:1d}/Loss_r".format(int(index)), d_loss_r)

                        disc_loss.backward()
                        optim_d.step()

                # optimize G

                self.optim_g.zero_grad()

                gen_loss = 0

                for index, discriminator in enumerate(
                        self.stft_discriminators):
                    generated_spectrograms = self.generateGap(
                        [fake_left_borders, fake_right_borders])
                    fake_spectrograms = torch.cat(
                        (fake_left_borders, generated_spectrograms,
                         fake_right_borders), 3)

                    scale = 2**index
                    start, end = self.start_end_for_scale(scale)
                    x_fake = self.time_average(
                        fake_spectrograms[:, :, :, start:end], scale)

                    d_loss_f = discriminator(x_fake).mean()
                    gen_loss += -d_loss_f.mean()

                for index, discriminator in enumerate(
                        self.mel_discriminators,
                        self.args['mel_discriminator_start_powscale']):
                    generated_spectrograms = self.generateGap(
                        [fake_left_borders, fake_right_borders])
                    fake_spectrograms = torch.cat(
                        (fake_left_borders, generated_spectrograms,
                         fake_right_borders), 3)

                    scale = 2**index
                    start, end = self.start_end_for_scale(scale)
                    x_fake = self.time_average(
                        self.mel_spectrogram(fake_spectrograms[:, :, :,
                                                               start:end]),
                        scale)

                    d_loss_f = discriminator(x_fake).mean()
                    gen_loss += -d_loss_f.mean()

                self.summarizer.trackScalar("Gen/Loss", gen_loss)

                gen_loss.backward()
                self.optim_g.step()

                if batch_idx % self.args['log_interval'] == 0:
                    self.consoleSummarizer.printSummary(batch_idx, epoch)
                if batch_idx % self.args['tensorboard_interval'] == 0:
                    unprocessed_fake_spectrograms = inv_log_spectrogram(
                        25 * (fake_spectrograms[:8] -
                              1)).detach().cpu().numpy().squeeze()
                    fake_sounds = self._spectrogramInverter.invertSpectrograms(
                        unprocessed_fake_spectrograms)
                    real_sounds = self._spectrogramInverter.invertSpectrograms(
                        inv_log_spectrogram(
                            25 * (real_spectrograms[:8] -
                                  1)).detach().cpu().numpy().squeeze())

                    self.summarizer.trackScalar(
                        "Gen/Projection_loss",
                        torch.from_numpy(
                            self._spectrogramInverter.projectionLossBetween(
                                unprocessed_fake_spectrograms, fake_sounds) *
                            self.args['tensorboard_interval']))

                    self.summarizer.writeSummary(batch_idx, real_spectrograms,
                                                 generated_spectrograms,
                                                 fake_spectrograms,
                                                 fake_sounds, real_sounds,
                                                 self.args['sampling_rate'])
                if batch_idx % self.args['save_interval'] == 0:
                    self.model_saver.saveModel(self, batch_idx, epoch)
        except KeyboardInterrupt:
            should_restart = False
        self.model_saver.saveModel(self, batch_idx, epoch)
        return batch_idx, should_restart
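calc_gradient_penalty_bayes is not shown in this excerpt. As a generic point of reference only (not this project's implementation), a standard WGAN-GP gradient penalty for image-shaped inputs looks roughly like this:

import torch

def gradient_penalty(discriminator, x_real, x_fake, gamma):
    # Evaluate the critic on random interpolations between real and fake batches.
    alpha = torch.rand(x_real.size(0), 1, 1, 1, device=x_real.device)
    interpolates = (alpha * x_real + (1 - alpha) * x_fake).requires_grad_(True)
    d_out = discriminator(interpolates)

    # Penalize deviations of the critic's gradient norm from 1, per sample.
    grads = torch.autograd.grad(outputs=d_out, inputs=interpolates,
                                grad_outputs=torch.ones_like(d_out),
                                create_graph=True, retain_graph=True)[0]
    grads = grads.view(grads.size(0), -1)
    return gamma * ((grads.norm(2, dim=1) - 1) ** 2)

The training loop above takes the mean of the returned per-sample penalty and adds it to d_loss_f - d_loss_r to form the critic loss.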