Example 1
    def postprocess(self, data: Dict,
                    dataset: data_manager.CustomDataset) -> List[Dict]:
        samples = []

        # speech de-normalization, de-emphasis
        xs = data['x'].permute(0, 2, 1)  # B, T, C
        xs = dataset.norm_modules['x'].denormalize_(xs)
        xs = de_emphasis(gen.convert(xs[..., 0], astype=np.ndarray))

        ys = data['y']
        ys = dataset.norm_modules['y'].denormalize_(ys)
        ys = de_emphasis(gen.convert(ys, astype=np.ndarray))

        outs = data['out']
        outs = dataset.norm_modules['y'].denormalize_(outs)
        outs = de_emphasis(gen.convert(outs, astype=np.ndarray))

        data['x'], data['y'], data['out'] = xs, ys, outs

        T_xs = data['T_xs'].int()
        T_ys = data['T_ys'].int()

        # Dict[List] -> List[Dict]
        for idx in range(len(data['x'])):
            sample = dict()
            T_x, T_y = T_xs[idx], T_ys[idx]
            sample['T_xs'], sample['T_ys'] = T_x, T_y

            for key, value in data.items():
                value = value[idx]
                if key == 'x':
                    # trim the input signal to its own length T_x, then sanitize
                    value = value[..., :T_x]
                    value = np.nan_to_num(value, posinf=1., neginf=-1.)
                    value = np.asfortranarray(np.clip(value, -1, 1))
                elif len(key) > 3:
                    # length fields ('T_xs', 'T_ys') are kept as-is
                    pass
                else:
                    # trim target/output signals ('y', 'out') to the target length T_y, then sanitize
                    value = value[..., :T_y]
                    value = np.nan_to_num(value, posinf=1., neginf=-1.)
                    value = np.asfortranarray(np.clip(value, -1, 1))
                sample[key] = value
            samples.append(sample)

        return samples
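
The Dict[List] -> List[Dict] step above can be isolated into a small helper. The sketch below only assumes that every value in the batch dict is indexable along its first (batch) dimension; batch_dict_to_samples and the toy arrays are illustrative names, not part of the original code.

import numpy as np

def batch_dict_to_samples(batch):
    # split a dict of batched arrays into a list of per-sample dicts
    n = len(next(iter(batch.values())))
    return [{key: value[i] for key, value in batch.items()} for i in range(n)]

batch = {'x': np.zeros((2, 100)), 'y': np.ones((2, 100)), 'T_xs': np.array([80, 100])}
samples = batch_dict_to_samples(batch)  # -> list of two per-sample dicts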
Example 2
    def test_pre_emphasis(self):
        """
        Tests that de-emphasis restores a pre-emphasized signal.
        """
        rand_signal_batch = np.random.randint(low=1,
                                              high=10,
                                              size=(10, 1, 400))
        reconst_batch = emph.de_emphasis(emph.pre_emphasis(rand_signal_batch))

        # after de-emphasis, the signal must have been restored
        self.assertEqual(rand_signal_batch.shape, reconst_batch.shape)
        self.assertTrue(np.allclose(rand_signal_batch, reconst_batch))
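
For reference, here is a minimal sketch of a pre/de-emphasis pair that would satisfy this round-trip test. It assumes the standard first-order formulation (y[n] = x[n] - c * x[n-1] and its exact recursive inverse); the default coefficient 0.95 matches the emph_coeff=0.95 used in the training example below, but the actual emph module may differ.

import numpy as np

def pre_emphasis(signal_batch, emph_coeff=0.95):
    # y[n] = x[n] - c * x[n-1], applied along the last (time) axis
    result = signal_batch.astype(np.float64)
    result[..., 1:] -= emph_coeff * signal_batch[..., :-1]
    return result

def de_emphasis(signal_batch, emph_coeff=0.95):
    # x[n] = y[n] + c * x[n-1]: the exact recursive inverse of pre_emphasis
    result = signal_batch.astype(np.float64)
    for n in range(1, result.shape[-1]):
        result[..., n] += emph_coeff * result[..., n - 1]
    return result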
Example 3
            print(
                'Epoch {}, Step {}, d_clean_loss {}, d_noisy_loss {}, g_loss {}, g_loss_cond {}'
                .format(epoch + 1, i + 1, clean_loss.item(),
                        noisy_loss.item(), g_loss.item(),
                        g_cond_loss.item()))
            ### The statements below print various information about the network. Uncomment to use.
            # print('Weight for latent variable z : {}'.format(z))
            # print('Generated Outputs : {}'.format(generated_outputs))
            # print('Encoding 8th layer weight: {}'.format(generator.module.enc8.weight))

        # save sampled audio at the beginning of each epoch
        if i == 0:
            fake_speech = generator(fixed_test_noise, z)
            # move the generated batch to the CPU and convert to a numpy array
            fake_speech_data = fake_speech.data.cpu().numpy()
            fake_speech_data = emph.de_emphasis(fake_speech_data,
                                                emph_coeff=0.95)

            for idx in range(4):  # select four samples
                generated_sample = fake_speech_data[idx]
                filepath = os.path.join(
                    gen_data_path,
                    '{}_e{}.wav'.format(test_noise_filenames[idx], epoch + 1))
                wavfile.write(filepath, sample_rate, generated_sample.T)

    # save the model parameters for each epoch
    g_path = os.path.join(models_path, 'generator-{}.pkl'.format(epoch + 1))
    d_path = os.path.join(models_path,
                          'discriminator-{}.pkl'.format(epoch + 1))
    torch.save(generator.state_dict(), g_path)
    torch.save(discriminator.state_dict(), d_path)
print('Finished Training!')
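
A side note on the wav-writing step in the loop above: scipy.io.wavfile.write takes (path, sample_rate, data) with time along the first axis, which is presumably why the generated sample, shaped (channels, T), is transposed before writing. A self-contained sketch of that call in isolation, where the 16 kHz rate and the random waveform are placeholders rather than values from the original code:

import numpy as np
from scipy.io import wavfile

sample_rate = 16000  # placeholder; the real rate comes from the training configuration
generated_sample = np.random.uniform(-1, 1, size=(1, 16384)).astype(np.float32)  # (channels, T)
wavfile.write('example.wav', sample_rate, generated_sample.T)  # data shaped (T, channels)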