def postprocess(self, data: Dict, dataset: data_manager.CustomDataset) -> List[Dict]:
    """Convert a batch dict of model tensors into a list of per-sample dicts.

    De-normalizes and de-emphasizes the input ('x'), target ('y') and
    estimate ('out') waveforms, then splits the batch dimension so each
    returned dict describes one utterance, trimmed to its true length.

    :param data: batch dict with 'x', 'y', 'out' tensors plus per-sample
        lengths 'T_xs' / 'T_ys' (other metadata keys pass through untouched).
    :param dataset: dataset whose ``norm_modules`` normalized the waveforms;
        needed here to invert that normalization.
    :return: list with one dict per sample in the batch.
    """

    def denorm_deemph(wave, norm_key: str):
        # Invert normalization, convert to ndarray, then undo pre-emphasis.
        wave = dataset.norm_modules[norm_key].denormalize_(wave)
        return de_emphasis(gen.convert(wave, astype=np.ndarray))

    def trim_sanitize(wave, length):
        # Trim to the true length, replace NaN/inf, clip into [-1, 1].
        wave = wave[..., :length]
        wave = np.nan_to_num(wave, posinf=1., neginf=-1.)
        return np.asfortranarray(np.clip(wave, -1, 1))

    # speech de-normalization, de-emphasis
    xs = data['x'].permute(0, 2, 1)  # B, T, C
    xs = dataset.norm_modules['x'].denormalize_(xs)
    # keep channel 0 only before converting to ndarray
    xs = de_emphasis(gen.convert(xs[..., 0], astype=np.ndarray))

    # target and estimate share the 'y' normalization statistics
    ys = denorm_deemph(data['y'], 'y')
    outs = denorm_deemph(data['out'], 'y')
    data['x'], data['y'], data['out'] = xs, ys, outs

    T_xs = data['T_xs'].int()
    T_ys = data['T_ys'].int()

    # Dict[List] -> List[Dict]
    samples = []
    for idx in range(len(data['x'])):
        sample = dict()
        T_x, T_y = T_xs[idx], T_ys[idx]
        sample['T_xs'], sample['T_ys'] = T_x, T_y
        for key, value in data.items():
            value = value[idx]
            if key == 'x':
                value = trim_sanitize(value, T_x)
            elif len(key) > 3:
                # long keys are metadata (e.g. 'T_xs', 'T_ys'): pass through
                pass
            else:
                # 'y' / 'out' are trimmed to the target length
                value = trim_sanitize(value, T_y)
            sample[key] = value
        samples.append(sample)

    return samples
def test_pre_emphasis(self):
    """Round-trip check: de-emphasis must invert pre-emphasis exactly."""
    # random integer-valued signals, batch of 10 single-channel waveforms
    original = np.random.randint(low=1, high=10, size=(10, 1, 400))
    restored = emph.de_emphasis(emph.pre_emphasis(original))
    # the shape must be preserved and the waveform restored (up to fp tolerance)
    self.assertEqual(original.shape, restored.shape)
    self.assertTrue(np.allclose(original, restored))
# Per-step progress report.
# FIX: `loss.data[0]` is the pre-PyTorch-0.4 idiom; on modern PyTorch indexing
# a 0-dim tensor raises IndexError. Use the supported `.item()` accessor.
print(
    'Epoch {}, Step {}, d_clean_loss {}, d_noisy_loss {}, g_loss {}, g_loss_cond {}'
    .format(epoch + 1, i + 1, clean_loss.item(), noisy_loss.item(),
            g_loss.item(), g_cond_loss.item()))

### Functions below print various information about the network. Uncomment to use.
# print('Weight for latent variable z : {}'.format(z))
# print('Generated Outputs : {}'.format(generated_outputs))
# print('Encoding 8th layer weight: {}'.format(generator.module.enc8.weight))

# save sampled audio at the beginning of each epoch
if i == 0:
    fake_speech = generator(fixed_test_noise, z)
    # NOTE(review): `.data` works here but `.detach()` is the preferred
    # modern spelling — confirm before changing, behavior is identical.
    fake_speech_data = fake_speech.data.cpu().numpy()  # convert to numpy array
    # undo the pre-emphasis applied during preprocessing
    fake_speech_data = emph.de_emphasis(fake_speech_data, emph_coeff=0.95)

    for idx in range(4):  # select four samples
        generated_sample = fake_speech_data[idx]
        filepath = os.path.join(
            gen_data_path,
            '{}_e{}.wav'.format(test_noise_filenames[idx], epoch + 1))
        wavfile.write(filepath, sample_rate, generated_sample.T)

# save the model parameters for each epoch
# NOTE(review): in the original (whitespace-mangled) source these lines
# presumably sit at the epoch-loop level, one dedent out of the step loop,
# and the final print outside all loops — verify nesting against the repo.
g_path = os.path.join(models_path, 'generator-{}.pkl'.format(epoch + 1))
d_path = os.path.join(models_path, 'discriminator-{}.pkl'.format(epoch + 1))
torch.save(generator.state_dict(), g_path)
torch.save(discriminator.state_dict(), d_path)

print('Finished Training!')