def test_batch_mulaw(self):
    """Check that mu-law encoding and decoding commute with batching.

    The waveform is transformed on its own and then tiled into a batch of
    three; that result must match tiling first and transforming the whole
    batch in one call, for both ``MuLawEncoding`` and ``MuLawDecoding``.
    """
    # The sample rate returned by the loader is not needed here.
    waveform, _ = torchaudio.load(self.test_filepath)  # (2, 278756), 44100

    # Single then transform then batch
    waveform_encoded = transforms.MuLawEncoding()(waveform)
    expected = waveform_encoded.unsqueeze(0).repeat(3, 1, 1)

    # Batch then transform
    waveform_batched = waveform.unsqueeze(0).repeat(3, 1, 1)
    computed = transforms.MuLawEncoding()(waveform_batched)

    # Element-wise transform: the batch keeps the waveform's trailing dims,
    # i.e. (3, 2, 278756) for the file shape noted above.
    self.assertEqual(computed.shape, expected.shape)
    self.assertTrue(torch.allclose(computed, expected))

    # Single then transform then batch
    waveform_decoded = transforms.MuLawDecoding()(waveform_encoded)
    expected = waveform_decoded.unsqueeze(0).repeat(3, 1, 1)

    # Batch then transform (decodes the batched encoding computed above)
    computed = transforms.MuLawDecoding()(computed)

    self.assertEqual(computed.shape, expected.shape)
    self.assertTrue(torch.allclose(computed, expected))
def test_mu_law_companding(self):
    """Round-trip ``self.sig`` through mu-law encoding and expanding.

    The signal is peak-normalised and checked three times: the input lies in
    [-1, 1], the encoded values lie in [0, quantization_channels], and the
    expanded values lie in [-1, 1] again. The sequence runs once on a NumPy
    array and once on a torch tensor, then the ``repr`` of both transforms is
    exercised.
    """
    quantization_channels = 256

    def check_round_trip(sig):
        # `sig` is already peak-normalised by the caller.
        self.assertTrue(sig.min() >= -1. and sig.max() <= 1.)

        sig_mu = transforms.MuLawEncoding(quantization_channels)(sig)
        # The upper bound must be checked on the encoded signal; checking
        # `sig` here would only re-test the normalised input.
        self.assertTrue(sig_mu.min() >= 0. and sig_mu.max() <= quantization_channels)

        sig_exp = transforms.MuLawExpanding(quantization_channels)(sig_mu)
        self.assertTrue(sig_exp.min() >= -1. and sig_exp.max() <= 1.)

    # NumPy path. The division allocates a new array, so self.sig is untouched.
    sig = self.sig.numpy()
    check_round_trip(sig / np.abs(sig).max())

    # Tensor path.
    sig = self.sig.clone()
    check_round_trip(sig / torch.abs(sig).max())

    # Smoke-test the string representations; only "does not raise" matters.
    repr(transforms.MuLawEncoding(quantization_channels))
    repr(transforms.MuLawExpanding(quantization_channels))
def test_mu_law_companding(self):
    """Round-trip ``self.sig`` through mu-law encoding and expanding.

    Runs the same range checks on a NumPy array and on a torch tensor: the
    peak-normalised input lies in [-1, 1], the encoded values lie in
    [0, quantization_channels], and the expanded values lie in [-1, 1].
    """
    quantization_channels = 256

    # NumPy path. The division allocates a new array, so self.sig is untouched.
    sig = self.sig.numpy()
    sig = sig / np.abs(sig).max()
    self.assertTrue(sig.min() >= -1. and sig.max() <= 1.)

    sig_mu = transforms.MuLawEncoding(quantization_channels)(sig)
    # Check the encoded signal's maximum, not the input's.
    self.assertTrue(sig_mu.min() >= 0. and sig_mu.max() <= quantization_channels)

    sig_exp = transforms.MuLawExpanding(quantization_channels)(sig_mu)
    self.assertTrue(sig_exp.min() >= -1. and sig_exp.max() <= 1.)

    # NOTE: the reconstruction error (sig - sig_exp) is deliberately not
    # asserted; an earlier tight MSE check (atol=1e-4) was recorded as
    # "not always true".

    # Tensor path.
    sig = self.sig.clone()
    sig = sig / torch.abs(sig).max()
    self.assertTrue(sig.min() >= -1. and sig.max() <= 1.)

    sig_mu = transforms.MuLawEncoding(quantization_channels)(sig)
    # Check the encoded signal's maximum, not the input's.
    self.assertTrue(sig_mu.min() >= 0. and sig_mu.max() <= quantization_channels)

    sig_exp = transforms.MuLawExpanding(quantization_channels)(sig_mu)
    self.assertTrue(sig_exp.min() >= -1. and sig_exp.max() <= 1.)
def test_mu_law_companding(self):
    """Round-trip ``self.waveform`` through mu-law encoding and decoding.

    Checks that the peak-normalised waveform lies in [-1, 1], that the
    encoded values lie in [0, quantization_channels], and that the decoded
    values lie in [-1, 1] again.
    """
    quantization_channels = 256

    waveform = self.waveform.clone()
    # In-place true division cannot write a float result into an integer
    # tensor, so promote integer waveforms before normalising.
    if not waveform.is_floating_point():
        waveform = waveform.to(torch.get_default_dtype())
    waveform /= torch.abs(waveform).max()
    self.assertTrue(waveform.min() >= -1. and waveform.max() <= 1.)

    waveform_mu = transforms.MuLawEncoding(quantization_channels)(waveform)
    self.assertTrue(waveform_mu.min() >= 0. and waveform_mu.max() <= quantization_channels)

    waveform_exp = transforms.MuLawDecoding(quantization_channels)(waveform_mu)
    self.assertTrue(waveform_exp.min() >= -1. and waveform_exp.max() <= 1.)
def test_mu_law_companding(self):
    """Verify value ranges across a mu-law encode/decode round trip.

    A copy of ``self.waveform`` is converted to the default floating dtype if
    it is not already floating point, scaled by its absolute peak, encoded
    with ``MuLawEncoding`` and decoded with ``MuLawDecoding``. The range is
    asserted after each stage.
    """
    quantization_channels = 256

    # Work on a copy so the in-place scaling below never touches the fixture.
    signal = self.waveform.clone()
    if not signal.is_floating_point():
        signal = signal.to(torch.get_default_dtype())
    signal.div_(signal.abs().max())
    lowest, highest = signal.min(), signal.max()
    self.assertTrue(lowest >= -1. and highest <= 1.)

    encoder = transforms.MuLawEncoding(quantization_channels)
    encoded = encoder(signal)
    lowest, highest = encoded.min(), encoded.max()
    self.assertTrue(lowest >= 0. and highest <= quantization_channels)

    decoder = transforms.MuLawDecoding(quantization_channels)
    decoded = decoder(encoded)
    lowest, highest = decoded.min(), decoded.max()
    self.assertTrue(lowest >= -1. and highest <= 1.)
def test_MuLawEncoding(self):
    """Run the consistency check for ``T.MuLawEncoding`` on white noise."""
    # Input comes from the shared helper; the transform uses its defaults.
    noise = common_utils.get_whitenoise()
    transform = T.MuLawEncoding()
    self._assert_consistency(transform, noise)
def test_MuLawEncoding(self):
    """Run the consistency check for ``T.MuLawEncoding`` on random input."""
    # A (1, 10) tensor of uniform samples in [0, 1).
    sample = torch.rand(1, 10)
    transform = T.MuLawEncoding()
    self._assert_consistency(transform, sample)
# Dataset preparation (fragment; the enclosing scope and loop header are not
# visible here).
#
# Per-item part (the `.append` calls suggest this runs inside a loop — confirm):
#   * `inputs` and `targets` are cut to the first `x[-1]` samples (cast to
#     int), then trimmed to a whole multiple of `seq_M`.
#   * `h = f(np.arange(1, len(inputs) + 1))` evaluates `f` at 1..len(inputs);
#     `f` is defined outside this view (presumably a per-sample feature
#     interpolator — verify).
#   * The trimmed arrays and `h` are appended to `train_wav`,
#     `train_features` and `train_targets`.
#
# Aggregate part:
#   * The lists are merged with `np.concatenate` / `np.vstack`.
#   * `train_wav` and `train_targets` are mu-law encoded with `channels`
#     quantization channels. `dec` (MuLawExpanding) is created but not used
#     in the visible span.
#   * `train_features` is standardised with `StandardScaler.fit_transform`.
#   * Waveforms and targets are reshaped to (-1, seq_M); features are reshaped
#     to (-1, seq_M, features_size) and `np.rollaxis(..., 2, 1)` moves the
#     feature axis to position 1, giving (-1, features_size, seq_M).
#   * `train_wav` becomes a long tensor and `train_features` a float tensor.
inputs = inputs[:x[-1].astype(int)] targets = targets[:x[-1].astype(int)] inputs = inputs[:len(inputs) // seq_M * seq_M] targets = targets[:len(targets) // seq_M * seq_M] h = f(np.arange(1, len(inputs) + 1)) train_wav.append(inputs) train_features.append(h) train_targets.append(targets) train_wav = np.concatenate(train_wav) train_features = np.vstack(train_features) train_targets = np.concatenate(train_targets) enc = transforms.MuLawEncoding(channels) dec = transforms.MuLawExpanding(channels) train_wav = enc(train_wav) train_targets = enc(train_targets) scaler = StandardScaler() train_features = scaler.fit_transform(train_features) train_wav = train_wav.reshape(-1, seq_M) train_features = np.rollaxis( train_features.reshape(-1, seq_M, features_size), 2, 1) train_targets = train_targets.reshape(-1, seq_M) train_wav = torch.from_numpy(train_wav).long() train_features = torch.from_numpy(train_features).float()
# Training setup (fragment; `args`, `batch_size` and `general_FFTNet` are
# defined outside this view).
#
#   * Hyper-parameters are read from `args`. `radixs` is a list of `depth`
#     twos and `N` is their product (2 ** depth). `N`, `steps`, `c`,
#     `generation_time` and `filename` are not used in the visible span.
#   * Every clip goes through Scale -> PadTrim(maxlen) -> MuLawEncoding with
#     `channels` quantization channels; `maxlen` is fixed at 50000.
#   * The YESNO dataset is downloaded into './data' and wrapped in a shuffled
#     DataLoader with 4 workers.
#   * The model is built as general_FFTNet(radixs, 128, channels) and moved to
#     the GPU with `.cuda()`, so a CUDA device is required. The meaning of the
#     literal 128 is not visible here.
#   * The printed number counts parameters with `requires_grad` set.
#   * Optimiser is Adam with learning rate `lr`; loss is CrossEntropyLoss.
#   * `a` holds the start time truncated to whole seconds and `step` starts
#     at 0 (presumably consumed by a training loop that follows — confirm).
depth = args.depth radixs = [2] * depth N = np.prod(radixs) channels = args.channels lr = args.lr steps = args.steps c = args.c generation_time = args.file_size filename = args.outfile maxlen = 50000 print('==> Downloading YesNo Dataset..') transform = transforms.Compose( [transforms.Scale(), transforms.PadTrim(maxlen), transforms.MuLawEncoding(quantization_channels=channels)]) data = torchaudio.datasets.YESNO('./data', download=True, transform=transform) data_loader = DataLoader(data, batch_size=batch_size, num_workers=4, shuffle=True) print('==> Building model..') net = general_FFTNet(radixs, 128, channels).cuda() print(sum(p.numel() for p in net.parameters() if p.requires_grad), "of parameters.") optimizer = optim.Adam(net.parameters(), lr=lr) criterion = torch.nn.CrossEntropyLoss() print("Start Training.") a = datetime.now().replace(microsecond=0) step = 0