def do_noise_test(model, device):
    """
    Test the model on increasingly noisy versions of the test data.

    :param model: pytorch model to be tested
    :type model: torch.nn.Module
    :param device: device on which to run the evaluation
    :type device: torch.device
    """
    for noise in [0.0, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5]:
        noise_wavdata_to_tensor = [
            LoadAudio(),
            FixAudioLength(),
            AddNoise(noise),
            ToMelSpectrogram(n_mels=32),
            ToTensor("mel_spectrogram", "input"),
            Unsqueeze("input"),
        ]
        cachefile = "gsc_test_noise{}.npz".format("{:.2f}".format(noise)[2:])
        test_dataset = dataset_from_wavfiles(
            EXTRACTPATH / "test",
            noise_wavdata_to_tensor,
            cachefilepath=DATAPATH / cachefile,
        )
        test_loader = torch.utils.data.DataLoader(test_dataset,
                                                  batch_size=TEST_BATCH_SIZE)
        results = test(model=model, loader=test_loader, criterion=F.nll_loss,
                       device=device)
        print("Noise level: {}, Results: {}".format(noise, results))
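# Note on the cache naming above: "{:.2f}".format(0.05) yields "0.05" and the
# [2:] slice drops the leading "0.", so noise level 0.05 is cached as
# "gsc_test_noise05.npz" and 0.5 as "gsc_test_noise50.npz".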
        # Apply the preprocessing transforms to this wav file.
        for xform in wavdata_to_tensor:
            d = xform(d)
        x[i] = d

    y = np.array(ids, dtype=int)
    print("Saving preprocessed data to {}".format(cachefilepath))
    np.savez(cachefilepath, x, y)


if __name__ == "__main__":
    random.seed(SEED)
    np.random.seed(SEED)

    test_wavdata_to_tensor = [
        LoadAudio(),
        FixAudioLength(),
        ToMelSpectrogram(n_mels=32),
        ToTensor("mel_spectrogram", "input"),
        Unsqueeze("input"),
    ]
    train_wavdata_to_tensor = [
        LoadAudio(),
        ChangeAmplitude(),
        ChangeSpeedAndPitchAudio(),
        FixAudioLength(),
        ToSTFT(),
        StretchAudioOnSTFT(),
        TimeshiftAudioOnSTFT(),
        FixSTFTDimension(),
        ToMelSpectrogramFromSTFT(n_mels=32),
        DeleteSTFT(),
        ToTensor("mel_spectrogram", "input"),
        Unsqueeze("input"),
    ]
def do_training(model, device):
    """
    Train the model.

    :param model: pytorch model to be trained
    :type model: torch.nn.Module
    :param device: device on which to run the training
    :type device: torch.device
    """
    test_wavdata_to_tensor = [
        LoadAudio(),
        FixAudioLength(),
        ToMelSpectrogram(n_mels=32),
        ToTensor("mel_spectrogram", "input"),
        Unsqueeze("input"),
    ]
    valid_dataset = dataset_from_wavfiles(
        EXTRACTPATH / "valid",
        test_wavdata_to_tensor,
        cachefilepath=DATAPATH / "gsc_valid.npz",
    )
    valid_loader = torch.utils.data.DataLoader(valid_dataset,
                                               batch_size=VALID_BATCH_SIZE)

    train_wavdata_to_tensor = [
        LoadAudio(),
        ChangeAmplitude(),
        ChangeSpeedAndPitchAudio(),
        FixAudioLength(),
        ToSTFT(),
        StretchAudioOnSTFT(),
        TimeshiftAudioOnSTFT(),
        FixSTFTDimension(),
        ToMelSpectrogramFromSTFT(n_mels=32),
        DeleteSTFT(),
        ToTensor("mel_spectrogram", "input"),
        Unsqueeze("input"),
    ]

    sgd = optim.SGD(model.parameters(), lr=LEARNING_RATE, momentum=MOMENTUM,
                    weight_decay=WEIGHT_DECAY)
    lr_scheduler = optim.lr_scheduler.StepLR(sgd, step_size=1,
                                             gamma=LEARNING_RATE_GAMMA)

    for epoch in range(EPOCHS):
        # The training transforms are randomized, so each epoch caches its own
        # preprocessed copy of the training set.
        train_dataset = dataset_from_wavfiles(
            EXTRACTPATH / "train",
            train_wavdata_to_tensor,
            cachefilepath=DATAPATH / "gsc_train{}.npz".format(epoch),
            silence_percentage=0.1,
        )
        train_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=(FIRST_EPOCH_BATCH_SIZE if epoch == 0 else TRAIN_BATCH_SIZE),
            shuffle=True,
        )
        train(model=model, loader=train_loader, optimizer=sgd,
              criterion=F.nll_loss, device=device)
        lr_scheduler.step()

        # Keep sparse weights zeroed out and update boost strength after each epoch.
        model.apply(rezero_weights)
        model.apply(update_boost_strength)

        results = test(model=model, loader=valid_loader, criterion=F.nll_loss,
                       device=device)
        print("Epoch {}: {}".format(epoch, results))
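# A minimal sketch of how the pieces above are typically wired together once a
# model exists. The model construction here is an assumption (this excerpt's
# entry point is truncated), so the factory name is a placeholder:
#
#     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#     model = build_gsc_model().to(device)  # hypothetical model constructor
#     do_training(model, device)
#     do_noise_test(model, device)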