def main():
    """Train the audio classifier on the high-intensity dataset and save it.

    Builds the spectrogram transform pipeline, splits the dataset 70/30 into
    train/test, trains via train(), reports final accuracy via validate(),
    and writes the trained weights to MODEL_STORE_PATH + "model.pt".
    """
    data_transform = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize((129, 129)),
        transforms.ToTensor(),
        # Single-channel normalization to roughly [-1, 1].
        transforms.Normalize(mean=[0.5], std=[0.5])
    ])
    dataset = AudioDataset(raw_data_root=DATA_PATH_HIGHINTENSITY,
                           saved_data_root=SAVED_DATA_PATH_HIGHINTENSITY,
                           noise_reps=5,
                           transforms=data_transform)
    dataset_length = len(dataset)  # idiomatic form of dataset.__len__()
    # BUG FIX: int(n * .7) + int(n * .3) can undershoot n because both terms
    # truncate (e.g. n=10 -> 7 + 3 is fine, but n=15 -> 10 + 4 = 14), and
    # random_split raises when the lengths don't sum to len(dataset).
    # Derive the test length as the remainder instead.
    train_length = int(dataset_length * .7)
    train_dataset, test_dataset = random_split(
        dataset, [train_length, dataset_length - train_length])
    train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=False)

    # Train the model
    trained_model = train(train_loader, test_loader)

    # Print out the final accuracy of the trained model
    accuracy = validate(test_loader, trained_model)
    print("Final Accuracy: ", accuracy)
    save(trained_model.state_dict(), MODEL_STORE_PATH + "model.pt")
net.classifier[-1] = nn.Linear(in_features=in_features, out_features=out_features, bias=True) # load best parameters net.load_state_dict(torch.load('../../models/mobilenet_best.pt')) # initialize datasets and dataloaders data_dir = '../../data' sample_rate = 22050 test_dataset = AudioDataset(data_dir, sample_rate, 'test') test_dataloader = torch.utils.data.DataLoader( dataset=test_dataset, batch_size=8, shuffle=True, num_workers=0, pin_memory=(device == 'cuda')) metrics = test(net, test_dataloader, device) print('\nConfusion Matrix:\n{}\n'.format(metrics['CM'])) print('Sensitivity/Recall: {:.3f}'.format(metrics['sens'])) print('Specificity: {:.3f}'.format(metrics['spec'])) print('Accuracy: {:.3f}'.format(metrics['acc'])) print('Balanced Accuracy: {:.3f}'.format(metrics['bal_acc']))
from matplotlib import pyplot as plt ###https://github.com/pytorch/tutorials/blob/master/beginner_source/blitz/cifar10_tutorial.py### ##download dataset and extract #transform = transforms.Compose( # [transforms.ToTensor(), # transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]) csv_filepath = Path( 'C:\\Users\\ophir\\source\\repos\\voice-corruption-classifier\\data\\voice_data\\data\\clean_speach\\spkrinfo.csv' ) train_test_splitter = TrainTestSplitter(csv_file=csv_filepath, test_ratio=0.2) trainset = AudioDataset( train_test_splitter, csv_file=csv_filepath, root_dir= 'C:\\Users\\ophir\\source\\repos\\voice-corruption-classifier\\data\\voice_data\\data\\clean_speach', is_train=True) trainloader = torch.utils.data.DataLoader(trainset, batch_size=4, shuffle=True, num_workers=0) testset = AudioDataset( train_test_splitter, csv_file=csv_filepath, root_dir= 'C:\\Users\\ophir\\source\\repos\\voice-corruption-classifier\\data\\voice_data\\data\\clean_speach', is_train=False) testloader = torch.utils.data.DataLoader(testset, batch_size=4,
# Rebuild the network's final classifier layer as a single-output head
# (one logit, matching the BCEWithLogitsLoss objective below).
in_features = net.classifier[-1].in_features
out_features = 1
new_head = nn.Linear(in_features=in_features, out_features=out_features, bias=True)
net.classifier[-1] = new_head

# initialize datasets and dataloaders
data_dir = '../../data'
sample_rate = 22050
train_dataset = AudioDataset(data_dir, sample_rate, 'train')
train_dataloader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=8,
    shuffle=True,
    num_workers=0,
    pin_memory=(device == 'cuda'))  # pin host memory only when on CUDA

# initialize loss function
# NOTE(review): weight is total/positives rather than the more common
# negatives/positives — confirm the intended class weighting.
num_coughs = sum(train_dataset.labels)
pos_weight = torch.tensor(len(train_dataset) / num_coughs)
loss_func = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
from DataProcessor import *
import numpy as np
from Model import *
from AudioDataset import AudioDataset

# Embedding dimension handed to Speech2Vec below.
EMBEDDING_SIZE = 50
# Per-frame feature dimension used for the dataset shape.
# NOTE(review): the model construction below uses data_processor.feature_dim
# instead of this constant — confirm the two always agree.
FEATURE_DIM = 40

# Earlier full-corpus run kept for reference (loads a saved model and dumps
# its embeddings to disk):
# data_processor = DataProcessor(r"segmentation", r"data\train-clean-500", "train_data")
# model = Speech2Vec(data_processor.AUDIO_MAX_SIZE, FEATURE_DIM, EMBEDDING_SIZE)
# model.load("memory/models/model9")
# # generate_embedding(model, data_processor.w2a, data_processor.ids, "memory/features/", "embedding.txt")

# Lightweight run on the reduced dataset.
data_processor = DataProcessor(r"light_segmentation", r"light_data", "train_data")
log(f"mean = {data_processor.mean}")
log(f"std = {data_processor.std}")
dataset = AudioDataset(data_processor.ids, shape=(data_processor.AUDIO_MAX_SIZE, FEATURE_DIM))
log(data_processor.AUDIO_MAX_SIZE)

log("Loading the model...")
model = Speech2Vec(data_processor.AUDIO_MAX_SIZE, data_processor.feature_dim, EMBEDDING_SIZE).cuda()

log("Training the model...")
val_losses, train_losses = train(model, dataset, batch_size=512, nb_epochs=10)

# Persist loss curves for later plotting/analysis.
log("Saving the losses")
np.save('val_loss', val_losses)
np.save('train_loss', train_losses)
log("Done !")
    # NOTE(review): this excerpt begins inside the body of an unseen `if`
    # (the `else:` below has no visible header) — indentation reconstructed.
    loss = checkpoint['loss']
    tasnet.eval()  # For inference and testing
else:
    # Evaluation cannot proceed without restored weights.
    print("Error: Checkpoint is required for evaluation.")
    exit(6)

training_dir = args.dst_dir
# Record which checkpoint this evaluation run used.
# NOTE(review): `training_dir + "testing.log"` assumes dst_dir ends with a
# path separator — os.path.join would be safer; confirm caller convention.
with open(training_dir + "testing.log", "a") as testlog:
    testlog.write("Loaded Checkpoint: " + args.checkpoint_file + "\n")

learning_started_date = datetime.now().strftime('%Y-%m-%d_%H:%M')

# Load Test dataset
test_data_path = BASE_DATA_PATH + "tt/"
testset = AudioDataset(test_data_path)
testloader = data_utils.DataLoader(testset, batch_size=MINIBATCH_SIZE, shuffle=False)

# Start Testing
# Accumulators for evaluation metrics — presumably summed over the test loop
# that follows this excerpt (SDR/SIR/SAR, STOI, PESQ, permutations); confirm.
sdr_sum = 0
sir_sum = 0
sarn_sum = 0
stoi_sum = 0
pesq_sum = 0
perm_sum = 0
global_audio_cnt = 0
running_loss = 0.0
current_testing_result = 0
# Swap the final classifier layer for a single-logit binary head.
in_features = net.classifier[-1].in_features
out_features = 1
net.classifier[-1] = nn.Linear(in_features=in_features,
                               out_features=out_features,
                               bias=True)

# initialize datasets and dataloaders
data_dir = '../../data'
sample_rate = 22050
val_dataset = AudioDataset(data_dir, sample_rate, 'val')
val_dataloader = torch.utils.data.DataLoader(
    val_dataset,
    batch_size=8,
    shuffle=True,
    num_workers=0,
    pin_memory=(device == 'cuda'))  # pin host memory only when on CUDA

# initialize loss function
# NOTE(review): weight is total/positives rather than the more common
# negatives/positives — confirm the intended class weighting.
num_coughs = sum(val_dataset.labels)
pos_weight = torch.tensor(len(val_dataset) / num_coughs)
loss_func = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
# Replace the classifier's last layer with a single-output Linear head.
in_features = net.classifier[-1].in_features
out_features = 1
net.classifier[-1] = nn.Linear(in_features = in_features, out_features = out_features, bias = True)

# initialize datasets and dataloaders
data_dir = '../../data'
sample_rate = 22050
train_dataset = AudioDataset(data_dir,sample_rate,'train')
train_dataloader = torch.utils.data.DataLoader(
    dataset = train_dataset,
    batch_size = 8,
    shuffle = True,
    num_workers = 0,
    pin_memory = (device == 'cuda'))  # pin host memory only when on CUDA
val_dataset = AudioDataset(data_dir,sample_rate,'val')
# NOTE: this DataLoader call is truncated at the boundary of the visible
# chunk — its remaining keyword arguments continue beyond this excerpt.
val_dataloader = torch.utils.data.DataLoader(
    dataset = val_dataset,
    batch_size = 8,
    shuffle = True,
    num_workers = 0,
import torch
from AudioDataset import AudioDataset, TrainTestSplitter
from pathlib import Path
import torch.nn as nn
from nn_modules import Net

# Class labels for the two-way gender task.
classes = ('M', 'F')

csv_filepath = Path('data/mf_test/spkrinfo.csv')
# test_ratio=0.9 places 90% of entries in the test split — presumably
# intentional since this script only evaluates; confirm.
train_test_splitter = TrainTestSplitter(csv_file=csv_filepath, test_ratio=0.9)
testset = AudioDataset(train_test_splitter,
                       csv_file=csv_filepath,
                       root_dir='data/mf_test',
                       is_train=False)
testloader = torch.utils.data.DataLoader(testset, batch_size=4,
                                         shuffle=False, num_workers=0)

# Path to the net's state dictionary
STATE_DICT_PATH = './speech_net.pth'

# test vs ground truth
dataiter = iter(testloader)
# BUG FIX: `.next()` is the Python-2 iterator idiom and was removed from
# PyTorch DataLoader iterators; use the builtin next() on the iterator.
data = next(dataiter)
test_inputs = data['audio']
test_labels = data['label']

# Rebuild the network and restore the trained parameters.
net = Net()
net.load_state_dict(torch.load(STATE_DICT_PATH))