# --- Esempio n. 1 (Example 1) ---
def main():
    """Train the audio classifier, report test accuracy, save the weights.

    Builds the transform pipeline and dataset, splits it 70/30 into
    train/test, trains via train(), evaluates via validate(), and writes
    the trained state dict to MODEL_STORE_PATH + "model.pt".
    """
    data_transform = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize((129, 129)),
        transforms.ToTensor(),
        # Single-channel normalization to roughly [-1, 1].
        transforms.Normalize(mean=[0.5], std=[0.5])
    ])

    dataset = AudioDataset(raw_data_root=DATA_PATH_HIGHINTENSITY,
                           saved_data_root=SAVED_DATA_PATH_HIGHINTENSITY,
                           noise_reps=5,
                           transforms=data_transform)

    # BUG FIX: int(n * .7) + int(n * .3) can undershoot n by 1-2 samples
    # due to truncation, and random_split requires the lengths to sum
    # exactly to len(dataset). Derive the test length from the remainder.
    dataset_length = len(dataset)  # idiomatic len() instead of __len__()
    train_length = int(dataset_length * .7)
    train_dataset, test_dataset = random_split(
        dataset, [train_length, dataset_length - train_length])

    train_loader = DataLoader(dataset=train_dataset,
                              batch_size=BATCH_SIZE,
                              shuffle=True)
    test_loader = DataLoader(dataset=test_dataset,
                             batch_size=BATCH_SIZE,
                             shuffle=False)

    # Train the model
    trained_model = train(train_loader, test_loader)

    # Print out the final accuracy of the trained model
    accuracy = validate(test_loader, trained_model)
    print("Final Accuracy: ", accuracy)

    # Persist only the parameters (state dict), not the whole module.
    save(trained_model.state_dict(), MODEL_STORE_PATH + "model.pt")
# --- Esempio n. 2 (Example 2) ---
    # Swap in a fresh final Linear head; in_features/out_features are
    # defined above this fragment (not visible here).
    net.classifier[-1] = nn.Linear(in_features=in_features,
                                   out_features=out_features,
                                   bias=True)

    # Load the best parameters found during training.
    # NOTE(review): path is relative to the working directory — confirm
    # the script is launched from the expected folder.
    net.load_state_dict(torch.load('../../models/mobilenet_best.pt'))

    # initialize datasets and dataloaders

    data_dir = '../../data'

    sample_rate = 22050

    test_dataset = AudioDataset(data_dir, sample_rate, 'test')

    # pin_memory speeds up host-to-GPU copies, so enable only on CUDA.
    test_dataloader = torch.utils.data.DataLoader(
        dataset=test_dataset,
        batch_size=8,
        shuffle=True,
        num_workers=0,
        pin_memory=(device == 'cuda'))

    # Run evaluation; test() returns a dict of metrics keyed as below.
    metrics = test(net, test_dataloader, device)

    print('\nConfusion Matrix:\n{}\n'.format(metrics['CM']))
    print('Sensitivity/Recall: {:.3f}'.format(metrics['sens']))
    print('Specificity: {:.3f}'.format(metrics['spec']))
    print('Accuracy: {:.3f}'.format(metrics['acc']))
    print('Balanced Accuracy: {:.3f}'.format(metrics['bal_acc']))
# --- Esempio n. 3 (Example 3) ---
from matplotlib import pyplot as plt

### Adapted from: https://github.com/pytorch/tutorials/blob/master/beginner_source/blitz/cifar10_tutorial.py ###
## Download the dataset and extract it before running.

# (Unused) normalization transform kept for reference:
#transform = transforms.Compose(
#   [transforms.ToTensor(),
#   transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
# Speaker-info CSV used to split speakers between train and test.
csv_filepath = Path(
    'C:\\Users\\ophir\\source\\repos\\voice-corruption-classifier\\data\\voice_data\\data\\clean_speach\\spkrinfo.csv'
)
train_test_splitter = TrainTestSplitter(csv_file=csv_filepath, test_ratio=0.2)

# Training split: is_train=True selects the training speakers.
trainset = AudioDataset(
    train_test_splitter,
    csv_file=csv_filepath,
    root_dir=
    'C:\\Users\\ophir\\source\\repos\\voice-corruption-classifier\\data\\voice_data\\data\\clean_speach',
    is_train=True)
trainloader = torch.utils.data.DataLoader(trainset,
                                          batch_size=4,
                                          shuffle=True,
                                          num_workers=0)

# Test split: same CSV/root, is_train=False selects the held-out speakers.
testset = AudioDataset(
    train_test_splitter,
    csv_file=csv_filepath,
    root_dir=
    'C:\\Users\\ophir\\source\\repos\\voice-corruption-classifier\\data\\voice_data\\data\\clean_speach',
    is_train=False)
testloader = torch.utils.data.DataLoader(testset,
                                         batch_size=4,
# --- Esempio n. 4 (Example 4) ---
    # Input width of the pretrained classifier's final layer.
    in_features = net.classifier[-1].in_features

    # Single logit output — paired with BCEWithLogitsLoss below.
    out_features = 1

    net.classifier[-1] = nn.Linear(in_features=in_features,
                                   out_features=out_features,
                                   bias=True)

    # initialize datasets and dataloaders

    data_dir = '../../data'

    sample_rate = 22050

    train_dataset = AudioDataset(data_dir, sample_rate, 'train')

    # pin_memory only helps (and only applies) when copying to CUDA.
    train_dataloader = torch.utils.data.DataLoader(
        dataset=train_dataset,
        batch_size=8,
        shuffle=True,
        num_workers=0,
        pin_memory=(device == 'cuda'))

    # initialize loss function

    # Count of positive samples; assumes labels are 0/1 — TODO confirm.
    num_coughs = sum(train_dataset.labels)

    # Up-weight the positive class to counter imbalance.
    # NOTE(review): BCEWithLogitsLoss's pos_weight is conventionally
    # (num_negative / num_positive); here it is (total / num_positive) —
    # verify this is intentional.
    pos_weight = torch.tensor(len(train_dataset) / num_coughs)

    loss_func = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
# --- Esempio n. 5 (Example 5) ---
from DataProcessor import *
import numpy as np
from Model import *
from AudioDataset import AudioDataset

# Embedding dimensionality of the Speech2Vec model.
EMBEDDING_SIZE = 50
# Number of acoustic features per frame.
FEATURE_DIM = 40

# (Kept for reference) embedding-generation flow on the full dataset:
# data_processor = DataProcessor(r"segmentation", r"data\train-clean-500", "train_data")
# model = Speech2Vec(data_processor.AUDIO_MAX_SIZE, FEATURE_DIM, EMBEDDING_SIZE)
# model.load("memory/models/model9")
#
# generate_embedding(model, data_processor.w2a, data_processor.ids, "memory/features/", "embedding.txt")

# Lightweight data for a quick training run.
data_processor = DataProcessor(r"light_segmentation", r"light_data",
                               "train_data")
log(f"mean = {data_processor.mean}")
log(f"std = {data_processor.std}")
# Dataset shaped (max audio length, feature dim) per sample —
# presumably padded to AUDIO_MAX_SIZE; confirm in AudioDataset.
dataset = AudioDataset(data_processor.ids,
                       shape=(data_processor.AUDIO_MAX_SIZE, FEATURE_DIM))
log(data_processor.AUDIO_MAX_SIZE)
log("Loading the model...")
# NOTE(review): the model uses data_processor.feature_dim while the
# dataset uses the FEATURE_DIM constant — verify they agree.
model = Speech2Vec(data_processor.AUDIO_MAX_SIZE, data_processor.feature_dim,
                   EMBEDDING_SIZE).cuda()
log("Training the model...")
val_losses, train_losses = train(model, dataset, batch_size=512, nb_epochs=10)
log("Saving the losses")
# Saved as val_loss.npy / train_loss.npy in the working directory.
np.save('val_loss', val_losses)
np.save('train_loss', train_losses)
log("Done !")
# --- Esempio n. 6 (Example 6) ---
        loss = checkpoint['loss']

        tasnet.eval()  # Switch to inference mode (fixes dropout/batch-norm behavior)
    else:
        # A trained checkpoint is mandatory for evaluation — bail out.
        print("Error: Checkpoint is required for evaluation.")
        exit(6)

    # Append the loaded checkpoint name to the run's testing log.
    training_dir = args.dst_dir
    with open(training_dir + "testing.log", "a") as testlog:
        testlog.write("Loaded Checkpoint: " + args.checkpoint_file + "\n")

    learning_started_date = datetime.now().strftime('%Y-%m-%d_%H:%M')

    # Load Test dataset ("tt" = test partition, WSJ0-style layout — TODO confirm)
    test_data_path = BASE_DATA_PATH + "tt/"
    testset = AudioDataset(test_data_path)
    testloader = data_utils.DataLoader(testset,
                                       batch_size=MINIBATCH_SIZE,
                                       shuffle=False)

    # Start Testing: running sums for the separation metrics accumulated
    # over the test set (SDR, SIR, SAR, STOI, PESQ, permutation count).
    sdr_sum = 0
    sir_sum = 0
    sarn_sum = 0
    stoi_sum = 0
    pesq_sum = 0
    perm_sum = 0

    global_audio_cnt = 0
    running_loss = 0.0
    current_testing_result = 0
# --- Esempio n. 7 (Example 7) ---
 
 # Input width of the pretrained classifier's final layer.
 in_features = net.classifier[-1].in_features
 
 # Single logit output — paired with BCEWithLogitsLoss below.
 out_features = 1
 
 net.classifier[-1] = nn.Linear(in_features = in_features,
                                out_features = out_features,
                                bias = True)
 
 # initialize datasets and dataloaders
 
 data_dir = '../../data'
 
 sample_rate = 22050
 
 val_dataset = AudioDataset(data_dir,sample_rate,'val')
 
 # pin_memory only helps (and only applies) when copying to CUDA.
 val_dataloader = torch.utils.data.DataLoader(
                    dataset = val_dataset,
                    batch_size = 8,
                    shuffle = True,
                    num_workers = 0,
                    pin_memory = (device == 'cuda'))
     
 # initialize loss function
 
 # Count of positive samples; assumes labels are 0/1 — TODO confirm.
 num_coughs = sum(val_dataset.labels)
 
 # NOTE(review): pos_weight is conventionally (num_negative/num_positive);
 # here it is (total/num_positive) — verify this is intentional.
 pos_weight = torch.tensor(len(val_dataset)/num_coughs)
 
 loss_func = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
# --- Esempio n. 8 (Example 8) ---
# Input width of the pretrained classifier's final layer.
in_features = net.classifier[-1].in_features

# Single logit output for binary classification.
out_features = 1

net.classifier[-1] = nn.Linear(in_features = in_features,
                               out_features = out_features,
                               bias = True)

# initialize datasets and dataloaders

data_dir = '../../data'

sample_rate = 22050

train_dataset = AudioDataset(data_dir,sample_rate,'train')

# pin_memory only helps (and only applies) when copying to CUDA.
train_dataloader = torch.utils.data.DataLoader(
                   dataset = train_dataset,
                   batch_size = 8,
                   shuffle = True,
                   num_workers = 0,
                   pin_memory = (device == 'cuda'))

val_dataset = AudioDataset(data_dir,sample_rate,'val')

# NOTE(review): shuffling the validation loader is unusual — confirm
# it is intentional (the call continues past this fragment).
val_dataloader = torch.utils.data.DataLoader(
                 dataset = val_dataset,
                 batch_size = 8,
                 shuffle = True,
                 num_workers = 0,
# --- Esempio n. 9 (Example 9) ---
import torch
from AudioDataset import AudioDataset, TrainTestSplitter
from pathlib import Path
import torch.nn as nn
from nn_modules import Net

# Class labels: male / female speaker.
classes = ('M', 'F')

# Speaker-info CSV; test_ratio=0.9 reserves 90% of speakers for testing.
csv_filepath = Path('data/mf_test/spkrinfo.csv')
train_test_splitter = TrainTestSplitter(csv_file=csv_filepath, test_ratio=0.9)

testset = AudioDataset(train_test_splitter,
                       csv_file=csv_filepath,
                       root_dir='data/mf_test',
                       is_train=False)
testloader = torch.utils.data.DataLoader(testset,
                                         batch_size=4,
                                         shuffle=False,
                                         num_workers=0)

# Path to the net's state dictionary
STATE_DICT_PATH = './speech_net.pth'

# Fetch one batch to compare predictions against ground truth.
# BUG FIX: iterator.next() is not valid Python 3 (and the DataLoader
# iterator's custom .next() was removed in modern PyTorch); use the
# builtin next() instead.
dataiter = iter(testloader)
data = next(dataiter)
test_inputs = data['audio']
test_labels = data['label']

# Rebuild the network and restore its trained parameters.
net = Net()
net.load_state_dict(torch.load(STATE_DICT_PATH))