Example No. 1
def loadDataForServer():

    training_dataset = NSynth(
        "/local/sandbox/nsynth/nsynth-train",
        transform=toFloat,
        blacklist_pattern=["synth_lead"],  # blacklist string instrument
        categorical_field_list=["instrument_family", "instrument_source"])

    validation_dataset = NSynth(
        "/local/sandbox/nsynth/nsynth-valid",
        transform=toFloat,
        blacklist_pattern=["synth_lead"],  # blacklist synth_lead instrument
        categorical_field_list=["instrument_family", "instrument_source"])

    testing_dataset = NSynth(
        "/local/sandbox/nsynth/nsynth-test",
        transform=toFloat,
        blacklist_pattern=["synth_lead"],  # blacklist string instrument
        categorical_field_list=["instrument_family", "instrument_source"])

    # create dataLoaders
    train_loader = torch.utils.data.DataLoader(dataset=training_dataset,
                                               batch_size=BATCH_SIZE)

    validation_loader = torch.utils.data.DataLoader(dataset=validation_dataset,
                                                    batch_size=1)

    test_loader = torch.utils.data.DataLoader(dataset=testing_dataset,
                                              batch_size=1)

    print('Finished preparing data loaders for server testing')

    return train_loader, validation_loader, test_loader
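
toFloat is referenced here but defined elsewhere in the original module. A minimal sketch of what it presumably looks like, consistent with the definitions in the other examples on this page (rescale the int16 audio samples to floats in [-1, 1]):

import numpy as np
from torchvision import transforms

# assumed definition: rescale int16 PCM samples to floats in [-1, 1]
toFloat = transforms.Lambda(lambda x: x / np.iinfo(np.int16).max)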
Example No. 2
def main():
    # Subsampling
    subsample_transform = transforms.Lambda(lambda x: x[::4])
    toFloat = transforms.Lambda(lambda x: x / np.iinfo(np.int16).max)

    trainData = NSynth(
        "/local/sandbox/nsynth/nsynth-train",
        transform=transforms.Compose([subsample_transform, toFloat]),
        blacklist_pattern=["synth_lead"],
        categorical_field_list=["instrument_family", "instrument_source"])

    validation_dataset = NSynth(
        "/local/sandbox/nsynth/nsynth-valid",
        transform=transforms.Compose([subsample_transform, toFloat]),
        blacklist_pattern=["synth_lead"],
        categorical_field_list=["instrument_family", "instrument_source"])

    test_dataset = NSynth(
        "/local/sandbox/nsynth/nsynth-test",
        transform=transforms.Compose([subsample_transform, toFloat]),
        blacklist_pattern=["synth_lead"],
        categorical_field_list=["instrument_family", "instrument_source"])
    print(len(trainData))
    train_loader = data.DataLoader(trainData, batch_size=64, shuffle=True)

    valid_loader = data.DataLoader(validation_dataset,
                                   batch_size=64,
                                   shuffle=True)

    test_loader = data.DataLoader(test_dataset, batch_size=64, shuffle=True)

    train(train_loader, valid_loader, len(trainData), len(validation_dataset))
    test(test_loader)
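
The x[::4] lambda keeps every fourth sample, so a raw NSynth clip (4 seconds at 16 kHz, i.e. 64000 int16 values) is reduced to 16000 values before rescaling. A standalone sanity check of that composition on synthetic data:

import numpy as np
from torchvision import transforms

subsample = transforms.Lambda(lambda x: x[::4])
to_float = transforms.Lambda(lambda x: x / np.iinfo(np.int16).max)
pipeline = transforms.Compose([subsample, to_float])

fake_clip = np.zeros(64000, dtype=np.int16)  # same length as a raw NSynth clip
print(pipeline(fake_clip).shape)  # -> (16000,)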
Example No. 3
def read_dataset(datasetLocation, INPUT_SIZE):
    """
    Method to read the data sets from the given dataset location.

    :param datasetLocation: Base location where training, test and validation datasets are located
    :return: training loader, test loader and validation loader data sets
    """

    # Pre-processing transform to get the dataset in the range of [-1, 1] and reshape the input to multiple bins
    maxIntValue = np.iinfo(np.int16).max
    # toFloat = transforms.Lambda(lambda x: ((x / maxIntValue) + 1) / 2)
    toFloat = transforms.Compose([
        transforms.Lambda(lambda x: x / maxIntValue),
        transforms.Lambda(lambda x: x.reshape(-1, INPUT_SIZE))
    ])

    # Reading the train dataset
    trainFolderName = "nsynth-train"
    train_dataset = NSynth(
        datasetLocation + trainFolderName,
        transform=toFloat,
        blacklist_pattern=["synth_lead"],
        categorical_field_list=["instrument_family", "instrument_source"])
    train_loader = data.DataLoader(train_dataset, batch_size=32, shuffle=True)

    # Read the test dataset
    testFolderName = "nsynth-test"
    test_dataset = NSynth(
        datasetLocation + testFolderName,
        transform=toFloat,
        blacklist_pattern=["synth_lead"],
        categorical_field_list=["instrument_family", "instrument_source"])
    test_loader = data.DataLoader(test_dataset, batch_size=32, shuffle=True)

    # Read the validation dataset
    validationFolderName = "nsynth-valid"
    validation_dataset = NSynth(
        datasetLocation + validationFolderName,
        transform=toFloat,
        blacklist_pattern=["synth_lead"],
        categorical_field_list=["instrument_family", "instrument_source"])
    validation_loader = data.DataLoader(validation_dataset,
                                        batch_size=32,
                                        shuffle=True)

    # return the dataset loader
    return train_loader, test_loader, validation_loader
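
A hypothetical call, assuming the sandbox base path used elsewhere on this page and an illustrative bin size of 4000 samples (both values are assumptions, not from the original module):

train_loader, test_loader, validation_loader = read_dataset(
    "/local/sandbox/nsynth/", INPUT_SIZE=4000)
# each 64000-sample clip is reshaped to (64000 / 4000, 4000) = (16, 4000)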
Example No. 4
def testProcess(toFloat):
    testSet = NSynth(
        "/local/sandbox/nsynth/nsynth-test",
        transform=toFloat,
        blacklist_pattern=["synth_lead"],  # blacklist string instrument
        categorical_field_list=["instrument_family_str"])

    return testSet
Example No. 5
def validProcess(toFloat):
    validSet = NSynth(
        "/local/sandbox/nsynth/nsynth-valid",
        transform=toFloat,
        blacklist_pattern=["synth_lead"],  # blacklist string instrument
        categorical_field_list=["instrument_family_str"])

    input_dimension = 0

    return validSet, input_dimension
Example No. 6
def dataLoaders(batch_size):
    '''
    This function loads the data for training, validation and testing of the model
    :param batch_size: the size of the mini-batches
    :return: DataLoader objects for the training, validation and test data
    '''
    # audio samples are loaded as an int16 numpy array;
    # rescale to float and shift by +1 so all values are non-negative ([0, 2])
    print("--- Loading data ---")
    toFloat = transforms.Lambda(lambda x: (x / np.iinfo(
        np.int16).max) + 1)  # +1 shifts the range to avoid negative values
    # normalizeValue = transforms.Normalize(torch.mean(x),torch.std(x))
    # use instrument_family and instrument_source as classification targets
    dataset_Train = NSynth(
        "/local/sandbox/nsynth/nsynth-train",
        transform=toFloat,
        blacklist_pattern=["synth_lead"],  # blacklist string instrument
        categorical_field_list=["instrument_family", "instrument_source"])

    loader_Train = data.DataLoader(dataset_Train,
                                   batch_size=batch_size,
                                   shuffle=True)

    dataset_Valid = NSynth(
        "/local/sandbox/nsynth/nsynth-valid",
        transform=toFloat,
        blacklist_pattern=["synth_lead"],  # blacklist string instrument
        categorical_field_list=["instrument_family", "instrument_source"])
    loader_Valid = data.DataLoader(dataset_Valid,
                                   batch_size=batch_size,
                                   shuffle=True)

    dataset_Test = NSynth(
        "/local/sandbox/nsynth/nsynth-test",
        transform=toFloat,
        blacklist_pattern=["synth_lead"],  # blacklist string instrument
        categorical_field_list=["instrument_family", "instrument_source"])
    loader_Test = data.DataLoader(dataset_Test,
                                  batch_size=batch_size,
                                  shuffle=False)
    return loader_Train, loader_Valid, loader_Test
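
Note that the +1 shift leaves the samples in [0, 2] rather than [0, 1]. If a unit range is preferred, the halved variant that appears commented out in Example No. 3 achieves it; a minimal sketch:

import numpy as np
from torchvision import transforms

# map int16 samples to roughly [0, 1] instead of [0, 2]
toFloatUnit = transforms.Lambda(
    lambda x: ((x / np.iinfo(np.int16).max) + 1) / 2)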
Example No. 7
def loadDataForLocal(want_to_test):

    training_dataset = NSynth(
        "./nsynth-test",
        transform=toFloat,
        blacklist_pattern=["synth_lead"],  # blacklist string instrument
        categorical_field_list=["instrument_family", "instrument_source"])

    # Splitting training dataset into training and validation and testing
    num_train = len(training_dataset)
    indices = list(range(num_train))
    splitVal = int(np.floor(VALIDATION_SPLIT * num_train))
    splitTest = int(np.floor(TESTING_SPLIT * num_train)) + splitVal

    # Make sure you get same numbers every time when rand_seed = 0
    np.random.seed(seed=RAND_SEED)

    # Shuffle the indices
    np.random.shuffle(indices)

    # Get validation, test and training set indices
    # (validation takes the first splitVal shuffled indices; the original
    # indices[splitVal:] would have overlapped the test and training sets)
    validation_idx, test_idx, train_idx = indices[:splitVal], \
                                          indices[splitVal:splitTest], \
                                          indices[splitTest:]

    # create samplers
    train_sampler = data_utils.SubsetRandomSampler(train_idx)
    test_sampler = data_utils.SubsetRandomSampler(test_idx)
    validation_sampler = data_utils.SubsetRandomSampler(validation_idx)

    # create dataLoaders
    train_loader = torch.utils.data.DataLoader(dataset=training_dataset,
                                               batch_size=BATCH_SIZE,
                                               sampler=train_sampler)

    validation_loader = torch.utils.data.DataLoader(dataset=training_dataset,
                                                    batch_size=1,
                                                    sampler=validation_sampler)

    test_loader = torch.utils.data.DataLoader(dataset=training_dataset,
                                              batch_size=1,
                                              sampler=test_sampler)

    if want_to_test == '1':
        test_loader = torch.utils.data.DataLoader(dataset=training_dataset,
                                                  batch_size=1)

    print('Finished preparing data loaders for local testing')

    return train_loader, validation_loader, test_loader
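
To make the split arithmetic concrete, here is the same logic run standalone with hypothetical constants (VALIDATION_SPLIT, TESTING_SPLIT and RAND_SEED below are illustrative values, not the originals):

import numpy as np

VALIDATION_SPLIT, TESTING_SPLIT, RAND_SEED = 0.1, 0.1, 0  # hypothetical
num_train = 4096
splitVal = int(np.floor(VALIDATION_SPLIT * num_train))           # 409
splitTest = int(np.floor(TESTING_SPLIT * num_train)) + splitVal  # 818

indices = list(range(num_train))
np.random.seed(seed=RAND_SEED)
np.random.shuffle(indices)
validation_idx = indices[:splitVal]     # 409 examples
test_idx = indices[splitVal:splitTest]  # 409 examples
train_idx = indices[splitTest:]         # 3278 examples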
Example No. 8
def loader_function(path):
    # audio files are loaded as an int16 numpy array;
    # keep the first 16000 samples (one second at 16 kHz) and rescale to
    # non-negative floats in [0, 2]
    selectColmns = transforms.Lambda(lambda x: x[0:16000])
    toFloat = transforms.Lambda(lambda x: x / np.iinfo(np.int16).max + 1)
    dataset = NSynth(
        path,
        transform=transforms.Compose([selectColmns, toFloat]),
        blacklist_pattern=["synth_lead"],  #blaclkist synth_lead instrument
        categorical_field_list=["instrument_family", "instrument_source"])
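    # question2 is a plotting helper defined elsewhere in the original module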
    question2(dataset[0][0], "1-D_audio_waveform1.png", "1-D audio waveform1")
    return dataset, torch_data.DataLoader(dataset,
                                          batch_size=64,
                                          shuffle=True,
                                          num_workers=16)
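
A hypothetical call to the function above, using the same sandbox path as the module-level code that follows:

dataset, train_loader = loader_function("/local/sandbox/nsynth/nsynth-train")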
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import sklearn.metrics as sk
import wave
import sys

import pandas as pd

transform = transforms.Compose([
    transforms.Lambda(lambda x: x / np.iinfo(np.int16).max),
    transforms.Lambda(lambda x: torch.from_numpy(x).float()),
    transforms.Lambda(lambda x: x[0:16000])
])
train_dataset = NSynth(
    "/local/sandbox/nsynth/nsynth-train",
    transform=transform,
    blacklist_pattern=["synth_lead"],  # blacklist string istrument
    categorical_field_list=["instrument_family", "instrument_source"])
print(type(train_dataset))

train_loader = data.DataLoader(train_dataset, batch_size=32, shuffle=True)

test_dataset = NSynth(
    "/local/sandbox/nsynth/nsynth-test",
    transform=transform,
    blacklist_pattern=["synth_lead"],  # blacklist string instrument
    categorical_field_list=["instrument_family", "instrument_source"])
test_loader = data.DataLoader(test_dataset, batch_size=32, shuffle=True)
loss_validation = []
loss_train = []
valid_dataset = NSynth(