Example #1
def get_audiosets(cfg):
    """
  get audioset
  """

    # shorthand for the feature parameter block
    fp = cfg['feature_params']

    # number of selected feature groups (cepstral, delta, double delta)
    n_selected = int(fp['use_cepstral_features']) + int(fp['use_delta_features']) + int(fp['use_double_delta_features'])

    # coefficients per feature group (cepstral coefficients plus optional energy feature)
    n_coeffs = fp['n_ceps_coeff'] + int(fp['use_energy_features'])

    # channel size: one channel per selected group if channels are used, else a single channel
    channel_size = n_selected if fp['use_channels'] else 1

    # feature size: per-group size if groups are stacked as channels, else all selected groups concatenated
    feature_size = n_coeffs if fp['use_channels'] else n_coeffs * n_selected

    # return early if the config selects no features
    if feature_size == 0 or channel_size == 0:
        return None, None, None

    # audio sets
    audio_set1 = AudioDataset(cfg['datasets']['speech_commands'],
                              cfg['feature_params'])
    audio_set2 = AudioDataset(cfg['datasets']['my_recordings'],
                              cfg['feature_params'])

    # create dataset if not existing
    if not check_files_existance(audio_set1.feature_files):
        audio_set1 = SpeechCommandsDataset(
            cfg['datasets']['speech_commands'],
            feature_params=cfg['feature_params'],
            verbose=False)
        audio_set1.extract_features()

    # create dataset if not existing
    if not check_files_existance(audio_set2.feature_files):
        audio_set2 = MyRecordingsDataset(cfg['datasets']['my_recordings'],
                                         feature_params=cfg['feature_params'],
                                         verbose=False)
        audio_set2.extract_features()

    # select feature files
    all_feature_files = (audio_set1.feature_files + audio_set2.feature_files
                         if len(audio_set1.labels) == len(audio_set2.labels)
                         else audio_set1.feature_files)

    return audio_set1, audio_set2, all_feature_files
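# A minimal usage sketch for get_audiosets(); it assumes a config.yaml with the same
# 'datasets' and 'feature_params' layout used in the other examples on this page.
if __name__ == '__main__':
    import yaml
    cfg = yaml.safe_load(open("./config.yaml"))
    audio_set1, audio_set2, all_feature_files = get_audiosets(cfg)
    if all_feature_files is None:
        print("no features selected in the config")
    else:
        print("number of feature files:", len(all_feature_files))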
    def test_scalogram_preprocessing(self):
        cqt_default_dict = {
            'sample_rate': 44100,
            'fmin': 30,
            'n_bins': 292,
            'bins_per_octave': 32,
            'filter_scale': 0.5,
            'hop_length': 256,
            'trainable_cqt': False
        }

        dataset_path = '/Volumes/Elements/Datasets/MelodicProgressiveHouse_mp3'
        dataset = AudioDataset(location=dataset_path, item_length=44100 * 5)

        #audio_clip = torch.rand([1, 1, 44100*5]) * 1. - 0.5
        audio_clip = dataset[20000][0, :].view(1, 1, -1)
        prep_module = PreprocessingModule(cqt_default_dict,
                                          phase=False,
                                          offset_zero=True,
                                          output_power=2,
                                          pooling=[1, 2])

        x = prep_module(audio_clip)
        # inspect the value range of the scalogram and show it
        x_min, x_max = x.min(), x.max()
        print("scalogram value range: [{}, {}]".format(x_min, x_max))
        plt.imshow(x[0, 0], origin='lower')
        plt.show()
def get_dataloaders(feat_folder, class_labels, folds, batch_size):
    """Build a shuffled training DataLoader over the extracted audio features."""
    data_set = AudioDataset(feat_folder, class_labels, folds)

    train_dl = DataLoader(
        data_set,
        batch_size=batch_size,
        shuffle=True,
        num_workers=0
    )

    return train_dl
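# A hypothetical call to get_dataloaders(); the feature folder, label list, and fold
# ids are placeholder values, and the (x, y) batch layout is an assumption.
train_dl = get_dataloaders(feat_folder='features/',
                           class_labels=['dog', 'cat'],
                           folds=[1, 2, 3],
                           batch_size=16)
for batch_x, batch_y in train_dl:
    print(batch_x.shape, batch_y.shape)
    break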
Example #4
def audio_set_wavs(cfg):
    """
  audio set wavs
  """

    # plot path
    plot_path = '../docu/thesis/5_exp/figs/'

    # audio sets
    a1 = AudioDataset(cfg['datasets']['speech_commands'],
                      cfg['feature_params'],
                      root_path='../')
    a2 = AudioDataset(cfg['datasets']['my_recordings'],
                      cfg['feature_params'],
                      root_path='../')

    # feature extractor
    feature_extractor = FeatureExtractor(cfg['feature_params'])

    # get audio files
    a1.get_audiofiles()

    # random seed
    np.random.seed(1234)
    r = np.random.randint(low=0, high=150, size=len(a1.set_audio_files[1]))

    wav_grid = []

    # process wavs
    for wav in sorted([
            label_wavs[r[i]]
            for i, label_wavs in enumerate(a1.set_audio_files[1])
    ]):

        # info
        print("wav: ", wav)

        # get raw
        x, _ = a1.wav_pre_processing(wav)

        # extract feature vectors [m x l]
        _, bon_pos = feature_extractor.extract_mfcc(x,
                                                    reduce_to_best_onset=False)

        # append to wav grid
        wav_grid.append((librosa.util.normalize(x),
                         re.sub(r'[0-9]+-', '',
                                wav.split('/')[-1].split('.')[0]), bon_pos))

    # plot wav grid
    plot_wav_grid(wav_grid,
                  feature_params=a1.feature_params,
                  grid_size=(6, 5),
                  plot_path=plot_path,
                  name='wav_grid_c30',
                  show_plot=True)
# Result save path
asset_path = config.path['asset_path']
ckpt_path = config.path['ckpt_path']
result_path = config.path['result_path']
restore_epoch = args.restore_epoch
experiment_num = str(args.index)
ckpt_file_name = 'idx_'+experiment_num+'_%03d.pth.tar'
tf_logger = TF_Logger(os.path.join(asset_path, 'tensorboard', 'idx_'+experiment_num))
logger.info("==== Experiment Number : %d " % args.index)

if args.pre_model == 'cnn':
    config.experiment['batch_size'] = 20

# Data loader
train_dataset1 = AudioDataset(config, root_dir=config.path['root_path'], dataset_names=(args.dataset1,), num_workers=20, preprocessing=False, train=True, kfold=args.kfold)
train_dataset2 = AudioDataset(config, root_dir=config.path['root_path'], dataset_names=(args.dataset2,), num_workers=20, preprocessing=False, train=True, kfold=args.kfold)
train_dataset3 = AudioDataset(config, root_dir=config.path['root_path'], dataset_names=(args.dataset3,), num_workers=20, preprocessing=False, train=True, kfold=args.kfold)
train_dataset = train_dataset1 + train_dataset2 + train_dataset3
valid_dataset1 = AudioDataset(config, root_dir=config.path['root_path'], dataset_names=(args.dataset1,), preprocessing=False, train=False, kfold=args.kfold)
valid_dataset2 = AudioDataset(config, root_dir=config.path['root_path'], dataset_names=(args.dataset2,), preprocessing=False, train=False, kfold=args.kfold)
valid_dataset3 = AudioDataset(config, root_dir=config.path['root_path'], dataset_names=(args.dataset3,), preprocessing=False, train=False, kfold=args.kfold)
valid_dataset = valid_dataset1 + valid_dataset2 + valid_dataset3
train_dataloader = AudioDataLoader(dataset=train_dataset, batch_size=config.experiment['batch_size'], drop_last=False, shuffle=True)
valid_dataloader = AudioDataLoader(dataset=valid_dataset, batch_size=config.experiment['batch_size'], drop_last=False)

# Model and Optimizer
if args.pre_model == 'cnn':
    pre_model = CNN(config=config.model).to(device)
elif args.pre_model == 'crnn':
    pre_model = CRNN(config=config.model).to(device)
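# A minimal optimizer/training-step sketch; the learning rate, the loss, and the
# (input, target) batch layout coming out of AudioDataLoader are assumptions here,
# not part of the original script.
import torch
optimizer = torch.optim.Adam(pre_model.parameters(), lr=1e-4)
criterion = torch.nn.CrossEntropyLoss()
for x, y in train_dataloader:
    optimizer.zero_grad()
    loss = criterion(pre_model(x.to(device)), y.to(device))
    loss.backward()
    optimizer.step()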
Example #6
    import yaml

    from batch_archive import SpeechCommandsBatchArchive
    from audio_dataset import AudioDataset
    from plots import plot_grid_images, plot_other_grid

    # yaml config file
    cfg = yaml.safe_load(open("./config.yaml"))

    # adapt the config to the chosen nn architecture: wavenet uses raw audio, all others use mfcc features
    cfg['feature_params']['use_mfcc_features'] = cfg['ml']['nn_arch'] != 'wavenet'

    # audio sets
    audio_set1 = AudioDataset(cfg['datasets']['speech_commands'],
                              cfg['feature_params'])
    audio_set2 = AudioDataset(cfg['datasets']['my_recordings'],
                              cfg['feature_params'])

    # create batches
    batch_archive = SpeechCommandsBatchArchive(audio_set1.feature_files +
                                               audio_set2.feature_files,
                                               batch_size=32,
                                               batch_size_eval=5)

    # reduce to label and add noise
    #batch_archive.reduce_to_label('up')
    #batch_archive.add_noise_data(shuffle=True)

    print("data size: ", batch_archive.data_size)
    print("classes: ", batch_archive.class_dict)
Example #7
transformation = utils.JointCompose([
    utils.JointHorizontalFlip(),
    utils.JointVerticalFlip(),
    #utils.JointNormailze(means = [0.485,0.456,0.406],stds = [1,1,1]), #TODO consider use
    utils.JointToTensor(),
])
val_transformation = utils.JointCompose([
    #utils.JointNormailze(means = [0.485,0.456,0.406],stds = [1,1,1]),
    utils.JointToTensor(),
])

VAL_PART = args.val_part

trainset = AudioDataset(data_h5_path='preprocess_audio/data.h5',
                        add_rpm=False,
                        train=True)
train_loader = torch.utils.data.DataLoader(trainset,
                                           batch_size=args.batch_size,
                                           shuffle=True,
                                           num_workers=args.num_workers)

# trainset = AudioGenDataset("/home/simon/denoise/dataset/mini_dataset/", dataset_size=30, add_rpm=False)
# train_loader = torch.utils.data.DataLoader(trainset, batch_size=args.batch_size, shuffle=True, num_workers=args.num_workers)

statistic_loader = torch.utils.data.DataLoader(trainset,
                                               batch_size=4,
                                               shuffle=True,
                                               num_workers=args.num_workers)

# valset = AudioDataset(data_dir=args.datapath, train=False, validation_part=VAL_PART, validation=True)
checkpoint_file = "/home/tomhirshberg/project/Denoising-drone-rotors/output//2019-05-24_15-27-48/checkpoint.pth.tar"  # checkpoint location
if os.path.isfile(checkpoint_file):
    print("loading checkpoint {}".format(checkpoint_file))
    checkpoint = torch.load(checkpoint_file, map_location=device)
    load_model(model, checkpoint)
else:
    print("can't load checkpoint file")
    exit()
# Load dataset

datapath = '/home/tomhirshberg/project/Denoising-drone-rotors/preprocess_audio/data.h5'
# testset = MSRDemosaic(root=datapath, train=False, transform=val_transformation)
# test_loader = torch.utils.data.DataLoader(testset, batch_size=1, shuffle=False, num_workers=num_workers)

# use train set to check overfitting
testset = AudioDataset(data_h5_path=datapath, add_rpm=False, train=False)
test_loader = torch.utils.data.DataLoader(testset,
                                          batch_size=1,
                                          shuffle=True,
                                          num_workers=num_workers)

# testset = AudioDataset(data_h5_path=datapath, train=False)
# test_loader = torch.utils.data.DataLoader(testset, batch_size=1, shuffle=False, num_workers=num_workers)

import IPython.display as ipd
from preprocess_audio.preprocess_audio import *
# convert back to audio
from preprocess_audio.postprocess_audio import *

# ipd.Audio('/home/simon/denoise/dataset/audio/file_example_WAV_1MG.wav')
# ipd.Audio(x, rate=sr) # load a NumPy array
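# A minimal denoising-inference sketch; the (noisy, clean) item layout coming out of
# test_loader is an assumption and may differ from the real AudioDataset items.
model.eval()
with torch.no_grad():
    for noisy, clean in test_loader:
        denoised = model(noisy.to(device))
        print(denoised.shape)
        break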
Example #9
    parser.add_argument('--save_prefix',
                        help='Full path and prefix for saving output models')
    parser.add_argument('--use_autoencoder', action='store_true')
    args = parser.parse_args()

    if args.epochs is None:
        args.epochs = 5

    arch = [(i, j) for i, j in zip(args.arch[:-1], args.arch[1:])]

    with open(args.fold_config) as f:
        config = cPickle.load(f)

    preproc_layer = PreprocLayer(config=config, proc_type='standardize')

    dataset = TransformerDataset(raw=AudioDataset(which_set='train',
                                                  config=config),
                                 transformer=preproc_layer.layer_content)

    # transformer_yaml = '''!obj:pylearn2.datasets.transformer_dataset.TransformerDataset {
    #     raw : %(raw)s,
    #     transformer : %(transformer)s
    # }'''
    #
    # dataset_yaml = transformer_yaml % {
    #     'raw' : '''!obj:audio_dataset.AudioDataset {
    #         which_set : 'train',
    #         config : !pkl: "%(fold_config)s"
    #     }''' % {'fold_config' : args.fold_config},
    #     'transformer' : '''!obj:pylearn2.models.mlp.MLP {
    #         nvis : %(nvis)i,
    #         layers :
Example #10
        return self.weights

    def get_param_values(self):
        return list((self.get_weights(), self.get_biases()))

if __name__ == '__main__':

    # tests
    import theano
    import cPickle
    from audio_dataset import AudioDataset

    with open('GTZAN_stratified.pkl') as f: 
        config = cPickle.load(f)
    
    D = AudioDataset(config)
    
    feat_space   = VectorSpace(dim=D.X.shape[1])
    feat_space_complex = VectorSpace(dim=D.X.shape[1], dtype='complex64')
    target_space = VectorSpace(dim=len(D.label_list))
    
    data_specs_frame = (CompositeSpace((feat_space,target_space)), ("features", "targets"))
    data_specs_song = (CompositeSpace((feat_space_complex, target_space)), ("songlevel-features", "targets"))

    framelevel_it = D.iterator(mode='sequential', batch_size=10, data_specs=data_specs_frame)
    frame_batch = framelevel_it.next()

    songlevel_it = D.iterator(mode='sequential', batch_size=1, data_specs=data_specs_song)    
    song_batch = songlevel_it.next()
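    # quick sanity check (assumption: each batch is a (features, targets) tuple in the
    # order given by the data specs; this check is not part of the original test code)
    print frame_batch[0].shape, frame_batch[1].shape
    print song_batch[0].shape, song_batch[1].shape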

Example #11
def main():
    config_filename = Path.cwd().joinpath(CONFIGS_DIR).joinpath(
        CONFIG_FILENAME)
    config = Configuration(config_filename)

    batch_size = 4
    epochs = 1

    results_dir_path = Path.cwd().joinpath(RESULTS_DIR)
    current_run_path = create_results_directories(results_dir_path)

    transforms = TransformsComposer([Rescale(output_size=10000), ToTensor()])

    encoder = LabelEncoder()

    data_loader = DataLoader(config)
    x_train, y_train = data_loader.get_train_set()
    encoder.fit(y_train)

    classes = encoder.classes_
    # map class indices to category names
    classes_map = {i: category for i, category in enumerate(classes)}
    print(classes_map)

    y_train = encoder.transform(y_train)
    train_dataset = AudioDataset(x_train, y_train, transforms)

    x_test, y_test = data_loader.get_test_set()
    y_test = encoder.transform(y_test)
    test_dataset = AudioDataset(x_test, y_test, transforms)

    model = M5(num_classes=len(classes_map))

    states_dir = Path.cwd().joinpath(STATES_DIR)
    state_filename = f'{uuid.uuid1()}_state_{epochs}_epochs.pth'
    state_path = current_run_path.joinpath('best_snapshot').joinpath(
        state_filename)

    classifier = Classifier(model=model, state_path=state_path)

    # Fit model on data
    train_loss_history, val_loss_history = classifier.fit(
        train_dataset,
        batch_size=batch_size,
        epochs=epochs,
        validation_data=test_dataset)

    # plt.figure()
    # plt.title(f'Model Loss for {epochs} epochs')
    # plt.xlabel('epoch')
    # plt.ylabel('loss')
    # plt.plot(train_loss_history, label='train')
    # plt.plot(val_loss_history, label='test')
    # plt.legend()
    # plt.show()

    predictions_path = current_run_path.joinpath('./predicted.csv')
    validation_dataset = AudioDataset(x_test, y_test, transforms)
    validation_model = M5(num_classes=len(classes_map))
    validation_classifier = Classifier(validation_model, state_path=state_path)
    validation_classifier.predict(validation_dataset,
                                  batch_size=batch_size,
                                  output_filepath=predictions_path,
                                  classes=classes_map)
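# Entry point (assumption: the original module is meant to be run directly as a script).
if __name__ == '__main__':
    main()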
Example #12
    now_tuple[0] % 100, now_tuple[1], now_tuple[2], now_tuple[3], now_tuple[4])
ckpt_file_name = date_info + '_%03d.pth.tar'
#tf_logger = TF_Logger(os.path.join(asset_path, 'tensorboard', 'idx_'+experiment_num))
subdir = 'from_json' if args.from_json else 'from_wav'
writer = SummaryWriter(
    log_dir=(os.path.join(asset_path, 'tensorboard', subdir, date_info)))
logger.info("==== Experiment Number : %d " % args.index)

if args.model == 'cnn':
    config.experiment['batch_size'] = 10

# Data loader
train_dataset1 = AudioDataset(config,
                              root_dir=config.path['root_path'],
                              dataset_names=(args.dataset1, ),
                              num_workers=20,
                              from_json=args.from_json,
                              preprocessing=False,
                              train=True,
                              kfold=args.kfold)
# train_dataset2 = AudioDataset(config, root_dir=config.path['root_path'], dataset_names=(args.dataset2,), num_workers=20, preprocessing=False, train=True, kfold=args.kfold)
# train_dataset3 = AudioDataset(config, root_dir=config.path['root_path'], dataset_names=(args.dataset3,), num_workers=20, preprocessing=False, train=True, kfold=args.kfold)
# train_dataset = train_dataset1.__add__(train_dataset2).__add__(train_dataset3)
valid_dataset1 = AudioDataset(config,
                              root_dir=config.path['root_path'],
                              dataset_names=(args.dataset1, ),
                              from_json=args.from_json,
                              preprocessing=False,
                              train=False,
                              kfold=args.kfold)
# valid_dataset2 = AudioDataset(config, root_dir=config.path['root_path'], dataset_names=(args.dataset2,), preprocessing=False, train=False, kfold=args.kfold)
# valid_dataset3 = AudioDataset(config, root_dir=config.path['root_path'], dataset_names=(args.dataset3,), preprocessing=False, train=False, kfold=args.kfold)
Example #13
        lambda y: np.sqrt(y[:y.shape[0] / 2, :]**2 + y[y.shape[0] / 2:, :]**2))
    mask_value = -1.
else:
    transform_y = (lambda y: y)
    mask_value = 0.

# load the data
####################
maxlen = 500

print "Loading data..."

# development data
D_valid = AudioDataset(config['taskfile_x_valid'],
                       config['taskfile_y_valid'],
                       datafile=config['datafile_valid'],
                       params_stft=config['params_stft'])

#print "  Loading validation data..."
#x_valid, y_valid, mask_valid = D_valid.get_padded_data_matrix(transform_x=transform_x, transform_y=transform_y, pad_value=mask_value, maxlen=maxlen)

for i in range(10):
    x = util.wavread(D_valid.x_wavfiles[i])[0:1, :]
    xr = D_valid.reconstruct_x(i)[0:1, :]
    if xr.shape[1] > x.shape[1]:
        xr = xr[:, :x.shape[1]]
    print "For file %d, NMSE between original x and reconstructed x is %e" % (
        i, np.mean((x - xr)**2) / np.mean(x**2))

    y = util.wavread(D_valid.y_wavfiles[i])[0:1, :]
    yr = D_valid.reconstruct_y(i)
Example #14
    parser.add_argument('--which_set', help='train, test, or valid')
    parser.add_argument('--save_file',
                        help='Save results to tab separated file')
    args = parser.parse_args()

    # get model
    model = serial.load(args.model_file)

    if args.which_set is None:
        args.which_set = 'test'

    if args.testset:  # dataset config passed in from command line
        print 'Using dataset passed in from command line'
        with open(args.testset) as f:
            config = cPickle.load(f)
        dataset = AudioDataset(config=config, which_set=args.which_set)

        # get model dataset for its labels...
        model_dataset = yaml_parse.load(model.dataset_yaml_src)
        label_list = model_dataset.label_list

    else:  # get dataset from model's yaml_src
        print "Using dataset from model's yaml src"
        p = re.compile(r"which_set.*'(train)'")
        dataset_yaml = p.sub("which_set: '{}'".format(args.which_set),
                             model.dataset_yaml_src)
        dataset = yaml_parse.load(dataset_yaml)

        label_list = dataset.label_list

    # measure test error