Example #1
    def download(self, force_download=False):
        zenodo_url = "https://zenodo.org/record/2552860/files"

        zenodo_files = [
            'FSDKaggle2018.audio_test.zip', 'FSDKaggle2018.audio_train.zip',
            'FSDKaggle2018.doc.zip', 'FSDKaggle2018.meta.zip'
        ]
        super().download(zenodo_url, zenodo_files, force_download)

        mkdir_if_not_exists(self.audio_path)

        os.rename(
            os.path.join(self.dataset_path, 'FSDKaggle2018.audio_train'),
            os.path.join(self.audio_path, 'train'),
        )
        os.rename(
            os.path.join(self.dataset_path, 'FSDKaggle2018.audio_test'),
            os.path.join(self.audio_path, 'test'),
        )
        os.rename(
            os.path.join(self.dataset_path, 'FSDKaggle2018.meta'),
            os.path.join(self.dataset_path, 'meta'),
        )
        os.rename(
            os.path.join(self.dataset_path, 'FSDKaggle2018.doc'),
            os.path.join(self.dataset_path, 'doc'),
        )

        self.set_as_downloaded()
Example #2
def test_check_if_extracted():
    dataset_path = "./tests/data"
    dataset = Dataset(dataset_path)
    path = "./tests/data/features/FeatureExtractor/original"
    mkdir_if_not_exists(path, parents=True)
    feature_extractor = FeatureExtractor()
    json_features = os.path.join(path, "parameters.json")
    _clean(json_features)
    feature_extractor.set_as_extracted(path)
    assert feature_extractor.check_if_extracted(dataset)
Example #3
def test_move_all_files_to_parent():
    parent = "./new_parent"
    mkdir_if_not_exists("./new_parent/new_child", parents=True)
    mkdir_if_not_exists("./new_parent/new_child2")
    move_all_files_to_parent("./", "new_parent")
    assert os.path.exists("./new_child")
    assert os.path.exists("./new_child2")
    _clean("./new_child")
    _clean("./new_child2")
    _clean("./new_parent")
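
The test above pins down the observable behaviour of move_all_files_to_parent(parent, child): everything inside parent/child is lifted one level up, into parent. A minimal sketch with that behaviour, using only os and shutil (an illustration of the contract, not necessarily the DCASE-models implementation):

import os
import shutil


def move_all_files_to_parent(parent, child):
    # Move every entry inside parent/child one level up, into parent.
    child_path = os.path.join(parent, child)
    for entry in os.listdir(child_path):
        shutil.move(os.path.join(child_path, entry),
                    os.path.join(parent, entry))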
Example #4
def test_move_all_files_to():
    parent = "./new_parent"
    mkdir_if_not_exists("./new_parent/new_child", parents=True)
    mkdir_if_not_exists("./new_parent/new_child2")
    dest = "./dest"
    move_all_files_to("./new_parent", dest)
    assert os.path.exists("./dest/new_child")
    assert os.path.exists("./dest/new_child2")
    assert not os.path.exists("./new_parent/new_child")
    assert not os.path.exists("./new_parent/new_child2")
    _clean("./dest")
    _clean("./new_parent")
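
Similarly, the behaviour tested here for move_all_files_to(source, destination) is that every entry of source ends up inside destination, which may not exist yet. A hedged sketch of a function matching those assertions (not necessarily the library's implementation):

import os
import shutil


def move_all_files_to(source, destination):
    # Create the destination if needed, then move every entry of source into it.
    os.makedirs(destination, exist_ok=True)
    for entry in os.listdir(source):
        shutil.move(os.path.join(source, entry),
                    os.path.join(destination, entry))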
Example #5
def test_mkdir_if_not_exists():
    folder = "./new_folder"
    _clean(folder)
    mkdir_if_not_exists(folder)
    assert os.path.isdir(folder)
    _clean(folder)

    folder = "./new_parent/new_folder"
    mkdir_if_not_exists(folder, parents=True)
    assert os.path.isdir("./new_parent")
    assert os.path.isdir(folder)
    _clean(folder)
    _clean("./new_parent")
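
This test shows the two behaviours of mkdir_if_not_exists: it silently skips folders that already exist and, with parents=True, also creates missing intermediate directories. A minimal sketch consistent with that behaviour (the actual DCASE-models implementation may differ):

import os


def mkdir_if_not_exists(path, parents=False):
    # Create the directory only if it does not exist already.
    if not os.path.exists(path):
        if parents:
            os.makedirs(path)   # also create missing parent directories
        else:
            os.mkdir(path)      # parent directory must already exist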
Example #6
def test_duplicate_folder_structure():
    folder1 = "./new_parent/new_folder1"
    folder2 = "./new_parent/new_folder2"
    mkdir_if_not_exists(folder1, parents=True)
    mkdir_if_not_exists(folder2)

    folder_destination = "./test_dest"
    _clean(folder_destination)
    duplicate_folder_structure("./new_parent", folder_destination)
    assert os.path.isdir(folder_destination)
    assert os.path.isdir(os.path.join(folder_destination, "new_folder1"))
    assert os.path.isdir(os.path.join(folder_destination, "new_folder2"))
    _clean(folder_destination)
    _clean("./new_parent")
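
duplicate_folder_structure(origin, destination) is only required to recreate the directory tree of origin under destination; no files are copied. A sketch satisfying the assertions above, assuming nothing beyond os (not necessarily the library's code):

import os


def duplicate_folder_structure(origin, destination):
    # Recreate the sub-directory tree of origin under destination (folders only).
    os.makedirs(destination, exist_ok=True)
    for root, dirs, _ in os.walk(origin):
        for d in dirs:
            relative = os.path.relpath(os.path.join(root, d), origin)
            os.makedirs(os.path.join(destination, relative), exist_ok=True)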
Example #7
    def extract(self, dataset):
        """ Extracts features for each file in dataset.

        Calls calculate() for each file in the dataset and saves the
        result to the features path.

        Parameters
        ----------
        dataset : Dataset
            Instance of the dataset.

        """
        features_path = self.get_features_path(dataset)
        mkdir_if_not_exists(features_path, parents=True)

        if not dataset.check_sampling_rate(self.sr):
            print('Changing sampling rate ...')
            dataset.change_sampling_rate(self.sr)
            print('Done!')

        # Define path to audio and features folders
        audio_path, subfolders = dataset.get_audio_paths(self.sr)

        # Duplicate folder structure of audio in features folder
        duplicate_folder_structure(audio_path, features_path)
        for audio_folder in subfolders:
            subfolder_name = os.path.basename(audio_folder)
            features_path_sub = os.path.join(features_path, subfolder_name)
            if not self.check_if_extracted_path(features_path_sub):
                # Navigate the audio folder structure and extract
                # features of each wav file
                for path_audio in progressbar(list_wav_files(audio_folder)):
                    features_array = self.calculate(path_audio)
                    path_to_features_file = path_audio.replace(
                        audio_path, features_path)
                    path_to_features_file = path_to_features_file.replace(
                        'wav', 'npy')
                    np.save(path_to_features_file, features_array)

                # Save parameters.json for future checking
                self.set_as_extracted(features_path_sub)
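
For context, the call sequence around extract() in the training scripts further down this page is always the same: instantiate the dataset and the feature extractor, check check_if_extracted(), and only then extract. A sketch of that usage, where the import paths and parameter values are assumptions (adjust them to your installed DCASE-models version and your parameters.json):

# Assumed import paths; verify against your installed DCASE-models version.
from dcase_models.data.datasets import UrbanSound8k
from dcase_models.data.features import MelSpectrogram

dataset = UrbanSound8k('../datasets/UrbanSound8k')   # hypothetical dataset path
features = MelSpectrogram(sequence_time=1.0, sequence_hop_time=0.5,
                          audio_win=1024, audio_hop=512, sr=22050)  # illustrative values

if not features.check_if_extracted(dataset):
    features.extract(dataset)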
Example #8
    def download(self, force_download=False):
        zenodo_url = "https://zenodo.org/record/3338727/files/"

        zenodo_files = [
            'audio_train.zip', 'audio_validate.zip', 'audio_test.zip',
            'annotations_train.zip', 'annotations_validate.zip',
            'annotations_test.zip', 'README'
        ]

        super().download(zenodo_url, zenodo_files, force_download)

        mkdir_if_not_exists(self.audio_path)
        mkdir_if_not_exists(self.annotations_path)
        for fold in self.fold_list:
            os.rename(os.path.join(self.dataset_path, 'audio_%s' % fold),
                      os.path.join(self.audio_path, fold))
            os.rename(os.path.join(self.dataset_path, 'annotations_%s' % fold),
                      os.path.join(self.annotations_path, fold))

        # Convert .flac to .wav
        self.convert_to_wav()

        self.set_as_downloaded()
Example #9
def main():
    # Parse arguments
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument(
        '-d',
        '--dataset',
        type=str,
        help='dataset name (e.g. UrbanSound8k, ESC50, URBAN_SED, SONYC_UST)',
        default='UrbanSound8k')
    parser.add_argument(
        '-f',
        '--features',
        type=str,
        help='features name (e.g. Spectrogram, MelSpectrogram, Openl3)',
        default='MelSpectrogram')
    parser.add_argument('-p',
                        '--path',
                        type=str,
                        help='path to the parameters.json file',
                        default='../')
    parser.add_argument(
        '-m',
        '--model',
        type=str,
        help='model name (e.g. MLP, SB_CNN, SB_CNN_SED, A_CRNN, VGGish)',
        default='SB_CNN')
    parser.add_argument('-fold',
                        '--fold_name',
                        type=str,
                        help='fold name',
                        default='fold1')
    parser.add_argument('-s',
                        '--models_path',
                        type=str,
                        help='path to save the trained model',
                        default='../trained_models')
    parser.add_argument('--aug', dest='augmentation', action='store_true')
    parser.add_argument('--no-aug', dest='augmentation', action='store_false')
    parser.set_defaults(augmentation=False)
    args = parser.parse_args()

    print(__doc__)

    if args.dataset not in get_available_datasets():
        raise AttributeError('Dataset not available')

    if args.features not in get_available_features():
        raise AttributeError('Features not available')

    if args.model not in get_available_models():
        raise AttributeError('Model not available')

    # Get parameters
    parameters_file = os.path.join(args.path, 'parameters.json')
    params = load_json(parameters_file)
    params_dataset = params['datasets'][args.dataset]
    params_features = params['features']
    params_model = params['models'][args.model]

    # Get and init dataset class
    dataset_class = get_available_datasets()[args.dataset]
    dataset_path = os.path.join(args.path, params_dataset['dataset_path'])
    dataset = dataset_class(dataset_path)

    if args.fold_name not in dataset.fold_list:
        raise AttributeError('Fold not available')

    # Data augmentation
    if args.augmentation:
        # Define the augmentations
        augmentations = params['data_augmentations']

        # Initialize AugmentedDataset
        dataset = AugmentedDataset(dataset, params['features']['sr'],
                                   augmentations)

        # Process all files
        print('Doing data augmentation ...')
        dataset.process()
        print('Done!')

    # Get and init feature class
    features_class = get_available_features()[args.features]
    features = features_class(
        sequence_time=params_features['sequence_time'],
        sequence_hop_time=params_features['sequence_hop_time'],
        audio_win=params_features['audio_win'],
        audio_hop=params_features['audio_hop'],
        sr=params_features['sr'],
        **params_features[args.features])
    print('Features shape: ', features.get_shape())

    # Check if features were extracted
    if not features.check_if_extracted(dataset):
        print('Extracting features ...')
        features.extract(dataset)
        print('Done!')

    use_validate_set = True
    if args.dataset in ['TUTSoundEvents2017', 'ESC50', 'ESC10']:
        # With less data, don't use a validation set.
        use_validate_set = False

    folds_train, folds_val, _ = evaluation_setup(
        args.fold_name,
        dataset.fold_list,
        params_dataset['evaluation_mode'],
        use_validate_set=use_validate_set)

    data_gen_train = DataGenerator(dataset,
                                   features,
                                   folds=folds_train,
                                   batch_size=params['train']['batch_size'],
                                   shuffle=True,
                                   train=True,
                                   scaler=None)

    scaler = Scaler(normalizer=params_model['normalizer'])
    print('Fitting scaler ...')
    scaler.fit(data_gen_train)
    print('Done!')

    # Pass the scaler to data_gen_train so it is applied when loading data
    data_gen_train.set_scaler(scaler)

    data_gen_val = DataGenerator(dataset,
                                 features,
                                 folds=folds_val,
                                 batch_size=params['train']['batch_size'],
                                 shuffle=False,
                                 train=False,
                                 scaler=scaler)

    # Define model
    features_shape = features.get_shape()
    n_frames_cnn = features_shape[1]
    n_freq_cnn = features_shape[2]
    n_classes = len(dataset.label_list)

    model_class = get_available_models()[args.model]

    metrics = ['classification']
    if args.dataset in sed_datasets:
        metrics = ['sed']
    if args.dataset in tagging_datasets:
        metrics = ['tagging']

    model_container = model_class(model=None,
                                  model_path=None,
                                  n_classes=n_classes,
                                  n_frames_cnn=n_frames_cnn,
                                  n_freq_cnn=n_freq_cnn,
                                  metrics=metrics,
                                  **params_model['model_arguments'])

    model_container.model.summary()

    # Set paths
    model_folder = os.path.join(args.models_path, args.model, args.dataset)
    exp_folder = os.path.join(model_folder, args.fold_name)
    mkdir_if_not_exists(exp_folder, parents=True)

    # Save model json and scaler
    model_container.save_model_json(model_folder)
    save_pickle(scaler, os.path.join(exp_folder, 'scaler.pickle'))

    # data_train = data_gen_train.get_data()
    # data_val = data_gen_val.get_data()

    # Train model
    model_container.train(
        data_gen_train,
        data_gen_val,
        # data_train, data_val,
        label_list=dataset.label_list,
        weights_path=exp_folder,
        **params['train'],
        sequence_time_sec=params_features['sequence_hop_time'])
Example #10
def main():
    # Parse arguments
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter
    )
    parser.add_argument(
        '-od', '--origin_dataset', type=str,
        help='dataset name (e.g. UrbanSound8k, ESC50, URBAN_SED, SONYC_UST)',
        default='UrbanSound8k'
    )
    parser.add_argument(
        '-ofold', '--origin_fold_name', type=str,
        help='origin fold name',
        default='fold1')
    parser.add_argument(
        '-d', '--dataset', type=str,
        help='dataset name (e.g. UrbanSound8k, ESC50, URBAN_SED, SONYC_UST)',
        default='ESC50'
    )
    parser.add_argument(
        '-fold', '--fold_name', type=str,
        help='destination fold name',
        default='fold1')
    parser.add_argument(
        '-f', '--features', type=str,
        help='features name (e.g. Spectrogram, MelSpectrogram, Openl3)',
        default='MelSpectrogram'
    )
    parser.add_argument(
        '-p', '--path', type=str,
        help='path to the parameters.json file',
        default='../'
    )
    parser.add_argument(
        '-m', '--model', type=str,
        help='model name (e.g. MLP, SB_CNN, SB_CNN_SED, A_CRNN, VGGish)',
        default='SB_CNN')

    parser.add_argument(
        '-s', '--models_path', type=str,
        help='path to save the trained model',
        default='../trained_models'
    )
    args = parser.parse_args()

    print(__doc__)

    if args.dataset not in get_available_datasets():
        raise AttributeError('Dataset not available')

    if args.features not in get_available_features():
        raise AttributeError('Features not available')

    if args.model not in get_available_models():
        raise AttributeError('Model not available')

    # Get parameters
    parameters_file = os.path.join(args.path, 'parameters.json')
    params = load_json(parameters_file)
    params_dataset = params['datasets'][args.dataset]
    params_features = params['features']
    params_model = params['models'][args.model]

    # Load origin model
    model_path_origin = os.path.join(args.models_path, args.model,
                                     args.origin_dataset)
    model_class = get_available_models()[args.model]
    metrics = ['accuracy']
    if args.dataset in sed_datasets:
        metrics = ['sed']
    model_container = model_class(
        model=None, model_path=model_path_origin,
        metrics=metrics
    )
    model_container.load_model_weights(
        os.path.join(model_path_origin, args.origin_fold_name))

    kwargs = {}
    if args.dataset in sed_datasets:
        kwargs = {'sequence_hop_time': params_features['sequence_hop_time']}

    # Get and init dataset class
    dataset_class = get_available_datasets()[args.dataset]
    dataset_path = os.path.join(args.path, params_dataset['dataset_path'])
    dataset = dataset_class(dataset_path, **kwargs)

    if args.fold_name not in dataset.fold_list:
        raise AttributeError('Fold not available')

    # Get and init feature class
    features_class = get_available_features()[args.features]
    features = features_class(
        sequence_time=params_features['sequence_time'],
        sequence_hop_time=params_features['sequence_hop_time'],
        audio_win=params_features['audio_win'],
        audio_hop=params_features['audio_hop'],
        sr=params_features['sr'], **params_features[args.features]
    )
    print('Features shape: ', features.get_shape())

    # Check if features were extracted
    if not features.check_if_extracted(dataset):
        print('Extracting features ...')
        features.extract(dataset)
        print('Done!')

    use_validate_set = True
    if args.dataset in ['TUTSoundEvents2017', 'ESC50', 'ESC10']:
        # With less data, don't use a validation set.
        use_validate_set = False

    folds_train, folds_val, _ = evaluation_setup(
        args.fold_name, dataset.fold_list,
        params_dataset['evaluation_mode'],
        use_validate_set=use_validate_set
    )

    data_gen_train = DataGenerator(
        dataset, features, folds=folds_train,
        batch_size=params['train']['batch_size'],
        shuffle=True, train=True, scaler=None
    )

    scaler = Scaler(normalizer=params_model['normalizer'])
    print('Fitting scaler ...')
    scaler.fit(data_gen_train)
    print('Done!')

    data_gen_train.set_scaler(scaler)

    data_gen_val = DataGenerator(
        dataset, features, folds=folds_val,
        batch_size=params['train']['batch_size'],
        shuffle=False, train=False, scaler=scaler
    )

    # Fine-tune model
    n_classes = len(dataset.label_list)
    layer_where_to_cut = -2
    model_container.fine_tuning(layer_where_to_cut,
                                new_number_of_classes=n_classes,
                                new_activation='sigmoid',
                                freeze_source_model=True)

    model_container.model.summary()

    # Set paths
    model_folder = os.path.join(
        args.models_path, args.model,
        args.origin_dataset+'_ft_'+args.dataset)
    exp_folder = os.path.join(model_folder, args.fold_name)
    mkdir_if_not_exists(exp_folder, parents=True)

    # Save model json and scaler
    model_container.save_model_json(model_folder)
    save_pickle(scaler, os.path.join(exp_folder, 'scaler.pickle'))

    # Train model
    model_container.train(
        data_gen_train, data_gen_val,
        label_list=dataset.label_list,
        weights_path=exp_folder,
        sequence_time_sec=params_features['sequence_hop_time'],
        **params['train'])
Example #11
def start_training(status, fold_ix, normalizer, model_path, epochs,
                   early_stopping, optimizer_ix, learning_rate, batch_size,
                   considered_improvement, n_clicks_train, dataset_ix):
    global data_generator_train
    global data_generator_val

    if status == 'TRAINING':
        if fold_ix is None:
            return [True, 'Please select a Fold', 'danger', ""]
        if optimizer_ix is None:
            return [True, 'Please select an Optimizer', 'danger', ""]

        dataset_name = options_datasets[dataset_ix]['label']
        fold_name = dataset.fold_list[fold_ix]
        params_dataset = params['datasets'][dataset_name]
        optimizer = options_optimizers[optimizer_ix]['label']

        use_validate_set = True
        if dataset_name in ['TUTSoundEvents2017', 'ESC50', 'ESC10']:
            # With less data, don't use a validation set.
            use_validate_set = False

        folds_train, folds_val, _ = evaluation_setup(
            fold_name,
            dataset.fold_list,
            params_dataset['evaluation_mode'],
            use_validate_set=use_validate_set)
        data_generator_train = DataGenerator(
            dataset,
            feature_extractor,
            folds=folds_train,
            batch_size=params['train']['batch_size'],
            shuffle=True,
            train=True,
            scaler=None)

        scaler = Scaler(normalizer=normalizer)
        print('Fitting scaler ...')
        scaler.fit(data_generator_train)
        print('Done!')

        # Pass the scaler to data_generator_train so it is applied when loading data
        data_generator_train.set_scaler(scaler)

        data_generator_val = DataGenerator(dataset,
                                           feature_extractor,
                                           folds=folds_val,
                                           batch_size=batch_size,
                                           shuffle=False,
                                           train=False,
                                           scaler=scaler)

        exp_folder_fold = conv_path(os.path.join(model_path, fold_name))
        mkdir_if_not_exists(exp_folder_fold, parents=True)

        scaler_path = os.path.join(exp_folder_fold, 'scaler.pickle')
        save_pickle(scaler, scaler_path)

        train_arguments = {
            'epochs': epochs,
            'early_stopping': early_stopping,
            'optimizer': optimizer,
            'learning_rate': learning_rate,
            'batch_size': batch_size,
            'considered_improvement': considered_improvement
        }
        with graph.as_default():
            model_container.train(data_generator_train,
                                  data_generator_val,
                                  weights_path=exp_folder_fold,
                                  label_list=dataset.label_list,
                                  **train_arguments)
            model_container.load_model_weights(exp_folder_fold)
        return [True, "Model trained", 'success', 'True']

    else:
        raise dash.exceptions.PreventUpdate
Example #12
def create_model(n_clicks_create_model, n_clicks_load_model, model_ix,
                 feature_ix, dataset_ix, model_parameters, sequence_time,
                 sequence_hop_time, audio_hop, audio_win, sr,
                 specific_parameters, dataset_path, audio_folder,
                 features_folder, model_path):
    global model_container
    global feature_extractor
    global dataset

    ctx = dash.callback_context
    if (n_clicks_create_model is None) and (n_clicks_load_model is None):
        return [False, "", 'success', '']
    else:
        button_id = ctx.triggered[0]['prop_id'].split('.')[0]

    if (button_id == 'create_model') or (button_id == 'load_model'):
        if model_ix is None:
            return [True, 'Please select a Model', 'danger', '']
        if feature_ix is None:
            return [True, 'Please select a Feature extractor', 'danger', '']
        if dataset_ix is None:
            return [True, 'Please select a Dataset', 'danger', '']

        model_name = options_models[model_ix]['label']
        feature_name = options_features[feature_ix]['label']
        dataset_name = options_datasets[dataset_ix]['label']

        feature_extractor_class = get_available_features()[feature_name]
        specific_parameters = ast.literal_eval(specific_parameters)
        feature_extractor = feature_extractor_class(
            sequence_time=sequence_time,
            sequence_hop_time=sequence_hop_time,
            audio_win=audio_win,
            audio_hop=audio_hop,
            sr=sr,
            **specific_parameters)

        features_shape = feature_extractor.get_shape()
        n_frames_cnn = features_shape[1]
        n_freq_cnn = features_shape[2]

        # get dataset class
        dataset_class = get_available_datasets()[dataset_name]
        # init data_generator
        kwargs = {}
        if dataset_name == 'URBAN_SED':
            kwargs = {'sequence_hop_time': sequence_hop_time}
        dataset = dataset_class(dataset_path, **kwargs)

        n_classes = len(dataset.label_list)

        model_class = get_available_models()[model_name]

        model_parameters = ast.literal_eval(model_parameters)
        if (button_id == 'create_model'):
            with graph.as_default():
                model_container = model_class(model=None,
                                              model_path=None,
                                              n_classes=n_classes,
                                              n_frames_cnn=n_frames_cnn,
                                              n_freq_cnn=n_freq_cnn,
                                              **model_parameters)

                model_container.model.summary()
                if model_name == 'VGGish':
                    model_container.load_pretrained_model_weights()
                    model_container.fine_tuning(
                        -1,
                        new_number_of_classes=n_classes,
                        new_activation='softmax',
                        freeze_source_model=True)

                stringlist = []
                model_container.model.summary(
                    print_fn=lambda x: stringlist.append(x))
                summary = "\n".join(stringlist)

                mkdir_if_not_exists(conv_path(os.path.dirname(model_path)))
                mkdir_if_not_exists(conv_path(model_path))
                model_container.save_model_json(conv_path(model_path))

                return [True, 'Model created', 'success', summary]

        if (button_id == 'load_model'):
            with graph.as_default():
                model_container = model_class(model=None,
                                              model_path=conv_path(model_path))
                model_container.model.summary()
                stringlist = []
                model_container.model.summary(
                    print_fn=lambda x: stringlist.append(x))
                summary = "\n".join(stringlist)
            return [True, 'Model loaded', 'success', summary]
        # model_container.save_model_weights(model_path)

    return [False, "", 'success', '']
Example #13
X_pca = np.zeros((1, 4))
X = np.zeros((1, 128, 64))
Y = np.zeros((1, 10))
file_names = []

graph = get_default_graph()

# VIS TAB


def conv_path(file_or_folder):
    return os.path.join(os.path.dirname(__file__), file_or_folder)


mkdir_if_not_exists(conv_path('models'))


@app.callback(
    [Output('plot_mel', 'figure'),
     Output('audio-player', 'overrideProps')],
    [Input('plot2D', 'selectedData')],
    [State('x_select', 'value'),
     State('y_select', 'value')])
def click_on_plot2d(clickData, x_select, y_select):
    if clickData is None:
        figure_mel = generate_figure_mel(X[0])
        return [figure_mel, {'autoPlay': False, 'src': ''}]
    else:
        point = np.array(
            [clickData['points'][0]['x'], clickData['points'][0]['y']])