# Example #1
def test_data_processor():
    """DataProcessor must match running the stacker and sequencer by hand."""
    mfcc_path = os.path.join('material', 'test.mfcc.cpickle')

    # Extract MFCC features for the test clip and store them on disk.
    FeatureExtractor(store=True, overwrite=True).extract(
        audio_file=os.path.join('material', 'test.wav'),
        extractor_name='mfcc',
        extractor_params={'mfcc': {'n_mfcc': 10}},
        storage_paths={'mfcc': mfcc_path})

    # Test #1
    recipe = ParameterContainer()._parse_recipe(recipe='mfcc=0-5')

    repository = FeatureRepository(filename_dict={'mfcc': mfcc_path})

    # Reference pipeline: stack features, then cut into 10-frame sequences.
    stacker = FeatureStacker(recipe=recipe)
    stacked = stacker.process(feature_data=repository)

    sequencer = DataSequencer(frames=10, hop=10, padding=False)
    expected = sequencer.process(data=stacked.feat[0])

    # Same stacker + sequencer wired through a DataProcessor.
    processor = DataProcessor(
        feature_processing_chain=ProcessingChain([stacker]),
        data_processing_chain=ProcessingChain([sequencer]))
    processed, _feature_matrix_size = processor.process(
        feature_data=repository)

    numpy.testing.assert_array_equal(expected, processed[:, 0, :, :])
# Example #2
def test_processing_chain():
    """A one-element ProcessingChain must equal the stacker applied directly."""
    mfcc_path = os.path.join('material', 'test.mfcc.cpickle')

    FeatureExtractor(store=True, overwrite=True).extract(
        audio_file=os.path.join('material', 'test.wav'),
        extractor_name='mfcc',
        extractor_params={'mfcc': {'n_mfcc': 10}},
        storage_paths={'mfcc': mfcc_path})

    # Test #1
    recipe = ParameterContainer()._parse_recipe(recipe='mfcc=0-5')

    repository = FeatureRepository(filename_dict={'mfcc': mfcc_path})

    # Direct application of the stacker.
    stacker = FeatureStacker(recipe=recipe)
    direct = stacker.process(feature_data=repository)

    # Same stacker wrapped into a processing chain.
    chain = ProcessingChain()
    chain.append(stacker)
    chained = chain.process(data=repository)

    numpy.testing.assert_array_equal(direct.feat, chained.feat)
def test_with_statement():
    """FeatureNormalizer used via `with` must finalize its accumulators on exit."""
    mfcc_path = os.path.join('material', 'test.mfcc.cpickle')

    FeatureExtractor(store=True, overwrite=True).extract(
        audio_file=os.path.join('material', 'test.wav'),
        extractor_name='mfcc',
        extractor_params={'mfcc': {'n_mfcc': 10}},
        storage_paths={'mfcc': mfcc_path})

    container = FeatureContainer().load(filename=mfcc_path)
    with FeatureNormalizer() as feature_normalizer:
        feature_normalizer.accumulate(container)

    # 501 frames accumulated from the single test file.
    nose.tools.eq_(feature_normalizer['N'][0], 501)

    feat = container.feat[0]
    numpy.testing.assert_array_equal(feature_normalizer['mean'][0][0],
                                     numpy.mean(feat, axis=0))
    numpy.testing.assert_array_equal(feature_normalizer['S1'][0],
                                     numpy.sum(feat, axis=0))
    numpy.testing.assert_array_equal(feature_normalizer['S2'][0],
                                     numpy.sum(feat ** 2, axis=0))

    test_accumulate_finalize()
def test_learn():
    """SceneClassifierGMM.learn must fit one GMM per scene label."""
    mfcc_path = os.path.join('material', 'test.mfcc.cpickle')

    FeatureExtractor(store=True, overwrite=True).extract(
        audio_file=os.path.join('material', 'test.wav'),
        extractor_name='mfcc',
        extractor_params={'mfcc': {'n_mfcc': 10}},
        storage_paths={'mfcc': mfcc_path})

    feature_container = FeatureContainer(filename=mfcc_path)

    # The same features presented as two files with different scene labels.
    data = {
        'file1.wav': feature_container,
        'file2.wav': feature_container,
    }

    annotations = {
        filename: MetaDataItem({'file': filename, 'scene_label': label})
        for filename, label in [('file1.wav', 'scene1'),
                                ('file2.wav', 'scene2')]
    }

    # random_state pinned so the fit is deterministic.
    sc = SceneClassifierGMM(
        method='gmm',
        class_labels=['scene1', 'scene2'],
        params={
            'n_components': 6,
            'covariance_type': 'diag',
            'tol': 0.001,
            'reg_covar': 0,
            'max_iter': 40,
            'n_init': 1,
            'init_params': 'kmeans',
            'random_state': 0,
        },
        filename=os.path.join('material', 'test.model.cpickle'),
        disable_progress_bar=True,
    )

    sc.learn(data=data, annotations=annotations)

    # Test model count: one model per class label.
    nose.tools.eq_(len(sc.model), 2)

    # Test model dimensions: n_components mean vectors per model.
    nose.tools.eq_(sc.model['scene1'].means_.shape[0], 6)
# Example #5
def test_wrong_extractor():
    """Call extract() with an extractor name ('mf') that does not exist.

    NOTE(review): there is no assertion here — presumably extract() logs or
    raises for an unknown extractor name; confirm the intended behavior.
    """
    bad_name = 'mf'
    feature_repository = FeatureExtractor(store=False).extract(
        audio_file=os.path.join('material', 'test.wav'),
        extractor_name=bad_name,
        extractor_params={'mfcc': {'n_mfcc': 10}})
# Example #6
def test_save():
    """Extract MFCC features with store=True so they are written to disk."""
    name = 'mfcc'
    feature_repository = FeatureExtractor(store=True, overwrite=True).extract(
        audio_file=os.path.join('material', 'test.wav'),
        extractor_name=name,
        extractor_params={name: {'n_mfcc': 10}},
        storage_paths={name: os.path.join('material', 'test.mfcc.cpickle')})
# Example #7
def test_save():
    """Load a stored feature container and save it under a new filename.

    NOTE(review): this redefines test_save from an earlier example; if both
    live in one module, only this later definition is collected.
    """
    source_path = os.path.join('material', 'test.mfcc.cpickle')

    FeatureExtractor(store=True, overwrite=True).extract(
        audio_file=os.path.join('material', 'test.wav'),
        extractor_name='mfcc',
        extractor_params={'mfcc': {'n_mfcc': 10}},
        storage_paths={'mfcc': source_path})

    container = FeatureContainer().load(filename=source_path)
    container.save(filename=os.path.join('material', 'saved.mfcc.cpickle'))
def test():
    """FeatureMasker must drop exactly the frames covered by the mask events."""
    mfcc_path = os.path.join('material', 'test.mfcc.cpickle')
    filename_dict = {
        'mfcc1': mfcc_path,
        'mfcc2': mfcc_path,
    }

    FeatureExtractor(store=True, overwrite=True).extract(
        audio_file=os.path.join('material', 'test.wav'),
        extractor_name='mfcc',
        extractor_params={'mfcc': {'n_mfcc': 10}},
        storage_paths={'mfcc': mfcc_path})

    # Test #1: one 40-second mask removes 40 frames (1 s hop length).
    masker = FeatureMasker(hop_length_seconds=1)
    masker.set_mask(mask_events=MetaDataContainer([{
        'event_onset': 10.0,
        'event_offset': 50.0,
    }]))
    repository = FeatureRepository().load(filename_dict=filename_dict)
    original_length = repository['mfcc1'].shape[0]
    masker.process(feature_data=repository)

    nose.tools.eq_(repository['mfcc1'].shape[0], original_length - 40)

    # Test #2: two masks (40 s + 30 s) remove 70 frames in total.
    masker = FeatureMasker(hop_length_seconds=1)
    masker.set_mask(mask_events=MetaDataContainer([
        {
            'event_onset': 10.0,
            'event_offset': 50.0,
        },
        {
            'event_onset': 120.0,
            'event_offset': 150.0,
        },
    ]))
    repository = FeatureRepository().load(filename_dict=filename_dict)
    original_length = repository['mfcc1'].shape[0]
    masker.process(feature_data=repository)

    nose.tools.eq_(repository['mfcc1'].shape[0],
                   original_length - 40 - 30)
# Example #9
def test_learn():
    """SceneClassifierMLP.learn must record one loss entry per training epoch.

    NOTE(review): relies on a module-level `learner_params` dict — confirm it
    is defined in the surrounding module.
    """
    mfcc_path = os.path.join('material', 'test.mfcc.cpickle')

    FeatureExtractor(store=True, overwrite=True).extract(
        audio_file=os.path.join('material', 'test.wav'),
        extractor_name='mfcc',
        extractor_params={'mfcc': {'n_mfcc': 10}},
        storage_paths={'mfcc': mfcc_path})

    feature_container = FeatureContainer(filename=mfcc_path)

    # The same features presented as two files with different scene labels.
    data = {
        'file1.wav': feature_container,
        'file2.wav': feature_container,
    }

    annotations = {
        filename: MetaDataItem({'file': filename, 'scene_label': label})
        for filename, label in [('file1.wav', 'scene1'),
                                ('file2.wav', 'scene2')]
    }

    sc = SceneClassifierMLP(
        method='mlp',
        class_labels=['scene1', 'scene2'],
        params=learner_params,
        filename=os.path.join('material', 'test.model.cpickle'),
        disable_progress_bar=True,
    )

    sc.learn(data=data, annotations=annotations)

    # Test epochs
    nose.tools.eq_(len(sc['learning_history']['loss']),
                   learner_params['training']['epochs'])
def test_get_target_matrix_dict():
    """_get_target_matrix_dict must one-hot encode the scene label per frame."""
    mfcc_path = os.path.join('material', 'test.mfcc.cpickle')

    FeatureExtractor(store=True, overwrite=True).extract(
        audio_file=os.path.join('material', 'test.wav'),
        extractor_name='mfcc',
        extractor_params={'mfcc': {'n_mfcc': 10}},
        storage_paths={'mfcc': mfcc_path})

    feature_container = FeatureContainer(filename=mfcc_path)

    data = {
        'file1.wav': feature_container,
        'file2.wav': feature_container,
    }

    annotations = {
        filename: MetaDataItem({'file': filename, 'scene_label': label})
        for filename, label in [('file1.wav', 'scene1'),
                                ('file2.wav', 'scene2')]
    }

    sc = SceneClassifier(
        class_labels=['scene1', 'scene2'],
        disable_progress_bar=True,
    )
    target_matrix = sc._get_target_matrix_dict(data=data,
                                               annotations=annotations)

    # Test shape: 501 frames x 2 classes per file.
    nose.tools.eq_(target_matrix['file1.wav'].shape, (501, 2))
    nose.tools.eq_(target_matrix['file2.wav'].shape, (501, 2))

    # Test content: file1 activates only column 0 (scene1)...
    nose.tools.eq_(numpy.any(target_matrix['file1.wav'][:, 0] == 1), True)
    nose.tools.eq_(numpy.any(target_matrix['file1.wav'][:, 1] == 1), False)

    # ...and file2 activates only column 1 (scene2).
    nose.tools.eq_(numpy.any(target_matrix['file2.wav'][:, 0] == 1), False)
    nose.tools.eq_(numpy.any(target_matrix['file2.wav'][:, 1] == 1), True)
def test_predict():
    """SceneClassifierGMM end-to-end: learn, predict, and reject bad params."""
    FeatureExtractor(store=True, overwrite=True).extract(
        audio_file=os.path.join('material', 'test.wav'),
        extractor_name='mfcc',
        extractor_params={
            'mfcc': {
                'n_mfcc': 10
            }
        },
        storage_paths={
            'mfcc': os.path.join('material', 'test.mfcc.cpickle')
        }
    )

    feature_container = FeatureContainer(filename=os.path.join('material', 'test.mfcc.cpickle'))

    # The same features under two names, labelled with different scenes.
    data = {
        'file1.wav': feature_container,
        'file2.wav': feature_container,
    }

    annotations = {
        'file1.wav': MetaDataItem(
            {
                'file': 'file1.wav',
                'scene_label': 'scene1',
            }
        ),
        'file2.wav': MetaDataItem(
            {
                'file': 'file2.wav',
                'scene_label': 'scene2',
            }
        ),
    }

    # random_state pinned so the fitted models (and the predicted label) are
    # deterministic.
    sc = SceneClassifierGMM(
        method='gmm',
        class_labels=['scene1', 'scene2'],
        params={
            'n_components': 6,
            'covariance_type': 'diag',
            'tol': 0.001,
            'reg_covar': 0,
            'max_iter': 40,
            'n_init': 1,
            'init_params': 'kmeans',
            'random_state': 0,
        },
        filename=os.path.join('material', 'test.model.cpickle'),
        disable_progress_bar=True,
    )

    sc.learn(data=data, annotations=annotations)
    recognizer_params = {
        'frame_accumulation': {
            'enable': True,
            'type': 'sum',
        },
        'frame_binarization': {
            'enable': False,
        },
        'decision_making': {
            'enable': True,
            'type': 'maximum',
        }
    }
    result = sc.predict(
        feature_data=feature_container,
        recognizer_params=recognizer_params
    )

    # Test result
    nose.tools.eq_(result, 'scene1')

    # Test errors: an unknown frame_accumulation type must raise AssertionError.
    recognizer_params['frame_accumulation']['type'] = 'test'
    nose.tools.assert_raises(AssertionError, sc.predict, feature_container, recognizer_params)

    # Restore accumulation, then break decision_making the same way.
    recognizer_params['frame_accumulation']['type'] = 'sum'
    recognizer_params['decision_making']['type'] = 'test'
    nose.tools.assert_raises(AssertionError, sc.predict, feature_container, recognizer_params)
# Example #12
def test_extract():
    """Exercise FeatureExtractor.extract for several extractor types.

    For each extractor (mfcc, mfcc_delta, mfcc_acceleration, mel) the returned
    repository is checked for its keys, meta data, statistics, and feature
    matrix shape (501 frames extracted from material/test.wav).
    """
    # MFCC
    extractor_name = 'mfcc'
    feature_repository = FeatureExtractor(store=False).extract(
        audio_file=os.path.join('material', 'test.wav'),
        extractor_name=extractor_name,
        extractor_params={'mfcc': {
            'n_mfcc': 12
        }})

    nose.tools.eq_(len(feature_repository), 1)
    nose.tools.assert_list_equal(sorted(list(feature_repository.keys())),
                                 [extractor_name])

    # Meta
    nose.tools.assert_list_equal(
        sorted(list(feature_repository[extractor_name].keys())),
        ['feat', 'meta', 'stat'])
    nose.tools.eq_(
        posix_path(feature_repository[extractor_name]['meta']['audio_file']),
        'material/test.wav')
    nose.tools.eq_(
        feature_repository[extractor_name]['meta']['parameters']['n_mfcc'], 12)

    # Stat
    nose.tools.eq_(feature_repository[extractor_name].stat[0]['N'], 501)
    nose.tools.assert_list_equal(
        sorted(list(feature_repository[extractor_name].stat[0].keys())),
        ['N', 'S1', 'S2', 'mean', 'std'])

    # Feat
    # Shape: 501 frames x 12 MFCC coefficients
    nose.tools.eq_(feature_repository[extractor_name].feat[0].shape[0], 501)
    nose.tools.eq_(feature_repository[extractor_name].feat[0].shape[1], 12)

    nose.tools.eq_(feature_repository[extractor_name].shape[0], 501)
    nose.tools.eq_(feature_repository[extractor_name].shape[1], 12)

    # MFCC - delta (first-order derivative; parameters inherited from 'mfcc')
    extractor_name = 'mfcc_delta'
    feature_repository = FeatureExtractor(store=False).extract(
        audio_file=os.path.join('material', 'test.wav'),
        extractor_name=extractor_name,
        extractor_params={'mfcc': {
            'n_mfcc': 12
        }})

    nose.tools.eq_(len(feature_repository), 1)
    nose.tools.assert_list_equal(list(feature_repository.keys()),
                                 [extractor_name])

    # Meta
    nose.tools.assert_list_equal(
        sorted(list(feature_repository[extractor_name].keys())),
        ['feat', 'meta', 'stat'])
    nose.tools.eq_(
        posix_path(feature_repository[extractor_name]['meta']['audio_file']),
        'material/test.wav')
    nose.tools.eq_(
        feature_repository[extractor_name]['meta']['parameters']
        ['dependency_method'], 'mfcc')
    nose.tools.eq_(
        feature_repository[extractor_name]['meta']['parameters']
        ['dependency_parameters']['n_mfcc'], 12)

    # Stat
    nose.tools.eq_(feature_repository[extractor_name].stat[0]['N'], 501)
    nose.tools.assert_list_equal(
        sorted(list(feature_repository[extractor_name].stat[0].keys())),
        ['N', 'S1', 'S2', 'mean', 'std'])

    # Feat
    # Shape
    nose.tools.eq_(feature_repository[extractor_name].feat[0].shape[0], 501)
    nose.tools.eq_(feature_repository[extractor_name].feat[0].shape[1], 12)

    nose.tools.eq_(feature_repository[extractor_name].shape[0], 501)
    nose.tools.eq_(feature_repository[extractor_name].shape[1], 12)

    # MFCC - acceleration (second-order derivative)
    extractor_name = 'mfcc_acceleration'
    feature_repository = FeatureExtractor(store=False).extract(
        audio_file=os.path.join('material', 'test.wav'),
        extractor_name=extractor_name,
        extractor_params={'mfcc': {
            'n_mfcc': 12
        }})

    nose.tools.eq_(len(feature_repository), 1)
    nose.tools.assert_list_equal(list(feature_repository.keys()),
                                 [extractor_name])

    # Meta
    nose.tools.assert_list_equal(
        sorted(list(feature_repository[extractor_name].keys())),
        ['feat', 'meta', 'stat'])
    nose.tools.eq_(
        posix_path(feature_repository[extractor_name]['meta']['audio_file']),
        'material/test.wav')
    nose.tools.eq_(
        feature_repository[extractor_name]['meta']['parameters']
        ['dependency_method'], 'mfcc')
    nose.tools.eq_(
        feature_repository[extractor_name]['meta']['parameters']
        ['dependency_parameters']['n_mfcc'], 12)

    # Stat
    nose.tools.eq_(feature_repository[extractor_name].stat[0]['N'], 501)
    nose.tools.assert_list_equal(
        sorted(list(feature_repository[extractor_name].stat[0].keys())),
        ['N', 'S1', 'S2', 'mean', 'std'])

    # Feat
    # Shape
    nose.tools.eq_(feature_repository[extractor_name].feat[0].shape[0], 501)
    nose.tools.eq_(feature_repository[extractor_name].feat[0].shape[1], 12)

    nose.tools.eq_(feature_repository[extractor_name].shape[0], 501)
    nose.tools.eq_(feature_repository[extractor_name].shape[1], 12)

    # MEL
    extractor_name = 'mel'
    feature_repository = FeatureExtractor(store=False).extract(
        audio_file=os.path.join('material', 'test.wav'),
        extractor_name=extractor_name,
        extractor_params={'mel': {
            'n_mels': 10
        }})

    nose.tools.eq_(len(feature_repository), 1)
    nose.tools.assert_list_equal(list(feature_repository.keys()),
                                 [extractor_name])

    # Meta
    nose.tools.assert_list_equal(
        sorted(list(feature_repository[extractor_name].keys())),
        ['feat', 'meta', 'stat'])
    nose.tools.eq_(
        posix_path(feature_repository[extractor_name]['meta']['audio_file']),
        'material/test.wav')
    nose.tools.eq_(
        feature_repository[extractor_name]['meta']['parameters']['n_mels'], 10)

    # Stat
    nose.tools.eq_(feature_repository[extractor_name].stat[0]['N'], 501)
    nose.tools.assert_list_equal(
        sorted(list(feature_repository[extractor_name].stat[0].keys())),
        ['N', 'S1', 'S2', 'mean', 'std'])

    # Feat
    # Shape: 501 frames x 10 mel bands
    nose.tools.eq_(feature_repository[extractor_name].feat[0].shape[0], 501)
    nose.tools.eq_(feature_repository[extractor_name].feat[0].shape[1], 10)

    nose.tools.eq_(feature_repository[extractor_name].shape[0], 501)
    nose.tools.eq_(feature_repository[extractor_name].shape[1], 10)

    # MFCC
    # NOTE(review): extractor_name is NOT passed to extract() here, unlike the
    # calls above, yet the assertions still expect an 'mfcc' entry — confirm
    # that extract() defaults to 'mfcc' or infers it from extractor_params.
    extractor_name = 'mfcc'
    feature_repository = FeatureExtractor(store=False).extract(
        audio_file=os.path.join('material', 'test.wav'),
        extractor_params={'mfcc': {
            'n_mfcc': 12
        }})

    nose.tools.eq_(len(feature_repository), 1)
    nose.tools.assert_list_equal(list(feature_repository.keys()),
                                 [extractor_name])

    # Meta
    nose.tools.assert_list_equal(
        sorted(list(feature_repository[extractor_name].keys())),
        ['feat', 'meta', 'stat'])
    nose.tools.eq_(
        posix_path(feature_repository[extractor_name]['meta']['audio_file']),
        'material/test.wav')
    nose.tools.eq_(
        feature_repository[extractor_name]['meta']['parameters']['n_mfcc'], 12)

    # Stat
    nose.tools.eq_(feature_repository[extractor_name].stat[0]['N'], 501)
    nose.tools.assert_list_equal(
        sorted(list(feature_repository[extractor_name].stat[0].keys())),
        ['N', 'S1', 'S2', 'mean', 'std'])

    # Feat
    # Shape
    nose.tools.eq_(feature_repository[extractor_name].feat[0].shape[0], 501)
    nose.tools.eq_(feature_repository[extractor_name].feat[0].shape[1], 12)

    nose.tools.eq_(feature_repository[extractor_name].shape[0], 501)
    nose.tools.eq_(feature_repository[extractor_name].shape[1], 12)
def test_learn():
    """EventDetectorMLP.learn must record one loss entry per training epoch.

    NOTE(review): method='gmm' is passed to an MLP detector, and this function
    redefines earlier test_learn definitions in this module — confirm both are
    intended. Relies on a module-level `learner_params` dict.
    """
    FeatureExtractor(store=True, overwrite=True).extract(
        audio_file=os.path.join('material', 'test.wav'),
        extractor_name='mfcc',
        extractor_params={'mfcc': {
            'n_mfcc': 10
        }},
        storage_paths={'mfcc': os.path.join('material', 'test.mfcc.cpickle')})

    feature_container = FeatureContainer(
        filename=os.path.join('material', 'test.mfcc.cpickle'))

    # Same feature container presented as two files.
    data = {
        'file1.wav': feature_container,
        'file2.wav': feature_container,
    }

    # Per-file event annotations (onset/offset in seconds) for two event classes.
    annotations = {
        'file1.wav':
        MetaDataContainer([{
            'file': 'file1.wav',
            'scene_label': 'scene1',
            'event_onset': 0.0,
            'event_offset': 1.0,
            'event_label': 'event1',
            'identifier': 'a',
        }, {
            'file': 'file1.wav',
            'scene_label': 'scene1',
            'event_onset': 1.0,
            'event_offset': 2.0,
            'event_label': 'event2',
            'identifier': 'a',
        }, {
            'file': 'file1.wav',
            'scene_label': 'scene1',
            'event_onset': 2.0,
            'event_offset': 3.0,
            'event_label': 'event2',
            'identifier': 'a',
        }, {
            'file': 'file1.wav',
            'scene_label': 'scene1',
            'event_onset': 4.0,
            'event_offset': 5.0,
            'event_label': 'event1',
            'identifier': 'a',
        }, {
            'file': 'file1.wav',
            'scene_label': 'scene1',
            'event_onset': 1.0,
            'event_offset': 2.0,
            'event_label': 'event1',
            'identifier': 'a',
        }]),
        'file2.wav':
        MetaDataContainer([{
            'file': 'file2.wav',
            'scene_label': 'scene1',
            'event_onset': 0.0,
            'event_offset': 1.0,
            'event_label': 'event2',
            'identifier': 'b',
        }, {
            'file': 'file2.wav',
            'scene_label': 'scene1',
            'event_onset': 1.0,
            'event_offset': 2.0,
            'event_label': 'event1',
            'identifier': 'b',
        }, {
            'file': 'file2.wav',
            'scene_label': 'scene1',
            'event_onset': 2.0,
            'event_offset': 3.0,
            'event_label': 'event2',
            'identifier': 'b',
        }, {
            'file': 'file2.wav',
            'scene_label': 'scene1',
            'event_onset': 3.0,
            'event_offset': 4.0,
            'event_label': 'event2',
            'identifier': 'b',
        }])
    }

    ed = EventDetectorMLP(
        method='gmm',
        class_labels=['event1', 'event2'],
        params={
            'hop_length_seconds': 0.02,
            'parameters': learner_params
        },
        filename=os.path.join('material', 'test.model.cpickle'),
        disable_progress_bar=True,
    )

    ed.learn(data=data, annotations=annotations)

    # Test epochs
    nose.tools.eq_(len(ed['learning_history']['loss']),
                   learner_params['training']['epochs'])
def test_predict():
    """EventDetectorMLP frame probabilities fed through EventRecognizer must
    yield at least one detected event.

    NOTE(review): redefines the earlier test_predict in this module, and passes
    method='gmm' to an MLP detector — confirm both are intended. Relies on a
    module-level `learner_params` dict.
    """
    FeatureExtractor(store=True, overwrite=True).extract(
        audio_file=os.path.join('material', 'test.wav'),
        extractor_name='mfcc',
        extractor_params={'mfcc': {
            'n_mfcc': 10
        }},
        storage_paths={'mfcc': os.path.join('material', 'test.mfcc.cpickle')})

    feature_container = FeatureContainer(
        filename=os.path.join('material', 'test.mfcc.cpickle'))

    # Same feature container presented as two files.
    data = {
        'file1.wav': feature_container,
        'file2.wav': feature_container,
    }

    # Per-file event annotations (onset/offset in seconds) for two event classes.
    annotations = {
        'file1.wav':
        MetaDataContainer([{
            'file': 'file1.wav',
            'scene_label': 'scene1',
            'event_onset': 0.0,
            'event_offset': 1.0,
            'event_label': 'event1',
            'identifier': 'a',
        }, {
            'file': 'file1.wav',
            'scene_label': 'scene1',
            'event_onset': 1.0,
            'event_offset': 2.0,
            'event_label': 'event2',
            'identifier': 'a',
        }, {
            'file': 'file1.wav',
            'scene_label': 'scene1',
            'event_onset': 2.0,
            'event_offset': 3.0,
            'event_label': 'event2',
            'identifier': 'a',
        }, {
            'file': 'file1.wav',
            'scene_label': 'scene1',
            'event_onset': 4.0,
            'event_offset': 5.0,
            'event_label': 'event1',
            'identifier': 'a',
        }, {
            'file': 'file1.wav',
            'scene_label': 'scene1',
            'event_onset': 1.0,
            'event_offset': 2.0,
            'event_label': 'event1',
            'identifier': 'a',
        }]),
        'file2.wav':
        MetaDataContainer([{
            'file': 'file2.wav',
            'scene_label': 'scene1',
            'event_onset': 0.0,
            'event_offset': 1.0,
            'event_label': 'event2',
            'identifier': 'b',
        }, {
            'file': 'file2.wav',
            'scene_label': 'scene1',
            'event_onset': 1.0,
            'event_offset': 2.0,
            'event_label': 'event1',
            'identifier': 'b',
        }, {
            'file': 'file2.wav',
            'scene_label': 'scene1',
            'event_onset': 2.0,
            'event_offset': 3.0,
            'event_label': 'event2',
            'identifier': 'b',
        }, {
            'file': 'file2.wav',
            'scene_label': 'scene1',
            'event_onset': 3.0,
            'event_offset': 4.0,
            'event_label': 'event2',
            'identifier': 'b',
        }])
    }

    ed = EventDetectorMLP(
        method='gmm',
        class_labels=['event1', 'event2'],
        params={
            'hop_length_seconds': 0.02,
            'parameters': learner_params,
        },
        filename=os.path.join('material', 'test.model.cpickle'),
        disable_progress_bar=True,
    )

    ed.learn(data=data, annotations=annotations)

    recognizer_params = {
        'frame_accumulation': {
            'enable': False,
        },
        'frame_binarization': {
            'enable': True,
            'type': 'global_threshold',
            'threshold': 0.5,
        },
        'event_activity_processing': {
            'enable': True,
            'type': 'median_filtering',
            'window_length_frames': 11,
        }
    }
    # Frame probabilities
    frame_probabilities = ed.predict(feature_data=feature_container, )
    # Event recognizer: binarize per frame, then median-filter the activity.
    result = EventRecognizer(
        hop_length_seconds=0.02,
        params=recognizer_params,
        class_labels=['event1', 'event2'],
    ).process(frame_probabilities=frame_probabilities)

    # Test result: at least one event detected.
    nose.tools.eq_(len(result) > 0, True)
def test_process():
    """FeatureStacker.process checked for three recipes: a channel range
    ('mfcc=0-5'), an explicit channel list ('mfcc=1,2,3,4'), and the full
    feature matrix ('mfcc').

    NOTE(review): this example passes filename_list= and feature_repository=
    where other examples in this file use filename_dict= and feature_data= —
    presumably a different library version; confirm against the installed API.
    """
    FeatureExtractor(store=True, overwrite=True).extract(
        audio_file=os.path.join('material', 'test.wav'),
        extractor_name='mfcc',
        extractor_params={'mfcc': {
            'n_mfcc': 10
        }},
        storage_paths={'mfcc': os.path.join('material', 'test.mfcc.cpickle')})

    # Test #1: channel range 0-5 -> 6-dimensional vectors
    test_recipe = 'mfcc=0-5'
    test_recipe_parsed = ParameterContainer()._parse_recipe(recipe=test_recipe)

    feature_repository = FeatureRepository(
        filename_list={'mfcc': os.path.join('material', 'test.mfcc.cpickle')})

    feature_stacker = FeatureStacker(recipe=test_recipe_parsed)
    feature_container = feature_stacker.process(
        feature_repository=feature_repository)

    nose.tools.assert_list_equal(sorted(list(feature_container.keys())),
                                 ['feat', 'meta', 'stat'])

    nose.tools.eq_(feature_container.channels, 1)
    nose.tools.eq_(feature_container.frames, 501)
    nose.tools.eq_(feature_container.vector_length, 6)

    nose.tools.eq_(feature_container.meta['audio_file'], 'material/test.wav')

    # Stat
    nose.tools.eq_(feature_container.stat[0]['N'], 501)
    nose.tools.assert_list_equal(
        sorted(list(feature_container.stat[0].keys())),
        ['N', 'S1', 'S2', 'mean', 'std'])

    # Feat
    # Shape
    nose.tools.eq_(feature_container.feat[0].shape[0], 501)
    nose.tools.eq_(feature_container.feat[0].shape[1], 6)

    nose.tools.eq_(feature_container.shape[0], 501)
    nose.tools.eq_(feature_container.shape[1], 6)

    # Test #2: explicit channel list -> 4-dimensional vectors
    test_recipe = 'mfcc=1,2,3,4'
    test_recipe_parsed = ParameterContainer()._parse_recipe(recipe=test_recipe)

    feature_repository = FeatureRepository(
        filename_list={'mfcc': os.path.join('material', 'test.mfcc.cpickle')})

    feature_stacker = FeatureStacker(recipe=test_recipe_parsed)
    feature_container = feature_stacker.process(
        feature_repository=feature_repository)

    nose.tools.assert_list_equal(sorted(list(feature_container.keys())),
                                 ['feat', 'meta', 'stat'])

    nose.tools.eq_(feature_container.channels, 1)
    nose.tools.eq_(feature_container.frames, 501)
    nose.tools.eq_(feature_container.vector_length, 4)

    nose.tools.eq_(feature_container.meta['audio_file'], 'material/test.wav')

    # Stat
    nose.tools.eq_(feature_container.stat[0]['N'], 501)
    nose.tools.assert_list_equal(
        sorted(list(feature_container.stat[0].keys())),
        ['N', 'S1', 'S2', 'mean', 'std'])

    # Feat
    # Shape
    nose.tools.eq_(feature_container.feat[0].shape[0], 501)
    nose.tools.eq_(feature_container.feat[0].shape[1], 4)

    nose.tools.eq_(feature_container.shape[0], 501)
    nose.tools.eq_(feature_container.shape[1], 4)

    # Test #3: bare extractor name -> all 10 channels
    test_recipe = 'mfcc'
    test_recipe_parsed = ParameterContainer()._parse_recipe(recipe=test_recipe)

    feature_repository = FeatureRepository(
        filename_list={'mfcc': os.path.join('material', 'test.mfcc.cpickle')})

    feature_stacker = FeatureStacker(recipe=test_recipe_parsed)
    feature_container = feature_stacker.process(
        feature_repository=feature_repository)

    nose.tools.assert_list_equal(sorted(list(feature_container.keys())),
                                 ['feat', 'meta', 'stat'])

    nose.tools.eq_(feature_container.channels, 1)
    nose.tools.eq_(feature_container.frames, 501)
    nose.tools.eq_(feature_container.vector_length, 10)

    nose.tools.eq_(feature_container.meta['audio_file'], 'material/test.wav')

    # Stat
    nose.tools.eq_(feature_container.stat[0]['N'], 501)
    nose.tools.assert_list_equal(
        sorted(list(feature_container.stat[0].keys())),
        ['N', 'S1', 'S2', 'mean', 'std'])

    # Feat
    # Shape
    nose.tools.eq_(feature_container.feat[0].shape[0], 501)
    nose.tools.eq_(feature_container.feat[0].shape[1], 10)

    nose.tools.eq_(feature_container.shape[0], 501)
    nose.tools.eq_(feature_container.shape[1], 10)
def test_normalizer():
    """FeatureStacker.normalizer: combined normalizer dims follow the recipe.

    Extracts MFCCs once, then for each stacking recipe verifies that the
    normalizer merged by FeatureStacker.normalizer() reports the full
    accumulated frame count and mean/std vectors of the recipe's width.
    """
    FeatureExtractor(store=True, overwrite=True).extract(
        audio_file=os.path.join('material', 'test.wav'),
        extractor_name='mfcc',
        extractor_params={'mfcc': {
            'n_mfcc': 10
        }},
        storage_paths={'mfcc': os.path.join('material', 'test.mfcc.cpickle')})

    # (recipe, expected vector length): 'mfcc=0-5' keeps coefficients 0..5,
    # 'mfcc=1,2,3,4' keeps four explicit coefficients, bare 'mfcc' keeps
    # all 10 extracted coefficients.
    for test_recipe, expected_length in [('mfcc=0-5', 6),
                                         ('mfcc=1,2,3,4', 4),
                                         ('mfcc', 10)]:
        test_recipe_parsed = ParameterContainer()._parse_recipe(
            recipe=test_recipe)
        feature_container = FeatureContainer().load(
            filename=os.path.join('material', 'test.mfcc.cpickle'))
        feature_normalizer = FeatureNormalizer().accumulate(
            feature_container=feature_container).finalize()

        feature_stacker = FeatureStacker(recipe=test_recipe_parsed)
        feature_normalizer = feature_stacker.normalizer(
            normalizer_list={'mfcc': feature_normalizer})

        # All 501 frames of the test file are accumulated regardless of the
        # recipe; only the vector width changes.
        nose.tools.eq_(feature_normalizer['N'][0][0], 501)
        nose.tools.eq_(feature_normalizer['mean'][0].shape[0], 1)
        nose.tools.eq_(feature_normalizer['mean'][0].shape[1],
                       expected_length)

        nose.tools.eq_(feature_normalizer['std'][0].shape[0], 1)
        nose.tools.eq_(feature_normalizer['std'][0].shape[1],
                       expected_length)
# Example #17
def test_get_target_matrix_dict():
    """EventDetector._get_target_matrix_dict: matrix shape and class activity."""
    FeatureExtractor(store=True, overwrite=True).extract(
        audio_file=os.path.join('material', 'test.wav'),
        extractor_name='mfcc',
        extractor_params={'mfcc': {
            'n_mfcc': 10
        }},
        storage_paths={'mfcc': os.path.join('material', 'test.mfcc.cpickle')})

    feature_container = FeatureContainer(
        filename=os.path.join('material', 'test.mfcc.cpickle'))

    data = {
        'file1.wav': feature_container,
        'file2.wav': feature_container,
    }

    # One one-second event of each class per file, all in 'scene1'.
    event_tables = {
        'file1.wav': [(0.0, 1.0, 'event1'), (1.0, 2.0, 'event2')],
        'file2.wav': [(0.0, 1.0, 'event2'), (1.0, 2.0, 'event1')],
    }
    annotations = {
        filename: MetaDataContainer([{
            'file': filename,
            'scene_label': 'scene1',
            'event_onset': onset,
            'event_offset': offset,
            'event_label': label,
        } for onset, offset, label in events])
        for filename, events in event_tables.items()
    }

    ed = EventDetector(class_labels=['event1', 'event2'],
                       disable_progress_bar=True,
                       params={
                           'hop_length_seconds': 0.02,
                       })
    target_matrix = ed._get_target_matrix_dict(data=data,
                                               annotations=annotations)

    for filename in ('file1.wav', 'file2.wav'):
        # One row per frame (501), one column per class label.
        nose.tools.eq_(target_matrix[filename].shape, (501, 2))

        # A one-second event at 20 ms hop covers 50 active frames per class.
        nose.tools.eq_(numpy.sum(target_matrix[filename][:, 0] == 1), 50)
        nose.tools.eq_(numpy.sum(target_matrix[filename][:, 1] == 1), 50)
# Example #18
def test_predict():
    """EventDetectorGMM.predict: events are returned and bad params raise."""
    FeatureExtractor(store=True, overwrite=True).extract(
        audio_file=os.path.join('material', 'test.wav'),
        extractor_name='mfcc',
        extractor_params={'mfcc': {
            'n_mfcc': 10
        }},
        storage_paths={'mfcc': os.path.join('material', 'test.mfcc.cpickle')})

    feature_container = FeatureContainer(
        filename=os.path.join('material', 'test.mfcc.cpickle'))

    data = {
        'file1.wav': feature_container,
        'file2.wav': feature_container,
    }

    # (identifier, [(onset, offset, label), ...]) per file; 'scene1' throughout.
    event_tables = {
        'file1.wav': ('a', [(0.0, 1.0, 'event1'),
                            (1.0, 2.0, 'event2'),
                            (2.0, 3.0, 'event2'),
                            (4.0, 5.0, 'event1'),
                            (1.0, 2.0, 'event1')]),
        'file2.wav': ('b', [(0.0, 1.0, 'event2'),
                            (1.0, 2.0, 'event1'),
                            (2.0, 3.0, 'event2'),
                            (3.0, 4.0, 'event2')]),
    }
    annotations = {
        filename: MetaDataContainer([{
            'file': filename,
            'scene_label': 'scene1',
            'event_onset': onset,
            'event_offset': offset,
            'event_label': label,
            'identifier': identifier,
        } for onset, offset, label in events])
        for filename, (identifier, events) in event_tables.items()
    }

    ed = EventDetectorGMM(
        method='gmm',
        class_labels=['event1', 'event2'],
        params={
            'hop_length_seconds': 0.02,
            'parameters': {
                'n_components': 6,
                'covariance_type': 'diag',
                'tol': 0.001,
                'reg_covar': 0,
                'max_iter': 40,
                'n_init': 1,
                'init_params': 'kmeans',
                'random_state': 0,
            }
        },
        filename=os.path.join('material', 'test.model.cpickle'),
        disable_progress_bar=True,
    )

    ed.learn(data=data, annotations=annotations)

    recognizer_params = {
        'frame_accumulation': {
            'enable': False,
            'type': 'sliding_sum',
            'window_length_frames': 2,
        },
        'frame_binarization': {
            'enable': True,
            'type': 'global_threshold',
            'threshold': 10,
        }
    }
    result = ed.predict(feature_data=feature_container,
                        recognizer_params=recognizer_params)

    # More than five events are expected for the test signal.
    nose.tools.eq_(len(result) > 5, True)

    # An unknown binarization type must raise an AssertionError.
    recognizer_params['frame_binarization']['type'] = 'test'
    nose.tools.assert_raises(AssertionError, ed.predict, feature_container,
                             recognizer_params)
def test_load():
    """FeatureContainer loading: constructor, load(), and filename_dict."""
    FeatureExtractor(store=True, overwrite=True).extract(
        audio_file=os.path.join('material', 'test.wav'),
        extractor_name='mfcc',
        extractor_params={'mfcc': {
            'n_mfcc': 10
        }},
        storage_paths={'mfcc': os.path.join('material', 'test.mfcc.cpickle')})

    pickle_path = os.path.join('material', 'test.mfcc.cpickle')

    def _assert_container(container):
        # Checks shared by both single-file loading styles.
        nose.tools.assert_list_equal(sorted(list(container.keys())),
                                     ['feat', 'meta', 'stat'])

        nose.tools.eq_(posix_path(container.meta['audio_file']),
                       'material/test.wav')
        nose.tools.eq_(container.meta['parameters']['n_mels'], 40)
        nose.tools.eq_(container.meta['parameters']['n_mfcc'], 10)

        # Stat
        nose.tools.eq_(container.stat[0]['N'], 501)
        nose.tools.assert_list_equal(sorted(list(container.stat[0].keys())),
                                     ['N', 'S1', 'S2', 'mean', 'std'])

        # Feat shape
        nose.tools.eq_(container.feat[0].shape[0], 501)
        nose.tools.eq_(container.feat[0].shape[1], 10)

        nose.tools.eq_(container.shape[0], 501)
        nose.tools.eq_(container.shape[1], 10)

    # Test #1: filename handed straight to the constructor.
    feature_container = FeatureContainer(filename=pickle_path)
    _assert_container(feature_container)
    nose.tools.eq_(feature_container.channels, 1)
    nose.tools.eq_(feature_container.frames, 501)
    nose.tools.eq_(feature_container.vector_length, 10)

    # Test #2: explicit load() call.
    _assert_container(FeatureContainer().load(filename=pickle_path))

    # Test #3: filename_dict loads several features keyed by label.
    feature_repository = FeatureContainer().load(
        filename_dict={'mfcc1': pickle_path,
                       'mfcc2': pickle_path})

    nose.tools.assert_list_equal(sorted(list(feature_repository.keys())),
                                 ['mfcc1', 'mfcc2'])
# Example #20
def test_learn():
    """EventDetectorGMM.learn: one model per class with requested components."""
    FeatureExtractor(store=True, overwrite=True).extract(
        audio_file=os.path.join('material', 'test.wav'),
        extractor_name='mfcc',
        extractor_params={'mfcc': {
            'n_mfcc': 10
        }},
        storage_paths={'mfcc': os.path.join('material', 'test.mfcc.cpickle')})

    feature_container = FeatureContainer(
        filename=os.path.join('material', 'test.mfcc.cpickle'))

    data = {
        'file1.wav': feature_container,
        'file2.wav': feature_container,
    }

    # (identifier, [(onset, offset, label), ...]) per file; 'scene1' throughout.
    event_tables = {
        'file1.wav': ('a', [(0.0, 1.0, 'event1'),
                            (1.0, 2.0, 'event2'),
                            (2.0, 3.0, 'event2'),
                            (4.0, 5.0, 'event1'),
                            (1.0, 2.0, 'event1')]),
        'file2.wav': ('b', [(0.0, 1.0, 'event2'),
                            (1.0, 2.0, 'event1'),
                            (2.0, 3.0, 'event2'),
                            (3.0, 4.0, 'event2')]),
    }
    annotations = {
        filename: MetaDataContainer([{
            'file': filename,
            'scene_label': 'scene1',
            'event_onset': onset,
            'event_offset': offset,
            'event_label': label,
            'identifier': identifier,
        } for onset, offset, label in events])
        for filename, (identifier, events) in event_tables.items()
    }

    ed = EventDetectorGMM(
        method='gmm',
        class_labels=['event1', 'event2'],
        params={
            'hop_length_seconds': 0.02,
            'parameters': {
                'n_components': 6,
                'covariance_type': 'diag',
                'tol': 0.001,
                'reg_covar': 0,
                'max_iter': 40,
                'n_init': 1,
                'init_params': 'kmeans',
                'random_state': 0,
            }
        },
        filename=os.path.join('material', 'test.model.cpickle'),
        disable_progress_bar=True,
    )

    ed.learn(data=data, annotations=annotations)

    # One model entry per class label.
    nose.tools.eq_(len(ed.model), 2)

    # The fitted GMM carries the requested number of mixture components.
    nose.tools.eq_(ed.model['event1']['positive'].means_.shape[0], 6)
# Example #21
def test_predict():
    """SceneClassifierMLP.predict: majority-vote result and parameter errors."""
    FeatureExtractor(store=True, overwrite=True).extract(
        audio_file=os.path.join('material', 'test.wav'),
        extractor_name='mfcc',
        extractor_params={'mfcc': {
            'n_mfcc': 10
        }},
        storage_paths={'mfcc': os.path.join('material', 'test.mfcc.cpickle')})

    feature_container = FeatureContainer(
        filename=os.path.join('material', 'test.mfcc.cpickle'))

    data = {
        'file1.wav': feature_container,
        'file2.wav': feature_container,
    }

    # One scene label per file.
    annotations = {
        filename: MetaDataItem({
            'file': filename,
            'scene_label': scene,
        })
        for filename, scene in [('file1.wav', 'scene1'),
                                ('file2.wav', 'scene2')]
    }

    sc = SceneClassifierMLP(
        method='mlp',
        class_labels=['scene1', 'scene2'],
        params=learner_params,  # module-level learner configuration
        filename=os.path.join('material', 'test.model.cpickle'),
        disable_progress_bar=True,
    )

    sc.learn(data=data, annotations=annotations)

    recognizer_params = {
        'frame_accumulation': {
            'enable': False,
        },
        'frame_binarization': {
            'enable': True,
            'type': 'frame_max',
        },
        'decision_making': {
            'enable': True,
            'type': 'majority_vote',
        }
    }
    result = sc.predict(feature_data=feature_container,
                        recognizer_params=recognizer_params)

    # At least one classified segment is expected.
    nose.tools.eq_(len(result) > 0, True)

    # Unknown binarization / decision types must raise AssertionError.
    recognizer_params['frame_binarization']['type'] = 'test'
    nose.tools.assert_raises(AssertionError, sc.predict, feature_container,
                             recognizer_params)

    recognizer_params['frame_binarization']['type'] = 'frame_max'
    recognizer_params['decision_making']['type'] = 'test'
    nose.tools.assert_raises(AssertionError, sc.predict, feature_container,
                             recognizer_params)
def test():
    """FeatureAggregator.process: output width per aggregation recipe.

    Extracts MFCCs once, then for each recipe verifies that aggregating the
    stacked 10-dim feature matrix with a 10-frame window and 1-frame hop
    preserves the frame count (501) and produces the expected feature width.
    """
    FeatureExtractor(store=True, overwrite=True).extract(
        audio_file=os.path.join('material', 'test.wav'),
        extractor_name='mfcc',
        extractor_params={'mfcc': {
            'n_mfcc': 10
        }},
        storage_paths={'mfcc': os.path.join('material', 'test.mfcc.cpickle')})

    # Each scalar statistic contributes 10 dims (one per MFCC coefficient);
    # 'cov' yields a full 10x10 covariance and 'flatten' concatenates the
    # 10-frame window, so both give 10 * 10 dims.
    recipe_cases = [
        (['mean'], 10),
        (['mean', 'std'], 2 * 10),
        (['mean', 'std', 'kurtosis', 'skew'], 4 * 10),
        (['cov'], 10 * 10),
        (['flatten'], 10 * 10),
    ]
    for recipe, expected_width in recipe_cases:
        feature_aggregator = FeatureAggregator(
            recipe=recipe,
            win_length_frames=10,
            hop_length_frames=1,
        )

        feature_stacker = FeatureStacker(recipe=[{'method': 'mfcc'}])
        feature_repository = FeatureContainer().load(
            filename_dict={
                'mfcc': os.path.join('material', 'test.mfcc.cpickle')
            })
        feature_matrix = feature_stacker.process(
            feature_data=feature_repository)
        feature_matrix = feature_aggregator.process(
            feature_data=feature_matrix)

        # A one-frame hop keeps the original 501 frames.
        nose.tools.eq_(feature_matrix.shape[0], 501)
        nose.tools.eq_(feature_matrix.shape[1], expected_width)
def test_predict():
    FeatureExtractor(store=True, overwrite=True).extract(
        audio_file=os.path.join('material', 'test.wav'),
        extractor_name='mfcc',
        extractor_params={'mfcc': {
            'n_mfcc': 10
        }},
        storage_paths={'mfcc': os.path.join('material', 'test.mfcc.cpickle')})

    feature_container = FeatureContainer(
        filename=os.path.join('material', 'test.mfcc.cpickle'))

    data = {
        'file1.wav': feature_container,
        'file2.wav': feature_container,
    }

    annotations = {
        'file1.wav':
        MetaDataItem({
            'file': 'file1.wav',
            'scene_label': 'scene1',
        }),
        'file2.wav':
        MetaDataItem({
            'file': 'file2.wav',
            'scene_label': 'scene2',
        }),
    }

    sc = SceneClassifierGMMdeprecated(
        method='gmm_deprecated',
        class_labels=['scene1', 'scene2'],
        params={
            'n_components': 6,
            'covariance_type': 'diag',
            'random_state': 0,
            'tol': 0.001,
            'min_covar': 0.001,
            'n_iter': 40,
            'n_init': 1,
            'params': 'wmc',
            'init_params': 'wmc',
        },
        filename=os.path.join('material', 'test.model.cpickle'),
        disable_progress_bar=True,
    )

    sc.learn(data=data, annotations=annotations)
    recognizer_params = {
        'frame_accumulation': {
            'enable': True,
            'type': 'sum',
        },
        'frame_binarization': {
            'enable': False,
        },
        'decision_making': {
            'enable': True,
            'type': 'maximum',
        }
    }
    # Frame probabilities
    frame_probabilities = sc.predict(feature_data=feature_container)

    # Scene recognizer
    result = SceneRecognizer(
        params=recognizer_params,
        class_labels=['scene1', 'scene2'],
    ).process(frame_probabilities=frame_probabilities)

    # Test result
    nose.tools.eq_(result, 'scene1')