import os

import numpy
import nose.tools

# NOTE: the imports below are assumed from the DCASE framework package layout;
# adjust the module paths if your installation differs.
from dcase_framework.features import FeatureExtractor, FeatureContainer
from dcase_framework.metadata import MetaDataContainer, MetaDataItem
from dcase_framework.learners import (SceneClassifier, SceneClassifierGMM,
                                      SceneClassifierGMMdeprecated, SceneClassifierMLP)
from dcase_framework.recognizers import SceneRecognizer


def test_learn():
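    # Extract 10 MFCCs from the test clip and store the feature container on disk.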
    FeatureExtractor(store=True, overwrite=True).extract(
        audio_file=os.path.join('material', 'test.wav'),
        extractor_name='mfcc',
        extractor_params={
            'mfcc': {
                'n_mfcc': 10
            }
        },
        storage_paths={
            'mfcc': os.path.join('material', 'test.mfcc.cpickle')
        }
    )

    feature_container = FeatureContainer(filename=os.path.join('material', 'test.mfcc.cpickle'))

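    # The same feature container stands in for two different files; the
    # annotations give each a different scene label so that two classes get trained.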
    data = {
        'file1.wav': feature_container,
        'file2.wav': feature_container,
    }

    annotations = {
        'file1.wav': MetaDataItem(
            {
                'file': 'file1.wav',
                'scene_label': 'scene1',
            }
        ),
        'file2.wav': MetaDataItem(
            {
                'file': 'file2.wav',
                'scene_label': 'scene2',
            }
        ),
    }

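    # One GMM is trained per class label; the params appear to mirror the
    # arguments of sklearn's GaussianMixture.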
    sc = SceneClassifierGMM(
        method='gmm',
        class_labels=['scene1', 'scene2'],
        params={
            'n_components': 6,
            'covariance_type': 'diag',
            'tol': 0.001,
            'reg_covar': 0,
            'max_iter': 40,
            'n_init': 1,
            'init_params': 'kmeans',
            'random_state': 0,
        },
        filename=os.path.join('material', 'test.model.cpickle'),
        disable_progress_bar=True,
    )

    sc.learn(data=data, annotations=annotations)

    # Test model count
    nose.tools.eq_(len(sc.model), 2)

    # Test model dimensions
    nose.tools.eq_(sc.model['scene1'].means_.shape[0], 6)
def test_learn():
    FeatureExtractor(store=True, overwrite=True).extract(
        audio_file=os.path.join('material', 'test.wav'),
        extractor_name='mfcc',
        extractor_params={
            'mfcc': {
                'n_mfcc': 10
            }
        },
        storage_paths={
            'mfcc': os.path.join('material', 'test.mfcc.cpickle')
        }
    )

    feature_container = FeatureContainer(filename=os.path.join('material', 'test.mfcc.cpickle'))

    data = {
        'file1.wav': feature_container,
        'file2.wav': feature_container,
    }

    annotations = {
        'file1.wav': MetaDataItem(
            {
                'file': 'file1.wav',
                'scene_label': 'scene1',
            }
        ),
        'file2.wav': MetaDataItem(
            {
                'file': 'file2.wav',
                'scene_label': 'scene2',
            }
        ),
    }

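    # 'learner_params' is assumed to be defined at module level elsewhere in the
    # test file; it must at least contain the training settings accessed below,
    # e.g. learner_params['training']['epochs'].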
    sc = SceneClassifierMLP(
        method='mlp',
        class_labels=['scene1', 'scene2'],
        params=learner_params,
        filename=os.path.join('material', 'test.model.cpickle'),
        disable_progress_bar=True,
    )

    sc.learn(data=data, annotations=annotations)

    # Test epochs
    nose.tools.eq_(len(sc['learning_history']['loss']), learner_params['training']['epochs'])
def test_get_target_matrix_dict():
    FeatureExtractor(store=True, overwrite=True).extract(
        audio_file=os.path.join('material', 'test.wav'),
        extractor_name='mfcc',
        extractor_params={'mfcc': {
            'n_mfcc': 10
        }},
        storage_paths={'mfcc': os.path.join('material', 'test.mfcc.cpickle')})

    feature_container = FeatureContainer(
        filename=os.path.join('material', 'test.mfcc.cpickle'))

    data = {
        'file1.wav': feature_container,
        'file2.wav': feature_container,
    }

    annotations = {
        'file1.wav':
        MetaDataItem({
            'file': 'file1.wav',
            'scene_label': 'scene1',
        }),
        'file2.wav':
        MetaDataItem({
            'file': 'file2.wav',
            'scene_label': 'scene2',
        }),
    }

    sc = SceneClassifier(
        class_labels=['scene1', 'scene2'],
        disable_progress_bar=True,
    )
    target_matrix = sc._get_target_matrix_dict(data=data,
                                               annotations=annotations)

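    # Each row of a target matrix is a one-hot vector over class_labels, one row
    # per feature frame: every row for 'file1.wav' should be [1, 0].
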
    # Test shape
    nose.tools.eq_(target_matrix['file1.wav'].shape, (501, 2))
    nose.tools.eq_(target_matrix['file2.wav'].shape, (501, 2))

    # Test content
    nose.tools.eq_(numpy.any(target_matrix['file1.wav'][:, 0] == 1), True)
    nose.tools.eq_(numpy.any(target_matrix['file1.wav'][:, 1] == 1), False)

    nose.tools.eq_(numpy.any(target_matrix['file2.wav'][:, 0] == 1), False)
    nose.tools.eq_(numpy.any(target_matrix['file2.wav'][:, 1] == 1), True)
def test_generate_validation():

    annotations = {
        'file1.wav':
        MetaDataItem({
            'file': 'file1.wav',
            'scene_label': 'scene1',
            'identifier': 'a',
        }),
        'file2.wav':
        MetaDataItem({
            'file': 'file2.wav',
            'scene_label': 'scene1',
            'identifier': 'b',
        }),
        'file3.wav':
        MetaDataItem({
            'file': 'file3.wav',
            'scene_label': 'scene1',
            'identifier': 'c',
        }),
        'file4.wav':
        MetaDataItem({
            'file': 'file4.wav',
            'scene_label': 'scene2',
            'identifier': 'd',
        }),
        'file5.wav':
        MetaDataItem({
            'file': 'file5.wav',
            'scene_label': 'scene2',
            'identifier': 'e',
        }),
        'file6.wav':
        MetaDataItem({
            'file': 'file6.wav',
            'scene_label': 'scene2',
            'identifier': 'f',
        }),
    }
    sc = SceneClassifier(class_labels=['scene1', 'scene2'])

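    # 'generated_scene_balanced' draws the validation files balanced across scene
    # labels, keeping files with the same 'identifier' together; with 50% it
    # selects 4 of the 6 files here.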
    validation_set = sc._generate_validation(
        annotations=annotations,
        validation_type='generated_scene_balanced',
        valid_percentage=0.50,
        seed=0)
    nose.tools.eq_(len(validation_set), 4)

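    # An unknown validation_type must raise an AssertionError.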
    nose.tools.assert_raises(AssertionError, sc._generate_validation,
                             annotations, 'test', 0.5)
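
    # The method below belongs to a dataset class (shown out of its class
    # context): it writes the meta file and generates stratified
    # cross-validation setup files.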
    def _after_extract(self, to_return=None):
        if not self.meta_container.exists():
            meta_data = MetaDataContainer()
            for file in self.audio_files:
                meta_data.append(MetaDataItem({
                    'file': os.path.split(file)[1],
                    'scene_label': os.path.splitext(os.path.split(file)[1])[0][:-2]
                }))
            self.meta_container.update(meta_data)
            self.meta_container.save()

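        # Check that train/test setup files exist for every fold before regenerating.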
        all_folds_found = True
        for fold in range(1, self.crossvalidation_folds + 1):
            if not os.path.isfile(self._get_evaluation_setup_filename(setup_part='train', fold=fold)):
                all_folds_found = False
            if not os.path.isfile(self._get_evaluation_setup_filename(setup_part='test', fold=fold)):
                all_folds_found = False

        if not all_folds_found:
            if not os.path.isdir(self.evaluation_setup_path):
                os.makedirs(self.evaluation_setup_path)

            classes = self.meta.slice_field('scene_label')
            files = numpy.array(self.meta.slice_field('file'))

            from sklearn.model_selection import StratifiedShuffleSplit
            sss = StratifiedShuffleSplit(n_splits=self.crossvalidation_folds, test_size=0.3, random_state=0)

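            # Each split becomes one fold; X is only used for indexing, the
            # stratification itself is driven by the labels passed as y.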
            fold = 1
            for train_index, test_index in sss.split(y=classes, X=classes):
                MetaDataContainer(self.meta.filter(file_list=list(files[train_index])),
                                  filename=self._get_evaluation_setup_filename(setup_part='train', fold=fold)).save()

                MetaDataContainer(self.meta.filter(file_list=list(files[test_index])).remove_field('scene_label'),
                                  filename=self._get_evaluation_setup_filename(setup_part='test', fold=fold)).save()

                MetaDataContainer(self.meta.filter(file_list=list(files[test_index])),
                                  filename=self._get_evaluation_setup_filename(setup_part='evaluate', fold=fold)).save()
                fold += 1
def test_predict():
    FeatureExtractor(store=True, overwrite=True).extract(
        audio_file=os.path.join('material', 'test.wav'),
        extractor_name='mfcc',
        extractor_params={
            'mfcc': {
                'n_mfcc': 10
            }
        },
        storage_paths={
            'mfcc': os.path.join('material', 'test.mfcc.cpickle')
        }
    )

    feature_container = FeatureContainer(filename=os.path.join('material', 'test.mfcc.cpickle'))

    data = {
        'file1.wav': feature_container,
        'file2.wav': feature_container,
    }

    annotations = {
        'file1.wav': MetaDataItem(
            {
                'file': 'file1.wav',
                'scene_label': 'scene1',
            }
        ),
        'file2.wav': MetaDataItem(
            {
                'file': 'file2.wav',
                'scene_label': 'scene2',
            }
        ),
    }

    sc = SceneClassifierMLP(
        method='mlp',
        class_labels=['scene1', 'scene2'],
        params=learner_params,
        filename=os.path.join('material', 'test.model.cpickle'),
        disable_progress_bar=True,
    )

    sc.learn(data=data, annotations=annotations)
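    # Per-frame binarization ('frame_max') followed by a majority vote over frames.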
    recognizer_params = {
        'frame_accumulation': {
            'enable': False,
        },
        'frame_binarization': {
            'enable': True,
            'type': 'frame_max',
        },
        'decision_making': {
            'enable': True,
            'type': 'majority_vote',
        }
    }
    result = sc.predict(
        feature_data=feature_container,
        recognizer_params=recognizer_params
    )

    # Test result
    nose.tools.eq_(len(result) > 0, True)

    # Test errors
    recognizer_params['frame_binarization']['type'] = 'test'
    nose.tools.assert_raises(AssertionError, sc.predict, feature_container, recognizer_params)

    recognizer_params['frame_binarization']['type'] = 'frame_max'
    recognizer_params['decision_making']['type'] = 'test'
    nose.tools.assert_raises(AssertionError, sc.predict, feature_container, recognizer_params)
def test_predict():
    FeatureExtractor(store=True, overwrite=True).extract(
        audio_file=os.path.join('material', 'test.wav'),
        extractor_name='mfcc',
        extractor_params={
            'mfcc': {
                'n_mfcc': 10
            }
        },
        storage_paths={
            'mfcc': os.path.join('material', 'test.mfcc.cpickle')
        }
    )

    feature_container = FeatureContainer(filename=os.path.join('material', 'test.mfcc.cpickle'))

    data = {
        'file1.wav': feature_container,
        'file2.wav': feature_container,
    }

    annotations = {
        'file1.wav': MetaDataItem(
            {
                'file': 'file1.wav',
                'scene_label': 'scene1',
            }
        ),
        'file2.wav': MetaDataItem(
            {
                'file': 'file2.wav',
                'scene_label': 'scene2',
            }
        ),
    }

    sc = SceneClassifierGMM(
        method='gmm',
        class_labels=['scene1', 'scene2'],
        params={
            'n_components': 6,
            'covariance_type': 'diag',
            'tol': 0.001,
            'reg_covar': 0,
            'max_iter': 40,
            'n_init': 1,
            'init_params': 'kmeans',
            'random_state': 0,
        },
        filename=os.path.join('material', 'test.model.cpickle'),
        disable_progress_bar=True,
    )

    sc.learn(data=data, annotations=annotations)
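    # Frame scores are accumulated by summation; the class with the maximum
    # accumulated score wins.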
    recognizer_params = {
        'frame_accumulation': {
            'enable': True,
            'type': 'sum',
        },
        'frame_binarization': {
            'enable': False,
        },
        'decision_making': {
            'enable': True,
            'type': 'maximum',
        }
    }
    result = sc.predict(
        feature_data=feature_container,
        recognizer_params=recognizer_params
    )

    # Test result
    nose.tools.eq_(result, 'scene1')

    # Test errors
    recognizer_params['frame_accumulation']['type'] = 'test'
    nose.tools.assert_raises(AssertionError, sc.predict, feature_container, recognizer_params)

    recognizer_params['frame_accumulation']['type'] = 'sum'
    recognizer_params['decision_making']['type'] = 'test'
    nose.tools.assert_raises(AssertionError, sc.predict, feature_container, recognizer_params)
def test_predict():
    FeatureExtractor(store=True, overwrite=True).extract(
        audio_file=os.path.join('material', 'test.wav'),
        extractor_name='mfcc',
        extractor_params={'mfcc': {
            'n_mfcc': 10
        }},
        storage_paths={'mfcc': os.path.join('material', 'test.mfcc.cpickle')})

    feature_container = FeatureContainer(
        filename=os.path.join('material', 'test.mfcc.cpickle'))

    data = {
        'file1.wav': feature_container,
        'file2.wav': feature_container,
    }

    annotations = {
        'file1.wav':
        MetaDataItem({
            'file': 'file1.wav',
            'scene_label': 'scene1',
        }),
        'file2.wav':
        MetaDataItem({
            'file': 'file2.wav',
            'scene_label': 'scene2',
        }),
    }

    sc = SceneClassifierGMMdeprecated(
        method='gmm_deprecated',
        class_labels=['scene1', 'scene2'],
        params={
            'n_components': 6,
            'covariance_type': 'diag',
            'random_state': 0,
            'tol': 0.001,
            'min_covar': 0.001,
            'n_iter': 40,
            'n_init': 1,
            'params': 'wmc',
            'init_params': 'wmc',
        },
        filename=os.path.join('material', 'test.model.cpickle'),
        disable_progress_bar=True,
    )

    sc.learn(data=data, annotations=annotations)
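    # This variant only asks the learner for frame probabilities; the final
    # decision is made by a separate SceneRecognizer.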
    recognizer_params = {
        'frame_accumulation': {
            'enable': True,
            'type': 'sum',
        },
        'frame_binarization': {
            'enable': False,
        },
        'decision_making': {
            'enable': True,
            'type': 'maximum',
        }
    }
    # Frame probabilities
    frame_probabilities = sc.predict(feature_data=feature_container)

    # Scene recognizer
    result = SceneRecognizer(
        params=recognizer_params,
        class_labels=['scene1', 'scene2'],
    ).process(frame_probabilities=frame_probabilities)

    # Test result
    nose.tools.eq_(result, 'scene1')