import os

import numpy
import nose.tools

# The imports below assume the dcase_framework package layout; adjust the
# module paths if your checkout differs.
from dcase_framework.features import FeatureExtractor, FeatureContainer
from dcase_framework.metadata import MetaDataContainer, MetaDataItem
from dcase_framework.learners import (SceneClassifier, SceneClassifierGMM,
                                      SceneClassifierGMMdeprecated,
                                      SceneClassifierMLP)
from dcase_framework.recognizers import SceneRecognizer

# Note: these tests were collected from several test modules; the duplicated
# function names (test_learn, test_predict) will shadow one another if they
# are kept in a single module.


def test_learn():
    FeatureExtractor(store=True, overwrite=True).extract(
        audio_file=os.path.join('material', 'test.wav'),
        extractor_name='mfcc',
        extractor_params={
            'mfcc': {
                'n_mfcc': 10
            }
        },
        storage_paths={
            'mfcc': os.path.join('material', 'test.mfcc.cpickle')
        }
    )
    feature_container = FeatureContainer(filename=os.path.join('material', 'test.mfcc.cpickle'))

    data = {
        'file1.wav': feature_container,
        'file2.wav': feature_container,
    }

    annotations = {
        'file1.wav': MetaDataItem({
            'file': 'file1.wav',
            'scene_label': 'scene1',
        }),
        'file2.wav': MetaDataItem({
            'file': 'file2.wav',
            'scene_label': 'scene2',
        }),
    }

    sc = SceneClassifierGMM(
        method='gmm',
        class_labels=['scene1', 'scene2'],
        params={
            'n_components': 6,
            'covariance_type': 'diag',
            'tol': 0.001,
            'reg_covar': 0,
            'max_iter': 40,
            'n_init': 1,
            'init_params': 'kmeans',
            'random_state': 0,
        },
        filename=os.path.join('material', 'test.model.cpickle'),
        disable_progress_bar=True,
    )
    sc.learn(data=data, annotations=annotations)

    # Test model count
    nose.tools.eq_(len(sc.model), 2)

    # Test model dimensions
    nose.tools.eq_(sc.model['scene1'].means_.shape[0], 6)
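# learner_params is referenced by the MLP tests below but was not part of this
# excerpt; in the full test suite it is defined at module level. The dict
# below is a hypothetical minimal stand-in: only 'training' -> 'epochs' is
# asserted on, and the remaining keys and values are illustrative assumptions
# about the MLP learner configuration.
learner_params = {
    'seed': 0,
    'validation': {
        'enable': False,
    },
    'training': {
        'epochs': 10,
        'batch_size': 16,
        'shuffle': True,
    },
}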
def test_learn():
    FeatureExtractor(store=True, overwrite=True).extract(
        audio_file=os.path.join('material', 'test.wav'),
        extractor_name='mfcc',
        extractor_params={
            'mfcc': {
                'n_mfcc': 10
            }
        },
        storage_paths={
            'mfcc': os.path.join('material', 'test.mfcc.cpickle')
        }
    )
    feature_container = FeatureContainer(filename=os.path.join('material', 'test.mfcc.cpickle'))

    data = {
        'file1.wav': feature_container,
        'file2.wav': feature_container,
    }

    annotations = {
        'file1.wav': MetaDataItem({
            'file': 'file1.wav',
            'scene_label': 'scene1',
        }),
        'file2.wav': MetaDataItem({
            'file': 'file2.wav',
            'scene_label': 'scene2',
        }),
    }

    sc = SceneClassifierMLP(
        method='mlp',
        class_labels=['scene1', 'scene2'],
        params=learner_params,
        filename=os.path.join('material', 'test.model.cpickle'),
        disable_progress_bar=True,
    )
    sc.learn(data=data, annotations=annotations)

    # Test epochs
    nose.tools.eq_(len(sc['learning_history']['loss']), learner_params['training']['epochs'])
def test_get_target_matrix_dict():
    FeatureExtractor(store=True, overwrite=True).extract(
        audio_file=os.path.join('material', 'test.wav'),
        extractor_name='mfcc',
        extractor_params={
            'mfcc': {
                'n_mfcc': 10
            }
        },
        storage_paths={
            'mfcc': os.path.join('material', 'test.mfcc.cpickle')
        }
    )
    feature_container = FeatureContainer(filename=os.path.join('material', 'test.mfcc.cpickle'))

    data = {
        'file1.wav': feature_container,
        'file2.wav': feature_container,
    }

    annotations = {
        'file1.wav': MetaDataItem({
            'file': 'file1.wav',
            'scene_label': 'scene1',
        }),
        'file2.wav': MetaDataItem({
            'file': 'file2.wav',
            'scene_label': 'scene2',
        }),
    }

    sc = SceneClassifier(
        class_labels=['scene1', 'scene2'],
        disable_progress_bar=True,
    )
    target_matrix = sc._get_target_matrix_dict(data=data, annotations=annotations)

    # Test shape
    nose.tools.eq_(target_matrix['file1.wav'].shape, (501, 2))
    nose.tools.eq_(target_matrix['file2.wav'].shape, (501, 2))

    # Test content
    nose.tools.eq_(numpy.any(target_matrix['file1.wav'][:, 0] == 1), True)
    nose.tools.eq_(numpy.any(target_matrix['file1.wav'][:, 1] == 1), False)
    nose.tools.eq_(numpy.any(target_matrix['file2.wav'][:, 0] == 1), False)
    nose.tools.eq_(numpy.any(target_matrix['file2.wav'][:, 1] == 1), True)
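def _sketch_target_matrix(frame_count, class_index, class_count):
    # Illustrative sketch (not part of the original suite): per file,
    # _get_target_matrix_dict yields a (frames x classes) one-hot matrix with
    # the file's scene label active on every frame, which is what the shape
    # and content assertions above check.
    target = numpy.zeros((frame_count, class_count))
    target[:, class_index] = 1
    return target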
def test_generate_validation():
    annotations = {
        'file1.wav': MetaDataItem({
            'file': 'file1.wav',
            'scene_label': 'scene1',
            'identifier': 'a',
        }),
        'file2.wav': MetaDataItem({
            'file': 'file2.wav',
            'scene_label': 'scene1',
            'identifier': 'b',
        }),
        'file3.wav': MetaDataItem({
            'file': 'file3.wav',
            'scene_label': 'scene1',
            'identifier': 'c',
        }),
        'file4.wav': MetaDataItem({
            'file': 'file4.wav',
            'scene_label': 'scene2',
            'identifier': 'd',
        }),
        'file5.wav': MetaDataItem({
            'file': 'file5.wav',
            'scene_label': 'scene2',
            'identifier': 'e',
        }),
        'file6.wav': MetaDataItem({
            'file': 'file6.wav',
            'scene_label': 'scene2',
            'identifier': 'f',
        }),
    }

    sc = SceneClassifier(class_labels=['scene1', 'scene2'])
    validation_set = sc._generate_validation(
        annotations=annotations,
        validation_type='generated_scene_balanced',
        valid_percentage=0.50,
        seed=0
    )
    nose.tools.eq_(len(validation_set), 4)

    nose.tools.assert_raises(AssertionError, sc._generate_validation,
                             annotations, 'test', 0.5)
def _after_extract(self, to_return=None):
    if not self.meta_container.exists():
        meta_data = MetaDataContainer()
        for file in self.audio_files:
            meta_data.append(MetaDataItem({
                'file': os.path.split(file)[1],
                'scene_label': os.path.splitext(os.path.split(file)[1])[0][:-2]
            }))
        self.meta_container.update(meta_data)
        self.meta_container.save()

    # Check that train/test setup files exist for every fold (inclusive range).
    all_folds_found = True
    for fold in range(1, self.crossvalidation_folds + 1):
        if not os.path.isfile(self._get_evaluation_setup_filename(setup_part='train', fold=fold)):
            all_folds_found = False
        if not os.path.isfile(self._get_evaluation_setup_filename(setup_part='test', fold=fold)):
            all_folds_found = False

    if not all_folds_found:
        if not os.path.isdir(self.evaluation_setup_path):
            os.makedirs(self.evaluation_setup_path)

        classes = self.meta.slice_field('scene_label')
        files = numpy.array(self.meta.slice_field('file'))

        from sklearn.model_selection import StratifiedShuffleSplit
        sss = StratifiedShuffleSplit(n_splits=self.crossvalidation_folds, test_size=0.3, random_state=0)

        fold = 1
        for train_index, test_index in sss.split(X=classes, y=classes):
            MetaDataContainer(
                self.meta.filter(file_list=list(files[train_index])),
                filename=self._get_evaluation_setup_filename(setup_part='train', fold=fold)
            ).save()
            # Test lists are published without scene labels.
            MetaDataContainer(
                self.meta.filter(file_list=list(files[test_index])).remove_field('scene_label'),
                filename=self._get_evaluation_setup_filename(setup_part='test', fold=fold)
            ).save()
            MetaDataContainer(
                self.meta.filter(file_list=list(files[test_index])),
                filename=self._get_evaluation_setup_filename(setup_part='evaluate', fold=fold)
            ).save()
            fold += 1
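def _demo_stratified_shuffle_split():
    # Standalone sketch (not part of the original suite) of the
    # StratifiedShuffleSplit call above: each split draws a random train/test
    # partition that preserves the scene_label proportions in both subsets.
    # The labels and file names here are made up for illustration.
    from sklearn.model_selection import StratifiedShuffleSplit

    labels = ['scene1'] * 4 + ['scene2'] * 4
    files = numpy.array(['f{}.wav'.format(i) for i in range(8)])

    sss = StratifiedShuffleSplit(n_splits=2, test_size=0.3, random_state=0)
    for train_index, test_index in sss.split(X=labels, y=labels):
        print(list(files[train_index]), list(files[test_index]))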
def test_predict():
    FeatureExtractor(store=True, overwrite=True).extract(
        audio_file=os.path.join('material', 'test.wav'),
        extractor_name='mfcc',
        extractor_params={
            'mfcc': {
                'n_mfcc': 10
            }
        },
        storage_paths={
            'mfcc': os.path.join('material', 'test.mfcc.cpickle')
        }
    )
    feature_container = FeatureContainer(filename=os.path.join('material', 'test.mfcc.cpickle'))

    data = {
        'file1.wav': feature_container,
        'file2.wav': feature_container,
    }

    annotations = {
        'file1.wav': MetaDataItem({
            'file': 'file1.wav',
            'scene_label': 'scene1',
        }),
        'file2.wav': MetaDataItem({
            'file': 'file2.wav',
            'scene_label': 'scene2',
        }),
    }

    sc = SceneClassifierMLP(
        method='mlp',
        class_labels=['scene1', 'scene2'],
        params=learner_params,
        filename=os.path.join('material', 'test.model.cpickle'),
        disable_progress_bar=True,
    )
    sc.learn(data=data, annotations=annotations)

    recognizer_params = {
        'frame_accumulation': {
            'enable': False,
        },
        'frame_binarization': {
            'enable': True,
            'type': 'frame_max',
        },
        'decision_making': {
            'enable': True,
            'type': 'majority_vote',
        }
    }
    result = sc.predict(
        feature_data=feature_container,
        recognizer_params=recognizer_params
    )

    # Test result
    nose.tools.eq_(len(result) > 0, True)

    # Test errors
    recognizer_params['frame_binarization']['type'] = 'test'
    nose.tools.assert_raises(AssertionError, sc.predict, feature_container, recognizer_params)

    recognizer_params['frame_binarization']['type'] = 'frame_max'
    recognizer_params['decision_making']['type'] = 'test'
    nose.tools.assert_raises(AssertionError, sc.predict, feature_container, recognizer_params)
def test_predict():
    FeatureExtractor(store=True, overwrite=True).extract(
        audio_file=os.path.join('material', 'test.wav'),
        extractor_name='mfcc',
        extractor_params={
            'mfcc': {
                'n_mfcc': 10
            }
        },
        storage_paths={
            'mfcc': os.path.join('material', 'test.mfcc.cpickle')
        }
    )
    feature_container = FeatureContainer(filename=os.path.join('material', 'test.mfcc.cpickle'))

    data = {
        'file1.wav': feature_container,
        'file2.wav': feature_container,
    }

    annotations = {
        'file1.wav': MetaDataItem({
            'file': 'file1.wav',
            'scene_label': 'scene1',
        }),
        'file2.wav': MetaDataItem({
            'file': 'file2.wav',
            'scene_label': 'scene2',
        }),
    }

    sc = SceneClassifierGMM(
        method='gmm',
        class_labels=['scene1', 'scene2'],
        params={
            'n_components': 6,
            'covariance_type': 'diag',
            'tol': 0.001,
            'reg_covar': 0,
            'max_iter': 40,
            'n_init': 1,
            'init_params': 'kmeans',
            'random_state': 0,
        },
        filename=os.path.join('material', 'test.model.cpickle'),
        disable_progress_bar=True,
    )
    sc.learn(data=data, annotations=annotations)

    recognizer_params = {
        'frame_accumulation': {
            'enable': True,
            'type': 'sum',
        },
        'frame_binarization': {
            'enable': False,
        },
        'decision_making': {
            'enable': True,
            'type': 'maximum',
        }
    }
    result = sc.predict(
        feature_data=feature_container,
        recognizer_params=recognizer_params
    )

    # Test result
    nose.tools.eq_(result, 'scene1')

    # Test errors
    recognizer_params['frame_accumulation']['type'] = 'test'
    nose.tools.assert_raises(AssertionError, sc.predict, feature_container, recognizer_params)

    recognizer_params['frame_accumulation']['type'] = 'sum'
    recognizer_params['decision_making']['type'] = 'test'
    nose.tools.assert_raises(AssertionError, sc.predict, feature_container, recognizer_params)
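def _sketch_decision_strategies(frame_probabilities, class_labels):
    # Illustrative sketch (not part of the original suite) of the two decision
    # setups exercised by the test_predict variants, assuming
    # frame_probabilities is a numpy array shaped (classes x frames):
    #   - 'frame_max' binarization + 'majority_vote': pick the winning class
    #     per frame, then the class that wins the most frames;
    #   - 'sum' accumulation + 'maximum': sum per-class scores over frames,
    #     then pick the class with the largest total.
    votes = numpy.bincount(numpy.argmax(frame_probabilities, axis=0),
                           minlength=len(class_labels))
    majority_vote = class_labels[numpy.argmax(votes)]

    totals = numpy.sum(frame_probabilities, axis=1)
    maximum = class_labels[numpy.argmax(totals)]
    return majority_vote, maximum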
def test_predict():
    FeatureExtractor(store=True, overwrite=True).extract(
        audio_file=os.path.join('material', 'test.wav'),
        extractor_name='mfcc',
        extractor_params={
            'mfcc': {
                'n_mfcc': 10
            }
        },
        storage_paths={
            'mfcc': os.path.join('material', 'test.mfcc.cpickle')
        }
    )
    feature_container = FeatureContainer(filename=os.path.join('material', 'test.mfcc.cpickle'))

    data = {
        'file1.wav': feature_container,
        'file2.wav': feature_container,
    }

    annotations = {
        'file1.wav': MetaDataItem({
            'file': 'file1.wav',
            'scene_label': 'scene1',
        }),
        'file2.wav': MetaDataItem({
            'file': 'file2.wav',
            'scene_label': 'scene2',
        }),
    }

    sc = SceneClassifierGMMdeprecated(
        method='gmm_deprecated',
        class_labels=['scene1', 'scene2'],
        params={
            'n_components': 6,
            'covariance_type': 'diag',
            'random_state': 0,
            'tol': 0.001,
            'min_covar': 0.001,
            'n_iter': 40,
            'n_init': 1,
            'params': 'wmc',
            'init_params': 'wmc',
        },
        filename=os.path.join('material', 'test.model.cpickle'),
        disable_progress_bar=True,
    )
    sc.learn(data=data, annotations=annotations)

    recognizer_params = {
        'frame_accumulation': {
            'enable': True,
            'type': 'sum',
        },
        'frame_binarization': {
            'enable': False,
        },
        'decision_making': {
            'enable': True,
            'type': 'maximum',
        }
    }

    # Frame probabilities
    frame_probabilities = sc.predict(feature_data=feature_container)

    # Scene recognizer
    result = SceneRecognizer(
        params=recognizer_params,
        class_labels=['scene1', 'scene2'],
    ).process(frame_probabilities=frame_probabilities)

    # Test result
    nose.tools.eq_(result, 'scene1')