def test_learn():
    """EventDetectorGMM.learn should produce one trained model per class label."""

    def _event(file_name, onset, offset, label, identifier):
        # Build a single annotation entry; every event in this test shares
        # the same scene label.
        return {
            'file': file_name,
            'scene_label': 'scene1',
            'event_onset': onset,
            'event_offset': offset,
            'event_label': label,
            'identifier': identifier,
        }

    # Extract MFCC features for the test clip and store them on disk so the
    # FeatureContainer below can load them by filename.
    FeatureExtractor(store=True, overwrite=True).extract(
        audio_file=os.path.join('material', 'test.wav'),
        extractor_name='mfcc',
        extractor_params={'mfcc': {'n_mfcc': 10}},
        storage_paths={'mfcc': os.path.join('material', 'test.mfcc.cpickle')}
    )

    feature_container = FeatureContainer(
        filename=os.path.join('material', 'test.mfcc.cpickle')
    )

    # Both files point at the same feature data; only the annotations differ.
    data = {
        'file1.wav': feature_container,
        'file2.wav': feature_container,
    }

    annotations = {
        'file1.wav': MetaDataContainer([
            _event('file1.wav', 0.0, 1.0, 'event1', 'a'),
            _event('file1.wav', 1.0, 2.0, 'event2', 'a'),
            _event('file1.wav', 2.0, 3.0, 'event2', 'a'),
            _event('file1.wav', 4.0, 5.0, 'event1', 'a'),
            _event('file1.wav', 1.0, 2.0, 'event1', 'a'),
        ]),
        'file2.wav': MetaDataContainer([
            _event('file2.wav', 0.0, 1.0, 'event2', 'b'),
            _event('file2.wav', 1.0, 2.0, 'event1', 'b'),
            _event('file2.wav', 2.0, 3.0, 'event2', 'b'),
            _event('file2.wav', 3.0, 4.0, 'event2', 'b'),
        ]),
    }

    detector = EventDetectorGMM(
        method='gmm',
        class_labels=['event1', 'event2'],
        params={
            'hop_length_seconds': 0.02,
            'parameters': {
                'n_components': 6,
                'covariance_type': 'diag',
                'tol': 0.001,
                'reg_covar': 0,
                'max_iter': 40,
                'n_init': 1,
                'init_params': 'kmeans',
                'random_state': 0,  # fixed seed keeps the test deterministic
            }
        },
        filename=os.path.join('material', 'test.model.cpickle'),
        disable_progress_bar=True,
    )
    detector.learn(data=data, annotations=annotations)

    # One model entry per class label.
    nose.tools.eq_(len(detector.model), 2)

    # The trained GMM should carry n_components=6 component means.
    nose.tools.eq_(detector.model['event1']['positive'].means_.shape[0], 6)
def test_formats():
    """MetaDataContainer.load should parse delimited annotation files with
    2, 3, 5 and 7 columns, for comma, semicolon and tab delimiters.
    """

    def _load_first_item(rows):
        # Write the rows to a named temporary file, load it back through
        # MetaDataContainer and return the first parsed item.
        # delete=False + manual unlink is required so the file can be
        # re-opened by name (NamedTemporaryFile cannot be re-opened while
        # still open on Windows).  Using the default temp directory instead
        # of a hard-coded '/tmp' keeps the test portable.
        tmp = tempfile.NamedTemporaryFile('r+', suffix='.txt', delete=False)
        try:
            for row in rows:
                tmp.write(row)
            tmp.close()
            return MetaDataContainer().load(filename=tmp.name)[0]
        finally:
            os.unlink(tmp.name)

    for delimiter in [',', ';', '\t']:
        # Two columns: onset, offset
        item = _load_first_item([
            '0.5' + delimiter + '0.7\n',
            '2.5' + delimiter + '2.7\n',
        ])
        nose.tools.eq_(item.onset, 0.5)
        nose.tools.eq_(item.offset, 0.7)

        # Three columns: onset, offset, event label
        item = _load_first_item([
            '0.5' + delimiter + '0.7' + delimiter + 'event\n',
            '2.5' + delimiter + '2.7' + delimiter + 'event\n',
        ])
        nose.tools.eq_(item.onset, 0.5)
        nose.tools.eq_(item.offset, 0.7)
        nose.tools.eq_(item.event_label, 'event')

        # Five columns: file, scene label, onset, offset, event label
        item = _load_first_item([
            'file.wav' + delimiter + 'scene' + delimiter + '0.5' + delimiter + '0.7' + delimiter + 'event\n',
            'file.wav' + delimiter + 'scene' + delimiter + '0.5' + delimiter + '0.7' + delimiter + 'event\n',
        ])
        nose.tools.eq_(item.onset, 0.5)
        nose.tools.eq_(item.offset, 0.7)
        nose.tools.eq_(item.event_label, 'event')
        nose.tools.eq_(item.file, 'file.wav')
        nose.tools.eq_(item.scene_label, 'scene')

        # Seven columns: file, scene label, onset, offset, event label,
        # source label, identifier
        item = _load_first_item([
            'file.wav' + delimiter + 'scene' + delimiter + '0.5' + delimiter + '0.7' + delimiter + 'event' + delimiter + 'm' + delimiter + 'a1\n',
            'file.wav' + delimiter + 'scene' + delimiter + '0.5' + delimiter + '0.7' + delimiter + 'event' + delimiter + 'm' + delimiter + 'a2\n',
        ])
        nose.tools.eq_(item.onset, 0.5)
        nose.tools.eq_(item.offset, 0.7)
        nose.tools.eq_(item.event_label, 'event')
        nose.tools.eq_(item.file, 'file.wav')
        nose.tools.eq_(item.scene_label, 'scene')
        nose.tools.eq_(item.identifier, 'a1')
        nose.tools.eq_(item.source_label, 'm')
def test_predict():
    """A trained EventDetectorGMM plus EventRecognizer should detect events
    in the test clip.
    """

    def _event(file_name, onset, offset, label, identifier):
        # Build a single annotation entry; every event in this test shares
        # the same scene label.
        return {
            'file': file_name,
            'scene_label': 'scene1',
            'event_onset': onset,
            'event_offset': offset,
            'event_label': label,
            'identifier': identifier,
        }

    # Extract MFCC features for the test clip and store them on disk so the
    # FeatureContainer below can load them by filename.
    FeatureExtractor(store=True, overwrite=True).extract(
        audio_file=os.path.join('material', 'test.wav'),
        extractor_name='mfcc',
        extractor_params={'mfcc': {'n_mfcc': 10}},
        storage_paths={'mfcc': os.path.join('material', 'test.mfcc.cpickle')}
    )

    feature_container = FeatureContainer(
        filename=os.path.join('material', 'test.mfcc.cpickle')
    )

    # Both files point at the same feature data; only the annotations differ.
    data = {
        'file1.wav': feature_container,
        'file2.wav': feature_container,
    }

    annotations = {
        'file1.wav': MetaDataContainer([
            _event('file1.wav', 0.0, 1.0, 'event1', 'a'),
            _event('file1.wav', 1.0, 2.0, 'event2', 'a'),
            _event('file1.wav', 2.0, 3.0, 'event2', 'a'),
            _event('file1.wav', 4.0, 5.0, 'event1', 'a'),
            _event('file1.wav', 1.0, 2.0, 'event1', 'a'),
        ]),
        'file2.wav': MetaDataContainer([
            _event('file2.wav', 0.0, 1.0, 'event2', 'b'),
            _event('file2.wav', 1.0, 2.0, 'event1', 'b'),
            _event('file2.wav', 2.0, 3.0, 'event2', 'b'),
            _event('file2.wav', 3.0, 4.0, 'event2', 'b'),
        ]),
    }

    detector = EventDetectorGMM(
        method='gmm',
        class_labels=['event1', 'event2'],
        params={
            'hop_length_seconds': 0.02,
            'parameters': {
                'n_components': 6,
                'covariance_type': 'diag',
                'tol': 0.001,
                'reg_covar': 0,
                'max_iter': 40,
                'n_init': 1,
                'init_params': 'kmeans',
                'random_state': 0,  # fixed seed keeps the test deterministic
            }
        },
        filename=os.path.join('material', 'test.model.cpickle'),
        disable_progress_bar=True,
    )
    detector.learn(data=data, annotations=annotations)

    recognizer_params = {
        'frame_accumulation': {
            'enable': False,
            'type': 'sliding_sum',
            'window_length_frames': 2,
        },
        'frame_binarization': {
            'enable': True,
            'type': 'global_threshold',
            'threshold': 10,
        }
    }

    # Frame-wise class probabilities from the trained detector.
    frame_probabilities = detector.predict(feature_data=feature_container)

    # Turn frame probabilities into an event list.
    result = EventRecognizer(
        hop_length_seconds=0.02,
        params=recognizer_params,
        class_labels=['event1', 'event2'],
    ).process(frame_probabilities=frame_probabilities)

    # The recognizer should produce more than five detected events.
    nose.tools.eq_(len(result) > 5, True)
def test_unique_scene_labels():
    """unique_scene_labels should yield the two distinct scene labels in
    the test content, in order.
    """
    scene_labels = MetaDataContainer(content).unique_scene_labels
    nose.tools.eq_(len(scene_labels), 2)
    for index, expected in enumerate(['meeting', 'office']):
        nose.tools.eq_(scene_labels[index], expected)
def test_max_event_offset():
    """max_offset should equal the largest offset in the test content."""
    # Removed leftover commented-out debug call (embed()).
    nose.tools.eq_(MetaDataContainer(content).max_offset, 10)
def test_unique_event_labels():
    """unique_event_labels should yield the three distinct event labels in
    the test content, in order.
    """
    event_labels = MetaDataContainer(content).unique_event_labels
    nose.tools.eq_(len(event_labels), 3)
    for index, expected in enumerate(['mouse clicking', 'printer', 'speech']):
        nose.tools.eq_(event_labels[index], expected)
def test_event_label_count():
    """event_label_count should match the number of distinct event labels."""
    container = MetaDataContainer(content)
    nose.tools.eq_(container.event_label_count, 3)
def test_scene_label_count():
    """scene_label_count should match the number of distinct scene labels."""
    container = MetaDataContainer(content)
    nose.tools.eq_(container.scene_label_count, 2)
def test_event_count():
    """event_count should equal the number of entries in the test content."""
    container = MetaDataContainer(content)
    nose.tools.eq_(container.event_count, len(content))
def test_file_list():
    """file_list should yield the two distinct file names in the test
    content, in order.
    """
    file_names = MetaDataContainer(content).file_list
    nose.tools.eq_(len(file_names), 2)
    for index, expected in enumerate(['audio_001.wav', 'audio_002.wav']):
        nose.tools.eq_(file_names[index], expected)