def clean_transcript(input_transcript, input_media, onset=None, offset=None):
    """Prepare a transcript and its media file for forced alignment.

    If the media is a video, its audio track is extracted first. The
    transcript is then normalized via ``_clean_save`` and written to a
    temporary file.

    Parameters
    ----------
    input_transcript : str
        Path to the transcript: either an ``.srt`` subtitle file or a
        plain-text file treated as one block of speech.
    input_media : str
        Path to the associated audio or video file.
    onset : float, optional
        Speech onset in seconds; required for plain-text transcripts.
    offset : float, optional
        Seconds trimmed from the end of the media duration; required for
        plain-text transcripts.

    Returns
    -------
    tuple of (str, str)
        Path to the cleaned transcript and path to the (possibly
        converted) audio file.

    Raises
    ------
    ValueError
        If a plain-text transcript is given without onset/offset.
    """
    stim = load_stims([input_media])[0]
    # Extract the audio track when we were handed a video file.
    if not isinstance(stim, AudioStim):
        conv = VideoToAudioConverter()
        stim = conv.transform(stim)
        input_media = '/tmp/input_audio.wav'
        stim.save(input_media)

    _, extension = splitext(input_transcript)
    out_path = '/tmp/clean_transcript.txt'
    with open(out_path, 'w') as new_file:
        # splitext() keeps the leading dot, so compare against '.srt' —
        # the previous bare 'srt' comparison could never match.
        if extension == '.srt':
            txt = ComplexTextStim(input_transcript)
            for el in txt.elements:
                _clean_save(el.text, new_file, el.onset, el.duration)
        else:
            # Treat as a single block of text
            if onset is None or offset is None:
                raise ValueError("Onset and offset must be declared")
            txt = TextStim(input_transcript)
            _clean_save(txt.text, new_file, onset, stim.duration - offset)
    return out_path, input_media
def test_video_to_audio_converter():
    """Converting a video should yield audio with matching history and duration."""
    video_path = join(get_test_data_path(), 'video', 'small.mp4')
    stim = VideoStim(video_path)
    audio = VideoToAudioConverter().transform(stim)
    history = audio.history
    assert history.source_class == 'VideoStim'
    assert history.source_file == video_path
    assert np.isclose(stim.duration, audio.duration, 1e-2)
def test_video_to_audio_converter():
    """Audio extracted from a video should inherit its onset and duration."""
    source_path = join(VIDEO_DIR, 'small.mp4')
    clip = VideoStim(source_path, onset=4.2)
    converter = VideoToAudioConverter()
    audio = converter.transform(clip)
    assert audio.history.source_class == 'VideoStim'
    assert audio.history.source_file == source_path
    assert audio.onset == 4.2
    assert np.isclose(clip.duration, audio.duration, 1e-2)
def test_stim_history_tracking():
    """Each transformation should append one entry to the stim's history string."""
    path = join(get_test_data_path(), 'video', 'obama_speech.mp4')
    stim = VideoStim(path)
    assert stim.history is None
    stim = VideoToAudioConverter().transform(stim)
    assert str(stim.history) == 'VideoStim->VideoToAudioConverter/AudioStim'
    stim = WitTranscriptionConverter().transform(stim)
    expected = ('VideoStim->VideoToAudioConverter/AudioStim->'
                'WitTranscriptionConverter/ComplexTextStim')
    assert str(stim.history) == expected
def test_stim_history_tracking():
    """History should start empty and grow by one segment per converter applied."""
    movie = VideoStim(join(get_test_data_path(), 'video', 'obama_speech.mp4'))
    assert movie.history is None
    current = VideoToAudioConverter().transform(movie)
    assert str(current.history) == 'VideoStim->VideoToAudioConverter/AudioStim'
    current = WitTranscriptionConverter().transform(current)
    full_chain = ('VideoStim->VideoToAudioConverter/AudioStim->'
                  'WitTranscriptionConverter/ComplexTextStim')
    assert str(current.history) == full_chain
def test_big_pipeline():
    """Run a mixed visual/audio graph end-to-end and check merged results.

    Also verifies that Graph.draw() (via pygraphviz) writes an output
    file to disk.
    """
    pytest.importorskip('pygraphviz')
    filename = join(get_test_data_path(), 'video', 'obama_speech.mp4')
    video = VideoStim(filename)
    visual_nodes = [(FrameSamplingFilter(every=15), [
        (TesseractConverter(), [LengthExtractor()]),
        VibranceExtractor(), 'BrightnessExtractor',
    ])]
    audio_nodes = [(VideoToAudioConverter(),
                    [WitTranscriptionConverter(), 'LengthExtractor'],
                    'video_to_audio')]
    graph = Graph()
    graph.add_nodes(visual_nodes)
    graph.add_nodes(audio_nodes)
    results = graph.run(video, merge=False)
    result = merge_results(results, format='wide', extractor_names='multi')
    # Test that pygraphviz outputs a file. Use the public mkstemp() API
    # rather than tempfile._get_candidate_names(), which is private and
    # not guaranteed to exist across Python versions.
    fd, drawfile = tempfile.mkstemp()
    os.close(fd)
    graph.draw(drawfile)
    assert exists(drawfile)
    os.remove(drawfile)
    assert ('LengthExtractor', 'text_length') in result.columns
    assert ('VibranceExtractor', 'vibrance') in result.columns
    # assert not result[('onset', '')].isnull().any()
    assert 'text[negotiations]' in result['stim_name'].values
    assert 'frame[90]' in result['stim_name'].values
def test_video_to_audio_converter():
    """Extracted audio should carry over onset, duration, and native sampling rate."""
    converter = VideoToAudioConverter()

    # First clip: 48 kHz audio track.
    first_path = join(VIDEO_DIR, 'small.mp4')
    first_clip = VideoStim(first_path, onset=4.2)
    first_audio = converter.transform(first_clip)
    assert first_audio.history.source_class == 'VideoStim'
    assert first_audio.history.source_file == first_path
    assert first_audio.onset == 4.2
    assert first_audio.sampling_rate == 48000
    assert np.isclose(first_clip.duration, first_audio.duration, 1e-2)

    # Second clip: 24 kHz audio track.
    second_path = join(VIDEO_DIR, 'obama_speech.mp4')
    second_clip = VideoStim(second_path, onset=1.0)
    second_audio = converter.transform(second_clip)
    assert second_audio.history.source_class == 'VideoStim'
    assert second_audio.history.source_file == second_path
    assert second_audio.onset == 1.0
    assert second_audio.sampling_rate == 24000
    assert np.isclose(second_clip.duration, second_audio.duration, 1e-2)
def extract_audio_energy(video):
    """Extract per-TR RMS energy from a video's audio track and save it as events.

    The audio is split into non-overlapping windows of one TR each, RMS
    energy is computed per window, onsets are shifted by one TR, and the
    result is written to ``events/audio_energy_events.csv``.
    """
    audio = VideoToAudioConverter().transform(video)
    window = int(audio.sampling_rate * TR)
    extractor = RMSEExtractor(frame_length=window, hop_length=window,
                              center=False)
    events = extractor.transform(audio).to_df(metadata=False, format='long')
    events['onset'] += TR
    events = events.drop(['object_id', 'order'], axis=1)
    events = events.rename(columns={'value': 'modulation',
                                    'feature': 'trial_type'})
    events.to_csv('events/audio_energy_events.csv')
def test_big_pipeline():
    """Run a combined visual/audio extraction graph and verify the merged output."""
    video_path = join(get_test_data_path(), 'video', 'obama_speech.mp4')
    stim = VideoStim(video_path)
    frame_branch = [(FrameSamplingConverter(every=15), [
        (TesseractConverter(), [LengthExtractor()]),
        VibranceExtractor(),
        'BrightnessExtractor',
    ])]
    audio_branch = [(VideoToAudioConverter(),
                     [WitTranscriptionConverter(), 'LengthExtractor'],
                     'video_to_audio')]
    pipeline = Graph()
    pipeline.add_nodes(frame_branch)
    pipeline.add_nodes(audio_branch)
    result = pipeline.run(stim)
    assert ('LengthExtractor', 'text_length') in result.columns
    assert ('VibranceExtractor', 'vibrance') in result.columns
    # assert not result[('onset', '')].isnull().any()
    assert 'text[negotiations]' in result['stim'].values
    assert 'frame[90]' in result['stim'].values