def test_audio_stim():
    """Verify duration and sampling rate of the bundled WAV fixtures."""
    audio_path = join(get_test_data_path(), 'audio')

    barber = AudioStim(join(audio_path, 'barber.wav'))
    assert round(barber.duration) == 57
    assert barber.sampling_rate == 11025

    homer = AudioStim(join(audio_path, 'homer.wav'))
    assert round(homer.duration) == 3
    assert homer.sampling_rate == 11025
def test_get_filename():
    """URL-backed stims should yield a real temp file inside get_filename()
    and clean it up on exit.

    NOTE(review): this file defines test_get_filename more than once; pytest
    only collects the last definition, so earlier ones are shadowed.
    """
    audio_url = 'https://archive.org/download/999WavFiles/TANKEN.WAV'
    audio_stim = AudioStim(url=audio_url)
    with audio_stim.get_filename() as path:
        assert exists(path)
    # The temporary file must be removed once the context exits.
    assert not exists(path)

    image_url = 'https://archive.org/download/NIX-C-1987-11903/1987_11903L.jpg'
    image_stim = ImageStim(url=image_url)
    with image_stim.get_filename() as path:
        assert exists(path)
    assert not exists(path)
def test_get_filename():
    """get_filename() on URL stims should create, then delete, a local file.

    NOTE(review): duplicate definition of test_get_filename in this file;
    only the last one is collected by pytest.
    """
    wav_url = 'http://www.bobainsworth.com/wav/simpsons/themodyn.wav'
    wav = AudioStim(url=wav_url)
    with wav.get_filename() as tmp_path:
        assert exists(tmp_path)
    assert not exists(tmp_path)  # file is removed after the context

    img_url = 'https://via.placeholder.com/350x150'
    img = ImageStim(url=img_url)
    with img.get_filename() as tmp_path:
        assert exists(tmp_path)
    assert not exists(tmp_path)
def test_get_filename():
    """Remote stims expose a temporary local file only inside get_filename().

    NOTE(review): duplicate definition of test_get_filename in this file;
    only the last one is collected by pytest.
    """
    for stim in (
        AudioStim(url='http://www.bobainsworth.com/wav/simpsons/themodyn.wav'),
        ImageStim(url='https://tuition.utexas.edu/sites/all/themes/tuition/logo.png'),
    ):
        with stim.get_filename() as local_file:
            assert exists(local_file)
        # Temp file is deleted once the context manager exits.
        assert not exists(local_file)
def test_googleAPI_converter():
    """Google Speech API should transcribe homer.wav into recognizable words.

    NOTE(review): test_googleAPI_converter is defined multiple times in this
    file; pytest only runs the last definition.
    """
    speech = AudioStim(join(AUDIO_DIR, 'homer.wav'))
    converter = GoogleSpeechAPIConverter()
    transcript = converter.transform(speech)
    assert type(transcript) == ComplexTextStim
    words = [elem.text for elem in transcript]
    assert 'thermodynamics' in words or 'obey' in words
def test_IBMSpeechAPIConverter():
    """IBM speech converter: word-level timing, phrase resolution, key check."""
    speech = AudioStim(join(AUDIO_DIR, 'homer.wav'), onset=4.2)
    word_conv = IBMSpeechAPIConverter()
    transcript = word_conv.transform(speech)
    assert word_conv.validate_keys()
    assert isinstance(transcript, ComplexTextStim)

    # Every word element should carry a positive duration and an onset.
    first = next(w for w in transcript)
    assert isinstance(first, TextStim)
    assert first.duration > 0
    assert first.onset is not None
    # Onsets are offset by the stim's own onset (4.2s).
    second = [w for w in transcript][1]
    assert second.onset > 4.2

    word_count = len(transcript.elements)
    words = [elem.text for elem in transcript]
    assert 'thermodynamics' in words or 'obey' in words

    # Phrase resolution groups words, so there are fewer, longer elements.
    phrase_conv = IBMSpeechAPIConverter(resolution='phrases')
    transcript = phrase_conv.transform(speech)
    assert isinstance(transcript, ComplexTextStim)
    phrase = next(w for w in transcript)
    assert isinstance(phrase, TextStim)
    phrase_text = phrase.text
    assert len(phrase_text.split()) > 1
    assert 'thermodynamics' in phrase_text or 'obey' in phrase_text
    assert len(transcript.elements) < word_count

    # Bogus credentials must fail key validation.
    bad_conv = IBMSpeechAPIConverter(username='******', password='******')
    assert not bad_conv.validate_keys()
def test_ibmAPI_converter():
    """IBM speech converter at word and phrase resolution, with onset shifts."""
    audio_path = join(get_test_data_path(), 'audio')
    speech = AudioStim(join(audio_path, 'homer.wav'), onset=4.2)

    transcript = IBMSpeechAPIConverter().transform(speech)
    assert isinstance(transcript, ComplexTextStim)

    leading_word = next(w for w in transcript)
    assert isinstance(leading_word, TextStim)
    assert leading_word.duration > 0
    assert leading_word.onset is not None
    # Onsets must be shifted past the stim's own onset of 4.2s.
    assert [w for w in transcript][1].onset > 4.2

    n_words = len(transcript.elements)
    all_words = [elem.text for elem in transcript]
    assert 'thermodynamics' in all_words or 'obey' in all_words

    # Phrase-level output: fewer elements, each with multiple words.
    transcript = IBMSpeechAPIConverter(resolution='phrases').transform(speech)
    assert isinstance(transcript, ComplexTextStim)
    leading_phrase = next(w for w in transcript)
    assert isinstance(leading_phrase, TextStim)
    phrase_text = leading_phrase.text
    assert len(phrase_text.split()) > 1
    assert 'thermodynamics' in phrase_text or 'obey' in phrase_text
    assert len(transcript.elements) < n_words
def test_mfcc_energy_extractor():
    """MFCCEnergyExtractor output shape and spot-checked sums for both
    registers and custom mfcc/coef/hop parameters."""
    audio = AudioStim(join(AUDIO_DIR, 'barber.wav'))
    n_mels = 48

    low = MFCCEnergyExtractor(register='low', n_mels=n_mels)
    frame_data = low.transform(audio).data.iloc[:, 4:]
    assert frame_data.shape == (1221, n_mels)
    assert np.isclose(frame_data.iloc[0].sum(), 695.7308991156441)

    high = MFCCEnergyExtractor(register='high', n_mels=n_mels)
    frame_data = high.transform(audio).data.iloc[:, 4:]
    assert frame_data.shape == (1221, n_mels)
    assert np.isclose(frame_data.iloc[100].sum(), 107.06728965998656)

    # Custom parameterization should leave frame count unchanged.
    custom_low = MFCCEnergyExtractor(n_mfcc=64, n_coefs=8, hop_length=512,
                                     n_mels=n_mels, register='low')
    frame_data = custom_low.transform(audio).data.iloc[:, 4:]
    assert frame_data.shape == (1221, n_mels)
    assert np.isclose(frame_data.iloc[650].sum(), 884.4713338141073)

    custom_high = MFCCEnergyExtractor(n_mfcc=64, n_coefs=8, hop_length=512,
                                      n_mels=n_mels, register='high')
    frame_data = custom_high.transform(audio).data.iloc[:, 4:]
    assert frame_data.shape == (1221, n_mels)
    assert np.isclose(frame_data.iloc[601].sum(), 210.857157)
def test_chroma_extractors():
    """STFT/CQT/CENS chroma extractors: shapes, timing, spot values.

    NOTE(review): test_chroma_extractors is defined twice in this file;
    pytest only runs the last definition.
    """
    audio = AudioStim(join(AUDIO_DIR, "barber.wav"))

    stft_df = ChromaSTFTExtractor().transform(audio).to_df()
    assert stft_df.shape == (4882, 14)
    assert np.isclose(stft_df['onset'][1], 0.01161)
    assert np.isclose(stft_df['duration'][0], 0.01161)
    assert np.isclose(stft_df['chroma_2'][0], 0.417595)

    # Smaller hop doubles the frame count; n_chroma shrinks the columns.
    custom_df = ChromaSTFTExtractor(n_chroma=6, n_fft=1024,
                                    hop_length=256).transform(audio).to_df()
    assert custom_df.shape == (9763, 8)
    assert np.isclose(custom_df['onset'][1], 0.005805)
    assert np.isclose(custom_df['duration'][0], 0.005805)
    assert np.isclose(custom_df['chroma_5'][0], 0.732480)

    cqt_df = ChromaCQTExtractor().transform(audio).to_df()
    assert cqt_df.shape == (4882, 14)
    assert np.isclose(cqt_df['chroma_cqt_2'][0], 0.286443)

    cens_df = ChromaCENSExtractor().transform(audio).to_df()
    assert cens_df.shape == (4882, 14)
    assert np.isclose(cens_df['chroma_cens_2'][0], 0.217814)
def test_spectral_extractors():
    """Spectral centroid/bandwidth/contrast/rolloff extractors on barber.wav.

    NOTE(review): test_spectral_extractors is defined twice in this file;
    pytest only runs the last definition.
    """
    audio = AudioStim(join(AUDIO_DIR, 'barber.wav'))

    centroid = SpectralCentroidExtractor().transform(audio).to_df()
    assert centroid.shape == (1221, 5)
    assert np.isclose(centroid['onset'][1], 0.04644)
    assert np.isclose(centroid['duration'][0], 0.04644)
    assert np.isclose(centroid['spectral_centroid'][0], 1144.98145)

    # Shorter hop -> more frames, different first-frame value.
    centroid_fine = SpectralCentroidExtractor(
        n_fft=1024, hop_length=256).transform(audio).to_df()
    assert centroid_fine.shape == (2441, 5)
    assert np.isclose(centroid_fine['onset'][1], 0.02322)
    assert np.isclose(centroid_fine['duration'][0], 0.02322)
    assert np.isclose(centroid_fine['spectral_centroid'][0], 866.20176)

    bandwidth = SpectralBandwidthExtractor().transform(audio).to_df()
    assert bandwidth.shape == (1221, 5)
    assert np.isclose(bandwidth['spectral_bandwidth'][0], 1172.96090)

    contrast = SpectralContrastExtractor(fmin=100.0).transform(audio).to_df()
    assert contrast.shape == (1221, 11)
    assert np.isclose(contrast['spectral_contrast_band_4'][0], 25.637166)

    rolloff = SpectralRolloffExtractor().transform(audio).to_df()
    assert rolloff.shape == (1221, 5)
    assert np.isclose(rolloff['spectral_rolloff'][0], 2492.46826)
def test_compound_stim():
    """CompoundStim: typed attribute access, iteration, and get_stim lookup.

    Builds a compound stim from audio, two images, a video, and complex
    text, then checks element count, per-type accessors, the AttributeError
    raised for unknown types, and get_stim() retrieval by class and name.
    """
    audio_dir = join(get_test_data_path(), 'audio')
    audio = AudioStim(join(audio_dir, 'crowd.mp3'))
    image1 = ImageStim(join(get_test_data_path(), 'image', 'apple.jpg'))
    image2 = ImageStim(join(get_test_data_path(), 'image', 'obama.jpg'))
    filename = join(get_test_data_path(), 'video', 'small.mp4')
    video = VideoStim(filename)
    text = ComplexTextStim(text="The quick brown fox jumped...")
    stim = CompoundStim([audio, image1, image2, video, text])
    assert len(stim.elements) == 5
    assert isinstance(stim.video, VideoStim)
    assert isinstance(stim.complex_text, ComplexTextStim)
    assert isinstance(stim.image, ImageStim)
    with pytest.raises(AttributeError):
        stim.nonexistent_type
    assert stim.video_frame is None
    # Test iteration. BUG FIX: the comparison below was a bare expression
    # (no assert), so it never actually checked anything.
    assert len([e for e in stim]) == 5
    imgs = stim.get_stim(ImageStim, return_all=True)
    assert len(imgs) == 2
    assert all([isinstance(im, ImageStim) for im in imgs])
    also_imgs = stim.get_stim('image', return_all=True)
    assert imgs == also_imgs
def test_spectral_extractors():
    """Spectral feature extractors (no metadata columns variant).

    NOTE(review): test_spectral_extractors is defined twice in this file;
    pytest only runs the last definition.
    """
    audio = AudioStim(join(AUDIO_DIR, "barber.wav"))

    result = SpectralCentroidExtractor().transform(audio).to_df()
    assert result.shape == (4882, 3)
    assert np.isclose(result['onset'][1], 0.01161)
    assert np.isclose(result['duration'][0], 0.01161)
    assert np.isclose(result['spectral_centroid'][0], 817.53095)

    result = SpectralCentroidExtractor(n_fft=1024,
                                       hop_length=256).transform(audio).to_df()
    assert result.shape == (9763, 3)
    assert np.isclose(result['onset'][1], 0.005805)
    assert np.isclose(result['duration'][0], 0.005805)
    assert np.isclose(result['spectral_centroid'][0], 1492.00515)

    result = SpectralBandwidthExtractor().transform(audio).to_df()
    assert result.shape == (4882, 3)
    assert np.isclose(result['spectral_bandwidth'][0], 1056.66227)

    result = SpectralContrastExtractor().transform(audio).to_df()
    assert result.shape == (4882, 9)
    assert np.isclose(result['spectral_contrast_band_4'][0], 25.09001)

    result = SpectralRolloffExtractor().transform(audio).to_df()
    assert result.shape == (4882, 3)
    assert np.isclose(result['spectral_rolloff'][0], 1550.39063)
def test_transcribed_audio_stim():
    """TranscribedAudioCompoundStim exposes its audio and text components."""
    wav_path = join(get_test_data_path(), 'audio', "barber_edited.wav")
    srt_path = join(get_test_data_path(), 'text', "wonderful_edited.srt")
    compound = TranscribedAudioCompoundStim(audio=AudioStim(wav_path),
                                            text=ComplexTextStim(srt_path))
    assert isinstance(compound.audio, AudioStim)
    assert isinstance(compound.complex_text, ComplexTextStim)
def test_chroma_extractors():
    """Chroma extractors (variant with extra metadata columns).

    NOTE(review): test_chroma_extractors is defined twice in this file;
    pytest only runs the last definition.
    """
    audio = AudioStim(join(AUDIO_DIR, 'barber.wav'))

    out = ChromaSTFTExtractor().transform(audio).to_df()
    assert out.shape == (1221, 16)
    assert np.isclose(out['onset'][1], 0.04644)
    assert np.isclose(out['duration'][0], 0.04644)
    assert np.isclose(out['chroma_2'][0], 0.53129)

    out = ChromaSTFTExtractor(n_chroma=6, n_fft=1024,
                              hop_length=256).transform(audio).to_df()
    assert out.shape == (2441, 10)
    assert np.isclose(out['onset'][1], 0.02322)
    assert np.isclose(out['duration'][0], 0.02322)
    assert np.isclose(out['chroma_5'][0], 0.86870)

    out = ChromaCQTExtractor().transform(audio).to_df()
    assert out.shape == (1221, 16)
    assert np.isclose(out['chroma_cqt_2'][0], 0.336481)

    out = ChromaCENSExtractor().transform(audio).to_df()
    assert out.shape == (1221, 16)
    assert np.isclose(out['chroma_cens_2'][0], 0.136409)
def test_tempo_extractor():
    """TempoExtractor returns a single-row result with the expected BPM."""
    audio = AudioStim(join(AUDIO_DIR, 'barber.wav'))
    tempo_df = TempoExtractor().transform(audio).to_df()
    assert tempo_df.shape == (1, 5)
    assert np.isclose(tempo_df['onset'][0], 0.0)
    assert np.isclose(tempo_df['duration'][0], 0.04644)
    assert np.isclose(tempo_df['tempo'][0], 117.453835)
def test_googleAPI_converter():
    """Google Speech API transcription of the obama_speech.wav fixture.

    NOTE(review): test_googleAPI_converter is defined multiple times in this
    file; pytest only runs the last definition.
    """
    speech = AudioStim(join(AUDIO_DIR, 'obama_speech.wav'))
    transcript = GoogleSpeechAPIConverter().transform(speech)
    assert type(transcript) == ComplexTextStim
    words = [elem.text for elem in transcript]
    assert 'today' in words
    assert 'United' in words
def test_audio_resampling_filter(target_sr, resample_type):
    """Resampling should hit the target rate and preserve duration.

    Parametrized (via fixtures) over target sampling rates and resampling
    algorithms; allows a one-sample rounding discrepancy in length.
    """
    original = AudioStim(join(AUDIO_DIR, 'homer.wav'))
    resampled = AudioResamplingFilter(target_sr, resample_type).transform(original)
    assert resampled.sampling_rate == target_sr
    expected_samples = target_sr * original.duration
    assert np.abs(expected_samples - resampled.data.shape[0]) <= 1
def test_implicit_stim_conversion2():
    """A text extractor applied to audio triggers implicit speech-to-text
    conversion, yielding per-word length results."""
    audio_path = join(get_test_data_path(), 'audio')
    speech = AudioStim(join(audio_path, 'homer.wav'))
    results = LengthExtractor().transform(speech)
    head = results[0].to_df()
    assert 'text_length' in head.columns
    assert head['text_length'][0] > 0
def test_stft_extractor():
    """STFT extractor with custom frequency bins honors the stim onset.

    NOTE(review): test_stft_extractor is defined twice in this file;
    pytest only runs the last definition.
    """
    audio = AudioStim(join(AUDIO_DIR, 'barber.wav'), onset=4.2)
    extractor = STFTAudioExtractor(
        frame_size=1.,
        spectrogram=False,
        freq_bins=[(100, 300), (300, 3000), (3000, 20000)])
    out_df = extractor.transform(audio).to_df()
    assert out_df.shape == (557, 5)
    # First frame onset equals the stim's own onset.
    assert out_df['onset'][0] == 4.2
def test_tonnetz_extractor():
    """TonnetzExtractor output shape and first tonal-centroid value.

    NOTE(review): test_tonnetz_extractor is defined twice in this file;
    pytest only runs the last definition.
    """
    audio = AudioStim(join(AUDIO_DIR, "barber.wav"))
    tonnetz_df = TonnetzExtractor().transform(audio).to_df()
    assert tonnetz_df.shape == (4882, 8)
    assert np.isclose(tonnetz_df['onset'][1], 0.01161)
    assert np.isclose(tonnetz_df['duration'][0], 0.01161)
    assert np.isclose(tonnetz_df['tonal_centroid_0'][0], -0.0264436)
def test_googleAPI_converter():
    """Google Speech API transcription of homer.wav (path built from
    get_test_data_path).

    NOTE(review): test_googleAPI_converter is defined multiple times in this
    file; pytest only runs the last definition.
    """
    wav = AudioStim(join(join(get_test_data_path(), 'audio'), 'homer.wav'))
    result = GoogleSpeechAPIConverter().transform(wav)
    assert type(result) == ComplexTextStim
    transcribed = [elem.text for elem in result]
    assert 'thermodynamics' in transcribed or 'obey' in transcribed
def test_stft_extractor():
    """STFT extractor frame count with three custom frequency bins.

    NOTE(review): test_stft_extractor is defined twice in this file;
    pytest only runs the last definition.
    """
    audio_path = join(get_test_data_path(), 'audio')
    audio = AudioStim(join(audio_path, 'barber.wav'))
    stft = STFTAudioExtractor(frame_size=1., spectrogram=False,
                              freq_bins=[(100, 300), (300, 3000),
                                         (3000, 20000)])
    assert stft.transform(audio).to_df().shape == (557, 5)
def test_librosa_extractor():
    """Generic LibrosaFeatureExtractor wrapper, checked with 'rmse'."""
    audio = AudioStim(join(AUDIO_DIR, 'barber.wav'))
    rmse_df = LibrosaFeatureExtractor(feature='rmse').transform(audio).to_df()
    assert rmse_df.shape == (1221, 5)
    assert np.isclose(rmse_df['onset'][1], 0.04644)
    assert np.isclose(rmse_df['duration'][0], 0.04644)
    assert np.isclose(rmse_df['rmse'][0], 0.25663)
def test_tonnetz_extractor():
    """TonnetzExtractor (variant with extra metadata columns).

    NOTE(review): test_tonnetz_extractor is defined twice in this file;
    pytest only runs the last definition.
    """
    audio = AudioStim(join(AUDIO_DIR, 'barber.wav'))
    out = TonnetzExtractor().transform(audio).to_df()
    assert out.shape == (1221, 10)
    assert np.isclose(out['onset'][1], 0.04644)
    assert np.isclose(out['duration'][0], 0.04644)
    assert np.isclose(out['tonal_centroid_0'][0], -0.0391266)
def test_audioset_extractor(hop_size, top_n, target_sr):
    """AudiosetLabelExtractor: output length, resampling constraints,
    top_n selection, label subsets, and the top_n/labels exclusivity error.

    Parametrized (via fixtures) over hop sizes, top_n values, and target
    sampling rates.
    """
    verify_dependencies(['tensorflow'])

    def expected_rows(stim, ext):
        # Number of hops that fit before the patch window runs off the end.
        stft_extra = ext.params.STFT_WINDOW_SECONDS - ext.params.STFT_HOP_SECONDS
        window = ext.params.PATCH_WINDOW_SECONDS + stft_extra
        return len(np.arange(start=0, stop=stim.duration - window,
                             step=hop_size))

    crowd = AudioStim(join(AUDIO_DIR, 'crowd.mp3'))
    resampled_crowd = AudioResamplingFilter(target_sr=target_sr).transform(crowd)

    # Defaults against the native 44100 Hz stimulus.
    extractor = AudiosetLabelExtractor(hop_size=hop_size)
    base_df = extractor.transform(crowd).to_df()
    assert base_df.shape[0] == expected_rows(crowd, extractor)
    assert base_df.shape[1] == 525
    assert np.argmax(base_df.to_numpy()[:, 4:].mean(axis=0)) == 0
    assert base_df['duration'][0] == .975
    # Consecutive onsets are spaced exactly hop_size apart.
    assert all([
        np.isclose(base_df['onset'][i] - base_df['onset'][i - 1], hop_size)
        for i in range(1, base_df.shape[0])
    ])

    # Resampled audio: same row count when the rate is high enough,
    # otherwise a ValueError mentioning upsampling and the rate.
    if target_sr >= 14500:
        resampled_df = extractor.transform(resampled_crowd).to_df()
        assert base_df.shape[0] == resampled_df.shape[0]
    else:
        with pytest.raises(ValueError) as sr_error:
            extractor.transform(resampled_crowd)
        assert all([
            substr in str(sr_error.value)
            for substr in ['Upsample', str(target_sr)]
        ])

    # top_n restricts the label columns (plus 4 metadata columns).
    top_n_ext = AudiosetLabelExtractor(top_n=top_n)
    top_n_df = top_n_ext.transform(crowd).to_df()
    assert top_n_df.shape[1] == top_n_ext.top_n + 4
    assert np.argmax(top_n_df.to_numpy()[:, 4:].mean(axis=0)) == 0

    # Explicit label subset.
    labels = [
        'Speech', 'Silence', 'Harmonic', 'Bark', 'Music', 'Bell', 'Steam',
        'Rain'
    ]
    subset_df = AudiosetLabelExtractor(labels=labels).transform(crowd).to_df()
    assert subset_df.shape[1] == len(labels) + 4

    # top_n and labels may not be combined.
    with pytest.raises(ValueError) as err:
        AudiosetLabelExtractor(top_n=10, labels=labels)
    assert 'Top_n and labels are mutually exclusive' in str(err.value)
def test_mean_amplitude_extractor():
    """Mean amplitude per transcript segment of a transcribed audio stim."""
    wav = AudioStim(join(AUDIO_DIR, 'barber_edited.wav'))
    srt = ComplexTextStim(join(get_test_data_path(), 'text',
                               'wonderful_edited.srt'))
    transcribed = TranscribedAudioCompoundStim(audio=wav, text=srt)
    amp_df = MeanAmplitudeExtractor().transform(transcribed).to_df()
    expected = [-0.154661, 0.121521]
    assert np.allclose(amp_df['mean_amplitude'], expected)
def test_witaiAPI_converter():
    """Wit.ai transcription of homer.wav yields TextStim words."""
    audio_path = join(get_test_data_path(), 'audio')
    speech = AudioStim(join(audio_path, 'homer.wav'))
    transcript = WitTranscriptionConverter().transform(speech)
    assert type(transcript) == ComplexTextStim
    first = next(w for w in transcript)
    assert type(first) == TextStim
    words = [elem.text for elem in transcript]
    assert 'thermodynamics' in words or 'obey' in words
def test_ibm_speech_converter_large():
    """With allow_large_jobs disabled, transforming audio must raise.

    BUG FIX: the original restored the 'allow_large_jobs' config option
    only after the assertions, so a failure inside the test body would
    leak the disabled setting into subsequent tests. The restore now
    happens in a finally block.
    """
    default = config.get_option('allow_large_jobs')
    config.set_option('allow_large_jobs', False)
    try:
        conv = IBMSpeechAPIConverter()
        audio = AudioStim(join(AUDIO_DIR, 'silence.wav'))
        with pytest.raises(ValueError):
            conv.transform(audio)
    finally:
        # Always restore the global config, even if the test fails.
        config.set_option('allow_large_jobs', default)
def test_googleAPI_converter():
    """Google Speech API: key validation, transcription, and bad-credentials
    failure.

    NOTE(review): test_googleAPI_converter is defined multiple times in this
    file; pytest only runs the last definition.
    """
    speech = AudioStim(join(AUDIO_DIR, 'obama_speech.wav'))
    converter = GoogleSpeechAPIConverter()
    assert converter.validate_keys()

    transcript = converter.transform(speech)
    assert type(transcript) == ComplexTextStim
    words = [elem.text for elem in transcript]
    assert 'today' in words
    assert 'United' in words

    # A nonexistent discovery file must fail key validation.
    broken = GoogleSpeechAPIConverter(discovery_file='no/good.json')
    assert not broken.validate_keys()
def test_WitTranscriptionConverter():
    """Wit.ai transcription propagates the stim onset to every word."""
    speech = AudioStim(join(AUDIO_DIR, 'homer.wav'), onset=4.2)
    transcript = WitTranscriptionConverter().transform(speech)
    assert type(transcript) == ComplexTextStim

    first = next(w for w in transcript)
    assert type(first) == TextStim
    assert first.onset == 4.2
    # Wit.ai gives no per-word timing, so every word shares the stim onset.
    assert [w for w in transcript][1].onset == 4.2

    words = [elem.text for elem in transcript]
    assert 'thermodynamics' in words or 'obey' in words