def test_validation_levels(caplog):
    """Each validation level ('strict' default, 'warn', 'loose') reacts
    differently to a type-incompatible stim: raise, log, or skip."""
    prev_cache = config.get_option('cache_transformers')
    config.set_option('cache_transformers', False)
    extractor = BrightnessExtractor()
    text = TextStim(text='hello world')
    # Default (strict) validation: incompatible stim type raises.
    with pytest.raises(TypeError):
        extractor.transform(text)
    # 'warn' validation: no exception, but a log record is emitted.
    results = extractor.transform(text, validation='warn')
    warning = caplog.records[0].message
    assert warning == (
        "Transformers of type BrightnessExtractor can "
        "only be applied to stimuli of type(s) <class 'pliers"
        ".stimuli.image.ImageStim'> (not type TextStim), and no "
        "applicable Converter was found.")
    assert not results
    # 'loose' validation: incompatible stims are silently dropped.
    results = extractor.transform(text, validation='loose')
    assert not results
    apple = ImageStim(join(get_test_data_path(), 'image', 'apple.jpg'))
    results = extractor.transform([text, apple], validation='loose')
    assert len(results) == 1
    assert np.isclose(results[0].to_df()['brightness'][0], 0.88784294, 1e-5)
    config.set_option('cache_transformers', prev_cache)
def test_validation_levels(caplog):
    """Exercise strict/warn/loose validation on a type-mismatched stim."""
    saved = config.get_option('cache_transformers')
    config.set_option('cache_transformers', False)
    brightness = BrightnessExtractor()
    word_stim = TextStim(text='hello world')
    # Strict (default): a TextStim cannot feed an image extractor.
    with pytest.raises(TypeError):
        brightness.transform(word_stim)
    # Warn: transformation returns nothing but logs an explanation.
    out = brightness.transform(word_stim, validation='warn')
    assert caplog.records[0].message == (
        "Transformers of type BrightnessExtractor can "
        "only be applied to stimuli of type(s) <class 'pliers"
        ".stimuli.image.ImageStim'> (not type TextStim), and no "
        "applicable Converter was found.")
    assert not out
    # Loose: the bad stim is skipped without logging or raising.
    out = brightness.transform(word_stim, validation='loose')
    assert not out
    image_stim = ImageStim(join(get_test_data_path(), 'image', 'apple.jpg'))
    out = brightness.transform([word_stim, image_stim], validation='loose')
    assert len(out) == 1
    assert np.isclose(out[0].to_df()['brightness'][0], 0.88784294, 1e-5)
    config.set_option('cache_transformers', saved)
def test_ibm_speech_converter_large():
    """With allow_large_jobs disabled, the IBM converter must refuse work."""
    saved = config.get_option('allow_large_jobs')
    config.set_option('allow_large_jobs', False)
    converter = IBMSpeechAPIConverter()
    silence = AudioStim(join(AUDIO_DIR, 'silence.wav'))
    with pytest.raises(ValueError):
        converter.transform(silence)
    config.set_option('allow_large_jobs', saved)
def test_implicit_stim_conversion2():
    """An AudioStim fed to a text extractor is implicitly transcribed via
    the configured default AudioStim->TextStim converter."""
    saved_converters = config.get_option('default_converters')
    config.set_option('default_converters',
                      {'AudioStim->TextStim': ('WitTranscriptionConverter',)})
    audio_path = join(get_test_data_path(), 'audio')
    speech = AudioStim(join(audio_path, 'homer.wav'), onset=4.2)
    extractor = LengthExtractor()
    extracted = extractor.transform(speech)
    word_df = extracted[0].to_df()
    assert 'text_length' in word_df.columns
    assert word_df['text_length'][0] > 0
    # Onsets of transcribed words must inherit the stim's base onset.
    assert word_df['onset'][0] >= 4.2
    config.set_option('default_converters', saved_converters)
def test_implicit_stim_conversion2():
    """Audio is auto-converted to text before length extraction."""
    previous = config.get_option('default_converters')
    config.set_option('default_converters',
                      {'AudioStim->TextStim': ('WitTranscriptionConverter',)})
    homer = AudioStim(join(join(get_test_data_path(), 'audio'), 'homer.wav'),
                      onset=4.2)
    lengths = LengthExtractor().transform(homer)
    first = lengths[0].to_df()
    assert 'text_length' in first.columns
    assert first['text_length'][0] > 0
    # Word onsets are offset by the source stim's onset (4.2s).
    assert first['onset'][0] >= 4.2
    config.set_option('default_converters', previous)
def test_caching():
    """Transformer memoization: repeat calls hit the cache when enabled,
    re-execute when disabled, and work for path-string inputs too."""
    saved = config.get_option('cache_transformers')
    config.set_option('cache_transformers', True)
    apple_path = join(get_test_data_path(), 'image', 'apple.jpg')
    apple = ImageStim(apple_path)
    extractor = DummyExtractor()
    first = extractor.transform(apple)
    assert extractor.num_calls == 1
    # Cached: a second transform of the same stim adds no call.
    second = extractor.transform(apple)
    assert extractor.num_calls == 1
    assert first == second
    # Cache off: transform runs again and yields a distinct result.
    config.set_option('cache_transformers', False)
    third = extractor.transform(apple)
    assert extractor.num_calls == 2
    assert first != third
    # Caching also applies when the input is a raw file path.
    config.set_option('cache_transformers', True)
    extractor.num_calls = 0
    first = extractor.transform(apple_path)
    assert extractor.num_calls == 1
    second = extractor.transform(apple_path)
    assert extractor.num_calls == 1
    assert first == second
    config.set_option('cache_transformers', saved)
def test_microsoft_api_face_emotion_extractor():
    """Face emotion scores look sane, and a bad key fails validation."""
    extractor = MicrosoftAPIFaceEmotionExtractor()
    obama = ImageStim(join(IMAGE_DIR, 'obama.jpg'))
    emotions = extractor.transform(obama).to_df(timing=False, object_id=False)
    assert emotions.shape == (1, 8)
    assert emotions['face_emotion_happiness'][0] > 0.5
    assert emotions['face_emotion_anger'][0] < 0.5
    # An invalid subscription key should be caught up front when
    # api_key_validation is turned on.
    extractor = MicrosoftAPIFaceEmotionExtractor(subscription_key='nogood')
    assert not extractor.validate_keys()
    saved = config.get_option('api_key_validation')
    config.set_option('api_key_validation', True)
    with pytest.raises(ValueError):
        extractor.transform(obama)
    config.set_option('api_key_validation', saved)
def test_batch_transformer():
    """Batched and per-stim extraction produce identical merged output."""
    saved = config.get_option('cache_transformers')
    config.set_option('cache_transformers', False)
    image_root = join(get_test_data_path(), 'image')
    stims = [ImageStim(join(image_root, 'apple.jpg')),
             ImageStim(join(image_root, 'button.jpg')),
             ImageStim(join(image_root, 'obama.jpg'))]
    batched = DummyBatchExtractor()
    merged = merge_results(batched.transform(stims))
    # Default batch size covers all three stims in one call.
    assert batched.num_calls == 1
    assert merged.shape == (3, 10)
    # batch_size=1 forces one call per stim, but same results.
    single = DummyBatchExtractor(batch_size=1)
    merged_single = merge_results(single.transform(stims))
    assert single.num_calls == 3
    assert merged.equals(merged_single)
    config.set_option('cache_transformers', saved)
def test_clarifai_api_extractor_large():
    """A job above the large_job threshold raises unless explicitly allowed."""
    saved_allow = config.get_option('allow_large_jobs')
    saved_threshold = config.get_option('large_job')
    config.set_option('allow_large_jobs', False)
    config.set_option('large_job', 1)
    extractor = ClarifaiAPIExtractor()
    stims = [ImageStim(join(get_test_data_path(), 'image', 'apple.jpg'))] * 2
    with pytest.raises(ValueError):
        merge_results(extractor.transform(stims))
    config.set_option('allow_large_jobs', True)
    merged = merge_results(extractor.transform(stims))
    assert 'ClarifaiAPIExtractor#apple' in merged.columns
    # Both list entries are the same stim instance, so only one row.
    assert merged.shape == (1, 29)
    config.set_option('allow_large_jobs', saved_allow)
    config.set_option('large_job', saved_threshold)
def test_clarifai_api_extractor_large():
    """Two distinct stims exceed large_job=1; allowed jobs yield two rows."""
    saved_allow = config.get_option('allow_large_jobs')
    saved_threshold = config.get_option('large_job')
    config.set_option('allow_large_jobs', False)
    config.set_option('large_job', 1)
    extractor = ClarifaiAPIImageExtractor()
    stims = [ImageStim(join(IMAGE_DIR, 'apple.jpg')),
             ImageStim(join(IMAGE_DIR, 'obama.jpg'))]
    with pytest.raises(ValueError):
        merge_results(extractor.transform(stims))
    config.set_option('allow_large_jobs', True)
    merged = merge_results(extractor.transform(stims))
    assert 'ClarifaiAPIImageExtractor#apple' in merged.columns
    assert merged.shape == (2, 49)
    config.set_option('allow_large_jobs', saved_allow)
    config.set_option('large_job', saved_threshold)
def test_indico_api_extractor_large():
    """Indico extractor honors the large-job guard; dupes merge to one row."""
    saved_allow = config.get_option('allow_large_jobs')
    saved_threshold = config.get_option('large_job')
    config.set_option('allow_large_jobs', False)
    config.set_option('large_job', 1)
    extractor = IndicoAPIImageExtractor(models=['fer'])
    stims = [ImageStim(join(IMAGE_DIR, 'apple.jpg'))] * 2
    with pytest.raises(ValueError):
        merge_results(extractor.transform(stims))
    config.set_option('allow_large_jobs', True)
    merged = merge_results(extractor.transform(stims))
    assert 'IndicoAPIImageExtractor#fer_Neutral' in merged.columns
    # Same stim instance twice -> a single merged row, not two.
    assert merged.shape == (1, 15)
    config.set_option('allow_large_jobs', saved_allow)
    config.set_option('large_job', saved_threshold)
def test_batch_transformer_caching():
    """Cached stims inside a batch are served from cache, not recomputed."""
    saved = config.get_option('cache_transformers')
    config.set_option('cache_transformers', True)
    image_root = join(get_test_data_path(), 'image')
    apple = ImageStim(join(image_root, 'apple.jpg'))
    extractor = DummyBatchExtractor(name='penguin')
    first = extractor.transform(apple).to_df(timing=False, object_id=False)
    assert extractor.num_calls == 1
    assert first.shape == (1, 1)
    button = ImageStim(join(image_root, 'button.jpg'))
    obama = ImageStim(join(image_root, 'obama.jpg'))
    # apple is already cached; only the two new stims trigger calls.
    batch = extractor.transform([apple, button, button, obama, obama,
                                 apple, button])
    assert extractor.num_calls == 3
    assert len(batch) == 7
    # Duplicate stims map to identical cached results.
    assert batch[0] == batch[5] and batch[1] == batch[2] and batch[3] == batch[4]
    merged = merge_results(batch)
    assert merged.shape == (3, 10)
    config.set_option('cache_transformers', saved)
def test_indico_api_extractor_large():
    """Two distinct stims trip the large-job guard; allowed -> two rows."""
    saved_allow = config.get_option('allow_large_jobs')
    saved_threshold = config.get_option('large_job')
    config.set_option('allow_large_jobs', False)
    config.set_option('large_job', 1)
    extractor = IndicoAPIImageExtractor(models=['fer'])
    stims = [ImageStim(join(IMAGE_DIR, 'apple.jpg')),
             ImageStim(join(IMAGE_DIR, 'obama.jpg'))]
    with pytest.raises(ValueError):
        merge_results(extractor.transform(stims))
    config.set_option('allow_large_jobs', True)
    merged = merge_results(extractor.transform(stims))
    assert 'IndicoAPIImageExtractor#fer_Neutral' in merged.columns
    assert merged.shape == (2, 15)
    config.set_option('allow_large_jobs', saved_allow)
    config.set_option('large_job', saved_threshold)
def test_microsoft_vision_api_extractor_large():
    """A video with more frames than large_job raises when large jobs
    are disallowed."""
    saved_allow = config.get_option('allow_large_jobs')
    saved_threshold = config.get_option('large_job')
    config.set_option('allow_large_jobs', False)
    config.set_option('large_job', 3)
    extractor = MicrosoftVisionAPITagExtractor()
    clip = VideoStim(join(VIDEO_DIR, 'small.mp4'))
    with pytest.raises(ValueError):
        merge_results(extractor.transform(clip))
    config.set_option('allow_large_jobs', saved_allow)
    config.set_option('large_job', saved_threshold)
def test_parallelization():
    # TODO: test that parallelization actually happened (this will likely
    # require some new logging functionality, or introspection). For now we
    # just make sure the parallelized version produces the same result.
    saved_parallel = config.get_option('parallelize')
    saved_cache = config.get_option('cache_transformers')
    config.set_option('cache_transformers', True)
    video_path = join(get_test_data_path(), 'video', 'small.mp4')
    clip = VideoStim(video_path)
    extractor = BrightnessExtractor()
    # Run once with parallelization enabled...
    config.set_option('parallelize', True)
    parallel_result = extractor.transform(clip)
    # ...and once without; the two must be identical.
    config.set_option('parallelize', False)
    serial_result = extractor.transform(clip)
    assert parallel_result == serial_result
    config.set_option('parallelize', saved_parallel)
    config.set_option('cache_transformers', saved_cache)
def test_progress_bar(capfd):
    """The progress_bar option controls whether tqdm output hits stderr."""
    clip = VideoStim(join(join(get_test_data_path(), 'video'),
                          'obama_speech.mp4'))
    sampler = FrameSamplingFilter(hertz=2)
    saved = config.get_option('progress_bar')
    # Enabled: a frame progress bar should appear on stderr.
    config.set_option('progress_bar', True)
    sampled = sampler.transform(clip)
    out, err = capfd.readouterr()
    assert 'Video frame:' in err and '100%' in err
    # Disabled: stderr stays clean of progress output.
    config.set_option('progress_bar', False)
    sampled = sampler.transform(clip)
    out, err = capfd.readouterr()
    assert 'Video frame:' not in err and '100%' not in err
    config.set_option('progress_bar', saved)
def test_google_vision_api_extractor_large():
    """Google Vision labels: large-job guard raises, allowed jobs succeed."""
    saved_allow = config.get_option('allow_large_jobs')
    saved_threshold = config.get_option('large_job')
    saved_cache = config.get_option('cache_transformers')
    config.set_option('allow_large_jobs', False)
    config.set_option('large_job', 1)
    config.set_option('cache_transformers', False)
    extractor = GoogleVisionAPILabelExtractor()
    stims = [ImageStim(join(IMAGE_DIR, 'apple.jpg')),
             ImageStim(join(IMAGE_DIR, 'obama.jpg'))]
    with pytest.raises(ValueError):
        merge_results(extractor.transform(stims))
    config.set_option('allow_large_jobs', True)
    merged = merge_results(extractor.transform(stims))
    assert 'GoogleVisionAPILabelExtractor#apple' in merged.columns
    assert merged.shape == (2, 32)
    config.set_option('allow_large_jobs', saved_allow)
    config.set_option('large_job', saved_threshold)
    config.set_option('cache_transformers', saved_cache)
def test_google_vision_api_extractor_large():
    """Variant asserting the capitalized 'Apple' label column."""
    saved_allow = config.get_option('allow_large_jobs')
    saved_threshold = config.get_option('large_job')
    saved_cache = config.get_option('cache_transformers')
    config.set_option('allow_large_jobs', False)
    config.set_option('large_job', 1)
    config.set_option('cache_transformers', False)
    extractor = GoogleVisionAPILabelExtractor()
    stims = [ImageStim(join(IMAGE_DIR, 'apple.jpg')),
             ImageStim(join(IMAGE_DIR, 'obama.jpg'))]
    with pytest.raises(ValueError):
        merge_results(extractor.transform(stims))
    config.set_option('allow_large_jobs', True)
    merged = merge_results(extractor.transform(stims))
    assert 'GoogleVisionAPILabelExtractor#Apple' in merged.columns
    assert merged.shape == (2, 32)
    config.set_option('allow_large_jobs', saved_allow)
    config.set_option('large_job', saved_threshold)
    config.set_option('cache_transformers', saved_cache)
selected_frames.n_frames ```{note} `FrameSamplingFilter` expects a *collection* of `ImageStims` as input, and returns a subsampled collection of `ImageStims`. However, here it can take `VideoStim` as input, as *pliers* will *implicitly* convert `VideoStim` -> `ImageStim`. Since there are no important parameters to modify in this step, we can let *pliers* handle it for us, instead of doing it explicitly. ``` Next, we can use the `FaceRecognitionFaceLocationsExtractor` to detect and label face locations in the subset of frames. Note that since we transformed a collection of frames, the result of this operation is a *list* of `ExtractorResult` objects. To merge these objects into a single pandas DataFrame, we can use the helper function `merge_results`. from pliers.extractors import merge_results from pliers import config # Disable progress bar for Jupyter Book config.set_option('progress_bar', False) # Detect faces in selected frames face_features = face_ext.transform(selected_frames) merged_faces = merge_results(face_features, metadata=False) # Show only first few rows merged_faces.head(12) len(merged_faces.onset.unique()) There are 89 unique onsets, which indicates that faces were found in 89/143 frames.
import json import numpy as np import pytest from pliers.tests.utils import (get_test_data_path, DummyExtractor, ClashingFeatureExtractor) from pliers.extractors import (LengthExtractor, BrightnessExtractor, SharpnessExtractor, VibranceExtractor) from pliers.stimuli import (ComplexTextStim, ImageStim, VideoStim, AudioStim) from pliers.support.download import download_nltk_data from pliers.extractors.base import ExtractorResult, merge_results from pliers import config cache_default = config.get_option('cache_transformers') config.set_option('cache_transformers', True) TEXT_DIR = join(get_test_data_path(), 'text') @pytest.fixture(scope='module') def get_nltk(): download_nltk_data() def test_check_target_type(): stim = ComplexTextStim(join(TEXT_DIR, 'sample_text.txt'), columns='to', default_duration=1) td = SharpnessExtractor() with pytest.raises(TypeError):
from os import environ from pliers.tests.utils import get_test_data_path from pliers import config from pliers.extractors import (TensorFlowKerasApplicationExtractor, TFHubExtractor, TFHubImageExtractor, TFHubTextExtractor, BertExtractor, BertSequenceEncodingExtractor, BertLMExtractor, BertSentimentExtractor, AudiosetLabelExtractor) from pliers.filters import AudioResamplingFilter from pliers.stimuli import (ImageStim, TextStim, ComplexTextStim, AudioStim) from pliers.extractors.base import merge_results from transformers import BertTokenizer from pliers.utils import verify_dependencies cache_default = config.get_option('cache_transformers') config.set_option('cache_transformers', False) IMAGE_DIR = join(get_test_data_path(), 'image') TEXT_DIR = join(get_test_data_path(), 'text') AUDIO_DIR = join(get_test_data_path(), 'audio') EFFNET_URL = 'https://tfhub.dev/tensorflow/efficientnet/b7/classification/1' MNET_URL = 'https://tfhub.dev/google/imagenet/mobilenet_v2_100_224/feature_vector/4' SENTENC_URL = 'https://tfhub.dev/google/universal-sentence-encoder/4' GNEWS_URL = 'https://tfhub.dev/google/nnlm-en-dim128-with-normalization/2' TOKENIZER_URL = 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/2' ELECTRA_URL = 'https://tfhub.dev/google/electra_small/2' SPEECH_URL = 'https://tfhub.dev/google/speech_embedding/1' pytestmark = pytest.mark.skipif(environ.get('skip_high_memory', False) == 'true',