def test_word_embedding_extractor():
    """Word embeddings are extracted for known words; unknown words honor
    the ``unk_vector`` argument (zeros, explicit vector, random, fallback)."""
    pytest.importorskip('gensim')
    vectors_path = join(TEXT_DIR, 'simple_vectors.bin')
    words = [TextStim(text='this'), TextStim(text='sentence')]
    ext = WordEmbeddingExtractor(vectors_path, binary=True)
    df = merge_results(ext.transform(words), extractor_names='multi',
                       format='wide')
    assert ('WordEmbeddingExtractor', 'embedding_dim99') in df.columns
    assert np.allclose(0.0010911,
                       df[('WordEmbeddingExtractor', 'embedding_dim0')][0])
    # Out-of-vocabulary word defaults to a zero vector.
    oov = TextStim(text='nowaythisinvocab')
    df = ext.transform(oov).to_df()
    assert df['embedding_dim10'][0] == 0.0
    # Explicit unknown-word vector.
    ext = WordEmbeddingExtractor(vectors_path, binary=True,
                                 unk_vector=np.ones(100))
    df = ext.transform(oov).to_df()
    assert df['embedding_dim10'][0] == 1.0
    # Random unknown-word vector stays within [-1, 1].
    ext = WordEmbeddingExtractor(vectors_path, binary=True,
                                 unk_vector='random')
    df = ext.transform(oov).to_df()
    assert -1.0 <= df['embedding_dim10'][0] <= 1.0
    # Unrecognized unk_vector string falls back to zeros.
    ext = WordEmbeddingExtractor(vectors_path, binary=True,
                                 unk_vector='nothing')
    df = ext.transform(oov).to_df()
    assert df['embedding_dim10'][0] == 0.0
def test_indico_api_text_extractor():
    """Indico text API returns the expected emotion/personality columns for
    both ComplexTextStim and TextStim inputs."""
    ext = IndicoAPITextExtractor(api_key=os.environ['INDICO_APP_KEY'],
                                 models=['emotion', 'personality'])
    # ComplexTextStim input (srt transcript with a global onset shift).
    srt_path = join(get_test_data_path(), 'text', 'wonderful.srt')
    srt_stim = ComplexTextStim(srt_path, onset=4.2)
    merged = merge_results(ext.transform(srt_stim), extractor_names=False)
    expected_keys = {'onset', 'duration', 'order', 'object_id',
                     'emotion_anger', 'emotion_fear', 'emotion_joy',
                     'emotion_sadness', 'emotion_surprise',
                     'personality_openness', 'personality_extraversion',
                     'personality_agreeableness',
                     'personality_conscientiousness'}
    meta_columns = {'source_file', 'history', 'class', 'filename'}
    assert set(merged.columns) - {'stim_name'} == expected_keys | meta_columns
    assert merged['onset'][1] == 92.622
    # Plain TextStim input.
    ts = TextStim(text="It's a wonderful life.")
    df = ext.transform(ts).to_df(object_id=True)
    assert set(df.columns) == expected_keys
    assert len(df) == 1
def test_indico_api_image_extractor():
    """Indico image API returns fer/content_filtering columns for local
    files (single and batched) and for a URL-backed stim."""
    ext = IndicoAPIImageExtractor(api_key=os.environ['INDICO_APP_KEY'],
                                  models=['fer', 'content_filtering'])
    apple = ImageStim(join(IMAGE_DIR, 'apple.jpg'))
    merged = merge_results(ext.transform([apple, apple]),
                           extractor_names=False)
    expected_keys = {'object_id', 'fer_Surprise', 'fer_Neutral', 'fer_Sad',
                     'fer_Happy', 'fer_Angry', 'fer_Fear',
                     'content_filtering'}
    meta_columns = {'source_file', 'history', 'class', 'filename',
                    'onset', 'duration', 'order'}
    assert set(merged.columns) - {'stim_name'} == expected_keys | meta_columns
    assert merged['content_filtering'][0] < 0.2
    obama = ImageStim(join(IMAGE_DIR, 'obama.jpg'))
    df = ext.transform(obama).to_df(timing=False, object_id=True)
    assert set(df.columns) == expected_keys
    assert df['fer_Happy'][0] > 0.7
    # Remote image fetched via URL.
    url = 'https://tuition.utexas.edu/sites/all/themes/tuition/logo.png'
    df = ext.transform(ImageStim(url=url)).to_df()
    assert df['fer_Neutral'][0] > 0.1
def test_indico_api_image_extractor():
    """Indico image API returns fer/content_filtering columns for batched
    and single local image stims."""
    ext = IndicoAPIImageExtractor(api_key=os.environ['INDICO_APP_KEY'],
                                  models=['fer', 'content_filtering'])
    image_dir = join(get_test_data_path(), 'image')
    apple = ImageStim(join(image_dir, 'apple.jpg'))
    merged = merge_results(ext.transform([apple, apple]),
                           extractor_names=False)
    expected_keys = {'object_id', 'fer_Surprise', 'fer_Neutral', 'fer_Sad',
                     'fer_Happy', 'fer_Angry', 'fer_Fear',
                     'content_filtering'}
    meta_columns = {'source_file', 'history', 'class', 'filename',
                    'onset', 'duration', 'order'}
    assert set(merged.columns) - {'stim_name'} == expected_keys | meta_columns
    assert merged['content_filtering'][0] < 0.2
    obama = ImageStim(join(image_dir, 'obama.jpg'))
    df = ext.transform(obama).to_df(timing=False, object_id=True)
    assert set(df.columns) == expected_keys
    assert df['fer_Happy'][0] > 0.7
def test_indico_api_image_extractor():
    """Indico image API: column schema, batched/single stims, and a
    placeholder URL-backed image."""
    ext = IndicoAPIImageExtractor(api_key=os.environ['INDICO_APP_KEY'],
                                  models=['fer', 'content_filtering'])
    apple = ImageStim(join(IMAGE_DIR, 'apple.jpg'))
    merged = merge_results(ext.transform([apple, apple]),
                           extractor_names=False)
    expected_keys = {'object_id', 'fer_Surprise', 'fer_Neutral', 'fer_Sad',
                     'fer_Happy', 'fer_Angry', 'fer_Fear',
                     'content_filtering'}
    meta_columns = {'source_file', 'history', 'class', 'filename',
                    'onset', 'duration', 'order'}
    assert set(merged.columns) - {'stim_name'} == expected_keys | meta_columns
    assert merged['content_filtering'][0] < 0.2
    obama = ImageStim(join(IMAGE_DIR, 'obama.jpg'))
    df = ext.transform(obama).to_df(timing=False, object_id=True)
    assert set(df.columns) == expected_keys
    assert df['fer_Happy'][0] > 0.7
    # Remote image fetched via URL.
    url = 'https://via.placeholder.com/350x150'
    df = ext.transform(ImageStim(url=url)).to_df()
    assert df['fer_Neutral'][0] > 0.0
def test_clarifai_api_extractor_batch():
    """Batched Clarifai transform labels at least one of the two stims
    as an apple with high confidence."""
    apple = ImageStim(join(IMAGE_DIR, 'apple.jpg'))
    obama = ImageStim(join(IMAGE_DIR, 'obama.jpg'))
    ext = ClarifaiAPIImageExtractor()
    merged = merge_results(ext.transform([apple, obama]))
    scores = merged['ClarifaiAPIImageExtractor#apple']
    assert scores[0] > 0.5 or scores[1] > 0.5
def test_word_embedding_extractor():
    """Word embeddings are extracted for in-vocabulary words and merged
    with multi-level column names."""
    import numpy as np  # local import: np not otherwise used in this block
    pytest.importorskip('gensim')
    stims = [TextStim(text='this'), TextStim(text='sentence')]
    ext = WordEmbeddingExtractor(join(TEXT_DIR, 'simple_vectors.bin'),
                                 binary=True)
    result = merge_results(ext.transform(stims))
    assert ('WordEmbeddingExtractor', 'embedding_dim99') in result.columns
    # BUG FIX: `value in series` tests the pandas *index*, not the values,
    # so the original assertion never checked the embedding. Compare the
    # first value with a tolerance instead (floats should not be compared
    # with exact membership anyway).
    assert np.allclose(0.001091,
                       result[('WordEmbeddingExtractor', 'embedding_dim0')][0],
                       atol=1e-6)
def test_part_of_speech_extractor():
    """POS extractor yields one row per word with one-hot tag columns."""
    import nltk
    nltk.download('tagsets')
    stim = ComplexTextStim(join(TEXT_DIR, 'complex_stim_with_header.txt'))
    df = merge_results(PartOfSpeechExtractor().transform(stim),
                       extractor_names=False)
    assert df.shape == (4, 52)
    assert df['NN'].sum() == 1
    assert df['VBD'][3] == 1
def test_tfhub_image_reshape():
    """TF-Hub image extractor reshapes inputs and emits 1280-dim vectors."""
    apple = ImageStim(join(IMAGE_DIR, 'apple.jpg'))
    obama = ImageStim(join(IMAGE_DIR, 'obama.jpg'))
    ext = TFHubImageExtractor(MNET_URL,
                              reshape_input=(224, 224, 3),
                              features='feature_vector')
    df = merge_results(ext.transform([apple, obama]), extractor_names=False)
    assert df.shape[0] == 2
    assert all(len(vec) == 1280 for vec in df['feature_vector'])
def test_clarifai_api_extractor_batch():
    """Batched Clarifai transform labels at least one stim as an apple."""
    image_dir = join(get_test_data_path(), 'image')
    apple = ImageStim(join(image_dir, 'apple.jpg'))
    obama = ImageStim(join(image_dir, 'obama.jpg'))
    ext = ClarifaiAPIExtractor()
    merged = merge_results(ext.transform([apple, obama]))
    scores = merged['ClarifaiAPIExtractor#apple']
    assert scores[0] > 0.5 or scores[1] > 0.5
def test_clarifai_api_extractor_large():
    """Large jobs raise when disallowed, and run when allowed.

    Duplicated stim instances merge to a single row.
    """
    default = config.get_option('allow_large_jobs')
    default_large = config.get_option('large_job')
    try:
        config.set_option('allow_large_jobs', False)
        config.set_option('large_job', 1)
        ext = ClarifaiAPIExtractor()
        images = [ImageStim(join(get_test_data_path(), 'image',
                                 'apple.jpg'))] * 2
        with pytest.raises(ValueError):
            merge_results(ext.transform(images))
        config.set_option('allow_large_jobs', True)
        results = merge_results(ext.transform(images))
        assert 'ClarifaiAPIExtractor#apple' in results.columns
        # One row, not two, because both entries are the same stim instance.
        assert results.shape == (1, 29)
    finally:
        # BUG FIX: restore the global config even if an assertion above
        # fails; otherwise later tests run with mutated options.
        config.set_option('allow_large_jobs', default)
        config.set_option('large_job', default_large)
def test_clarifai_api_extractor_large():
    """Large jobs raise when disallowed, and produce one row per distinct
    stim when allowed."""
    default = config.get_option('allow_large_jobs')
    default_large = config.get_option('large_job')
    try:
        config.set_option('allow_large_jobs', False)
        config.set_option('large_job', 1)
        ext = ClarifaiAPIImageExtractor()
        images = [ImageStim(join(IMAGE_DIR, 'apple.jpg')),
                  ImageStim(join(IMAGE_DIR, 'obama.jpg'))]
        with pytest.raises(ValueError):
            merge_results(ext.transform(images))
        config.set_option('allow_large_jobs', True)
        results = merge_results(ext.transform(images))
        assert 'ClarifaiAPIImageExtractor#apple' in results.columns
        assert results.shape == (2, 49)
    finally:
        # BUG FIX: restore the global config even if an assertion above
        # fails; otherwise later tests run with mutated options.
        config.set_option('allow_large_jobs', default)
        config.set_option('large_job', default_large)
def test_part_of_speech_extractor():
    """POS extractor output, checked after sorting rows by onset."""
    import nltk
    nltk.download('tagsets')
    stim = ComplexTextStim(join(TEXT_DIR, 'complex_stim_with_header.txt'))
    df = merge_results(PartOfSpeechExtractor().transform(stim),
                       format='wide', extractor_names=False)
    assert df.shape == (4, 54)
    assert df['NN'].sum() == 1
    df = df.sort_values('onset')
    assert df['VBD'].iloc[3] == 1
def test_tensor_flow_inception_v3_extractor():
    """Inception-V3 labels the two test images as expected."""
    image_dir = join(get_test_data_path(), 'image')
    stims = [ImageStim(join(image_dir, name))
             for name in ['apple.jpg', 'obama.jpg']]
    ext = TensorFlowInceptionV3Extractor()
    df = merge_results(ext.transform(stims))
    assert len(df) == 2
    assert df.iloc[0][('TensorFlowInceptionV3Extractor',
                       'label_1')] == 'Granny Smith'
    # NOTE(review): scores appear to be serialized as strings here.
    assert df.iloc[1][('TensorFlowInceptionV3Extractor',
                       'score_2')] == '0.17326'
def test_tfhub_text_transformer_tokens():
    """Token-level encodings have shape (128, 256) for every element."""
    cstim = ComplexTextStim(join(TEXT_DIR, 'wonderful.txt'))
    ext = TFHubTextExtractor(ELECTRA_URL,
                             features='token_encodings',
                             output_key='sequence_output',
                             preprocessor_url_or_path=TOKENIZER_URL)
    df = merge_results(ext.transform(cstim.elements[:3]),
                       extractor_names=False)
    for i in range(df.shape[0]):
        assert df['token_encodings'][i].shape == (128, 256)
def test_tfhub_text_one_feature():
    """Sentence embeddings match a direct hub.KerasLayer call; passing an
    output_key for a non-dict module raises."""
    stim = TextStim(join(TEXT_DIR, 'scandal.txt'))
    cstim = ComplexTextStim(join(TEXT_DIR, 'wonderful.txt'))
    ext = TFHubTextExtractor(GNEWS_URL, output_key=None,
                             features='embedding')
    df = merge_results(ext.transform(cstim), extractor_names=False)
    assert df.shape[0] == len(cstim.elements)
    expected = hub.KerasLayer(GNEWS_URL)(
        [cstim.elements[3].text])[0, 2].numpy()
    assert np.isclose(df['embedding'][3][2], expected)
    with pytest.raises(ValueError) as err:
        TFHubTextExtractor(GNEWS_URL, output_key='key').transform(stim)
    assert 'not a dictionary' in str(err.value)
def test_indico_api_extractor_large():
    """Large Indico jobs raise when disallowed; duplicated stim instances
    merge to a single row when allowed."""
    default = config.get_option('allow_large_jobs')
    default_large = config.get_option('large_job')
    try:
        config.set_option('allow_large_jobs', False)
        config.set_option('large_job', 1)
        ext = IndicoAPIImageExtractor(models=['fer'])
        images = [ImageStim(join(IMAGE_DIR, 'apple.jpg'))] * 2
        with pytest.raises(ValueError):
            merge_results(ext.transform(images))
        config.set_option('allow_large_jobs', True)
        results = merge_results(ext.transform(images))
        assert 'IndicoAPIImageExtractor#fer_Neutral' in results.columns
        # One row, not two, because both entries are the same stim instance.
        assert results.shape == (1, 15)
    finally:
        # BUG FIX: restore the global config even if an assertion above
        # fails; otherwise later tests run with mutated options.
        config.set_option('allow_large_jobs', default)
        config.set_option('large_job', default_large)
def test_tensor_flow_inception_v3_extractor():
    """Inception-V3 results merge wide with multi-level columns; stim
    timing metadata is preserved."""
    paths = [join(IMAGE_DIR, f) for f in ['apple.jpg', 'obama.jpg']]
    stims = [ImageStim(p, onset=4.2, duration=1) for p in paths]
    ext = TensorFlowInceptionV3Extractor()
    df = merge_results(ext.transform(stims), format='wide',
                       extractor_names='multi')
    assert len(df) == 2
    assert ('TensorFlowInceptionV3Extractor', 'Granny Smith') in df.columns
    assert 0.22610 in df[('TensorFlowInceptionV3Extractor',
                          'Windsor tie')].values
    assert 4.2 in df[('onset', np.nan)].values
    assert 1 in df[('duration', np.nan)].values
def test_indico_api_extractor_large():
    """Large Indico jobs raise when disallowed; two distinct stims yield
    two rows when allowed."""
    default = config.get_option('allow_large_jobs')
    default_large = config.get_option('large_job')
    try:
        config.set_option('allow_large_jobs', False)
        config.set_option('large_job', 1)
        ext = IndicoAPIImageExtractor(models=['fer'])
        images = [ImageStim(join(IMAGE_DIR, 'apple.jpg')),
                  ImageStim(join(IMAGE_DIR, 'obama.jpg'))]
        with pytest.raises(ValueError):
            merge_results(ext.transform(images))
        config.set_option('allow_large_jobs', True)
        results = merge_results(ext.transform(images))
        assert 'IndicoAPIImageExtractor#fer_Neutral' in results.columns
        assert results.shape == (2, 15)
    finally:
        # BUG FIX: restore the global config even if an assertion above
        # fails; otherwise later tests run with mutated options.
        config.set_option('allow_large_jobs', default)
        config.set_option('large_job', default_large)
def test_merge_extractor_results():
    """merge_results produces the documented shapes/columns for every
    combination of format and extractor_names."""
    np.random.seed(100)
    image_dir = join(get_test_data_path(), 'image')
    stims = [ImageStim(join(image_dir, 'apple.jpg')),
             ImageStim(join(image_dir, 'obama.jpg'))]
    de_names = ['Extractor1', 'Extractor2', 'Extractor3']
    extractors = [DummyExtractor(name=n) for n in de_names]
    results = [e.transform(s) for s in stims for e in extractors]
    meta_cols = ['onset', 'duration', 'order', 'class', 'filename',
                 'history', 'stim_name', 'source_file']
    # Wide format: prepended extractor names (default).
    wide = merge_results(results, format='wide')
    assert wide.shape == (200, 18)
    assert not set(meta_cols) - set(wide.columns)
    assert 'Extractor2#feature_3' in wide.columns
    # Wide format: extractor names dropped.
    wide = merge_results(results, format='wide', extractor_names='drop')
    assert wide.shape == (200, 12)
    assert not set(meta_cols) - set(wide.columns)
    assert 'feature_3' in wide.columns
    # Wide format: multi-level columns.
    wide = merge_results(results, format='wide', extractor_names='multi')
    assert wide.shape == (200, 18)
    multi_cols = [(c, np.nan) for c in meta_cols]
    assert not set(multi_cols) - set(wide.columns)
    assert ('Extractor2', 'feature_3') in wide.columns
    # 'multi' is invalid in long format.
    with pytest.raises(ValueError):
        merge_results(results, format='long', extractor_names='multi')
    # Long format: extractor as its own column.
    long_df = merge_results(results, format='long',
                            extractor_names='column')
    assert long_df.shape == (1800, 12)
    long_cols = meta_cols + ['feature', 'extractor', 'value']
    assert not set(long_cols) - set(long_df.columns)
    row = long_df.iloc[523, :]
    assert row['feature'] == 'feature_2'
    assert row['value'] == 475
    assert row['extractor'] == 'Extractor1'
    # Long format: extractor dropped.
    long_df = merge_results(results, format='long', extractor_names='drop')
    assert long_df.shape == (1800, 11)
    assert set(long_cols) - set(long_df.columns) == {'extractor'}
    # Long format: extractor prepended to the feature name.
    long_df = merge_results(results, format='long',
                            extractor_names='prepend')
    assert long_df.shape == (1800, 11)
    assert long_df.iloc[523, :]['feature'] == 'Extractor1#feature_2'
def test_merge_extractor_results():
    """merge_results: wide/long shapes and column naming schemes."""
    np.random.seed(100)
    image_dir = join(get_test_data_path(), 'image')
    apple = ImageStim(join(image_dir, 'apple.jpg'))
    obama = ImageStim(join(image_dir, 'obama.jpg'))
    de_names = ['Extractor1', 'Extractor2', 'Extractor3']
    extractors = [DummyExtractor(name=n) for n in de_names]
    results = [e.transform(apple) for e in extractors]
    results += [e.transform(obama) for e in extractors]
    meta_cols = ['onset', 'duration', 'order', 'class', 'filename',
                 'history', 'stim_name', 'source_file']
    # Wide, default (prepended) extractor names.
    merged = merge_results(results, format='wide')
    assert merged.shape == (200, 18)
    assert not set(meta_cols) - set(merged.columns)
    assert 'Extractor2#feature_3' in merged.columns
    # Wide, names dropped.
    merged = merge_results(results, format='wide', extractor_names='drop')
    assert merged.shape == (200, 12)
    assert not set(meta_cols) - set(merged.columns)
    assert 'feature_3' in merged.columns
    # Wide, multi-level columns.
    merged = merge_results(results, format='wide', extractor_names='multi')
    assert merged.shape == (200, 18)
    nan_cols = [(c, np.nan) for c in meta_cols]
    assert not set(nan_cols) - set(merged.columns)
    assert ('Extractor2', 'feature_3') in merged.columns
    # 'multi' is incompatible with long format.
    with pytest.raises(ValueError):
        merge_results(results, format='long', extractor_names='multi')
    # Long, extractor as a column.
    merged = merge_results(results, format='long', extractor_names='column')
    assert merged.shape == (1800, 12)
    long_cols = meta_cols + ['feature', 'extractor', 'value']
    assert not set(long_cols) - set(merged.columns)
    row = merged.iloc[523, :]
    assert row['feature'] == 'feature_2'
    assert row['value'] == 475
    assert row['extractor'] == 'Extractor1'
    # Long, extractor dropped.
    merged = merge_results(results, format='long', extractor_names='drop')
    assert merged.shape == (1800, 11)
    assert set(long_cols) - set(merged.columns) == {'extractor'}
    # Long, extractor prepended.
    merged = merge_results(results, format='long',
                           extractor_names='prepend')
    assert merged.shape == (1800, 11)
    assert merged.iloc[523, :]['feature'] == 'Extractor1#feature_2'
def test_tensor_flow_inception_v3_extractor():
    """Inception-V3 labels/scores plus preserved onset/duration metadata
    in the default multi-index merge."""
    image_dir = join(get_test_data_path(), 'image')
    stims = [ImageStim(join(image_dir, f), onset=4.2, duration=1)
             for f in ['apple.jpg', 'obama.jpg']]
    ext = TensorFlowInceptionV3Extractor()
    df = merge_results(ext.transform(stims))
    assert len(df) == 2
    assert 'Granny Smith' in df[('TensorFlowInceptionV3Extractor',
                                 'label_1')].values
    assert '0.22610' in df[('TensorFlowInceptionV3Extractor',
                            'score_2')].values
    assert 4.2 in df[('onset', '')].values
    assert 1 in df[('duration', '')].values
def test_tensorflow_keras_inception_v3_extractor():
    """Keras Inception-V3: predicted class probabilities and preserved
    stim timing metadata."""
    imgs = [join(IMAGE_DIR, f) for f in ['apple.jpg', 'obama.jpg']]
    imgs = [ImageStim(im, onset=4.2, duration=1) for im in imgs]
    ext = TensorFlowKerasInceptionV3Extractor()
    results = ext.transform(imgs)
    df = merge_results(results, format='wide', extractor_names='multi')
    assert df.shape == (2, 19)
    # BUG FIX: the original discarded the np.isclose(...) results (no
    # assert), so these two probability checks were never enforced.
    pred = df['TensorFlowKerasInceptionV3Extractor'].loc[0, 'Granny_Smith']
    assert np.isclose(0.9737075, pred, 1e-05)
    pred = df['TensorFlowKerasInceptionV3Extractor'].loc[1, 'Windsor_tie']
    assert np.isclose(0.64234024, pred, 1e-05)
    assert 4.2 in df[('onset', np.nan)].values
    assert 1 in df[('duration', np.nan)].values
def test_merge_extractor_results():
    """Legacy merge_results API: multi-index column levels and stim names."""
    np.random.seed(100)
    image_dir = join(get_test_data_path(), 'image')
    apple = ImageStim(join(image_dir, 'apple.jpg'))
    obama = ImageStim(join(image_dir, 'obama.jpg'))
    de = DummyExtractor()
    de_names = ['Extractor1', 'Extractor2', 'Extractor3']
    results = [de.transform(apple, name) for name in de_names]
    results += [de.transform(obama, name) for name in de_names]
    df = merge_results(results)
    assert df.shape == (355, 14)
    cols = ['onset', 'class', 'filename', 'history', 'stim']
    assert df.columns.levels[0].unique().tolist() == de_names + cols
    assert df.columns.levels[1].unique().tolist() == ['duration', 0, 1, 2, '']
    assert set(df['stim'].unique()) == {'obama.jpg', 'apple.jpg'}
def run(self, stim, merge=True, **merge_kwargs):
    ''' Executes the graph by calling all Transformers in sequence.
    Args:
        stim (str, Stim, list): One or more valid inputs to any
            Transformer's 'transform' call.
        merge (bool): If True, all results are merged into a single pandas
            DataFrame before being returned. If False, a list of
            ExtractorResult objects is returned (one per Extractor/Stim
            combination).
        merge_kwargs: Optional keyword arguments to pass onto the
            merge_results() call.
    '''
    per_root = [self.run_node(node, stim) for node in self.roots]
    results = list(flatten(chain(*per_root)))
    # Cache results on the instance for later plotting.
    self._results = results
    if merge:
        return merge_results(results, **merge_kwargs)
    return results
def test_merge_extractor_results_flattened():
    """flatten_columns=True yields Extractor{N}_{i} feature column names."""
    np.random.seed(100)
    image_dir = join(get_test_data_path(), 'image')
    apple = ImageStim(join(image_dir, 'apple.jpg'))
    obama = ImageStim(join(image_dir, 'obama.jpg'))
    de_names = ['Extractor1', 'Extractor2', 'Extractor3']
    extractors = [DummyExtractor(name=n) for n in de_names]
    results = [e.transform(apple) for e in extractors]
    results += [e.transform(obama) for e in extractors]
    df = merge_results(results, flatten_columns=True)
    # Expected flattened feature columns: Extractor1_0 ... Extractor3_2.
    de_cols = [f'{name}_{i}' for name in de_names for i in range(3)]
    assert df.shape == (354, 16)
    meta_cols = ['onset', 'class', 'filename', 'history', 'stim_name',
                 'duration', 'source_file']
    assert set(df.columns.unique().tolist()) == set(meta_cols + de_cols)
def test_tfhub_text_transformer_sentence():
    """Sentence encodings match a direct preprocessor+model call; an
    unknown output_key raises with a helpful message."""
    stim = TextStim(join(TEXT_DIR, 'scandal.txt'))
    cstim = ComplexTextStim(join(TEXT_DIR, 'wonderful.txt'))
    ext = TFHubTextExtractor(ELECTRA_URL, features='sent_encoding',
                             preprocessor_url_or_path=TOKENIZER_URL)
    df = merge_results(ext.transform(cstim.elements[:6]),
                       extractor_names=False)
    preprocessor = hub.KerasLayer(TOKENIZER_URL)
    model = hub.KerasLayer(ELECTRA_URL)
    expected = model(preprocessor([cstim.elements[5].text]))[
        'pooled_output'][0, 20].numpy()
    assert np.isclose(df['sent_encoding'][5][20], expected)
    with pytest.raises(ValueError) as err:
        TFHubTextExtractor(ELECTRA_URL,
                           preprocessor_url_or_path=TOKENIZER_URL,
                           output_key='key').transform(stim)
    assert 'Check which keys' in str(err.value)
def test_vectorizer_extractor():
    """Text vectorizer: default CountVectorizer, a custom sklearn
    vectorizer instance, and a by-name vectorizer with kwargs."""
    import numpy as np  # local import: np not otherwise used in this block
    pytest.importorskip('sklearn')
    stim = TextStim(join(TEXT_DIR, 'scandal.txt'))
    result = TextVectorizerExtractor().transform(stim).to_df()
    assert 'woman' in result.columns
    assert result['woman'][0] == 3
    # Custom sklearn vectorizer instance.
    from sklearn.feature_extraction.text import TfidfVectorizer
    custom_vectorizer = TfidfVectorizer()
    ext = TextVectorizerExtractor(vectorizer=custom_vectorizer)
    stim2 = TextStim(join(TEXT_DIR, 'simple_text.txt'))
    result = merge_results(ext.transform([stim, stim2]))
    assert ('TextVectorizerExtractor', 'woman') in result.columns
    # BUG FIX: `value in series` tests the pandas *index*, not the values,
    # so the original tf-idf assertion never checked the score. Compare
    # the first value with a tolerance instead.
    assert np.allclose(0.129568189476,
                       result[('TextVectorizerExtractor', 'woman')][0])
    # Vectorizer selected by name, with constructor kwargs forwarded.
    ext = TextVectorizerExtractor(vectorizer='CountVectorizer',
                                  analyzer='char_wb', ngram_range=(2, 2))
    result = ext.transform(stim).to_df()
    assert 'wo' in result.columns
    assert result['wo'][0] == 6
def test_indico_api_text_extractor():
    """Indico text API: key validation, column schema for complex and
    simple text stims, and rejection of a bad API key."""
    ext = IndicoAPITextExtractor(api_key=os.environ['INDICO_APP_KEY'],
                                 models=['emotion', 'personality'])
    assert ext.validate_keys()
    # ComplexTextStim input (srt transcript with a global onset shift).
    srt_path = join(get_test_data_path(), 'text', 'wonderful.srt')
    srt_stim = ComplexTextStim(srt_path, onset=4.2)
    merged = merge_results(ext.transform(srt_stim), extractor_names=False)
    expected_keys = {'onset', 'duration', 'order', 'object_id',
                     'emotion_anger', 'emotion_fear', 'emotion_joy',
                     'emotion_sadness', 'emotion_surprise',
                     'personality_openness', 'personality_extraversion',
                     'personality_agreeableness',
                     'personality_conscientiousness'}
    meta_columns = {'source_file', 'history', 'class', 'filename'}
    assert set(merged.columns) - {'stim_name'} == expected_keys | meta_columns
    assert merged['onset'][1] == 92.622
    # Plain TextStim input.
    ts = TextStim(text="It's a wonderful life.")
    df = ext.transform(ts).to_df(object_id=True)
    assert set(df.columns) == expected_keys
    assert len(df) == 1
    # An invalid key must fail validation.
    ext = IndicoAPITextExtractor(api_key='nogood', models=['language'])
    assert not ext.validate_keys()
def test_vectorizer_extractor():
    """Text vectorizer: default, custom sklearn instance, and by-name
    vectorizer with kwargs (wide multi-column merge)."""
    pytest.importorskip('sklearn')
    stim = TextStim(join(TEXT_DIR, 'scandal.txt'))
    df = TextVectorizerExtractor().transform(stim).to_df()
    assert 'woman' in df.columns
    assert df['woman'][0] == 3
    # Custom sklearn vectorizer instance.
    from sklearn.feature_extraction.text import TfidfVectorizer
    ext = TextVectorizerExtractor(vectorizer=TfidfVectorizer())
    stim2 = TextStim(join(TEXT_DIR, 'simple_text.txt'))
    merged = merge_results(ext.transform([stim, stim2]), format='wide',
                           extractor_names='multi')
    assert ('TextVectorizerExtractor', 'woman') in merged.columns
    assert np.allclose(0.129568189476,
                       merged[('TextVectorizerExtractor', 'woman')][0])
    # Vectorizer selected by name, with constructor kwargs forwarded.
    ext = TextVectorizerExtractor(vectorizer='CountVectorizer',
                                  analyzer='char_wb', ngram_range=(2, 2))
    df = ext.transform(stim).to_df()
    assert 'wo' in df.columns
    assert df['wo'][0] == 6
def test_merge_extractor_results():
    """merge_results: shapes/columns for all format x extractor_names
    combinations, exclusion of log attributes from features, and
    serialization of extractor params."""
    np.random.seed(100)
    image_dir = join(get_test_data_path(), 'image')
    stim1 = ImageStim(join(image_dir, 'apple.jpg'))
    stim2 = ImageStim(join(image_dir, 'obama.jpg'))
    de_names = ['Extractor1', 'Extractor2', 'Extractor3']
    des = [DummyExtractor(name=name) for name in de_names]
    # Names that must never show up as features in long format.
    not_features = ['object_id']
    for de in des:
        # BUG FIX: extend, not append. Appending inserted the whole
        # _log_attributes container as a single element, so the
        # set-intersection checks below were vacuous (they compared the
        # container object, not the individual attribute names).
        not_features.extend(de._log_attributes)
    results = [de.transform(stim1) for de in des]
    results += [de.transform(stim2) for de in des]
    df = merge_results(results, format='wide')
    assert df.shape == (200, 18)
    cols = ['onset', 'duration', 'order', 'class', 'filename', 'history',
            'stim_name', 'source_file']
    assert not set(cols) - set(df.columns)
    assert 'Extractor2#feature_3' in df.columns
    df = merge_results(results, format='wide', extractor_names='drop')
    assert df.shape == (200, 12)
    assert not set(cols) - set(df.columns)
    assert 'feature_3' in df.columns
    df = merge_results(results, format='wide', extractor_names='multi')
    assert df.shape == (200, 18)
    _cols = [(c, np.nan) for c in cols]
    assert not set(_cols) - set(df.columns)
    assert ('Extractor2', 'feature_3') in df.columns
    with pytest.raises(ValueError):
        merge_results(results, format='long', extractor_names='multi')
    df = merge_results(results, format='long', extractor_names='column')
    assert df.shape == (1800, 12)
    _cols = cols + ['feature', 'extractor', 'value']
    assert not set(_cols) - set(df.columns)
    row = df.iloc[523, :]
    assert row['feature'] == 'feature_2'
    assert row['value'] == 475
    assert row['extractor'] == 'Extractor1'
    assert not set(not_features).intersection(set(df['feature']))
    df = merge_results(results, format='long', extractor_names='drop')
    assert df.shape == (1800, 11)
    assert set(_cols) - set(df.columns) == {'extractor'}
    assert not set(not_features).intersection(set(df['feature']))
    df = merge_results(results, format='long', extractor_names='prepend')
    assert df.shape == (1800, 11)
    row = df.iloc[523, :]
    assert row['feature'] == 'Extractor1#feature_2'
    assert not set(not_features).intersection(set(df['feature']))
    # Log attributes are serialized per feature when extractor_params=True.
    df = merge_results(results, format='wide', extractor_params=True)
    logattr = {}
    for de in des:
        # Stores log attributes expected for each extractor.
        logattr[de.name] = de._log_attributes
    # NOTE(review): `de` here is deliberately the last extractor from the
    # loop above, matching the original code's behavior.
    for feat in ['feature_1', 'feature_2', 'feature_3']:
        idx_str = f'{de.name}#{feat}#extractor_params'
        assert idx_str in df.columns
        df_log_attr = json.loads(df[idx_str][0])
        for l in logattr[de.name]:
            assert l in df_log_attr.keys()
    df = merge_results(results, format='long', extractor_params=True)
    for idx, row in df.iterrows():
        de_name = row['feature'].split('#')[0]
        logs = logattr[de_name]
        df_logs = row['extractor_params']
        for l in logs:
            assert l in json.loads(df_logs).keys()
def run(self, stim, merge=True, **merge_kwargs):
    ''' Executes the graph by calling all Transformers in sequence.
    Args:
        stim (str, Stim, list): One or more valid inputs to any
            Transformer's 'transform' call.
        merge (bool): If True, all results are merged into a single pandas
            DataFrame before being returned. If False, a list of
            ExtractorResult objects is returned (one per Extractor/Stim
            combination).
        merge_kwargs: Optional keyword arguments to pass onto the
            merge_results() call.
    '''
    # IMPROVEMENT: accept **merge_kwargs and forward them to
    # merge_results(), matching the sibling run() implementation in this
    # file; backward-compatible (no kwargs -> identical behavior).
    results = list(chain(*[self.run_node(n, stim) for n in self.roots]))
    results = list(flatten(results))
    self._results = results  # For use in plotting
    return merge_results(results, **merge_kwargs) if merge else results