def test_big_pipeline():
    """Run a combined visual/audio graph on a video and check the merged output.

    The graph is assembled from two node lists added separately: a frame
    sampling branch (OCR + length, vibrance, brightness) and an audio branch
    named 'video_to_audio'. After running, the graph is drawn to a file and
    the wide-format merged results are checked for the expected columns and
    stim names.

    Skipped when pygraphviz is not importable.
    """
    # NOTE(review): SOURCE contains a second function with this same name
    # further down; if both live in one module the later definition replaces
    # this one -- confirm which is intended to run.
    pytest.importorskip('pygraphviz')
    filename = join(get_test_data_path(), 'video', 'obama_speech.mp4')
    video = VideoStim(filename)
    visual_nodes = [(FrameSamplingFilter(every=15), [
        (TesseractConverter(), [LengthExtractor()]),
        VibranceExtractor(),
        'BrightnessExtractor',
    ])]
    audio_nodes = [(VideoToAudioConverter(),
                    [WitTranscriptionConverter(), 'LengthExtractor'],
                    'video_to_audio')]
    graph = Graph()
    graph.add_nodes(visual_nodes)
    graph.add_nodes(audio_nodes)
    results = graph.run(video, merge=False)
    result = merge_results(results, format='wide', extractor_names='multi')

    # Test that pygraphviz outputs a file. The drawing goes into a private
    # temporary directory (public tempfile API) and is cleaned up even when
    # drawing or the existence check fails.
    drawdir = tempfile.mkdtemp()
    drawfile = join(drawdir, 'graph')
    try:
        graph.draw(drawfile)
        assert exists(drawfile)
    finally:
        if exists(drawfile):
            os.remove(drawfile)
        os.rmdir(drawdir)

    assert ('LengthExtractor', 'text_length') in result.columns
    assert ('VibranceExtractor', 'vibrance') in result.columns
    # assert not result[('onset', '')].isnull().any()
    assert 'text[negotiations]' in result['stim_name'].values
    assert 'frame[90]' in result['stim_name'].values
def test_small_pipeline():
    """OCR one image through a converter -> extractor graph.

    Checks the history table of the first raw result, then the wide-format
    merge with extractor names prepended to the feature column.

    Skipped when pytesseract is not importable.
    """
    pytest.importorskip('pytesseract')

    image_path = join(get_test_data_path(), 'image', 'button.jpg')
    image = ImageStim(image_path)
    pipeline = Graph([(TesseractConverter(), [LengthExtractor()])])
    raw_results = list(pipeline.run([image], merge=False))

    # History of the first result: expected to be a 2 x 8 table whose first
    # row records a TextStim result.
    log = raw_results[0].history.to_df()
    assert log.shape == (2, 8)
    assert log.iloc[0]['result_class'] == 'TextStim'

    merged = merge_results(raw_results, format='wide',
                           extractor_names='prepend')
    length_column = 'LengthExtractor#text_length'
    assert (0, 'text[Exit]') in merged['stim_name'].values
    assert length_column in merged.columns
    assert merged[length_column].values[0] == 4
def test_small_pipeline2():
    """Run two root-level image extractors on a file path.

    The graph is given the image path directly (not a stim object). The
    history of the first result is expected to be a 1 x 8 table, and the
    multi-level merged columns must hold the known brightness and vibrance
    values to 5 decimals.
    """
    image_path = join(get_test_data_path(), 'image', 'button.jpg')
    pipeline = Graph([BrightnessExtractor(), VibranceExtractor()])
    raw_results = list(pipeline.run([image_path], merge=False))

    log = raw_results[0].history.to_df()
    assert log.shape == (1, 8)

    merged = merge_results(raw_results, format='wide',
                           extractor_names='multi')
    brightness_key = ('BrightnessExtractor', 'brightness')
    vibrance_key = ('VibranceExtractor', 'vibrance')

    assert brightness_key in merged.columns
    brightness = merged[brightness_key].values[0]
    vibrance = merged[vibrance_key].values[0]
    assert_almost_equal(brightness, 0.746965, 5)
    assert vibrance_key in merged.columns
    assert_almost_equal(vibrance, 841.577274, 5)
def test_small_pipeline_json_spec2():
    """Build the graph from the 'simple_graph.json' spec file and run it.

    Checks the history table of the first raw result and the multi-level
    wide-format merge for the text length feature.

    Skipped when pytesseract is not importable.
    """
    pytest.importorskip('pytesseract')

    data_dir = get_test_data_path()
    image = ImageStim(join(data_dir, 'image', 'button.jpg'))
    spec_path = join(get_test_data_path(), 'graph', 'simple_graph.json')
    pipeline = Graph(spec=spec_path)
    raw_results = list(pipeline.run([image], merge=False))

    log = raw_results[0].history.to_df()
    assert log.shape == (2, 8)
    assert log.iloc[0]['result_class'] == 'TextStim'

    merged = merge_results(raw_results, format='wide',
                           extractor_names='multi')
    length_key = ('LengthExtractor', 'text_length')
    assert (0, 'text[Exit]') in merged['stim_name'].values
    assert length_key in merged.columns
    assert merged[length_key].values[0] == 4
def test_save_graph():
    """Save a graph to disk, reload it, and check the round trip.

    The reloaded graph must serialize to the same JSON as the original and
    must still produce the expected text length when run on the button image.
    """
    graph = Graph(
        spec=join(get_test_data_path(), 'graph', 'simple_graph.json'))

    # mkstemp() returns an already-open OS-level descriptor along with the
    # path. Close it right away: the test only needs the path, and leaving it
    # open leaks the descriptor (and prevents removal on Windows).
    fd, filename = tempfile.mkstemp()
    os.close(fd)
    try:
        graph.save(filename)
        assert os.path.exists(filename)
        same_graph = Graph(spec=filename)
    finally:
        # Remove the temporary file even if saving or reloading fails.
        os.remove(filename)

    assert graph.to_json() == same_graph.to_json()
    img = join(get_test_data_path(), 'image', 'button.jpg')
    res = same_graph.run(img)
    assert res['LengthExtractor#text_length'][0] == 4
def test_big_pipeline_json():
    """Run a visual/audio graph defined as a nested dict on a video.

    The dict has a 'roots' list with two trees: a frame sampling branch
    (OCR + length, vibrance, brightness) and a video-to-audio branch
    (transcription + length). After running, the graph is drawn to a file and
    the wide-format merged results are checked for the expected columns and
    stim names.

    Skipped when pygraphviz is not importable.
    """
    pytest.importorskip('pygraphviz')
    filename = join(get_test_data_path(), 'video', 'obama_speech.mp4')
    video = VideoStim(filename)
    nodes = {
        "roots": [{
            "transformer": "FrameSamplingFilter",
            "parameters": {
                "every": 15
            },
            "children": [{
                "transformer": "TesseractConverter",
                "children": [{
                    "transformer": "LengthExtractor"
                }]
            }, {
                "transformer": "VibranceExtractor"
            }, {
                "transformer": "BrightnessExtractor"
            }]
        }, {
            "transformer": "VideoToAudioConverter",
            "children": [{
                "transformer": "WitTranscriptionConverter",
                "children": [{
                    "transformer": "LengthExtractor"
                }]
            }]
        }]
    }
    graph = Graph(nodes)
    results = graph.run(video, merge=False)
    result = merge_results(results, format='wide', extractor_names='multi')

    # Test that pygraphviz outputs a file. The drawing goes into a private
    # temporary directory (public tempfile API) and is cleaned up even when
    # drawing or the existence check fails.
    drawdir = tempfile.mkdtemp()
    drawfile = join(drawdir, 'graph')
    try:
        graph.draw(drawfile)
        assert exists(drawfile)
    finally:
        if exists(drawfile):
            os.remove(drawfile)
        os.rmdir(drawdir)

    assert ('LengthExtractor', 'text_length') in result.columns
    assert ('VibranceExtractor', 'vibrance') in result.columns
    # assert not result[('onset', '')].isnull().any()
    assert 'text[negotiations]' in result['stim_name'].values
    assert 'frame[90]' in result['stim_name'].values
def test_node_arg_parsing():
    """Check Graph._parse_node_args on each accepted node specification.

    Covers a bare string, a one-element list, a (transformer, children)
    tuple, a (transformer, children, name) tuple, and a dict.
    """
    # A bare name and a single-item list must parse to the same thing.
    from_string = Graph._parse_node_args('MyLovelyExtractor')
    from_list = Graph._parse_node_args(['MyLovelyExtractor'])
    assert from_string == from_list == {'transformer': 'MyLovelyExtractor'}

    # Two-element tuple: transformer plus (empty) children.
    parsed = Graph._parse_node_args(('saliencyextractor', []))
    assert set(parsed.keys()) == {'transformer', 'children'}

    # Three-element tuple: transformer, children, name.
    named_node = ('saliencyextractor', ['child1', 'child2'], 'my_name')
    parsed = Graph._parse_node_args(named_node)
    assert set(parsed.keys()) == {'transformer', 'name', 'children'}
    assert len(parsed['children']) == 2

    # A dict is expected to come back equal to the input.
    dict_node = {'transformer': '...', 'name': '...'}
    parsed = Graph._parse_node_args(dict_node)
    assert parsed == dict_node
def test_multiple_text_filters():
    """Chain text filters by hand and check the resulting stims.

    First chain: tokenize, then stem. Second chain: lower-case, strip
    punctuation, then tokenize, checking both text and order of the tokens.
    """
    sentence = TextStim(text='testing the filtering features')
    tokenizer = TokenizingFilter()
    stemmer = WordStemmingFilter()
    stemmed_tokens = stemmer.transform(tokenizer.transform(sentence))
    joined = ' '.join(token.text for token in stemmed_tokens)
    assert joined == 'test the filter featur'

    heading = TextStim(text='ARTICLE ONE: Rights')
    # Adding a single node to an empty graph is exercised here as well; the
    # graph itself is not run.
    g = Graph()
    g.add_node(LowerCasingFilter())

    lower = LowerCasingFilter()
    depunct = PunctuationRemovalFilter()
    tokenizer = TokenizingFilter()
    lowered = lower.transform(heading)
    cleaned = depunct.transform(lowered)
    final_texts = tokenizer.transform(cleaned)

    assert len(final_texts) == 3
    first, second, third = final_texts[0], final_texts[1], final_texts[2]
    assert first.text == 'article'
    assert first.order == 0
    assert second.text == 'one'
    assert third.text == 'rights'
    assert third.order == 2
def test_small_pipeline_json_spec():
    """Build the OCR -> length graph from an in-memory dict spec and run it.

    Checks the history table of the first raw result and the multi-level
    wide-format merge for the text length feature.

    Skipped when pytesseract is not importable.
    """
    pytest.importorskip('pytesseract')

    image = ImageStim(join(get_test_data_path(), 'image', 'button.jpg'))
    length_node = {"transformer": "LengthExtractor", "children": []}
    ocr_node = {"transformer": "TesseractConverter",
                "children": [length_node]}
    pipeline = Graph({"roots": [ocr_node]})
    raw_results = list(pipeline.run([image], merge=False))

    log = raw_results[0].history.to_df()
    assert log.shape == (2, 8)
    assert log.iloc[0]['result_class'] == 'TextStim'

    merged = merge_results(raw_results, format='wide',
                           extractor_names='multi')
    length_key = ('LengthExtractor', 'text_length')
    assert (0, 'text[Exit]') in merged['stim_name'].values
    assert length_key in merged.columns
    assert merged[length_key].values[0] == 4
def test_graph_scikit():
    """Wrap a spec-file graph in PliersTransformer and fit/transform images.

    Two image paths go in; the output must be a 2 x 1 array in which at
    least one entry equals 4, and the transformer's metadata_ must carry a
    'history' column.

    Skipped when pytesseract or sklearn is not importable.
    """
    pytest.importorskip('pytesseract')
    pytest.importorskip('sklearn')

    image_dir = join(get_test_data_path(), 'image')
    image_paths = [join(image_dir, 'apple.jpg'),
                   join(image_dir, 'button.jpg')]
    spec_path = join(get_test_data_path(), 'graph', 'simple_graph.json')
    transformer = PliersTransformer(Graph(spec=spec_path))

    features = transformer.fit_transform(image_paths)
    assert features.shape == (2, 1)
    first_value, second_value = features[0][0], features[1][0]
    assert first_value == 4 or second_value == 4

    metadata = transformer.metadata_
    assert 'history' in metadata.columns
    expected_history = 'ImageStim->TesseractConverter/TextStim'
    assert metadata['history'][1] == expected_history
def test_within_pipeline():
    """Use PliersTransformer as a step inside a scikit-learn Pipeline.

    A two-extractor graph feeds a Normalizer; the normalized 1 x 2 output is
    compared against known values, and the transformer's metadata_ is
    checked afterwards.

    Skipped when cv2 or sklearn is not importable.
    """
    pytest.importorskip('cv2')
    pytest.importorskip('sklearn')
    # Imported here so the skips above take effect before sklearn is needed.
    from sklearn.pipeline import Pipeline
    from sklearn.preprocessing import Normalizer

    image_path = join(get_test_data_path(), 'image', 'apple.jpg')
    transformer = PliersTransformer(
        Graph([BrightnessExtractor(), SharpnessExtractor()]))
    steps = [('pliers', transformer), ('normalizer', Normalizer())]
    features = Pipeline(steps).fit_transform(image_path)

    assert features.shape == (1, 2)
    # The third positional argument of np.isclose is the relative tolerance.
    row = features[0]
    assert np.isclose(row[0], 0.66393, rtol=1e-5)
    assert np.isclose(row[1], 0.74780, rtol=1e-5)

    metadata = transformer.metadata_
    assert 'onset' in metadata.columns
    assert metadata['class'][0] == 'ImageStim'
def test_to_json():
    """Check that Graph.to_json() reproduces the spec the graph was built from.

    Covers a graph built from an in-memory dict and one built from the
    'simple_graph.json' spec file; the latter is then run on an image to
    confirm it is still usable.
    """
    visual_branch = {
        "transformer": "FrameSamplingFilter",
        "parameters": {"every": 15},
        "children": [
            {
                "transformer": "TesseractConverter",
                "children": [{"transformer": "LengthExtractor"}],
            },
            {"transformer": "VibranceExtractor"},
            {"transformer": "BrightnessExtractor"},
        ],
    }
    audio_branch = {
        "transformer": "VideoToAudioConverter",
        "children": [{
            "transformer": "WitTranscriptionConverter",
            "children": [{"transformer": "LengthExtractor"}],
        }],
    }
    full_spec = {"roots": [visual_branch, audio_branch]}
    assert Graph(full_spec).to_json() == full_spec

    spec_path = join(get_test_data_path(), 'graph', 'simple_graph.json')
    file_graph = Graph(spec=spec_path)
    expected_simple = {
        "roots": [{
            "transformer": "TesseractConverter",
            "children": [{"transformer": "LengthExtractor"}],
        }]
    }
    assert file_graph.to_json() == expected_simple

    image_path = join(get_test_data_path(), 'image', 'button.jpg')
    merged = file_graph.run(image_path)
    assert merged['LengthExtractor#text_length'][0] == 4
def test_big_pipeline():
    """Run a combined visual/audio graph on a video with default merging.

    Two node lists are added to an empty graph (a frame sampling branch and
    an audio branch named 'video_to_audio'); the merged result is checked for
    the expected multi-level columns and 'stim' values.
    """
    # NOTE(review): SOURCE contains another function with this same name
    # earlier on; if both live in one module this later definition replaces
    # it -- confirm which is intended to run.
    video_path = join(get_test_data_path(), 'video', 'obama_speech.mp4')
    video = VideoStim(video_path)

    frame_children = [
        (TesseractConverter(), [LengthExtractor()]),
        VibranceExtractor(),
        'BrightnessExtractor',
    ]
    visual_nodes = [(FrameSamplingConverter(every=15), frame_children)]
    audio_children = [WitTranscriptionConverter(), 'LengthExtractor']
    audio_nodes = [(VideoToAudioConverter(), audio_children,
                    'video_to_audio')]

    graph = Graph()
    for node_list in (visual_nodes, audio_nodes):
        graph.add_nodes(node_list)
    merged = graph.run(video)

    assert ('LengthExtractor', 'text_length') in merged.columns
    assert ('VibranceExtractor', 'vibrance') in merged.columns
    # assert not result[('onset', '')].isnull().any()
    stim_values = merged['stim'].values
    assert 'text[negotiations]' in stim_values
    assert 'frame[90]' in stim_values
```
from pliers.graph import Graph

# Define nodes
nodes = [
    (FrameSamplingFilter(hertz=0.1),
         ['FaceRecognitionFaceLandmarksExtractor', 'BrightnessExtractor']),
    (STFTAudioExtractor(freq_bins=[(100, 300)])),
    ('RevAISpeechAPIConverter',
         [PredefinedDictionaryExtractor(['affect/V.Mean.Sum','subtlexusfrequency/Lg10WF']),
          'VADERSentimentExtractor'])
]

# Initialize and execute Graph
g = Graph(nodes)

# Arguments to merge_results can be passed in here
df = g.transform(sherlock_video)
```

This short example demonstrates a powerful way to express complex extraction workflows in *pliers*. If you'd like to learn more about the Graph API, you can head over to the [documentation](http://tyarkoni.github.io/pliers/graphs.html).

# Where to go from here?

*Pliers* is a powerful yet easy-to-use Python library for multi-modal feature extraction. In this short tutorial, you've seen how to use *pliers* to modify, convert and extract features from stimuli.

To see the full range of `Transformers` implemented in *pliers*, [refer to this listing](http://tyarkoni.github.io/pliers/transformers.html)
def test_stim_results():
    """Check graph outputs when the leaves are stims rather than features.

    A vertical chain of three text filters is run unmerged and its final
    stims inspected. A second graph hangs a tokenizer and a length extractor
    under a named punctuation-removal node; its merged run must return a
    DataFrame, and running with invalid_results='fail' must raise ValueError.
    """
    sentence = TextStim(text='some, example the text.')

    filter_chain = ['PunctuationRemovalFilter', 'TokenRemovalFilter',
                    'TokenizingFilter']
    chain_graph = Graph()
    chain_graph.add_nodes(filter_chain, mode='vertical')
    final_stims = chain_graph.run(sentence, merge=False)
    assert len(final_stims) == 2
    assert final_stims[1].text == 'text'

    punc_node = Node('PunctuationRemovalFilter', name='punc')
    branched_graph = Graph([punc_node])
    branched_graph.add_nodes(['TokenizingFilter', 'LengthExtractor'],
                             parent=punc_node)
    merged = branched_graph.run(sentence)
    assert isinstance(merged, pd.DataFrame)
    assert merged['LengthExtractor#text_length'][0] == 21

    with pytest.raises(ValueError):
        branched_graph.run(sentence, invalid_results='fail')
def test_add_children():
    """Adding three extractor instances to an empty graph yields three roots.

    Every root must be wrapped as a Node.
    """
    extractors = [DummyExtractor(), DummyExtractor(), DummyExtractor()]
    graph = Graph()
    graph.add_nodes(extractors)

    roots = graph.roots
    assert len(roots) == 3
    assert all(isinstance(root, Node) for root in roots)
def test_adding_nodes():
    """Exercise add_children, add_chain and an invalid add_nodes mode.

    Two sibling extractors added with add_children become childless roots
    and produce the known vibrance/brightness values; a two-step chain added
    with add_chain yields a single result; an unknown mode passed to
    add_nodes must raise ValueError.
    """
    sibling_graph = Graph()
    sibling_graph.add_children(['VibranceExtractor', 'BrightnessExtractor'])
    assert len(sibling_graph.roots) == 2
    assert len(sibling_graph.nodes) == 2
    for root in sibling_graph.roots:
        assert len(root.children) == 0

    image = ImageStim(join(get_test_data_path(), 'image', 'button.jpg'))
    image_results = sibling_graph.run(image, merge=False)
    assert len(image_results) == 2
    vibrance = image_results[0].to_df()['vibrance'][0]
    assert_almost_equal(vibrance, 841.577274, 5)
    brightness = image_results[1].to_df()['brightness'][0]
    assert_almost_equal(brightness, 0.746965, 5)

    chain_graph = Graph()
    chain_graph.add_chain(['PunctuationRemovalFilter', 'LengthExtractor'])
    text = TextStim(text='the.best.text.')
    text_results = chain_graph.run(text, merge=False)
    assert len(text_results) == 1
    assert text_results[0].to_df()['text_length'][0] == 11

    with pytest.raises(ValueError):
        chain_graph.add_nodes(['LengthExtractor'], mode='invalid')