Example #1
def test_big_pipeline():
    pytest.importorskip('pygraphviz')
    filename = join(get_test_data_path(), 'video', 'obama_speech.mp4')
    video = VideoStim(filename)
    visual_nodes = [(FrameSamplingFilter(every=15), [
        (TesseractConverter(), [LengthExtractor()]),
        VibranceExtractor(),
        'BrightnessExtractor',
    ])]
    audio_nodes = [(VideoToAudioConverter(),
                    [WitTranscriptionConverter(),
                     'LengthExtractor'], 'video_to_audio')]
    graph = Graph()
    graph.add_nodes(visual_nodes)
    graph.add_nodes(audio_nodes)
    results = graph.run(video, merge=False)
    result = merge_results(results, format='wide', extractor_names='multi')
    # Test that pygraphviz outputs a file
    drawfile = next(tempfile._get_candidate_names())
    graph.draw(drawfile)
    assert exists(drawfile)
    os.remove(drawfile)
    assert ('LengthExtractor', 'text_length') in result.columns
    assert ('VibranceExtractor', 'vibrance') in result.columns
    # assert not result[('onset', '')].isnull().any()
    assert 'text[negotiations]' in result['stim_name'].values
    assert 'frame[90]' in result['stim_name'].values
Example #2
def test_small_pipeline():
    pytest.importorskip('pytesseract')
    filename = join(get_test_data_path(), 'image', 'button.jpg')
    stim = ImageStim(filename)
    nodes = [(TesseractConverter(), [LengthExtractor()])]
    graph = Graph(nodes)
    result = list(graph.run([stim], merge=False))
    history = result[0].history.to_df()
    assert history.shape == (2, 8)
    assert history.iloc[0]['result_class'] == 'TextStim'
    result = merge_results(result, format='wide', extractor_names='prepend')
    assert (0, 'text[Exit]') in result['stim_name'].values
    assert 'LengthExtractor#text_length' in result.columns
    assert result['LengthExtractor#text_length'].values[0] == 4
Example #3
def test_small_pipeline2():
    filename = join(get_test_data_path(), 'image', 'button.jpg')
    nodes = [BrightnessExtractor(), VibranceExtractor()]
    graph = Graph(nodes)
    result = list(graph.run([filename], merge=False))
    history = result[0].history.to_df()
    assert history.shape == (1, 8)
    result = merge_results(result, format='wide', extractor_names='multi')
    assert ('BrightnessExtractor', 'brightness') in result.columns
    brightness = result[('BrightnessExtractor', 'brightness')].values[0]
    vibrance = result[('VibranceExtractor', 'vibrance')].values[0]
    assert_almost_equal(brightness, 0.746965, 5)
    assert ('VibranceExtractor', 'vibrance') in result.columns
    assert_almost_equal(vibrance, 841.577274, 5)
Example #4
def test_small_pipeline_json_spec2():
    pytest.importorskip('pytesseract')
    filename = join(get_test_data_path(), 'image', 'button.jpg')
    stim = ImageStim(filename)
    spec = join(get_test_data_path(), 'graph', 'simple_graph.json')
    graph = Graph(spec=spec)
    result = list(graph.run([stim], merge=False))
    history = result[0].history.to_df()
    assert history.shape == (2, 8)
    assert history.iloc[0]['result_class'] == 'TextStim'
    result = merge_results(result, format='wide', extractor_names='multi')
    assert (0, 'text[Exit]') in result['stim_name'].values
    assert ('LengthExtractor', 'text_length') in result.columns
    assert result[('LengthExtractor', 'text_length')].values[0] == 4
Example #5
def test_save_graph():
    graph = Graph(
        spec=join(get_test_data_path(), 'graph', 'simple_graph.json'))
    filename = tempfile.mkstemp()[1]
    graph.save(filename)
    assert os.path.exists(filename)
    same_graph = Graph(spec=filename)
    os.remove(filename)
    assert graph.to_json() == same_graph.to_json()
    img = join(get_test_data_path(), 'image', 'button.jpg')
    res = same_graph.run(img)
    assert res['LengthExtractor#text_length'][0] == 4
Example #6
def test_big_pipeline_json():
    pytest.importorskip('pygraphviz')
    filename = join(get_test_data_path(), 'video', 'obama_speech.mp4')
    video = VideoStim(filename)
    nodes = {
        "roots": [{
            "transformer":
            "FrameSamplingFilter",
            "parameters": {
                "every": 15
            },
            "children": [{
                "transformer": "TesseractConverter",
                "children": [{
                    "transformer": "LengthExtractor"
                }]
            }, {
                "transformer": "VibranceExtractor"
            }, {
                "transformer": "BrightnessExtractor"
            }]
        }, {
            "transformer":
            "VideoToAudioConverter",
            "children": [{
                "transformer": "WitTranscriptionConverter",
                "children": [{
                    "transformer": "LengthExtractor"
                }]
            }]
        }]
    }
    graph = Graph(nodes)
    results = graph.run(video, merge=False)
    result = merge_results(results, format='wide', extractor_names='multi')
    # Test that pygraphviz outputs a file
    drawfile = next(tempfile._get_candidate_names())
    graph.draw(drawfile)
    assert exists(drawfile)
    os.remove(drawfile)
    assert ('LengthExtractor', 'text_length') in result.columns
    assert ('VibranceExtractor', 'vibrance') in result.columns
    # assert not result[('onset', '')].isnull().any()
    assert 'text[negotiations]' in result['stim_name'].values
    assert 'frame[90]' in result['stim_name'].values
Example #7
def test_node_arg_parsing():
    n1, n2 = 'MyLovelyExtractor', ['MyLovelyExtractor']
    args1 = Graph._parse_node_args(n1)
    args2 = Graph._parse_node_args(n2)
    assert args1 == args2 == {'transformer': 'MyLovelyExtractor'}

    node = ('saliencyextractor', [])
    args = Graph._parse_node_args(node)
    assert set(args.keys()) == {'transformer', 'children'}

    node = ('saliencyextractor', [('child1'), ('child2')], 'my_name')
    args = Graph._parse_node_args(node)
    assert set(args.keys()) == {'transformer', 'name', 'children'}
    assert len(args['children']) == 2

    node = {'transformer': '...', 'name': '...'}
    args = Graph._parse_node_args(node)
    assert args == node
Example #8
def test_multiple_text_filters():
    stim = TextStim(text='testing the filtering features')
    filt1 = TokenizingFilter()
    filt2 = WordStemmingFilter()
    stemmed_tokens = filt2.transform(filt1.transform(stim))
    full_text = ' '.join([s.text for s in stemmed_tokens])
    assert full_text == 'test the filter featur'

    stim = TextStim(text='ARTICLE ONE: Rights')
    g = Graph()
    g.add_node(LowerCasingFilter())
    filt1 = LowerCasingFilter()
    filt2 = PunctuationRemovalFilter()
    filt3 = TokenizingFilter()
    final_texts = filt3.transform(filt2.transform(filt1.transform(stim)))
    assert len(final_texts) == 3
    assert final_texts[0].text == 'article'
    assert final_texts[0].order == 0
    assert final_texts[1].text == 'one'
    assert final_texts[2].text == 'rights'
    assert final_texts[2].order == 2
Example #9
def test_small_pipeline_json_spec():
    pytest.importorskip('pytesseract')
    filename = join(get_test_data_path(), 'image', 'button.jpg')
    stim = ImageStim(filename)
    nodes = {
        "roots": [{
            "transformer":
            "TesseractConverter",
            "children": [{
                "transformer": "LengthExtractor",
                "children": []
            }]
        }]
    }
    graph = Graph(nodes)
    result = list(graph.run([stim], merge=False))
    history = result[0].history.to_df()
    assert history.shape == (2, 8)
    assert history.iloc[0]['result_class'] == 'TextStim'
    result = merge_results(result, format='wide', extractor_names='multi')
    assert (0, 'text[Exit]') in result['stim_name'].values
    assert ('LengthExtractor', 'text_length') in result.columns
    assert result[('LengthExtractor', 'text_length')].values[0] == 4
Example #10
def test_graph_scikit():
    pytest.importorskip('pytesseract')
    pytest.importorskip('sklearn')
    image_dir = join(get_test_data_path(), 'image')
    stim1 = join(image_dir, 'apple.jpg')
    stim2 = join(image_dir, 'button.jpg')
    graph_spec = join(get_test_data_path(), 'graph', 'simple_graph.json')
    graph = Graph(spec=graph_spec)
    trans = PliersTransformer(graph)
    res = trans.fit_transform([stim1, stim2])
    assert res.shape == (2, 1)
    assert res[0][0] == 4 or res[1][0] == 4
    meta = trans.metadata_
    assert 'history' in meta.columns
    assert meta['history'][1] == 'ImageStim->TesseractConverter/TextStim'
Example #11
def test_within_pipeline():
    pytest.importorskip('cv2')
    pytest.importorskip('sklearn')
    from sklearn.pipeline import Pipeline
    from sklearn.preprocessing import Normalizer
    stim = join(get_test_data_path(), 'image', 'apple.jpg')
    graph = Graph([BrightnessExtractor(), SharpnessExtractor()])
    trans = PliersTransformer(graph)
    normalizer = Normalizer()
    pipeline = Pipeline([('pliers', trans), ('normalizer', normalizer)])
    res = pipeline.fit_transform(stim)
    assert res.shape == (1, 2)
    assert np.isclose(res[0][0], 0.66393, 1e-5)
    assert np.isclose(res[0][1], 0.74780, 1e-5)
    meta = trans.metadata_
    assert 'onset' in meta.columns
    assert meta['class'][0] == 'ImageStim'
Example #12
def test_to_json():
    nodes = {
        "roots": [{
            "transformer":
            "FrameSamplingFilter",
            "parameters": {
                "every": 15
            },
            "children": [{
                "transformer": "TesseractConverter",
                "children": [{
                    "transformer": "LengthExtractor"
                }]
            }, {
                "transformer": "VibranceExtractor"
            }, {
                "transformer": "BrightnessExtractor"
            }]
        }, {
            "transformer":
            "VideoToAudioConverter",
            "children": [{
                "transformer": "WitTranscriptionConverter",
                "children": [{
                    "transformer": "LengthExtractor"
                }]
            }]
        }]
    }
    graph = Graph(nodes)
    assert graph.to_json() == nodes
    graph = Graph(
        spec=join(get_test_data_path(), 'graph', 'simple_graph.json'))
    simple_graph = {
        "roots": [{
            "transformer": "TesseractConverter",
            "children": [{
                "transformer": "LengthExtractor"
            }]
        }]
    }
    assert graph.to_json() == simple_graph
    filename = join(get_test_data_path(), 'image', 'button.jpg')
    res = graph.run(filename)
    assert res['LengthExtractor#text_length'][0] == 4
Example #13
def test_big_pipeline():
    filename = join(get_test_data_path(), 'video', 'obama_speech.mp4')
    video = VideoStim(filename)
    visual_nodes = [(FrameSamplingConverter(every=15), [
                        (TesseractConverter(), [LengthExtractor()]),
                        VibranceExtractor(), 'BrightnessExtractor',
                    ])]
    audio_nodes = [(VideoToAudioConverter(), [
                        WitTranscriptionConverter(), 'LengthExtractor'],
                        'video_to_audio')]
    graph = Graph()
    graph.add_nodes(visual_nodes)
    graph.add_nodes(audio_nodes)
    result = graph.run(video)
    assert ('LengthExtractor', 'text_length') in result.columns
    assert ('VibranceExtractor', 'vibrance') in result.columns
    # assert not result[('onset', '')].isnull().any()
    assert 'text[negotiations]' in result['stim'].values
    assert 'frame[90]' in result['stim'].values
Example #14
```
from pliers.graph import Graph
from pliers.filters import FrameSamplingFilter
from pliers.extractors import (STFTAudioExtractor,
                               PredefinedDictionaryExtractor)

# Define nodes
nodes = [
    (FrameSamplingFilter(hertz=0.1),
         ['FaceRecognitionFaceLandmarksExtractor', 'BrightnessExtractor']),
    (STFTAudioExtractor(freq_bins=[(100, 300)])),
    ('RevAISpeechAPIConverter',
         [PredefinedDictionaryExtractor(['affect/V.Mean.Sum','subtlexusfrequency/Lg10WF']),
          'VADERSentimentExtractor'])
]

# Initialize and execute Graph
g = Graph(nodes)

# Arguments to merge_results can be passed in here
# 'sherlock_video' is assumed to be a VideoStim loaded earlier in the tutorial
df = g.transform(sherlock_video)
```

This short example demonstrates a powerful way to express complex extraction workflows in *pliers*.

If you'd like to learn more about the Graph API, you can head over to the [documentation](http://tyarkoni.github.io/pliers/graphs.html).

# Where to go from here?

*Pliers* is a powerful yet easy-to-use Python library for multi-modal feature extraction. In this short tutorial, you've seen how to use *pliers* to modify, convert, and extract features from stimuli.
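
As a quick recap of those three operations, here is a minimal sketch (assuming `pytesseract` is installed, and reusing the `button.jpg` image from the pliers test data) that chains a Converter, a Filter, and an Extractor directly:

```
from os.path import join

from pliers.tests.utils import get_test_data_path
from pliers.stimuli import ImageStim
from pliers.converters import TesseractConverter
from pliers.filters import LowerCasingFilter
from pliers.extractors import LengthExtractor

# Load an image stimulus that ships with the pliers test data
img = ImageStim(join(get_test_data_path(), 'image', 'button.jpg'))

text = TesseractConverter().transform(img)     # convert: ImageStim -> TextStim
lowered = LowerCasingFilter().transform(text)  # modify:  TextStim  -> TextStim
result = LengthExtractor().transform(lowered)  # extract: feature values
print(result.to_df())
```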

To see the full range of `Transformers` implemented in *pliers*, [refer to this listing](http://tyarkoni.github.io/pliers/transformers.html).
Example #15
def test_stim_results():
    stim = TextStim(text='some, example the text.')
    g = Graph()
    g.add_nodes(
        ['PunctuationRemovalFilter', 'TokenRemovalFilter', 'TokenizingFilter'],
        mode='vertical')
    final_stims = g.run(stim, merge=False)
    assert len(final_stims) == 2
    assert final_stims[1].text == 'text'

    n = Node('PunctuationRemovalFilter', name='punc')
    g = Graph([n])
    g.add_nodes(['TokenizingFilter', 'LengthExtractor'], parent=n)
    results = g.run(stim)
    assert isinstance(results, pd.DataFrame)
    assert results['LengthExtractor#text_length'][0] == 21
    with pytest.raises(ValueError):
        g.run(stim, invalid_results='fail')
Example #16
def test_add_children():
    graph = Graph()
    de1, de2, de3 = DummyExtractor(), DummyExtractor(), DummyExtractor()
    graph.add_nodes([de1, de2, de3])
    assert len(graph.roots) == 3
    assert all([isinstance(c, Node) for c in graph.roots])
Example #17
def test_adding_nodes():
    graph = Graph()
    graph.add_children(['VibranceExtractor', 'BrightnessExtractor'])
    assert len(graph.roots) == 2
    assert len(graph.nodes) == 2
    for r in graph.roots:
        assert len(r.children) == 0
    img = ImageStim(join(get_test_data_path(), 'image', 'button.jpg'))
    results = graph.run(img, merge=False)
    assert len(results) == 2
    assert_almost_equal(results[0].to_df()['vibrance'][0], 841.577274, 5)
    assert_almost_equal(results[1].to_df()['brightness'][0], 0.746965, 5)

    graph = Graph()
    graph.add_chain(['PunctuationRemovalFilter', 'LengthExtractor'])
    txt = TextStim(text='the.best.text.')
    results = graph.run(txt, merge=False)
    assert len(results) == 1
    assert results[0].to_df()['text_length'][0] == 11

    with pytest.raises(ValueError):
        graph.add_nodes(['LengthExtractor'], mode='invalid')