Beispiel #1
0
def test_compound_stim():
    """CompoundStim should wrap heterogeneous stims, expose type-based
    attribute access, support iteration, and return typed subsets."""
    audio_dir = join(get_test_data_path(), 'audio')
    audio = AudioStim(join(audio_dir, 'crowd.mp3'))
    image1 = ImageStim(join(get_test_data_path(), 'image', 'apple.jpg'))
    image2 = ImageStim(join(get_test_data_path(), 'image', 'obama.jpg'))
    filename = join(get_test_data_path(), 'video', 'small.mp4')
    video = VideoStim(filename)
    text = ComplexTextStim(text="The quick brown fox jumped...")
    stim = CompoundStim([audio, image1, image2, video, text])
    assert len(stim.elements) == 5
    assert isinstance(stim.video, VideoStim)
    assert isinstance(stim.complex_text, ComplexTextStim)
    assert isinstance(stim.image, ImageStim)
    with pytest.raises(AttributeError):
        stim.nonexistent_type
    assert stim.video_frame is None

    # Test iteration
    # BUG FIX: the comparison below previously lacked `assert`, so it
    # evaluated to a bool and checked nothing.
    assert len([e for e in stim]) == 5

    imgs = stim.get_stim(ImageStim, return_all=True)
    assert len(imgs) == 2
    assert all([isinstance(im, ImageStim) for im in imgs])
    # Lookup by type name string should be equivalent to lookup by class
    also_imgs = stim.get_stim('image', return_all=True)
    assert imgs == also_imgs
Beispiel #2
0
def test_transcribed_audio_stim():
    """A TranscribedAudioCompoundStim should pair an audio track with its
    transcript and expose each component by type."""
    data_path = get_test_data_path()
    wav = AudioStim(join(data_path, 'audio', 'barber_edited.wav'))
    transcript = ComplexTextStim(join(data_path, 'text',
                                      'wonderful_edited.srt'))
    stim = TranscribedAudioCompoundStim(audio=wav, text=transcript)
    assert isinstance(stim.audio, AudioStim)
    assert isinstance(stim.complex_text, ComplexTextStim)
Beispiel #3
0
def test_complex_stim_from_srt():
    """Sentences parsed from an SRT file should match the tab-separated
    reference transcript."""
    text_dir = join(get_test_data_path(), 'text')
    expected = pd.read_csv(join(text_dir, 'wonderful.txt'),
                           sep='\t')["text"].tolist()
    stim = ComplexTextStim(join(text_dir, 'wonderful.srt'))
    parsed = [element.text for element in stim.elements]
    assert parsed == expected
Beispiel #4
0
def test_transformations_on_compound_stim():
    """An extractor applied to a CompoundStim should transform only the
    elements it can handle (here: the two images, not the text)."""
    img_dir = join(get_test_data_path(), 'image')
    elements = [
        ImageStim(join(img_dir, 'apple.jpg')),
        ImageStim(join(img_dir, 'obama.jpg')),
        ComplexTextStim(text="The quick brown fox jumped..."),
    ]
    results = BrightnessExtractor().transform(CompoundStim(elements))
    assert len(results) == 2
    assert np.allclose(results[0]._data[0], 0.88784294)
Beispiel #5
0
def test_magic_loader():
    """load_stims should infer the correct Stim subclass from each file."""
    data_path = get_test_data_path()
    paths = [
        join(data_path, 'text', 'sample_text.txt'),
        join(data_path, 'audio', 'barber.wav'),
        join(data_path, 'video', 'small.mp4'),
    ]
    stims = load_stims(paths)
    assert len(stims) == 3
    assert round(stims[1].duration) == 57
    assert isinstance(stims[0].text, str)
    assert stims[2].width == 560
Beispiel #6
0
def test_save():
    """Each Stim subclass should round-trip through save() to disk."""
    cts_file = join(get_test_data_path(), 'text', 'complex_stim_no_header.txt')
    complextext_stim = ComplexTextStim(cts_file, columns='ot',
                                       default_duration=0.2)
    text_stim = TextStim(text='hello')
    audio_stim = AudioStim(join(get_test_data_path(), 'audio', 'crowd.mp3'))
    image_stim = ImageStim(join(get_test_data_path(), 'image', 'apple.jpg'))

    # Video gives travis problems
    stims = [complextext_stim, text_stim, audio_stim, image_stim]
    # BUG FIX: tempfile.mktemp() is deprecated and race-prone; save into a
    # private temporary directory instead, and remove it even on failure.
    import shutil
    tmpdir = tempfile.mkdtemp()
    try:
        for i, s in enumerate(stims):
            path = join(tmpdir, f'stim_{i}{s._default_file_extension}')
            s.save(path)
            assert exists(path)
            os.remove(path)
    finally:
        shutil.rmtree(tmpdir, ignore_errors=True)
Beispiel #7
0
def test_video_frame_stim():
    """A VideoFrameStim should derive its onset, data, and name from the
    parent video and the requested frame index."""
    video_path = join(get_test_data_path(), 'video', 'small.mp4')
    parent = VideoStim(video_path, onset=4.2)
    frame = VideoFrameStim(parent, 42)
    # onset = parent onset (4.2 s) + frame 42 at 30 fps (1.4 s)
    assert frame.onset == (5.6)
    assert np.array_equal(frame.data, parent.get_frame(index=42).data)
    assert frame.name == 'frame[42]'
Beispiel #8
0
def test_implicit_stim_conversion():
    """An ImageStim fed to a text extractor should be implicitly converted
    to text (presumably via OCR — the image is of a button), keeping onset."""
    stim = ImageStim(join(get_test_data_path(), 'image', 'button.jpg'),
                     onset=4.2)
    df = LengthExtractor().transform(stim).to_df()
    assert 'text_length' in df.columns
    assert df['text_length'][0] == 4
    assert df['onset'][0] == 4.2
Beispiel #9
0
def test_implicit_stim_iteration():
    """Transforming a list of stims should yield one ExtractorResult each."""
    np.random.seed(100)
    img_dir = join(get_test_data_path(), 'image')
    images = [ImageStim(join(img_dir, name))
              for name in ('apple.jpg', 'obama.jpg')]
    results = DummyExtractor().transform(images)
    assert len(results) == 2
    assert isinstance(results[0], ExtractorResult)
Beispiel #10
0
def test_audio_stim():
    """AudioStim should report duration and sampling rate for WAV files."""
    audio_dir = join(get_test_data_path(), 'audio')
    # (filename, expected duration rounded to whole seconds)
    for fname, seconds in [('barber.wav', 57), ('homer.wav', 3)]:
        stim = AudioStim(join(audio_dir, fname))
        assert round(stim.duration) == seconds
        assert stim.sampling_rate == 11025
Beispiel #11
0
def test_magic_loader2():
    """load_stims should also resolve remote URLs of various media types."""
    sources = [
        join(get_test_data_path(), 'text', 'sample_text.txt'),
        'https://archive.org/download/DisneyCastletest/Disney_Castle_512kb.mp4',
        'https://archive.org/download/999WavFiles/TANKEN.WAV',
        'https://archive.org/download/NIX-C-1987-11903/1987_11903L.jpg',
        'https://github.com/psychoinformaticslab/pliers/blob/master/README.rst',
    ]
    stims = load_stims(sources)
    assert len(stims) == 5
    assert stims[1].fps == 30.0
    assert stims[3].data.shape == (288, 360, 3)
Beispiel #12
0
def test_video_stim_bytestring():
    """get_bytestring() should lazily compute and cache a base64 encoding
    of the raw video file."""
    path = join(get_test_data_path(), 'video', 'small.mp4')
    stim = VideoStim(path)
    assert stim._bytestring is None  # nothing cached before first request
    encoded = stim.get_bytestring()
    assert isinstance(encoded, str)
    assert stim._bytestring is not None  # cached after first call
    with open(path, 'rb') as f:
        assert encoded.encode() == base64.b64encode(f.read())
Beispiel #13
0
def test_implicit_stim_conversion3():
    """A VideoStim handed to a text extractor should be implicitly
    converted down to word-level text stims."""
    stim = VideoStim(join(get_test_data_path(), 'video', 'obama_speech.mp4'),
                     onset=4.2)
    results = LengthExtractor().transform(stim)
    first_word = results[0].to_df()
    # The word should be "today"
    assert 'text_length' in first_word.columns
    assert first_word['text_length'][0] == 5
    assert first_word['onset'][0] >= 4.2
Beispiel #14
0
def test_image_stim_bytestring():
    """get_bytestring() should lazily compute and cache a base64 encoding
    of the raw image file."""
    path = join(get_test_data_path(), 'image', 'apple.jpg')
    stim = ImageStim(path)
    assert stim._bytestring is None  # nothing cached before first request
    encoded = stim.get_bytestring()
    assert isinstance(encoded, str)
    assert stim._bytestring is not None  # cached after first call
    with open(path, 'rb') as f:
        assert encoded.encode() == base64.b64encode(f.read())
Beispiel #15
0
def test_video_stim():
    ''' Test VideoStim functionality: metadata, frame iteration, and
    frame lookup by index or onset. '''
    filename = join(get_test_data_path(), 'video', 'small.mp4')
    video = VideoStim(filename, onset=4.2)
    assert video.fps == 30
    assert video.n_frames == 168
    assert video.width == 560
    assert video.duration == 5.57

    # Test frame iterator
    frames = [f for f in video]
    assert len(frames) == 168
    f1 = frames[100]
    assert isinstance(f1, VideoFrameStim)
    assert isinstance(f1.onset, float)
    assert np.isclose(f1.duration, 1 / 30.0, 1e-5)
    # BUG FIX: this shape comparison previously lacked `assert`
    assert f1.data.shape == (320, 560, 3)

    # Test getting of specific frame
    f2 = video.get_frame(index=100)
    assert isinstance(f2, VideoFrameStim)
    assert isinstance(f2.onset, float)
    assert f2.onset > 7.5
    # BUG FIX: this shape comparison previously lacked `assert`
    assert f2.data.shape == (320, 560, 3)
    # Looking the same frame up by onset should yield an equivalent frame.
    # BUG FIX: these checks previously re-tested f2 instead of f2_copy.
    f2_copy = video.get_frame(onset=3.33334)
    assert isinstance(f2_copy, VideoFrameStim)
    assert isinstance(f2_copy.onset, float)
    assert f2_copy.onset > 7.5
    assert np.array_equal(f2.data, f2_copy.data)

    # Try another video
    filename = join(get_test_data_path(), 'video', 'obama_speech.mp4')
    video = VideoStim(filename)
    assert video.fps == 12
    assert video.n_frames == 105
    assert video.width == 320
    assert video.duration == 8.71
    f3 = video.get_frame(index=104)
    assert isinstance(f3, VideoFrameStim)
    assert isinstance(f3.onset, float)
    assert f3.duration > 0.0
    assert f3.data.shape == (240, 320, 3)
Beispiel #16
0
def test_complex_stim_from_text():
    """ComplexTextStim built from raw text should tokenize into words by
    default, and support sentence units and a custom regex tokenizer."""
    textfile = join(get_test_data_path(), 'text', 'scandal.txt')
    # BUG FIX: previously opened the file without closing it (handle leak)
    with open(textfile) as f:
        text = f.read().strip()
    stim = ComplexTextStim(text=text)
    target = ['To', 'Sherlock', 'Holmes']
    assert [w.text for w in stim.elements[:3]] == target
    assert len(stim.elements) == 231
    # Sentence-level tokenization (construction smoke test only)
    stim = ComplexTextStim(text=text, unit='sent')
    # Custom tokenizer
    stim = ComplexTextStim(text=text, tokenizer=r'(\w+)')
    assert len(stim.elements) == 209
Beispiel #17
0
def test_clashing_key_rename():
    """Feature names clashing with reserved result columns should be
    renamed with a trailing underscore (e.g. 'order' -> 'order_')."""
    stim = ImageStim(join(get_test_data_path(), 'image', 'apple.jpg'))
    df = ClashingFeatureExtractor().transform(stim).to_df()
    for col in ('order', 'duration', 'onset', 'object_id',
                'feature_1', 'feature_2', 'order_'):
        assert col in df.columns
Beispiel #18
0
def test_implicit_stim_conversion2():
    """Audio should be implicitly converted to text via the configured
    default converter before the text extractor runs."""
    def_conv = config.get_option('default_converters')
    config.set_option('default_converters',
                      {'AudioStim->TextStim': ('WitTranscriptionConverter', )})
    # BUG FIX: restore the global config in a finally block so a failing
    # assertion no longer leaks the modified setting into other tests.
    try:
        audio_dir = join(get_test_data_path(), 'audio')
        stim = AudioStim(join(audio_dir, 'homer.wav'), onset=4.2)
        ext = LengthExtractor()
        result = ext.transform(stim)
        first_word = result[0].to_df()
        assert 'text_length' in first_word.columns
        assert first_word['text_length'][0] > 0
        assert first_word['onset'][0] >= 4.2
    finally:
        config.set_option('default_converters', def_conv)
Beispiel #19
0
def test_loader_nonexistent():
    """load_stims should raise IOError for missing files; with
    fail_silently=True bad entries are dropped from a list, but a single
    bad input still raises."""
    missing_text = 'this/doesnt/exist.txt'
    with pytest.raises(IOError):
        load_stims(missing_text)

    missing_audio = 'no/audio/here.wav'
    with pytest.raises(IOError):
        load_stims([missing_text, missing_audio])

    real_text = join(get_test_data_path(), 'text', 'sample_text.txt')
    stims = load_stims([real_text, missing_audio], fail_silently=True)
    assert len(stims) == 1

    with pytest.raises(IOError):
        load_stims(missing_audio, fail_silently=True)
Beispiel #20
0
def test_complex_text_stim():
    """ComplexTextStim should parse onset/text columns, apply a default
    duration, shift element onsets by a stim-level onset, and convert
    between (h, m, s, ms) tuples and total seconds."""
    text_dir = join(get_test_data_path(), 'text')
    no_header = join(text_dir, 'complex_stim_no_header.txt')

    stim = ComplexTextStim(no_header, columns='ot', default_duration=0.2)
    assert len(stim.elements) == 4
    assert stim.elements[2].onset == 34
    assert stim.elements[2].duration == 0.2

    # A stim-level onset shifts every element onset by the same amount
    stim = ComplexTextStim(no_header, columns='ot', default_duration=0.2,
                           onset=4.2)
    assert stim.elements[2].onset == 38.2
    assert stim.elements[1].onset == 24.2

    stim = ComplexTextStim(join(text_dir, 'complex_stim_with_header.txt'))
    assert len(stim.elements) == 4
    assert stim.elements[2].duration == 0.1

    # (hours, minutes, seconds, ms) <-> total seconds round-trip
    assert stim._to_sec((1.0, 42, 3, 0)) == 6123
    assert stim._to_tup(6123) == (1.0, 42, 3, 0)
Beispiel #21
0
def test_series():
    """SeriesStim construction from a dict, from a delimited file, and
    rejection of ambiguous or 2-d inputs."""
    stim = SeriesStim({'a': 4, 'b': 2, 'c': 8}, onset=4, duration=2)
    expected = pd.Series([4, 2, 8], index=['a', 'b', 'c'])
    pd.testing.assert_series_equal(stim.data, expected)
    assert stim.onset == 4
    assert stim.duration == 2
    assert stim.order is None

    path = Path(get_test_data_path(), 'text', 'test_lexical_dictionary.txt')
    # multiple columns found and no column arg provided
    with pytest.raises(ValueError):
        SeriesStim(filename=path, sep='\t')

    stim = SeriesStim(filename=path, column='frequency', sep='\t')
    assert stim.data.shape == (7,)
    assert stim.data[3] == 15.417

    # 2-d array should fail
    with pytest.raises(Exception):
        SeriesStim(np.random.normal(size=(10, 2)))
Beispiel #22
0
def test_metric_extractor():
    """Exercise MetricExtractor across: single vs. multiple functions,
    functions given as dotted-path strings ('numpy.mean',
    'tensorflow.reduce_mean') vs. callables vs. a lambda string, custom
    var_names, index subsetting, and wide vs. long output formats."""
    def dummy(array):
        # Returns the first element (tests plain-callable support)
        return array[0]

    def dummy_list(array):
        # Returns the first two elements (tests multi-valued outputs)
        return array[0], array[1]

    f = Path(get_test_data_path(), 'text', 'test_lexical_dictionary.txt')
    stim = SeriesStim(data=np.linspace(1., 4., 20), onset=2., duration=.5)
    stim_file = SeriesStim(filename=f,
                           column='frequency',
                           sep='\t',
                           index_col='text')

    ext_single = MetricExtractor(functions='numpy.mean')
    # subset_idx restricts computation to the named index entries
    ext_idx = MetricExtractor(functions='numpy.mean',
                              subset_idx=['for', 'testing', 'text'])
    ext_multiple = MetricExtractor(functions=[
        'numpy.mean', 'numpy.min', scipy.stats.entropy, dummy, dummy_list
    ])
    ext_names = MetricExtractor(
        functions=[
            'numpy.mean', 'numpy.min', scipy.stats.entropy, dummy, dummy_list,
            'tensorflow.reduce_mean'
        ],
        var_names=['mean', 'min', 'entropy', 'custom1', 'custom2', 'tf_mean'])
    ext_lambda = MetricExtractor(functions='lambda x: -np.max(x)',
                                 var_names='custom_function')

    r = ext_single.transform(stim)
    r_file = ext_single.transform(stim_file)
    r_file_idx = ext_idx.transform(stim_file)
    r_multiple = ext_multiple.transform(stim)
    r_names = ext_names.transform(stim)
    r_lambda = ext_lambda.transform(stim)

    r_df = r.to_df()
    r_file_df = r_file.to_df()
    r_file_idx_df = r_file_idx.to_df()
    r_multiple_df = r_multiple.to_df()
    r_long = r_multiple.to_df(format='long')
    r_names_df = r_names.to_df()
    r_lambda_df = r_lambda.to_df()

    # Wide format: one row per stim regardless of how many functions ran
    for res in [r_df, r_file_df, r_multiple_df]:
        assert res.shape[0] == 1
    # Long format: one row per function
    assert r_long.shape[0] == len(ext_multiple.functions)
    assert r_df['onset'][0] == 2
    assert r_df['duration'][0] == .5
    assert r_df['mean'][0] == 2.5
    assert np.isclose(r_file_df['mean'][0], 11.388, rtol=0.001)
    # Subsetting the index changes which values enter the mean
    assert np.isclose(r_file_idx_df['mean'][0], 12.582, rtol=0.001)
    assert all([m in r_multiple_df.columns for m in ['mean', 'entropy']])
    # 'numpy.min' resolves to np.amin, hence the 'amin' column name
    assert r_multiple_df['amin'][0] == 1.
    assert r_multiple_df['dummy'][0] == 1.
    assert r_multiple_df['dummy_list'][0][0] == np.linspace(1., 4., 20)[0]
    assert r_multiple_df['dummy_list'][0][1] == np.linspace(1., 4., 20)[1]
    assert type(r_multiple_df['dummy_list'][0]) == np.ndarray
    # Custom var_names should override the default names, in order
    assert r_names_df.columns[-3] == 'custom1'
    assert r_names_df.columns[-2] == 'custom2'
    assert r_names_df.columns[-1] == 'tf_mean'
    # numpy and tensorflow means of the same data should agree
    assert np.isclose(r_names_df['mean'][0], r_names_df['tf_mean'][0])
    assert r_lambda_df['custom_function'][0] == -4
Beispiel #23
0
def test_audio_formats():
    """AudioStim should load MP3 files and report duration/sampling rate."""
    stim = AudioStim(join(get_test_data_path(), 'audio', 'crowd.mp3'))
    assert round(stim.duration) == 28
    assert stim.sampling_rate == 44100
Beispiel #24
0
def test_merge_extractor_results():
    """merge_results should combine results from multiple extractors and
    stims, honoring wide/long `format`, the `extractor_names` placement
    options, and `extractor_params` serialization."""
    np.random.seed(100)
    image_dir = join(get_test_data_path(), 'image')
    stim1 = ImageStim(join(image_dir, 'apple.jpg'))
    stim2 = ImageStim(join(image_dir, 'obama.jpg'))
    de_names = ['Extractor1', 'Extractor2', 'Extractor3']
    des = [DummyExtractor(name=name) for name in de_names]
    # Names that must never show up in the long-format 'feature' column
    not_features = ['object_id']
    for de in des:
        # BUG FIX: was not_features.append(de._log_attributes), which added
        # each extractor's whole _log_attributes container as ONE element,
        # so the individual attribute names were never actually checked
        # against df['feature'] below.
        not_features.extend(de._log_attributes)
    results = [de.transform(stim1) for de in des]
    results += [de.transform(stim2) for de in des]

    df = merge_results(results, format='wide')
    assert df.shape == (200, 18)
    cols = [
        'onset', 'duration', 'order', 'class', 'filename', 'history',
        'stim_name', 'source_file'
    ]
    assert not set(cols) - set(df.columns)
    assert 'Extractor2#feature_3' in df.columns

    df = merge_results(results, format='wide', extractor_names='drop')
    assert df.shape == (200, 12)
    assert not set(cols) - set(df.columns)
    assert 'feature_3' in df.columns

    df = merge_results(results, format='wide', extractor_names='multi')
    assert df.shape == (200, 18)
    _cols = [(c, np.nan) for c in cols]
    assert not set(_cols) - set(df.columns)
    assert ('Extractor2', 'feature_3') in df.columns

    # 'multi' column placement is incompatible with long format
    with pytest.raises(ValueError):
        merge_results(results, format='long', extractor_names='multi')

    df = merge_results(results, format='long', extractor_names='column')
    assert df.shape == (1800, 12)
    _cols = cols + ['feature', 'extractor', 'value']
    assert not set(_cols) - set(df.columns)
    row = df.iloc[523, :]
    assert row['feature'] == 'feature_2'
    assert row['value'] == 475
    assert row['extractor'] == 'Extractor1'
    assert not set(not_features).intersection(set(df['feature']))

    df = merge_results(results, format='long', extractor_names='drop')
    assert df.shape == (1800, 11)
    assert set(_cols) - set(df.columns) == {'extractor'}
    assert not set(not_features).intersection(set(df['feature']))

    df = merge_results(results, format='long', extractor_names='prepend')
    assert df.shape == (1800, 11)
    row = df.iloc[523, :]
    assert row['feature'] == 'Extractor1#feature_2'
    assert not set(not_features).intersection(set(df['feature']))

    df = merge_results(results, format='wide', extractor_params=True)
    # Log attributes expected to be serialized for each extractor
    # (also fixes the previously mangled multi-line assignment here)
    logattr = {de.name: de._log_attributes for de in des}
    for de in des:
        for feat in ['feature_1', 'feature_2', 'feature_3']:
            idx_str = f'{de.name}#{feat}#extractor_params'
            assert idx_str in df.columns
            df_log_attr = json.loads(df[idx_str][0])
            for attr in logattr[de.name]:
                assert attr in df_log_attr.keys()

    df = merge_results(results, format='long', extractor_params=True)
    for idx, row in df.iterrows():
        de_name = row['feature'].split('#')[0]
        for attr in logattr[de_name]:
            assert attr in json.loads(row['extractor_params']).keys()
Beispiel #25
0
import numpy as np
import pytest

from pliers.tests.utils import (get_test_data_path, DummyExtractor,
                                ClashingFeatureExtractor)
from pliers.extractors import (LengthExtractor, BrightnessExtractor,
                               SharpnessExtractor, VibranceExtractor)
from pliers.stimuli import (ComplexTextStim, ImageStim, VideoStim, AudioStim)
from pliers.support.download import download_nltk_data
from pliers.extractors.base import ExtractorResult, merge_results
from pliers import config

# Save the user's caching setting so it could be restored later, then force
# transformer caching ON for this test module.
cache_default = config.get_option('cache_transformers')
config.set_option('cache_transformers', True)

TEXT_DIR = join(get_test_data_path(), 'text')  # shared text-fixture directory


@pytest.fixture(scope='module')
def get_nltk():
    """Module-scoped fixture: ensure required NLTK data is downloaded."""
    download_nltk_data()


def test_check_target_type():
    """Transforming a stim type the extractor cannot handle (and cannot be
    converted from) should raise TypeError."""
    stim = ComplexTextStim(join(TEXT_DIR, 'sample_text.txt'),
                           columns='to',
                           default_duration=1)
    extractor = SharpnessExtractor()  # image extractor fed text input
    with pytest.raises(TypeError):
        extractor.transform(stim)
from pliers import config
from pliers.extractors import (TensorFlowKerasApplicationExtractor,
                               TFHubExtractor, TFHubImageExtractor,
                               TFHubTextExtractor, BertExtractor,
                               BertSequenceEncodingExtractor, BertLMExtractor,
                               BertSentimentExtractor, AudiosetLabelExtractor)
from pliers.filters import AudioResamplingFilter
from pliers.stimuli import (ImageStim, TextStim, ComplexTextStim, AudioStim)
from pliers.extractors.base import merge_results
from transformers import BertTokenizer
from pliers.utils import verify_dependencies

# Save the user's caching setting so it could be restored later, then turn
# transformer caching OFF for this test module.
cache_default = config.get_option('cache_transformers')
config.set_option('cache_transformers', False)

# Shared test-data directories
IMAGE_DIR = join(get_test_data_path(), 'image')
TEXT_DIR = join(get_test_data_path(), 'text')
AUDIO_DIR = join(get_test_data_path(), 'audio')

# TF Hub model handles used by the tests below
EFFNET_URL = 'https://tfhub.dev/tensorflow/efficientnet/b7/classification/1'
MNET_URL = 'https://tfhub.dev/google/imagenet/mobilenet_v2_100_224/feature_vector/4'
SENTENC_URL = 'https://tfhub.dev/google/universal-sentence-encoder/4'
GNEWS_URL = 'https://tfhub.dev/google/nnlm-en-dim128-with-normalization/2'
TOKENIZER_URL = 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/2'
ELECTRA_URL = 'https://tfhub.dev/google/electra_small/2'
SPEECH_URL = 'https://tfhub.dev/google/speech_embedding/1'

# Skip the whole module when the environment opts out of high-memory tests
pytestmark = pytest.mark.skipif(environ.get('skip_high_memory',
                                            False) == 'true',
                                reason='high memory')
Beispiel #27
0
def test_image_stim(dummy_iter_extractor):
    """ImageStim should load an image file into a pixel array."""
    filename = join(get_test_data_path(), 'image', 'apple.jpg')
    stim = ImageStim(filename)
    # height x width x channels — presumably RGB; confirm against loader
    assert stim.data.shape == (288, 420, 3)