Esempio n. 1
0
def test_loader_nonexistent():
    """load_stims raises IOError for missing paths; fail_silently drops
    unloadable entries but still raises when nothing loads at all."""
    # A single missing path raises immediately.
    missing_text = 'this/doesnt/exist.txt'
    with pytest.raises(IOError):
        stims = load_stims(missing_text)

    # A list containing any missing path raises as well.
    missing_audio = 'no/audio/here.wav'
    with pytest.raises(IOError):
        stims = load_stims([missing_text, missing_audio])

    # With fail_silently=True, loadable entries survive and bad ones drop out.
    real_text = join(get_test_data_path(), 'text', 'sample_text.txt')
    stims = load_stims([real_text, missing_audio], fail_silently=True)
    assert len(stims) == 1

    # If not a single stimulus can be loaded, IOError is raised regardless.
    with pytest.raises(IOError):
        stims = load_stims(missing_audio, fail_silently=True)
Esempio n. 2
0
def clean_transcript(input_transcript, input_media, onset=None, offset=None):
    """Normalize a transcript/media pair for downstream processing.

    Args:
        input_transcript (str): Path to a transcript file. SRT files are
            split per subtitle element; anything else is treated as one
            block of text.
        input_media (str): Path to the audio/video file the transcript
            describes. Video is converted to audio first.
        onset (float): Start time of the text block. Required (along with
            `offset`) for non-SRT transcripts.
        offset (float): Time subtracted from the end of the media to bound
            the text block's duration. Required for non-SRT transcripts.

    Returns:
        tuple: (path to cleaned transcript, path to audio media file).

    Raises:
        Exception: If a non-SRT transcript is given without onset/offset.
    """
    stim = load_stims([input_media])[0]

    # Downstream alignment needs audio; extract the track from video input.
    if not isinstance(stim, AudioStim):
        conv = VideoToAudioConverter()
        stim = conv.transform(stim)
        input_media = '/tmp/input_audio.wav'
        stim.save(input_media)

    _, extension = splitext(input_transcript)

    clean_transcript = '/tmp/clean_transcript.txt'
    with open(clean_transcript, 'w') as new_file:

        # BUG FIX: splitext returns the extension WITH its leading dot
        # ('.srt'), so the original comparison to 'srt' could never match
        # and SRT files always fell through to the plain-text branch.
        if extension.lstrip('.').lower() == 'srt':
            txt = ComplexTextStim(input_transcript)
            for el in txt.elements:
                _clean_save(el.text, new_file, el.onset, el.duration)
        else:  # Treat as a single block of text
            if onset is None or offset is None:
                raise Exception("Onset and offset must be declared")
            txt = TextStim(input_transcript)
            _clean_save(txt.text, new_file, onset, stim.duration - offset)

    return clean_transcript, input_media
Esempio n. 3
0
def hash_stim(stim, blocksize=65536):
    """Return a SHA-1 hex digest for a pliers stimulus.

    Args:
        stim: A pliers Stim object, or a path (str or pathlib.Path) to a
            stimulus file, which is loaded first.
        blocksize (int): Bytes read per chunk when hashing a backing file.

    Returns:
        str: Hex digest of the stimulus' in-memory data (via hash_data)
            when available, otherwise of its backing file's bytes.

    Raises:
        FileNotFoundError: If a path is given but no file exists there.
    """
    if isinstance(stim, Path):
        stim = stim.as_posix()
    if isinstance(stim, str):
        from pliers.stimuli import load_stims
        from os.path import isfile
        # Raise explicitly instead of `assert`, which is stripped when
        # Python runs with -O and would let a bad path slip through.
        if not isfile(stim):
            raise FileNotFoundError("No stimulus file found at %s" % stim)
        stim = load_stims(stim)

    # In-memory data takes precedence over the file on disk.
    if hasattr(stim, "data"):
        return hash_data(stim.data)

    # Otherwise hash the backing file in fixed-size chunks; prefer the
    # original source file recorded in the stim's transformation history.
    filename = stim.history.source_file \
                if stim.history \
                else stim.filename
    hasher = hashlib.sha1()
    with open(filename, 'rb') as afile:
        for buf in iter(lambda: afile.read(blocksize), b''):
            hasher.update(buf)

    return hasher.hexdigest()
Esempio n. 4
0
def test_magic_loader():
    """load_stims infers stimulus type from each file's extension."""
    data_dir = get_test_data_path()
    paths = [join(data_dir, 'text', 'sample_text.txt'),
             join(data_dir, 'audio', 'barber.wav'),
             join(data_dir, 'video', 'small.mp4')]
    stims = load_stims(paths)
    assert len(stims) == 3
    # Each entry should be of the modality its extension implies.
    assert isinstance(stims[0].text, string_types)
    assert round(stims[1].duration) == 57
    assert stims[2].width == 560
Esempio n. 5
0
def test_magic_loader2():
    """load_stims handles a mix of local paths and remote URLs."""
    local_text = join(get_test_data_path(), 'text', 'sample_text.txt')
    remote = {
        'video': 'http://www.obamadownloads.com/videos/iran-deal-speech.mp4',
        'audio': 'http://www.bobainsworth.com/wav/simpsons/themodyn.wav',
        'image': 'https://www.whitehouse.gov/sites/whitehouse.gov/files/images/twitter_cards_potus.jpg',
        'text': 'https://github.com/tyarkoni/pliers/blob/master/README.md',
    }
    stims = load_stims([local_text, remote['video'], remote['audio'],
                        remote['image'], remote['text']])
    assert len(stims) == 5
    assert stims[1].fps == 12
    assert stims[3].data.shape == (240, 240, 3)
Esempio n. 6
0
def test_magic_loader2():
    """load_stims handles a mix of one local path and several remote URLs."""
    sources = [
        join(get_test_data_path(), 'text', 'sample_text.txt'),
        'https://archive.org/download/DisneyCastletest/Disney_Castle_512kb.mp4',
        'https://archive.org/download/999WavFiles/TANKEN.WAV',
        'https://archive.org/download/NIX-C-1987-11903/1987_11903L.jpg',
        'https://github.com/psychoinformaticslab/pliers/blob/master/README.rst',
    ]
    stims = load_stims(sources)
    assert len(stims) == 5
    assert stims[1].fps == 30.0
    assert stims[3].data.shape == (288, 360, 3)
Esempio n. 7
0
def _load_stim_models(dataset_name, task_name):
    """ Given a dataset and task, load all available stimuli as Pliers
    stimuli, and pair them with original database stim object. """
    stim_models = Stimulus.query.filter_by(
        active=True).join(RunStimulus).join(Run).join(Task).filter_by(
            name=task_name).join(Dataset).filter_by(name=dataset_name)

    print("Loading stim models...")
    pairs = []
    for model in progressbar(stim_models):
        if model.path is not None:
            # File-backed stimuli load directly from disk.
            pairs.append((model, load_stims(model.path)))
            continue
        # Content-only (text) stimuli are paired both as complex and
        # plain text stims.
        complex_stim = ComplexTextStim(text=model.content)
        pairs.append((model, complex_stim))
        pairs.append((model, TextStim(text=complex_stim.data)))

    return pairs
Esempio n. 8
0
def check_updates(transformers, datastore=None, stimuli=None):
    """ Run transformers through a battery of stimuli, and check if output has
    changed. Store results in csv file for comparison.

    Args:
        transformers (list): A list of tuples of transformer names and
            dictionary of parameters to instantiate with (or empty dict).
        datastore (str): Filepath of CSV file with results. Stored in home dir
            by default.
        stimuli (list): List of stimuli file paths to extract from. If None,
            use test data.

    Returns:
        dict: 'transformers' — (name, params) tuples whose output changed
            since the last stored run; 'mismatches' — the individual
            "<transformer-hash>.<stim-name>" labels that differ.
    """
    # Find datastore file
    datastore = datastore or expanduser('~/.pliers_updates')
    prior_data = pd.read_csv(datastore) if exists(datastore) else None

    # Load stimuli (default: bundled CC0 test images)
    stimuli = stimuli or glob.glob(
        join(dirname(realpath(__file__)), '../tests/data/image/CC0/*'))
    stimuli = load_stims(stimuli)

    # Instantiate transformers, keeping (name, params) for later reporting
    loaded_transformers = {
        get_transformer(name, **params): (name, params)
        for name, params in transformers
    }

    # Transform stimuli; one column per (transformer, stimulus) pair
    results = pd.DataFrame({'time_extracted': [datetime.datetime.now()]})
    for trans in loaded_transformers.keys():
        for stim in stimuli:
            if trans._stim_matches_input_types(stim):
                res = trans.transform(stim)

                try:  # Add iterable
                    # BUG FIX: the original comprehension read `res._data`
                    # for every element instead of each item `r`, so the
                    # per-element data was never collected.
                    res = [r._data for r in res]
                except TypeError:
                    res = res._data

                # Converters/Filters return raw stims -> hash their data;
                # Extractors' first value is used directly.
                res = hash_data(res) if isinstance(trans,
                                                   (Converter,
                                                    Filter)) else res[0][0]

                results["{}.{}".format(trans.__hash__(), stim.name)] = [res]

    # Check for mismatches against the most recent stored run
    mismatches = []
    if prior_data is not None:
        last = prior_data[
            prior_data.time_extracted == prior_data.time_extracted.max()]. \
            iloc[0].drop('time_extracted')

        # DataFrame.iteritems was removed in pandas 2.0; .items is the
        # long-standing equivalent.
        for label, value in results.items():
            old = last.get(label)
            new = value.values[0]

            if old is not None:
                if isinstance(new, str):
                    if new != old:
                        mismatches.append(label)
                elif not np.isclose(old, new):
                    mismatches.append(label)

        # DataFrame.append was removed in pandas 2.0; pd.concat is the
        # supported replacement.
        results = pd.concat([prior_data, results])

    results.to_csv(datastore, index=False)

    # Map a transformer hash back to its (name, params) tuple
    def get_trans(hash_tr):
        for obj, attr in loaded_transformers.items():
            if str(obj.__hash__()) == hash_tr:
                return attr

    delta_t = set([m.split('.')[0] for m in mismatches])
    delta_t = [get_trans(dt) for dt in delta_t]

    return {'transformers': delta_t, 'mismatches': mismatches}
Esempio n. 9
0
def convert_stimuli(dataset_name, task_name, converters):
    """ Convert stimuli to different modality using pliers.
        Args:
            dataset_name - dataset name
            task_name - task name
            converters - dictionary of converter names to parameters
        Output:
            list of db ids of converted stimuli
    """
    print("Converting stimuli")

    dataset_id = Dataset.query.filter_by(name=dataset_name).one().id

    # Instantiate each converter with its parameters.
    converters = [get_transformer(n, **p) for n, p in converters]

    # Load all active original stimuli for task
    stim_objects = Stimulus.query.filter_by(active=True, parent_id=None).join(
        RunStimulus).join(Run).join(Task).filter_by(name=task_name).join(
            Dataset).filter_by(name=dataset_name)

    total_new_stims = []
    # Extract new stimuli from original stimuli
    for stim in stim_objects:
        new_stims = []
        # Re-create new RS associations with newly created stims
        rs_orig = RunStimulus.query.filter_by(stimulus_id=stim.id).join(
            Run).join(Task).filter_by(name=task_name)
        loaded_stim = load_stims(stim.path)

        # Extract for each converter
        for conv in converters:
            results = []
            # Extract and flatten results (to a single unit)
            if conv._stim_matches_input_types(loaded_stim):
                cstim = conv.transform(loaded_stim)
                try:  # Add iterable
                    results += cstim
                except TypeError:
                    # Non-iterable result: expand ComplexTextStim-like
                    # outputs into their elements, else keep as a unit.
                    if hasattr(cstim, 'elements'):
                        results += cstim.elements
                    else:
                        results.append(cstim)

                # Drop results with missing/empty data payloads.
                results = [res for res in results
                           if hasattr(res, 'data') and res.data != '']
                new_stims += create_new_stimuli(
                    dataset_id, task_name, stim.id, results, rs_orig,
                    transformer=cstim.history.transformer_class,
                    transformer_params=cstim.history.transformer_params)

        # De-activate previously generated stimuli from these converters.
        # NOTE(review): `cstim` leaks out of the converter loop — if no
        # converter matched this stim it is undefined here (NameError),
        # and otherwise only the LAST matching converter's history is used
        # to select stimuli for deactivation. Verify this is intended.
        update = Stimulus.query.filter_by(parent_id=stim.id).filter(
            Stimulus.id.notin_(new_stims),
            Stimulus.converter_name == cstim.history.transformer_class,
            Stimulus.converter_parameters == cstim.history.transformer_params)
        if update.count():
            update.update(dict(active=False), synchronize_session='fetch')
        db.session.commit()
        total_new_stims += new_stims

    return total_new_stims
Esempio n. 10
0
def check_updates(transformers, datastore=None, stimuli=None):
    """ Run transformers through a battery of stimuli, and check if output has
    changed. Store results in csv file for comparison.

    Args:
        transformers (list): A list of tuples of transformer names and
            dictionary of parameters to instantiate with (or empty dict).
        datastore (str): Filepath of CSV file with results. Stored in home dir
            by default.
        stimuli (list): List of stimuli file paths to extract from. If None,
            use test data.

    Returns:
        dict: 'transformers' — (name, params) tuples whose output changed
            since the last stored run; 'mismatches' — the individual
            "<transformer-hash>.<stim-name>" labels that differ.
    """
    # Find datastore file
    datastore = datastore or expanduser('~/.pliers_updates')
    prior_data = pd.read_csv(datastore) if exists(datastore) else None

    # Load stimuli (default: bundled CC0 test images)
    stimuli = stimuli or glob.glob(
        join(dirname(realpath(__file__)), '../tests/data/image/CC0/*'))
    stimuli = load_stims(stimuli)

    # Instantiate transformers, keeping (name, params) for later reporting
    loaded_transformers = {get_transformer(name, **params): (name, params)
                           for name, params in transformers}

    # Transform stimuli; one column per (transformer, stimulus) pair
    results = pd.DataFrame({'time_extracted': [datetime.datetime.now()]})
    for trans in loaded_transformers.keys():
        for stim in stimuli:
            if trans._stim_matches_input_types(stim):
                res = trans.transform(stim)

                try:  # Add iterable
                    # BUG FIX: the original comprehension read from `res`
                    # for every element instead of each item `r`, so the
                    # per-element data was never collected.
                    res = [getattr(r, '_data', r.data) for r in res]
                except TypeError:
                    res = getattr(res, '_data', res.data)

                res = hash_data(res)

                results["{}.{}".format(trans.__hash__(), stim.name)] = [res]

    # Check for mismatches against the most recent stored run
    mismatches = []
    if prior_data is not None:
        last = prior_data[
            prior_data.time_extracted == prior_data.time_extracted.max()]. \
            iloc[0].drop('time_extracted')

        # DataFrame.iteritems was removed in pandas 2.0; .items is the
        # long-standing equivalent.
        for label, value in results.items():
            old = last.get(label)
            new = value.values[0]

            if old is not None:
                if isinstance(new, str):
                    if new != old:
                        mismatches.append(label)
                elif not np.isclose(old, new):
                    mismatches.append(label)

        # DataFrame.append was removed in pandas 2.0; pd.concat is the
        # supported replacement.
        results = pd.concat([prior_data, results])

    results.to_csv(datastore, index=False)

    # Map a transformer hash back to its (name, params) tuple
    def get_trans(hash_tr):
        for obj, attr in loaded_transformers.items():
            if str(obj.__hash__()) == hash_tr:
                return attr

    delta_t = set([m.split('.')[0] for m in mismatches])
    delta_t = [get_trans(dt) for dt in delta_t]

    return {'transformers': delta_t, 'mismatches': mismatches}