Example #1
0
 def __init__(self, transformer, name=None, **parameters):
     """Wrap a transformer, resolving it by name when given a string.

     Args:
         transformer: A transformer object, or a string that is handed to
             get_transformer() together with ``parameters``.
         name: Optional label. When not None it is also written to the
             wrapped transformer's ``name`` attribute.
         **parameters: Keyword arguments forwarded to get_transformer();
             only consulted when ``transformer`` is a string.
     """
     self.name, self.children = name, []

     wrapped = (get_transformer(transformer, **parameters)
                if isinstance(transformer, string_types) else transformer)
     self.transformer = wrapped

     if name is not None:
         wrapped.name = name

     # Keyed on the resolved object, not on the string it was looked up by.
     self.id = id(wrapped)
Example #2
0
 def __init__(self, transformer, name=None, **parameters):
     """Wrap a transformer and remember the parameters it was given.

     Args:
         transformer: A transformer object, or a string that is resolved
             through get_transformer(transformer, **parameters).
         name: Optional label. When not None it is copied onto the wrapped
             transformer's ``name`` attribute as well.
         **parameters: Keyword arguments used for the lookup when
             ``transformer`` is a string; always stored on
             ``self.parameters``.
     """
     self.name = name
     self.children = []

     resolved = transformer
     if isinstance(resolved, string_types):
         resolved = get_transformer(resolved, **parameters)

     self.transformer = resolved
     self.parameters = parameters

     if name is not None:
         resolved.name = name

     # Identity of the resolved transformer object.
     self.id = id(resolved)
Example #3
0
def _extract(extractors, stims):
    """Apply every configured extractor to each stimulus it accepts.

    Args:
        extractors: Iterable of ``(name, parameters)`` pairs; each pair is
            instantiated with get_transformer(name, **parameters).
        stims: Iterable of ``(sm, s)`` pairs. ``s`` is the object given to
            the extractor; ``sm`` is passed through unchanged.

    Returns:
        list: ``(sm, result)`` tuples, one per accepted stimulus, grouped
            by extractor in the order the extractors were given.
    """
    collected = []
    for ext_name, ext_params in extractors:
        print("Extractor: {}".format(ext_name))
        extractor = get_transformer(ext_name, **ext_params)

        accepted = []
        for sm, s in stims:
            if not extractor._stim_matches_input_types(s):
                continue
            # Hacky workaround: for the Google shot-detection extractor,
            # repoint the stimulus at a file with the same name but an
            # .avi suffix (mutates the stimulus in place).
            class_repr = str(extractor.__class__)
            if 'GoogleVideoAPIShotDetectionExtractor' in class_repr:
                avi_path = Path(s.filename).with_suffix('.avi')
                s.filename = str(avi_path)
            accepted.append((sm, s))

        collected.extend(
            [(sm, extractor.transform(s)) for sm, s in progressbar(accepted)])
    return collected
Example #4
0
def check_updates(transformers, datastore=None, stimuli=None):
    """ Run transformers through a battery of stimuli, and check if output has
    changed. Store results in csv file for comparison.

    Args:
        transformers (list): A list of tuples of transformer names and
            dictionary of parameters to instantiate with (or empty dict).
        datastore (str): Filepath of CSV file with results. Stored in home dir
            by default.
        stimuli (list): List of stimuli file paths to extract from. If None,
            use test data.

    Returns:
        dict: ``'mismatches'`` holds the result labels (formatted as
            ``"<transformer hash>.<stimulus name>"``) whose value differs
            from the most recent stored run; ``'transformers'`` holds the
            ``(name, params)`` tuples of the transformers behind them.
    """
    # Find datastore file
    datastore = datastore or expanduser('~/.pliers_updates')
    prior_data = pd.read_csv(datastore) if exists(datastore) else None

    # Load stimuli
    stimuli = stimuli or glob.glob(
        join(dirname(realpath(__file__)), '../tests/data/image/CC0/*'))
    stimuli = load_stims(stimuli)

    # Get transformers
    loaded_transformers = {
        get_transformer(name, **params): (name, params)
        for name, params in transformers
    }

    # Transform stimuli
    results = pd.DataFrame({'time_extracted': [datetime.datetime.now()]})
    for trans in loaded_transformers:
        for stim in stimuli:
            if not trans._stim_matches_input_types(stim):
                continue
            res = trans.transform(stim)

            try:
                # Iterable of results: take the data of each *element*.
                # (Previously this read ``res._data`` on the container.)
                res = [r._data for r in res]
            except TypeError:
                # A single, non-iterable result
                res = res._data

            # Converters/Filters are compared by hash; anything else by
            # the first value of its data.
            if isinstance(trans, (Converter, Filter)):
                res = hash_data(res)
            else:
                res = res[0][0]

            results["{}.{}".format(trans.__hash__(), stim.name)] = [res]

    # Check for mismatches
    mismatches = []
    if prior_data is not None:
        # Most recent stored row, without its timestamp
        last = prior_data[
            prior_data.time_extracted == prior_data.time_extracted.max()]. \
            iloc[0].drop('time_extracted')

        # DataFrame.items() instead of iteritems(), which pandas 2.0 removed
        for label, value in results.items():
            old = last.get(label)
            new = value.values[0]

            # Labels absent from the previous run cannot mismatch
            if old is None:
                continue
            if isinstance(new, str):
                changed = new != old
            else:
                changed = not np.isclose(old, new)
            if changed:
                mismatches.append(label)

        # pd.concat instead of DataFrame.append, which pandas 2.0 removed
        results = pd.concat([prior_data, results])

    results.to_csv(datastore, index=False)

    # Get corresponding transformer name and parameters
    def get_trans(hash_tr):
        for obj, attr in loaded_transformers.items():
            if str(obj.__hash__()) == hash_tr:
                return attr

    # The hash is the part of the label before the first '.'
    delta_t = {m.split('.')[0] for m in mismatches}
    delta_t = [get_trans(dt) for dt in delta_t]

    return {'transformers': delta_t, 'mismatches': mismatches}
Example #5
0
def test_get_transformer_by_name():
    """A mixed-case name resolves to STFTAudioExtractor; unknown names fail."""
    extractor = get_transformer('stFtAudioeXtrActOr', base='extractors')
    assert isinstance(extractor, STFTAudioExtractor)

    # An unrecognised name is expected to raise KeyError.
    with pytest.raises(KeyError):
        get_transformer('NotRealExtractor')
Example #6
0
def test_get_transformer_by_name():
    """A mixed-case name resolves to an STFTAudioExtractor instance."""
    resolved = get_transformer('stFtAudioeXtrActOr', base='extractors')
    assert isinstance(resolved, STFTAudioExtractor)
Example #7
0
 def __init__(self, transformer):
     """Store a transformer, resolving string names via get_transformer().

     Args:
         transformer: Either a transformer object, which is kept as-is, or
             a string that is passed to get_transformer() to obtain one.
     """
     if not isinstance(transformer, string_types):
         self.transformer = transformer
         return
     self.transformer = get_transformer(transformer)
Example #8
0
def test_get_transformer_by_name():
    """Check name-based lookup: a known name succeeds, an unknown one raises."""
    found = get_transformer('stFtAudioeXtrActOr', base='extractors')
    assert isinstance(found, STFTAudioExtractor)

    # Looking up a name that does not exist should raise KeyError.
    with pytest.raises(KeyError):
        get_transformer('NotRealExtractor')
Example #9
0
 def __init__(self, transformer, name, **parameters):
     """Create a named wrapper around a transformer with no children yet.

     Args:
         transformer: A transformer object, or a string that is resolved
             through get_transformer(transformer, **parameters).
         name: Label stored on ``self.name``.
         **parameters: Keyword arguments forwarded to get_transformer();
             only consulted when ``transformer`` is a string.
     """
     self.name, self.children = name, []
     by_name = isinstance(transformer, string_types)
     self.transformer = (
         get_transformer(transformer, **parameters) if by_name
         else transformer)
Example #10
0
def convert_stimuli(dataset_name, task_name, converters):
    """ Convert stimuli to different modality using pliers.

    Every active, parent-less stimulus of the task is run through each
    converter that accepts it. The converted stimuli are stored through
    create_new_stimuli(). Afterwards, stimuli derived from the same parent
    by the same converter class/parameters that were not produced in this
    run are marked inactive.

        Args:
            dataset_name - dataset name
            task_name - task name
            converters - iterable of (converter name, parameters dict)
                pairs, each instantiated with get_transformer()
        Output:
            list of db ids of converted stimuli
    """
    print("Converting stimuli")

    dataset_id = Dataset.query.filter_by(name=dataset_name).one().id

    converters = [get_transformer(n, **p) for n, p in converters]

    # Load all active original stimuli for task
    stim_objects = Stimulus.query.filter_by(active=True, parent_id=None).join(
        RunStimulus).join(Run).join(Task).filter_by(name=task_name).join(
            Dataset).filter_by(name=dataset_name)

    total_new_stims = []
    # Extract new stimuli from original stimuli
    for stim in stim_objects:
        new_stims = []
        # (transformer class, transformer params) of every converter that
        # actually ran on THIS stimulus. Tracking them per stimulus avoids
        # reading an unbound or stale conversion result after the loop
        # when no converter matched.
        applied = []
        # Re-create new RS associations with newly created stims
        rs_orig = RunStimulus.query.filter_by(stimulus_id=stim.id).join(
            Run).join(Task).filter_by(name=task_name)
        loaded_stim = load_stims(stim.path)

        # Extract for each converter
        for conv in converters:
            if not conv._stim_matches_input_types(loaded_stim):
                continue

            cstim = conv.transform(loaded_stim)

            # Extract and flatten results (to a single unit)
            results = []
            try:  # Add iterable
                results += cstim
            except TypeError:
                if hasattr(cstim, 'elements'):
                    results += cstim.elements
                else:
                    results.append(cstim)

            # Drop results that carry no data
            results = [res for res in results
                       if hasattr(res, 'data') and res.data != '']

            history = cstim.history
            new_stims += create_new_stimuli(
                dataset_id, task_name, stim.id, results, rs_orig,
                transformer=history.transformer_class,
                transformer_params=history.transformer_params)
            applied.append(
                (history.transformer_class, history.transformer_params))

        # De-activate previously generated stimuli from these converters.
        # Done once new_stims is complete, and for every converter that ran
        # (not only the last one), so nothing just created is de-activated.
        for conv_class, conv_params in applied:
            update = Stimulus.query.filter_by(parent_id=stim.id).filter(
                Stimulus.id.notin_(new_stims),
                Stimulus.converter_name == conv_class,
                Stimulus.converter_parameters == conv_params)
            if update.count():
                update.update(dict(active=False), synchronize_session='fetch')
        db.session.commit()
        total_new_stims += new_stims

    return total_new_stims
Example #11
0
def check_updates(transformers, datastore=None, stimuli=None):
    """ Run transformers through a battery of stimuli, and check if output has
    changed. Store results in csv file for comparison.

    Args:
        transformers (list): A list of tuples of transformer names and
            dictionary of parameters to instantiate with (or empty dict).
        datastore (str): Filepath of CSV file with results. Stored in home dir
            by default.
        stimuli (list): List of stimuli file paths to extract from. If None,
            use test data.

    Returns:
        dict: ``'mismatches'`` holds the result labels (formatted as
            ``"<transformer hash>.<stimulus name>"``) whose hashed output
            differs from the most recent stored run; ``'transformers'``
            holds the ``(name, params)`` tuples of the transformers behind
            them.
    """
    # Find datastore file
    datastore = datastore or expanduser('~/.pliers_updates')
    prior_data = pd.read_csv(datastore) if exists(datastore) else None

    # Load stimuli
    stimuli = stimuli or glob.glob(
        join(dirname(realpath(__file__)), '../tests/data/image/CC0/*'))
    stimuli = load_stims(stimuli)

    # Get transformers
    loaded_transformers = {get_transformer(name, **params): (name, params)
                           for name, params in transformers}

    # Transform stimuli
    results = pd.DataFrame({'time_extracted': [datetime.datetime.now()]})
    for trans in loaded_transformers:
        for stim in stimuli:
            if not trans._stim_matches_input_types(stim):
                continue
            res = trans.transform(stim)

            try:
                # Iterable of results: take the data of each *element*.
                # (Previously this read the attributes of the container.)
                # The getattr default is evaluated eagerly, so ``r.data``
                # is always accessed before ``_data`` is looked up.
                res = [getattr(r, '_data', r.data) for r in res]
            except TypeError:
                # A single, non-iterable result
                res = getattr(res, '_data', res.data)

            res = hash_data(res)

            results["{}.{}".format(trans.__hash__(), stim.name)] = [res]

    # Check for mismatches
    mismatches = []
    if prior_data is not None:
        # Most recent stored row, without its timestamp
        last = prior_data[
            prior_data.time_extracted == prior_data.time_extracted.max()]. \
            iloc[0].drop('time_extracted')

        # DataFrame.items() instead of iteritems(), which pandas 2.0 removed
        for label, value in results.items():
            old = last.get(label)
            new = value.values[0]

            # Labels absent from the previous run cannot mismatch
            if old is None:
                continue
            if isinstance(new, str):
                changed = new != old
            else:
                changed = not np.isclose(old, new)
            if changed:
                mismatches.append(label)

        # pd.concat instead of DataFrame.append, which pandas 2.0 removed
        results = pd.concat([prior_data, results])

    results.to_csv(datastore, index=False)

    # Get corresponding transformer name and parameters
    def get_trans(hash_tr):
        for obj, attr in loaded_transformers.items():
            if str(obj.__hash__()) == hash_tr:
                return attr

    # The hash is the part of the label before the first '.'
    delta_t = {m.split('.')[0] for m in mismatches}
    delta_t = [get_trans(dt) for dt in delta_t]

    return {'transformers': delta_t, 'mismatches': mismatches}
Example #12
0
 def __init__(self, transformer):
     """Keep a reference to a transformer, looking it up if named by string.

     Args:
         transformer: A transformer object, or a string that is resolved
             with get_transformer().
     """
     given_by_name = isinstance(transformer, string_types)
     self.transformer = (
         get_transformer(transformer) if given_by_name else transformer)