def __init__(self, transformer, name=None, **parameters):
    """Initialise the node's name, child list, transformer and id.

    Args:
        transformer: A transformer object, or a string that is handed to
            ``get_transformer`` to obtain one.
        name: Optional node name. When it is not None it is also written
            to the transformer's ``name`` attribute.
        **parameters: Keyword arguments forwarded to ``get_transformer``;
            they are only used when ``transformer`` is a string.
    """
    self.name = name
    self.children = []

    # A string is treated as a transformer name and resolved to an object.
    given_by_name = isinstance(transformer, string_types)
    resolved = (get_transformer(transformer, **parameters)
                if given_by_name else transformer)
    self.transformer = resolved

    if name is not None:
        self.transformer.name = name

    # The id is taken from the resolved transformer object.
    self.id = id(resolved)
def __init__(self, transformer, name=None, **parameters):
    """Initialise the node and remember the parameters it was built with.

    Args:
        transformer: A transformer object, or a string that is handed to
            ``get_transformer`` to obtain one.
        name: Optional node name. When it is not None it is also written
            to the transformer's ``name`` attribute.
        **parameters: Keyword arguments forwarded to ``get_transformer``
            when ``transformer`` is a string. They are always stored on
            ``self.parameters``, whether or not a lookup happened.
    """
    self.name = name
    self.children = []

    # Resolve a transformer given by name; pass objects through untouched.
    if isinstance(transformer, string_types):
        instance = get_transformer(transformer, **parameters)
    else:
        instance = transformer

    self.transformer = instance
    self.parameters = parameters

    if name is not None:
        self.transformer.name = name

    self.id = id(instance)
def _extract(extractors, stims):
    """Apply each extractor to the stimuli it accepts and gather the output.

    Args:
        extractors: Iterable of ``(name, parameters)`` pairs; each pair is
            instantiated with ``get_transformer(name, **parameters)``.
        stims: Iterable of two-item pairs. The second item is the stimulus
            object handed to the extractor; the first item is carried
            through unchanged into the output.

    Returns:
        list: ``(first_item, transform_result)`` tuples, grouped by
        extractor in the order the extractors were given.
    """
    collected = []

    for ext_name, ext_params in extractors:
        print("Extractor: {}".format(ext_name))
        extractor = get_transformer(ext_name, **ext_params)

        # The class does not change while looping, so test it once.
        wants_avi = ('GoogleVideoAPIShotDetectionExtractor'
                     in str(extractor.__class__))

        accepted = []
        for tag, stim in stims:
            if not extractor._stim_matches_input_types(stim):
                continue
            if wants_avi:
                # Hacky workaround. Look for compatible AVI.
                # Note that this rewrites the stimulus' filename in place.
                stim.filename = str(Path(stim.filename).with_suffix('.avi'))
            accepted.append((tag, stim))

        collected.extend(
            [(tag, extractor.transform(stim))
             for tag, stim in progressbar(accepted)])

    return collected
def check_updates(transformers, datastore=None, stimuli=None):
    """ Run transformers through a battery of stimuli, and check if output
    has changed. Store results in csv file for comparison.

    One value is recorded per (transformer, stimulus) pair, in a column
    labelled ``"<transformer hash>.<stimulus name>"``. For ``Converter`` and
    ``Filter`` instances the value is ``hash_data`` of the result data; for
    every other transformer it is the ``[0][0]`` entry of the result data.
    The new row is compared against the row of the datastore with the
    largest ``time_extracted`` and is then appended to the datastore.

    Args:
        transformers (list): A list of tuples of transformer names and
            dictionary of parameters to instantiate with (or empty dict).
        datastore (str): Filepath of CSV file with results. Stored in home
            dir (``~/.pliers_updates``) by default.
        stimuli (list): List of stimuli file paths to extract from. If None,
            use test data.

    Returns:
        dict: ``'mismatches'`` holds the column labels whose value differs
        from the previous run; ``'transformers'`` holds the
        ``(name, params)`` tuples of the transformers behind those labels.
    """
    # Find datastore file
    datastore = datastore or expanduser('~/.pliers_updates')
    prior_data = pd.read_csv(datastore) if exists(datastore) else None

    # Load stimuli
    stimuli = stimuli or glob.glob(
        join(dirname(realpath(__file__)), '../tests/data/image/CC0/*'))
    stimuli = load_stims(stimuli)

    # Get transformers
    loaded_transformers = {
        get_transformer(name, **params): (name, params)
        for name, params in transformers
    }

    # Transform stimuli
    results = pd.DataFrame({'time_extracted': [datetime.datetime.now()]})
    for trans in loaded_transformers:
        for stim in stimuli:
            if not trans._stim_matches_input_types(stim):
                continue

            res = trans.transform(stim)

            try:
                # Iterable result: take the data of every element. The
                # previous code read ``res._data`` here, ignoring the loop
                # variable and repeating the container's data instead.
                res = [r._data for r in res]
            except TypeError:
                # Single, non-iterable result
                res = res._data

            if isinstance(trans, (Converter, Filter)):
                res = hash_data(res)
            else:
                res = res[0][0]

            results["{}.{}".format(trans.__hash__(), stim.name)] = [res]

    # Check for mismatches
    mismatches = []
    if prior_data is not None:
        is_latest = (prior_data.time_extracted ==
                     prior_data.time_extracted.max())
        last = prior_data[is_latest].iloc[0].drop('time_extracted')

        # ``DataFrame.iteritems`` was removed in pandas 2.0; ``items`` is the
        # equivalent that exists in old and new versions alike.
        for label, value in results.items():
            old = last.get(label)
            new = value.values[0]

            # Labels absent from the previous run have nothing to compare to.
            if old is not None:
                if isinstance(new, str):
                    if new != old:
                        mismatches.append(label)
                elif not np.isclose(old, new):
                    mismatches.append(label)

        # ``DataFrame.append`` was removed in pandas 2.0; ``pd.concat`` with
        # default arguments stacks the rows the same way.
        results = pd.concat([prior_data, results])

    results.to_csv(datastore, index=False)

    # Get corresponding transformer name and parameters
    def get_trans(hash_tr):
        for obj, attr in loaded_transformers.items():
            if str(obj.__hash__()) == hash_tr:
                return attr

    # The hash is everything before the first dot of the column label.
    delta_t = {m.split('.')[0] for m in mismatches}
    delta_t = [get_trans(dt) for dt in delta_t]

    return {'transformers': delta_t, 'mismatches': mismatches}
def test_get_transformer_by_name():
    """A mixed-case name resolves to the extractor; an unknown name fails."""
    extractor = get_transformer('stFtAudioeXtrActOr', base='extractors')
    assert isinstance(extractor, STFTAudioExtractor)

    # A name that matches no transformer is expected to raise KeyError.
    with pytest.raises(KeyError):
        get_transformer('NotRealExtractor')
def test_get_transformer_by_name():
    """A mixed-case name resolves to an ``STFTAudioExtractor`` instance."""
    requested_name = 'stFtAudioeXtrActOr'
    found = get_transformer(requested_name, base='extractors')
    assert isinstance(found, STFTAudioExtractor)
def __init__(self, transformer):
    """Store the transformer, looking it up by name when given a string.

    Args:
        transformer: A transformer object, stored as-is, or a string that
            is passed to ``get_transformer`` to obtain the object.
    """
    given_by_name = isinstance(transformer, string_types)
    self.transformer = (get_transformer(transformer)
                        if given_by_name else transformer)
def __init__(self, transformer, name, **parameters):
    """Initialise the node's name, child list and transformer.

    Args:
        transformer: A transformer object, or a string that is handed to
            ``get_transformer`` to obtain one.
        name: Name stored on the node.
        **parameters: Keyword arguments forwarded to ``get_transformer``;
            they are only used when ``transformer`` is a string.
    """
    self.name = name
    self.children = []

    # A string is treated as a transformer name and resolved to an object.
    looked_up_by_name = isinstance(transformer, string_types)
    self.transformer = (get_transformer(transformer, **parameters)
                        if looked_up_by_name else transformer)
def convert_stimuli(dataset_name, task_name, converters):
    """ Convert stimuli to different modality using pliers.

    Every active stimulus without a parent that belongs to the given task
    and dataset is loaded and run through each converter whose input types
    match. Converted elements that have a non-empty ``data`` attribute are
    handed to ``create_new_stimuli``. Afterwards, stimuli derived from the
    same parent by the same converter class and parameters that were not
    (re)created in this run are marked inactive.

    Args:
        dataset_name - dataset name
        task_name - task name
        converters - iterable of (converter name, parameter dict) pairs;
            each pair is instantiated via
            ``get_transformer(name, **params)``
    Output:
        list of db ids of converted stimuli
    """
    print("Converting stimuli")

    dataset_id = Dataset.query.filter_by(name=dataset_name).one().id

    converters = [get_transformer(n, **p) for n, p in converters]

    # Load all active original stimuli for task
    stim_objects = Stimulus.query.filter_by(active=True, parent_id=None).join(
        RunStimulus).join(Run).join(Task).filter_by(name=task_name).join(
            Dataset).filter_by(name=dataset_name)

    total_new_stims = []
    # Extract new stimuli from original stimuli
    for stim in stim_objects:
        new_stims = []
        # Re-create new RS associations with newly created stims
        rs_orig = RunStimulus.query.filter_by(stimulus_id=stim.id).join(
            Run).join(Task).filter_by(name=task_name)
        loaded_stim = load_stims(stim.path)

        # (transformer_class, transformer_params) of every converter that
        # actually ran on this stimulus. Tracking them explicitly keeps the
        # de-activation step below from reading a converter result that is
        # unbound (no converter matched) or left over from an earlier
        # stimulus, and lets it cover all converters instead of the last.
        applied = []

        # Extract for each converter
        for conv in converters:
            if not conv._stim_matches_input_types(loaded_stim):
                continue

            cstim = conv.transform(loaded_stim)

            # Extract and flatten results (to a single unit)
            results = []
            try:  # Add iterable
                results += cstim
            except TypeError:
                if hasattr(cstim, 'elements'):
                    results += cstim.elements
                else:
                    results.append(cstim)

            # Drop elements without data, or with empty-string data.
            results = [res for res in results
                       if hasattr(res, 'data') and res.data != '']

            history = cstim.history
            new_stims += create_new_stimuli(
                dataset_id, task_name, stim.id, results, rs_orig,
                transformer=history.transformer_class,
                transformer_params=history.transformer_params)
            applied.append(
                (history.transformer_class, history.transformer_params))

        # De-activate previously generated stimuli from these converters.
        for conv_class, conv_params in applied:
            update = Stimulus.query.filter_by(parent_id=stim.id).filter(
                Stimulus.id.notin_(new_stims),
                Stimulus.converter_name == conv_class,
                Stimulus.converter_parameters == conv_params)
            if update.count():
                update.update(dict(active=False), synchronize_session='fetch')

        db.session.commit()
        total_new_stims += new_stims

    return total_new_stims
def check_updates(transformers, datastore=None, stimuli=None):
    """ Run transformers through a battery of stimuli, and check if output
    has changed. Store results in csv file for comparison.

    One value is recorded per (transformer, stimulus) pair, in a column
    labelled ``"<transformer hash>.<stimulus name>"``; the value is
    ``hash_data`` of the result data. The new row is compared against the
    row of the datastore with the largest ``time_extracted`` and is then
    appended to the datastore.

    Args:
        transformers (list): A list of tuples of transformer names and
            dictionary of parameters to instantiate with (or empty dict).
        datastore (str): Filepath of CSV file with results. Stored in home
            dir (``~/.pliers_updates``) by default.
        stimuli (list): List of stimuli file paths to extract from. If None,
            use test data.

    Returns:
        dict: ``'mismatches'`` holds the column labels whose value differs
        from the previous run; ``'transformers'`` holds the
        ``(name, params)`` tuples of the transformers behind those labels.
    """
    # Find datastore file
    datastore = datastore or expanduser('~/.pliers_updates')
    prior_data = pd.read_csv(datastore) if exists(datastore) else None

    # Load stimuli
    stimuli = stimuli or glob.glob(
        join(dirname(realpath(__file__)), '../tests/data/image/CC0/*'))
    stimuli = load_stims(stimuli)

    # Get transformers
    loaded_transformers = {
        get_transformer(name, **params): (name, params)
        for name, params in transformers
    }

    # Transform stimuli
    results = pd.DataFrame({'time_extracted': [datetime.datetime.now()]})
    for trans in loaded_transformers:
        for stim in stimuli:
            if not trans._stim_matches_input_types(stim):
                continue

            res = trans.transform(stim)

            try:
                # Iterable result: take the data of every element. The
                # previous code read the container (``res``) here, ignoring
                # the loop variable and repeating the same data per element.
                res = [getattr(r, '_data', r.data) for r in res]
            except TypeError:
                # Single, non-iterable result
                res = getattr(res, '_data', res.data)

            res = hash_data(res)

            results["{}.{}".format(trans.__hash__(), stim.name)] = [res]

    # Check for mismatches
    mismatches = []
    if prior_data is not None:
        is_latest = (prior_data.time_extracted ==
                     prior_data.time_extracted.max())
        last = prior_data[is_latest].iloc[0].drop('time_extracted')

        # ``DataFrame.iteritems`` was removed in pandas 2.0; ``items`` is the
        # equivalent that exists in old and new versions alike.
        for label, value in results.items():
            old = last.get(label)
            new = value.values[0]

            # Labels absent from the previous run have nothing to compare to.
            if old is not None:
                if isinstance(new, str):
                    if new != old:
                        mismatches.append(label)
                elif not np.isclose(old, new):
                    mismatches.append(label)

        # ``DataFrame.append`` was removed in pandas 2.0; ``pd.concat`` with
        # default arguments stacks the rows the same way.
        results = pd.concat([prior_data, results])

    results.to_csv(datastore, index=False)

    # Get corresponding transformer name and parameters
    def get_trans(hash_tr):
        for obj, attr in loaded_transformers.items():
            if str(obj.__hash__()) == hash_tr:
                return attr

    # The hash is everything before the first dot of the column label.
    delta_t = {m.split('.')[0] for m in mismatches}
    delta_t = [get_trans(dt) for dt in delta_t]

    return {'transformers': delta_t, 'mismatches': mismatches}