def record(self, record):
    """Associate a provenance record with this item.

    The record is only accepted if this item's name appears among the
    record's outputs; otherwise an ArcanaNameError is raised.
    """
    if self.name in record.outputs:
        self._record = record
    else:
        raise ArcanaNameError(
            self.name,
            "{} was not found in outputs {} of provenance record {}".format(
                self.name, record.outputs.keys(), record))
def visit(self, id):
    """Return the visit with the given ID (IDs are stored as strings)."""
    key = str(id)
    if key in self._visits:
        return self._visits[key]
    raise ArcanaNameError(
        id, ("{} doesn't have a visit named '{}' ('{}')".format(
            self, id, "', '".join(self._visits))))
def record(self, pipeline_name, from_study):
    """
    Returns the provenance record for a given pipeline

    Parameters
    ----------
    pipeline_name : str
        The name of the pipeline that generated the record
    from_study : str
        The name of the study that the pipeline was generated from

    Returns
    -------
    record : arcana.provenance.Record
        The provenance record generated by the specified pipeline
    """
    key = (pipeline_name, from_study)
    if key in self._records:
        return self._records[key]
    # Build a human-readable summary of the records that DO exist,
    # grouped by the study they came from.
    found = []
    grouped = groupby(sorted(self._records, key=itemgetter(1)),
                      key=itemgetter(1))
    for sname, pnames in grouped:
        found.append("'{}' for '{}'".format(
            "', '".join(p for p, _ in pnames), sname))
    raise ArcanaNameError(
        key,
        ("{} doesn't have a provenance record for pipeline '{}' "
         "for '{}' study (found {})".format(
             self, pipeline_name, from_study, '; '.join(found))))
def session(self, subject_id):
    """Return the session for the given subject ID (IDs stored as strings)."""
    key = str(subject_id)
    if key in self._sessions:
        return self._sessions[key]
    raise ArcanaNameError(
        subject_id, ("{} doesn't have a session named '{}' ('{}')".format(
            self, subject_id, "', '".join(self._sessions))))
def substudy_spec(cls, name):
    """
    Return the sub-study spec with the given name.

    Parameters
    ----------
    name : str
        Name of the sub-study spec to return

    Raises
    ------
    ArcanaNameError
        If no sub-study spec with that name exists on the class
    """
    try:
        return cls._substudy_specs[name]
    except KeyError:
        # Fixed typo in error message: "sub-studes" -> "sub-studies"
        raise ArcanaNameError(
            name, "'{}' not found in sub-studies ('{}')"
            .format(name, "', '".join(cls._substudy_specs)))
def substudy(self, name):
    """
    Return the sub-study instance with the given name.

    Parameters
    ----------
    name : str
        Name of the sub-study to return

    Raises
    ------
    ArcanaNameError
        If no sub-study with that name exists on this study
    """
    try:
        return self._substudies[name]
    except KeyError:
        # Fixed typo in error message: "sub-studes" -> "sub-studies"
        raise ArcanaNameError(
            name, "'{}' not found in sub-studies ('{}')"
            .format(name, "', '".join(self._substudies)))
def param_spec(cls, name):
    """Return the parameter spec registered under *name* on this class."""
    if name in cls._param_specs:
        return cls._param_specs[name]
    raise ArcanaNameError(
        name,
        "No parameter spec named '{}' in {}, available:\n{}".format(
            name, cls.__name__, "\n".join(list(cls._param_specs.keys()))))
def _get_parameter(self, name): try: parameter = self._parameters[name] except KeyError: try: parameter = self._param_specs[name] except KeyError: raise ArcanaNameError( name, "Invalid parameter, '{}', in {} (valid '{}')".format( name, self._param_error_location, "', '".join(self.param_spec_names()))) return parameter
def __init__(self, name, repository, processor, inputs, parameters=None,
             **kwargs):
    """Initialise a MultiStudy.

    Validates that the class was created via MultiStudyMetaClass,
    initialises the base Study, then constructs each sub-study with its
    inputs and parameters mapped from the multi-study namespace.

    Parameters
    ----------
    name : str
        Name of the multi-study (used to prefix sub-study names)
    repository : Repository
        Repository the study data is stored in
    processor : Processor
        Processor used to run the study's pipelines
    inputs : dict | list
        Inputs to the study (passed through to the base Study)
    parameters : dict | list | None
        Parameters for the study and its sub-studies
    """
    try:
        # This works for PY3 as the metaclass inserts it itself if
        # it isn't provided
        metaclass = type(self).__dict__['__metaclass__']
        if not issubclass(metaclass, MultiStudyMetaClass):
            raise KeyError
    except KeyError:
        raise ArcanaUsageError(
            "Need to set MultiStudyMetaClass (or sub-class) as "
            "the metaclass of all classes derived from "
            "MultiStudy")
    super(MultiStudy, self).__init__(
        name, repository, processor, inputs, parameters=parameters,
        **kwargs)
    self._substudies = {}
    for substudy_spec in self.substudy_specs():
        substudy_cls = substudy_spec.study_class
        # Map inputs, data_specs to the substudy
        mapped_inputs = {}
        for data_name in substudy_cls.data_spec_names():
            mapped_name = substudy_spec.map(data_name)
            if mapped_name in self.input_names:
                mapped_inputs[data_name] = self.input(mapped_name)
            else:
                # No explicit input: fall back to a derived spec of the
                # multi-study, if one exists, so sub-studies can consume
                # data derived at the multi-study level
                try:
                    inpt = self.spec(mapped_name)
                except ArcanaMissingDataException:
                    pass
                else:
                    if inpt.derived:
                        mapped_inputs[data_name] = inpt
        # Map parameters to the substudy
        mapped_parameters = {}
        for param_name in substudy_cls.param_spec_names():
            mapped_name = substudy_spec.map(param_name)
            parameter = self._get_parameter(mapped_name)
            mapped_parameters[param_name] = parameter
        # Create sub-study (named '<multi-study>_<spec>'); inputs are
        # not enforced here since they may be derived later
        substudy = substudy_spec.study_class(
            name + '_' + substudy_spec.name,
            repository, processor, mapped_inputs,
            parameters=mapped_parameters, enforce_inputs=False,
            subject_ids=self.subject_ids, visit_ids=self.visit_ids,
            clear_caches=False)
        # Append to dictionary of substudies
        if substudy_spec.name in self._substudies:
            raise ArcanaNameError(
                substudy_spec.name,
                "Duplicate sub-study names '{}'"
                .format(substudy_spec.name))
        self._substudies[substudy_spec.name] = substudy
def map(self, name):
    """Map a name from the sub-study's namespace to the multi-study's.

    Explicit entries in the name map take precedence; any other name
    that matches a spec of the study class gets the sub-study prefix
    applied.

    Raises
    ------
    ArcanaNameError
        If the name is neither in the name map nor a spec of the
        study class
    """
    try:
        return self._name_map[name]
    except KeyError:
        if name not in self.study_class.spec_names():
            # BUG FIX: the format call previously passed 4 arguments
            # (including self.name) for 3 placeholders, so the message
            # showed the spec name where the class name belonged and
            # never showed the available spec names.
            raise ArcanaNameError(
                name,
                ("'{}' doesn't match any filesets, fields, parameters "
                 "in the study class {} ('{}')")
                .format(name, self.study_class.__name__,
                        "', '".join(self.study_class.spec_names())))
        return self.apply_prefix(name)
def field(self, name, from_study=None):
    """
    Gets the field named 'name' produced by the Study named 'study' if
    provided. If a spec is passed instead of a str to the name argument,
    then the study will be set from the spec iff it is derived

    Parameters
    ----------
    name : str | BaseField
        The name of the field or a spec matching the given name
    from_study : str | None
        Name of the study that produced the field if derived. If None
        and a spec is passed instead of string to the name argument then
        the study name will be taken from the spec instead.
    """
    if isinstance(name, BaseField):
        if from_study is None and name.derived:
            from_study = name.study.name
        name = name.name
    try:
        return self._fields[(name, from_study)]
    except KeyError:
        available = [
            d.name for d in self.fields if d.from_study == from_study]
        other_studies = [
            (d.from_study if d.from_study is not None else '<root>')
            for d in self.fields if d.name == name]
        if other_studies:
            msg = (". NB: matching field(s) found for '{}' study(ies) "
                   "('{}')".format(name, "', '".join(other_studies)))
        else:
            msg = ''
        # BUG FIX: the format string previously had only 4 placeholders
        # for 5 arguments, so the 'msg' hint about matching fields in
        # other studies was silently dropped (cf. the fileset() method,
        # which includes it).
        raise ArcanaNameError(
            name,
            ("{} doesn't have a field named '{}'{} "
             "(available '{}'){}").format(
                 self, name,
                 (" from study '{}'".format(from_study)
                  if from_study is not None else ''),
                 "', '".join(available), msg))
def data_spec(cls, name):
    """
    Return the fileset_spec, i.e. the template of the fileset expected to
    be supplied or generated corresponding to the fileset_spec name.

    Parameters
    ----------
    name : Str
        Name of the fileset_spec to return
    """
    # A data item (or parameter) may be passed in place of its name, in
    # which case the lookup is done on its name attribute
    if isinstance(name, BaseData):
        name = name.name
    if name not in cls._data_specs:
        raise ArcanaNameError(
            name,
            "No fileset spec named '{}' in {}, available:\n{}".format(
                name, cls.__name__, "\n".join(list(cls._data_specs.keys()))))
    return cls._data_specs[name]
def fileset(self, id, from_study=None, format=None):
    """
    Gets the fileset with the ID 'id' produced by the Study named 'study'
    if provided. If a spec is passed instead of a str to the name argument,
    then the study will be set from the spec iff it is derived

    Parameters
    ----------
    id : str | FilesetSpec
        The name of the fileset or a spec matching the given name
    from_study : str | None
        Name of the study that produced the fileset if derived. If None
        and a spec is passed instead of string to the name argument then
        the study name will be taken from the spec instead.
    format : FileFormat | str | None
        Either the format of the fileset to return or the name of the
        format. If None and only a single fileset is found for the given
        name and study then that is returned otherwise an exception is
        raised
    """
    if isinstance(id, BaseFileset):
        if from_study is None and id.derived:
            from_study = id.study.name
        id = id.name
    try:
        format_dct = self._filesets[(id, from_study)]
    except KeyError:
        available = [('{}(format={})'.format(f.id, f._resource_name)
                      if f._resource_name is not None else f.id)
                     for f in self.filesets
                     if f.from_study == from_study]
        other_studies = [
            (f.from_study if f.from_study is not None else '<root>')
            for f in self.filesets if f.id == id]
        if other_studies:
            msg = (". NB: matching fileset(s) found for '{}' study(ies) "
                   "('{}')".format(id, "', '".join(other_studies)))
        else:
            msg = ''
        raise ArcanaNameError(
            id, ("{} doesn't have a fileset named '{}'{} "
                 "(available '{}'){}".format(
                     self, id,
                     (" from study '{}'".format(from_study)
                      if from_study is not None else ''),
                     "', '".join(available), msg)))
    else:
        if format is None:
            # No format requested: unambiguous only if a single format
            # exists for this (id, from_study) pair
            all_formats = list(format_dct.values())
            if len(all_formats) > 1:
                raise ArcanaNameError(
                    id,
                    "Multiple filesets found for '{}'{} in {} with formats"
                    " {}. Need to specify a format".format(
                        id, ("in '{}'".format(from_study)
                             if from_study is not None else ''),
                        self, "', '".join(format_dct.keys())))
            fileset = all_formats[0]
        else:
            try:
                if isinstance(format, str):
                    fileset = format_dct[format]
                else:
                    # FileFormat object: try its extension first, then
                    # fall back to matching repository resource names
                    try:
                        fileset = format_dct[format.ext]
                    except KeyError:
                        fileset = None
                        for rname, rfileset in format_dct.items():
                            if rname in format.resource_names(
                                    self.tree.repository.type):  # noqa pylint: disable=no-member
                                fileset = rfileset
                                break
                        if fileset is None:
                            raise
            except KeyError:
                # BUG FIX: this message previously appended an undefined
                # 'msg' variable (only bound in the earlier except
                # branch), which raised a NameError that masked the
                # intended ArcanaNameError.
                raise ArcanaNameError(
                    format,
                    ("{} doesn't have a fileset named '{}'{} with "
                     "format '{}' (available '{}')".format(
                         self, id,
                         (" from study '{}'".format(from_study)
                          if from_study is not None else ''),
                         format, "', '".join(format_dct.keys()))))
    return fileset
def node(self, name):
    """Return the workflow node registered as '<pipeline-name>_<name>'."""
    qualified = '{}_{}'.format(self.name, name)
    found = self.workflow.get_node(qualified)
    if found is None:
        raise ArcanaNameError(
            name, "{} doesn't have node named '{}'".format(self, name))
    return found
def input(self, name):
    """Return the bound study input with the given name."""
    if name in self._inputs:
        return self._inputs[name]
    raise ArcanaNameError(
        name, "{} doesn't have an input named '{}'".format(self, name))
def __init__(self, name, repository, processor, inputs,
             environment=None, parameters=None, subject_ids=None,
             visit_ids=None, enforce_inputs=True, fill_tree=False,
             clear_caches=True):
    """Initialise a Study.

    Validates the metaclass, normalises the repository/processor/
    environment arguments, records parameters against their specs,
    converts and validates the inputs, binds them against data in the
    repository, and finally checks that all non-optional acquired specs
    have been provided.

    Parameters
    ----------
    name : str
        Name of the study
    repository : Repository | str
        Repository the data is stored in (a str is treated as a
        BasicRepo directory path)
    processor : Processor | str
        Processor to run pipelines with (a str is treated as a
        SingleProc working directory)
    inputs : dict | list
        Inputs to the study; strings are interpreted as regex patterns
    environment : Environment | None
        Software environment; defaults to StaticEnv()
    subject_ids, visit_ids : list | None
        Optional restriction of the IDs to analyse
    enforce_inputs : bool
        Whether to raise if a non-optional acquired spec is missing
    fill_tree : bool
        Stored for later use when accessing the repository tree
    clear_caches : bool
        Whether to (re)initialise the data/pipeline caches
    """
    try:
        # This works for PY3 as the metaclass inserts it itself if
        # it isn't provided
        metaclass = type(self).__dict__['__metaclass__']
        if not issubclass(metaclass, StudyMetaClass):
            raise KeyError
    except KeyError:
        raise ArcanaUsageError(
            "Need to have StudyMetaClass (or a sub-class) as "
            "the metaclass of all classes derived from Study")
    # Allow strings as shorthand for the default repository/processor
    if isinstance(repository, basestring):
        repository = BasicRepo(repository, depth=None)
    if isinstance(processor, basestring):
        processor = SingleProc(processor)
    if environment is None:
        environment = StaticEnv()
    self._name = name
    self._repository = repository
    self._processor = processor.bind(self)
    self._environment = environment
    self._inputs = {}
    self._subject_ids = (tuple(subject_ids)
                         if subject_ids is not None else None)
    self._visit_ids = tuple(visit_ids) if visit_ids is not None else None
    self._fill_tree = fill_tree
    # Initialise caches for data collection and pipeline objects
    if clear_caches:
        self.clear_caches()
    # Set parameters
    if parameters is None:
        parameters = {}
    elif not isinstance(parameters, dict):
        # Convert list of parameters into dictionary
        parameters = {o.name: o for o in parameters}
    self._parameters = {}
    for param_name, param in list(parameters.items()):
        # Bare values are wrapped in Parameter objects
        if not isinstance(param, Parameter):
            param = Parameter(param_name, param)
        try:
            param_spec = self._param_specs[param_name]
        except KeyError:
            raise ArcanaNameError(
                param_name,
                "Provided parameter '{}' is not present in the "
                "allowable parameters for {} classes ('{}')".format(
                    param_name, type(self).__name__,
                    "', '".join(self.param_spec_names())))
        param_spec.check_valid(param, context=' {}(name={})'.format(
            type(self).__name__, name))
        self._parameters[param_name] = param
    # Convert inputs to a dictionary if passed in as a list/tuple
    if not isinstance(inputs, dict):
        inputs = {i.name: i for i in inputs}
    else:
        # Convert string patterns into Input objects
        for inpt_name, inpt in list(inputs.items()):
            if isinstance(inpt, basestring):
                spec = self.data_spec(inpt_name)
                if spec.is_fileset:
                    inpt = InputFilesets(inpt_name, pattern=inpt,
                                         is_regex=True)
                else:
                    inpt = InputFields(inpt_name, pattern=inpt,
                                       dtype=spec.dtype, is_regex=True)
                inputs[inpt_name] = inpt
    # Check validity of study inputs
    for inpt_name, inpt in inputs.items():
        try:
            spec = self.data_spec(inpt_name)
        except ArcanaNameError:
            raise ArcanaNameError(
                inpt.name,
                "Input name '{}' isn't in data specs of {} ('{}')".format(
                    inpt.name, self.__class__.__name__,
                    "', '".join(self._data_specs)))
        else:
            # Fileset inputs must match fileset specs and field inputs
            # must match field specs
            if spec.is_fileset:
                if inpt.is_field:
                    raise ArcanaUsageError(
                        "Passed field ({}) as input to fileset spec"
                        " {}".format(inpt, spec))
            elif not inpt.is_field:
                raise ArcanaUsageError(
                    "Passed fileset ({}) as input to field spec {}".format(
                        inpt, spec))
    # "Bind" input selectors to the current study object, and attempt to
    # match with data in the repository
    input_errors = []
    with self.repository:
        if not self.subject_ids:
            raise ArcanaUsageError(
                "No subject IDs provided and destination repository "
                "is empty")
        if not self.visit_ids:
            raise ArcanaUsageError(
                "No visit IDs provided and destination repository "
                "is empty")
        for inpt_name, inpt in list(inputs.items()):
            try:
                try:
                    self._inputs[inpt_name] = bound_inpt = inpt.bind(
                        self, spec_name=inpt_name)
                except ArcanaInputMissingMatchError as e:
                    # Missing matches are only tolerated for inputs
                    # explicitly flagged with drop_if_missing
                    if not inpt.drop_if_missing:
                        raise e
                else:
                    spec = self.data_spec(inpt_name)
                    if spec.is_fileset:
                        if spec.derived:
                            # Derived specs need a converter from the
                            # bound input's format to the spec's format
                            try:
                                spec.format.converter_from(
                                    bound_inpt.format)
                            except ArcanaNoConverterError as e:
                                e.msg += (
                                    ", which is requried to convert:\n" +
                                    "{} to\n{}.").format(
                                        e, bound_inpt, spec)
                                raise e
                        else:
                            if bound_inpt.format not in spec.valid_formats:
                                raise ArcanaUsageError(
                                    "Cannot pass {} as an input to {} as "
                                    "it is not in one of the valid formats"
                                    " ('{}')".format(
                                        bound_inpt, spec,
                                        "', '".join(
                                            f.name
                                            for f in spec.valid_formats)))
            except ArcanaInputError as e:
                # Collate errors across all inputs into a single error
                # message
                input_errors.append(e)
    if input_errors:
        raise ArcanaInputError('\n'.join(str(e) for e in input_errors))
    # Check remaining specs are optional or have default values
    for spec in self.data_specs():
        if spec.name not in self.input_names:
            if not spec.derived and spec.default is None:
                # Emit a warning if an acquired fileset has not been
                # provided for an "acquired fileset"
                msg = (
                    " input fileset '{}' was not provided to {}.".format(
                        spec.name, self))
                if spec.optional:
                    logger.info('Optional' + msg)
                else:
                    if enforce_inputs:
                        raise ArcanaMissingInputError(
                            'Non-optional' + msg + " Pipelines depending "
                            "on this fileset will not run")