Exemple #1
0
 def record(self, record):
     """
     Attach a provenance record to this item.

     Parameters
     ----------
     record : object
         Provenance record exposing an ``outputs`` mapping, which must
         contain ``self.name`` as a key

     Raises
     ------
     ArcanaNameError
         If ``self.name`` is not present in ``record.outputs``
     """
     if self.name in record.outputs:
         self._record = record
         return
     # The record does not list this item among its outputs
     message = (
         "{} was not found in outputs {} of provenance record {}".format(
             self.name, record.outputs.keys(), record))
     raise ArcanaNameError(self.name, message)
Exemple #2
0
 def visit(self, id):
     """
     Return the visit stored under ``str(id)``.

     Raises
     ------
     ArcanaNameError
         If there is no visit with that ID; the message lists the IDs
         that are available
     """
     try:
         found = self._visits[str(id)]
     except KeyError:
         known_ids = "', '".join(self._visits)
         raise ArcanaNameError(
             id,
             ("{} doesn't have a visit named '{}' ('{}')".format(
                 self, id, known_ids)))
     return found
Exemple #3
0
    def record(self, pipeline_name, from_study):
        """
        Look up the provenance record stored for a pipeline/study pair

        Parameters
        ----------
        pipeline_name : str
            The name of the pipeline that generated the record
        from_study : str
            The name of the study that the pipeline was generated from

        Returns
        -------
        record : arcana.provenance.Record
            The record stored under ``(pipeline_name, from_study)``

        Raises
        ------
        ArcanaNameError
            If no record is stored for that pair. The message lists the
            pipelines that do have records, grouped by study.
        """
        key = (pipeline_name, from_study)
        try:
            rec = self._records[key]
        except KeyError:
            # Keys are (pipeline, study) pairs; groupby needs them sorted
            # by the same key it groups on (the study name)
            study_of = itemgetter(1)
            grouped = groupby(sorted(self._records, key=study_of),
                              key=study_of)
            found = [
                "'{}' for '{}'".format(
                    "', '".join(pname for pname, _ in group), study)
                for study, group in grouped]
            raise ArcanaNameError(
                key,
                ("{} doesn't have a provenance record for pipeline '{}' "
                 "for '{}' study (found {})".format(
                     self, pipeline_name, from_study, '; '.join(found))))
        return rec
Exemple #4
0
 def session(self, subject_id):
     """
     Return the session stored under ``str(subject_id)``.

     Raises
     ------
     ArcanaNameError
         If there is no session for that subject ID; the message lists
         the IDs that are available
     """
     try:
         found = self._sessions[str(subject_id)]
     except KeyError:
         known_ids = "', '".join(self._sessions)
         raise ArcanaNameError(
             subject_id,
             ("{} doesn't have a session named '{}' ('{}')".format(
                 self, subject_id, known_ids)))
     return found
Exemple #5
0
 def substudy_spec(cls, name):
     """
     Return the sub-study spec registered on the class under ``name``.

     Raises
     ------
     ArcanaNameError
         If no sub-study spec has that name; the message lists the
         names that are registered
     """
     try:
         spec = cls._substudy_specs[name]
     except KeyError:
         registered = "', '".join(cls._substudy_specs)
         raise ArcanaNameError(
             name,
             "'{}' not found in sub-studes ('{}')".format(name, registered))
     return spec
Exemple #6
0
 def substudy(self, name):
     """
     Return the sub-study held by this study under ``name``.

     Raises
     ------
     ArcanaNameError
         If there is no sub-study with that name; the message lists the
         names that are present
     """
     try:
         found = self._substudies[name]
     except KeyError:
         present = "', '".join(self._substudies)
         raise ArcanaNameError(
             name,
             "'{}' not found in sub-studes ('{}')".format(name, present))
     return found
Exemple #7
0
 def param_spec(cls, name):
     """
     Return the parameter spec registered on the class under ``name``.

     Raises
     ------
     ArcanaNameError
         If no parameter spec has that name; the message lists the
         registered names, one per line
     """
     try:
         spec = cls._param_specs[name]
     except KeyError:
         listing = "\n".join(cls._param_specs.keys())
         raise ArcanaNameError(
             name,
             "No parameter spec named '{}' in {}, available:\n{}".format(
                 name, cls.__name__, listing))
     return spec
Exemple #8
0
 def _get_parameter(self, name):
     """
     Return the parameter called ``name``, preferring a value stored in
     ``self._parameters`` and falling back to ``self._param_specs``.

     Raises
     ------
     ArcanaNameError
         If ``name`` is in neither mapping
     """
     try:
         return self._parameters[name]
     except KeyError:
         # Not explicitly provided, fall back to the spec of that name
         try:
             return self._param_specs[name]
         except KeyError:
             raise ArcanaNameError(
                 name, "Invalid parameter, '{}', in {} (valid '{}')".format(
                     name, self._param_error_location,
                     "', '".join(self.param_spec_names())))
Exemple #9
0
 def __init__(self, name, repository, processor, inputs,
              parameters=None, **kwargs):
     """
     Initialise the multi-study and build one sub-study for each of its
     sub-study specs.

     Parameters
     ----------
     name
         Name of the study. It is also used as the prefix of every
         sub-study name (``name + '_' + substudy_spec.name``)
     repository, processor
         Passed to the parent initialiser and to every sub-study
     inputs
         Passed to the parent initialiser
     parameters
         Passed to the parent initialiser
     **kwargs
         Passed to the parent initialiser

     Raises
     ------
     ArcanaUsageError
         If the class's own ``__dict__`` has no ``__metaclass__`` entry
         or that entry is not MultiStudyMetaClass (or a sub-class)
     ArcanaNameError
         If two sub-study specs have the same name
     """
     try:
         # This works for PY3 as the metaclass inserts it itself if
         # it isn't provided
         metaclass = type(self).__dict__['__metaclass__']
         if not issubclass(metaclass, MultiStudyMetaClass):
             # Funnel the "wrong metaclass" case into the same handler
             # as the "no metaclass" case
             raise KeyError
     except KeyError:
         raise ArcanaUsageError(
             "Need to set MultiStudyMetaClass (or sub-class) as "
             "the metaclass of all classes derived from "
             "MultiStudy")
     super(MultiStudy, self).__init__(
         name, repository, processor, inputs, parameters=parameters,
         **kwargs)
     self._substudies = {}
     for substudy_spec in self.substudy_specs():
         substudy_cls = substudy_spec.study_class
         # Map inputs, data_specs to the substudy
         mapped_inputs = {}
         for data_name in substudy_cls.data_spec_names():
             # Name of the sub-study's spec as seen from this study
             mapped_name = substudy_spec.map(data_name)
             if mapped_name in self.input_names:
                 mapped_inputs[data_name] = self.input(mapped_name)
             else:
                 try:
                     inpt = self.spec(mapped_name)
                 except ArcanaMissingDataException:
                     # Nothing to pass down for this spec
                     pass
                 else:
                     # Only derived specs are handed to the sub-study
                     if inpt.derived:
                         mapped_inputs[data_name] = inpt
         # Map parameters to the substudy
         mapped_parameters = {}
         for param_name in substudy_cls.param_spec_names():
             mapped_name = substudy_spec.map(param_name)
             parameter = self._get_parameter(mapped_name)
             mapped_parameters[param_name] = parameter
         # Create sub-study
         substudy = substudy_spec.study_class(
             name + '_' + substudy_spec.name,
             repository, processor, mapped_inputs,
             parameters=mapped_parameters, enforce_inputs=False,
             subject_ids=self.subject_ids, visit_ids=self.visit_ids,
             clear_caches=False)
         # Append to dictionary of substudies
         if substudy_spec.name in self._substudies:
             raise ArcanaNameError(
                 substudy_spec.name,
                 "Duplicate sub-study names '{}'"
                 .format(substudy_spec.name))
         self._substudies[substudy_spec.name] = substudy
Exemple #10
0
 def map(self, name):
     """
     Map the name of a spec in the sub-study class to its name in the
     enclosing study.

     An explicit entry in ``self._name_map`` takes precedence; otherwise
     the name is checked against the spec names of ``self.study_class``
     and passed through ``self.apply_prefix``.

     Parameters
     ----------
     name : str
         Name of the spec within the sub-study class

     Returns
     -------
     The entry of ``self._name_map`` for ``name`` if there is one,
     otherwise ``self.apply_prefix(name)``

     Raises
     ------
     ArcanaNameError
         If ``name`` has no explicit mapping and is not one of the spec
         names of the study class
     """
     try:
         return self._name_map[name]
     except KeyError:
         spec_names = self.study_class.spec_names()
         if name not in spec_names:
             # One placeholder per argument, so that the list of valid
             # names is actually included in the message
             raise ArcanaNameError(
                 name,
                 ("'{}' doesn't match any filesets, fields, parameters "
                  "in the study class {} of sub-study '{}' ('{}')")
                 .format(name,
                         self.study_class.__name__,
                         self.name,
                         "', '".join(self.study_class.spec_names())))
         return self.apply_prefix(name)
Exemple #11
0
    def field(self, name, from_study=None):
        """
        Gets the field named 'name' produced by the study named
        'from_study' if provided. If a spec is passed instead of a str to
        the name argument, then the study will be set from the spec iff it
        is derived

        Parameters
        ----------
        name : str | BaseField
            The name of the field or a spec matching the given name
        from_study : str | None
            Name of the study that produced the field if derived. If None
            and a spec is passed instead of string to the name argument then
            the study name will be taken from the spec instead.

        Returns
        -------
        The field stored under ``(name, from_study)``

        Raises
        ------
        ArcanaNameError
            If there is no such field. The message lists the fields
            available for ``from_study`` and, when a field of the same name
            exists for other studies, names those studies.
        """
        if isinstance(name, BaseField):
            if from_study is None and name.derived:
                from_study = name.study.name
            name = name.name
        try:
            return self._fields[(name, from_study)]
        except KeyError:
            available = [
                d.name for d in self.fields if d.from_study == from_study
            ]
            other_studies = [
                (d.from_study if d.from_study is not None else '<root>')
                for d in self.fields if d.name == name
            ]
            if other_studies:
                msg = (". NB: matching field(s) found for '{}' study(ies) "
                       "('{}')".format(name, "', '".join(other_studies)))
            else:
                msg = ''
            study_str = (" from study '{}'".format(from_study)
                         if from_study is not None else '')
            # The trailing '{}' carries the "other studies" hint, which
            # was previously passed to format() but had no placeholder
            raise ArcanaNameError(
                name,
                ("{} doesn't have a field named '{}'{} "
                 "(available '{}'){}").format(
                     self, name, study_str, "', '".join(available), msg))
Exemple #12
0
    def data_spec(cls, name):
        """
        Look up the data spec registered on the class under a given name

        Parameters
        ----------
        name : str | BaseData
            Name of the spec to return. If a data item is passed instead,
            its ``name`` attribute is used for the lookup.

        Raises
        ------
        ArcanaNameError
            If no spec is registered under that name; the message lists
            the registered names, one per line
        """
        # Accept a data item in place of its name
        spec_name = name.name if isinstance(name, BaseData) else name
        try:
            spec = cls._data_specs[spec_name]
        except KeyError:
            listing = "\n".join(cls._data_specs.keys())
            raise ArcanaNameError(
                spec_name,
                "No fileset spec named '{}' in {}, available:\n{}".format(
                    spec_name, cls.__name__, listing))
        return spec
Exemple #13
0
    def fileset(self, id, from_study=None, format=None):
        """
        Gets the fileset with the ID 'id' produced by the study named
        'from_study' if provided. If a spec is passed instead of a str to
        the id argument, then the study will be set from the spec iff it is
        derived

        Parameters
        ----------
        id : str | FilesetSpec
            The name of the fileset or a spec matching the given name
        from_study : str | None
            Name of the study that produced the fileset if derived. If None
            and a spec is passed instead of string to the id argument then
            the study name will be taken from the spec instead.
        format : FileFormat | str | None
            Either the format of the fileset to return or the name of the
            format. If None and only a single fileset is found for the given
            name and study then that is returned otherwise an exception is
            raised

        Returns
        -------
        The matching fileset

        Raises
        ------
        ArcanaNameError
            If no fileset is stored under ``(id, from_study)``, if several
            formats are stored and ``format`` is None, or if none of the
            stored formats matches ``format``
        """
        if isinstance(id, BaseFileset):
            if from_study is None and id.derived:
                from_study = id.study.name
            id = id.name
        study_str = (" from study '{}'".format(from_study)
                     if from_study is not None else '')
        try:
            format_dct = self._filesets[(id, from_study)]
        except KeyError:
            available = [('{}(format={})'.format(f.id, f._resource_name)
                          if f._resource_name is not None else f.id)
                         for f in self.filesets if f.from_study == from_study]
            other_studies = [
                (f.from_study if f.from_study is not None else '<root>')
                for f in self.filesets if f.id == id
            ]
            if other_studies:
                msg = (". NB: matching fileset(s) found for '{}' study(ies) "
                       "('{}')".format(id, "', '".join(other_studies)))
            else:
                msg = ''
            raise ArcanaNameError(
                id, ("{} doesn't have a fileset named '{}'{} "
                     "(available '{}'){}".format(
                         self, id, study_str, "', '".join(available), msg)))
        else:
            # str() guards the join against non-string keys
            available_formats = "', '".join(str(k) for k in format_dct)
            if format is None:
                all_formats = list(format_dct.values())
                if len(all_formats) > 1:
                    raise ArcanaNameError(
                        id,
                        "Multiple filesets found for '{}'{} in {} with formats"
                        " '{}'. Need to specify a format".format(
                            id, study_str, self, available_formats))
                fileset = all_formats[0]
            else:
                try:
                    if isinstance(format, str):
                        fileset = format_dct[format]
                    else:
                        try:
                            fileset = format_dct[format.ext]
                        except KeyError:
                            # Fall back to matching on the resource names
                            # the format uses for this repository type
                            fileset = None
                            for rname, rfileset in format_dct.items():
                                if rname in format.resource_names(
                                        self.tree.repository.type):  # noqa pylint: disable=no-member
                                    fileset = rfileset
                                    break
                            if fileset is None:
                                # Re-raise the KeyError for the handler below
                                raise
                except KeyError:
                    # NB: the "other studies" hint is only built when the
                    # (id, from_study) lookup fails, so it must not be
                    # referenced on this path
                    raise ArcanaNameError(
                        format,
                        ("{} doesn't have a fileset named '{}'{} with "
                         "format '{}' (available '{}')".format(
                             self, id, study_str, format,
                             available_formats)))

        return fileset
Exemple #14
0
 def node(self, name):
     """
     Return the workflow node called ``'<self.name>_<name>'``.

     Raises
     ------
     ArcanaNameError
         If the workflow returns ``None`` for that node name
     """
     full_name = '{}_{}'.format(self.name, name)
     found = self.workflow.get_node(full_name)
     if found is not None:
         return found
     raise ArcanaNameError(
         name, "{} doesn't have node named '{}'".format(self, name))
Exemple #15
0
 def input(self, name):
     """
     Return the input registered under ``name``.

     Raises
     ------
     ArcanaNameError
         If there is no input with that name
     """
     try:
         found = self._inputs[name]
     except KeyError:
         message = "{} doesn't have an input named '{}'".format(self, name)
         raise ArcanaNameError(name, message)
     return found
Exemple #16
0
 def __init__(self,
              name,
              repository,
              processor,
              inputs,
              environment=None,
              parameters=None,
              subject_ids=None,
              visit_ids=None,
              enforce_inputs=True,
              fill_tree=False,
              clear_caches=True):
     """
     Initialise the study, validate its parameters and inputs and bind
     the inputs to the study.

     Parameters
     ----------
     name
         Name of the study
     repository
         Repository object, or a string, in which case it is wrapped as
         ``BasicRepo(repository, depth=None)``
     processor
         Processor object, or a string, in which case it is wrapped as
         ``SingleProc(processor)``. It is bound to the study with
         ``processor.bind(self)``
     inputs : dict | iterable
         Study inputs, either a dictionary keyed by spec name or an
         iterable of objects with a ``name`` attribute. String values in
         a dictionary are treated as regular-expression patterns and
         converted to InputFilesets/InputFields according to the spec.
         The dictionary passed in is not modified.
     environment
         Environment object, a ``StaticEnv()`` is created if None
     parameters : dict | iterable | None
         Parameters of the study, either a dictionary keyed by parameter
         name or an iterable of objects with a ``name`` attribute.
         Values that are not Parameter objects are wrapped in one.
     subject_ids, visit_ids : iterable | None
         Stored as tuples when provided
     enforce_inputs : bool
         Whether to raise an error when a non-optional, non-derived spec
         without a default has not been given an input
     fill_tree : bool
         Stored on the study as ``_fill_tree``
     clear_caches : bool
         Whether to call ``self.clear_caches()`` during initialisation

     Raises
     ------
     ArcanaUsageError
         If the class does not have StudyMetaClass (or a sub-class) as
         its ``__metaclass__``, a field is passed to a fileset spec (or
         vice versa), an input is not in one of the valid formats of
         its spec, or ``self.subject_ids``/``self.visit_ids`` are empty
     ArcanaNameError
         If a parameter or input name doesn't match any spec
     ArcanaInputError
         Collated from the ArcanaInputError's raised while binding the
         inputs
     ArcanaMissingInputError
         If a required input is missing and ``enforce_inputs`` is True
     """
     try:
         # This works for PY3 as the metaclass inserts it itself if
         # it isn't provided
         metaclass = type(self).__dict__['__metaclass__']
         if not issubclass(metaclass, StudyMetaClass):
             raise KeyError
     except KeyError:
         raise ArcanaUsageError(
             "Need to have StudyMetaClass (or a sub-class) as "
             "the metaclass of all classes derived from Study")
     if isinstance(repository, basestring):
         repository = BasicRepo(repository, depth=None)
     if isinstance(processor, basestring):
         processor = SingleProc(processor)
     if environment is None:
         environment = StaticEnv()
     self._name = name
     self._repository = repository
     self._processor = processor.bind(self)
     self._environment = environment
     self._inputs = {}
     self._subject_ids = (tuple(subject_ids)
                          if subject_ids is not None else None)
     self._visit_ids = tuple(visit_ids) if visit_ids is not None else None
     self._fill_tree = fill_tree
     # Initialise caches for data collection and pipeline objects
     if clear_caches:
         self.clear_caches()
     # Set parameters
     if parameters is None:
         parameters = {}
     elif not isinstance(parameters, dict):
         # Convert list of parameters into dictionary
         parameters = {o.name: o for o in parameters}
     self._parameters = {}
     for param_name, param in list(parameters.items()):
         if not isinstance(param, Parameter):
             param = Parameter(param_name, param)
         try:
             param_spec = self._param_specs[param_name]
         except KeyError:
             raise ArcanaNameError(
                 param_name,
                 "Provided parameter '{}' is not present in the "
                 "allowable parameters for {} classes ('{}')".format(
                     param_name,
                     type(self).__name__,
                     "', '".join(self.param_spec_names())))
         param_spec.check_valid(param,
                                context=' {}(name={})'.format(
                                    type(self).__name__, name))
         self._parameters[param_name] = param
     # Convert inputs to a dictionary if passed in as a list/tuple
     if not isinstance(inputs, dict):
         inputs = {i.name: i for i in inputs}
     else:
         # Work on a copy so that the caller's dictionary is not
         # modified when string patterns are replaced below
         inputs = dict(inputs)
         # Convert string patterns into Input objects
         for inpt_name, inpt in list(inputs.items()):
             if isinstance(inpt, basestring):
                 spec = self.data_spec(inpt_name)
                 if spec.is_fileset:
                     inpt = InputFilesets(inpt_name,
                                          pattern=inpt,
                                          is_regex=True)
                 else:
                     inpt = InputFields(inpt_name,
                                        pattern=inpt,
                                        dtype=spec.dtype,
                                        is_regex=True)
                 inputs[inpt_name] = inpt
     # Check validity of study inputs
     for inpt_name, inpt in inputs.items():
         try:
             spec = self.data_spec(inpt_name)
         except ArcanaNameError:
             raise ArcanaNameError(
                 inpt.name,
                 "Input name '{}' isn't in data specs of {} ('{}')".format(
                     inpt.name, self.__class__.__name__,
                     "', '".join(self._data_specs)))
         else:
             if spec.is_fileset:
                 if inpt.is_field:
                     raise ArcanaUsageError(
                         "Passed field ({}) as input to fileset spec"
                         " {}".format(inpt, spec))
             elif not inpt.is_field:
                 raise ArcanaUsageError(
                     "Passed fileset ({}) as input to field spec {}".format(
                         inpt, spec))
     # "Bind" input selectors to the current study object, and attempt to
     # match with data in the repository
     input_errors = []
     with self.repository:
         if not self.subject_ids:
             raise ArcanaUsageError(
                 "No subject IDs provided and destination repository "
                 "is empty")
         if not self.visit_ids:
             raise ArcanaUsageError(
                 "No visit IDs provided and destination repository "
                 "is empty")
         for inpt_name, inpt in list(inputs.items()):
             try:
                 try:
                     self._inputs[inpt_name] = bound_inpt = inpt.bind(
                         self, spec_name=inpt_name)
                 except ArcanaInputMissingMatchError as e:
                     # Inputs flagged 'drop_if_missing' are silently
                     # skipped when nothing matches them
                     if not inpt.drop_if_missing:
                         raise e
                 else:
                     spec = self.data_spec(inpt_name)
                     if spec.is_fileset:
                         if spec.derived:
                             # A converter from the input's format to the
                             # spec's format must be available
                             try:
                                 spec.format.converter_from(
                                     bound_inpt.format)
                             except ArcanaNoConverterError as e:
                                 # Append what was being converted to the
                                 # error (one placeholder per argument)
                                 e.msg += (
                                     ", which is required to convert:\n" +
                                     "{} to\n{}.").format(
                                         bound_inpt, spec)
                                 raise e
                         else:
                             if bound_inpt.format not in spec.valid_formats:
                                 raise ArcanaUsageError(
                                     "Cannot pass {} as an input to {} as "
                                     "it is not in one of the valid formats"
                                     " ('{}')".format(
                                         bound_inpt, spec, "', '".join(
                                             f.name
                                             for f in spec.valid_formats)))
             except ArcanaInputError as e:
                 # Collate errors across all inputs into a single error
                 # message
                 input_errors.append(e)
     if input_errors:
         raise ArcanaInputError('\n'.join(str(e) for e in input_errors))
     # Check remaining specs are optional or have default values
     for spec in self.data_specs():
         if spec.name not in self.input_names:
             if not spec.derived and spec.default is None:
                 # Emit a warning if an acquired fileset has not been
                 # provided for an "acquired fileset"
                 msg = (
                     " input fileset '{}' was not provided to {}.".format(
                         spec.name, self))
                 if spec.optional:
                     logger.info('Optional' + msg)
                 else:
                     if enforce_inputs:
                         raise ArcanaMissingInputError(
                             'Non-optional' + msg + " Pipelines depending "
                             "on this fileset will not run")