Example #1
 def __init__(self,
              name,
              collection,
              frequency=None,
              format=None):  # @ReservedAssignment
     collection = list(collection)
     if collection:
         implicit_frequency = self._common_attr(collection, 'frequency')
         if frequency is None:
             frequency = implicit_frequency
         elif frequency != implicit_frequency:
             raise ArcanaUsageError(
                 "Implicit frequency '{}' does not match explicit "
                 "frequency '{}' for '{}' FilesetCollection".format(
                     implicit_frequency, frequency, name))
         implicit_format = self._common_attr(collection, 'format')
         if format is None:
             format = implicit_format  # @ReservedAssignment
         elif format != implicit_format:
             raise ArcanaUsageError(
                 "Implicit format '{}' does not match explicit "
                 "format '{}' for '{}' FilesetCollection".format(
                     implicit_format, format, name))
     if frequency is None:
         raise ArcanaUsageError(
             "Need to provide explicit frequency for empty "
             "FilesetCollection")
     if format is None:
         raise ArcanaUsageError("Need to provide explicit format for empty "
                                "FilesetCollection")
     BaseFileset.__init__(self, name, format, frequency=frequency)
     BaseCollection.__init__(self, collection, frequency)
Example #2
 def __init__(
         self,
         name,
         format,
         pattern=None,  # @ReservedAssignment @IgnorePep8
         frequency='per_session',
         id=None,  # @ReservedAssignment @IgnorePep8
         order=None,
         dicom_tags=None,
         is_regex=False,
         from_study=None,
         repository=None,
         study_=None,
         collection_=None):
     if pattern is None and id is None:
         raise ArcanaUsageError(
             "Either 'pattern' or 'id' need to be provided to "
             "FilesetSelector constructor")
     BaseFileset.__init__(self, name, format, frequency)
     BaseMatch.__init__(self, pattern, is_regex, order, from_study,
                        repository, study_, collection_)
     if dicom_tags is not None and format.name != 'dicom':
         raise ArcanaUsageError(
             "Cannot use 'dicom_tags' kwarg with non-DICOM "
             "format ({})".format(format))
     self._dicom_tags = dicom_tags
     if order is not None and id is not None:
         raise ArcanaUsageError(
             "Cannot provide both 'order' and 'id' to a fileset"
             "match")
     self._id = str(id) if id is not None else id
Example #3
 def __init__(self, name, default, choices=None, desc=None, dtype=None):
     super(SwitchSpec, self).__init__(name, default, desc=desc, dtype=dtype)
     if self.is_boolean:
         if choices is not None:
             raise ArcanaUsageError(
                 "Choices ({}) are only valid for non-boolean "
                 "switches ('{}')".format("', '".join(choices), name))
     elif choices is None:
         raise ArcanaUsageError("Choices must be provided for non-boolean "
                                "switches ('{}')".format(name))
     self._choices = tuple(choices) if choices is not None else None
     self._desc = desc
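
A minimal usage sketch of the rule above, shown as comments since it assumes SwitchSpec is importable from Arcana (the switch names are illustrative):

 # SwitchSpec('bias_correct', True)                       # boolean: no choices
 # SwitchSpec('method', 'fsl', choices=('fsl', 'ants'))   # non-boolean: choices required
 # SwitchSpec('bias_correct', True, choices=('a', 'b'))   # raises ArcanaUsageError
 # SwitchSpec('method', 'fsl')                            # raises ArcanaUsageError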
Example #4
 def check_valid(self, parameter, context=None):
     if parameter.value is not None:
         error_msg = ("Incorrect datatype for '{}' parameter provided "
                      "({}){}, Should be {}".format(
                          parameter.name, type(parameter.value),
                          'in ' + context if context is not None else '',
                          self.dtype))
         if self.array:
             if any(not isinstance(v, self.dtype) for v in parameter.value):
                 raise ArcanaUsageError(error_msg + ' array')
         elif not isinstance(parameter.value, self.dtype):
             raise ArcanaUsageError(error_msg)
Example #5
 def check_valid(self, switch, context=''):
     super(SwitchSpec, self).check_valid(switch, context=context)
     if self.is_boolean:
         if not isinstance(switch.value, bool):
             raise ArcanaUsageError(
                 "Value provided to switch '{}'{} should be a "
                 "boolean (not {})".format(self.name, context,
                                           switch.value))
     elif switch.value not in self.choices:
         raise ArcanaUsageError(
             "Value provided to switch '{}'{} ({}) is not a valid "
             "choice ('{}')".format(self.name, context, switch.value,
                                    "', '".join(self.choices)))
Example #6
 def _list_outputs(self):
     outputs = self._outputs().get()
     new_files = set(os.listdir(os.getcwd())) - self.listdir_before
     if len(new_files) > 1:
         raise ArcanaUsageError(
             "Zip repositorys can only contain a single directory, found '{}'"
             .format("', '".join(new_files)))
     try:
         unzipped = next(iter(new_files))
     except StopIteration:
         raise ArcanaUsageError(
             "No files or directories found in unzipped directory")
     outputs['gunzipped'] = op.join(os.getcwd(), unzipped)
     return outputs
Example #7
    def segmentation_pipeline(self, img_type=2, **kwargs):
        pipeline = self.create_pipeline(
            name='FAST_segmentation',
            inputs=[DatasetSpec('brain', nifti_gz_format)],
            outputs=[DatasetSpec('wm_seg', nifti_gz_format)],
            desc="White matter segmentation of the reference image",
            version=1,
            citations=[fsl_cite],
            **kwargs)

        fast = pipeline.create_node(fsl.FAST(),
                                    name='fast',
                                    requirements=[fsl509_req])
        fast.inputs.img_type = img_type
        fast.inputs.segments = True
        fast.inputs.out_basename = 'Reference_segmentation'
        pipeline.connect_input('brain', fast, 'in_files')
        split = pipeline.create_node(Split(), name='split')
        split.inputs.splits = [1, 1, 1]
        split.inputs.squeeze = True
        pipeline.connect(fast, 'tissue_class_files', split, 'inlist')
        if img_type == 1:
            pipeline.connect_output('wm_seg', split, 'out3')
        elif img_type == 2:
            pipeline.connect_output('wm_seg', split, 'out2')
        else:
            raise ArcanaUsageError(
                "'img_type' parameter can either be 1 or 2 (not {})".format(
                    img_type))

        return pipeline
Example #8
 def __init__(
         self,
         name,
         primary_match,
         format,
         association,
         fieldmap_type=None,
         order=0):
     FilesetSelector.__init__(
         self,
         name,
         format,
         pattern=None,
         frequency='per_session',
         id=None,
         order=order,
         dicom_tags=None,
         is_regex=False,
         from_study=None)
     self._primary_match = primary_match
     self._association = association
     if fieldmap_type is not None and association != 'fieldmap':
         raise ArcanaUsageError("'fieldmap_type' (provided to '{}' match) "
                                "is only valid for 'fieldmap' "
                                "associations (not '{}')".format(
                                    name, association))
     self._fieldmap_type = fieldmap_type
Example #9
 def _common_attr(self, collection, attr_name):
     attr_set = set(getattr(c, attr_name) for c in collection)
     if len(attr_set) != 1:
         raise ArcanaUsageError(
             "Heterogeneous attributes for '{}' within {}".format(
                 attr_name, self))
     return next(iter(attr_set))
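
The same "all values must agree" check, reproduced as a self-contained sketch (the Item records are illustrative stand-ins for Arcana filesets):

 from collections import namedtuple

 Item = namedtuple('Item', ['frequency'])
 items = [Item('per_session'), Item('per_session')]
 attrs = set(i.frequency for i in items)
 assert len(attrs) == 1  # heterogeneous values would trigger the error above
 common = next(iter(attrs))  # 'per_session'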
Example #10
 def from_path(cls, path, frequency='per_session', format=None,  # @ReservedAssignment @IgnorePep8
               **kwargs):
     if not os.path.exists(path):
         raise ArcanaUsageError(
             "Attempting to read Fileset from path '{}' but it "
             "does not exist".format(path))
     if os.path.isdir(path):
         within_exts = frozenset(
             split_extension(f)[1] for f in os.listdir(path)
             if not f.startswith('.'))
         if format is None:
             # Try to guess format
             try:
                 format = FileFormat.by_within_dir_exts(within_exts)  # @ReservedAssignment @IgnorePep8
             except ArcanaFileFormatNotRegisteredError:
                 # Fall back to general directory format
                 format = directory_format  # @ReservedAssignment
         name = os.path.basename(path)
     else:
         filename = os.path.basename(path)
         name, ext = split_extension(filename)
         if format is None:
             try:
                 format = FileFormat.by_ext(ext)  # @ReservedAssignment @IgnorePep8
             except ArcanaFileFormatNotRegisteredError as e:
                 raise ArcanaFileFormatNotRegisteredError(
                     str(e) + ", which is required to identify the "
                     "format of the fileset at '{}'".format(path))
     return cls(name, format, frequency=frequency,
                path=path, **kwargs)
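
Illustrative calls, shown as comments (the paths are hypothetical; this assumes from_path is exposed as a classmethod of Fileset, as the cls argument suggests):

 # fileset = Fileset.from_path('/data/scan.nii.gz')  # format guessed from extension
 # fileset = Fileset.from_path('/data/dicom_dir')    # guessed from extensions within
 # fileset = Fileset.from_path('/data/missing')      # raises ArcanaUsageError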
Example #11
 def header_info_extraction_pipeline(self, **kwargs):
     if self.input('primary').format != dicom_format:
         raise ArcanaUsageError(
             "Can only extract header info if 'primary' dataset "
             "is provided in DICOM format ({})".format(
                 self.input('primary').format))
     return self.header_info_extraction_pipeline_factory(
         'header_info_extraction', 'primary', **kwargs)
Example #12
 def __init__(self,
              name,
              collection,
              frequency=None,
              dtype=None,
              array=None):
     collection = list(collection)
     if collection:
         implicit_frequency = self._common_attr(collection, 'frequency')
         if frequency is None:
             frequency = implicit_frequency
         elif frequency != implicit_frequency:
             raise ArcanaUsageError(
                 "Implicit frequency '{}' does not match explicit "
                 "frequency '{}' for '{}' FilesetCollection".dtype(
                     implicit_frequency, frequency, name))
         implicit_dtype = self._common_attr(collection, 'dtype')
         if dtype is None:
             dtype = implicit_dtype  # @ReservedAssignment
         elif dtype != implicit_dtype:
             raise ArcanaUsageError(
                 "Implicit dtype '{}' does not match explicit "
                 "dtype '{}' for '{}' FilesetCollection".dtype(
                     implicit_dtype, dtype, name))
         implicit_array = self._common_attr(collection, 'array')
         if array is None:
             array = implicit_array
         elif array != implicit_array:
             raise ArcanaUsageError(
                 "Implicit array '{}' does not match explicit "
                 "array '{}' for '{}' FilesetCollection".dtype(
                     implicit_array, array, name))
     if frequency is None:
         raise ArcanaUsageError(
             "Need to provide explicit frequency for empty "
             "FilesetCollection")
     if dtype is None:
         raise ArcanaUsageError("Need to provide explicit dtype for empty "
                                "FilesetCollection")
     BaseField.__init__(self,
                        name,
                        dtype=dtype,
                        frequency=frequency,
                        array=array)
     BaseCollection.__init__(self, collection, frequency)
Example #13
 def __init__(self, name, desc=None, optional=False, default=None):
     if optional and default is not None:
         raise ArcanaUsageError(
             "'optional' doesn't make sense for specs ('{}') with default "
             "values".format(name))
     self._desc = desc
     self._study = None
     self._optional = optional
     # Set the name of the default collection-like object so it matches
     # the name of the spec
     if default is not None:
         if default.frequency != self.frequency:
             raise ArcanaUsageError(
                 "Frequency of default collection-like object passed to "
                 "'{}' spec ('{}'), does not match spec ('{}')".format(
                      name, default.frequency, self.frequency))
         default = deepcopy(default)
     self._default = default
Example #14
 def __init__(self, name, pipeline_name, desc=None):
     if pipeline_name is not None:
         if not isinstance(pipeline_name, basestring):
              raise ArcanaUsageError(
                  "Pipeline name for '{}' ('{}') is not a string".format(
                      name, pipeline_name))
     self._pipeline_name = pipeline_name
     self._desc = desc
     self._study = None
     self._collection = None
Example #15
 def path_depth(self, dpath):
     relpath = op.relpath(dpath, self.root_dir)
     if '..' in relpath:
         raise ArcanaUsageError(
             "Path '{}' is not a sub-directory of '{}'".format(
                 dpath, self.root_dir))
     elif relpath == '.':
         depth = 0
     else:
         depth = relpath.count(op.sep) + 1
     return depth
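
The depth computation in isolation, as a runnable sketch (the paths are hypothetical):

 import os.path as op

 root = '/repo/root'
 rel = op.relpath('/repo/root/subj1/visit1', root)  # 'subj1/visit1'
 depth = 0 if rel == '.' else rel.count(op.sep) + 1
 assert depth == 2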
Example #16
 def __init__(self, name, value):
     self._name = name
     if value is None:
         self._dtype = None
     else:
         if not isinstance(value, (int, float, str, tuple, list)):
             raise ArcanaUsageError(
                 "Invalid type for '{}' parameter default ({}), {}, "
                 "can be one of int, float or str".format(
                     name, value, type(value)))
         self._dtype = (str if isinstance(value, str) else type(value))
     self._value = value
Example #17
 def bind(self, study):
     """
     Used for duck typing Collection objects with Spec and Match
      in source and sink initialisation. Checks IDs match sessions in study.
     """
     if self.frequency == 'per_subject':
         tree_subject_ids = list(study.tree.subject_ids)
         subject_ids = list(self._collection.keys())
         if tree_subject_ids != subject_ids:
             raise ArcanaUsageError(
                 "Subject IDs in collection provided to '{}' ('{}') "
                 "do not match Study tree ('{}')".format(
                     self.name, "', '".join(subject_ids),
                     "', '".join(tree_subject_ids)))
     elif self.frequency == 'per_visit':
         tree_visit_ids = list(study.tree.visit_ids)
         visit_ids = list(self._collection.keys())
         if tree_visit_ids != visit_ids:
             raise ArcanaUsageError(
                 "Subject IDs in collection provided to '{}' ('{}') "
                 "do not match Study tree ('{}')".format(
                     self.name, "', '".join(visit_ids),
                     "', '".join(tree_visit_ids)))
     elif self.frequency == 'per_session':
         for subject in study.tree.subjects:
             if subject.id not in self._collection:
                 raise ArcanaUsageError(
                     "Study subject ID '{}' was not found in colleciton "
                     "provided to '{}' (found '{}')".format(
                         subject.id, self.name,
                         "', '".join(self._collection.keys())))
             for session in subject.sessions:
                 if session.visit_id not in self._collection[subject.id]:
                      raise ArcanaUsageError(
                          "Study visit ID '{}' for subject '{}' was not "
                          "found in collection provided to '{}' (found '{}')"
                          .format(
                              session.visit_id, subject.id, self.name,
                              "', '".join(
                                  self._collection[subject.id].keys())))
Example #18
 def __init__(self, collection, frequency):
     self._frequency = frequency
     if collection:
         self._repository = self._common_attr(collection, 'repository')
         self._from_study = self._common_attr(collection, 'from_study')
     else:
         self._repository = None
         self._from_study = None
     if frequency == 'per_study':
         # If wrapped in an iterable
         if not isinstance(collection, self.CollectedClass):
             if len(collection) > 1:
                 raise ArcanaUsageError(
                     "More than one {} passed to {}".format(
                          self.CollectedClass.__name__,
                         type(self).__name__))
             collection = list(collection)
         self._collection = collection
     elif frequency == 'per_session':
         self._collection = OrderedDict()
         for subj_id in sorted(set(c.subject_id for c in collection)):
             self._collection[subj_id] = OrderedDict(
                 sorted(((c.visit_id, c)
                         for c in collection if c.subject_id == subj_id),
                        key=itemgetter(0)))
     elif frequency == 'per_subject':
         self._collection = OrderedDict(
             sorted(((c.subject_id, c) for c in collection),
                    key=itemgetter(0)))
     elif frequency == 'per_visit':
         self._collection = OrderedDict(
             sorted(((c.visit_id, c) for c in collection),
                    key=itemgetter(0)))
     else:
         assert False
     for datum in self:
         if not isinstance(datum, self.CollectedClass):
             raise ArcanaUsageError("Invalid class {} in {}".format(
                 datum, self))
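
A standalone sketch of the 'per_session' grouping above, using illustrative records in place of Arcana filesets:

 from collections import OrderedDict, namedtuple
 from operator import itemgetter

 Rec = namedtuple('Rec', ['subject_id', 'visit_id'])
 recs = [Rec('s2', 'v1'), Rec('s1', 'v2'), Rec('s1', 'v1')]
 grouped = OrderedDict()
 for subj_id in sorted(set(r.subject_id for r in recs)):
     grouped[subj_id] = OrderedDict(
         sorted(((r.visit_id, r) for r in recs if r.subject_id == subj_id),
                key=itemgetter(0)))
 # grouped['s1'] is OrderedDict([('v1', Rec('s1', 'v1')), ('v2', Rec('s1', 'v2'))])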
Example #19
 def assertContentsEqual(self, collection, reference, context=None):
     if isinstance(collection, Fileset):
         collection = [collection]
     if isinstance(reference, (basestring, int, float)):
         if len(collection) != 1:
             raise ArcanaUsageError(
                 "Multi-subject/visit collections cannot be compared"
                 " against a single contents string (list or dict "
                 "should be provided)")
         references = [str(reference)]
         filesets = list(collection)
     elif isinstance(reference, dict):
         references = []
         filesets = []
          for subj_id, subj_dct in reference.items():
             for visit_id, ref_value in subj_dct.items():
                 references.append(str(ref_value))
                 filesets.append(collection.item(subject_id=subj_id,
                                                 visit_id=visit_id))
     elif isinstance(reference, (list, tuple)):
         references = [str(r) for r in reference]
         filesets = list(collection)
         if len(references) != len(filesets):
             raise ArcanaUsageError(
                 "Number of provided references ({}) does not match"
                 " size of collection ({})".format(len(references),
                                                   len(filesets)))
     else:
         raise ArcanaUsageError(
             "Unrecognised format for reference ({})"
             .format(reference))
     for fileset, ref in zip(filesets, references):
         with open(fileset.path) as f:
             contents = f.read()
         msg = ("Contents of {} ({}) do not match reference ({})"
                .format(fileset, contents, ref))
         if context is not None:
              msg += ' for ' + context
         self.assertEqual(contents, ref, msg)
Example #20
 def __init__(self, name, default, desc=None, dtype=None, array=False):
     super(ParameterSpec, self).__init__(name, default)
     self._desc = desc
     self._array = array
      if dtype is not None:
          if self.default is not None and (
                  not array and not isinstance(self.default, dtype)
                  or array and any(not isinstance(d, dtype)
                                   for d in self.default)):
              raise ArcanaUsageError(
                  "Provided default value ({}) does not match explicit "
                  "dtype ({})".format(self.default, dtype))
      self._dtype = dtype
Example #21
 def derivable(self):
     """
     Whether the spec (only valid for derived specs) can be derived
     given the inputs and switches provided to the study
     """
     if not self.derived:
         raise ArcanaUsageError("'{}' is not a derived {}".format(
             self.name, type(self)))
     try:
         for inpt in self.pipeline.study_inputs:
             self.study.spec(inpt.name)
     except (ArcanaOutputNotProducedException, ArcanaMissingDataException):
         return False
     return True
Example #22
def parse_single_value(value):
    """
     Tries to convert the value to an int, then a float, and otherwise
     assumes it is a string. Useful when accepting values that may be string
    representations of numerical values
    """
    if isinstance(value, (int, float)):
        return value
    try:
        value = int(value)
    except ValueError:
        try:
            value = float(value)
        except ValueError:
            if not isinstance(value, basestring):
                raise ArcanaUsageError(
                    "Unrecognised value type {}".format(value))
    return value
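
The same int -> float -> str fallback as a self-contained sketch (coerce is an illustrative name, not part of Arcana):

 def coerce(value):
     for cast in (int, float):
         try:
             return cast(value)
         except ValueError:
             pass
     return value

 assert coerce('42') == 42            # int succeeds
 assert coerce('3.5') == 3.5          # falls through to float
 assert coerce('wm_seg') == 'wm_seg'  # left as a string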
Example #23
 def pipeline(self):
     if self.pipeline_name is None:
         raise ArcanaUsageError(
             "{} is an acquired data spec so doesn't have a pipeline".
             format(self))
     try:
         getter = getattr(self.study, self.pipeline_name)
     except AttributeError:
         raise ArcanaError(
             "There is no pipeline method named '{}' in present in "
             "'{}' study".format(self.pipeline_name, self.study))
     # Set up study to record which parameters
     # referenced during the pipeline generation
     self.study._pipeline_to_generate = self.pipeline_name
     self.study._referenced_parameters = set()
     try:
         pipeline = getter()
         if pipeline is None:
             raise ArcanaDesignError(
                 "'{}' pipeline constructor in {} is missing return "
                 "statement (should return a Pipeline object)".format(
                     self.pipeline_name, self.study))
         # Copy referenced parameters to pipeline
         pipeline._referenced_parameters = (
             self.study._referenced_parameters)
     except AttributeError as e:
         # Need to capture this as exception to avoid it getting
         # confused with specs that don't have pipelines
         raise ArcanaError("AttributeError was thrown attempting to "
                           "construct '{}': {}".format(
                               self.pipeline_name, e))
     finally:
         # Reset referenced parameters after generating pipeline
         self.study._pipeline_to_generate = None
         self.study._referenced_parameters = None
     if self.name not in pipeline.output_names:
         raise ArcanaOutputNotProducedException(
             "'{}' is not produced by {} pipeline in {} class given the "
             "provided  switches ({}) and the missing inputs ('{}')".format(
                 self.name, pipeline.name, self.study.__class__.__name__,
                 ', '.join('{}={}'.format(s.name, s.value)
                           for s in self.study.switches),
                 "', '".join(self.study.missing_inputs)))
     return pipeline
Example #24
    def branch(self, name, values=None):  # @UnusedVariable @IgnorePep8
        """
        Checks whether the given switch matches the value provided

        Parameters
        ----------
        name : str
            The name of the parameter to retrieve
        values : str | list[str] | None
            The value(s) of the switch to match if a non-boolean switch
        """
        if isinstance(values, basestring):
            values = [values]
        spec = self.parameter_spec(name)
        if not isinstance(spec, SwitchSpec):
            raise ArcanaUsageError(
                "{} is standard parameter not a switch".format(spec))
        switch = self._get_parameter(name)
        if spec.is_boolean:
            if values is not None:
                raise ArcanaDesignError(
                    "Should not provide values ({}) to boolean switch "
                    "'{}' in {}".format(values, name,
                                        self._param_error_location))
            in_branch = switch.value
        else:
            if values is None:
                raise ArcanaDesignError(
                    "Value(s) need(s) to be provided non-boolean switch"
                    " '{}' in {}".format(name, self._param_error_location))
            # Register parameter as being used by the pipeline
            unrecognised_values = set(values) - set(spec.choices)
            if unrecognised_values:
                raise ArcanaDesignError(
                    "Provided value(s) ('{}') for switch '{}' in {} "
                    "is not a valid option ('{}')".format(
                        "', '".join(unrecognised_values), name,
                        self._param_error_location, "', '".join(spec.choices)))
            in_branch = switch.value in values
        if self._referenced_parameters is not None:
            self._referenced_parameters.add(name)
        return in_branch
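
Illustrative calls inside a pipeline constructor, shown as comments (the switch names are hypothetical):

 # if self.branch('registration_method', 'ants'):  # non-boolean: value(s) required
 #     ...  # add ANTs-specific nodes
 # if self.branch('bias_correct'):                 # boolean: values must be omitted
 #     ...  # add bias-correction node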
Example #25
def parse_value(value):
    # Split strings with commas into lists
    if ',' in value:
        value = value.split(',')
    # Cast all iterables (except strings) into lists
    if not isinstance(value, basestring):
        try:
            value = list(value)
        except TypeError:
            pass
    if isinstance(value, list):
        value = [parse_single_value(v) for v in value]
        # Check to see if datatypes are consistent
        dtypes = set(type(v) for v in value)
        if dtypes == set((float, int)):
            # If both ints and floats are presents, cast to floats
            value = [float(v) for v in value]
        elif len(dtypes) > 1:
            raise ArcanaUsageError(
                "Inconsistent datatypes in values array ({})".format(value))
    else:
        value = parse_single_value(value)
    return value
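
Expected behaviour based on the logic above (a sketch, not captured output):

 # parse_value('1,2,3')  -> [1, 2, 3]
 # parse_value('1,2.5')  -> [1.0, 2.5]  (mixed ints/floats cast to float)
 # parse_value('a,b')    -> ['a', 'b']
 # parse_value('7')      -> 7
 # parse_value('1,a')    -> raises ArcanaUsageError (inconsistent datatypes)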
Example #26
    def run(self, *pipelines, **kwargs):
        """
        Connects all pipelines to that study's repository and runs them
        in the same NiPype workflow

        Parameters
        ----------
        pipeline(s) : Pipeline, ...
            The pipeline to connect to repository
        subject_ids : list[str]
            The subset of subject IDs to process. If None all available will be
            processed. Note this is not a duplication of the subject
            and visit IDs passed to the Study __init__, as they define the
            scope of the analysis and these simply limit the scope of the
            current run (e.g. to break the analysis into smaller chunks and
            run separately). Therefore, if the analysis joins over subjects,
            then all subjects will be processed and this parameter will be
            ignored.
        visit_ids : list[str]
            The same as 'subject_ids' but for visit IDs
        session_ids : list[str,str]
            The same as 'subject_ids' and 'visit_ids', except specifies a set
            of specific combinations in tuples of (subject ID, visit ID).
        force : bool | 'all'
            A flag to force the reprocessing of all sessions in the filter
            array, regardless of whether the parameters|pipeline used
            to generate them matches the current ones. NB: if True only the
            final pipeline will be reprocessed (prerequisite pipelines won't
            run unless they don't match provenance). To process all
            prerequisite pipelines 'all' should be passed to force.

        Returns
        -------
        report : ReportNode
            The final report node, which can be connected to subsequent
            pipelines
        """
        if not pipelines:
            raise ArcanaUsageError(
                "No pipelines provided to {}.run".format(self))
        # Get filter kwargs  (NB: in Python 3 they could be in the arg list)
        subject_ids = kwargs.pop('subject_ids', [])
        visit_ids = kwargs.pop('visit_ids', [])
        session_ids = kwargs.pop('session_ids', [])
        clean_work_dir = kwargs.pop('clean_work_dir',
                                    self._clean_work_dir_between_runs)
        # Create name by combining pipelines
        name = '_'.join(p.name for p in pipelines)
        # Clean work dir if required
        if clean_work_dir:
            workflow_work_dir = op.join(self.work_dir, name)
            if op.exists(workflow_work_dir):
                shutil.rmtree(workflow_work_dir)
        # Trim the end of very large names to avoid problems with
        # workflow names exceeding system limits.
        name = name[:WORKFLOW_MAX_NAME_LEN]
        workflow = pe.Workflow(name=name, base_dir=self.work_dir)
        already_connected = {}
        # Generate filter array to optionally restrict the run to certain
        # subject and visit IDs.
        tree = self.study.tree
        # Create maps from the subject|visit IDs to an index used to represent
        # them in the filter array
        subject_inds = {s.id: i for i, s in enumerate(tree.subjects)}
        visit_inds = {v.id: i for i, v in enumerate(tree.visits)}
        if not subject_ids and not visit_ids and not session_ids:
            # No filters applied so create a full filter array
            filter_array = np.ones((len(subject_inds), len(visit_inds)),
                                   dtype=bool)
        else:
            # Filters applied so create an empty filter array and populate
            # from filter lists
            filter_array = np.zeros((len(subject_inds), len(visit_inds)),
                                    dtype=bool)
            for subj_id in subject_ids:
                filter_array[subject_inds[subj_id], :] = True
            for visit_id in visit_ids:
                filter_array[:, visit_inds[visit_id]] = True
            for subj_id, visit_id in session_ids:
                filter_array[subject_inds[subj_id],
                             visit_inds[visit_id]] = True
            if not filter_array.any():
                raise ArcanaUsageError(
                    "Provided filters:\n" +
                    ("  subject_ids: {}\n".format(', '.join(subject_ids))
                     if subject_ids is not None else '') +
                    ("  visit_ids: {}\n".format(', '.join(visit_ids))
                     if visit_ids is not None else '') +
                    ("  session_ids: {}\n".format(', '.join(session_ids))
                     if session_ids is not None else '') +
                    "Did not match any sessions in the project:\n" +
                    "  subject_ids: {}\n".format(', '.join(subject_inds)) +
                    "  visit_ids: {}\n".format(', '.join(visit_inds)))
        for pipeline in pipelines:
            try:
                self._connect_pipeline(pipeline,
                                       workflow,
                                       subject_inds,
                                       visit_inds,
                                       filter_array,
                                       already_connected=already_connected,
                                       **kwargs)
            except ArcanaNoRunRequiredException:
                logger.info("Not running '{}' pipeline as its outputs "
                            "are already present in the repository".format(
                                pipeline.name))
        # Reset the cached tree of filesets in the repository as it will
        # change after the pipeline has run.
        self.study.repository.clear_cache()
        #         workflow.write_graph(graph2use='flat', format='svg')
        #         print('Graph saved in {} directory'.format(os.getcwd()))
        return workflow.run(plugin=self._plugin)
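
A standalone sketch of how the filter array is populated (the subject and visit IDs are hypothetical):

 import numpy as np

 subject_inds = {'s1': 0, 's2': 1}
 visit_inds = {'v1': 0, 'v2': 1}
 filter_array = np.zeros((2, 2), dtype=bool)
 filter_array[subject_inds['s1'], :] = True  # every visit of subject 's1'
 filter_array[:, visit_inds['v2']] = True    # visit 'v2' of every subject
 # filter_array is now [[True, True], [False, True]]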
Example #27
    def _to_process(self, pipeline, filter_array, subject_inds, visit_inds,
                    force):
        """
        Check whether the outputs of the pipeline are present in all sessions
        in the project repository and were generated with matching parameters
        and pipelines. Return an 2D boolean array (subjects: rows,
        visits: cols) with the sessions to process marked True.

        Parameters
        ----------
        pipeline : Pipeline
            The pipeline to determine the sessions to process
        filter_array : 2-D numpy.array[bool]
            A two-dimensional boolean array, where rows and columns
            correspond to subjects and visits in the repository tree. True
            values represent a subject/visit ID pairs to include
            in the current round of processing. Note that if the 'force'
            flag is not set, sessions won't be reprocessed unless the
            parameters and pipeline version saved in the provenance don't
            match those of the given pipeline.
        subject_inds : dict[str,int]
            Mapping from subject ID to index in filter|to_process arrays
        visit_inds : dict[str,int]
            Mapping from visit ID to index in filter|to_process arrays
        force : bool
            Whether to force reprocessing of all (filtered) sessions or not

        Returns
        -------
        to_process : 2-D numpy.array[bool]
            A two-dimensional boolean array, where rows correspond to
            subjects and columns correspond to visits in the repository. True
            values represent subject/visit ID pairs to run the pipeline for
        """
        # Check to see if the pipeline has any low frequency outputs, because
        # if not then each session can be processed independently. Otherwise,
        # the "session matrix" (as defined by subject_ids and visit_ids
        # passed to the Study class) needs to be complete, i.e. a session
        # exists (with the full complement of required inputs) for each
        # subject/visit ID pair.
        tree = self.study.tree
        low_freq_outputs = [
            o.name for o in pipeline.outputs if o.frequency != 'per_session'
        ]
        if low_freq_outputs and list(tree.incomplete_subjects):
            raise ArcanaUsageError(
                "Can't process '{}' pipeline as it has low frequency outputs "
                "(i.e. outputs that aren't of 'per_session' frequency) "
                "({}) and subjects ({}) that are missing one "
                "or more visits ({}). Please restrict the subject/visit "
                "IDs in the study __init__ to continue the analysis".format(
                    pipeline.name, ', '.join(low_freq_outputs),
                    ', '.join(s.id for s in tree.incomplete_subjects),
                    ', '.join(v.id for v in tree.incomplete_visits)))
        # Initialise an array of sessions to process
        to_process = np.zeros((len(subject_inds), len(visit_inds)), dtype=bool)
        for output in pipeline.frequency_outputs('per_study'):
            collection = self.study.spec(output).collection
            # Include all sessions if a per-study output needs to be
            # reprocessed. Note that this will almost always be the case if
            # any other output needs to be reprocessed.
            #
            # NB: Filter array should always have at least one true value at
            # this point
            if pipeline.metadata_mismatch(collection.item()) or force:
                to_process[:] = True
                # No point continuing since to_process array is already full
                return to_process
        for output in pipeline.frequency_outputs('per_subject'):
            collection = self.study.spec(output).collection
            for item in collection:
                i = subject_inds[item.subject_id]
                # NB: The output will be reprocessed using data from every
                # visit of each subject. However, the visits to include in the
                # analysis can be specified at the initialisation of the Study.
                if ((pipeline.metadata_mismatch(item) or force)
                        and filter_array[i, :].any()):
                    to_process[i, :] = True
        for output in pipeline.frequency_outputs('per_visit'):
            collection = self.study.spec(output).collection
            for item in collection:
                j = visit_inds[item.visit_id]
                # NB: The output will be reprocessed using data from every
                # subject of each visit. However, the subjects to include in
                # the analysis can be specified at the initialisation of the
                # Study.
                if ((pipeline.metadata_mismatch(item) or force)
                        and filter_array[:, j].any()):
                    to_process[:, j] = True
        for output in pipeline.frequency_outputs('per_session'):
            collection = self.study.spec(output).collection
            for item in collection:
                i = subject_inds[item.subject_id]
                j = visit_inds[item.visit_id]
                if ((pipeline.metadata_mismatch(item) or force)
                        and filter_array[i, j]):
                    to_process[i, j] = True
        if not to_process.any():
            raise ArcanaNoRunRequiredException(
                "No sessions to process for '{}' pipeline".format(
                    pipeline.name))
        return to_process
Example #28
    def data(self, name, subject_id=None, visit_id=None, **kwargs):
        """
        Returns the Fileset or Field associated with the name,
        generating derived filesets as required. Multiple names in a
        list can be provided, in which case their workflows are
        joined into a single workflow.

        Parameters
        ----------
        name : str | List[str]
            The name of the FilesetSpec|FieldSpec to retrieve the
            filesets for
        subject_id : int | str | List[int|str] | None
            The subject ID or subject IDs to return. If None all are
            returned
        visit_id : int | str | List[int|str] | None
            The visit ID or visit IDs to return. If None all are
            returned

        Returns
        -------
        data : Fileset | Field | List[Fileset | Field] | List[List[Fileset | Field]]
            If a single name is provided then data is either a single
            Fileset or field if a single subject_id and visit_id are
            provided, otherwise a list of filesets or fields
            corresponding to the given name. If multiple names are
            provided then a list is returned containing the data for
            each provided name.
        """
        if isinstance(name, basestring):
            single_name = True
            names = [name]
        else:
            names = name
            single_name = False

        def is_single_id(id_):  # @IgnorePep8
            return isinstance(id_, (basestring, int))

        subject_ids = ([subject_id]
                       if is_single_id(subject_id) else subject_id)
        visit_ids = ([visit_id] if is_single_id(visit_id) else visit_id)
        # Work out which pipelines need to be run
        pipelines = []
        for name in names:
            try:
                pipeline = self.spec(name).pipeline
                pipeline.required_outputs.add(name)
                pipelines.append(pipeline)
            except AttributeError:
                pass  # Match objects don't have pipelines
        # Run all pipelines together
        if pipelines:
            self.processor.run(*pipelines,
                               subject_ids=subject_ids,
                               visit_ids=visit_ids,
                               **kwargs)
        all_data = []
        for name in names:
            spec = self.spec(name)
            data = spec.collection
            if subject_ids is not None and spec.frequency in ('per_session',
                                                              'per_subject'):
                data = [d for d in data if d.subject_id in subject_ids]
            if visit_ids is not None and spec.frequency in ('per_session',
                                                            'per_visit'):
                data = [d for d in data if d.visit_id in visit_ids]
            if not data:
                raise ArcanaUsageError(
                    "No matching data found (subject_id={}, visit_id={})".
                    format(subject_id, visit_id))
            if is_single_id(subject_id) and is_single_id(visit_id):
                assert len(data) == 1
                data = data[0]
            else:
                data = spec.CollectionClass(spec.name, data)
            if single_name:
                return data
            all_data.append(data)
        return all_data
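
Illustrative calls, shown as comments (the spec names and IDs are hypothetical):

 # study.data('wm_seg')                                  # collection over all sessions
 # study.data('wm_seg', subject_id='s1', visit_id='v1')  # a single Fileset
 # study.data(['wm_seg', 'brain'])                       # list of two collections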
Example #29
 def __init__(self,
              name,
              repository,
              processor,
              inputs,
              environment=None,
              parameters=None,
              subject_ids=None,
              visit_ids=None,
              enforce_inputs=True,
              reprocess=False,
              fill_tree=False):
     try:
         # This works for PY3 as the metaclass inserts it itself if
         # it isn't provided
         metaclass = type(self).__dict__['__metaclass__']
         if not issubclass(metaclass, StudyMetaClass):
             raise KeyError
     except KeyError:
         raise ArcanaUsageError(
             "Need to have StudyMetaClass (or a sub-class) as "
             "the metaclass of all classes derived from Study")
     if isinstance(repository, basestring):
         repository = DirectoryRepository(repository, depth=None)
     if isinstance(processor, basestring):
         processor = LinearProcessor(processor)
     if environment is None:
         environment = StaticEnvironment()
     self._name = name
     self._repository = repository
     self._processor = processor.bind(self)
     self._environment = environment
     self._inputs = {}
     self._subject_ids = subject_ids
     self._visit_ids = visit_ids
     self._tree = self.repository.cached_tree(subject_ids=subject_ids,
                                              visit_ids=visit_ids,
                                              fill=fill_tree)
     if not self.subject_ids:
         raise ArcanaUsageError(
             "No subject IDs provided and destination repository "
             "is empty")
     if not self.visit_ids:
         raise ArcanaUsageError(
             "No visit IDs provided and destination repository "
             "is empty")
     self._reprocess = reprocess
     # For recording which parameters are accessed
     # during pipeline generation so they can be attributed to the
     # pipeline after it is generated (and then saved in the
      # provenance)
     self._pipeline_to_generate = None
     self._referenced_parameters = None
     # Set parameters
     if parameters is None:
         parameters = {}
     elif not isinstance(parameters, dict):
         # Convert list of parameters into dictionary
         parameters = {o.name: o for o in parameters}
     self._parameters = {}
     for param_name, param in list(parameters.items()):
         if not isinstance(param, Parameter):
             param = Parameter(param_name, param)
         try:
             param_spec = self._param_specs[param_name]
         except KeyError:
             raise ArcanaNameError(
                 param_name,
                 "Provided parameter '{}' is not present in the "
                 "allowable parameters for {} classes ('{}')".format(
                     param_name,
                     type(self).__name__,
                     "', '".join(self.parameter_spec_names())))
         param_spec.check_valid(param,
                                context='{}(name={})'.format(
                                    type(self).__name__, name))
         self._parameters[param_name] = param
     # Convert inputs to a dictionary if passed in as a list/tuple
     if not isinstance(inputs, dict):
         inputs = {i.name: i for i in inputs}
     # Add each "input fileset" checking to see whether the given
     # fileset_spec name is valid for the study types
     for inpt_name, inpt in list(inputs.items()):
         try:
             spec = self.data_spec(inpt_name)
         except ArcanaNameError:
             raise ArcanaNameError(
                 inpt.name,
                 "Input name '{}' isn't in data specs of {} ('{}')".format(
                     inpt.name, self.__class__.__name__,
                     "', '".join(self._data_specs)))
         else:
             if isinstance(spec, BaseFileset):
                 if isinstance(inpt, BaseField):
                     raise ArcanaUsageError(
                         "Passed field ({}) as input to fileset spec"
                         " {}".format(inpt, spec))
                 if spec.derived:
                     try:
                         # FIXME: should provide requirement manager to
                         # converter_from but it hasn't been implemented yet
                         spec.format.converter_from(inpt.format)
                     except ArcanaNoConverterError as e:
                         raise ArcanaNoConverterError(
                             "{}, which is requried to convert:\n{} "
                             "to\n{}.".format(e, inpt, spec))
                 else:
                     if inpt.format not in spec.valid_formats:
                         raise ArcanaUsageError(
                             "Cannot pass {} as an input to {} as it is "
                             "not in one of the valid formats ('{}')".
                             format(
                                 inpt, spec,
                                 "', '".join(f.name
                                             for f in spec.valid_formats)))
             elif not isinstance(inpt, BaseField):
                 raise ArcanaUsageError(
                     "Passed fileset ({}) as input to field spec {}".format(
                         inpt, spec))
         self._inputs[inpt_name] = inpt.bind(self)
     # "Bind" data specs in the class to the current study object
     # this will allow them to prepend the study name to the name
     # of the fileset
     self._bound_specs = {}
     for spec in self.data_specs():
         if spec.name not in self.input_names:
             if not spec.derived and spec.default is None:
                  # Emit a warning if an input has not been provided
                  # for an "acquired" (non-derived) fileset spec
                 msg = (" acquired fileset '{}' was not given as"
                        " an input of {}.".format(spec.name, self))
                 if spec.optional:
                     logger.info('Optional' + msg)
                 else:
                     if enforce_inputs:
                         raise ArcanaMissingInputError(
                             'Non-optional' + msg + " Pipelines "
                             "depending on this fileset will not "
                             "run")
             else:
                 self._bound_specs[spec.name] = spec.bind(self)
Example #30
 def __new__(metacls, name, bases, dct):  # @NoSelf @UnusedVariable
     if not any(issubclass(b, Study) for b in bases):
         raise ArcanaUsageError(
             "StudyMetaClass can only be used for classes that "
             "have Study as a base class")
     try:
         add_data_specs = dct['add_data_specs']
     except KeyError:
         add_data_specs = []
     try:
         add_param_specs = dct['add_param_specs']
     except KeyError:
         add_param_specs = []
     combined_attrs = set()
     combined_data_specs = {}
     combined_param_specs = {}
     for base in reversed(bases):
         # Get the combined class dictionary including base dicts
         # excluding auto-added properties for data and parameter specs
         combined_attrs.update(
             a for a in dir(base)
             if (not issubclass(base, Study) or a not in base.spec_names()))
         try:
             combined_data_specs.update(
                 (d.name, d) for d in base.data_specs())
         except AttributeError:
             pass
         try:
             combined_param_specs.update(
                 (p.name, p) for p in base.parameter_specs())
         except AttributeError:
             pass
     combined_attrs.update(list(dct.keys()))
     combined_data_specs.update((d.name, d) for d in add_data_specs)
     combined_param_specs.update((p.name, p) for p in add_param_specs)
     # Check that the pipeline names in data specs correspond to a
     # pipeline method in the class
     for spec in add_data_specs:
         if spec.derived:
             if spec.pipeline_name == 'pipeline':
                 raise ArcanaDesignError(
                     "Cannot use the name 'pipeline' for the name of a "
                     "pipeline constructor in class {} as it clashes "
                     "with base method to create pipelines".format(name))
             if spec.pipeline_name not in combined_attrs:
                 raise ArcanaDesignError(
                     "Pipeline to generate '{}', '{}', is not present"
                     " in '{}' class".format(spec.name, spec.pipeline_name,
                                             name))
     # Check for name clashes between data and parameter specs
     spec_name_clashes = (set(combined_data_specs)
                          & set(combined_param_specs))
     if spec_name_clashes:
         raise ArcanaDesignError(
             "'{}' name both data and parameter specs in '{}' class".format(
                 "', '".join(spec_name_clashes), name))
     reserved_clashes = [
         n for n in combined_data_specs if n in Study.ITERFIELDS
     ]
     if reserved_clashes:
         raise ArcanaDesignError(
             "'{}' data spec names clash with reserved names".format(
                 "', '".join(reserved_clashes), name))
     dct['_data_specs'] = combined_data_specs
     dct['_param_specs'] = combined_param_specs
     if '__metaclass__' not in dct:
         dct['__metaclass__'] = metacls
     return type(name, bases, dct)