Beispiel #1
0
 def match_node(self, node, **kwargs):
     """
     Finds the file associated with the primary match for the given node
     and wraps it in a Fileset

     Parameters
     ----------
     node : object
         The node to match. Its 'subject_id' and 'visit_id' are passed on
         to the returned fileset
     **kwargs
         Passed through unchanged to the 'match_node' of the primary input

     Returns
     -------
     Fileset
         The fileset created from the associated path

     Raises
     ------
     ArcanaUsageError
         If 'self.type' is not valid for 'self.association'
     ArcanaInputMissingMatchError
         If none of the field-maps returned by the layout has a type equal
         to 'self.association'
     """
     matched = self.primary.match_node(node, **kwargs)
     layout = self.primary.dataset.layout
     if self.association == 'grads':
         # Gradient files are looked up with a dedicated layout method
         if self.type == 'bvec':
             lookup = layout.get_bvec
         elif self.type == 'bval':
             lookup = layout.get_bval
         else:
             raise ArcanaUsageError(
                 "'{}' is not a valid type for '{}' associations"
                 .format(self.type, self.association))
         path = lookup(matched.path)
     else:
         # Any other association is treated as a type of field-map
         candidates = layout.get_fieldmap(matched.path, return_list=True)
         try:
             fieldmap = next(fm for fm in candidates
                             if fm['type'] == self.association)
         except StopIteration:
             found = ', '.join(fm['type'] for fm in candidates)
             raise ArcanaInputMissingMatchError(
                 "No \"{}\" field-maps associated with {} (found {})"
                 .format(self.association, matched, found))
         try:
             path = fieldmap[self.type]
         except KeyError:
             raise ArcanaUsageError(
                 "'{}' is not a valid type for '{}' associations"
                 .format(self.type, self.association))
     return Fileset.from_path(path,
                              format=self._format,
                              dataset=self.primary.dataset,
                              subject_id=node.subject_id,
                              visit_id=node.visit_id)
Beispiel #2
0
 def __init__(self, collection, frequency):
     """
     Stores the collected items in a structure keyed according to the
     frequency of the collection

     Parameters
     ----------
     collection : CollectedClass | iterable of CollectedClass
         The items to collect. For 'per_study' either a bare item or an
         iterable holding at most one item
     frequency : str
         One of 'per_study', 'per_session', 'per_subject' or 'per_visit'

     Raises
     ------
     ArcanaUsageError
         If more than one item is provided for a 'per_study' collection,
         if the frequency is not recognised, or if an item yielded by
         iterating over the collection is not a CollectedClass
     """
     self._frequency = frequency
     if frequency == 'per_study':
         # If wrapped in an iterable
         if not isinstance(collection, self.CollectedClass):
             # Materialise before measuring so that generators (which
             # have no len()) are also accepted
             collection = list(collection)
             if len(collection) > 1:
                 raise ArcanaUsageError(
                     "More than one {} passed to {}".format(
                         self.CollectedClass.__name__,
                         type(self).__name__))
         self._collection = collection
     elif frequency == 'per_session':
         # The items are iterated over once per subject, so a one-shot
         # iterable needs to be materialised first
         items = list(collection)
         self._collection = OrderedDict()
         for subj_id in sorted(set(c.subject_id for c in items)):
             self._collection[subj_id] = OrderedDict(
                 sorted(((c.visit_id, c)
                         for c in items if c.subject_id == subj_id),
                        key=itemgetter(0)))
     elif frequency == 'per_subject':
         self._collection = OrderedDict(
             sorted(((c.subject_id, c) for c in collection),
                    key=itemgetter(0)))
     elif frequency == 'per_visit':
         self._collection = OrderedDict(
             sorted(((c.visit_id, c) for c in collection),
                    key=itemgetter(0)))
     else:
         # Raised explicitly instead of 'assert False', which is stripped
         # when Python is run with -O
         raise ArcanaUsageError(
             "Unrecognised frequency '{}' passed to {}".format(
                 frequency, type(self).__name__))
     for datum in self:
         if not isinstance(datum, self.CollectedClass):
             raise ArcanaUsageError("Invalid class {} in {}".format(
                 datum, self))
Beispiel #3
0
 def __init__(self, name, extension=None, desc='',
              directory=False, within_dir_exts=None,
              aux_files=None, resource_names=None):
     """
     Validates and stores the definition of a data format

     Parameters
     ----------
     name : str
         Name of the format, must be lower case
     extension : str | None
         Extension of the primary file, may only be None for directories
     desc : str
         Description of the format
     directory : bool
         Whether the format is a directory
     within_dir_exts : iterable | None
         Only valid for directory formats, stored as a frozenset
     aux_files : dict | None
         Maps side car names to their extensions
     resource_names : dict | None
         Stored as provided, defaults to an empty dict

     Raises
     ------
     ArcanaUsageError
         If any of the consistency checks on the arguments fail
     """
     if not name.islower():
         raise ArcanaUsageError(
             "All data format names must be lower case ('{}')"
             .format(name))
     if not directory and extension is None:
         raise ArcanaUsageError(
             "Extension for '{}' format can only be None if it is a "
             "directory".format(name))
     self._name = name
     self._extension = extension
     self._desc = desc
     self._directory = directory
     if within_dir_exts is None:
         self._within_dir_exts = None
     elif directory:
         self._within_dir_exts = frozenset(within_dir_exts)
     else:
         raise ArcanaUsageError(
             "'within_dir_exts' keyword arg is only valid "
             "for directory data formats, not '{}'".format(name))
     self._converters = {}
     self._resource_names = {} if resource_names is None else resource_names
     self._aux_files = {} if aux_files is None else aux_files
     # Side cars need an extension that differs from the primary file
     for side_car, side_car_ext in self.aux_files.items():
         if side_car_ext == self.ext:
             raise ArcanaUsageError(
                 "Extension for side car '{}' cannot be the same as the "
                 "primary file ('{}')".format(side_car, side_car_ext))
Beispiel #4
0
 def __init__(self,
              name,
              format=None,
              frequency='per_session',
              path=None,
              aux_files=None,
              id=None,
              uri=None,
              subject_id=None,
              visit_id=None,
              dataset=None,
              from_analysis=None,
              exists=True,
              checksums=None,
              record=None,
              resource_name=None,
              potential_aux_files=None,
              quality=None):
     """
     Initialises the fileset after checking that the side cars and
     potential side cars are consistent with the path and format

     Raises
     ------
     ArcanaUsageError
         If side cars are provided without a primary path or a format, if
         the names of the provided side cars differ from those of the
         format, or if potential side cars are provided along with a
         format
     """
     BaseFileset.__init__(self, name=name, format=format,
                          frequency=frequency)
     BaseItemMixin.__init__(self, subject_id, visit_id, dataset,
                            from_analysis, exists, record)
     has_aux = aux_files is not None
     if has_aux and path is None:
         raise ArcanaUsageError(
             "Side cars provided to '{}' fileset ({}) but not primary "
             "path".format(self.name, aux_files))
     if has_aux and format is None:
         raise ArcanaUsageError(
             "Side cars provided to '{}' fileset ({}) but format is "
             "not specified".format(self.name, aux_files))
     if path is not None:
         path = op.abspath(op.realpath(path))
         # The provided side cars need to be exactly those of the format
         if has_aux and (set(aux_files.keys())
                         != set(self.format.aux_files.keys())):
             raise ArcanaUsageError(
                 "Provided side cars for '{}' but expected '{}'".format(
                     "', '".join(aux_files.keys()),
                     "', '".join(self.format.aux_files.keys())))
     self._path = path
     self._aux_files = aux_files if has_aux else {}
     self._uri = uri
     self._id = id
     self._checksums = checksums
     self._resource_name = resource_name
     self._quality = quality
     if potential_aux_files is None:
         self._potential_aux_files = None
     elif format is not None:
         raise ArcanaUsageError(
             "Potential paths should only be provided to Fileset.__init__ "
             "({}) when the format of the fileset ({}) is not determined".
             format(self.name, format))
     else:
         self._potential_aux_files = list(potential_aux_files)
Beispiel #5
0
 def collection(self):
     """
     The collection of the default of this spec

     Raises
     ------
     ArcanaUsageError
         If the spec hasn't been bound to a study or has no default
     """
     if self._study is None:
         message = ("{} needs to be bound to a study before accessing "
                    "the corresponding collection")
         raise ArcanaUsageError(message.format(self))
     fallback = self.default
     if fallback is None:
         message = "{} does not have default so cannot access its collection"
         raise ArcanaUsageError(message.format(self))
     return fallback.collection
Beispiel #6
0
 def slice(self):
     """
     The slice of the default of this spec

     Raises
     ------
     ArcanaUsageError
         If the spec hasn't been bound to an analysis or has no default
     """
     if self._analysis is None:
         message = ("{} needs to be bound to a analysis before accessing "
                    "the corresponding slice")
         raise ArcanaUsageError(message.format(self))
     fallback = self.default
     if fallback is None:
         message = "{} does not have default so cannot access its slice"
         raise ArcanaUsageError(message.format(self))
     return fallback.slice
Beispiel #7
0
 def __init__(self, min_version, max_version):
     """
     Stores the bounds of a range of versions of a single requirement

     Parameters
     ----------
     min_version : object
         Lower bound, needs a 'requirement' attribute
     max_version : object
         Upper bound, needs a 'requirement' attribute

     Raises
     ------
     ArcanaUsageError
         If the bounds refer to different requirements or the maximum
         compares less than the minimum
     """
     min_req = min_version.requirement
     max_req = max_version.requirement
     if min_req != max_req:
         raise ArcanaUsageError(
             "Inconsistent requirements between min and max versions "
             "({} and {})".format(min_req, max_req))
     # Both bounds are stored before the ordering check as 'self' is
     # formatted into the error message
     self._min_ver, self._max_ver = min_version, max_version
     if max_version < min_version:
         raise ArcanaUsageError(
             "Maxium version in is less than minimum in {}".format(self))
Beispiel #8
0
 def __init__(self, name, default, choices=None, desc=None, fallbacks=None):
     """
     Initialises the switch specification and checks that choices are
     provided if, and only if, the switch is not boolean

     Parameters
     ----------
     name : str
         Name of the switch
     default : object
         Default value of the switch, passed to the parent initialiser
     choices : iterable | None
         Valid values of a non-boolean switch, stored as a tuple
     desc : str | None
         Description of the switch
     fallbacks : dict | None
         Stored as provided, defaults to an empty dict

     Raises
     ------
     ArcanaUsageError
         If choices are provided for a boolean switch or omitted for a
         non-boolean one
     """
     super(SwitchSpec, self).__init__(name, default, desc=desc)
     if self.is_boolean:
         if choices is not None:
             # Choices aren't necessarily strings so they are converted
             # before joining, otherwise a TypeError would mask this error
             raise ArcanaUsageError(
                 "Choices ({}) are only valid for non-boolean "
                 "switches ('{}')".format(
                     "', '".join(str(c) for c in choices), name))
     elif choices is None:
         raise ArcanaUsageError("Choices must be provided for non-boolean "
                                "switches ('{}')".format(name))
     self._choices = tuple(choices) if choices is not None else None
     self._desc = desc
     self._fallbacks = fallbacks if fallbacks is not None else {}
Beispiel #9
0
 def __init__(self,
              name,
              value=None,
              dtype=None,
              frequency='per_session',
              array=None,
              subject_id=None,
              visit_id=None,
              dataset=None,
              from_analysis=None,
              exists=True,
              record=None):
     """
     Initialises the field, inferring 'dtype' and 'array' from the value
     where they aren't provided explicitly

     Parameters
     ----------
     name : str
         Name of the field
     value : object | None
         Value of the field, passed through 'parse_value'. A list is
         treated as an array value
     dtype : type | None
         Type to cast the value(s) to. Inferred from the value if None
     frequency : str
         Frequency of the field, passed to BaseField
     array : bool | None
         Whether the field holds an array. Inferred from the value if None

     Raises
     ------
     ArcanaUsageError
         If neither 'value' nor 'dtype' is provided, if the value
         contradicts an explicit 'array' flag, or if the dtype has to be
         inferred from an empty array
     """
     # Try to determine dtype and array from value if they haven't
     # been provided.
     if value is None:
         if dtype is None:
             raise ArcanaUsageError(
                 "Either 'value' or 'dtype' must be provided to "
                 "Field init")
         array = bool(array)  # Convert array to False if it is None
     else:
         value = parse_value(value)
         if isinstance(value, list):
             if array is False:
                 raise ArcanaUsageError(
                     "Array value passed to '{}', which is explicitly not "
                     "an array ({})".format(name, value))
             array = True
         else:
             if array:
                 raise ArcanaUsageError(
                     "Non-array value ({}) passed to '{}', which expects "
                     "array{}".format(value, name,
                                      (' of type {}'.format(dtype)
                                       if dtype is not None else '')))
             array = False
         if dtype is None:
             if array:
                 if not value:
                     # There is no element to infer the type from
                     raise ArcanaUsageError(
                         "'dtype' must be provided to '{}' as it cannot "
                         "be inferred from an empty array".format(name))
                 dtype = type(value[0])
             else:
                 dtype = type(value)
         else:
             # Ensure everything is cast to the correct type
             if array:
                 value = [dtype(v) for v in value]
             else:
                 value = dtype(value)
     BaseField.__init__(self, name, dtype, frequency, array)
     BaseItemMixin.__init__(self, subject_id, visit_id, dataset,
                            from_analysis, exists, record)
     self._value = value
Beispiel #10
0
 def check_valid(self, switch, context=''):
     """
     Checks that the value of the switch is valid for this specification

     Parameters
     ----------
     switch : object
         The switch to check, needs a 'value' attribute
     context : str
         Inserted into the error messages after the name of the switch

     Raises
     ------
     ArcanaUsageError
         If a boolean switch has a non-boolean value, or a non-boolean
         switch has a value that is neither the default nor a valid choice
     """
     super(SwitchSpec, self).check_valid(switch, context=context)
     value = switch.value
     if self.is_boolean:
         if isinstance(value, bool):
             return
         raise ArcanaUsageError(
             "Value provided to switch '{}'{} should be a "
             "boolean (not {})".format(self.name, context, value))
     if value != self.default and value not in self.choices:
         valid_choices = "', '".join(str(c) for c in self.choices)
         raise ArcanaUsageError(
             "Value provided to switch '{}'{} ({}) is not a valid "
             "choice ('{}')".format(self.name, context, value,
                                    valid_choices))
Beispiel #11
0
 def _list_outputs(self):
     """
     Sets the 'gunzipped' output to the single new entry in the working
     directory, i.e. the one that isn't in 'self.listdir_before'

     Raises
     ------
     ArcanaUsageError
         If there are no new entries or more than one
     """
     outputs = self._outputs().get()
     cwd = os.getcwd()
     created = set(os.listdir(cwd)) - self.listdir_before
     if len(created) > 1:
         raise ArcanaUsageError(
             "Zip repositorys can only contain a single directory, found "
             "'{}'".format("', '".join(created)))
     try:
         unzipped = next(iter(created))
     except StopIteration:
         raise ArcanaUsageError(
             "No files or directories found in unzipped directory")
     outputs['gunzipped'] = op.join(cwd, unzipped)
     return outputs
Beispiel #12
0
 def _list_outputs(self):
     """
     Applies the operator named by the 'op' input to the 'x' and 'y'
     inputs, or reduces the 'x' list with it if 'y' isn't defined

     String inputs are loaded with 'self._load_file' before use. The
     result is written to a file if the 'as_file' input is set, in which
     case the 'z' output holds the path to that file instead of the value

     Raises
     ------
     ArcanaUsageError
         If 'y' isn't defined and 'x' is an empty list or not a list
     """
     x = self.inputs.x
     y = self.inputs.y
     if isinstance(x, basestring):
         x = self._load_file(x)
     if isinstance(y, basestring):
         y = self._load_file(y)
     oper = getattr(operator, self.inputs.op)
     if isdefined(y):
         z = oper(x, y)
     elif isinstance(x, list):
         if x:
             # Only the first element is inspected to decide whether the
             # list holds paths that need to be loaded
             if isinstance(x[0], basestring):
                 x = [self._load_file(u) for u in x]
         else:
             raise ArcanaUsageError("Cannot provide empty list to 'x'")
         z = reduce(oper, x)
     else:
         # Raised as a usage error for consistency with the empty list
         # case above (was a bare Exception)
         raise ArcanaUsageError(
             "If 'y' is not provided then x needs to be list")
     outputs = self.output_spec().get()
     if self.inputs.as_file:
         z_path = op.abspath(self._gen_z_fname())
         with open(z_path, 'w') as f:
             f.write(str(z))
         outputs['z'] = z_path
     else:
         outputs['z'] = z
     return outputs
Beispiel #13
0
 def __init__(self, spec_name, pattern=None, valid_formats=None,
              frequency='per_session', id=None,
              order=None, dicom_tags=None, is_regex=False, from_study=None,
              skip_missing=False, drop_if_missing=False,
              fallback_to_default=False, repository=None,
              acceptable_quality=None,
              study_=None, collection_=None):
     """
     Initialises the fileset input and normalises the criteria used to
     match filesets

     Parameters
     ----------
     spec_name : str
         Name of the spec the input is for, passed to BaseFileset
     valid_formats : object | iterable | None
         A single format or an iterable of formats, stored as a tuple
     id : object | None
         Stored as a string. Cannot be combined with 'order'
     order : object | None
         Passed to BaseInputMixin. Cannot be combined with 'id'
     dicom_tags : object | None
         Stored as provided
     acceptable_quality : str | iterable | None
         A single quality label or an iterable of them, stored as a tuple

     The remaining arguments are passed through to BaseFileset and
     BaseInputMixin unchanged.

     Raises
     ------
     ArcanaUsageError
         If both 'order' and 'id' are provided
     """
     BaseFileset.__init__(self, spec_name, None, frequency)
     BaseInputMixin.__init__(self, pattern, is_regex, order,
                             from_study, skip_missing, drop_if_missing,
                             fallback_to_default, repository, study_,
                             collection_)
     self._dicom_tags = dicom_tags
     if order is not None and id is not None:
         raise ArcanaUsageError(
             "Cannot provide both 'order' and 'id' to a fileset "
             "match")
     if valid_formats is not None:
         try:
             valid_formats = tuple(valid_formats)
         except TypeError:
             # A single format that isn't iterable
             valid_formats = (valid_formats,)
     self._valid_formats = valid_formats
     self._id = str(id) if id is not None else id
     if isinstance(acceptable_quality, basestring):
         # A bare string is a single label, not an iterable of labels
         acceptable_quality = (acceptable_quality,)
     elif acceptable_quality is not None:
         acceptable_quality = tuple(acceptable_quality)
     self._acceptable_quality = acceptable_quality
Beispiel #14
0
 def fileset_path(self, fileset, dataset=None, fname=None):
     """
     Returns the path of a derived fileset within the 'derivatives'
     directory of the dataset, creating the session directory if needed

     Parameters
     ----------
     fileset : object
         The derived fileset to get the path for
     dataset : object | None
         The dataset the path is within, defaults to that of the fileset
     fname : str | None
         The file name to use, defaults to that of the fileset

     Returns
     -------
     str
         Path to the fileset within the session directory

     Raises
     ------
     ArcanaUsageError
         If the fileset is not derived
     """
     if not fileset.derived:
         raise ArcanaUsageError(
             "Can only get automatically get path to derived filesets not "
             "{}".format(fileset))
     dataset = fileset.dataset if dataset is None else dataset
     fname = fileset.fname if fname is None else fname
     # Missing IDs are replaced by the summary name
     subject_label = (self.SUMMARY_NAME if fileset.subject_id is None
                      else fileset.subject_id)
     visit_label = (self.SUMMARY_NAME if fileset.visit_id is None
                    else fileset.visit_id)
     path_parts = [dataset.name,
                   'derivatives',
                   fileset.from_analysis,
                   'sub-{}'.format(subject_label),
                   'ses-{}'.format(visit_label)]
     sess_dir = op.join(*path_parts)
     # Make session dir if required
     if not op.exists(sess_dir):
         os.makedirs(sess_dir, stat.S_IRWXU | stat.S_IRWXG)
     return op.join(sess_dir, fname)
Beispiel #15
0
 def coreg_brain_pipeline(self, **name_maps):
     """
     Coregistered + brain-extracted images can be derived in 2-ways. If an
     explicit brain-extracted reference is provided to
     'coreg_ref_brain' then that is used to coregister a brain extracted
     image against. Alternatively, if only a skull-included reference is
     provided then the registration is performed with skulls-included and
     then brain extraction is performed after

     Raises
     ------
     ArcanaUsageError
         If neither 'coreg_ref_brain' nor 'coreg_ref' is provided
     """
     if self.provided('coreg_ref_brain'):
         # Coregister the extracted brain against the extracted reference
         input_map = {'preproc': 'brain',
                      'coreg_ref': 'coreg_ref_brain'}
         return self.linear_coreg_pipeline(
             name='linear_coreg_brain',
             input_map=input_map,
             output_map={'coreg': 'coreg_brain'},
             name_maps=name_maps)
     if self.provided('coreg_ref'):
         # Extract the brain from the coregistered image
         return self.brain_extraction_pipeline(
             name='linear_coreg_brain',
             input_map={'preproc': 'coreg'},
             output_map={'brain': 'coreg_brain'},
             name_maps=name_maps)
     raise ArcanaUsageError(
         "Either 'coreg_ref' or 'coreg_ref_brain' needs to be provided "
         "in order to derive coreg_brain")
Beispiel #16
0
 def format(self):
     """
     The format of the default of this spec

     Raises
     ------
     ArcanaUsageError
         If accessing the format of the default raises an AttributeError
     """
     try:
         default_format = self.default.format
     except AttributeError:
         raise ArcanaUsageError(
             "File format is not defined for InputFilesetSpec objects "
             "without a default")
     return default_format
Beispiel #17
0
    def segmentation_pipeline(self, img_type=2, **name_maps):
        """
        Builds a pipeline that segments the 'brain' image with FSL's FAST
        and outputs one of the tissue class files as 'wm_seg'

        Parameters
        ----------
        img_type : int
            Passed to FAST and used to select the output of the split node
            that is mapped to 'wm_seg' ('out3' for 1, 'out2' for 2)
        **name_maps
            Passed to 'new_pipeline' as the 'name_maps' keyword argument

        Returns
        -------
        pipeline
            The pipeline created by 'self.new_pipeline'

        Raises
        ------
        ArcanaUsageError
            If 'img_type' is neither 1 nor 2
        """
        # Determine output field of split to use. This is done up-front so
        # that an invalid 'img_type' is rejected before anything is built
        if img_type == 1:
            split_output = 'out3'
        elif img_type == 2:
            split_output = 'out2'
        else:
            raise ArcanaUsageError(
                "'img_type' parameter can either be 1 or 2 (not {})".format(
                    img_type))

        pipeline = self.new_pipeline(
            name='FAST_segmentation',
            name_maps=name_maps,
            inputs=[FilesetSpec('brain', nifti_gz_format)],
            outputs=[FilesetSpec('wm_seg', nifti_gz_format)],
            desc="White matter segmentation of the reference image",
            references=[fsl_cite])

        # NB: there must not be a trailing comma after this call, otherwise
        # 'fast' is a 1-tuple instead of the node that the split connects to
        fast = pipeline.add('fast',
                            fsl.FAST(img_type=img_type,
                                     segments=True,
                                     out_basename='Reference_segmentation'),
                            inputs={'in_files': ('brain', nifti_gz_format)},
                            requirements=[fsl_req.v('5.0.9')])

        pipeline.add('split',
                     Split(splits=[1, 1, 1], squeeze=True),
                     connect={'inlist': (fast, 'tissue_class_files')},
                     outputs={split_output: ('wm_seg', nifti_gz_format)})

        return pipeline
Beispiel #18
0
 def __init__(self,
              pattern,
              is_regex,
              order,
              from_analysis,
              skip_missing=False,
              drop_if_missing=False,
              fallback_to_default=False,
              dataset=None,
              analysis_=None,
              slice_=None):
     """
     Stores the matching criteria and missing-data options of the input

     Parameters
     ----------
     pattern : object
         Stored as 'self._pattern'
     is_regex : bool
         Stored as 'self._is_regex'
     order : object
         Stored as 'self._order'
     from_analysis : object
         Stored as 'self._from_analysis'
     skip_missing : bool
         Mutually exclusive with 'fallback_to_default'
     drop_if_missing : bool
         Stored as 'self._drop_if_missing'
     fallback_to_default : bool
         Mutually exclusive with 'skip_missing'
     dataset : object | None
         Stored as 'self._dataset'
     analysis_ : object | None
         Only intended to be provided when recreating from initkwargs
     slice_ : object | None
         Only intended to be provided when recreating from initkwargs

     Raises
     ------
     ArcanaUsageError
         If both 'skip_missing' and 'fallback_to_default' are set
     """
     self._pattern = pattern
     self._is_regex = is_regex
     self._order = order
     self._from_analysis = from_analysis
     self._dataset = dataset
     self._skip_missing = skip_missing
     self._drop_if_missing = drop_if_missing
     self._fallback_to_default = fallback_to_default
     # NOTE(review): 'self' is formatted into the message below before
     # '_analysis' and '_slice' have been set -- confirm that the string
     # representation doesn't rely on them
     if skip_missing and fallback_to_default:
         raise ArcanaUsageError(
             "Cannot provide both mutually exclusive 'skip_missing' and "
             "'fallback_to_default' flags to {}".format(self))
     # Set when fallback_to_default is True and there are missing matches
     self._derivable = False
     self._fallback = None
     # analysis_ and slice_ are not intended to be provided to __init__
     # except when recreating when using initkwargs
     self._analysis = analysis_
     self._slice = slice_
Beispiel #19
0
 def pipeline_getter(self):
     """
     For duck-typing with *Spec types

     Raises
     ------
     ArcanaUsageError
         If the input is not derivable
     """
     if self.derivable:
         return self._fallback.pipeline_getter
     raise ArcanaUsageError(
         "There is no pipeline getter for {} because it doesn't "
         "fallback to a derived spec".format(self))
Beispiel #20
0
    def get_checksums(self, fileset):
        """
        Downloads the MD5 digests associated with the files in the file-set.
        These are saved with the downloaded files in the cache and used to
        check if the files have been updated on the server

        Parameters
        ----------
        fileset : Fileset
            The fileset to get the checksums for. Its URI is used to query
            the files and, unless its format is a directory, its format is
            used to determine the primary file so that the corresponding
            key in the checksums dictionary can be changed to '.' to match
            the way it is generated locally by Arcana.

        Returns
        -------
        dict
            Maps the name of each file to its digest

        Raises
        ------
        ArcanaUsageError
            If the URI of the fileset has not been set
        """
        if fileset.uri is None:
            raise ArcanaUsageError(
                "Can't retrieve checksums as URI has not been set for {}"
                .format(fileset))
        with self:
            response = self.login.get_json(fileset.uri + '/files')
            checksums = {}
            for entry in response['ResultSet']['Result']:
                checksums[entry['Name']] = entry['digest']
        if fileset.format.directory:
            return checksums
        # Replace the key corresponding to the primary file with '.' to
        # match the way that checksums are created by Arcana
        primary = fileset.format.assort_files(checksums.keys())[0]
        checksums['.'] = checksums.pop(primary)
        return checksums
Beispiel #21
0
    def __init__(self,
                 name,
                 repository=None,
                 subject_ids=None,
                 visit_ids=None,
                 fill_tree=False,
                 depth=0,
                 subject_id_map=None,
                 visit_id_map=None,
                 file_formats=(),
                 clear_cache=True):
        """
        Initialises the dataset within the given repository

        Parameters
        ----------
        name : str
            Name of the dataset, standardised by the repository before it
            is stored. If no repository is provided it needs to be an
            existing path
        repository : object | None
            The repository the dataset is in. A LocalFileSystemRepo is
            created if None
        subject_ids : iterable | None
            Stored as a tuple if provided
        visit_ids : iterable | None
            Stored as a tuple if provided
        fill_tree : bool
            Stored as 'self._fill_tree'
        depth : int
            Stored as 'self._depth'
        subject_id_map : object | None
            Stored as 'self._subject_id_map'
        visit_id_map : object | None
            Stored as 'self._visit_id_map'
        file_formats : iterable
            Stored as 'self._file_formats'
        clear_cache : bool
            Whether to call 'self.clear_cache()' during initialisation

        Raises
        ------
        ArcanaUsageError
            If no repository is provided and 'name' doesn't exist
        """
        if repository is None:
            # needs to be imported here to avoid circular imports
            from .local import LocalFileSystemRepo
            repository = LocalFileSystemRepo()
            if not op.exists(name):
                raise ArcanaUsageError(
                    "Base directory for LocalFileSystemRepo '{}' does not "
                    "exist".format(name))
        self._name = repository.standardise_name(name)
        self._repository = repository
        self._subject_ids = (tuple(subject_ids)
                             if subject_ids is not None else None)
        self._visit_ids = tuple(visit_ids) if visit_ids is not None else None
        self._fill_tree = fill_tree
        self._depth = depth
        # NOTE(review): the cache is cleared before the attributes below
        # are set -- confirm that 'clear_cache' doesn't rely on them
        if clear_cache:
            self.clear_cache()

        self._subject_id_map = subject_id_map
        self._visit_id_map = visit_id_map
        # The inverse maps start out empty
        self._inv_subject_id_map = {}
        self._inv_visit_id_map = {}
        self._file_formats = file_formats
        self._cached_tree = None
Beispiel #22
0
 def __init__(self,
              name,
              collection,
              frequency=None,
              dtype=None,
              array=None):
     """
     Initialises a collection of fields, checking that any explicitly
     provided frequency, dtype and array flag agree with those common to
     the collected fields

     Parameters
     ----------
     name : str
         Name of the collection
     collection : iterable
         The fields to collect
     frequency : str | None
         Taken from the fields if None, required if there are no fields
     dtype : type | None
         Taken from the fields if None, required if there are no fields
     array : bool | None
         Taken from the fields if None

     Raises
     ------
     ArcanaUsageError
         If an explicit value doesn't match that of the fields, or if the
         frequency or dtype cannot be determined
     """
     collection = list(collection)

     def reconcile(attr_name, explicit, mismatch_msg):
         # Fall back to the value common to the fields when no explicit
         # value was given, otherwise require that the two agree
         implicit = self._common_attr(collection, attr_name)
         if explicit is None:
             return implicit
         if explicit != implicit:
             raise ArcanaUsageError(
                 mismatch_msg.format(implicit, explicit, name))
         return explicit

     if collection:
         frequency = reconcile(
             'frequency', frequency,
             "Implicit frequency '{}' does not match explicit "
             "frequency '{}' for '{}' FieldCollection")
         dtype = reconcile(
             'dtype', dtype,
             "Implicit dtype '{}' does not match explicit "
             "dtype '{}' for '{}' FieldCollection")
         array = reconcile(
             'array', array,
             "Implicit array '{}' does not match explicit "
             "array '{}' for '{}' FieldCollection")
     if frequency is None:
         raise ArcanaUsageError(
             "Need to provide explicit frequency for empty "
             "FieldCollection")
     if dtype is None:
         raise ArcanaUsageError("Need to provide explicit dtype for empty "
                                "FieldCollection")
     BaseField.__init__(self, name, dtype=dtype, frequency=frequency,
                        array=array)
     BaseCollectionMixin.__init__(self, collection, frequency)
Beispiel #23
0
 def from_path(cls, path, **kwargs):
     """
     Creates a fileset from a path, naming it after the base name of the
     path (without the extension if the path is not a directory)

     Parameters
     ----------
     path : str
         Path to an existing file or directory
     **kwargs
         Passed through to the initialiser of the class

     Raises
     ------
     ArcanaUsageError
         If the path does not exist
     """
     if not op.exists(path):
         raise ArcanaUsageError(
             "Attempting to read Fileset from path '{}' but it "
             "does not exist".format(path))
     base = op.basename(path)
     name = base if op.isdir(path) else split_extension(base)[0]
     return cls(name, path=path, **kwargs)
Beispiel #24
0
 def coreg_matrix_pipeline(self, **name_maps):
     """
     Selects the pipeline used to derive 'coreg_matrix' depending on which
     reference has been provided, preferring 'coreg_ref_brain'

     Raises
     ------
     ArcanaUsageError
         If neither 'coreg_ref_brain' nor 'coreg_ref' is provided
     """
     if self.provided('coreg_ref_brain'):
         return self.coreg_brain_pipeline(**name_maps)
     if self.provided('coreg_ref'):
         return self.linear_coreg_pipeline(**name_maps)
     raise ArcanaUsageError(
         "'coreg_matrix' can only be derived if 'coreg_ref' or "
         "'coreg_ref_brain' is provided to {}".format(self))
Beispiel #25
0
 def __init__(self,
              name,
              slce,
              format=None,
              frequency=None,
              candidate_formats=None):
     """
     Initialises a slice of filesets, determining the format and frequency
     from the filesets where they aren't provided explicitly

     Parameters
     ----------
     name : str
         Name of the slice
     slce : iterable
         The filesets in the slice. Can be a one-shot iterable
     format : object | None
         Format of the filesets. If None (and no candidate formats are
         given) it is taken from the filesets
     frequency : str | None
         Taken from the filesets if None, required if there are none
     candidate_formats : sequence | None
         Formats passed to 'detect_format' for existing filesets without
         a format. The first is used as the format of an empty slice

     Raises
     ------
     ArcanaUsageError
         If the formats of the filesets are heterogeneous and neither
         'format' nor 'candidate_formats' is provided, if no frequency is
         provided for an empty slice, or if the explicit frequency doesn't
         match that of the filesets
     """
     # Materialise first so that a one-shot iterable isn't exhausted by
     # the scan over the formats below
     slce = list(slce)
     if format is None and candidate_formats is None:
         formats = set(d.format for d in slce)
         if len(formats) > 1:
             raise ArcanaUsageError(
                 "Either 'format' or candidate_formats needs to be supplied"
                 " during the initialisation of a FilesetSlice ('{}') with "
                 "heterogeneous formats".format(name))
         # There are no formats to choose from if the slice is empty, in
         # which case the format is left undetermined
         format = next(iter(formats), None)
     if not slce:
         if format is None and candidate_formats:
             format = candidate_formats[0]
         if frequency is None:
             raise ArcanaUsageError(
                 "Need to provide explicit frequency for empty "
                 "FilesetSlice")
     else:
         implicit_frequency = self._common_attr(slce, 'frequency')
         if frequency is None:
             frequency = implicit_frequency
         elif frequency != implicit_frequency:
             raise ArcanaUsageError(
                 "Implicit frequency '{}' does not match explicit "
                 "frequency '{}' for '{}' FilesetSlice".format(
                     implicit_frequency, frequency, name))
         formatted_slice = []
         for fileset in slce:
             # Copied so that the filesets passed in aren't modified
             fileset = copy(fileset)
             if fileset.exists and fileset.format is None:
                 fileset.format = (fileset.detect_format(candidate_formats)
                                   if format is None else format)
             formatted_slice.append(fileset)
         slce = formatted_slice
         format = self._common_attr(slce, 'format')
     BaseFileset.__init__(self, name, format, frequency=frequency)
     BaseSliceMixin.__init__(self, slce, frequency)
Beispiel #26
0
 def _gen_prov_path_regex(self, path):
     """
     Converts a provenance in/exclude path into a compiled regex

     Parameters
     ----------
     path : str | re.Pattern
         Either a '/' delimited path string, which is converted into a
         regex of the form "root['a']['b'].*", or an already compiled
         regex, which is returned unchanged

     Returns
     -------
     re.Pattern
         The compiled regex

     Raises
     ------
     ArcanaUsageError
         If the path is neither a string nor a compiled regex
     """
     if isinstance(path, basestring):
         if path.startswith('/'):
             path = path[1:]
         # NOTE(review): the path components are inserted into the
         # pattern without escaping -- confirm that is intended
         return re.compile(r"root\['{}'\].*"
                           .format(r"'\]\['".join(path.split('/'))))
     if isinstance(path, re.Pattern):
         # Already compiled (previously fell through with nothing bound to
         # the returned name)
         return path
     raise ArcanaUsageError(
         "Provenance in/exclude paths can either be path strings or "
         "regexes, not '{}'".format(path))
Beispiel #27
0
 def path_depth(cls, root_dir, dpath):
     """
     Returns the number of directory levels that a path is below a root
     directory

     Parameters
     ----------
     root_dir : str
         The directory to measure the depth from
     dpath : str
         The path to get the depth of

     Returns
     -------
     int
         0 if the path is the root directory itself, otherwise the number
         of components in the path relative to the root directory

     Raises
     ------
     ArcanaUsageError
         If the path is not within the root directory
     """
     relpath = op.relpath(dpath, root_dir)
     if relpath == op.curdir:
         return 0
     parts = relpath.split(op.sep)
     # Relative paths are normalised, so one that leaves the root directory
     # starts with a parent directory component. Components are compared
     # (rather than searching for the substring) so that names that merely
     # contain '..' are not rejected
     if parts[0] == op.pardir:
         raise ArcanaUsageError(
             "Path '{}' is not a sub-directory of '{}'".format(
                 dpath, root_dir))
     return len(parts)
Beispiel #28
0
 def dataset(self):
     """
     The dataset explicitly provided to the input, falling back to that of
     the analysis it is bound to

     Raises
     ------
     ArcanaUsageError
         If no dataset was provided and the input isn't bound to an
         analysis
     """
     if self._dataset is not None:
         return self._dataset
     if self._analysis is None:
         raise ArcanaUsageError(
             "Cannot access dataset of {} as it wasn't explicitly "
             "provided and Input hasn't been bound to a analysis".
             format(self))
     return self._analysis.dataset
Beispiel #29
0
 def repository(self):
     """
     The repository explicitly provided to the input, falling back to that
     of the study it is bound to

     Raises
     ------
     ArcanaUsageError
         If no repository was provided and the input isn't bound to a
         study
     """
     if self._repository is not None:
         return self._repository
     if self._study is None:
         raise ArcanaUsageError(
             "Cannot access repository of {} as it wasn't explicitly "
             "provided and Input hasn't been bound to a study"
             .format(self))
     return self._study.repository
Beispiel #30
0
 def _common_attr(self, collection, attr_name, ignore_none=True):
     """
     Returns the value of an attribute that is common to all items in the
     collection

     Parameters
     ----------
     collection : iterable
         The items to read the attribute from
     attr_name : str
         Name of the attribute
     ignore_none : bool
         Whether to disregard items for which the attribute is None

     Returns
     -------
     object | None
         The common value, or None if there are no values to choose from

     Raises
     ------
     ArcanaUsageError
         If the items have differing values for the attribute
     """
     values = {getattr(item, attr_name) for item in collection}
     if ignore_none:
         values.discard(None)
     if len(values) > 1:
         raise ArcanaUsageError(
             "Heterogeneous attributes for '{}' within {}".format(
                 attr_name, self))
     return next(iter(values), None)