def match_node(self, node, **kwargs):
    """
    Match the file associated with this input's primary match within
    `node` (e.g. diffusion gradients or field-maps that accompany an
    imaging acquisition).

    Parameters
    ----------
    node : TreeNode
        The node (e.g. session) to match within. Presumably exposes
        `subject_id`/`visit_id` attributes, which are read below --
        TODO confirm against callers.
    **kwargs
        Passed through to the primary selector's `match_node`.

    Returns
    -------
    Fileset
        A fileset constructed from the path of the associated file.

    Raises
    ------
    ArcanaUsageError
        If `self.type` is not valid for the given association.
    ArcanaInputMissingMatchError
        If no field-map of the requested association type is found.
    """
    # Resolve the primary fileset first; associated files are located
    # relative to its path via the dataset's layout object (appears to
    # be a BIDS-style layout -- it exposes get_bvec/get_bval/
    # get_fieldmap; confirm against the layout implementation).
    primary_match = self.primary.match_node(node, **kwargs)
    layout = self.primary.dataset.layout
    if self.association == 'grads':
        # Diffusion gradient side-car files
        if self.type == 'bvec':
            path = layout.get_bvec(primary_match.path)
        elif self.type == 'bval':
            path = layout.get_bval(primary_match.path)
        else:
            raise ArcanaUsageError(
                "'{}' is not a valid type for '{}' associations"
                .format(self.type, self.association))
    else:
        # Any other association is treated as a field-map type
        fieldmaps = layout.get_fieldmap(primary_match.path,
                                        return_list=True)
        try:
            # First field-map dict whose 'type' matches the association
            fieldmap = next(f for f in fieldmaps
                            if f['type'] == self.association)
        except StopIteration:
            raise ArcanaInputMissingMatchError(
                "No \"{}\" field-maps associated with {} (found {})"
                .format(self.association, primary_match,
                        ', '.join(f['type'] for f in fieldmaps)))
        try:
            path = fieldmap[self.type]
        except KeyError:
            raise ArcanaUsageError(
                "'{}' is not a valid type for '{}' associations"
                .format(self.type, self.association))
    return Fileset.from_path(path,
                             format=self._format,
                             dataset=self.primary.dataset,
                             subject_id=node.subject_id,
                             visit_id=node.visit_id)
def __init__(self, collection, frequency):
    """
    Organise `collection` into the internal mapping appropriate for
    the given frequency.

    Parameters
    ----------
    collection : CollectedClass | iterable[CollectedClass]
        The items to collect. For 'per_study' a single item (possibly
        wrapped in an iterable); for the other frequencies an iterable
        keyed by subject and/or visit ID.
    frequency : str
        One of 'per_study', 'per_session', 'per_subject', 'per_visit'.

    Raises
    ------
    ArcanaUsageError
        If more than one item is passed for 'per_study', if the
        frequency is unrecognised, or if any item is not an instance
        of `self.CollectedClass`.
    """
    self._frequency = frequency
    if frequency == 'per_study':
        # If wrapped in an iterable
        if not isinstance(collection, self.CollectedClass):
            if len(collection) > 1:
                # BUGFIX: the original referenced self.CONTAINED_CLASS
                # here, inconsistent with self.CollectedClass used
                # everywhere else in this method
                raise ArcanaUsageError(
                    "More than one {} passed to {}".format(
                        self.CollectedClass.__name__,
                        type(self).__name__))
            collection = list(collection)
        self._collection = collection
    elif frequency == 'per_session':
        # Nested mapping: subject ID -> (visit ID -> item), both
        # levels sorted by ID
        self._collection = OrderedDict()
        for subj_id in sorted(set(c.subject_id for c in collection)):
            self._collection[subj_id] = OrderedDict(
                sorted(((c.visit_id, c) for c in collection
                        if c.subject_id == subj_id),
                       key=itemgetter(0)))
    elif frequency == 'per_subject':
        self._collection = OrderedDict(
            sorted(((c.subject_id, c) for c in collection),
                   key=itemgetter(0)))
    elif frequency == 'per_visit':
        self._collection = OrderedDict(
            sorted(((c.visit_id, c) for c in collection),
                   key=itemgetter(0)))
    else:
        # Was `assert False` (stripped under -O); raise explicitly so
        # bad frequencies always fail loudly
        raise ArcanaUsageError(
            "Unrecognised frequency '{}'".format(frequency))
    # Validate the class of every collected item
    for datum in self:
        if not isinstance(datum, self.CollectedClass):
            raise ArcanaUsageError("Invalid class {} in {}".format(
                datum, self))
def __init__(self, name, extension=None, desc='', directory=False,
             within_dir_exts=None, aux_files=None, resource_names=None):
    """
    Define a file format: its name, extension, optional side-car
    files and repository resource names.

    Raises
    ------
    ArcanaUsageError
        If the arguments are inconsistent: non-lowercase name, a
        missing extension for a non-directory format,
        `within_dir_exts` given for a non-directory format, or a side
        car sharing the primary file's extension.
    """
    if not name.islower():
        raise ArcanaUsageError(
            "All data format names must be lower case ('{}')"
            .format(name))
    if extension is None and not directory:
        raise ArcanaUsageError(
            "Extension for '{}' format can only be None if it is a "
            "directory".format(name))
    self._name = name
    self._extension = extension
    self._desc = desc
    self._directory = directory
    if within_dir_exts is not None:
        if not directory:
            raise ArcanaUsageError(
                "'within_dir_exts' keyword arg is only valid "
                "for directory data formats, not '{}'".format(name))
        # Freeze so the extension set can't be mutated later
        within_dir_exts = frozenset(within_dir_exts)
    self._within_dir_exts = within_dir_exts
    self._converters = {}
    self._resource_names = {} if resource_names is None else resource_names
    self._aux_files = {} if aux_files is None else aux_files
    # A side car may not share the primary file's extension, since the
    # two could then not be distinguished
    for side_car_name, side_car_ext in self.aux_files.items():
        if side_car_ext == self.ext:
            raise ArcanaUsageError(
                "Extension for side car '{}' cannot be the same as the "
                "primary file ('{}')".format(side_car_name, side_car_ext))
def __init__(self, name, format=None, frequency='per_session',
             path=None, aux_files=None, id=None, uri=None,
             subject_id=None, visit_id=None, dataset=None,
             from_analysis=None, exists=True, checksums=None,
             record=None, resource_name=None, potential_aux_files=None,
             quality=None):
    """
    A single file-set (a file or directory, plus optional side-car
    files) within a dataset.

    Parameters
    ----------
    name : str
        Name of the fileset (passed to BaseFileset).
    format : FileFormat | None
        Format of the fileset. Required when `aux_files` is provided;
        mutually exclusive with `potential_aux_files`.
    frequency : str
        Frequency of the fileset within the dataset tree.
    path : str | None
        Path to the primary file on the local file system.
    aux_files : dict[str, str] | None
        Side-car name -> path. Only valid together with `path` and
        `format`, and the keys must exactly match the format's
        declared side cars.
    id, uri, checksums, resource_name, quality :
        Stored verbatim on the instance.
    subject_id, visit_id, dataset, from_analysis, exists, record :
        Passed to BaseItemMixin.
    potential_aux_files : iterable[str] | None
        Candidate side-car paths recorded while the format is still
        undetermined.

    Raises
    ------
    ArcanaUsageError
        On any inconsistent argument combination described above.
    """
    BaseFileset.__init__(self, name=name, format=format,
                         frequency=frequency)
    BaseItemMixin.__init__(self, subject_id, visit_id, dataset,
                           from_analysis, exists, record)
    # Side cars only make sense when both the primary path and the
    # format that declares them are known
    if aux_files is not None:
        if path is None:
            raise ArcanaUsageError(
                "Side cars provided to '{}' fileset ({}) but not primary "
                "path".format(self.name, aux_files))
        if format is None:
            raise ArcanaUsageError(
                "Side cars provided to '{}' fileset ({}) but format is "
                "not specified".format(self.name, aux_files))
    if path is not None:
        # Normalise to an absolute, symlink-resolved path
        path = op.abspath(op.realpath(path))
        if aux_files is None:
            aux_files = {}
        elif set(aux_files.keys()) != set(self.format.aux_files.keys()):
            # Provided side-car keys must exactly match those the
            # format declares
            raise ArcanaUsageError(
                "Provided side cars for '{}' but expected '{}'".format(
                    "', '".join(aux_files.keys()),
                    "', '".join(self.format.aux_files.keys())))
    self._path = path
    self._aux_files = aux_files if aux_files is not None else {}
    self._uri = uri
    self._id = id
    self._checksums = checksums
    self._resource_name = resource_name
    self._quality = quality
    # Potential aux files are only tracked while the format is still
    # undetermined
    if potential_aux_files is not None and format is not None:
        raise ArcanaUsageError(
            "Potential paths should only be provided to Fileset.__init__ "
            "({}) when the format of the fileset ({}) is not determined".
            format(self.name, format))
    if potential_aux_files is not None:
        potential_aux_files = list(potential_aux_files)
    self._potential_aux_files = potential_aux_files
def collection(self):
    """
    Return the collection of the bound default.

    Raises
    ------
    ArcanaUsageError
        If the spec is not bound to a study, or has no default.
    """
    bound_study = self._study
    if bound_study is None:
        raise ArcanaUsageError(
            "{} needs to be bound to a study before accessing "
            "the corresponding collection".format(self))
    default_spec = self.default
    if default_spec is None:
        raise ArcanaUsageError(
            "{} does not have default so cannot access its collection".
            format(self))
    return default_spec.collection
def slice(self):
    """
    Return the slice of the bound default.

    Raises
    ------
    ArcanaUsageError
        If the spec is not bound to an analysis, or has no default.
    """
    bound_analysis = self._analysis
    if bound_analysis is None:
        raise ArcanaUsageError(
            "{} needs to be bound to a analysis before accessing "
            "the corresponding slice".format(self))
    default_spec = self.default
    if default_spec is None:
        raise ArcanaUsageError(
            "{} does not have default so cannot access its slice".format(
                self))
    return default_spec.slice
def __init__(self, min_version, max_version):
    """
    A range of versions of a single requirement, bounded by
    `min_version` and `max_version`.

    Parameters
    ----------
    min_version, max_version : Version
        The bounds. Both must refer to the same requirement and
        satisfy min <= max.

    Raises
    ------
    ArcanaUsageError
        If the versions belong to different requirements, or the
        maximum is less than the minimum.
    """
    if min_version.requirement != max_version.requirement:
        raise ArcanaUsageError(
            "Inconsistent requirements between min and max versions "
            "({} and {})".format(min_version.requirement,
                                 max_version.requirement))
    self._min_ver = min_version
    self._max_ver = max_version
    # Checked after the attributes are assigned so that formatting
    # `self` in the message can read them
    if max_version < min_version:
        # BUGFIX: reworded the original garbled message
        # ("Maxium version in is less than minimum in {}")
        raise ArcanaUsageError(
            "Maximum version is less than minimum in {}".format(self))
def __init__(self, name, default, choices=None, desc=None,
             fallbacks=None):
    """
    A switch specification with a default value and, for non-boolean
    switches, an explicit set of valid choices.

    Raises
    ------
    ArcanaUsageError
        If choices are supplied for a boolean switch, or omitted for
        a non-boolean one.
    """
    super(SwitchSpec, self).__init__(name, default, desc=desc)
    if self.is_boolean:
        # Boolean switches take no choices -- True/False is implied
        if choices is not None:
            raise ArcanaUsageError(
                "Choices ({}) are only valid for non-boolean "
                "switches ('{}')".format("', '".join(choices), name))
    elif choices is None:
        raise ArcanaUsageError("Choices must be provided for non-boolean "
                               "switches ('{}')".format(name))
    self._choices = None if choices is None else tuple(choices)
    self._desc = desc
    self._fallbacks = {} if fallbacks is None else fallbacks
def __init__(self, name, value=None, dtype=None, frequency='per_session',
             array=None, subject_id=None, visit_id=None, dataset=None,
             from_analysis=None, exists=True, record=None):
    """
    A field (scalar or array of scalars) within a dataset.

    Parameters
    ----------
    name : str
        Name of the field (passed to BaseField).
    value : object | list | None
        The field's value. If None, `dtype` must be given instead.
        Parsed with `parse_value` before type/array inference.
    dtype : type | None
        Data type of the value. Inferred from `value` if omitted;
        if given, the value is cast to it.
    frequency : str
        Frequency of the field within the dataset tree.
    array : bool | None
        Whether the field holds a list of values. Inferred from
        `value` if omitted and checked against it otherwise.
    subject_id, visit_id, dataset, from_analysis, exists, record :
        Passed to BaseItemMixin.

    Raises
    ------
    ArcanaUsageError
        If neither value nor dtype is given, or if the value's
        list-ness contradicts the explicit `array` flag.
    """
    # Try to determine dtype and array from value if they haven't
    # been provided.
    if value is None:
        if dtype is None:
            raise ArcanaUsageError(
                "Either 'value' or 'dtype' must be provided to "
                "Field init")
        array = bool(array)  # Convert to array is None to False
    else:
        value = parse_value(value)
        if isinstance(value, list):
            # A list value contradicts an explicit array=False
            if array is False:
                raise ArcanaUsageError(
                    "Array value passed to '{}', which is explicitly not "
                    "an array ({})".format(name, value))
            array = True
        else:
            if array:
                raise ArcanaUsageError(
                    "Non-array value ({}) passed to '{}', which expects "
                    "array{}".format(value, name,
                                     ('of type {}'.format(dtype)
                                      if dtype is not None else '')))
            array = False
        if dtype is None:
            # Infer dtype from the (first) value
            if array:
                dtype = type(value[0])
            else:
                dtype = type(value)
        else:
            # Ensure everything is cast to the correct type
            if array:
                value = [dtype(v) for v in value]
            else:
                value = dtype(value)
    BaseField.__init__(self, name, dtype, frequency, array)
    BaseItemMixin.__init__(self, subject_id, visit_id, dataset,
                           from_analysis, exists, record)
    self._value = value
def check_valid(self, switch, context=''):
    """
    Validate a switch value against this spec: booleans must be bool,
    anything else must be the default or one of the declared choices.

    Raises
    ------
    ArcanaUsageError
        If the value fails either check.
    """
    super(SwitchSpec, self).check_valid(switch, context=context)
    value = switch.value
    if self.is_boolean:
        if not isinstance(value, bool):
            raise ArcanaUsageError(
                "Value provided to switch '{}'{} should be a "
                "boolean (not {})".format(self.name, context, value))
    elif value != self.default and value not in self.choices:
        raise ArcanaUsageError(
            "Value provided to switch '{}'{} ({}) is not a valid "
            "choice ('{}')".format(
                self.name, context, value,
                "', '".join(str(c) for c in self.choices)))
def _list_outputs(self):
    """
    Report the single file/directory produced by unzipping (anything
    in the working directory that wasn't there before the command
    ran) as the 'gunzipped' output.

    Raises
    ------
    ArcanaUsageError
        If more than one, or no, new entry is found.
    """
    outputs = self._outputs().get()
    cwd = os.getcwd()
    created = set(os.listdir(cwd)) - self.listdir_before
    if len(created) > 1:
        raise ArcanaUsageError(
            "Zip repositorys can only contain a single directory, found "
            "'{}'".format("', '".join(created)))
    try:
        unzipped = next(iter(created))
    except StopIteration:
        raise ArcanaUsageError(
            "No files or directories found in unzipped directory")
    outputs['gunzipped'] = op.join(cwd, unzipped)
    return outputs
def _list_outputs(self):
    """
    Apply the configured binary operator to the inputs: z = op(x, y),
    or a reduction of the list `x` when `y` is undefined. String
    inputs are treated as file paths and loaded first. The result is
    either returned directly or written to a file, depending on
    `as_file`.

    Raises
    ------
    ArcanaUsageError
        If `x` is an empty list, or if `y` is undefined and `x` is
        not a list.
    """
    x = self.inputs.x
    y = self.inputs.y
    # String inputs are paths to serialized values; load them
    if isinstance(x, basestring):
        x = self._load_file(x)
    if isinstance(y, basestring):
        y = self._load_file(y)
    oper = getattr(operator, self.inputs.op)
    if isdefined(y):
        z = oper(x, y)
    elif isinstance(x, list):
        if not x:
            raise ArcanaUsageError("Cannot provide empty list to 'x'")
        if isinstance(x[0], basestring):
            x = [self._load_file(u) for u in x]
        z = reduce(oper, x)
    else:
        # BUGFIX: was a bare `Exception`; use ArcanaUsageError for
        # consistency with the rest of the file (still caught by any
        # caller catching Exception)
        raise ArcanaUsageError(
            "If 'y' is not provided then x needs to be list")
    outputs = self.output_spec().get()
    if self.inputs.as_file:
        z_path = op.abspath(self._gen_z_fname())
        with open(z_path, 'w') as f:
            f.write(str(z))
        outputs['z'] = z_path
    else:
        outputs['z'] = z
    return outputs
def __init__(self, spec_name, pattern=None, valid_formats=None,
             frequency='per_session', id=None, order=None,
             dicom_tags=None, is_regex=False, from_study=None,
             skip_missing=False, drop_if_missing=False,
             fallback_to_default=False, repository=None,
             acceptable_quality=None, study_=None, collection_=None):
    """
    Criteria for matching a fileset in a repository to an input spec:
    by name pattern, numeric ID or acquisition order, optionally
    filtered by DICOM tags and acceptable quality labels.

    Raises
    ------
    ArcanaUsageError
        If both 'order' and 'id' are supplied.
    """
    BaseFileset.__init__(self, spec_name, None, frequency)
    BaseInputMixin.__init__(self, pattern, is_regex, order, from_study,
                            skip_missing, drop_if_missing,
                            fallback_to_default, repository, study_,
                            collection_)
    self._dicom_tags = dicom_tags
    # 'order' and 'id' are alternative ways of disambiguating matches
    if order is not None and id is not None:
        raise ArcanaUsageError(
            "Cannot provide both 'order' and 'id' to a fileset"
            "match")
    # Normalise valid_formats to a tuple (a bare format is wrapped)
    if valid_formats is not None:
        try:
            valid_formats = tuple(valid_formats)
        except TypeError:
            valid_formats = (valid_formats,)
    self._valid_formats = valid_formats
    self._id = None if id is None else str(id)
    # Normalise acceptable_quality to a tuple of labels
    if isinstance(acceptable_quality, basestring):
        acceptable_quality = (acceptable_quality,)
    elif acceptable_quality is not None:
        acceptable_quality = tuple(acceptable_quality)
    self._acceptable_quality = acceptable_quality
def fileset_path(self, fileset, dataset=None, fname=None):
    """
    Return the path at which a derived fileset is stored, creating
    the BIDS-style session directory
    (<dataset>/derivatives/<analysis>/sub-<id>/ses-<id>) if needed.
    Missing subject/visit IDs are replaced with `self.SUMMARY_NAME`.

    Raises
    ------
    ArcanaUsageError
        If the fileset is not derived.
    """
    if not fileset.derived:
        raise ArcanaUsageError(
            "Can only get automatically get path to derived filesets not "
            "{}".format(fileset))
    dataset = fileset.dataset if dataset is None else dataset
    fname = fileset.fname if fname is None else fname
    # Summary-level items have no subject and/or visit ID
    subject_id = (fileset.subject_id if fileset.subject_id is not None
                  else self.SUMMARY_NAME)
    visit_id = (fileset.visit_id if fileset.visit_id is not None
                else self.SUMMARY_NAME)
    sess_dir = op.join(dataset.name, 'derivatives', fileset.from_analysis,
                       'sub-{}'.format(subject_id),
                       'ses-{}'.format(visit_id))
    # Make session dir if required
    if not op.exists(sess_dir):
        os.makedirs(sess_dir, stat.S_IRWXU | stat.S_IRWXG)
    return op.join(sess_dir, fname)
def coreg_brain_pipeline(self, **name_maps):
    """
    Coregistered + brain-extracted images can be derived in 2-ways.
    If an explicit brain-extracted reference is provided to
    'coreg_ref_brain' then that is used to coregister a brain
    extracted image against. Alternatively, if only a skull-included
    reference is provided then the registration is performed with
    skulls-included and then brain extraction is performed after.

    Raises
    ------
    ArcanaUsageError
        If neither 'coreg_ref' nor 'coreg_ref_brain' is provided.
    """
    # Preferred route: register the brain image directly against a
    # brain-extracted reference
    if self.provided('coreg_ref_brain'):
        return self.linear_coreg_pipeline(
            name='linear_coreg_brain',
            input_map={
                'preproc': 'brain',
                'coreg_ref': 'coreg_ref_brain'},
            output_map={'coreg': 'coreg_brain'},
            name_maps=name_maps)
    # Fallback: brain-extract the (skull-included) coregistered image
    if self.provided('coreg_ref'):
        return self.brain_extraction_pipeline(
            name='linear_coreg_brain',
            input_map={'preproc': 'coreg'},
            output_map={'brain': 'coreg_brain'},
            name_maps=name_maps)
    raise ArcanaUsageError(
        "Either 'coreg_ref' or 'coreg_ref_brain' needs to be provided "
        "in order to derive coreg_brain")
def format(self):
    """
    Return the file format of this spec's default.

    Raises
    ------
    ArcanaUsageError
        If the spec has no default (so no format can be determined).
    """
    try:
        # Missing default (None) has no `.format` attribute
        return self.default.format
    except AttributeError:
        raise ArcanaUsageError(
            "File format is not defined for InputFilesetSpec objects "
            "without a default")
def segmentation_pipeline(self, img_type=2, **name_maps):
    """
    Build a pipeline that segments white matter from the brain image
    using FSL FAST, then splits the tissue-class outputs to select
    the WM class.

    Parameters
    ----------
    img_type : int
        FAST image type, either 1 or 2 (also selects which Split
        output holds the white-matter class).
    **name_maps
        Forwarded to `new_pipeline`.

    Returns
    -------
    Pipeline

    Raises
    ------
    ArcanaUsageError
        If `img_type` is neither 1 nor 2.
    """
    # Determine output field of split to use; validated up-front so we
    # fail before building any pipeline nodes
    if img_type == 1:
        split_output = 'out3'
    elif img_type == 2:
        split_output = 'out2'
    else:
        raise ArcanaUsageError(
            "'img_type' parameter can either be 1 or 2 (not {})".format(
                img_type))
    pipeline = self.new_pipeline(
        name='FAST_segmentation',
        name_maps=name_maps,
        inputs=[FilesetSpec('brain', nifti_gz_format)],
        outputs=[FilesetSpec('wm_seg', nifti_gz_format)],
        desc="White matter segmentation of the reference image",
        references=[fsl_cite])
    # BUGFIX: the original had a stray trailing comma after this call,
    # which made `fast` a 1-tuple instead of the added node, so the
    # Split connection below received a nested tuple
    fast = pipeline.add(
        'fast',
        fsl.FAST(img_type=img_type,
                 segments=True,
                 out_basename='Reference_segmentation'),
        inputs={'in_files': ('brain', nifti_gz_format)},
        requirements=[fsl_req.v('5.0.9')])
    pipeline.add(
        'split',
        Split(splits=[1, 1, 1], squeeze=True),
        connect={'inlist': (fast, 'tissue_class_files')},
        outputs={split_output: ('wm_seg', nifti_gz_format)})
    return pipeline
def __init__(self, pattern, is_regex, order, from_analysis,
             skip_missing=False, drop_if_missing=False,
             fallback_to_default=False, dataset=None, analysis_=None,
             slice_=None):
    """
    Mixin holding the common state of input matchers: the match
    pattern, ordering, source analysis namespace and the policy for
    handling missing matches.

    Raises
    ------
    ArcanaUsageError
        If both 'skip_missing' and 'fallback_to_default' are set.
    """
    self._pattern = pattern
    self._is_regex = is_regex
    self._order = order
    self._from_analysis = from_analysis
    self._dataset = dataset
    # Missing-data policy flags (mutually exclusive, checked below)
    self._skip_missing = skip_missing
    self._drop_if_missing = drop_if_missing
    self._fallback_to_default = fallback_to_default
    # NOTE: checked after the assignments above so that formatting
    # `self` in the error message can safely read the attributes
    if skip_missing and fallback_to_default:
        raise ArcanaUsageError(
            "Cannot provide both mutually exclusive 'skip_missing' and "
            "'fallback_to_default' flags to {}".format(self))
    # Set when fallback_to_default is True and there are missing matches
    self._derivable = False
    self._fallback = None
    # analysis_ and slice_ are not intended to be provided to __init__
    # except when recreating when using initkwargs
    self._analysis = analysis_
    self._slice = slice_
def pipeline_getter(self):
    """
    For duck-typing with *Spec types.

    Raises
    ------
    ArcanaUsageError
        If this input does not fall back to a derived spec.
    """
    if self.derivable:
        return self._fallback.pipeline_getter
    raise ArcanaUsageError(
        "There is no pipeline getter for {} because it doesn't "
        "fallback to a derived spec".format(self))
def get_checksums(self, fileset):
    """
    Download the MD5 digests associated with the files in the
    file-set from the server, keyed by file name.

    For non-directory formats the key of the primary file is replaced
    with '.' to match the way checksums are generated locally by
    Arcana.

    Parameters
    ----------
    fileset : Fileset
        The fileset to get the checksums for. Its `uri` must be set.

    Returns
    -------
    dict[str, str]

    Raises
    ------
    ArcanaUsageError
        If the fileset's URI has not been set.
    """
    if fileset.uri is None:
        raise ArcanaUsageError(
            "Can't retrieve checksums as URI has not been set for {}"
            .format(fileset))
    with self:
        result = self.login.get_json(
            fileset.uri + '/files')['ResultSet']['Result']
        checksums = dict((entry['Name'], entry['digest'])
                         for entry in result)
    if not fileset.format.directory:
        # Replace the key corresponding to the primary file with '.'
        # to match the way that checksums are created by Arcana
        primary = fileset.format.assort_files(checksums.keys())[0]
        checksums['.'] = checksums.pop(primary)
    return checksums
def __init__(self, name, repository=None, subject_ids=None,
             visit_ids=None, fill_tree=False, depth=0,
             subject_id_map=None, visit_id_map=None, file_formats=(),
             clear_cache=True):
    """
    A dataset rooted at `name` (a directory that must exist for local
    repositories), optionally restricted to subsets of subject and
    visit IDs and with optional ID-mapping dictionaries.

    Raises
    ------
    ArcanaUsageError
        If the base directory does not exist.
    """
    if repository is None:
        # needs to be imported here to avoid circular imports
        from .local import LocalFileSystemRepo
        repository = LocalFileSystemRepo()
    if not op.exists(name):
        raise ArcanaUsageError(
            "Base directory for LocalFileSystemRepo '{}' does not "
            "exist".format(name))
    self._name = repository.standardise_name(name)
    self._repository = repository
    # Freeze the ID filters as tuples; None means "no restriction"
    self._subject_ids = (None if subject_ids is None
                         else tuple(subject_ids))
    self._visit_ids = None if visit_ids is None else tuple(visit_ids)
    self._fill_tree = fill_tree
    self._depth = depth
    if clear_cache:
        self.clear_cache()
    self._subject_id_map = subject_id_map
    self._visit_id_map = visit_id_map
    self._inv_subject_id_map = {}
    self._inv_visit_id_map = {}
    self._file_formats = file_formats
    self._cached_tree = None
def __init__(self, name, collection, frequency=None, dtype=None,
             array=None):
    """
    A collection of fields across a dataset tree. Frequency, dtype
    and array-ness may be supplied explicitly or inferred from the
    collected items; when both are available they must agree.

    Raises
    ------
    ArcanaUsageError
        If explicit and inferred attributes disagree, or if the
        collection is empty and frequency/dtype are not explicit.
    """
    collection = list(collection)
    if collection:
        # Infer shared attributes from the items and cross-check any
        # explicitly supplied values against them
        inferred_freq = self._common_attr(collection, 'frequency')
        if frequency is None:
            frequency = inferred_freq
        elif frequency != inferred_freq:
            raise ArcanaUsageError(
                "Implicit frequency '{}' does not match explicit "
                "frequency '{}' for '{}' FieldCollection".format(
                    inferred_freq, frequency, name))
        inferred_dtype = self._common_attr(collection, 'dtype')
        if dtype is None:
            dtype = inferred_dtype
        elif dtype != inferred_dtype:
            raise ArcanaUsageError(
                "Implicit dtype '{}' does not match explicit "
                "dtype '{}' for '{}' FieldCollection".format(
                    inferred_dtype, dtype, name))
        inferred_array = self._common_attr(collection, 'array')
        if array is None:
            array = inferred_array
        elif array != inferred_array:
            raise ArcanaUsageError(
                "Implicit array '{}' does not match explicit "
                "array '{}' for '{}' FieldCollection".format(
                    inferred_array, array, name))
    # Nothing to infer from an empty collection, so these must have
    # been given (or successfully inferred) by now
    if frequency is None:
        raise ArcanaUsageError(
            "Need to provide explicit frequency for empty "
            "FieldCollection")
    if dtype is None:
        raise ArcanaUsageError("Need to provide explicit dtype for empty "
                               "FieldCollection")
    BaseField.__init__(self, name, dtype=dtype, frequency=frequency,
                       array=array)
    BaseCollectionMixin.__init__(self, collection, frequency)
def from_path(cls, path, **kwargs):
    """
    Construct a fileset from a file-system path, deriving its name
    from the basename (with the extension stripped for files).

    Raises
    ------
    ArcanaUsageError
        If the path does not exist.
    """
    if not op.exists(path):
        raise ArcanaUsageError(
            "Attempting to read Fileset from path '{}' but it "
            "does not exist".format(path))
    basename = op.basename(path)
    # Directories keep their full basename; files lose the extension
    name = basename if op.isdir(path) else split_extension(basename)[0]
    return cls(name, path=path, **kwargs)
def coreg_matrix_pipeline(self, **name_maps):
    """
    Return the pipeline that produces the coregistration matrix,
    depending on which reference input was provided.

    Raises
    ------
    ArcanaUsageError
        If neither 'coreg_ref' nor 'coreg_ref_brain' is provided.
    """
    if self.provided('coreg_ref_brain'):
        return self.coreg_brain_pipeline(**name_maps)
    if self.provided('coreg_ref'):
        return self.linear_coreg_pipeline(**name_maps)
    raise ArcanaUsageError(
        "'coreg_matrix' can only be derived if 'coreg_ref' or "
        "'coreg_ref_brain' is provided to {}".format(self))
def __init__(self, name, slce, format=None, frequency=None,
             candidate_formats=None):
    """
    A slice of filesets across a dataset tree. The format may be
    supplied explicitly, detected from `candidate_formats`, or
    inferred from the filesets themselves (in which case it must be
    homogeneous).

    Raises
    ------
    ArcanaUsageError
        If the implicit formats are heterogeneous with no explicit
        format/candidates, if explicit and implicit frequencies
        disagree, or if an empty slice is given without an explicit
        frequency.
    """
    # BUGFIX: materialise `slce` up-front. The original iterated it
    # once to collect formats and only called list() afterwards, which
    # silently yielded an empty slice when a generator was passed.
    slce = list(slce)
    if format is None and candidate_formats is None:
        # Infer a single common format from the filesets
        formats = set(d.format for d in slce)
        if len(formats) > 1:
            raise ArcanaUsageError(
                "Either 'format' or candidate_formats needs to be supplied"
                " during the initialisation of a FilesetSlice ('{}') with "
                "heterogeneous formats".format(name))
        format = next(iter(formats))
    if not slce:
        if format is None:
            format = candidate_formats[0]
        if frequency is None:
            raise ArcanaUsageError(
                "Need to provide explicit frequency for empty "
                "FilesetSlice")
    else:
        implicit_frequency = self._common_attr(slce, 'frequency')
        if frequency is None:
            frequency = implicit_frequency
        elif frequency != implicit_frequency:
            raise ArcanaUsageError(
                "Implicit frequency '{}' does not match explicit "
                "frequency '{}' for '{}' FilesetSlice".format(
                    implicit_frequency, frequency, name))
        # Assign a format to any existing fileset that lacks one,
        # working on copies so the caller's objects aren't mutated
        formatted_slice = []
        for fileset in slce:
            fileset = copy(fileset)
            if fileset.exists and fileset.format is None:
                fileset.format = (
                    fileset.detect_format(candidate_formats)
                    if format is None else format)
            formatted_slice.append(fileset)
        slce = formatted_slice
        format = self._common_attr(slce, 'format')
    BaseFileset.__init__(self, name, format, frequency=frequency)
    BaseSliceMixin.__init__(self, slce, frequency)
def _gen_prov_path_regex(self, path):
    """
    Convert a provenance in/exclude path into a compiled regex that
    matches keys of the "root['a']['b']..." form.

    Parameters
    ----------
    path : str | re.Pattern
        Either a '/'-separated path string, or an already-compiled
        regular expression (returned unchanged).

    Returns
    -------
    re.Pattern

    Raises
    ------
    ArcanaUsageError
        If `path` is neither a string nor a compiled regex.
    """
    if isinstance(path, basestring):
        if path.startswith('/'):
            path = path[1:]
        regex = re.compile(r"root\['{}'\].*"
                           .format(r"'\]\['".join(path.split('/'))))
    elif isinstance(path, re.Pattern):
        # BUGFIX: the original never assigned `regex` in this case,
        # raising UnboundLocalError on return for compiled patterns
        regex = path
    else:
        raise ArcanaUsageError(
            "Provenance in/exclude paths can either be path strings or "
            "regexes, not '{}'".format(path))
    return regex
def path_depth(cls, root_dir, dpath):
    """
    Return the depth of `dpath` below `root_dir`: 0 when they are the
    same directory, 1 for a direct child, and so on.

    Raises
    ------
    ArcanaUsageError
        If `dpath` is not inside `root_dir`.
    """
    relpath = op.relpath(dpath, root_dir)
    # BUGFIX: the original used a substring test (`'..' in relpath`)
    # which falsely rejected directories whose *name* contains '..'.
    # op.relpath normalises its result, so escaping the root can only
    # put a '..' component at the start of the relative path.
    if relpath == op.pardir or relpath.startswith(op.pardir + op.sep):
        raise ArcanaUsageError(
            "Path '{}' is not a sub-directory of '{}'".format(
                dpath, root_dir))
    elif relpath == '.':
        depth = 0
    else:
        depth = relpath.count(op.sep) + 1
    return depth
def dataset(self):
    """
    Return the dataset, preferring one provided explicitly over that
    of the bound analysis.

    Raises
    ------
    ArcanaUsageError
        If no dataset was provided and the input is not bound to an
        analysis.
    """
    if self._dataset is not None:
        return self._dataset
    if self._analysis is None:
        raise ArcanaUsageError(
            "Cannot access dataset of {} as it wasn't explicitly "
            "provided and Input hasn't been bound to a analysis".
            format(self))
    return self._analysis.dataset
def repository(self):
    """
    Return the repository, preferring one provided explicitly over
    that of the bound study.

    Raises
    ------
    ArcanaUsageError
        If no repository was provided and the input is not bound to a
        study.
    """
    if self._repository is not None:
        return self._repository
    if self._study is None:
        raise ArcanaUsageError(
            "Cannot access repository of {} as it wasn't explicitly "
            "provided and Input hasn't been bound to a study"
            .format(self))
    return self._study.repository
def _common_attr(self, collection, attr_name, ignore_none=True):
    """
    Return the single value of attribute `attr_name` shared by every
    item in `collection` (None values are ignored by default).
    Returns None if no (non-None) values are present.

    Raises
    ------
    ArcanaUsageError
        If more than one distinct value is found.
    """
    values = {getattr(item, attr_name) for item in collection}
    if ignore_none:
        values.discard(None)
    if len(values) > 1:
        raise ArcanaUsageError(
            "Heterogeneous attributes for '{}' within {}".format(
                attr_name, self))
    try:
        return next(iter(values))
    except StopIteration:
        # Empty (or all-None) collection
        return None