def _run_module_cmd(cls, *args):
    if 'MODULESHOME' in os.environ:
        # Prefer a 'modulecmd' found on the PATH, falling back to the one
        # bundled with the Modules installation itself
        try:
            modulecmd = sp.check_output('which modulecmd',
                                        shell=True).strip()
        except sp.CalledProcessError:
            modulecmd = False
        if not modulecmd:
            modulecmd = '{}/bin/modulecmd'.format(
                os.environ['MODULESHOME'])
            if not os.path.exists(modulecmd):
                raise ArcanaError(
                    "Cannot find 'modulecmd' on path or in MODULESHOME.")
        logger.debug("Running modules command '{}'".format(' '.join(args)))
        try:
            output, error = sp.Popen(
                [modulecmd, 'python'] + list(args),
                stdout=sp.PIPE, stderr=sp.PIPE).communicate()
        except (sp.CalledProcessError, OSError) as e:
            raise ArcanaError(
                "Call to subprocess `{}` threw an error: {}".format(
                    ' '.join([modulecmd, 'python'] + list(args)), e))
        # 'modulecmd python' emits Python code (typically os.environ
        # updates) on stdout, which needs to be exec'd to take effect
        exec(output)
        if PY3:
            error = error.decode('utf-8')
        return error
    else:
        raise ArcanaModulesNotInstalledException('MODULESHOME')
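# Usage sketch (hypothetical, assuming this is a classmethod of
# ModulesEnvironment and that environment modules are installed):
#
#     avail = ModulesEnvironment._run_module_cmd('avail')
#     ModulesEnvironment._run_module_cmd('load', 'fsl/5.0.11')
#
# 'avail' and 'load' are standard modulecmd sub-commands; the module
# name/version shown is a placeholder.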
def _run_interface(self, runtime):
    # first_req/second_req are module-level Requirement objects for the
    # mrtrix and dcm2niix modules respectively
    loaded_modules = ModulesEnvironment.preloaded()
    if first_req.name not in loaded_modules:
        raise ArcanaError("Mrtrix module was not loaded in Node")
    if second_req.name not in loaded_modules:
        raise ArcanaError("Dcm2niix module was not loaded in Node")
    return runtime
def assertField(self, name, ref_value, from_study, subject=None, visit=None,
                frequency='per_session', to_places=None):
    esc_name = from_study + '_' + name
    output_dir = self.get_session_dir(subject, visit, frequency)
    try:
        with open(op.join(output_dir,
                          DirectoryRepository.FIELDS_FNAME)) as f:
            fields = json.load(f)
    except IOError as e:
        if e.errno == errno.ENOENT:
            raise ArcanaError(
                "No fields were created by pipeline in study '{}'"
                .format(from_study))
        else:
            raise
    try:
        value = fields[esc_name]
    except KeyError:
        raise ArcanaError(
            "Field '{}' was not created by pipeline in study '{}'. "
            "Created fields were ('{}')"
            .format(esc_name, from_study, "', '".join(fields)))
    msg = ("Value of field '{}' in study '{}', {}, does not match "
           "reference value ({})".format(name, from_study, value,
                                         ref_value))
    if to_places is not None:
        self.assertAlmostEqual(
            value, ref_value, to_places,
            '{} to {} decimal places'.format(msg, to_places))
    else:
        self.assertEqual(value, ref_value, msg)
def __init__(self, work_dir, partition=None, account=None, email=None,
             mail_on=('FAIL',), generic_resources=None,
             ntasks_per_node=None, cpus_per_task=None, **kwargs):
    if email is None:
        try:
            email = os.environ['EMAIL']
        except KeyError:
            raise ArcanaError(
                "'email' kwarg needs to be provided for SlurmProcessor"
                " if 'EMAIL' environment variable not set")
    self._email = email
    self._mail_on = mail_on
    self._account = account
    self._partition = partition
    self._ntasks_per_node = ntasks_per_node
    self._cpus_per_task = cpus_per_task
    self._generic_resources = generic_resources
    super(SlurmProcessor, self).__init__(work_dir, **kwargs)
def add_session(self, filesets=None, fields=None, project_dir=None,
                subject=None, visit=None):
    if project_dir is None:
        project_dir = self.project_dir
    if filesets is None:
        filesets = {}
    if subject is None:
        subject = self.SUBJECT
    if visit is None:
        visit = self.VISIT
    session_dir = op.join(project_dir, subject, visit)
    os.makedirs(session_dir)
    for name, fileset in list(filesets.items()):
        if isinstance(fileset, Fileset):
            dst_path = op.join(session_dir, name + fileset.format.ext_str)
            if fileset.format.directory:
                shutil.copytree(fileset.path, dst_path)
            else:
                shutil.copy(fileset.path, dst_path)
        elif isinstance(fileset, basestring):
            # Write string as text file
            with open(op.join(session_dir, name + '.txt'), 'w') as f:
                f.write(fileset)
        else:
            raise ArcanaError(
                "Unrecognised fileset ({}) in {} test setup. Can "
                "be either a Fileset or basestring object"
                .format(fileset, self))
    if fields is not None:
        with open(op.join(session_dir,
                          DirectoryRepository.FIELDS_FNAME), 'w',
                  **JSON_ENCODING) as f:
            json.dump(fields, f)
def _run_interface(self, runtime):
    dirname = self.out_dir
    os.makedirs(dirname)
    ext = self.inputs.extension
    if isdefined(self.inputs.file_names):
        if len(self.inputs.file_names) != len(self.inputs.in_files):
            raise ArcanaError(
                "Number of provided filenames ({}) does not match number "
                "of provided files ({})".format(
                    len(self.inputs.file_names),
                    len(self.inputs.in_files)))
    for i, f in enumerate(self.inputs.in_files):
        if isdefined(self.inputs.file_names):
            path = op.join(self.out_dir,
                           op.basename(self.inputs.file_names[i]))
            if op.isdir(f):
                shutil.copytree(f, path)
            else:
                shutil.copy(f, path)
        else:
            # Not quite sure what is going on here, probably should
            # ask Francesco what this logic is for
            if op.isdir(f):
                out_name = f.split('/')[-1]
                if ext:
                    out_name = '{0}_{1}'.format(out_name,
                                                ext + str(i).zfill(3))
                shutil.copytree(f, dirname + '/{}'.format(out_name))
            elif op.isfile(f):
                if ext == '.dcm':
                    fname = op.join(dirname, str(i).zfill(4)) + ext
                else:
                    fname = dirname
                shutil.copy(f, fname)
    return runtime
def path(self):
    if self._path is None:
        if self.repository is not None:
            self._path = self.repository.get_fileset(self)
        else:
            raise ArcanaError(
                "Neither path nor repository has been set for Fileset("
                "'{}')".format(self.name))
    return self._path
def value(self):
    if self._value is None:
        if self.repository is not None:
            self._value = self.repository.get_field(self)
        else:
            raise ArcanaError(
                "Neither value nor repository has been set for Field("
                "'{}')".format(self.name))
    return self._value
def pipeline(self):
    if self.pipeline_name is None:
        raise ArcanaUsageError(
            "{} is an acquired data spec so doesn't have a pipeline"
            .format(self))
    try:
        getter = getattr(self.study, self.pipeline_name)
    except AttributeError:
        raise ArcanaError(
            "There is no pipeline method named '{}' present in "
            "'{}' study".format(self.pipeline_name, self.study))
    # Set up study to record which parameters are referenced during the
    # pipeline generation
    self.study._pipeline_to_generate = self.pipeline_name
    self.study._referenced_parameters = set()
    try:
        pipeline = getter()
        if pipeline is None:
            raise ArcanaDesignError(
                "'{}' pipeline constructor in {} is missing return "
                "statement (should return a Pipeline object)".format(
                    self.pipeline_name, self.study))
        # Copy referenced parameters to pipeline
        pipeline._referenced_parameters = (
            self.study._referenced_parameters)
    except AttributeError as e:
        # Need to capture this as an exception to avoid it getting
        # confused with specs that don't have pipelines
        raise ArcanaError("AttributeError was thrown attempting to "
                          "construct '{}': {}".format(
                              self.pipeline_name, e))
    finally:
        # Reset referenced parameters after generating pipeline
        self.study._pipeline_to_generate = None
        self.study._referenced_parameters = None
    if self.name not in pipeline.output_names:
        raise ArcanaOutputNotProducedException(
            "'{}' is not produced by {} pipeline in {} class given the "
            "provided switches ({}) and the missing inputs ('{}')".format(
                self.name, pipeline.name, self.study.__class__.__name__,
                ', '.join('{}={}'.format(s.name, s.value)
                          for s in self.study.switches),
                "', '".join(self.study.missing_inputs)))
    return pipeline
def __init__(self, root_dir, depth=None):
    super(DirectoryRepository, self).__init__()
    if not op.exists(root_dir):
        raise ArcanaError(
            "Base directory for DirectoryRepository '{}' does not exist"
            .format(root_dir))
    self._root_dir = op.abspath(root_dir)
    if depth is None:
        depth = self.guess_depth(root_dir)
    self._depth = depth
def get_atlas_path(name, dataset='brain', resolution='1mm'):
    """
    Returns the path to the atlas (or atlas mask) in the arcana
    repository

    Parameters
    ----------
    name : str
        Name of the atlas, currently only 'MNI152' is recognised
    dataset : str
        Which dataset to return, one of 'image', 'mask', 'mask_dilated'
        or 'brain' (the default)
    resolution : str
        Voxel resolution of the atlas, one of '0.5mm', '1mm' or '2mm'
    """
    if name == 'MNI152':
        # MNI ICBM 152 non-linear 6th Generation Symmetric Average Brain
        # Stereotaxic Registration Model (http://nist.mni.mcgill.ca/?p=858)
        if resolution not in ['0.5mm', '1mm', '2mm']:
            raise ArcanaError(
                "Invalid resolution for MNI152, '{}', can be one of "
                "'0.5mm', '1mm' or '2mm'".format(resolution))
        if dataset == 'image':
            path = os.path.join(get_fsl_reference_path(),
                                'MNI152_T1_{}.nii.gz'.format(resolution))
        elif dataset == 'mask':
            path = os.path.join(
                get_fsl_reference_path(),
                'MNI152_T1_{}_brain_mask.nii.gz'.format(resolution))
        elif dataset == 'mask_dilated':
            if resolution != '2mm':
                raise ArcanaError(
                    "Dilated MNI masks are not available for {} "
                    "resolution".format(resolution))
            path = os.path.join(
                get_fsl_reference_path(),
                'MNI152_T1_{}_brain_mask_dil.nii.gz'.format(resolution))
        elif dataset == 'brain':
            path = os.path.join(
                get_fsl_reference_path(),
                'MNI152_T1_{}_brain.nii.gz'.format(resolution))
        else:
            raise ArcanaError("Unrecognised dataset '{}'".format(dataset))
    else:
        raise ArcanaError("Unrecognised atlas name '{}'".format(name))
    return os.path.abspath(path)
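# Usage sketch (the returned paths depend on the local FSL installation
# located by get_fsl_reference_path, so the results are illustrative only):
#
#     atlas = get_atlas_path('MNI152')                  # 1mm brain image
#     mask = get_atlas_path('MNI152', dataset='mask',
#                           resolution='2mm')           # 2mm brain mask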
def _gen_outfilename(self):
    if isdefined(self.inputs.out_file):
        if not self.inputs.out_file.endswith('.mat'):
            raise ArcanaError(
                "Output NODDI ROI should be saved with '.mat' extension "
                "(provided '{}')".format(self.inputs.out_file))
        out_name = self.inputs.out_file
    else:
        base, _ = split_extension(os.path.basename(self.inputs.in_file))
        out_name = os.path.join(os.getcwd(), "{}_ROI.mat".format(base))
    return out_name
def later_or_equal_version(cls, version, reference):
    # Missing parts are padded with 0, so (1, 2) is compared as if it
    # were (1, 2, 0) against a three-part reference
    for v_part, r_part in zip_longest(version, reference, fillvalue=0):
        if type(v_part) != type(r_part):
            raise ArcanaError(
                "Type of version part {} (of '{}'), {}, does not match "
                "type of reference part {}, {}".format(
                    v_part, version, type(v_part), r_part, type(r_part)))
        if v_part > r_part:
            return True
        elif v_part < r_part:
            return False
    return True
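# A minimal illustration of the comparison semantics (assuming the method
# is exposed on a Requirement class and versions are integer tuples
# produced by split_version):
#
#     Requirement.later_or_equal_version((1, 2, 1), (1, 2))  # True
#     Requirement.later_or_equal_version((1, 2), (1, 2, 1))  # False
#     Requirement.later_or_equal_version((1, 2), (1, 2, 0))  # True (padded)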
def _get_resource(cls, xfileset, fileset):
    for resource_name in fileset.format.xnat_resource_names:
        try:
            return xfileset.resources[resource_name]
        except KeyError:
            continue
    raise ArcanaError(
        "'{}' fileset is not available in '{}' format(s), "
        "available resources are '{}'".format(
            fileset.name,
            "', '".join(fileset.format.xnat_resource_names),
            "', '".join(r.label
                        for r in list(xfileset.resources.values()))))
def _create_project(self, project_name=None):
    if project_name is None:
        project_name = self.project
    if SERVER == 'https://mbi-xnat.erc.monash.edu.au':
        raise ArcanaError(
            "Shouldn't be creating projects on the production "
            "server")
    with xnat.connect(SERVER) as login:
        uri = '/data/archive/projects/{}'.format(project_name)
        query = {'xsiType': 'xnat:projectData', 'req_format': 'qa'}
        response = login.put(uri, query=query)
        if response.ok:
            logger.info("Created test project '{}'".format(project_name))
def _get_digests(cls, resource):
    """
    Downloads the MD5 digests associated with the files in a resource.
    These are saved with the downloaded files in the cache and used to
    check if the files have been updated on the server
    """
    result = resource.xnat_session.get(resource.uri + '/files')
    if result.status_code != 200:
        raise ArcanaError(
            "Could not download metadata for resource {}".format(
                resource.id))
    return dict((r['Name'], r['digest'])
                for r in result.json()['ResultSet']['Result'])
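# The XNAT '/files' listing returns JSON of roughly this shape (field names
# per the XNAT REST API; the values here are made up):
#
#     {"ResultSet": {"Result": [
#         {"Name": "scan.nii.gz", "digest": "d41d8cd98f00b204...", ...},
#         ...]}}
#
# so _get_digests returns a {filename: md5} dict, e.g.
# {'scan.nii.gz': 'd41d8cd98f00b204...'}.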
def _extract_ids(self, name):
    parts = name.split('_')
    if len(parts) < 3:
        raise ArcanaError(
            "'{}' in multi-subject test session '{}' needs to be "
            "prepended with subject and session IDs (delimited by "
            "'_')".format(name, self.xnat_session_name))
    subj_id, visit_id = parts[:2]
    if subj_id.lower() == SUMMARY_NAME.lower():
        subj_id = SUMMARY_NAME
    if visit_id.lower() == SUMMARY_NAME.lower():
        visit_id = SUMMARY_NAME
    basename = '_'.join(parts[2:])
    return subj_id, visit_id, basename
def download_dataset(download_path, server, user, password, session_id,
                     dataset_name, file_format=None):
    """
    Downloads a single dataset from an XNAT server
    """
    with xnat.connect(server, user=user, password=password) as xnat_login:
        try:
            session = xnat_login.experiments[session_id]
        except KeyError:
            raise ArcanaError(
                "Didn't find session matching '{}' on {}".format(
                    session_id, server))
        try:
            dataset = session.scans[dataset_name]
        except KeyError:
            raise ArcanaError(
                "Didn't find dataset matching '{}' in {}".format(
                    dataset_name, session_id))
        if file_format is None:
            file_format = guess_file_format(dataset)
        download_resource(download_path, dataset, file_format,
                          session.label)
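# Usage sketch (server URL, credentials and IDs are placeholders):
#
#     download_dataset(
#         download_path='/tmp/t1',
#         server='https://xnat.example.org',
#         user='someuser', password='...',
#         session_id='EXAMPLE_E00001',
#         dataset_name='t1_mprage')
#
# If file_format is omitted, guess_file_format is used to pick one from
# the resources attached to the scan.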
def __init__(self, name, min_version, max_version=None,
             version_split=split_version, references=None, website=None):
    self._name = name.lower()
    self._min_ver = tuple(min_version)
    if max_version is not None:
        self._max_ver = tuple(max_version)
        if not self.later_or_equal_version(self._max_ver, self._min_ver):
            raise ArcanaError(
                "Supplied max version ({}) is not greater than min "
                "version ({})".format(self._max_ver, self._min_ver))
    else:
        self._max_ver = None
    self._version_split = version_split
    self._references = references if references is not None else []
    self._website = website
def outputnode(self, frequency):
    """
    Generates an output node for the given frequency. It also adds
    implicit file format conversion nodes to the pipeline.

    Parameters
    ----------
    frequency : str
        The frequency (i.e. 'per_session', 'per_visit', 'per_subject' or
        'per_study') of the output node to retrieve
    """
    # Check to see whether there are any outputs for the given frequency
    outputs = list(self.frequency_outputs(frequency))
    if not outputs:
        raise ArcanaError(
            "No outputs to '{}' pipeline for requested frequency '{}'"
            .format(self.name, frequency))
    # Get list of output names for the requested frequency, adding fields
    # to hold iterator IDs
    output_names = [o.name for o in outputs]
    # Generate output node and connect it to appropriate nodes
    outputnode = self.add('{}_outputnode'.format(frequency),
                          IdentityInterface(fields=output_names))
    # Loop through list of nodes connected to study data specs and
    # connect them to the newly created output node. Converters are
    # cached (keyed on format name) so they can be reused across outputs
    conv_cache = {}
    for output in outputs:  # @ReservedAssignment
        (node, node_out, format,  # @ReservedAssignment @IgnorePep8
         conv_kwargs) = self._output_conns[output.name]
        # If fileset formats differ between study and pipeline
        # outputs create converter node (if one hasn't been already)
        # and connect output to that before connecting to outputnode
        if self.requires_conversion(output, format):
            if format.name not in conv_cache:
                conv_cache[format.name] = output.format.converter_from(
                    format, **conv_kwargs)
            (conv_node, conv_in,
             conv_out) = conv_cache[format.name].get_node(
                 '{}_{}_{}_to_{}_conversion'.format(
                     self.name, output.name, output.format.name,
                     format.name))
            self.connect(node, node_out, conv_node, conv_in)
            self.connect(conv_node, conv_out, outputnode, output.name)
        else:
            self.connect(node, node_out, outputnode, output.name)
    return outputnode
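# Usage sketch (inside pipeline-construction code; names illustrative):
#
#     outputnode = pipeline.outputnode('per_session')
#     # outputnode is a nipype IdentityInterface node with one field per
#     # 'per_session' output; outputs whose format differs from that of
#     # the connected node pass through an implicit converter node first.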
def _upload_dataset(self, xnat_login, fileset, xsession):
    if self._is_derived(fileset):
        type_name = self._derived_name(fileset)
    else:
        type_name = fileset.name
    xfileset = xnat_login.classes.MrScanData(type=type_name,
                                             parent=xsession)
    xresource = xfileset.create_resource(fileset.format.name.upper())
    if fileset.format.directory:
        for fname in os.listdir(fileset.path):
            fpath = op.join(fileset.path, fname)
            xresource.upload(fpath, fname)
    else:
        if not op.exists(fileset.path):
            raise ArcanaError(
                "Cannot upload fileset {} as path ({}) does "
                "not exist".format(fileset, fileset.path))
        xresource.upload(fileset.path, op.basename(fileset.path))
def _download_fileset(cls, tmp_dir, xresource, xfileset, fileset,
                      session_label, cache_path):
    # Download resource to zip file
    zip_path = op.join(tmp_dir, 'download.zip')
    with open(zip_path, 'wb') as f:
        xresource.xnat_session.download_stream(
            xresource.uri + '/files', f, format='zip', verbose=True)
    digests = cls._get_digests(xresource)
    # Extract downloaded zip file
    expanded_dir = op.join(tmp_dir, 'expanded')
    try:
        with ZipFile(zip_path) as zip_file:
            zip_file.extractall(expanded_dir)
    except BadZipfile as e:
        raise ArcanaError("Could not unzip file '{}' ({})".format(
            xresource.id, e))
    data_path = op.join(
        expanded_dir, session_label, 'scans',
        (xfileset.id + '-' + special_char_re.sub('_', xfileset.type)),
        'resources', xresource.label, 'files')
    if not fileset.format.directory:
        # If the data format is not a directory (e.g. DICOM),
        # attempt to locate a single file within the resource
        # directory with the appropriate filename and add that
        # to be the complete data path.
        fnames = os.listdir(data_path)
        match_fnames = [
            f for f in fnames
            if (lower(split_extension(f)[-1]) ==
                lower(fileset.format.extension))
        ]
        if len(match_fnames) == 1:
            data_path = op.join(data_path, match_fnames[0])
        else:
            raise ArcanaMissingDataException(
                "Did not find single file with extension '{}' "
                "(found '{}') in resource '{}'".format(
                    fileset.format.extension, "', '".join(fnames),
                    data_path))
    shutil.move(data_path, cache_path)
    with open(cache_path + XnatRepository.MD5_SUFFIX, 'w',
              **JSON_ENCODING) as f:
        json.dump(digests, f)
    shutil.rmtree(tmp_dir)
def best_requirement(cls, possible_requirements, available_modules,
                     preloaded_modules=None):
    if preloaded_modules is None:
        preloaded_modules = {}
    # If possible reqs is a singleton, wrap it in a list for
    # iterating
    if isinstance(possible_requirements, Requirement):
        possible_requirements = [possible_requirements]
    # Loop through all candidate requirements and see if at least one
    # can be satisfied
    logger.debug(
        "Searching for one of {}".format(
            ', '.join(str(r) for r in possible_requirements)))
    ver_exceptions = []  # Will hold all version error messages
    for req in possible_requirements:
        try:
            version = preloaded_modules[req.name]
            logger.debug("Found preloaded version {} of module '{}'"
                         .format(version, req.name))
            if req.valid_version(req.split_version(version)):
                return req.name, version
            else:
                raise ArcanaError(
                    "Incompatible module version already loaded {}/{},"
                    " (valid {}->{}) please unload before running "
                    "pipeline".format(
                        req.name, version, req.min_version,
                        (req.max_version if req.max_version is not None
                         else '')))
        except KeyError:
            try:
                best_version = req.best_version(
                    available_modules[req.name])
                logger.debug("Found best version '{}' of module '{}' for"
                             " requirement {}".format(best_version,
                                                      req.name, req))
                return req.name, best_version
            except ArcanaRequirementVersionException as e:
                ver_exceptions.append(e)
    # If no requirement can be satisfied, raise an exception with the
    # combined messages from all candidates
    raise ArcanaRequirementVersionException(
        ' and '.join(str(e) for e in ver_exceptions))
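# Usage sketch (hypothetical values; 'available_modules' would normally be
# parsed from 'module avail' output and 'preloaded_modules' from the
# LOADEDMODULES environment variable):
#
#     name, version = Requirement.best_requirement(
#         [mrtrix_req, dcm2niix_req],
#         available_modules={'mrtrix': ['3.0rc3'], 'dcm2niix': ['1.0']},
#         preloaded_modules={})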
def _list_outputs(self):
    if (not isdefined(self.inputs.compression) or
            (self.inputs.compression == 'y' or
             self.inputs.compression == 'i')):
        im_ext = '.nii.gz'
    else:
        im_ext = '.nii'
    outputs = self._outputs().get()
    # As Dcm2niix sometimes prepends a prefix onto the filenames to avoid
    # name clashes with multiple echos, we need to check the output folder
    # for all filenames that end with the "generated filename".
    out_dir = self._gen_filename('out_dir')
    fname = self._gen_filename('filename') + im_ext
    base, ext = split_extension(fname)
    match_re = re.compile(r'(_e\d+)?{}(_(?:e|c)\d+)?{}'.format(
        base, ext if ext is not None else ''))
    products = [
        os.path.join(out_dir, f) for f in os.listdir(out_dir)
        if match_re.match(f) is not None
    ]
    if len(products) == 1:
        converted = products[0]
    elif len(products) > 1 and self.inputs.multifile_concat:
        # Concatenate the multiple products along a fourth dimension
        ex_file = nib.load(products[0])
        data = ex_file.get_data()
        merged_file = np.zeros(
            (data.shape[0], data.shape[1], data.shape[2], len(products)))
        for i, el in enumerate(products):
            f = nib.load(el)
            merged_file[:, :, :, i] = f.get_data()
        im2save = nib.Nifti1Image(merged_file, ex_file.affine)
        out_path = os.path.join(out_dir, fname)
        nib.save(im2save, out_path)
        converted = out_path
    elif len(products) > 1 and not self.inputs.multifile_concat:
        converted = products[-1]
    else:
        raise ArcanaError("No products produced by dcm2niix ({})".format(
            ', '.join(os.listdir(out_dir))))
    outputs['converted'] = converted
    return outputs
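# The pattern above matches the generated filename with the optional
# echo/coil affixes dcm2niix adds: for a generated name 'scan.nii.gz'
# (base='scan', ext='.nii.gz') it matches 'scan.nii.gz', '_e2scan.nii.gz',
# 'scan_e2.nii.gz' and 'scan_c1.nii.gz'. Note that 'base' and 'ext' are
# not re.escape'd, so the '.' in the extension matches any character.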
def get_session_dir(self, subject=None, visit=None,
                    frequency='per_session', derived=False):
    if subject is None and frequency in ('per_session', 'per_subject'):
        subject = self.SUBJECT
    if visit is None and frequency in ('per_session', 'per_visit'):
        visit = self.VISIT
    if frequency == 'per_session':
        assert subject is not None
        assert visit is not None
        parts = [self.project, subject, visit]
    elif frequency == 'per_subject':
        assert subject is not None
        assert visit is None
        parts = [self.project, subject, XnatRepository.SUMMARY_NAME]
    elif frequency == 'per_visit':
        assert visit is not None
        assert subject is None
        parts = [self.project, XnatRepository.SUMMARY_NAME, visit]
    elif frequency == 'per_study':
        assert subject is None
        assert visit is None
        parts = [self.project, XnatRepository.SUMMARY_NAME,
                 XnatRepository.SUMMARY_NAME]
    else:
        assert False
    session_id = '_'.join(parts)
    if derived:
        session_id += XnatRepository.PROCESSED_SUFFIX
    session_path = op.join(self.output_cache_dir, session_id)
    if not op.exists(session_path):
        raise ArcanaError(
            "Session path '{}' does not exist".format(session_path))
    return session_path
def bind(self, study):
    """
    Returns a copy of the Spec bound to the given study

    Parameters
    ----------
    study : Study
        A study to bind the fileset spec to (should happen in the
        study __init__)
    """
    if self._study is not None:
        # Avoid rebinding specs in sub-studies that have already
        # been bound to MultiStudy
        bound = self
    else:
        bound = copy(self)
        bound._study = study
        if not hasattr(study, self.pipeline_name):
            raise ArcanaError(
                "{} does not have a method named '{}' required to "
                "derive {}".format(study, self.pipeline_name, self))
        bound._bind_tree(study.tree)
    return bound
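# Usage sketch (as called from a hypothetical Study.__init__; 'spec' is an
# unbound spec stored on the study class):
#
#     bound_spec = spec.bind(self)
#     # bound_spec is a copy tied to this study instance; if 'spec' was
#     # already bound (e.g. within a MultiStudy sub-study) it is returned
#     # unchanged.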
def download_resource(download_path, dataset, file_format, session_label):
    xresource = None
    for resource_name in file_format.xnat_resource_names:
        try:
            xresource = dataset.resources[resource_name]
            break
        except KeyError:
            logger.debug(
                "Did not find resource corresponding to '{}' for {}, "
                "will try alternatives if available".format(
                    resource_name, dataset))
            continue
    if xresource is None:
        raise ArcanaError(
            "Didn't find a resource matching any of '{}' in {} dataset"
            .format("', '".join(file_format.xnat_resource_names),
                    dataset.type))
    tmp_dir = download_path + '.download'
    xresource.download_dir(tmp_dir)
    dataset_label = dataset.id + '-' + special_char_re.sub('_',
                                                           dataset.type)
    src_path = os.path.join(tmp_dir, session_label, 'scans', dataset_label,
                            'resources', xresource.label, 'files')
    if not file_format.directory:
        fnames = os.listdir(src_path)
        match_fnames = [
            f for f in fnames
            if lower(split_extension(f)[-1]) == lower(file_format.extension)
        ]
        if len(match_fnames) == 1:
            src_path = os.path.join(src_path, match_fnames[0])
        else:
            raise ArcanaMissingDataException(
                "Did not find single file with extension '{}' "
                "(found '{}') in resource '{}'".format(
                    file_format.extension, "', '".join(fnames), src_path))
    shutil.move(src_path, download_path)
    shutil.rmtree(tmp_dir)
def __init__(self, name, valid_formats,
             frequency='per_session',  # @ReservedAssignment @IgnorePep8
             desc=None, optional=False, default=None):
    # Ensure allowed formats is a list
    try:
        valid_formats = sorted(valid_formats, key=attrgetter('name'))
    except TypeError:
        # A single format was passed rather than an iterable of them
        valid_formats = [valid_formats]
    else:
        if not valid_formats:
            raise ArcanaError(
                "'{}' spec doesn't have any allowed formats".format(name))
    self._valid_formats = valid_formats
    BaseFileset.__init__(self, name, valid_formats[0], frequency)
    BaseAcquiredSpec.__init__(self, name, desc, optional=optional,
                              default=default)
def bind(self, study):
    """
    Returns a copy of the AcquiredSpec bound to the given study

    Parameters
    ----------
    study : Study
        A study to bind the fileset spec to (should happen in the
        study __init__)
    """
    if self.default is None:
        raise ArcanaError(
            "Attempted to bind '{}' to {} but only acquired specs with "
            "a default value should be bound to studies".format(
                self.name, study))
    if self._study is not None:
        # This avoids rebinding specs to sub-studies that have already
        # been bound to the multi-study
        bound = self
    else:
        bound = copy(self)
        bound._study = study
        bound._default = bound.default.bind(study)
    return bound
def output_file_path(self, fname, from_study, subject=None, visit=None,
                     frequency='per_session'):
    try:
        acq_path = self.BASE_CLASS.output_file_path(
            self, fname, from_study, subject=subject, visit=visit,
            frequency=frequency, derived=False)
    except KeyError:
        acq_path = None
    try:
        proc_path = self.BASE_CLASS.output_file_path(
            self, fname, from_study, subject=subject, visit=visit,
            frequency=frequency, derived=True)
    except KeyError:
        proc_path = None
    if acq_path is not None and op.exists(acq_path):
        if proc_path is not None and op.exists(proc_path):
            raise ArcanaError(
                "Both acquired and derived paths were found for "
                "'{}_{}' ({} and {})".format(from_study, fname,
                                             acq_path, proc_path))
        path = acq_path
    else:
        path = proc_path
    return path