def assertField(self, name, ref_value, study_name, subject=None, visit=None,
                frequency='per_session', to_places=None):
    esc_name = study_name + '_' + name
    output_dir = self.get_session_dir(subject, visit, frequency)
    try:
        with open(os.path.join(output_dir, FIELDS_FNAME)) as f:
            fields = json.load(f)
    except OSError as e:
        if e.errno == errno.ENOENT:
            raise NiAnalysisError(
                "No fields were created by pipeline in study '{}'".format(
                    study_name))
        raise  # Re-raise errors other than "file not found"
    try:
        value = fields[esc_name]
    except KeyError:
        raise NiAnalysisError(
            "Field '{}' was not created by pipeline in study '{}'. "
            "Created fields were ('{}')".format(
                esc_name, study_name, "', '".join(fields)))
    msg = ("Value of field '{}' in study '{}', {}, does not match "
           "reference value ({})".format(name, study_name, value, ref_value))
    if to_places is not None:
        self.assertAlmostEqual(
            value, ref_value, to_places,
            '{} to {} decimal places'.format(msg, to_places))
    else:
        self.assertEqual(value, ref_value, msg)
def _run_module_cmd(cls, *args):
    if 'MODULESHOME' in os.environ:
        try:
            modulecmd = sp.check_output('which modulecmd',
                                        shell=True).strip()
        except sp.CalledProcessError:
            modulecmd = False
        if not modulecmd:
            modulecmd = '{}/bin/modulecmd'.format(
                os.environ['MODULESHOME'])
            if not os.path.exists(modulecmd):
                raise NiAnalysisError(
                    "Cannot find 'modulecmd' on path or in MODULESHOME.")
        logger.debug("Running modules command '{}'".format(' '.join(args)))
        try:
            output, error = sp.Popen(
                [modulecmd, 'python'] + list(args),
                stdout=sp.PIPE, stderr=sp.PIPE).communicate()
        except (sp.CalledProcessError, OSError) as e:
            raise NiAnalysisError(
                "Call to subprocess `{}` threw an error: {}".format(
                    ' '.join([modulecmd, 'python'] + list(args)), e))
        # 'modulecmd python' prints Python code (environment updates) on
        # stdout, which is executed here to apply them to this process
        exec output
        return error
    else:
        raise NiAnalysisModulesNotInstalledException('MODULESHOME')
def download_dataset(download_path, server, user, password, session_id,
                     dataset_name, data_format=None):
    """
    Downloads a single dataset from an XNAT server
    """
    with xnat.connect(server, user=user, password=password) as xnat_login:
        try:
            session = xnat_login.experiments[session_id]
        except KeyError:
            raise NiAnalysisError(
                "Didn't find session matching '{}' on {}".format(
                    session_id, server))
        try:
            dataset = session.scans[dataset_name]
        except KeyError:
            raise NiAnalysisError(
                "Didn't find dataset matching '{}' in {}".format(
                    dataset_name, session_id))
        if data_format is None:
            data_format = guess_data_format(dataset)
        download_resource(download_path, dataset, data_format,
                          session.label)
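# Usage sketch (illustrative only; the server URL, credentials, session
# label, scan name and 'nifti_gz' format name below are placeholders, not
# values defined in this module):
#
#     download_dataset('/tmp/t1w', 'https://xnat.example.com',
#                      'myuser', 'mypassword', 'MYPROJ_001_MR01', 't1w',
#                      data_format='nifti_gz')
#
# If 'data_format' is omitted it is guessed from the scan's resources via
# guess_data_format() (defined below).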
def match(self, subject_id=None, visit_id=None):
    if self._matches is None:
        raise NiAnalysisError(
            "{} has not been bound to study".format(self))
    if self.frequency == 'per_session':
        if subject_id is None or visit_id is None:
            raise NiAnalysisError(
                "The 'subject_id' and 'visit_id' must be provided "
                "to get the match from {}".format(self))
        dataset = self._matches[subject_id][visit_id]
    elif self.frequency == 'per_subject':
        if subject_id is None:
            raise NiAnalysisError(
                "The 'subject_id' arg must be provided to get "
                "the match from {}".format(self))
        dataset = self._matches[subject_id]
    elif self.frequency == 'per_visit':
        if visit_id is None:
            raise NiAnalysisError(
                "The 'visit_id' arg must be provided to get "
                "the match from {}".format(self))
        dataset = self._matches[visit_id]
    elif self.frequency == 'per_project':
        dataset = self._matches
    else:
        # Guard against an unrecognised frequency leaving 'dataset' unset
        raise NiAnalysisError(
            "Unrecognised frequency '{}' in {}".format(self.frequency,
                                                       self))
    return dataset
def __init__(self, name, dtype, frequency):
    super(BaseField, self).__init__(name, frequency)
    if dtype not in self.dtypes:
        raise NiAnalysisError(
            "Invalid dtype {}, can be one of {}".format(
                dtype.__name__, ', '.join(self._dtype_names())))
    self._dtype = dtype
def get_labels(cls, frequency, project_id, subject_id=None, visit_id=None):
    """
    Returns the labels for the XNAT subject and sessions given the
    frequency and provided IDs.
    """
    if frequency == 'per_session':
        assert visit_id is not None
        assert subject_id is not None
        subj_label = '{}_{}'.format(project_id, subject_id)
        sess_label = '{}_{}_{}'.format(project_id, subject_id, visit_id)
    elif frequency == 'per_subject':
        assert subject_id is not None
        subj_label = '{}_{}'.format(project_id, subject_id)
        sess_label = '{}_{}_{}'.format(project_id, subject_id,
                                       cls.SUMMARY_NAME)
    elif frequency == 'per_visit':
        assert visit_id is not None
        subj_label = '{}_{}'.format(project_id, cls.SUMMARY_NAME)
        sess_label = '{}_{}_{}'.format(project_id, cls.SUMMARY_NAME,
                                       visit_id)
    elif frequency == 'per_project':
        subj_label = '{}_{}'.format(project_id, cls.SUMMARY_NAME)
        sess_label = '{}_{}_{}'.format(project_id, cls.SUMMARY_NAME,
                                       cls.SUMMARY_NAME)
    else:
        raise NiAnalysisError(
            "Unrecognised frequency '{}'".format(frequency))
    return (subj_label, sess_label)
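# Label scheme sketch: acquired per-session data lives under
# '<project>_<subject>' / '<project>_<subject>_<visit>', and summary
# (per_subject/per_visit/per_project) data substitutes SUMMARY_NAME for the
# missing ID(s). The IDs below are hypothetical and 'ALL' stands in for
# whatever cls.SUMMARY_NAME is set to:
#
#     get_labels('per_session', 'MYPROJ', '001', 'MR01')
#         -> ('MYPROJ_001', 'MYPROJ_001_MR01')
#     get_labels('per_subject', 'MYPROJ', subject_id='001')
#         -> ('MYPROJ_001', 'MYPROJ_001_ALL')
#     get_labels('per_project', 'MYPROJ')
#         -> ('MYPROJ_ALL', 'MYPROJ_ALL_ALL')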
def download_resource(download_path, dataset, data_format_name,
                      session_label):
    data_format = DataFormat.by_name(data_format_name)
    try:
        resource = dataset.resources[data_format.xnat_resource_name]
    except KeyError:
        raise NiAnalysisError(
            "Didn't find '{}' resource in '{}' dataset".format(
                data_format.xnat_resource_name, dataset.type))
    tmp_dir = download_path + '.download'
    resource.download_dir(tmp_dir)
    dataset_label = dataset.id + '-' + special_char_re.sub('_',
                                                           dataset.type)
    src_path = os.path.join(tmp_dir, session_label, 'scans', dataset_label,
                            'resources', data_format.xnat_resource_name,
                            'files')
    if not data_format.directory:
        fnames = os.listdir(src_path)
        match_fnames = [
            f for f in fnames
            if lower(split_extension(f)[-1]) == lower(
                data_format.extension)]
        if len(match_fnames) == 1:
            src_path = os.path.join(src_path, match_fnames[0])
        else:
            raise NiAnalysisMissingDataException(
                "Did not find single file with extension '{}' "
                "(found '{}') in resource '{}'".format(
                    data_format.extension, "', '".join(fnames), src_path))
    shutil.move(src_path, download_path)
    shutil.rmtree(tmp_dir)
def _create_session(self, xnat_login, subject_id, visit_id):
    """
    This creates a derived session in a way that respects whether the
    acquired session has been shared into another project or not.

    If we weren't worried about this we could just use

        session = xnat_login.classes.MrSessionData(label=proc_session_id,
                                                   parent=subject)
    """
    uri = ('/data/archive/projects/{}/subjects/{}/experiments/{}'.format(
        self.inputs.project_id, subject_id, visit_id))
    query = {'xsiType': 'xnat:mrSessionData', 'label': visit_id,
             'req_format': 'qa'}
    response = xnat_login.put(uri, query=query)
    if response.status_code not in (200, 201):
        raise NiAnalysisError(
            "Could not create session '{}' in subject '{}' in project "
            "'{}' (response code {})".format(
                visit_id, subject_id, self.inputs.project_id,
                response.status_code))
    return xnat_login.classes.MrSessionData(uri=uri,
                                            xnat_session=xnat_login)
def cache(self, dataset, prev_login=None):
    """
    Caches a single dataset (e.g. when the 'path' attribute is accessed
    and it has not been previously cached)

    Parameters
    ----------
    dataset : Dataset
        The dataset to cache
    prev_login : xnat.XNATSession
        An XNATSession object to use for the connection. A new one is
        created if one isn't provided
    """
    if dataset.archive is not self:
        raise NiAnalysisError("{} is not from {}".format(dataset, self))
    assert dataset.uri is not None
    with self.login(prev_login=prev_login) as xnat_login:
        sess_id, scan_id = re.match(
            r'/data/experiments/(\w+)/scans/(.*)', dataset.uri).groups()
        xsession = xnat_login.experiments[sess_id]
        xdataset = xsession.scans[scan_id]
        xresource = XnatSource.get_resource(xdataset, dataset)
        cache_path = self.cache_path(dataset)
        XnatSource.download_dataset(
            tempfile.mkdtemp(), xresource, xdataset, dataset,
            xsession.label, cache_path)
    return cache_path
def __init__(self, name, value, frequency='per_session', derived=False,
             subject_id=None, visit_id=None, archive=None):
    if isinstance(value, int):
        dtype = int
    elif isinstance(value, float):
        dtype = float
    elif isinstance(value, basestring):
        # Attempt to implicitly convert from string
        try:
            value = int(value)
            dtype = int
        except ValueError:
            try:
                value = float(value)
                dtype = float
            except ValueError:
                dtype = str
    else:
        raise NiAnalysisError(
            "Unrecognised field value type {} (can be int, float or str)"
            .format(type(value).__name__))
    super(Field, self).__init__(name, dtype, frequency=frequency)
    self._value = value
    self._derived = derived
    self._subject_id = subject_id
    self._visit_id = visit_id
    self._archive = archive
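# Conversion sketch for the implicit string handling above (field names and
# values are illustrative): string values are coerced to int if they parse
# as int, then to float, and otherwise kept as str:
#
#     Field('ntracks', '1000')  -> dtype int,   value 1000
#     Field('fwhm', '2.5')      -> dtype float, value 2.5
#     Field('operator', 'jane') -> dtype str,   value 'jane'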
def guess_data_format(dataset):
    dataset_formats = [r for r in dataset.resources.itervalues()
                       if r.label.lower() in DataFormat.by_names]
    if len(dataset_formats) > 1:
        raise NiAnalysisError(
            "Multiple valid resources ('{}') for '{}' dataset; please "
            "pass 'data_format' to the 'download_dataset' method to "
            "specify which resource to download".format(
                "', '".join(r.label for r in dataset_formats),
                dataset.type))
    elif not dataset_formats:
        raise NiAnalysisError(
            "No recognised data formats for '{}' dataset (available "
            "resources are '{}')".format(
                dataset.type,
                "', '".join(r.label
                            for r in dataset.resources.itervalues())))
    return dataset_formats[0].label
def path(self):
    if self._path is None:
        if self.archive is not None:
            self._path = self.archive.cache(self)
        else:
            raise NiAnalysisError(
                "Neither path nor archive has been set for Dataset "
                "{}".format(self.name))
    return self._path
def __init__(self, study, name, inputs, outputs, desc, citations, version,
             name_prefix='', add_inputs=[], add_outputs=[]):
    self._name = name_prefix + name
    inputs = list(inputs) + list(add_inputs)
    outputs = list(outputs) + list(add_outputs)
    self._study = study
    self._workflow = pe.Workflow(name=self.name)
    self._version = int(version)
    self._desc = desc
    # Set up inputs
    self._check_spec_names(inputs, 'input')
    for inpt in inputs:
        if inpt.name in self.iterfields:
            raise NiAnalysisError(
                "Cannot have a dataset spec named '{}' as it clashes "
                "with an iterable field of that name".format(inpt.name))
    self._inputs = inputs
    self._inputnode = self.create_node(
        IdentityInterface(fields=(tuple(self.input_names) +
                                  self.iterfields)),
        name="inputnode", wall_time=10, memory=1000)
    # Set up outputs
    self._check_spec_names(outputs, 'output')
    self._outputs = defaultdict(list)
    for output in outputs:
        freq = self._study.data_spec(output).frequency
        self._outputs[freq].append(output)
    self._outputnodes = {}
    for freq in self._outputs:
        self._outputnodes[freq] = self.create_node(
            IdentityInterface(
                fields=[o.name for o in self._outputs[freq]]),
            name="{}_outputnode".format(freq), wall_time=10, memory=1000)
    # Create sets of unconnected inputs/outputs
    self._unconnected_inputs = set(self.input_names)
    self._unconnected_outputs = set(self.output_names)
    assert len(inputs) == len(self._unconnected_inputs), (
        "Duplicate inputs found in '{}'".format(
            "', '".join(self.input_names)))
    assert len(outputs) == len(self._unconnected_outputs), (
        "Duplicate outputs found in '{}'".format(
            "', '".join(self.output_names)))
    self._citations = citations
    # Keep record of all options used in the pipeline construction
    # so that they can be saved with the provenance.
    self._used_options = set()
def get_resource(cls, xdataset, dataset):
    try:
        xresource = xdataset.resources[dataset.format.xnat_resource_name]
    except KeyError:
        raise NiAnalysisError(
            "'{}' dataset is not available in '{}' format, "
            "available resources are '{}'".format(
                dataset.name, dataset.format.xnat_resource_name,
                "', '".join(r.label
                            for r in xdataset.resources.values())))
    return xresource
def _check_spec_names(self, specs, spec_type):
    # Check for unrecognised inputs/outputs
    unrecognised = set(s for s in specs
                       if s.name not in self.study.data_spec_names())
    if unrecognised:
        raise NiAnalysisError(
            "'{}' are not valid {} names for {} study ('{}')".format(
                "', '".join(u.name for u in unrecognised), spec_type,
                self.study.__class__.__name__,
                "', '".join(self.study.data_spec_names())))
def get_atlas_path(name, dataset='brain', resolution='1mm'):
    """
    Returns the path to the atlas (or atlas mask) in the nianalysis
    repository

    Parameters
    ----------
    name : str
        Name of the atlas, can be one of ('MNI152')
    dataset : str
        Which part of the atlas to return, can be one of 'image', 'mask',
        'mask_dilated' or 'brain'
    resolution : str
        Resolution of the atlas, can be one of '0.5mm', '1mm' or '2mm'
    """
    if name == 'MNI152':
        # MNI ICBM 152 non-linear 6th Generation Symmetric Average Brain
        # Stereotaxic Registration Model (http://nist.mni.mcgill.ca/?p=858)
        if resolution not in ['0.5mm', '1mm', '2mm']:
            raise NiAnalysisError(
                "Invalid resolution for MNI152, '{}', can be one of "
                "'0.5mm', '1mm' or '2mm'".format(resolution))
        if dataset == 'image':
            path = os.path.join(get_fsl_reference_path(),
                                'MNI152_T1_{}.nii.gz'.format(resolution))
        elif dataset == 'mask':
            path = os.path.join(
                get_fsl_reference_path(),
                'MNI152_T1_{}_brain_mask.nii.gz'.format(resolution))
        elif dataset == 'mask_dilated':
            if resolution != '2mm':
                raise NiAnalysisError(
                    "Dilated MNI masks are only available at 2mm "
                    "resolution (not {})".format(resolution))
            path = os.path.join(
                get_fsl_reference_path(),
                'MNI152_T1_{}_brain_mask_dil.nii.gz'.format(resolution))
        elif dataset == 'brain':
            path = os.path.join(
                get_fsl_reference_path(),
                'MNI152_T1_{}_brain.nii.gz'.format(resolution))
        else:
            raise NiAnalysisError(
                "Unrecognised dataset '{}'".format(dataset))
    else:
        raise NiAnalysisError("Unrecognised atlas name '{}'".format(name))
    return os.path.abspath(path)
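# Usage sketch (returned paths are illustrative and depend on
# get_fsl_reference_path()):
#
#     get_atlas_path('MNI152', dataset='mask', resolution='2mm')
#         -> .../MNI152_T1_2mm_brain_mask.nii.gz
#     get_atlas_path('MNI152')  # defaults: dataset='brain', resolution='1mm'
#         -> .../MNI152_T1_1mm_brain.nii.gz
#
# Note that 'mask_dilated' is only available at '2mm' resolution.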
def _list_outputs(self):
    outputs = {}
    session_id = (self.inputs.subject_id, self.inputs.visit_id)
    session_ids = zip(self.inputs.subject_ids, self.inputs.visit_ids)
    if session_ids.count(session_id) != 1:
        raise NiAnalysisError(
            "Expected exactly one match for {} in the zipped subject and "
            "visit ID lists ({})".format(session_id, session_ids))
    index = session_ids.index(session_id)
    outputs['item'] = self.inputs.items[index]
    return outputs
def later_or_equal_version(cls, version, reference):
    for v_part, r_part in izip_longest(version, reference, fillvalue=0):
        if type(v_part) != type(r_part):
            raise NiAnalysisError(
                "Type of version part {} (of '{}'), {}, does not match "
                "type of reference part {}, {}".format(
                    v_part, version, type(v_part), r_part, type(r_part)))
        if v_part > r_part:
            return True
        elif v_part < r_part:
            return False
    return True
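# Comparison semantics sketch (assuming this is called as a classmethod,
# e.g. Requirement.later_or_equal_version): version parts are compared
# left-to-right with missing parts padded with 0, so
#
#     later_or_equal_version((5, 0, 10), (5, 0))  -> True
#     later_or_equal_version((5, 0), (5, 0, 1))   -> False
#     later_or_equal_version((5, 0), (5, 0, 0))   -> True
#
# and mixing part types (e.g. (5, 'a') vs (5, 0)) raises NiAnalysisError.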
def get_digests(cls, resource):
    """
    Downloads the MD5 digests associated with the files in a resource.
    These are saved with the downloaded files in the cache and used to
    check if the files have been updated on the server
    """
    result = resource.xnat_session.get(resource.uri + '/files')
    if result.status_code != 200:
        raise NiAnalysisError(
            "Could not download metadata for resource {}".format(
                resource.id))
    return dict((r['Name'], r['digest'])
                for r in result.json()['ResultSet']['Result'])
def _extract_ids(self, name):
    parts = name.split('_')
    if len(parts) < 3:
        raise NiAnalysisError(
            "'{}' in multi-subject test session '{}' needs to be "
            "prepended with subject and session IDs (delimited by "
            "'_')".format(name, self.xnat_session_name))
    subj_id, visit_id = parts[:2]
    if subj_id.lower() == SUMMARY_NAME.lower():
        subj_id = SUMMARY_NAME
    if visit_id.lower() == SUMMARY_NAME.lower():
        visit_id = SUMMARY_NAME
    basename = '_'.join(parts[2:])
    return subj_id, visit_id, basename
def __init__(self, name, min_version, max_version=None,
             version_split=split_version, citations=None):
    self._name = name.lower()
    self._min_ver = tuple(min_version)
    if max_version is not None:
        self._max_ver = tuple(max_version)
        if not self.later_or_equal_version(self._max_ver, self._min_ver):
            raise NiAnalysisError(
                "Supplied max version ({}) is not greater than min "
                "version ({})".format(self._max_ver, self._min_ver))
    else:
        self._max_ver = None
    self._version_split = version_split
    self._citations = citations if citations is not None else []
def best_requirement(cls, possible_requirements, available_modules,
                     preloaded_modules=None):
    if preloaded_modules is None:
        preloaded_modules = {}
    # If possible reqs is a singleton, wrap it in a list for iterating
    if isinstance(possible_requirements, Requirement):
        possible_requirements = [possible_requirements]
    # Loop through all options for a given requirement and see if at
    # least one can be satisfied.
    logger.debug("Searching for one of {}".format(
        ', '.join(str(r) for r in possible_requirements)))
    ver_exceptions = []  # Will hold all version error messages
    for req in possible_requirements:
        try:
            version = preloaded_modules[req.name]
            logger.debug(
                "Found preloaded version {} of module '{}'".format(
                    version, req.name))
            if req.valid_version(req.split_version(version)):
                return req.name, version
            else:
                raise NiAnalysisError(
                    "Incompatible module version already loaded {}/{} "
                    "(valid {}->{}), please unload before running "
                    "pipeline".format(
                        req.name, version, req.min_version,
                        (req.max_version
                         if req.max_version is not None else '')))
        except KeyError:
            try:
                best_version = req.best_version(
                    available_modules[req.name])
                logger.debug("Found best version '{}' of module '{}' for"
                             " requirement {}".format(best_version,
                                                      req.name, req))
                return req.name, best_version
            except NiAnalysisRequirementVersionException as e:
                ver_exceptions.append(e)
    # If no options can be satisfied, raise an exception with the
    # combined messages from all options.
    raise NiAnalysisRequirementVersionException(
        ' and '.join(str(e) for e in ver_exceptions))
def sink(self, outputs, frequency='per_session', name=None,
         study_name=None, **kwargs):
    """
    Returns a NiPype node that puts the output data back to the archive
    system. The input spec of the node's interface should inherit from
    ArchiveSinkInputSpec

    Parameters
    ----------
    outputs : list(BaseDataset | BaseField)
        An iterable of nianalysis.Dataset and nianalysis.Field objects,
        which specify the datasets to put into the archive system
    frequency : str
        The frequency of the sink ('per_session', 'per_subject',
        'per_visit' or 'per_project')
    name : str
        Name of the NiPype node
    study_name : str
        Prefix used to distinguish datasets generated by a particular
        study. Used for derived datasets only
    """
    if name is None:
        name = "{}_{}_sink".format(self.type, frequency)
    outputs = list(outputs)  # protect against iterators
    if frequency.startswith('per_session'):
        sink_class = self.Sink
    elif frequency.startswith('per_subject'):
        sink_class = self.SubjectSink
    elif frequency.startswith('per_visit'):
        sink_class = self.VisitSink
    elif frequency.startswith('per_project'):
        sink_class = self.ProjectSink
    else:
        raise NiAnalysisError(
            "Unrecognised frequency '{}', can be one of '{}'".format(
                frequency, "', '".join(Dataset.MULTIPLICITY_OPTIONS)))
    datasets = [o for o in outputs if isinstance(o, BaseDataset)]
    fields = [o for o in outputs if isinstance(o, BaseField)]
    return Node(sink_class(study_name, datasets, fields, **kwargs),
                name=name)
def download_dataset(cls, tmp_dir, xresource, xdataset, dataset,
                     session_label, cache_path):
    # Download resource to zip file (opened in binary mode as the stream
    # is a zip archive)
    zip_path = os.path.join(tmp_dir, 'download.zip')
    with open(zip_path, 'wb') as f:
        xresource.xnat_session.download_stream(
            xresource.uri + '/files', f, format='zip', verbose=True)
    digests = cls.get_digests(xresource)
    # Extract downloaded zip file
    expanded_dir = os.path.join(tmp_dir, 'expanded')
    try:
        with ZipFile(zip_path) as zip_file:
            zip_file.extractall(expanded_dir)
    except BadZipfile as e:
        raise NiAnalysisError("Could not unzip file '{}' ({})".format(
            xresource.id, e))
    data_path = os.path.join(
        expanded_dir, session_label, 'scans',
        (xdataset.id + '-' + special_char_re.sub('_', xdataset.type)),
        'resources', dataset.format.xnat_resource_name, 'files')
    if not dataset.format.directory:
        # If the dataformat is not a directory (e.g. DICOM),
        # attempt to locate a single file within the resource
        # directory with the appropriate filename and add that
        # to be the complete data path.
        fnames = os.listdir(data_path)
        match_fnames = [
            f for f in fnames
            if lower(split_extension(f)[-1]) == lower(
                dataset.format.extension)]
        if len(match_fnames) == 1:
            data_path = os.path.join(data_path, match_fnames[0])
        else:
            raise NiAnalysisMissingDataException(
                "Did not find single file with extension '{}' "
                "(found '{}') in resource '{}'".format(
                    dataset.format.extension, "', '".join(fnames),
                    data_path))
    shutil.move(data_path, cache_path)
    with open(cache_path + XnatArchive.MD5_SUFFIX, 'w') as f:
        json.dump(digests, f)
    shutil.rmtree(tmp_dir)
def _list_outputs(self):
    if (not isdefined(self.inputs.compression) or
            (self.inputs.compression == 'y' or
             self.inputs.compression == 'i')):
        im_ext = '.nii.gz'
    else:
        im_ext = '.nii'
    outputs = self._outputs().get()
    # As Dcm2niix sometimes prepends a prefix onto the filenames to avoid
    # name clashes with multiple echos, we need to check the output folder
    # for all filenames that end with the "generated filename".
    out_dir = self._gen_filename('out_dir')
    fname = self._gen_filename('filename') + im_ext
    base, ext = split_extension(fname)
    match_re = re.compile(r'(_e\d+)?{}(_(?:e|c)\d+)?{}'.format(
        base, ext if ext is not None else ''))
    products = [os.path.join(out_dir, f) for f in os.listdir(out_dir)
                if match_re.match(f) is not None]
    if len(products) == 1:
        converted = products[0]
    elif len(products) > 1 and self.inputs.multifile_concat:
        # Concatenate the multiple output volumes along a fourth dimension
        ex_file = nib.load(products[0])
        data = ex_file.get_data()
        merged_file = np.zeros(
            (data.shape[0], data.shape[1], data.shape[2], len(products)))
        for i, el in enumerate(products):
            f = nib.load(el)
            merged_file[:, :, :, i] = f.get_data()
        im2save = nib.Nifti1Image(merged_file, ex_file.affine)
        nib.save(im2save, os.path.join(out_dir, fname))
        converted = os.path.join(out_dir, fname)
    elif len(products) > 1 and not self.inputs.multifile_concat:
        converted = products[-1]
    else:
        raise NiAnalysisError(
            "No products produced by dcm2niix ({})".format(
                ', '.join(os.listdir(out_dir))))
    outputs['converted'] = converted
    return outputs
def output_file_path(self, fname, study_name, subject=None, visit=None,
                     frequency='per_session'):
    try:
        acq_path = self.BASE_CLASS.output_file_path(
            self, fname, study_name, subject=subject, visit=visit,
            frequency=frequency, derived=False)
    except KeyError:
        acq_path = None
    try:
        proc_path = self.BASE_CLASS.output_file_path(
            self, fname, study_name, subject=subject, visit=visit,
            frequency=frequency, derived=True)
    except KeyError:
        proc_path = None
    if acq_path is not None and os.path.exists(acq_path):
        if proc_path is not None and os.path.exists(proc_path):
            raise NiAnalysisError(
                "Both acquired and derived paths were found for "
                "'{}_{}' ({} and {})".format(study_name, fname, acq_path,
                                             proc_path))
        path = acq_path
    else:
        path = proc_path
    return path
def _check_only_dirs(cls, dirs, path):
    if any(not os.path.isdir(os.path.join(path, d)) for d in dirs):
        raise NiAnalysisError(
            "Files found in local archive directory '{}' "
            "('{}') instead of sub-directories".format(
                path, "', '".join(dirs)))
def get_tree(self, subject_ids=None, visit_ids=None):
    """
    Return the tree of subject and sessions information within a
    project in the XNAT archive

    Parameters
    ----------
    subject_ids : list(str)
        List of subject IDs with which to filter the tree with. If None
        all are returned
    visit_ids : list(str)
        List of visit IDs with which to filter the tree with. If None
        all are returned

    Returns
    -------
    project : nianalysis.archive.Project
        A hierarchical tree of subject, session and dataset information
        for the archive
    """
    # Convert subject ids to strings if they are integers
    if subject_ids is not None:
        subject_ids = [
            ('{}_{:03d}'.format(self.project_id, s)
             if isinstance(s, int) else s) for s in subject_ids]
    # Add derived visit IDs to list of visit ids to filter
    if visit_ids is not None:
        visit_ids = visit_ids + [i + self.PROCESSED_SUFFIX
                                 for i in visit_ids]
    subjects = []
    sessions = defaultdict(list)
    with self.login() as xnat_login:
        xproject = xnat_login.projects[self.project_id]
        visit_sessions = defaultdict(list)
        # Create list of subjects
        for xsubject in xproject.subjects.itervalues():
            # This assumes that the subject ID is prepended with
            # the project ID
            subj_id = xsubject.label[(len(self.project_id) + 1):]
            if subj_id == XnatArchive.SUMMARY_NAME:
                continue
            if not (subject_ids is None or subj_id in subject_ids):
                continue
            logger.debug("Getting info for subject '{}'".format(subj_id))
            sessions = {}
            proc_sessions = []
            # Get per_session datasets
            for xsession in xsubject.experiments.itervalues():
                visit_id = '_'.join(xsession.label.split('_')[2:])
                if visit_id == XnatArchive.SUMMARY_NAME:
                    continue
                if not (visit_ids is None or visit_id in visit_ids):
                    continue
                derived = xsession.label.endswith(self.PROCESSED_SUFFIX)
                session = Session(
                    subj_id, visit_id,
                    datasets=self._get_datasets(
                        xsession, 'per_session', subject_id=subj_id,
                        visit_id=visit_id, derived=derived),
                    fields=self._get_fields(
                        xsession, 'per_session', subject_id=subj_id,
                        visit_id=visit_id, derived=derived),
                    derived=None)
                if derived:
                    proc_sessions.append(session)
                else:
                    sessions[visit_id] = session
                    visit_sessions[visit_id].append(session)
            for proc_session in proc_sessions:
                visit_id = proc_session.visit_id[
                    :-len(self.PROCESSED_SUFFIX)]
                try:
                    sessions[visit_id].derived = proc_session
                except KeyError:
                    raise NiAnalysisError(
                        "No matching acquired session for derived "
                        "session '{}_{}_{}'".format(
                            self.project_id, proc_session.subject_id,
                            proc_session.visit_id))
            # Get per_subject datasets
            subj_summary_name = self.get_labels(
                'per_subject', self.project_id, subj_id)[1]
            try:
                xsubj_summary = xsubject.experiments[subj_summary_name]
            except KeyError:
                subj_datasets = []
                subj_fields = []
            else:
                subj_datasets = self._get_datasets(
                    xsubj_summary, 'per_subject', subject_id=subj_id)
                subj_fields = self._get_fields(
                    xsubj_summary, 'per_subject', subject_id=subj_id)
            subjects.append(Subject(subj_id, sorted(sessions.values()),
                                    datasets=subj_datasets,
                                    fields=subj_fields))
        # Create list of visits
        visits = []
        for visit_id, v_sessions in visit_sessions.iteritems():
            (_, visit_summary_sess_name) = self.get_labels(
                'per_visit', self.project_id, visit_id=visit_id)
            # Get 'per_visit' datasets
            try:
                xvisit_summary = xproject.experiments[
                    visit_summary_sess_name]
            except KeyError:
                visit_datasets = []
                visit_fields = []
            else:
                visit_datasets = self._get_datasets(
                    xvisit_summary, 'per_visit', visit_id=visit_id)
                visit_fields = self._get_fields(
                    xvisit_summary, 'per_visit', visit_id=visit_id)
            visits.append(Visit(visit_id, sorted(v_sessions),
                                datasets=visit_datasets,
                                fields=visit_fields))
        # Get 'per_project' datasets
        (proj_summary_subj_name,
         proj_summary_sess_name) = self.get_labels('per_project',
                                                   self.project_id)
        try:
            xproj_summary = xproject.subjects[
                proj_summary_subj_name].experiments[
                    proj_summary_sess_name]
        except KeyError:
            proj_datasets = []
            proj_fields = []
        else:
            proj_datasets = self._get_datasets(xproj_summary,
                                               'per_project')
            proj_fields = self._get_fields(xproj_summary, 'per_project')
        if not subjects:
            raise NiAnalysisError(
                "Did not find any subjects matching the IDs '{}' in "
                "project '{}' (found '{}')".format(
                    ("', '".join(subject_ids)
                     if subject_ids is not None else ''),
                    self.project_id,
                    "', '".join(s.label
                                for s in xproject.subjects.values())))
        if not sessions:
            raise NiAnalysisError(
                "Did not find any sessions matching the IDs '{}' "
                "(found sessions '{}') for project '{}'".format(
                    ("', '".join(visit_ids)
                     if visit_ids is not None else ''),
                    "', '".join(s.label
                                for s in xproject.experiments.values()),
                    self.project_id))
    return Project(sorted(subjects), sorted(visits),
                   datasets=proj_datasets, fields=proj_fields)
def __init__(self, base_dir):
    if not os.path.exists(base_dir):
        raise NiAnalysisError(
            "Base directory for LocalArchive '{}' does not exist".format(
                base_dir))
    self._base_dir = os.path.abspath(base_dir)
def _list_outputs(self):
    # FIXME: Should probably not prepend the project before this point
    subject_id = self.inputs.subject_id.split('_')[-1]
    visit_id = self.inputs.visit_id
    base_cache_dir = os.path.join(self.inputs.cache_dir,
                                  self.inputs.project_id)
    sess_kwargs = {}
    if isdefined(self.inputs.user):
        sess_kwargs['user'] = self.inputs.user
    if isdefined(self.inputs.password):
        sess_kwargs['password'] = self.inputs.password
    with xnat.connect(server=self.inputs.server,
                      **sess_kwargs) as xnat_login:
        project = xnat_login.projects[self.inputs.project_id]
        # Get primary session, derived and summary sessions and cache
        # dirs
        sessions = {}
        cache_dirs = {}
        for freq, derived in ([('per_session', False)] +
                              zip(MULTIPLICITIES, repeat(True))):
            subj_label, sess_label = XnatArchive.get_labels(
                freq, self.inputs.project_id, subject_id, visit_id)
            if freq == 'per_session' and derived:
                sess_label += XnatArchive.PROCESSED_SUFFIX
            cache_dirs[(freq, derived)] = os.path.join(
                base_cache_dir, subj_label, sess_label)
            try:
                subject = project.subjects[subj_label]
                sessions[(freq, derived)] = subject.experiments[sess_label]
            except KeyError:
                continue
        outputs = {}
        for dataset in self.datasets:
            try:
                session = sessions[(dataset.frequency, dataset.derived)]
            except KeyError:
                raise NiAnalysisMissingDataException(
                    "Did not find{} session for frequency '{}' in which "
                    "to find {}".format(
                        (' derived' if dataset.derived else ''),
                        dataset.frequency, dataset))
            cache_dir = cache_dirs[(dataset.frequency, dataset.derived)]
            try:
                xdataset = session.scans[
                    dataset.basename(subject_id=subject_id,
                                     visit_id=visit_id)]
            except KeyError:
                raise NiAnalysisError(
                    "Could not find '{}' dataset in session '{}' "
                    "(found '{}')".format(
                        dataset.prefixed_name, session.label,
                        "', '".join(session.scans.keys())))
            # Get filename
            fname = dataset.fname(subject_id=subject_id,
                                  visit_id=visit_id)
            # Get resource to check its MD5 digest
            xresource = self.get_resource(xdataset, dataset)
            need_to_download = True
            # FIXME: Should do a check to see if versions match
            if not os.path.exists(cache_dir):
                os.makedirs(cache_dir)
            cache_path = os.path.join(cache_dir, fname)
            if os.path.exists(cache_path):
                if self.check_md5:
                    try:
                        with open(cache_path +
                                  XnatArchive.MD5_SUFFIX) as f:
                            cached_digests = json.load(f)
                        digests = self.get_digests(xresource)
                        if cached_digests == digests:
                            need_to_download = False
                    except IOError:
                        pass
                else:
                    need_to_download = False
            if need_to_download:
                # The path to the directory which the files will be
                # downloaded to.
                tmp_dir = cache_path + '.download'
                try:
                    # Attempt to make tmp download directory. This will
                    # fail if another process (or previous attempt) has
                    # already created it. In that case this process will
                    # wait to see if that download finishes successfully,
                    # and if so use the cached version.
                    os.mkdir(tmp_dir)
                except OSError as e:
                    if e.errno == errno.EEXIST:
                        # Another process may be concurrently downloading
                        # the same file to the cache. Wait for
                        # 'race_cond_delay' seconds and then check that it
                        # has been completed or assume interrupted and
                        # redownload.
                        self.delayed_download(
                            tmp_dir, xresource, xdataset, dataset,
                            session.label, cache_path,
                            delay=self.inputs.race_cond_delay)
                    else:
                        raise
                else:
                    self.download_dataset(tmp_dir, xresource, xdataset,
                                          dataset, session.label,
                                          cache_path)
            outputs[dataset.name + PATH_SUFFIX] = cache_path
        for field in self.fields:
            prefixed_name = field.prefixed_name
            session = sessions[(field.frequency, field.derived)]
            outputs[field.name + FIELD_SUFFIX] = field.dtype(
                session.fields[prefixed_name])
    return outputs