Example 1
 def assertField(self,
                 name,
                 ref_value,
                 study_name,
                 subject=None,
                 visit=None,
                 frequency='per_session',
                 to_places=None):
     esc_name = study_name + '_' + name
     output_dir = self.get_session_dir(subject, visit, frequency)
     try:
         with open(os.path.join(output_dir, FIELDS_FNAME)) as f:
             fields = json.load(f)
     except (IOError, OSError) as e:
         if e.errno == errno.ENOENT:
             raise NiAnalysisError(
                 "No fields were created by pipeline in study '{}'".format(
                     study_name))
         else:
             raise
     try:
         value = fields[esc_name]
     except KeyError:
         raise NiAnalysisError(
             "Field '{}' was not created by pipeline in study '{}'. "
             "Created fields were ('{}')".format(esc_name, study_name,
                                                 "', '".join(fields)))
     msg = ("Value {} of field '{}' in study '{}' does not match "
            "reference value ({})".format(value, name, study_name,
                                          ref_value))
     if to_places is not None:
         self.assertAlmostEqual(
             value, ref_value, to_places,
             '{} to {} decimal places'.format(msg, to_places))
     else:
         self.assertEqual(value, ref_value, msg)
Example 2
 def _run_module_cmd(cls, *args):
     if 'MODULESHOME' in os.environ:
         try:
             modulecmd = sp.check_output('which modulecmd',
                                         shell=True).strip()
         except sp.CalledProcessError:
             modulecmd = False
         if not modulecmd:
             modulecmd = '{}/bin/modulecmd'.format(
                 os.environ['MODULESHOME'])
             if not os.path.exists(modulecmd):
                 raise NiAnalysisError(
                     "Cannot find 'modulecmd' on path or in MODULESHOME.")
         logger.debug("Running modules command '{}'".format(' '.join(args)))
         try:
             output, error = sp.Popen([modulecmd, 'python'] + list(args),
                                      stdout=sp.PIPE,
                                      stderr=sp.PIPE).communicate()
         except (sp.CalledProcessError, OSError) as e:
             raise NiAnalysisError(
                 "Call to subprocess `{}` threw an error: {}".format(
                     ' '.join([modulecmd, 'python'] + list(args)), e))
         exec(output)
         return error
     else:
         raise NiAnalysisModulesNotInstalledException('MODULESHOME')
Example 3
def download_dataset(download_path,
                     server,
                     user,
                     password,
                     session_id,
                     dataset_name,
                     data_format=None):
    """
    Downloads a single dataset from an XNAT server
    """
    with xnat.connect(server, user=user, password=password) as xnat_login:
        try:
            session = xnat_login.experiments[session_id]
        except KeyError:
            raise NiAnalysisError(
                "Didn't find session matching '{}' on {}".format(
                    session_id, server))
        try:
            dataset = session.scans[dataset_name]
        except KeyError:
            raise NiAnalysisError(
                "Didn't find dataset matching '{}' in {}".format(
                    dataset_name, session_id))
        if data_format is None:
            data_format = guess_data_format(dataset)
        download_resource(download_path, dataset, data_format, session.label)
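
A minimal usage sketch of the function above; the server URL, credentials
and IDs are hypothetical placeholders, not values from the original code:

download_dataset(download_path='/tmp/t1w.nii.gz',
                 server='https://xnat.example.com',
                 user='someuser',
                 password='secret',
                 session_id='MYPROJECT_001_MR01',
                 dataset_name='t1_mprage')
# data_format is omitted here, so guess_data_format() (Example 11) picks
# the single matching resource or raises NiAnalysisError if ambiguous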
Example 4
 def match(self, subject_id=None, visit_id=None):
     if self._matches is None:
         raise NiAnalysisError(
             "{} has not been bound to study".format(self))
     if self.frequency == 'per_session':
         if subject_id is None or visit_id is None:
             raise NiAnalysisError(
                 "The 'subject_id' and 'visit_id' must be provided "
                 "to get the match from {}".format(self))
         dataset = self._matches[subject_id][visit_id]
     elif self.frequency == 'per_subject':
         if subject_id is None:
             raise NiAnalysisError(
                 "The 'subject_id' arg must be provided to get "
                 "the match from {}".format(self))
         dataset = self._matches[subject_id]
     elif self.frequency == 'per_visit':
         if visit_id is None:
             raise NiAnalysisError(
                 "The 'visit_id' arg must be provided to get "
                 "the match from {}".format(self))
         dataset = self._matches[visit_id]
     elif self.frequency == 'per_project':
         dataset = self._matches
     else:
         raise NiAnalysisError(
             "Unrecognised frequency '{}'".format(self.frequency))
     return dataset
Example 5
 def __init__(self, name, dtype, frequency):
     super(BaseField, self).__init__(name, frequency)
     if dtype not in self.dtypes:
         raise NiAnalysisError(
             "Invalid dtype {}, can be one of {}".format(
                 dtype.__name__, ', '.join(self._dtype_names())))
     self._dtype = dtype
Example 6
 def get_labels(cls, frequency, project_id, subject_id=None, visit_id=None):
     """
     Returns the labels for the XNAT subject and sessions given
     the frequency and provided IDs.
     """
     if frequency == 'per_session':
         assert visit_id is not None
         assert subject_id is not None
         subj_label = '{}_{}'.format(project_id, subject_id)
         sess_label = '{}_{}_{}'.format(project_id, subject_id, visit_id)
     elif frequency == 'per_subject':
         assert subject_id is not None
         subj_label = '{}_{}'.format(project_id, subject_id)
         sess_label = '{}_{}_{}'.format(project_id, subject_id,
                                        cls.SUMMARY_NAME)
     elif frequency == 'per_visit':
         assert visit_id is not None
         subj_label = '{}_{}'.format(project_id, cls.SUMMARY_NAME)
         sess_label = '{}_{}_{}'.format(project_id, cls.SUMMARY_NAME,
                                        visit_id)
     elif frequency == 'per_project':
         subj_label = '{}_{}'.format(project_id, cls.SUMMARY_NAME)
         sess_label = '{}_{}_{}'.format(project_id, cls.SUMMARY_NAME,
                                        cls.SUMMARY_NAME)
     else:
         raise NiAnalysisError(
             "Unrecognised frequency '{}'".format(frequency))
     return (subj_label, sess_label)
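
For illustration, a hedged sketch of the labels this classmethod produces;
the project, subject and visit IDs are made up, and SUMMARY_NAME stands in
for whichever ID the frequency does not vary over:

XnatArchive.get_labels('per_session', 'PROJ', '001', 'MR01')
# -> ('PROJ_001', 'PROJ_001_MR01')
XnatArchive.get_labels('per_subject', 'PROJ', subject_id='001')
# -> ('PROJ_001', 'PROJ_001_' + XnatArchive.SUMMARY_NAME)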
Example 7
def download_resource(download_path, dataset, data_format_name, session_label):

    data_format = DataFormat.by_name(data_format_name)
    try:
        resource = dataset.resources[data_format.xnat_resource_name]
    except KeyError:
        raise NiAnalysisError(
            "Didn't find {} resource in '{}' dataset".format(
                data_format.xnat_resource_name, dataset.type))
    tmp_dir = download_path + '.download'
    resource.download_dir(tmp_dir)
    dataset_label = dataset.id + '-' + special_char_re.sub('_', dataset.type)
    src_path = os.path.join(tmp_dir, session_label, 'scans', dataset_label,
                            'resources', data_format.xnat_resource_name,
                            'files')
    if not data_format.directory:
        fnames = os.listdir(src_path)
        match_fnames = [
            f for f in fnames
            if lower(split_extension(f)[-1]) == lower(data_format.extension)
        ]
        if len(match_fnames) == 1:
            src_path = os.path.join(src_path, match_fnames[0])
        else:
            raise NiAnalysisMissingDataException(
                "Did not find single file with extension '{}' "
                "(found '{}') in resource '{}'".format(data_format.extension,
                                                       "', '".join(fnames),
                                                       src_path))
    shutil.move(src_path, download_path)
    shutil.rmtree(tmp_dir)
Example 8
    def _create_session(self, xnat_login, subject_id, visit_id):
        """
        This creates a derived session in a way that respects whether
        the acquired session has been shared into another project or not.

        If we weren't worried about this we could just use

            session = xnat_login.classes.MrSessionData(label=proc_session_id,
                                                       parent=subject)
        """
        uri = ('/data/archive/projects/{}/subjects/{}/experiments/{}'.format(
            self.inputs.project_id, subject_id, visit_id))
        query = {
            'xsiType': 'xnat:mrSessionData',
            'label': visit_id,
            'req_format': 'qa'
        }
        response = xnat_login.put(uri, query=query)
        if response.status_code not in (200, 201):
            raise NiAnalysisError(
                "Could not create session '{}' in subject '{}' in project "
                "'{}' (response code {})".format(
                    visit_id, subject_id, self.inputs.project_id,
                    response.status_code))
        return xnat_login.classes.MrSessionData(uri=uri,
                                                xnat_session=xnat_login)
Example 9
    def cache(self, dataset, prev_login=None):
        """
        Caches a single dataset (e.g. if the 'path' attribute is
        accessed and it has not been previously cached)

        Parameters
        ----------
        dataset : Dataset
            The dataset to cache
        prev_login : xnat.XNATSession
            An XNATSession object to use for the connection. A new
            one is created if one isn't provided
        """
        if dataset.archive is not self:
            raise NiAnalysisError("{} is not from {}".format(dataset, self))
        assert dataset.uri is not None
        with self.login(prev_login=prev_login) as xnat_login:
            sess_id, scan_id = re.match(r'/data/experiments/(\w+)/scans/(.*)',
                                        dataset.uri).groups()
            xsession = xnat_login.experiments[sess_id]
            xdataset = xsession.scans[scan_id]
            xresource = XnatSource.get_resource(xdataset, dataset)
            cache_path = self.cache_path(dataset)
            XnatSource.download_dataset(tempfile.mkdtemp(), xresource,
                                        xdataset, dataset, xsession.label,
                                        cache_path)
        return cache_path
Example 10
 def __init__(self,
              name,
              value,
              frequency='per_session',
              derived=False,
              subject_id=None,
              visit_id=None,
              archive=None):
     if isinstance(value, int):
         dtype = int
     elif isinstance(value, float):
         dtype = float
     elif isinstance(value, basestring):
         # Attempt to implicitly convert from string
         try:
             value = int(value)
             dtype = int
         except ValueError:
             try:
                 value = float(value)
                 dtype = float
             except ValueError:
                 dtype = str
     else:
         raise NiAnalysisError(
             "Unrecognised field value type {} (can be int, float or "
             "str)".format(type(value).__name__))
     super(Field, self).__init__(name, dtype, frequency=frequency)
     self._value = value
     self._derived = derived
     self._subject_id = subject_id
     self._visit_id = visit_id
     self._archive = archive
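
A brief hedged sketch of the implicit string conversion above; it assumes
Field can be constructed directly as in this snippet:

Field('n_vols', '120')      # value converted to int 120, dtype int
Field('threshold', '0.5')   # value converted to float 0.5, dtype float
Field('comment', 'motion')  # both conversions fail, so kept as str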
Example 11
def guess_data_format(dataset):
    dataset_formats = [
        r for r in dataset.resources.itervalues()
        if r.label.lower() in DataFormat.by_names
    ]
    if len(dataset_formats) > 1:
        raise NiAnalysisError(
            "Multiple valid resources '{}' for '{}' dataset, please pass "
            "'data_format' to 'download_dataset' method to specify the "
            "resource to download".format(
                "', '".join(r.label for r in dataset_formats),
                dataset.type))
    elif not dataset_formats:
        raise NiAnalysisError(
            "No recognised data formats for '{}' dataset (available resources "
            "are '{}')".format(
                dataset.type,
                "', '".join(r.label for r in dataset.resources.itervalues())))
    return dataset_formats[0].label
Example 12
 def path(self):
     if self._path is None:
         if self.archive is not None:
             self._path = self.archive.cache(self)
         else:
             raise NiAnalysisError(
                 "Neither path nor archive has been set for Dataset "
                 "{}".format(self.name))
     return self._path
Example 13
 def __init__(self,
              study,
              name,
              inputs,
              outputs,
              desc,
              citations,
              version,
              name_prefix='',
              add_inputs=[],
              add_outputs=[]):
     self._name = name_prefix + name
     inputs = list(inputs) + list(add_inputs)
     outputs = list(outputs) + list(add_outputs)
     self._study = study
     self._workflow = pe.Workflow(name=self.name)
     self._version = int(version)
     self._desc = desc
     # Set up inputs
     self._check_spec_names(inputs, 'input')
     clashing = [i for i in inputs if i.name in self.iterfields]
     if clashing:
         raise NiAnalysisError(
             "Cannot have dataset specs named '{}' as they clash with "
             "iterable fields of the same name".format(
                 "', '".join(i.name for i in clashing)))
     self._inputs = inputs
     self._inputnode = self.create_node(IdentityInterface(
         fields=(tuple(self.input_names) + self.iterfields)),
                                        name="inputnode",
                                        wall_time=10,
                                        memory=1000)
     # Set up outputs
     self._check_spec_names(outputs, 'output')
     self._outputs = defaultdict(list)
     for output in outputs:
         freq = self._study.data_spec(output).frequency
         self._outputs[freq].append(output)
     self._outputnodes = {}
     for freq in self._outputs:
         self._outputnodes[freq] = self.create_node(
             IdentityInterface(fields=[o.name
                                       for o in self._outputs[freq]]),
             name="{}_outputnode".format(freq),
             wall_time=10,
             memory=1000)
     # Create sets of unconnected inputs/outputs
     self._unconnected_inputs = set(self.input_names)
     self._unconnected_outputs = set(self.output_names)
     assert len(inputs) == len(self._unconnected_inputs), (
         "Duplicate inputs found in '{}'".format("', '".join(
             self.input_names)))
     assert len(outputs) == len(self._unconnected_outputs), (
         "Duplicate outputs found in '{}'".format("', '".join(
             self.output_names)))
     self._citations = citations
     # Keep a record of all options used in the pipeline construction
     # so that they can be saved with the provenance.
     self._used_options = set()
Example 14
 def get_resource(cls, xdataset, dataset):
     try:
         xresource = xdataset.resources[dataset.format.xnat_resource_name]
     except KeyError:
         raise NiAnalysisError(
             "'{}' dataset is not available in '{}' format, "
             "available resources are '{}'".format(
                 dataset.name, dataset.format.xnat_resource_name,
                 "', '".join(r.label for r in xdataset.resources.values())))
     return xresource
Example 15
 def _check_spec_names(self, specs, spec_type):
     # Check for unrecognised inputs/outputs
     unrecognised = set(s for s in specs
                        if s.name not in self.study.data_spec_names())
     if unrecognised:
         raise NiAnalysisError(
             "'{}' are not valid {} names for {} study ('{}')".format(
                 "', '".join(u.name for u in unrecognised), spec_type,
                 self.study.__class__.__name__,
                 "', '".join(self.study.data_spec_names())))
Example 16
def get_atlas_path(name, dataset='brain', resolution='1mm'):
    """
    Returns the path to the atlas (or atlas mask) in the nianalysis repository

    Parameters
    ----------
    name : str
        Name of the atlas, can be one of ('MNI152')
    dataset : str
        Which part of the atlas to return, can be one of 'image',
        'mask', 'mask_dilated' or 'brain'
    resolution : str
        Voxel resolution, can be one of '0.5mm', '1mm' or '2mm'
    """
    if name == 'MNI152':
        # MNI ICBM 152 non-linear 6th Generation Symmetric Average Brain
        # Stereotaxic Registration Model (http://nist.mni.mcgill.ca/?p=858)
        if resolution not in ['0.5mm', '1mm', '2mm']:
            raise NiAnalysisError(
                "Invalid resolution for MNI152, '{}', can be one of '0.5mm', "
                "'1mm' or '2mm'".format(resolution))
        if dataset == 'image':
            path = os.path.join(get_fsl_reference_path(),
                                'MNI152_T1_{}.nii.gz'.format(resolution))
        elif dataset == 'mask':
            path = os.path.join(
                get_fsl_reference_path(),
                'MNI152_T1_{}_brain_mask.nii.gz'.format(resolution))
        elif dataset == 'mask_dilated':
            if resolution != '2mm':
                raise NiAnalysisError(
                    "Dilated MNI masks are not available for {} resolution ".
                    format(resolution))
            path = os.path.join(
                get_fsl_reference_path(),
                'MNI152_T1_{}_brain_mask_dil.nii.gz'.format(resolution))
        elif dataset == 'brain':
            path = os.path.join(get_fsl_reference_path(),
                                'MNI152_T1_{}_brain.nii.gz'.format(resolution))
        else:
            raise NiAnalysisError("Unrecognised dataset '{}'".format(dataset))
    else:
        raise NiAnalysisError("Unrecognised atlas name '{}'".format(name))
    return os.path.abspath(path)
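
For illustration, a hedged usage sketch; the resolved path depends on the
local FSL reference directory returned by get_fsl_reference_path():

mask_path = get_atlas_path('MNI152', dataset='mask', resolution='2mm')
# e.g. <fsl_reference_path>/MNI152_T1_2mm_brain_mask.nii.gz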
Example 17
 def _list_outputs(self):
     outputs = {}
     session_id = (self.inputs.subject_id, self.inputs.visit_id)
     session_ids = zip(self.inputs.subject_ids, self.inputs.visit_ids)
     if session_ids.count(session_id) != 1:
         raise NiAnalysisError(
             "Expected exactly one match for {} in the subject and visit "
             "ID lists ({})".format(session_id, session_ids))
     index = session_ids.index(session_id)
     outputs['item'] = self.inputs.items[index]
     return outputs
Example 18
 def later_or_equal_version(cls, version, reference):
     for v_part, r_part in izip_longest(version, reference, fillvalue=0):
         if type(v_part) != type(r_part):
             raise NiAnalysisError(
                 "Type of version part {} (of '{}'), {}, does not match "
                 "type of reference part {}, {}".format(
                     v_part, version, type(v_part), r_part, type(r_part)))
         if v_part > r_part:
             return True
         elif v_part < r_part:
             return False
     return True
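
A hedged illustration with already-split version tuples (as a
version_split helper would produce); shorter tuples are padded with zeros
by izip_longest, and the class name assumes the method lives on the
Requirement class used in the surrounding examples:

Requirement.later_or_equal_version((5, 0, 9), (5, 0))  # True (9 > padded 0)
Requirement.later_or_equal_version((4, 9), (5, 0))     # False (4 < 5)
Requirement.later_or_equal_version((5, 0), (5, 0, 0))  # True (equal)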
Example 19
 def get_digests(cls, resource):
     """
     Downloads the MD5 digests associated with the files in a resource.
     These are saved with the downloaded files in the cache and used to
     check if the files have been updated on the server
     """
     result = resource.xnat_session.get(resource.uri + '/files')
     if result.status_code != 200:
         raise NiAnalysisError(
             "Could not download metadata for resource {}".format(
                 resource.id))
     return dict((r['Name'], r['digest'])
                 for r in result.json()['ResultSet']['Result'])
Example 20
 def _extract_ids(self, name):
     parts = name.split('_')
     if len(parts) < 3:
         raise NiAnalysisError(
             "'{}' in multi-subject test session '{}' needs to be "
             "prepended with subject and session IDs (delimited by "
             "'_')".format(name, self.xnat_session_name))
     subj_id, visit_id = parts[:2]
     if subj_id.lower() == SUMMARY_NAME.lower():
         subj_id = SUMMARY_NAME
     if visit_id.lower() == SUMMARY_NAME.lower():
         visit_id = SUMMARY_NAME
     basename = '_'.join(parts[2:])
     return subj_id, visit_id, basename
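
A short hedged sketch of the parsing above (the dataset name is invented):

subj_id, visit_id, basename = self._extract_ids('subj01_MR01_t1_mprage')
# -> ('subj01', 'MR01', 't1_mprage'); a name with fewer than three
# '_'-delimited parts raises NiAnalysisError instead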
Example 21
 def __init__(self,
              name,
              min_version,
              max_version=None,
              version_split=split_version,
              citations=None):
     self._name = name.lower()
     self._min_ver = tuple(min_version)
     if max_version is not None:
         self._max_ver = tuple(max_version)
         if not self.later_or_equal_version(self._max_ver, self._min_ver):
             raise NiAnalysisError(
                 "Supplied max version ({}) is not greater than min "
                 "version ({})".format(self._max_ver, self._min_ver))
     else:
         self._max_ver = None
     self._version_split = version_split
     self._citations = citations if citations is not None else []
Example 22
 def best_requirement(cls,
                      possible_requirements,
                      available_modules,
                      preloaded_modules=None):
     if preloaded_modules is None:
         preloaded_modules = {}
     # If possible reqs is a singleton, wrap it in a list for
     # iterating
     if isinstance(possible_requirements, Requirement):
         possible_requirements = [possible_requirements]
     # Loop through all options for a given requirement and see
     # if at least one can be satisfied.
     logger.debug("Searching for one of {}".format(', '.join(
         str(r) for r in possible_requirements)))
     ver_exceptions = []  # Will hold all version error messages
     for req in possible_requirements:
         try:
             version = preloaded_modules[req.name]
             logger.debug(
                 "Found preloaded version {} of module '{}'".format(
                     version, req.name))
             if req.valid_version(req.split_version(version)):
                 return req.name, version
             else:
                 raise NiAnalysisError(
                     "Incompatible module version already loaded {}/{},"
                     " (valid {}->{}) please unload before running "
                     "pipeline".format(req.name, version, req.min_version,
                                       (req.max_version if req.max_version
                                        is not None else '')))
         except KeyError:
             try:
                 best_version = req.best_version(
                     available_modules[req.name])
                 logger.debug("Found best version '{}' of module '{}' for"
                              " requirement {}".format(
                                  best_version, req.name, req))
                 return req.name, best_version
             except NiAnalysisRequirementVersionException as e:
                 ver_exceptions.append(e)
     # If no options could be satisfied, raise an exception with the
     # combined messages from all options.
     raise NiAnalysisRequirementVersionException(' and '.join(
         str(e) for e in ver_exceptions))
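
A hedged sketch of resolving a requirement with this classmethod; the
requirement object, module names and versions below are all invented:

# Assumes mrtrix_req is a Requirement accepting versions 0.3.12 and up
name, version = Requirement.best_requirement(
    mrtrix_req,
    available_modules={'mrtrix': ['0.3.12', '0.3.14']},
    preloaded_modules={})
# -> ('mrtrix', '0.3.14'), assuming req.best_version() picks the highest
#    valid version from the available list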
Example 23
    def sink(self,
             outputs,
             frequency='per_session',
             name=None,
             study_name=None,
             **kwargs):
        """
        Returns a NiPype node that puts the output data back to the archive
        system. The input spec of the node's interface should inherit from
        ArchiveSinkInputSpec

        Parameters
        ----------
        outputs : list(BaseDataset | BaseField)
            An iterable of nianalysis.Dataset and nianalysis.Field
            objects, which specify the datasets to put into the archive
            system
        frequency : str
            The frequency of the sink, can be one of 'per_session',
            'per_subject', 'per_visit' or 'per_project'
        name : str
            Name of the NiPype node
        study_name : str
            Prefix used to distinguish datasets generated by a particular
            study. Used for derived datasets only
        """
        if name is None:
            name = "{}_{}_sink".format(self.type, frequency)
        outputs = list(outputs)  # guard against single-use iterators
        if frequency.startswith('per_session'):
            sink_class = self.Sink
        elif frequency.startswith('per_subject'):
            sink_class = self.SubjectSink
        elif frequency.startswith('per_visit'):
            sink_class = self.VisitSink
        elif frequency.startswith('per_project'):
            sink_class = self.ProjectSink
        else:
            raise NiAnalysisError(
                "Unrecognised frequency '{}' can be one of '{}'".format(
                    frequency, "', '".join(Dataset.MULTIPLICITY_OPTIONS)))
        datasets = [o for o in outputs if isinstance(o, BaseDataset)]
        fields = [o for o in outputs if isinstance(o, BaseField)]
        return Node(sink_class(study_name, datasets, fields, **kwargs),
                    name=name)
Example 24
 def download_dataset(cls, tmp_dir, xresource, xdataset, dataset,
                      session_label, cache_path):
     # Download resource to zip file
     zip_path = os.path.join(tmp_dir, 'download.zip')
     with open(zip_path, 'wb') as f:
         xresource.xnat_session.download_stream(xresource.uri + '/files',
                                                f,
                                                format='zip',
                                                verbose=True)
     digests = cls.get_digests(xresource)
     # Extract downloaded zip file
     expanded_dir = os.path.join(tmp_dir, 'expanded')
     try:
         with ZipFile(zip_path) as zip_file:
             zip_file.extractall(expanded_dir)
     except BadZipfile as e:
         raise NiAnalysisError("Could not unzip file '{}' ({})".format(
             xresource.id, e))
     data_path = os.path.join(
         expanded_dir, session_label, 'scans',
         (xdataset.id + '-' + special_char_re.sub('_', xdataset.type)),
         'resources', dataset.format.xnat_resource_name, 'files')
     if not dataset.format.directory:
         # If the dataformat is not a directory (e.g. DICOM),
         # attempt to locate a single file within the resource
         # directory with the appropriate filename and add that
         # to be the complete data path.
         fnames = os.listdir(data_path)
         match_fnames = [
             f for f in fnames if (lower(split_extension(f)[-1]) == lower(
                 dataset.format.extension))
         ]
         if len(match_fnames) == 1:
             data_path = os.path.join(data_path, match_fnames[0])
         else:
             raise NiAnalysisMissingDataException(
                 "Did not find single file with extension '{}' "
                 "(found '{}') in resource '{}'".format(
                     dataset.format.extension, "', '".join(fnames),
                     data_path))
     shutil.move(data_path, cache_path)
     with open(cache_path + XnatArchive.MD5_SUFFIX, 'w') as f:
         json.dump(digests, f)
     shutil.rmtree(tmp_dir)
Example 25
 def _list_outputs(self):
     if (not isdefined(self.inputs.compression)
             or (self.inputs.compression == 'y'
                 or self.inputs.compression == 'i')):
         im_ext = '.nii.gz'
     else:
         im_ext = '.nii'
     outputs = self._outputs().get()
     # As Dcm2niix sometimes prepends a prefix onto the filenames to avoid
     # name clashes with multiple echos, we need to check the output folder
     # for all filenames that end with the "generated filename".
     out_dir = self._gen_filename('out_dir')
     fname = self._gen_filename('filename') + im_ext
     base, ext = split_extension(fname)
     match_re = re.compile(r'(_e\d+)?{}(_(?:e|c)\d+)?{}'.format(
         base, ext if ext is not None else ''))
     products = [
         os.path.join(out_dir, f) for f in os.listdir(out_dir)
         if match_re.match(f) is not None
     ]
     if len(products) == 1:
         converted = products[0]
     elif len(products) > 1 and self.inputs.multifile_concat:
         ex_file = nib.load(products[0])
         data = ex_file.get_data()
         merged_file = np.zeros(
             (data.shape[0], data.shape[1], data.shape[2], len(products)))
         for i, el in enumerate(products):
             f = nib.load(el)
             merged_file[:, :, :, i] = f.get_data()
         im2save = nib.Nifti1Image(merged_file, ex_file.affine)
         converted = os.path.join(out_dir, fname)
         nib.save(im2save, converted)
     elif len(products) > 1 and not self.inputs.multifile_concat:
         converted = products[-1]
     else:
         raise NiAnalysisError(
             "No products produced by dcm2niix ({})".format(', '.join(
                 os.listdir(out_dir))))
     outputs['converted'] = converted
     return outputs
Example 26
 def output_file_path(self,
                      fname,
                      study_name,
                      subject=None,
                      visit=None,
                      frequency='per_session'):
     try:
         acq_path = self.BASE_CLASS.output_file_path(self,
                                                     fname,
                                                     study_name,
                                                     subject=subject,
                                                     visit=visit,
                                                     frequency=frequency,
                                                     derived=False)
     except KeyError:
         acq_path = None
     try:
         proc_path = self.BASE_CLASS.output_file_path(self,
                                                      fname,
                                                      study_name,
                                                      subject=subject,
                                                      visit=visit,
                                                      frequency=frequency,
                                                      derived=True)
     except KeyError:
         proc_path = None
     if acq_path is not None and os.path.exists(acq_path):
         if proc_path is not None and os.path.exists(proc_path):
             raise NiAnalysisError(
                 "Both acquired and derived paths were found for "
                 "'{}_{}' ({} and {})".format(study_name, fname, acq_path,
                                              proc_path))
         path = acq_path
     else:
         path = proc_path
     return path
Example 27
 def _check_only_dirs(cls, dirs, path):
     if any(not os.path.isdir(os.path.join(path, d)) for d in dirs):
         raise NiAnalysisError(
             "Files found in local archive directory '{}' "
             "('{}') instead of sub-directories".format(
                 path, "', '".join(dirs)))
Example 28
    def get_tree(self, subject_ids=None, visit_ids=None):
        """
        Return the tree of subject and sessions information within a
        project in the XNAT archive

        Parameters
        ----------
        subject_ids : list(str)
            List of subject IDs with which to filter the tree. If None,
            all are returned
        visit_ids : list(str)
            List of visit IDs with which to filter the tree. If None,
            all are returned

        Returns
        -------
        project : nianalysis.archive.Project
            A hierarchical tree of subject, session and dataset
            information for the archive
        """
        # Convert subject ids to strings if they are integers
        if subject_ids is not None:
            subject_ids = [('{}_{:03d}'.format(self.project_id, s)
                            if isinstance(s, int) else s) for s in subject_ids]
        # Add derived visit IDs to list of visit ids to filter
        if visit_ids is not None:
            visit_ids = visit_ids + [
                i + self.PROCESSED_SUFFIX for i in visit_ids
            ]
        subjects = []
        sessions = defaultdict(list)
        with self.login() as xnat_login:
            xproject = xnat_login.projects[self.project_id]
            visit_sessions = defaultdict(list)
            # Create list of subjects
            for xsubject in xproject.subjects.itervalues():
                # This assumes that the subject ID is prepended with
                # the project ID
                subj_id = xsubject.label[(len(self.project_id) + 1):]
                if subj_id == XnatArchive.SUMMARY_NAME:
                    continue
                if not (subject_ids is None or subj_id in subject_ids):
                    continue
                logger.debug("Getting info for subject '{}'".format(subj_id))
                sessions = {}
                proc_sessions = []
                # Get per_session datasets
                for xsession in xsubject.experiments.itervalues():
                    visit_id = '_'.join(xsession.label.split('_')[2:])
                    if visit_id == XnatArchive.SUMMARY_NAME:
                        continue
                    if not (visit_ids is None or visit_id in visit_ids):
                        continue
                    derived = xsession.label.endswith(self.PROCESSED_SUFFIX)
                    session = Session(
                        subj_id,
                        visit_id,
                        datasets=self._get_datasets(xsession,
                                                    'per_session',
                                                    subject_id=subj_id,
                                                    visit_id=visit_id,
                                                    derived=derived),
                        fields=self._get_fields(xsession,
                                                'per_session',
                                                subject_id=subj_id,
                                                visit_id=visit_id,
                                                derived=derived),
                        derived=None)
                    if derived:
                        proc_sessions.append(session)
                    else:
                        sessions[visit_id] = session
                        visit_sessions[visit_id].append(session)
                for proc_session in proc_sessions:
                    visit_id = proc_session.visit_id[
                        :-len(self.PROCESSED_SUFFIX)]
                    try:
                        sessions[visit_id].derived = proc_session
                    except KeyError:
                        raise NiAnalysisError(
                            "No matching acquired session for derived "
                            "session '{}_{}_{}'".format(
                                self.project_id, proc_session.subject_id,
                                proc_session.visit_id))
                # Get per_subject datasets
                subj_summary_name = self.get_labels('per_subject',
                                                    self.project_id,
                                                    subj_id)[1]
                try:
                    xsubj_summary = xsubject.experiments[subj_summary_name]
                except KeyError:
                    subj_datasets = []
                    subj_fields = []
                else:
                    subj_datasets = self._get_datasets(xsubj_summary,
                                                       'per_subject',
                                                       subject_id=subj_id)
                    subj_fields = self._get_fields(xsubj_summary,
                                                   'per_subject',
                                                   subject_id=subj_id)
                subjects.append(
                    Subject(subj_id,
                            sorted(sessions.values()),
                            datasets=subj_datasets,
                            fields=subj_fields))
            # Create list of visits
            visits = []
            for visit_id, v_sessions in visit_sessions.iteritems():
                (_,
                 visit_summary_sess_name) = self.get_labels('per_visit',
                                                            self.project_id,
                                                            visit_id=visit_id)
                # Get 'per_visit' datasets
                try:
                    xvisit_summary = xproject.experiments[
                        visit_summary_sess_name]
                except KeyError:
                    visit_datasets = []
                    visit_fields = {}
                else:
                    visit_datasets = self._get_datasets(xvisit_summary,
                                                        'per_visit',
                                                        visit_id=visit_id)
                    visit_fields = self._get_fields(xvisit_summary,
                                                    'per_visit',
                                                    visit_id=visit_id)
                visits.append(
                    Visit(visit_id,
                          sorted(v_sessions),
                          datasets=visit_datasets,
                          fields=visit_fields))
            # Get 'per_project' datasets
            (proj_summary_subj_name,
             proj_summary_sess_name) = self.get_labels('per_project',
                                                       self.project_id)
            try:
                xproj_summary = xproject.subjects[
                    proj_summary_subj_name].experiments[proj_summary_sess_name]
            except KeyError:
                proj_datasets = []
                proj_fields = []
            else:
                proj_datasets = self._get_datasets(xproj_summary,
                                                   'per_project')
                proj_fields = self._get_fields(xproj_summary, 'per_project')
            if not subjects:
                raise NiAnalysisError(
                    "Did not find any subjects matching the IDs '{}' in "
                    "project '{}' (found '{}')".format(
                        ("', '".join(subject_ids)
                         if subject_ids is not None else ''), self.project_id,
                        "', '".join(s.label
                                    for s in xproject.subjects.values())))
            if not sessions:
                raise NiAnalysisError(
                    "Did not find any sessions matching the IDs '{}'"
                    "(in subjects '{}') for project '{}'".format(
                        ("', '".join(visit_ids)
                         if visit_ids is not None else ''),
                        "', '".join(s.label
                                    for s in xproject.experiments.values()),
                        self.project_id))
        return Project(sorted(subjects),
                       sorted(visits),
                       datasets=proj_datasets,
                       fields=proj_fields)
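
A hedged usage sketch of get_tree, assuming the returned Project exposes
its subjects and their sessions as iterables (the IDs are placeholders):

project = archive.get_tree(subject_ids=['001', '002'], visit_ids=['MR01'])
for subject in project.subjects:
    for session in subject.sessions:
        print('{}: {}'.format(session.visit_id,
                              [d.name for d in session.datasets]))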
Example 29
 def __init__(self, base_dir):
     if not os.path.exists(base_dir):
         raise NiAnalysisError(
             "Base directory for LocalArchive '{}' does not exist".format(
                 base_dir))
     self._base_dir = os.path.abspath(base_dir)
Example 30
 def _list_outputs(self):
     # FIXME: Should probably not prepend the project before this point
     subject_id = self.inputs.subject_id.split('_')[-1]
     visit_id = self.inputs.visit_id
     base_cache_dir = os.path.join(self.inputs.cache_dir,
                                   self.inputs.project_id)
     sess_kwargs = {}
     if isdefined(self.inputs.user):
         sess_kwargs['user'] = self.inputs.user
     if isdefined(self.inputs.password):
         sess_kwargs['password'] = self.inputs.password
     with xnat.connect(server=self.inputs.server,
                       **sess_kwargs) as xnat_login:
         project = xnat_login.projects[self.inputs.project_id]
         # Get primary session, derived and summary sessions and cache
         # dirs
         sessions = {}
         cache_dirs = {}
         for freq, derived in ([('per_session', False)] +
                               zip(MULTIPLICITIES, repeat(True))):
             subj_label, sess_label = XnatArchive.get_labels(
                 freq, self.inputs.project_id, subject_id, visit_id)
             if freq == 'per_session' and derived:
                 sess_label += XnatArchive.PROCESSED_SUFFIX
              cache_dirs[(freq, derived)] = os.path.join(
                  base_cache_dir, subj_label, sess_label)
             try:
                 subject = project.subjects[subj_label]
                 sessions[(freq, derived)] = subject.experiments[sess_label]
             except KeyError:
                 continue
         outputs = {}
         for dataset in self.datasets:
             try:
                 session = sessions[(dataset.frequency, dataset.derived)]
             except KeyError:
                  raise NiAnalysisMissingDataException(
                      "Did not find{} session for frequency '{}', "
                      "which was expected to contain {}".format(
                          ' derived' if dataset.derived else '',
                          dataset.frequency, dataset))
             cache_dir = cache_dirs[(dataset.frequency, dataset.derived)]
             try:
                 xdataset = session.scans[dataset.basename(
                     subject_id=subject_id, visit_id=visit_id)]
             except KeyError:
                 raise NiAnalysisError(
                     "Could not find '{}' dataset in session '{}' "
                     "(found {})".format(dataset.prefixed_name,
                                         session.label,
                                         "', '".join(session.scans.keys())))
             # Get filename
             fname = dataset.fname(subject_id=subject_id, visit_id=visit_id)
             # Get resource to check its MD5 digest
             xresource = self.get_resource(xdataset, dataset)
             need_to_download = True
             # FIXME: Should do a check to see if versions match
             if not os.path.exists(cache_dir):
                 os.makedirs(cache_dir)
             cache_path = os.path.join(cache_dir, fname)
             if os.path.exists(cache_path):
                 if self.check_md5:
                     try:
                         with open(cache_path +
                                   XnatArchive.MD5_SUFFIX) as f:
                             cached_digests = json.load(f)
                         digests = self.get_digests(xresource)
                         if cached_digests == digests:
                             need_to_download = False
                     except IOError:
                         pass
                 else:
                     need_to_download = False
             if need_to_download:
                 # The path to the directory which the files will be
                 # downloaded to.
                 tmp_dir = cache_path + '.download'
                 try:
                     # Attempt to make tmp download directory. This will
                     # fail if another process (or previous attempt) has
                     # already created it. In that case this process will
                     # wait to see if that download finishes successfully,
                     # and if so use the cached version.
                     os.mkdir(tmp_dir)
                 except OSError as e:
                     if e.errno == errno.EEXIST:
                         # Another process may be concurrently downloading
                         # the same file to the cache. Wait for
                         # 'race_cond_delay' seconds and then check that it
                         # has been completed or assume interrupted and
                         # redownload.
                         self.delayed_download(
                             tmp_dir,
                             xresource,
                             xdataset,
                             dataset,
                             session.label,
                             cache_path,
                             delay=self.inputs.race_cond_delay)
                     else:
                         raise
                 else:
                     self.download_dataset(tmp_dir, xresource, xdataset,
                                           dataset, session.label,
                                           cache_path)
             outputs[dataset.name + PATH_SUFFIX] = cache_path
         for field in self.fields:
             prefixed_name = field.prefixed_name
             session = sessions[(field.frequency, field.derived)]
             outputs[field.name + FIELD_SUFFIX] = field.dtype(
                 session.fields[prefixed_name])
     return outputs