Example #1
 def _gen_outfilename(self):
     if isdefined(self.inputs.out_file):
         out_name = self.inputs.out_file
     else:
         base, ext = split_extension(os.path.basename(self.inputs.in_file))
         out_name = os.path.join(os.getcwd(),
                                 "{}_threshold{}".format(base, ext))
     return out_name
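Every example on this page leans on split_extension, a utility from the arcana/banana codebase. A minimal sketch of the behaviour these snippets appear to assume (compound suffixes such as '.nii.gz' are kept whole, and None is returned when there is no extension; the suffix list below is illustrative, not the real one):

import os.path

# Assumed set of compound suffixes; the real utility presumably knows more
MULTI_PART_EXTS = ('.nii.gz', '.tar.gz')

def split_extension(path):
    # Like os.path.splitext, but keeps compound suffixes together and
    # returns None instead of '' when there is no extension
    for ext in MULTI_PART_EXTS:
        if path.endswith(ext):
            return path[:-len(ext)], ext
    base, ext = os.path.splitext(path)
    return base, (ext if ext else None)

split_extension('sub-01_T1w.nii.gz')  # -> ('sub-01_T1w', '.nii.gz')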
Example #2
 def _gen_outfilename(self):
     if isdefined(self.inputs.out_file):
         out_name = self.inputs.out_file
     else:
         base, _ = split_extension(os.path.basename(self.inputs.roi_file))
         out_name = os.path.join(
             os.getcwd(), "{}_fitted_params.mat".format(base))
     return out_name
Example #3
 def _gen_grad_filename(self, comp):
     filename = getattr(self.inputs, comp + 's_file')
     if not isdefined(filename):
         base, _ = split_extension(os.path.basename(self.inputs.in_file))
         filename = os.path.join(
             os.getcwd(), "{base}_{comp}s.{comp}".format(base=base,
                                                         comp=comp))
     return filename
Example #4
 def _gen_outfilename(self):
     if isdefined(self.inputs.out_file):
         filename = self.inputs.out_file
     else:
         base, ext = split_extension(
             os.path.basename(self.inputs.in_files[0]))
         filename = os.path.join(os.getcwd(), "{}_avg{}".format(base, ext))
     return filename
Example #5
 def _gen_outfilename(self):
     if isdefined(self.inputs.out_file):
         fpath = self.inputs.out_file
     else:
         fname = (
             split_extension(os.path.basename(self.inputs.in_file))[0] +
             '_dicom')
         fpath = os.path.join(os.getcwd(), fname)
     return fpath
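Examples 1-5 all follow one convention: honour out_file when it is defined, otherwise derive the output name from the input's stem plus an operation-specific suffix, placed in the current working directory. A quick illustration with an invented input path:

import os

base, ext = split_extension(os.path.basename('/data/sub-01_T1w.nii.gz'))
out_name = os.path.join(os.getcwd(), '{}_threshold{}'.format(base, ext))
# e.g. <cwd>/sub-01_T1w_threshold.nii.gz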
Example #6
def detect_format(path, aux_files):
    ext = split_extension(path)[1]
    aux_names = set(aux_files.keys())
    for frmt in BIDS_FORMATS:
        if frmt.extension == ext and set(frmt.aux_files.keys()) == aux_names:
            return frmt
    raise BananaUnrecognisedBidsFormat(
        "No BIDS format matches the provided path ({}) and aux files ({})"
        .format(path, aux_files))
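A hedged sketch of the registry detect_format is assumed to consult: each format records a primary extension plus the names of its auxiliary files, and a format matches only when both agree. The entries below are illustrative, not the real banana definitions:

from collections import namedtuple

BidsFormat = namedtuple('BidsFormat', ['name', 'extension', 'aux_files'])

BIDS_FORMATS = [
    BidsFormat('nifti_gz', '.nii.gz', {}),
    BidsFormat('nifti_gz_fsl_grads', '.nii.gz',
               {'bvecs': '.bvec', 'bvals': '.bval'}),
]

# detect_format('dwi.nii.gz', {'bvecs': 'dwi.bvec', 'bvals': 'dwi.bval'})
# selects the second entry: the extension and the set of aux-file names
# both match.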
Example #7
 def _gen_outfilename(self):
     if isdefined(self.inputs.out_file):
         filename = self.inputs.out_file
     else:
         if isdefined(self.inputs.out_ext):
             ext = self.inputs.out_ext
         else:
             _, ext = split_extension(
                 os.path.basename(self.inputs.operands[0]))
         filename = ''
         for op in self.inputs.operands:
             try:
                 op_str = split_extension(os.path.basename(op))[0]
             except Exception:  # operand is a number, not a file path
                 op_str = str(float(op))
             filename += '_' + op_str
         # Join the operand stems and operation into a name in the
         # working directory
         filename = os.path.join(
             os.getcwd(),
             filename[1:] + '_' + self.inputs.operation + ext)
     return filename
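A hypothetical walk-through of the name this builds (operand values invented): a file operand contributes its extension-stripped stem, a numeric one its float representation, and the operation name comes last:

import os

operands, operation, ext = ['mask.nii.gz', 2], 'add', '.nii.gz'
stems = []
for op in operands:
    try:
        stems.append(split_extension(os.path.basename(op))[0])
    except Exception:  # a numeric operand has no path to split
        stems.append(str(float(op)))
print(os.path.join(os.getcwd(), '_'.join(stems + [operation]) + ext))
# e.g. <cwd>/mask_2.0_add.nii.gz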
Example #8
 def _gen_outfilename(self):
     if isdefined(self.inputs.out_file):
         filename = self.inputs.out_file
     else:
         if isdefined(self.inputs.out_ext):
             ext = self.inputs.out_ext
             base, _ = split_extension(os.path.basename(
                 self.inputs.in_file))
         else:
             base, ext = split_extension(
                 os.path.basename(self.inputs.in_file))
         if isdefined(self.inputs.bzero):
             suffix = 'b0'
         else:
             suffix = 'dw'
         filename = os.path.join(os.getcwd(),
                                 "{}_{}{}".format(base, suffix, ext))
     return filename
Example #9
 def from_path(cls, path, **kwargs):
     if not op.exists(path):
         raise ArcanaUsageError(
             "Attempting to read Fileset from path '{}' but it "
             "does not exist".format(path))
     if op.isdir(path):
         name = op.basename(path)
     else:
         name = split_extension(op.basename(path))[0]
     return cls(name, path=path, **kwargs)
Example #10
 def _gen_outfilename(self):
     if isdefined(self.inputs.out_file):
         if not self.inputs.out_file.endswith('.mat'):
             raise ArcanaError(
                 "Output NODDI ROI should be saved with '.mat' extension "
                 "(provided '{}')".format(self.inputs.out_file))
         out_name = self.inputs.out_file
     else:
         base, _ = split_extension(os.path.basename(self.inputs.in_file))
         out_name = os.path.join(os.getcwd(), "{}_ROI.mat".format(base))
     return out_name
Example #11
    def assort_files(self, candidates):
        """
        Assorts candidate files into primary and auxiliary (and ignored) files
        corresponding to the format by their file extensions. Can be overridden
        in specialised subclasses to assort files based on other
        characteristics

        Parameters
        ----------
        candidates : list[str]
            The list of filenames to assort

        Returns
        -------
        primary_file : str
            Path to the selected primary file
        aux_files : dict[str, str]
            A dictionary mapping the auxiliary file name to the selected path
        """
        by_ext = defaultdict(list)
        for path in candidates:
            by_ext[split_extension(path)[1].lower()].append(path)
        # by_ext is a defaultdict, so a missing extension yields an empty
        # list rather than a KeyError
        primary_file = by_ext[self.ext]
        if not primary_file:
            raise ArcanaFileFormatError(
                "No files match primary file extension '{}' of {} out of "
                "potential candidates of '{}'"
                .format(self.ext, self, "', '".join(candidates)))
        elif len(primary_file) > 1:
            raise ArcanaFileFormatError(
                "Multiple potential files for '{}' primary file of {}"
                .format("', '".join(primary_file), self))
        else:
            primary_file = primary_file[0]
        aux_files = {}
        for aux_name, aux_ext in self.aux_files.items():
            aux = by_ext[aux_ext]
            if not aux:
                raise ArcanaFileFormatError(
                    "No files match auxiliary file extension '{}' of {} out of"
                    " potential candidates of {}"
                    .format(aux_ext, self, "', '".join(candidates)))
            elif len(aux) > 1:
                raise ArcanaFileFormatError(
                    "Multiple potential files ('{}') for '{}' auxiliary "
                    "file of {}".format("', '".join(aux), aux_name, self))
            else:
                aux_files[aux_name] = aux[0]
        return primary_file, aux_files
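The heart of assort_files is the grouping step: candidates are bucketed by lower-cased extension before the primary and auxiliary files are picked out. A small demonstration with invented filenames:

from collections import defaultdict

candidates = ['scan.nii.gz', 'scan.bvec', 'scan.bval']
by_ext = defaultdict(list)
for path in candidates:
    by_ext[split_extension(path)[1].lower()].append(path)
# by_ext == {'.nii.gz': ['scan.nii.gz'], '.bvec': ['scan.bvec'],
#            '.bval': ['scan.bval']}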
Example #12
 def _gen_outfilename(self):
     if isdefined(self.inputs.out_file):
         out_name = self.inputs.out_file
     else:
         base, ext = split_extension(os.path.basename(self.inputs.in_file))
         if isdefined(self.inputs.out_file_ext):
             extension = self.inputs.out_file_ext
         else:
             extension = ext
         out_name = "{}_preproc{}".format(base, extension)
     return out_name
Example #13
 def __init__(self, filesets, fields, records):
     if filesets is None:
         filesets = []
     if fields is None:
         fields = []
     if records is None:
         records = []
     # Save filesets and fields in ordered dictionaries, keyed by their
     # name/ID and the name of the analysis that generated them
     # (if applicable)
     self._filesets = OrderedDict()
     for fileset in sorted(filesets):
         id_key = (fileset.id, fileset.from_analysis)
         try:
             dct = self._filesets[id_key]
         except KeyError:
             dct = self._filesets[id_key] = OrderedDict()
         if fileset.format_name is not None:
             format_key = fileset.format_name
         else:
             format_key = split_extension(fileset.path)[1]
         if format_key in dct:
             raise ArcanaRepositoryError(
                 "Attempting to add duplicate filesets to tree ({} and {})".
                 format(fileset, dct[format_key]))
         dct[format_key] = fileset
     self._fields = OrderedDict(
         ((f.name, f.from_analysis), f) for f in sorted(fields))
     self._records = OrderedDict(
         ((r.pipeline_name, r.from_analysis), r) for r in sorted(
             records,
             key=lambda r: (r.subject_id, r.visit_id, r.from_analysis)))
     self._missing_records = []
     self._duplicate_records = []
     self._tree = None
     # Match up provenance records with items in the node
     for item in chain(self.filesets, self.fields):
         if not item.derived:
             continue  # Skip acquired items
         item_records = [
             r for r in self.records
             if (item.from_analysis == r.from_analysis
                 and item.name in r.outputs)
         ]
         if not item_records:
             self._missing_records.append(item.name)
         elif len(item_records) > 1:
             # Keep the most recent record when there are duplicates
             item.record = sorted(item_records,
                                  key=attrgetter('datetime'))[-1]
             self._duplicate_records.append(item.name)
         else:
             item.record = item_records[0]
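The two-level keying above is easier to see with concrete (invented) values: filesets are grouped first by (id, from_analysis), then by format name, falling back to the file extension when no format name is recorded:

id_key = ('t1', 'segment_analysis')           # (fileset.id, fileset.from_analysis)
format_key = split_extension('t1.nii.gz')[1]  # '.nii.gz' when format_name is None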
Example #14
 def _link_into_dir(self, fpaths, dirpath):
     """
     Symlinks the given file paths into the given directory, making the
     directory if necessary
     """
     try:
         os.makedirs(dirpath)
     except OSError as e:
         if e.errno != errno.EEXIST:
             raise
     num_digits = int(math.ceil(math.log(len(fpaths), 10)))
     for i, fpath in enumerate(fpaths):
         _, ext = split_extension(fpath)
         os.symlink(fpath,
                    os.path.join(dirpath,
                                 str(i).zfill(num_digits) + ext))
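The zero-padding sizes the link names to the number of files, so the symlinks sort lexically in their original order. For example, with an invented count of 150 files:

import math

num_digits = int(math.ceil(math.log(150, 10)))  # -> 3
print(str(7).zfill(num_digits) + '.nii.gz')     # '007.nii.gz'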
Example #15
 def _download_fileset(cls, tmp_dir, xresource, xfileset, fileset,
                       session_label, cache_path):
     # Download resource to zip file
     zip_path = op.join(tmp_dir, 'download.zip')
     with open(zip_path, 'wb') as f:
         xresource.xnat_session.download_stream(xresource.uri + '/files',
                                                f,
                                                format='zip',
                                                verbose=True)
     digests = cls._get_digests(xresource)
     # Extract downloaded zip file
     expanded_dir = op.join(tmp_dir, 'expanded')
     try:
         with ZipFile(zip_path) as zip_file:
             zip_file.extractall(expanded_dir)
     except BadZipfile as e:
         raise ArcanaError("Could not unzip file '{}' ({})".format(
             xresource.id, e))
     data_path = op.join(
         expanded_dir, session_label, 'scans',
         (xfileset.id + '-' + special_char_re.sub('_', xfileset.type)),
         'resources', xresource.label, 'files')
     if not fileset.format.directory:
          # If the data format is not a directory (e.g. DICOM),
          # attempt to locate a single file within the resource
          # directory with the appropriate extension and use that
          # as the complete data path
         fnames = os.listdir(data_path)
         match_fnames = [
             f for f in fnames if (lower(split_extension(f)[-1]) == lower(
                 fileset.format.extension))
         ]
         if len(match_fnames) == 1:
             data_path = op.join(data_path, match_fnames[0])
         else:
             raise ArcanaMissingDataException(
                 "Did not find single file with extension '{}' "
                 "(found '{}') in resource '{}'".format(
                     fileset.format.extension, "', '".join(fnames),
                     data_path))
     shutil.move(data_path, cache_path)
     with open(cache_path + XnatRepository.MD5_SUFFIX, 'w',
               **JSON_ENCODING) as f:
         json.dump(digests, f)
     shutil.rmtree(tmp_dir)
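The single-file location step can be exercised in isolation. A sketch assuming a case-insensitive lower helper that, like the project's, presumably tolerates None extensions (filenames invented):

def lower(s):
    return s.lower() if s is not None else None

fnames = ['scan.DCM', 'notes.txt']
match_fnames = [f for f in fnames
                if lower(split_extension(f)[-1]) == lower('.dcm')]
# match_fnames == ['scan.DCM'], so the single DICOM file is selected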
Example #16
File: bids.py Project: amrka/banana
 def __init__(
         self,
         path,
         type,
         subject_id,
         visit_id,
         repository,  # @ReservedAssignment @IgnorePep8
         modality=None,
         task=None,
         checksums=None):
     Fileset.__init__(self,
                      name=op.basename(path),
                      format=FileFormat.by_ext(split_extension(path)[1]),
                      frequency='per_session',
                      path=path,
                      subject_id=subject_id,
                      visit_id=visit_id,
                      repository=repository,
                      checksums=checksums)
     BaseBidsFileset.__init__(self, type, modality, task)
Example #17
 def _list_outputs(self):
     if (not isdefined(self.inputs.compression)
             or (self.inputs.compression == 'y'
                 or self.inputs.compression == 'i')):
         im_ext = '.nii.gz'
     else:
         im_ext = '.nii'
     outputs = self._outputs().get()
      # As dcm2niix sometimes adds echo/coil counters to the filenames to
      # avoid name clashes between multiple echos, check the output folder
      # for all filenames that contain the generated filename
     out_dir = self._gen_filename('out_dir')
     fname = self._gen_filename('filename') + im_ext
     base, ext = split_extension(fname)
      match_re = re.compile(r'(_e\d+)?{}(_(?:e|c)\d+)?{}'.format(
          re.escape(base), re.escape(ext) if ext is not None else ''))
     products = [
         os.path.join(out_dir, f) for f in os.listdir(out_dir)
         if match_re.match(f) is not None
     ]
     if len(products) == 1:
         converted = products[0]
     elif len(products) > 1 and self.inputs.multifile_concat:
         ex_file = nib.load(products[0])
         data = ex_file.get_data()
         merged_file = np.zeros(
             (data.shape[0], data.shape[1], data.shape[2], len(products)))
         for i, el in enumerate(products):
             f = nib.load(el)
             merged_file[:, :, :, i] = f.get_data()
          im2save = nib.Nifti1Image(merged_file, ex_file.affine)
          converted = os.path.join(out_dir, fname)
          nib.save(im2save, converted)
     elif len(products) > 1 and not self.inputs.multifile_concat:
         converted = products[-1]
     else:
         raise ArcanaError("No products produced by dcm2niix ({})".format(
             ', '.join(os.listdir(out_dir))))
     outputs['converted'] = converted
     return outputs
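The matching regex allows for the echo ('_e<N>') and coil ('_c<N>') counters that dcm2niix may insert around the requested name. A quick check with invented filenames:

import re

base, ext = 'scan', '.nii.gz'
match_re = re.compile(r'(_e\d+)?{}(_(?:e|c)\d+)?{}'.format(
    re.escape(base), re.escape(ext)))
for f in ['scan.nii.gz', '_e1scan.nii.gz', 'scan_e2.nii.gz', 'other.nii.gz']:
    print(f, bool(match_re.match(f)))  # only the last one fails to match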
Example #18
def download_resource(download_path, dataset, file_format, session_label):
    xresource = None
    for resource_name in file_format.xnat_resource_names:
        try:
            xresource = dataset.resources[resource_name]
            break
        except KeyError:
            logger.debug("Did not find resource corresponding to '{}' for {}, "
                         "will try alternatives if available".format(
                             resource_name, dataset))
            continue
    if xresource is None:
        raise ArcanaError(
            "Did not find any of the '{}' resource(s) in the {} "
            "dataset".format("', '".join(file_format.xnat_resource_names),
                             dataset.type))
    tmp_dir = download_path + '.download'
    xresource.download_dir(tmp_dir)
    dataset_label = dataset.id + '-' + special_char_re.sub('_', dataset.type)
    src_path = os.path.join(tmp_dir, session_label, 'scans', dataset_label,
                            'resources', xresource.label, 'files')
    if not file_format.directory:
        fnames = os.listdir(src_path)
        match_fnames = [
            f for f in fnames
            if lower(split_extension(f)[-1]) == lower(file_format.extension)
        ]
        if len(match_fnames) == 1:
            src_path = os.path.join(src_path, match_fnames[0])
        else:
            raise ArcanaMissingDataException(
                "Did not find single file with extension '{}' "
                "(found '{}') in resource '{}'".format(file_format.extension,
                                                       "', '".join(fnames),
                                                       src_path))
    shutil.move(src_path, download_path)
    shutil.rmtree(tmp_dir)
Example #19
    def matches(self, fileset):
        """
        Checks to see whether the format matches the given fileset

        Parameters
        ----------
        fileset : Fileset
            The fileset to check
        """
        if fileset._resource_name is not None:
            return (fileset._resource_name in self.resource_names(
                fileset.repository.type))
        elif self.directory:
            if op.isdir(fileset.path):
                if self.within_dir_exts is None:
                    return True
                else:
                    # Get set of all extensions in the directory
                    return self.within_dir_exts == frozenset(
                        split_extension(f)[1] for f in os.listdir(fileset.path)
                        if not f.startswith('.'))
            else:
                return False
        else:
            if op.isfile(fileset.path):
                all_paths = [fileset.path]
                if fileset._potential_aux_files is not None:
                    all_paths += fileset._potential_aux_files
                try:
                    primary_path = self.assort_files(all_paths)[0]
                except ArcanaFileFormatError:
                    return False
                else:
                    return primary_path == fileset.path
            else:
                return False
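For directory formats the check reduces to a set comparison: the extensions present in the directory (hidden files excluded) must equal the format's declared within_dir_exts. An illustration with invented contents:

files_in_dir = ['1.dcm', '2.dcm', '.DS_Store']
exts = frozenset(split_extension(f)[1] for f in files_in_dir
                 if not f.startswith('.'))
# exts == frozenset({'.dcm'}), so a format whose within_dir_exts is
# frozenset({'.dcm'}) matches this directory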
Example #20
    def find_data(self, dataset, subject_ids=None, visit_ids=None, **kwargs):
        """
        Find all data within a repository, registering filesets, fields and
        provenance with the found_fileset, found_field and found_provenance
        methods, respectively

        Parameters
        ----------
        dataset : Dataset
            The dataset in which to find the data
        subject_ids : list(str)
            List of subject IDs with which to filter the tree. If None,
            all subjects are returned
        visit_ids : list(str)
            List of visit IDs with which to filter the tree. If None,
            all visits are returned
        root_dir : str
            The root dir to use instead of the 'name' (path) of the dataset.
            Only for use in sub-classes (e.g. BIDS)
        all_from_analysis : str
            Global 'from_analysis' to be applied to every found item.
            Only for use in sub-classes (e.g. BIDS)

        Returns
        -------
        filesets : list[Fileset]
            All the filesets found in the repository
        fields : list[Field]
            All the fields found in the repository
        records : list[Record]
            The provenance records found in the repository
        """
        all_filesets = []
        all_fields = []
        all_records = []
        root_dir = dataset.name
        for session_path, dirs, files in os.walk(root_dir):
            relpath = op.relpath(session_path, root_dir)
            path_parts = relpath.split(op.sep) if relpath != '.' else []
            ids = self._extract_ids_from_path(dataset.depth, path_parts, dirs,
                                              files)
            if ids is None:
                continue
            subj_id, visit_id, from_analysis = ids
            # Check for summaries and filtered IDs
            if subj_id == self.SUMMARY_NAME:
                subj_id = None
            elif subject_ids is not None and subj_id not in subject_ids:
                continue
            if visit_id == self.SUMMARY_NAME:
                visit_id = None
            elif visit_ids is not None and visit_id not in visit_ids:
                continue
            # Map IDs into ID space of analysis
            subj_id = dataset.map_subject_id(subj_id)
            visit_id = dataset.map_visit_id(visit_id)
            # Determine frequency of session|summary
            if (subj_id, visit_id) == (None, None):
                frequency = 'per_dataset'
            elif subj_id is None:
                frequency = 'per_visit'
            elif visit_id is None:
                frequency = 'per_subject'
            else:
                frequency = 'per_session'
            filtered_files = self._filter_files(files, session_path)
            for fname in filtered_files:
                basename = split_extension(fname)[0]
                all_filesets.append(
                    Fileset.from_path(op.join(session_path, fname),
                                      frequency=frequency,
                                      subject_id=subj_id,
                                      visit_id=visit_id,
                                      dataset=dataset,
                                      from_analysis=from_analysis,
                                      potential_aux_files=[
                                          f for f in filtered_files
                                          if (split_extension(f)[0] == basename
                                              and f != fname)
                                      ],
                                      **kwargs))
            for fname in self._filter_dirs(dirs, session_path):
                all_filesets.append(
                    Fileset.from_path(op.join(session_path, fname),
                                      frequency=frequency,
                                      subject_id=subj_id,
                                      visit_id=visit_id,
                                      dataset=dataset,
                                      from_analysis=from_analysis,
                                      **kwargs))
            if self.FIELDS_FNAME in files:
                with open(op.join(session_path, self.FIELDS_FNAME), 'r') as f:
                    dct = json.load(f)
                all_fields.extend(
                    Field(name=k,
                          value=v,
                          frequency=frequency,
                          subject_id=subj_id,
                          visit_id=visit_id,
                          dataset=dataset,
                          from_analysis=from_analysis,
                          **kwargs) for k, v in list(dct.items()))
            if self.PROV_DIR in dirs:
                if from_analysis is None:
                    raise ArcanaRepositoryError(
                        "Found provenance directory in session directory (i.e."
                        " not in analysis-specific sub-directory)")
                base_prov_dir = op.join(session_path, self.PROV_DIR)
                for fname in os.listdir(base_prov_dir):
                    all_records.append(
                        Record.load(
                            split_extension(fname)[0], frequency, subj_id,
                            visit_id, from_analysis,
                            op.join(base_prov_dir, fname)))
        return all_filesets, all_fields, all_records
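The frequency rule in find_data is self-contained enough to lift out. A sketch with hypothetical IDs (summary rows have had their subject and/or visit ID replaced with None):

def frequency_of(subj_id, visit_id):
    if (subj_id, visit_id) == (None, None):
        return 'per_dataset'
    elif subj_id is None:
        return 'per_visit'
    elif visit_id is None:
        return 'per_subject'
    return 'per_session'

assert frequency_of('sub01', 'visit01') == 'per_session'
assert frequency_of(None, 'visit01') == 'per_visit'
assert frequency_of('sub01', None) == 'per_subject'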