def _gen_outfilename(self):
    """Return the output path: the explicit ``out_file`` input if set,
    otherwise '<in_file base>_threshold<ext>' in the current working
    directory."""
    if isdefined(self.inputs.out_file):
        return self.inputs.out_file
    base, ext = split_extension(os.path.basename(self.inputs.in_file))
    return os.path.join(os.getcwd(), "{}_threshold{}".format(base, ext))
def _gen_outfilename(self):
    """Return the output path: the explicit ``out_file`` input if set,
    otherwise '<roi_file base>_fitted_params.mat' in the current working
    directory."""
    if isdefined(self.inputs.out_file):
        return self.inputs.out_file
    base, _ = split_extension(os.path.basename(self.inputs.roi_file))
    return os.path.join(os.getcwd(), "{}_fitted_params.mat".format(base))
def _gen_grad_filename(self, comp):
    """Return the gradient-component file path for *comp*: the explicit
    '<comp>s_file' input if set, otherwise
    '<in_file base>_<comp>s.<comp>' in the current working directory."""
    filename = getattr(self.inputs, comp + 's_file')
    if isdefined(filename):
        return filename
    base, _ = split_extension(os.path.basename(self.inputs.in_file))
    return os.path.join(
        os.getcwd(), "{base}_{comp}s.{comp}".format(base=base, comp=comp))
def _gen_outfilename(self):
    """Return the output path: the explicit ``out_file`` input if set,
    otherwise '<first in_file base>_avg<ext>' in the current working
    directory."""
    if isdefined(self.inputs.out_file):
        return self.inputs.out_file
    first_input = os.path.basename(self.inputs.in_files[0])
    base, ext = split_extension(first_input)
    return os.path.join(os.getcwd(), "{}_avg{}".format(base, ext))
def _gen_outfilename(self):
    """Return the output path: the explicit ``out_file`` input if set,
    otherwise '<in_file base>_dicom' in the current working directory."""
    if isdefined(self.inputs.out_file):
        return self.inputs.out_file
    base = split_extension(os.path.basename(self.inputs.in_file))[0]
    return os.path.join(os.getcwd(), base + '_dicom')
def detect_format(path, aux_files):
    """Return the first BIDS format whose extension and set of auxiliary
    file names match *path* and *aux_files*; raise
    BananaUnrecognisedBidsFormat if none does."""
    ext = split_extension(path)[1]
    aux_names = set(aux_files.keys())
    matching = next(
        (f for f in BIDS_FORMATS
         if f.extension == ext and set(f.aux_files.keys()) == aux_names),
        None)
    if matching is not None:
        return matching
    raise BananaUnrecognisedBidsFormat(
        "No matching BIDS format matches provided path ({}) and aux files ({})"
        .format(path, aux_files))
def _gen_outfilename(self):
    """Return the output path: the explicit ``out_file`` input if set,
    otherwise a name built from each operand (file base name for file
    operands, numeric value otherwise) plus the operation name, with the
    extension taken from ``out_ext`` or the first operand.

    Returns
    -------
    str
        Path of the output file.
    """
    if isdefined(self.inputs.out_file):
        return self.inputs.out_file
    if isdefined(self.inputs.out_ext):
        ext = self.inputs.out_ext
    else:
        _, ext = split_extension(
            os.path.basename(self.inputs.operands[0]))
    # NOTE: operand strings are appended directly onto the CWD path with
    # no path separator (preserves the original behaviour).
    filename = os.getcwd()
    for op in self.inputs.operands:
        try:
            # File operand: use its base name without the extension
            op_str = split_extension(os.path.basename(op))[0]
        except Exception:  # BUGFIX: was a bare 'except:', which also
            # swallowed KeyboardInterrupt/SystemExit. Numeric operands
            # (which make basename/split_extension raise) land here.
            op_str = str(float(op))
        filename += '_' + op_str
    filename += '_' + self.inputs.operation + ext
    return filename
def _gen_outfilename(self):
    """Return the output path: the explicit ``out_file`` input if set,
    otherwise '<in_file base>_<b0|dw><ext>' in the current working
    directory, where the extension comes from ``out_ext`` when defined."""
    if isdefined(self.inputs.out_file):
        return self.inputs.out_file
    base, in_ext = split_extension(os.path.basename(self.inputs.in_file))
    ext = self.inputs.out_ext if isdefined(self.inputs.out_ext) else in_ext
    suffix = 'b0' if isdefined(self.inputs.bzero) else 'dw'
    return os.path.join(os.getcwd(),
                        "{}_{}{}".format(base, suffix, ext))
def from_path(cls, path, **kwargs):
    """Alternate constructor: build an instance from a filesystem path,
    using the base name (extension stripped for plain files) as the
    fileset name. Raises ArcanaUsageError if the path does not exist."""
    if not op.exists(path):
        raise ArcanaUsageError(
            "Attempting to read Fileset from path '{}' but it "
            "does not exist".format(path))
    basename = op.basename(path)
    if not op.isdir(path):
        # Strip the extension for regular files; directories keep it
        basename = split_extension(basename)[0]
    return cls(basename, path=path, **kwargs)
def _gen_outfilename(self):
    """Return the output path: the explicit ``out_file`` input (which must
    end in '.mat') if set, otherwise '<in_file base>_ROI.mat' in the
    current working directory."""
    if isdefined(self.inputs.out_file):
        out_name = self.inputs.out_file
        if not out_name.endswith('.mat'):
            raise ArcanaError(
                "Output NODDI ROI should be saved with '.mat' extension "
                "(provided '{}')".format(out_name))
        return out_name
    base, _ = split_extension(os.path.basename(self.inputs.in_file))
    return os.path.join(os.getcwd(), "{}_ROI.mat".format(base))
def assort_files(self, candidates):
    """
    Assorts candidate files into primary and auxiliary (and ignored)
    files corresponding to the format by their file extensions. Can be
    overridden in specialised subclasses to assort files based on other
    characteristics

    Parameters
    ----------
    candidates : list[str]
        The list of filenames to assort

    Returns
    -------
    primary_file : str
        Path to the selected primary file
    aux_files : dict[str, str]
        A dictionary mapping the auxiliary file name to the selected path

    Raises
    ------
    ArcanaFileFormatError
        If no (or multiple) candidates match the primary or an auxiliary
        extension.
    """
    # Group the candidate paths by lower-cased extension
    by_ext = defaultdict(list)
    for path in candidates:
        by_ext[split_extension(path)[1].lower()].append(path)
    # NOTE: by_ext is a defaultdict so this lookup never raises KeyError;
    # the empty-list case is caught by the 'if not primary_file' below.
    try:
        primary_file = by_ext[self.ext]
    except KeyError:
        raise ArcanaFileFormatError(
            "No files match primary file extension of {} out of "
            "potential candidates of {}"
            .format(self, "', '".join(candidates)))
    if not primary_file:
        raise ArcanaFileFormatError(
            "No potential files for primary file of {}".format(self))
    elif len(primary_file) > 1:
        raise ArcanaFileFormatError(
            "Multiple potential files for '{}' primary file of {}"
            .format("', '".join(primary_file), self))
    else:
        primary_file = primary_file[0]
    aux_files = {}
    for aux_name, aux_ext in self.aux_files.items():
        aux = by_ext[aux_ext]
        if not aux:
            raise ArcanaFileFormatError(
                "No files match auxiliary file extension '{}' of {} out of"
                " potential candidates of {}"
                .format(aux_ext, self, "', '".join(candidates)))
        elif len(aux) > 1:
            # BUGFIX: '.format' previously bound only to the second of two
            # concatenated string literals, leaving a literal '{}' in the
            # rendered message and omitting the extension argument.
            raise ArcanaFileFormatError(
                "Multiple potential files for '{}' auxiliary file ext. "
                "({}) of {}".format("', '".join(aux), aux_ext, self))
        else:
            aux_files[aux_name] = aux[0]
    return primary_file, aux_files
def _gen_outfilename(self):
    """Return the output file name: the explicit ``out_file`` input if
    set, otherwise '<in_file base>_preproc<ext>', where the extension
    comes from ``out_file_ext`` when defined (note: no directory is
    prepended)."""
    if isdefined(self.inputs.out_file):
        return self.inputs.out_file
    base, ext = split_extension(os.path.basename(self.inputs.in_file))
    if isdefined(self.inputs.out_file_ext):
        ext = self.inputs.out_file_ext
    return "{}_preproc{}".format(base, ext)
def __init__(self, filesets, fields, records):
    """Store the node's filesets, fields and provenance records in
    ordered dictionaries and match each derived item to the provenance
    record that produced it.

    Parameters
    ----------
    filesets : list[Fileset] | None
        Filesets in this node (None is treated as empty)
    fields : list[Field] | None
        Fields in this node (None is treated as empty)
    records : list[Record] | None
        Provenance records in this node (None is treated as empty)
    """
    if filesets is None:
        filesets = []
    if fields is None:
        fields = []
    if records is None:
        records = []
    # Save filesets and fields in ordered dictionary by name and
    # name of analysis that generated them (if applicable)
    self._filesets = OrderedDict()
    for fileset in sorted(filesets):
        id_key = (fileset.id, fileset.from_analysis)
        try:
            dct = self._filesets[id_key]
        except KeyError:
            # First fileset with this (id, from_analysis) pair
            dct = self._filesets[id_key] = OrderedDict()
        # Key the inner dict by format name, falling back to the file
        # extension when the format is unknown
        if fileset.format_name is not None:
            format_key = fileset.format_name
        else:
            format_key = split_extension(fileset.path)[1]
        if format_key in dct:
            raise ArcanaRepositoryError(
                "Attempting to add duplicate filesets to tree ({} and {})".
                format(fileset, dct[format_key]))
        dct[format_key] = fileset
    self._fields = OrderedDict(
        ((f.name, f.from_analysis), f) for f in sorted(fields))
    # Records are keyed by (pipeline, analysis) and ordered by
    # (subject, visit, analysis)
    self._records = OrderedDict(
        ((r.pipeline_name, r.from_analysis), r)
        for r in sorted(
            records,
            key=lambda r: (r.subject_id, r.visit_id, r.from_analysis)))
    self._missing_records = []
    self._duplicate_records = []
    self._tree = None
    # Match up provenance records with items in the node
    for item in chain(self.filesets, self.fields):
        if not item.derived:
            continue  # Skip acquired items
        records = [
            r for r in self.records
            if (item.from_analysis == r.from_analysis
                and item.name in r.outputs)]
        if not records:
            self._missing_records.append(item.name)
        elif len(records) > 1:
            # Multiple matching records: keep the most recent one but
            # remember the item name as a duplicate
            item.record = sorted(records, key=attrgetter('datetime'))[-1]
            self._duplicate_records.append(item.name)
        else:
            item.record = records[0]
def _link_into_dir(self, fpaths, dirpath):
    """
    Symlinks the given file paths into the given directory, making the
    directory if necessary

    Parameters
    ----------
    fpaths : list[str]
        Paths of the files to symlink. Links are named with zero-padded
        indices ('0<ext>', '1<ext>', ...), preserving list order.
    dirpath : str
        Directory in which to create the symlinks (created if missing)
    """
    try:
        os.makedirs(dirpath)
    except OSError as e:
        # Directory already existing is fine; re-raise anything else
        if e.errno != errno.EEXIST:
            raise
    if not fpaths:
        # BUGFIX: math.log(0, 10) raises ValueError on an empty list;
        # nothing to link in that case
        return
    num_digits = int(math.ceil(math.log(len(fpaths), 10)))
    for i, fpath in enumerate(fpaths):
        _, ext = split_extension(fpath)
        os.symlink(fpath,
                   os.path.join(dirpath, str(i).zfill(num_digits) + ext))
def _download_fileset(cls, tmp_dir, xresource, xfileset, fileset,
                      session_label, cache_path):
    """Download an XNAT resource as a zip archive into *tmp_dir*, extract
    it, locate the fileset's data within the extracted tree and move it
    to *cache_path*, writing an MD5-digest sidecar file alongside.

    Parameters
    ----------
    tmp_dir : str
        Temporary working directory (removed on success)
    xresource : xnat resource object
        Resource to download (provides uri / xnat_session / id / label)
    xfileset : xnat scan object
        Scan the resource belongs to (provides id / type)
    fileset : Fileset
        Fileset being cached (provides the expected file format)
    session_label : str
        Label of the session, used to locate data in the extracted zip
    cache_path : str
        Destination path for the downloaded data
    """
    # Download resource to zip file
    zip_path = op.join(tmp_dir, 'download.zip')
    with open(zip_path, 'wb') as f:
        xresource.xnat_session.download_stream(
            xresource.uri + '/files', f, format='zip', verbose=True)
    digests = cls._get_digests(xresource)
    # Extract downloaded zip file
    expanded_dir = op.join(tmp_dir, 'expanded')
    try:
        with ZipFile(zip_path) as zip_file:
            zip_file.extractall(expanded_dir)
    except BadZipfile as e:
        raise ArcanaError("Could not unzip file '{}' ({})".format(
            xresource.id, e))
    # Path layout inside the zip mirrors XNAT's
    # <session>/scans/<id>-<type>/resources/<label>/files structure
    # (special characters in the scan type are replaced with '_')
    data_path = op.join(
        expanded_dir, session_label, 'scans',
        (xfileset.id + '-' + special_char_re.sub('_', xfileset.type)),
        'resources', xresource.label, 'files')
    if not fileset.format.directory:
        # If the dataformat is not a directory (e.g. DICOM),
        # attempt to locate a single file within the resource
        # directory with the appropriate filename and add that
        # to be the complete data path.
        fnames = os.listdir(data_path)
        match_fnames = [
            f for f in fnames
            if (lower(split_extension(f)[-1]) == lower(
                fileset.format.extension))]
        if len(match_fnames) == 1:
            data_path = op.join(data_path, match_fnames[0])
        else:
            raise ArcanaMissingDataException(
                "Did not find single file with extension '{}' "
                "(found '{}') in resource '{}'".format(
                    fileset.format.extension, "', '".join(fnames),
                    data_path))
    shutil.move(data_path, cache_path)
    # Write the MD5 digests next to the cached data for later validation
    with open(cache_path + XnatRepository.MD5_SUFFIX, 'w',
              **JSON_ENCODING) as f:
        json.dump(digests, f)
    shutil.rmtree(tmp_dir)
def __init__(
        self, path, type, subject_id, visit_id, repository,  # @ReservedAssignment @IgnorePep8
        modality=None, task=None, checksums=None):
    """Construct a per-session BIDS fileset from *path*, taking its name
    from the file's base name and its format from the file extension."""
    file_format = FileFormat.by_ext(split_extension(path)[1])
    Fileset.__init__(
        self,
        name=op.basename(path),
        format=file_format,
        frequency='per_session',
        path=path,
        subject_id=subject_id,
        visit_id=visit_id,
        repository=repository,
        checksums=checksums)
    BaseBidsFileset.__init__(self, type, modality, task)
def _list_outputs(self):
    """Collect the converted NIfTI output of dcm2niix, concatenating
    multiple echo/channel products into a single 4D image when
    ``multifile_concat`` is set.

    Returns
    -------
    dict
        Outputs dictionary with 'converted' set to the output image path.

    Raises
    ------
    ArcanaError
        If dcm2niix produced no files matching the generated name.
    """
    if (not isdefined(self.inputs.compression)
            or (self.inputs.compression == 'y'
                or self.inputs.compression == 'i')):
        im_ext = '.nii.gz'
    else:
        im_ext = '.nii'
    outputs = self._outputs().get()
    # As Dcm2niix sometimes prepends a prefix onto the filenames to avoid
    # name clashes with multiple echos, we need to check the output folder
    # for all filenames that end with the "generated filename".
    out_dir = self._gen_filename('out_dir')
    fname = self._gen_filename('filename') + im_ext
    base, ext = split_extension(fname)
    # NOTE(review): 'base' is interpolated unescaped into the regex —
    # assumes generated filenames contain no regex metacharacters
    match_re = re.compile(r'(_e\d+)?{}(_(?:e|c)\d+)?{}'.format(
        base, ext if ext is not None else ''))
    products = [
        os.path.join(out_dir, f) for f in os.listdir(out_dir)
        if match_re.match(f) is not None]
    if len(products) == 1:
        converted = products[0]
    elif len(products) > 1 and self.inputs.multifile_concat:
        # Stack all products along a new 4th dimension
        ex_file = nib.load(products[0])
        data = ex_file.get_data()
        merged_file = np.zeros(
            (data.shape[0], data.shape[1], data.shape[2], len(products)))
        for i, el in enumerate(products):
            f = nib.load(el)
            merged_file[:, :, :, i] = f.get_data()
        im2save = nib.Nifti1Image(merged_file, ex_file.affine)
        # BUGFIX: was 'out_dir + fname', which concatenates without a
        # path separator (inconsistent with the os.path.join used for
        # 'products' above)
        out_path = os.path.join(out_dir, fname)
        nib.save(im2save, out_path)
        converted = out_path
    elif len(products) > 1 and not self.inputs.multifile_concat:
        converted = products[-1]
    else:
        raise ArcanaError("No products produced by dcm2niix ({})".format(
            ', '.join(os.listdir(out_dir))))
    outputs['converted'] = converted
    return outputs
def download_resource(download_path, dataset, file_format, session_label):
    """Download the dataset's resource matching one of the file format's
    XNAT resource names to *download_path*.

    Parameters
    ----------
    download_path : str
        Destination path for the downloaded data
    dataset : xnat scan object
        Scan whose resource is to be downloaded
    file_format : FileFormat
        Format that determines the candidate resource names/extension
    session_label : str
        Session label used to locate files within the downloaded tree

    Raises
    ------
    ArcanaError
        If none of the format's resource names is present on the dataset.
    ArcanaMissingDataException
        If no single file with the expected extension is found.
    """
    xresource = None
    for resource_name in file_format.xnat_resource_names:
        try:
            xresource = dataset.resources[resource_name]
            break
        except KeyError:
            logger.debug(
                "Did not find resource corresponding to '{}' for {}, "
                "will try alternatives if available".format(
                    resource_name, dataset))
            continue
    if xresource is None:
        # BUGFIX: the original format string had four '{}' placeholders
        # but only two arguments, so raising it crashed with IndexError
        # instead of the intended ArcanaError
        raise ArcanaError(
            "Didn't find '{}' resource(s) in {} dataset".format(
                "', '".join(file_format.xnat_resource_names),
                dataset.type))
    tmp_dir = download_path + '.download'
    xresource.download_dir(tmp_dir)
    # Downloaded tree mirrors XNAT's layout:
    # <session>/scans/<id>-<type>/resources/<label>/files
    dataset_label = dataset.id + '-' + special_char_re.sub('_', dataset.type)
    src_path = os.path.join(tmp_dir, session_label, 'scans', dataset_label,
                            'resources', xresource.label, 'files')
    if not file_format.directory:
        # Non-directory formats: locate the single file with the
        # expected extension inside the resource directory
        fnames = os.listdir(src_path)
        match_fnames = [
            f for f in fnames
            if lower(split_extension(f)[-1]) == lower(file_format.extension)]
        if len(match_fnames) == 1:
            src_path = os.path.join(src_path, match_fnames[0])
        else:
            raise ArcanaMissingDataException(
                "Did not find single file with extension '{}' "
                "(found '{}') in resource '{}'".format(
                    file_format.extension, "', '".join(fnames), src_path))
    shutil.move(src_path, download_path)
    shutil.rmtree(tmp_dir)
def matches(self, fileset):
    """
    Checks to see whether the format matches the given fileset

    Parameters
    ----------
    fileset : Fileset
        The fileset to check
    """
    # An explicit resource name takes precedence over any path checks
    if fileset._resource_name is not None:
        return (fileset._resource_name
                in self.resource_names(fileset.repository.type))
    if self.directory:
        if not op.isdir(fileset.path):
            return False
        if self.within_dir_exts is None:
            return True
        # Compare the set of extensions of (non-hidden) directory entries
        found_exts = frozenset(
            split_extension(entry)[1]
            for entry in os.listdir(fileset.path)
            if not entry.startswith('.'))
        return self.within_dir_exts == found_exts
    if not op.isfile(fileset.path):
        return False
    candidate_paths = [fileset.path]
    if fileset._potential_aux_files is not None:
        candidate_paths += fileset._potential_aux_files
    try:
        primary_path = self.assort_files(candidate_paths)[0]
    except ArcanaFileFormatError:
        return False
    return primary_path == fileset.path
def find_data(self, dataset, subject_ids=None, visit_ids=None, **kwargs):
    """
    Find all data within a repository, registering filesets, fields and
    provenance with the found_fileset, found_field and found_provenance
    methods, respectively

    Parameters
    ----------
    subject_ids : list(str)
        List of subject IDs with which to filter the tree with. If
        None all are returned
    visit_ids : list(str)
        List of visit IDs with which to filter the tree with. If
        None all are returned
    root_dir : str
        The root dir to use instead of the 'name' (path) of the dataset.
        Only for use in sub-classes (e.g. BIDS)
    all_from_analysis : str
        Global 'from_analysis' to be applied to every found item.
        Only for use in sub-classes (e.g. BIDS)

    Returns
    -------
    filesets : list[Fileset]
        All the filesets found in the repository
    fields : list[Field]
        All the fields found in the repository
    records : list[Record]
        The provenance records found in the repository
    """
    all_filesets = []
    all_fields = []
    all_records = []
    # if root_dir is None:
    root_dir = dataset.name
    for session_path, dirs, files in os.walk(root_dir):
        relpath = op.relpath(session_path, root_dir)
        # Path components relative to the root determine the IDs; the
        # root itself ('.') has no components
        path_parts = relpath.split(op.sep) if relpath != '.' else []
        ids = self._extract_ids_from_path(dataset.depth, path_parts, dirs,
                                          files)
        if ids is None:
            continue  # Not a session/summary directory
        subj_id, visit_id, from_analysis = ids
        # if all_from_analysis is not None:
        #     if from_analysis is not None:
        #         raise ArcanaRepositoryError(
        #             "Found from_analysis sub-directory '{}' when global "
        #             "from analysis '{}' was passed".format(
        #                 from_analysis, all_from_analysis))
        #     else:
        #         from_analysis = all_from_analysis
        # Check for summaries and filtered IDs
        if subj_id == self.SUMMARY_NAME:
            subj_id = None
        elif subject_ids is not None and subj_id not in subject_ids:
            continue
        if visit_id == self.SUMMARY_NAME:
            visit_id = None
        elif visit_ids is not None and visit_id not in visit_ids:
            continue
        # Map IDs into ID space of analysis
        subj_id = dataset.map_subject_id(subj_id)
        visit_id = dataset.map_visit_id(visit_id)
        # Determine frequency of session|summary
        if (subj_id, visit_id) == (None, None):
            frequency = 'per_dataset'
        elif subj_id is None:
            frequency = 'per_visit'
        elif visit_id is None:
            frequency = 'per_subject'
        else:
            frequency = 'per_session'
        filtered_files = self._filter_files(files, session_path)
        for fname in filtered_files:
            basename = split_extension(fname)[0]
            # Files sharing this file's base name are offered as
            # potential auxiliary files (e.g. NIfTI + bvec/bval)
            all_filesets.append(
                Fileset.from_path(
                    op.join(session_path, fname),
                    frequency=frequency,
                    subject_id=subj_id,
                    visit_id=visit_id,
                    dataset=dataset,
                    from_analysis=from_analysis,
                    potential_aux_files=[
                        f for f in filtered_files
                        if (split_extension(f)[0] == basename
                            and f != fname)],
                    **kwargs))
        for fname in self._filter_dirs(dirs, session_path):
            # Directory-based filesets (e.g. DICOM series)
            all_filesets.append(
                Fileset.from_path(
                    op.join(session_path, fname),
                    frequency=frequency,
                    subject_id=subj_id,
                    visit_id=visit_id,
                    dataset=dataset,
                    from_analysis=from_analysis,
                    **kwargs))
        if self.FIELDS_FNAME in files:
            # Fields are stored as a single JSON name->value mapping
            with open(op.join(session_path, self.FIELDS_FNAME), 'r') as f:
                dct = json.load(f)
            all_fields.extend(
                Field(name=k, value=v, frequency=frequency,
                      subject_id=subj_id, visit_id=visit_id,
                      dataset=dataset, from_analysis=from_analysis,
                      **kwargs)
                for k, v in list(dct.items()))
        if self.PROV_DIR in dirs:
            if from_analysis is None:
                raise ArcanaRepositoryError(
                    "Found provenance directory in session directory (i.e."
                    " not in analysis-specific sub-directory)")
            base_prov_dir = op.join(session_path, self.PROV_DIR)
            for fname in os.listdir(base_prov_dir):
                all_records.append(
                    Record.load(
                        split_extension(fname)[0],
                        frequency, subj_id, visit_id, from_analysis,
                        op.join(base_prov_dir, fname)))
    return all_filesets, all_fields, all_records