def input_tree(self):
    sessions = []
    for subj_id in self.SUBJECT_IDS:
        for visit_id in self.VISIT_IDS:
            sessions.append(
                Session(subj_id, visit_id, filesets=[
                    Fileset('one_input', text_format,
                            subject_id=subj_id, visit_id=visit_id),
                    Fileset('ten_input', text_format,
                            subject_id=subj_id, visit_id=visit_id)]))
    subjects = [
        Subject(i, sessions=[s for s in sessions if s.subject_id == i])
        for i in self.SUBJECT_IDS]
    visits = [
        Visit(i, sessions=[s for s in sessions if s.visit_id == i])
        for i in self.VISIT_IDS]
    return Tree(subjects=subjects, visits=visits)
def input_tree(self):
    filesets = []
    for subj_id in self.SUBJECT_IDS:
        for visit_id in self.VISIT_IDS:
            filesets.append(
                Fileset('one_input', text_format,
                        subject_id=subj_id, visit_id=visit_id))
            filesets.append(
                Fileset('ten_input', text_format,
                        subject_id=subj_id, visit_id=visit_id))
    return Tree.construct(self.dataset.repository, filesets=filesets)
def guess_depth(cls, root_dir):
    """
    Try to guess the depth of a directory repository (i.e. whether it has
    sub-folders for multiple subjects or visits), depending on where files
    and/or derived label files are found in the hierarchy of
    sub-directories under the root dir.

    Parameters
    ----------
    root_dir : str
        Path to the root directory of the repository
    """
    deepest = -1
    for path, dirs, files in os.walk(root_dir):
        depth = cls.path_depth(root_dir, path)
        filtered_files = cls._filter_files(files, path)
        if filtered_files:
            logger.info(
                "Guessing depth of directory repository at '{}' is"
                " {} due to unfiltered files ('{}') in '{}'".format(
                    root_dir, depth, "', '".join(filtered_files), path))
            return depth
        if cls.PROV_DIR in dirs:
            depth_to_return = max(depth - 1, 0)
            logger.info(
                "Guessing depth of directory repository at '{}' is"
                " {} due to \"Derived label file\" in '{}'".format(
                    root_dir, depth_to_return, path))
            return depth_to_return
        if depth >= cls.MAX_DEPTH:
            logger.info(
                "Guessing depth of directory repository at '{}' is"
                " {} as '{}' is already at maximum depth".format(
                    root_dir, cls.MAX_DEPTH, path))
            return cls.MAX_DEPTH
        try:
            for fpath in chain(filtered_files,
                               cls._filter_dirs(dirs, path)):
                Fileset.from_path(fpath)
        except ArcanaError:
            pass
        else:
            if depth > deepest:
                deepest = depth
    if deepest == -1:
        raise ArcanaRepositoryError(
            "Could not guess depth of '{}' repository as did not find "
            "a valid session directory within sub-directories.".format(
                root_dir))
    return deepest
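# A minimal usage sketch of guess_depth (hedged: the repository class name
# `DirectoryRepo` is an assumption, substitute whichever class defines the
# method above, and whether a plain '.txt' file survives _filter_files is
# also assumed). It builds a two-level subject/visit layout, for which the
# guessed depth should be 2.
import os
import tempfile

root_dir = tempfile.mkdtemp()
session_dir = os.path.join(root_dir, 'subject1', 'visit1')
os.makedirs(session_dir)
with open(os.path.join(session_dir, 'ones.txt'), 'w') as f:
    f.write('1\n')

# depth = DirectoryRepo.guess_depth(root_dir)  # hypothetical class name
# assert depth == 2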
def input_tree(self):
    sessions = []
    visit_ids = set()
    for subj_id, visits in list(self.PROJECT_STRUCTURE.items()):
        for visit_id, filesets in list(visits.items()):
            sessions.append(
                Session(subj_id, visit_id, filesets=[
                    Fileset(d, text_format,
                            subject_id=subj_id, visit_id=visit_id,
                            from_study=(self.STUDY_NAME
                                        if d != 'one' else None))
                    for d in filesets]))
            visit_ids.add(visit_id)
    subjects = [
        Subject(i, sessions=[s for s in sessions if s.subject_id == i])
        for i in self.PROJECT_STRUCTURE]
    visits = [
        Visit(i, sessions=[s for s in sessions if s.visit_id == i])
        for i in visit_ids]
    return Tree(subjects=subjects, visits=visits)
def input_directory(self):
    path = op.join(self.tempdir, 'directory')
    if not op.exists(path):
        # Create directory
        os.makedirs(path)
        with open(op.join(path, 'dummy.txt'), 'w') as f:
            f.write('blah')
    return Fileset.from_path(path, format=directory_format)
def input_zip(self):
    path = op.join(self.tempdir, 'zip.zip')
    if not op.exists(path):
        # Create zip file
        zipper = ZipDir()
        zipper.inputs.dirname = self.input_directory.path
        zipper.inputs.zipped = path
        zipper.run()
    return Fileset.from_path(path, format=zip_format)
def input_tree(self):
    filesets = []
    for subj_id, visits in list(self.STRUCTURE.items()):
        for visit_id, fileset_names in list(visits.items()):
            filesets.extend(
                Fileset(d, text_format, subject_id=subj_id,
                        visit_id=visit_id)
                for d in fileset_names)
    return Tree.construct(self.dataset.repository, filesets=filesets)
def input_tree(self):
    filesets = []
    for subj_id, visit_ids in list(self.PROJECT_STRUCTURE.items()):
        for visit_id, fileset_names in list(visit_ids.items()):
            # Create filesets
            for name in fileset_names:
                from_analysis = (self.STUDY_NAME
                                 if name != 'one' else None)
                filesets.append(
                    Fileset(name, text_format, subject_id=subj_id,
                            visit_id=visit_id,
                            from_analysis=from_analysis))
    return Tree.construct(self.dataset.repository, filesets=filesets)
def setUp(self):
    self.reset_dirs()
    if self.INPUTS_FROM_REF_DIR:
        filesets = {}
        # Unzip reference directory if required
        if not os.path.exists(self.ref_dir) and os.path.exists(
                self.ref_dir + '.tar.gz'):
            sp.check_call(
                'tar xzf {}.tar.gz'.format(self.ref_dir),
                shell=True, cwd=os.path.dirname(self.ref_dir))
        for fname in os.listdir(self.ref_dir):
            if fname.startswith('.'):
                continue
            fileset = Fileset.from_path(op.join(self.ref_dir, fname))
            filesets[fileset.name] = fileset
    else:
        filesets = getattr(self, 'INPUT_DATASETS', None)
    self.add_session(filesets=filesets,
                     fields=getattr(self, 'INPUT_FIELDS', None))
def _get_filesets(self, xsession, **kwargs):
    """
    Returns a list of filesets within an XNAT session

    Parameters
    ----------
    xsession : xnat.classes.MrSessionData
        The XNAT session to extract the filesets from
    freq : str
        The frequency of the returned filesets (either 'per_session',
        'per_subject', 'per_visit', or 'per_study')
    derived : bool
        Whether the session is derived or not

    Returns
    -------
    filesets : list(arcana.data.Fileset)
        List of filesets within an XNAT session
    """
    filesets = []
    for xfileset in xsession.scans.values():
        try:
            file_format = self._guess_file_format(xfileset)
        except ArcanaFileFormatError as e:
            logger.warning(
                "Ignoring '{}' as couldn't guess its file format:\n{}"
                .format(xfileset.type, e))
            continue  # Skip scans whose format couldn't be determined
        filesets.append(
            Fileset(xfileset.type, format=file_format,  # @ReservedAssignment @IgnorePep8
                    id=xfileset.id, uri=xfileset.uri,
                    repository=self, **kwargs))
    return sorted(filesets)
def find_data(self, dataset, subject_ids=None, visit_ids=None, **kwargs):
    """
    Find all data within a repository, registering filesets, fields and
    provenance with the found_fileset, found_field and found_provenance
    methods, respectively

    Parameters
    ----------
    subject_ids : list(str)
        List of subject IDs with which to filter the tree with. If None
        all are returned
    visit_ids : list(str)
        List of visit IDs with which to filter the tree with. If None
        all are returned
    root_dir : str
        The root dir to use instead of the 'name' (path) of the dataset.
        Only for use in sub-classes (e.g. BIDS)
    all_from_analysis : str
        Global 'from_analysis' to be applied to every found item. Only
        for use in sub-classes (e.g. BIDS)

    Returns
    -------
    filesets : list[Fileset]
        All the filesets found in the repository
    fields : list[Field]
        All the fields found in the repository
    records : list[Record]
        The provenance records found in the repository
    """
    all_filesets = []
    all_fields = []
    all_records = []
    # if root_dir is None:
    root_dir = dataset.name
    for session_path, dirs, files in os.walk(root_dir):
        relpath = op.relpath(session_path, root_dir)
        path_parts = relpath.split(op.sep) if relpath != '.' else []
        ids = self._extract_ids_from_path(dataset.depth, path_parts,
                                          dirs, files)
        if ids is None:
            continue
        subj_id, visit_id, from_analysis = ids
        # if all_from_analysis is not None:
        #     if from_analysis is not None:
        #         raise ArcanaRepositoryError(
        #             "Found from_analysis sub-directory '{}' when global "
        #             "from analysis '{}' was passed".format(
        #                 from_analysis, all_from_analysis))
        #     else:
        #         from_analysis = all_from_analysis
        # Check for summaries and filtered IDs
        if subj_id == self.SUMMARY_NAME:
            subj_id = None
        elif subject_ids is not None and subj_id not in subject_ids:
            continue
        if visit_id == self.SUMMARY_NAME:
            visit_id = None
        elif visit_ids is not None and visit_id not in visit_ids:
            continue
        # Map IDs into ID space of analysis
        subj_id = dataset.map_subject_id(subj_id)
        visit_id = dataset.map_visit_id(visit_id)
        # Determine frequency of session|summary
        if (subj_id, visit_id) == (None, None):
            frequency = 'per_dataset'
        elif subj_id is None:
            frequency = 'per_visit'
        elif visit_id is None:
            frequency = 'per_subject'
        else:
            frequency = 'per_session'
        filtered_files = self._filter_files(files, session_path)
        for fname in filtered_files:
            basename = split_extension(fname)[0]
            all_filesets.append(
                Fileset.from_path(
                    op.join(session_path, fname),
                    frequency=frequency,
                    subject_id=subj_id,
                    visit_id=visit_id,
                    dataset=dataset,
                    from_analysis=from_analysis,
                    potential_aux_files=[
                        f for f in filtered_files
                        if (split_extension(f)[0] == basename
                            and f != fname)],
                    **kwargs))
        for fname in self._filter_dirs(dirs, session_path):
            all_filesets.append(
                Fileset.from_path(
                    op.join(session_path, fname),
                    frequency=frequency,
                    subject_id=subj_id,
                    visit_id=visit_id,
                    dataset=dataset,
                    from_analysis=from_analysis,
                    **kwargs))
        if self.FIELDS_FNAME in files:
            with open(op.join(session_path, self.FIELDS_FNAME), 'r') as f:
                dct = json.load(f)
            all_fields.extend(
                Field(name=k, value=v, frequency=frequency,
                      subject_id=subj_id, visit_id=visit_id,
                      dataset=dataset, from_analysis=from_analysis,
                      **kwargs)
                for k, v in list(dct.items()))
        if self.PROV_DIR in dirs:
            if from_analysis is None:
                raise ArcanaRepositoryError(
                    "Found provenance directory in session directory "
                    "(i.e. not in analysis-specific sub-directory)")
            base_prov_dir = op.join(session_path, self.PROV_DIR)
            for fname in os.listdir(base_prov_dir):
                all_records.append(
                    Record.load(
                        split_extension(fname)[0], frequency, subj_id,
                        visit_id, from_analysis,
                        op.join(base_prov_dir, fname)))
    return all_filesets, all_fields, all_records
def get_tree(self, dataset, sync_with_repo=False):
    filesets = [
        # Subject 1
        Fileset('ones', text_format, frequency='per_subject',
                subject_id='subject1', resource_name='text',
                dataset=dataset),
        Fileset('tens', text_format, frequency='per_subject',
                subject_id='subject1', resource_name='text',
                dataset=dataset),
        # subject1/visit1
        Fileset('hundreds', text_format, subject_id='subject1',
                visit_id='visit1', resource_name='text', dataset=dataset),
        Fileset('ones', text_format, subject_id='subject1',
                visit_id='visit1', resource_name='text', dataset=dataset),
        Fileset('tens', text_format, subject_id='subject1',
                visit_id='visit1', resource_name='text', dataset=dataset),
        Fileset('with_header', text_format, frequency='per_session',
                subject_id='subject1', visit_id='visit1',
                resource_name='text', dataset=dataset),
        # subject1/visit2
        Fileset('ones', text_format, subject_id='subject1',
                visit_id='visit2', resource_name='text', dataset=dataset),
        Fileset('tens', text_format, subject_id='subject1',
                visit_id='visit2', resource_name='text', dataset=dataset),
        # Subject 2
        Fileset('ones', text_format, frequency='per_subject',
                subject_id='subject2', resource_name='text',
                dataset=dataset),
        Fileset('tens', text_format, frequency='per_subject',
                subject_id='subject2', resource_name='text',
                dataset=dataset),
        # subject2/visit1
        Fileset('ones', text_format, subject_id='subject2',
                visit_id='visit1', resource_name='text', dataset=dataset),
        Fileset('tens', text_format, subject_id='subject2',
                visit_id='visit1', resource_name='text', dataset=dataset),
        # subject2/visit2
        Fileset('ones', text_format, subject_id='subject2',
                visit_id='visit2', resource_name='text', dataset=dataset),
        Fileset('tens', text_format, subject_id='subject2',
                visit_id='visit2', resource_name='text', dataset=dataset),
        # Visit 1
        Fileset('ones', text_format, frequency='per_visit',
                visit_id='visit1', resource_name='text', dataset=dataset),
        # Analysis
        Fileset('ones', text_format, frequency='per_dataset',
                resource_name='text', dataset=dataset)]
    fields = [
        # Subject 2
        Field('e', value=3.33333, frequency='per_subject',
              subject_id='subject2', dataset=dataset),
        # subject2/visit2
        Field('a', value=22, subject_id='subject2', visit_id='visit2',
              dataset=dataset),
        Field('b', value=220, subject_id='subject2', visit_id='visit2',
              dataset=dataset),
        Field('c', value='buggy', subject_id='subject2', visit_id='visit2',
              dataset=dataset),
        # Subject 1
        Field('e', value=4.44444, frequency='per_subject',
              subject_id='subject1', dataset=dataset),
        # subject1/visit1
        Field('a', value=1, subject_id='subject1', visit_id='visit1',
              dataset=dataset),
        Field('b', value=10, subject_id='subject1', visit_id='visit1',
              dataset=dataset),
        Field('d', value=42.42, subject_id='subject1', visit_id='visit1',
              dataset=dataset),
        # subject1/visit2
        Field('a', value=2, subject_id='subject1', visit_id='visit2',
              dataset=dataset),
        Field('c', value='van', subject_id='subject1', visit_id='visit2',
              dataset=dataset),
        # Visit 1
        Field('f', value='dog', frequency='per_visit', visit_id='visit1',
              dataset=dataset),
        # Visit 2
        Field('f', value='cat', frequency='per_visit', visit_id='visit2',
              dataset=dataset),
        # Analysis
        Field('g', value=100, frequency='per_dataset', dataset=dataset)]
    # Set URI and IDs if necessary for repository type
    if sync_with_repo:
        for fileset in filesets:
            fileset.get()
        for field in fields:
            field.get()
    tree = Tree.construct(self.dataset, filesets, fields)
    return tree
def tree(self, subject_ids=None, visit_ids=None):
    """
    Return subject and session information for a project in the local
    repository

    Parameters
    ----------
    subject_ids : list(str)
        List of subject IDs with which to filter the tree with. If None
        all are returned
    visit_ids : list(str)
        List of visit IDs with which to filter the tree with. If None
        all are returned

    Returns
    -------
    project : arcana.repository.Tree
        A hierarchical tree of subject, session and fileset information
        for the repository
    """
    bids_filesets = defaultdict(lambda: defaultdict(dict))
    derived_tree = super(BidsRepository, self).tree(
        subject_ids=None, visit_ids=None)
    for bids_obj in self.layout.get(return_type='object'):
        subj_id = bids_obj.entities['subject']
        if subject_ids is not None and subj_id not in subject_ids:
            continue
        visit_id = bids_obj.entities['session']
        if visit_ids is not None and visit_id not in visit_ids:
            continue
        bids_filesets[subj_id][visit_id] = Fileset.from_path(
            bids_obj.path, frequency='per_session', subject_id=subj_id,
            visit_id=visit_id, repository=self, bids_attrs=bids_obj)
    # Need to pull out all filesets and fields
    all_sessions = defaultdict(dict)
    all_visit_ids = set()
    for subj_id, visits in bids_filesets.items():
        for visit_id, filesets in visits.items():
            session = Session(subject_id=subj_id, visit_id=visit_id,
                              filesets=filesets)
            try:
                session.derived = derived_tree.subject(subj_id).visit(
                    visit_id)
            except ArcanaNameError:
                pass  # No matching derived session
            all_sessions[subj_id][visit_id] = session
            all_visit_ids.add(visit_id)
    subjects = []
    for subj_id, subj_sessions in list(all_sessions.items()):
        try:
            derived_subject = derived_tree.subject(subj_id)
        except ArcanaNameError:
            filesets = []
            fields = []
        else:
            filesets = derived_subject.filesets
            fields = derived_subject.fields
        subjects.append(
            Subject(subj_id, sorted(subj_sessions.values()), filesets,
                    fields))
    visits = []
    for visit_id in all_visit_ids:
        try:
            derived_visit = derived_tree.visit(visit_id)
        except ArcanaNameError:
            filesets = []
            fields = []
        else:
            filesets = derived_visit.filesets
            fields = derived_visit.fields
        visit_sessions = list(
            chain(sess[visit_id] for sess in list(all_sessions.values())))
        visits.append(
            Visit(visit_id, sorted(visit_sessions), filesets, fields))
    return Tree(sorted(subjects), sorted(visits), derived_tree.filesets,
                derived_tree.fields)
def tree(self, subject_ids=None, visit_ids=None, **kwargs):
    """
    Return subject and session information for a project in the local
    repository

    Parameters
    ----------
    subject_ids : list(str)
        List of subject IDs with which to filter the tree with. If None
        all are returned
    visit_ids : list(str)
        List of visit IDs with which to filter the tree with. If None
        all are returned

    Returns
    -------
    project : arcana.repository.Tree
        A hierarchical tree of subject, session and fileset information
        for the repository
    """
    all_data = defaultdict(dict)
    all_visit_ids = set()
    for session_path, dirs, files in os.walk(self.root_dir):
        relpath = op.relpath(session_path, self.root_dir)
        if relpath == '.':
            path_parts = []
        else:
            path_parts = relpath.split(op.sep)
        depth = len(path_parts)
        if depth == self._depth:
            # Load input data
            from_study = None
        elif (depth == (self._depth + 1)
              and self.DERIVED_LABEL_FNAME in files):
            # Load study output
            from_study = path_parts.pop()
        elif (depth < self._depth
              and any(not f.startswith('.') for f in files)):
            # Check to see if there are files in upper level
            # directories, which shouldn't be there (ignoring
            # "hidden" files that start with '.')
            raise ArcanaBadlyFormattedDirectoryRepositoryError(
                "Files ('{}') not permitted at {} level in local "
                "repository".format(
                    "', '".join(files),
                    ('subject' if depth else 'project')))
        else:
            # Not a directory that contains data files or directories
            continue
        if len(path_parts) == 2:
            subj_id, visit_id = path_parts
        elif len(path_parts) == 1:
            subj_id = path_parts[0]
            visit_id = self.DEFAULT_VISIT_ID
        else:
            subj_id = self.DEFAULT_SUBJECT_ID
            visit_id = self.DEFAULT_VISIT_ID
        subj_id = subj_id if subj_id != self.SUMMARY_NAME else None
        visit_id = visit_id if visit_id != self.SUMMARY_NAME else None
        if (subject_ids is not None and subj_id is not None
                and subj_id not in subject_ids):
            continue
        if (visit_ids is not None and visit_id is not None
                and visit_id not in visit_ids):
            continue
        if (subj_id, visit_id) == (None, None):
            frequency = 'per_study'
        elif subj_id is None:
            frequency = 'per_visit'
            all_visit_ids.add(visit_id)
        elif visit_id is None:
            frequency = 'per_subject'
        else:
            frequency = 'per_session'
            all_visit_ids.add(visit_id)
        try:
            # Retrieve filesets and fields from other study directories
            # or root acquired directory
            filesets, fields = all_data[subj_id][visit_id]
        except KeyError:
            filesets = []
            fields = []
        for fname in chain(self._filter_files(files, session_path),
                           self._filter_dirs(dirs, session_path)):
            filesets.append(
                Fileset.from_path(op.join(session_path, fname),
                                  frequency=frequency,
                                  subject_id=subj_id,
                                  visit_id=visit_id,
                                  repository=self,
                                  from_study=from_study))
        if self.FIELDS_FNAME in files:
            with open(op.join(session_path, self.FIELDS_FNAME), 'r') as f:
                dct = json.load(f)
            fields = [
                Field(name=k, value=v, frequency=frequency,
                      subject_id=subj_id, visit_id=visit_id,
                      repository=self, from_study=from_study)
                for k, v in list(dct.items())]
        filesets = sorted(filesets)
        fields = sorted(fields)
        all_data[subj_id][visit_id] = (filesets, fields)
    all_sessions = defaultdict(dict)
    for subj_id, subj_data in all_data.items():
        if subj_id is None:
            continue  # Create Subject summaries later
        for visit_id, (filesets, fields) in subj_data.items():
            if visit_id is None:
                continue  # Create Visit summaries later
            all_sessions[subj_id][visit_id] = Session(
                subject_id=subj_id, visit_id=visit_id,
                filesets=filesets, fields=fields)
    subjects = []
    for subj_id, subj_sessions in list(all_sessions.items()):
        try:
            filesets, fields = all_data[subj_id][None]
        except KeyError:
            filesets = []
            fields = []
        subjects.append(
            Subject(subj_id, sorted(subj_sessions.values()), filesets,
                    fields))
    visits = []
    for visit_id in all_visit_ids:
        visit_sessions = list(
            chain(sess[visit_id] for sess in list(all_sessions.values())))
        try:
            filesets, fields = all_data[None][visit_id]
        except KeyError:
            filesets = []
            fields = []
        visits.append(
            Visit(visit_id, sorted(visit_sessions), filesets, fields))
    try:
        filesets, fields = all_data[None][None]
    except KeyError:
        filesets = []
        fields = []
    return Tree(sorted(subjects), sorted(visits), filesets, fields,
                **kwargs)
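# A minimal usage sketch (hedged: `repo` is assumed to be an
# already-constructed directory repository instance; attribute names
# (subjects, sessions, filesets, subject_id, visit_id, name) come from the
# Tree/Subject/Session/Fileset constructors used elsewhere in this code).
def _example_walk_tree(repo):
    # Hypothetical helper, not part of the code above: walk the tree
    # returned by tree() and print one line per fileset in each session.
    tree = repo.tree(subject_ids=['subject1'], visit_ids=['visit1'])
    for subject in tree.subjects:
        for session in subject.sessions:
            for fileset in session.filesets:
                print(session.subject_id, session.visit_id, fileset.name)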
def get_tree(self, repository, set_ids=False):
    sessions = [
        Session('subject1', 'visit1',
                filesets=[
                    Fileset('hundreds', text_format,
                            subject_id='subject1', visit_id='visit1',
                            repository=repository),
                    Fileset('ones', text_format,
                            subject_id='subject1', visit_id='visit1',
                            repository=repository),
                    Fileset('tens', text_format,
                            subject_id='subject1', visit_id='visit1',
                            repository=repository)],
                fields=[
                    Field('a', value=1,
                          subject_id='subject1', visit_id='visit1',
                          repository=repository),
                    Field('b', value=10,
                          subject_id='subject1', visit_id='visit1',
                          repository=repository),
                    Field('d', value=42.42,
                          subject_id='subject1', visit_id='visit1',
                          repository=repository)]),
        Session('subject1', 'visit2',
                filesets=[
                    Fileset('ones', text_format,
                            subject_id='subject1', visit_id='visit2',
                            repository=repository),
                    Fileset('tens', text_format,
                            subject_id='subject1', visit_id='visit2',
                            repository=repository)],
                fields=[
                    Field('a', value=2,
                          subject_id='subject1', visit_id='visit2',
                          repository=repository),
                    Field('c', value='van',
                          subject_id='subject1', visit_id='visit2',
                          repository=repository)]),
        Session('subject2', 'visit1',
                filesets=[
                    Fileset('ones', text_format,
                            subject_id='subject2', visit_id='visit1',
                            repository=repository),
                    Fileset('tens', text_format,
                            subject_id='subject2', visit_id='visit1',
                            repository=repository)],
                fields=[]),
        Session('subject2', 'visit2',
                filesets=[
                    Fileset('ones', text_format,
                            subject_id='subject2', visit_id='visit2',
                            repository=repository),
                    Fileset('tens', text_format,
                            subject_id='subject2', visit_id='visit2',
                            repository=repository)],
                fields=[
                    Field('a', value=22,
                          subject_id='subject2', visit_id='visit2',
                          repository=repository),
                    Field('b', value=220,
                          subject_id='subject2', visit_id='visit2',
                          repository=repository),
                    Field('c', value='buggy',
                          subject_id='subject2', visit_id='visit2',
                          repository=repository)])]
    project = Tree(
        subjects=[
            Subject('subject1',
                    sessions=[s for s in sessions
                              if s.subject_id == 'subject1'],
                    filesets=[
                        Fileset('ones', text_format,
                                frequency='per_subject',
                                subject_id='subject1',
                                repository=repository),
                        Fileset('tens', text_format,
                                frequency='per_subject',
                                subject_id='subject1',
                                repository=repository)],
                    fields=[
                        Field('e', value=4.44444,
                              frequency='per_subject',
                              subject_id='subject1',
                              repository=repository)]),
            Subject('subject2',
                    sessions=[s for s in sessions
                              if s.subject_id == 'subject2'],
                    filesets=[
                        Fileset('ones', text_format,
                                frequency='per_subject',
                                subject_id='subject2',
                                repository=repository),
                        Fileset('tens', text_format,
                                frequency='per_subject',
                                subject_id='subject2',
                                repository=repository)],
                    fields=[
                        Field('e', value=3.33333,
                              frequency='per_subject',
                              subject_id='subject2',
                              repository=repository)])],
        visits=[
            Visit('visit1',
                  sessions=[s for s in sessions
                            if s.visit_id == 'visit1'],
                  filesets=[
                      Fileset('ones', text_format,
                              frequency='per_visit',
                              visit_id='visit1',
                              repository=repository)],
                  fields=[
                      Field('f', value='dog',
                            frequency='per_visit',
                            visit_id='visit1',
                            repository=repository)]),
            Visit('visit2',
                  sessions=[s for s in sessions
                            if s.visit_id == 'visit2'],
                  filesets=[],
                  fields=[
                      Field('f', value='cat',
                            frequency='per_visit',
                            visit_id='visit2',
                            repository=repository)])],
        filesets=[
            Fileset('ones', text_format, frequency='per_study',
                    repository=repository)],
        fields=[
            Field('g', value=100, frequency='per_study',
                  repository=repository)])
    if set_ids:
        # For XNAT repository
        for fileset in project.filesets:
            fileset._id = fileset.name
        for visit in project.visits:
            for fileset in visit.filesets:
                fileset._id = fileset.name
        for subject in project.subjects:
            for fileset in subject.filesets:
                fileset._id = fileset.name
            for session in subject.sessions:
                for fileset in session.filesets:
                    fileset._id = fileset.name
    return project
def find_data(self, dataset, subject_ids=None, visit_ids=None, **kwargs):
    """
    Find all filesets, fields and provenance records within an XNAT
    project

    Parameters
    ----------
    subject_ids : list(str)
        List of subject IDs with which to filter the tree with. If None
        all are returned
    visit_ids : list(str)
        List of visit IDs with which to filter the tree with. If None
        all are returned

    Returns
    -------
    filesets : list[Fileset]
        All the filesets found in the repository
    fields : list[Field]
        All the fields found in the repository
    records : list[Record]
        The provenance records found in the repository
    """
    subject_ids = self.convert_subject_ids(subject_ids)
    # Add derived visit IDs to list of visit ids to filter
    all_filesets = []
    all_fields = []
    all_records = []
    project_id = dataset.name
    # Note we prefer the use of raw REST API calls here for performance
    # reasons over using XnatPy's data structures.
    with self:
        # Get map of internal subject IDs to subject labels in project
        subject_xids_to_labels = {
            s['ID']: s['label'] for s in self._login.get_json(
                '/data/projects/{}/subjects'.format(project_id))[
                    'ResultSet']['Result']}
        # Get list of all sessions within project
        session_xids = [
            s['ID'] for s in self._login.get_json(
                '/data/projects/{}/experiments'.format(project_id))[
                    'ResultSet']['Result']
            if (self.session_filter is None
                or self.session_filter.match(s['label']))]
        for session_xid in tqdm(session_xids,
                                "Scanning sessions in '{}' project"
                                .format(project_id)):
            session_json = self._login.get_json(
                '/data/projects/{}/experiments/{}'.format(
                    project_id, session_xid))['items'][0]
            subject_xid = session_json['data_fields']['subject_ID']
            subject_id = subject_xids_to_labels[subject_xid]
            session_label = session_json['data_fields']['label']
            session_uri = (
                '/data/archive/projects/{}/subjects/{}/experiments/{}'
                .format(project_id, subject_xid, session_xid))
            # Get field values. We do this first so we can check for the
            # DERIVED_FROM_FIELD to determine the correct session label
            # and analysis name
            field_values = {}
            try:
                fields_json = next(
                    c['items'] for c in session_json['children']
                    if c['field'] == 'fields/field')
            except StopIteration:
                pass
            else:
                for js in fields_json:
                    try:
                        value = js['data_fields']['field']
                    except KeyError:
                        pass
                    else:
                        field_values[js['data_fields']['name']] = value
            # Extract analysis name and derived-from session
            if self.DERIVED_FROM_FIELD in field_values:
                df_sess_label = field_values.pop(self.DERIVED_FROM_FIELD)
                from_analysis = session_label[len(df_sess_label) + 1:]
                session_label = df_sess_label
            else:
                from_analysis = None
            # Strip subject ID from session label if required
            if session_label.startswith(subject_id + '_'):
                visit_id = session_label[len(subject_id) + 1:]
            else:
                visit_id = session_label
            # Strip project ID from subject ID if required
            if subject_id.startswith(project_id + '_'):
                subject_id = subject_id[len(project_id) + 1:]
            # Check subject is summary or not and whether it is to be
            # filtered
            if subject_id == XnatRepo.SUMMARY_NAME:
                subject_id = None
            elif not (subject_ids is None or subject_id in subject_ids):
                continue
            # Check visit is summary or not and whether it is to be
            # filtered
            if visit_id == XnatRepo.SUMMARY_NAME:
                visit_id = None
            elif not (visit_ids is None or visit_id in visit_ids):
                continue
            # Determine frequency
            if (subject_id, visit_id) == (None, None):
                frequency = 'per_dataset'
            elif visit_id is None:
                frequency = 'per_subject'
            elif subject_id is None:
                frequency = 'per_visit'
            else:
                frequency = 'per_session'
            # Append fields
            for name, value in field_values.items():
                # Unescape quotes that are XML-escaped in the REST JSON
                value = value.replace('&quot;', '"')
                all_fields.append(Field(
                    name=name, value=value, dataset=dataset,
                    frequency=frequency,
                    subject_id=subject_id,
                    visit_id=visit_id,
                    from_analysis=from_analysis,
                    **kwargs))
            # Extract part of JSON relating to files
            try:
                scans_json = next(
                    c['items'] for c in session_json['children']
                    if c['field'] == 'scans/scan')
            except StopIteration:
                scans_json = []
            for scan_json in scans_json:
                scan_id = scan_json['data_fields']['ID']
                scan_type = scan_json['data_fields'].get('type', '')
                scan_quality = scan_json['data_fields'].get('quality',
                                                            None)
                scan_uri = '{}/scans/{}'.format(session_uri, scan_id)
                try:
                    resources_json = next(
                        c['items'] for c in scan_json['children']
                        if c['field'] == 'file')
                except StopIteration:
                    resources = {}
                else:
                    resources = {
                        js['data_fields']['label']:
                        js['data_fields'].get('format', None)
                        for js in resources_json}
                # Remove auto-generated snapshots directory
                resources.pop('SNAPSHOTS', None)
                if scan_type == self.PROV_SCAN:
                    # Download provenance JSON files and parse into
                    # records
                    temp_dir = tempfile.mkdtemp()
                    try:
                        with tempfile.TemporaryFile() as temp_zip:
                            self._login.download_stream(
                                scan_uri + '/files', temp_zip,
                                format='zip')
                            with ZipFile(temp_zip) as zip_file:
                                zip_file.extractall(temp_dir)
                        for base_dir, _, fnames in os.walk(temp_dir):
                            for fname in fnames:
                                if fname.endswith('.json'):
                                    pipeline_name = fname[:-len('.json')]
                                    json_path = op.join(base_dir, fname)
                                    all_records.append(
                                        Record.load(
                                            pipeline_name, frequency,
                                            subject_id, visit_id,
                                            from_analysis, json_path))
                    finally:
                        shutil.rmtree(temp_dir, ignore_errors=True)
                else:
                    for resource in resources:
                        all_filesets.append(Fileset(
                            scan_type, id=scan_id, uri=scan_uri,
                            dataset=dataset, frequency=frequency,
                            subject_id=subject_id, visit_id=visit_id,
                            from_analysis=from_analysis,
                            quality=scan_quality,
                            resource_name=resource, **kwargs))
            logger.debug("Found node {}:{} on {}:{}".format(
                subject_id, visit_id, self.server, project_id))
    return all_filesets, all_fields, all_records
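# An illustrative sketch (hedged: the label values below are made up) of the
# naming convention the loop above decodes: a derived session stores the
# original session label in DERIVED_FROM_FIELD, and the analysis name is
# whatever follows that label (plus an underscore) in the derived session's
# own label.
def _split_derived_label(session_label, derived_from):
    # Hypothetical helper, not part of XnatRepo: mirrors the slicing used in
    # find_data above.
    from_analysis = session_label[len(derived_from) + 1:]
    return derived_from, from_analysis

# e.g. a session labelled 'subject1_visit1_myanalysis' whose
# DERIVED_FROM_FIELD value is 'subject1_visit1' decodes to
# ('subject1_visit1', 'myanalysis')
assert _split_derived_label(
    'subject1_visit1_myanalysis',
    'subject1_visit1') == ('subject1_visit1', 'myanalysis')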