from pkg_resources import resource_filename
from bids import config as bids_config
from bids import layout


def get_BIDSLayout_with_conf(dir_, **kwargs):
    """Get BIDSLayout with bids, derivatives, and pndni_bids configuration
    files loaded"""
    if "pndni_bids" not in bids_config.get_option("config_paths"):
        layout.add_config_paths(pndni_bids=resource_filename(
            'pndniworkflows', 'config/pndni_bids.json'))
    lay = layout.BIDSLayout(dir_,
                            config=['bids', 'derivatives', 'pndni_bids'],
                            **kwargs)
    return lay
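
# Usage sketch (hedged): '/data/bids' is a hypothetical dataset path. Once the
# pndni_bids config is registered, the returned layout can be queried like any
# other BIDSLayout, including on the extra pndni entities.
def _example_pndni_layout():
    lay = get_BIDSLayout_with_conf('/data/bids')
    # Standard BIDSLayout queries work unchanged.
    return lay.get(return_type='file')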
# NOTE: the fixture decorator was not preserved in the source; the params list
# below is an assumption (the body only distinguishes 'preproc' from
# everything else), and the import paths may differ across pybids versions.
from os.path import join

import pytest
from bids.config import get_option, set_option
from bids.grabbids import BIDSLayout
from bids.tests import get_test_data_path
from bids.variables import load_variables


@pytest.fixture(params=['basic', 'preproc'])
def synthetic(request):
    root = join(get_test_data_path(), 'synthetic')
    default_preproc = get_option('loop_preproc')
    if request.param == 'preproc':
        set_option('loop_preproc', True)
        layout = BIDSLayout((root, ['bids', 'derivatives']))
    else:
        set_option('loop_preproc', default_preproc)
        layout = BIDSLayout(root, exclude='derivatives')
    yield request.param, load_variables(layout, skip_empty=True)
    # Restore the global option regardless of which branch ran.
    set_option('loop_preproc', default_preproc)
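
# Hedged usage sketch: a test consuming the fixture above. pytest re-runs it
# once per entry in the fixture's params list; the yielded tuple is
# (param, NodeIndex of loaded variables).
def test_synthetic_fixture_loads(synthetic):
    param, index = synthetic
    assert index is not None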
def __init__(self, root, validate=True, index_associated=True,
             include=None, absolute_paths=True, derivatives=False,
             config=None, sources=None, **kwargs):
    self.validator = BIDSValidator(index_associated=index_associated)
    self.validate = validate
    self.metadata_index = MetadataIndex(self)
    self.derivatives = {}
    self.sources = listify(sources)

    # Validate arguments
    if not isinstance(root, six.string_types):
        raise ValueError("root argument must be a string specifying the"
                         " directory containing the BIDS dataset.")
    if not os.path.exists(root):
        raise ValueError("BIDS root does not exist: %s" % root)

    self.root = root

    target = os.path.join(self.root, 'dataset_description.json')
    if not os.path.exists(target):
        if validate is True:
            raise ValueError(
                "'dataset_description.json' is missing from project root."
                " Every valid BIDS dataset must have this file.")
        else:
            self.description = None
    else:
        with open(target, 'r', encoding='utf-8') as desc_fd:
            self.description = json.load(desc_fd)
        if validate is True:
            for k in ['Name', 'BIDSVersion']:
                if k not in self.description:
                    raise ValueError("Mandatory '%s' field missing from "
                                     "dataset_description.json." % k)

    # Determine which subdirectories to exclude from indexing
    excludes = {"code", "stimuli", "sourcedata", "models", "derivatives"}
    if include is not None:
        include = listify(include)
        if "derivatives" in include:
            raise ValueError("Do not pass 'derivatives' in the include "
                             "list. To index derivatives, either set "
                             "derivatives=True, or use add_derivatives().")
        excludes -= set([d.strip(os.path.sep) for d in include])
    self._exclude_dirs = list(excludes)

    # Set up path and config for grabbit
    if config is None:
        config = 'bids'
    config_paths = get_option('config_paths')
    path = (root, [config_paths[c] for c in listify(config)])

    # Initialize grabbit Layout
    super(BIDSLayout, self).__init__(path, root=self.root,
                                     dynamic_getters=True,
                                     absolute_paths=absolute_paths,
                                     **kwargs)

    # Add derivatives if any are found
    if derivatives:
        if derivatives is True:
            derivatives = os.path.join(root, 'derivatives')
        self.add_derivatives(
            derivatives, validate=validate,
            index_associated=index_associated, include=include,
            absolute_paths=absolute_paths, derivatives=None, config=None,
            sources=self, **kwargs)
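
# Construction sketch (hedged; '/data/bids' is a hypothetical path): index a
# raw dataset plus whatever pipelines live under <root>/derivatives.
# Derivative layouts end up in .derivatives, keyed by pipeline name.
def _example_layout_with_derivatives():
    lay = BIDSLayout('/data/bids', validate=True, derivatives=True)
    return sorted(lay.derivatives)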
def _load_time_variables(layout, dataset=None, columns=None, scan_length=None,
                         drop_na=True, events=True, physio=True, stim=True,
                         confounds=True, skip_empty=True, **selectors):
    ''' Loads all variables found in *_events.tsv files (along with confound
    and recording files) and adds them to a NodeIndex.

    Args:
        layout (BIDSLayout): A BIDSLayout to scan.
        dataset (NodeIndex): A BIDS NodeIndex container. If None, a new one
            is initialized.
        columns (list): Optional list of names specifying which columns in
            the event files to read. By default, reads all columns found.
        scan_length (float): Optional duration of runs (in seconds). By
            default, this will be extracted from the BOLD image. However, in
            cases where the user doesn't have access to the images (e.g.,
            because only file handles are locally available), a fixed
            duration can be manually specified as a fallback.
        drop_na (bool): If True, removes all events where amplitude is n/a.
            If False, leaves n/a values intact. Note that in the latter case,
            transformations that require numeric values may fail.
        events (bool): If True, extracts variables from events.tsv files.
        physio (bool): If True, extracts variables from _physio files.
        stim (bool): If True, extracts variables from _stim files.
        confounds (bool): If True, extracts variables from _confounds files.
        skip_empty (bool): Whether or not to skip empty Variables (i.e.,
            where there are no rows/records in a file, or all onsets,
            durations, and amplitudes are 0).
        selectors (dict): Optional keyword arguments passed onto the
            BIDSLayout instance's get() method; can be used to constrain
            which data are loaded.

    Returns: A NodeIndex instance.
    '''

    # Extract any non-keyword arguments
    kwargs = selectors.copy()
    domains = kwargs.get('domains', None)

    # Filter keyword args
    selectors = {k: v for k, v in selectors.items() if k in BASE_ENTITIES}

    if dataset is None:
        dataset = NodeIndex()

    if get_option('loop_preproc'):
        selectors['type'] = 'preproc'
        # Select any space, to only loop over each run once.
        # Warning: If some spaces only apply to some runs, this may result
        # in unexpected behavior, although this scenario is rare.
        spaces = layout.get_spaces(type='preproc')
        if len(spaces) > 1:
            selectors['space'] = spaces[0]
    else:
        selectors['modality'] = 'func'
        selectors['type'] = 'bold'

    images = layout.get(return_type='file', extensions='.nii.gz',
                        **selectors)

    if not images:
        raise ValueError("No functional images that match criteria found.")

    # Main loop over images
    for img_f in images:

        entities = layout.files[img_f].entities

        # Run is not mandatory, but we need a default for proper indexing
        if 'run' in entities:
            entities['run'] = int(entities['run'])

        # Get duration of run: first try to get it directly from the image
        # header; if that fails, fall back on the scan_length argument.
        try:
            img = nb.load(img_f)
            duration = img.shape[3] * img.header.get_zooms()[-1]
        except Exception:
            if scan_length is not None:
                duration = scan_length
            else:
                msg = ("Unable to extract scan duration from one or more "
                       "BOLD runs, and no scan_length argument was provided "
                       "as a fallback. Please check that the image files "
                       "are available, or manually specify the scan "
                       "duration.")
                raise ValueError(msg)

        tr = layout.get_metadata(img_f, type='bold', domains=domains,
                                 full_search=True)['RepetitionTime']

        run = dataset.get_or_create_node('run', entities, image_file=img_f,
                                         duration=duration,
                                         repetition_time=tr)
        run_info = run.get_info()

        # Process event files
        if events:
            dfs = layout._get_nearest_helper(img_f, '.tsv', type='events',
                                             full_search=True,
                                             domains=domains)
            if dfs is not None:
                for _data in dfs:
                    _data = pd.read_table(_data, sep='\t')
                    if 'amplitude' in _data.columns:
                        if (_data['amplitude'].astype(int) == 1).all() and \
                                'trial_type' in _data.columns:
                            msg = ("Column 'amplitude' with constant value "
                                   "1 is unnecessary in event files; "
                                   "ignoring it.")
                            _data = _data.drop('amplitude', axis=1)
                        else:
                            msg = ("Column name 'amplitude' is reserved; "
                                   "renaming it to 'amplitude_'.")
                            _data = _data.rename(
                                columns={'amplitude': 'amplitude_'})
                        warnings.warn(msg)

                    _data = _data.replace('n/a', np.nan)  # Replace BIDS' n/a
                    _data = _data.apply(pd.to_numeric, errors='ignore')

                    _cols = columns or list(set(_data.columns.tolist()) -
                                            {'onset', 'duration'})

                    # Construct a DataFrame for each extra column
                    for col in _cols:
                        df = _data[['onset', 'duration']].copy()
                        df['amplitude'] = _data[col].values

                        # Add in all of the run's entities as new columns
                        # for index
                        for entity, value in entities.items():
                            if entity in BASE_ENTITIES:
                                df[entity] = value

                        if drop_na:
                            df = df.dropna(subset=['amplitude'])

                        if df.empty:
                            continue

                        var = SparseRunVariable(col, df, run_info, 'events')
                        run.add_variable(var)

        # Process confound files
        if confounds:
            sub_ents = {k: v for k, v in entities.items()
                        if k in BASE_ENTITIES}
            confound_files = layout.get(type='confounds', **sub_ents)
            for cf in confound_files:
                _data = pd.read_csv(cf.filename, sep='\t', na_values='n/a')
                if columns is not None:
                    conf_cols = list(set(_data.columns) & set(columns))
                    _data = _data.loc[:, conf_cols]
                for col in _data.columns:
                    sr = 1. / run.repetition_time
                    var = DenseRunVariable(col, _data[[col]], run_info,
                                           'confounds', sr)
                    run.add_variable(var)

        # Process recording files
        if physio or stim:
            rec_types = ['physio'] if physio else []
            if stim:
                rec_types.append('stim')
            rec_files = layout.get_nearest(img_f, extensions='.tsv.gz',
                                           all_=True, type=rec_types,
                                           ignore_strict_entities=['type'],
                                           full_search=True, domains=domains)
            for rf in rec_files:
                metadata = layout.get_metadata(rf)
                if not metadata:
                    raise ValueError("No .json sidecar found for '%s'." % rf)
                # Recording files carry no header row; column names come
                # from the JSON sidecar.
                data = pd.read_csv(rf, sep='\t', header=None)
                freq = metadata['SamplingFrequency']
                st = metadata['StartTime']
                rf_cols = metadata['Columns']
                data.columns = rf_cols

                # Filter columns if user passed names
                if columns is not None:
                    rf_cols = list(set(rf_cols) & set(columns))
                    data = data.loc[:, rf_cols]

                n_cols = len(rf_cols)
                if not n_cols:
                    continue

                # Keep only in-scan samples
                if st < 0:
                    start_ind = int(np.floor(-st * freq))
                    values = data.values[start_ind:, :]
                else:
                    values = data.values

                if st > 0:
                    n_pad = int(freq * st)
                    pad = np.zeros((n_pad, n_cols))
                    values = np.r_[pad, values]

                n_rows = int(run.duration * freq)
                if len(values) > n_rows:
                    values = values[:n_rows, :]
                elif len(values) < n_rows:
                    pad = np.zeros((n_rows - len(values), n_cols))
                    values = np.r_[values, pad]

                df = pd.DataFrame(values, columns=rf_cols)
                source = 'physio' if '_physio.tsv' in rf else 'stim'
                for col in df.columns:
                    var = DenseRunVariable(col, df[[col]], run_info, source,
                                           freq)
                    run.add_variable(var)

    return dataset
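
# Hedged usage sketch: _load_time_variables is normally reached via
# load_variables(layout, ...), but a direct call looks like this. The subject
# and task values are hypothetical; scan_length is only consulted when run
# duration cannot be read from the image header.
def _example_collect_run_variables(layout):
    index = _load_time_variables(layout, subject='01', task='nback',
                                 scan_length=480.0)
    return index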