def test_get_collection_entities(run_coll_list):
    coll = run_coll_list[0]
    ents = coll.entities
    assert {'run', 'task', 'subject', 'suffix', 'datatype'} == set(ents.keys())

    merged = merge_collections(run_coll_list[:3])
    ents = merged.entities
    assert {'task', 'subject', 'suffix', 'datatype'} == set(ents.keys())
    assert ents['subject'] == '01'

    merged = merge_collections(run_coll_list[3:6])
    ents = merged.entities
    assert {'task', 'subject', 'suffix', 'datatype'} == set(ents.keys())
    assert ents['subject'] == '02'
def setup(self, input_nodes=None, drop_na=False, **kwargs):
    """Set up the Step and construct the design matrix.

    Parameters
    ----------
    input_nodes : list
        Optional list of Node objects produced by the preceding Step in
        the analysis. If None, uses any inputs passed in at Step
        initialization.
    drop_na : bool
        Boolean indicating whether or not to automatically drop events
        that have a n/a amplitude when reading in data from event files.
    kwargs : dict
        Optional keyword arguments to pass onto load_variables.
    """
    self.output_nodes = []
    input_nodes = input_nodes or self.input_nodes or []

    # TODO: remove the scan_length argument entirely once we switch tests
    # to use the synthetic dataset with image headers.
    if self.level != 'run':
        kwargs = kwargs.copy()
        kwargs.pop('scan_length', None)

    collections = self.layout.get_collections(self.level, drop_na=drop_na,
                                              **kwargs)
    objects = collections + input_nodes

    objects, kwargs = self._filter_objects(objects, kwargs)
    groups = self._group_objects(objects)

    # Set up and validate variable lists
    model = self.model or {}
    X = model.get('x', [])

    for grp in groups:
        # Split into separate lists of Collections and Nodes
        input_nodes = [o for o in grp if isinstance(o, AnalysisNode)]
        colls = list(set(grp) - set(input_nodes))

        if input_nodes:
            node_coll = self._concatenate_input_nodes(input_nodes)
            colls.append(node_coll)

        coll = merge_collections(colls) if len(colls) > 1 else colls[0]
        coll = apply_transformations(coll, self.transformations)
        if X:
            transform.Select(coll, X)

        node = AnalysisNode(self.level, coll, self.contrasts, input_nodes,
                            self.dummy_contrasts)

        self.output_nodes.append(node)
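# A minimal sketch of how a Step's setup() is typically driven; the Analysis
# wrapper, dataset path, and model filename are assumptions here, not taken
# from the snippet above.
from bids.layout import BIDSLayout
from bids.analysis import Analysis  # assumed wrapper that owns the Steps

layout = BIDSLayout('/path/to/bids/dataset')  # hypothetical dataset path
analysis = Analysis(layout, 'model.json')     # hypothetical model spec file
# scan_length is forwarded through **kwargs to load_variables for run-level
# collections (the TODO above notes it is dropped at higher levels).
analysis.setup(scan_length=480)
first_step = analysis.steps[0]          # .steps is assumed; older releases
print(len(first_step.output_nodes))     # exposed .blocks instead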
def setup(self, input_nodes=None, auto_contrasts=None, **kwargs):
    ''' Set up the Block and construct the design matrix.

    Args:
        input_nodes (list): Optional list of Node objects produced by
            the preceding Block in the analysis. If None, uses any
            inputs passed in at Block initialization.
        auto_contrasts (bool): If True, a contrast is automatically
            created for each column in the design matrix.
        kwargs: Optional keyword arguments to pass onto load_variables.
    '''
    self.output_nodes = []
    input_nodes = input_nodes or self.input_nodes or []

    if auto_contrasts is not None:
        self.auto_contrasts = auto_contrasts

    # TODO: remove the scan_length argument entirely once we switch tests
    # to use the synthetic dataset with image headers.
    if self.level != 'run':
        kwargs = kwargs.copy()
        kwargs.pop('scan_length', None)

    collections = self.layout.get_collections(self.level, **kwargs)
    objects = collections + input_nodes

    objects, kwargs = self._filter_objects(objects, kwargs)
    groups = self._group_objects(objects)

    for grp in groups:
        # Split into separate lists of Collections and Nodes
        input_nodes = [o for o in grp if isinstance(o, AnalysisNode)]
        colls = list(set(grp) - set(input_nodes))

        if input_nodes:
            node_coll = self._concatenate_input_nodes(input_nodes)
            colls.append(node_coll)

        model = self.model or {}
        variables = set(model.get('variables', []))
        hrf_variables = set(model.get('HRF_variables', []))
        if not variables >= hrf_variables:
            raise ValueError("HRF_variables must be a subset "
                             "of variables in BIDS model.")

        coll = merge_collections(colls) if len(colls) > 1 else colls[0]
        coll = apply_transformations(coll, self.transformations)
        if model.get('variables'):
            transform.select(coll, model['variables'])

        node = AnalysisNode(self.level, coll, self.contrasts, input_nodes,
                            self.auto_contrasts)

        self.output_nodes.append(node)
def setup(self, inputs=None, drop_na=False, **kwargs):
    """Set up the Step and construct the design matrix.

    Parameters
    ----------
    inputs : list
        Optional list of BIDSVariableCollections produced as output by
        the preceding Step in the analysis. If None, uses inputs passed
        at initialization (if any).
    drop_na : bool
        Boolean indicating whether or not to automatically drop events
        that have a n/a amplitude when reading in data from event files.
    kwargs : dict
        Optional keyword arguments to pass onto load_variables.
    """
    self._collections = []

    # Convert input contrasts to a list of BIDSVariableCollections
    inputs = inputs or self.inputs or []
    input_grps = self._merge_contrast_inputs(inputs) if inputs else {}

    # TODO: remove the scan_length argument entirely once we switch tests
    # to use the synthetic dataset with image headers.
    if self.level != 'run':
        kwargs = kwargs.copy()
        kwargs.pop('scan_length', None)

    # Now handle variables read from the BIDS dataset: read them in,
    # filter on passed selectors, and group by unit of current level
    collections = self.layout.get_collections(self.level, drop_na=drop_na,
                                              **kwargs)
    collections, _ = self._filter_collections(collections, kwargs)
    groups = self._group_objects_by_entities(collections)

    # Merge in the inputs
    for key, input_ in input_grps.items():
        if key not in groups:
            groups[key] = []
        groups[key].append(input_)

    # Set up and validate variable lists
    model = self.model or {}
    X = model.get('x', [])

    for grp, colls in groups.items():
        coll = merge_collections(colls)
        coll = tm.TransformerManager().transform(coll, self.transformations)
        if X:
            tm.Select(coll, X)
        self._collections.append(coll)
def _collections_to_dfs(self, collections):
    """Merges collections and converts them to a pandas DataFrame."""
    if not collections:
        return []

    # Group all collections by level
    coll_levels = defaultdict(list)
    for coll in collections:
        coll_levels[coll.level].append(coll)

    var_names = list(set(self.node.model['x']) - {1})

    grp_dfs = []
    # Merge all collections at each level and export to a DataFrame
    for level, colls in coll_levels.items():

        # Note: we currently merge _before_ selecting variables. Selecting
        # variables first could be done by passing `variables=all_vars` as
        # an argument on the next line, but we can't do this right now
        # because we can't guarantee that all the variables named in `X`
        # in the model section already exist; some might be created by the
        # transformations.
        coll = merge_collections(colls)

        # Apply transformations
        transformations = self.node.transformations
        if transformations:
            transformer = tm.TransformerManager(
                transformations['transformer'])
            coll = transformer.transform(coll.clone(),
                                         transformations['instructions'])

        # Take the intersection of variables and Model.X (var_names),
        # ignoring missing variables (usually contrasts)
        coll.variables = {
            v: coll.variables[v]
            for v in expand_wildcards(var_names, coll.variables)
            if v in coll.variables
        }

        if not coll.variables:
            continue

        # Run collections need to be handled separately because to_df()
        # takes extra arguments related to handling of time
        if level == 'run':
            if self.force_dense:
                coll = coll.to_dense(sampling_rate=self.sampling_rate)
            coll = coll.to_df(sampling_rate=self.sampling_rate)
        else:
            coll = coll.to_df()

        grp_dfs.append(coll)

    return grp_dfs
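# The variable-selection step above leans on expand_wildcards(); a rough
# stand-in for its behavior (an illustration under assumptions, not the
# actual implementation) might look like:
from fnmatch import fnmatch

def expand_wildcards_sketch(patterns, available):
    """Expand shell-style patterns (e.g. 'trial_type.*') against known
    variable names; plain names pass through unchanged."""
    matched = []
    for pat in patterns:
        if '*' in pat:
            matched.extend(n for n in available if fnmatch(n, pat))
        else:
            matched.append(pat)
    return matched

# expand_wildcards_sketch(['trial_type.*', 'rt'],
#                         ['trial_type.go', 'trial_type.stop', 'rt'])
# -> ['trial_type.go', 'trial_type.stop', 'rt']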
def setup(self, input_nodes=None, drop_na=False, **kwargs):
    ''' Set up the Step and construct the design matrix.

    Args:
        input_nodes (list): Optional list of Node objects produced by
            the preceding Step in the analysis. If None, uses any inputs
            passed in at Step initialization.
        drop_na (bool): Boolean indicating whether or not to
            automatically drop events that have a n/a amplitude when
            reading in data from event files.
        kwargs: Optional keyword arguments to pass onto load_variables.
    '''
    self.output_nodes = []
    input_nodes = input_nodes or self.input_nodes or []

    # TODO: remove the scan_length argument entirely once we switch tests
    # to use the synthetic dataset with image headers.
    if self.level != 'run':
        kwargs = kwargs.copy()
        kwargs.pop('scan_length', None)

    collections = self.layout.get_collections(self.level, drop_na=drop_na,
                                              **kwargs)
    objects = collections + input_nodes

    objects, kwargs = self._filter_objects(objects, kwargs)
    groups = self._group_objects(objects)

    # Set up and validate variable lists
    model = self.model or {}
    X = model.get('x', [])

    for grp in groups:
        # Split into separate lists of Collections and Nodes
        input_nodes = [o for o in grp if isinstance(o, AnalysisNode)]
        colls = list(set(grp) - set(input_nodes))

        if input_nodes:
            node_coll = self._concatenate_input_nodes(input_nodes)
            colls.append(node_coll)

        coll = merge_collections(colls) if len(colls) > 1 else colls[0]
        coll = apply_transformations(coll, self.transformations)
        if X:
            transform.Select(coll, X)

        node = AnalysisNode(self.level, coll, self.contrasts, input_nodes,
                            self.auto_contrasts)

        self.output_nodes.append(node)
def setup(self, inputs=None, **kwargs):
    """Set up the Step.

    Processes inputs from the previous step, combines them with currently
    loaded data, and applies transformations to produce a design-matrix-ready
    set of variable collections.

    Parameters
    ----------
    inputs : list
        Optional list of BIDSVariableCollections produced as output by the
        preceding Step in the analysis. If None, uses inputs passed at
        initialization (if any).
    kwargs : dict
        Optional keyword arguments constraining the collections to include.
    """
    inputs = inputs or self.inputs or []
    input_grps = self._merge_contrast_inputs(inputs) if inputs else {}

    # Filter on passed selectors and group by unit of current level
    collections, _ = self._filter_collections(self._raw_collections, kwargs)
    groups = self._group_objects_by_entities(collections)

    # Merge in the inputs
    for key, input_ in input_grps.items():
        if key not in groups:
            groups[key] = []
        groups[key].append(input_)

    # Set up and validate variable lists
    model = self.model or {}
    X = model.get('x', [])

    for grp, colls in groups.items():
        coll = merge_collections(colls)
        coll = tm.TransformerManager().transform(coll, self.transformations)
        if X:
            tm.Select(coll, X)
        self._collections.append(coll)
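# A minimal sketch of the transformation pass used in the loop above; the
# dataset path and the instruction dict are assumptions, and key casing
# follows the snake_case-converted model dicts these methods consume.
from bids.layout import BIDSLayout
from bids.modeling import transformations as tm  # assumed module location

layout = BIDSLayout('/path/to/bids/dataset')  # hypothetical dataset path
coll = layout.get_collections('run', scan_length=480)[0]
# Each instruction names a transformation and the variables it applies to;
# 'reaction_time' is a hypothetical column from an events file.
coll = tm.TransformerManager().transform(
    coll, [{'name': 'Scale', 'input': ['reaction_time']}])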
def setup(self, input_nodes=None, **kwargs):
    ''' Set up the Block and construct the design matrix.

    Args:
        input_nodes (list): Optional list of Node objects produced by
            the preceding Block in the analysis. If None, uses any
            inputs passed in at Block initialization.
        kwargs: Optional keyword arguments to pass onto load_variables.
    '''
    self.output_nodes = []
    input_nodes = input_nodes or self.input_nodes or []

    # TODO: remove the scan_length argument entirely once we switch tests
    # to use the synthetic dataset with image headers.
    if self.level != 'run':
        kwargs = kwargs.copy()
        kwargs.pop('scan_length', None)

    collections = self.layout.get_collections(self.level, **kwargs)
    objects = collections + input_nodes

    objects, kwargs = self._filter_objects(objects, kwargs)
    groups = self._group_objects(objects)

    # Set up and validate variable lists
    model = self.model or {}
    X = model.get('X', [])

    for grp in groups:
        # Split into separate lists of Collections and Nodes
        input_nodes = [o for o in grp if isinstance(o, AnalysisNode)]
        colls = list(set(grp) - set(input_nodes))

        if input_nodes:
            node_coll = self._concatenate_input_nodes(input_nodes)
            colls.append(node_coll)

        coll = merge_collections(colls) if len(colls) > 1 else colls[0]
        coll = apply_transformations(coll, self.transformations)
        if X:
            transform.Select(coll, X)

        node = AnalysisNode(self.level, coll, self.contrasts, input_nodes,
                            self.auto_contrasts)

        self.output_nodes.append(node)
def test_merge_collections(run_coll, run_coll_list):
    df1 = run_coll.to_df().sort_values(['subject', 'run', 'onset'])
    rcl = [c.clone() for c in run_coll_list]
    coll = merge_collections(rcl)
    df2 = coll.to_df().sort_values(['subject', 'run', 'onset'])
    assert df1.equals(df2)
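# A minimal sketch of merge_collections outside the test fixtures; the
# import path and dataset path are assumptions.
from bids.layout import BIDSLayout
from bids.variables import merge_collections  # assumed import path

layout = BIDSLayout('/path/to/bids/dataset')  # hypothetical dataset path
run_colls = layout.get_collections('run', scan_length=480)
# Merging clones keeps the originals untouched, mirroring the test above;
# the merged collection pools the rows of every input.
merged = merge_collections([c.clone() for c in run_colls])
print(merged.to_df().shape)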