def signal_confound_design(self, run):
    """Build a matrix of signal confound variables."""
    analysis_dir = PROJECT["analysis_dir"]

    if self.exp == "dots":
        tr = 2
    elif self.exp == "sticks":
        tr = .72

    fstem = op.join(analysis_dir, self.exp, self.subj,
                    "preproc", "run_{}".format(run))

    motion = (pd.read_csv(op.join(fstem, "realignment_params.csv"))
                .filter(regex="rot|trans")
                .apply(stats.zscore))

    nuisance = (pd.read_csv(op.join(fstem, "nuisance_variables.csv"))
                  .filter(regex="wm_")
                  .apply(stats.zscore))

    artifacts = pd.read_csv(op.join(fstem, "artifacts.csv")).any(axis=1)

    confounds = pd.concat([motion, nuisance], axis=1)
    dmat = glm.DesignMatrix(confounds=confounds,
                            artifacts=artifacts,
                            hpf_kernel=self.hpf_kernel,
                            tr=tr)

    return dmat.design_matrix
def estimate_voxel_params(subj, data, model, runs, conditions):
    """Fit a univariate model in each voxel of a ROI data array."""
    # Load the task design
    design_file = design_temp.format(subj, model)
    design = pd.read_csv(design_file)

    # Precompute the highpass filter kernel and HRF
    ntp = data.shape[1]
    hpf_kernel = glm.fsl_highpass_matrix(ntp, 128)
    hrf = glm.GammaDifferenceHRF()

    # Build a design matrix for each run separately and then combine
    Xs = []
    for run in runs:
        run_design = design.query("run == @run")
        X = glm.DesignMatrix(run_design, hrf, ntp,
                             hpf_kernel=hpf_kernel,
                             condition_names=conditions)
        Xs.append(X.design_matrix)
    X = pd.concat(Xs).reset_index(drop=True)

    # Reshape the data to stack runs together
    np.testing.assert_equal(len(data), len(runs))
    data = data.reshape(-1, data.shape[-1])

    # Fit the model
    model = sm.OLS(data, X).fit()

    # Return the params
    return model.params
def confound_design(exp, subj, run, hpf_kernel):
    """Build a matrix of confound variables."""
    analysis_dir = PROJECT["analysis_dir"]

    if exp == "dots":
        tr = 1
    elif exp == "sticks":
        tr = .72

    # Load in the confound information
    fstem = op.join(analysis_dir, exp, subj,
                    "preproc", "run_{}".format(run))

    motion = (pd.read_csv(op.join(fstem, "realignment_params.csv"))
                .filter(regex="rot|trans")
                .apply(stats.zscore))

    nuisance = (pd.read_csv(op.join(fstem, "nuisance_variables.csv"))
                  .filter(regex="wm_")
                  .apply(stats.zscore))

    artifacts = pd.read_csv(op.join(fstem, "artifacts.csv")).any(axis=1)

    # Upsample dots data
    if exp == "dots":
        motion = pd.DataFrame(moss.upsample(motion, 2))
        nuisance = pd.DataFrame(moss.upsample(nuisance, 2))
        new_tps = np.arange(0, 229.5, .5)
        artifacts = artifacts.reindex(new_tps).ffill().reset_index(drop=True)

    # Build this portion of the design matrix
    confounds = pd.concat([motion, nuisance], axis=1)
    dmat = glm.DesignMatrix(confounds=confounds,
                            artifacts=artifacts,
                            hpf_kernel=hpf_kernel,
                            tr=tr)

    return dmat.design_matrix
def estimate_subject_roi_fir(subj, mask, model, conditions=None):

    # Load the cached ROI dataset
    data = load_cached_roi_data(subj, mask)

    # Average the data over voxels
    data = data.mean(axis=-1)

    # Upsample the data to 1s resolution
    data = moss.upsample(data.T, 2).T

    # Convert the data to percent signal change over runs
    data = percent_change(data, 1)

    # Count the number of timepoints
    ntp = data.shape[1]

    # Concatenate the data into one long vector
    data = np.concatenate(data)

    # Load the design, make events impulses, get a list of conditions
    design = pd.read_csv(design_temp.format(subj, model))
    design["duration"] = 0
    if conditions is None:
        conditions = design["condition"].unique()

    # Precache the hpf kernel
    hpf_kernel = glm.fsl_highpass_matrix(ntp, 128)

    # Make a design matrix for each run and then concatenate
    Xs = []
    for run, run_df in design.groupby("run"):
        X = glm.DesignMatrix(run_df,
                             glm.FIR(tr=1, nbasis=24, offset=-2),
                             condition_names=conditions,
                             hpf_cutoff=None,
                             hpf_kernel=hpf_kernel,
                             ntp=ntp, tr=1,
                             oversampling=1)
        Xs.append(X.design_matrix)
    X = pd.concat(Xs)

    # Fit the model
    model = sm.OLS(data, X).fit()

    # Add metadata about the beta for each timepoint and condition
    params = model.params.reset_index(name="coef")
    params["timepoint"] = params["index"].str[-2:].astype(int)
    params["timepoint"] -= 1
    params["condition"] = params["index"].str[:-3]
    params["subj"] = subj
    params["roi"] = mask

    # Return the model parameters
    return params
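# Minimal sketch (an assumption, not the project's code): `percent_change`,
# used above, presumably converts each run's timeseries to percent signal
# change around its temporal mean. A standalone version of such a helper
# with plain numpy might look like this hypothetical function.
def _percent_change_sketch(data, axis=1):
    """Convert an array to percent signal change around its mean on `axis`."""
    import numpy as np
    data = np.asarray(data, dtype=float)
    mean = data.mean(axis=axis, keepdims=True)
    return (data - mean) / mean * 100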
def schedule_efficiency(schedule, nbasis, leadout_trs):
    """Compute the FIR design matrix efficiency for a given schedule."""
    par = pd.DataFrame(dict(onset=schedule.trial_time_tr,
                            condition=schedule.context))
    fir = glm.FIR(tr=1, nbasis=nbasis, offset=-1)
    ntp = par.onset.max() + leadout_trs
    X = glm.DesignMatrix(par, fir, ntp,
                         hpf_cutoff=None, tr=1,
                         oversampling=1).design_matrix.values
    eff = 1 / np.trace(inv(X.T.dot(X)))
    return schedule, eff
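# Illustrative sketch (not part of the original pipeline): the efficiency
# returned above is 1 / trace((X'X)^-1), so schedules whose FIR regressors
# are closer to orthogonal score higher. The hypothetical helper below
# computes the same quantity for any plain numpy design matrix.
def _design_efficiency_sketch(X):
    """Return 1 / trace((X'X)^-1) for a 2D design matrix array."""
    import numpy as np
    X = np.asarray(X, dtype=float)
    return 1 / np.trace(np.linalg.inv(X.T.dot(X)))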
def regress_task(exp, subj, data):
    """Fit a model of the task and return the residuals."""
    if exp == "dots":
        ntp = 459
        design, _ = dots_design(subj)
        design.loc[:, "duration"] = 0
        hpf_kernel = glm.fsl_highpass_matrix(ntp, 128, 1)
    elif exp == "sticks":
        ntp = 515
        design, _ = sticks_design(subj)
        design.loc[:, "duration"] = 0
        design.loc[:, "onset"] = (design["onset"] / .72).round()
        hpf_kernel = glm.fsl_highpass_matrix(ntp, 178, 1)

    conditions = design["condition"].unique()

    # Upsample the dots data to match the design resolution
    if exp == "dots":
        run_data = np.split(data, 12)
        data = np.concatenate([moss.upsample(d, 2) for d in run_data])

    # Make a design matrix for each run and then concatenate
    Xs = []
    for run, run_df in design.groupby("run"):
        Xrun = glm.DesignMatrix(run_df,
                                glm.FIR(tr=1, nbasis=24, offset=-2),
                                condition_names=conditions,
                                hpf_kernel=hpf_kernel,
                                ntp=ntp, tr=1,
                                oversampling=1).design_matrix

        # Regress confounds out of the design matrix
        confounds = confound_design(exp, subj, run, hpf_kernel)
        assert len(confounds) == len(Xrun)
        confounds.index = Xrun.index
        Xrun = OLS(Xrun, confounds).fit().resid
        Xs.append(Xrun)
    X = pd.concat(Xs)

    resid = OLS(data, X).fit().resid.values
    return resid
def _run_interface(self, runtime):

    # Get all the information for the design
    design_kwargs = self.build_design_information()

    # Initialize the design matrix object
    X = glm.DesignMatrix(**design_kwargs)

    # Report on the design
    self.design_report(self.inputs.exp_info, X, design_kwargs)

    # Write out the design object as a pkl to pass to the report function
    X.to_pickle("design.pkl")

    # Finally, write out the design files in FSL format
    X.to_fsl_files("design", self.inputs.exp_info["contrasts"])

    return runtime
def residualize_roi_data(subj, mask, model, conditions=None):
    """Residualize cached ROI data against task model."""
    orig_data = load_cached_roi_data(subj, mask)

    # De-mean the data by run and voxel
    orig_data = signal.detrend(orig_data, axis=1, type="constant")

    # Precompute the highpass filter kernel and HRF
    ntp = orig_data.shape[1]
    hpf_kernel = glm.fsl_highpass_matrix(ntp, 128)
    hrf = glm.GammaDifferenceHRF(temporal_deriv=True)

    # Load the task design
    design_file = op.join(data_dir, subj, "design", model + ".csv")
    design = pd.read_csv(design_file)
    if conditions is None:
        conditions = sorted(design["condition"].unique())

    # Set up the output data structure
    out_data = np.empty_like(orig_data)

    # Loop over the runs and get the residual data for each
    for run_i, run_data in enumerate(orig_data):

        # Generate the design matrix
        run_design = design.query("run == (@run_i + 1)")
        X = glm.DesignMatrix(run_design, hrf, ntp,
                             condition_names=conditions,
                             hpf_kernel=hpf_kernel)

        # Fit the model
        ols = sm.OLS(run_data, X.design_matrix).fit()

        # Save the residuals
        out_data[run_i] = ols.resid

    # Z-score the residuals by run and voxel
    out_data = stats.zscore(out_data, axis=1)
    assert not np.any(np.isnan(out_data))

    return out_data
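# Illustrative sketch (not the project's implementation): residualizing data
# against a design matrix amounts to subtracting the least-squares fit,
# resid = Y - X @ beta_hat, which is what sm.OLS(...).fit().resid returns
# above. The hypothetical helper below does the same with plain numpy.
def _residualize_sketch(Y, X):
    """Return the OLS residuals of Y (n_tp x n_vox) regressed on X (n_tp x n_reg)."""
    import numpy as np
    beta, *_ = np.linalg.lstsq(X, Y, rcond=None)
    return Y - X.dot(beta)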
def deconvolve(self):
    """Fit a model for each run to get condition amplitude estimates."""
    beta_list = []

    # Set experiment-specific variables
    if self.exp == "dots":
        tr = 2
        ntp = 230
        condition_names = ["context", "trial_type", "cue"]
        context_map = dict(motion=0, color=1)
    elif self.exp == "sticks":
        tr = .72
        ntp = 515
        condition_names = ["context", "cue", "ori_diff", "hue_diff"]
        context_map = dict(ori=0, hue=1)

    # Initialize the data and design
    design, info = self.design_info()
    all_conditions = design.condition.sort_values().unique()
    run_data = np.split(self.data, 12, axis=0)
    hrf_model = glm.GammaDifferenceHRF(temporal_deriv=True, tr=tr)

    # Keep track of voxels with nonzero variance
    assert np.unique([d.shape[0] for d in run_data]).size == 1
    n_voxels = run_data[0].shape[1]
    good_voxels = np.ones(n_voxels, bool)

    for run, run_design in design.groupby("run"):

        # Build the design matrix
        dmat = glm.DesignMatrix(run_design, hrf_model, ntp,
                                hpf_kernel=self.hpf_kernel,
                                condition_names=all_conditions,
                                tr=tr)

        # Set up the regression variables
        X = dmat.design_matrix
        Y = pd.DataFrame(run_data[run - 1], index=X.index)

        # Regress signal confounds out of the design matrix
        # (They have already been removed from the data)
        signal_confounds = self.signal_confound_design(run)
        X = OLS(X, signal_confounds).fit().resid

        # Fit the experiment model and extract the condition betas
        betas = OLS(Y, X).fit().params.loc[info.loc[run].index]
        beta_list.append(betas)

        # Identify bad voxels
        good_voxels &= (Y.var(axis=0) > 0).values

    # Reformat the condition information by each variable
    conditions = pd.DataFrame(
        info.index.get_level_values("condition").str.split("-").tolist(),
        columns=condition_names)

    # Build the relevant objects for classification
    samples = conditions.index
    runs = pd.Series(info.index.get_level_values("run"), index=samples)
    betas = pd.concat(beta_list, ignore_index=True)
    rt = pd.Series(info["rt"].values, index=samples)
    y = conditions.context.map(context_map)

    # Remove null or single observation samples
    use = pd.Series(info["count"].values > 1, index=samples)
    runs, betas, rt, y = runs[use], betas[use], rt[use], y[use]

    # Assign instance attributes
    self.design = design
    self.betas = betas
    self.runs = runs
    self.rt = rt
    self.y = y
    self.good_voxels = good_voxels
def _run_interface(self, runtime):

    subject = self.inputs.subject
    session = self.inputs.session
    run = self.inputs.run
    exp_info = Bunch(self.inputs.exp_info)
    model_info = Bunch(self.inputs.model_info)
    data_dir = self.inputs.data_dir

    # Load the timeseries
    ts_img = nib.load(self.inputs.ts_file)
    affine, header = ts_img.affine, ts_img.header

    # Load the anatomical segmentation and define the analysis mask
    run_mask = nib.load(self.inputs.mask_file).get_data() > 0
    seg_img = nib.load(self.inputs.seg_file)
    seg = seg_img.get_data()
    mask = (seg > 0) & (seg < 5) & run_mask
    n_vox = mask.sum()
    mask_img = nib.Nifti1Image(mask.astype(np.int8), affine, header)

    # Load the noise segmentation
    # TODO implement noisy voxel removal
    noise_img = nib.load(self.inputs.noise_file)

    # Spatially filter the data
    fwhm = model_info.smooth_fwhm
    # TODO use smooth_segmentation instead?
    signals.smooth_volume(ts_img, fwhm, mask_img, noise_img, inplace=True)

    if model_info.surface_smoothing:
        vert_data = nib.load(self.inputs.surf_file).get_data()
        for i, mesh_file in enumerate(self.inputs.mesh_files):
            sm = surface.SurfaceMeasure.from_file(mesh_file)
            vert_img = nib.Nifti1Image(vert_data[..., i], affine)
            signals.smooth_surface(ts_img, vert_img, sm, fwhm, noise_img,
                                   inplace=True)

    # Compute the mean image for later
    # TODO limit to gray matter voxels?
    data = ts_img.get_data()
    mean = data.mean(axis=-1)
    mean_img = nib.Nifti1Image(mean, affine, header)

    # Temporally filter the data
    n_tp = ts_img.shape[-1]
    hpf_matrix = glm.highpass_filter_matrix(n_tp,
                                            model_info.hpf_cutoff,
                                            exp_info.tr)
    data[mask] = np.dot(hpf_matrix, data[mask].T).T

    # TODO remove the mean from the data
    # data[gray_mask] += mean[gray_mask, np.newaxis]
    data[~mask] = 0  # TODO this is done within smoothing actually

    # Define confound regressors from various sources
    # TODO
    mc_data = pd.read_csv(self.inputs.mc_file)

    # Detect artifact frames
    # TODO

    # Convert to percent signal change?
    # TODO

    # Build the design matrix
    # TODO move out of moss and simplify
    design_file = op.join(data_dir, subject, "design",
                          model_info.name + ".csv")
    design = pd.read_csv(design_file)
    run_rows = (design.session == session) & (design.run == run)
    design = design.loc[run_rows]
    # TODO better error when this fails (maybe check earlier too)
    assert len(design) > 0
    dmat = mossglm.DesignMatrix(design, ntp=n_tp, tr=exp_info.tr)
    X = dmat.design_matrix.values

    # Save out the design matrix
    design_file = self.define_output("design_file", "design.csv")
    dmat.design_matrix.to_csv(design_file, index=False)

    # Prewhiten the data
    ts_img = nib.Nifti1Image(data, affine)
    WY, WX = glm.prewhiten_image_data(ts_img, mask_img, X)

    # Fit the final model
    B, SS, XtXinv, E = glm.iterative_ols_fit(WY, WX)

    # TODO should we re-compute the tSNR on the residuals?

    # Convert outputs to image format
    beta_img = matrix_to_image(B.T, mask_img)
    error_img = matrix_to_image(SS, mask_img)
    XtXinv_flat = XtXinv.reshape(n_vox, -1)
    ols_img = matrix_to_image(XtXinv_flat.T, mask_img)
    resid_img = matrix_to_image(E, mask_img, ts_img)

    # Write out the results
    self.write_image("mask_file", "mask.nii.gz", mask_img)
    self.write_image("beta_file", "beta.nii.gz", beta_img)
    self.write_image("error_file", "error.nii.gz", error_img)
    self.write_image("ols_file", "ols.nii.gz", ols_img)
    if model_info.save_residuals:
        self.write_image("resid_file", "resid.nii.gz", resid_img)

    # Make some QC plots

    # We want a version of the resid data with an intact mean so that
    # the carpet plot can compute percent signal change.
    # (Maybe carpetplot should accept a mean image and handle that
    # internally?)
    # TODO standardize the representation of mean in this method
    resid_data = np.zeros(ts_img.shape, np.float32)
    resid_data += np.expand_dims(mean * mask, axis=-1)
    resid_data[mask] += E.T
    resid_img = nib.Nifti1Image(resid_data, affine, header)
    p = CarpetPlot(resid_img, seg_img, mc_data)
    self.write_visualization("resid_plot", "resid.png", p)

    # Plot the design matrix
    # TODO update when improving design matrix code
    design_plot = self.define_output("design_plot", "design.png")
    dmat.plot(fname=design_plot, close=True)

    # Plot the sigma squares image for QC
    error_m = Mosaic(mean_img, error_img, mask_img)
    error_m.plot_overlay("cube:.8:.2", 0, fmt=".0f")
    self.write_visualization("error_plot", "error.png", error_m)

    return runtime
def design_report(self, exp_info, X, design_kwargs):
    """Generate static images summarizing the design."""
    # Plot the design itself
    design_png = op.abspath("design.png")
    X.plot(fname=design_png, close=True)

    with sns.axes_style("whitegrid"):

        # Plot the eigenvalue spectrum
        svd_png = op.abspath("design_singular_values.png")
        X.plot_singular_values(fname=svd_png, close=True)

        # Plot the correlations between design elements and confounds
        corr_png = op.abspath("design_correlation.png")
        if design_kwargs["design"] is None:
            with open(corr_png, "wb"):
                pass
        else:
            X.plot_confound_correlation(fname=corr_png, close=True)

    # Build a list of images summarizing the model
    report = [design_png, corr_png, svd_png]

    # Now plot the information loss from the high-pass filter
    design_kwargs["hpf_cutoff"] = None
    X_unfiltered = glm.DesignMatrix(**design_kwargs)

    tr = design_kwargs["tr"]
    ntp = design_kwargs["ntp"]

    # Plot for each contrast
    for i, (name, cols, weights) in enumerate(exp_info["contrasts"], 1):

        # Compute the contrast predictors
        C = X.contrast_vector(cols, weights)
        y_filt = X.design_matrix.dot(C)
        y_unfilt = X_unfiltered.design_matrix.dot(C)

        # Compute the spectral density for filtered and unfiltered
        fs, pxx_filt = signal.welch(y_filt, 1. / tr, nperseg=ntp)
        fs, pxx_unfilt = signal.welch(y_unfilt, 1. / tr, nperseg=ntp)

        # Draw the spectral density
        with sns.axes_style("whitegrid"):
            f, ax = plt.subplots(figsize=(9, 3))
            ax.fill_between(fs, pxx_unfilt, color="#C41E3A")
            if exp_info["hpf_cutoff"] is not None:
                ax.axvline(1.0 / exp_info["hpf_cutoff"], c=".3",
                           ls=":", lw=1.5)
            ax.fill_between(fs, pxx_filt, color=".5")

        # Label the plot
        ax.set(xlabel="Frequency",
               ylabel="Spectral Density",
               xlim=(0, .15))
        plt.tight_layout()

        # Save the plot
        fname = op.abspath("cope%d_filter.png" % i)
        f.savefig(fname, dpi=100)
        plt.close(f)
        report.append(fname)

    # Store the report files for later
    self.report_files = report