def signal_confound_design(self, run):
        """Build a matrix of signal confound variables.

        Reads the motion, nuisance, and artifact files found under the
        ``preproc`` directory for ``self.exp`` / ``self.subj`` and returns
        the ``design_matrix`` attribute of a ``glm.DesignMatrix`` that is
        built with the concatenated motion and nuisance columns passed as
        ``confounds``.

        NOTE(review): several statements in this method are incomplete as
        written (unclosed calls), so how ``run``, ``tr`` and ``artifacts``
        are consumed cannot be determined from the code here.
        """
        analysis_dir = PROJECT["analysis_dir"]

        # ``tr`` is chosen per experiment.
        # NOTE(review): it stays unbound for any other value of ``self.exp``.
        if self.exp == "dots":
            tr = 2
        elif self.exp == "sticks":
            tr = .72

        # Locate the preprocessing directory and load the confound sources.
        # ``artifacts`` is reduced with ``any(axis=1)``, i.e. one boolean per
        # row that is True when any column in that row is set.
        fstem = op.join(analysis_dir, self.exp, self.subj, "preproc",

        motion = (pd.read_csv(op.join(fstem, "realignment_params.csv")).filter(
        nuisance = (pd.read_csv(op.join(
        artifacts = (pd.read_csv(op.join(fstem, "artifacts.csv")).any(axis=1))

        # Motion and nuisance columns are placed side by side
        confounds = pd.concat([motion, nuisance], axis=1)
        dmat = glm.DesignMatrix(confounds=confounds,

        return dmat.design_matrix
def estimate_voxel_params(subj, data, model, runs, conditions):
    """Fit a univariate model in each voxel of a ROI data array.

    Loads the design CSV located by ``design_temp.format(subj, model)``,
    builds one design matrix per entry in ``runs``, stacks them, and fits a
    single OLS model to the run-stacked data.

    ``data`` must have one entry per run along its first axis (asserted
    below); its second axis is used as the number of timepoints. It is
    assumed to be (run, timepoint, voxel) -- TODO confirm against caller.

    Returns the ``params`` attribute of the fitted OLS results.

    NOTE(review): the ``glm.DesignMatrix`` call in the run loop is incomplete
    as written, so the use of ``hpf_kernel``, ``hrf``, ``conditions`` and the
    population of ``Xs`` cannot be seen here.
    """

    # Load the task design
    design_file = design_temp.format(subj, model)
    design = pd.read_csv(design_file)

    # Precompute the highpass filter kernel and HRF
    ntp = data.shape[1]
    hpf_kernel = glm.fsl_highpass_matrix(ntp, 128)
    hrf = glm.GammaDifferenceHRF()

    # Build a design matrix for each run separately and then combine
    Xs = []
    for run in runs:
        # ``@run`` refers to the loop variable inside the query expression
        run_design = design.query("run == @run")
        X = glm.DesignMatrix(run_design,

    X = pd.concat(Xs).reset_index(drop=True)

    # Rotate the data around to stack runs together
    # (all leading axes are collapsed; the last axis is kept as columns)
    np.testing.assert_equal(len(data), len(runs))
    data = data.reshape(-1, data.shape[-1])

    # Fit the model
    # (this rebinds the ``model`` argument to the fitted results object)
    model = sm.OLS(data, X).fit()

    # Return the params
    return model.params
def confound_design(exp, subj, run, hpf_kernel):
    """Build a matrix of confound variables.

    Reads the motion, nuisance, and artifact files from the
    ``preproc/run_<run>`` directory for ``exp`` / ``subj``. For the "dots"
    experiment the confound sources are upsampled by a factor of 2 first.
    Returns the ``design_matrix`` attribute of a ``glm.DesignMatrix`` built
    with the concatenated motion and nuisance columns as ``confounds``.

    NOTE(review): several calls are incomplete as written (unclosed
    parentheses), so how ``tr``, ``hpf_kernel`` and ``artifacts`` reach the
    design matrix cannot be determined from the code here.
    """
    analysis_dir = PROJECT["analysis_dir"]

    # ``tr`` is chosen per experiment.
    # NOTE(review): it stays unbound for any other value of ``exp``.
    if exp == "dots":
        tr = 1
    elif exp == "sticks":
        tr = .72

    # Load in the confound information
    # (``artifacts`` becomes one boolean per row: True if any column is set)
    fstem = op.join(analysis_dir, exp, subj, "preproc", "run_{}".format(run))
    motion = (pd.read_csv(op.join(fstem, "realignment_params.csv")).filter(
    nuisance = (pd.read_csv(op.join(
    artifacts = (pd.read_csv(op.join(fstem, "artifacts.csv")).any(axis=1))

    # Upsample dots data
    if exp == "dots":
        motion = pd.DataFrame(moss.upsample(motion, 2))
        nuisance = pd.DataFrame(moss.upsample(nuisance, 2))
        # Half-step positions 0, 0.5, ..., 229; positions missing from the
        # original index are forward-filled from the preceding row, then the
        # index is reset to consecutive integers
        new_tps = np.arange(0, 229.5, .5)
        artifacts = artifacts.reindex(new_tps).ffill().reset_index(drop=True)

    # Build this portion of the design matrix
    confounds = pd.concat([motion, nuisance], axis=1)
    dmat = glm.DesignMatrix(confounds=confounds,

    return dmat.design_matrix
def estimate_subject_roi_fir(subj, mask, model, conditions=None):
    """Fit an FIR model to the voxel-averaged timeseries of a cached ROI.

    The cached ROI data for ``subj`` / ``mask`` are averaged over the last
    axis, upsampled by a factor of 2, passed through ``percent_change``,
    concatenated into one vector, and regressed with OLS on a design built
    from per-run ``glm.DesignMatrix`` objects using a 24-basis FIR model.

    When ``conditions`` is None, the unique values of the design's
    ``condition`` column are used.

    Returns a DataFrame of the fitted parameters with a ``coef`` column plus
    ``timepoint``, ``condition``, ``subj`` and ``roi`` metadata columns.

    NOTE(review): the ``glm.DesignMatrix`` call in the run loop is incomplete
    as written, so the use of ``hpf_kernel``, ``conditions`` and the
    population of ``Xs`` cannot be seen here.
    """

    # Load the cached ROI dataset
    data = load_cached_roi_data(subj, mask)

    # Average the data over voxels
    data = data.mean(axis=-1)

    # Upsample the data to 1s resolution
    data = moss.upsample(data.T, 2).T

    # Convert the data to percent signal change over runs
    data = percent_change(data, 1)

    # Count the number of timepoints
    ntp = data.shape[1]

    # Concatenate the data into one long vector
    data = np.concatenate(data)

    # Load the design, make events impulses, get a list of conditions
    design = pd.read_csv(design_temp.format(subj, model))
    design["duration"] = 0
    if conditions is None:
        conditions = design["condition"].unique()

    # Precache the hpf kernel
    hpf_kernel = glm.fsl_highpass_matrix(ntp, 128)

    # Make a design matrix for each run and then concatenate
    Xs = []
    for run, run_df in design.groupby("run"):
        X = glm.DesignMatrix(run_df,
                             glm.FIR(tr=1, nbasis=24, offset=-2),
    X = pd.concat(Xs)

    # Fit the model
    # (this rebinds the ``model`` argument to the fitted results object)
    model = sm.OLS(data, X).fit()

    # Add metadata about the beta for each timepoint and condition
    # The parameter name is split positionally: its last two characters are
    # parsed as an integer timepoint (shifted down by one) and everything
    # before the final three characters is taken as the condition name.
    params = model.params.reset_index(name="coef")
    params["timepoint"] = params["index"].str[-2:].astype(int)
    params["timepoint"] -= 1
    params["condition"] = params["index"].str[:-3]
    params["subj"] = subj
    params["roi"] = mask

    # Return the model parameters
    return params
def schedule_efficiency(schedule, nbasis, leadout_trs):
    """Compute the FIR design matrix efficiency for a given schedule.

    Builds an FIR design (``nbasis`` basis functions, ``tr=1``,
    ``offset=-1``) from onsets taken from ``schedule.trial_time_tr`` and
    computes ``eff`` as the reciprocal of a matrix trace. The design length
    is the latest onset plus ``leadout_trs``.

    Returns the tuple ``(schedule, eff)``.

    NOTE(review): the ``pd.DataFrame`` and ``inv`` calls are incomplete as
    written, so the remaining event columns and the exact matrix being
    inverted cannot be seen here.
    """
    par = pd.DataFrame(dict(onset=schedule.trial_time_tr,

    fir = glm.FIR(tr=1, nbasis=nbasis, offset=-1)
    ntp = par.onset.max() + leadout_trs

    # Work with the raw array of design matrix values
    X = glm.DesignMatrix(par, fir, ntp,
                         tr=1, oversampling=1).design_matrix.values
    eff = 1 / np.trace(inv(
    return schedule, eff
def regress_task(exp, subj, data):
    """Fit a model of the task and return the residuals.

    Loads the experiment-specific design, sets every event duration to 0,
    builds a 24-basis FIR design matrix per run, regresses the output of
    ``confound_design`` out of each run's matrix, and finally regresses the
    stacked design out of ``data`` with OLS.

    For "dots", ``data`` is split into 12 equal pieces along its first axis
    and each piece is upsampled by a factor of 2 before fitting.

    Returns the residuals of the final fit as an array (``.values``).

    NOTE(review): the ``glm.DesignMatrix`` call in the run loop is incomplete
    as written, and nothing visible appends to ``Xs``; ``design``, ``ntp``
    and ``hpf_kernel`` are unbound for any ``exp`` other than "dots" or
    "sticks".
    """
    if exp == "dots":
        ntp = 459
        design, _ = dots_design(subj)
        design.loc[:, "duration"] = 0
        hpf_kernel = glm.fsl_highpass_matrix(ntp, 128, 1)

    elif exp == "sticks":
        ntp = 515
        design, _ = sticks_design(subj)
        design.loc[:, "duration"] = 0
        # Onsets are divided by .72 and rounded to whole numbers
        design.loc[:, "onset"] = (design["onset"] / .72).round()
        hpf_kernel = glm.fsl_highpass_matrix(ntp, 178, 1)

    conditions = design["condition"].unique()

    # Upsample the dots data to match the design resolution
    if exp == "dots":
        run_data = np.split(data, 12)
        data = np.concatenate([moss.upsample(d, 2) for d in run_data])

    # Make a design matrix for each run and then concatenate
    Xs = []
    for run, run_df in design.groupby("run"):

        Xrun = glm.DesignMatrix(run_df,
                                glm.FIR(tr=1, nbasis=24, offset=-2),

        # Regress confounds out of the design matrix
        # (the confound index is overwritten with the design's index so the
        # two line up row for row in the regression)
        confounds = confound_design(exp, subj, run, hpf_kernel)
        assert len(confounds) == len(Xrun)
        confounds.index = Xrun.index
        Xrun = OLS(Xrun, confounds).fit().resid

    X = pd.concat(Xs)

    resid = OLS(data, X).fit().resid.values
    return resid
    def _run_interface(self, runtime):
        """Build the design matrix, report on it, and export FSL files.

        The keyword arguments from ``build_design_information`` are used to
        construct a ``glm.DesignMatrix``. That object is handed to
        ``design_report`` and then written out with ``to_fsl_files`` under
        the "design" stem, using the contrasts in ``self.inputs.exp_info``.

        Returns ``runtime`` unchanged.
        """
        # Gather the design specification and construct the matrix from it
        design_spec = self.build_design_information()
        design_matrix = glm.DesignMatrix(**design_spec)

        # Produce the design summary
        self.design_report(self.inputs.exp_info, design_matrix, design_spec)

        # NOTE(review): an earlier comment here mentioned pickling the design
        # object for the report function; no code in this method does so.

        # Export the design and contrasts in FSL format
        contrasts = self.inputs.exp_info["contrasts"]
        design_matrix.to_fsl_files("design", contrasts)

        return runtime
def residualize_roi_data(subj, mask, model, conditions=None):
    """Residualize cached ROI data against task model.

    Loads the cached ROI data for ``subj`` / ``mask``, removes the mean
    along axis 1, fits an OLS model of the task design to each entry along
    the first axis (one per run), and returns the residuals z-scored along
    axis 1. The output has the same shape as the loaded data.

    When ``conditions`` is None, the sorted unique values of the design's
    ``condition`` column are used.

    Raises ``AssertionError`` if any z-scored value is NaN.

    NOTE(review): the ``glm.DesignMatrix`` call in the run loop is incomplete
    as written, so the use of ``hpf_kernel``, ``hrf`` and ``conditions``
    cannot be seen here.
    """
    orig_data = load_cached_roi_data(subj, mask)

    # De-mean the data by run and voxel
    orig_data = signal.detrend(orig_data, axis=1, type="constant")

    # Precompute the highpass filter kernel and HRF
    ntp = orig_data.shape[1]
    hpf_kernel = glm.fsl_highpass_matrix(ntp, 128)
    hrf = glm.GammaDifferenceHRF(temporal_deriv=True)

    # Load the task design
    design_file = op.join(data_dir, subj, "design", model + ".csv")
    design = pd.read_csv(design_file)
    if conditions is None:
        conditions = sorted(design["condition"].unique())

    # Set up the output data structure
    out_data = np.empty_like(orig_data)

    # Loop over the runs and get the residual data for each
    for run_i, run_data in enumerate(orig_data):

        # Generate the design matrix
        # (``run_i`` is zero-based, so it is offset by one to match the
        # ``run`` column of the design)
        run_design = design.query("run == (@run_i + 1)")
        X = glm.DesignMatrix(run_design,

        # Fit the model
        ols = sm.OLS(run_data, X.design_matrix).fit()

        # Save the residuals
        out_data[run_i] = ols.resid

    # Z-score the residuals by run and voxel
    out_data = stats.zscore(out_data, axis=1)
    assert not np.any(np.isnan(out_data))

    return out_data
    def deconvolve(self):
        """Fit a model for each run to get condition amplitude estimates.

        For every run in the design, a design matrix is built, the signal
        confounds from ``signal_confound_design`` are regressed out of it,
        and an OLS model is fit to that run's data. The results are stored
        on the instance as ``betas``, ``runs``, ``rt``, ``y`` and
        ``good_voxels``; nothing is returned.

        NOTE(review): several statements are incomplete as written (the
        ``np.split`` argument, the ``glm.DesignMatrix`` and ``pd.DataFrame``
        calls, and the right-hand side of ``y``), and nothing visible
        appends to ``beta_list``. The experiment-specific names are unbound
        for any ``self.exp`` other than "dots" or "sticks".
        """
        beta_list = []

        # Set experiment-specific variables
        if self.exp == "dots":
            tr = 2
            ntp = 230
            condition_names = ["context", "trial_type", "cue"]
            context_map = dict(motion=0, color=1)
        elif self.exp == "sticks":
            tr = .72
            ntp = 515
            condition_names = ["context", "cue", "ori_diff", "hue_diff"]
            context_map = dict(ori=0, hue=1)

        # Initialize the data and design
        # NOTE(review): ``Series.sort`` is not available in current pandas
        # releases (``sort_values`` is the replacement) -- confirm against
        # the pinned pandas version.
        design, info = self.design_info()
        all_conditions = design.condition.sort(inplace=False).unique()
        run_data = np.split(, 12, axis=0)
        hrf_model = glm.GammaDifferenceHRF(temporal_deriv=True, tr=tr)

        # Keep track of voxels with nonzero variance
        # (every run must contribute the same number of rows)
        # NOTE(review): the ``np.bool`` alias was removed in NumPy 1.24;
        # confirm against the pinned NumPy version.
        assert np.unique([d.shape[0] for d in run_data]).size == 1
        n_voxels = run_data[0].shape[1]
        good_voxels = np.ones(n_voxels, np.bool)

        for run, run_design in design.groupby("run"):

            # Build the design matrix
            dmat = glm.DesignMatrix(run_design,

            # Set up the regression variables
            # (``run`` is used one-based here: run 1 maps to run_data[0])
            X = dmat.design_matrix
            Y = pd.DataFrame(run_data[run - 1], index=X.index)

            # Regress signal confounds out of the design matrix
            # (They have already been removed from the data)
            signal_confounds = self.signal_confound_design(run)
            X = OLS(X, signal_confounds).fit().resid

            # Fit the experiment model and extract the condition betas
            # NOTE(review): the ``.ix`` indexer was removed in pandas 1.0;
            # confirm against the pinned pandas version.
            betas = OLS(Y, X).fit().params.ix[info.ix[run].index]

            # Identify bad voxels
            # (a voxel stays "good" only if its variance is nonzero in
            # every run)
            good_voxels &= (Y.var(axis=0) > 0).values

        # Reformat the condition information by each variable
        conditions = pd.DataFrame(

        # Build the relevant objects for classification
        samples = conditions.index
        runs = pd.Series(info.index.get_level_values("run"), index=samples)
        betas = pd.concat(beta_list, ignore_index=True)
        rt = pd.Series(info["rt"].values, index=samples)
        y =

        # Remove null or single observation samples
        use = pd.Series(info["count"].values > 1, index=samples)
        runs, betas, rt, y = runs[use], betas[use], rt[use], y[use]

        # Assign instance attributes
        self.betas = betas
        self.runs = runs
        self.rt = rt
        self.y = y
        self.good_voxels = good_voxels
    def _run_interface(self, runtime):
        """Fit the timeseries model for one run and write out the results.

        The steps, in order: load the timeseries and masks, smooth the
        data, highpass filter it within the analysis mask, build and save
        the design matrix, prewhiten, fit the model with
        ``glm.iterative_ols_fit``, write the result images, and generate QC
        plots (residual carpet plot, design matrix, error mosaic).

        Returns ``runtime`` unchanged.

        NOTE(review): several statements are incomplete as written (the
        right-hand side of ``run``, the highpass filter calls, the design
        file path, the run selection, and the ``mossglm.DesignMatrix``
        call), so their full behavior cannot be seen here.
        """

        subject = self.inputs.subject
        session = self.inputs.session
        run =
        exp_info = Bunch(self.inputs.exp_info)
        model_info = Bunch(self.inputs.model_info)
        data_dir = self.inputs.data_dir

        # Load the timeseries
        ts_img = nib.load(self.inputs.ts_file)
        affine, header = ts_img.affine, ts_img.header

        # Load the anatomical segmentation and fine analysis mask
        # The analysis mask keeps voxels with segmentation labels 1 through
        # 4 that also fall inside the run mask.
        # NOTE(review): ``get_data`` is deprecated in newer nibabel releases
        # in favor of ``get_fdata`` -- confirm against the pinned version.
        run_mask = nib.load(self.inputs.mask_file).get_data() > 0
        seg_img = nib.load(self.inputs.seg_file)
        seg = seg_img.get_data()
        mask = (seg > 0) & (seg < 5) & run_mask
        n_vox = mask.sum()
        mask_img = nib.Nifti1Image(mask.astype(np.int8), affine, header)

        # Load the noise segmentation
        # TODO implement noisy voxel removal
        noise_img = nib.load(self.inputs.noise_file)

        # Spatially filter the data
        fwhm = model_info.smooth_fwhm
        # TODO use smooth_segmentation instead?
        signals.smooth_volume(ts_img, fwhm, mask_img, noise_img, inplace=True)

        # NOTE(review): nothing visible in this branch uses ``sm`` or
        # ``vert_img`` after they are created.
        if model_info.surface_smoothing:
            vert_data = nib.load(self.inputs.surf_file).get_data()
            for i, mesh_file in enumerate(self.inputs.mesh_files):
                sm = surface.SurfaceMeasure.from_file(mesh_file)
                vert_img = nib.Nifti1Image(vert_data[..., i], affine)

        # Compute the mean image for later
        # TODO limit to gray matter voxels?
        data = ts_img.get_data()
        mean = data.mean(axis=-1)
        mean_img = nib.Nifti1Image(mean, affine, header)

        # Temporally filter the data
        n_tp = ts_img.shape[-1]
        hpf_matrix = glm.highpass_filter_matrix(n_tp, model_info.hpf_cutoff,
        data[mask] =, data[mask].T).T

        # TODO remove the mean from the data
        # data[gray_mask] += mean[gray_mask, np.newaxis]
        data[~mask] = 0  # TODO this is done within smoothing actually

        # Define confound regressors from various sources
        # TODO
        mc_data = pd.read_csv(self.inputs.mc_file)

        # Detect artifact frames
        # TODO

        # Convert to percent signal change?
        # TODO

        # Build the design matrix
        # TODO move out of moss and simplify
        design_file = op.join(data_dir, subject, "design",
                     + ".csv")
        design = pd.read_csv(design_file)
        run_rows = (design.session == session) & ( == run)
        design = design.loc[run_rows]
        # TODO better error when this fails (maybe check earlier too)
        assert len(design) > 0
        dmat = mossglm.DesignMatrix(design, ntp=n_tp,
        X = dmat.design_matrix.values

        # Save out the design matrix
        # (``design_file`` is rebound from the input path to the output path)
        design_file = self.define_output("design_file", "design.csv")
        dmat.design_matrix.to_csv(design_file, index=False)

        # Prewhiten the data
        ts_img = nib.Nifti1Image(data, affine)
        WY, WX = glm.prewhiten_image_data(ts_img, mask_img, X)

        # Fit the final model
        B, SS, XtXinv, E = glm.iterative_ols_fit(WY, WX)

        # TODO should we re-compute the tSNR on the residuals?

        # Convert outputs to image format
        # (``XtXinv`` is flattened to one row per mask voxel first)
        beta_img = matrix_to_image(B.T, mask_img)
        error_img = matrix_to_image(SS, mask_img)
        XtXinv_flat = XtXinv.reshape(n_vox, -1)
        ols_img = matrix_to_image(XtXinv_flat.T, mask_img)
        resid_img = matrix_to_image(E, mask_img, ts_img)

        # Write out the results
        self.write_image("mask_file", "mask.nii.gz", mask_img)
        self.write_image("beta_file", "beta.nii.gz", beta_img)
        self.write_image("error_file", "error.nii.gz", error_img)
        self.write_image("ols_file", "ols.nii.gz", ols_img)
        if model_info.save_residuals:
            self.write_image("resid_file", "resid.nii.gz", resid_img)

        # Make some QC plots
        # We want a version of the resid data with an intact mean so that
        # the carpet plot can compute percent signal change.
        # (Maybe carpetplot should accept a mean image and handle that
        # internally)?
        # TODO standardize the representation of mean in this method
        resid_data = np.zeros(ts_img.shape, np.float32)
        resid_data += np.expand_dims(mean * mask, axis=-1)
        resid_data[mask] += E.T
        resid_img = nib.Nifti1Image(resid_data, affine, header)

        p = CarpetPlot(resid_img, seg_img, mc_data)
        self.write_visualization("resid_plot", "resid.png", p)

        # Plot the design matrix
        # TODO update when improving design matrix code
        design_plot = self.define_output("design_plot", "design.png")
        dmat.plot(fname=design_plot, close=True)

        # Plot the sigma squares image for QC
        error_m = Mosaic(mean_img, error_img, mask_img)
        error_m.plot_overlay("cube:.8:.2", 0, fmt=".0f")
        self.write_visualization("error_plot", "error.png", error_m)

        return runtime
    def design_report(self, exp_info, X, design_kwargs):
        """Generate static images summarizing the design.

        Writes PNG files into the current working directory: the design
        matrix, its singular values, the design/confound correlations, and
        one ``cope<i>_filter.png`` spectral-density plot per contrast in
        ``exp_info["contrasts"]``. The list ``[design_png, corr_png,
        svd_png]`` is stored on ``self.report_files``.

        Side effect: ``design_kwargs["hpf_cutoff"]`` is set to None in the
        dictionary passed by the caller.

        NOTE(review): several statements are incomplete as written (the body
        of the ``with open`` block, the right-hand sides of ``y_filt`` and
        ``y_unfilt``, the ``ax.axvline`` call, and the call that labels the
        plot), so their full behavior cannot be seen here.
        """
        # Plot the design itself
        design_png = op.abspath("design.png")
        X.plot(fname=design_png, close=True)

        with sns.axes_style("whitegrid"):
            # Plot the eigenvalue spectrum
            svd_png = op.abspath("design_singular_values.png")
            X.plot_singular_values(fname=svd_png, close=True)

            # Plot the correlations between design elements and confounds
            corr_png = op.abspath("design_correlation.png")
            if design_kwargs["design"] is None:
                with open(corr_png, "wb"):
                X.plot_confound_correlation(fname=corr_png, close=True)

        # Build a list of images summarizing the model
        report = [design_png, corr_png, svd_png]

        # Now plot the information loss from the high-pass filter
        # (a second design matrix is built from the same arguments with the
        # filter cutoff cleared, for comparison against ``X``)
        design_kwargs["hpf_cutoff"] = None
        X_unfiltered = glm.DesignMatrix(**design_kwargs)
        tr = design_kwargs["tr"]
        ntp = design_kwargs["ntp"]

        # Plot for each contrast
        # (contrasts are numbered from 1 for the output filenames)
        for i, (name, cols, weights) in enumerate(exp_info["contrasts"], 1):

            # Compute the contrast predictors
            C = X.contrast_vector(cols, weights)
            y_filt =
            y_unfilt =

            # Compute the spectral density for filtered and unfiltered
            # (sampling frequency is 1 / tr, with a single segment of
            # length ``ntp``)
            fs, pxx_filt = signal.welch(y_filt, 1. / tr, nperseg=ntp)
            fs, pxx_unfilt = signal.welch(y_unfilt, 1. / tr, nperseg=ntp)

            # Draw the spectral density
            with sns.axes_style("whitegrid"):
                f, ax = plt.subplots(figsize=(9, 3))
            ax.fill_between(fs, pxx_unfilt, color="#C41E3A")
            if exp_info["hpf_cutoff"] is not None:
                ax.axvline(1.0 / exp_info["hpf_cutoff"],
            ax.fill_between(fs, pxx_filt, color=".5")

            # Label the plot
                   ylabel="Spectral Density",
                   xlim=(0, .15))

            # Save the plot
            fname = op.abspath("cope%d_filter.png" % i)
            f.savefig(fname, dpi=100)

        # Store the report files for later
        self.report_files = report