Beispiel #1
0
def residualize(Y, formula_res, data, formula_full=None):
    """
    Residualization, or adjusted residualization, of Y.

    A linear model is fitted on the design built from the full formula, then
    the fitted contribution of the terms named by ``formula_res`` is
    subtracted from Y.

    Parameters
    ----------
    Y: array (n, p), dependent variables
    formula_res: str, residualization formula, ex: "site":
    1) Fit  Y = b0 + b1 site + eps
    2) Return Y - b0 - b1 site
    data: DataFrame of independent variables
    formula_full: str, full model formula (default None), ex:
    "age + sex + site + diagnosis". If not None, an adjusted residualization
    is performed:
    1) Fit Y = b1 age + b2 sex + b3 site + b4 diagnosis + eps
    2) Return Y - b3 site
    When None, ``formula_res`` is used as the full model.

    Returns
    -------
    Y: array (n, p), of residualized dependent variables
    """
    # Without an explicit full model, the residualization formula is the model.
    full_formula = formula_res if formula_full is None else formula_full

    # Only the term names of the residualization design are needed.
    _, res_contrasts, _ = mulm.design_matrix(formula=formula_res, data=data)
    terms_to_remove = res_contrasts.keys()

    design, full_contrasts, _ = mulm.design_matrix(formula=full_formula,
                                                   data=data)

    # Fit the full model.
    fitted = mulm.MUOLS(Y, design).fit()

    # Columns of the full design that belong to a residualized term: stack
    # the contrasts of those terms and keep columns whose sum is exactly 1.
    # Assumes each contrast is an indicator vector over the design columns
    # -- TODO confirm against mulm.design_matrix.
    selected = [contrast for name, contrast in full_contrasts.items()
                if name in terms_to_remove]
    column_mask = np.array(selected).sum(axis=0) == 1

    removed_part = np.dot(design[:, column_mask], fitted.coef[column_mask, :])
    return Y - removed_part
Beispiel #2
0
 def __init__(self, data, formula_res, formula_full=None):
     """
     Build the full design and the mask of the columns to residualize.

     Parameters
     ----------
     data: DataFrame passed to mulm.design_matrix
     formula_res: str, formula whose terms are to be residualized
     formula_full: str, full model formula (default None: use formula_res)

     Sets ``design_mat``, ``t_contrasts`` and ``f_contrasts`` (the three
     values returned by mulm.design_matrix for the full formula) and
     ``mask``, a boolean array over the columns of the full design.
     """
     full_formula = formula_res if formula_full is None else formula_full

     # Only the term names of the residualization design are needed.
     _, res_contrasts, _ = mulm.design_matrix(formula=formula_res, data=data)
     terms_to_remove = res_contrasts.keys()

     full_design = mulm.design_matrix(formula=full_formula, data=data)
     self.design_mat, self.t_contrasts, self.f_contrasts = full_design

     # Columns of the full design belonging to a residualized term: stack
     # the contrasts of those terms and keep columns whose sum is exactly 1.
     selected = [contrast for name, contrast in self.t_contrasts.items()
                 if name in terms_to_remove]
     self.mask = np.array(selected).sum(axis=0) == 1
Beispiel #3
0
 def __init__(self, data, formula_res, formula_full=None):
     """
     Deprecated constructor: build the full design and the mask of the
     columns to residualize, after emitting a DeprecationWarning.

     Parameters
     ----------
     data: DataFrame passed to mulm.design_matrix
     formula_res: str, formula whose terms are to be residualized
     formula_full: str, full model formula (default None: use formula_res)

     Sets ``design_mat``, ``t_contrasts`` and ``f_contrasts`` (the three
     values returned by mulm.design_matrix for the full formula) and
     ``mask``, a boolean array over the columns of the full design.
     """
     # stacklevel=2 attributes the warning to the code instantiating the
     # class instead of this line, so the user sees where the deprecated
     # call is made (and the default filters show it when that is __main__).
     warnings.warn(
         "nitk.stats.Residualizer is deprecated, use mulm.residualizer.Residualizer instead. See https://github.com/neurospin/pylearn-mulm",
         DeprecationWarning,
         stacklevel=2)
     if formula_full is None:
         formula_full = formula_res
     # Only the term names of the residualization design are needed.
     res_terms = mulm.design_matrix(formula=formula_res,
                                    data=data)[1].keys()
     self.design_mat, self.t_contrasts, self.f_contrasts = \
         mulm.design_matrix(formula=formula_full, data=data)
     # Mask of the residualized terms within the full model: stack the
     # contrasts of those terms and keep columns whose sum is exactly 1.
     self.mask = np.array([
         cont for term, cont in self.t_contrasts.items()
         if term in res_terms
     ]).sum(axis=0) == 1
Beispiel #4
0
 def get_design_mat(self, data):
     """
     Build the design matrix of ``data`` for the stored full formula.

     The t-contrasts obtained for ``data`` are checked against those
     stored in ``self.t_contrasts``: every stored term must be present and
     have an identical contrast.

     Parameters
     ----------
     data: DataFrame passed to mulm.design_matrix

     Returns
     -------
     design_mat: the design matrix returned by mulm.design_matrix

     Raises
     ------
     AssertionError: if a stored term is missing from the new contrasts or
     its contrast differs.
     """
     design_mat, t_contrasts, _ = \
         mulm.design_matrix(formula=self.formula_full, data=data)
     # np.array_equal returns False on a shape mismatch, whereas element-wise
     # `==` may warn or raise depending on the NumPy version; the membership
     # test turns a missing term into the same failure instead of a KeyError.
     consistent = all(
         term in t_contrasts
         and np.array_equal(self.t_contrasts[term], t_contrasts[term])
         for term in self.t_contrasts
     )
     # Raised explicitly (same exception type as the former `assert`) so the
     # check is not stripped when Python runs with -O.
     if not consistent:
         raise AssertionError(
             "new data doesn't yield the same t-contrasts as the stored ones")
     return design_mat
Beispiel #5
0
    def __init__(self,
                 data=None,
                 formula_res=None,
                 formula_full=None,
                 contrast_res=None):
        """
        Set up the residualization contrast, from formulas or explicitly.

        Parameters
        ----------
        data: DataFrame
            DataFrame containing the columns used to build the design matrix
            (default None).

        formula_res: str
            Residualization formula. Ex: "site" (default None).

        formula_full: str
            Full model (formula) of residualization containing other
            variables to adjust for. Ex.: "site + age + sex" (default None,
            in which case formula_res is used).

        contrast_res: boolean array
            The contrast for residualization, ex:
            [False, True, False, False]. Only used when ``data`` is not a
            DataFrame or ``formula_res`` is not a str; it is then stored
            unchanged. NOTE(review): the original documentation states that
            None corresponds to True everywhere -- that interpretation is
            not implemented in this method, confirm where it is handled.
        """
        built_from_formula = (isinstance(data, pd.DataFrame)
                              and isinstance(formula_res, str))
        if not built_from_formula:
            # No usable (data, formula) pair: keep the caller's contrast.
            # formula_full, t_contrasts and f_contrasts are not set here.
            self.contrast_res = contrast_res
            return

        self.formula_full = formula_res if formula_full is None else formula_full

        # Only the term names of the residualization design are needed.
        _, res_contrasts, _ = mulm.design_matrix(formula=formula_res,
                                                 data=data)
        terms_to_remove = res_contrasts.keys()

        _, self.t_contrasts, self.f_contrasts = mulm.design_matrix(
            formula=self.formula_full, data=data)

        # Columns of the full design belonging to a residualized term: stack
        # the contrasts of those terms and keep columns whose sum is exactly 1.
        selected = [contrast for name, contrast in self.t_contrasts.items()
                    if name in terms_to_remove]
        self.contrast_res = np.array(selected).sum(axis=0) == 1
Beispiel #6
0
def univ_stats(Y, formula, data):
    """
    Fit a MUOLS model of Y on the design of ``formula`` and run one F-test
    per term of the design's F-contrasts.

    Parameters
    ----------
    Y: array (n_subjects, n_features)
    formula: str eg. "age + sex + site"
    data: DataFrame, containing value of formula terms

    Returns
    -------
    mod_mulm: the fitted mulm.MUOLS model
    aov_mulm: OrderedDict mapping each term of the F-contrasts to the value
    returned by ``mod_mulm.f_test(contrast, pval=True)``
    """
    design, _, f_contrasts = mulm.design_matrix(formula=formula, data=data)
    mod_mulm = mulm.MUOLS(Y, design).fit()

    # One F-test per term, kept in the iteration order of f_contrasts.
    aov_mulm = OrderedDict()
    for term, contrast in f_contrasts.items():
        aov_mulm[term] = mod_mulm.f_test(contrast, pval=True)

    return mod_mulm, aov_mulm
# Validation images, with singleton dimensions removed by squeeze().
imgs_test = datasets['validation_vbm'].squeeze()
print(imgs_train.shape, imgs_test.shape)

# Read mask
mask_img = nibabel.load(os.path.join(WD, "data", "mni_cerebrum-mask.nii.gz"))
# Boolean array, True where the mask image is non-zero.
mask_arr = mask_img.get_fdata() != 0

# Apply mask
# Boolean indexing on the axes after the first keeps only in-mask values,
# one row per image -- presumably (n_subjects, n_voxels), TODO confirm.
X_train = imgs_train[:, mask_arr]
X_test = imgs_test[:, mask_arr]

################################################################################
# Univariate statistics
# ---------------------

# Design matrix and contrasts for the "sex + age" formula on the training
# demographics, then a MUOLS fit with the masked training images as Y.
Z_train, t_contrasts, f_contrasts = mulm.design_matrix(formula="sex + age",
                                                       data=demo_train)
mod_mulm = mulm.MUOLS(Y=X_train, X=Z_train).fit()


def flat_to_img(mask_img, flat_values):
    """
    Put flat values back into an image shaped like the mask.

    Parameters
    ----------
    mask_img: image exposing ``get_fdata()``; its non-zero entries mark
    where the values go
    flat_values: array of values, squeezed before assignment; it must be
    assignable to the non-zero positions of the mask

    Returns
    -------
    The image built by ``nilearn.image.new_img_like`` from ``mask_img``
    and an array that is zero outside the mask.
    """
    mask_data = mask_img.get_fdata()
    inside_mask = mask_data != 0

    # Zero background; only in-mask positions receive a value.
    volume = np.zeros(mask_data.shape)
    volume[inside_mask] = flat_values.squeeze()
    return nilearn.image.new_img_like(mask_img, volume)


# t-test on the 'sex' contrast of the fitted model; with pval=True the result
# is unpacked into three values (named here statistic, p-value and df).
tstat_sex, pval_sex, df_sex = mod_mulm.t_test(t_contrasts['sex'], pval=True)
# Map the flat statistics back into an image shaped like the mask, and plot it.
tstat_sex_img = flat_to_img(mask_img, tstat_sex.squeeze())
nilearn.plotting.plot_stat_map(tstat_sex_img, title="sex")

# Same test and image reconstruction for the 'age' contrast.
tstat_age, pval_age, df_age = mod_mulm.t_test(t_contrasts['age'], pval=True)
tstat_age_img = flat_to_img(mask_img, tstat_age.squeeze())
################################################################################
# Example 1: Salary dataset
# -------------------------
#
# Fit a single model: `salary ~ experience + education + management`

url = 'https://github.com/duchesnay/pystatsml/raw/master/datasets/salary_table.csv'
df = pd.read_csv(url)


################################################################################
# Fit with MULM

# Y is the salary column as a float array with one column (n, 1).
Y = np.asarray(df.salary)[:, None].astype(float)
X, t_contrasts, f_contrasts = mulm.design_matrix(formula="experience + education + management", data=df)
mod_mulm = mulm.MUOLS(Y, X).fit()
# One t-test and one F-test per term, keyed by term name in contrast order.
tstat_mulm = OrderedDict((term, mod_mulm.t_test(t_contrasts[term], pval=True)) for term in t_contrasts)
fstat_mulm = OrderedDict((term, mod_mulm.f_test(f_contrasts[term], pval=True)) for term in f_contrasts)

print(mod_mulm.coef)
# One row per term; the columns are the three values returned by t_test.
print(pd.DataFrame(tstat_mulm, index=['tstat', 'pval', 'df']).T)
# print(pd.DataFrame(fstat_mulm, index=['fstat', 'pval']).T)


################################################################################
# Fit with statsmodels

# Same model fitted through the statsmodels formula API, for comparison.
mod_sm = smfrmla.ols('salary ~ experience + education + management', df).fit()
print(mod_sm.summary())