def residualize(Y, formula_res, data, formula_full=None):
    """Residualization or adjusted residualization of Y.

    Parameters
    ----------
    Y: array (n, p)
        Dependent variables.
    formula_res: str
        Residualisation formula. Ex: "site":
        1) Fit Y = b0 + b1 site + eps
        2) Return Y - b0 - b1 site
    data: DataFrame
        Independent variables referenced by the formulas.
    formula_full: str
        Full model formula (default None).
        Ex: "age + sex + site + diagnosis". If not None, an adjusted
        residualization is performed:
        1) Fit Y = b1 age + b2 sex + b3 site + b4 diagnosis + eps
        2) Return Y - b3 site
        More precisely, the removed part is the fitted contribution of
        every term of the full model whose name is also a term of
        formula_res.
        NOTE(review): if mulm.design_matrix adds an intercept term to both
        formulas, the intercept is removed as well -- confirm.

    Returns
    -------
    Y: array (n, p)
        Residualized dependent variables.
    """
    # Without a full model, the residualisation formula is the model.
    if formula_full is None:
        formula_full = formula_res

    # Names of the terms to remove (keys of the t-contrasts of formula_res).
    _, res_contrasts, _ = mulm.design_matrix(formula=formula_res, data=data)
    res_terms = res_contrasts.keys()

    design, t_contrasts, _ = mulm.design_matrix(formula=formula_full,
                                                data=data)

    # Fit the full model.
    fitted = mulm.MUOLS(Y, design).fit()

    # Boolean mask over the full-model columns: a column is selected when
    # the contrasts of the residualised terms sum to exactly 1 there.
    selected = [contrast for name, contrast in t_contrasts.items()
                if name in res_terms]
    mask = np.array(selected).sum(axis=0) == 1

    nuisance = np.dot(design[:, mask], fitted.coef[mask, :])
    return Y - nuisance
def __init__(self, data, formula_res, formula_full=None):
    """Build the full design matrix and the mask of terms to residualize.

    Parameters
    ----------
    data: DataFrame
        Variables referenced by the formulas.
    formula_res: str
        Residualisation formula, ex: "site".
    formula_full: str
        Full model formula (default None, meaning formula_res is used).

    Sets ``design_mat``, ``t_contrasts`` and ``f_contrasts`` (the three
    outputs of ``mulm.design_matrix`` for the full formula) and ``mask``,
    a boolean array selecting the full-model columns that belong to the
    terms of ``formula_res``.
    """
    full = formula_res if formula_full is None else formula_full

    # Term names produced by the residualisation formula alone.
    _, res_contrasts, _ = mulm.design_matrix(formula=formula_res, data=data)
    res_terms = res_contrasts.keys()

    full_design = mulm.design_matrix(formula=full, data=data)
    self.design_mat, self.t_contrasts, self.f_contrasts = full_design

    # Mask of the residualised terms within the full model: columns where
    # the selected contrasts sum to exactly 1.
    selected = [contrast for name, contrast in self.t_contrasts.items()
                if name in res_terms]
    self.mask = np.array(selected).sum(axis=0) == 1
def __init__(self, data, formula_res, formula_full=None):
    """Deprecated constructor: build the design matrix and residual mask.

    Emits a ``DeprecationWarning`` pointing users to
    ``mulm.residualizer.Residualizer``.

    Parameters
    ----------
    data: DataFrame
        Variables referenced by the formulas.
    formula_res: str
        Residualisation formula, ex: "site".
    formula_full: str
        Full model formula (default None, meaning formula_res is used).

    Sets ``design_mat``, ``t_contrasts`` and ``f_contrasts`` (outputs of
    ``mulm.design_matrix`` for the full formula) and ``mask``, a boolean
    array selecting the full-model columns that belong to the terms of
    ``formula_res``.
    """
    # stacklevel=2 attributes the warning to the code instantiating the
    # class instead of to this constructor, so users see where to migrate.
    warnings.warn(
        "nitk.stats.Residualizer is deprecated, use mulm.residualizer.Residualizer instead. See https://github.com/neurospin/pylearn-mulm",
        DeprecationWarning,
        stacklevel=2)

    if formula_full is None:
        formula_full = formula_res

    # Term names produced by the residualisation formula alone.
    res_terms = mulm.design_matrix(formula=formula_res, data=data)[1].keys()

    self.design_mat, self.t_contrasts, self.f_contrasts = \
        mulm.design_matrix(formula=formula_full, data=data)

    # Mask of terms in the residualisation formula within the full model.
    self.mask = np.array([
        cont for term, cont in self.t_contrasts.items()
        if term in res_terms
    ]).sum(axis=0) == 1
def get_design_mat(self, data):
    """Build the design matrix of ``data`` using the stored full formula.

    The t-contrasts obtained for ``data`` are checked against
    ``self.t_contrasts``: every stored term must exist for the new data
    and have an identical contrast.

    Parameters
    ----------
    data: DataFrame
        Variables referenced by ``self.formula_full``.

    Returns
    -------
    design_mat
        First output of ``mulm.design_matrix`` for ``data``.

    Raises
    ------
    AssertionError
        If the contrasts of ``data`` differ from the stored ones.
    """
    design_mat, t_contrasts, _ = \
        mulm.design_matrix(formula=self.formula_full, data=data)

    # np.array_equal copes with contrasts of different shapes and the key
    # test with missing terms, so every mismatch gives the same clear error
    # instead of a KeyError or a NumPy broadcasting error.
    consistent = all(
        term in t_contrasts
        and np.array_equal(self.t_contrasts[term], t_contrasts[term])
        for term in self.t_contrasts
    )
    # Raised explicitly (not via ``assert``) so the check survives ``-O``
    # while keeping the exception type existing callers may rely on.
    if not consistent:
        raise AssertionError(
            "new data doesn't produce the same t-contrasts as the data "
            "used at initialisation")
    return design_mat
def __init__(self, data=None, formula_res=None, formula_full=None,
             contrast_res=None):
    """Set up the residualisation contrast, from formulas or directly.

    Parameters
    ----------
    data: DataFrame
        DataFrame containing the columns used to build the design matrix
        (default None).

    formula_res: str
        Residualisation formula. Ex: "site" (default None).

    formula_full: str
        Full model (formula) of residualisation containing other
        variables to adjust for. Ex.: "site + age + sex" (default None,
        in which case formula_res is used).

    contrast_res: boolean array
        The contrast for residualisation. Ex: [False, True, False, False].
        Only used when ``data`` is not a DataFrame or ``formula_res`` is
        not a string; it is then stored unchanged.
        NOTE(review): the previous documentation stated that None means
        "True everywhere"; this constructor only stores the value --
        confirm where it is interpreted.
    """
    built_from_formula = (isinstance(data, pd.DataFrame)
                          and isinstance(formula_res, str))
    if not built_from_formula:
        # No usable data/formula pair: keep the user-supplied contrast.
        self.contrast_res = contrast_res
        return

    self.formula_full = formula_res if formula_full is None else formula_full

    # Term names produced by the residualisation formula alone.
    _, res_contrasts, _ = mulm.design_matrix(formula=formula_res, data=data)
    res_terms = res_contrasts.keys()

    # The design matrix itself is not stored, only the contrasts.
    _, self.t_contrasts, self.f_contrasts = \
        mulm.design_matrix(formula=self.formula_full, data=data)

    # Mask of the residualised terms within the full model: columns where
    # the selected contrasts sum to exactly 1.
    selected = [contrast for name, contrast in self.t_contrasts.items()
                if name in res_terms]
    self.contrast_res = np.array(selected).sum(axis=0) == 1
def univ_stats(Y, formula, data):
    """Fit a MUOLS model of Y and run an F-test for each f-contrast.

    Parameters
    ----------
    Y: array (n_subjects, n_features)
    formula: str
        eg. "age + sex + site"
    data: DataFrame
        Contains the values of the formula terms.

    Returns
    -------
    mod_mulm
        The fitted ``mulm.MUOLS`` model.
    aov_mulm: OrderedDict
        Maps each key of the f-contrasts to the output of
        ``f_test(contrast, pval=True)`` on the fitted model.
    """
    design, _, f_contrasts = mulm.design_matrix(formula=formula, data=data)
    model = mulm.MUOLS(Y, design).fit()

    anova = OrderedDict()
    for term, contrast in f_contrasts.items():
        anova[term] = model.f_test(contrast, pval=True)

    return model, anova
imgs_test = datasets['validation_vbm'].squeeze()
print(imgs_train.shape, imgs_test.shape)

# Load the mask image; voxels with a non-zero value are kept
mask_img = nibabel.load(
    os.path.join(WD, "data", "mni_cerebrum-mask.nii.gz"))
mask_arr = mask_img.get_fdata() != 0

# Keep only the in-mask voxels of every image
X_train = imgs_train[:, mask_arr]
X_test = imgs_test[:, mask_arr]

################################################################################
# Univariate statistics
# ---------------------

Z_train, t_contrasts, f_contrasts = mulm.design_matrix(
    formula="sex + age", data=demo_train)
mod_mulm = mulm.MUOLS(Y=X_train, X=Z_train).fit()


def flat_to_img(mask_img, flat_values):
    """Put flat in-mask values back into an image shaped like the mask.

    Voxels where the mask data is zero are set to 0; the other voxels
    receive ``flat_values`` (squeezed). The result is built with
    ``nilearn.image.new_img_like`` using ``mask_img`` as reference.
    """
    mask_data = mask_img.get_fdata()
    volume = np.zeros(mask_data.shape)
    volume[mask_data != 0] = flat_values.squeeze()
    return nilearn.image.new_img_like(mask_img, volume)


# t-test of the "sex" term, displayed as a statistical map
tstat_sex, pval_sex, df_sex = mod_mulm.t_test(
    t_contrasts['sex'], pval=True)
tstat_sex_img = flat_to_img(mask_img, tstat_sex.squeeze())
nilearn.plotting.plot_stat_map(tstat_sex_img, title="sex")

# t-test of the "age" term
tstat_age, pval_age, df_age = mod_mulm.t_test(
    t_contrasts['age'], pval=True)
tstat_age_img = flat_to_img(mask_img, tstat_age.squeeze())
################################################################################
# Example 1: Salary dataset
# -------------------------
#
# Fit a single model: `salary ~ experience + education + management`

url = 'https://github.com/duchesnay/pystatsml/raw/master/datasets/salary_table.csv'
df = pd.read_csv(url)

################################################################################
# Fit with MULM

# Salary as a float column vector of shape (n, 1)
Y = np.asarray(df.salary).astype(float)[:, np.newaxis]
X, t_contrasts, f_contrasts = mulm.design_matrix(
    formula="experience + education + management", data=df)
mod_mulm = mulm.MUOLS(Y, X).fit()

# One t-test per t-contrast and one F-test per f-contrast
tstat_mulm = OrderedDict(
    (name, mod_mulm.t_test(contrast, pval=True))
    for name, contrast in t_contrasts.items())
fstat_mulm = OrderedDict(
    (name, mod_mulm.f_test(contrast, pval=True))
    for name, contrast in f_contrasts.items())

print(mod_mulm.coef)
print(pd.DataFrame(tstat_mulm, index=['tstat', 'pval', 'df']).T)
# print(pd.DataFrame(fstat_mulm, index=['fstat', 'pval']).T)

################################################################################
# Fit with statsmodels

mod_sm = smfrmla.ols(
    'salary ~ experience + education + management', df).fit()
print(mod_sm.summary())