コード例 #1
0
    def test_maxT(self):
        """Check max-T corrected t-tests for the plain and block-wise fits.

        Simulates ``py_info`` targets that depend on X plus ``py_noize``
        pure-noise targets, then verifies that:

        * ``t_test`` and ``t_test_maxT`` return identical statistics and df;
        * the block-wise fit (``fit(block=True, ...)``) agrees with the
          plain fit for both methods;
        * the number of detections at the 5% level, uncorrected and max-T
          corrected, is close to what the simulation design predicts.
        """
        n = 100
        px = 5
        py_info = 2
        py_noize = 100

        beta = np.array([1, 0, -.5] + [0] * (px - 4) + [2]).reshape((px, 1))
        np.random.seed(42)
        X = np.hstack([np.random.randn(n, px - 1), np.ones((n, 1))])
        Y = np.random.randn(n, py_info + py_noize)
        # Causal model: add X on the first py_info variable
        Y[:, :py_info] += np.dot(X, beta)
        # One contrast per regressor
        contrasts = np.identity(X.shape[1])

        mod = mulm.MUOLS(Y, X).fit()
        tvals, rawp, df = mod.t_test(contrasts, pval=True, two_tailed=True)
        tvals2, maxT, df2 = mod.t_test_maxT(contrasts, two_tailed=True)
        assert np.all(tvals == tvals2)
        assert np.all(df == df2)

        mod_block = mulm.MUOLS(Y, X).fit(block=True, max_elements=1000)
        # The raw t-test must be run on the block-wise model: querying the
        # plain model here would leave the block code path untested.
        tvals_block, rawp_block, df_block = mod_block.t_test(contrasts,
                                                             pval=True,
                                                             two_tailed=True)
        tvals_block2, maxT_block, df_block2 = mod_block.t_test_maxT(
            contrasts, two_tailed=True)

        assert_allclose(tvals_block, tvals_block2)
        assert_allclose(tvals_block2, tvals2)
        assert_allclose(df_block, df_block2)
        assert_allclose(df_block2, df2)

        # Expected detections: beta has 3 non-zero entries, so each
        # informative target yields 3 true positives; every other
        # (contrast, target) pair is a null test rejected 5% of the time.
        expected_tp = py_info * 3
        expected_fp = ((py_info + py_noize) * px - expected_tp) * 0.05
        expected_p = expected_tp + expected_fp

        # Uncorrected p-values: count must lie within expected positives +-10
        for pvals in (rawp, rawp_block):
            n_positive = np.sum(pvals < 0.05)
            assert expected_p - 10 < n_positive < expected_p + 10

        # max-T corrected: count must lie within expected true positives +-2
        for pvals in (maxT, maxT_block):
            n_positive = np.sum(pvals < 0.05)
            assert expected_tp - 2 < n_positive < expected_tp + 2
コード例 #2
0
def univar_stats(Y, X, path_prefix, mask_img):
    """Run a two-tailed t-test on the first regressor and save the maps.

    Fits ``mulm.MUOLS(Y, X)``, tests the contrast selecting the first
    column of X, prints how many p-values fall below 1e-4, 1e-3 and 1e-2,
    then writes ``<path_prefix>_log10pvals.nii.gz``,
    ``<path_prefix>_tstat.nii.gz`` and a four-panel ``<path_prefix>_tstat.png``.

    NOTE(review): ``mask_arr`` is not a parameter; it is read from the
    enclosing scope. It presumably is the boolean array of ``mask_img`` --
    confirm against the caller.

    Parameters
    ----------
    Y: array, dependent variables passed to MUOLS
    X: array, design matrix; its first column is the tested regressor
    path_prefix: str, prefix of all output file names
    mask_img: image whose ``affine`` is reused for the output images

    Returns
    -------
    tstat_arr, pvals_arr: arrays of ``mask_arr.shape`` holding the t
    statistics and the -log10 p-values at the masked positions.
    """
    contrasts = [1] + [0] * (X.shape[1] - 1)
    tvals, pvals, df = mulm.MUOLS(Y, X).fit().t_test(
        contrasts, pval=True, two_tailed=True)

    # Summary rows: [threshold, number of p-values below it, proportion]
    summary = []
    for thres in 10.**np.array([-4, -3, -2]):
        n_below = np.sum(pvals < thres)
        summary.append([thres, n_below, n_below / pvals.size])
    print(summary)

    # Scatter the flat statistics back to the mask positions
    tstat_arr = np.zeros(mask_arr.shape)
    tstat_arr[mask_arr] = tvals[0]
    pvals_arr = np.zeros(mask_arr.shape)
    pvals_arr[mask_arr] = -np.log10(pvals[0])

    pvals_img = nibabel.Nifti1Image(pvals_arr, affine=mask_img.affine)
    pvals_img.to_filename(path_prefix + "_log10pvals.nii.gz")

    tstat_img = nibabel.Nifti1Image(tstat_arr, affine=mask_img.affine)
    tstat_img.to_filename(path_prefix + "_tstat.nii.gz")

    threshold = 3
    fig = plt.figure(figsize=(13.33, 7.5 * 4))

    # (subplot position, plotting function, title format, image, extra kwargs)
    stat_map_opts = dict(colorbar=True, draw_cross=False)
    panels = (
        (411, plotting.plot_glass_brain, "-log pvalues >%.2f", pvals_img, {}),
        (412, plotting.plot_glass_brain, "T-stats T>%.2f", tstat_img, {}),
        (413, plotting.plot_stat_map, "-log pvalues >%.2f", pvals_img,
         stat_map_opts),
        (414, plotting.plot_stat_map, "T-stats T>%.2f", tstat_img,
         stat_map_opts),
    )
    for position, plot_func, title_fmt, img, extra_opts in panels:
        ax = fig.add_subplot(position)
        ax.set_title(title_fmt % threshold)
        plot_func(img, threshold=threshold, figure=fig, axes=ax,
                  **extra_opts)
    plt.savefig(path_prefix + "_tstat.png")

    return tstat_arr, pvals_arr
コード例 #3
0
def residualize(Y, formula_res, data, formula_full=None):
    """
    Residualization or adjusted residualization.

    Parameters
    ----------
    Y: array (n, p), dependent variables
    formula_res: str, residualization formula, e.g. "site":
    1) Fit  Y = b0 + b1 site + eps
    2) Return Y - b0 - b1 site
    data: DataFrame of independent variables
    formula_full: str, full model formula (default None), e.g.
    "age + sex + site + diagnosis". If not None, an adjusted
    residualization is performed:
    1) Fit Y = b1 age + b2 sex + b3 site + b4 diagnosis + eps
    2) Return Y - b3 site

    Returns
    -------
    Y: array (n, p), residualized dependent variables
    """
    # Without a full model, the residualization formula is the model itself
    full_formula = formula_res if formula_full is None else formula_full

    # Names of the terms to remove, taken from the t-contrasts of formula_res
    terms_to_remove = mulm.design_matrix(formula=formula_res,
                                         data=data)[1].keys()

    X, t_contrasts, f_contrasts = mulm.design_matrix(formula=full_formula,
                                                     data=data)

    fitted = mulm.MUOLS(Y, X).fit()

    # Design columns to remove: those where the t-contrasts of the selected
    # terms sum to exactly 1
    selected_contrasts = [contrast for term, contrast in t_contrasts.items()
                          if term in terms_to_remove]
    column_mask = np.array(selected_contrasts).sum(axis=0) == 1

    removed_effect = np.dot(X[:, column_mask], fitted.coef[column_mask, :])
    return Y - removed_effect
コード例 #4
0
    def test_ttest(self):
        """Compare MULM two-tailed t-tests with statsmodels and block fit.

        Random data with ``py_info`` targets depending on X and ``py_noize``
        noise targets are fitted three ways: statsmodels OLS column by
        column, ``MUOLS.fit()`` and ``MUOLS.fit(block=True, ...)``. The t
        values, p-values and degrees of freedom must agree.
        """
        n, px = 100, 5
        py_info, py_noize = 2, 100
        beta = np.array([1, 0, -.5] + [0] * (px - 4) + [2]).reshape((px, 1))
        # Design matrix: px - 1 random regressors followed by an intercept
        random_regressors = np.random.randn(n, px - 1)
        X = np.hstack([random_regressors, np.ones((n, 1))])
        Y = np.random.randn(n, py_info + py_noize)
        # Causal model: only the first py_info targets depend on X
        Y[:, :py_info] += np.dot(X, beta)
        # One two-tailed t-test per regressor
        contrasts = np.identity(X.shape[1])

        # Reference: statsmodels handles a single target, so loop on columns
        sm_tests = [sm.OLS(Y[:, j], X).fit().t_test(contrasts)
                    for j in range(Y.shape[1])]
        sm_tvals = np.asarray([res.tvalue for res in sm_tests]).T
        sm_pvals = np.asarray([res.pvalue for res in sm_tests]).T

        # MULM, all targets at once
        mulm_tvals, mulm_pvals, mulm_df = mulm.MUOLS(Y, X).fit().t_test(
            contrasts, pval=True, two_tailed=True)

        # MULM, block-wise fit
        fitted_block = mulm.MUOLS(Y, X).fit(block=True, max_elements=1000)
        mulm_tvals_block, mulm_pvals_block, mulm_df_block = \
            fitted_block.t_test(contrasts, pval=True, two_tailed=True)

        # MULM must match statsmodels
        assert_almost_equal(mulm_tvals, sm_tvals)
        assert_almost_equal(mulm_pvals, sm_pvals)

        # Block-wise fit must match the plain fit
        assert_allclose(mulm_tvals, mulm_tvals_block)
        assert_allclose(mulm_pvals, mulm_pvals_block)
        assert_allclose(mulm_df, mulm_df_block)
コード例 #5
0
    def fit(self, Y, design_mat):
        """Fit the mass-univariate linear model and store it on the instance.

        Parameters
        ----------
        Y: array (n, p)
            Dependent variables.

        design_mat: array (n, k)
            Design matrix of independent variables. ``self.mask`` must have
            one entry per column.

        Returns
        -------
        self, with the fitted model available as ``self.mod_mulm``.
        """
        # Same number of samples in both inputs
        assert Y.shape[0] == design_mat.shape[0]
        # One mask entry per design-matrix column
        assert self.mask.shape[0] == design_mat.shape[1]

        model = mulm.MUOLS(Y, design_mat)
        self.mod_mulm = model.fit()
        return self
コード例 #6
0
def univ_stats(Y, formula, data):
    """Fit a mass-univariate OLS model and run one F-test per formula term.

    Parameters
    ----------
    Y: array (n_subjects, n_features)
    formula: str, e.g. "age + sex + site"
    data: DataFrame containing the values of the formula terms

    Returns
    -------
    mod_mulm: the fitted ``mulm.MUOLS`` model
    aov_mulm: OrderedDict mapping each term of the F-contrasts to the
        result of ``f_test(..., pval=True)`` for that term
    """
    X, t_contrasts, f_contrasts = mulm.design_matrix(formula=formula,
                                                     data=data)
    mod_mulm = mulm.MUOLS(Y, X).fit()

    # Keep the terms in the order given by the F-contrasts
    aov_mulm = OrderedDict()
    for term in f_contrasts:
        aov_mulm[term] = mod_mulm.f_test(f_contrasts[term], pval=True)

    return mod_mulm, aov_mulm
コード例 #7
0
    def test_ttest_ftest_vs_statsmodels(self):
        """Check MULM t- and F-tests against a statsmodels type-2 ANOVA.

        The salary dataset is fitted with
        ``salary ~ experience + education + management`` in statsmodels and
        with a dummy-coded design matrix in MULM. The F statistics and
        p-values of each term must match the ANOVA table, and the t-test on
        ``experience`` must be consistent with its F-test.
        """
        url = 'https://github.com/duchesnay/pystatsml/raw/master/datasets/salary_table.csv'
        salary = pd.read_csv(url)

        # Fit with statsmodels
        oneway = smfrmla.ols('salary ~ experience + education + management',
                             salary).fit()
        aov = sm.stats.anova_lm(oneway, typ=2)  # Type 2 ANOVA DataFrame

        # Fit with MULM: dummy-code every column after the first one
        X_df = pd.get_dummies(salary.iloc[:, 1:])
        X = np.asarray(X_df).astype(float)
        Y = np.asarray(salary.salary)[:, None].astype(float)

        # F-contrasts: square matrices with ones on the diagonal entries of
        # the design columns belonging to the tested term.
        n_cols = X.shape[1]

        con_exp = np.zeros((n_cols, n_cols))
        con_exp[0, 0] = 1

        con_edu = np.zeros((n_cols, n_cols))
        con_edu[[1, 2, 3], [1, 2, 3]] = 1

        con_man = np.zeros((n_cols, n_cols))
        con_man[[4, 5], [4, 5]] = 1

        import mulm
        mod = mulm.MUOLS(Y, X).fit()
        # The degrees of freedom are not checked; binding them to `_` avoids
        # overwriting the dataset as the former `df` name did.
        tvals_exp, rawp_expt, _ = mod.t_test([1, 0, 0, 0, 0, 0],
                                             pval=True,
                                             two_tailed=True)
        fvals_exp, rawp_exp, _ = mod.f_test(con_exp, pval=True)
        fvals_edu, rawp_edu, _ = mod.f_test(con_edu, pval=True)
        fvals_man, rawp_man, _ = mod.f_test(con_man, pval=True)

        # Single-coefficient contrast: F equals t squared, and the two-tailed
        # t-test p-value equals the F-test p-value, so it must NOT be squared.
        assert np.allclose(aov.loc['experience', 'F'], tvals_exp[0]**2)
        assert np.allclose(aov.loc['experience', 'PR(>F)'], rawp_expt[0])

        assert np.allclose(aov.loc['experience', 'F'], fvals_exp[0])
        assert np.allclose(aov.loc['experience', 'PR(>F)'], rawp_exp[0])

        assert np.allclose(aov.loc['education', 'F'], fvals_edu[0])
        assert np.allclose(aov.loc['education', 'PR(>F)'], rawp_edu[0])

        assert np.allclose(aov.loc['management', 'F'], fvals_man[0])
        assert np.allclose(aov.loc['management', 'PR(>F)'], rawp_man[0])
コード例 #8
0
    def fit(self, Y, X):
        """Fit p linear models, one per column of Y regressed on X.

        When ``self.contrast_res`` is None it is initialised to an all-True
        boolean vector with one entry per column of X.

        Parameters
        ----------
        Y: array (n, p)
            Dependent variables.

        X: array (n, k)
            Design matrix of independent variables.

        Returns
        -------
        self, with the fitted model available as ``self.mod_mulm``.
        """
        n_regressors = X.shape[1]
        if self.contrast_res is None:
            # Default: every column of the design matrix is selected
            self.contrast_res = np.ones(n_regressors, dtype=bool)

        assert Y.shape[0] == X.shape[0]
        assert self.contrast_res.shape[0] == n_regressors, \
            "contrast doesn't match design matrix"

        model = mulm.MUOLS(Y, X)
        self.mod_mulm = model.fit()
        return self
コード例 #9
0
# Keep the two SNP columns at indices m-1 and m, then append the covariates
# as extra columns to form the design matrix X.
snp = snps[:, (m-1):(m+1)]
X = np.hstack((snp, cov_util))

#STOP    #to interact with the interpreter

# MUOLS: one statistic and one p-value per column of `images`
s_map = np.zeros(images.shape[1])
p_map = np.zeros(images.shape[1])

# Chunk boundaries over the columns of `images`: "debut" holds the start
# indices and "fin" the end indices, giving slices of at most 10000 columns.
# NOTE(review): `debut + [...]` requires range() to return a list (Python 2).
debut = range(0, images.shape[1], 10000)
fin = debut + [images.shape[1]]
fin = fin[1:]

for d, f in zip(debut, fin):
    print d,f
    # A fresh model is fitted on each chunk of columns
    bigols = mulm.MUOLS()
    bigols.fit(X, images[:, d:f])
    # Contrast weights: only the second entry is non-zero.
    # NOTE(review): length 4 presumes X has 4 columns -- confirm cov_util.
    contrast = [0.,1.,0.,0.]
#    contrast = [0.,1.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.]
    s, p = bigols.stats_t_coefficients(X, images[:,d:f], contrast, pval=True)
    # Store the chunk results at the matching positions of the full maps
    s_map[d:f] = s[:]
    p_map[d:f] = p[:]

# Mask image: only the shape of its data array is used below
template_for_size = os.path.join(PROJECT_DIR, '2013_imagen_bmi', 'data',
                                 'mask', 'mask.nii')
template_for_size_img = ni.load(template_for_size)

# Place the per-column statistics into an array shaped like the mask,
# at the positions given by `masked_data_index`.
image = np.zeros(template_for_size_img.get_data().shape)
image[masked_data_index] = s_map
# Output path; it is only built here, nothing is written in this excerpt
pn = os.path.join(PROJECT_DIR, 'documents', '2014jan24_Plink',
                  'bbox_stats_7182018_covGenderPDS.nii.gz')
コード例 #10
0
    #Residuals:
    # Min   1Q  Median      3Q     Max
    #-2.7101 -0.4352  0.0232  0.5453  2.1318
    #
    #Coefficients:
    #Estimate Std. Error t value Pr(>|t|)
    #(Intercept) -0.01548 0.10158  -0.152 0.879
    #X[, 1] 0.01158 0.06784 0.171 0.865
    #
    #Residual standard error: 0.8629 on 362 degrees of freedom
    #Multiple R-squared:  8.051e-05, Adjusted R-squared:  -0.002682
    #F-statistic: 0.02915 on 1 and 362 DF,  p-value: 0.8645
    for k in snp.keys():
        print '\n====SNP : ', k
        x = X[:, snp[k]].reshape((n, -1))
        #transcoding should be performed to be compliant to R convention
        x[x == 2] = 3
        x[x == 0] = 2
        x[x == 3] = 0
        #intercept
        x = np.hstack((x, np.ones((x.shape[0], 1))))
        x[:, -1] = 1
        olser = mulm.MUOLS()
        olser.fit(x, y)
        betas = olser.coef_
        contrast = [1., 0.]
        t, p = olser.stats_t_coefficients(x, y, contrast, pval=True)
        s, p = olser.stats_f_coefficients(x, y, contrast, pval=True)
        print 'betas = ', betas, '\n\n'
        print 'stat-t, stat-f,  p-val model additif= ', t, s, p
# Read mask
mask_img = nibabel.load(os.path.join(WD, "data", "mni_cerebrum-mask.nii.gz"))
# Boolean array: True wherever the mask image is non-zero
mask_arr = mask_img.get_fdata() != 0

# Apply mask: keep only the in-mask positions of each image
# (assumes the first axis of imgs_train / imgs_test indexes images -- TODO confirm)
X_train = imgs_train[:, mask_arr]
X_test = imgs_test[:, mask_arr]

################################################################################
# Univariate statistics
# ---------------------

# Design matrix and contrasts for the model "sex + age" on the training
# demographics; the masked training images are the dependent variables.
Z_train, t_contrasts, f_contrasts = mulm.design_matrix(formula="sex + age",
                                                       data=demo_train)
mod_mulm = mulm.MUOLS(Y=X_train, X=Z_train).fit()


def flat_to_img(mask_img, flat_values):
    """Scatter flat values into an image shaped like the mask.

    Parameters
    ----------
    mask_img: image whose non-zero entries mark the positions to fill
    flat_values: array, squeezed and written at the non-zero mask positions

    Returns
    -------
    Image built by ``nilearn.image.new_img_like`` from ``mask_img``; it holds
    ``flat_values`` where the mask is non-zero and 0 elsewhere.
    """
    mask_data = mask_img.get_fdata()
    volume = np.zeros(mask_data.shape)
    volume[mask_data != 0] = flat_values.squeeze()
    return nilearn.image.new_img_like(mask_img, volume)


# t-test of the "sex" term, mapped back to image space and plotted
tstat_sex, pval_sex, df_sex = mod_mulm.t_test(t_contrasts['sex'], pval=True)
tstat_sex_img = flat_to_img(mask_img, tstat_sex.squeeze())
nilearn.plotting.plot_stat_map(tstat_sex_img, title="sex")

# Same for the "age" term
tstat_age, pval_age, df_age = mod_mulm.t_test(t_contrasts['age'], pval=True)
tstat_age_img = flat_to_img(mask_img, tstat_age.squeeze())
nilearn.plotting.plot_stat_map(tstat_age_img, title="age")
コード例 #12
0
# 1st model

# Regressors of the first model
MODEL = ["Gender", "Age", "VSF", "Scanner_Type"]

# NOTE(review): as_matrix() suggests make_design_matrix returns a pandas
# DataFrame (the method was removed in pandas 1.0) -- confirm against utils.
design_mat = utils.make_design_matrix(df, regressors=MODEL).as_matrix()

# Drop every row of the design matrix that contains a NaN, and the rows
# with the same indices in `images`
# (assumes both are ordered by subject -- TODO confirm).
isnan = numpy.isnan(design_mat)
if isnan.any():
    # Row indices of the NaN entries
    bad_subject_ind = numpy.where(isnan)[0]
    print "Removing subject", bad_subject_ind
    design_mat = numpy.delete(design_mat, bad_subject_ind, axis=0)
    images = numpy.delete(images, bad_subject_ind, axis=0)

# Fit LM & compute residuals
lm = mulm.MUOLS()
lm.fit(X=design_mat, Y=images)
images_pred = lm.predict(X=design_mat)
res = images - images_pred

# Write to file: the dataset name lists the regressors that were removed
residual_name = 'masked_images_' + '_'.join(MODEL)
print "Writing images to", residual_name
data_api.write_images(h5file, res, residual_name)

# 2nd model

# Same regressors, with "ImagingCentreCity" instead of "Scanner_Type"
MODEL = ["Gender", "Age", "VSF", "ImagingCentreCity"]

design_mat = utils.make_design_matrix(df, regressors=MODEL).as_matrix()
コード例 #13
0
Credit: E Duchesnay
"""
import numpy as np
import mulm
import pylab as plt

# Simulation sizes: n samples, px regressors (the last one is the
# intercept), py_info targets that depend on X and py_noize noise targets.
n = 100
px = 5
py_info = 2
py_noize = 100

# True coefficients, one per regressor, as a (px, 1) column
beta = np.array([1, 0, .5] + [0] * (px - 4) + [2]).reshape((px, 1))
X = np.hstack([np.random.randn(n, px-1), np.ones((n, 1))]) # X with intercept
Y = np.random.randn(n, py_info + py_noize)
# Causal model: add X on the first py_info variable
Y[:, :py_info] += np.dot(X, beta)

# t-test all the regressors (by default mulm and sm do two-tailed tests)
contrasts = np.identity(X.shape[1])

# Uncorrected p-values, then max-T and min-P corrected p-values.
# Note: `tvals` is rebound by the max-T call.
mod = mulm.MUOLS(Y, X)
tvals, rawp, df = mod.fit().t_test(contrasts, pval=True, two_tailed=True)
tvals, maxT, df2 = mod.t_test_maxT(contrasts, two_tailed=True)
tvals3, minP, df3 = mod.t_test_minP(contrasts, two_tailed=True)

# Histogram of the three kinds of p-values for the first contrast.
# Note: `n` (the sample size above) is rebound to the histogram counts.
n, bins, patches = plt.hist([rawp[0,:], maxT[0,:], minP[0,:]],
                            color=['blue', 'red', 'green'],
                            label=['rawp','maxT', 'minP'])
plt.legend()
plt.show()
コード例 #14
0
    ]],
    pd.get_dummies(pop_treat_ses01[['site']])
],
                axis=1)

# Number of missing values per column, before imputation
print(Zdf.isnull().sum())

# Replace missing "age_onset" values by the mean of the column,
# then print the counts again to check the result
Zdf.loc[Zdf["age_onset"].isnull(), "age_onset"] = Zdf["age_onset"].mean()
print(Zdf.isnull().sum())

Z = np.asarray(Zdf)

## OLS with MULM
# Contrast selecting the first column of the design matrix
contrasts = [1] + [0] * (Zdf.shape[1] - 1)

mod = mulm.MUOLS(XTreat, Z)
tvals, pvals, df = mod.fit().t_test(contrasts, pval=True, two_tailed=True)

# For each threshold: [threshold, number of p-values below it, proportion]
print([[thres,
        np.sum(pvals < thres),
        np.sum(pvals < thres) / pvals.size]
       for thres in 10.**np.array([-4, -3, -2])])
# [[0.0001, 34, 8.5521897378753849e-05], [0.001, 333, 0.0008376115243272068], [0.01, 3374, 0.0084867906398798671]]

# Arrays shaped like the mask, zero outside the masked positions
tstat_arr = np.zeros(mask_arr.shape)
pvals_arr = np.zeros(mask_arr.shape)

# Fill the masked positions with -log10(p) and t for the tested contrast
pvals_arr[mask_arr] = -np.log10(pvals[0])
tstat_arr[mask_arr] = tvals[0]

# The -log10 p-value map reuses the affine of the mask image
pvals_img = nibabel.Nifti1Image(pvals_arr, affine=mask_img.affine)
コード例 #15
0
################################################################################
# Example 1: Salary dataset
# -------------------------
#
# Fit a single model: `salary ~ experience + education + management`

url = 'https://github.com/duchesnay/pystatsml/raw/master/datasets/salary_table.csv'
df = pd.read_csv(url)


################################################################################
# Fit with MULM

# Salary as a single-column float array of dependent variables
Y = np.asarray(df.salary)[:, None].astype(float)
X, t_contrasts, f_contrasts = mulm.design_matrix(formula="experience + education + management", data=df)
mod_mulm = mulm.MUOLS(Y, X).fit()
# One t-test per entry of t_contrasts and one F-test per entry of f_contrasts
tstat_mulm = OrderedDict((term, mod_mulm.t_test(t_contrasts[term], pval=True)) for term in t_contrasts)
fstat_mulm = OrderedDict((term, mod_mulm.f_test(f_contrasts[term], pval=True)) for term in f_contrasts)

print(mod_mulm.coef)
# One row per term, with the statistic, p-value and degrees of freedom
print(pd.DataFrame(tstat_mulm, index=['tstat', 'pval', 'df']).T)
# print(pd.DataFrame(fstat_mulm, index=['fstat', 'pval']).T)


################################################################################
# Fit with statsmodels

mod_sm = smfrmla.ols('salary ~ experience + education + management', df).fit()
print(mod_sm.summary())

fstat_sm = sm.stats.anova_lm(mod_sm, typ=2) # Type 2 ANOVA DataFrame