Exemple #1
0
def univariate(snps, X, Y, col=pheno_name):
    """Report which columns of ``X`` correlate significantly with ``Y``.

    Every column of ``X`` is tested against ``Y`` with
    ``scipy.stats.pearsonr``.  The number of p-values at or below 0.05 is
    printed before and after correction by ``p_value_correction.fdr``,
    followed by the identifiers, corrected p-values and correlation
    coefficients of the columns that are still significant after
    correction.

    Parameters
    ----------
    snps : object
        Must expose ``measure_ids``, indexable by an integer index array;
        the entries selected by the significant column indices are printed.
    X : 2-D array
        One tested variable per column (indexed as ``X[:, i]``).
    Y : array
        Values each column of ``X`` is correlated with.
    col : optional
        Not used by this function; kept so existing callers keep working.

    Returns
    -------
    None
        Results are only printed.
    """
    p = X.shape[1]
    from scipy.stats import pearsonr
    pvals = []
    cors = []
    for i in range(p):
        cor, pval = pearsonr(X[:, i], Y)
        cors.append(cor)
        pvals.append(pval)
    pvals = np.asarray(pvals)
    cors = np.asarray(cors)
    n_uncorrected = len(np.where(pvals <= 0.05)[0])

    # Each message is formatted into ONE string before printing, so the same
    # text is emitted by the Python 2 print statement and the Python 3 print
    # function (the doubled spaces reproduce the separators the original
    # comma-separated print statements inserted).
    print("\n")
    print("..... Univariate results")
    print("      numbers of significant p values *un*corrected %s over  %s"
          % (n_uncorrected, p))

    import p_value_correction as p_c
    p_corrected = p_c.fdr(pvals)
    w = np.where(p_corrected <= 0.05)[0]
    print("      numbers of significant corrected p values corrected %s over  %s"
          % (len(w), p))
    print("      %s  pvalcor =  %s  correlation =  %s"
          % (snps.measure_ids[w], p_corrected[w], cors[w]))
    """lm = LinearRegression()
p = X.shape[1]

# Univariate approach: Pearson correlation (and its p-value) of every column
# of X against Y.
from scipy.stats import pearsonr
# Collect the per-column results in lists and convert once at the end;
# growing the arrays with np.hstack inside the loop re-copies them on every
# iteration.
p_values = []
correlations = []
for i in range(p):
    r_row, p_value = pearsonr(X[:, i], Y)
    p_values.append(p_value)
    correlations.append(r_row)
p_vect = np.asarray(p_values, dtype=float)
cor_vect = np.asarray(correlations, dtype=float)
indices = np.where(p_vect <= 0.05)
# Single pre-formatted string: same output from the Python 2 print statement
# and the Python 3 print function.
print('numbers of significant p values %s over  %s' % (len(indices[0]), p))

# Correction of the p-values using the FDR approach.
import p_value_correction as p_c
p_corrected = p_c.fdr(p_vect)
indices_c = np.where(p_corrected <= 0.05)
print('s0 : numbers of significant corrected p values %s over  %s'
      % (len(indices_c[0]), p))

# Figure 2: corrected (top) versus uncorrected (bottom) p-value histograms,
# 40 bins each.
import matplotlib.pyplot as plt
plt.figure(2)
plt.subplot(211)
plt.hist(p_corrected, 40)
plt.title('corrected p values ')
plt.subplot(212)
plt.hist(p_vect, 40)
plt.title('uncorrected p values ')
plt.show()

import matplotlib.pyplot as plt
def univariate(mask, snps, studyPgS, col='height'):
    """Univariate SNP association against a covariate-adjusted phenotype.

    The phenotype column ``col`` is first residualised with a linear
    regression on the scanning-centre dummies, ``Sex`` and ``Age``.  Every
    SNP column is then correlated with the residual (``scipy.stats.pearsonr``)
    and the p-values are corrected with ``p_value_correction.fdr``.  When at
    least one SNP is significant after correction (p <= 0.05), a linear model
    restricted to those SNPs is fitted on the residual and its score printed.

    Parameters
    ----------
    mask : index/boolean selector
        Selects the rows of ``snps.data`` and the entries of
        ``snps.subject_ids`` to analyse.
    snps : object
        Must expose ``data`` (2-D, subjects x SNPs), ``subject_ids`` and
        ``measure_ids``.
    studyPgS : pandas.DataFrame
        Indexed by subject id; must contain ``col``, ``'ScanningCentre'``,
        ``'Sex'`` and ``'Age'``.
    col : str
        Phenotype column to analyse (default ``'height'``).

    Returns
    -------
    (X, Y)
        ``X`` is ``snps.data[mask, :]`` and ``Y`` the residual of ``col``
        after regressing out the covariates.
    """
    from scipy.stats import pearsonr
    from sklearn.linear_model import LinearRegression
    import p_value_correction as p_c

    # The SNPs are X; the phenotype rows are reordered to follow its subjects.
    X = snps.data[mask, :]
    p = X.shape[1]
    permuter = snps.subject_ids[mask].tolist()
    subjects = studyPgS.loc[permuter]
    y = subjects[col]

    # First seven centre dummies, taken by POSITION.  The original
    # `[range(7)]` lookup could only ever work positionally (the columns are
    # named 'Centre_*'), which needs legacy pandas under Python 2.  A plain
    # ndarray replaces the deprecated numpy.matrix.
    centre_dummies = pandas.get_dummies(subjects['ScanningCentre'],
                                        prefix='Centre')
    covariate = numpy.asarray(centre_dummies.iloc[:, :7])
    print("COVARIATE")
    print(covariate)
    covariate = numpy.hstack(
        (covariate, numpy.asarray(subjects[['Sex', 'Age']])))
    print("COVARIATE")
    print(covariate)

    # Residual of the phenotype once the covariates are regressed out.
    Y = y - LinearRegression().fit(covariate, y).predict(covariate)

    pvals = []
    cors = []
    for i in range(p):
        cor, pval = pearsonr(X[:, i], Y)
        cors.append(cor)
        pvals.append(pval)
    pvals = numpy.asarray(pvals)
    cors = numpy.asarray(cors)
    n_uncorrected = len(numpy.where(pvals <= 0.05)[0])

    # Messages are formatted into one string each so that the Python 2 print
    # statement and the Python 3 print function emit the same text.
    print("\n")
    print("..... Univariate results")
    print("      numbers of significant p values *un*corrected %s over  %s"
          % (n_uncorrected, p))

    p_corrected = p_c.fdr(pvals)
    w = numpy.where(p_corrected <= 0.05)[0]
    print("      numbers of significant corrected p values corrected %s over  %s"
          % (len(w), p))
    print("      %s  pvalcor =  %s  correlation =  %s"
          % (snps.measure_ids[w], p_corrected[w], cors[w]))

    if len(w) == 0:
        # Nothing significant: there is no SNP subset to fit.  (Previously the
        # 'height' case tried to fit a regression on zero columns.)
        print("\n..... Nothing in %s variability explained by this approach "
              % (col))
        return X, Y

    # Column positions of the significant SNPs, resolved through their ids
    # with first-occurrence semantics (as list.index did), using one pass
    # over the ids instead of one list scan per significant SNP.
    first_position = {}
    for position, measure_id in enumerate(snps.measure_ids.tolist()):
        first_position.setdefault(measure_id, position)
    snps_mask = [first_position[i] for i in snps.measure_ids[w].tolist()]
    subX = X[:, snps_mask]

    # Fitted for every phenotype: the non-'height' branch used to reference
    # `subX` and `lm` without ever defining them.
    lm = LinearRegression()
    lm.fit(subX, Y)

    if col == 'height':
        print("\n..... Score explained by the %d significant SNPS is ~ 1.5 percent of the height var"
              % len(w))
    else:
        # The original format string had a single placeholder for two values.
        print("\n..... Score explained by the %d significant SNPS of the %s var"
              % (len(w), col))
    print("      based on:  %s  subjects" % (subX.shape[0],))
    print("      covariate out is sex , age, scanning center")
    print("lm.score(X, Y) %s" % (lm.score(subX, Y),))

    return X, Y
# Script fragment: p_vect_sex_0/1, cor_vect_sex_0/1, X_, np and plt are
# defined outside the visible part of this snippet.

# 2x2 grid: uncorrected p-value histograms (20 bins) on the top row,
# correlation coefficients on the bottom row, one column per sex group.
plt.subplot(221)
plt.hist(p_vect_sex_0, 20)
plt.title('uncorrected p values for sex 0')
plt.subplot(222)
plt.hist(p_vect_sex_1, 20)
plt.title('uncorrected p values for sex 1')
plt.subplot(223)
plt.plot(cor_vect_sex_0)
plt.title('correlation p coef sex 0')
plt.subplot(224)
plt.plot(cor_vect_sex_1)
plt.title('correlation coef for sex 1')
plt.show()

# Correct each group's p-values with p_value_correction.fdr and keep the
# positions that stay at or below 0.05.
import p_value_correction as p_c
p_corrected_sex_0 = p_c.fdr(p_vect_sex_0)
indices_c_sex_0 = np.where(p_corrected_sex_0 <= 0.05)

p_corrected_sex_1 = p_c.fdr(p_vect_sex_1)
indices_c_sex_1 = np.where(p_corrected_sex_1 <= 0.05)

# Single pre-formatted strings: same output from the Python 2 print statement
# and the Python 3 print function.
print('s0 : numbers of significant corrected p values %s over  %s'
      % (len(indices_c_sex_0[0]), X_.shape[1]))

print('s1 :numbers of significant corrected  p values %s over  %s'
      % (len(indices_c_sex_1[0]), X_.shape[1]))

# Figure 2, top panel: histogram of the corrected p-values for sex 0.
plt.figure(2)
plt.subplot(211)
plt.hist(p_corrected_sex_0, 20)
plt.title('corrected p values for sex 0')
Exemple #5
0
# Script fragment: p_vect_res, cor_vect_res, p, np and plt are defined
# outside the visible part of this snippet.
indices_res = np.where(p_vect_res <= 0.05)
# Single pre-formatted string: same output from the Python 2 print statement
# and the Python 3 print function.
print('numbers of significant p values %s over  %s'
      % (len(indices_res[0]), p))

# Figure 4: uncorrected p-value histogram (top, 20 bins) and correlation
# coefficients (bottom).
plt.figure(4)
plt.subplot(211)
plt.hist(p_vect_res, 20)
plt.title('uncorrected p values ')
plt.subplot(212)
plt.plot(cor_vect_res)
plt.title('correlation p coef')
plt.show()

# Correct the p-values with p_value_correction.fdr and keep the positions
# that stay at or below 0.05.
import p_value_correction as p_c
p_corrected_res = p_c.fdr(p_vect_res)
indices_c_res = np.where(p_corrected_res <= 0.05)

print('numbers of significant corrected p values %s over  %s'
      % (len(indices_c_res[0]), p))

# Figure 5: histogram of the corrected p-values (20 bins).
plt.figure(5)
plt.hist(p_corrected_res, 20)
plt.show()

from sklearn.linear_model import LinearRegression
from sklearn.utils import check_random_state
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_regression
import itertools
import operator
Exemple #6
0
# Script fragment: X_new, y, pearsonr, np, plt and the initial value of
# p_vect_eig_SNP are defined outside the visible part of this snippet.
cor_vect_eig_SNP = np.array([])
p_eig_SNP = X_new.shape[1]
# Collect the per-column results in lists and append them in one step after
# the loop; calling np.hstack on every iteration re-copies the growing arrays.
new_p_values_eig_SNP = []
new_correlations_eig_SNP = []
for i in range(p_eig_SNP):
    r_row_eig_SNP, p_value_eig_SNP = pearsonr(X_new[:, i], y)
    new_p_values_eig_SNP.append(p_value_eig_SNP)
    new_correlations_eig_SNP.append(r_row_eig_SNP)
# p_vect_eig_SNP already exists at this point, so it is extended rather than
# replaced, exactly as the per-iteration hstack did.
p_vect_eig_SNP = np.hstack((p_vect_eig_SNP, new_p_values_eig_SNP))
cor_vect_eig_SNP = np.hstack((cor_vect_eig_SNP, new_correlations_eig_SNP))

indices_eig_SNP = np.where(p_vect_eig_SNP <= 0.05)
# Single pre-formatted strings: same output from the Python 2 print statement
# and the Python 3 print function.
print('numbers of significant p values for _eig_SNP %s over  %s'
      % (len(indices_eig_SNP[0]), p_eig_SNP))

# Figure 1: uncorrected p-value histogram (top, 20 bins) and correlation
# coefficients (bottom).
plt.figure(1)
plt.subplot(211)
plt.hist(p_vect_eig_SNP, 20)
plt.title('uncorrected p values _eig_SNP ')
plt.subplot(212)
plt.plot(cor_vect_eig_SNP)
plt.title('correlation p coef _eig_SNP')
plt.show()

# Correct the p-values with p_value_correction.fdr and keep the positions
# that stay at or below 0.05.
import p_value_correction as p_c
p_corrected_eig_SNP = p_c.fdr(p_vect_eig_SNP)
indices_c_eig_SNP = np.where(p_corrected_eig_SNP <= 0.05)

print('s0 : numbers of significant corrected p values %s over  %s'
      % (len(indices_c_eig_SNP[0]), X_new.shape[1]))
# Figure 2: histogram of the corrected p-values (default bin count).
plt.figure(2)
plt.hist(p_corrected_eig_SNP)
plt.title('corrected p values for _eig_SNP')
plt.show()