def univariate(snps, X, Y, col=pheno_name): p = X.shape[1] from scipy.stats import pearsonr pvals = [] cors = [] for i in range(X.shape[1]): cor, pval = pearsonr(X[:, i], Y) cors.append(cor) pvals.append(pval) pvals = np.asarray(pvals) cors = np.asarray(cors) indices = np.where(pvals <= 0.05) print "\n" print "..... Univariate results" print ' numbers of significant p values *un*corrected', len( indices[0]), 'over ', p import p_value_correction as p_c p_corrected = p_c.fdr(pvals) w = np.where(p_corrected <= 0.05)[0] print ' numbers of significant corrected p values corrected', len( w), 'over ', p print ' ', snps.measure_ids[w], " pvalcor = ", p_corrected[ w], " correlation = ", cors[w] """lm = LinearRegression()
p = X.shape[1] #the p_values computation for the univariate approech from scipy.stats import pearsonr p_vect = np.array([]) cor_vect = np.array([]) for i in range(X.shape[1]): r_row, p_value = pearsonr(X[:, i], Y) p_vect = np.hstack((p_vect, p_value)) cor_vect = np.hstack((cor_vect, r_row)) indices = np.where(p_vect <= 0.05) print 'numbers of significant p values', len(indices[0]), 'over ', p #correction of the p_values using the fdr approech import p_value_correction as p_c p_corrected = p_c.fdr(p_vect) indices_c = np.where(p_corrected <= 0.05) print 's0 : numbers of significant corrected p values', len( indices_c[0]), 'over ', p import matplotlib.pyplot as plt plt.figure(2) plt.subplot(211) plt.hist(p_corrected, 40) plt.title('corrected p values ') plt.subplot(212) plt.hist(p_vect, 40) plt.title('uncorrected p values ') plt.show() import matplotlib.pyplot as plt
def univariate(mask, snps, studyPgS, col='height'): # the SNP are X and reordered lines X = snps.data[mask, :] p = X.shape[1] permuter = snps.subject_ids[mask].tolist() y = studyPgS.loc[permuter][col] covariate = numpy.matrix( pandas.get_dummies(studyPgS.loc[permuter]['ScanningCentre'], prefix='Centre')[range(7)]) print "COVARIATE" print covariate covariate = numpy.hstack( (covariate, numpy.asarray(studyPgS.loc[permuter][['Sex', 'Age']]))) print "COVARIATE" print covariate from sklearn.linear_model import LinearRegression Y = y - LinearRegression().fit(covariate, y).predict(covariate) from scipy.stats import pearsonr pvals = [] cors = [] for i in range(X.shape[1]): cor, pval = pearsonr(X[:, i], Y) cors.append(cor) pvals.append(pval) pvals = numpy.asarray(pvals) cors = numpy.asarray(cors) indices = numpy.where(pvals <= 0.05) print "\n" print "..... Univariate results" print ' numbers of significant p values *un*corrected', len( indices[0]), 'over ', p import p_value_correction as p_c p_corrected = p_c.fdr(pvals) w = numpy.where(p_corrected <= 0.05)[0] print ' numbers of significant corrected p values corrected', len( w), 'over ', p print ' ', snps.measure_ids[w], " pvalcor = ", p_corrected[ w], " correlation = ", cors[w] if col == 'height': snps_mask = [ snps.measure_ids.tolist().index(i) for i in snps.measure_ids[w] ] subX = snps.data[mask, :][:, snps_mask] lm = LinearRegression() lm.fit(subX, Y) print "\n..... Score explained by the %d significant SNPS is ~ 1.5 percent of the height var" % len( w) print " based on: ", subX.shape[0], ' subjects' print " covariate out is sex , age, scanning center" print 'lm.score(X, Y)', lm.score(subX, Y) else: if len(w) > 0: print "\n..... Score explained by the %d significant SNPS of the var" % ( len(w), col) print " based on: ", subX.shape[0], ' subjects' print " covariate out is sex , age, scanning center" print 'lm.score(X, Y)', lm.score(subX, Y) else: print "\n..... 
Nothing in %s variability explained by this approach " % ( col) return X, Y
plt.subplot(221) plt.hist(p_vect_sex_0, 20) plt.title('uncorrected p values for sex 0') plt.subplot(222) plt.hist(p_vect_sex_1, 20) plt.title('uncorrected p values for sex 1') plt.subplot(223) plt.plot(cor_vect_sex_0) plt.title('correlation p coef sex 0') plt.subplot(224) plt.plot(cor_vect_sex_1) plt.title('correlation coef for sex 1') plt.show() import p_value_correction as p_c p_corrected_sex_0 = p_c.fdr(p_vect_sex_0) indices_c_sex_0 = np.where(p_corrected_sex_0 <= 0.05) p_corrected_sex_1 = p_c.fdr(p_vect_sex_1) indices_c_sex_1 = np.where(p_corrected_sex_1 <= 0.05) print 's0 : numbers of significant corrected p values', len( indices_c_sex_0[0]), 'over ', X_.shape[1] print 's1 :numbers of significant corrected p values', len( indices_c_sex_1[0]), 'over ', X_.shape[1] plt.figure(2) plt.subplot(211) plt.hist(p_corrected_sex_0, 20) plt.title('corrected p values for sex 0')
# Positions of the p-values in p_vect_res that are at or below 0.05
# (np.where returns a tuple; element 0 holds the index array).
indices_res = np.where(p_vect_res <= 0.05)
print 'numbers of significant p values', len(indices_res[0]), 'over ', p
# NOTE(review): the line below reads as a disabled figure call in this view;
# if it was meant to be active, the two panels that follow are drawn on
# whichever figure is current instead of figure 4 -- confirm the intent.
# plt.figure(4)
# Top panel: histogram of the uncorrected p-values, 20 bins.
plt.subplot(211)
plt.hist(p_vect_res, 20)
plt.title('uncorrected p values ')
# Bottom panel: the correlation coefficients in cor_vect_res, in order.
plt.subplot(212)
plt.plot(cor_vect_res)
plt.title('correlation p coef')
plt.show()
# Pass the p-values through p_value_correction.fdr and count those that remain
# at or below 0.05.
import p_value_correction as p_c
p_corrected_res = p_c.fdr(p_vect_res)
indices_c_res = np.where(p_corrected_res <= 0.05)
print 'numbers of significant corrected p values', len(
    indices_c_res[0]), 'over ', p
# Figure 5: histogram of the corrected p-values, 20 bins.
plt.figure(5)
plt.hist(p_corrected_res, 20)
plt.show()
# Imports for code that follows this fragment; none of these names is used in
# the statements above.
from sklearn.linear_model import LinearRegression
from sklearn.utils import check_random_state
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_regression
import itertools
import operator
cor_vect_eig_SNP = np.array([]) p_eig_SNP = X_new.shape[1] for i in range(p_eig_SNP): r_row_eig_SNP, p_value_eig_SNP = pearsonr(X_new[:, i], y) p_vect_eig_SNP = np.hstack((p_vect_eig_SNP, p_value_eig_SNP)) cor_vect_eig_SNP = np.hstack((cor_vect_eig_SNP, r_row_eig_SNP)) indices_eig_SNP = np.where(p_vect_eig_SNP <= 0.05) print 'numbers of significant p values for _eig_SNP', len( indices_eig_SNP[0]), 'over ', p_eig_SNP plt.figure(1) plt.subplot(211) plt.hist(p_vect_eig_SNP, 20) plt.title('uncorrected p values _eig_SNP ') plt.subplot(212) plt.plot(cor_vect_eig_SNP) plt.title('correlation p coef _eig_SNP') plt.show() import p_value_correction as p_c p_corrected_eig_SNP = p_c.fdr(p_vect_eig_SNP) indices_c_eig_SNP = np.where(p_corrected_eig_SNP <= 0.05) print 's0 : numbers of significant corrected p values', len( indices_c_eig_SNP[0]), 'over ', X_new.shape[1] plt.figure(2) plt.hist(p_corrected_eig_SNP) plt.title('corrected p values for _eig_SNP') plt.show()