def gwas(bedSim, bedTest, pheno, h2, outFile, eigenFile, covar):
    """Run a single-SNP association scan via fastlmm with a fixed log-delta.

    Args:
        bedSim: genotype data passed to fastlmm as ``G0`` (the similarity
            SNPs). Only used when ``eigenFile`` is None.
        bedTest: genotype data holding the SNPs to test.
        pheno: phenotype data; aligned against both bed objects by
            ``leapUtils._fixupBedAndPheno`` before the scan.
        h2: value used to derive log-delta as ``log(1/h2 - 1)``.
            NOTE(review): no range check is done here; values outside
            (0, 1) make the logarithm undefined -- confirm callers guard it.
        outFile: forwarded to fastlmm as ``output_file_name``.
        eigenFile: forwarded to fastlmm as ``cache_file``; when given,
            ``G0`` is passed as None.
        covar: forwarded to fastlmm as ``covar``.

    Returns:
        Whatever ``fastlmm.association.single_snp`` returns (named
        ``results_df`` here).
    """
    # The phenotype returned by the first fix-up is fed into the second one,
    # so it ends up reconciled with both genotype sets.
    bedSim, pheno = leapUtils._fixupBedAndPheno(bedSim, pheno)
    bedTest, pheno = leapUtils._fixupBedAndPheno(bedTest, pheno)

    # Run GWAS
    logdelta = np.log(1.0 / h2 - 1)
    G0 = (bedSim if eigenFile is None else None)

    # Single-argument parenthesised print emits the same text on Python 2
    # and is also valid on Python 3.
    print('Performing LEAP GWAS...')
    results_df = fastlmm.association.single_snp(
        bedTest, pheno, G0=G0, covar=covar, output_file_name=outFile,
        log_delta=logdelta, cache_file=eigenFile)
    return results_df
def probit(bed, pheno, h2, prev, eigen, outFile, keepArr, covar, thresholds,
           nofail, numSkipTopPCs, mineig, hess, recenter, maxFixedIters,
           epsilon, treatFixedAsRandom=False):
    """Fit a probit model on eigen-decomposition features and return liabilities.

    NOTE(review): a second ``def probit`` with the same signature appears
    later in this file and replaces this one at import time.

    Args:
        bed: genotype object; ``bed.sid`` and ``bed.iid`` are read.
        pheno: phenotype array, or a dict whose ``'vals'`` entry is the array.
            Must hold exactly two distinct values. The array is recoded to
            0/1 *in place*.
        h2, hess, maxFixedIters, epsilon, nofail: forwarded unchanged to
            ``probitRegression``.
        prev: prevalence; ``stats.norm(0, 1).isf(prev)`` is the liability
            threshold.
        eigen: mapping with ``'arr_0'`` (used as U) and ``'arr_1'`` (used
            as S). NOTE(review): these look like default ``np.savez`` keys --
            confirm against the producer.
        outFile: output path prefix, or None to skip writing files.
        keepArr: selector of the individuals used for fitting (applied as
            ``G[keepArr, :]``); None selects everyone.
        covar: optional covariate matrix, prepended to the features. It is
            rescaled *in place*.
        thresholds: optional per-individual thresholds replacing the
            prevalence-derived scalar during fitting and flip detection.
        numSkipTopPCs: number of trailing components to drop.
            NOTE(review): presumably eigenvalues are sorted ascending so the
            trailing ones are the top PCs -- confirm.
        mineig: components whose scaled singular value is not above this are
            dropped.
        recenter: if true, center features on the mean of the kept
            individuals only; otherwise on the mean of everyone.
        treatFixedAsRandom: if true, covariates are not counted as fixed
            features.

    Returns:
        dict with keys ``'header'`` (``[None]``), ``'vals'`` (liabilities)
        and ``'iid'`` (``bed.iid``).

    Raises:
        Exception: if the phenotype has more than one column or does not
            have exactly two distinct values.
    """

    def _writeLiabs(path, iids, values):
        # One "<fid> <iid> <value>" row per individual. The context manager
        # closes the file even if a write raises.
        with open(path, 'w') as f:
            for ind_i, [fid, iid] in enumerate(iids):
                f.write(' '.join([fid, iid, '%0.3f' % values[ind_i]]) + '\n')

    bed, pheno = leapUtils._fixupBedAndPheno(bed, pheno)

    # Extract phenotype
    if isinstance(pheno, dict):
        phe = pheno['vals']
    else:
        phe = pheno
    if (len(phe.shape) == 2):
        if (phe.shape[1] == 1):
            phe = phe[:, 0]
        else:
            raise Exception('More than one phenotype found')
    if (keepArr is None):
        # Builtin bool: the np.bool alias was removed in NumPy 1.24.
        keepArr = np.ones(phe.shape[0], dtype=bool)

    # Scale the stored values by the SNP count and take the square root.
    S = eigen['arr_1'] * bed.sid.shape[0]
    U = eigen['arr_0']
    S = np.sqrt(S)
    goodS = (S > mineig)
    if (numSkipTopPCs > 0):
        goodS[-numSkipTopPCs:] = False
    numRemoved = np.sum(~goodS)
    if (numRemoved > 0):
        # Same text as the Python 2 statement form, but valid on Python 3 too.
        print('Removing %d PCs with low variance' % numRemoved)
    G = U[:, goodS] * S[goodS]

    # Set binary vector: values above the mean become 1, the rest 0 (in place)
    pheUnique = np.unique(phe)
    if (pheUnique.shape[0] != 2):
        raise Exception('phenotype file has more than two values')
    pheMean = phe.mean()
    cases = (phe > pheMean)
    phe[~cases] = 0
    phe[cases] = 1

    # Prevalence-derived threshold, computed once and reused below.
    prevThreshold = stats.norm(0, 1).isf(prev)
    t = prevThreshold
    if (thresholds is not None):
        t = thresholds

    # Recenter G to only consider the unrelated individuals
    if recenter:
        G -= np.mean(G[keepArr, :], axis=0)
    else:
        G -= np.mean(G, axis=0)

    numFixedFeatures = 0
    if (covar is not None):
        # Standardise with the global mean/std over all entries, then match
        # the average column scale of G. Modifies the caller's array.
        covar -= covar.mean()
        covar /= covar.std()
        covar *= np.mean(np.std(G, axis=0))
        G = np.concatenate((covar, G), axis=1)
        if (not treatFixedAsRandom):
            numFixedFeatures += covar.shape[1]

    # Run Probit regression
    probitThresh = (t if thresholds is None else t[keepArr])
    beta = probitRegression(G[keepArr, :], phe[keepArr], probitThresh,
                            bed.sid.shape[0], numFixedFeatures, h2, hess,
                            maxFixedIters, epsilon, nofail)

    # Predict liabilities for all individuals
    meanLiab = G.dot(beta)
    liab = meanLiab.copy()
    # Individuals whose predicted liability contradicts their case/control
    # status are reset to the prevalence-derived threshold (also when
    # per-individual thresholds were supplied).
    indsToFlip = ((liab <= t) & (phe > 0.5)) | ((liab > t) & (phe < 0.5))
    liab[indsToFlip] = prevThreshold

    if (outFile is not None):
        # save liabilities
        _writeLiabs(outFile + '.liabs', bed.iid, liab)

        # save liabilities after regressing out the fixed effects
        if (numFixedFeatures > 0):
            fixedEffect = G[:, :numFixedFeatures].dot(beta[:numFixedFeatures])

            # Variant 1: subtract fixed effects from the already-flipped values.
            liab_nofixed = liab - fixedEffect
            _writeLiabs(outFile + '.liab_nofixed', bed.iid, liab_nofixed)

            # Variant 2: subtract from the raw predictions, then redo the flip.
            liab_nofixed2 = meanLiab - fixedEffect
            indsToFlip = (((liab_nofixed2 <= t) & (phe > 0.5)) |
                          ((liab_nofixed2 > t) & (phe < 0.5)))
            liab_nofixed2[indsToFlip] = prevThreshold
            _writeLiabs(outFile + '.liab_nofixed2', bed.iid, liab_nofixed2)

    # Return phenotype struct with liabilities
    liabsStruct = {'header': [None], 'vals': liab, 'iid': bed.iid}
    return liabsStruct
def probit(bed, pheno, h2, prev, eigen, outFile, keepArr, covar, thresholds,
           nofail, numSkipTopPCs, mineig, hess, recenter, maxFixedIters,
           epsilon, treatFixedAsRandom=False):
    """Fit a probit model on eigen-decomposition features and return liabilities.

    NOTE(review): an earlier ``def probit`` with the same signature appears
    above in this file; this later definition is the one that takes effect.

    Args:
        bed: genotype object; ``bed.sid`` and ``bed.iid`` are read.
        pheno: phenotype array, or a dict whose ``'vals'`` entry is the array.
            Must hold exactly two distinct values. The array is recoded to
            0/1 *in place*.
        h2, hess, maxFixedIters, epsilon, nofail: forwarded unchanged to
            ``probitRegression``.
        prev: prevalence; ``stats.norm(0, 1).isf(prev)`` is the liability
            threshold.
        eigen: mapping with ``'arr_0'`` (used as U) and ``'arr_1'`` (used
            as S). NOTE(review): these look like default ``np.savez`` keys --
            confirm against the producer.
        outFile: output path prefix, or None to skip writing files.
        keepArr: selector of the individuals used for fitting (applied as
            ``G[keepArr, :]``); None selects everyone.
        covar: optional covariate matrix, prepended to the features. It is
            rescaled *in place*.
        thresholds: optional per-individual thresholds replacing the
            prevalence-derived scalar during fitting and flip detection.
        numSkipTopPCs: number of trailing components to drop.
            NOTE(review): presumably eigenvalues are sorted ascending so the
            trailing ones are the top PCs -- confirm.
        mineig: components whose scaled singular value is not above this are
            dropped.
        recenter: if true, center features on the mean of the kept
            individuals only; otherwise on the mean of everyone.
        treatFixedAsRandom: if true, covariates are not counted as fixed
            features.

    Returns:
        dict with keys ``'header'`` (``[None]``), ``'vals'`` (liabilities)
        and ``'iid'`` (``bed.iid``).

    Raises:
        Exception: if the phenotype has more than one column or does not
            have exactly two distinct values.
    """

    def _writeLiabs(path, iids, values):
        # One "<fid> <iid> <value>" row per individual. The context manager
        # closes the file even if a write raises.
        with open(path, 'w') as f:
            for ind_i, [fid, iid] in enumerate(iids):
                f.write(' '.join([fid, iid, '%0.3f' % values[ind_i]]) + '\n')

    bed, pheno = leapUtils._fixupBedAndPheno(bed, pheno)

    # Extract phenotype
    if isinstance(pheno, dict):
        phe = pheno['vals']
    else:
        phe = pheno
    if (len(phe.shape) == 2):
        if (phe.shape[1] == 1):
            phe = phe[:, 0]
        else:
            raise Exception('More than one phenotype found')
    if (keepArr is None):
        # Builtin bool: the np.bool alias was removed in NumPy 1.24.
        keepArr = np.ones(phe.shape[0], dtype=bool)

    # Scale the stored values by the SNP count and take the square root.
    S = eigen['arr_1'] * bed.sid.shape[0]
    U = eigen['arr_0']
    S = np.sqrt(S)
    goodS = (S > mineig)
    if (numSkipTopPCs > 0):
        goodS[-numSkipTopPCs:] = False
    numRemoved = np.sum(~goodS)
    if (numRemoved > 0):
        # Same text as the Python 2 statement form, but valid on Python 3 too.
        print('Removing %d PCs with low variance' % numRemoved)
    G = U[:, goodS] * S[goodS]

    # Set binary vector: values above the mean become 1, the rest 0 (in place)
    pheUnique = np.unique(phe)
    if (pheUnique.shape[0] != 2):
        raise Exception('phenotype file has more than two values')
    pheMean = phe.mean()
    cases = (phe > pheMean)
    phe[~cases] = 0
    phe[cases] = 1

    # Prevalence-derived threshold, computed once and reused below.
    prevThreshold = stats.norm(0, 1).isf(prev)
    t = prevThreshold
    if (thresholds is not None):
        t = thresholds

    # Recenter G to only consider the unrelated individuals
    if recenter:
        G -= np.mean(G[keepArr, :], axis=0)
    else:
        G -= np.mean(G, axis=0)

    numFixedFeatures = 0
    if (covar is not None):
        # Standardise with the global mean/std over all entries, then match
        # the average column scale of G. Modifies the caller's array.
        covar -= covar.mean()
        covar /= covar.std()
        covar *= np.mean(np.std(G, axis=0))
        G = np.concatenate((covar, G), axis=1)
        if (not treatFixedAsRandom):
            numFixedFeatures += covar.shape[1]

    # Run Probit regression
    probitThresh = (t if thresholds is None else t[keepArr])
    beta = probitRegression(G[keepArr, :], phe[keepArr], probitThresh,
                            bed.sid.shape[0], numFixedFeatures, h2, hess,
                            maxFixedIters, epsilon, nofail)

    # Predict liabilities for all individuals
    meanLiab = G.dot(beta)
    liab = meanLiab.copy()
    # Individuals whose predicted liability contradicts their case/control
    # status are reset to the prevalence-derived threshold (also when
    # per-individual thresholds were supplied).
    indsToFlip = ((liab <= t) & (phe > 0.5)) | ((liab > t) & (phe < 0.5))
    liab[indsToFlip] = prevThreshold

    if (outFile is not None):
        # save liabilities
        _writeLiabs(outFile + '.liabs', bed.iid, liab)

        # save liabilities after regressing out the fixed effects
        if (numFixedFeatures > 0):
            fixedEffect = G[:, :numFixedFeatures].dot(beta[:numFixedFeatures])

            # Variant 1: subtract fixed effects from the already-flipped values.
            liab_nofixed = liab - fixedEffect
            _writeLiabs(outFile + '.liab_nofixed', bed.iid, liab_nofixed)

            # Variant 2: subtract from the raw predictions, then redo the flip.
            liab_nofixed2 = meanLiab - fixedEffect
            indsToFlip = (((liab_nofixed2 <= t) & (phe > 0.5)) |
                          ((liab_nofixed2 > t) & (phe < 0.5)))
            liab_nofixed2[indsToFlip] = prevThreshold
            _writeLiabs(outFile + '.liab_nofixed2', bed.iid, liab_nofixed2)

    # Return phenotype struct with liabilities
    liabsStruct = {'header': [None], 'vals': liab, 'iid': bed.iid}
    return liabsStruct