예제 #1
0
파일: leap_gwas.py 프로젝트: omerwe/LEAP
def gwas(bedSim, bedTest, pheno, h2, outFile, eigenFile, covar):
    """Run a FaST-LMM single-SNP scan with the variance ratio fixed from h2.

    Args:
        bedSim: SNP data passed to ``single_snp`` as ``G0`` when no
            ``eigenFile`` is supplied.
        bedTest: SNP data passed to ``single_snp`` as the tested SNPs.
        pheno: Phenotype; it is run through
            ``leapUtils._fixupBedAndPheno`` together with each SNP set
            before being handed to ``single_snp``.
        h2: Scalar used to fix ``log_delta = log(1/h2 - 1)``. Must lie
            strictly between 0 and 1, otherwise the logarithm is undefined
            or infinite.
        outFile: Forwarded as ``output_file_name``.
        eigenFile: Forwarded as ``cache_file``. When it is not ``None``,
            ``G0`` is passed as ``None`` instead of ``bedSim``.
        covar: Forwarded as ``covar``.

    Returns:
        The value returned by ``fastlmm.association.single_snp``.

    Raises:
        ValueError: If ``h2`` is not strictly between 0 and 1.
    """
    # Fail fast: 1/h2 - 1 is only positive and finite for 0 < h2 < 1.
    # Outside that range the original produced a ZeroDivisionError, +/-inf
    # or NaN that surfaced much later inside FaST-LMM.
    if not 0.0 < h2 < 1.0:
        raise ValueError(
            'h2 must be strictly between 0 and 1 (got %r)' % (h2,))

    bedSim, pheno = leapUtils._fixupBedAndPheno(bedSim, pheno)
    bedTest, pheno = leapUtils._fixupBedAndPheno(bedTest, pheno)

    # Run GWAS
    logdelta = np.log(1.0 / h2 - 1)
    G0 = bedSim if eigenFile is None else None
    # Single-argument print() emits the same text on Python 2 and Python 3.
    print('Performing LEAP GWAS...')
    results_df = fastlmm.association.single_snp(
        bedTest,
        pheno,
        G0=G0,
        covar=covar,
        output_file_name=outFile,
        log_delta=logdelta,
        cache_file=eigenFile)
    return results_df
예제 #2
0
def gwas(bedSim, bedTest, pheno, h2, outFile, eigenFile, covar):
    """Run a FaST-LMM single-SNP scan with the variance ratio fixed from h2.

    Args:
        bedSim: SNP data passed to ``single_snp`` as ``G0`` when no
            ``eigenFile`` is supplied.
        bedTest: SNP data passed to ``single_snp`` as the tested SNPs.
        pheno: Phenotype; it is run through
            ``leapUtils._fixupBedAndPheno`` together with each SNP set
            before being handed to ``single_snp``.
        h2: Scalar used to fix ``log_delta = log(1/h2 - 1)``. Must lie
            strictly between 0 and 1, otherwise the logarithm is undefined
            or infinite.
        outFile: Forwarded as ``output_file_name``.
        eigenFile: Forwarded as ``cache_file``. When it is not ``None``,
            ``G0`` is passed as ``None`` instead of ``bedSim``.
        covar: Forwarded as ``covar``.

    Returns:
        The value returned by ``fastlmm.association.single_snp``.

    Raises:
        ValueError: If ``h2`` is not strictly between 0 and 1.
    """
    # Fail fast: 1/h2 - 1 is only positive and finite for 0 < h2 < 1.
    # Outside that range the original produced a ZeroDivisionError, +/-inf
    # or NaN that surfaced much later inside FaST-LMM.
    if not 0.0 < h2 < 1.0:
        raise ValueError(
            'h2 must be strictly between 0 and 1 (got %r)' % (h2,))

    bedSim, pheno = leapUtils._fixupBedAndPheno(bedSim, pheno)
    bedTest, pheno = leapUtils._fixupBedAndPheno(bedTest, pheno)

    # Run GWAS
    logdelta = np.log(1.0 / h2 - 1)
    G0 = bedSim if eigenFile is None else None
    # Single-argument print() emits the same text on Python 2 and Python 3.
    print('Performing LEAP GWAS...')
    results_df = fastlmm.association.single_snp(
        bedTest,
        pheno,
        G0=G0,
        covar=covar,
        output_file_name=outFile,
        log_delta=logdelta,
        cache_file=eigenFile)
    return results_df
예제 #3
0
def probit(bed,
           pheno,
           h2,
           prev,
           eigen,
           outFile,
           keepArr,
           covar,
           thresholds,
           nofail,
           numSkipTopPCs,
           mineig,
           hess,
           recenter,
           maxFixedIters,
           epsilon,
           treatFixedAsRandom=False):
    """Fit a probit model on PC features and return per-individual liabilities.

    Args:
        bed: SNP data; ``bed.sid`` supplies the SNP count and ``bed.iid`` the
            (family id, individual id) pairs written to the output files.
        pheno: Phenotype array, or a dict whose ``'vals'`` entry is that
            array. Must contain exactly two distinct values.
            NOTE: the array is binarized to 0/1 in place.
        h2: Forwarded to ``probitRegression``.
        prev: Prevalence; ``stats.norm(0, 1).isf(prev)`` is the default
            liability threshold and the value assigned to flipped entries.
        eigen: Indexable with ``'arr_0'`` (used as the matrix ``U``) and
            ``'arr_1'`` (multiplied by the SNP count, then square-rooted).
        outFile: Output prefix, or ``None`` to skip writing files.
        keepArr: Boolean mask of individuals used for fitting (and for
            recentering when ``recenter`` is true); ``None`` keeps everyone.
        covar: Optional covariate matrix prepended to the features.
            NOTE: it is centered and rescaled in place.
        thresholds: Optional per-individual thresholds overriding the
            prevalence-based one.
        nofail: Forwarded to ``probitRegression``.
        numSkipTopPCs: Number of trailing components of ``S`` to mask out
            (presumably the top PCs, if eigenvalues are sorted ascending;
            confirm against the code that produced ``eigen``).
        mineig: Components with ``sqrt(eigenvalue * numSNPs) <= mineig`` are
            dropped.
        hess: Forwarded to ``probitRegression``.
        recenter: If true, center features on the kept individuals only.
        maxFixedIters: Forwarded to ``probitRegression``.
        epsilon: Forwarded to ``probitRegression``.
        treatFixedAsRandom: If true, covariates are not counted as fixed
            features.

    Returns:
        dict with keys ``'header'`` (``[None]``), ``'vals'`` (the liability
        vector) and ``'iid'`` (``bed.iid``).

    Raises:
        Exception: If more than one phenotype column is present, or the
            phenotype does not have exactly two distinct values.
    """
    bed, pheno = leapUtils._fixupBedAndPheno(bed, pheno)

    # Extract phenotype
    phe = pheno['vals'] if isinstance(pheno, dict) else pheno
    if len(phe.shape) == 2:
        if phe.shape[1] != 1:
            raise Exception('More than one phenotype found')
        phe = phe[:, 0]
    if keepArr is None:
        # Builtin bool: the np.bool alias was removed in NumPy 1.24.
        keepArr = np.ones(phe.shape[0], dtype=bool)

    numSnps = bed.sid.shape[0]
    S = np.sqrt(eigen['arr_1'] * numSnps)
    U = eigen['arr_0']
    goodS = (S > mineig)
    if numSkipTopPCs > 0:
        goodS[-numSkipTopPCs:] = False
    numRemoved = np.sum(~goodS)
    if numRemoved > 0:
        # %-formatting prints the same text on Python 2 and Python 3.
        print('Removing %s PCs with low variance' % numRemoved)
    G = U[:, goodS] * S[goodS]

    # Set binary vector (this check also rejects a single-valued phenotype)
    pheUnique = np.unique(phe)
    if pheUnique.shape[0] != 2:
        raise Exception('phenotype file has more than two values')
    cases = (phe > phe.mean())
    phe[~cases] = 0
    phe[cases] = 1

    # Liability threshold: prevalence-based unless explicit ones are given
    prevThresh = stats.norm(0, 1).isf(prev)
    t = prevThresh if thresholds is None else thresholds

    # Recenter G to only consider the unrelated individuals
    if recenter:
        G -= np.mean(G[keepArr, :], axis=0)
    else:
        G -= np.mean(G, axis=0)

    numFixedFeatures = 0
    if covar is not None:
        # Standardize with the global mean/std, then put the covariates on
        # the same scale as the average PC feature.
        covar -= covar.mean()
        covar /= covar.std()
        covar *= np.mean(np.std(G, axis=0))
        G = np.concatenate((covar, G), axis=1)
        if not treatFixedAsRandom:
            numFixedFeatures += covar.shape[1]

    # Run Probit regression
    probitThresh = (t if thresholds is None else t[keepArr])
    beta = probitRegression(G[keepArr, :], phe[keepArr], probitThresh,
                            numSnps, numFixedFeatures, h2, hess,
                            maxFixedIters, epsilon, nofail)

    # Predict liabilities for all individuals
    meanLiab = G.dot(beta)
    liab = meanLiab.copy()
    indsToFlip = ((liab <= t) & (phe > 0.5)) | ((liab > t) & (phe < 0.5))
    # NOTE(review): flipped entries receive the prevalence-based threshold
    # even when per-individual thresholds were supplied; confirm intended.
    liab[indsToFlip] = prevThresh

    if outFile is not None:

        def writeLiabs(suffix, values):
            # One "fid iid value" line per individual; the with-block
            # closes the file even if a write fails.
            with open(outFile + suffix, 'w') as f:
                for ind_i, [fid, iid] in enumerate(bed.iid):
                    f.write(' '.join([fid, iid, '%0.3f' % values[ind_i]]) +
                            '\n')

        # save liabilities
        writeLiabs('.liabs', liab)

        # save liabilities after regressing out the fixed effects
        if numFixedFeatures > 0:
            fixedEffect = G[:, :numFixedFeatures].dot(beta[:numFixedFeatures])

            writeLiabs('.liab_nofixed', liab - fixedEffect)

            # Variant: remove the fixed effects first, then flip
            liab_nofixed2 = meanLiab - fixedEffect
            indsToFlip = ((liab_nofixed2 <= t) &
                          (phe > 0.5)) | ((liab_nofixed2 > t) & (phe < 0.5))
            liab_nofixed2[indsToFlip] = prevThresh
            writeLiabs('.liab_nofixed2', liab_nofixed2)

    # Return phenotype struct with liabilities
    liabsStruct = {'header': [None], 'vals': liab, 'iid': bed.iid}
    return liabsStruct
예제 #4
0
def probit(bed, pheno, h2, prev, eigen, outFile, keepArr, covar, thresholds,
           nofail, numSkipTopPCs, mineig, hess, recenter, maxFixedIters,
           epsilon, treatFixedAsRandom=False):
    """Fit a probit model on PC features and return per-individual liabilities.

    Args:
        bed: SNP data; ``bed.sid`` supplies the SNP count and ``bed.iid`` the
            (family id, individual id) pairs written to the output files.
        pheno: Phenotype array, or a dict whose ``'vals'`` entry is that
            array. Must contain exactly two distinct values.
            NOTE: the array is binarized to 0/1 in place.
        h2: Forwarded to ``probitRegression``.
        prev: Prevalence; ``stats.norm(0, 1).isf(prev)`` is the default
            liability threshold and the value assigned to flipped entries.
        eigen: Indexable with ``'arr_0'`` (used as the matrix ``U``) and
            ``'arr_1'`` (multiplied by the SNP count, then square-rooted).
        outFile: Output prefix, or ``None`` to skip writing files.
        keepArr: Boolean mask of individuals used for fitting (and for
            recentering when ``recenter`` is true); ``None`` keeps everyone.
        covar: Optional covariate matrix prepended to the features.
            NOTE: it is centered and rescaled in place.
        thresholds: Optional per-individual thresholds overriding the
            prevalence-based one.
        nofail: Forwarded to ``probitRegression``.
        numSkipTopPCs: Number of trailing components of ``S`` to mask out
            (presumably the top PCs, if eigenvalues are sorted ascending;
            confirm against the code that produced ``eigen``).
        mineig: Components with ``sqrt(eigenvalue * numSNPs) <= mineig`` are
            dropped.
        hess: Forwarded to ``probitRegression``.
        recenter: If true, center features on the kept individuals only.
        maxFixedIters: Forwarded to ``probitRegression``.
        epsilon: Forwarded to ``probitRegression``.
        treatFixedAsRandom: If true, covariates are not counted as fixed
            features.

    Returns:
        dict with keys ``'header'`` (``[None]``), ``'vals'`` (the liability
        vector) and ``'iid'`` (``bed.iid``).

    Raises:
        Exception: If more than one phenotype column is present, or the
            phenotype does not have exactly two distinct values.
    """
    bed, pheno = leapUtils._fixupBedAndPheno(bed, pheno)

    # Extract phenotype
    phe = pheno['vals'] if isinstance(pheno, dict) else pheno
    if len(phe.shape) == 2:
        if phe.shape[1] != 1:
            raise Exception('More than one phenotype found')
        phe = phe[:, 0]
    if keepArr is None:
        # Builtin bool: the np.bool alias was removed in NumPy 1.24.
        keepArr = np.ones(phe.shape[0], dtype=bool)

    numSnps = bed.sid.shape[0]
    S = np.sqrt(eigen['arr_1'] * numSnps)
    U = eigen['arr_0']
    goodS = (S > mineig)
    if numSkipTopPCs > 0:
        goodS[-numSkipTopPCs:] = False
    numRemoved = np.sum(~goodS)
    if numRemoved > 0:
        # %-formatting prints the same text on Python 2 and Python 3.
        print('Removing %s PCs with low variance' % numRemoved)
    G = U[:, goodS] * S[goodS]

    # Set binary vector (this check also rejects a single-valued phenotype)
    pheUnique = np.unique(phe)
    if pheUnique.shape[0] != 2:
        raise Exception('phenotype file has more than two values')
    cases = (phe > phe.mean())
    phe[~cases] = 0
    phe[cases] = 1

    # Liability threshold: prevalence-based unless explicit ones are given
    prevThresh = stats.norm(0, 1).isf(prev)
    t = prevThresh if thresholds is None else thresholds

    # Recenter G to only consider the unrelated individuals
    if recenter:
        G -= np.mean(G[keepArr, :], axis=0)
    else:
        G -= np.mean(G, axis=0)

    numFixedFeatures = 0
    if covar is not None:
        # Standardize with the global mean/std, then put the covariates on
        # the same scale as the average PC feature.
        covar -= covar.mean()
        covar /= covar.std()
        covar *= np.mean(np.std(G, axis=0))
        G = np.concatenate((covar, G), axis=1)
        if not treatFixedAsRandom:
            numFixedFeatures += covar.shape[1]

    # Run Probit regression
    probitThresh = (t if thresholds is None else t[keepArr])
    beta = probitRegression(G[keepArr, :], phe[keepArr], probitThresh,
                            numSnps, numFixedFeatures, h2, hess,
                            maxFixedIters, epsilon, nofail)

    # Predict liabilities for all individuals
    meanLiab = G.dot(beta)
    liab = meanLiab.copy()
    indsToFlip = ((liab <= t) & (phe > 0.5)) | ((liab > t) & (phe < 0.5))
    # NOTE(review): flipped entries receive the prevalence-based threshold
    # even when per-individual thresholds were supplied; confirm intended.
    liab[indsToFlip] = prevThresh

    if outFile is not None:

        def writeLiabs(suffix, values):
            # One "fid iid value" line per individual; the with-block
            # closes the file even if a write fails.
            with open(outFile + suffix, 'w') as f:
                for ind_i, [fid, iid] in enumerate(bed.iid):
                    f.write(' '.join([fid, iid, '%0.3f' % values[ind_i]]) +
                            '\n')

        # save liabilities
        writeLiabs('.liabs', liab)

        # save liabilities after regressing out the fixed effects
        if numFixedFeatures > 0:
            fixedEffect = G[:, :numFixedFeatures].dot(beta[:numFixedFeatures])

            writeLiabs('.liab_nofixed', liab - fixedEffect)

            # Variant: remove the fixed effects first, then flip
            liab_nofixed2 = meanLiab - fixedEffect
            indsToFlip = ((liab_nofixed2 <= t) &
                          (phe > 0.5)) | ((liab_nofixed2 > t) & (phe < 0.5))
            liab_nofixed2[indsToFlip] = prevThresh
            writeLiabs('.liab_nofixed2', liab_nofixed2)

    # Return phenotype struct with liabilities
    liabsStruct = {
        'header': [None],
        'vals': liab,
        'iid': bed.iid
    }
    return liabsStruct