y[snp] = [genos[3], 'A', 'G'] + genos[6:] print >> currbimbam, ", ".join(y) #t1 = time.time() #print t1-t0 currbimbam.close() #genomat = matrix_reader(genodir + 'hutt.imputed.dhssnps.bimbam',sep=",") print "Running GEMMA..." gemmer = (hmdir + 'Programs/gemma0.94 -g ' + currfiles + '.bimbam -p ' + currfiles + '.pheno -k ' + currfiles + '.square.txt -c ' + currfiles + '.covariates -lmm 4 -maf 0.05 -o curr_' + pheno) t0 = time.time() ifier(gemmer) t1 = time.time() print t1-t0 #currresults = open(genodir + 'output/curr_' + pheno + '.assoc.txt','r') currresults = matrix_reader(genodir + 'output/curr_' + pheno + '.assoc.txt',dtype='f8') currsort = curresults[currresults[:,12].argsort()] currwins = currsort[0:100,] currscores = [0]*len(dhsdic[dhsdic.keys()[0]]) for snp in currwins[:,1]: currscores = currscores + dhsdic[snp] currperms = [0]*len(dhsdic[dhsdic.keys()[0]]) currpermwins = [0]*len(dhsdic[dhsdic.keys()[0]]) curractive = [0]*len(dhsdic[dhsdic.keys()[0]]) print "Running permutations..." for perm in xrange(0,100): tissueps = permer(scores = currscores, circuit = currperms, permwins = currpermwins, actives=curractive) #completedgenes += 1 #t1 = time.time()
genodir = sys.argv[2] outname = sys.argv[3] genor = genodir + 'ByChr/' + outname + '.chr' + chrm + '.genos.txt' anoter = genodir + 'ByChr/' + outname + '.chr' + chrm + '.annot.txt' print "Loading annotations..." sys.stdout.flush() bim = open(genodir + 'ByChr/' + outname + '.chr' + chrm + '.bim','r') bimer = bim.readlines() snpids = [x.strip().split()[1] for x in bimer] snppos = [x.strip().split()[3] for x in bimer] print "Loading genotypes..." sys.stdout.flush() raws = matrix_reader(genodir + 'ByChr/' + outname + '.chr' + chrm + '.raw',sep=" ") #linecounter = subprocess.Popen('wc -l ' + genodir + 'ByChr/hutt.imputed.chr' + chrm + '.raw', shell=True, stdout=subprocess.PIPE) #linecount = int(linecounter.communicate()[0].strip().split()[0]) #columncounter = subprocess.Popen('awk -F" " \'{print NF;exit}\' ' + genodir + 'ByChr/hutt.imputed.chr' + chrm + '.raw', shell=True, stdout=subprocess.PIPE) #columncount = int(columncounter.communicate()[0].strip().split()[0]) #raws = numpy.zeros((linecount,columncount),dtype='|S2') #rawin = open(genodir + 'ByChr/hutt.imputed.chr' + chrm + '.raw','r') #for i,line in enumerate(rawin): # raws[i,:] = line.strip().split() #raws = numpy.loadtxt(genodir + 'ByChr/hutt.imputed.chr' + chrm + '.raw',dtype='str') print "Transposing genotypes..." sys.stdout.flush() genorfile = open(genor,'w') for line in range(6,raws.shape[1]):
import sys
import os
import numpy
sys.path.append('/mnt/lustre/home/cusanovich/Programs/')
sys.path.append('/mnt/lustre/home/cusanovich/Programs/lib/python2.6/site-packages/')
from DarrenTools import ifier, matrix_reader

# Split the 'hutt.imputed.rename.add.raw' table into three files next to it:
#   .genos.bimbam   - the genotype block, tab-separated
#   .snps.bimbam    - one SNP name per line
#   .findivs.bimbam - one individual ID per line
genodir = '/mnt/lustre/home/cusanovich/500HT/Imputed1415/'
os.chdir(genodir)
# The input and all three outputs share this stem.
prefix = genodir + 'hutt.imputed.rename'

#plinker = 'plink --bfile 500HT/Imputed1415/hutt.imputed.rename --recodeA'
#ifier(plinker)
genomatix = matrix_reader(prefix + '.add.raw', sep=" ")

# Transpose once so that each row corresponds to one input column.
bycolumn = genomatix.T
# Drop the first six input columns and the first input row (presumably the
# ID/phenotype columns and the header of a PLINK .raw file -- TODO confirm).
properg = bycolumn[6:, 1:]
# SNP names: first-row labels of the kept columns, cut at the first '_'.
snps = [x.split('_')[0] for x in bycolumn[6:, 0]]
# Individual IDs: input column 1 without its first-row entry.
findivs = bycolumn[1, 1:]

numpy.savetxt(prefix + '.genos.bimbam', properg,
              fmt='%s', delimiter='\t', newline='\n')
# `with` closes each handle even if the write fails.
with open(prefix + '.snps.bimbam', 'w') as snper:
    print >> snper, '\n'.join(snps)
with open(prefix + '.findivs.bimbam', 'w') as finer:
    print >> finer, '\n'.join(findivs)
import os
import numpy
# Extend the module search path before importing DarrenTools (presumably
# that is where it lives).
for extra_path in (
        '/mnt/lustre/home/cusanovich/Programs/',
        '/mnt/lustre/home/cusanovich/Programs/lib/python2.6/site-packages/'):
    sys.path.append(extra_path)
from DarrenTools import ifier, matrix_reader

genodir = '/mnt/lustre/home/cusanovich/500HT/Imputed1415/'
os.chdir(genodir)
#plinker = 'plink --bfile 500HT/Imputed1415/hutt.imputed.rename --recodeA'
#ifier(plinker)
genomatix = matrix_reader(
    '/mnt/lustre/home/cusanovich/500HT/Imputed1415/hutt.imputed.rename.add.raw',
    sep=" ")

# Work on the transpose: each row corresponds to one column of the input.
flipped = genomatix.T
# Genotype block: everything past the first six input columns, minus the
# first input row.
properg = flipped[6:, 1:]
# Input column 1 without its first-row entry.
findivs = flipped[1, 1:]
# First-row labels of the genotype columns, truncated at the first '_'.
snps = []
for label in flipped[6:, 0]:
    snps.append(label.split('_')[0])

numpy.savetxt(
    '/mnt/lustre/home/cusanovich/500HT/Imputed1415/hutt.imputed.rename.genos.bimbam',
    properg,
    fmt='%s',
    delimiter='\t',
    newline='\n')
# Left open on purpose: the code that writes to and closes this handle lies
# past the end of this chunk.
snper = open(
    '/mnt/lustre/home/cusanovich/500HT/Imputed1415/hutt.imputed.rename.snps.bimbam',
    'w')
gemmer = (hmdir + 'Programs/gemma0.94 -g ' + genodir + 'perm_curr_' + chrm + '_pc' + str(pcs) + '.bimbam -p ' + currfiles + '.pheno -k ' + currfiles + '.square.txt -c ' + currfiles + '.pcs.txt' + ' -lmm 4 -maf 0.05 -o perm_curr_' + chrm + '_pc' + str(pcs)) ifier(gemmer) permering = open(genodir + 'output/perm_curr_' + chrm + '_pc' + str(pcs) + '.assoc.txt','r') permers = [x.strip().split()[12] for x in permering.readlines()] permers = [float(x) for x in permers if x != 'nan' and x != 'p_lrt'] permering.close() permlow = min(permers) if permlow <= pmin: winnerperms += 1 if winnerperms == 10: return 11/uniform(perm+2,perm+3) return (winnerperms + 1)/float(10001) print "Loading expression..." if regressPCs: mastercols = matrix_reader('/mnt/lustre/home/cusanovich/500HT/hutt' + mapper + '.' + distance + '.mastercols.txt',dtype='|S15') if not regressPCs: mastercols = matrix_reader('/mnt/lustre/home/cusanovich/500HT/hutt' + mapper + '.' + distance + '.chrmspecific.mastercols.txt',dtype='|S15') masterdic = {} exprcoldic = {} chrmdic = {} for i in range(mastercols.shape[0]): try: masterdic[mastercols[i,0]].append(mastercols[i,1]) except KeyError: masterdic[mastercols[i,0]] = [mastercols[i,1]] exprcoldic[mastercols[i,0]] = mastercols[i,2] chrmdic[mastercols[i,0]] = mastercols[i,4]
glist = open(currfiles + '.gs','w') for snp in masterdic.keys(): #for snp in masterdic.keys()[0:1000]: print >> snplist, snp print >> alist, 'A' print >> glist, 'G' snplist.close() alist.close() glist.close() print "Running permutations..." blocker = open(genodir + 'Block_' + blocknum + 'permwins.txt','w') for perm in xrange(0,100): permer(perm,randind = genoinds) permresults = matrix_reader(genodir + 'output/perm_curr_' + blocknum + '.assoc.txt',dtype='f8') permsort = permresults[permresults[:,12].argsort()] permwins = permsort[0:100,] print >> blocker, '\t'.join(permwins) blocker.close() cleanup = 'rm ' + genodir + '*curr_' + pheno + '.*' ifier(cleanup) print "Writing results..." aller = open('/mnt/lustre/home/cusanovich/500HT/Tissues/' + pheno + '.enrichmentps.txt','w') for i in xrange(0,len(tissueps)): print >> aller, '{0}\t{2:.4g}'.format(dhsdic['rsID'][i],tissueps[i]) aller.close()
genodir = sys.argv[2] outname = sys.argv[3] genor = genodir + 'ByChr/' + outname + '.chr' + chrm + '.genos.txt' anoter = genodir + 'ByChr/' + outname + '.chr' + chrm + '.annot.txt' print "Loading annotations..." sys.stdout.flush() bim = open(genodir + 'ByChr/' + outname + '.chr' + chrm + '.bim', 'r') bimer = bim.readlines() snpids = [x.strip().split()[1] for x in bimer] snppos = [x.strip().split()[3] for x in bimer] print "Loading genotypes..." sys.stdout.flush() raws = matrix_reader(genodir + 'ByChr/' + outname + '.chr' + chrm + '.raw', sep=" ") #linecounter = subprocess.Popen('wc -l ' + genodir + 'ByChr/hutt.imputed.chr' + chrm + '.raw', shell=True, stdout=subprocess.PIPE) #linecount = int(linecounter.communicate()[0].strip().split()[0]) #columncounter = subprocess.Popen('awk -F" " \'{print NF;exit}\' ' + genodir + 'ByChr/hutt.imputed.chr' + chrm + '.raw', shell=True, stdout=subprocess.PIPE) #columncount = int(columncounter.communicate()[0].strip().split()[0]) #raws = numpy.zeros((linecount,columncount),dtype='|S2') #rawin = open(genodir + 'ByChr/hutt.imputed.chr' + chrm + '.raw','r') #for i,line in enumerate(rawin): # raws[i,:] = line.strip().split() #raws = numpy.loadtxt(genodir + 'ByChr/hutt.imputed.chr' + chrm + '.raw',dtype='str') print "Transposing genotypes..." sys.stdout.flush() genorfile = open(genor, 'w') for line in range(6, raws.shape[1]):