def prepPop2(allInds, database, refPop, isXPEHH):
    """Prepare per-chromosome haplotype/map files for one population.

    Steps, in order:

    1. Write the second field of every line of ``allInds`` to a sibling
       file named ``allInds + "_"``, one value per line.
    2. For each chromosome 1..23 that ``karyo.accept(chro, 1)`` accepts,
       submit a ``beagle2ihs.py`` job through ``lexec`` that writes its
       output under the ``tmp-<chro>`` prefix, then wait for the jobs.
    3. For each accepted chromosome, either rename ``tmp-<chro>.hap`` and
       ``tmp-<chro>.map`` to ``<chro>.hap`` / ``<chro>.map`` (when
       ``isXPEHH`` is true) or hand ``tmp-<chro>`` to ``removeFixed``.

    Args:
        allInds: Path of the individuals list. Fields may be separated by
            single spaces or tabs.
        database: Directory name under ``MEGA.phaseDB`` holding the phased
            data.
        refPop: Reference population name. The special value "shapeIt"
            selects input files named ``<chro>.gz``; any other value
            selects ``<refPop>-<chro>.gz``.
        isXPEHH: If true, keep the converted files as they are instead of
            passing them through ``removeFixed``.

    Raises:
        IndexError: If a line of ``allInds`` has fewer than two fields.
    """
    # Both handles are context-managed so they are closed even when a
    # malformed line raises part-way through the copy.
    with open(allInds) as indFile, open(allInds + "_", "w") as idFile:
        for line in indFile:
            # Every single space becomes a tab before splitting, so runs of
            # spaces yield empty fields (this is not a plain split()).
            toks = line.rstrip().replace(" ", "\t").split("\t")
            idFile.write(toks[1] + "\n")

    for chro in range(1, 23 + 1):  # Human dependent
        if not karyo.accept(chro, 1):
            continue
        # Only the phased-input path depends on the reference population.
        if refPop != "shapeIt":
            phased = "%s/%s/%s-%d.gz" % (MEGA.phaseDB, database, refPop, chro)
        else:
            phased = "%s/%s/%d.gz" % (MEGA.phaseDB, database, chro)
        lexec.submit(
            "python3",
            "%s/beagle2ihs.py %s %s tmp-%d %s/37-%d.map %d" % (
                MEGA.phasingScripts, phased, allInds, chro,
                MEGA.geneticMapDB, chro, chro))
    # All conversions must have finished before their outputs are touched.
    lexec.wait(True)

    for chro in range(1, 23 + 1):  # Human dependent
        if not karyo.accept(chro, 1):
            continue
        if isXPEHH:
            os.rename('tmp-%d.hap' % chro, '%d.hap' % chro)
            os.rename('tmp-%d.map' % chro, '%d.map' % chro)
        else:
            # NOTE(review): removeFixed is defined elsewhere; presumably it
            # filters fixed (monomorphic) sites while copying - confirm.
            removeFixed('tmp-%d' % chro, '%d' % chro)
# Command dispatch: `command` selects the pipeline step; the step's own
# arguments start at sys.argv[3].
if command == 'prepareData':
    # argv: <studyName> <allInds> <source> <refPop> [extra]
    studyName = sys.argv[3]
    allInds = sys.argv[4]
    source = sys.argv[5]
    refPop = sys.argv[6]
    # The mere presence of an extra argument (whatever its value) selects
    # XP-EHH mode.
    isXPEHH = len(sys.argv) > 7
    prepPop2(allInds, source, refPop, isXPEHH)
elif command == 'iHS':
    # Submit one `ihs` job per accepted chromosome, then wait for them.
    for k in range(1, maxChro + 1):
        if not karyo.accept(k, 1):  # needs whole chromosome accepted
            continue
        inhap = "%d.hap" % (k,)
        inmap = "%d.map" % (k,)
        out = "%d.uiHS" % (k,)
        # lexec.out is set before each submit; presumably it names the file
        # receiving the job's output - confirm against the executor.
        lexec.out = out
        lexec.submit("ihs", "%s %s" % (inmap, inhap))
    lexec.wait(True)
elif command == 'statIHS':
    # argv: <maxBin> <winSize> <minSNPs>, forwarded unchanged to statIHS.py.
    import subprocess

    maxBin = sys.argv[3]
    winSize = sys.argv[4]
    minSNPs = sys.argv[5]
    # Argument-list form: the values reach the script verbatim, with no
    # shell word-splitting or metacharacter interpretation.
    subprocess.call(["python3", "%s/statIHS.py" % MEGA.haploScripts,
                     maxBin, winSize, minSNPs])
end = pos write_block(block, chro, start, end) block += 1 return block if args.step == "prepFST": group = args.group try: os.mkdir(group) except OSError: pass os.chdir(group) os.system('rm gp-* 2>/dev/null') plink.getIndivs("../" + indList, "fst", isBinary=False, acceptFun=lambda chro, pos: chro == karyo.groups[group] and karyo.accept(chro, pos), suf=myStudy.fs.sampSuff) os.system("plink --noweb --recode --file fst --maf %f --out gp" % myStudy.fs.MAF) nblocks = slice_plink("gp", args.max) for block in range(nblocks): to_genepop("gp-%d" % block, "conv-%d" % block, popList) elif args.step == "FST": group = args.group os.chdir(group) for i in range(100000): try: os.mkdir(str(i)) except OSError: pass if os.path.exists("conv-%d.gp" % i): os.chdir(str(i))