def getPopURL(pop, karyo, hash=None, onlyPop=False, onlyKaryo=False):
    """Return an HTML anchor pointing at the ``pop.py`` page of a population.

    pop -- population name; used in the link text unless onlyKaryo applies.
    karyo -- karyotype identifier, sent as the ``k`` query parameter.
    hash -- value of the ``md5`` query parameter; when falsy it is computed
        with ``MEGA.getHash(MEGA.getIndivs(pop))``.
    onlyPop -- when true the link text is just the population name.
    onlyKaryo -- when true (and onlyPop is false) the link text is just the
        karyotype.
    """
    if not hash:
        hash = MEGA.getHash(MEGA.getIndivs(pop))
    if onlyPop:
        label = pop
    elif onlyKaryo:
        label = karyo
    else:
        label = "%s (%s)" % (pop, karyo)
    # The element has to be terminated with </a>; the former trailing "<a>"
    # opened a second anchor that was never closed.
    return '<a href="pop.py?md5=%s&k=%s">%s</a>' % (hash, karyo, label)
def getPrFileName(self, pop, stat, cut, supp=None):
    """Return ``(md5, fName)`` for the ``.top`` file of a statistic.

    pop -- population whose individuals are hashed.
    stat -- ``"iHS"`` or ``"xpEHH"``.
    cut -- cut-off; divided by 100 when embedded in the file name.
    supp -- support population, only used in the xpEHH file name.

    Raises ValueError for any other ``stat`` (this used to surface as an
    UnboundLocalError at the return statement).
    """
    if stat == "iHS":
        fName = "iHS-%1.2f.top" % (cut / 100.0)
    elif stat == "xpEHH":
        fName = "xpEHH-%s-%1.2f.top" % (supp, cut / 100.0)
    else:
        raise ValueError("Unsupported statistic: %r" % (stat,))
    # Both statistics hash the individuals the study registers for them.
    inds = self.study.getStatIndivs(stat, pop)
    return MEGA.getHash(inds), fName
def doxpEHH(study, force, pop, supp):
    """Run the xpEHH pipeline for ``pop`` against support population ``supp``.

    Works inside a freshly created ``xpEHH`` directory (relative to the
    current directory), shells out to ``doHaploStats.py`` and copies the
    per-chromosome ``.xpEHH`` results (chromosomes 1-23) into the cache
    directory of the population set.

    Returns the hash of the population set, or None when the results already
    exist and ``force`` is false.
    """
    logging.info("Starting xpEHH %s %s", pop, supp)
    shutil.rmtree("xpEHH", True)
    os.mkdir("xpEHH")
    os.chdir("xpEHH")
    setIndivs = study.getStatIndivs("xpEHH", pop)
    suppSetIndivs = study.getStatIndivs("xpEHH", supp)
    logging.info("numIndivs: %d %d", len(setIndivs), len(suppSetIndivs))
    popHash = MEGA.getHash(setIndivs)
    suppHash = MEGA.getHash(suppSetIndivs)
    myDir = MEGA.cacheDB + "/sets/" + karyo.karyotype + "/" + popHash
    logging.info("xpEHH: %s hash: %s phase: %s", pop, popHash,
                 study.getPhasePop("xpEHH", pop))
    # The chromosome-22 result file is used as the "already computed" marker.
    if not force and os.path.isfile(myDir + "/%s-22.uxpEHH" % (supp,)):
        logging.info("Already done and not forced")
        os.chdir("..")
        return
    sql.addId("study/%s/xpEHH/%s" % (study.name, pop), popHash)
    sql.addId("study/%s/xpEHH/%s-support" % (study.name, supp), suppHash)
    stats.doBasicInfoSet(setIndivs, force)
    stats.doBasicInfoSet(suppSetIndivs, force)
    # Context managers guarantee the individual lists are flushed and closed
    # before the external scripts read them, even if a write fails.
    for fileName, indivs in (("inds", setIndivs), ("suppinds", suppSetIndivs)):
        with open(tmpDir + "/xpEHH/" + fileName, "w") as w:
            for famId, sampId in indivs:
                w.write("%s\t%s\n" % (famId, sampId))
    os.system("python3 %s/doHaploStats.py prepareData %s %s suppinds %s %s xpEHH" % (
        MEGA.haploScripts, karyo.karyotype, study.name,
        study.xpEHHConf["source"], study.getPhasePop("xpEHH", supp)))
    # Keep the support haplotypes under an "s" prefix; the second
    # prepareData run writes files with the same <chromosome>.hap names.
    for k in range(1, 23 + 1):
        shutil.copyfile("%d.hap" % (k,), "s%d.hap" % (k,))
    os.system("python3 %s/doHaploStats.py prepareData %s %s inds %s %s xpEHH" % (
        MEGA.haploScripts, karyo.karyotype, study.name,
        study.xpEHHConf["source"], study.getPhasePop("xpEHH", pop)))
    os.system("python3 %s/doHaploStats.py XPEHH %s" % (
        MEGA.haploScripts, karyo.karyotype))
    for k in range(1, 23 + 1):
        shutil.copyfile("%d.xpEHH" % (k,),
                        myDir + "/%s-%d.uxpEHH" % (supp, k))
    os.chdir("..")
    logging.info("xpEHH Done")
    return popHash
def getOvFileNames(self, name):
    """Yield ``(md5, fName)`` for every iHS component of comparison ``name``.

    Components come from ``self.getOvComponents(name)`` as
    ``(stat, cut, pop)`` triples. Only ``"iHS"`` is handled; components with
    any other statistic are skipped so that a pair computed for a previous
    component is never yielded again (or an unbound name used).
    """
    for stat, cut, pop in self.getOvComponents(name):
        if stat != "iHS":
            continue
        fName = "iHS-%1.2f.top" % (cut / 100.0)
        inds = self.study.getStatIndivs("iHS", pop)
        yield MEGA.getHash(inds), fName
def doChro(myHash, chro):
    """Print, per SNP of one chromosome, the study value against a reference.

    For every record of the study ``.uiHS`` file the value ``i1`` (when
    ``f0 > 0.5``) or ``i2`` is compared with the middle element of the sorted
    values found for the same SNP in the reference populations (``refPops``).
    SNPs missing from all references, or with a zero on either side, are
    skipped. One line is printed per remaining SNP:
    ``chro rs pos val refVal val/refVal f0``.
    """
    studyCase = getCase(open("%s/sets/%s/%d.uiHS" % (MEGA.cacheDB, myHash, chro)), True)
    refCases = {}
    for ref in refPops:
        refHash = MEGA.getHash(study.getStatIndivs("iHS", ref))
        refCases[ref] = getCase(open("%s/sets/%s/%d.uiHS" % (MEGA.cacheDB, refHash, chro)))
    for rs, pos, f0, i1, i2 in studyCase:
        val = i1 if f0 > 0.5 else i2
        comparisons = []
        for refCase in refCases.values():
            try:
                rf0, ri1, ri2 = refCase[rs]
            except KeyError:
                continue  # SNP absent from this reference population
            comparisons.append(ri1 if rf0 > 0.5 else ri2)
        if not comparisons:
            continue
        comparisons.sort()
        # Floor division: a list index must be an int ("/" yields a float on
        # Python 3 and raised TypeError here).
        refVal = comparisons[len(comparisons) // 2]
        if val == 0 or refVal == 0:
            continue
        # NOTE(review): the log-ratio is computed and immediately replaced by
        # the plain ratio; kept as-is so any error it raises is unchanged.
        stat = math.log(val / refVal)
        stat = val / refVal
        print(chro, rs, pos, val, refVal, stat, f0)
def doPop(study, force, pop):
    """Register a study population and write its basic info set.

    The population hash is always recorded through ``addPopHash``; the SQL id
    and the basic info are (re)written only when ``addPopHash`` returns a
    true value or ``force`` is set.
    """
    indivs = study.pops.getIndivs(pop)
    popHash = MEGA.getHash(indivs)
    added = addPopHash(study.cacheDir + "/pops", pop, popHash)
    if not (added or force):
        return
    sql.addId("study/%s/%s" % (study.name, pop), popHash)
    stats.doBasicInfoSet(indivs, force)
def getDfFileNames(self, name):
    """Return the ``(md5, fName)`` pairs of the iHS components of ``name``.

    Components come from ``self.getDfComponents(name)`` as
    ``(stat, cut, pop)`` triples. Only ``"iHS"`` is handled; other statistics
    are skipped so a pair left over from a previous component is never
    appended twice (or an unbound name used).
    """
    mdNames = []
    for stat, cut, pop in self.getDfComponents(name):
        if stat != "iHS":
            continue
        fName = "iHS-%1.2f.top" % (cut / 100.0)
        inds = self.study.getStatIndivs("iHS", pop)
        mdNames.append((MEGA.getHash(inds), fName))
    return mdNames
def getIndivs(self, pop):
    """Return the (cached) list of individuals of population ``pop``.

    On first use the list is built from ``MEGA.getIndivs(pop)``, extended
    with the populations in ``self.addPops[pop]`` and the individuals in
    ``self.addInds[pop]`` (always leaving out ``self.alwaysRemove``), and
    finally the individuals in ``self.delInds[pop]`` are removed. The result
    is stored in ``self.indsPop`` and returned as-is on later calls.

    An individual listed for deletion that is not present is logged and
    skipped; the remaining deletions are still applied.
    """
    if pop not in self.indsPop:
        members = [ind for ind in MEGA.getIndivs(pop)
                   if ind not in self.alwaysRemove]
        self.indsPop[pop] = members
        for addPop in self.addPops.get(pop, []):
            members.extend(ind for ind in MEGA.getIndivs(addPop)
                           if ind not in self.alwaysRemove)
        for ind in self.addInds.get(pop, []):
            if ind not in self.alwaysRemove:
                members.append(ind)
        for ind in self.delInds.get(pop, []):
            # Handle each removal on its own: one missing individual must
            # not abort the deletions that follow it.
            try:
                members.remove(ind)
            except ValueError:
                logging.error("pop getIndivs %s %s %s",
                              self.study.name, pop, ind)
    return self.indsPop[pop]
def doSNPList(comparisons, comparison):
    """Write one ``cmp.sl.<list>.<comparison>`` table per SNP list.

    SNPs of each list are grouped by ``(SNP[1], SNP[2] rounded down to a
    multiple of 200000)``. Every population's top file is then scanned for
    records whose first two columns match one of those keys. The output has
    one row per matching key: the two key values, the SNPs (``SNP[0]`` and
    ``SNP[2]``), the markers from the fifth column of the top file, and an
    ``X`` in the column of every population in which the key was found.
    """
    stat, cut = comparisons.getSLStat(comparison)
    popInfos = list(comparisons.getSLPopsInfo(comparison))
    SNPLists = MEGA.getSNPLists()
    for name in list(SNPLists.keys()):
        with open("cmp.sl.%s.%s" % (name, comparison), "w") as w:
            w.write("\t\t\t")
            # NOTE(review): the population list is re-read through the
            # gene-list accessor here, replacing the getSLPopsInfo() result
            # above - confirm this is intended.
            popInfos = list(comparisons.getGLPopsInfo(comparison))
            for popInfo in popInfos:
                w.write("\t%s" % (popInfo,))
            w.write("\n")
            winContentSNP = {}
            winContentGenes = {}
            winSpot = {}
            for snp in SNPLists[name]:
                winKey = (snp[1], snp[2] - snp[2] % 200000)
                winContentSNP.setdefault(winKey, []).append(snp)
            for popInfo in popInfos:
                popName = popInfo[0]
                suppPop = popInfo[1] if len(popInfo) > 1 else None
                md5, fName = comparisons.getGLFileName(popName, stat, cut, suppPop)
                with open("%s/sets/%s/%s/%s" % (MEGA.cacheDB, args.karyo, md5, fName)) as f:
                    for l in f:
                        toks = l.rstrip().split("\t")
                        chro = int(toks[0])
                        pos = int(toks[1])
                        # Direct dict membership; building list(keys()) for
                        # every input line made this scan quadratic.
                        if (chro, pos) not in winContentSNP:
                            continue
                        windMarkers = toks[4].split(" ") if len(toks) > 4 else []
                        winContentGenes[(chro, pos)] = windMarkers
                        winSpot.setdefault((chro, pos), []).append(popName)
            for chro, pos in winSpot:
                w.write("%d\t%d\t%s\t%s" % (
                    chro, pos,
                    " ".join([x[0] + " " + str(x[2]) for x in winContentSNP[chro, pos]]),
                    " ".join(winContentGenes[chro, pos])))
                for popInfo in popInfos:
                    w.write("\t")
                    if popInfo[0] in winSpot[(chro, pos)]:
                        w.write("X")
                w.write("\n")
def doBundle(bundle, pops, sizes):
    """Process the member populations of a bundle and write its cache files.

    bundle -- bundle name; names the directory under ``<cacheDB>/bundles``.
    pops, sizes -- parallel lists. The last entry is not processed as a
        population; ``sizes[-1]`` is written as the first line of ``basic``.

    Writes ``basic`` (``sizes[-1]``, then one ``pop<TAB>size`` line per
    member) and ``indivs`` (one individual per line).
    """
    myIndivs = []
    for i in range(len(pops) - 1):
        popIndivs = MEGA.getIndivs(pops[i])
        myIndivs.extend(popIndivs)
        doPop(pops[i], sizes[i], popIndivs)
    try:
        os.mkdir(os.sep.join([MEGA.cacheDB, "bundles"]))
    except OSError:
        pass  # Already exists, OK
    # Use the bundle argument: the body used to read an undefined/global
    # name "pop" here instead of its own parameter.
    bundleDir = os.sep.join([MEGA.cacheDB, "bundles", bundle])
    try:
        os.mkdir(bundleDir)
    except OSError:
        pass  # Already exists, OK
    with open(bundleDir + os.sep + "basic", "w") as w:
        w.write(str(sizes[-1]) + "\n")
        for i in range(len(pops) - 1):
            w.write("\t".join([pops[i], str(sizes[i])]))
            w.write("\n")
    with open(bundleDir + os.sep + "indivs", "w") as w:
        w.write("\n".join(map(str, myIndivs)) + "\n")
def doBundle(bundle, pops, sizes):
    """Process the member populations of a bundle and write its cache files.

    bundle -- bundle name; names the directory under ``<cacheDB>/bundles``.
    pops, sizes -- parallel lists. The last entry is not processed as a
        population; ``sizes[-1]`` is written as the first line of ``basic``.

    Writes ``basic`` (``sizes[-1]``, then one ``pop<TAB>size`` line per
    member) and ``indivs`` (one individual per line).
    """
    myIndivs = []
    for i in range(len(pops) - 1):
        popIndivs = MEGA.getIndivs(pops[i])
        myIndivs.extend(popIndivs)
        doPop(pops[i], sizes[i], popIndivs)
    try:
        os.mkdir(os.sep.join([MEGA.cacheDB, "bundles"]))
    except OSError:
        pass  # Already exists, OK
    # Use the bundle argument: the body used to read an undefined/global
    # name "pop" here instead of its own parameter.
    bundleDir = os.sep.join([MEGA.cacheDB, "bundles", bundle])
    try:
        os.mkdir(bundleDir)
    except OSError:
        pass  # Already exists, OK
    with open(bundleDir + os.sep + "basic", "w") as w:
        w.write(str(sizes[-1]) + "\n")
        for i in range(len(pops) - 1):
            w.write("\t".join([pops[i], str(sizes[i])]))
            w.write("\n")
    with open(bundleDir + os.sep + "indivs", "w") as w:
        w.write("\n".join(map(str, myIndivs)) + "\n")
def preload(self):
    """Open the configuration archive, load the starting map and a test sound.

    The map name is taken from the first line of ``OPTIONS.txt`` in the
    configuration archive: the text after ``' = '``.
    """
    # Options archive.
    self.F_options = MEGA.mega2('Data\\Configuration.MEGA')
    # Map options have to be fetched before the map itself is loaded.
    optionLines = self.F_options.fetch('OPTIONS.txt')
    startingMap = optionLines[0].split(' = ')[1]
    self.currentmap_data = Class_maphandler.maphandler(startingMap)
    self.currentmap_data.PS_BGM(True)
    # Sound-effect setup (test sample).
    self.soundhandler_SFX.LoadSND('HS_SE_000.wav', 0)
def doBasicInfoSet(indivs, force=True):
    """Write the ``basic`` and ``indivs`` files of an individual set.

    indivs -- sequence of ``(family id, individual id)`` string pairs.
    force -- when false, an existing ``basic`` file means nothing is
        rewritten.

    The files live in ``<cacheDB>/sets/<karyotype>/<hash>``; ``basic`` holds
    the number of individuals and ``indivs`` one tab-separated pair per line.

    Returns the hash of the set. The already-exists path used to return None;
    it now returns the hash as well.
    """
    popHash = MEGA.getHash(indivs)
    popDir = os.sep.join([MEGA.cacheDB, "sets", karyo.karyotype, popHash])
    try:
        os.makedirs(popDir)
    except OSError:
        pass  # Already exists, OK
    basicPath = popDir + os.sep + "basic"
    if (not force) and os.path.exists(basicPath):
        return popHash
    with open(basicPath, "w") as w:
        w.write(str(len(indivs)) + "\n")
    with open(popDir + os.sep + "indivs", "w") as w:
        for fam, ind in indivs:
            w.write("\t".join([fam, ind]) + "\n")
    return popHash
def __init__(self, mapfile):
    """Load the map archive ``mapfile`` found under ``self.mpath``.

    ``.MEGA`` is appended when the name does not already end with it
    (case-insensitive). If the file does not exist only a log message is
    printed and nothing is loaded.
    """
    self.clear()
    # Extension handling copied from the mega module.
    if mapfile[-5:].upper() == '.MEGA':
        print('is')
    else:
        mapfile += '.MEGA'
    if not os.path.isfile(self.mpath + mapfile):
        print('LOG /maphandler- no file specified or bad file given')
        print('________________ file given - ' + str(self.mpath + mapfile))
        return
    self.name = mapfile
    self.data_mega = MEGA.mega2(self.mpath + mapfile)
    # Processing of the loaded archive.
    self.mega_process()
    self.process_collision()
def doIBD(studyName):
    """Write ``ibd.problems`` listing pairs with ``PI_HAT >= 0.125``.

    For every population of the study the cached ``genome`` file is parsed
    with ``plink.parseGenome`` and each qualifying record is written as one
    tab-separated line. An IOError while handling a population (for example
    a missing genome file) skips the rest of that population.
    """
    textFields = ("FID1", "IID1", "FID2", "IID2", "RT", "EZ")
    numFields = ("Z0", "Z1", "Z2", "PI_HAT", "PHE", "DST", "PPC", "RATIO")
    myStudy = study.Study(studyName)
    with open("ibd.problems", "w") as w:
        for pop in myStudy.pops.pops:
            md5 = MEGA.getHash(myStudy.pops.getIndivs(pop))
            genomePath = MEGA.cacheDB + "/sets/" + args.karyo + "/" + md5 + "/genome"
            try:
                # The context manager closes the genome file, which was
                # previously left open for every population.
                with open(genomePath) as genome:
                    for rec in plink.parseGenome(genome):
                        if rec["PI_HAT"] >= 0.125:
                            cols = [rec[k] for k in textFields]
                            cols.extend(str(rec[k]) for k in numFields)
                            w.write("\t".join(cols) + "\n")
            except IOError:
                pass
def sendPreamble(md5, studyName, other=None, withMaps=False):
    """Print the CGI header and the opening part of an HTML page.

    The page title is ``other`` when given, otherwise the name looked up for
    ``md5`` via ``MEGA.getNameFromMD5``, otherwise ``studyName``. With
    ``withMaps`` the Leaflet stylesheet and script are included. The body
    starts with a link back to the main page.
    """
    from html import escape

    print("Content-Type: text/html")  # HTML is following
    print()  # blank line, end of headers
    if other:
        name = other
    elif md5:
        name = MEGA.getNameFromMD5(md5)
    else:
        name = studyName
    # <title> and <link> belong in <head>; "<header>" is a body element.
    print("<head>")
    # Escape the title so markup characters in a name cannot break the page.
    print("<title>%s</title>" % (escape(str(name)),))
    if withMaps:
        print('<link rel="stylesheet" href="http://cdn.leafletjs.com/leaflet-0.4/leaflet.css" />')
    print("</head><body>")
    if withMaps:
        print('<script src="http://cdn.leafletjs.com/leaflet-0.4/leaflet.js"></script>')
    print('<a href="%s/mega.py">Main page</a>' % MEGA.webRoot)
def doiHS(study, force, pop):
    """Run the iHS pipeline for population ``pop`` of ``study``.

    Works inside a freshly created ``iHS`` directory (relative to the
    current directory), shells out to ``doHaploStats.py`` and copies the
    per-chromosome ``.uiHS`` results (chromosomes 1-23) into the cache
    directory of the population set; chromosomes whose result cannot be
    copied are skipped.

    Returns the hash of the population set, or None when the results already
    exist and ``force`` is false.
    """
    logging.info("Starting iHS")
    shutil.rmtree("iHS", True)
    os.mkdir("iHS")
    os.chdir("iHS")
    setIndivs = study.getStatIndivs("iHS", pop)
    logging.info("numIndivs: %d", len(setIndivs))
    popHash = MEGA.getHash(setIndivs)
    myDir = MEGA.cacheDB + "/sets/" + karyo.karyotype + "/" + popHash
    logging.info("iHS: %s hash: %s phase: %s", pop, popHash,
                 study.getPhasePop("iHS", pop))
    # The chromosome-22 result file is used as the "already computed" marker.
    if not force and os.path.isfile(myDir + "/22.uiHS"):
        logging.info("Already done and not forced")
        os.chdir("..")
        return
    sql.addId("study/%s/iHS/%s" % (study.name, pop), popHash)
    stats.doBasicInfoSet(setIndivs, force)
    # The context manager guarantees the list is flushed and closed before
    # the external script reads it, even if a write fails.
    with open(tmpDir + "/iHS/inds", "w") as w:
        for famId, sampId in setIndivs:
            w.write("%s\t%s\n" % (famId, sampId))
    os.system("python3 %s/doHaploStats.py prepareData %s %s inds %s %s" % (
        MEGA.haploScripts, karyo.karyotype, study.name,
        study.iHSConf["source"], study.getPhasePop("iHS", pop)))
    os.system("python3 %s/doHaploStats.py iHS %s %s" % (
        MEGA.haploScripts, karyo.karyotype, study.name))
    for k in range(1, 23 + 1):
        try:
            shutil.copyfile("%d.uiHS" % k, myDir + "/%d.uiHS" % k)
        except IOError:
            pass  # no result for this chromosome
    os.chdir("..")
    logging.info("iHS Done")
    return popHash
def getIndivsPop(self, pop, cut, withPop=True, doDistant=False):
    """Return the individuals of ``pop`` retained for IBD cut ``cut``.

    Reads ``ibd.retain.<cut>`` (space separated; the first three fields are
    family id, sample id and population) from the cache directory of the
    population set. With ``doDistant`` the pairs listed in ``distant.<cut>``
    (tab separated) are left out.

    Returns a list of ``(famId, sampId, pop)`` tuples, or ``(famId, sampId)``
    when ``withPop`` is false. IOError from a missing file propagates.
    """
    popDir = MEGA.getHash(self.study.pops.getIndivs(pop))
    baseDir = MEGA.cacheDB + "/sets/" + karyo.karyotype + "/" + popDir
    # A set keeps the exclusion test constant-time per retained individual.
    distantIndivs = set()
    if doDistant:
        with open(baseDir + "/distant." + str(cut)) as f:
            for l in f:
                famId, sampId = tuple(l.rstrip().split("\t"))
                distantIndivs.add((famId, sampId))
    indivs = []
    with open(baseDir + "/ibd.retain." + str(cut)) as f:
        for l in f:
            famId, sampId, indPop = tuple(l.rstrip().split(" ")[:3])
            if (famId, sampId) in distantIndivs:
                continue
            if withPop:
                indivs.append((famId, sampId, indPop))
            else:
                indivs.append((famId, sampId))
    return indivs
def doGeneList(comparisons, comparison):
    """Write ``cmp.gl.<comparison>``: populations against gene lists.

    The header row holds the gene-list names (sorted). Each following row
    starts with the population name (plus the support population in
    parentheses when there is one) and has, per gene list, the markers from
    the fifth column of the population's top file that belong to that list.
    Markers are de-duplicated and written in sorted order so the output is
    reproducible.
    """
    stat, cut = comparisons.getGLStat(comparison)
    popInfos = list(comparisons.getGLPopsInfo(comparison))
    geneLists = MEGA.getGeneLists()
    geneListNames = sorted(geneLists.keys())
    with open("cmp.gl.%s" % (comparison,), "w") as w:
        for listName in geneListNames:
            w.write("\t" + listName)
        w.write("\n")
        for popInfo in popInfos:
            myGenes = dict((listName, set()) for listName in geneLists)
            popName = popInfo[0]
            w.write(popName)
            if len(popInfo) > 1:
                suppPop = popInfo[1]
                w.write(" (%s)" % suppPop)
            else:
                suppPop = None
            md5, fName = comparisons.getGLFileName(popName, stat, cut, suppPop)
            with open("%s/sets/%s/%s/%s" % (MEGA.cacheDB, args.karyo, md5, fName)) as f:
                for l in f:
                    toks = l.rstrip().split("\t")
                    if len(toks) <= 4:
                        continue  # no marker column on this line
                    windMarkers = toks[4].split(" ")
                    for listName, myList in geneLists.items():
                        for marker in windMarkers:
                            if marker in myList:
                                myGenes[listName].add(marker)
            for listName in geneListNames:
                w.write("\t" + " ".join(sorted(myGenes[listName])))
            w.write("\n")
# Study page: title, description, map and the population table.
print("<h1>%s</h1>" % (studyName,))
print(study.desc)
print("<br>")
print(study.summary)
print("<h2>Map</h2>")
web.addMap(study.pops.pops)
print("<h2>Populations</h2>")
cuts = study.ibd.cuts
print('<table border="1">')
# Header row: population, size, then one column per IBD cut.
print('<tr><td>Population</td><td>#</td>')
for cut in cuts:
    print('<td>', cut, '</td>')
print('</tr>')
for pop in study.pops.pops:
    popIndivs = study.pops.getIndivs(pop)
    md5 = MEGA.getHash(popIndivs)
    print("<tr>")
    print("<td>", web.getPopURL(pop, karyo.karyotype, md5), "</td>")
    print("<td>", len(popIndivs), "</td>")
    for cut in cuts:
        try:
            print('<td>', len(study.ibd.getIndivsPop(pop, cut)), '</td>')
        except IOError:
            # No IBD result for this cut: leave the cell empty.
            print('<td></td>')
    print("</tr>")
# Every row is already closed above; the unmatched </td> and </tr> that used
# to be printed before </table> are dropped.
print("</table>")
print("<br>")
print(web.linkFile(studyName, "ibd.problems", "IBD issues (>12.5)", True))
print('<h2>PCA</h2>')
def doZoom(study, force, name):
    """Build the ``zoom.txt`` table for the zoom region ``name`` of ``study``.

    The region (chromosome, start, end, population expression, IBD cut) is
    read from ``study.zoom.general[name]``. The table has one row per SNP of
    the projected phased file: rs id, position, genetic position, ancestral
    allele, one column per requested statistic and the haplotypes. The
    result is copied into the cache directory of the individual set.

    ``force`` is not used yet (see the XXX marker below).

    NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    listing - confirm against the original file.
    """
    logging.info("Starting Zoom - %s", name)
    #XXX FORCE
    try:
        os.mkdir("zoom")
    except OSError:
        pass # Already exists
    os.chdir("zoom")
    chro, start, end, pop, ibd = study.zoom.general[name]
    # The population expression may join several populations with "+".
    pops = pop.split("+")
    indivs = []
    for myPop in pops:
        if ibd:
            indivs.extend(study.ibd.getIndivsPop(myPop, ibd, False))
        else:
            indivs.extend(study.pops.getIndivs(myPop))
    hash = MEGA.getHash(indivs)
    # Keep the SNPs whose position lies inside [start, end].
    posAlls = ensembl.getSNPs(chro)
    poses = {}
    snps = []
    snpAtPos = {}
    for rs, content in list(posAlls.items()):
        pos = content[0]
        if pos >= start and pos <= end:
            snps.append(rs)
            poses[rs] = pos
            snpAtPos[pos] = rs
    ancAlls = ensembl.getAncs(chro)
    ancs = {}
    for snp in snps:
        ancs[snp] = ancAlls.get(snp, None)
    # Genetic positions: read from the map file (first line skipped), taking
    # the position from column 2 and the genetic position from column 4.
    gPoses = {}
    f = open(MEGA.geneticMapDB + "/37-%d.map" % chro)
    f.readline()
    for l in f:
        toks = l.rstrip().split("\t")
        pos = int(toks[1])
        if pos < start:
            continue
        if pos > end:
            break
        rs = snpAtPos.get(pos, "")
        if rs == "":
            continue
        gPos = float(toks[3])
        gPoses[rs] = gPos
    f.close()
    #assuming ihs phase.conf exists
    source = study.iHSConf["source"]
    refPop = study.getPhasePop("iHS", pop)
    if refPop != "shapeIt":
        phasedFile = "%s/%s/%s-%d.gz" % (MEGA.phaseDB, source, refPop, chro)
    else:
        phasedFile = "%s/%s/%d.gz" % (MEGA.phaseDB, source, chro)
    inds = [x[1] for x in indivs]
    # Project the phased file onto the retained individuals and SNPs; the
    # result is written to "<chro>.gz" in the zoom directory.
    project_beagle_phase(gzip.open("%d.gz" % chro, "w"), gzip.open(phasedFile), ind_retain=inds, snp_retain=snps, want_phased=True, is_phased=True)
    # NOTE(review): gzip.open() without a text mode yields bytes on Python 3,
    # while the lines below are split with a str separator - confirm.
    realSNPs = set()
    f = gzip.open("%d.gz" % chro)
    f.readline()
    f.readline()
    for l in f:
        toks = l.rstrip().split("\t")
        realSNPs.add(toks[1])
    f.close()
    # Collect the requested statistics, keyed by statistic name then rs id.
    stats = study.zoom.stats[name]
    vals = {}
    for stat, params in stats:
        if stat == "iHS":
            vals["iHS"] = {}
            myHash = MEGA.getHash(study.getStatIndivs("iHS", pop))
            f = open(MEGA.cacheDB + "/sets/" + karyo.karyotype + "/" + myHash + "/" + str(chro) + ".iHS")
            for l in f:
                toks = l.rstrip().split("\t")
                if toks[0] in realSNPs:
                    vals["iHS"][toks[0]] = float(toks[2])
        elif stat == "xpEHH":
            vals["xpEHH"] = {}
            myHash = MEGA.getHash(study.getStatIndivs("xpEHH", pop))
            # params[0] is the prefix of the xpEHH file name.
            f = open(MEGA.cacheDB + "/sets/" + karyo.karyotype + "/" + myHash + "/" + params[0] + "-" + str(chro) + ".xpEHH")
            for l in f:
                toks = l.rstrip().split("\t")
                if toks[0] in realSNPs:
                    vals["xpEHH"][toks[0]] = float(toks[2])
        elif stat == "EHH":
            rsId = params[0]
            allele = params[1]
            vals["EHH"] = calcEHH(chro, poses, rsId, allele)
    statNames = list(vals.keys())
    statNames.sort()
    # Write the table: fixed columns, statistic columns, then the haplotype
    # columns taken from the second header line of the projected file.
    w = open("zoom.txt", "w")
    f = gzip.open("%d.gz" % chro)
    f.readline()
    header = f.readline().rstrip().split("\t")[2:]
    w.write("RS\tpos\tgPos\tanc\t")
    # From here on the loop variable reuses (and overwrites) the "name"
    # parameter.
    for name in statNames:
        w.write(name + "\t")
    w.write("\t".join(header))
    w.write("\n")
    for l in f:
        toks = l.rstrip().split("\t")
        rs = toks[1]
        haplos = toks[2:]
        w.write("%s\t%d\t%s\t%s\t" % (rs, poses.get(rs, 0), str(gPoses.get(rs, "")), ancs.get(rs, "")))
        for name in statNames:
            w.write(str(vals[name].get(rs, "")) + "\t")
        w.write("\t".join(haplos))
        w.write("\n")
    w.close()
    f.close()
    shutil.copyfile("zoom.txt", MEGA.cacheDB + "/sets/" + karyo.karyotype + "/" + hash + "/" + "%s-%d-%d-%d.zoom" % (study.name, chro, start, end))
def doIBDs(ibdlist):
    """Run the IBD pipeline for a list of ``(study, force, pop)`` entries.

    Populations that need work are written to the file ``lst``, which is
    piped into the ``doRelated`` script for each step (split, IBD, statIBD
    and, per cut, removeCloserIBD). The resulting files under ``ibdata/`` are
    copied into the cache directory of each population set.

    NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    listing - confirm against the original file.
    """
    logging.info("Starting IBD")
    doSomething = False
    w = open("lst", "w")
    popsToDo = {}
    for study, force, pop in ibdlist:
        indivs = study.pops.getIndivs(pop) #this is OK, but it is really study.ibd.pops
        hash = MEGA.getHash(indivs)
        if not force:
            # Without force a population is redone only if the retain file
            # of at least one cut is missing.
            doThis = False
            for cut in study.ibd.cuts:
                if not os.path.isfile(MEGA.cacheDB + "/sets/" + karyo.karyotype + "/" + hash + "/ibd.retain." + str(cut)):
                    doThis = True
            if not doThis:
                continue
        doSomething = True
        popsToDo[pop] = hash
        for famId, sampId in indivs:
            w.write("%s\t%s\t%s\n" % (famId, sampId, pop))
    w.close()
    if doSomething:
        logging.debug("Running doRelated split")
        os.system("cat lst |python3 %s/doRelated split" % (MEGA.inbreedingScripts))
        logging.debug("Running doRelated IBD")
        os.system("cat lst |python3 %s/doRelated IBD" % (MEGA.inbreedingScripts))
        logging.debug("Running doRelated statIBD")
        os.system("cat lst |python3 %s/doRelated statIBD > ibdata/statIBD" % (MEGA.inbreedingScripts))
        for pop in popsToDo:
            # File names under ibdata/ use "_" in place of spaces.
            pop_ = pop.replace(" ", "_")
            hash = popsToDo[pop]
            logging.debug("copy %s %s", "ibdata/%s.genome" % (pop_,), MEGA.cacheDB + "/sets/" + karyo.karyotype + "/" + hash + "/genome")
            shutil.copyfile("ibdata/%s.genome" % (pop_,), MEGA.cacheDB + "/sets/" + karyo.karyotype + "/" + hash + "/genome")
        # "study" is whatever the last entry of ibdlist bound it to.
        for cut in study.ibd.cuts:
            logging.debug("Running doRelated removeCloserIBD %f", cut)
            os.system("cat lst |python3 %s/doRelated removeCloserIBD %f" % (MEGA.inbreedingScripts, cut))
            # statIBD: first column is the key, the stripped remaining
            # columns are stored re-joined with tabs.
            ibds = {}
            sIBD = open("ibdata/statIBD")
            for l in sIBD:
                toks = l.strip().rstrip().split("\t")
                ibds[toks[0]] = "\t".join([x.strip().rstrip() for x in toks[1:]])
            sIBD.close()
            for pop in popsToDo:
                if pop not in ibds:
                    continue
                hash = popsToDo[pop]
                myDir = MEGA.cacheDB + "/sets/" + karyo.karyotype + "/" + hash
                w = open(myDir + "/IBDstat." + str(cut), "w")
                w.write(ibds[pop] + "\n")
                w.close()
                pop_ = pop.replace(" ", "_")
                logging.debug("copy %s %s", "ibdata/%s.distant" % (pop_,), MEGA.cacheDB + "/sets/" + karyo.karyotype + "/" + hash + "/distant." + str(cut))
                shutil.copyfile("ibdata/%s.distant" % (pop_,), MEGA.cacheDB + "/sets/" + karyo.karyotype + "/" + hash + "/distant." + str(cut))
                shutil.copyfile("ibdata/%s_ibd.txt" % (pop_,), MEGA.cacheDB + "/sets/" + karyo.karyotype + "/" + hash + "/ibd.retain." + str(cut))
    logging.info("IBD Done")
def readWindowFile(f):
    """Parse a tab-separated window file and close it.

    Each line yields ``(chro, startPos, ensgs, hugos)``: the first two
    columns as ints, the second-to-last column split on spaces (``ensgs``)
    and the third-to-last column split on spaces (``hugos``).
    """
    content = []
    for l in f:
        toks = l.rstrip().split("\t")
        chro = int(toks[0])
        startPos = int(toks[1])
        ensgs = toks[-2].split(" ")
        hugos = toks[-3].split(" ")
        content.append((chro, startPos, ensgs, hugos))
    f.close()
    return content


windows = readWindowFile(open(args.windows))
if isAmigo:
    MEGA.getHugo()
    with open(MEGA.amigoFile, 'rb') as f:
        # The first three pickled objects are not needed: load and discard.
        for _ in range(3):
            pickle.load(f)
        amiGO = pickle.load(f)
else:
    # Pickle streams must be read in binary mode (text mode fails on
    # Python 3), matching how the amigo file is opened.
    with open(MEGA.uniprotFile, 'rb') as f:
        pickle.load(f)  # first object (uniprot) is not needed
        uniGO = pickle.load(f)
import sys
import MEGA
from MEGA.study import Study
import rpy2.robjects as robjects
from rpy2.robjects.vectors import DataFrame

# Command line: study, admixture .Q file, fam file, then the populations in
# the order they should appear.
if len(sys.argv) <= 4:
    # The script name is now actually substituted into the usage message.
    print("%s study admixture.Q famFile pop pop ..." % (sys.argv[0],))
    sys.exit(-1)
study = Study(sys.argv[1])
admix = sys.argv[2]
indList = sys.argv[3]
goodOrder = sys.argv[4:]
indivPop = MEGA.getPop4Indiv()
# (first, second) space-separated fields of every line of the fam file.
inds = []
with open(indList) as f:
    for l in f:
        toks = l.split(" ")
        inds.append((toks[0], toks[1]))
# Raw lines of the admixture file, kept unparsed.
lines = []
with open(admix) as f:
    for l in f:
        lines.append(l)
def getRefiHS(study, name):
    """Return an HTML link (followed by ``<br>``) to the iHS set of ``name``.

    The link targets ``pop.py`` under ``MEGA.webRoot`` with the hash of the
    individuals the study uses for iHS on that population.
    """
    indivs = study.getStatIndivs("iHS", name)
    md5 = MEGA.getHash(indivs)
    template = '<a href="%s/pop.py?md5=%s">iHS %s</a><br>'
    return template % (MEGA.webRoot, md5, name)
def getRefPop(indivs, karyo, popName=None):
    """Return an HTML link (followed by ``<br>``) to a set of individuals.

    The link text is ``popName``; when it is falsy the name registered for
    the hash of ``indivs`` (``MEGA.getNameFromMD5``) is used instead.
    """
    md5 = MEGA.getHash(indivs)
    label = popName if popName else MEGA.getNameFromMD5(md5)
    template = '<a href="%s/pop.py?md5=%s&k=%s">%s</a><br>'
    return template % (MEGA.webRoot, md5, karyo, label)
os.mkdir(os.sep.join([MEGA.cacheDB, "bundles"])) except OSError: pass #Already exists, OK bundleDir = os.sep.join([MEGA.cacheDB, "bundles", pop]) try: os.mkdir(bundleDir) except OSError: pass #Already exists, OK w = open(bundleDir + os.sep + "basic", "w") w.write(str(sizes[-1]) + "\n") for i in range(len(pops) - 1): w.write("\t".join([pops[i], str(sizes[i])])) w.write("\n") w.close() w = open(bundleDir + os.sep + "indivs", "w") w.write("\n".join(map(lambda x: str(x), myIndivs)) + "\n") w.close() if len(lines) == 2: #CORE POP size = int(lines[1]) doPop(pop, size, MEGA.getIndivs(pop)) else: pops = [] sizes = [] for line in lines[1:]: toks = line.rstrip().split("\t") pops.append(toks[0].lstrip().rstrip()) sizes.append(int(toks[1])) doBundle(pop, pops, sizes)
def copy(self):
    """Copy each ``hwe-<pop>`` file into the cache as ``hwe.bz2``.

    The destination is the set directory named after the hash of the
    population's individuals, under the current karyotype.
    """
    for pop in self.pops:
        popHash = MEGA.getHash(self.study.pops.getIndivs(pop))
        source = 'hwe-' + pop
        target = '%s/sets/%s/%s/hwe.bz2' % (MEGA.cacheDB, karyo.karyotype, popHash)
        shutil.copyfile(source, target)
def getHash(popStudy):
    """Return the MEGA hash of the individuals of population ``popStudy``.

    The individuals are looked up in the module-level ``study``.
    """
    indivs = study.pops.getIndivs(popStudy)
    return MEGA.getHash(indivs)
import os from __future__ import print_function from igrat import Executor import MEGA lexec = MEGA.executor sources = {} indSrc = {} for ref, fname in MEGA.getPlinkFiles(): sources[ref] = fname indSrc[ref] = [] sp = open(MEGA.metaDB + os.sep + "clean.tab") for l in sp: toks = l.rstrip().split("\t") src = toks[9] if src in sources: indSrc[src].append((toks[0],toks[1])) sp.close() tartuSources = open(MEGA.metaDB + os.sep + "tartu.db").read().splitlines() for src in tartuSources: fName = sources[src] inds = indSrc[src] print(fName) w = open(MEGA.plinkDB + os.sep + fName,"w") for fam, samp in inds: w.write("%s %s\n" % (fam, samp)) w.close()
#!/usr/local/bin/python
import os
import sys
try:
    import ConfigParser as config
except ImportError:  # Python 3 renamed the module
    import configparser as config
import MEGA

# The single-argument print(...) form behaves the same on Python 2 and 3.
if len(sys.argv) not in [2]:
    print("Usage: %s lcfg" % (sys.argv[0].split("/")[-1], ))
    sys.exit(-1)
cfg = sys.argv[1]
MEGA.loadLocal(cfg)
metaDB = MEGA.metaDB
ibd = MEGA.ibd
# Sample ids per population, from clean.tab.
popCounts = {}
with open(metaDB + os.sep + "clean.tab") as cleanTab:
    for l in cleanTab:
        rec = MEGA.getRecord(l)
        popCounts.setdefault(rec["pop"], []).append(rec["sampId"])
# sorted() works on both Python 2 lists and Python 3 key views.
popNames = sorted(popCounts)
for pop in popNames:
    if pop == "":
        continue  # clean has to sort this (curation is better)
    sys.stdout.write(pop + "\t" + str(len(popCounts[pop])) + "\t")
    pop = pop.rstrip()  # clean has to sort this
    # Number of lines in the population's keep file.
    with open(ibd + os.sep + pop.replace(" ", "_") + "_keep.txt") as ibp:
        sys.stdout.write(str(len(ibp.readlines())) + "\t")
    rp = open(ibd + os.sep + pop.replace(" ", "_") + "_remove.txt")
def getCase(f):
    """Read positions and values from an open tab-separated file.

    Empty fields are dropped; of the remaining fields the second is the
    position (int) and the third the value (float). Returns the two parallel
    lists ``(positions, values)``.
    """
    x = []
    y = []
    for l in f:
        # The comprehension variable no longer reuses the name of the result
        # list "x" (on Python 2 it would have overwritten it).
        toks = [tok for tok in l.rstrip().split("\t") if tok != ""]
        pos = int(toks[1])
        uihs = float(toks[2])
        x.append(pos)
        y.append(uihs)
    return x, y


study = MEGA.study.Study(studyName)
myHash = MEGA.getHash(study.getStatIndivs("iHS", studyPop))


def doChro(myHash, chro):
    """Return ``getCase`` of the cached ``.iHS`` file of one chromosome."""
    # The context manager closes the file, which used to be left open.
    with open("%s/sets/%s/%d.iHS" % (MEGA.cacheDB, myHash, chro)) as f:
        return getCase(f)


# Concatenate positions and values over all chromosomes.
x = []
y = []
for i in chros:
    cx, cy = doChro(myHash, i)
    print(len(cx))
    x.extend(cx)
    y.extend(cy)
print(1)
#!/usr/local/bin/python from __future__ import print_function import os import sys import MEGA if len(sys.argv) not in [2]: print("Usage: %s lcfg" % (sys.argv[0].split("/")[-1],)) sys.exit(-1) cfg = sys.argv[1] MEGA.loadLocal(cfg) metaDB = MEGA.metaDB ibd = MEGA.ibd popCounts = {} for l in open(metaDB + os.sep + "clean.tab"): rec = MEGA.getRecord(l) popCounts.setdefault(rec["pop"], []).append(rec["sampId"]) popNames = list(popCounts.keys()) popNames.sort() for pop in popNames: if pop=="": continue #clean has to sort this (curation is better) sys.stdout.write(pop + "\t" + str(len(popCounts[pop])) + "\t") pop=pop.rstrip() #clean has to sort this ibp = open(ibd + os.sep + pop.replace(" ", "_") + "_keep.txt") sys.stdout.write(str(len(ibp.readlines())) + "\t") rp = open(ibd + os.sep + pop.replace(" ", "_") + "_remove.txt")
def readfile(fname):
    """Return the lines of text file ``fname`` (line endings kept).

    If the file cannot be opened or read the error message is printed and the
    I/O error is re-raised; previously the function went on to fail with an
    UnboundLocalError on its return statement.
    """
    try:
        with open(fname, 'r') as f:
            return f.readlines()
    except (IOError, OSError):
        print('error/file already closed!')
        raise


os.chdir('Main_data')
m = MEGA.mega2('DATA.MEGA')
# The first line of MEGAORDER.txt lists the files to pack.
ford = readfile('MEGAORDER.txt')
ford = ford[0].strip('\n')
print(ford)
ford = csv2array(ford)
for entry in ford:
    dat = [line.strip('\n') for line in readfile(entry)]
    m.adddata([entry, dat])
# NOTE(review): save() is called once after all entries were added; the
# collapsed listing does not show whether it originally sat inside the loop.
m.save()
print('parsetest')
import sys
import MEGA

# Prints the size of a core population, or the sizes of the members of a
# bundle followed by their total. The single-argument print(...) form
# produces the same output on Python 2 and Python 3.
if len(sys.argv) != 2:
    print("python %s pop" % (sys.argv[0],))
    sys.exit(-1)
pop = sys.argv[1]
if pop in MEGA.bundles:
    print("BUNDLE")
    myPops = MEGA.bundles[pop]
    cnt = 0
    for myPop in myPops:
        myLen = len(MEGA.getIndivs(myPop))
        print("%20s\t%5d" % (myPop, myLen))
        cnt += myLen
    print("%20s\t%5d" % ("All", cnt))
else:
    print("CORE POP")
    print(len(MEGA.getIndivs(pop)))
print("<tr><td>") print("</td><td>".join(l.rstrip().split("\t"))) print("</td></tr>") print("</table>") print(web.linkFile(studyName, "cmp.pr.%s" % name, "(download)", isStudy=True, fType="text/plain")) print("<br>") print(web.linkFile(studyName, "cmp.p2.%s" % name, "Windows", isStudy=True, fType="text/plain")) elif study.comparisons.getType(name)=="Gene List": os.chdir(MEGA.cacheDB + "/studies/"+studyName) f = open("cmp.gl.%s" % name) print("<table>") for l in f: print("<tr><td>") print("</td><td>".join(l.rstrip().split("\t"))) print("</td></tr>") print("</table>") print(web.linkFile(studyName, "cmp.gl.%s" % name, "(download)", isStudy=True, fType="text/plain")) elif study.comparisons.getType(name)=="SNP List": os.chdir(MEGA.cacheDB + "/studies/"+studyName) snpLists = MEGA.getSNPLists() for lst in snpLists: f = open("cmp.sl.%s.%s" % (lst,name)) print(lst) print("<table border=1>") for l in f: print("<tr><td>") print("</td><td>".join(l.rstrip().split("\t"))) print("</td></tr>") print("</table>") print(web.linkFile(studyName, "cmp.sl.%s.%s" % (lst,name), "(download)", isStudy=True, fType="text/plain"))