Exemple #1
0
def getPopURL(pop, karyo, hash=None, onlyPop=False, onlyKaryo=False):
    """Return an HTML anchor pointing at the ``pop.py`` page of a population.

    Args:
        pop: Population name; also used to look up the hash when ``hash``
            is not supplied.
        karyo: Karyotype identifier, sent as the ``k`` query parameter.
        hash: Pre-computed md5 of the population. When falsy it is computed
            with ``MEGA.getHash(MEGA.getIndivs(pop))``.
        onlyPop: Label the link with the population name only.
        onlyKaryo: Label the link with the karyotype only (ignored when
            ``onlyPop`` is true).

    Returns:
        The ``<a href=...>label</a>`` string. With neither flag set the label
        is ``"pop (karyo)"``.
    """
    if not hash:
        hash = MEGA.getHash(MEGA.getIndivs(pop))
    if onlyPop:
        label = pop
    elif onlyKaryo:
        label = karyo
    else:
        label = "%s (%s)" % (pop, karyo)
    # The anchor must be closed with </a>; the previous "<a>" opened a second,
    # never-closed anchor instead.
    return '<a href="pop.py?md5=%s&k=%s">%s</a>' % (hash, karyo, label)
Exemple #2
0
 def getPrFileName(self, pop, stat, cut, supp=None):
     """Return ``(md5, fileName)`` of the ``.top`` file for a statistic.

     Args:
         pop: Population whose individuals are hashed.
         stat: ``"iHS"`` or ``"xpEHH"``.
         cut: Cut-off in percent; it is divided by 100 for the file name.
         supp: Support population, only used in the xpEHH file name.

     Returns:
         Tuple ``(md5, fName)``.

     Raises:
         ValueError: If ``stat`` is not a supported statistic (previously
             this surfaced as an UnboundLocalError on the return line).
     """
     if stat == "iHS":
         fName = "iHS-%1.2f.top" % (cut / 100.0)
     elif stat == "xpEHH":
         fName = "xpEHH-%s-%1.2f.top" % (supp, cut / 100.0)
     else:
         raise ValueError("unsupported statistic: %r" % (stat,))
     # Both statistics hash the individuals registered under their own name.
     md5 = MEGA.getHash(self.study.getStatIndivs(stat, pop))
     return md5, fName
Exemple #3
0
def doxpEHH(study, force, pop, supp):
    """Compute xpEHH for ``pop`` against the support population ``supp``.

    Works inside a freshly recreated ``xpEHH`` directory under the current
    working directory and changes back to the parent before returning.

    Args:
        study: Study object providing ``getStatIndivs``, ``getPhasePop``,
            ``name`` and ``xpEHHConf``.
        force: When false, the run is skipped if the cached
            ``<supp>-22.uxpEHH`` file already exists.
        pop: Population under study.
        supp: Support (reference) population.

    Returns:
        The hash of the individuals of ``pop``, or ``None`` when the run was
        skipped because a cached result exists.
    """
    logging.info("Starting xpEHH %s %s", pop, supp)
    shutil.rmtree("xpEHH", True)
    os.mkdir("xpEHH")
    os.chdir("xpEHH")
    setIndivs = study.getStatIndivs("xpEHH", pop)
    suppSetIndivs = study.getStatIndivs("xpEHH", supp)
    logging.info("numIndivs: %d %d", len(setIndivs), len(suppSetIndivs))
    hash = MEGA.getHash(setIndivs)
    hashSupp = MEGA.getHash(suppSetIndivs)
    myDir = MEGA.cacheDB + "/sets/" + karyo.karyotype + "/" + hash
    logging.info("xpEHH: %s hash: %s phase: %s", pop, hash,
                 study.getPhasePop("xpEHH", pop))
    if not force:
        # The chromosome-22 output is used as the "already computed" marker.
        if os.path.isfile(myDir + "/%s-22.uxpEHH" % (supp,)):
            logging.info("Already done and not forced")
            os.chdir("..")
            return
    name = "study/%s/xpEHH/%s" % (study.name, pop)
    sql.addId(name, hash)
    suppName = "study/%s/xpEHH/%s-support" % (study.name, supp)
    sql.addId(suppName, hashSupp)
    stats.doBasicInfoSet(setIndivs, force)
    stats.doBasicInfoSet(suppSetIndivs, force)
    # Individual lists consumed by doHaploStats.py; "with" guarantees the
    # files are flushed and closed even if a write fails.
    with open(tmpDir + "/xpEHH/inds", "w") as w:
        for famId, sampId in setIndivs:
            w.write("%s\t%s\n" % (famId, sampId))
    with open(tmpDir + "/xpEHH/suppinds", "w") as w:
        for famId, sampId in suppSetIndivs:
            w.write("%s\t%s\n" % (famId, sampId))
    # Prepare the support population first and keep its haplotypes as
    # s<k>.hap, because the second prepareData call writes <k>.hap again.
    os.system("python3 %s/doHaploStats.py prepareData %s %s suppinds %s %s xpEHH" %
              (MEGA.haploScripts, karyo.karyotype, study.name,
               study.xpEHHConf["source"],
               study.getPhasePop("xpEHH", supp)))
    for k in range(1, 23 + 1):
        shutil.copyfile("%d.hap" % (k,),
                        "s%d.hap" % (k,))
    os.system("python3 %s/doHaploStats.py prepareData %s %s inds %s %s xpEHH" % (
              MEGA.haploScripts, karyo.karyotype, study.name,
              study.xpEHHConf["source"], study.getPhasePop("xpEHH", pop),))
    os.system("python3 %s/doHaploStats.py XPEHH %s" % (
        MEGA.haploScripts, karyo.karyotype))
    # Publish the per-chromosome results into the cache directory of the set.
    for k in range(1, 23 + 1):
        shutil.copyfile("%d.xpEHH" % (k,),
                        myDir + "/%s-%d.uxpEHH" % (supp, k,))
    os.chdir("..")
    logging.info("xpEHH Done")
    return hash
Exemple #4
0
 def getOvFileNames(self, name):
     """Yield ``(md5, fileName)`` for every component of comparison ``name``.

     Components come from ``self.getOvComponents(name)`` as
     ``(stat, cut, pop)`` triples. Only the ``"iHS"`` statistic is handled.

     Raises:
         ValueError: On a component with another statistic. Previously such
             a component re-yielded the values of the preceding component,
             or raised UnboundLocalError when it came first.
     """
     for stat, cut, pop in self.getOvComponents(name):
         if stat != "iHS":
             raise ValueError("unsupported statistic: %r" % (stat,))
         fName = "iHS-%1.2f.top" % (cut / 100.0)
         inds = self.study.getStatIndivs("iHS", pop)
         yield MEGA.getHash(inds), fName
Exemple #5
0
def doChro(myHash, chro):
    """Print, per SNP of one chromosome, the study uiHS value against the
    median of the reference populations.

    Args:
        myHash: Hash of the study set; selects the ``.uiHS`` file to read.
        chro: Chromosome number.

    For every study SNP that is present in at least one reference case the
    function prints ``chro, rs, pos, val, refVal, stat, f0`` where ``stat``
    is ``val / refVal``. SNPs where either value is zero are skipped.
    """
    studyCase = getCase(open("%s/sets/%s/%d.uiHS" % (MEGA.cacheDB, myHash, chro) ), True)

    refCases = {}
    for ref in refPops:
        # Use a separate name so the myHash parameter is not clobbered.
        refHash = MEGA.getHash(study.getStatIndivs("iHS", ref))
        refCases[ref] = getCase(open("%s/sets/%s/%d.uiHS" % (MEGA.cacheDB, refHash,
            chro) ))

    for rs, pos, f0, i1, i2 in studyCase:
        # The frequency decides which of the two values is used.
        val = i1 if f0>0.5 else i2
        comparisons = []
        for refCase in refCases.values():
            try:
                rf0, ri1, ri2 = refCase[rs]
            except KeyError:
                continue  # SNP absent from this reference population
            if rf0>0.5:
                comparisons.append(ri1)
            else:
                comparisons.append(ri2)
        if len(comparisons)==0:
            continue
        comparisons.sort()
        # Floor division: "/" yields a float under Python 3 and a float is
        # not a valid list index. Picks the upper median for even lengths.
        refVal = comparisons[len(comparisons) // 2]
        if val== 0 or refVal == 0:
            continue
        # NOTE(review): the log-ratio is overwritten by the plain ratio on the
        # next line, but it still raises ValueError for a negative ratio.
        # Confirm which of the two statistics is intended.
        stat = math.log(val/refVal)
        stat = val/refVal
        print(chro, rs, pos, val, refVal, stat, f0)
Exemple #6
0
def doPop(study, force, pop):
    """Register population ``pop`` of ``study`` and write its basic info.

    The hash of the population is always recorded via ``addPopHash``; the
    SQL id and basic-info files are only (re)written when ``addPopHash``
    returns a true value or ``force`` is true.
    """
    members = study.pops.getIndivs(pop)
    digest = MEGA.getHash(members)
    # addPopHash is evaluated unconditionally, before force is considered.
    isNew = addPopHash(study.cacheDir + "/pops", pop, digest)
    if not (isNew or force):
        return
    sql.addId("study/%s/%s" % (study.name, pop), digest)
    stats.doBasicInfoSet(members, force)
Exemple #7
0
 def getDfFileNames(self, name):
     """Return ``(md5, fileName)`` pairs for the components of ``name``.

     Components come from ``self.getDfComponents(name)`` as
     ``(stat, cut, pop)`` triples. Only the ``"iHS"`` statistic is handled.

     Raises:
         ValueError: On a component with another statistic. Previously such
             a component re-appended the values of the preceding component,
             or raised UnboundLocalError when it came first.
     """
     mdNames = []
     for stat, cut, pop in self.getDfComponents(name):
         if stat != "iHS":
             raise ValueError("unsupported statistic: %r" % (stat,))
         fName = "iHS-%1.2f.top" % (cut / 100.0)
         inds = self.study.getStatIndivs("iHS", pop)
         mdNames.append((MEGA.getHash(inds), fName))
     return mdNames
Exemple #8
0
 def getIndivs(self, pop):
     """Return the cached list of individuals for ``pop``, building it once.

     The list starts from ``MEGA.getIndivs(pop)``, is extended with the
     populations in ``self.addPops[pop]`` and the individuals in
     ``self.addInds[pop]`` (always skipping ``self.alwaysRemove``), and then
     has the individuals in ``self.delInds[pop]`` removed. A failed removal
     is logged and the list built so far is kept.
     """
     try:
         if pop not in self.indsPop:
             members = [ind for ind in MEGA.getIndivs(pop)
                        if ind not in self.alwaysRemove]
             # Cache immediately so a later failure still leaves the partial
             # list available to the return statement below.
             self.indsPop[pop] = members
             for extraPop in self.addPops.get(pop, []):
                 extraInds = MEGA.getIndivs(extraPop)
                 members.extend([ind for ind in extraInds
                                 if ind not in self.alwaysRemove])
             for ind in self.addInds.get(pop, []):
                 if ind not in self.alwaysRemove:
                     members.append(ind)
             for ind in self.delInds.get(pop, []):
                 members.remove(ind)
     except ValueError:
         # list.remove raises ValueError when ind is not in the list.
         logging.error("pop getIndivs %s %s %s" % (
                       self.study.name, pop, ind))
     return self.indsPop[pop]
def doSNPList(comparisons, comparison):
    """Write one ``cmp.sl.<list>.<comparison>`` table per SNP list.

    Each table has one row per 200 kb window that contains SNPs of the list
    and appears in the ``.top`` file of at least one population. The row
    gives the chromosome, the window start, the SNPs in the window, the
    markers found in the fifth column of the ``.top`` line, and one ``X``
    column per population in which the window appears.

    Args:
        comparisons: Object providing ``getSLStat``, ``getSLPopsInfo``,
            ``getGLPopsInfo`` and ``getGLFileName``.
        comparison: Name of the comparison.
    """
    stat, cut = comparisons.getSLStat(comparison)
    # NOTE(review): this result is replaced by getGLPopsInfo() inside the
    # loop; the call is kept only to preserve the original call sequence.
    # Confirm which of the two accessors is intended.
    popInfos = list(comparisons.getSLPopsInfo(comparison))
    SNPLists = MEGA.getSNPLists()
    for name in list(SNPLists.keys()):
        # One output file per SNP list, closed as soon as it is complete.
        # Previously only the last file was closed explicitly, and an empty
        # SNPLists raised NameError on the final close.
        with open("cmp.sl.%s.%s" % (name, comparison), "w") as w:
            w.write("\t\t\t")
            popInfos = list(comparisons.getGLPopsInfo(comparison))
            for popInfo in popInfos:
                w.write("\t%s" % (popInfo,))
            w.write("\n")
            winContentSNP = {}
            winContentGenes = {}
            winSpot = {}
            # Group the SNPs by (second field, third field rounded down to a
            # multiple of 200000).
            for snp in SNPLists[name]:
                window = (snp[1], snp[2] - snp[2] % 200000)
                winContentSNP.setdefault(window, []).append(snp)
            for popInfo in popInfos:
                popName = popInfo[0]
                suppPop = popInfo[1] if len(popInfo) > 1 else None

                md5, fName = comparisons.getGLFileName(popName, stat, cut,
                                                       suppPop)
                with open("%s/sets/%s/%s/%s" % (MEGA.cacheDB, args.karyo,
                                                md5, fName)) as f:
                    for l in f:
                        toks = l.rstrip().split("\t")
                        chro = int(toks[0])
                        pos = int(toks[1])
                        # Direct dict membership instead of scanning a
                        # freshly built list of keys for every line.
                        if (chro, pos) not in winContentSNP:
                            continue
                        if len(toks) > 4:
                            windMarkers = toks[4].split(" ")
                        else:
                            windMarkers = []
                        winContentGenes[(chro, pos)] = windMarkers
                        winSpot.setdefault((chro, pos), []).append(popName)
            for chro, pos in winSpot:
                w.write("%d\t%d\t%s\t%s" % (
                        chro, pos,
                        " ".join([x[0] + " " + str(x[2])
                                  for x in winContentSNP[chro, pos]]),
                        " ".join(winContentGenes[chro, pos])))
                for popInfo in popInfos:
                    popName = popInfo[0]
                    w.write("\t")
                    if popName in winSpot[(chro, pos)]:
                        w.write("X")

                w.write("\n")
Exemple #10
0
def doBundle(bundle, pops, sizes):
    """Write the ``basic`` and ``indivs`` cache files of a bundle.

    Args:
        bundle: Bundle name; names the directory under
            ``<cacheDB>/bundles``.
        pops: Names of the populations. The last entry is not processed
            (presumably it is the bundle total; verify against the caller).
        sizes: Sizes parallel to ``pops``; ``sizes[-1]`` is written as the
            first line of ``basic``.
    """
    myIndivs = []
    for i in range(len(pops)-1):
        popIndivs = MEGA.getIndivs(pops[i])
        myIndivs.extend(popIndivs)
        doPop(pops[i], sizes[i], popIndivs)
    try:
        os.mkdir(os.sep.join([MEGA.cacheDB, "bundles"]))
    except OSError:
        pass #Already exists, OK
    # Use the bundle parameter; the code previously read a module-level
    # "pop" and ignored the argument it was given.
    bundleDir = os.sep.join([MEGA.cacheDB, "bundles", bundle])
    try:
        os.mkdir(bundleDir)
    except OSError:
        pass #Already exists, OK
    with open(bundleDir + os.sep + "basic", "w") as w:
        w.write(str(sizes[-1]) + "\n")
        for i in range(len(pops)-1):
            w.write("\t".join([pops[i], str(sizes[i])]))
            w.write("\n")
    with open(bundleDir + os.sep + "indivs", "w") as w:
        w.write("\n".join(map(str, myIndivs)) + "\n")
Exemple #11
0
def doBundle(bundle, pops, sizes):
    """Write the ``basic`` and ``indivs`` cache files of a bundle.

    Args:
        bundle: Bundle name; names the directory under
            ``<cacheDB>/bundles``.
        pops: Names of the populations. The last entry is not processed
            (presumably it is the bundle total; verify against the caller).
        sizes: Sizes parallel to ``pops``; ``sizes[-1]`` is written as the
            first line of ``basic``.
    """
    myIndivs = []
    for i in range(len(pops) - 1):
        popIndivs = MEGA.getIndivs(pops[i])
        myIndivs.extend(popIndivs)
        doPop(pops[i], sizes[i], popIndivs)
    try:
        os.mkdir(os.sep.join([MEGA.cacheDB, "bundles"]))
    except OSError:
        pass  # Already exists, OK
    # Use the bundle parameter; the code previously read a module-level
    # "pop" and ignored the argument it was given.
    bundleDir = os.sep.join([MEGA.cacheDB, "bundles", bundle])
    try:
        os.mkdir(bundleDir)
    except OSError:
        pass  # Already exists, OK
    with open(bundleDir + os.sep + "basic", "w") as w:
        w.write(str(sizes[-1]) + "\n")
        for i in range(len(pops) - 1):
            w.write("\t".join([pops[i], str(sizes[i])]))
            w.write("\n")
    with open(bundleDir + os.sep + "indivs", "w") as w:
        w.write("\n".join(map(str, myIndivs)) + "\n")
Exemple #12
0
 def preload(self):
     """Load the configuration archive, open the starting map and load a
     test sound effect.

     The map name is taken from the first line of ``OPTIONS.txt``, after
     the ``' = '`` separator.
     """
     # Options must be available before the map is loaded.
     self.F_options = MEGA.mega2('Data\\Configuration.MEGA')
     optionLines = self.F_options.fetch('OPTIONS.txt')
     startMapName = optionLines[0].split(' = ')[1]
     self.currentmap_data = Class_maphandler.maphandler(startMapName)
     self.currentmap_data.PS_BGM(True)
     # Sound-effect setup (currently a single test sample in slot 0).
     self.soundhandler_SFX.LoadSND('HS_SE_000.wav', 0)
Exemple #13
0
def doBasicInfoSet(indivs, force=True):
    """Write the ``basic`` and ``indivs`` files of a set of individuals.

    The set directory is ``<cacheDB>/sets/<karyotype>/<hash>`` and is
    created when missing.

    Args:
        indivs: Sequence of ``(family id, individual id)`` string pairs.
        force: When false and ``basic`` already exists, nothing is written.

    Returns:
        The hash of ``indivs``, or ``None`` when the files already existed
        and ``force`` is false.
    """
    popHash = MEGA.getHash(indivs)
    size = len(indivs)
    popDir = os.sep.join([MEGA.cacheDB, "sets", karyo.karyotype, popHash])
    try:
        os.makedirs(popDir)
    except OSError:
        pass  # Already exists, OK
    if (not force) and path.exists(popDir + os.sep + "basic"):
        return
    # "with" closes the files even when a write fails part-way.
    with open(popDir + os.sep + "basic", "w") as w:
        w.write(str(size) + "\n")
    with open(popDir + os.sep + "indivs", "w") as w:
        for fam, ind in indivs:
            w.write("\t".join([fam, ind]) + "\n")
    return popHash
    def __init__(self,mapfile):
        """Load the map archive ``mapfile`` from ``self.mpath``.

        ``.MEGA`` is appended when the name does not already end with it
        (case-insensitive). When the file does not exist, two log lines are
        printed and the instance is left as ``self.clear()`` set it up.
        """
        self.clear()
        # Extension handling, copied from mega according to the original note.
        hasExtension = mapfile[-5:].upper() == '.MEGA'
        if hasExtension:
            print('is')
        else:
            mapfile += '.MEGA'

        fullPath = self.mpath+mapfile
        if not os.path.isfile(fullPath):
            print('LOG /maphandler- no file specified or bad file given')
            print('________________ file given - '+str(fullPath))
            return

        self.name = mapfile
        self.data_mega = MEGA.mega2(fullPath)
        # Processing steps run only once the archive is loaded.
        self.mega_process()
        self.process_collision()
Exemple #15
0
def doIBD(studyName):
    """Write ``ibd.problems``: every pair with ``PI_HAT >= 0.125``.

    For each population of the study the cached ``genome`` file is parsed
    with ``plink.parseGenome`` and the matching records are written as
    tab-separated lines. Populations whose genome file cannot be read are
    skipped.

    Args:
        studyName: Name passed to ``study.Study``.
    """
    myStudy = study.Study(studyName)
    with open("ibd.problems", "w") as w:
        for pop in myStudy.pops.pops:
            md5 = MEGA.getHash(myStudy.pops.getIndivs(pop))
            genomePath = (MEGA.cacheDB + "/sets/" + args.karyo + "/" +
                          md5 + "/genome")
            try:
                # The genome file is now closed after each population
                # instead of being left to the garbage collector.
                with open(genomePath) as genome:
                    for rec in plink.parseGenome(genome):
                        if rec["PI_HAT"] >= 0.125:
                            w.write("\t".join([
                                rec["FID1"], rec["IID1"], rec["FID2"],
                                rec["IID2"], rec["RT"], rec["EZ"],
                                str(rec["Z0"]), str(rec["Z1"]),
                                str(rec["Z2"]), str(rec["PI_HAT"]),
                                str(rec["PHE"]), str(rec["DST"]),
                                str(rec["PPC"]), str(rec["RATIO"])]) +
                                "\n")
            except IOError:
                # Best effort, as before: a missing or unreadable genome
                # file contributes no rows for this population.
                pass
Exemple #16
0
def sendPreamble(md5, studyName, other=None, withMaps=False):
    """Print the CGI header and the opening part of an HTML page.

    The page title is ``other`` when given, otherwise the name looked up
    from ``md5``, otherwise ``studyName``.

    Args:
        md5: Hash of a population; used to look up the title when ``other``
            is falsy.
        studyName: Fallback title.
        other: Explicit title overriding the other two.
        withMaps: Also emit the Leaflet stylesheet and script tags.
    """
    print("Content-Type: text/html")     # HTML is following
    print()                               # blank line, end of headers

    if other:
        name = other
    elif md5:
        name = MEGA.getNameFromMD5(md5)
    else:
        name = studyName

    # <title> and <link> belong in the document <head>; the previous
    # <header> tag is a body-level sectioning element.
    # NOTE(review): name is inserted unescaped; escape it if it can come
    # from request parameters.
    print("<head>")
    print("<title>%s</title>" % (name,))
    if withMaps:
        print('<link rel="stylesheet" href="http://cdn.leafletjs.com/leaflet-0.4/leaflet.css" />')
    print("</head><body>")
    if withMaps:
        print('<script src="http://cdn.leafletjs.com/leaflet-0.4/leaflet.js"></script>')

    print('<a href="%s/mega.py">Main page</a>' % MEGA.webRoot)
Exemple #17
0
def doiHS(study, force, pop):
    """Compute iHS for population ``pop`` of ``study``.

    Works inside a freshly recreated ``iHS`` directory under the current
    working directory and changes back to the parent before returning.

    Args:
        study: Study object providing ``getStatIndivs``, ``getPhasePop``,
            ``name`` and ``iHSConf``.
        force: When false, the run is skipped if the cached ``22.uiHS``
            file already exists.
        pop: Population under study.

    Returns:
        The hash of the individuals of ``pop``, or ``None`` when the run
        was skipped because a cached result exists.
    """
    logging.info("Starting iHS")
    shutil.rmtree("iHS", True)
    os.mkdir("iHS")
    os.chdir("iHS")
    setIndivs = study.getStatIndivs("iHS", pop)
    logging.info("numIndivs: %d", len(setIndivs))
    hash = MEGA.getHash(setIndivs)
    myDir = MEGA.cacheDB + "/sets/" + karyo.karyotype + "/" + hash
    logging.info("iHS: %s hash: %s phase: %s",
                 pop, hash, study.getPhasePop("iHS", pop))
    if not force:
        # The chromosome-22 output is used as the "already computed" marker.
        if os.path.isfile(myDir + "/22.uiHS"):
            logging.info("Already done and not forced")
            os.chdir("..")
            return
    name = "study/%s/iHS/%s" % (study.name, pop)
    sql.addId(name, hash)
    stats.doBasicInfoSet(setIndivs, force)
    # Individual list consumed by doHaploStats.py; "with" guarantees the
    # file is flushed and closed even if a write fails.
    with open(tmpDir + "/iHS/inds", "w") as w:
        for famId, sampId in setIndivs:
            w.write("%s\t%s\n" % (famId, sampId))
    os.system("python3 %s/doHaploStats.py prepareData %s %s inds %s %s" %
              (MEGA.haploScripts, karyo.karyotype, study.name,
               study.iHSConf["source"], study.getPhasePop("iHS", pop)))
    os.system("python3 %s/doHaploStats.py iHS %s %s" % (MEGA.haploScripts,
              karyo.karyotype, study.name))

    for k in range(1, 23 + 1):
        try:
            shutil.copyfile("%d.uiHS" % k, myDir + "/%d.uiHS" % k)
        except IOError:
            # Best effort, as before: chromosomes without output are skipped.
            pass

    os.chdir("..")
    logging.info("iHS Done")
    return hash
Exemple #18
0
 def getIndivsPop(self, pop, cut, withPop=True, doDistant=False):
     """Return the individuals of ``pop`` retained at IBD cut-off ``cut``.

     Reads ``ibd.retain.<cut>`` (space-separated) from the cache directory
     of the population set.

     Args:
         pop: Population name.
         cut: IBD cut-off; its ``str()`` is the file-name suffix.
         withPop: Return ``(famId, sampId, pop)`` triples instead of
             ``(famId, sampId)`` pairs.
         doDistant: Also drop the individuals listed in the tab-separated
             ``distant.<cut>`` file.

     Returns:
         List of tuples in file order.

     Raises:
         IOError: If a required file is missing.
     """
     popDir = MEGA.getHash(self.study.pops.getIndivs(pop))
     setDir = MEGA.cacheDB + "/sets/" + karyo.karyotype + "/" + popDir
     # A set gives constant-time membership tests in the loop below.
     distantIndivs = set()
     if doDistant:
         with open(setDir + "/distant." + str(cut)) as f:
             for l in f:
                 famId, sampId = tuple(l.rstrip().split("\t"))
                 distantIndivs.add((famId, sampId))
     indivs = []
     with open(setDir + "/ibd.retain." + str(cut)) as f:
         for l in f:
             # indPop is the population column of the file; a separate name
             # keeps the pop argument intact.
             famId, sampId, indPop = tuple(l.rstrip().split(" ")[:3])
             if (famId, sampId) not in distantIndivs:
                 if withPop:
                     indivs.append((famId, sampId, indPop))
                 else:
                     indivs.append((famId, sampId))
     return indivs
Exemple #19
0
def doGeneList(comparisons, comparison):
    """Write ``cmp.gl.<comparison>``: gene-list hits per population.

    The header row names the gene lists in sorted order. Each following row
    starts with the population name (plus the support population in
    parentheses when there is one) and has one tab-separated cell per gene
    list with the markers of that list found in the fifth column of the
    population's ``.top`` file.

    Args:
        comparisons: Object providing ``getGLStat``, ``getGLPopsInfo`` and
            ``getGLFileName``.
        comparison: Name of the comparison.
    """
    stat, cut = comparisons.getGLStat(comparison)
    popInfos = list(comparisons.getGLPopsInfo(comparison))
    geneLists = MEGA.getGeneLists()
    geneListNames = sorted(geneLists.keys())
    with open("cmp.gl.%s" % (comparison,), "w") as w:
        for listName in geneListNames:
            w.write("\t" + listName)
        w.write("\n")
        for popInfo in popInfos:
            myGenes = {listName: [] for listName in geneLists}
            popName = popInfo[0]
            w.write(popName)
            if len(popInfo) > 1:
                suppPop = popInfo[1]
                w.write(" (%s)" % suppPop)
            else:
                suppPop = None

            md5, fName = comparisons.getGLFileName(popName, stat, cut,
                                                   suppPop)
            # The per-population input is closed as soon as it is read.
            with open("%s/sets/%s/%s/%s" % (MEGA.cacheDB, args.karyo,
                                            md5, fName)) as f:
                for l in f:
                    toks = l.rstrip().split("\t")
                    if len(toks) > 4:
                        windMarkers = toks[4].split(" ")
                        for listName, myList in geneLists.items():
                            for marker in windMarkers:
                                if marker in myList:
                                    myGenes[listName].append(marker)
            for listName in geneListNames:
                # De-duplicate and sort, so the output no longer depends on
                # set iteration order.
                myGeneList = sorted(set(myGenes[listName]))
                w.write("\t" + " ".join(myGeneList))
            w.write("\n")
Exemple #20
0
# Study overview page: description, map, and a table with the size of each
# population overall and after every IBD cut-off.
print("<h1>%s</h1>" % (studyName,))
print(study.desc)
print("<br>")
print(study.summary)

print("<h2>Map</h2>")
web.addMap(study.pops.pops)
print("<h2>Populations</h2>")
cuts = study.ibd.cuts
print('<table border="1">')
print('<tr><td>Population</td><td>#</td>')
for cut in cuts:
    print('<td>', cut, '</td>')
print('</tr>')
for pop in study.pops.pops:
    # Fetch the individuals once; they feed both the hash and the count.
    popIndivs = study.pops.getIndivs(pop)
    md5 = MEGA.getHash(popIndivs)
    print("<tr>")
    print("<td>", web.getPopURL(pop, karyo.karyotype, md5), "</td>")
    print("<td>", len(popIndivs), "</td>")
    for cut in cuts:
        try:
            print('<td>', len(study.ibd.getIndivsPop(pop, cut)), '</td>')
        except IOError:
            # No IBD result for this cut-off yet: leave the cell empty.
            print('<td></td>')
    print("</tr>")
# Every row is already closed above, so only the table needs closing; the
# previous stray </td> and </tr> produced invalid markup.
print("</table>")
print("<br>")
print(web.linkFile(studyName, "ibd.problems", "IBD issues (>12.5)", True))

print('<h2>PCA</h2>')
Exemple #21
0
def doZoom(study, force, name):
    """Build the ``zoom.txt`` table for zoom region ``name`` of ``study``.

    The region ``(chro, start, end, pop, ibd)`` comes from
    ``study.zoom.general[name]``. The table has one row per SNP of the
    projected phased file, giving its physical position, genetic position,
    ancestral allele, the requested statistics and the haplotypes. It is
    copied into the cache directory of the set of individuals.

    Args:
        study: Study object.
        force: Currently unused (see the XXX note below).
        name: Key into ``study.zoom.general`` and ``study.zoom.stats``.

    Side effects: creates and enters a ``zoom`` directory; the working
    directory is not restored before returning.
    """
    logging.info("Starting Zoom - %s", name)
    #XXX FORCE
    try:
        os.mkdir("zoom")
    except OSError:
        pass  # Already exists
    os.chdir("zoom")
    chro, start, end, pop, ibd = study.zoom.general[name]
    # pop may name several populations joined with "+".
    pops = pop.split("+")
    indivs = []
    for myPop in pops:
        if ibd:
            # A truthy ibd is used as the cut-off for the retained set.
            indivs.extend(study.ibd.getIndivsPop(myPop, ibd, False))
        else:
            indivs.extend(study.pops.getIndivs(myPop))
    hash = MEGA.getHash(indivs)
    # Keep only the SNPs whose position lies inside [start, end].
    posAlls = ensembl.getSNPs(chro)
    poses = {}
    snps = []
    snpAtPos = {}
    for rs, content in list(posAlls.items()):
        pos = content[0]
        if pos >= start and pos <= end:
            snps.append(rs)
            poses[rs] = pos
            snpAtPos[pos] = rs
    ancAlls = ensembl.getAncs(chro)
    ancs = {}
    for snp in snps:
        ancs[snp] = ancAlls.get(snp, None)

    # Genetic positions, read from the map file of this chromosome.
    gPoses = {}
    f = open(MEGA.geneticMapDB + "/37-%d.map" % chro)
    f.readline()  # skip the first line
    for l in f:
        toks = l.rstrip().split("\t")
        pos = int(toks[1])
        if pos < start:
            continue
        if pos > end:
            # Stops at the first position past the region, so this assumes
            # the map file is sorted by position -- TODO confirm.
            break
        rs = snpAtPos.get(pos, "")
        if rs == "":
            continue
        gPos = float(toks[3])
        gPoses[rs] = gPos
    f.close()

    #assuming ihs phase.conf exists
    source = study.iHSConf["source"]
    # NOTE(review): the lookup uses the combined pop string, not the
    # individual names in pops -- confirm this is intended for "+" regions.
    refPop = study.getPhasePop("iHS", pop)
    if refPop != "shapeIt":
        phasedFile = "%s/%s/%s-%d.gz" % (MEGA.phaseDB, source, refPop, chro)
    else:
        phasedFile = "%s/%s/%d.gz" % (MEGA.phaseDB, source, chro)
    inds = [x[1] for x in indivs]
    # Project the phased file onto the selected individuals and SNPs.
    # NOTE(review): neither gzip handle is closed here; presumably
    # project_beagle_phase closes or flushes them -- verify.
    project_beagle_phase(gzip.open("%d.gz" % chro, "w"),
                         gzip.open(phasedFile),
                         ind_retain=inds, snp_retain=snps,
                         want_phased=True, is_phased=True)

    # SNPs that are actually present in the projected file.
    # NOTE(review): gzip.open defaults to binary mode in Python 3, where
    # split("\t") on bytes would fail -- confirm the target interpreter.
    realSNPs = set()
    f = gzip.open("%d.gz" % chro)
    f.readline()
    f.readline()
    for l in f:
        toks = l.rstrip().split("\t")
        realSNPs.add(toks[1])
    f.close()

    # NOTE(review): this local hides any module-level name "stats" for the
    # rest of the function.
    stats = study.zoom.stats[name]
    vals = {}
    for stat, params in stats:
        if stat == "iHS":
            vals["iHS"] = {}
            myHash = MEGA.getHash(study.getStatIndivs("iHS", pop))
            # NOTE(review): this file is never closed.
            f = open(MEGA.cacheDB + "/sets/" + karyo.karyotype + "/" +
                     myHash + "/" + str(chro) + ".iHS")
            for l in f:
                toks = l.rstrip().split("\t")
                if toks[0] in realSNPs:
                    vals["iHS"][toks[0]] = float(toks[2])
        elif stat == "xpEHH":
            vals["xpEHH"] = {}
            myHash = MEGA.getHash(study.getStatIndivs("xpEHH", pop))
            # params[0] is the prefix of the xpEHH file name.
            # NOTE(review): this file is never closed.
            f = open(MEGA.cacheDB + "/sets/" + karyo.karyotype + "/" +
                     myHash + "/" + params[0] + "-" + str(chro) + ".xpEHH")
            for l in f:
                toks = l.rstrip().split("\t")
                if toks[0] in realSNPs:
                    vals["xpEHH"][toks[0]] = float(toks[2])
        elif stat == "EHH":
            rsId = params[0]
            allele = params[1]
            vals["EHH"] = calcEHH(chro, poses, rsId, allele)

    # Write the table: fixed columns, one column per statistic (sorted by
    # name), then the haplotype columns of the projected file.
    statNames = list(vals.keys())
    statNames.sort()
    w = open("zoom.txt", "w")
    f = gzip.open("%d.gz" % chro)
    f.readline()
    header = f.readline().rstrip().split("\t")[2:]
    w.write("RS\tpos\tgPos\tanc\t")
    # From here on the loop variable rebinds the name parameter.
    for name in statNames:
        w.write(name + "\t")
    w.write("\t".join(header))
    w.write("\n")
    for l in f:
        toks = l.rstrip().split("\t")
        rs = toks[1]
        haplos = toks[2:]
        w.write("%s\t%d\t%s\t%s\t" % (rs, poses.get(rs, 0),
                str(gPoses.get(rs, "")), ancs.get(rs, "")))
        for name in statNames:
            w.write(str(vals[name].get(rs, "")) + "\t")
        w.write("\t".join(haplos))
        w.write("\n")
    w.close()
    f.close()
    # Publish the table into the cache directory of the set.
    shutil.copyfile("zoom.txt", MEGA.cacheDB + "/sets/" + karyo.karyotype +
                    "/" + hash + "/" +
                    "%s-%d-%d-%d.zoom" % (study.name, chro, start, end))
Exemple #22
0
def doIBDs(ibdlist):
    """Run the IBD pipeline for a list of ``(study, force, pop)`` requests.

    Writes the individuals of every population that needs work to the file
    ``lst``, pipes that file through the ``doRelated`` script (split, IBD,
    statIBD and removeCloserIBD per cut-off) and copies the results into
    the cache directory of each population set.

    Args:
        ibdlist: Iterable of ``(study, force, pop)`` tuples. A population
            is skipped when ``force`` is false and an ``ibd.retain.<cut>``
            file already exists for every cut-off of its study.
    """
    logging.info("Starting IBD")
    doSomething = False
    w = open("lst", "w")
    popsToDo = {}  # population name -> hash of its individuals
    for study, force, pop in ibdlist:
        indivs = study.pops.getIndivs(pop)
        #this is OK, but it is really study.ibd.pops
        hash = MEGA.getHash(indivs)
        if not force:
            # Redo the population as soon as one cut-off result is missing.
            doThis = False
            for cut in study.ibd.cuts:
                if not os.path.isfile(MEGA.cacheDB + "/sets/" +
                                      karyo.karyotype + "/" + hash +
                                      "/ibd.retain." + str(cut)):
                    doThis = True
            if not doThis:
                continue

        doSomething = True
        popsToDo[pop] = hash
        for famId, sampId in indivs:
            w.write("%s\t%s\t%s\n" % (famId, sampId, pop))
    w.close()
    if doSomething:
        # Each external step reads the individual list from stdin.
        logging.debug("Running doRelated split")
        os.system("cat lst |python3 %s/doRelated split" % (MEGA.inbreedingScripts))
        logging.debug("Running doRelated IBD")
        os.system("cat lst |python3 %s/doRelated IBD" % (MEGA.inbreedingScripts))
        logging.debug("Running doRelated statIBD")
        os.system("cat lst |python3 %s/doRelated statIBD > ibdata/statIBD" %
                  (MEGA.inbreedingScripts))
        for pop in popsToDo:
            # Spaces in the population name become underscores in the
            # names of the files under ibdata/.
            pop_ = pop.replace(" ", "_")
            hash = popsToDo[pop]
            logging.debug("copy %s %s",
                          "ibdata/%s.genome" % (pop_,),
                          MEGA.cacheDB + "/sets/" + karyo.karyotype + "/" +
                          hash + "/genome")
            shutil.copyfile("ibdata/%s.genome" % (pop_,),
                            MEGA.cacheDB + "/sets/" + karyo.karyotype + "/" +
                            hash + "/genome")
        # NOTE(review): study is whatever the last tuple of ibdlist bound,
        # so the cut-offs of that study are applied to every population --
        # confirm all requests share one study.
        for cut in study.ibd.cuts:
            logging.debug("Running doRelated removeCloserIBD %f", cut)
            os.system("cat lst |python3 %s/doRelated removeCloserIBD %f" %
                      (MEGA.inbreedingScripts, cut))
            # statIBD lines are keyed by their first tab-separated field.
            ibds = {}
            sIBD = open("ibdata/statIBD")
            for l in sIBD:
                toks = l.strip().rstrip().split("\t")
                ibds[toks[0]] = "\t".join([x.strip().rstrip() for x in toks[1:]])
            sIBD.close()
            for pop in popsToDo:
                # Populations without a statIBD line get no files copied
                # for this cut-off.
                if pop not in ibds:
                    continue
                hash = popsToDo[pop]
                myDir = MEGA.cacheDB + "/sets/" + karyo.karyotype + "/" + hash
                w = open(myDir + "/IBDstat." + str(cut), "w")
                w.write(ibds[pop] + "\n")
                w.close()
                pop_ = pop.replace(" ", "_")
                logging.debug("copy %s %s",
                              "ibdata/%s.distant" % (pop_,),
                              MEGA.cacheDB + "/sets/" + karyo.karyotype +
                              "/" + hash + "/distant." + str(cut))
                shutil.copyfile("ibdata/%s.distant" % (pop_,),
                                MEGA.cacheDB + "/sets/" + karyo.karyotype +
                                "/" + hash + "/distant." + str(cut))
                shutil.copyfile("ibdata/%s_ibd.txt" % (pop_,),
                                MEGA.cacheDB + "/sets/" + karyo.karyotype +
                                "/" + hash + "/ibd.retain." + str(cut))
    logging.info("IBD Done")
def readWindowFile(f):
    """Parse a tab-separated window file and close it.

    Returns a list of ``(chro, startPos, ensgs, hugos)`` tuples, one per
    line: the first two columns as integers, the second-to-last column
    split on spaces as ``ensgs`` and the third-to-last as ``hugos``.
    """
    windows = []
    for line in f:
        fields = line.rstrip().split("\t")
        chro, startPos = int(fields[0]), int(fields[1])
        windows.append((chro, startPos,
                        fields[-2].split(" "),
                        fields[-3].split(" ")))
    f.close()
    return windows

windows = readWindowFile(open(args.windows))

# Load the GO lookup table. Both pickle files hold several objects in
# sequence; only the last one is kept, the earlier ones are read and dropped.
# NOTE(review): pickle.load executes arbitrary code from the file; only use
# it on files produced by this pipeline.
if isAmigo:
    MEGA.getHugo()
    with open(MEGA.amigoFile, 'rb') as f:
        pickle.load(f)
        pickle.load(f)
        pickle.load(f)
        amiGO = pickle.load(f)
else:
    # Binary mode, as in the amigo branch: pickle cannot read from a file
    # opened in text mode under Python 3.
    with open(MEGA.uniprotFile, 'rb') as f:
        pickle.load(f)
        uniGO = pickle.load(f)
import sys
import MEGA
from MEGA.study import Study
import rpy2.robjects as robjects
from rpy2.robjects.vectors import DataFrame

# Command line: <script> study admixture.Q famFile pop [pop ...]
if len(sys.argv) <= 4:
    # The %s placeholder was previously printed literally.
    print("%s study admixture.Q famFile pop pop ..." % (sys.argv[0],))
    sys.exit(-1)

study = Study(sys.argv[1])
admix = sys.argv[2]
indList = sys.argv[3]
goodOrder = sys.argv[4:]

indivPop = MEGA.getPop4Indiv()

# The first two space-separated columns of the individual list are kept.
inds = []
with open(indList) as f:
    for l in f:
        toks = l.split(" ")
        inds.append((toks[0], toks[1]))

# The admixture lines are kept as raw strings.
with open(admix) as f:
    lines = f.readlines()
Exemple #25
0
def getRefiHS(study, name):
    """Return an HTML link, followed by ``<br>``, to the population page of
    the individuals used for the iHS statistic of ``name``."""
    indivs = study.getStatIndivs("iHS", name)
    md5 = MEGA.getHash(indivs)
    fields = (MEGA.webRoot, md5, name)
    return '<a href="%s/pop.py?md5=%s">iHS %s</a><br>' % fields
Exemple #26
0
def getRefPop(indivs, karyo, popName=None):
    """Return an HTML link, followed by ``<br>``, to the population page of
    ``indivs``.

    The link text is ``popName``; when that is falsy the name is looked up
    from the hash of ``indivs``.
    """
    md5 = MEGA.getHash(indivs)
    label = popName if popName else MEGA.getNameFromMD5(md5)
    fields = (MEGA.webRoot, md5, karyo, label)
    return '<a href="%s/pop.py?md5=%s&k=%s">%s</a><br>' % fields
Exemple #27
0
        os.mkdir(os.sep.join([MEGA.cacheDB, "bundles"]))
    except OSError:
        pass  #Already exists, OK
    bundleDir = os.sep.join([MEGA.cacheDB, "bundles", pop])
    try:
        os.mkdir(bundleDir)
    except OSError:
        pass  #Already exists, OK
    w = open(bundleDir + os.sep + "basic", "w")
    w.write(str(sizes[-1]) + "\n")
    for i in range(len(pops) - 1):
        w.write("\t".join([pops[i], str(sizes[i])]))
        w.write("\n")
    w.close()
    w = open(bundleDir + os.sep + "indivs", "w")
    w.write("\n".join(map(lambda x: str(x), myIndivs)) + "\n")
    w.close()


# Two lines describe a core population (the second line is its size);
# anything else is a bundle with one "name<TAB>size" line per entry after
# the first line.
isCorePop = len(lines) == 2
if isCorePop:
    size = int(lines[1])
    doPop(pop, size, MEGA.getIndivs(pop))
else:
    records = [line.rstrip().split("\t") for line in lines[1:]]
    pops = [rec[0].strip() for rec in records]
    sizes = [int(rec[1]) for rec in records]
    doBundle(pop, pops, sizes)
Exemple #28
0
 def copy(self):
     """Copy each ``hwe-<pop>`` file from the working directory to
     ``hwe.bz2`` in the cache directory of that population set."""
     for pop in self.pops:
         digest = MEGA.getHash(self.study.pops.getIndivs(pop))
         target = '%s/sets/%s/%s/hwe.bz2' % (MEGA.cacheDB, karyo.karyotype,
                                             digest)
         shutil.copyfile('hwe-' + pop, target)
Exemple #29
0
 def getHash(popStudy):
     """Return the MEGA hash of the individuals of study population
     ``popStudy``; ``study`` is taken from the enclosing scope."""
     indivs = study.pops.getIndivs(popStudy)
     return MEGA.getHash(indivs)
import os
from __future__ import print_function

from igrat import Executor
import MEGA

lexec = MEGA.executor

sources = {}
indSrc = {}
for ref, fname in MEGA.getPlinkFiles():
    sources[ref] = fname
    indSrc[ref] = []

sp = open(MEGA.metaDB + os.sep + "clean.tab")
for l in sp:
    toks = l.rstrip().split("\t")
    src = toks[9]
    if src in sources:
        indSrc[src].append((toks[0],toks[1]))
sp.close()

tartuSources = open(MEGA.metaDB + os.sep + "tartu.db").read().splitlines()
for src in tartuSources:
    fName = sources[src]
    inds = indSrc[src]
    print(fName)
    w = open(MEGA.plinkDB + os.sep + fName,"w")
    for fam, samp in inds:
        w.write("%s %s\n" % (fam, samp))
    w.close()
Exemple #31
0
#!/usr/local/bin/python

import os
import sys
import ConfigParser as config
import MEGA

# Usage: <script> lcfg -- prints, per population, its size and the number
# of lines in its IBD "keep" file.
if len(sys.argv) != 2:
    # Parenthesised single-argument print works as a statement (Python 2)
    # and as a function call (Python 3).
    print("Usage: %s lcfg" % (sys.argv[0].split("/")[-1], ))
    sys.exit(-1)

cfg = sys.argv[1]
MEGA.loadLocal(cfg)
metaDB = MEGA.metaDB
ibd = MEGA.ibd

# population name -> list of sample ids
popCounts = {}

with open(metaDB + os.sep + "clean.tab") as cleanTab:
    for l in cleanTab:
        rec = MEGA.getRecord(l)
        popCounts.setdefault(rec["pop"], []).append(rec["sampId"])

# sorted() works on both the list returned by keys() in Python 2 and the
# view returned in Python 3.
popNames = sorted(popCounts)
for pop in popNames:
    if pop == "":
        continue  #clean has to sort this (curation is better)
    sys.stdout.write(pop + "\t" + str(len(popCounts[pop])) + "\t")
    pop = pop.rstrip()  #clean has to sort this
    with open(ibd + os.sep + pop.replace(" ", "_") + "_keep.txt") as ibp:
        sys.stdout.write(str(len(ibp.readlines())) + "\t")
    # NOTE(review): rp is opened but not used or closed in the visible code.
    rp = open(ibd + os.sep + pop.replace(" ", "_") + "_remove.txt")
Exemple #32
0
def getCase(f):
    """Parse lines of tab-separated ``rs, position, value`` records.

    Empty fields (from repeated tabs) are ignored. Returns two parallel
    lists: the integer positions and the float values.
    """
    positions = []
    values = []
    for line in f:
        # A distinct comprehension variable avoids clobbering the result
        # lists on interpreters where comprehension variables leak.
        fields = [tok for tok in line.rstrip().split("\t") if tok != ""]
        positions.append(int(fields[1]))
        values.append(float(fields[2]))
    return positions, values


# Load the study and hash the individuals used for the iHS statistic of
# studyPop; the hash selects the cache directory read by doChro below.
study = MEGA.study.Study(studyName)
myHash = MEGA.getHash(study.getStatIndivs("iHS", studyPop))


def doChro(myHash, chro):
    """Return the ``getCase`` result for the ``.iHS`` file of one chromosome
    of the set identified by ``myHash``."""
    # The file is closed once it has been parsed.
    with open("%s/sets/%s/%d.iHS" % (MEGA.cacheDB, myHash, chro)) as f:
        return getCase(f)


# Concatenate the positions (x) and values (y) of all chromosomes, printing
# the number of records of each chromosome as it is read.
x, y = [], []
for i in chros:
    cx, cy = doChro(myHash, i)
    print(len(cx))
    x += cx
    y += cy

print(1)
#!/usr/local/bin/python
from __future__ import print_function

import os
import sys
import MEGA

# Usage: <script> lcfg -- prints, per population, its size and the number
# of lines in its IBD "keep" file.
if len(sys.argv) != 2:
    print("Usage: %s lcfg" % (sys.argv[0].split("/")[-1],))
    sys.exit(-1)

cfg = sys.argv[1]
MEGA.loadLocal(cfg)
metaDB = MEGA.metaDB
ibd = MEGA.ibd

# population name -> list of sample ids
popCounts = {}

with open(metaDB + os.sep + "clean.tab") as cleanTab:
    for l in cleanTab:
        rec = MEGA.getRecord(l)
        popCounts.setdefault(rec["pop"], []).append(rec["sampId"])

popNames = sorted(popCounts)
for pop in popNames:
    if pop == "":
        continue  #clean has to sort this (curation is better)
    sys.stdout.write(pop + "\t" + str(len(popCounts[pop])) + "\t")
    pop = pop.rstrip()  #clean has to sort this
    # The keep file is closed once its lines are counted; it used to stay
    # open for every population.
    with open(ibd + os.sep + pop.replace(" ", "_") + "_keep.txt") as ibp:
        sys.stdout.write(str(len(ibp.readlines())) + "\t")
    # NOTE(review): rp is opened but not used or closed in the visible code.
    rp = open(ibd + os.sep + pop.replace(" ", "_") + "_remove.txt")
Exemple #34
0
def readfile(fname):
    """Return the lines of text file ``fname``, line endings included.

    Raises:
        OSError: If the file cannot be opened or read. Previously the error
            was swallowed by a bare ``except`` and the function then failed
            with an unrelated UnboundLocalError on its return statement.
    """
    with open(fname, 'r') as f:
        return f.readlines()


# Build DATA.MEGA from the files named on the first line of MEGAORDER.txt.
os.chdir('Main_data')
m = MEGA.mega2('DATA.MEGA')
ford = readfile('MEGAORDER.txt')[0].strip('\n')
print(ford)
ford = csv2array(ford)

# Each listed file is added as [file name, list of lines without newline].
for entry in ford:
    dat = [row.strip('\n') for row in readfile(entry)]
    m.adddata([entry, dat])
m.save()

print('parsetest')
Exemple #35
0
import sys
import MEGA

# Usage: python <script> pop -- prints the size of a core population, or
# the size of every member of a bundle plus the total.
# Every print below takes one parenthesised argument, so the script runs
# under both Python 2 and Python 3.
if len(sys.argv) != 2:
    print("python %s pop" % (sys.argv[0],))
    sys.exit(-1)

pop = sys.argv[1]

if pop in MEGA.bundles:
    print("BUNDLE")
    myPops = MEGA.bundles[pop]
    cnt = 0
    for myPop in myPops:
        myLen = len(MEGA.getIndivs(myPop))
        print("%20s\t%5d" % (myPop, myLen))
        cnt += myLen
    print("%20s\t%5d" % ("All", cnt))
else:
    print("CORE POP")
    print(len(MEGA.getIndivs(pop)))
        print("<tr><td>")
        print("</td><td>".join(l.rstrip().split("\t")))
        print("</td></tr>")
    print("</table>")
    print(web.linkFile(studyName, "cmp.pr.%s" % name, "(download)", isStudy=True, fType="text/plain"))
    print("<br>")
    print(web.linkFile(studyName, "cmp.p2.%s" % name, "Windows", isStudy=True, fType="text/plain"))
elif study.comparisons.getType(name)=="Gene List":
    os.chdir(MEGA.cacheDB + "/studies/"+studyName)
    f = open("cmp.gl.%s" % name)
    print("<table>")
    for l in f:
        print("<tr><td>")
        print("</td><td>".join(l.rstrip().split("\t")))
        print("</td></tr>")
    print("</table>")
    print(web.linkFile(studyName, "cmp.gl.%s" % name, "(download)", isStudy=True, fType="text/plain"))
elif study.comparisons.getType(name)=="SNP List":
    os.chdir(MEGA.cacheDB + "/studies/"+studyName)
    snpLists = MEGA.getSNPLists()
    for lst in snpLists:
        f = open("cmp.sl.%s.%s" % (lst,name))
        print(lst)
        print("<table border=1>")
        for l in f:
            print("<tr><td>")
            print("</td><td>".join(l.rstrip().split("\t")))
            print("</td></tr>")
        print("</table>")
        print(web.linkFile(studyName, "cmp.sl.%s.%s" % (lst,name), "(download)", isStudy=True, fType="text/plain"))