Beispiel #1
0
 def getPrFileName(self, pop, stat, cut, supp=None):
     """Return the set hash and ``.top`` file name for one statistic.

     Args:
         pop: Population name passed to ``self.study.getStatIndivs``.
         stat: Statistic name, either ``"iHS"`` or ``"xpEHH"``.
         cut: Cut-off; it is divided by 100 and written with two decimals
             into the file name.
         supp: Second population name, only used in the xpEHH file name.

     Returns:
         A ``(md5, fName)`` tuple: ``MEGA.getHash`` of the individuals used
         for ``stat`` in ``pop`` and the matching file name.

     Raises:
         ValueError: If ``stat`` is not a supported statistic.
     """
     if stat == "iHS":
         fName = "iHS-%1.2f.top" % (cut / 100.0)
     elif stat == "xpEHH":
         fName = "xpEHH-%s-%1.2f.top" % (supp, cut / 100.0)
     else:
         # An unknown statistic used to surface as an UnboundLocalError on
         # the return line; fail with a message that names the culprit.
         raise ValueError("Unsupported statistic: %r" % (stat,))
     md5 = MEGA.getHash(self.study.getStatIndivs(stat, pop))
     return md5, fName
Beispiel #2
0
def doxpEHH(study, force, pop, supp):
    """Run the xpEHH pipeline for ``pop`` against ``supp`` and cache the output.

    The work happens inside a freshly recreated ``xpEHH`` directory below the
    current working directory. The individual lists are written out,
    ``doHaploStats.py`` is invoked through the shell, and the per-chromosome
    ``.xpEHH`` files (1 to 23) are copied into the cache directory of the
    ``pop`` individual set.

    Args:
        study: Study object; ``getStatIndivs``, ``getPhasePop``, ``name`` and
            ``xpEHHConf`` are used.
        force: When false and ``<supp>-22.uxpEHH`` is already cached, nothing
            is recomputed.
        pop: Population the statistic is computed for.
        supp: Supporting population.

    Returns:
        The hash of the ``pop`` individual set, or ``None`` when the cached
        result was reused.
    """
    logging.info("Starting xpEHH %s %s", pop, supp)
    shutil.rmtree("xpEHH", True)
    os.mkdir("xpEHH")
    os.chdir("xpEHH")
    try:
        setIndivs = study.getStatIndivs("xpEHH", pop)
        suppSetIndivs = study.getStatIndivs("xpEHH", supp)
        logging.info("numIndivs: %d %d", len(setIndivs), len(suppSetIndivs))
        setHash = MEGA.getHash(setIndivs)
        hashSupp = MEGA.getHash(suppSetIndivs)
        myDir = MEGA.cacheDB + "/sets/" + karyo.karyotype + "/" + setHash
        logging.info("xpEHH: %s hash: %s phase: %s", pop, setHash,
                     study.getPhasePop("xpEHH", pop))
        # Chromosome 22 is used as the marker for "already computed".
        if not force and os.path.isfile(myDir + "/%s-22.uxpEHH" % (supp,)):
            logging.info("Already done and not forced")
            return
        sql.addId("study/%s/xpEHH/%s" % (study.name, pop), setHash)
        sql.addId("study/%s/xpEHH/%s-support" % (study.name, supp), hashSupp)
        stats.doBasicInfoSet(setIndivs, force)
        stats.doBasicInfoSet(suppSetIndivs, force)
        # NOTE(review): presumably tmpDir is the parent of the xpEHH
        # directory created above - confirm.
        with open(tmpDir + "/xpEHH/inds", "w") as w:
            for famId, sampId in setIndivs:
                w.write("%s\t%s\n" % (famId, sampId))
        with open(tmpDir + "/xpEHH/suppinds", "w") as w:
            for famId, sampId in suppSetIndivs:
                w.write("%s\t%s\n" % (famId, sampId))
        os.system("python3 %s/doHaploStats.py prepareData %s %s suppinds %s %s xpEHH" %
                  (MEGA.haploScripts, karyo.karyotype, study.name,
                   study.xpEHHConf["source"],
                   study.getPhasePop("xpEHH", supp)))
        # Keep the supporting haplotypes under s<k>.hap before the second
        # prepareData run is made for the main population.
        for k in range(1, 23 + 1):
            shutil.copyfile("%d.hap" % (k,),
                            "s%d.hap" % (k,))
        os.system("python3 %s/doHaploStats.py prepareData %s %s inds %s %s xpEHH" % (
                  MEGA.haploScripts, karyo.karyotype, study.name,
                  study.xpEHHConf["source"], study.getPhasePop("xpEHH", pop),))
        os.system("python3 %s/doHaploStats.py XPEHH %s" % (
            MEGA.haploScripts, karyo.karyotype))
        for k in range(1, 23 + 1):
            shutil.copyfile("%d.xpEHH" % (k,),
                            myDir + "/%s-%d.uxpEHH" % (supp, k,))
    finally:
        # Always leave the scratch directory, even when a step above raised.
        os.chdir("..")
    logging.info("xpEHH Done")
    return setHash
Beispiel #3
0
 def getOvFileNames(self, name):
     """Yield ``(md5, fName)`` for each component of ``name``.

     Components come from ``self.getOvComponents(name)`` as
     ``(stat, cut, pop)`` triples. Only the ``"iHS"`` statistic is handled.

     Args:
         name: Key passed to ``self.getOvComponents``.

     Yields:
         ``(md5, fName)``: ``MEGA.getHash`` of the iHS individuals of the
         component population and the ``iHS-<cut/100>.top`` file name.

     Raises:
         ValueError: If a component uses a statistic other than ``"iHS"``.
     """
     for stat, cut, pop in self.getOvComponents(name):
         if stat != "iHS":
             # Previously this either hit an UnboundLocalError (first
             # component) or silently re-yielded the previous component.
             raise ValueError("Unsupported statistic: %r" % (stat,))
         fName = "iHS-%1.2f.top" % (cut / 100.0)
         md5 = MEGA.getHash(self.study.getStatIndivs("iHS", pop))
         yield md5, fName
Beispiel #4
0
def doChro(myHash, chro):
    """Print, per SNP of one chromosome, the study value against a reference.

    For every record of the study ``.uiHS`` file the value is compared with
    the middle element (index ``len // 2`` of the sorted list) of the values
    found for the same SNP in the reference populations listed in
    ``refPops``. SNPs without any reference value, or where either value is
    zero, are skipped.

    Args:
        myHash: Hash of the study individual set (cache directory name).
        chro: Chromosome number.
    """
    studyCase = getCase(
        open("%s/sets/%s/%d.uiHS" % (MEGA.cacheDB, myHash, chro)), True)

    refCases = {}
    for ref in refPops:
        refHash = MEGA.getHash(study.getStatIndivs("iHS", ref))
        refCases[ref] = getCase(
            open("%s/sets/%s/%d.uiHS" % (MEGA.cacheDB, refHash, chro)))

    for rs, pos, f0, i1, i2 in studyCase:
        val = i1 if f0 > 0.5 else i2
        comparisons = []
        for refCase in refCases.values():
            try:
                rf0, ri1, ri2 = refCase[rs]
            except KeyError:
                continue  # SNP absent from this reference population
            comparisons.append(ri1 if rf0 > 0.5 else ri2)
        if not comparisons:
            continue
        comparisons.sort()
        # Floor division: a float index raises TypeError on Python 3.
        refVal = comparisons[len(comparisons) // 2]
        if val == 0 or refVal == 0:
            continue
        # The plain ratio is what was reported; a math.log() of the same
        # ratio used to be computed and immediately overwritten.
        stat = val / refVal
        print(chro, rs, pos, val, refVal, stat, f0)
0
def doPop(study, force, pop):
    """Record the hash of a population and refresh its basic info.

    The SQL id and the basic info files are only (re)written when
    ``addPopHash`` returns a true value or ``force`` is set.

    Args:
        study: Study object; ``pops``, ``cacheDir`` and ``name`` are used.
        force: Redo the registration regardless of ``addPopHash``.
        pop: Population name.
    """
    members = study.pops.getIndivs(pop)
    setHash = MEGA.getHash(members)
    added = addPopHash(study.cacheDir + "/pops", pop, setHash)
    if not (added or force):
        return
    sql.addId("study/%s/%s" % (study.name, pop), setHash)
    stats.doBasicInfoSet(members, force)
Beispiel #6
0
 def getDfFileNames(self, name):
     """Return ``(md5, fName)`` pairs for each component of ``name``.

     Components come from ``self.getDfComponents(name)`` as
     ``(stat, cut, pop)`` triples. Only the ``"iHS"`` statistic is handled.

     Args:
         name: Key passed to ``self.getDfComponents``.

     Returns:
         A list of ``(md5, fName)`` tuples in component order.

     Raises:
         ValueError: If a component uses a statistic other than ``"iHS"``.
     """
     mdNames = []
     for stat, cut, pop in self.getDfComponents(name):
         if stat != "iHS":
             # Previously this either hit an UnboundLocalError (first
             # component) or silently repeated the previous component.
             raise ValueError("Unsupported statistic: %r" % (stat,))
         fName = "iHS-%1.2f.top" % (cut / 100.0)
         md5 = MEGA.getHash(self.study.getStatIndivs("iHS", pop))
         mdNames.append((md5, fName))
     return mdNames
Beispiel #7
0
def getPopURL(pop, karyo, hash=None, onlyPop=False, onlyKaryo=False):
    """Return an HTML link to the ``pop.py`` page of a population set.

    Args:
        pop: Population name.
        karyo: Karyotype identifier placed in the ``k`` query parameter.
        hash: Hash of the individual set; when falsy it is computed with
            ``MEGA.getHash(MEGA.getIndivs(pop))``.
        onlyPop: Use only the population name as link text.
        onlyKaryo: Use only the karyotype as link text (checked after
            ``onlyPop``).

    Returns:
        The ``<a href=...>...</a>`` markup; the default link text is
        ``"<pop> (<karyo>)"``.
    """
    if not hash:
        hash = MEGA.getHash(MEGA.getIndivs(pop))
    if onlyPop:
        label = pop
    elif onlyKaryo:
        label = karyo
    else:
        label = "%s (%s)" % (pop, karyo)
    # The element is now terminated with </a>; it used to end in a second
    # opening <a>, which left the anchor unclosed.
    return '<a href="pop.py?md5=%s&k=%s">%s</a>' % (hash, karyo, label)
Beispiel #8
0
def doBasicInfoSet(indivs, force=True):
    """Write the ``basic`` and ``indivs`` files of an individual set.

    The files live in ``<MEGA.cacheDB>/sets/<karyotype>/<hash>``; the
    directory is created when missing. ``basic`` holds the number of
    individuals, ``indivs`` one tab-separated ``family, individual`` pair per
    line.

    Args:
        indivs: Sequence of ``(family id, individual id)`` string pairs.
        force: When false and ``basic`` already exists, nothing is written.

    Returns:
        The hash of the set, or ``None`` when the existing files were kept.
    """
    popHash = MEGA.getHash(indivs)
    size = len(indivs)
    popDir = os.sep.join([MEGA.cacheDB, "sets", karyo.karyotype, popHash])
    # exist_ok tolerates an existing directory without hiding other errors.
    os.makedirs(popDir, exist_ok=True)
    basicPath = popDir + os.sep + "basic"
    if (not force) and path.exists(basicPath):
        return
    with open(basicPath, "w") as w:
        w.write(str(size) + "\n")
    with open(popDir + os.sep + "indivs", "w") as w:
        for fam, ind in indivs:
            w.write("\t".join([fam, ind]) + "\n")
    return popHash
Beispiel #9
0
def doIBD(studyName):
    """Write ``ibd.problems`` with every pair whose PI_HAT is at least 0.125.

    For each population of the study the cached ``genome`` file of its
    individual set is parsed with ``plink.parseGenome``; populations whose
    file cannot be read are skipped.

    Args:
        studyName: Name handed to ``study.Study``.
    """
    myStudy = study.Study(studyName)
    columns = ["Z0", "Z1", "Z2", "PI_HAT", "PHE", "DST", "PPC", "RATIO"]
    with open("ibd.problems", "w") as w:
        for pop in myStudy.pops.pops:
            md5 = MEGA.getHash(myStudy.pops.getIndivs(pop))
            genomePath = (MEGA.cacheDB + "/sets/" + args.karyo + "/" +
                          md5 + "/genome")
            try:
                with open(genomePath) as genome:
                    for rec in plink.parseGenome(genome):
                        if rec["PI_HAT"] >= 0.125:
                            fields = [rec["FID1"], rec["IID1"], rec["FID2"],
                                      rec["IID2"], rec["RT"], rec["EZ"]]
                            fields.extend(str(rec[col]) for col in columns)
                            w.write("\t".join(fields) + "\n")
            except IOError:
                # Best effort: no readable genome file for this population.
                pass
Beispiel #10
0
def doiHS(study, force, pop):
    """Run the iHS pipeline for ``pop`` and cache the per-chromosome output.

    The work happens inside a freshly recreated ``iHS`` directory below the
    current working directory. The individual list is written out,
    ``doHaploStats.py`` is invoked through the shell, and any ``<k>.uiHS``
    file produced for chromosomes 1 to 23 is copied into the cache directory
    of the individual set.

    Args:
        study: Study object; ``getStatIndivs``, ``getPhasePop``, ``name`` and
            ``iHSConf`` are used.
        force: When false and ``22.uiHS`` is already cached, nothing is
            recomputed.
        pop: Population the statistic is computed for.

    Returns:
        The hash of the individual set, or ``None`` when the cached result
        was reused.
    """
    logging.info("Starting iHS")
    shutil.rmtree("iHS", True)
    os.mkdir("iHS")
    os.chdir("iHS")
    try:
        setIndivs = study.getStatIndivs("iHS", pop)
        logging.info("numIndivs: %d", len(setIndivs))
        setHash = MEGA.getHash(setIndivs)
        myDir = MEGA.cacheDB + "/sets/" + karyo.karyotype + "/" + setHash
        logging.info("iHS: %s hash: %s phase: %s",
                     pop, setHash, study.getPhasePop("iHS", pop))
        # Chromosome 22 is used as the marker for "already computed".
        if not force and os.path.isfile(myDir + "/22.uiHS"):
            logging.info("Already done and not forced")
            return
        sql.addId("study/%s/iHS/%s" % (study.name, pop), setHash)
        stats.doBasicInfoSet(setIndivs, force)
        # NOTE(review): presumably tmpDir is the parent of the iHS directory
        # created above - confirm.
        with open(tmpDir + "/iHS/inds", "w") as w:
            for famId, sampId in setIndivs:
                w.write("%s\t%s\n" % (famId, sampId))
        os.system("python3 %s/doHaploStats.py prepareData %s %s inds %s %s" %
                  (MEGA.haploScripts, karyo.karyotype, study.name,
                   study.iHSConf["source"], study.getPhasePop("iHS", pop)))
        os.system("python3 %s/doHaploStats.py iHS %s %s" % (MEGA.haploScripts,
                  karyo.karyotype, study.name))

        for k in range(1, 23 + 1):
            try:
                shutil.copyfile("%d.uiHS" % k, myDir + "/%d.uiHS" % k)
            except IOError:
                # Best effort: a chromosome without output is skipped.
                pass
    finally:
        # Always leave the scratch directory, even when a step above raised.
        os.chdir("..")
    logging.info("iHS Done")
    return setHash
Beispiel #11
0
 def getIndivsPop(self, pop, cut, withPop=True, doDistant=False):
     """Return the individuals listed in ``ibd.retain.<cut>`` for ``pop``.

     The file is read from the cache directory of the population's
     individual set. Each line is split on single spaces and its first
     three fields are used as family id, sample id and population.

     Args:
         pop: Population name used to locate the cache directory.
         cut: Cut-off value; it forms the file name suffix.
         withPop: Return ``(famId, sampId, pop)`` triples (the population is
             the one read from the file) instead of ``(famId, sampId)``.
         doDistant: Also read the tab-separated ``distant.<cut>`` file and
             leave out every individual listed there.

     Returns:
         A list of tuples in file order.
     """
     indivs = self.study.pops.getIndivs(pop)
     setDir = (MEGA.cacheDB + "/sets/" + karyo.karyotype + "/" +
               MEGA.getHash(indivs))
     # A set keeps the membership test below constant-time.
     distantIndivs = set()
     if doDistant:
         with open(setDir + "/distant." + str(cut)) as f:
             for l in f:
                 famId, sampId = tuple(l.rstrip().split("\t"))
                 distantIndivs.add((famId, sampId))
     retained = []
     with open(setDir + "/ibd.retain." + str(cut)) as f:
         for l in f:
             famId, sampId, filePop = tuple(l.rstrip().split(" ")[:3])
             if (famId, sampId) in distantIndivs:
                 continue
             if withPop:
                 retained.append((famId, sampId, filePop))
             else:
                 retained.append((famId, sampId))
     return retained
Beispiel #12
0
def getCase(f):
    """Parse tab-separated lines into parallel position and value lists.

    Empty fields are dropped before indexing; the second remaining field is
    read as an integer position and the third as a float value.

    Args:
        f: Iterable of text lines.

    Returns:
        A ``(positions, values)`` tuple of lists in input order.
    """
    positions = []
    values = []
    for line in f:
        fields = [tok for tok in line.rstrip().split("\t") if tok]
        position = int(fields[1])
        value = float(fields[2])
        positions.append(position)
        values.append(value)
    return positions, values


# Load the study and hash the individuals used for its iHS statistic; the
# hash is later passed to doChro, which uses it as a cache directory name.
study = MEGA.study.Study(studyName)
myHash = MEGA.getHash(study.getStatIndivs("iHS", studyPop))


def doChro(myHash, chro):
    """Return the parsed ``.iHS`` file of one chromosome.

    Args:
        myHash: Hash of the individual set (cache directory name).
        chro: Chromosome number.

    Returns:
        Whatever ``getCase`` returns for the file.
    """
    # The handle is closed once getCase has consumed it.
    with open("%s/sets/%s/%d.iHS" % (MEGA.cacheDB, myHash, chro)) as f:
        return getCase(f)


# Concatenate the per-chromosome results into two flat lists, printing the
# number of entries contributed by each chromosome.
x, y = [], []
for i in chros:
    cx, cy = doChro(myHash, i)
    print(len(cx))
    x += cx
    y += cy

print(1)
Beispiel #13
0
# Put every configured path (with "~" expanded) at the front of the module
# search path; later entries of `paths` end up ahead of earlier ones.
for path in paths:
    sys.path.insert(0, os.path.expanduser(path))

import MEGA
from MEGA import web

# Render the "Populations" page: one table cell per population, six cells
# per row, each cell linking to every karyotype that has a cached set.
web.sendPreamble(None, None, "Populations")

print("<h2>Populations</h2>")

pops = MEGA.pops
# Every entry of the sets directory is treated as a karyotype name.
karyos = os.listdir(MEGA.cacheDB + "/sets/")
print('<table border="1">')
curr = 1
print("<tr>")
for pop in pops:
    indivs = MEGA.getIndivs(pop)
    hash = MEGA.getHash(indivs)
    print('<td>')
    print(pop)
    # NOTE(review): `start` is assigned but not read in the visible code.
    start = False
    for myKaryo in karyos:
        # Only link karyotypes for which this set has a cache directory.
        if os.path.isdir('%s/sets/%s/%s' % (MEGA.cacheDB, myKaryo, hash)):
            print("%s" % web.getPopURL(pop, myKaryo, hash, onlyKaryo=True))
    print('</td>')
    # Start a new table row after every sixth population.
    if curr % 6 == 0:
        print("</tr><tr>")
    curr += 1
print("</tr>")
print("</table>")
Beispiel #14
0
def doZoom(study, force, name):
    """Build the ``zoom.txt`` table of one configured zoom region.

    The region (chromosome, start, end, population expression, IBD cut) is
    read from ``study.zoom.general[name]``. Phased haplotypes restricted to
    the region's SNPs and individuals are written to ``<chro>.gz`` in the
    ``zoom`` directory, the statistics listed in ``study.zoom.stats[name]``
    are collected, and one row per SNP is written to ``zoom.txt``, which is
    finally copied into the cache directory of the individual set.

    Args:
        study: Study object; ``zoom``, ``ibd``, ``pops``, ``iHSConf``,
            ``getPhasePop``, ``getStatIndivs`` and ``name`` are used.
        force: Currently unused (see the XXX marker below).
        name: Key of the zoom region in ``study.zoom``.
    """
    logging.info("Starting Zoom - %s", name)
    #XXX FORCE
    try:
        os.mkdir("zoom")
    except OSError:
        pass  # Already exists
    # NOTE(review): the working directory is left inside zoom/ on return -
    # confirm that callers expect this.
    os.chdir("zoom")
    chro, start, end, pop, ibd = study.zoom.general[name]
    indivs = []
    for myPop in pop.split("+"):
        if ibd:
            indivs.extend(study.ibd.getIndivsPop(myPop, ibd, False))
        else:
            indivs.extend(study.pops.getIndivs(myPop))
    setHash = MEGA.getHash(indivs)

    # SNPs whose position lies inside [start, end].
    poses = {}
    snps = []
    snpAtPos = {}
    for rs, content in ensembl.getSNPs(chro).items():
        pos = content[0]
        if start <= pos <= end:
            snps.append(rs)
            poses[rs] = pos
            snpAtPos[pos] = rs
    ancAlls = ensembl.getAncs(chro)
    ancs = {snp: ancAlls.get(snp, None) for snp in snps}

    # Genetic positions of those SNPs, matched by physical position.
    gPoses = {}
    with open(MEGA.geneticMapDB + "/37-%d.map" % chro) as f:
        f.readline()  # first line is skipped
        for l in f:
            toks = l.rstrip().split("\t")
            pos = int(toks[1])
            if pos < start:
                continue
            if pos > end:
                # Stops at the first position past the window; this relies
                # on the map being sorted by position (TODO confirm).
                break
            rs = snpAtPos.get(pos, "")
            if rs == "":
                continue
            gPoses[rs] = float(toks[3])

    #assuming ihs phase.conf exists
    source = study.iHSConf["source"]
    refPop = study.getPhasePop("iHS", pop)
    if refPop != "shapeIt":
        phasedFile = "%s/%s/%s-%d.gz" % (MEGA.phaseDB, source, refPop, chro)
    else:
        phasedFile = "%s/%s/%d.gz" % (MEGA.phaseDB, source, chro)
    inds = [x[1] for x in indivs]
    # NOTE(review): the handles are passed exactly as before; whether
    # project_beagle_phase closes them is not visible here.
    project_beagle_phase(gzip.open("%d.gz" % chro, "w"),
                         gzip.open(phasedFile),
                         ind_retain=inds, snp_retain=snps,
                         want_phased=True, is_phased=True)

    # SNPs that actually made it into the projected file. Text mode is
    # required on Python 3, where the default binary mode yields bytes and
    # split("\t") with a str separator raises TypeError.
    realSNPs = set()
    with gzip.open("%d.gz" % chro, "rt") as f:
        f.readline()
        f.readline()
        for l in f:
            realSNPs.add(l.rstrip().split("\t")[1])

    stats = study.zoom.stats[name]
    vals = {}
    for stat, params in stats:
        if stat == "iHS":
            vals["iHS"] = {}
            myHash = MEGA.getHash(study.getStatIndivs("iHS", pop))
            with open(MEGA.cacheDB + "/sets/" + karyo.karyotype + "/" +
                      myHash + "/" + str(chro) + ".iHS") as f:
                for l in f:
                    toks = l.rstrip().split("\t")
                    if toks[0] in realSNPs:
                        vals["iHS"][toks[0]] = float(toks[2])
        elif stat == "xpEHH":
            vals["xpEHH"] = {}
            myHash = MEGA.getHash(study.getStatIndivs("xpEHH", pop))
            with open(MEGA.cacheDB + "/sets/" + karyo.karyotype + "/" +
                      myHash + "/" + params[0] + "-" + str(chro) +
                      ".xpEHH") as f:
                for l in f:
                    toks = l.rstrip().split("\t")
                    if toks[0] in realSNPs:
                        vals["xpEHH"][toks[0]] = float(toks[2])
        elif stat == "EHH":
            rsId = params[0]
            allele = params[1]
            vals["EHH"] = calcEHH(chro, poses, rsId, allele)

    statNames = sorted(vals)
    with open("zoom.txt", "w") as w, gzip.open("%d.gz" % chro, "rt") as f:
        f.readline()
        header = f.readline().rstrip().split("\t")[2:]
        w.write("RS\tpos\tgPos\tanc\t")
        for statName in statNames:
            w.write(statName + "\t")
        w.write("\t".join(header))
        w.write("\n")
        for l in f:
            toks = l.rstrip().split("\t")
            rs = toks[1]
            haplos = toks[2:]
            w.write("%s\t%d\t%s\t%s\t" % (rs, poses.get(rs, 0),
                    str(gPoses.get(rs, "")), ancs.get(rs, "")))
            for statName in statNames:
                w.write(str(vals[statName].get(rs, "")) + "\t")
            w.write("\t".join(haplos))
            w.write("\n")
    shutil.copyfile("zoom.txt", MEGA.cacheDB + "/sets/" + karyo.karyotype +
                    "/" + setHash + "/" +
                    "%s-%d-%d-%d.zoom" % (study.name, chro, start, end))
Beispiel #15
0
def getRefiHS(study, name):
    """Return an HTML link to the ``pop.py`` page of an iHS individual set.

    Args:
        study: Study object providing ``getStatIndivs``.
        name: Population name; also shown in the link text.

    Returns:
        An ``<a>`` element followed by ``<br>``.
    """
    indivs = study.getStatIndivs("iHS", name)
    md5 = MEGA.getHash(indivs)
    template = '<a href="%s/pop.py?md5=%s">iHS %s</a><br>'
    return template % (MEGA.webRoot, md5, name)
Beispiel #16
0
def doIBDs(ibdlist):
    """Run the IBD pipeline for the listed populations and cache the results.

    Every population that is forced, or that lacks an ``ibd.retain.<cut>``
    file for at least one cut, is written to ``lst``. When the list is not
    empty the ``doRelated`` steps are run through the shell and their output
    in ``ibdata/`` is copied into the cache directory of each population's
    individual set.

    Args:
        ibdlist: Iterable of ``(study, force, pop)`` triples.
    """
    def _setDir(setHash):
        # Cache directory of one individual set for the current karyotype.
        return MEGA.cacheDB + "/sets/" + karyo.karyotype + "/" + setHash

    logging.info("Starting IBD")
    doSomething = False
    popsToDo = {}
    with open("lst", "w") as w:
        for study, force, pop in ibdlist:
            indivs = study.pops.getIndivs(pop)
            #this is OK, but it is really study.ibd.pops
            setHash = MEGA.getHash(indivs)
            # Skip populations whose retain file exists for every cut.
            if not force and all(
                    os.path.isfile(_setDir(setHash) + "/ibd.retain." + str(cut))
                    for cut in study.ibd.cuts):
                continue

            doSomething = True
            popsToDo[pop] = setHash
            for famId, sampId in indivs:
                w.write("%s\t%s\t%s\n" % (famId, sampId, pop))
    if doSomething:
        logging.debug("Running doRelated split")
        os.system("cat lst |python3 %s/doRelated split" % (MEGA.inbreedingScripts))
        logging.debug("Running doRelated IBD")
        os.system("cat lst |python3 %s/doRelated IBD" % (MEGA.inbreedingScripts))
        logging.debug("Running doRelated statIBD")
        os.system("cat lst |python3 %s/doRelated statIBD > ibdata/statIBD" %
                  (MEGA.inbreedingScripts))
        for pop, setHash in popsToDo.items():
            pop_ = pop.replace(" ", "_")
            genomeSrc = "ibdata/%s.genome" % (pop_,)
            genomeDst = _setDir(setHash) + "/genome"
            logging.debug("copy %s %s", genomeSrc, genomeDst)
            shutil.copyfile(genomeSrc, genomeDst)
        # NOTE: `study` is the study of the last entry of ibdlist here.
        for cut in study.ibd.cuts:
            logging.debug("Running doRelated removeCloserIBD %f", cut)
            os.system("cat lst |python3 %s/doRelated removeCloserIBD %f" %
                      (MEGA.inbreedingScripts, cut))
            # Population name -> remaining tab-separated statistics columns.
            ibds = {}
            with open("ibdata/statIBD") as sIBD:
                for l in sIBD:
                    toks = l.strip().split("\t")
                    ibds[toks[0]] = "\t".join([x.strip() for x in toks[1:]])
            for pop, setHash in popsToDo.items():
                if pop not in ibds:
                    continue
                myDir = _setDir(setHash)
                with open(myDir + "/IBDstat." + str(cut), "w") as w:
                    w.write(ibds[pop] + "\n")
                pop_ = pop.replace(" ", "_")
                distantSrc = "ibdata/%s.distant" % (pop_,)
                distantDst = myDir + "/distant." + str(cut)
                logging.debug("copy %s %s", distantSrc, distantDst)
                shutil.copyfile(distantSrc, distantDst)
                shutil.copyfile("ibdata/%s_ibd.txt" % (pop_,),
                                myDir + "/ibd.retain." + str(cut))
    logging.info("IBD Done")
Beispiel #17
0
def getRefPop(indivs, karyo, popName=None):
    """Return an HTML link to the ``pop.py`` page of an individual set.

    Args:
        indivs: Individuals whose hash identifies the set.
        karyo: Karyotype identifier placed in the ``k`` query parameter.
        popName: Link text; when falsy it is looked up with
            ``MEGA.getNameFromMD5``.

    Returns:
        An ``<a>`` element followed by ``<br>``.
    """
    md5 = MEGA.getHash(indivs)
    label = popName if popName else MEGA.getNameFromMD5(md5)
    template = '<a href="%s/pop.py?md5=%s&k=%s">%s</a><br>'
    return template % (MEGA.webRoot, md5, karyo, label)
Beispiel #18
0
 def copy(self):
     """Copy each ``hwe-<pop>`` file into the cache as ``hwe.bz2``.

     The destination is the cache directory of the population's individual
     set for the current karyotype.
     """
     destTemplate = '%s/sets/%s/%s/hwe.bz2'
     for pop in self.pops:
         setHash = MEGA.getHash(self.study.pops.getIndivs(pop))
         source = 'hwe-' + pop
         target = destTemplate % (MEGA.cacheDB, karyo.karyotype, setHash)
         shutil.copyfile(source, target)
Beispiel #19
0
# Study overview page: description, map, and a table with one row per
# population showing its size and the number of individuals retained at
# each IBD cut.
print("<h1>%s</h1>" % (studyName,))
print(study.desc)
print("<br>")
print(study.summary)

print("<h2>Map</h2>")
web.addMap(study.pops.pops)
print("<h2>Populations</h2>")
cuts = study.ibd.cuts
print('<table border="1">')
print('<tr><td>Population</td><td>#</td>')
for cut in cuts:
    print('<td>', cut, '</td>')
print('</tr>')
for pop in study.pops.pops:
    md5 = MEGA.getHash(study.pops.getIndivs(pop))
    print("<tr>")
    print("<td>", web.getPopURL(pop, karyo.karyotype, md5), "</td>")
    print("<td>", len(study.pops.getIndivs(pop)), "</td>")
    for cut in cuts:
        try:
            print('<td>', len(study.ibd.getIndivsPop(pop, cut)), '</td>')
        except IOError:
            # No retain file for this cut: leave the cell empty.
            print('<td></td>')
    print("</tr>")
# Every cell and row is already closed inside the loop, so only the table
# itself is closed here (a stray </td></tr> used to be emitted).
print("</table>")
print("<br>")
print(web.linkFile(studyName, "ibd.problems", "IBD issues (>12.5)", True))

print('<h2>PCA</h2>')
Beispiel #20
0
def doChro(myHash, chro, refs):
    """Group the uiHS values of one chromosome by their ``f0`` value.

    Args:
        myHash: Hash of the individual set (cache directory name).
        chro: Chromosome number.
        refs: Dict mapping ``f0`` to a list of values; updated in place.

    Returns:
        The same ``refs`` dict.
    """
    studyCase = getCase(
        open("%s/sets/%s/%d.uiHS" % (MEGA.cacheDB, myHash, chro)))

    for f0, i1, i2, uihs in studyCase:
        # NOTE(review): this used to branch on f0 < 0.5 with two identical
        # bodies; confirm no different handling was intended for either side.
        refs.setdefault(f0, []).append(uihs)
    return refs

# For every reference population, pool the values of all chromosomes by f0
# and derive per-f0 summary statistics.
for ref in refPops:
    vals = {}
    for i in range(maxChro):
        k = i+1  # chromosomes are numbered from 1
        # NOTE(review): the hash does not depend on i and could be computed
        # once per reference population.
        myHash = MEGA.getHash(study.getStatIndivs("iHS", ref))
        doChro(myHash, k, vals)

    fs = list(vals.keys())
    fs.sort()
    x = []
    # NOTE(review): ymax, ymed, y80 and ycnt are not filled in the visible
    # code; the loop body may continue beyond this excerpt.
    ymax = []
    y20 = []
    ymed = []
    y80 = []
    yvar = []
    ycnt = []
    for f in fs:
        x.append(f)
        # yvar holds the standard deviation (numpy.std) of the pooled values.
        yvar.append(numpy.std(vals[f]))
        y20.append(numpy.percentile(vals[f],20))
Beispiel #21
0
 def getHash(popStudy):
     """Return the ``MEGA.getHash`` of the individuals of ``popStudy``.

     The individuals are looked up through ``study.pops.getIndivs``.
     """
     indivs = study.pops.getIndivs(popStudy)
     return MEGA.getHash(indivs)