# Example no. 1
0
def _loadZoomStat(path, wantedSNPs):
    """Read a tab-separated per-SNP statistic file, keeping only wanted SNPs.

    Field 0 of each line is taken as the SNP id and field 2 is parsed as a
    float. Returns a dict mapping SNP id -> value for ids in *wantedSNPs*.
    """
    values = {}
    with open(path) as statFile:
        for line in statFile:
            toks = line.rstrip().split("\t")
            if toks[0] in wantedSNPs:
                values[toks[0]] = float(toks[2])
    return values


def doZoom(study, force, name):
    """Build the "zoom" table for the region registered under *name*.

    The region is looked up in ``study.zoom.general[name]`` as a
    ``(chro, start, end, pop, ibd)`` tuple, where ``pop`` may join several
    population names with "+". The function:

    1. collects the individuals of those populations,
    2. selects the SNPs whose position lies in ``[start, end]``,
    3. looks up ancestral alleles and genetic-map positions for them,
    4. projects the phased file onto those individuals/SNPs into
       ``<chro>.gz`` via ``project_beagle_phase``,
    5. gathers the statistics listed in ``study.zoom.stats[name]``
       ("iHS", "xpEHH" and "EHH" are recognised; others are ignored),
    6. writes everything to ``zoom.txt`` and copies it into the cache
       directory as ``<study.name>-<chro>-<start>-<end>.zoom``.

    Side effects: creates the ``zoom`` directory if needed and changes the
    process working directory into it; the previous directory is NOT
    restored on return.

    Args:
        study: project study object (provides ``zoom``, ``ibd``, ``pops``,
            ``iHSConf``, ``getPhasePop``, ``getStatIndivs`` and ``name``).
        force: currently unused (see the XXX marker below).
        name: key of the zoom region in ``study.zoom``.

    Returns:
        None.
    """
    logging.info("Starting Zoom - %s", name)
    #XXX FORCE
    # Only "already exists" is tolerated; any other mkdir failure propagates
    # instead of being silently swallowed.
    os.makedirs("zoom", exist_ok=True)
    os.chdir("zoom")
    chro, start, end, pop, ibd = study.zoom.general[name]

    # Gather individuals across every "+"-separated population.
    indivs = []
    for myPop in pop.split("+"):
        if ibd:
            indivs.extend(study.ibd.getIndivsPop(myPop, ibd, False))
        else:
            indivs.extend(study.pops.getIndivs(myPop))
    indivsHash = MEGA.getHash(indivs)

    # SNPs falling inside the requested window, indexed both ways.
    posAlls = ensembl.getSNPs(chro)
    poses = {}
    snps = []
    snpAtPos = {}
    for rs, content in posAlls.items():
        pos = content[0]
        if start <= pos <= end:
            snps.append(rs)
            poses[rs] = pos
            snpAtPos[pos] = rs
    ancAlls = ensembl.getAncs(chro)
    ancs = {snp: ancAlls.get(snp, None) for snp in snps}

    # Genetic-map positions for the selected SNPs. The loop stops at the
    # first map position past `end`, so it relies on the map being sorted
    # by position.
    gPoses = {}
    with open(MEGA.geneticMapDB + "/37-%d.map" % chro) as mapFile:
        mapFile.readline()  # skip header line
        for line in mapFile:
            toks = line.rstrip().split("\t")
            pos = int(toks[1])
            if pos < start:
                continue
            if pos > end:
                break
            rs = snpAtPos.get(pos, "")
            if rs == "":
                continue
            gPoses[rs] = float(toks[3])

    #assuming ihs phase.conf exists
    source = study.iHSConf["source"]
    refPop = study.getPhasePop("iHS", pop)
    if refPop != "shapeIt":
        phasedFile = "%s/%s/%s-%d.gz" % (MEGA.phaseDB, source, refPop, chro)
    else:
        phasedFile = "%s/%s/%d.gz" % (MEGA.phaseDB, source, chro)
    inds = [x[1] for x in indivs]
    zoomGz = "%d.gz" % chro
    # NOTE(review): the open modes passed to project_beagle_phase are kept
    # exactly as before (binary); confirm they match what it expects.
    # The `with` guarantees the output is flushed and closed before it is
    # read back below.
    with gzip.open(zoomGz, "w") as phasedOut, \
            gzip.open(phasedFile) as phasedIn:
        project_beagle_phase(phasedOut, phasedIn,
                             ind_retain=inds, snp_retain=snps,
                             want_phased=True, is_phased=True)

    # SNP ids that actually made it into the projected file. Opened in text
    # mode: the default gzip mode is binary, and splitting bytes on a str
    # separator raises TypeError on Python 3.
    realSNPs = set()
    with gzip.open(zoomGz, "rt") as projected:
        projected.readline()
        projected.readline()
        for line in projected:
            realSNPs.add(line.rstrip().split("\t")[1])

    cacheSets = MEGA.cacheDB + "/sets/" + karyo.karyotype + "/"
    vals = {}
    for stat, params in study.zoom.stats[name]:
        if stat == "iHS":
            myHash = MEGA.getHash(study.getStatIndivs("iHS", pop))
            vals["iHS"] = _loadZoomStat(
                cacheSets + myHash + "/" + str(chro) + ".iHS", realSNPs)
        elif stat == "xpEHH":
            myHash = MEGA.getHash(study.getStatIndivs("xpEHH", pop))
            vals["xpEHH"] = _loadZoomStat(
                cacheSets + myHash + "/" + params[0] + "-" + str(chro) +
                ".xpEHH", realSNPs)
        elif stat == "EHH":
            rsId = params[0]
            allele = params[1]
            vals["EHH"] = calcEHH(chro, poses, rsId, allele)

    # One output row per projected SNP: fixed columns, then one column per
    # statistic (sorted by name), then the haplotype columns.
    statNames = sorted(vals)
    with open("zoom.txt", "w") as w, gzip.open(zoomGz, "rt") as projected:
        projected.readline()
        header = projected.readline().rstrip().split("\t")[2:]
        w.write("RS\tpos\tgPos\tanc\t")
        for statName in statNames:
            w.write(statName + "\t")
        w.write("\t".join(header))
        w.write("\n")
        for line in projected:
            toks = line.rstrip().split("\t")
            rs = toks[1]
            haplos = toks[2:]
            w.write("%s\t%d\t%s\t%s\t" % (rs, poses.get(rs, 0),
                    str(gPoses.get(rs, "")), ancs.get(rs, "")))
            for statName in statNames:
                w.write(str(vals[statName].get(rs, "")) + "\t")
            w.write("\t".join(haplos))
            w.write("\n")
    shutil.copyfile("zoom.txt", cacheSets + indivsHash + "/" +
                    "%s-%d-%d-%d.zoom" % (study.name, chro, start, end))
import sys

from MEGA import ensembl


# Filter script: reads a two-header-line, whitespace/tab separated file on
# stdin whose second column looks like "<something>:<position>", replaces that
# column with the SNP id known at that position on chromosome `chro`, and
# writes the matching lines (tab-joined) to stdout. Lines whose position has
# no known SNP are dropped.
if len(sys.argv) != 2:
    # Stop here: continuing would crash on sys.argv[1] or silently ignore
    # extra arguments. The message goes to stderr so it never mixes with
    # the data written to stdout.
    sys.exit("python %s chro" % sys.argv[0])

chro = int(sys.argv[1])

# position -> SNP id. Keys are normalised to int so they compare equal to
# the positions parsed from the input text below.
posAlls = ensembl.getSNPs(chro)
pos2rs = {}
for rs, rsInfo in posAlls.items():
    pos2rs[int(rsInfo[0])] = rs


w = sys.stdout
fp = sys.stdin
w.write(fp.readline())  # header 1
w.write(fp.readline())  # header 2
posin = 0   # lines kept (position matched a known SNP)
posout = 0  # lines dropped
for l in fp:
    toks = l.rstrip().replace("\t", " ").split(" ")
    try:
        # The input gives the position as text; the lookup table is keyed
        # by number, so convert before testing membership.
        pos = int(toks[1].split(":")[1])
    except ValueError:
        pos = None  # non-numeric position: cannot match, counted as dropped
    if pos in pos2rs:
        posin += 1
        toks[1] = str(pos2rs[pos])
        w.write("\t".join(toks) + "\n")
    else:
        posout += 1