コード例 #1
0
ファイル: impute.py プロジェクト: zhimenggan/jcvi
def passthrough(args):
    """
    %prog passthrough chrY.vcf chrY.new.vcf

    Pass through Y and MT vcf.
    """
    # NOTE: the docstring above doubles as the command-line usage text
    # (it is handed to OptionParser), so it is kept verbatim.
    #
    # Copies header lines unchanged and rewrites every record so that:
    #   * FILTER is "PASS" and FORMAT is "GT:GP";
    #   * the genotype is written with "|" instead of "/" and followed by
    #     a three-value probability list that is 1 for the observed
    #     genotype and 0 for the other two.
    # Raises ValueError (from list.index) if a record's genotype is not one
    # of "0/0", "0/1" or "1/1".
    p = OptionParser(passthrough.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    vcffile, newvcffile = args
    gg = ["0/0", "0/1", "1/1"]
    # Context managers guarantee both handles are closed, including when a
    # malformed record raises part-way through the file.
    with open(vcffile) as fp, open(newvcffile, "w") as fw:
        for row in fp:
            if row[0] == "#":
                # Header/meta line: emit as-is (minus trailing whitespace).
                print(row.strip(), file=fw)
                continue

            v = VcfLine(row)
            v.filter = "PASS"
            v.format = "GT:GP"
            # One-hot probabilities, ordered like `gg`.
            probs = [0] * 3
            probs[gg.index(v.genotype)] = 1
            gp = ",".join("{0:.3f}".format(x) for x in probs)
            v.genotype = v.genotype.replace("/", "|") + ":{0}".format(gp)
            print(v, file=fw)
コード例 #2
0
ファイル: impute.py プロジェクト: zhimenggan/jcvi
def validate(args):
    """
    %prog validate imputed.vcf withheld.vcf

    Validate imputation against withheld variants.
    """
    # NOTE: the docstring above doubles as the command-line usage text
    # (it is handed to OptionParser), so it is kept verbatim.
    #
    # Workflow:
    #   1. Load the withheld VCF into a {(seqid, pos): genotype} lookup.
    #   2. Walk the imputed VCF; for the first record seen at each
    #      (seqid, pos) that is also in the lookup, compare the imputed GT
    #      (first ":"-separated field, phased alleles sorted and re-joined
    #      with "/") against the withheld genotype.
    #   3. Print mismatching rows to stderr and log the concordance rate.
    p = OptionParser(validate.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    imputed, withheld = args
    register = {}
    # `with` ensures the withheld file is closed once it has been indexed.
    with open(withheld) as fp:
        for row in fp:
            if row[0] == "#":
                continue
            v = VcfLine(row)
            register[(v.seqid, v.pos)] = v.genotype

    logging.debug("Imported {0} records from `{1}`".\
                    format(len(register), withheld))

    # NOTE(review): must_open is a project helper; whether the handle it
    # returns is safe to close here cannot be determined from this block,
    # so it is left open exactly as in the original code.
    fp = must_open(imputed)
    hit = concordant = 0
    seen = set()
    for row in fp:
        if row[0] == "#":
            continue
        v = VcfLine(row)
        key = (v.seqid, v.pos)
        # Only the first record at a given position is evaluated.
        if key in seen:
            continue
        seen.add(key)
        if key not in register:
            continue
        truth = register[key]
        # Use a dedicated name for the called genotype rather than
        # rebinding `imputed`, which holds the input file path.
        call = v.genotype.split(":")[0]
        if "|" in call:
            # Phased call: sort the alleles so that e.g. "1|0" compares
            # equal to an unphased "0/1".
            call = "/".join(sorted(call.split("|")))
        hit += 1
        if truth == call:
            concordant += 1
        else:
            print(row.strip(), "truth={0}".format(truth), file=sys.stderr)

    logging.debug("Total concordant: {0}".\
            format(percentage(concordant, hit)))
コード例 #3
0
def mitocompile(args):
    """
    %prog mitocompile *.vcf.gz

    Extract information about deletions in vcf file.
    """
    # NOTE: the docstring above doubles as the command-line usage text
    # (it is handed to OptionParser).
    #
    # For every input VCF this prints one tab-separated line per record with
    # the columns: vcf, samplekey, depth, seqid, pos, alt, svlen, pe, sr.
    #   * depth comes from the first line of the sibling ".depth" file
    #     (path derived by replacing ".sv.vcf.gz" in the VCF path);
    #   * samplekey is the VCF basename up to the first "_";
    #   * svlen/pe/sr are read from the record's INFO field. A missing SVLEN
    #     is printed as "None"; a missing PE or SR raises KeyError.
    from jcvi.formats.vcf import VcfLine
    from six.moves.urllib.parse import parse_qsl

    p = OptionParser(mitocompile.__doc__)
    opts, args = p.parse_args(args)

    if len(args) < 1:
        sys.exit(not p.print_help())

    vcfs = args
    print("\t".join("vcf samplekey depth seqid pos alt svlen pe sr".split()))
    for i, vcf in enumerate(vcfs):
        # Progress message every 100 files.
        if (i + 1) % 100 == 0:
            logging.debug("Process `{}` [{}]".format(vcf, percentage(i + 1, len(vcfs))))
        depthfile = vcf.replace(".sv.vcf.gz", ".depth")
        fp = must_open(depthfile)
        # Built-in next() works on both Python 2 and 3; the `.next()` method
        # only exists on Python 2 file objects.
        chrm, depth = next(fp).split()
        depth = int(float(depth))
        samplekey = op.basename(vcf).split("_")[0]

        fp = must_open(vcf)
        for row in fp:
            if row[0] == "#":
                continue
            v = VcfLine(row)
            # INFO entries are ";"-separated. Since Python 3.10 parse_qsl
            # splits on "&" only, so normalise the separator first; older
            # interpreters accept both and therefore behave as before.
            info = dict(parse_qsl(v.info.replace(";", "&")))
            print(
                "\t".join(
                    str(x)
                    for x in (
                        vcf,
                        samplekey,
                        depth,
                        v.seqid,
                        v.pos,
                        v.alt,
                        info.get("SVLEN"),
                        info["PE"],
                        info["SR"],
                    )
                )
            )