def analyse(args):
    """Report which variants sit behind one drug's false negatives/positives.

    Reads ``results.json`` from the current working directory (the file
    ``calculate`` writes), prints the tp/fn/tn/fp sample counts for
    ``args.drug``, then opens every false-negative and false-positive sample's
    ``<sample>.results.json`` and tallies the variants found there:

    * false negatives: entries of ``other_variants`` whose locus is linked to
      the drug in the BED file;
    * false positives: entries of ``dr_variants`` whose ``drug`` field equals
      the drug (case-insensitively on the variant side).

    The tallies are printed in ascending order of count.

    Args:
        args: Namespace-like object providing ``bed``, ``drug``, ``dir`` and
            ``itol``. When ``dir`` is empty/None the per-sample files are
            looked up in the current working directory.
    """
    drug_loci = pp.load_bed(args.bed, [6], 4)
    for locus in drug_loci:
        drug_loci[locus] = [x.lower() for x in drug_loci[locus][0].split(";")]
    print(drug_loci)

    drug = args.drug
    with open("results.json") as handle:
        results = json.load(handle)
    calls = results[drug]

    print("Number true positives: %s" % len(calls["tp"]))
    print("Number false negatives: %s" % len(calls["fn"]))
    print("Number true negatives: %s" % len(calls["tn"]))
    print("Number false positives: %s" % len(calls["fp"]))

    # Only prepend a directory when one was given, so an unset ``dir`` means
    # "current directory" rather than "None/..." or the filesystem root.
    prefix = "%s/" % args.dir if args.dir else ""

    fn = defaultdict(int)
    for s in calls["fn"]:
        print("%s\tFalse_negative" % s)
        with open("%s%s.results.json" % (prefix, s)) as handle:
            sample_res = json.load(handle)
        for var in sample_res["other_variants"]:
            # Substring match of the drug against each drug name of the locus.
            if not any(drug in x for x in drug_loci[var["locus_tag"]]):
                continue
            fn[(var["gene"], var["locus_tag"], var["change"])] += 1

    fp = defaultdict(int)
    for s in calls["fp"]:
        print("%s\tFalse_positive" % s)
        with open("%s%s.results.json" % (prefix, s)) as handle:
            sample_res = json.load(handle)
        for var in sample_res["dr_variants"]:
            if drug != var["drug"].lower():
                continue
            fp[(var["gene"], var["locus_tag"], var["change"])] += 1

    for key in sorted(fn, key=fn.get):
        gene, rv, var = key
        if gene == ".":
            # No gene name available: fall back to the locus tag.
            gene = rv
        print("False_Negative\t%s\t%s\t%s" % (gene, var, fn[key]))
    for key in sorted(fp, key=fp.get):
        gene, rv, var = key
        print("False_Positive\t%s\t%s\t%s" % (gene, var, fp[key]))

    if args.itol:
        write_itol(calls["fp"], calls["fn"], args.drug)
def _record_call(results, counts, key, sample, dst_call, gst_call):
    """Add *sample* to the confusion-matrix cell of *key* for a pair of calls.

    ``dst_call`` is the call taken from the DST table and ``gst_call`` the call
    derived from the sample's ``dr_variants``. Both must be ``"0"`` or ``"1"``;
    any other combination (e.g. ``"NA"``) leaves ``results`` and ``counts``
    untouched.
    """
    category = {
        ("1", "1"): "tp",
        ("0", "1"): "fp",
        ("1", "0"): "fn",
        ("0", "0"): "tn",
    }.get((dst_call, gst_call))
    if category is None:
        return
    results[key][category].append(sample)
    counts[key][category] += 1


def calculate(args):
    """Compare DST calls with per-sample profiling results and print a summary.

    For every sample listed in ``args.samples`` the function loads
    ``<dir>/<sample>.results.json``, marks drugs as ``"NA"`` when one of their
    loci has ``missing_regions`` above ``args.miss``, and files the sample under
    tp/tn/fp/fn for each drug as well as for the derived categories ``flq``,
    ``mdr``, ``xdr`` and ``sus``.

    Side effects:
        * writes ``samples_not_found.txt`` (one sample per line whose result
          file is missing),
        * writes ``results.json`` (sample lists) and ``counts.json`` (counts),
        * mutates the table returned by ``load_dst`` in place when a drug is
          set to ``"NA"``,
        * prints a tab-separated sensitivity/specificity table to stdout.

    Relies on the module-level names ``fluoroquinolones``, ``aminoglycosides``
    and ``first_line`` defined elsewhere in this file.

    Args:
        args: Namespace-like object providing ``samples``, ``dst``, ``bed``,
            ``dir``, ``miss`` and ``drugs``. When ``dir`` is empty/None the
            result files are looked up in the current working directory.
    """
    dst = load_dst(args.dst)
    drug_loci = pp.load_bed(args.bed, [6], 4)  # {'Rv0668': ('rifampicin')}
    with open(args.samples) as handle:
        samples = [line.rstrip() for line in handle]
    ext = ".results.json"
    drugs = [d.lower() for d in dst[samples[0]].keys()]
    categories = drugs + ["flq", "mdr", "xdr", "sus"]
    results = {
        d: {"tp": [], "tn": [], "fp": [], "fn": []} for d in categories
    }
    counts = {d: {"tp": 0, "tn": 0, "fp": 0, "fn": 0} for d in categories}
    # Only prepend a directory when one was given; an unconditional "%s/%s"
    # would resolve to the filesystem root for an empty prefix.
    pre = "%s/" % args.dir if args.dir else ""

    with open("samples_not_found.txt", "w") as not_found:
        for s in tqdm(samples):
            res_file = "%s%s%s" % (pre, s, ext)
            if pp.nofile(res_file):
                pp.log("Warning: %s does not exist!" % res_file)
                not_found.write("%s\n" % s)
                continue
            with open(res_file) as handle:
                res = json.load(handle)

            # NOTE(review): profile_vcf() in this file fills "missing_regions"
            # with the string "NA"; comparing that against a numeric
            # ``args.miss`` raises TypeError on Python 3 - confirm the
            # expected producer of these files.
            # NOTE(review): analyse() splits this BED column on ";" and
            # lowercases it, whereas this code splits on "," without
            # lowercasing - confirm which delimiter the BED file uses.
            na_drugs = set()
            for locus in drug_loci:
                if res["missing_regions"][locus] > args.miss:
                    na_drugs.update(drug_loci[locus][0].split(","))
            resistant_drugs = {d["drug"].lower() for d in res["dr_variants"]}

            sample_dst = dst[s]
            for d in drugs:
                if d in na_drugs:
                    sample_dst[d] = "NA"

            #### Individual drugs ####
            for d in drugs:
                _record_call(results, counts, d, s, sample_dst[d],
                             "1" if d in resistant_drugs else "0")

            #### Fluoroquinolones ####
            flq_calls = [
                sample_dst[d] for d in fluoroquinolones if d in sample_dst
            ]
            flq_resistant = "1" in flq_calls
            flq_all_na = all(c == "NA" for c in flq_calls)
            if flq_all_na or (flq_resistant and "0" in flq_calls):
                # Nothing tested, or the individual drugs disagree.
                dst_flq = "NA"
            else:
                dst_flq = "1" if flq_resistant else "0"
            gst_flq = "1" if any(
                d in resistant_drugs for d in fluoroquinolones) else "0"
            _record_call(results, counts, "flq", s, dst_flq, gst_flq)

            #### MDR & XDR ####
            if sample_dst["rifampicin"] == "NA" or sample_dst[
                    "isoniazid"] == "NA":
                dst_mdr = "NA"
            elif sample_dst["rifampicin"] == "1" and sample_dst[
                    "isoniazid"] == "1":
                dst_mdr = "1"
            else:
                dst_mdr = "0"

            amg_calls = [
                sample_dst[d] for d in aminoglycosides if d in sample_dst
            ]
            amg_all_na = all(c == "NA" for c in amg_calls)
            if dst_mdr == "NA" or flq_all_na or amg_all_na:
                dst_xdr = "NA"
            elif dst_mdr == "1" and flq_resistant and "1" in amg_calls:
                dst_xdr = "1"
            else:
                dst_xdr = "0"

            #### Profiling results #####
            gst_mdr = "1" if ("rifampicin" in resistant_drugs
                              and "isoniazid" in resistant_drugs) else "0"
            gst_amg = any(d in resistant_drugs for d in aminoglycosides)
            gst_xdr = "1" if (gst_mdr == "1" and gst_flq == "1"
                              and gst_amg) else "0"
            _record_call(results, counts, "mdr", s, dst_mdr, gst_mdr)
            _record_call(results, counts, "xdr", s, dst_xdr, gst_xdr)

            ### susceptibility
            # Only scored when every first-line drug has a usable DST call.
            if "NA" not in [sample_dst[d] for d in first_line]:
                dst_sus = "1" if "1" not in [sample_dst[d]
                                             for d in drugs] else "0"
                gst_sus = "1" if all(
                    x not in resistant_drugs for x in first_line) else "0"
                _record_call(results, counts, "sus", s, dst_sus, gst_sus)

    with open("results.json", "w") as handle:
        json.dump(results, handle)
    with open("counts.json", "w") as handle:
        json.dump(counts, handle)

    if args.drugs:
        with open(args.drugs) as handle:
            drugs = [line.rstrip().lower() for line in handle]
    else:
        drugs = list(counts.keys())

    print("Drug\tNum\tSusceptible\tResistant\tSensitivity\tSpecificity")
    for d in drugs:
        if d not in counts:
            continue
        tally = counts[d]
        num_resistant = tally["tp"] + tally["fn"]
        num_susceptible = tally["tn"] + tally["fp"]
        # Skip rows for which sensitivity or specificity is undefined.
        if num_resistant == 0 or num_susceptible == 0:
            continue
        sensitivity = tally["tp"] / num_resistant
        specificity = tally["tn"] / num_susceptible
        total = tally["tp"] + tally["fp"] + tally["tn"] + tally["fn"]
        print("%s\t%s\t%s\t%s\t%s\t%s" %
              (d.capitalize(), total, num_susceptible, num_resistant,
               sensitivity, specificity))


# Example #3
# 0


def profile_vcf(filename, conf):
    """Profile a VCF file with bcftools and the ``pp`` helpers.

    Steps, in order:

    1. Check the VCF header for a ``FORMAT/AD`` definition. If it is absent,
       build an annotation table with a fixed ``0,100`` AD value per record
       and annotate the VCF with it; otherwise the VCF is only re-written with
       ``bcftools view -a``.
    2. Restrict to ``conf["bed"]``, run ``bcftools csq`` and load the
       consequences through ``pp.bcf(...).load_csq``.
    3. Load the barcode genotypes, apply four hard-coded per-position
       overrides, dump them to ``dump.json`` in the current directory and
       compute the barcode.
    4. Pass everything to ``pp.db_compare`` and attach a ``missing_regions``
       mapping in which every BED region is ``"NA"``.

    Args:
        filename: Path of the input VCF, interpolated into shell commands.
        conf: Mapping that provides at least ``bed``, ``ref``, ``gff``,
            ``ann``, ``barcode`` and ``json_db``. It is copied, not mutated.

    Returns:
        The object returned by ``pp.db_compare`` with ``"missing_regions"``
        added.
    """
    params = conf.copy()
    params["tmpvcf"] = pp.get_random_file(extension=".vcf.gz")
    params["tmpcsq"] = pp.get_random_file(extension=".vcf.gz")
    params["filename"] = filename
    params["tmphdr"] = pp.get_random_file()
    params["tmptxt"] = pp.get_random_file()

    # Keep the last line produced by grep; it stays "" when nothing matched.
    last_line = ""
    for last_line in pp.cmd_out(
            "bcftools view %(filename)s -h | grep \"^##FORMAT=<ID=AD\"" %
            params):
        pass
    AD_found = last_line != ""

    if not AD_found:
        # Close the header file before bcftools reads it so the content is
        # guaranteed to be flushed to disk.
        with open(params["tmphdr"], "w") as handle:
            handle.write(
                "##FORMAT=<ID=AD,Number=R,Type=Integer,Description=\"Allelic depths\">\n"
            )
        pp.run_cmd(
            "bcftools query -f '%%CHROM\\t%%POS\\t%%REF\\t%%ALT\\t.[\\t0,100]\\n' %(filename)s > %(tmptxt)s"
            % params)
        pp.run_cmd("bgzip %(tmptxt)s" % params)
        pp.run_cmd("tabix -s 1 -b 2 -p vcf %(tmptxt)s.gz" % params)
        pp.run_cmd(
            "bcftools view -a %(filename)s | bcftools annotate -a %(tmptxt)s.gz -c CHROM,POS,REF,ALT,-,FMT/AD -h %(tmphdr)s -Oz -o %(tmpvcf)s"
            % params)
    else:
        pp.run_cmd("bcftools view -a %(filename)s -Oz -o %(tmpvcf)s" % params)

    pp.run_cmd(
        "bcftools view -T %(bed)s %(tmpvcf)s | bcftools csq -f %(ref)s -g %(gff)s  -Oz -o %(tmpcsq)s -p a"
        % params)
    csq_bcf_obj = pp.bcf(params["tmpcsq"])
    csq = csq_bcf_obj.load_csq(ann_file=conf["ann"])
    results = {
        "variants": [],
        "missing_pos": [],
        "qc": {
            "pct_reads_mapped": "NA",
            "num_reads_mapped": "NA"
        }
    }
    # If several samples are present, the last one iterated wins.
    for sample in csq:
        results["variants"] = csq[sample]

    all_bcf_obj = pp.bcf(params["tmpvcf"])
    mutations = all_bcf_obj.get_bed_gt(conf["barcode"], conf["ref"])

    # Hard-coded overrides: when the listed allele is recorded with a value of
    # exactly 50 at one of these positions, the entry is replaced by the
    # alternative allele with a value of 25.
    # NOTE(review): the rationale for these four positions is not visible in
    # this file - confirm with the barcode definition.
    chrom_calls = mutations["Chromosome"]
    for pos, observed, replacement in (
            (325505, "C", "T"),
            (599868, "G", "A"),
            (931123, "C", "T"),
            (1759252, "T", "G"),
    ):
        if observed in chrom_calls[pos] and chrom_calls[pos][observed] == 50:
            chrom_calls[pos] = {replacement: 25}

    with open("dump.json", "w") as handle:
        json.dump(mutations, handle)

    barcode_mutations = pp.barcode(mutations, conf["barcode"])
    results["barcode"] = barcode_mutations
    results = pp.db_compare(db_file=conf["json_db"], mutations=results)
    bed_regions = pp.load_bed(conf["bed"], [4], 4)
    missing_regions = {gene: "NA" for gene in bed_regions}
    results["missing_regions"] = missing_regions

    # NOTE(review): params["tmpvcf"] is not removed in either branch - confirm
    # whether that is intentional before adding it to the cleanup.
    if AD_found:
        pp.run_cmd("rm %(tmpcsq)s" % params)
    else:
        pp.run_cmd("rm %(tmpcsq)s %(tmphdr)s %(tmptxt)s*" % params)
    return results