Python log 예제들, pathogenprofiler.log Python 예제들

예제 #1

0

파일 보기

파일: covid-profiler.py 프로젝트: jodyphelan/covid-profiler

def main_load_library(args):
    """Install a library into ``<sys.base_prefix>/share/covidprofiler``.

    Copies the MSA, its metadata CSV and every library file sharing
    ``args.prefix`` into the share directory, indexes the reference FASTA
    with samtools and bwa, and removes a ``.dict`` file left there for the
    same library name.

    Args:
        args: Parsed arguments providing ``prefix``, ``msa`` and ``meta``.
    """
    share_dir = sys.base_prefix + "/share/covidprofiler"
    library_name = args.prefix.split("/")[-1]
    installed_prefix = share_dir + "/" + library_name
    suffixes = (
        ".gff",
        ".fasta",
        ".barcode.bed",
        ".version.json",
        ".proteins.csv",
        ".non_coding.bed",
    )

    if pp.nofolder(share_dir):
        pp.run_cmd("mkdir %s " % (share_dir + "/"))

    pp.run_cmd("cp %s %s" % (args.msa, installed_prefix + ".msa.fa"))
    pp.run_cmd("cp %s %s" % (args.meta, installed_prefix + ".msa.meta.csv"))
    for suffix in suffixes:
        pp.run_cmd("cp %s %s" %
                   (args.prefix + suffix, installed_prefix + suffix))

    reference = installed_prefix + ".fasta"
    pp.run_cmd("samtools faidx %s" % reference)
    pp.run_cmd("bwa index %s" % reference)

    old_dict = installed_prefix + ".dict"
    if os.path.isfile(old_dict):
        pp.run_cmd("rm %s" % old_dict)
    pp.log("Sucessfully imported library")

예제 #2

0

파일 보기

파일: process-gisaid.py 프로젝트: jodyphelan/covid-profiler

def main_load_library(args):
    """Install a library into ``<sys.base_prefix>/share/covidprofiler``.

    Copies every library file sharing ``args.prefix`` into the share
    directory, indexes the reference FASTA with samtools and bwa, removes a
    ``.dict`` file left there for the same library name, and then runs
    ``gatk CreateSequenceDictionary`` on the reference.

    Args:
        args: Parsed arguments providing ``prefix``.
    """
    share_dir = sys.base_prefix + "/share/covidprofiler"
    library_name = args.prefix.split("/")[-1]
    installed_prefix = share_dir + "/" + library_name
    suffixes = (".gff", ".fasta", ".barcode.bed", ".version.json")

    if pp.nofolder(share_dir):
        pp.run_cmd("mkdir %s " % (share_dir + "/"))

    for suffix in suffixes:
        pp.run_cmd("cp %s %s" %
                   (args.prefix + suffix, installed_prefix + suffix))

    reference = installed_prefix + ".fasta"
    pp.run_cmd("samtools faidx %s" % reference)
    pp.run_cmd("bwa index %s" % reference)

    old_dict = installed_prefix + ".dict"
    if os.path.isfile(old_dict):
        pp.run_cmd("rm %s" % old_dict)
    pp.run_cmd("gatk CreateSequenceDictionary -R %s" % reference)
    pp.log("Sucessfully imported library")

예제 #3

0

파일 보기

def get_summary(json_results,
                conf,
                columns=None,
                drug_order=None,
                reporting_af=0.0):
    """Build the per-drug table and the pipeline table for a result set.

    Args:
        json_results: Result dictionary. Must provide ``dr_variants`` (a list
            of dicts with ``drug``, ``gene``, ``change`` and ``freq``) and
            ``pipeline`` (with ``mapper`` and ``variant_caller``).
        conf: Configuration dictionary. ``conf["bed"]`` is the path of a
            whitespace-delimited file whose sixth column is a comma-separated
            list of drugs.
        columns: Extra variant keys to report alongside each drug.
        drug_order: Optional drug list; when given it replaces the drugs read
            from the BED file and fixes the row order.
        reporting_af: Variants whose ``freq`` is below this value are omitted.

    Returns:
        A shallow copy of ``json_results`` with ``drug_table`` and
        ``pipline_table`` added.
    """
    if not columns:
        columns = []

    drugs = set()
    with open(conf["bed"]) as bed:
        for line in bed:
            fields = line.rstrip().split()
            drugs.update(fields[5].split(","))
    if drug_order:
        drugs = drug_order

    dr_variants = json_results["dr_variants"]
    # Column names can only be checked against an actual variant; with no
    # variants every extra column is simply reported empty.
    if dr_variants:
        for key in columns:
            if key not in dr_variants[0]:
                pp.log(
                    "%s not found in variant annotation, is this a valid column in the database CSV file? Exiting!"
                    % key, True)

    mutations_by_drug = {}
    annotation = {}
    for variant in dr_variants:
        drug = variant["drug"]
        if float(variant["freq"]) < reporting_af:
            continue
        mutations_by_drug.setdefault(drug, []).append(
            "%s %s (%.2f)" %
            (variant["gene"], variant["change"], variant["freq"]))
        drug_annotation = annotation.setdefault(
            drug, {key: [] for key in columns})
        for key in columns:
            drug_annotation[key].append(variant[key])

    drug_table = []
    for drug in drugs:
        # The collected lists are left untouched so that a drug repeated in
        # drug_order produces the same row each time.
        mutations = ", ".join(mutations_by_drug.get(drug, []))
        dictline = {
            "Drug": drug.capitalize(),
            "Genotypic Resistance": "R" if mutations else "",
            "Mutations": mutations
        }
        for key in columns:
            if drug in annotation:
                # str() lets non-string annotation values be joined
                dictline[key] = ", ".join(
                    str(value) for value in annotation[drug][key])
            else:
                dictline[key] = ""
        drug_table.append(dictline)

    pipeline_tbl = [{
        "Analysis": "Mapping",
        "Program": json_results["pipeline"]["mapper"]
    }, {
        "Analysis": "Variant Calling",
        "Program": json_results["pipeline"]["variant_caller"]
    }]
    new_json = json_results.copy()
    new_json["drug_table"] = drug_table
    # The misspelt key is kept on purpose: consumers read "pipline_table"
    new_json["pipline_table"] = pipeline_tbl
    return new_json

예제 #4

0

파일 보기

def write_html(json_results, conf, outfile, columns=None, drug_order=None):
    """Render a result set as an HTML report and write it to ``outfile``.

    Args:
        json_results: Result dictionary; summarised with ``get_summary`` and
            additionally read for ``id``, ``sublin``, ``drtype``, ``lineage``,
            ``other_variants`` and ``tbprofiler_version``.
        conf: Configuration dictionary forwarded to ``get_summary``.
        outfile: Path of the HTML file to write.
        columns: Extra variant columns to append to the drug table.
        drug_order: Optional drug ordering forwarded to ``get_summary``.
    """
    # ``columns`` is concatenated to a list below, so the ``None`` default
    # has to become an empty list first.
    columns = list(columns) if columns else []
    json_results = get_summary(json_results,
                               conf,
                               columns=columns,
                               drug_order=drug_order)
    html_strings = {
        "id": json_results["id"],
        "date": time.ctime(),
        "strain": json_results["sublin"],
        "drtype": json_results["drtype"],
        "dr_report": dict_list2html(
            json_results["drug_table"],
            ["Drug", "Genotypic Resistance", "Mutations"] + columns, {
                "Drug": "Drug<sup>1</sup>",
                "Genotypic Resistance": "Resistance",
                "Mutations": "Supporting Mutations (frequency)"
            }),
        "lineage_report": dict_list2html(
            json_results["lineage"], ["lin", "family", "spoligotype", "rd"], {
                "lin": "Lineage<sup>2</sup>",
                "frac": "Estimated fraction",
                "family": "Family",
                "spoligotype": "Main Spoligotype",
                "rd": "RDS"
            }),
        "other_var_report": dict_list2html(
            json_results["other_variants"],
            ["gene", "genome_pos", "change", "freq"], {
                "gene": "Gene",
                "genome_pos": "Chromosome Position",
                "change": "Mutation",
                "freq": "Estimated fraction"
            }),
        # "pipline_table" is the key get_summary actually emits
        "pipeline": dict_list2html(json_results["pipline_table"],
                                   ["Analysis", "Program"]),
        "version": json_results["tbprofiler_version"],
    }
    pp.log("Writing results to %s" % outfile)
    with open(outfile, "w") as out:
        out.write(load_html(html_strings))

예제 #5

0

파일 보기

파일: vcf_gwas.py 프로젝트: pathogenseq/pathogenseq-scripts

def main(args):
    """Write per-phenotype files and a gemma command list, then run it.

    For every phenotype named in ``args.phenos`` a ``<pheno>.pheno`` file is
    written with one value per VCF sample ("NA" when the sample has no value
    in ``args.csv``), and a line of chained gemma commands is appended to a
    command file. Unless ``args.preprocess`` is set, the command file is then
    executed through GNU parallel with ``args.threads`` jobs.

    Args:
        args: Parsed arguments providing ``vcf``, ``genes``, ``csv``,
            ``phenos``, ``preprocess`` and ``threads``.
    """
    vcf = vcf_class(args.vcf)
    if args.genes:
        vcf.get_genesum()
    geno_file = vcf.prefix + ".geno"
    genesum_file = vcf.prefix + ".genesum"

    meta = {s: {} for s in vcf.samples}
    with open(args.csv) as csv_handle:
        reader = csv.DictReader(csv_handle)
        # Taken from the header so validation also works when the CSV has no
        # data rows.
        csv_columns = set(reader.fieldnames or [])
        for row in reader:
            if row["id"] not in meta:
                continue
            for pheno in row:
                if pheno == "id":
                    continue
                meta[row["id"]][pheno] = row[pheno]

    with open(args.phenos) as pheno_handle:
        phenos = [x.rstrip() for x in pheno_handle]

    cmd_file = pp.get_random_file()
    with open(cmd_file, "w") as commands:
        for pheno in phenos:
            pheno_file = "%s.pheno" % pheno
            if pheno not in csv_columns:
                # second argument presumably makes pp.log abort - confirm
                pp.log("%s not in CSV file" % pheno, True)
            with open(pheno_file, "w") as pheno_out:
                pheno_out.write("\n".join([
                    meta[s][pheno] if pheno in meta[s] else "NA"
                    for s in vcf.samples
                ]))
            commands.write(
                "gemma -p %s -g %s -gk 1 -o %s -maf 0.00005 -miss 0.99 && gemma  -lmm 1 -p %s -g %s  -k output/%s.cXX.txt  -o %s -maf 0.00005 -miss 0.99 && gemma  -lmm 1 -p %s -g %s  -k output/%s.cXX.txt  -o %s.genesum -notsnp\n"
                % (pheno_file, geno_file, pheno, pheno_file, geno_file, pheno,
                   pheno, pheno_file, genesum_file, pheno, pheno))

    if args.preprocess:
        pp.log("Preprocessing finished\n", True)
    else:
        pp.run_cmd("cat %s | parallel -j %s" % (cmd_file, args.threads))

예제 #6

0

파일 보기

파일: tbprofiler_performance.py 프로젝트: yujun2017/TBProfiler

def _tally_outcome(results, counts, category, sample, truth, predicted):
    """Record ``sample`` as tp/fp/fn/tn for ``category``.

    ``truth`` is the reference call: "1" or "0". Any other value (such as
    "NA") leaves the tallies untouched. ``predicted`` is the boolean
    prediction being compared against it.
    """
    if truth == "1":
        outcome = "tp" if predicted else "fn"
    elif truth == "0":
        outcome = "fp" if predicted else "tn"
    else:
        return
    results[category][outcome].append(sample)
    counts[category][outcome] += 1


def _class_calls(sample_dst, drug_class):
    """Return the calls in ``sample_dst`` for the drugs of ``drug_class`` it has."""
    return [sample_dst[d] for d in drug_class if d in sample_dst]


def calculate(args):
    """Tally prediction outcomes per drug and print sensitivity/specificity.

    For every sample in ``args.samples`` the calls loaded from ``args.dst``
    are compared with the drugs listed under ``dr_variants`` in the sample's
    ``<sample>.results.json``. Outcomes are tallied per drug and for the
    derived categories ``flq``, ``mdr``, ``xdr`` and ``sus``. Drugs whose
    loci exceed ``args.miss`` in ``missing_regions`` are set to "NA" for that
    sample. Samples without a result file are listed in
    ``samples_not_found.txt``. The tallies are written to ``results.json``
    and ``counts.json`` and a tab-separated summary is printed.

    Args:
        args: Parsed arguments providing ``samples``, ``dst``, ``bed``,
            ``dir``, ``miss`` and ``drugs``.
    """
    import os  # local import: the module-level imports are not visible here

    dst = load_dst(args.dst)
    drug_loci = pp.load_bed(args.bed, [6], 4)  # {'Rv0668': ('rifampicin')}
    with open(args.samples) as sample_handle:
        samples = [x.rstrip() for x in sample_handle]
    ext = ".results.json"
    drugs = [d.lower() for d in dst[samples[0]].keys()]
    categories = drugs + ["flq", "mdr", "xdr", "sus"]
    results = {
        c: {"tp": [], "tn": [], "fp": [], "fn": []}
        for c in categories
    }
    counts = {c: {"tp": 0, "tn": 0, "fp": 0, "fn": 0} for c in categories}
    pre = args.dir if args.dir else ""

    with open("samples_not_found.txt", "w") as not_found:
        for s in tqdm(samples):
            # os.path.join keeps the path relative when no directory is given
            res_file = os.path.join(pre, s + ext)
            if pp.nofile(res_file):
                pp.log("Warning: %s does not exist!" % res_file)
                not_found.write("%s\n" % s)
                continue
            with open(res_file) as res_handle:
                profile = json.load(res_handle)

            na_drugs = set()
            for locus in drug_loci:
                if profile["missing_regions"][locus] > args.miss:
                    na_drugs.update(drug_loci[locus][0].split(","))
            resistant_drugs = {
                d["drug"].lower() for d in profile["dr_variants"]
            }
            sample_dst = dst[s]
            for d in drugs:
                if d in na_drugs:
                    sample_dst[d] = "NA"

            for d in drugs:
                _tally_outcome(results, counts, d, s, sample_dst[d],
                               d in resistant_drugs)

            #### Fluoroquinolones ####
            flq_calls = _class_calls(sample_dst, fluoroquinolones)
            flq_missing = all(call == "NA" for call in flq_calls)
            if flq_missing or ("1" in flq_calls and "0" in flq_calls):
                # no usable call, or conflicting calls within the class
                dst_flq = "NA"
            else:
                dst_flq = "1" if "1" in flq_calls else "0"
            gst_flq = any(d in resistant_drugs for d in fluoroquinolones)
            _tally_outcome(results, counts, "flq", s, dst_flq, gst_flq)

            #### MDR & XDR ####
            rif = sample_dst["rifampicin"]
            inh = sample_dst["isoniazid"]
            if rif == "NA" or inh == "NA":
                dst_mdr = "NA"
            else:
                dst_mdr = "1" if rif == "1" and inh == "1" else "0"
            amg_calls = _class_calls(sample_dst, aminoglycosides)
            amg_missing = all(call == "NA" for call in amg_calls)
            if dst_mdr == "NA" or flq_missing or amg_missing:
                dst_xdr = "NA"
            elif dst_mdr == "1" and "1" in flq_calls and "1" in amg_calls:
                dst_xdr = "1"
            else:
                dst_xdr = "0"

            #### Profiling results #####
            gst_mdr = ("rifampicin" in resistant_drugs
                       and "isoniazid" in resistant_drugs)
            gst_amg = any(d in resistant_drugs for d in aminoglycosides)
            gst_xdr = gst_mdr and gst_flq and gst_amg
            _tally_outcome(results, counts, "mdr", s, dst_mdr, gst_mdr)
            _tally_outcome(results, counts, "xdr", s, dst_xdr, gst_xdr)

            ### susceptibility
            if "NA" not in [sample_dst[d] for d in first_line]:
                dst_sus = "1" if "1" not in [sample_dst[d]
                                             for d in drugs] else "0"
                gst_sus = all(x not in resistant_drugs for x in first_line)
                _tally_outcome(results, counts, "sus", s, dst_sus, gst_sus)

    with open("results.json", "w") as results_out:
        json.dump(results, results_out)
    with open("counts.json", "w") as counts_out:
        json.dump(counts, counts_out)

    if args.drugs:
        with open(args.drugs) as drug_handle:
            drugs = [x.rstrip().lower() for x in drug_handle]
    else:
        drugs = list(counts.keys())

    print("Drug\tNum\tSusceptible\tResistant\tSensitivity\tSpecificity")
    for d in drugs:
        if d not in counts:
            continue
        tally = counts[d]
        positives = tally["tp"] + tally["fn"]
        negatives = tally["tn"] + tally["fp"]
        # Both rates need a non-empty denominator
        if positives == 0 or negatives == 0:
            continue
        sensitivity = tally["tp"] / positives
        specificity = tally["tn"] / negatives
        print("%s\t%s\t%s\t%s\t%s\t%s" %
              (d.capitalize(), positives + negatives, negatives, positives,
               sensitivity, specificity))

예제 #7

0

파일 보기

def main_profile(args):
    """Profile one sample from FASTQ or BAM input and write its reports.

    Steps: resolve the database configuration, create the output folders,
    map reads to a BAM when none was supplied, run ``pp.bam_profiler``,
    reformat the results, merge matching rows of ``args.meta`` into them,
    write the JSON result plus the optional PDF/text/CSV reports, and move
    the intermediate files into ``bam/`` and ``vcf/`` under ``args.dir``.

    Args:
        args: Parsed command-line arguments (database, input files, platform,
            caller and reporting options).
    """
    #### Setup conf dictionary ###
    if args.db == "tbdb" and not args.external_db and pp.nofile(
            sys.base_prefix + "/share/tbprofiler/tbdb.fasta"):
        pp.log(
            "Can't find the tbdb file at %s. Please run 'tb-profiler update_tbdb' to load the default library or specify another using the '--external_db' flag"
            % sys.base_prefix,
            ext=True)
    if args.external_db:
        conf = get_conf_dict(args.external_db)
    else:
        conf = get_conf_dict(sys.base_prefix +
                             "/share/tbprofiler/%s" % args.db)

    ### Create folders for results if they don't exist ###
    if pp.nofolder(args.dir):
        os.mkdir(args.dir)

    for x in ["bam", "vcf", "results"]:
        if pp.nofolder(args.dir + "/" + x):
            os.mkdir(args.dir + "/" + x)

    ### Set up platform dependant parameters ###
    if args.platform == "nanopore":
        args.mapper = "minimap2"
        args.caller = "bcftools"
        args.no_trim = True
        run_delly = False
    else:
        run_delly = not args.no_delly

    ### Setup prefix for files ###
    files_prefix = args.dir + "/" + args.prefix

    ### Create bam file if fastq has been supplied ###
    if args.bam is None:
        if args.read1 and args.read2 and args.no_trim:
            # Paired + no trimming
            fastq_obj = pp.fastq(args.read1, args.read2)
        elif args.read1 and args.read2 and not args.no_trim:
            # Paired + trimming
            untrimmed_fastq_obj = pp.fastq(args.read1, args.read2)
            fastq_obj = untrimmed_fastq_obj.trim(files_prefix,
                                                 threads=args.threads)
        elif args.read1 and not args.read2 and args.no_trim:
            # Unpaired + no trimming
            fastq_obj = pp.fastq(args.read1, args.read2)
        elif args.read1 and not args.read2 and not args.no_trim:
            # Unpaired + trimming
            untrimmed_fastq_obj = pp.fastq(args.read1)
            fastq_obj = untrimmed_fastq_obj.trim(files_prefix,
                                                 threads=args.threads)
        else:
            exit("\nPlease provide a bam file or a fastq file(s)...Exiting!\n")
        bam_obj = fastq_obj.map_to_ref(ref_file=conf["ref"],
                                       prefix=files_prefix,
                                       sample_name=args.prefix,
                                       aligner=args.mapper,
                                       platform=args.platform,
                                       threads=args.threads)
        bam_file = bam_obj.bam_file
    else:
        bam_file = args.bam

    ### Run profiling module from pathogen-profiler ###
    results = pp.bam_profiler(
        conf=conf,
        bam_file=bam_file,
        prefix=files_prefix,
        platform=args.platform,
        caller=args.caller,
        threads=args.threads,
        no_flagstat=args.no_flagstat,
        run_delly=run_delly,
        calling_params=args.calling_params,
        coverage_fraction_threshold=args.coverage_fraction_threshold,
        missing_cov_threshold=args.missing_cov_threshold,
        delly_bcf_file=args.delly_bcf_file)
    with open(args.prefix + ".tmp_results.json", "w") as tmp_out:
        json.dump(results, tmp_out)

    ### Reformat the results to TB-Profiler style ###
    results = tbp.reformat(results, conf, reporting_af=args.reporting_af)
    results["id"] = args.prefix
    results["tbprofiler_version"] = tbp._VERSION
    results["pipeline"] = {
        "mapper": args.mapper if not args.bam else "N/A",
        "variant_caller": args.caller
    }

    ### Add meta data to results ###
    # Done before any output is written; merging afterwards would only
    # change the in-memory dict and the metadata would never be saved.
    if args.meta:
        with open(args.meta) as meta_handle:
            for row in csv.DictReader(meta_handle):
                if row["id"] == results["id"]:
                    for col in row:
                        results["meta_" + col] = row[col]

    json_output = args.dir + "/results/" + args.prefix + ".results.json"
    tex_output = args.dir + "/results/" + args.prefix + ".results.tex"
    text_output = args.dir + "/results/" + args.prefix + ".results.txt"
    csv_output = args.dir + "/results/" + args.prefix + ".results.csv"

    with open(json_output, "w") as json_out:
        json.dump(results, json_out)
    extra_columns = [x.lower() for x in args.add_columns.split(",")
                     ] if args.add_columns else []
    if args.pdf:
        tbp.write_tex(results, conf, tex_output, extra_columns)
        pp.run_cmd("pdflatex %s" % tex_output, verbose=1)
        pp.rm_files([
            tex_output, args.dir + "/" + args.prefix + ".results.aux",
            args.dir + "/" + args.prefix + ".results.log"
        ])
    if args.txt:
        tbp.write_text(results,
                       conf,
                       text_output,
                       extra_columns,
                       reporting_af=args.reporting_af)
    if args.csv:
        tbp.write_csv(results, conf, csv_output, extra_columns)

    ### Move files to respective directories ###
    if not args.bam:
        pp.run_cmd("mv %(dir)s/%(prefix)s.bam* %(dir)s/bam/" % vars(args))
        if not args.no_trim:
            # only the trimmed copies created above are removed
            pp.run_cmd("rm -f %s" % " ".join(fastq_obj.files))
    pp.run_cmd("mv -f %(dir)s/%(prefix)s*.vcf.gz* %(dir)s/vcf/" % vars(args))
    if run_delly and results["delly"] == "success" and not args.delly_bcf_file:
        pp.run_cmd("mv -f %(dir)s/%(prefix)s.delly.bcf* %(dir)s/vcf/" %
                   vars(args))

    pp.log("Profiling finished sucessfully!")

예제 #8

0

파일 보기

파일: tbprofiler_analyse_variants.py 프로젝트: yujun2017/TBProfiler

def main(args):
    """Print a CSV report of how often listed mutations occur in the samples.

    For each (gene, mutation) pair in ``args.mutations`` the report gives the
    number and percentage of samples carrying it and, per associated drug,
    the number and percentage of isolates marked "1" for that drug in
    ``args.meta`` that carry it.

    Args:
        args: Parsed arguments providing ``drugs``, ``db``, ``mutations``,
            ``meta``, ``samples`` and ``dir``.
    """
    if args.drugs:
        args.drugs = [x.lower() for x in args.drugs.split(",")]
    conf = get_conf_dict(sys.base_prefix + "/share/tbprofiler/%s" % args.db)

    drug2genes = defaultdict(set)
    gene2drugs = defaultdict(set)
    gene2lt = {}
    lt2gene = {}
    with open(conf["bed"]) as bed:
        for line in bed:
            row = line.rstrip().split()
            # column 4 and column 5 both resolve to the column-4 identifier
            gene2lt[row[3]] = row[3]
            gene2lt[row[4]] = row[3]
            lt2gene[row[3]] = row[4]
            for d in row[5].split(","):
                drug2genes[d].add(row[3])
                gene2drugs[row[3]].add(d)

    mutations = []
    with open(args.mutations) as mutation_handle:
        for line in mutation_handle:
            row = line.strip().split()
            mutations.append((gene2lt[row[0]], row[1]))

    meta = {}
    with open(args.meta) as meta_handle:
        reader = csv.DictReader(meta_handle)
        # Only drugs that have a column in the meta file can be evaluated
        drug_resistant_isolates = {
            d: set()
            for d in drug2genes if d in reader.fieldnames
        }
        for row in reader:
            meta[row["id"]] = row
            for drug in drug_resistant_isolates:
                if row[drug] == "1":
                    drug_resistant_isolates[drug].add(row["id"])
    pp.log(f"Analysing {len(drug_resistant_isolates)} drugs")

    if args.samples:
        with open(args.samples) as sample_handle:
            samples = [
                x.rstrip() for x in sample_handle if x.rstrip() in meta
            ]
    else:
        # List the same directory the result files are loaded from below
        samples = [
            x.replace(".results.json", "") for x in os.listdir(args.dir)
            if x.endswith(".results.json")
            if x.replace(".results.json", "") in meta
        ]

    variants = {x: set() for x in mutations}
    variant_drug_associations = defaultdict(set)
    for s in tqdm(samples):
        with open(f"{args.dir}/{s}.results.json") as res_handle:
            tmp = json.load(res_handle)
        for var in tmp["dr_variants"] + tmp["other_variants"]:
            key = (var["locus_tag"], var["change"])
            # dict lookup instead of scanning the mutation list
            if key in variants:
                variants[key].add(s)
                if "drug" in var:
                    variant_drug_associations[key].add(var["drug"])

    total_sample_n = len(samples)
    pp.log(f"Found {total_sample_n} samples in meta list with result files")
    pp.log("-" * 40)
    pp.log(variant_drug_associations)
    print(
        "Gene,Mutation,Drug resistance association,Total frequency (percentage),Associated drugs,Drug resitant frequency (percentage)"
    )
    for gene, mut in variants:
        carriers = variants[(gene, mut)]
        total_freq = len(carriers)
        total_pct = total_freq / total_sample_n * 100 if total_sample_n else 0.0
        dr_associated = "Not associated"
        if (gene, mut) in variant_drug_associations:
            drugs = variant_drug_associations[(gene, mut)]
            dr_associated = "Associated"
        else:
            drugs = gene2drugs[gene]
        if args.drugs:
            drugs = [d for d in drugs if d in args.drugs]
        # Sorted so the drug column and the frequency column line up and the
        # output is reproducible between runs.
        drugs = sorted(drugs)

        dr_columns = []
        for drug in drugs:
            if drug not in drug_resistant_isolates:
                # no phenotype column for this drug in the meta file
                dr_columns.append("NA")
                continue
            resistant = drug_resistant_isolates[drug]
            dr_freq = len(carriers.intersection(resistant))
            dr_pct = dr_freq / len(resistant) * 100 if resistant else 0.0
            dr_columns.append("%s (%.2f)" % (dr_freq, dr_pct))

        print("%s,%s,%s,%s (%.2f),%s,%s" %
              (lt2gene[gene], mut, dr_associated, total_freq, total_pct,
               ';'.join(drugs), ';'.join(dr_columns)))

예제 #9

0

파일 보기

def main(args):
    """Call variants in a BAM file and write them to a pp-results JSON file.

    Small variants come from ``bam_obj.call_variants`` and ``load_csq``;
    unless ``args.no_delly`` is set, deletions reported by delly that overlap
    the BED regions are appended. The result is written to
    ``<out_dir>/<prefix>.pp-results.json`` and the intermediate
    ``<prefix>.targets.*`` files are removed.

    Args:
        args: Parsed arguments providing the input paths (``bam``, ``ref``,
            ``gff``, ``bed``, ``ann`` or ``conf``), ``prefix``, ``out_dir``
            and the calling options.
    """
    if pp.nofolder(args.out_dir):
        pp.run_cmd("mkdir %s" % args.out_dir)
    conf = {
        "ref": args.ref,
        "gff": args.gff,
        "bed": args.bed,
        "ann": args.ann,
    }
    if args.conf:
        with open(args.conf) as conf_handle:
            conf = json.load(conf_handle)
    for x in ["ref", "gff", "bed", "ann"]:
        # .get() so that a key missing from a loaded conf file is reported
        # the same way as an unset argument
        if conf.get(x) is None:
            pp.log("%s variable is not defined" % x, True)
    bam_obj = pp.bam(args.bam,
                     args.prefix,
                     conf["ref"],
                     platform=args.platform)
    bcf_obj = bam_obj.call_variants(
        prefix=args.prefix + ".targets",
        call_method=args.call_method,
        gff_file=conf["gff"],
        bed_file=conf["bed"],
        mixed_as_missing=args.platform != "Illumina",
        threads=args.threads,
        min_dp=args.min_depth,
        af=args.af,
        caller=args.caller)
    csq = bcf_obj.load_csq(ann_file=conf["ann"])

    chr2gene_pos = {}
    with open(conf["ann"]) as ann:
        for line in ann:
            row = line.rstrip().split()
            chr2gene_pos[int(row[1])] = int(row[3])

    variants = []
    for var in list(csq.values())[0]:
        # keep the caller's notation alongside the reformatted one
        var["_internal_change"] = var["change"]
        var["change"] = pp.reformat_mutations(var["change"], var["type"],
                                              var["gene_id"], chr2gene_pos)
        variants.append(var)

    if not args.no_delly:
        delly_bcf = bam_obj.run_delly()
        deletions = delly_bcf.overlap_bed(conf["bed"])
        for deletion in deletions:
            # Describe the deletion with its own type and region rather than
            # with values left over from the last small variant.
            # NOTE(review): assumes pp.reformat_mutations accepts the
            # "large_deletion" type - confirm against pathogen-profiler.
            tmp_change = pp.reformat_mutations(
                "%(chr)s_%(start)s_%(end)s" % deletion, "large_deletion",
                deletion["region"], chr2gene_pos)
            variants.append({
                "genome_pos": deletion["start"],
                "gene_id": deletion["region"],
                "chr": deletion["chr"],
                "freq": 1,
                "type": "large_deletion",
                "change": tmp_change
            })

    out_path = "%s/%s.pp-results.json" % (args.out_dir, args.prefix)
    with open(out_path, "w") as out:
        json.dump(variants, out)

    for x in [
            ".targets.bcf", ".targets.csq.bcf", ".targets.csq.bcf.csi",
            ".targets.delly.bcf", ".targets.delly.bcf.csi",
            ".targets.del_pos.bed", ".targets.gvcf.gz", ".targets.gvcf.gz.csi",
            ".targets.missing.bcf"
    ]:
        # delly outputs do not exist when delly was skipped
        if args.no_delly and "delly" in x:
            continue
        pp.run_cmd("rm %s%s" % (args.prefix, x))