def tbprofiler(fq1,fq2,uniq_id,db,storage_dir,platform):
    """Profile one sample from fastq reads and record the outcome in SQLite.

    The reads are mapped to the reference listed in the tbdb configuration
    with bwa, profiled with ``pp.bam_profiler`` (bcftools caller, delly
    enabled), reformatted and summarised with ``tbp``. The resulting JSON is
    written to ``<storage_dir>/<uniq_id>.results.json`` and the rows
    identified by ``uniq_id`` in the ``results`` and ``full_results`` tables
    are updated. Finally every file matching ``<storage_dir>/<uniq_id>*`` is
    removed.

    Args:
        fq1: First fastq file, handed to ``pp.fastq``. Required.
        fq2: Second fastq file for paired data; any falsy value selects
            single-end handling.
        uniq_id: Sample identifier; used as file prefix, sample name and as
            the ``id`` key of the database rows.
        db: Database passed to ``sqlite3.connect``.
        storage_dir: Directory in which intermediate files are created.
        platform: Sequencing platform, forwarded to the mapping and
            profiling steps.

    Returns:
        True once the database has been updated and the files removed.

    Raises:
        ValueError: If ``fq1`` is missing.
    """
    # Fail early with a clear error instead of an unbound-variable error later.
    if not fq1:
        raise ValueError("tbprofiler requires at least one fastq file (fq1)")

    conf = get_conf_dict(sys.base_prefix+"/share/tbprofiler/tbdb")
    drug_order = ["isoniazid","rifampicin","ethambutol","pyrazinamide","streptomycin","ethionamide","fluoroquinolones","amikacin","capreomycin","kanamycin"]

    fastq_obj = pp.fastq(fq1,fq2) if fq2 else pp.fastq(fq1)
    files_prefix = storage_dir+"/"+uniq_id

    # Named once so the recorded pipeline matches what was actually run.
    mapper = "bwa"
    caller = "bcftools"

    bam_obj = fastq_obj.map_to_ref(
        ref_file=conf["ref"], prefix=files_prefix,sample_name=uniq_id,
        aligner=mapper, platform=platform, threads=4
    )

    results = pp.bam_profiler(
        conf=conf, bam_file=bam_obj.bam_file, prefix=files_prefix, platform=platform,
        caller=caller, threads=4, no_flagstat=False,
        run_delly = True
    )

    results = tbp.reformat(results, conf, reporting_af=0.1)

    results["id"] = uniq_id
    results["tbprofiler_version"] = tbp._VERSION
    results["pipeline"] = {"mapper":mapper,"variant_caller":caller}
    results = tbp.get_summary(results,conf,drug_order=drug_order)

    # Serialise once: the same single-line JSON text goes to disk and to the DB.
    outfile = "%s.results.json" % files_prefix
    results_json = json.dumps(results)
    with open(outfile,"w") as out_fh:
        out_fh.write(results_json)

    conn = sqlite3.connect(db)
    try:
        c = conn.cursor()
        c.execute(
            "UPDATE results SET result = ?, lineage = ?, drtype = ?, status = 'completed' where id = ?",
            (results_json,results["sublin"],results["drtype"],uniq_id,)
        )
        # Restrict the update to this sample's row; without the WHERE clause
        # every row in full_results would be overwritten.
        c.execute(
            "UPDATE full_results SET main_lineage = ?, sub_lineage = ?, DR_type = ?, MDR = ?, XDR = ? where id = ?",
            (results["main_lin"],results["sublin"],results["drtype"],results["MDR"],results["XDR"],uniq_id,)
        )
        for d in results["drug_table"]:
            # Column names cannot be bound as SQL parameters, so the drug name
            # is interpolated after normalising it to the column spelling.
            column = d["Drug"].lower().replace("-","_")
            c.execute("UPDATE full_results SET %s = ? where id = ?" % column, (d["Mutations"],uniq_id,))
        conn.commit()
    finally:
        conn.close()

    # Removes all per-sample files, including the JSON written above.
    pp.run_cmd("rm %s/%s*" % (storage_dir,uniq_id))

    return True
Esempio n. 2
0
def main_profile(args):
    """Run the TB profiling pipeline for the sample described by ``args``.

    Steps, in order: load the database configuration, create the output
    folders, build a bam file from the supplied fastq file(s) unless a bam
    was given, run ``pp.bam_profiler``, reformat the results, merge optional
    metadata, write the JSON (and optional pdf/txt/csv) reports under
    ``<dir>/results`` and move bam/vcf files into ``<dir>/bam`` and
    ``<dir>/vcf``.

    Args:
        args: Namespace of command line options (presumably from argparse).
            Attributes read here: ``db``, ``external_db``, ``dir``,
            ``prefix``, ``platform``, ``mapper``, ``caller``, ``no_trim``,
            ``no_delly``, ``bam``, ``read1``, ``read2``, ``threads``,
            ``no_flagstat``, ``calling_params``,
            ``coverage_fraction_threshold``, ``missing_cov_threshold``,
            ``delly_bcf_file``, ``reporting_af``, ``add_columns``, ``pdf``,
            ``txt``, ``csv`` and ``meta``. For the nanopore platform
            ``mapper``, ``caller`` and ``no_trim`` are overwritten in place.

    Raises:
        SystemExit: If neither a bam file nor a first read file is supplied.
    """
    #### Setup conf dictionary ###
    if args.db == "tbdb" and not args.external_db and pp.nofile(
            sys.base_prefix + "/share/tbprofiler/tbdb.fasta"):
        pp.log(
            "Can't find the tbdb file at %s. Please run 'tb-profiler update_tbdb' to load the default library or specify another using the '--external_db' flag"
            % sys.base_prefix,
            ext=True)
    if args.external_db:
        conf = get_conf_dict(args.external_db)
    else:
        conf = get_conf_dict(sys.base_prefix +
                             "/share/tbprofiler/%s" % args.db)

    ### Create folders for results if they don't exist ###
    os.makedirs(args.dir, exist_ok=True)
    for x in ["bam", "vcf", "results"]:
        os.makedirs(args.dir + "/" + x, exist_ok=True)

    ### Set up platform dependant parameters ###
    if args.platform == "nanopore":
        args.mapper = "minimap2"
        args.caller = "bcftools"
        args.no_trim = True
        run_delly = False
    else:
        run_delly = not args.no_delly

    ### Setup prefix for files ###
    files_prefix = args.dir + "/" + args.prefix

    ### Create bam file if fastq has been supplied ###
    # One flag drives every "was a bam supplied?" decision below so the
    # mapping step and the later clean-up can never disagree.
    from_fastq = not args.bam
    if from_fastq:
        if not args.read1:
            sys.exit(
                "\nPlease provide a bam file or a fastq file(s)...Exiting!\n")
        # Paired when a second read file is given, otherwise single-end.
        if args.read2:
            fastq_obj = pp.fastq(args.read1, args.read2)
        else:
            fastq_obj = pp.fastq(args.read1)
        if not args.no_trim:
            # Trimmed copies are written under files_prefix and removed at
            # the end of the run.
            fastq_obj = fastq_obj.trim(files_prefix, threads=args.threads)
        bam_obj = fastq_obj.map_to_ref(ref_file=conf["ref"],
                                       prefix=files_prefix,
                                       sample_name=args.prefix,
                                       aligner=args.mapper,
                                       platform=args.platform,
                                       threads=args.threads)
        bam_file = bam_obj.bam_file
    else:
        bam_file = args.bam

    # NOTE(review): args.no_coverage is not forwarded to pp.bam_profiler
    # here - confirm whether it should be.
    ### Run profiling module from pathogen-profiler ###
    results = pp.bam_profiler(
        conf=conf,
        bam_file=bam_file,
        prefix=files_prefix,
        platform=args.platform,
        caller=args.caller,
        threads=args.threads,
        no_flagstat=args.no_flagstat,
        run_delly=run_delly,
        calling_params=args.calling_params,
        coverage_fraction_threshold=args.coverage_fraction_threshold,
        missing_cov_threshold=args.missing_cov_threshold,
        delly_bcf_file=args.delly_bcf_file)
    # Raw profiler output, written relative to the working directory.
    with open(args.prefix + ".tmp_results.json", "w") as tmp_fh:
        json.dump(results, tmp_fh)
    ### Reformat the results to TB-Profiler style ###
    results = tbp.reformat(results, conf, reporting_af=args.reporting_af)
    results["id"] = args.prefix
    results["tbprofiler_version"] = tbp._VERSION
    results["pipeline"] = {
        "mapper": args.mapper if from_fastq else "N/A",
        "variant_caller": args.caller
    }

    ### Add meta data to results
    # Done before any report is written; merging afterwards would leave the
    # metadata out of every output file.
    if args.meta:
        with open(args.meta) as meta_fh:
            for row in csv.DictReader(meta_fh):
                if row["id"] == results["id"]:
                    for col in row:
                        results["meta_" + col] = row[col]

    json_output = args.dir + "/results/" + args.prefix + ".results.json"
    tex_output = args.dir + "/results/" + args.prefix + ".results.tex"
    text_output = args.dir + "/results/" + args.prefix + ".results.txt"
    csv_output = args.dir + "/results/" + args.prefix + ".results.csv"

    with open(json_output, "w") as json_fh:
        json.dump(results, json_fh)
    extra_columns = [x.lower() for x in args.add_columns.split(",")
                     ] if args.add_columns else []
    if args.pdf:
        tbp.write_tex(results, conf, tex_output, extra_columns)
        pp.run_cmd("pdflatex %s" % tex_output, verbose=1)
        pp.rm_files([
            tex_output, args.dir + "/" + args.prefix + ".results.aux",
            args.dir + "/" + args.prefix + ".results.log"
        ])
    if args.txt:
        tbp.write_text(results,
                       conf,
                       text_output,
                       extra_columns,
                       reporting_af=args.reporting_af)
    if args.csv:
        tbp.write_csv(results, conf, csv_output, extra_columns)

    ### Move files to respective directories ###
    if from_fastq:
        pp.run_cmd("mv %(dir)s/%(prefix)s.bam* %(dir)s/bam/" % vars(args))
        if not args.no_trim:
            pp.run_cmd("rm -f %s" % " ".join(fastq_obj.files))
    pp.run_cmd("mv -f %(dir)s/%(prefix)s*.vcf.gz* %(dir)s/vcf/" % vars(args))
    if run_delly and results["delly"] == "success" and not args.delly_bcf_file:
        pp.run_cmd("mv -f %(dir)s/%(prefix)s.delly.bcf* %(dir)s/vcf/" %
                   vars(args))

    pp.log("Profiling finished sucessfully!")
Esempio n. 3
0
def main_profile(args):
    """Profile one sample from an assembly or from raw reads.

    With ``args.fasta`` the variants are derived from the assembly against
    the reference; otherwise the fastq file(s) are optionally trimmed,
    mapped, variant-called and a consensus fasta is produced. In both cases
    the barcode mutations are turned into a clade string (also printed to
    stdout with the sample prefix), variant data is collected and both are
    written to ``<dir>/<prefix>.results.json``.

    Args:
        args: Namespace of command line options (presumably from argparse).
            Attributes read here: ``dir``, ``db``, ``prefix``, ``fasta``,
            ``read1``, ``read2``, ``no_trim``, ``threads``, ``mapper``,
            ``platform`` and ``caller``.

    Raises:
        SystemExit: With status 1 when both ``--fasta`` and reads are given,
            or when neither is given.
    """
    os.makedirs(args.dir, exist_ok=True)
    conf = get_conf_dict(sys.base_prefix + "/share/covidprofiler/%s" % args.db)

    ### Setup prefix for files ###
    files_prefix = args.dir + "/" + args.prefix

    if args.fasta:
        if args.read1 or args.read2:
            sys.stderr.write(
                "Please use --fasta or --read1/2 but not both... Exiting!\n")
            # Non-zero status so callers can detect the usage error.
            sys.exit(1)
        fasta_obj = pp.fasta(args.fasta)
        wg_vcf_obj = pp.vcf(
            fasta_obj.get_ref_variants(conf["ref"],
                                       prefix=args.prefix,
                                       file_prefix=files_prefix))
    else:
        if not args.read1:
            sys.stderr.write(
                "Please provide assembly using --fasta or at least one read file using --read1... Exiting!\n"
            )
            sys.exit(1)
        ### Create bam file if fastq has been supplied ###
        # Paired when a second read file is given, otherwise single-end.
        if args.read2:
            fastq_obj = pp.fastq(args.read1, args.read2)
        else:
            fastq_obj = pp.fastq(args.read1)
        if not args.no_trim:
            # Trimmed copies are written under files_prefix and removed once
            # the consensus has been built.
            fastq_obj = fastq_obj.trim(files_prefix, threads=args.threads)
        bam_obj = fastq_obj.map_to_ref(ref_file=conf["ref"],
                                       prefix=files_prefix,
                                       sample_name=args.prefix,
                                       aligner=args.mapper,
                                       platform=args.platform,
                                       threads=args.threads)
        wg_vcf_obj = bam_obj.call_variants(conf["ref"],
                                           args.caller,
                                           remove_missing=True)
        cp.vcf2consensus(bam_obj.bam_file, wg_vcf_obj.filename, conf["ref"],
                         wg_vcf_obj.samples[0],
                         wg_vcf_obj.prefix + ".consensus.fasta")
        if not args.no_trim:
            pp.run_cmd("rm -f %s" % " ".join(fastq_obj.files))
    # NOTE(review): refseq / refseqname are not used in the code visible
    # here - confirm whether later code relies on them before removing.
    refseq = pp.fasta(conf["ref"]).fa_dict
    refseqname = list(refseq.keys())[0]

    results = {}
    barcode_mutations = wg_vcf_obj.get_bed_gt(conf["barcode"], conf["ref"])
    barcode = pp.barcode(barcode_mutations, conf["barcode"])
    clade = ";".join(sorted([d["annotation"] for d in barcode]))
    sys.stdout.write("%s\t%s\n" % (args.prefix, clade))
    results["clade"] = clade

    variant_data = cp.get_variant_data(wg_vcf_obj.filename, conf["ref"],
                                       conf["gff"], conf["proteins"])
    results["variants"] = variant_data

    with open("%s.results.json" % files_prefix, "w") as out_fh:
        json.dump(results, out_fh)