def tbprofiler(fq1,fq2,uniq_id,db,storage_dir,platform):
    """Profile one sample from fastq input and record the outcome in sqlite.

    The reads are mapped with bwa, profiled with bcftools (delly enabled),
    reformatted and summarised, written to ``<storage_dir>/<uniq_id>.results.json``
    and then stored in the ``results`` and ``full_results`` tables of ``db``.
    Finally every file matching ``<storage_dir>/<uniq_id>*`` is removed.

    Args:
        fq1: First read file handed to ``pp.fastq``. Required.
        fq2: Optional second read file; any falsy value means single-end.
        uniq_id: Sample identifier; used as file prefix, sample name and
            as the ``id`` key of the database rows that are updated.
        db: Database passed to ``sqlite3.connect``.
        storage_dir: Directory prefix for all intermediate and result files.
        platform: Passed through to the mapping and profiling steps.

    Returns:
        True once the database has been updated and the files cleaned up.

    Raises:
        ValueError: If ``fq1`` is not supplied.
    """
    conf = get_conf_dict(sys.base_prefix+"/share/tbprofiler/tbdb")
    drug_order = ["isoniazid","rifampicin","ethambutol","pyrazinamide","streptomycin","ethionamide","fluoroquinolones","amikacin","capreomycin","kanamycin"]

    # Fail early with a clear message instead of hitting an unbound
    # ``fastq_obj`` further down.
    if not fq1:
        raise ValueError("At least one fastq file (fq1) is required")
    fastq_obj = pp.fastq(fq1,fq2) if fq2 else pp.fastq(fq1)

    files_prefix = storage_dir+"/"+uniq_id
    bam_obj = fastq_obj.map_to_ref(
        ref_file=conf["ref"], prefix=files_prefix,sample_name=uniq_id,
        aligner="bwa", platform=platform, threads=4
    )
    bam_file = bam_obj.bam_file

    results = pp.bam_profiler(
        conf=conf, bam_file=bam_file, prefix=files_prefix, platform=platform,
        caller="bcftools", threads=4, no_flagstat=False,
        run_delly = True
    )

    results = tbp.reformat(results, conf, reporting_af=0.1)
    results["id"] = uniq_id
    results["tbprofiler_version"] = tbp._VERSION
    # The reads are aligned with bwa above, so report that as the mapper.
    results["pipeline"] = {"mapper":"bwa","variant_caller":"bcftools"}
    results = tbp.get_summary(results,conf,drug_order=drug_order)

    # Serialise once: the same single-line JSON document goes to disk and
    # into the database, without re-reading the file through a leaked handle.
    serialised = json.dumps(results)
    outfile = "%s.results.json" % files_prefix
    with open(outfile,"w") as handle:
        handle.write(serialised)

    conn = sqlite3.connect(db)
    try:
        c = conn.cursor()
        c.execute(
            "UPDATE results SET result = ?, lineage = ?, drtype = ?, status = 'completed' where id = ?",
            (serialised,results["sublin"],results["drtype"],uniq_id,)
        )
        # Restrict the update to this sample; without the WHERE clause every
        # row of full_results would be overwritten.
        c.execute(
            "UPDATE full_results SET main_lineage = ?, sub_lineage = ?, DR_type = ?, MDR = ?, XDR = ? where id = ?",
            (results["main_lin"],results["sublin"],results["drtype"],results["MDR"],results["XDR"],uniq_id,)
        )
        for d in results["drug_table"]:
            # Column names cannot be bound as SQL parameters, so the drug
            # name is interpolated; it comes from the drug table produced by
            # the summary step, not from the caller.
            column = d["Drug"].lower().replace("-","_")
            c.execute("UPDATE full_results SET %s = ? where id = ?" % column, (d["Mutations"],uniq_id,))
        conn.commit()
    finally:
        conn.close()

    pp.run_cmd("rm %s/%s*" % (storage_dir,uniq_id))
    return True
def main_profile(args):
    """Run the profiling pipeline for a single sample and write its reports.

    Depending on ``args`` the input is either an existing bam file
    (``args.bam``) or fastq reads (``args.read1`` / ``args.read2``) that are
    optionally trimmed and then mapped to the reference. The bam is profiled,
    the results are reformatted, optional metadata is merged in, and the
    JSON (plus optional PDF/text/CSV) reports are written under
    ``<args.dir>/results``. Bam and vcf files are moved into
    ``<args.dir>/bam`` and ``<args.dir>/vcf``.

    Args:
        args: Parsed command-line namespace. Attributes read here: ``db``,
            ``external_db``, ``dir``, ``prefix``, ``platform``, ``no_delly``,
            ``mapper``, ``caller``, ``no_trim``, ``bam``, ``read1``,
            ``read2``, ``threads``, ``no_flagstat``, ``calling_params``,
            ``coverage_fraction_threshold``, ``missing_cov_threshold``,
            ``delly_bcf_file``, ``reporting_af``, ``add_columns``, ``pdf``,
            ``txt``, ``csv`` and ``meta``. For the nanopore platform
            ``mapper``, ``caller`` and ``no_trim`` are overwritten in place.

    Raises:
        SystemExit: If neither a bam file nor a first read file is given.
    """
    ### Setup conf dictionary ###
    if args.db == "tbdb" and not args.external_db and pp.nofile(
            sys.base_prefix + "/share/tbprofiler/tbdb.fasta"):
        pp.log(
            "Can't find the tbdb file at %s. Please run 'tb-profiler update_tbdb' to load the default library or specify another using the '--external_db' flag"
            % sys.base_prefix,
            ext=True)
    if args.external_db:
        conf = get_conf_dict(args.external_db)
    else:
        conf = get_conf_dict(sys.base_prefix + "/share/tbprofiler/%s" % args.db)

    ### Create folders for results if they don't exist ###
    if pp.nofolder(args.dir):
        os.makedirs(args.dir)
    for x in ["bam", "vcf", "results"]:
        if pp.nofolder(args.dir + "/" + x):
            os.makedirs(args.dir + "/" + x)

    ### Set up platform dependant parameters ###
    if args.platform == "nanopore":
        args.mapper = "minimap2"
        args.caller = "bcftools"
        args.no_trim = True
        run_delly = False
    else:
        run_delly = not args.no_delly

    ### Setup prefix for files ###
    files_prefix = args.dir + "/" + args.prefix

    ### Create bam file if fastq has been supplied ###
    if args.bam is None:
        if not args.read1:
            sys.exit("\nPlease provide a bam file or a fastq file(s)...Exiting!\n")
        # Paired when a second read file is given, unpaired otherwise.
        if args.read2:
            fastq_obj = pp.fastq(args.read1, args.read2)
        else:
            fastq_obj = pp.fastq(args.read1)
        if not args.no_trim:
            fastq_obj = fastq_obj.trim(files_prefix, threads=args.threads)
        bam_obj = fastq_obj.map_to_ref(ref_file=conf["ref"],
                                       prefix=files_prefix,
                                       sample_name=args.prefix,
                                       aligner=args.mapper,
                                       platform=args.platform,
                                       threads=args.threads)
        bam_file = bam_obj.bam_file
    else:
        bam_file = args.bam

    ### Run profiling module from pathogen-profiler ###
    results = pp.bam_profiler(
        conf=conf,
        bam_file=bam_file,
        prefix=files_prefix,
        platform=args.platform,
        caller=args.caller,
        threads=args.threads,
        no_flagstat=args.no_flagstat,
        run_delly=run_delly,
        calling_params=args.calling_params,
        coverage_fraction_threshold=args.coverage_fraction_threshold,
        missing_cov_threshold=args.missing_cov_threshold,
        delly_bcf_file=args.delly_bcf_file)
    with open(args.prefix + ".tmp_results.json", "w") as tmp_handle:
        json.dump(results, tmp_handle)

    ### Reformat the results to TB-Profiler style ###
    results = tbp.reformat(results, conf, reporting_af=args.reporting_af)
    results["id"] = args.prefix
    results["tbprofiler_version"] = tbp._VERSION
    results["pipeline"] = {
        "mapper": args.mapper if not args.bam else "N/A",
        "variant_caller": args.caller
    }

    ### Add meta data to results ###
    # This has to happen before any report is written, otherwise the
    # meta_* fields never reach the output files.
    if args.meta:
        with open(args.meta) as meta_handle:
            for row in csv.DictReader(meta_handle):
                if row["id"] == results["id"]:
                    for col in row:
                        results["meta_" + col] = row[col]

    ### Write the reports ###
    json_output = args.dir + "/results/" + args.prefix + ".results.json"
    tex_output = args.dir + "/results/" + args.prefix + ".results.tex"
    text_output = args.dir + "/results/" + args.prefix + ".results.txt"
    csv_output = args.dir + "/results/" + args.prefix + ".results.csv"
    with open(json_output, "w") as json_handle:
        json.dump(results, json_handle)

    extra_columns = [x.lower() for x in args.add_columns.split(",")
                     ] if args.add_columns else []
    if args.pdf:
        tbp.write_tex(results, conf, tex_output, extra_columns)
        pp.run_cmd("pdflatex %s" % tex_output, verbose=1)
        # NOTE(review): pdflatex is called without an output directory, so the
        # .aux/.log files presumably land in the working directory while the
        # paths removed here are under args.dir - confirm these agree.
        pp.rm_files([
            tex_output, args.dir + "/" + args.prefix + ".results.aux",
            args.dir + "/" + args.prefix + ".results.log"
        ])
    if args.txt:
        tbp.write_text(results,
                       conf,
                       text_output,
                       extra_columns,
                       reporting_af=args.reporting_af)
    if args.csv:
        tbp.write_csv(results, conf, csv_output, extra_columns)

    ### Move files to respective directories ###
    if not args.bam:
        pp.run_cmd("mv %(dir)s/%(prefix)s.bam* %(dir)s/bam/" % vars(args))
        # fastq_obj only exists when reads were mapped here; after trimming
        # it refers to the trimmed files, which are no longer needed.
        if not args.no_trim:
            pp.run_cmd("rm -f %s" % " ".join(fastq_obj.files))
    pp.run_cmd("mv -f %(dir)s/%(prefix)s*.vcf.gz* %(dir)s/vcf/" % vars(args))
    if run_delly and results["delly"] == "success" and not args.delly_bcf_file:
        pp.run_cmd("mv -f %(dir)s/%(prefix)s.delly.bcf* %(dir)s/vcf/" %
                   vars(args))

    pp.log("Profiling finished sucessfully!")
def main_profile(args):
    """Call variants for one sample, assign its clade and write a JSON report.

    Input is either an assembly (``args.fasta``) or read files
    (``args.read1`` / ``args.read2``), never both. Reads are optionally
    trimmed, mapped to the reference, variant-called and turned into a
    consensus fasta. The clade is derived from the barcode mutations, printed
    to stdout as ``<prefix>\\t<clade>`` and stored together with the variant
    data in ``<args.dir>/<args.prefix>.results.json``.

    Args:
        args: Parsed command-line namespace. Attributes read here: ``dir``,
            ``db``, ``prefix``, ``fasta``, ``read1``, ``read2``, ``no_trim``,
            ``threads``, ``mapper``, ``platform`` and ``caller``.

    Raises:
        SystemExit: With status 1 if both an assembly and reads are given,
            or if neither is given.
    """
    if pp.nofolder(args.dir):
        os.makedirs(args.dir)
    conf = get_conf_dict(sys.base_prefix + "/share/covidprofiler/%s" % args.db)

    ### Setup prefix for files ###
    files_prefix = args.dir + "/" + args.prefix

    if args.fasta:
        if args.read1 or args.read2:
            sys.stderr.write(
                "Please use --fasta or --read1/2 but not both... Exiting!\n")
            # Non-zero status so callers can detect the failure.
            sys.exit(1)
        fasta_obj = pp.fasta(args.fasta)
        wg_vcf_obj = pp.vcf(
            fasta_obj.get_ref_variants(conf["ref"],
                                       prefix=args.prefix,
                                       file_prefix=files_prefix))
    else:
        if not args.read1:
            sys.stderr.write(
                "Please provide assembly using --fasta or at least one read file using --read1... Exiting!\n"
            )
            sys.exit(1)

        ### Create bam file if fastq has been supplied ###
        # Paired when a second read file is given, unpaired otherwise.
        if args.read2:
            fastq_obj = pp.fastq(args.read1, args.read2)
        else:
            fastq_obj = pp.fastq(args.read1)
        if not args.no_trim:
            fastq_obj = fastq_obj.trim(files_prefix, threads=args.threads)

        bam_obj = fastq_obj.map_to_ref(ref_file=conf["ref"],
                                       prefix=files_prefix,
                                       sample_name=args.prefix,
                                       aligner=args.mapper,
                                       platform=args.platform,
                                       threads=args.threads)
        wg_vcf_obj = bam_obj.call_variants(conf["ref"],
                                           args.caller,
                                           remove_missing=True)
        cp.vcf2consensus(bam_obj.bam_file, wg_vcf_obj.filename, conf["ref"],
                         wg_vcf_obj.samples[0],
                         wg_vcf_obj.prefix + ".consensus.fasta")
        # After trimming, fastq_obj refers to the trimmed files, which are
        # no longer needed once mapping is done.
        if not args.no_trim:
            pp.run_cmd("rm -f %s" % " ".join(fastq_obj.files))

    results = {}
    barcode_mutations = wg_vcf_obj.get_bed_gt(conf["barcode"], conf["ref"])
    barcode = pp.barcode(barcode_mutations, conf["barcode"])
    clade = ";".join(sorted(d["annotation"] for d in barcode))
    sys.stdout.write("%s\t%s\n" % (args.prefix, clade))
    results["clade"] = clade

    variant_data = cp.get_variant_data(wg_vcf_obj.filename, conf["ref"],
                                       conf["gff"], conf["proteins"])
    results["variants"] = variant_data

    with open("%s.results.json" % files_prefix, "w") as handle:
        json.dump(results, handle)