def main():
    """Load the requested input with nanoget and save it as a feather file.

    Arguments are taken from ``get_args()``. Of all supported input types,
    the first one that was actually supplied is passed to ``get_input``;
    the object it returns is written to ``args.output`` via ``to_feather``.
    """
    args = get_args()
    candidates = {
        "fastq": args.fastq,
        "bam": args.bam,
        "cram": args.cram,
        "fastq_rich": args.fastq_rich,
        "fastq_minimal": args.fastq_minimal,
        "summary": args.summary,
        "fasta": args.fasta,
        "ubam": args.ubam,
    }
    # Keep only the input types that were given; the first one wins.
    supplied = [(kind, files) for kind, files in candidates.items() if files]
    source_type, input_files = supplied[0]

    datadf = get_input(
        source=source_type,
        files=input_files,
        threads=args.threads,
        readtype=args.readtype,
        combine="simple",
        barcoded=args.barcoded,
        huge=args.huge,
        keep_supp=not args.no_supplementary,
    )
    datadf.to_feather(args.output)
def main():
    """Gather read data and write summary statistics with ``write_stats``.

    Data is either combined from previously stored feather files
    (``args.feather``) or extracted by ``nanoget.get_input`` from the first
    input type that was supplied. With ``args.barcoded`` set, statistics are
    written per barcode first; the statistics of the full dataset are
    written afterwards in every case.
    """
    args = get_args()
    if args.outdir and not os.path.exists(args.outdir):
        os.makedirs(args.outdir)

    if not args.feather:
        candidates = {
            "fastq": args.fastq,
            "bam": args.bam,
            "cram": args.cram,
            "summary": args.summary,
            "fasta": args.fasta,
            "ubam": args.ubam,
        }
        # Keep only the input types that were given; the first one wins.
        supplied = [(kind, files) for kind, files in candidates.items() if files]
        source_type, input_files = supplied[0]
        datadf = nanoget.get_input(
            source=source_type,
            files=input_files,
            threads=args.threads,
            readtype=args.readtype,
            combine="track",
            barcoded=args.barcoded,
            keep_supp=not args.no_supplementary,
        )
    else:
        # Imported lazily: only needed when feather files are the input.
        from nanoget import combine_dfs
        from pandas import read_feather

        frames = [read_feather(feather_path) for feather_path in args.feather]
        datadf = combine_dfs(frames, method="simple")

    if args.barcoded:
        barcode_names = list(datadf["barcode"].unique())
        per_barcode = [datadf[datadf["barcode"] == name] for name in barcode_names]
        write_stats(
            datadfs=per_barcode,
            outputfile=os.path.join(args.outdir, args.name),
            names=barcode_names,
        )
    write_stats(
        datadfs=[datadf],
        outputfile=os.path.join(args.outdir, args.name),
        as_tsv=args.tsv,
    )
def main():
    """Merge summary and bam data, plot the result and export start times.

    Both inputs are read with ``get_input`` and indexed on ``readIDs``. The
    summary data is left-joined with the bam data on that index, the merged
    frame is handed to ``plot_retrotect``, and the rows without any missing
    value are written, sorted by ``start_time``, to a gzipped tab separated
    file.
    """
    args = get_args()

    summary_df = get_input(source="summary", files=args.summary).set_index("readIDs")
    bam_df = get_input(source="bam", files=args.bam).set_index("readIDs")
    merged_df = summary_df.merge(
        right=bam_df,
        how="left",
        left_index=True,
        right_index=True,
    )

    output_prefix = path.join(args.outdir, args.prefix)
    plot_retrotect(
        df=merged_df,
        path=output_prefix,
        figformat=args.format,
        title=args.title,
        hours=args.hours,
    )

    # Only rows that are complete after the left join end up in the details file.
    complete_rows = merged_df.dropna(axis="index", how="any")
    ordered_rows = complete_rows.sort_values(by="start_time")
    ordered_rows.to_csv(
        path_or_buf=output_prefix + "Retrotect_details.txt.gz",
        sep="\t",
        columns=["start_time"],
        compression='gzip',
    )
def main():
    """Organization function.

    - sets up the output directory and logging
    - gets the input data through ``nanoget.get_input``
    - filters/transforms the data, optionally stores it (tsv and/or pickle)
    - writes statistics per dataset and, unless plotting is disabled,
      creates the plots and the report

    Raises:
        Exception: any exception raised while processing is logged with its
            traceback and then re-raised unchanged.
    """
    settings, args = utils.get_args()
    try:
        utils.make_output_dir(args.outdir)
        utils.init_logs(args)
        args.format = utils.check_valid_format(args.format)
        sources = {
            "fastq": args.fastq,
            "bam": args.bam,
            "cram": args.cram,
            "summary": args.summary,
            "fasta": args.fasta,
            "ubam": args.ubam,
        }
        if args.split_runs:
            # Validate early so a bad file is reported before the input is read.
            split_dict = utils.validate_split_runs_file(args.split_runs)
        # The first input type that was supplied determines source and files.
        datadf = nanoget.get_input(
            source=[n for n, s in sources.items() if s][0],
            files=[f for f in sources.values() if f][0],
            threads=args.threads,
            readtype=args.readtype,
            names=args.names,
            barcoded=args.barcoded,
            combine="track")
        datadf, settings = filter_and_transform_data(datadf, vars(args))
        if args.raw:
            datadf.to_csv(settings["path"] + "NanoComp-data.tsv.gz",
                          sep="\t",
                          index=False,
                          compression="gzip")
        if args.store:
            # Context manager guarantees the pickle file is flushed and closed.
            with open(settings["path"] + "NanoComp-data.pickle", 'wb') as pickle_handle:
                pickle.dump(obj=datadf, file=pickle_handle)
        if args.split_runs:
            utils.change_identifiers(datadf, split_dict)
        if args.barcoded:
            # Barcodes take the role of the dataset identifier.
            datadf["dataset"] = datadf["barcode"]
        identifiers = list(datadf["dataset"].unique())
        write_stats(
            datadfs=[datadf[datadf["dataset"] == i] for i in identifiers],
            outputfile=settings["path"] + "NanoStats.txt",
            names=identifiers)
        if args.plot != 'false':
            plots = make_plots(datadf, settings)
            make_report(plots, settings["path"])
        logging.info("Succesfully processed all input.")
    except Exception as e:
        logging.error(e, exc_info=True)
        raise
def main():
    """Organization function.

    - sets up the output directory and logging
    - gets the input data through ``nanoget.get_input``
    - optionally stores the raw data as a gzipped tsv file
    - writes statistics per dataset, creates the plots and the report

    All output files share the prefix ``path.join(args.outdir, args.prefix)``,
    which is kept in ``settings["path"]``.

    Raises:
        Exception: any exception raised while processing is logged with its
            traceback and then re-raised unchanged.
    """
    args = get_args()
    try:
        utils.make_output_dir(args.outdir)
        utils.init_logs(args, tool="NanoComp")
        args.format = nanoplotter.check_valid_format(args.format)
        settings = vars(args)
        settings["path"] = path.join(args.outdir, args.prefix)
        sources = [args.fastq, args.bam, args.summary, args.fasta]
        sourcename = ["fastq", "bam", "summary", "fasta"]
        if args.split_runs:
            # Validate early so a bad file is reported before the input is read.
            split_dict = validate_split_runs_file(args.split_runs)
        # The first input type that was supplied determines source and files.
        datadf = nanoget.get_input(
            source=[n for n, s in zip(sourcename, sources) if s][0],
            files=[f for f in sources if f][0],
            threads=args.threads,
            readtype=args.readtype,
            names=args.names,
            barcoded=args.barcoded,
            combine="track")
        if args.raw:
            # Written next to the other output files (outdir + prefix) rather
            # than to a bare file name in the current working directory.
            datadf.to_csv(settings["path"] + "NanoComp-data.tsv.gz",
                          sep="\t",
                          index=False,
                          compression="gzip")
        if args.split_runs:
            change_identifiers(datadf, split_dict)
        if args.barcoded:
            # Barcodes take the role of the dataset identifier.
            datadf["dataset"] = datadf["barcode"]
        identifiers = list(datadf["dataset"].unique())
        write_stats(
            datadfs=[datadf[datadf["dataset"] == i] for i in identifiers],
            outputfile=settings["path"] + "NanoStats.txt",
            names=identifiers)
        plots = make_plots(datadf, settings)
        make_report(plots, path.join(args.outdir, args.prefix))
        logging.info("Succesfully processed all input.")
    except Exception as e:
        logging.error(e, exc_info=True)
        raise
def main():
    """Read the supplied input and write its statistics to a single file.

    The output directory is created when it does not exist yet. Statistics
    go to ``args.name`` when that is set, otherwise to
    ``<outdir>/<prefix>NanoStats.txt``.
    """
    args = get_args()
    if not os.path.exists(args.outdir):
        os.makedirs(args.outdir)

    labelled_inputs = (
        ("fastq", args.fastq),
        ("bam", args.bam),
        ("summary", args.summary),
    )
    # Keep only the input types that were given; the first one wins.
    supplied = [(kind, files) for kind, files in labelled_inputs if files]
    source_type, input_files = supplied[0]

    datadf = nanoget.get_input(
        source=source_type,
        files=input_files,
        threads=args.threads,
        readtype=args.readtype,
        combine="track",
    )

    default_target = os.path.join(args.outdir, args.prefix + "NanoStats.txt")
    stats_target = args.name if args.name else default_target
    write_stats([datadf], stats_target)
def main():
    """Read the supplied input and write statistics to ``args.name``.

    The output directory is created when it does not exist yet. With
    ``args.barcoded`` set, statistics are written per barcode first; the
    statistics of the full dataset are written afterwards in every case.
    """
    args = get_args()
    if not os.path.exists(args.outdir):
        os.makedirs(args.outdir)

    labelled_inputs = (
        ("fastq", args.fastq),
        ("bam", args.bam),
        ("summary", args.summary),
        ("fasta", args.fasta),
    )
    # Keep only the input types that were given; the first one wins.
    supplied = [(kind, files) for kind, files in labelled_inputs if files]
    source_type, input_files = supplied[0]

    datadf = nanoget.get_input(
        source=source_type,
        files=input_files,
        threads=args.threads,
        readtype=args.readtype,
        combine="track",
        barcoded=args.barcoded,
    )

    if args.barcoded:
        barcode_names = list(datadf["barcode"].unique())
        per_barcode = [datadf[datadf["barcode"] == name] for name in barcode_names]
        write_stats(
            datadfs=per_barcode,
            outputfile=args.name,
            names=barcode_names,
        )
    write_stats(datadfs=[datadf], outputfile=args.name)
def main():
    """Organization function.

    - sets up the output directory and logging
    - gets the input data, either from a stored pickle or via ``get_input``
    - optionally stores the data (pickle and/or gzipped tsv)
    - creates statistics before and, when a filter was applied, after
      filtering
    - calls the plotting function (per barcode when ``args.barcoded`` is
      set) and creates the report

    Raises:
        Exception: any exception raised while processing is logged with its
            traceback, a help message is printed, and the exception is
            re-raised unchanged.
    """
    settings, args = utils.get_args()
    try:
        utils.make_output_dir(args.outdir)
        utils.init_logs(args)
        args.format = nanoplotter.check_valid_format(args.format)
        sources = {
            "fastq": args.fastq,
            "bam": args.bam,
            "cram": args.cram,
            "fastq_rich": args.fastq_rich,
            "fastq_minimal": args.fastq_minimal,
            "summary": args.summary,
            "fasta": args.fasta,
            "ubam": args.ubam,
        }
        if args.pickle:
            # CAUTION: unpickling executes arbitrary code embedded in the
            # file; only load pickles that come from a trusted source.
            with open(args.pickle, 'rb') as pickle_handle:
                datadf = pickle.load(pickle_handle)
        else:
            # The first input type that was supplied determines source and files.
            datadf = get_input(
                source=[n for n, s in sources.items() if s][0],
                files=[f for f in sources.values() if f][0],
                threads=args.threads,
                readtype=args.readtype,
                combine="simple",
                barcoded=args.barcoded,
                huge=args.huge)
        if args.store:
            # Context manager guarantees the pickle file is flushed and closed.
            with open(settings["path"] + "NanoPlot-data.pickle", 'wb') as pickle_handle:
                pickle.dump(obj=datadf, file=pickle_handle)
        if args.raw:
            datadf.to_csv(settings["path"] + "NanoPlot-data.tsv.gz",
                          sep="\t",
                          index=False,
                          compression="gzip")

        settings["statsfile"] = [make_stats(datadf, settings, suffix="")]
        datadf, settings = filter_and_transform_data(datadf, settings)
        # Bool set when filter was applied in filter_and_transform_data()
        if settings["filtered"]:
            settings["statsfile"].append(
                make_stats(datadf[datadf["length_filter"]], settings,
                           suffix="_post_filtering"))

        if args.barcoded:
            barcodes = list(datadf["barcode"].unique())
            plots = []
            for barc in barcodes:
                logging.info("Processing {}".format(barc))
                # Every barcode gets its own output prefix.
                settings["path"] = path.join(args.outdir, args.prefix + barc + "_")
                dfbarc = datadf[datadf["barcode"] == barc]
                if len(dfbarc) > 5:
                    settings["title"] = barc
                    plots.extend(make_plots(dfbarc, settings))
                else:
                    sys.stderr.write(
                        "Found barcode {} less than 5x, ignoring...\n".format(barc))
                    logging.info(
                        "Found barcode {} less than 5 times, ignoring".format(barc))
            # NOTE(review): settings["path"] still holds the prefix of the last
            # barcode when make_report is called below - confirm this is intended.
        else:
            plots = make_plots(datadf, settings)
        make_report(plots, settings)
        logging.info("Finished!")
    except Exception as e:
        logging.error(e, exc_info=True)
        print("\n\n\nIf you read this then NanoPlot {} has crashed :-(".format(__version__))
        print("Please try updating NanoPlot and see if that helps...\n")
        print("If not, please report this issue at https://github.com/wdecoster/NanoPlot/issues")
        print("If you could include the log file that would be really helpful.")
        print("Thanks!\n\n\n")
        raise
def main():
    """Organization function.

    - sets up the output directory and logging
    - gets the input data, either from a stored pickle or via ``get_input``
    - optionally stores the data (pickle and/or gzipped tsv)
    - writes statistics, filters the data and calls the plotting function
      (per barcode when ``args.barcoded`` is set)
    - creates the report

    All output files share the prefix ``path.join(args.outdir, args.prefix)``,
    which is kept in ``settings["path"]``.

    Raises:
        Exception: any exception raised while processing is logged with its
            traceback, a help message is printed, and the exception is
            re-raised unchanged.
    """
    args = get_args()
    try:
        utils.make_output_dir(args.outdir)
        logfile = utils.init_logs(args)
        args.format = nanoplotter.check_valid_format(args.format)
        settings = vars(args)
        settings["path"] = path.join(args.outdir, args.prefix)
        sources = [
            args.fastq, args.bam, args.fastq_rich, args.fastq_minimal, args.summary
        ]
        sourcename = ["fastq", "bam", "fastq_rich", "fastq_minimal", "summary"]
        if args.pickle:
            # CAUTION: unpickling executes arbitrary code embedded in the
            # file; only load pickles that come from a trusted source.
            with open(args.pickle, 'rb') as pickle_handle:
                datadf = pickle.load(pickle_handle)
        else:
            # The first input type that was supplied determines source and files.
            datadf = get_input(
                source=[n for n, s in zip(sourcename, sources) if s][0],
                files=[f for f in sources if f][0],
                threads=args.threads,
                readtype=args.readtype,
                combine="simple",
                barcoded=args.barcoded)
        if args.store:
            # Context manager guarantees the pickle file is flushed and closed.
            with open(settings["path"] + "NanoPlot-data.pickle", 'wb') as pickle_handle:
                pickle.dump(obj=datadf, file=pickle_handle)
        if args.raw:
            # Written next to the other output files (outdir + prefix) rather
            # than to a bare file name in the current working directory.
            datadf.to_csv(settings["path"] + "NanoPlot-data.tsv.gz",
                          sep="\t",
                          index=False,
                          compression="gzip")

        statsfile = settings["path"] + "NanoStats.txt"
        nanomath.write_stats(datadfs=[datadf], outputfile=statsfile)
        logging.info("Calculated statistics")

        datadf, settings = filter_data(datadf, settings)
        if args.barcoded:
            barcodes = list(datadf["barcode"].unique())
            statsfile = settings["path"] + "NanoStats_barcoded.txt"
            nanomath.write_stats(
                datadfs=[datadf[datadf["barcode"] == b] for b in barcodes],
                outputfile=statsfile,
                names=barcodes)
            plots = []
            for barc in barcodes:
                logging.info("Processing {}".format(barc))
                # Every barcode gets its own output prefix.
                settings["path"] = path.join(args.outdir, args.prefix + barc + "_")
                dfbarc = datadf[datadf["barcode"] == barc]
                settings["title"] = barc
                plots.extend(make_plots(dfbarc, settings))
            # Restore the general prefix so the report is not named after a barcode.
            settings["path"] = path.join(args.outdir, args.prefix)
        else:
            plots = make_plots(datadf, settings)
        make_report(plots, settings["path"], logfile, statsfile)
        logging.info("Finished!")
    except Exception as e:
        logging.error(e, exc_info=True)
        print("\n\n\nIf you read this then NanoPlot has crashed :-(")
        print("Please report this issue at https://github.com/wdecoster/NanoPlot/issues")
        print("If you include the log file that would be really helpful.")
        print("Thanks!\n\n\n")
        raise
def run_tests():
    """Run ``nanoget.get_input`` on the test data from the nanotest repo.

    Every supported source type is exercised once, in a fixed order; bam is
    read a second time with ``keep_supp=False`` and the summary file is read
    with ``combine="track"``. Return values are discarded.
    """
    cases = (
        ("bam", ["nanotest/alignment.bam"], {}),
        ("bam", ["nanotest/alignment.bam"], {"keep_supp": False}),
        ("fastq_rich", ["nanotest/reads.fastq.gz"], {}),
        ("summary", ["nanotest/sequencing_summary.txt"], {"combine": "track"}),
        ("fastq_minimal", ["nanotest/reads.fastq.gz"], {}),
        ("fastq", ["nanotest/reads.fastq.gz"], {}),
        ("fasta", ["nanotest/reads.fa.gz"], {}),
    )
    for source_type, input_files, extra_options in cases:
        nanoget.get_input(source_type, input_files, **extra_options)
def main():
    """Read the fastq file named on the command line and split it.

    The arguments come from ``getArgs()``; the name of ``args.fastqfile`` is
    passed to ``get_input`` and its result is handed, together with the
    arguments, to ``splitFq``.
    """
    cli_args = getArgs()
    splitFq(get_input(cli_args.fastqfile.name), cli_args)