def main():
    """NanoStat entry point: gather per-read metrics and write summary statistics.

    Input comes either from feather files (loaded directly and concatenated) or
    from exactly one sequencing source (fastq/bam/cram/summary/fasta/ubam)
    extracted via nanoget. Statistics are written to <outdir>/<name>:
    per-barcode when --barcoded is set, overall otherwise.
    """
    args = get_args()
    if args.outdir and not os.path.exists(args.outdir):
        os.makedirs(args.outdir)
    if args.feather:
        # Feather input bypasses nanoget extraction: read each file and
        # concatenate the resulting frames.
        from nanoget import combine_dfs
        from pandas import read_feather
        datadf = combine_dfs([read_feather(p) for p in args.feather],
                             method="simple")
    else:
        sources = {
            "fastq": args.fastq,
            "bam": args.bam,
            "cram": args.cram,
            "summary": args.summary,
            "fasta": args.fasta,
            "ubam": args.ubam,
        }
        # Exactly one source is expected to be supplied; take the first
        # non-empty entry (dicts preserve insertion order).
        datadf = nanoget.get_input(
            source=[n for n, s in sources.items() if s][0],
            files=[f for f in sources.values() if f][0],
            threads=args.threads,
            readtype=args.readtype,
            combine="track",
            barcoded=args.barcoded,
            keep_supp=not args.no_supplementary)
    outputfile = os.path.join(args.outdir, args.name)
    if args.barcoded:
        barcodes = list(datadf["barcode"].unique())
        write_stats(datadfs=[datadf[datadf["barcode"] == b] for b in barcodes],
                    outputfile=outputfile,
                    names=barcodes)
    else:
        # FIX: this call previously ran unconditionally, so in the barcoded
        # case the overall stats overwrote the per-barcode stats written just
        # above to the very same path.
        write_stats(datadfs=[datadf], outputfile=outputfile, as_tsv=args.tsv)
def main():
    """NanoComp entry point.

    Prepares the output directory and logging, extracts read data from the
    single supplied input source, filters/transforms it, optionally dumps
    raw/pickled data, writes per-dataset statistics, and (unless plotting is
    disabled) renders plots plus an HTML report. Any exception is logged with
    a traceback and re-raised.
    """
    settings, args = utils.get_args()
    try:
        utils.make_output_dir(args.outdir)
        utils.init_logs(args)
        args.format = utils.check_valid_format(args.format)
        sources = {
            "fastq": args.fastq,
            "bam": args.bam,
            "cram": args.cram,
            "summary": args.summary,
            "fasta": args.fasta,
            "ubam": args.ubam,
        }
        if args.split_runs:
            split_dict = utils.validate_split_runs_file(args.split_runs)
        # Exactly one input type is expected; pick the first that was given.
        supplied = [(kind, files) for kind, files in sources.items() if files]
        datadf = nanoget.get_input(source=supplied[0][0],
                                   files=supplied[0][1],
                                   threads=args.threads,
                                   readtype=args.readtype,
                                   names=args.names,
                                   barcoded=args.barcoded,
                                   combine="track")
        datadf, settings = filter_and_transform_data(datadf, vars(args))
        if args.raw:
            datadf.to_csv(settings["path"] + "NanoComp-data.tsv.gz",
                          sep="\t",
                          index=False,
                          compression="gzip")
        if args.store:
            pickle.dump(obj=datadf,
                        file=open(settings["path"] + "NanoComp-data.pickle", 'wb'))
        if args.split_runs:
            utils.change_identifiers(datadf, split_dict)
        if args.barcoded:
            # Treat each barcode as its own dataset for comparison.
            datadf["dataset"] = datadf["barcode"]
        identifiers = list(datadf["dataset"].unique())
        write_stats(
            datadfs=[datadf[datadf["dataset"] == i] for i in identifiers],
            outputfile=settings["path"] + "NanoStats.txt",
            names=identifiers)
        if args.plot != 'false':
            plots = make_plots(datadf, settings)
            make_report(plots, settings["path"])
        logging.info("Succesfully processed all input.")
    except Exception as e:
        logging.error(e, exc_info=True)
        raise
def make_stats(datadf, settings, suffix):
    """Write overall statistics (and per-barcode statistics when barcoded).

    Returns the path of the last statistics file written, i.e. the barcoded
    one when settings["barcoded"] is truthy, the overall one otherwise.
    """
    statsfile = settings["path"] + "NanoStats" + suffix + ".txt"
    nanomath.write_stats(datadfs=[datadf], outputfile=statsfile)
    logging.info("Calculated statistics")
    if settings["barcoded"]:
        found_barcodes = list(datadf["barcode"].unique())
        statsfile = settings["path"] + "NanoStats_barcoded.txt"
        per_barcode = [datadf[datadf["barcode"] == bc] for bc in found_barcodes]
        nanomath.write_stats(datadfs=per_barcode,
                             outputfile=statsfile,
                             names=found_barcodes)
    return statsfile
def main():
    """NanoComp driver.

    Sets up output directory and logging, extracts read data from the one
    supplied source, optionally dumps the raw table and relabels split runs,
    writes per-dataset statistics, then builds plots and the HTML report.
    Exceptions are logged with a traceback before being re-raised.
    """
    args = get_args()
    try:
        utils.make_output_dir(args.outdir)
        utils.init_logs(args, tool="NanoComp")
        args.format = nanoplotter.check_valid_format(args.format)
        settings = vars(args)
        settings["path"] = path.join(args.outdir, args.prefix)
        # Pair every possible input kind with what was actually supplied and
        # keep only the one(s) given; the first is used.
        candidates = zip(["fastq", "bam", "summary", "fasta"],
                         [args.fastq, args.bam, args.summary, args.fasta])
        provided = [(kind, files) for kind, files in candidates if files]
        if args.split_runs:
            split_dict = validate_split_runs_file(args.split_runs)
        datadf = nanoget.get_input(source=provided[0][0],
                                   files=provided[0][1],
                                   threads=args.threads,
                                   readtype=args.readtype,
                                   names=args.names,
                                   barcoded=args.barcoded,
                                   combine="track")
        if args.raw:
            datadf.to_csv("NanoComp-data.tsv.gz",
                          sep="\t",
                          index=False,
                          compression="gzip")
        if args.split_runs:
            change_identifiers(datadf, split_dict)
        if args.barcoded:
            # Compare barcodes as if they were separate datasets.
            datadf["dataset"] = datadf["barcode"]
        identifiers = list(datadf["dataset"].unique())
        write_stats(
            datadfs=[datadf[datadf["dataset"] == i] for i in identifiers],
            outputfile=settings["path"] + "NanoStats.txt",
            names=identifiers)
        plots = make_plots(datadf, settings)
        make_report(plots, path.join(args.outdir, args.prefix))
        logging.info("Succesfully processed all input.")
    except Exception as e:
        logging.error(e, exc_info=True)
        raise
def main():
    """Minimal NanoStat driver: extract read metrics, write one stats file."""
    args = get_args()
    if not os.path.exists(args.outdir):
        os.makedirs(args.outdir)
    available = {"fastq": args.fastq, "bam": args.bam, "summary": args.summary}
    # Only one input type is expected; use the first that was supplied.
    kind, files = [(k, f) for k, f in available.items() if f][0]
    datadf = nanoget.get_input(source=kind,
                               files=files,
                               threads=args.threads,
                               readtype=args.readtype,
                               combine="track")
    # An explicit --name wins over the prefix-based default inside outdir.
    output = args.name if args.name else os.path.join(
        args.outdir, args.prefix + "NanoStats.txt")
    write_stats([datadf], output)
def main():
    """NanoStat entry point: gather read metrics and write statistics.

    Extracts read data from the single supplied source, then writes
    per-barcode statistics (with --barcoded) or overall statistics to
    args.name.
    """
    args = get_args()
    if not os.path.exists(args.outdir):
        os.makedirs(args.outdir)
    sources = [args.fastq, args.bam, args.summary, args.fasta]
    sourcename = ["fastq", "bam", "summary", "fasta"]
    # Exactly one source is expected; take the first that was supplied.
    datadf = nanoget.get_input(
        source=[n for n, s in zip(sourcename, sources) if s][0],
        files=[f for f in sources if f][0],
        threads=args.threads,
        readtype=args.readtype,
        combine="track",
        barcoded=args.barcoded)
    if args.barcoded:
        barcodes = list(datadf["barcode"].unique())
        write_stats(datadfs=[datadf[datadf["barcode"] == b] for b in barcodes],
                    outputfile=args.name,
                    names=barcodes)
    else:
        # FIX: this call previously ran unconditionally, so in the barcoded
        # case the overall statistics overwrote the per-barcode statistics
        # just written to the same outputfile (args.name).
        write_stats(datadfs=[datadf], outputfile=args.name)
def main():
    """NanoPlot entry point.

    Sets up logging and the output directory, loads read data (from a stored
    pickle or by extracting one of the raw input types), optionally stores or
    dumps the data, writes statistics, filters the data, produces plots
    (per barcode when requested) and builds the HTML report. On failure the
    exception is logged, a pointer to the issue tracker is printed, and the
    exception is re-raised.
    """
    args = get_args()
    try:
        utils.make_output_dir(args.outdir)
        logfile = utils.init_logs(args)
        args.format = nanoplotter.check_valid_format(args.format)
        settings = vars(args)
        settings["path"] = path.join(args.outdir, args.prefix)
        inputs = {
            "fastq": args.fastq,
            "bam": args.bam,
            "fastq_rich": args.fastq_rich,
            "fastq_minimal": args.fastq_minimal,
            "summary": args.summary,
        }
        if args.pickle:
            # A pickle produced by a previous --store run skips extraction.
            datadf = pickle.load(open(args.pickle, 'rb'))
        else:
            # Exactly one input type is expected; use the first supplied.
            kind, files = [(k, f) for k, f in inputs.items() if f][0]
            datadf = get_input(source=kind,
                               files=files,
                               threads=args.threads,
                               readtype=args.readtype,
                               combine="simple",
                               barcoded=args.barcoded)
        if args.store:
            pickle.dump(obj=datadf,
                        file=open(settings["path"] + "NanoPlot-data.pickle", 'wb'))
        if args.raw:
            datadf.to_csv("NanoPlot-data.tsv.gz",
                          sep="\t",
                          index=False,
                          compression="gzip")
        statsfile = settings["path"] + "NanoStats.txt"
        nanomath.write_stats(datadfs=[datadf], outputfile=statsfile)
        logging.info("Calculated statistics")
        datadf, settings = filter_data(datadf, settings)
        if args.barcoded:
            barcodes = list(datadf["barcode"].unique())
            statsfile = settings["path"] + "NanoStats_barcoded.txt"
            nanomath.write_stats(
                datadfs=[datadf[datadf["barcode"] == b] for b in barcodes],
                outputfile=statsfile,
                names=barcodes)
            plots = []
            for barc in barcodes:
                logging.info("Processing {}".format(barc))
                # Each barcode gets its own output prefix and plot title.
                settings["path"] = path.join(args.outdir,
                                             args.prefix + barc + "_")
                dfbarc = datadf[datadf["barcode"] == barc]
                settings["title"] = barc
                plots.extend(make_plots(dfbarc, settings))
            # Restore the shared prefix for the combined report.
            settings["path"] = path.join(args.outdir, args.prefix)
        else:
            plots = make_plots(datadf, settings)
        make_report(plots, settings["path"], logfile, statsfile)
        logging.info("Finished!")
    except Exception as e:
        logging.error(e, exc_info=True)
        print("\n\n\nIf you read this then NanoPlot has crashed :-(")
        print("Please report this issue at https://github.com/wdecoster/NanoPlot/issues")
        print("If you include the log file that would be really helpful.")
        print("Thanks!\n\n\n")
        raise