def main():
    """Entry point: load read data and write NanoStat-style statistics.

    Parses CLI arguments, loads a read-metrics DataFrame either from
    pre-extracted feather files or via nanoget from one raw source
    (fastq/bam/cram/summary/fasta/ubam), then writes summary statistics —
    one column per barcode when --barcoded, otherwise a single combined
    column (optionally as TSV).
    """
    args = get_args()
    # Create the output directory up front so writing stats cannot fail on it.
    if args.outdir and not os.path.exists(args.outdir):
        os.makedirs(args.outdir)
    if args.feather:
        # Feather input bypasses extraction: frames are read and concatenated.
        from nanoget import combine_dfs
        from pandas import read_feather
        datadf = combine_dfs([read_feather(p) for p in args.feather],
                             method="simple")
    else:
        # Exactly one of these sources is expected to be set; the first
        # truthy entry determines both the source type and the file list
        # (same dict, same iteration order, so the pair stays matched).
        sources = {
            "fastq": args.fastq,
            "bam": args.bam,
            "cram": args.cram,
            "summary": args.summary,
            "fasta": args.fasta,
            "ubam": args.ubam,
        }
        datadf = nanoget.get_input(
            source=[n for n, s in sources.items() if s][0],
            files=[f for f in sources.values() if f][0],
            threads=args.threads,
            readtype=args.readtype,
            combine="track",
            barcoded=args.barcoded,
            keep_supp=not args.no_supplementary)
    if args.barcoded:
        # One stats column per barcode found in the data.
        barcodes = list(datadf["barcode"].unique())
        write_stats(
            datadfs=[datadf[datadf["barcode"] == b] for b in barcodes],
            outputfile=os.path.join(args.outdir, args.name),
            names=barcodes)
    else:
        # Combined stats only in non-barcoded mode; running this call
        # unconditionally would overwrite the per-barcode output above.
        write_stats(
            datadfs=[datadf],
            outputfile=os.path.join(args.outdir, args.name),
            as_tsv=args.tsv)
def main():
    """
    Organization function
    -setups logging
    -gets inputdata
    -calls plotting function
    """
    settings, args = utils.get_args()
    try:
        utils.make_output_dir(args.outdir)
        utils.init_logs(args)
        if args.pickle:
            # NOTE(review): unpickling is only safe on files this tool wrote
            # itself (--store); never point --pickle at untrusted data.
            with open(args.pickle, 'rb') as pf:
                datadf = pickle.load(pf)
        elif args.feather:
            # Feather input bypasses extraction: frames are read and merged.
            from nanoget import combine_dfs
            from pandas import read_feather
            datadf = combine_dfs([read_feather(p) for p in args.feather],
                                 method="simple")
        else:
            # Exactly one of these sources is expected to be set; the first
            # truthy entry determines both the source type and the file list.
            sources = {
                "fastq": args.fastq,
                "bam": args.bam,
                "cram": args.cram,
                "fastq_rich": args.fastq_rich,
                "fastq_minimal": args.fastq_minimal,
                "summary": args.summary,
                "fasta": args.fasta,
                "ubam": args.ubam,
            }
            datadf = get_input(
                source=[n for n, s in sources.items() if s][0],
                files=[f for f in sources.values() if f][0],
                threads=args.threads,
                readtype=args.readtype,
                combine="simple",
                barcoded=args.barcoded,
                huge=args.huge,
                keep_supp=not args.no_supplementary)
        if args.store:
            # Persist the extracted DataFrame for fast re-runs via --pickle.
            with open(settings["path"] + "NanoPlot-data.pickle", 'wb') as pf:
                pickle.dump(obj=datadf, file=pf)
        if args.raw:
            datadf.to_csv(settings["path"] + "NanoPlot-data.tsv.gz",
                          sep="\t",
                          index=False,
                          compression="gzip")
        # Pre-filter stats first; a post-filter stats file may be appended below.
        settings["statsfile"] = [make_stats(datadf, settings, suffix="",
                                            tsv_stats=args.tsv_stats)]
        datadf, settings = filter_and_transform_data(datadf, settings)
        if settings["filtered"]:  # Bool set when filter was applied in filter_and_transform_data()
            settings["statsfile"].append(
                make_stats(datadf[datadf["length_filter"]], settings,
                           suffix="_post_filtering",
                           tsv_stats=args.tsv_stats)
            )
        if args.barcoded:
            # Plot each barcode separately, prefixing output paths per barcode;
            # restore the shared path afterwards for the combined report.
            main_path = settings["path"]
            barcodes = list(datadf["barcode"].unique())
            plots = []
            for barc in barcodes:
                logging.info("Processing {}".format(barc))
                dfbarc = datadf[datadf["barcode"] == barc]
                if len(dfbarc) > 5:
                    settings["title"] = barc
                    settings["path"] = path.join(args.outdir,
                                                 args.prefix + barc + "_")
                    plots.append(report.BarcodeTitle(barc))
                    plots.extend(make_plots(dfbarc, settings))
                else:
                    # Too few reads for meaningful plots; skip this barcode.
                    sys.stderr.write(
                        "Found barcode {} less than 5x, ignoring...\n".format(barc))
                    logging.info(
                        "Found barcode {} less than 5 times, ignoring".format(barc))
            settings["path"] = main_path
        else:
            plots = make_plots(datadf, settings)
        make_report(plots, settings)
        logging.info("Finished!")
    except Exception as e:
        # Top-level boundary: log with traceback, tell the user how to
        # report the crash, then re-raise so the exit status is non-zero.
        logging.error(e, exc_info=True)
        print("\n\n\nIf you read this then NanoPlot {} has crashed :-(".format(__version__))
        print("Please try updating NanoPlot and see if that helps...\n")
        print("If not, please report this issue at https://github.com/wdecoster/NanoPlot/issues")
        print("If you could include the log file that would be really helpful.")
        print("Thanks!\n\n\n")
        raise
def main():
    """
    Organization function
    -setups logging
    -gets inputdata
    -calls plotting function
    """
    settings, args = utils.get_args()
    try:
        utils.make_output_dir(args.outdir)
        utils.init_logs(args)
        # Exactly one of these sources is expected to be set; the first
        # truthy entry determines both the source type and the file list.
        sources = {
            "fastq": args.fastq,
            "bam": args.bam,
            "cram": args.cram,
            "summary": args.summary,
            "fasta": args.fasta,
            "ubam": args.ubam,
        }
        if args.split_runs:
            split_dict = utils.validate_split_runs_file(args.split_runs)
        if args.pickle:
            # NOTE(review): unpickling is only safe on files this tool wrote
            # itself (--store); never point --pickle at untrusted data.
            from nanoget import combine_dfs
            dfs = []
            for p in args.pickle:
                with open(p, 'rb') as pf:
                    dfs.append(pickle.load(pf))
            datadf = combine_dfs(dfs=dfs, names=args.names, method="track")
        elif args.feather:
            from nanoget import combine_dfs
            from pandas import read_feather
            datadf = combine_dfs([read_feather(p) for p in args.feather],
                                 names=args.names or args.feather,
                                 method="track")
        else:
            datadf = nanoget.get_input(
                source=[n for n, s in sources.items() if s][0],
                files=[f for f in sources.values() if f][0],
                threads=args.threads,
                readtype=args.readtype,
                names=args.names,
                barcoded=args.barcoded,
                combine="track")
        datadf, settings = filter_and_transform_data(datadf, vars(args))
        if args.raw:
            datadf.to_csv(settings["path"] + "NanoComp-data.tsv.gz",
                          sep="\t",
                          index=False,
                          compression="gzip")
        if args.store:
            # Persist the extracted DataFrame for fast re-runs via --pickle.
            with open(settings["path"] + "NanoComp-data.pickle", 'wb') as pf:
                pickle.dump(obj=datadf, file=pf)
        if args.split_runs:
            utils.change_identifiers(datadf, split_dict)
        if args.barcoded:
            # In barcoded mode each barcode is treated as its own dataset.
            datadf["dataset"] = datadf["barcode"]
        identifiers = list(datadf["dataset"].unique())
        stats_df = write_stats(
            datadfs=[datadf[datadf["dataset"] == i] for i in identifiers],
            outputfile=settings["path"] + "NanoStats.txt",
            names=identifiers,
            as_tsv=args.tsv_stats)
        if args.plot != 'false':
            plots = make_plots(datadf, settings)
            make_report(plots, settings["path"], stats_df=stats_df)
        logging.info("Successfully processed all input.")
    except Exception as e:
        # Top-level boundary: log with traceback, then re-raise so the
        # process exits non-zero.
        logging.error(e, exc_info=True)
        raise