def main():
    '''
    Organization function
    -parses the arguments, creates the output directory and sets up logging
    -gets inputdata from the first source type for which files were given
    -filters/transforms the data, optionally saving it as tsv.gz and/or pickle
    -writes statistics per dataset to NanoStats.txt
    -calls plotting and report functions, unless args.plot equals 'false'

    Any exception is logged together with its traceback and re-raised.
    '''
    settings, args = utils.get_args()
    try:
        utils.make_output_dir(args.outdir)
        utils.init_logs(args)
        args.format = utils.check_valid_format(args.format)
        sources = {
            "fastq": args.fastq,
            "bam": args.bam,
            "cram": args.cram,
            "summary": args.summary,
            "fasta": args.fasta,
            "ubam": args.ubam,
        }
        if args.split_runs:
            split_dict = utils.validate_split_runs_file(args.split_runs)
        # Pick the first source type that was actually supplied, together with
        # its files; raises IndexError when no source was given at all.
        source, files = [(name, value)
                         for name, value in sources.items() if value][0]
        datadf = nanoget.get_input(source=source,
                                   files=files,
                                   threads=args.threads,
                                   readtype=args.readtype,
                                   names=args.names,
                                   barcoded=args.barcoded,
                                   combine="track")
        # From here on 'settings' is the dict returned by the filtering step,
        # replacing the one obtained from utils.get_args().
        datadf, settings = filter_and_transform_data(datadf, vars(args))
        if args.raw:
            datadf.to_csv(settings["path"] + "NanoComp-data.tsv.gz",
                          sep="\t",
                          index=False,
                          compression="gzip")
        if args.store:
            # Context manager guarantees the pickle file is flushed and closed,
            # also when dumping fails halfway.
            with open(settings["path"] + "NanoComp-data.pickle",
                      'wb') as pickle_out:
                pickle.dump(obj=datadf, file=pickle_out)
        if args.split_runs:
            # split_dict was created above under the same condition
            utils.change_identifiers(datadf, split_dict)
        if args.barcoded:
            # Compare barcodes rather than the original datasets
            datadf["dataset"] = datadf["barcode"]
        identifiers = list(datadf["dataset"].unique())
        write_stats(
            datadfs=[datadf[datadf["dataset"] == i] for i in identifiers],
            outputfile=settings["path"] + "NanoStats.txt",
            names=identifiers)
        if args.plot != 'false':
            plots = make_plots(datadf, settings)
            make_report(plots, settings["path"])
        logging.info("Succesfully processed all input.")
    except Exception as e:
        # Make sure the failure ends up in the log file before propagating it
        logging.error(e, exc_info=True)
        raise
Example #2
0
def main():
    '''
    Organization function
    -parses the arguments, creates the output directory and sets up logging
    -gets inputdata, either from a pickle file or from the first source type
     for which files were given
    -optionally saves the data as pickle and/or tsv.gz
    -creates statistics before and, when a filter was applied, after filtering
    -calls plotting function (once per barcode when args.barcoded is set)
    -creates the report

    Any exception is logged together with its traceback, a message asking the
    user to report the crash is printed, and the exception is re-raised.
    '''
    settings, args = utils.get_args()
    try:
        utils.make_output_dir(args.outdir)
        utils.init_logs(args)
        args.format = nanoplotter.check_valid_format(args.format)
        sources = {
            "fastq": args.fastq,
            "bam": args.bam,
            "cram": args.cram,
            "fastq_rich": args.fastq_rich,
            "fastq_minimal": args.fastq_minimal,
            "summary": args.summary,
            "fasta": args.fasta,
            "ubam": args.ubam,
        }

        if args.pickle:
            # NOTE: unpickling can execute arbitrary code, so the file passed
            # by the user must come from a trusted origin.
            with open(args.pickle, 'rb') as pickle_in:
                datadf = pickle.load(pickle_in)
        else:
            # Pick the first source type that was actually supplied, together
            # with its files; raises IndexError when no source was given.
            source, files = [(name, value)
                             for name, value in sources.items() if value][0]
            datadf = get_input(source=source,
                               files=files,
                               threads=args.threads,
                               readtype=args.readtype,
                               combine="simple",
                               barcoded=args.barcoded,
                               huge=args.huge)
        if args.store:
            # Context manager guarantees the pickle file is flushed and closed,
            # also when dumping fails halfway.
            with open(settings["path"] + "NanoPlot-data.pickle",
                      'wb') as pickle_out:
                pickle.dump(obj=datadf, file=pickle_out)
        if args.raw:
            datadf.to_csv(settings["path"] + "NanoPlot-data.tsv.gz",
                          sep="\t",
                          index=False,
                          compression="gzip")

        # Statistics on the unfiltered data are always created
        settings["statsfile"] = [make_stats(datadf, settings, suffix="")]
        datadf, settings = filter_and_transform_data(datadf, settings)
        if settings[
                "filtered"]:  # Bool set when filter was applied in filter_and_transform_data()
            settings["statsfile"].append(
                make_stats(datadf[datadf["length_filter"]],
                           settings,
                           suffix="_post_filtering"))

        if args.barcoded:
            barcodes = list(datadf["barcode"].unique())
            plots = []
            for barc in barcodes:
                logging.info("Processing {}".format(barc))
                # Every barcode gets its own output prefix; note that
                # settings["path"] keeps the value of the last barcode when
                # make_report() is called below.
                settings["path"] = path.join(args.outdir,
                                             args.prefix + barc + "_")
                dfbarc = datadf[datadf["barcode"] == barc]
                if len(dfbarc) > 5:
                    settings["title"] = barc
                    plots.extend(make_plots(dfbarc, settings))
                else:
                    sys.stderr.write(
                        "Found barcode {} less than 5x, ignoring...\n".format(
                            barc))
                    logging.info(
                        "Found barcode {} less than 5 times, ignoring".format(
                            barc))
        else:
            plots = make_plots(datadf, settings)
        make_report(plots, settings)
        logging.info("Finished!")
    except Exception as e:
        # Make sure the failure ends up in the log file before propagating it
        logging.error(e, exc_info=True)
        print("\n\n\nIf you read this then NanoPlot {} has crashed :-(".format(
            __version__))
        print("Please try updating NanoPlot and see if that helps...\n")
        print(
            "If not, please report this issue at https://github.com/wdecoster/NanoPlot/issues"
        )
        print(
            "If you could include the log file that would be really helpful.")
        print("Thanks!\n\n\n")
        raise