def main():
    """Entry point: load read data and write NanoStat-style statistics.

    Parses CLI arguments, loads a read-metrics DataFrame either from
    pre-extracted feather files or via nanoget from one raw source
    (fastq/bam/cram/summary/fasta/ubam), then writes summary statistics —
    one column per barcode when --barcoded, otherwise a single combined
    column (optionally as TSV).
    """
    args = get_args()
    # Create the output directory up front so writing stats cannot fail on it.
    if args.outdir and not os.path.exists(args.outdir):
        os.makedirs(args.outdir)
    if args.feather:
        # Feather input bypasses extraction: frames are read and concatenated.
        from nanoget import combine_dfs
        from pandas import read_feather
        datadf = combine_dfs([read_feather(p) for p in args.feather],
                             method="simple")
    else:
        # Exactly one of these sources is expected to be set; the first
        # truthy entry determines both the source type and the file list
        # (same dict, same iteration order, so the pair stays matched).
        sources = {
            "fastq": args.fastq,
            "bam": args.bam,
            "cram": args.cram,
            "summary": args.summary,
            "fasta": args.fasta,
            "ubam": args.ubam,
        }
        datadf = nanoget.get_input(
            source=[n for n, s in sources.items() if s][0],
            files=[f for f in sources.values() if f][0],
            threads=args.threads,
            readtype=args.readtype,
            combine="track",
            barcoded=args.barcoded,
            keep_supp=not args.no_supplementary)
    if args.barcoded:
        # One stats column per barcode found in the data.
        barcodes = list(datadf["barcode"].unique())
        write_stats(
            datadfs=[datadf[datadf["barcode"] == b] for b in barcodes],
            outputfile=os.path.join(args.outdir, args.name),
            names=barcodes)
    else:
        # Combined stats only in non-barcoded mode; running this call
        # unconditionally would overwrite the per-barcode output above.
        write_stats(
            datadfs=[datadf],
            outputfile=os.path.join(args.outdir, args.name),
            as_tsv=args.tsv)
def main():
    """
    Organization function
    -setups logging
    -gets inputdata
    -calls plotting function
    """
    settings, args = utils.get_args()
    try:
        utils.make_output_dir(args.outdir)
        utils.init_logs(args)
        if args.pickle:
            # NOTE(review): unpickling is only safe on files this tool wrote
            # itself (--store); never point --pickle at untrusted data.
            with open(args.pickle, 'rb') as pf:
                datadf = pickle.load(pf)
        elif args.feather:
            # Feather input bypasses extraction: frames are read and merged.
            from nanoget import combine_dfs
            from pandas import read_feather
            datadf = combine_dfs([read_feather(p) for p in args.feather],
                                 method="simple")
        else:
            # Exactly one of these sources is expected to be set; the first
            # truthy entry determines both the source type and the file list.
            sources = {
                "fastq": args.fastq,
                "bam": args.bam,
                "cram": args.cram,
                "fastq_rich": args.fastq_rich,
                "fastq_minimal": args.fastq_minimal,
                "summary": args.summary,
                "fasta": args.fasta,
                "ubam": args.ubam,
            }
            datadf = get_input(
                source=[n for n, s in sources.items() if s][0],
                files=[f for f in sources.values() if f][0],
                threads=args.threads,
                readtype=args.readtype,
                combine="simple",
                barcoded=args.barcoded,
                huge=args.huge,
                keep_supp=not args.no_supplementary)
        if args.store:
            # Persist the extracted DataFrame for fast re-runs via --pickle.
            with open(settings["path"] + "NanoPlot-data.pickle", 'wb') as pf:
                pickle.dump(obj=datadf, file=pf)
        if args.raw:
            datadf.to_csv(settings["path"] + "NanoPlot-data.tsv.gz",
                          sep="\t",
                          index=False,
                          compression="gzip")
        # Pre-filter stats first; a post-filter stats file may be appended below.
        settings["statsfile"] = [make_stats(datadf, settings, suffix="",
                                            tsv_stats=args.tsv_stats)]
        datadf, settings = filter_and_transform_data(datadf, settings)
        if settings["filtered"]:  # Bool set when filter was applied in filter_and_transform_data()
            settings["statsfile"].append(
                make_stats(datadf[datadf["length_filter"]], settings,
                           suffix="_post_filtering",
                           tsv_stats=args.tsv_stats)
            )
        if args.barcoded:
            # Plot each barcode separately, prefixing output paths per barcode;
            # restore the shared path afterwards for the combined report.
            main_path = settings["path"]
            barcodes = list(datadf["barcode"].unique())
            plots = []
            for barc in barcodes:
                logging.info("Processing {}".format(barc))
                dfbarc = datadf[datadf["barcode"] == barc]
                if len(dfbarc) > 5:
                    settings["title"] = barc
                    settings["path"] = path.join(args.outdir,
                                                 args.prefix + barc + "_")
                    plots.append(report.BarcodeTitle(barc))
                    plots.extend(make_plots(dfbarc, settings))
                else:
                    # Too few reads for meaningful plots; skip this barcode.
                    sys.stderr.write(
                        "Found barcode {} less than 5x, ignoring...\n".format(barc))
                    logging.info(
                        "Found barcode {} less than 5 times, ignoring".format(barc))
            settings["path"] = main_path
        else:
            plots = make_plots(datadf, settings)
        make_report(plots, settings)
        logging.info("Finished!")
    except Exception as e:
        # Top-level boundary: log with traceback, tell the user how to
        # report the crash, then re-raise so the exit status is non-zero.
        logging.error(e, exc_info=True)
        print("\n\n\nIf you read this then NanoPlot {} has crashed :-(".format(__version__))
        print("Please try updating NanoPlot and see if that helps...\n")
        print("If not, please report this issue at https://github.com/wdecoster/NanoPlot/issues")
        print("If you could include the log file that would be really helpful.")
        print("Thanks!\n\n\n")
        raise
def main():
    """
    Organization function
    -setups logging
    -gets inputdata
    -calls plotting function
    """
    settings, args = utils.get_args()
    try:
        utils.make_output_dir(args.outdir)
        utils.init_logs(args)
        # Exactly one of these sources is expected to be set; the first
        # truthy entry determines both the source type and the file list.
        sources = {
            "fastq": args.fastq,
            "bam": args.bam,
            "cram": args.cram,
            "summary": args.summary,
            "fasta": args.fasta,
            "ubam": args.ubam,
        }
        if args.split_runs:
            split_dict = utils.validate_split_runs_file(args.split_runs)
        if args.pickle:
            # NOTE(review): unpickling is only safe on files this tool wrote
            # itself (--store); never point --pickle at untrusted data.
            from nanoget import combine_dfs
            dfs = []
            for p in args.pickle:
                with open(p, 'rb') as pf:
                    dfs.append(pickle.load(pf))
            datadf = combine_dfs(dfs=dfs, names=args.names, method="track")
        elif args.feather:
            from nanoget import combine_dfs
            from pandas import read_feather
            datadf = combine_dfs([read_feather(p) for p in args.feather],
                                 names=args.names or args.feather,
                                 method="track")
        else:
            datadf = nanoget.get_input(
                source=[n for n, s in sources.items() if s][0],
                files=[f for f in sources.values() if f][0],
                threads=args.threads,
                readtype=args.readtype,
                names=args.names,
                barcoded=args.barcoded,
                combine="track")
        datadf, settings = filter_and_transform_data(datadf, vars(args))
        if args.raw:
            datadf.to_csv(settings["path"] + "NanoComp-data.tsv.gz",
                          sep="\t",
                          index=False,
                          compression="gzip")
        if args.store:
            # Persist the extracted DataFrame for fast re-runs via --pickle.
            with open(settings["path"] + "NanoComp-data.pickle", 'wb') as pf:
                pickle.dump(obj=datadf, file=pf)
        if args.split_runs:
            utils.change_identifiers(datadf, split_dict)
        if args.barcoded:
            # In barcoded mode each barcode is treated as its own dataset.
            datadf["dataset"] = datadf["barcode"]
        identifiers = list(datadf["dataset"].unique())
        stats_df = write_stats(
            datadfs=[datadf[datadf["dataset"] == i] for i in identifiers],
            outputfile=settings["path"] + "NanoStats.txt",
            names=identifiers,
            as_tsv=args.tsv_stats)
        if args.plot != 'false':
            plots = make_plots(datadf, settings)
            make_report(plots, settings["path"], stats_df=stats_df)
        logging.info("Successfully processed all input.")
    except Exception as e:
        # Top-level boundary: log with traceback, then re-raise so the
        # process exits non-zero.
        logging.error(e, exc_info=True)
        raise