Ejemplo n.º 1
0
def main():
    '''
    Entry point: extract reads from one input type and save them as feather.

    -parses the command line arguments with get_args()
    -picks the first input type for which files were supplied
    -passes those files to get_input() and calls to_feather(args.output)
     on the result

    Raises IndexError when none of the input types received any files.
    '''
    args = get_args()

    # Candidate input types; insertion order decides which one wins.
    candidates = {
        "fastq": args.fastq,
        "bam": args.bam,
        "cram": args.cram,
        "fastq_rich": args.fastq_rich,
        "fastq_minimal": args.fastq_minimal,
        "summary": args.summary,
        "fasta": args.fasta,
        "ubam": args.ubam,
    }

    # Only input types with a truthy value count as supplied.
    supplied = [(kind, paths) for kind, paths in candidates.items() if paths]
    source_name, source_files = supplied[0]

    extracted = get_input(
        source=source_name,
        files=source_files,
        threads=args.threads,
        readtype=args.readtype,
        combine="simple",
        barcoded=args.barcoded,
        huge=args.huge,
        keep_supp=not args.no_supplementary)
    extracted.to_feather(args.output)
Ejemplo n.º 2
0
def main():
    '''
    Organization function
    -creates the output directory when one is specified
    -gets inputdata, either by combining feather files or by extracting
     the first supplied input type with nanoget.get_input()
    -writes statistics per barcode when args.barcoded is set
    -writes statistics for the complete dataset

    Raises IndexError when no feather files and no input files are given.
    '''
    args = get_args()
    if args.outdir:
        # exist_ok avoids the check-then-create race that makes concurrent
        # jobs sharing one output directory fail with FileExistsError.
        os.makedirs(args.outdir, exist_ok=True)
    if args.feather:
        from nanoget import combine_dfs
        from pandas import read_feather
        datadf = combine_dfs([read_feather(p) for p in args.feather],
                             method="simple")
    else:
        # Insertion order decides which input type wins when several are set.
        sources = {
            "fastq": args.fastq,
            "bam": args.bam,
            "cram": args.cram,
            "summary": args.summary,
            "fasta": args.fasta,
            "ubam": args.ubam,
        }
        datadf = nanoget.get_input(source=[n for n, s in sources.items()
                                           if s][0],
                                   files=[f for f in sources.values() if f][0],
                                   threads=args.threads,
                                   readtype=args.readtype,
                                   combine="track",
                                   barcoded=args.barcoded,
                                   keep_supp=not (args.no_supplementary))
    if args.barcoded:
        # One subset of the data per distinct barcode value.
        barcodes = list(datadf["barcode"].unique())
        write_stats(datadfs=[datadf[datadf["barcode"] == b] for b in barcodes],
                    outputfile=os.path.join(args.outdir, args.name),
                    names=barcodes)
    # NOTE(review): in barcoded mode this call targets the same outputfile as
    # the per-barcode call above - confirm write_stats does not overwrite it.
    write_stats(datadfs=[datadf],
                outputfile=os.path.join(args.outdir, args.name),
                as_tsv=args.tsv)
Ejemplo n.º 3
0
def main():
    '''
    Entry point: combine summary and bam data per read, plot and export.

    -extracts the summary and bam input with get_input(), both indexed
     on "readIDs"
    -merges the bam data onto the summary data (how="left", on the index)
    -calls plot_retrotect() on the merged data
    -writes the "start_time" column of the rows without missing values,
     sorted by start_time, to a gzipped tab separated file
    '''
    args = get_args()
    output_prefix = path.join(args.outdir, args.prefix)

    summary_data = get_input(source="summary", files=args.summary)
    summary_data = summary_data.set_index("readIDs")
    alignment_data = get_input(source="bam", files=args.bam)
    alignment_data = alignment_data.set_index("readIDs")
    merged_df = summary_data.merge(right=alignment_data,
                                   how="left",
                                   left_index=True,
                                   right_index=True)

    plot_retrotect(df=merged_df,
                   path=output_prefix,
                   figformat=args.format,
                   title=args.title,
                   hours=args.hours)

    # Only rows without any missing value end up in the details file.
    complete_rows = merged_df.dropna(axis="index", how="any")
    ordered_rows = complete_rows.sort_values(by="start_time")
    ordered_rows.to_csv(path_or_buf=output_prefix + "Retrotect_details.txt.gz",
                        sep="\t",
                        columns=["start_time"],
                        compression='gzip')
Ejemplo n.º 4
0
def main():
    '''
    Organization function
    -creates the output directory and initialises the logs
    -gets inputdata from the first supplied input type
    -optionally stores the data as gzipped tsv (args.raw) and/or as
     pickle (args.store)
    -writes statistics per dataset
    -calls the plotting and report functions unless args.plot is 'false'

    Any exception is logged including its traceback and then re-raised.
    '''
    settings, args = utils.get_args()
    try:
        utils.make_output_dir(args.outdir)
        utils.init_logs(args)
        args.format = utils.check_valid_format(args.format)
        # Insertion order decides which input type wins when several are set.
        sources = {
            "fastq": args.fastq,
            "bam": args.bam,
            "cram": args.cram,
            "summary": args.summary,
            "fasta": args.fasta,
            "ubam": args.ubam,
        }
        if args.split_runs:
            split_dict = utils.validate_split_runs_file(args.split_runs)
        datadf = nanoget.get_input(source=[n for n, s in sources.items()
                                           if s][0],
                                   files=[f for f in sources.values() if f][0],
                                   threads=args.threads,
                                   readtype=args.readtype,
                                   names=args.names,
                                   barcoded=args.barcoded,
                                   combine="track")
        datadf, settings = filter_and_transform_data(datadf, vars(args))
        if args.raw:
            datadf.to_csv(settings["path"] + "NanoComp-data.tsv.gz",
                          sep="\t",
                          index=False,
                          compression="gzip")
        if args.store:
            # Context manager so the pickle file is flushed and closed
            # right away instead of leaking the open handle.
            with open(settings["path"] + "NanoComp-data.pickle",
                      'wb') as pickle_out:
                pickle.dump(obj=datadf, file=pickle_out)
        if args.split_runs:
            # split_dict was created above under the same condition.
            utils.change_identifiers(datadf, split_dict)
        if args.barcoded:
            # In barcoded mode the barcode takes the role of the dataset name.
            datadf["dataset"] = datadf["barcode"]
        identifiers = list(datadf["dataset"].unique())
        write_stats(
            datadfs=[datadf[datadf["dataset"] == i] for i in identifiers],
            outputfile=settings["path"] + "NanoStats.txt",
            names=identifiers)
        if args.plot != 'false':
            plots = make_plots(datadf, settings)
            make_report(plots, settings["path"])
        logging.info("Succesfully processed all input.")
    except Exception as e:
        logging.error(e, exc_info=True)
        raise
Ejemplo n.º 5
0
def main():
    '''
    Organization function
    -creates the output directory and initialises the logs
    -gets inputdata from the first supplied input type
    -optionally stores the data as gzipped tsv (args.raw)
    -writes statistics per dataset
    -calls the plotting and report functions

    Any exception is logged including its traceback and then re-raised.
    '''
    args = get_args()
    try:
        utils.make_output_dir(args.outdir)
        utils.init_logs(args, tool="NanoComp")
        args.format = nanoplotter.check_valid_format(args.format)
        # vars() exposes the namespace dict itself, so entries added to
        # settings are visible as attributes of args as well.
        settings = vars(args)
        settings["path"] = path.join(args.outdir, args.prefix)
        sources = [args.fastq, args.bam, args.summary, args.fasta]
        sourcename = ["fastq", "bam", "summary", "fasta"]
        if args.split_runs:
            split_dict = validate_split_runs_file(args.split_runs)
        datadf = nanoget.get_input(
            source=[n for n, s in zip(sourcename, sources) if s][0],
            files=[f for f in sources if f][0],
            threads=args.threads,
            readtype=args.readtype,
            names=args.names,
            barcoded=args.barcoded,
            combine="track")
        if args.raw:
            # Written next to the other output (outdir + prefix) rather than
            # into the current working directory.
            datadf.to_csv(settings["path"] + "NanoComp-data.tsv.gz",
                          sep="\t",
                          index=False,
                          compression="gzip")
        if args.split_runs:
            # split_dict was created above under the same condition.
            change_identifiers(datadf, split_dict)
        if args.barcoded:
            # In barcoded mode the barcode takes the role of the dataset name.
            datadf["dataset"] = datadf["barcode"]
        identifiers = list(datadf["dataset"].unique())
        write_stats(
            datadfs=[datadf[datadf["dataset"] == i] for i in identifiers],
            outputfile=settings["path"] + "NanoStats.txt",
            names=identifiers)
        plots = make_plots(datadf, settings)
        make_report(plots, path.join(args.outdir, args.prefix))
        logging.info("Succesfully processed all input.")
    except Exception as e:
        logging.error(e, exc_info=True)
        raise
Ejemplo n.º 6
0
def main():
    '''
    Organization function
    -creates the output directory
    -gets inputdata from the first supplied input type
    -writes the statistics to args.name when given, otherwise to
     <outdir>/<prefix>NanoStats.txt

    Raises IndexError when none of the input types received any files.
    '''
    args = get_args()
    # exist_ok avoids the check-then-create race that makes concurrent jobs
    # sharing one output directory fail with FileExistsError.
    os.makedirs(args.outdir, exist_ok=True)
    sources = [args.fastq, args.bam, args.summary]
    sourcename = ["fastq", "bam", "summary"]
    datadf = nanoget.get_input(
        source=[n for n, s in zip(sourcename, sources) if s][0],
        files=[f for f in sources if f][0],
        threads=args.threads,
        readtype=args.readtype,
        combine="track")
    if args.name:
        # An explicit name is used as is, without prepending outdir.
        output = args.name
    else:
        output = os.path.join(args.outdir, args.prefix + "NanoStats.txt")
    write_stats([datadf], output)
Ejemplo n.º 7
0
def main():
    '''
    Organization function
    -creates the output directory
    -gets inputdata from the first supplied input type
    -writes statistics per barcode when args.barcoded is set
    -writes statistics for the complete dataset

    Raises IndexError when none of the input types received any files.
    '''
    args = get_args()
    # exist_ok avoids the check-then-create race that makes concurrent jobs
    # sharing one output directory fail with FileExistsError.
    os.makedirs(args.outdir, exist_ok=True)
    sources = [args.fastq, args.bam, args.summary, args.fasta]
    sourcename = ["fastq", "bam", "summary", "fasta"]
    datadf = nanoget.get_input(
        source=[n for n, s in zip(sourcename, sources) if s][0],
        files=[f for f in sources if f][0],
        threads=args.threads,
        readtype=args.readtype,
        combine="track",
        barcoded=args.barcoded)
    if args.barcoded:
        # One subset of the data per distinct barcode value.
        barcodes = list(datadf["barcode"].unique())
        write_stats(datadfs=[datadf[datadf["barcode"] == b] for b in barcodes],
                    outputfile=args.name,
                    names=barcodes)
    # NOTE(review): in barcoded mode this call targets the same outputfile as
    # the per-barcode call above - confirm write_stats does not overwrite it.
    write_stats(datadfs=[datadf], outputfile=args.name)
Ejemplo n.º 8
0
def main():
    '''
    Organization function
    -creates the output directory and initialises the logs
    -gets inputdata, from a pickle file or from the first supplied input type
    -optionally stores the data as pickle (args.store) and/or gzipped
     tsv (args.raw)
    -makes statistics before and, when a filter was applied, after filtering
    -calls plotting function, per barcode when args.barcoded is set
    -calls the report function

    Any exception is logged including its traceback, a help message is
    printed and the exception is re-raised.
    '''
    settings, args = utils.get_args()
    try:
        utils.make_output_dir(args.outdir)
        utils.init_logs(args)
        args.format = nanoplotter.check_valid_format(args.format)
        # Insertion order decides which input type wins when several are set.
        sources = {
            "fastq": args.fastq,
            "bam": args.bam,
            "cram": args.cram,
            "fastq_rich": args.fastq_rich,
            "fastq_minimal": args.fastq_minimal,
            "summary": args.summary,
            "fasta": args.fasta,
            "ubam": args.ubam,
        }

        if args.pickle:
            # NOTE: unpickling can execute arbitrary code, so only pickle
            # files from a trusted origin should be passed here.
            with open(args.pickle, 'rb') as pickle_in:
                datadf = pickle.load(pickle_in)
        else:
            datadf = get_input(source=[n for n, s in sources.items() if s][0],
                               files=[f for f in sources.values() if f][0],
                               threads=args.threads,
                               readtype=args.readtype,
                               combine="simple",
                               barcoded=args.barcoded,
                               huge=args.huge)
        if args.store:
            # Context manager so the pickle file is flushed and closed
            # right away instead of leaking the open handle.
            with open(settings["path"] + "NanoPlot-data.pickle",
                      'wb') as pickle_out:
                pickle.dump(obj=datadf, file=pickle_out)
        if args.raw:
            datadf.to_csv(settings["path"] + "NanoPlot-data.tsv.gz",
                          sep="\t",
                          index=False,
                          compression="gzip")

        settings["statsfile"] = [make_stats(datadf, settings, suffix="")]
        datadf, settings = filter_and_transform_data(datadf, settings)
        if settings[
                "filtered"]:  # Bool set when filter was applied in filter_and_transform_data()
            settings["statsfile"].append(
                make_stats(datadf[datadf["length_filter"]],
                           settings,
                           suffix="_post_filtering"))

        if args.barcoded:
            # The loop below points settings["path"] at a barcode specific
            # prefix; remember the general prefix so the report does not
            # inherit the prefix of the last barcode.
            main_path = settings["path"]
            barcodes = list(datadf["barcode"].unique())
            plots = []
            for barc in barcodes:
                logging.info("Processing {}".format(barc))
                settings["path"] = path.join(args.outdir,
                                             args.prefix + barc + "_")
                dfbarc = datadf[datadf["barcode"] == barc]
                if len(dfbarc) > 5:
                    settings["title"] = barc
                    plots.extend(make_plots(dfbarc, settings))
                else:
                    sys.stderr.write(
                        "Found barcode {} less than 5x, ignoring...\n".format(
                            barc))
                    logging.info(
                        "Found barcode {} less than 5 times, ignoring".format(
                            barc))
            settings["path"] = main_path
        else:
            plots = make_plots(datadf, settings)
        make_report(plots, settings)
        logging.info("Finished!")
    except Exception as e:
        logging.error(e, exc_info=True)
        print("\n\n\nIf you read this then NanoPlot {} has crashed :-(".format(
            __version__))
        print("Please try updating NanoPlot and see if that helps...\n")
        print(
            "If not, please report this issue at https://github.com/wdecoster/NanoPlot/issues"
        )
        print(
            "If you could include the log file that would be really helpful.")
        print("Thanks!\n\n\n")
        raise
Ejemplo n.º 9
0
def main():
    '''
    Organization function
    -creates the output directory and initialises the logs
    -gets inputdata, from a pickle file or from the first supplied input type
    -optionally stores the data as pickle (args.store) and/or gzipped
     tsv (args.raw)
    -writes statistics, and per barcode statistics when args.barcoded is set
    -calls plotting function, per barcode when args.barcoded is set
    -calls the report function

    Any exception is logged including its traceback, a help message is
    printed and the exception is re-raised.
    '''
    args = get_args()
    try:
        utils.make_output_dir(args.outdir)
        logfile = utils.init_logs(args)
        args.format = nanoplotter.check_valid_format(args.format)
        # vars() exposes the namespace dict itself, so entries added to
        # settings are visible as attributes of args as well.
        settings = vars(args)
        settings["path"] = path.join(args.outdir, args.prefix)
        sources = [
            args.fastq, args.bam, args.fastq_rich, args.fastq_minimal,
            args.summary
        ]
        sourcename = ["fastq", "bam", "fastq_rich", "fastq_minimal", "summary"]
        if args.pickle:
            # NOTE: unpickling can execute arbitrary code, so only pickle
            # files from a trusted origin should be passed here.
            with open(args.pickle, 'rb') as pickle_in:
                datadf = pickle.load(pickle_in)
        else:
            datadf = get_input(
                source=[n for n, s in zip(sourcename, sources) if s][0],
                files=[f for f in sources if f][0],
                threads=args.threads,
                readtype=args.readtype,
                combine="simple",
                barcoded=args.barcoded)
        if args.store:
            # Context manager so the pickle file is flushed and closed
            # right away instead of leaking the open handle.
            with open(settings["path"] + "NanoPlot-data.pickle",
                      'wb') as pickle_out:
                pickle.dump(obj=datadf, file=pickle_out)
        if args.raw:
            # Written next to the other output (outdir + prefix) rather than
            # into the current working directory.
            datadf.to_csv(settings["path"] + "NanoPlot-data.tsv.gz",
                          sep="\t",
                          index=False,
                          compression="gzip")
        statsfile = settings["path"] + "NanoStats.txt"
        nanomath.write_stats(datadfs=[datadf], outputfile=statsfile)
        logging.info("Calculated statistics")
        datadf, settings = filter_data(datadf, settings)
        if args.barcoded:
            barcodes = list(datadf["barcode"].unique())
            # From here on statsfile refers to the per barcode statistics,
            # which is also the file handed to make_report below.
            statsfile = settings["path"] + "NanoStats_barcoded.txt"
            nanomath.write_stats(
                datadfs=[datadf[datadf["barcode"] == b] for b in barcodes],
                outputfile=statsfile,
                names=barcodes)
            plots = []
            for barc in barcodes:
                logging.info("Processing {}".format(barc))
                settings["path"] = path.join(args.outdir,
                                             args.prefix + barc + "_")
                dfbarc = datadf[datadf["barcode"] == barc]
                settings["title"] = barc
                plots.extend(make_plots(dfbarc, settings))
            # Back to the general prefix for the report.
            settings["path"] = path.join(args.outdir, args.prefix)
        else:
            plots = make_plots(datadf, settings)
        make_report(plots, settings["path"], logfile, statsfile)
        logging.info("Finished!")
    except Exception as e:
        logging.error(e, exc_info=True)
        print("\n\n\nIf you read this then NanoPlot has crashed :-(")
        print(
            "Please report this issue at https://github.com/wdecoster/NanoPlot/issues"
        )
        print("If you include the log file that would be really helpful.")
        print("Thanks!\n\n\n")
        raise
Ejemplo n.º 10
0
def run_tests():
    """Call nanoget.get_input once per test case on the files under nanotest/.

    The results are discarded; a case only fails by raising an exception.
    """
    # (source type, test file, extra keyword arguments), in execution order.
    cases = (
        ("bam", "nanotest/alignment.bam", {}),
        ("bam", "nanotest/alignment.bam", {"keep_supp": False}),
        ("fastq_rich", "nanotest/reads.fastq.gz", {}),
        ("summary", "nanotest/sequencing_summary.txt", {"combine": "track"}),
        ("fastq_minimal", "nanotest/reads.fastq.gz", {}),
        ("fastq", "nanotest/reads.fastq.gz", {}),
        ("fasta", "nanotest/reads.fa.gz", {}),
    )
    for source_type, test_file, extra_options in cases:
        nanoget.get_input(source_type, [test_file], **extra_options)
Ejemplo n.º 11
0
def main():
    '''
    Entry point: hand the fastq input to splitFq.

    -parses the command line arguments with getArgs()
    -passes the name of args.fastqfile to get_input()
    -calls splitFq() with the result and the parsed arguments
    '''
    cli_args = getArgs()
    fastq_name = cli_args.fastqfile.name
    fastq_input = get_input(fastq_name)
    splitFq(fastq_input, cli_args)