Example #1
0
def make_reports(inputDir, overwrite = False):
    """
    Makes HTML reports for all valid input files found in the supplied directory.

    Notes
    ------
    A valid input file:

    - ends with ``.tsv``
    - does not have an accompanying file with the same path that ends in ``.html``
      (unless ``overwrite`` is set, in which case the existing report is regenerated)

    If a file with the same base path ending in ``.json`` exists, its contents
    are loaded and handed to the report as ``params``.

    Parameters
    ----------
    inputDir: str
        path to directory to search for input files
    overwrite: bool
        when true, reports whose ``.html`` output already exists are regenerated;
        when false (the default) those inputs are skipped
    """
    # `input_path` rather than `input`, so the builtin is not shadowed
    for input_path in find_input_IR_tsvs(inputDir):
        input_basename = os.path.splitext(input_path)[0]
        output = input_basename + ".html"

        # skip early, before any params file is parsed, when there is nothing to do
        output_exists = os.path.exists(output)
        if output_exists and not overwrite:
            continue

        # check for params files
        params = {}
        input_json = input_basename + ".json"
        if os.path.exists(input_json):
            with open(input_json) as f:
                params.update(json.load(f))

        if output_exists:
            print(">>> Overwriting output: {0}".format(output))
        else:
            print(">>> Generating output: {0}".format(output))
        report.make_report(input = input_path, output = output, params = params)
Example #2
0
def make_report_for_olympiad(dir_name):
    """Run all evaluation functions for one olympiad and build its report.

    Each function returned by ``get_all_evaluation_functions()`` is called
    with the source directory ``../data/<dir_name>`` and its own output path
    ``../out/<dir_name>/results/out_<index>.json``.  ``make_report`` is then
    invoked on the results directory, targeting
    ``../out/<dir_name>/<dir_name>.html``.
    """
    out_dir = '../out/' + dir_name
    src_dir = '../data/' + dir_name

    # Created on demand; an already existing results directory is reused.
    results_dir = out_dir + '/results'
    os.makedirs(results_dir, exist_ok=True)

    for index, evaluate in enumerate(get_all_evaluation_functions()):
        evaluate(src_dir, results_dir + '/out_%s.json' % index)

    make_report(
        results_dir,
        out_dir + '/' + dir_name + '.html',
        report_title='Olympiad from "%s"' % dir_name,
    )
Example #3
0
def main():
    """Collect words from the parsed source trees and report the most common.

    Driven by the module-level ``args``:

    - ``args.vcs`` (optional): repository passed to ``clone_repository``
      together with ``args.path``; a ``GitError`` is logged as critical and
      the process exits with status 1.
    - ``args.words``: ``"verbs"`` selects ``get_verbs_from_trees``, anything
      else selects ``get_nouns_from_trees``.
    - ``args.count``: size of the top list; a falsy value means no limit.
    - ``args.format`` / ``args.report_file``: forwarded to ``make_report``.
    """
    if args.vcs:
        try:
            clone_repository(args.vcs, args.path)
        except GitError as err:
            logger.critical(err)
            exit(1)

    trees = get_trees(get_filenames(args.path))
    extract_words = (get_verbs_from_trees
                     if args.words == "verbs" else get_nouns_from_trees)
    words = extract_words(trees)

    separator = '-' * 80
    print(separator)
    print('total %s words, %s unique' % (len(words), len(set(words))))
    print(separator)

    # most_common(None) returns every entry, so a falsy count means "all".
    top_words = collections.Counter(words).most_common(args.count or None)
    make_report(args.format, args.report_file, top_words)
Example #4
0
def main():
    """Run one audit pass, store its timing in the local DB and build the report.

    Steps, in order: create the local database, run the initial scan, add an
    audit record, import scripts, fetch the audit info (when available its
    first two elements are passed to ``local_db.add_SNMP_SSH_info``), store
    start/end time and duration, and call ``make_report()``.

    The database is closed even when one of the steps raises, so a failed
    run does not leave the connection open.
    """
    start_time = datetime.now()

    local_db.create_db()
    try:
        local_db.initial_scan()
        local_db.add_audit()
        import_scripts()

        network_analysis = retrieve_audit_info()
        # Identity check: None is the "nothing retrieved" sentinel.
        if network_analysis is not None:
            local_db.add_SNMP_SSH_info(network_analysis[0], network_analysis[1])
        else:
            print('Warning: SNMP and SSH services is unavailable')

        end_time = datetime.now()
        duration = end_time - start_time

        # Times are stored as HH:MM:SS strings, the duration in seconds
        # rounded to two decimals.
        local_db.add_time(start_time.time().isoformat(timespec='seconds'),
                          end_time.time().isoformat(timespec='seconds'),
                          round(duration.total_seconds(), 2))
        make_report()
    finally:
        local_db.close()
Example #5
0
def main():
    """Command-line entry point: render a variant calling report as HTML.

    Metrics are read from the positional ``input`` files and/or from every
    ``--result-list`` table (columns ``method``, ``comparisonmethod`` and
    ``files``).  They are passed to ``report.make_report`` and rendered
    through the ``report.jinja2.html`` template found in ``TEMPLATEDIR``.

    The output name gets ``.html`` appended unless it already ends in
    ``.html`` or ``.gz``; a ``.gz`` name is written through
    ``gzip.GzipFile``.  The output file is only opened once all inputs have
    been read, and the gzip handle is always closed.

    Raises
    ------
    Exception
        If neither the positional inputs nor the result lists yield metrics.
    """
    parser = argparse.ArgumentParser(
        description="Create a variant calling report.")

    parser.add_argument("input",
                        help="Input file in GA4GH metrics format",
                        nargs="*")

    parser.add_argument(
        "-o",
        "--output",
        help="Output file name for reports, e.g. 'report' to write "
        "report.html",
        required=True)

    parser.add_argument("-m",
                        "--comparison-method",
                        default="default",
                        dest="comparison_method",
                        help="The comparison method that was used.")

    parser.add_argument(
        "-l",
        "--result-list",
        default=[],
        dest="result_list",
        action="append",
        help="Result list in delimited format. Must have these columns: "
        "method, comparisonmethod, and files.")

    parser.add_argument(
        "--roc-max-datapoints",
        help=
        "Maximum number of data points in a ROC (higher numbers might slow down our plotting)",
        dest="roc_datapoints",
        type=int,
        default=1000)
    parser.add_argument(
        "--roc-resolution",
        help=
        "Minimum difference in precision / recall covered by the ROC curves.",
        dest="roc_diff",
        type=float,
        default=0.005)
    parser.add_argument(
        "--min-recall",
        help=
        "Minimum recall for ROC curves (use to reduce size of output file by "
        "clipping the bits of the ROC that are not meaningful)",
        dest="min_recall",
        type=float,
        default=0.2)
    parser.add_argument(
        "--min-precision",
        help=
        "Minimum precision for ROC curves (use to reduce size of output file by"
        " clipping the bits of the ROC that are not meaningful)",
        dest="min_precision",
        type=float,
        default=0.0)

    args = parser.parse_args()

    # Only decide on the output name here; the file itself is opened right
    # before writing so a failed run does not leave a truncated report.
    output_name = args.output
    gzip_output = output_name.endswith(".gz")
    if not gzip_output and not output_name.endswith(".html"):
        output_name += ".html"

    if args.input:
        metrics = report.metrics.read_qfy_csv(args.input,
                                              args.comparison_method)
    else:
        metrics = []

    for list_path in args.result_list:
        # Parenthesised single-argument form prints identically on
        # Python 2 and Python 3.
        print("reading %s" % list_path)
        list_dir = os.path.dirname(list_path)
        # NOTE(review): 'rb' suits the Python 2 csv module this script was
        # written for; a Python 3 port needs text mode with newline=''.
        with open(list_path, 'rb') as csvfile:
            dialect = csv.Sniffer().sniff(csvfile.read(8192))
            csvfile.seek(0)
            for row in csv.DictReader(csvfile, dialect=dialect):
                rfiles = [x.strip() for x in row["files"].split(",")]
                for i, rfile in enumerate(rfiles):
                    # Paths that do not exist as given are resolved
                    # relative to the folder of the result list.
                    if not os.path.exists(rfile):
                        rfiles[i] = os.path.abspath(
                            os.path.join(list_dir, rfile))
                row_metrics = report.metrics.read_qfy_csv(
                    rfiles,
                    method=row["method"],
                    cmethod=row["comparisonmethod"],
                    roc_metrics=["METRIC.Precision", "METRIC.Recall"],
                    roc_diff=args.roc_diff,
                    max_data_points=args.roc_datapoints,
                    minmax={
                        "METRIC.Precision": {
                            "min": args.min_precision
                        },
                        "METRIC.Recall": {
                            "min": args.min_recall
                        }
                    })
                metrics = metrics + row_metrics

    # Fail before any report content is generated.
    if not metrics:
        raise Exception("No inputs specified.")

    loader = jinja2.FileSystemLoader(searchpath=TEMPLATEDIR)
    env = jinja2.Environment(loader=loader)

    template_vars = {"content": report.make_report(metrics)}

    template = env.get_template("report.jinja2.html")
    stream = template.stream(**template_vars)
    if gzip_output:
        out = gzip.GzipFile(output_name, "w")
        try:
            stream.dump(out)
        finally:
            # Closing writes the gzip trailer; without it the archive may
            # be left incomplete.
            out.close()
    else:
        stream.dump(output_name)
Example #6
0
        except Exception as e:
            print(f"problem dataset {i + 1}: {e}")
            traceback.print_exc()
            pass

        print()

    scores = pd.DataFrame(scores)
    print(scores)
    print(scores.sum() / len(scores))
    scores.to_csv(output_fname, index=False)


if __name__ == '__main__':
    # Script entry point: run test() on the synthetic and the real data set,
    # writing both CSVs into a fresh directory named after the start
    # timestamp, then build the report from that directory.
    real_data_path = "../data/with_class"
    synth_data_path = "../data/synthetic"

    # Both data sets are loaded before the start timestamp is taken.
    real_data = util.read_data(real_data_path)
    synthetic_data = util.read_data(synth_data_path)

    start = datetime.datetime.now()
    print(start)

    # One output directory per run; os.mkdir raises if it already exists.
    exp_dir = f'exp{start}'
    os.mkdir(exp_dir)

    test(synthetic_data, f"{exp_dir}/synth_test.csv")
    test(real_data, f"{exp_dir}/real_test.csv")
    print(datetime.datetime.now())

    make_report(exp_dir)
Example #7
0
def main():
    """Command-line entry point: render a variant calling report as HTML.

    Metrics are read from the positional ``input`` files and/or from every
    ``--result-list`` table (columns ``method``, ``comparisonmethod`` and
    ``files``).  They are passed to ``report.make_report`` and rendered
    through the ``report.jinja2.html`` template found in ``TEMPLATEDIR``.

    The output name gets ``.html`` appended unless it already ends in
    ``.html`` or ``.gz``; a ``.gz`` name is written through
    ``gzip.GzipFile``.  The output file is only opened once all inputs have
    been read, and the gzip handle is always closed.

    Raises
    ------
    Exception
        If neither the positional inputs nor the result lists yield metrics.
    """
    parser = argparse.ArgumentParser(description="Create a variant calling report.")

    parser.add_argument("input", help="Input file in GA4GH metrics format", nargs="*")

    parser.add_argument(
        "-o", "--output", help="Output file name for reports, e.g. 'report' to write " "report.html", required=True
    )

    parser.add_argument(
        "-m",
        "--comparison-method",
        default="default",
        dest="comparison_method",
        help="The comparison method that was used.",
    )

    parser.add_argument(
        "-l",
        "--result-list",
        default=[],
        dest="result_list",
        action="append",
        help="Result list in delimited format. Must have these columns: " "method, comparisonmethod, and files.",
    )

    parser.add_argument(
        "--roc-max-datapoints",
        help="Maximum number of data points in a ROC (higher numbers might slow down our plotting)",
        dest="roc_datapoints",
        type=int,
        default=1000,
    )
    parser.add_argument(
        "--roc-resolution",
        help="Minimum difference in precision / recall covered by the ROC curves.",
        dest="roc_diff",
        type=float,
        default=0.005,
    )
    parser.add_argument(
        "--min-recall",
        help="Minimum recall for ROC curves (use to reduce size of output file by "
        "clipping the bits of the ROC that are not meaningful)",
        dest="min_recall",
        type=float,
        default=0.2,
    )
    parser.add_argument(
        "--min-precision",
        help="Minimum precision for ROC curves (use to reduce size of output file by"
        " clipping the bits of the ROC that are not meaningful)",
        dest="min_precision",
        type=float,
        default=0.0,
    )

    args = parser.parse_args()

    # Only decide on the output name here; the file itself is opened right
    # before writing so a failed run does not leave a truncated report.
    output_name = args.output
    gzip_output = output_name.endswith(".gz")
    if not gzip_output and not output_name.endswith(".html"):
        output_name += ".html"

    if args.input:
        metrics = report.metrics.read_qfy_csv(args.input, args.comparison_method)
    else:
        metrics = []

    for list_path in args.result_list:
        # Parenthesised single-argument form prints identically on
        # Python 2 and Python 3.
        print("reading %s" % list_path)
        list_dir = os.path.dirname(list_path)
        # NOTE(review): "rb" suits the Python 2 csv module this script was
        # written for; a Python 3 port needs text mode with newline="".
        with open(list_path, "rb") as csvfile:
            dialect = csv.Sniffer().sniff(csvfile.read(8192))
            csvfile.seek(0)
            for row in csv.DictReader(csvfile, dialect=dialect):
                rfiles = [x.strip() for x in row["files"].split(",")]
                for i, rfile in enumerate(rfiles):
                    # Paths that do not exist as given are resolved
                    # relative to the folder of the result list.
                    if not os.path.exists(rfile):
                        rfiles[i] = os.path.abspath(os.path.join(list_dir, rfile))
                row_metrics = report.metrics.read_qfy_csv(
                    rfiles,
                    method=row["method"],
                    cmethod=row["comparisonmethod"],
                    roc_metrics=["METRIC.Precision", "METRIC.Recall"],
                    roc_diff=args.roc_diff,
                    max_data_points=args.roc_datapoints,
                    minmax={"METRIC.Precision": {"min": args.min_precision}, "METRIC.Recall": {"min": args.min_recall}},
                )
                metrics = metrics + row_metrics

    # Fail before any report content is generated.
    if not metrics:
        raise Exception("No inputs specified.")

    loader = jinja2.FileSystemLoader(searchpath=TEMPLATEDIR)
    env = jinja2.Environment(loader=loader)

    template_vars = {"content": report.make_report(metrics)}

    template = env.get_template("report.jinja2.html")
    stream = template.stream(**template_vars)
    if gzip_output:
        out = gzip.GzipFile(output_name, "w")
        try:
            stream.dump(out)
        finally:
            # Closing writes the gzip trailer; without it the archive may
            # be left incomplete.
            out.close()
    else:
        stream.dump(output_name)
Example #8
0
def main():
    """Command-line entry point: benchmark VCFs against truth sets and write an HTML report.

    Every positional ``input`` is compared against every ``--truthset``
    (given as ``name:vcf:bed``) by calling ``happy``.  The resulting
    ``.roc.all.csv.gz`` files are read with ``report.metrics.read_qfy_csv``
    and rendered through the ``report.jinja2.html`` template found in
    ``TEMPLATEDIR``.

    With ``--keep-results`` the hap.py output files are kept in that folder
    and a ``<output basename>.tsv`` result list is written next to them;
    otherwise all result files are deleted once the metrics have been read.

    The output name gets ``.html`` appended unless it already ends in
    ``.html`` or ``.gz``; a ``.gz`` name is written through
    ``gzip.GzipFile``.

    Raises
    ------
    Exception
        If the ``--keep-results`` folder does not exist, or a truthset is
        not of the form ``name:vcf:bed``.
    """
    logging.basicConfig(level=logging.DEBUG,
                        format='%(asctime)s %(levelname)s %(message)s')

    parser = argparse.ArgumentParser(
        description="Benchmark a set of VCFs against "
        "Platinum Genomes and Genome in a Bottle + produce a report.")

    parser.add_argument("input", help="input VCF files", nargs="+")

    # Required: the code below cannot work without an output name, and
    # argparse gives a clearer error than an AttributeError on None.
    parser.add_argument("-o",
                        "--output",
                        help="output HTML file name",
                        dest="output",
                        required=True)

    parser.add_argument(
        "-O",
        "--keep-results",
        help="Specify a folder in which to keep the hap.py results.",
        dest="keep_results",
        default=None)

    parser.add_argument("-e",
                        "--engine",
                        help="Comparison engine to use (xcmp / vcfeval)",
                        default="xcmp",
                        choices=["xcmp", "vcfeval"])

    parser.add_argument("--truthset",
                        dest="truthset",
                        help="Add truthsets in this format: 'name:vcf:bed'",
                        default=[],
                        action="append")

    parser.add_argument(
        "--stratification",
        dest="stratification",
        help=
        "Use a list of stratification regions (this should point to the TSV file). "
        "Prefix with 'fixchr:' to add chr prefix to chromosome names.",
        default=None)

    parser.add_argument("--roc",
                        help="ROC feature to use",
                        dest="roc",
                        default="QUAL")
    parser.add_argument(
        "--ignore-filters",
        help="Filters to ignore (e.g. --ignore-filters MQ,lowQual).",
        dest="roc_filters",
        default=None)

    parser.add_argument("--happy-extra",
                        help="hap.py extra arguments",
                        dest="happy_extra",
                        default=None)
    parser.add_argument("-r",
                        "--reference",
                        help="Reference FASTA file to use",
                        dest="reference",
                        default="hg19")

    parser.add_argument(
        "--roc-max-datapoints",
        help=
        "Maximum number of data points in a ROC (higher numbers might slow down our plotting)",
        dest="roc_datapoints",
        type=int,
        default=1000)

    parser.add_argument(
        "--roc-resolution",
        help=
        "Minimum difference in precision / recall covered by the ROC curves.",
        dest="roc_diff",
        type=float,
        default=0.005)
    parser.add_argument(
        "--min-recall",
        help=
        "Minimum recall for ROC curves (use to reduce size of output file by "
        "clipping the bits of the ROC that are not meaningful)",
        dest="min_recall",
        type=float,
        default=0.2)
    parser.add_argument(
        "--min-precision",
        help=
        "Minimum precision for ROC curves (use to reduce size of output file by"
        " clipping the bits of the ROC that are not meaningful)",
        dest="min_precision",
        type=float,
        default=0.0)

    args = parser.parse_args()

    # Keep the output *name* as a string for the whole run: it is needed
    # for the .tsv file name and the log message.  The gzip handle is only
    # opened when the report is actually written.
    output_name = args.output
    gzip_output = output_name.endswith(".gz")
    if not gzip_output and not output_name.endswith(".html"):
        output_name += ".html"

    if args.keep_results and not os.path.isdir(args.keep_results):
        raise Exception("Output folder %s does not exist." % args.keep_results)

    truthsets = []
    for x in args.truthset:
        xs = x.split(":", 2)
        if len(xs) != 3:
            raise Exception("Invalid custom truthset: %s" % x)
        truthsets.append(Truthset(xs[0], xs[1], xs[2]))

    # The extra hap.py arguments are identical for every run, so they are
    # built once.  Stripping the 'fixchr:' prefix inside the loop would
    # drop --stratification-fixchr from every run after the first.
    extra_args = args.happy_extra
    if extra_args is None:
        extra_args = ""
    stratification = args.stratification
    if stratification:
        if stratification.startswith("fixchr:"):
            extra_args += " --stratification-fixchr"
            stratification = stratification[len("fixchr:"):]
        extra_args += " --stratification %s" % stratification

    resultlist = []
    metrics = []

    logging.debug(str(DEPS))
    logging.debug(str(args))

    try:
        results_for_report = []

        for query in args.input:
            # Method name: file name without .gz and .vcf / .bcf suffixes.
            methodname = os.path.basename(query)
            if methodname.endswith(".gz"):
                methodname = methodname[:-3]
            if methodname.endswith(".vcf"):
                methodname = methodname[:-4]
            if methodname.endswith(".bcf"):
                methodname = methodname[:-4]

            for ts in truthsets:
                logging.info("Comparing %s against %s" % (query, ts.name))
                happyfiles = happy(query,
                                   truth=ts.vcf,
                                   truth_conf=ts.bed,
                                   roc_feature=args.roc,
                                   roc_filters=args.roc_filters,
                                   engine=args.engine,
                                   ref=args.reference,
                                   extra=extra_args,
                                   output_folder=args.keep_results,
                                   output_prefix=args.engine + "_" +
                                   methodname)
                resultlist += happyfiles

                results_for_report.append({
                    "method":
                    methodname,
                    "cmethod":
                    ts.name + "-" + args.engine,
                    "files":
                    [r for r in happyfiles if r.endswith(".roc.all.csv.gz")]
                })

        if args.keep_results:
            tsv_name = os.path.splitext(
                os.path.basename(output_name))[0] + ".tsv"
            with open(os.path.join(args.keep_results, tsv_name), "w") as f:
                # f.write + "\n" behaves the same on Python 2 and 3.
                f.write("\t".join(["method", "comparisonmethod", "files"]) +
                        "\n")
                for row in results_for_report:
                    f.write("\t".join([
                        row["method"], row["cmethod"], ",".join(row["files"])
                    ]) + "\n")

        for row in results_for_report:
            row_metrics = report.metrics.read_qfy_csv(
                row["files"],
                method=row["method"],
                cmethod=row["cmethod"],
                roc_metrics=["METRIC.Precision", "METRIC.Recall"],
                roc_diff=args.roc_diff,
                max_data_points=args.roc_datapoints,
                minmax={
                    "METRIC.Precision": {
                        "min": args.min_precision
                    },
                    "METRIC.Recall": {
                        "min": args.min_recall
                    }
                })

            metrics = metrics + row_metrics
    finally:
        # clean results unless we want to keep the scratch space
        if not args.keep_results:
            for r in resultlist:
                try:
                    os.unlink(r)
                except OSError:
                    # Best effort: a file that is already gone or cannot
                    # be removed must not mask the real outcome.
                    pass

    logging.info("Making report %s from %i results" %
                 (output_name, len(metrics)))

    loader = jinja2.FileSystemLoader(searchpath=TEMPLATEDIR)
    env = jinja2.Environment(loader=loader)

    template_vars = {"content": report.make_report(metrics)}

    template = env.get_template("report.jinja2.html")
    stream = template.stream(**template_vars)
    if gzip_output:
        out = gzip.GzipFile(output_name, "w")
        try:
            stream.dump(out)
        finally:
            # Closing writes the gzip trailer; without it the archive may
            # be left incomplete.
            out.close()
    else:
        stream.dump(output_name)
Example #9
0
def main():
    """Command-line entry point: benchmark VCFs against truth sets and write an HTML report.

    Every positional ``input`` is compared against every ``--truthset``
    (given as ``name:vcf:bed``) by calling ``happy``.  The resulting
    ``.roc.all.csv.gz`` files are read with ``report.metrics.read_qfy_csv``
    and rendered through the ``report.jinja2.html`` template found in
    ``TEMPLATEDIR``.

    With ``--keep-results`` the hap.py output files are kept in that folder
    and a ``<output basename>.tsv`` result list is written next to them;
    otherwise all result files are deleted once the metrics have been read.

    The output name gets ``.html`` appended unless it already ends in
    ``.html`` or ``.gz``; a ``.gz`` name is written through
    ``gzip.GzipFile``.

    Raises
    ------
    Exception
        If the ``--keep-results`` folder does not exist, or a truthset is
        not of the form ``name:vcf:bed``.
    """
    logging.basicConfig(level=logging.DEBUG, format="%(asctime)s %(levelname)s %(message)s")

    parser = argparse.ArgumentParser(
        description="Benchmark a set of VCFs against " "Platinum Genomes and Genome in a Bottle + produce a report."
    )

    parser.add_argument("input", help="input VCF files", nargs="+")

    # Required: the code below cannot work without an output name, and
    # argparse gives a clearer error than an AttributeError on None.
    parser.add_argument("-o", "--output", help="output HTML file name", dest="output", required=True)

    parser.add_argument(
        "-O",
        "--keep-results",
        help="Specify a folder in which to keep the hap.py results.",
        dest="keep_results",
        default=None,
    )

    parser.add_argument(
        "-e", "--engine", help="Comparison engine to use (xcmp / vcfeval)", default="xcmp", choices=["xcmp", "vcfeval"]
    )

    parser.add_argument(
        "--truthset", dest="truthset", help="Add truthsets in this format: 'name:vcf:bed'", default=[], action="append"
    )

    parser.add_argument(
        "--stratification",
        dest="stratification",
        help="Use a list of stratification regions (this should point to the TSV file). "
        "Prefix with 'fixchr:' to add chr prefix to chromosome names.",
        default=None,
    )

    parser.add_argument("--roc", help="ROC feature to use", dest="roc", default="QUAL")
    parser.add_argument(
        "--ignore-filters",
        help="Filters to ignore (e.g. --ignore-filters MQ,lowQual).",
        dest="roc_filters",
        default=None,
    )

    parser.add_argument("--happy-extra", help="hap.py extra arguments", dest="happy_extra", default=None)
    parser.add_argument("-r", "--reference", help="Reference FASTA file to use", dest="reference", default="hg19")

    parser.add_argument(
        "--roc-max-datapoints",
        help="Maximum number of data points in a ROC (higher numbers might slow down our plotting)",
        dest="roc_datapoints",
        type=int,
        default=1000,
    )

    parser.add_argument(
        "--roc-resolution",
        help="Minimum difference in precision / recall covered by the ROC curves.",
        dest="roc_diff",
        type=float,
        default=0.005,
    )
    parser.add_argument(
        "--min-recall",
        help="Minimum recall for ROC curves (use to reduce size of output file by "
        "clipping the bits of the ROC that are not meaningful)",
        dest="min_recall",
        type=float,
        default=0.2,
    )
    parser.add_argument(
        "--min-precision",
        help="Minimum precision for ROC curves (use to reduce size of output file by"
        " clipping the bits of the ROC that are not meaningful)",
        dest="min_precision",
        type=float,
        default=0.0,
    )

    args = parser.parse_args()

    # Keep the output *name* as a string for the whole run: it is needed
    # for the .tsv file name and the log message.  The gzip handle is only
    # opened when the report is actually written.
    output_name = args.output
    gzip_output = output_name.endswith(".gz")
    if not gzip_output and not output_name.endswith(".html"):
        output_name += ".html"

    if args.keep_results and not os.path.isdir(args.keep_results):
        raise Exception("Output folder %s does not exist." % args.keep_results)

    truthsets = []
    for x in args.truthset:
        xs = x.split(":", 2)
        if len(xs) != 3:
            raise Exception("Invalid custom truthset: %s" % x)
        truthsets.append(Truthset(xs[0], xs[1], xs[2]))

    # The extra hap.py arguments are identical for every run, so they are
    # built once.  Stripping the 'fixchr:' prefix inside the loop would
    # drop --stratification-fixchr from every run after the first.
    extra_args = args.happy_extra
    if extra_args is None:
        extra_args = ""
    stratification = args.stratification
    if stratification:
        if stratification.startswith("fixchr:"):
            extra_args += " --stratification-fixchr"
            stratification = stratification[len("fixchr:") :]
        extra_args += " --stratification %s" % stratification

    resultlist = []
    metrics = []

    logging.debug(str(DEPS))
    logging.debug(str(args))

    try:
        results_for_report = []

        for query in args.input:
            # Method name: file name without .gz and .vcf / .bcf suffixes.
            methodname = os.path.basename(query)
            if methodname.endswith(".gz"):
                methodname = methodname[:-3]
            if methodname.endswith(".vcf"):
                methodname = methodname[:-4]
            if methodname.endswith(".bcf"):
                methodname = methodname[:-4]

            for ts in truthsets:
                logging.info("Comparing %s against %s" % (query, ts.name))
                happyfiles = happy(
                    query,
                    truth=ts.vcf,
                    truth_conf=ts.bed,
                    roc_feature=args.roc,
                    roc_filters=args.roc_filters,
                    engine=args.engine,
                    ref=args.reference,
                    extra=extra_args,
                    output_folder=args.keep_results,
                    output_prefix=args.engine + "_" + methodname,
                )
                resultlist += happyfiles

                results_for_report.append(
                    {
                        "method": methodname,
                        "cmethod": ts.name + "-" + args.engine,
                        "files": [r for r in happyfiles if r.endswith(".roc.all.csv.gz")],
                    }
                )

        if args.keep_results:
            tsv_name = os.path.splitext(os.path.basename(output_name))[0] + ".tsv"
            with open(os.path.join(args.keep_results, tsv_name), "w") as f:
                # f.write + "\n" behaves the same on Python 2 and 3.
                f.write("\t".join(["method", "comparisonmethod", "files"]) + "\n")
                for row in results_for_report:
                    f.write("\t".join([row["method"], row["cmethod"], ",".join(row["files"])]) + "\n")

        for row in results_for_report:
            row_metrics = report.metrics.read_qfy_csv(
                row["files"],
                method=row["method"],
                cmethod=row["cmethod"],
                roc_metrics=["METRIC.Precision", "METRIC.Recall"],
                roc_diff=args.roc_diff,
                max_data_points=args.roc_datapoints,
                minmax={"METRIC.Precision": {"min": args.min_precision}, "METRIC.Recall": {"min": args.min_recall}},
            )

            metrics = metrics + row_metrics
    finally:
        # clean results unless we want to keep the scratch space
        if not args.keep_results:
            for r in resultlist:
                try:
                    os.unlink(r)
                except OSError:
                    # Best effort: a file that is already gone or cannot
                    # be removed must not mask the real outcome.
                    pass

    logging.info("Making report %s from %i results" % (output_name, len(metrics)))

    loader = jinja2.FileSystemLoader(searchpath=TEMPLATEDIR)
    env = jinja2.Environment(loader=loader)

    template_vars = {"content": report.make_report(metrics)}

    template = env.get_template("report.jinja2.html")
    stream = template.stream(**template_vars)
    if gzip_output:
        out = gzip.GzipFile(output_name, "w")
        try:
            stream.dump(out)
        finally:
            # Closing writes the gzip trailer; without it the archive may
            # be left incomplete.
            out.close()
    else:
        stream.dump(output_name)
Example #10
0
# Configure the report module, then poll the wall clock forever and build and
# push the report once the current time matches `target`.

# Group IDs assigned to the report module; the trailing comment names each group.
# NOTE(review): presumably the recipients used by report.push_report() - confirm.
report.GROUPS = [
    # '613019056d70a378bf512cbc2cf0bb01128033da092b79fb576a5eb2bc4e7a11',  # operations daily report test group
    '7e95f199da13a7483da519274be704c8d0e8557bc55323f67441e9e116fe66c6',  # TiDev
    'a8d8a969caebddef437c9582f93da87713b9bd4b0fea6da490ed1f0b7e50ae35',  # DDC main group
]

# NOTE(review): '<rand>' looks like a placeholder substituted inside the
# report module - confirm there.
report.STORAGE_URL = 'http://tiadmin.jinns.top/query/temp<rand>.png'

report.REPORT_URL = 'http://tiadmin.jinns.top/query/ddc_daily_report/'

# Local time of day (HH:MM) at which the report is generated.
target = '12:00'
while True:
    now = datetime.datetime.now()
    # Minute-resolution timestamp, compared as a string against `target`.
    code = now.strftime('%H:%M')
    print(now, code, target)

    if code == target:
        print('-------------------------------')

        try:
            report.make_report()
            report.push_report()
        except Exception as e:
            # Print the traceback and keep the loop alive so a failed run
            # does not stop the scheduler.
            print('========================')
            traceback.print_exc()
            print('========================')

        # Sleep past the matching minute so the report runs once per match.
        time.sleep(60)

    # Poll interval: shorter than a minute, so the target minute is not skipped.
    time.sleep(20)
Example #11
0
        random_agent,
        report_folder,
        word_list,
        dir_list,
        fnm_list,
        ext_list,
        url_set,
    ) = get_settings(args)

    print("[2] : Done")
    print("[3] : Start Scanning according to settings")

    initial_result, final_result = scan.scan_entry(url, port, max_depth,
                                                   timeout, delay,
                                                   random_agent, word_list,
                                                   url_set)

    print("[3] : Done")
    print("[4] : Making Reports")

    # make reports
    report.make_report(initial_result, report_folder, "initial")
    report.make_report(final_result, report_folder, "final")
    print("[4] : All DONE! Check ./reports/ ")
    """

    print("[5] : Merge Testing")
    dirchecker.dirchecker_entry(args, dir_list, fnm_list, ext_list)
    print("[5] : Merge Done")

    """