Code Example #1
def get_benchmark_result_markdown(benchmark_files: Sequence[str],
                                  query_base: bool,
                                  verbose: bool = False) -> Tuple[str, str]:
  """Gets the full/abbreviated markdown summary of all benchmarks in files."""
  all_benchmarks = aggregate_all_benchmarks(benchmark_files)

  build_url = get_required_env_var("BUILDKITE_BUILD_URL")
  pr_number = get_required_env_var("BUILDKITE_PULL_REQUEST")
  pr_commit = get_required_env_var("BUILDKITE_COMMIT")
  pr_commit = md.link(pr_commit,
                      f"{GITHUB_IREE_REPO_PREFIX}/commit/{pr_commit}")

  commit_info = f"@ commit {pr_commit}"
  if query_base:
    # Try to query a base benchmark to diff against, starting from the top of
    # the tree. Bail out once the maximum number of attempts is exceeded.
    for i in range(MAX_BASE_COMMIT_QUERY_COUNT):
      base_commit = get_origin_tree_commit(i, verbose)
      base_benchmarks = query_base_benchmark_results(base_commit, verbose)
      base_commit = md.link(base_commit,
                            f"{GITHUB_IREE_REPO_PREFIX}/commit/{base_commit}")

      if len(base_benchmarks) == 0:
        commit_info = (f"@ commit {pr_commit} (no previous benchmark results to"
                       f" compare against since {base_commit})")
        continue

      # Update the aggregate benchmarks with base numbers.
      for bench in base_benchmarks:
        if bench in all_benchmarks:
          all_benchmarks[bench].base_mean_time = base_benchmarks[bench]
      commit_info = f"@ commit {pr_commit} (vs. base {base_commit})"
      break

  pr_info = md.link("Pull request",
                    f"{GITHUB_IREE_REPO_PREFIX}/pull/{pr_number}")
  buildkite_info = md.link("Buildkite build", build_url)

  # Compose the full benchmark tables.
  full_table = [md.header("Full Benchmark Summary", 2)]
  full_table.append(md.unordered_list([commit_info, pr_info, buildkite_info]))
  full_table.append(
      categorize_benchmarks_into_tables(all_benchmarks,
                                        SIMILAR_BECNHMARK_THRESHOLD))

  # Compose the abbreviated benchmark tables.
  abbr_table = [md.header(ABBR_PR_COMMENT_TITLE, 2)]
  abbr_table.append(commit_info)
  abbr_table.append(
      categorize_benchmarks_into_tables(all_benchmarks,
                                        SIMILAR_BECNHMARK_THRESHOLD,
                                        TABLE_SIZE_CUT))
  abbr_table.append("For more information:")
  # We don't know the link until a Gist is actually created. Use a placeholder
  # for now and replace it later.
  full_result_info = md.link("Full benchmark result tables",
                             "<<placeholder-link>>")
  abbr_table.append(md.unordered_list([full_result_info, buildkite_info]))

  return "\n\n".join(full_table), "\n\n".join(abbr_table)
Code Example #2
def categorize_benchmarks_into_tables(benchmarks: Dict[
    str, AggregateBenchmarkLatency],
                                      similar_threshold: float,
                                      size_cut: Optional[int] = None) -> str:
  """Splits benchmarks into regressed/improved/similar/raw categories and
  returns their markdown tables.

    Args:
    - similar_threshold: the threshold under which a benchmark will be
        considered as similar to its base commit.
    - size_cut: If not None, only show the top N results for each table.
    """
  regressed, improved, similar, raw = {}, {}, {}, {}

  for name, results in benchmarks.items():
    # If there is no information about the base result, we cannot analyze.
    if results.base_mean_time is None:
      raw[name] = results
      continue

    current = results.mean_time
    base = results.base_mean_time
    ratio = abs(current - base) / base
    if ratio <= similar_threshold:
      similar[name] = results
    elif current > base:
      regressed[name] = results
    else:
      improved[name] = results

  tables = []
  if regressed:
    tables.append(md.header("Regressed Benchmarks 🚩", 3))
    tables.append(sort_benchmarks_and_get_table(regressed, size_cut))
  if improved:
    tables.append(md.header("Improved Benchmarks 🎉", 3))
    tables.append(sort_benchmarks_and_get_table(improved, size_cut))
  # If we want to abbreviate, similar results won't be interesting.
  if similar and size_cut is None:
    tables.append(md.header("Similar Benchmarks", 3))
    tables.append(sort_benchmarks_and_get_table(similar, size_cut))
  if raw:
    tables.append(md.header("Raw Benchmarks", 3))
    raw_list = [
        (k, v.mean_time, v.median_time, v.stddev_time) for k, v in raw.items()
    ]
    names, means, medians, stddevs = zip(*raw_list)
    tables.append(
        add_header_and_get_markdown_table(names=names,
                                          means=means,
                                          medians=medians,
                                          stddevs=stddevs,
                                          size_cut=size_cut))
  return "\n\n".join(tables)
Code Example #3
def generate_menu(newspaperlist):

    #Create .md file
    filename = ".\\files\\md\\" + MENU_FILE_NAME
    file = open(filename + ".md", "w+", encoding="utf8")

    #Create title
    nlwrite(file, ms.header("Noticias Resumidas - Menú", 1))
    nlwrite(file, ms.horizontal_rule(3, "_"))

    #List all newspapers with corresponding files
    for newspaper in newspaperlist:
        news_path = ".\\files\\" + TODAY.strftime(newspaper.name + DATE_FMT +
                                                  ".html")
        nlwrite(file, header((link(ms.bold(newspaper.name), news_path)), 2))

    #Finish editing the .md file
    file.close()

    #Open both the .md and .html file to finish processing into HTML
    file = open(filename + ".md", "r", encoding="utf8")
    html = open(MENU_FILE_NAME + ".html", "w+", encoding="utf8")
    html.write(md.markdown(file.read(), output_format="html5",
                           encoding="utf8"))

    #Finish HTML processing
    file.close()
    html.close()
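
The nlwrite helper used above (and again in Code Example #7) is not defined in these snippets; a minimal sketch of the assumed behavior, writing the text followed by a blank line so consecutive Markdown blocks stay separated:

def nlwrite(file, text=""):
    # Assumed helper, not part of the original snippet: append the text plus a
    # blank line so each Markdown block renders as its own paragraph.
    file.write(text + "\n\n")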
Code Example #4
def gen_project_readme(list_source, list_title):
    """Generate README.md for aBL from category README.md files."""
    file_badges = is_path(Path.joinpath(DirPath.base, "BADGES.md"))
    file_about = is_path(Path.joinpath(DirPath.base, "ABOUT.md"))
    file_notes = is_path(Path.joinpath(DirPath.base, "NOTE.md"))
    main_title = markdown_strings.header(ListInfo.title, 1)
    badges = read_file(file_badges, data_type="str")
    about = read_file(file_about, data_type="str")
    notes = read_file(file_notes, data_type="str")
    # list_format = ["Domains", "ABP Filter"]
    # info_add = markdown_strings.blockquote(
    #     "Generated Lists: "
    #     + ", ".join(list_title)
    #     # + "\n\n"
    #     # + "Formats: "
    #     # + ", ".join(list_format)
    # )
    info_add = markdown_strings.blockquote(
        "a filter list optimized for DNS level blocking of ads, "
        "analytics, crypto-jacking and other such threats/nuisances."
    )
    section = [
        main_title,
        info_add,
        badges if badges else None,
        about if about else None,
        "\n".join(blocklist_section_table(list_source)),
        notes if notes else None,
    ]
    data_md = "\n\n".join(filter(None, section)) + "\n\n"
    file_readme = is_path(Path.joinpath(DirPath.base, "README.md"))
    with open(file_readme, "w", encoding="utf-8") as file_output:
        file_output.writelines(data_md)
    concat_category(file_readme)
Code Example #5
def category_section_main(blg, stats):
    """Generates the main section of the category README.md file."""
    value_percentage = float(
        (
            (int(stats["unprocessed"]) - int(stats["processed"]))
            / int(stats["unprocessed"])
        )
        * 100
    )
    link_filter = markdown_strings.link(
        "Download",
        f"{blg.info.home}/filters/{blg.category}.txt",
    )
    main_title = (
        markdown_strings.header(f"{blg.data_json[blg.j_key.title]}", 1)
        + "\n"
        + "**"
        + link_filter
        + "**"
    )

    main_desc = markdown_strings.bold(f"{fill(blg.data_json[blg.j_key.desc])}")
    info_list = [
        f"Sources: {len(blg.data_json[blg.j_key.sources])}",
        f"""Rules before processing: {stats["unprocessed"]}""",
        f"""Rules after processing: {stats["processed"]}""",
    ]
    info_add = markdown_strings.unordered_list(info_list)
    string_bold = (
        f"aBL - {blg.data_json[blg.j_key.title]} is {value_percentage:.2f}% lighter"
    )
    sub_desc = f"The {markdown_strings.bold(string_bold)} than its combined sources"
    return [main_title, main_desc, info_add, sub_desc]
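
A quick check of the percentage arithmetic above with made-up numbers (the values are hypothetical):

stats = {"unprocessed": "1000", "processed": "700"}
value_percentage = float(
    (int(stats["unprocessed"]) - int(stats["processed"]))
    / int(stats["unprocessed"])
    * 100
)
print(f"{value_percentage:.2f}% lighter")  # prints "30.00% lighter"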
Code Example #6
def map_header(ctx):
    """Return a line or two with basic map information."""
    if not ctx.beatmap:
        return None
    b = ctx.beatmap

    map_url = "%s/b/%d" % (consts.osu_url, b.beatmap_id)
    if ctx.mode is not None:
        map_url += "?m=%d" % ctx.mode
    map_link = md.link(escape(map_str(b)), map_url)
    mapper_id = scrape.mapper_id(ctx)
    mapper = b.creator if mapper_id is None else mapper_id
    mapper_url = "%s/u/%s" % (consts.osu_url, mapper)

    rename = mapper_renamed(ctx, mapper_id=mapper_id)
    hover = "Renamed to '%s'" % rename if rename is not None else ""

    counts = mapper_counts(ctx, mapper=mapper)
    if counts:
        hover += ": %s" % counts if hover else counts

    if hover:
        mapper_url += ' "%s"' % hover

    mapper_link = md.link(escape(b.creator), mapper_url)
    map_s = "%s by %s" % (map_link, mapper_link)

    if ctx.guest_mapper:
        guest_url = "%s/u/%d" % (consts.osu_url, ctx.guest_mapper.user_id)
        counts = mapper_counts(ctx, mapper=ctx.guest_mapper.user_id)
        if counts:
            guest_url += ' "%s"' % counts
        guest_link = md.link(ctx.guest_mapper.username, guest_url)
        map_s += " (GD by %s)" % guest_link

    tokens = [map_s]

    unranked = consts.int2status[b.approved.value] == "Unranked"

    if not unranked and ctx.mode is not None:
        tokens.append(consts.mode2str[ctx.mode])

    header = md.header(" || ".join(tokens), 4)
    subheader = (unranked_subheader if unranked else approved_subheader)(ctx)

    return "%s\n%s" % (header, subheader)
Code Example #7
def generate_markdown(newspaper):

    #Creating markdown file with today's date
    filename = ".\\files\\md\\" + TODAY.strftime(newspaper.name + " %Y-%m-%d")
    file = open(filename + ".md", "w+", encoding="utf8")

    #Write down the header of the document
    nlwrite(file, ms.header(f"Resumen de {newspaper.name}", 1))
    nlwrite(
        file,
        header(TODAY.strftime("Resumen generado el día %Y-%m-%d a las %H:%M"),
               4))

    #Add a back button to return to the HTML menu
    nlwrite(file,
            link(ms.bold("BACK"), "..\\" + MENU_FILE_NAME + ".html") + "  ")
    nlwrite(file, ms.horizontal_rule(3, "_"))

    for cat in newspaper.get_categories():

        nlwrite(file, header(ms.italics(cat), 3))

        for news in newspaper.get_articles(cat):
            nlwrite(file,
                    unorlist(link(ms.bold(news.headline), news.link) + "  "))

            if news.has_pub_day() and news.has_author():
                parsed_date = dp.parse(news.pub_day)
                final_date = parsed_date.strftime("%A %d.%m.%Y").title()
                nlwrite(file, f"{final_date} | Escrito por {news.author}")
            elif news.has_pub_day():
                parsed_date = dp.parse(news.pub_day)
                final_date = parsed_date.strftime("%A %d.%m.%Y").title()
                nlwrite(file, final_date)
            elif news.has_author():
                nlwrite(file, f"Escrito por {news.author}")

            nlwrite(file)

    #Save changes to markdown document.
    file.close()
Code Example #8
def categorize_benchmarks_into_tables(benchmarks: Dict[
    str, AggregateBenchmarkLatency],
                                      size_cut: Optional[int] = None) -> str:
    """Splits benchmarks into regressed/improved/similar/raw categories and
  returns their markdown tables.

    Args:
    - benchmarks: A dictionary of benchmark names to its aggregate info.
    - size_cut: If not None, only show the top N results for each table.
    """
    regressed, improved, similar, raw = {}, {}, {}, {}

    for name, results in benchmarks.items():
        # If there is no information about the base result, we cannot analyze.
        if results.base_mean_time is None:
            raw[name] = results
            continue

        similar_threshold = None
        for threshold in BENCHMARK_THRESHOLDS:
            if threshold.regex.match(name):
                similar_threshold = threshold
                break
        if similar_threshold is None:
            raise ValueError(
                f"no matched threshold setting for benchmark: {name}")

        current = results.mean_time
        base = results.base_mean_time
        if similar_threshold.unit == ThresholdUnit.PERCENTAGE:
            ratio = abs(current - base) / base * 100
        else:
            ratio = abs(current - base)

        if ratio <= similar_threshold.threshold:
            similar[name] = results
        elif current > base:
            regressed[name] = results
        else:
            improved[name] = results

    tables = []
    if regressed:
        tables.append(md.header("Regressed Benchmarks 🚩", 3))
        tables.append(_sort_benchmarks_and_get_table(regressed, size_cut))
    if improved:
        tables.append(md.header("Improved Benchmarks 🎉", 3))
        tables.append(_sort_benchmarks_and_get_table(improved, size_cut))
    # If we want to abbreviate, similar results won't be interesting.
    if similar and size_cut is None:
        tables.append(md.header("Similar Benchmarks", 3))
        tables.append(_sort_benchmarks_and_get_table(similar, size_cut))
    if raw:
        tables.append(md.header("Raw Benchmarks", 3))
        raw_list = [(k, v.mean_time, v.median_time, v.stddev_time)
                    for k, v in raw.items()]
        names, means, medians, stddevs = zip(*raw_list)
        tables.append(
            _add_header_and_get_markdown_table(names=names,
                                               means=means,
                                               medians=medians,
                                               stddevs=stddevs,
                                               size_cut=size_cut))
    return "\n\n".join(tables)
Code Example #9
def test_header():
    assert ms.header('header', 1) == '# header'
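
Building on the same library, a small composition sketch using only helpers that appear in the examples above; the document is printed rather than asserted against, since exact spacing can vary between markdown_strings versions:

import markdown_strings as ms

doc = "\n\n".join([
    ms.header("Example Report", 1),
    ms.blockquote("Generated with markdown_strings."),
    ms.unordered_list(
        [ms.bold("first item"),
         ms.link("second item", "https://example.com")]),
])
print(doc)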
Code Example #10
    def gen_diagnostics(self):
        insight_timespan_threshold = 10 * 60  # 10 min
        if self.job_timespan < insight_timespan_threshold:
            msg = "Insight will be available when more metric samples are " \
                  "collected.\n"
            self.diagnostics += msg
            return

        # Check idleness
        self.diagnostics += md.header("GPU Idleness", 2) + "\n"
        if len(self.idle_gpus) == self.num_gpus:
            msg = md.bold("All of %s GPU(s) in the job are idle. " %
                          len(self.idle_gpus))
            msg += "Please consider killing the job if you no longer need it.\n"
            self.diagnostics += msg
            return
        elif len(self.idle_gpus) > 0:
            msg = md.bold("There are %s idle GPU(s) in the job.\n" %
                          len(self.idle_gpus))
            c1 = "If you are running a job on all GPUs, please check if the process(es) on the idle GPU(s) have died/hung"
            c2 = "If you do not need all GPUs in the job, please consider killing the job and request a new job with fewer GPUs."
            msg += md.unordered_list([c1, c2]) + "\n"
            self.diagnostics += msg
        else:
            self.diagnostics += md.bold("All GPU(s) are active.") + "\n"
        self.diagnostics += "\n"

        # Check Resource usage for active GPUs
        self.diagnostics += md.header("Active GPU Utilization", 2) + "\n"
        good_gpu_util_threshold = 90
        good_gpu_mem_util_threshold = 50
        if self.active_gpu_util >= good_gpu_util_threshold:
            msg = "Average active GPU utilization over time is good at " \
                  "%.2f%%.\n" % self.active_gpu_util
            self.diagnostics += msg
        else:
            msg = "Average active GPU utilization over time is " \
                  "%.2f%% < %s%%. You can try below suggestions to boost " \
                  "GPU utilization:\n" % \
                  (self.active_gpu_util, good_gpu_util_threshold)

            suggestions = []
            if self.active_gpu_memory_util < good_gpu_mem_util_threshold:
                suggestions.append(
                    "Average active GPU memory utilization over time is below "
                    "%s%%. Try increasing batch size to put more data "
                    "onto GPU memory to boost GPU utilization. For a "
                    "distributed job, if the model has strict "
                    "requirement on the global effective batch size "
                    "for convergence, you can consider using a job "
                    "with fewer GPUs and bigger batch size per GPU." %
                    good_gpu_mem_util_threshold)

            if self.max_cpu_per_gpu is not None and \
                    self.cpu_per_active_gpu < self.max_cpu_per_gpu:
                suggestions.append(
                    "The job uses %.2f CPU cores per active GPU on average"
                    "over time. The maximum CPU cores per GPU you can "
                    "use without interfering with other GPUs in this "
                    "cluster is %.2f. You can use more CPU cores to "
                    "perform data preprocessing to keep GPUs from "
                    "starvation. Please consider using/increasing "
                    "parallel preprocessing on your input data." %
                    (self.cpu_per_active_gpu, self.max_cpu_per_gpu))

            if self.max_memory_per_gpu is not None and \
                    self.memory_per_active_gpu < self.max_memory_per_gpu:
                suggestions.append(
                    "The job uses %.2fG memory per active GPU on average"
                    "over time. The maximum memory per GPU you can "
                    "use without interfering with other GPUs in this "
                    "cluster is %.2fG. You can preload more input "
                    "data into memory to make sure your data pipeline "
                    "is never waiting on data loading from "
                    "disk/remote." % (self.memory_per_active_gpu / G,
                                      self.max_memory_per_gpu / G))

            suggestions.append(
                "Please check if your program is waiting on NFS I/O. "
                "If so, please consider using scalable storage, e.g. "
                "Azure blob.")

            suggestions.append(
                "Suggestions above are purely based on average usage over a "
                "time window. Please take a closer look at METRICS tab to "
                "better understand the utilization pattern of GPU, GPU "
                "memory, CPU and memory over time for further optimization.")
            msg += md.unordered_list(suggestions) + "\n"
            self.diagnostics += msg + "\n"
Code Example #11
    def test_gen_insights(self):
        since = 1588630427
        end = 1588634027
        node_spec = test_node_spec()
        task_gpu_percent = test_task_gpu_percent()
        task_gpu_mem_percent = test_task_gpu_mem_percent()
        task_cpu_percent = test_task_cpu_percent()
        task_mem_usage_byte = test_task_mem_usage_byte()
        running_job_ids = test_running_job_ids()

        insights = gen_insights(task_gpu_percent, task_gpu_mem_percent,
                                task_cpu_percent, task_mem_usage_byte, since,
                                end, node_spec, running_job_ids)
        self.assertEqual(len(insights), 1)

        insight = insights[0]

        expected_diagnostics = md.header("GPU Idleness", 2) + "\n"
        expected_diagnostics += md.bold("All GPU(s) are active.") + "\n\n"

        expected_diagnostics += md.header("Active GPU Utilization", 2) + "\n"
        expected_diagnostics += "Average active GPU utilization over time is 30.00% < 90%. You can try below suggestions to boost GPU utilization:\n"
        suggestions = []
        suggestions.append(
            "Average active GPU memory utilization over time is below "
            "50%. Try increasing batch size to put more data "
            "onto GPU memory to boost GPU utilization. For a "
            "distributed job, if the model has strict "
            "requirement on the global effective batch size "
            "for convergence, you can consider using a job "
            "with fewer GPUs and bigger batch size per GPU.")
        suggestions.append(
            "The job uses 1.00 CPU cores per active GPU on average"
            "over time. The maximum CPU cores per GPU you can "
            "use without interfering with other GPUs in this "
            "cluster is 4.00. You can use more CPU cores to "
            "perform data preprocessing to keep GPUs from "
            "starvation. Please consider using/increasing "
            "parallel preprocessing on your input data.")
        suggestions.append(
            "The job uses 10.00G memory per active GPU on average"
            "over time. The maximum memory per GPU you can "
            "use without interfering with other GPUs in this "
            "cluster is 100.00G. You can preload more input "
            "data into memory to make sure your data pipeline "
            "is never waiting on data loading from "
            "disk/remote.")
        suggestions.append(
            "Please check if your program is waiting on NFS I/O. "
            "If so, please consider using scalable storage, e.g. "
            "Azure blob.")
        suggestions.append(
            "Suggestions above are purely based on average usage over a "
            "time window. Please take a closer look at METRICS tab to "
            "better understand the utilization pattern of GPU, GPU "
            "memory, CPU and memory over time for further optimization.")
        expected_diagnostics += md.unordered_list(suggestions) + "\n"
        expected_diagnostics += "\n"

        expected_insight = {
            "job_id": "job0",
            "since": since,
            "end": end,
            "diagnostics": expected_diagnostics,
        }
        self.assertEqual(expected_insight, insight)
Code Example #12
    def heading(self, level, text):
        return markdown_strings.header(text, level) + "\n"
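
A minimal usage sketch for the wrapper above; the surrounding ReportBuilder class and its other methods are illustrative assumptions, since the original snippet only shows the heading method:

import markdown_strings


class ReportBuilder:
    def __init__(self):
        self.parts = []

    def heading(self, level, text):
        return markdown_strings.header(text, level) + "\n"

    def add_heading(self, level, text):
        # Collect rendered blocks; other block types could be appended the same way.
        self.parts.append(self.heading(level, text))

    def render(self):
        return "\n".join(self.parts)


builder = ReportBuilder()
builder.add_heading(1, "Report")
builder.add_heading(2, "Details")
print(builder.render())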