Example #1
0
def test_get_runs(mocker, kubernetes_api_client_node_port):
    """Check that ``ApiClient.get_runs`` hands back the executor's future.

    The kube-config loader and the thread pool are both patched, so the
    value returned by ``get_runs`` is the mocked future whose JSON payload
    is preset to ``"a"``.
    """
    mocker.patch("kubernetes.config.load_kube_config")
    executor_cls = mocker.patch("concurrent.futures.ThreadPoolExecutor")

    # Whatever gets submitted to the pool resolves to a response whose
    # ``json()`` yields the sentinel value.
    fake_future = executor_cls.return_value.submit.return_value
    fake_future.result.return_value.json.return_value = "a"

    api = ApiClient(in_cluster=False)
    runs_future = api.get_runs()

    assert runs_future is not None
    assert runs_future.result().json() == "a"
Example #2
0
def status(name, dashboard_url):
    """Get the status of a benchmark run, or all runs if no name is given"""
    # NOTE(review): the docstring is kept verbatim because this looks like a
    # CLI command whose docstring is shown as help text -- confirm before
    # expanding it. Details live in comments instead.
    #
    # name: name of the run to report on, or None to list every run.
    # dashboard_url: forwarded to ApiClient as ``url``.
    loaded = setup_client_from_config()

    client = ApiClient(in_cluster=False,
                       url=dashboard_url,
                       load_config=not loaded)

    runs = client.get_runs().result().json()

    def _strip_job_fields(entry):
        """Remove the job bookkeeping columns that are not shown in the table."""
        # pop() with a default: a missing key must not abort the listing.
        entry.pop("job_id", None)
        entry.pop("job_metadata", None)

    if name is None:  # List all runs
        for entry in runs:
            _strip_job_fields(entry)

        click.echo(tabulate(runs, headers="keys"))
        return

    run = next((r for r in runs if r["name"] == name), None)
    if run is None:
        click.echo("Run not found")
        return

    _strip_job_fields(run)

    click.echo(tabulate([run], headers="keys"))

    # (metric key, message when a value exists, message when it does not)
    reports = (
        ("val_global_loss @ 0",
         "Current Global Loss: {0:.2f} ({1})",
         "No Validation Loss Data yet"),
        ("val_global_Prec@1 @ 0",
         "Current Global Precision: {0:.2f} ({1})",
         "No Validation Precision Data yet"),
    )

    # Issue every request before waiting on any of them, as the original did.
    pending = [
        client.get_run_metrics(run["id"], metric_filter=key, last_n=1)
        for key, _, _ in reports
    ]
    responses = [request.result() for request in pending]

    for (key, found_fmt, missing_msg), response in zip(reports, responses):
        # Only parse the body of successful responses, and parse it once.
        payload = response.json() if response.status_code < 300 else {}
        entries = payload[key] if key in payload else []

        # An empty list means the metric exists but has no data points yet;
        # treat it like a missing metric instead of raising IndexError.
        if entries:
            latest = entries[0]
            click.echo(found_fmt.format(float(latest["value"]),
                                        latest["date"]))
        else:
            click.echo(missing_msg)
Example #3
0
def download(name, output, dashboard_url):
    """Download the results of a benchmark run"""
    # NOTE(review): docstring kept verbatim; it is presumably used as CLI
    # help text -- confirm before expanding it.
    #
    # name: name of the run whose metrics are downloaded.
    # output: path of the file the downloaded bytes are written to.
    # dashboard_url: forwarded to ApiClient as ``url``.
    loaded = setup_client_from_config()

    client = ApiClient(in_cluster=False,
                       url=dashboard_url,
                       load_config=not loaded)

    runs = client.get_runs().result().json()

    # Report an unknown run the same way `status` and `delete` do instead of
    # letting StopIteration escape.
    run = next((r for r in runs if r["name"] == name), None)
    if run is None:
        click.echo("Run not found")
        return

    # Resolve the download before touching the filesystem so a failed
    # request does not leave an empty output file behind.
    response = client.download_run_metrics(run["id"]).result()

    with open(output, "wb") as f:
        f.write(response.content)
Example #4
0
def delete(name, dashboard_url):
    """Delete a benchmark run"""
    # NOTE(review): docstring kept verbatim; it is presumably used as CLI
    # help text -- confirm before expanding it.
    #
    # name: name of the run to delete.
    # dashboard_url: forwarded to ApiClient as ``url``.
    loaded = setup_client_from_config()

    client = ApiClient(in_cluster=False,
                       url=dashboard_url,
                       load_config=not loaded)

    runs = client.get_runs().result().json()

    run = next((r for r in runs if r["name"] == name), None)
    if run is None:
        click.echo("Run not found")
        return

    # Only the id is needed here. The previous removal of "job_id" and
    # "job_metadata" from the local dict had no effect on the request, but
    # raised KeyError (and skipped the deletion) when either key was absent.
    client.delete_run(run["id"])
Example #5
0
def charts(folder, filter, dashboard_url):
    """Chart the results of benchmark runs

    Save generated charts in FOLDER
    """
    # NOTE(review): docstring kept verbatim; it is presumably used as CLI
    # help text -- confirm before expanding it.
    #
    # folder: directory the PNG files are written to (created if missing).
    # filter: optional substring; only runs whose name contains it are offered.
    # dashboard_url: forwarded to ApiClient as ``url``.
    folder = Path(folder)
    folder.mkdir(parents=True, exist_ok=True)
    loaded = setup_client_from_config()

    client = ApiClient(in_cluster=False,
                       url=dashboard_url,
                       load_config=not loaded)

    runs = client.get_runs().result().json()
    runs = [r for r in runs if r["state"] == "finished"]

    if filter:
        runs = [r for r in runs if filter in r["name"]]

    options = dict(enumerate(runs))

    if len(options) < 2:
        click.echo("At least two finished runs are needed to create a summary")
        return

    options["all"] = {"name": "*all runs*"}

    prompt = 'Select the runs to generate a summary for (e.g. "0 1 2"): \n\t{}'.format(
        "\n\t".join("{} [{}]".format(r["name"], i)
                    for i, r in options.items()))

    def _parse_selection(raw):
        """Turn the prompt answer into the list of selected runs.

        Raises click.UsageError on bad input so click.prompt asks again
        instead of crashing with ValueError/KeyError.
        """
        tokens = str(raw).split()
        if "all" in tokens:
            return runs

        selected = []
        seen = set()
        try:
            for token in tokens:
                index = int(token)
                if index in seen:  # ignore repeated indices
                    continue
                seen.add(index)
                selected.append(options[index])
        except (ValueError, KeyError):
            raise click.UsageError(
                "invalid selection {!r}: enter run numbers separated by "
                'spaces, or "all"'.format(raw))

        if not selected:
            raise click.UsageError("no runs selected")
        return selected

    # The default is handed to value_proc as-is, so it has to be something
    # the parser understands; the former int default 0 raised TypeError and
    # a single run could never satisfy the two-run minimum anyway. `type` is
    # not consulted when value_proc is given, so the (malformed) Choice that
    # used to be passed here is dropped.
    choice = click.prompt(
        prompt,
        default="all",
        value_proc=_parse_selection,
    )

    if len(choice) < 2:
        click.echo("At least two finished runs are needed to create a summary")
        return

    def _get_metric(name, run):
        """Gets a metric from the dashboard."""
        name = "global_cum_{} @ 0".format(name)
        return float(
            client.get_run_metrics(run["id"], metric_filter=name,
                                   last_n=1).result().json()[name][0]["value"])

    phases = ("agg", "backprop", "batch_load", "comp_loss", "comp_metrics",
              "fwd_pass", "opt_step")

    results = []

    for run in choice:
        timing = {phase: _get_metric(phase, run) for phase in phases}
        single_worker = run["num_workers"] == 1

        # With a single worker, aggregation counts as compute time because
        # there is nothing to communicate with.
        compute = (timing["fwd_pass"] + timing["comp_loss"] +
                   timing["backprop"] + timing["opt_step"] +
                   (timing["agg"] if single_worker else 0))
        communicate = 0 if single_worker else timing["agg"]

        results.append((
            run["name"],
            compute,
            communicate,
            timing["comp_metrics"],
            timing["batch_load"],
            int(run["num_workers"]),
        ))

    # Sort numerically: sorting the string form put "16" before "2", which
    # also made the speedup baseline (first entry) the wrong run.
    results.sort(key=lambda row: row[5])
    _, compute, communicate, metrics, batch_load, num_workers = zip(*results)

    # Bar positions are categorical, so the labels are strings.
    labels = [str(n) for n in num_workers]
    width = 0.35

    def _stacked_bars(ax, series):
        """Draw each (label, values) series on top of the previous ones."""
        bottoms = [0.0] * len(labels)
        for label, values in series:
            ax.bar(labels, values, width, bottom=bottoms, label=label)
            bottoms = [b + v for b, v in zip(bottoms, values)]

    def _save(fig, filename):
        """Write the figure into the output folder and release it."""
        fig.savefig(folder / filename, dpi=150)
        plt.close(fig)

    fig, ax = plt.subplots()
    # Without `bottom=` the series were drawn over each other, hiding the
    # smaller one instead of adding up to the total time.
    _stacked_bars(ax, [("Compute", compute),
                       ("Communication", communicate)])
    ax.set_ylabel("Time (s)")
    ax.set_title("Total time by number of workers")
    ax.legend()
    _save(fig, "total_time.png")

    fig, ax = plt.subplots()
    combined = [c + r for c, r in zip(compute, communicate)]
    # Baseline is the run with the fewest workers (first after sorting).
    speedup = [combined[0] / c for c in combined]
    ax.bar(labels, speedup, width)
    ax.set_ylabel("Speedup factor")
    ax.set_title("Speedup")
    _save(fig, "speedup.png")

    fig, ax = plt.subplots()
    _stacked_bars(ax, [("Compute", compute),
                       ("Communication", communicate),
                       ("Metrics Computation", metrics),
                       ("Batch Loading", batch_load)])
    ax.set_ylabel("Time (s)")
    ax.set_title("Total time by number of workers")
    ax.legend()
    _save(fig, "time_for_all_phases.png")

    click.echo("Summary created in {}".format(folder))