def main(experiments_dir: str, output_json_path: str, keys_of_interest,
         random_subset_of_size: int) -> None:
    """Summarize the runs found under `experiments_dir` into one JSON file.

    Every file under `experiments_dir` whose path ends with "run_info.txt" is
    loaded with `load_json`. Items that occur with an equal value in every
    selected run are reported once under "common_description"; the remaining
    items of each run become that run's "description".

    Args:
        experiments_dir: directory traversed with `traverse_files`.
        output_json_path: destination passed to `save_json`.
        keys_of_interest: collection of run-info keys used to build each
            experiment's "name" (only the non-shared ones show up in it).
            When it is empty, the names are the indices "0", "1", ...
        random_subset_of_size: -1 to use every run; otherwise only the first
            `random_subset_of_size` elements of `shuffled(paths)` are used.

    Raises:
        ValueError: if no run-info file ends up selected.
        AssertionError: if some key of interest occurs in no run info.
    """
    runs_infos_paths = tuple(path for path in traverse_files(experiments_dir)
                             if path.endswith("run_info.txt"))
    if random_subset_of_size != -1:
        runs_infos_paths = tuple(
            shuffled(runs_infos_paths)[:random_subset_of_size])
    if not runs_infos_paths:
        # Fail early with a readable message; everything below needs at
        # least one run (it inspects the first run info).
        raise ValueError(
            f'no "run_info.txt" files selected under {experiments_dir!r}')

    # Relative paths (w.r.t. `experiments_dir`) of the directories that
    # contain the selected run-info files.
    experiments_dirs_relpaths = tuple(
        os.path.relpath(os.path.dirname(path), experiments_dir)
        for path in runs_infos_paths)
    runs_infos: Tuple[Dict[str, Any], ...] = tuple(
        load_json(path) for path in runs_infos_paths)

    union_of_keys = frozenset().union(*runs_infos)
    assert union_of_keys.issuperset(keys_of_interest), (
        "some keys_of_interest do not occur in any run info")

    # An item is shared only if every run has the key with an equal value,
    # so it is enough to consider the items of the first run.
    shared_items = {
        key: value
        for key, value in runs_infos[0].items()
        if all(key in run_info and run_info[key] == value
               for run_info in runs_infos)
    }
    non_shared_items = tuple(
        {k: v
         for k, v in run_info.items() if k not in shared_items}
        for run_info in runs_infos)

    if keys_of_interest:
        names = tuple(
            str({k: v
                 for k, v in d.items() if k in keys_of_interest})
            for d in non_shared_items)
    else:
        names = tuple(str(i) for i in range(len(runs_infos)))
    descriptions = tuple(str(d) for d in non_shared_items)

    json_struct = {
        "common_description":
        pformat(shared_items, indent=0),
        "experiments": [{
            "rel_dir": rel_dir,
            "name": name,
            "description": description
        } for rel_dir, name, description in zip(experiments_dirs_relpaths,
                                                names, descriptions)],
    }
    save_json(json_struct, output_json_path)
# Example no. 2
# 0
        num_matrices,
    }
    print(result)
    return result


def cartesian_product_dicts(
        d: Dict[Any, Tuple[Any, ...]]) -> Tuple[Dict[Any, Any], ...]:
    """Expand a dict of candidate values into every possible combination.

    Each value of `d` is an iterable of candidates for its key. The result
    holds one dict per element of the cartesian product of those iterables,
    in `itertools.product` order (the last key varies fastest).

    >>> cartesian_product_dicts({"a": (1, 2), "b": (3,)})
    ({'a': 1, 'b': 3}, {'a': 2, 'b': 3})

    An empty `d` yields a single empty dict; a key with no candidates yields
    an empty tuple.
    """
    keys = tuple(d)
    return tuple(
        dict(zip(keys, combination))
        for combination in itertools.product(*d.values()))


# Keyword arguments for every `benchmark` call: all combinations of the
# candidate values below.
# NOTE(review): "cuda:0" and "cuda:1" are hard-coded, so this presumably
# needs a machine with two CUDA devices - confirm.
inputs = cartesian_product_dicts(
    dict(
        dim_size=(300, ),
        num_matrices=(6, ),
        dtype=(torch.float32, torch.float64),
        device=tuple(
            torch.device(name) for name in ("cuda:0", "cuda:1", "cpu")),
        func=(torch.matmul, logmatmulexp, logmatmulexp_lowmem),
        num_iterations=(50, ),
    ))

json_path = os.path.expanduser(
    "~/projects/dctn/small_experiments/benchmark_logmatmulexp_results.json")

# Run the benchmarks first, then append their results to whatever is
# already stored at `json_path` (if the file exists).
new_results: Tuple[Dict[str, Any], ...] = tuple(
    benchmark(**kwargs) for kwargs in inputs)
old_results: Tuple[Dict[str, Any], ...]
if os.path.exists(json_path):
    old_results = tuple(load_json(json_path))
else:
    old_results = ()
combined_results = (*old_results, *new_results)
save_json(combined_results, json_path)
def get_lr(subdir: str) -> float:
    """Return the "lr" entry of `<subdir>/run_info.txt` (read via `load_json`)."""
    run_info_path = os.path.join(subdir, "run_info.txt")
    return load_json(run_info_path)["lr"]
# Example no. 4
# 0
def main(config_path: str, output_path: str, experiments_base_dir,
         big_plots: bool):
    """Render a bokeh HTML report comparing the experiments of a config.

    The config loaded from `config_path` must provide "common_description"
    and "experiments", a sequence of dicts with "rel_dir", "name" and
    "description". For each experiment, "run_info.txt" and "log.log" are read
    from `experiments_base_dir/<rel_dir>`.

    The report contains a val-acc-by-train-acc plot, four plots by number of
    iterations done (val/train accuracy, val/train mean negative log
    likelihood), range sliders linked to the plot ranges, and a textual
    description of the experiments.

    Args:
        config_path: path of the JSON config described above.
        output_path: path of the HTML file produced by bokeh.
        experiments_base_dir: directory the "rel_dir" entries are relative to.
        big_plots: if true, every plot is 1400x850 and the layout has a
            single column; otherwise a more compact two-column layout is
            used.

    Raises:
        ValueError: if the config lists no experiments.
    """
    log_rel_fname = "log.log"
    run_info_rel_fname = "run_info.txt"
    # Run-info entries left out of the textual description in the report.
    run_info_useless_keys = frozenset({
        "breakpoint_on_nan_loss",
        "commit",
        "device",
        "ds_path",
        "es_train_acc",
        "es_train_mean_ce",
        "es_val_acc",
        "es_val_mean_ce",
        "experiments_dir",
        "keep_last_models",
        "max_num_iters",
        "patience",
        "tb_batches",
        "verbosity",
    })

    config: Dict[str, Any] = load_json(config_path)
    experiments = config["experiments"]
    if not experiments:
        # Without this check the failure only shows up later as an opaque
        # "max() arg is an empty sequence".
        raise ValueError(
            f"{config_path!r} lists no experiments, nothing to plot")
    experiments_rel_dirs: Tuple[str, ...] = tuple(d["rel_dir"]
                                                  for d in experiments)
    experiments_names: Tuple[str, ...] = tuple(d["name"] for d in experiments)
    experiments_descriptions: Tuple[str, ...] = tuple(
        d["description"] for d in experiments)

    def experiment_file(rel_dir: str, rel_fname: str) -> str:
        # Path of a file inside one experiment's directory.
        return os.path.join(experiments_base_dir, rel_dir, rel_fname)

    runs_infos: Tuple[Dict[str, Any], ...] = tuple(
        {
            k: v
            for k, v in load_json(
                experiment_file(rel_dir, run_info_rel_fname)).items()
            if k not in run_info_useless_keys
        } for rel_dir in experiments_rel_dirs)

    colors = get_distinguishable_colors(len(experiments_names))

    # Each log is loaded twice: once with `increasing_tracc=True` (used for
    # the val-acc-by-train-acc plot) and once with all records (used for the
    # plots by number of iterations done).
    all_increasing_tracc_records: Tuple[Tuple[Record, ...], ...] = tuple(
        load_records(experiment_file(rel_dir, log_rel_fname),
                     increasing_tracc=True)
        for rel_dir in experiments_rel_dirs)
    all_records: Tuple[Tuple[Record, ...], ...] = tuple(
        load_records(experiment_file(rel_dir, log_rel_fname),
                     increasing_tracc=False)
        for rel_dir in experiments_rel_dirs)

    output_file(output_path, mode="inline")

    tools = "pan,wheel_zoom,box_zoom,reset,crosshair,hover,undo,redo,save"
    # NOTE(review): `plot_height`/`plot_width` are the older bokeh keyword
    # names - confirm against the bokeh version this project pins.
    big_plot_size = {"plot_height": 850, "plot_width": 1400}

    # Ranges are shared between plots (and linked to the sliders below), so
    # zooming one plot moves the others that use the same range.
    tracc_range = Range1d(bounds=(0.0, 1.0))
    vacc_range = Range1d(bounds=(0.0, 1.0))
    maximum_nitd = max(records[-1].nitd for records in all_records)
    nitd_range = Range1d(0, maximum_nitd, bounds=(0, maximum_nitd))

    def extreme_over_records(pick, record_attr: str):
        # `pick` is `min` or `max`; applied per experiment, then across
        # experiments.
        return pick(
            pick(getattr(record, record_attr) for record in records)
            for records in all_records)

    min_mce = min(extreme_over_records(min, "trmce"),
                  extreme_over_records(min, "vmce"))
    max_mce = max(extreme_over_records(max, "trmce"),
                  extreme_over_records(max, "vmce"))
    trmce_range = Range1d(0.0, max_mce, bounds=(min_mce, max_mce))
    vmce_range = Range1d(0.0, max_mce, bounds=(min_mce, max_mce))

    # plot vacc by tracc
    vacc_by_tracc_plot = figure(
        x_axis_label="train acc",
        y_axis_label="val acc",
        tools=tools,
        x_range=tracc_range,
        y_range=vacc_range,
        **(big_plot_size if big_plots else {}),
    )
    # Dashed diagonal: points where val acc equals train acc.
    vacc_by_tracc_plot.line((0.0, 1.0), (0.0, 1.0),
                            line_color="black",
                            alpha=0.3,
                            line_dash="dashed")
    for experiment_name, records, color in zip(experiments_names,
                                               all_increasing_tracc_records,
                                               colors):
        vacc_by_tracc_plot.line(
            tuple(record.tracc for record in records),
            tuple(record.vacc for record in records),
            legend_label=experiment_name,
            line_color=color,
        )
    vacc_by_tracc_plot.legend.location = "top_left"
    vacc_by_tracc_plot.legend.click_policy = "hide"

    def plot_something_by_nitd(
        y_axis_label: str,
        y_range: Range1d,
        record_attr: str,
        legend_location: str,
        plot_height: Optional[int] = None,
    ) -> Figure:
        # One line per experiment: `record_attr` of each record against the
        # number of iterations done.
        size_kwargs = (big_plot_size if big_plots else {
            "plot_height": plot_height
        })
        plot = figure(
            x_axis_label="number of iterations done",
            y_axis_label=y_axis_label,
            tools=tools,
            x_range=nitd_range,
            y_range=y_range,
            **size_kwargs,
        )
        for experiment_name, records, color in zip(experiments_names,
                                                   all_records, colors):
            plot.line(
                tuple(record.nitd for record in records),
                tuple(getattr(record, record_attr) for record in records),
                legend_label=experiment_name,
                line_color=color,
            )
        plot.legend.location = legend_location
        plot.legend.click_policy = "hide"
        return plot

    x_by_nitd_plot_height = 300
    vacc_by_nitd_plot = plot_something_by_nitd("val acc", vacc_range, "vacc",
                                               "bottom_right",
                                               x_by_nitd_plot_height)
    tracc_by_nitd_plot = plot_something_by_nitd("train acc", tracc_range,
                                                "tracc", "bottom_right",
                                                x_by_nitd_plot_height)
    vmce_by_nitd_plot = plot_something_by_nitd(
        "val mean negative log likelihood",
        vmce_range,
        "vmce",
        "top_right",
        x_by_nitd_plot_height,
    )
    trmce_by_nitd_plot = plot_something_by_nitd(
        "train mean negative log likelihood",
        trmce_range,
        "trmce",
        "top_right",
        x_by_nitd_plot_height,
    )

    def create_range_slider(target_range: Range1d, title: str,
                            step: float) -> RangeSlider:
        # NOTE(review): the slider limits come from the range's start/end
        # while its initial value comes from the range's bounds - confirm
        # this is the intended pairing.
        slider = RangeSlider(
            start=target_range.start,
            end=target_range.end,
            step=step,
            value=(target_range.bounds[0], target_range.bounds[1]),
            title=title,
        )
        # Moving a slider handle updates the matching end of the range.
        slider.js_link("value", target_range, "start", attr_selector=0)
        slider.js_link("value", target_range, "end", attr_selector=1)
        return slider

    vmce_slider = create_range_slider(vmce_range,
                                      "val mean negative log likelihood", 0.05)
    trmce_slider = create_range_slider(trmce_range,
                                       "train mean negative log likelihood",
                                       0.05)
    vacc_slider = create_range_slider(vacc_range, "val acc", 0.005)
    tracc_slider = create_range_slider(tracc_range, "train acc", 0.005)
    nitd_slider = create_range_slider(nitd_range, "number of iterations done",
                                      10)

    # Common description followed by one bullet per experiment.
    experiments_items = "</li><li>".join(
        f"<b>{name}</b>: <i>{description}</i> : {run_info}"
        for name, description, run_info in zip(
            experiments_names, experiments_descriptions, runs_infos))
    div = Div(
        text=
        f'<p>{config["common_description"]}</p><ul style="list-style-type:circle;"><li>'
        + experiments_items + "</li></ul>")

    if big_plots:
        p = gridplot((
            (vacc_by_tracc_plot, ),
            (div, ),
            (vacc_slider, ),
            (tracc_slider, ),
            (vacc_by_nitd_plot, ),
            (tracc_by_nitd_plot, ),
            (vmce_slider, ),
            (trmce_slider, ),
            (nitd_slider, ),
            (vmce_by_nitd_plot, ),
            (trmce_by_nitd_plot, ),
        ))
    else:
        p = gridplot((
            (vacc_by_tracc_plot, div),
            (vacc_slider, tracc_slider),
            (vacc_by_nitd_plot, tracc_by_nitd_plot),
            (vmce_slider, trmce_slider),
            (nitd_slider, ),
            (vmce_by_nitd_plot, trmce_by_nitd_plot),
        ))

    save(p)
# Example no. 5
# 0
def get_dropout_p(subdir: str) -> float:
    """Look up "dropout_p" in the run info loaded from `<subdir>/run_info.txt`."""
    run_info = load_json(os.path.join(subdir, "run_info.txt"))
    return run_info["dropout_p"]