# Example 1
def parse_results(config_filename, experiments):
    """
    Parse the results for each specified experiment in one cfg file. Creates a
    dataframe containing one row per iteration for every trial for every
    network configuration in every experiment.

    The dataframe is saved to config_filename.pkl. The raw results are also
    saved in a .csv file named config_filename.csv.

    :param config_filename: the cfg filename
    :param experiments: a list of experiment names from the cfg file

    :return: a dataframe containing raw results
    """

    # The results table
    columns = [
        "Experiment name", "L1 channels", "L2 channels", "L3 N",
        "L1 Wt sparsity", "L2 Wt sparsity", "L3 Wt sparsity",
        "Activation sparsity", "Non-zero params", "Accuracy", "Iteration",
        "Best accuracy", "L2 dimensionality", "L3 dimensionality", "Seed", "ID"
    ]
    df = pd.DataFrame(columns=columns)

    # Load and parse experiment configurations
    with open(config_filename, "r") as config_file:
        configs = parse_config(config_file,
                               experiments,
                               globals_param=globals())

    for exp in configs:
        config = configs[exp]

        # Make sure path and data_dir are relative to the project location,
        # handling both ~/nta and ../results style paths.
        path = config.get("path", ".")
        config["path"] = str(Path(path).expanduser().resolve())

        data_dir = config.get("data_dir", "data")
        config["data_dir"] = str(Path(data_dir).expanduser().resolve())

        # Load experiment data
        experiment_path = os.path.join(config["path"], exp)
        try:
            states = load_ray_tune_experiments(experiment_path=experiment_path,
                                               load_results=True)

        except RuntimeError:
            print("Could not locate experiment state for " + exp +
                  " ...skipping")
            continue

        df = parse_one_experiment(exp, states, df)

    # Derive output names from the parameter rather than from the (closed)
    # config_file handle; compute the base name once instead of twice.
    base_name = os.path.splitext(config_filename)[0]
    df.to_csv(base_name + ".csv")
    df.to_pickle(base_name + ".pkl")
    return df
# Example 2
def collect_results(configs, basefilename):
    """
    Parse the results for each specified experiment in each config file. Creates a
    dataframe containing one row for every trial for every network configuration in
    every experiment.

    The dataframe is saved to basefilename.pkl
    The raw results are also saved in a csv file named basefilename.csv.

    :param configs: list of experiment configs
    :param basefilename: base name for output files

    :return: the collected dataframe (also written to disk)
    """

    # The results table
    columns = ["Experiment name",
               "Activation sparsity", "FF weight sparsity",
               "Dendrite weight sparsity", "Num segments",
               "Dim context", "Epochs", "Num tasks", "LR", "Momentum", "Seed",
               "Accuracy", "ID"
               ]
    df = pd.DataFrame(columns=columns)

    for exp in configs:
        config = configs[exp]

        # Make sure path and data_dir are relative to the project location,
        # handling both ~/nta and ../results style paths.
        path = config.get("local_dir", ".")
        config["path"] = str(Path(path).expanduser().resolve())

        # Load experiment data
        experiment_path = os.path.join(config["path"], exp)
        try:
            states = load_ray_tune_experiments(
                experiment_path=experiment_path, load_results=True
            )

        except RuntimeError:
            print("Could not locate experiment state for " + exp + " ...skipping")
            continue

        df = parse_one_experiment(exp, states, df)

    df.to_csv(basefilename + ".csv")
    df.to_pickle(basefilename + ".pkl")
    # Return the dataframe for programmatic use, consistent with
    # parse_results (previously the result was only written to disk).
    return df
# Example 3
def main(config, experiments, tablefmt):
    """
    Summarize test and noise scores per experiment/tag and print two tables.

    For every experiment in the cfg, groups checkpoints by experiment tag
    (ignoring the seed), averages best-epoch accuracy, entropy, and noise
    robustness across trials, and prints a short table (used in the paper)
    plus a long, detailed table via tabulate.

    :param config: cfg file object/path accepted by parse_config
    :param experiments: a list of experiment names from the cfg file
    :param tablefmt: tabulate format; "grid" prints a literal plus/minus
        sign, anything else (e.g. latex) uses "$\\pm$"
    """

    # The table we use in the paper
    test_scores_table = [["Network", "Test Score", "Noise Score", "Params"]]

    # A more detailed table
    test_scores_table_long = [[
        "Network", "Test Score", "Noise Score", "Noise Accuracy",
        "Total Entropy", "Nonzero Parameters", "Num Trials", "Session"
    ]]

    # Load and parse experiment configurations
    configs = parse_config(config, experiments, globals_param=globals())

    # Use the appropriate plus/minus sign for latex
    if tablefmt == "grid":
        pm = "±"
    else:
        pm = "$\\pm$"

    # Select tags ignoring seed value: strip ",", "<digits>_" and
    # "seed=<digits>" from the tag, then split on commas. A blank result
    # maps to the sentinel [" "] so all-seed-only tags group together.
    def key_func(x):
        s = re.split("[,]", re.sub(",|\\d+_|seed=\\d+", "",
                                   x["experiment_tag"]))
        if len(s[0]) == 0:
            return [" "]
        return s

    for exp in configs:
        config = configs[exp]

        # Make sure path and data_dir are relative to the project location,
        # handling both ~/nta and ../results style paths.
        path = config.get("path", ".")
        config["path"] = str(Path(path).expanduser().resolve())

        data_dir = config.get("data_dir", "data")
        config["data_dir"] = str(Path(data_dir).expanduser().resolve())

        # Load experiment data; missing experiment state is skipped silently.
        experiment_path = os.path.join(config["path"], exp)
        try:
            states = load_ray_tune_experiments(experiment_path=experiment_path,
                                               load_results=True)

        except RuntimeError:
            continue

        for experiment_state in states:
            # Go through all checkpoints in the experiment
            all_checkpoints = experiment_state["checkpoints"]

            # Group checkpoints by tags (groupby requires the same sort key)
            checkpoint_groups = {
                k[0]: list(v)
                for k, v in groupby(sorted(all_checkpoints, key=key_func),
                                    key=key_func)
            }

            for tag in checkpoint_groups:
                checkpoints = checkpoint_groups[tag]
                num_exps = len(checkpoints)
                test_scores = np.zeros(num_exps)
                noise_scores = np.zeros(num_exps)
                noise_accuracies = np.zeros(num_exps)
                noise_samples = np.zeros(num_exps)
                nonzero_params = np.zeros(num_exps)
                entropies = np.zeros(num_exps)

                try:
                    for i, checkpoint in enumerate(checkpoints):
                        results = checkpoint["results"]
                        if results is None:
                            # NOTE(review): skipping leaves 0.0 at index i,
                            # which still contributes to the means below —
                            # confirm this is intended.
                            continue

                        # For each checkpoint select the epoch with the best accuracy as
                        # the best epoch
                        best_result = max(results,
                                          key=lambda x: x["mean_accuracy"])
                        test_scores[i] = best_result["mean_accuracy"]
                        entropies[i] = best_result["entropy"]

                        # Load noise score from noise.json in the trial logdir
                        logdir = os.path.join(
                            experiment_path,
                            os.path.basename(checkpoint["logdir"]))
                        filename = os.path.join(logdir, "noise.json")
                        if os.path.exists(filename):
                            with open(filename, "r") as f:
                                noise = json.load(f)

                            noise_scores[i] = sum(
                                x["total_correct"]
                                for x in list(noise.values()))
                            noise_samples[i] = sum(
                                x["total_samples"]
                                for x in list(noise.values()))
                        else:
                            print("No noise file for " + experiment_path +
                                  " ...skipping")
                            continue

                        noise_accuracies[i] = (float(100.0 * noise_scores[i]) /
                                               noise_samples[i])
                        nonzero_params[i] = max(x["non_zero_parameters"]
                                                for x in list(noise.values()))
                except Exception:
                    # Broad catch: any malformed checkpoint/noise data drops
                    # the whole tag group rather than crashing the report.
                    print("Problem with checkpoint group " + tag + " in " +
                          exp + " ...skipping")
                    continue

                # Format "mean ± std" strings for each aggregated metric
                test_score = "{0:.2f} {1:} {2:.2f}".format(
                    test_scores.mean(), pm, test_scores.std())
                entropy = "{0:.2f} {1:} {2:.2f}".format(
                    entropies.mean(), pm, entropies.std())
                noise_score = "{0:,.0f} {1:}  {2:.2f}".format(
                    noise_scores.mean(), pm, noise_scores.std())
                noise_accuracy = "{0:,.2f} {1:}  {2:.2f}".format(
                    noise_accuracies.mean(), pm, noise_accuracies.std())
                nonzero = "{0:,.0f}".format(nonzero_params.mean())
                test_scores_table.append([
                    "{} {}".format(exp, tag), test_score, noise_accuracy,
                    nonzero
                ])
                test_scores_table_long.append([
                    "{} {}".format(exp, tag), test_score, noise_score,
                    noise_accuracy, entropy, nonzero, num_exps,
                    experiment_state["runner_data"]["_session_str"]
                ])

    print()
    print(tabulate(test_scores_table, headers="firstrow", tablefmt=tablefmt))
    print()
    print(
        tabulate(test_scores_table_long, headers="firstrow",
                 tablefmt=tablefmt))