Example #1
def load(self, model_dir, component):
    '''
    Finds and loads the best model from the ray.tune analysis results.
    '''
    try:
        path_analysis = os.path.join(model_dir, 'FNN', component)
        analysis = tune.Analysis(path_analysis)
        df_temp = analysis.dataframe()
        # pick the trial with the lowest mean loss
        idx = df_temp['mean_loss'].idxmin()
        logdir = df_temp.loc[idx]['logdir']
        path_config     = os.path.join(logdir, 'config')
        path_state_dict = os.path.join(logdir, 'state_dict')
    except Exception:
        # no tuning records: fall back to the plain model directory
        path_config     = os.path.join(model_dir, 'FNN', component, 'config')
        path_state_dict = os.path.join(model_dir, 'FNN', component, 'state_dict')

    with open(path_config, 'rb') as f:
        config = pickle.load(f)
        self.model = Model(config).to(self.device)

    state_dict = torch.load(path_state_dict,
                            map_location=torch.device('cpu'))
    self.model.load_state_dict(state_dict)
Example #2
def get_net_from_logs(logdir):
    """
    Returns the best tf.keras model found in `logdir`, where "best" means the
    best performance on the validation set out of all models trained during
    the hyperparameter search.

    Arguments:
        logdir (str): path to the logging folder of the model.
            The logging folder of a trained model contains:
            1) a folder "Training", which contains the logdirs
               of the individual hyperparameter searches
            2) a pickle "args.pickle", which contains a dict of the network args.
    """
    with open(os.path.join(logdir, "args.pickle"), "rb") as f:
        args = argparse.Namespace(**pickle.load(
            f))  # loads dict and converts it to namespace
    analysis = tune.Analysis(os.path.join(logdir, "Training"))
    best_config = analysis.get_best_config(metric="valid_rmse", mode="min")
    best_logdir = analysis.get_best_logdir(metric="valid_rmse", mode="min")
    model = create_model(args=args, **best_config)
    checkpoint_folder_name = [
        s for s in os.listdir(best_logdir) if s.startswith("checkpoint")
    ][0]
    model.load_weights(
        os.path.join(best_logdir, checkpoint_folder_name, "model.h5"))
    return Net(model, args)
Example #3
def get_exp_widget():
    exp_dir = os.path.join(os.getcwd(), "exp")
    analysis_list = []
    for d in os.listdir(exp_dir):
        try:
            analysis = tune.Analysis(os.path.join(exp_dir, d))
            analysis_list.append((d, analysis))
        except tune.TuneError:
            pass
    return widgets.Dropdown(options=analysis_list, description="experiment")
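
A possible notebook usage of the widget above (a sketch, not part of the original snippet): display the dropdown and read the selected tune.Analysis back from its .value attribute. It assumes ipywidgets/IPython are available and that ./exp contains at least one valid experiment directory.

# Hypothetical usage sketch for get_exp_widget()
from IPython.display import display

dropdown = get_exp_widget()
display(dropdown)                        # render the experiment picker in the notebook
analysis = dropdown.value                # the tune.Analysis chosen in the dropdown
if analysis is not None:
    print(analysis.dataframe().head())   # peek at that experiment's results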
Example #4
def compare_top_experiment(src_dist, dest_dist, metric='eval_acc', mode='max'):
    if not os.path.exists(src_dist):
        raise Exception("Directory {} does not exist".format(src_dist))

    dest_dist = ifnot_create(dest_dist)

    for exp_dir in get_dirs(src_dist):
        exp_name = os.path.basename(os.path.normpath(exp_dir))

        analysis = tune.Analysis(exp_dir)

        best_expdir = analysis.get_best_logdir(metric=metric, mode=mode)
        copyanything(best_expdir, os.path.join(dest_dist, exp_name))
Example #5
def get_best_configs(src_dist, metric='eval_acc', mode='max'):
    if not os.path.exists(src_dist):
        raise Exception("Directory {} does not exist".format(src_dist))

    best_configs = {}
    for exp_dir in get_dirs(src_dist):
        exp_name = os.path.basename(os.path.normpath(exp_dir))

        analysis = tune.Analysis(exp_dir)

        best_config = analysis.get_best_config(metric=metric, mode=mode)
        best_configs[exp_name] = best_config

    return best_configs
Example #6
def get_points_to_evaluate(path, args):
    try:
        analysis = tune.Analysis(path)
        top_runs = analysis.dataframe().sort_values(by="kappa",
                                                    ascending=False).iloc[:3]
        top_runs.columns = [
            col.replace("config:", "") for col in top_runs.columns
        ]

        params = top_runs[[
            "num_layers", "dropout", "weight_decay", "learning_rate"
        ]]

        return list(params.T.to_dict().values())
    except Exception:
        print("could not extraction previous runs from " +
              os.path.join(args.local_dir, args.experiment))
        return None
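
The returned list is shaped for Ray Tune's points_to_evaluate mechanism. A minimal sketch of wiring it into a new search, assuming a trainable function and a search_space dict are defined elsewhere and that the installed Ray version lets HyperOptSearch resolve its space from the config passed to tune.run:

# Hypothetical follow-up: seed a fresh search with the best previous runs.
# `trainable` and `search_space` are placeholders, not part of the snippet above.
from ray import tune
from ray.tune.suggest.hyperopt import HyperOptSearch

points = get_points_to_evaluate(path, args)
search_alg = HyperOptSearch(metric="kappa", mode="max",
                            points_to_evaluate=points)
tune.run(trainable, config=search_space, search_alg=search_alg, num_samples=20)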
Example #7
def get_ray_trials(experiment_dir, parameters):
    trials = []

    experiment_dir = Path(experiment_dir)
    if experiment_dir.exists():
        analysis = tune.Analysis(experiment_dir)
        for _, result in analysis.dataframe().iterrows():
            with open(Path(result["logdir"]) / "params.json", "r") as fin:
                full_config = json.load(fin)

            config = tuple(
                sorted([(parameter["name"], full_config[parameter["name"]])
                        for parameter in parameters]))

            trials.append((config, result))
    else:
        print(f"Experiment directory {experiment_dir} does not yet exist.")

    return trials
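
Because get_ray_trials returns plain (config, result) pairs, downstream code can rank them directly. A small sketch under the assumption that the experiment logged a mean_accuracy column; the path and parameter names below are placeholders, not taken from the original snippet.

# Hypothetical usage: pick the most accurate trial from the collected pairs.
import os

parameters = [{"name": "lr"}, {"name": "weight_decay"}]   # assumed parameter spec
trials = get_ray_trials(os.path.expanduser("~/ray_results/my_experiment"), parameters)
if trials:
    best_config, best_result = max(trials, key=lambda t: t[1]["mean_accuracy"])
    print(dict(best_config), best_result["mean_accuracy"])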
Example #8
def save_tables():
    hsd_exp = "run_lenet_staticstructure_gsc"
    m = import_module(f"runs.{hsd_exp}")
    df = tune.Analysis(
        os.path.expanduser(f"~/ray_results/{hsd_exp}")).dataframe()
    df = df[df["training_iteration"] == m.NUM_TRAINING_ITERATIONS]
    hsd_results = [result for _, result in df.iterrows()]
    hsd_acc = np.mean([result["mean_accuracy"] for result in hsd_results])

    dense_exp = "ax_ln_gsc"
    m = import_module(f"runs.{dense_exp}")
    _, dense_results = get_best_config(
        os.path.expanduser(f"~/ray_results/{dense_exp}"), m.PARAMETERS,
        m.NUM_TRAINING_ITERATIONS)

    exp_name = "ax_ln_bps_gsc"
    m = import_module(f"runs.{exp_name}")

    frontier_trials = get_frontier_trials(
        os.path.expanduser(f"~/ray_results/{exp_name}"), m.PARAMETERS,
        m.NUM_TRAINING_ITERATIONS)
    accs = np.array([
        np.mean([result["mean_accuracy"] for result in results])
        for _, results in frontier_trials
    ])

    diffs = accs - hsd_acc
    diffs[diffs < 0] = np.inf
    bps_config1, bps_results1 = frontier_trials[diffs.argmin()]

    # Ideally this would use the largest accuracy that achieves some minimal
    # sparsity requirement. This just happens to work using my current results.
    # (Yes, it's a hack.)
    bps_config2, bps_results2 = frontier_trials[accs.argmax()]

    save_weight_table([bps_results2, bps_results1],
                      gsc_lenet_backpropstructure, hsd_results,
                      gsc_lesparsenet, dense_results, densenet)

    save_multiplies_table([bps_results2, bps_results1], hsd_results,
                          gsc_lesparsenet, dense_results, densenet)
Example #9
def update_hyperparameter_csv(model, dataset):

    ray_run_directory = os.path.join(config.TUNE_STORE, model, dataset)

    best = tune.Analysis(ray_run_directory).dataframe().sort_values(by="score", ascending=False)
    best.columns = [col.replace("config/", "") for col in best.columns]
    best = best.iloc[0]

    if model == "Conv1D":
        hparam = ["hidden_dims", "learning_rate", "num_layers", "shapelet_width_increment", "dropout", "weight_decay","alpha", "epsilon"]
    else:
        raise NotImplementedError()

    # read csv
    print(f"opening {config.CONV1D_HYPERPARAMETER_CSV}")
    hyperparameter_database = pd.read_csv(config.CONV1D_HYPERPARAMETER_CSV, index_col=0)

    # update row
    hyperparameter_database.loc[dataset] = best[hparam]
    print(f"updating {dataset} with {best[hparam].to_dict()}")

    # store
    print(f"saving {config.CONV1D_HYPERPARAMETER_CSV}")
    hyperparameter_database.to_csv(config.CONV1D_HYPERPARAMETER_CSV, float_format="%.8f")
Example #10
    'xtick.labelsize': 8,
    'ytick.labelsize': 8
})

df_all = {}

for component in config.components:
    df_all[component] = pd.DataFrame()
    # https://ray.readthedocs.io/en/latest/tune-package-ref.html#ray.tune.Analysis
    # https://ray.readthedocs.io/en/latest/tune-usage.html#analyzing-results
    path_analysis = os.path.join(utils.model_dir, model_key, component)
    n = sum(f.endswith('.json') for f in os.listdir(path_analysis))
    if n > 1:
        warnings.warn(f'{path_analysis} contains {n} experimental runs. '
                      'Results might not plot correctly, so you might want to remove previous runs.')
    analysis = tune.Analysis(path_analysis)
    df_temp = analysis.dataframe()
    df_all[component] = pd.concat([df_all[component],df_temp], ignore_index=True, sort=False)

### CONTOUR SENSITIVITY

fig, axes = pyplot.subplots(1, len(config.components), sharex=True, sharey=True)
fig.suptitle('')
legends = []
caxes = [None]*len(axes)

w=6.3
h=2.3
fig.set_size_inches(w=w, h=h)
fig.subplots_adjust(bottom=0, top=1, left=0, right=1, wspace=0)
Example #11
from icae.tools.config_loader import config
import icae.interactive_setup as interactive
import icae.results.n01_toy.n02_hpo as hpo
from icae.models.waveform.simple import ConvAE
from icae.tools.analysis import calc_auc, plot_auc, TrainingStability
import icae.tools.loss.EMD as EMD
from icae.tools.torch.gym import Gym
from icae.tools.dataset.MC import MCDataset
from icae.tools.hyperparam import mappings
from icae.tools.dataset.single import SingleWaveformPreprocessing

plt.set_plot_path(__file__)
interactive.set_saved_value_yaml_root_by_filename(__file__)

#%%
ana = tune.Analysis("~/ray_results/final-1/")
# %%
dfs = ana.trial_dataframes
interactive.save_value("number of configurations", len(dfs), ".1e")
#%%
plot_training_overview = False
if plot_training_overview:
    ax = None  # This plots everything on the same plot
    for d in tqdm(dfs.values()):
        d.auc.plot(ax=ax, legend=False)
    plt.show_and_save("training overview")

#%%
aucs = []
times = []
for d in tqdm(dfs.values()):
Example #12

#%%
import ray
import ray.tune as tune
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

ana = tune.Analysis('~/ray_results/MPT_long_run')

# %%
val_loss = 'validation_loss'
ana.get_best_config(val_loss, 'min')

# %%
df: pd.DataFrame = ana.dataframe().dropna(axis=0, subset=[val_loss])
interesting = [val_loss] + [i for i in df.columns if 'config' in i]
df.sort_values(val_loss).head(20)[interesting]

# %%
config_att = ["config/conv_layers_per_block", "config/channel_expansion"]
for i in config_att:
    possible_values = np.unique(df[i])
    plt.hist2d(
        df[val_loss],
        df[i],
        bins=[100, len(possible_values)],
    )
    plt.colorbar()
    plt.ylabel(i)
    plt.xlabel('validation loss')
Example #13
try:
    tune.run(
        tuneLFADS,
        name=RUN_NAME,
        local_dir=PBT_HOME,
        config=flat_cfg_dict,
        resources_per_trial=RESOURCES_PER_TRIAL,
        num_samples=NUM_WORKERS,
        sync_to_driver='# {source} {target}', # prevents rsync
        scheduler=scheduler,
        progress_reporter=reporter,
        trial_executor=executor,
        verbose=1,
        reuse_actors=True,
    )
except tune.error.TuneError:
    pass

# load the results dataframe for this run
pbt_dir = path.join(PBT_HOME, RUN_NAME)
df = tune.Analysis(pbt_dir).dataframe()
df = df[df.logdir.apply(lambda path: 'best_model' not in path)]
# find the best model
best_model_logdir = df.loc[df[PBT_METRIC].idxmin()].logdir
best_model_src = path.join(best_model_logdir, 'model_dir')
# copy the best model somewhere easy to find
best_model_dest = path.join(pbt_dir, 'best_model')
shutil.copytree(best_model_src, best_model_dest)
# perform posterior sampling
from lfads_tf2.models import LFADS
model = LFADS(model_dir=best_model_dest)
model.sample_and_average()
Example #14
def save_charts(  # NOQA: C901
        chart_prefix, experiments, dense_exp, densenet_constructor, hsd_exp,
        hsd_kw_exp, hsd_constructor, error_xlim, error_ylim, acc_xlim,
        acc_ylim, acc_noise_xlim):
    script_dir = os.path.dirname(os.path.realpath(__file__))
    output_dir = Path(script_dir) / "output"
    os.makedirs(output_dir, exist_ok=True)

    plot1_results = []
    plot2_results = []

    noise_plot1_results = []
    noise_plot2_results = []

    for exp_name in experiments:
        m = import_module(f"runs.{exp_name}")

        frontier_trials = get_frontier_trials(
            os.path.expanduser(f"~/ray_results/{exp_name}"), m.PARAMETERS,
            m.NUM_TRAINING_ITERATIONS)

        nz = np.array([
            np.mean([result["inference_nz"] for result in results])
            for config, results in frontier_trials
        ])
        acc = np.array([
            np.mean([result["mean_accuracy"] for result in results])
            for config, results in frontier_trials
        ])
        err = 1 - acc
        noise_score = np.array([
            get_noise_score(results, NOISE_LEVELS)
            for config, results in frontier_trials
        ])

        order = np.argsort(nz)
        plot1_results.append((nz[order], err[order]))
        plot2_results.append((nz[order], acc[order]))
        noise_plot1_results.append((nz[order], noise_score[order]))

        order = np.argsort(acc)
        noise_plot2_results.append((acc[order], noise_score[order]))

    if dense_exp is not None:
        m = import_module(f"runs.{dense_exp}")
        _, results = get_best_config(
            os.path.expanduser(f"~/ray_results/{dense_exp}"), m.PARAMETERS,
            m.NUM_TRAINING_ITERATIONS)
        densenet_accuracy = np.mean(
            [result["mean_accuracy"] for result in results])
        densenet_noisescore = get_noise_score(results, NOISE_LEVELS)

        densenet = densenet_constructor(
            cnn_activity_percent_on=(1.0, 1.0),
            cnn_weight_percent_on=(1.0, 1.0),
            linear_activity_percent_on=(1.0, ),
            linear_weight_percent_on=(1.0, ),
        )
        densenet_num_weights = sum(
            getattr(densenet, name).weight.detach().numpy().size
            for name in ["cnn1_cnn", "cnn2_cnn", "linear1_linear", "output"])

    if hsd_exp is not None:
        m = import_module(f"runs.{hsd_exp}")
        df = tune.Analysis(
            os.path.expanduser(f"~/ray_results/{hsd_exp}")).dataframe()
        df = df[df["training_iteration"] == m.NUM_TRAINING_ITERATIONS]
        hsd_accuracy = np.mean(df["mean_accuracy"])
        hsd_noisescore = np.mean(
            get_noise_score([result for _, result in df.iterrows()],
                            NOISE_LEVELS))

    if hsd_kw_exp is not None:
        m = import_module(f"runs.{hsd_kw_exp}")
        df = tune.Analysis(
            os.path.expanduser(f"~/ray_results/{hsd_kw_exp}")).dataframe()
        df = df[df["training_iteration"] == m.NUM_TRAINING_ITERATIONS]
        hsd_kw_accuracy = np.mean(df["mean_accuracy"])
        hsd_kw_noisescore = np.mean(
            get_noise_score([result for _, result in df.iterrows()],
                            NOISE_LEVELS))

        hsd_weights_by_layer = get_hsd_weights_by_layer(hsd_constructor)
        hsd_num_weights = sum(hsd_weights_by_layer.values())

    fig = plt.figure(figsize=(4, 4))

    for nz, err in plot1_results:
        plt.plot(nz, err, "-o")

    if hsd_exp is not None:
        plt.plot(hsd_num_weights, 1 - hsd_accuracy, "x", color="C1")
    if hsd_kw_exp is not None:
        plt.plot(hsd_num_weights, 1 - hsd_kw_accuracy, "x", color="C1")
    if dense_exp is not None:
        plt.plot(densenet_num_weights, 1 - densenet_accuracy, "d", color="C3")

    plt.xlabel("# of weights")
    plt.xscale("log")
    plt.xlim(error_xlim)

    plt.ylabel("error rate")
    plt.yscale("log")
    plt.ylim(error_ylim)
    plt.grid(True)
    plt.tight_layout()
    filename = output_dir / f"{chart_prefix}_error_rate.pdf"
    print(f"Saving {filename}")
    fig.savefig(filename)

    fig = plt.figure(figsize=(4, 4))
    for nz, acc in plot2_results:
        plt.plot(nz, acc, "-o")

    if hsd_exp is not None:
        plt.plot(hsd_num_weights, hsd_accuracy, "x", color="C1")
    if hsd_kw_exp is not None:
        plt.plot(hsd_num_weights, hsd_kw_accuracy, "x", color="C1")
    if dense_exp is not None:
        plt.plot(densenet_num_weights, densenet_accuracy, "d", color="C3")

    plt.xlabel("# of weights")
    plt.xscale("log")
    plt.xlim(acc_xlim)
    plt.ylabel("accuracy")
    plt.ylim(acc_ylim)
    plt.grid(True)
    plt.tight_layout()
    filename = output_dir / f"{chart_prefix}_accuracy.pdf"
    print(f"Saving {filename}")
    fig.savefig(filename)

    fig = plt.figure(figsize=(4, 4))
    for nz, noise_score in noise_plot1_results:
        plt.plot(nz, noise_score, "-o")

    if hsd_exp is not None:
        plt.plot(hsd_num_weights, hsd_noisescore, "x", color="C1")
    if hsd_kw_exp is not None:
        plt.plot(hsd_num_weights, hsd_kw_noisescore, "x", color="C1")
    if dense_exp is not None:
        plt.plot(densenet_num_weights, densenet_noisescore, "d", color="C3")

    plt.xlabel("# of weights")
    plt.xscale("log")
    plt.xlim(acc_xlim)

    plt.ylabel("noise score")

    plt.grid(True)
    plt.tight_layout()
    filename = output_dir / f"{chart_prefix}_noise.pdf"
    print(f"Saving {filename}")
    fig.savefig(filename)

    fig = plt.figure(figsize=(4, 4))
    for acc, noise_score in noise_plot2_results:
        plt.plot(acc, noise_score, "-o")

    if hsd_exp is not None:
        plt.plot(hsd_accuracy, hsd_noisescore, "x", color="C1")
    if hsd_kw_exp is not None:
        plt.plot(hsd_kw_accuracy, hsd_kw_noisescore, "x", color="C1")
    if dense_exp is not None:
        plt.plot(densenet_accuracy, densenet_noisescore, "d", color="C3")

    plt.xlabel("accuracy")
    plt.xlim(acc_noise_xlim)

    plt.ylabel("noise score")

    plt.grid(True)
    plt.tight_layout()
    filename = output_dir / f"{chart_prefix}_acc_noise.pdf"
    print(f"Saving {filename}")
    fig.savefig(filename)
Example #15
#%%
import icae.interactive_setup
import ray
import ray.tune as tune
import pandas as pd

path = "~/ray_results/"
ana = tune.Analysis(path + "lonpoint-512")

# %%
df: pd.DataFrame = ana.dataframe("validation_loss")

# %%
df.sort_values("validation_loss").head(20)

# %%
def save_plot(expdir, outfilename, show_expected=False, mean=False):
    analysis = tune.Analysis(expdir)
    configs = analysis.get_all_configs()

    layernames = (
        "cnn1",
        "cnn2",
        "fc1",
        "fc2",
    )

    nz_by_unit_key = ("expected_nz_by_unit"
                      if show_expected else "inference_nz_by_unit")

    f = plt.figure(figsize=(12, 5))
    ax = f.add_subplot(111)
    ax.yaxis.tick_right()

    data_by_l0 = defaultdict(list)

    for trial_path, df in analysis.trial_dataframes.items():
        accuracies = []
        nz_counts = []

        for epoch in range(len(df)):
            nz = 0
            for layername in layernames:
                nz_by_unit = literal_eval(
                    df.at[epoch, "{}/{}".format(layername, nz_by_unit_key)])
                nz += np.sum(nz_by_unit)
            accuracies.append(df.at[epoch, "mean_accuracy"])
            nz_counts.append(nz)

        l0_strength = configs[trial_path]["l0_strength"]

        data_by_l0[l0_strength].append(
            (np.array(accuracies), np.array(nz_counts)))

    if mean:
        new = {}
        for l0_strength, results in data_by_l0.items():
            all_accuracies, all_nz_counts = zip(*results)
            new[l0_strength] = [(np.mean(all_accuracies,
                                         axis=0), np.mean(all_nz_counts,
                                                          axis=0))]
        data_by_l0 = new

    colors = dict(zip(data_by_l0.keys(), (
        "C0",
        "C1",
        "C2",
        "C3",
    )))

    for l0_strength, results in data_by_l0.items():
        color = colors[l0_strength]
        for accuracies, nz_counts in results:
            ax.plot(accuracies, nz_counts, "-o", markersize=2, color=color)

    ax.set_ylim(0, ax.get_ylim()[1])

    outpath = os.path.join(expdir, outfilename)
    print("Saving {}".format(outpath))
    plt.savefig(outpath)
                    df.at[epoch, "{}/{}".format(layername, nz_by_unit_key)])
                num_input_units = df.at[epoch,
                                        "{}/num_input_units".format(layername)]
                ax.hist(num_nonzeros,
                        bins=np.arange(0, num_input_units + 1,
                                       max(num_input_units / 50, 1)))

        for ax, col in zip(axes[0], layernames):
            ax.set_title(col)

        for ax, row in zip(axes[:, 0], range(epochs)):
            ax.set_ylabel("Epoch {}".format(row), size="large")

        fig.suptitle("# nonzero weights per unit", y=1.01)
        plt.tight_layout()

        outpath = os.path.join(trial_path, outfilename)
        print("Saving {}".format(outpath))
        plt.savefig(outpath)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("logdir",
                        type=str,
                        help="Path to a single trial of a ray tune experiment")
    parser.add_argument("--outfilename", type=str, default="sparsity_log.pdf")
    parser.add_argument("--expected", action="store_true")
    args = parser.parse_args()
    save_plot(args.logdir, args.outfilename, show_expected=args.expected)