def analyse_ray_dump(ray_directory, topn, metric="avg_inc_acc"):
    if metric not in ("avg_inc_acc", "last_acc"):
        raise NotImplementedError("Unknown metric {}.".format(metric))

    ea = Analysis(ray_directory)
    trials_dataframe = ea.dataframe()
    trials_dataframe = trials_dataframe.sort_values(by=metric, ascending=False)

    # Map each hyper-parameter name (column prefixed with "config:") to its
    # column index, and remember where the metric column lives.
    mapping_col_to_index = {}
    result_index = -1
    for index, col in enumerate(trials_dataframe.columns):
        if col.startswith("config:"):
            mapping_col_to_index[col[7:]] = index
        elif col == metric:
            result_index = index

    print("Ray config: {}".format(ray_directory))
    print("Best Config:")
    print("{}: {} with {}.".format(
        metric, trials_dataframe.iloc[0, result_index],
        _get_line_results(trials_dataframe, 0, mapping_col_to_index)))

    print("\nFollowed by:")
    if topn < 0:
        topn = len(trials_dataframe)
    else:
        topn = min(topn, len(trials_dataframe))
    for i in range(1, topn):
        print("{}: {} with {}.".format(
            metric, trials_dataframe.iloc[i, result_index],
            _get_line_results(trials_dataframe, i, mapping_col_to_index)))

    return _get_line_results(trials_dataframe, 0, mapping_col_to_index)
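# Usage sketch for analyse_ray_dump above (the results directory is a
# hypothetical example, not a path from the original code). topn=-1 prints
# every trial; a positive topn caps the printed runners-up.
if __name__ == "__main__":
    best = analyse_ray_dump("~/ray_results/incremental_run", topn=5,
                            metric="avg_inc_acc")
    print(best)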
def main():
    analysis = Analysis(TUNE_RESULTS_FOLDER)
    print("Best hyperparameter {}".format(
        analysis.get_best_config(metric="mean_reward", mode="max")))
    best_model_path = analysis.get_best_logdir(metric="mean_reward", mode="max")
    print("Best model found in {}, start rendering .gif".format(best_model_path))

    best_model = SomeModelToTrain({
        'learning_rate': 0.1,
        'batch_size': 1,
        'target_update': 1
    })
    checkpoint_path = f'{best_model_path}/checkpoint_{MAX_TRAINING_ITERATION}'
    best_model.load(checkpoint_path + '/' + MODEL_FILENAME)

    # Adapted from
    # https://stable-baselines.readthedocs.io/en/master/guide/examples.html
    env = gym.make('LunarLander-v2')
    images = []
    state = env.reset()
    for j in range(210):
        action = best_model.agent.act(state)
        img = env.render(mode='rgb_array')
        images.append(img)
        state, reward, done, _ = env.step(action)
        if done:
            break
    env.close()

    # keep every other frame to halve the gif size
    imageio.mimsave(
        'best_model.gif',
        [np.array(img) for i, img in enumerate(images) if i % 2 == 0],
        fps=29)
    optimize('best_model.gif')
def testBestLogdir(self):
    analysis = Analysis(self.test_dir)
    logdir = analysis.get_best_logdir(self.metric)
    self.assertTrue(logdir.startswith(self.test_dir))
    logdir2 = analysis.get_best_logdir(self.metric, mode="min")
    self.assertTrue(logdir2.startswith(self.test_dir))
    self.assertNotEqual(logdir, logdir2)
def collect(self):
    """
    Collects and cleans data on the running Tune experiment from the
    Tune logs so that users can see this information in the front-end
    client.
    """
    sub_dirs = os.listdir(self._logdir)
    job_names = filter(
        lambda d: os.path.isdir(os.path.join(self._logdir, d)), sub_dirs)

    self._trial_records = {}

    # search through all the sub_directories in the log directory
    for job_name in job_names:
        analysis = Analysis(str(os.path.join(self._logdir, job_name)))
        df = analysis.dataframe()
        if len(df) == 0:
            continue

        self._available = True

        # make sure that data will convert to JSON without error
        df["trial_id"] = df["trial_id"].astype(str)
        df = df.fillna(0)

        # convert df to python dict
        df = df.set_index("trial_id")
        trial_data = df.to_dict(orient="index")

        # clean data and update class attribute
        if len(trial_data) > 0:
            trial_data = self.clean_trials(trial_data, job_name)

        self._trial_records.update(trial_data)
def collect(self):
    """
    Collects and cleans data on the running Tune experiment from the
    Tune logs so that users can see this information in the front-end
    client.
    """
    sub_dirs = os.listdir(self._logdir)
    job_names = filter(
        lambda d: os.path.isdir(os.path.join(self._logdir, d)), sub_dirs)

    self._trial_records = {}

    # search through all the sub_directories in the log directory
    for job_name in job_names:
        analysis = Analysis(str(os.path.join(self._logdir, job_name)))
        df = analysis.dataframe()
        if len(df) == 0 or "trial_id" not in df.columns:
            continue

        # start the TensorBoard server if it is not running yet
        if not self._tensor_board_started:
            tb = program.TensorBoard()
            tb.configure(argv=[None, "--logdir", self._logdir])
            tb.launch()
            self._tensor_board_started = True

        self._available = True

        # make sure that data will convert to JSON without error
        df["trial_id_key"] = df["trial_id"].astype(str)
        df = df.fillna(0)

        # coerce trial ids that are neither str nor int (e.g. numpy floats)
        trial_ids = df["trial_id"].copy()
        for i, value in df["trial_id"].items():
            if type(value) != str and type(value) != int:
                trial_ids[i] = int(value)
        df["trial_id"] = trial_ids

        # convert df to python dict
        df = df.set_index("trial_id_key")
        trial_data = df.to_dict(orient="index")

        # clean data and update class attribute
        if len(trial_data) > 0:
            trial_data = self.clean_trials(trial_data, job_name)

        self._trial_records.update(trial_data)
        self.collect_errors(job_name, df)
def analyze_ray_experiment(exp_dir, default_metric, default_mode):
    from ray.tune import Analysis

    analysis = Analysis(exp_dir,
                        default_metric=default_metric,
                        default_mode=default_mode)
    topk_summary_plot_v2(analysis, 5, save_dir=exp_dir)
    summ, styled = summarize_top_k(analysis, k=10, save_dir=exp_dir)
def get_tune_dfs(path):
    """
    Load the per-trial dataframes produced/logged by Tune. For details see:
    https://docs.ray.io/en/latest/tune/api_docs/analysis.html#tune-analysis-docs
    """
    analysis = Analysis(path)
    return analysis.trial_dataframes
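# Minimal sketch of consuming get_tune_dfs: trial_dataframes maps each trial
# logdir to a per-iteration DataFrame. The metric name "mean_accuracy" and
# the experiment path are assumptions for illustration.
dfs = get_tune_dfs("~/ray_results/my_experiment")
for logdir, trial_df in dfs.items():
    if "mean_accuracy" in trial_df.columns:
        # best value this trial ever reported
        print(logdir, trial_df["mean_accuracy"].max())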
def get_best_specific_params_dir(opt, layer, model, att_type):
    analysis = Analysis("../ray_tune/{}".format(opt['folder']))
    df = analysis.dataframe(metric=opt['metric'], mode='max')
    print(df)
    df.columns = [c.replace('config/', '') for c in df.columns]
    print(df)

    # debug output: show the requested values next to what is available
    print(layer)
    print(df['num_layers'].unique())
    print(model)
    print(df['model'].unique())
    print(att_type)
    print(df['att_type'].unique())

    newdf = df.loc[(df['num_layers'] == layer) & (df['model'] == model)
                   & (df['att_type'] == att_type)]
    print(newdf)

    best_params_dir = newdf.sort_values(
        'accuracy', ascending=False)['logdir'].iloc[opt['index']]
    return best_params_dir
def meta_setup(self):
    from ray.tune import Analysis

    out_meta = ContextCompositeNode.meta_setup(self)
    if 'tune' in self.conf:
        if 'local_dir' in self.conf['tune']:
            path = self.conf['tune']['local_dir']
            if 'name' in self.conf['tune']:
                exp = self.conf['tune']['name']
                try:
                    analysis = Analysis(path + '/' + exp)
                    if 'best' in self.conf:
                        best = analysis.get_best_config(**self.conf['best'])
                        for key in best.keys():
                            self.conf['context'][key]['value'] = best[key]
                        print('get best', best)
                    out_meta.outports[self.OUTPUT_CONFIG] = self.conf
                except Exception:
                    # the tune results may not exist yet; fall back to the
                    # unmodified configuration
                    pass
    return out_meta
def collect(self):
    """
    Collects and cleans data on the running Tune experiment from the
    Tune logs so that users can see this information in the front-end
    client.
    """
    self._trial_records = {}
    self._errors = {}
    if not self._logdir:
        return

    # search through all the sub_directories in the log directory
    analysis = Analysis(str(self._logdir))
    df = analysis.dataframe()
    if len(df) == 0 or "trial_id" not in df.columns:
        return

    self._trials_available = True

    # make sure that data will convert to JSON without error
    df["trial_id_key"] = df["trial_id"].astype(str)
    df = df.fillna(0)

    # coerce trial ids that are neither str nor int (e.g. numpy floats)
    trial_ids = df["trial_id"].copy()
    for i, value in df["trial_id"].items():
        if type(value) != str and type(value) != int:
            trial_ids[i] = int(value)
    df["trial_id"] = trial_ids

    # convert df to python dict
    df = df.set_index("trial_id_key")
    trial_data = df.to_dict(orient="index")

    # clean data and update class attribute
    if len(trial_data) > 0:
        trial_data = self.clean_trials(trial_data)

    self._trial_records.update(trial_data)
    self.collect_errors(df)
from ray.tune import Analysis
import pandas as pd
import os
import numpy as np

if __name__ == "__main__":
    analysis = Analysis(
        "/Users/shaobohu/Documents/我的坚果云/project/circles_experiment/TRY_ALL/Train")
    print(sorted(analysis.dataframe()['acc'].tolist()))
    print(analysis.get_best_config('acc', 'max'))
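# Sketch building on the script above: rank every trial by its final 'acc'
# and show the hyper-parameter columns alongside it (the "config" column
# prefix varies across Ray versions, hence the loose startswith check).
df = analysis.dataframe()
config_cols = [c for c in df.columns if c.startswith("config")]
print(df.sort_values("acc", ascending=False)[["acc"] + config_cols].head())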
def main(args):
    ray.init(num_cpus=args.num_cpus,
             memory=3000 * 1024**2,
             object_store_memory=300 * 1024**2)

    def train_reg(config, reporter):
        sys.path.append(BASE_DIR)

        # 1) load / generate data
        from experiments.data_sim import provide_data
        data_train, data_valid, _ = provide_data(dataset=args.dataset)

        # 2) setup model
        from meta_learn.GPR_meta_vi import GPRegressionMetaLearnedVI
        torch.set_num_threads(N_THREADS_PER_RUN)
        model = GPRegressionMetaLearnedVI(data_train, **config)

        # 3) train and evaluate model
        eval_period = 2000
        train_iter = 0
        for i in range(config["num_iter_fit"] // eval_period):
            loss = model.meta_fit(verbose=False, log_period=2000,
                                  n_iter=eval_period)
            train_iter += eval_period
            ll, rmse, calib_err = model.eval_datasets(data_valid)
            reporter(timesteps_total=train_iter,
                     loss=loss,
                     test_rmse=rmse,
                     test_ll=ll,
                     calib_err=calib_err)

    @ray.remote
    def train_test(config):
        results_dict = config
        try:
            sys.path.append(BASE_DIR)

            # 1) load / generate data
            from experiments.data_sim import provide_data
            data_train, _, data_test = provide_data(dataset=args.dataset,
                                                    seed=SEED)

            # 2) fit model
            from meta_learn.GPR_meta_vi import GPRegressionMetaLearnedVI
            torch.set_num_threads(N_THREADS_PER_RUN)
            model = GPRegressionMetaLearnedVI(data_train, **config)
            model.meta_fit(data_test, log_period=5000)

            # 3) evaluate on test set
            ll, rmse, calib_err = model.eval_datasets(data_test)
            results_dict.update(ll=ll, rmse=rmse, calib_err=calib_err)
        except Exception as e:
            print(e)
            results_dict.update(ll=np.nan, rmse=np.nan, calib_err=np.nan)
        return results_dict

    if len(args.load_analysis_from) > 0:
        assert os.path.isdir(args.load_analysis_from), \
            'load_analysis_from must be a valid directory'
        print('Loading existing tune analysis results from %s' %
              args.load_analysis_from)
        analysis = Analysis(args.load_analysis_from)
        exp_name = os.path.basename(args.load_analysis_from)
    else:
        space = {
            "weight_prior_std": hp.loguniform("weight_prior_std",
                                              math.log(5e-2), math.log(1.0)),
            "prior_factor": hp.loguniform("prior_factor", math.log(1e-5),
                                          math.log(1e-1)),
            "lr": hp.loguniform("lr", math.log(5e-4), math.log(5e-3)),
            "lr_decay": hp.loguniform("lr_decay", math.log(0.8), math.log(1.0)),
            "svi_batch_size": hp.choice("svi_batch_size", [10, 50]),
            "task_batch_size": hp.choice("task_batch_size", [4, 10]),
        }

        config = {
            "num_samples": 240,
            "config": {
                "num_iter_fit": 30000,
                'kernel_nn_layers': [32, 32, 32, 32],
                'mean_nn_layers': [32, 32, 32, 32],
                'random_seed': SEED,
                'mean_module': 'NN',
                'covar_module': args.covar_module,
                'normalize_data': True,
                'cov_type': 'diag'
            },
            "stop": {
                "timesteps_total": 30000
            },
        }

        # Run hyper-parameter search
        algo = HyperOptSearch(space,
                              max_concurrent=args.num_cpus,
                              metric="test_ll",
                              mode="max")
        exp_name = 'tune_meta_vi_%s_kernel_%s' % (args.covar_module,
                                                  args.dataset)
        analysis = tune.run(train_reg,
                            name=exp_name,
                            search_alg=algo,
                            verbose=1,
                            local_dir=HPARAM_EXP_DIR,
                            **config)

    # Select the N best configurations, then re-run train & test with 5
    # different seeds
    from experiments.hyperparam_search.util import select_best_configs
    if args.metric == 'test_ll':
        best_configs = select_best_configs(analysis, metric='test_ll',
                                           mode='max', N=args.n_test_runs)
    elif args.metric == 'test_rmse':
        best_configs = select_best_configs(analysis, metric='test_rmse',
                                           mode='min', N=args.n_test_runs)
    else:
        raise AssertionError('metric must be test_ll or test_rmse')

    test_configs = []
    for config in best_configs:
        for seed in TEST_SEEDS:
            test_config = copy.deepcopy(config)
            test_config.update({'random_seed': seed})
            test_configs.append(test_config)

    result_dicts = ray.get(
        [train_test.remote(config) for config in test_configs])

    result_df = pd.DataFrame(result_dicts)
    print(result_df.to_string())

    csv_file_name = os.path.join(
        HPARAM_EXP_DIR, '%s_%s.csv' %
        (exp_name, datetime.now().strftime("%b_%d_%Y_%H:%M:%S")))
    result_df.to_csv(csv_file_name)
    print("\nSaved result csv to %s" % csv_file_name)
def main(args):
    ray.init(num_cpus=args.num_cpus,
             memory=3000 * 1024**2,
             object_store_memory=300 * 1024**2)

    def train_reg(config, reporter):
        sys.path.append(BASE_DIR)

        # 1) load / generate data
        from experiments.data_sim import provide_data
        data_train, data_valid, _ = provide_data(dataset=args.dataset,
                                                 seed=SEED)

        # 2) setup model
        from meta_learn.GPR_meta_pac import GPRegressionMetaLearnedPAC
        torch.set_num_threads(N_THREADS_PER_RUN)
        model = GPRegressionMetaLearnedPAC(data_train, **config)

        # 3) train and evaluate model
        with gpytorch.settings.max_cg_iterations(300):
            log_period = 5000
            train_iter = 0
            loss = 0.0
            diagnostics_dict = {}
            for i in range(config["num_iter_fit"] // log_period):
                loss, diagnostics_dict = model.meta_fit(verbose=False,
                                                        log_period=1000,
                                                        eval_period=100000,
                                                        n_iter=log_period)
                train_iter += log_period
                # report intermediate iterations without the expensive
                # validation metrics
                if i < config["num_iter_fit"] // log_period - 1:
                    reporter(timesteps_total=train_iter,
                             loss=loss,
                             test_rmse=math.nan,
                             test_ll=math.nan,
                             calib_err=math.nan,
                             **diagnostics_dict)

            ll, rmse, calib_err = model.eval_datasets(data_valid,
                                                      n_iter_meta_test=3000)
            reporter(timesteps_total=train_iter,
                     loss=loss,
                     test_rmse=rmse,
                     test_ll=ll,
                     calib_err=calib_err,
                     **diagnostics_dict)

    @ray.remote
    def train_test(config):
        results_dict = config
        try:
            sys.path.append(BASE_DIR)

            # 1) load / generate data
            from experiments.data_sim import provide_data
            data_train, _, data_test = provide_data(dataset=args.dataset,
                                                    seed=SEED)

            # 2) fit model
            from meta_learn.GPR_meta_pac import GPRegressionMetaLearnedPAC
            torch.set_num_threads(N_THREADS_PER_RUN)
            with gpytorch.settings.max_cg_iterations(500):
                model = GPRegressionMetaLearnedPAC(data_train, **config)
                model.meta_fit(data_test, log_period=1000, eval_period=100000)

                # 3) evaluate on test set
                ll, rmse, calib_err = model.eval_datasets(
                    data_test, n_iter_meta_test=3000)
            results_dict.update(ll=ll, rmse=rmse, calib_err=calib_err)
        except Exception as e:
            print(e)
            results_dict.update(ll=np.nan, rmse=np.nan, calib_err=np.nan)
        return results_dict

    assert args.metric in ['test_ll', 'test_rmse']
    exp_name = 'tune_meta_pac_%s_kernel_%s' % (args.covar_module, args.dataset)

    if args.load_analysis:
        analysis_dir = os.path.join(HPARAM_EXP_DIR, exp_name)
        assert os.path.isdir(analysis_dir), \
            'analysis directory %s does not exist' % analysis_dir
        print('Loading existing tune analysis results from %s' % analysis_dir)
        analysis = Analysis(analysis_dir)
    else:
        space = {
            "task_kl_weight": hp.loguniform("task_kl_weight", math.log(5e-2),
                                            math.log(1e0)),
            "meta_kl_weight": hp.loguniform("meta_kl_weight", math.log(1e-7),
                                            math.log(1e0)),
            "lr": hp.loguniform("lr", math.log(1e-4), math.log(1e-3)),
            "lr_decay": hp.loguniform("lr_decay", math.log(0.92),
                                      math.log(0.97)),
            "posterior_lr_multiplier": hp.loguniform("posterior_lr_multiplier",
                                                     math.log(1e0),
                                                     math.log(10.)),
            "svi_batch_size": hp.choice("svi_batch_size", [5, 10]),
            "task_batch_size": hp.choice("task_batch_size", [5, 20]),
        }

        config = {
            "num_samples": 150,
            "config": {
                "num_iter_fit": 40000,
                'kernel_nn_layers': [32, 32, 32, 32],
                'mean_nn_layers': [32, 32, 32, 32],
                'random_seed': SEED,
                'mean_module': 'NN',
                'covar_module': args.covar_module,
                'normalize_data': True,
                'cov_type': 'diag'
            },
            "stop": {
                "timesteps_total": 100000
            },
        }

        # configs_to_evaluate = [{
        #     "task_kl_weight": 1.0,
        #     "meta_kl_weight": 1e-5,
        #     "lr": 1e-3,
        #     "lr_decay": 0.95,
        #     "posterior_lr_multiplier": 5.0,
        #     "svi_batch_size": 0,
        #     "task_batch_size": 0,
        # }]

        # Run hyper-parameter search
        algo = HyperOptSearch(
            space,
            max_concurrent=args.num_cpus,
            metric=args.metric,
            mode="max" if args.metric == 'test_ll' else "min",
        )
        analysis = custom_tune.run(train_reg,
                                   name=exp_name,
                                   search_alg=algo,
                                   verbose=1,
                                   raise_on_failed_trial=False,
                                   local_dir=HPARAM_EXP_DIR,
                                   resume=args.resume,
                                   **config)

    # Select the N best configurations, then re-run train & test with 5
    # different seeds
    from experiments.hyperparam_search.util import select_best_configs
    if args.metric == 'test_ll':
        best_configs = select_best_configs(analysis, metric='test_ll',
                                           mode='max', N=args.n_test_runs)
    elif args.metric == 'test_rmse':
        best_configs = select_best_configs(analysis, metric='test_rmse',
                                           mode='min', N=args.n_test_runs)
    else:
        raise AssertionError('metric must be test_ll or test_rmse')

    test_configs = []
    for config in best_configs:
        for seed in TEST_SEEDS:
            test_config = copy.deepcopy(config)
            test_config.update({'random_seed': seed})
            test_configs.append(test_config)

    result_dicts = ray.get(
        [train_test.remote(config) for config in test_configs])

    result_df = pd.DataFrame(result_dicts)
    print(result_df.to_string())

    csv_file_name = os.path.join(
        HPARAM_EXP_DIR, '%s_%s.csv' %
        (exp_name, datetime.now().strftime("%b_%d_%Y_%H:%M:%S")))
    result_df.to_csv(csv_file_name)
    print("\nSaved result csv to %s" % csv_file_name)
project_dir = os.path.dirname(os.path.join(os.getcwd(), __file__))
sys.path.append(os.path.normpath(os.path.join(project_dir, '..', '..')))
from experiments.utils.constants import RESULTS_PATH

series = 6
type_env = 2
dataset_id = 0
workload_id = 0
experiment_id = 0
algorithm = 'PPO'
trial = ('PPO_CloudSimV2_847c5_00000_0_fcnet_hiddens_0=20,'
         'fcnet_hiddens_1=20_2021-02-14_22-46-46')

experiment_folder_path = os.path.join(
    RESULTS_PATH, "series", str(series), "envs", str(type_env), "datasets",
    str(dataset_id), "workloads", str(workload_id), "experiments",
    str(experiment_id), str(algorithm), trial)

analysis = Analysis(experiment_folder_path)
df = analysis.trial_dataframes[experiment_folder_path]

selected_stats = [
    'episode_reward_mean', 'episodes_this_iter', 'timesteps_total',
    'episodes_total', 'experiment_id', 'custom_metrics/num_moves_mean',
    'custom_metrics/num_consolidated_mean',
    'custom_metrics/num_overloaded_mean'
]
df = df[selected_stats]
# each row of the df is one training iteration
a = 1
from ray.tune import Analysis
import torch
import matplotlib.pyplot as plt
from core.dataloader import *
from core.model import *
from core.training import *
from core.evaluating import *
from plots.plots import *
from core.create_folder import *
from core.pred_sequence import *

folders = []
folders.append(
    "Run_w_11_timesteps_3_hiddenDim_1_layers_0.00021723989839730966_LR")

configs = []
analysis = Analysis("~/ray_results/Jan21")
configs.append(analysis.get_best_config(metric="error", mode="max"))
for i in range(2, 17):
    analysis = Analysis("~/ray_results/" + str(i) + "forward")
    config = analysis.get_best_config(metric="error", mode="max")
    configs.append(config)
    print(config)
    folder_name = 'Predicting' + str(config["num_forward"]) + '_w_' + str(
        config["timesteps"]) + '_timesteps_' + str(
            config["hidden_dim"]) + '_hiddenDim_' + str(
                config["num_layers"]) + '_layers_' + str(config["lr"]) + "_LR"
    folders.append(folder_name)
    # print(folder_name)

print(len(folders))
print(len(configs))
model_keys = []
def raytune_analysis(exp_dir, save, skip, mode, metric):
    analysis = Analysis(exp_dir, default_metric=metric, default_mode=mode)
    plot_ray_analysis(analysis, save=save, skip=skip)
    analyze_ray_experiment(exp_dir, default_metric=metric, default_mode=mode)
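# Hypothetical CLI wrapper around raytune_analysis; the flag names and
# defaults here are assumptions, not part of the original script.
import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Summarize and plot a Ray Tune experiment.")
    parser.add_argument("exp_dir")
    parser.add_argument("--metric", default="vl_score")
    parser.add_argument("--mode", default="max", choices=["max", "min"])
    parser.add_argument("--save", action="store_true")
    parser.add_argument("--skip", type=int, default=1)
    args = parser.parse_args()
    raytune_analysis(args.exp_dir, save=args.save, skip=args.skip,
                     mode=args.mode, metric=args.metric)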
def get_best_info(exp_dir,
                  metrics=['vl_score', 'vl_loss'],
                  ascending=[False, True],
                  mode='auto'):
    if mode == 'auto':
        analysis = Analysis(exp_dir, 'vl_score', 'max')
        df = analysis.dataframe()
        df = df.sort_values(metrics, ascending=ascending)
        # the last column of the dataframe holds the trial logdir
        trial_dir = df.iloc[0][-1]

        # find the earliest checkpoint saved for the best trial
        min_ = 10000
        for f in os.listdir(trial_dir):
            if 'checkpoint' in f:
                idx = int(f.split('_')[1])
                min_ = min(min_, idx)
        chk_file = os.path.join(trial_dir, f'checkpoint_{min_}', 'model.pth')

        with open(os.path.join(trial_dir, 'params.json')) as f:
            config = json.load(f)
        with open(os.path.join(trial_dir, 'result.json')) as f:
            res = [json.loads(i) for i in f]
        best_res = res[min_ - 1]

        return {
            'trial_dir': trial_dir,
            'chk_file': chk_file,
            'config': config,
            'tr_loss': best_res['tr_loss'],
            'tr_score': best_res['tr_score'],
            'vl_loss': best_res['vl_loss'],
            'vl_score': best_res['vl_score']
        }
    elif mode == 'manual':
        best_dict = {
            'trial_dir': None,
            'chk_file': None,
            'config': None,
            'tr_loss': float('inf'),
            'tr_score': 0,
            'vl_loss': float('inf'),
            'vl_score': 0
        }
        dirs = [
            part_dir for part_dir in os.listdir(exp_dir)
            if os.path.isdir(os.path.join(exp_dir, part_dir))
        ]
        for part_dir in dirs:
            trial_dir = os.path.join(exp_dir, part_dir)

            # find the earliest checkpoint saved for this trial
            min_ = 400
            for f in os.listdir(trial_dir):
                if 'checkpoint' in f:
                    idx = int(f.split('_')[1])
                    min_ = min(min_, idx)

            with open(os.path.join(trial_dir, 'result.json')) as f:
                for i, d in enumerate(f):
                    if i + 1 == min_:
                        curr = json.loads(d)
                        # or (best_dict['vl_score'] == curr['vl_score'] and
                        #     best_dict['vl_loss'] > curr['vl_loss'])
                        if best_dict['vl_score'] < curr['vl_score']:
                            with open(os.path.join(trial_dir,
                                                   'params.json')) as pf:
                                config = json.load(pf)
                            best_dict = {
                                'trial_dir': trial_dir,
                                'chk_file': os.path.join(
                                    trial_dir, f'checkpoint_{min_}/model.pth'),
                                'config': config,
                                'tr_loss': curr['tr_loss'],
                                'tr_score': curr['tr_score'],
                                'vl_loss': curr['vl_loss'],
                                'vl_score': curr['vl_score']
                            }
        return best_dict
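# Usage sketch for get_best_info above: restore the selected checkpoint into
# a model. The experiment directory and MyModel are placeholders.
# info = get_best_info("~/ray_results/my_experiment", mode='auto')
# model = MyModel(**info['config'])
# model.load_state_dict(torch.load(info['chk_file']))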
def testDataframe(self):
    analysis = Analysis(self.test_dir)
    df = analysis.dataframe()
    self.assertTrue(isinstance(df, pd.DataFrame))
    self.assertEqual(df.shape[0], self.num_samples * 2)
def get_best_params_dir(opt):
    analysis = Analysis("../ray_tune/{}".format(opt['folder']))
    df = analysis.dataframe(metric=opt['metric'], mode='max')
    best_params_dir = df.sort_values(
        'accuracy', ascending=False)['logdir'].iloc[opt['index']]
    return best_params_dir
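# Sketch (hypothetical folder/opt values): the logdir returned by
# get_best_params_dir can be mapped back to its full config via
# Analysis.get_all_configs().
opt = {'folder': 'my_experiment', 'metric': 'accuracy', 'index': 0}
best_dir = get_best_params_dir(opt)
analysis = Analysis("../ray_tune/{}".format(opt['folder']))
print(analysis.get_all_configs()[best_dir])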
def testBestConfigIsLogdir(self):
    analysis = Analysis(self.test_dir)
    for metric, mode in [(self.metric, "min"), (self.metric, "max")]:
        logdir = analysis.get_best_logdir(metric, mode=mode)
        best_config = analysis.get_best_config(metric, mode=mode)
        self.assertEqual(analysis.get_all_configs()[logdir], best_config)
""" from ray.tune import Analysis import torch import matplotlib.pyplot as plt from core.dataloader import * from core.model import * from core.training import * from core.evaluating import * from plots.plots import * from core.create_folder import * from core.pred_sequence import * configs = [] #analysis_GSPC = Analysis("~/ray_results/1forward_returns_GSPC") #analysis_IXIC = Analysis("~/ray_results/1forward_returns_IXIC") #analysis_N225 = Analysis("~/ray_results/1forward_returns_N225") analysis_DJI = Analysis("~/ray_results/1_forward_returns_DJI") #configs.append(analysis_GSPC.get_best_config(metric="error", mode="max")) #configs.append(analysis_IXIC.get_best_config(metric="error", mode="max")) #configs.append(analysis_N225.get_best_config(metric="error", mode="max")) configs.append(analysis_DJI.get_best_config(metric="error", mode="max")) print(configs) configs_np = np.empty(shape=(4, 6)) datasets = ["GSPC", "IXIC", "N225", "DJI"] for i in range(4): configs_np[i][0] = datasets[i] configs_np[i][1] = configs[i]["num_layers"] configs_np[i][2] = configs[i]["hidden_dim"] configs_np[i][3] = configs[i]["lr"] configs_np[i][4] = configs[i]["timesteps"] configs_np[i][5] = configs[i]["dropout"] configs_df = pd.DataFrame(configs_np,