def main(args):
    """Hyper-parameter search for GPRegressionMetaLearnedVI.

    Runs a HyperOpt search over VI hyper-parameters with Ray Tune (or loads a
    previous analysis from ``args.load_analysis_from``), then re-trains the N
    best configurations with multiple seeds and evaluates them on the test set,
    writing the results to a timestamped CSV in ``HPARAM_EXP_DIR``.

    Args:
        args: parsed CLI namespace; uses num_cpus, dataset, covar_module,
            metric ('test_ll' or 'test_rmse'), n_test_runs, load_analysis_from.
    """
    ray.init(num_cpus=args.num_cpus,
             memory=3000 * 1024**2,
             object_store_memory=300 * 1024**2)

    def train_reg(config, reporter):
        """Tune trainable: fit a VI model and report validation metrics periodically."""
        sys.path.append(BASE_DIR)

        # 1) load / generate data
        from experiments.data_sim import provide_data
        # seed=SEED keeps the search's data generation consistent with the
        # final train_test runs below, which also pass seed=SEED.
        data_train, data_valid, _ = provide_data(dataset=args.dataset, seed=SEED)

        # 2) setup model
        from meta_learn.GPR_meta_vi import GPRegressionMetaLearnedVI
        torch.set_num_threads(N_THREADS_PER_RUN)
        model = GPRegressionMetaLearnedVI(data_train, **config)

        # 3) train and evaluate model: interleave fitting with validation evals
        eval_period = 2000
        train_iter = 0
        for i in range(config["num_iter_fit"] // eval_period):
            loss = model.meta_fit(verbose=False, log_period=2000, n_iter=eval_period)
            train_iter += eval_period
            ll, rmse, calib_err = model.eval_datasets(data_valid)
            reporter(timesteps_total=train_iter,
                     loss=loss,
                     test_rmse=rmse,
                     test_ll=ll,
                     calib_err=calib_err)

    @ray.remote
    def train_test(config):
        """Train with a given config on the full training data and evaluate on the test set.

        Returns the config dict augmented with ll / rmse / calib_err
        (NaNs if the run failed).
        """
        results_dict = config
        try:
            sys.path.append(BASE_DIR)

            # 1) load / generate data
            from experiments.data_sim import provide_data
            data_train, _, data_test = provide_data(dataset=args.dataset, seed=SEED)

            # 2) fit model
            from meta_learn.GPR_meta_vi import GPRegressionMetaLearnedVI
            torch.set_num_threads(N_THREADS_PER_RUN)
            model = GPRegressionMetaLearnedVI(data_train, **config)
            model.meta_fit(data_test, log_period=5000)

            # 3) evaluate on test set
            ll, rmse, calib_err = model.eval_datasets(data_test)
            results_dict.update(ll=ll, rmse=rmse, calib_err=calib_err)
        except Exception as e:
            # Deliberate best-effort: record NaNs so a single failed run
            # does not abort the whole batch of test runs.
            print(e)
            results_dict.update(ll=np.nan, rmse=np.nan, calib_err=np.nan)
        return results_dict

    if len(args.load_analysis_from) > 0:
        # Reuse a previously completed tune run instead of searching again.
        assert os.path.isdir(args.load_analysis_from), 'load_analysis_from must be a valid directory'
        print('Loading existing tune analysis results from %s' % args.load_analysis_from)
        analysis = Analysis(args.load_analysis_from)
        exp_name = os.path.basename(args.load_analysis_from)
    else:
        space = {
            "weight_prior_std": hp.loguniform("weight_prior_std", math.log(5e-2), math.log(1.0)),
            "prior_factor": hp.loguniform("prior_factor", math.log(1e-5), math.log(1e-1)),
            "lr": hp.loguniform("lr", math.log(5e-4), math.log(5e-3)),
            "lr_decay": hp.loguniform("lr_decay", math.log(0.8), math.log(1.0)),
            "svi_batch_size": hp.choice("svi_batch_size", [10, 50]),
            "task_batch_size": hp.choice("task_batch_size", [4, 10]),
        }

        config = {
            "num_samples": 240,
            "config": {
                "num_iter_fit": 30000,
                'kernel_nn_layers': [32, 32, 32, 32],
                'mean_nn_layers': [32, 32, 32, 32],
                'random_seed': SEED,
                'mean_module': 'NN',
                'covar_module': args.covar_module,
                'normalize_data': True,
                'cov_type': 'diag'
            },
            "stop": {"timesteps_total": 30000},
        }

        # Run hyper-parameter search
        algo = HyperOptSearch(space, max_concurrent=args.num_cpus, metric="test_ll", mode="max")
        exp_name = 'tune_meta_vi_%s_kernel_%s' % (args.covar_module, args.dataset)
        analysis = tune.run(train_reg,
                            name=exp_name,
                            search_alg=algo,
                            verbose=1,
                            local_dir=HPARAM_EXP_DIR,
                            **config)

    # Select N best configurations, re-run train & test with the test seeds
    from experiments.hyperparam_search.util import select_best_configs
    if args.metric == 'test_ll':
        best_configs = select_best_configs(analysis, metric='test_ll', mode='max', N=args.n_test_runs)
    elif args.metric == 'test_rmse':
        best_configs = select_best_configs(analysis, metric='test_rmse', mode='min', N=args.n_test_runs)
    else:
        raise AssertionError('metric must be test_ll or test_rmse')

    # One test run per (best config, seed) combination.
    test_configs = []
    for config in best_configs:
        for seed in TEST_SEEDS:
            test_config = copy.deepcopy(config)
            test_config.update({'random_seed': seed})
            test_configs.append(test_config)

    result_dicts = ray.get([train_test.remote(config) for config in test_configs])

    result_df = pd.DataFrame(result_dicts)
    print(result_df.to_string())

    csv_file_name = os.path.join(
        HPARAM_EXP_DIR,
        '%s_%s.csv' % (exp_name, datetime.now().strftime("%b_%d_%Y_%H:%M:%S")))
    result_df.to_csv(csv_file_name)
    print("\nSaved result csv to %s" % csv_file_name)
def main(args):
    """Hyper-parameter search for GPRegressionMetaLearnedPAC.

    Runs a HyperOpt search over PAC hyper-parameters with Ray Tune (or loads a
    previous analysis when ``args.load_analysis`` is set), then re-trains the N
    best configurations with multiple seeds and evaluates them on the test set,
    writing the results to a timestamped CSV in ``HPARAM_EXP_DIR``.

    Args:
        args: parsed CLI namespace; uses num_cpus, dataset, covar_module,
            metric ('test_ll' or 'test_rmse'), n_test_runs, load_analysis, resume.
    """
    ray.init(num_cpus=args.num_cpus,
             memory=3000 * 1024**2,
             object_store_memory=300 * 1024**2)

    def train_reg(config, reporter):
        """Tune trainable: fit a PAC model, reporting progress and final validation metrics."""
        sys.path.append(BASE_DIR)

        # 1) load / generate data
        from experiments.data_sim import provide_data
        data_train, data_valid, _ = provide_data(dataset=args.dataset, seed=SEED)

        # 2) setup model
        from meta_learn.GPR_meta_pac import GPRegressionMetaLearnedPAC
        torch.set_num_threads(N_THREADS_PER_RUN)
        model = GPRegressionMetaLearnedPAC(data_train, **config)

        # 3) train and evaluate model
        with gpytorch.settings.max_cg_iterations(300):
            log_period = 5000
            train_iter = 0
            loss = 0.0
            diagnostics_dict = {}
            for i in range(config["num_iter_fit"] // log_period):
                loss, diagnostics_dict = model.meta_fit(verbose=False,
                                                        log_period=1000,
                                                        eval_period=100000,
                                                        n_iter=log_period)
                train_iter += log_period
                # Intermediate reports skip the expensive validation eval and
                # report NaN metrics; only the final report below carries them.
                if i < config["num_iter_fit"] // log_period - 1:
                    reporter(timesteps_total=train_iter,
                             loss=loss,
                             test_rmse=math.nan,
                             test_ll=math.nan,
                             calib_err=math.nan,
                             **diagnostics_dict)
            ll, rmse, calib_err = model.eval_datasets(data_valid, n_iter_meta_test=3000)
            reporter(timesteps_total=train_iter,
                     loss=loss,
                     test_rmse=rmse,
                     test_ll=ll,
                     calib_err=calib_err,
                     **diagnostics_dict)

    @ray.remote
    def train_test(config):
        """Train with a given config on the full training data and evaluate on the test set.

        Returns the config dict augmented with ll / rmse / calib_err
        (NaNs if the run failed).
        """
        results_dict = config
        try:
            sys.path.append(BASE_DIR)

            # 1) load / generate data
            from experiments.data_sim import provide_data
            data_train, _, data_test = provide_data(dataset=args.dataset, seed=SEED)

            # 2) fit model
            from meta_learn.GPR_meta_pac import GPRegressionMetaLearnedPAC
            torch.set_num_threads(N_THREADS_PER_RUN)
            with gpytorch.settings.max_cg_iterations(500):
                model = GPRegressionMetaLearnedPAC(data_train, **config)
                model.meta_fit(data_test, log_period=1000, eval_period=100000)

                # 3) evaluate on test set
                ll, rmse, calib_err = model.eval_datasets(data_test, n_iter_meta_test=3000)
            results_dict.update(ll=ll, rmse=rmse, calib_err=calib_err)
        except Exception as e:
            # Deliberate best-effort: record NaNs so a single failed run
            # does not abort the whole batch of test runs.
            print(e)
            results_dict.update(ll=np.nan, rmse=np.nan, calib_err=np.nan)
        return results_dict

    assert args.metric in ['test_ll', 'test_rmse']

    exp_name = 'tune_meta_pac_%s_kernel_%s' % (args.covar_module, args.dataset)

    if args.load_analysis:
        # Reuse a previously completed tune run instead of searching again.
        analysis_dir = os.path.join(HPARAM_EXP_DIR, exp_name)
        assert os.path.isdir(analysis_dir), 'load_analysis_from must be a valid directory'
        print('Loading existing tune analysis results from %s' % analysis_dir)
        analysis = Analysis(analysis_dir)
    else:
        space = {
            "task_kl_weight": hp.loguniform("task_kl_weight", math.log(5e-2), math.log(1e0)),
            "meta_kl_weight": hp.loguniform("meta_kl_weight", math.log(1e-7), math.log(1e0)),
            "lr": hp.loguniform("lr", math.log(1e-4), math.log(1e-3)),
            "lr_decay": hp.loguniform("lr_decay", math.log(0.92), math.log(0.97)),
            "posterior_lr_multiplier": hp.loguniform("posterior_lr_multiplier", math.log(1e0), math.log(10.)),
            "svi_batch_size": hp.choice("svi_batch_size", [5, 10]),
            "task_batch_size": hp.choice("task_batch_size", [5, 20]),
        }

        config = {
            "num_samples": 150,
            "config": {
                "num_iter_fit": 40000,
                'kernel_nn_layers': [32, 32, 32, 32],
                'mean_nn_layers': [32, 32, 32, 32],
                'random_seed': SEED,
                'mean_module': 'NN',
                'covar_module': args.covar_module,
                'normalize_data': True,
                'cov_type': 'diag'
            },
            "stop": {"timesteps_total": 100000},
        }

        # Run hyper-parameter search
        algo = HyperOptSearch(
            space,
            max_concurrent=args.num_cpus,
            metric=args.metric,
            mode="max" if args.metric == 'test_ll' else "min",
        )
        analysis = custom_tune.run(train_reg,
                                   name=exp_name,
                                   search_alg=algo,
                                   verbose=1,
                                   raise_on_failed_trial=False,
                                   local_dir=HPARAM_EXP_DIR,
                                   resume=args.resume,
                                   **config)

    # Select N best configurations, re-run train & test with the test seeds
    from experiments.hyperparam_search.util import select_best_configs
    if args.metric == 'test_ll':
        best_configs = select_best_configs(analysis, metric='test_ll', mode='max', N=args.n_test_runs)
    elif args.metric == 'test_rmse':
        best_configs = select_best_configs(analysis, metric='test_rmse', mode='min', N=args.n_test_runs)
    else:
        raise AssertionError('metric must be test_ll or test_rmse')

    # One test run per (best config, seed) combination.
    test_configs = []
    for config in best_configs:
        for seed in TEST_SEEDS:
            test_config = copy.deepcopy(config)
            test_config.update({'random_seed': seed})
            test_configs.append(test_config)

    result_dicts = ray.get([train_test.remote(config) for config in test_configs])

    result_df = pd.DataFrame(result_dicts)
    print(result_df.to_string())

    csv_file_name = os.path.join(
        HPARAM_EXP_DIR,
        '%s_%s.csv' % (exp_name, datetime.now().strftime("%b_%d_%Y_%H:%M:%S")))
    result_df.to_csv(csv_file_name)
    print("\nSaved result csv to %s" % csv_file_name)