def adapt_and_test():
    import os
    import dill
    from playground.maml.maml_torch.maml_multi_step import FunctionalMLP

    logger.configure(log_directory=Args.log_dir, prefix=Args.log_prefix)
    logger.log_params(Args=vars(Args))

    # load the meta-learned weights
    with open(os.path.join(Args.log_dir, Args.log_prefix, Args.weight_path), 'rb') as f:
        weights = dill.load(f)

    model = FunctionalMLP(1, 1)
    losses = DefaultBear(list)
    for amp, task in amp_tasks:
        # reset the model to the meta-learned initialization
        model.params.update({
            k: t.tensor(v, requires_grad=True, dtype=t.double).to(device)
            for k, v in weights[0].items()
        })
        sgd = t.optim.SGD(model.parameters(), lr=Args.learning_rate)

        proper = t.tensor(task.proper()).to(device)
        samples = t.tensor(task.samples(Args.k_shot)).to(device)
        for grad_ind in range(Args.grad_steps):
            # evaluate on the full task before each adaptation step
            with t.no_grad():
                xs, labels = proper
                ys = model(xs.unsqueeze(-1))
                loss = model.criteria(ys, labels.unsqueeze(-1))
            logger.log(grad_ind, loss=loss.item(), silent=grad_ind != Args.grad_steps - 1)
            losses[f"amp-{amp:.2f}-loss"].append(loss.item())

            # adapt on the k-shot samples
            xs, labels = samples
            ys = model(xs.unsqueeze(-1))
            loss = model.criteria(ys, labels.unsqueeze(-1))
            sgd.zero_grad()
            loss.backward()
            sgd.step()

    # losses = np.array([v for k, v in losses.items()])
    import matplotlib.pyplot as plt

    fig = plt.figure()
    plt.title('Learning Curves')
    for amp, task in amp_tasks:
        plt.plot(losses[f"amp-{amp:.2f}-loss"], label=f"amp {amp:.2f}")
    plt.legend()
    logger.log_pyplot(None, key="losses/learning_curves_amp.png", fig=fig)
    plt.close()

    average_losses = np.array([losses[f"amp-{amp:.2f}-loss"] for amp, task in amp_tasks])
    fig = plt.figure()
    plt.title('Learning Curves Averaged amp ~ [5 - 10]')
    plt.plot(average_losses.mean(0))
    plt.ylim(0, 28)
    logger.log_pyplot(None, key="losses/learning_curves_amp_all.png", fig=fig)
    plt.close()
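# `DefaultBear` is imported elsewhere; a minimal hypothetical stand-in,
# assuming it behaves like a `collections.defaultdict` with attribute access
# (as in the `waterbear` package). Only the default-factory dict behavior is
# exercised by `adapt_and_test` above.
from collections import defaultdict

class DefaultBear(defaultdict):
    def __getattr__(self, key):
        try:
            return self[key]
        except KeyError as e:
            raise AttributeError(key) from e

    def __setattr__(self, key, value):
        self[key] = value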
def __init__(self, exp_prefix, est_params, sim_params, observations, keys_of_interest,
             n_mc_samples=10**7, n_x_cond=5, n_seeds=5, use_gpu=True, tail_measures=True):

    assert est_params and exp_prefix and sim_params and keys_of_interest
    assert observations.all()

    # every simulator configuration will be run multiple times with different randomness seeds
    sim_params = _add_seeds_to_sim_params(n_seeds, sim_params)

    self.observations = observations
    self.n_mc_samples = n_mc_samples
    self.n_x_cond = n_x_cond
    self.keys_of_interest = keys_of_interest
    self.exp_prefix = exp_prefix
    self.use_gpu = use_gpu
    self.tail_measures = tail_measures

    logger.configure(log_directory=config.DATA_DIR, prefix=exp_prefix, color='green')

    ''' ---------- Either load or generate the configs ---------- '''
    config_pkl_path = os.path.join(logger.log_directory, logger.prefix, EXP_CONFIG_FILE)

    if os.path.isfile(config_pkl_path):
        logger.log("{:<70s} {:<30s}".format("Loading previous experiment configs from file: ", config_pkl_path))
        self.configs = logger.load_pkl(EXP_CONFIG_FILE)
    else:
        logger.log("{:<70s} {:<30s}".format("Generating and storing experiment configs under: ", config_pkl_path))
        self.configs = self._generate_configuration_variants(est_params, sim_params)
        logger.dump_pkl(data=self.configs, path=EXP_CONFIG_FILE)

    ''' ---------- Either load already existing results or start a new result collection ---------- '''
    results_pkl_path = os.path.join(logger.log_directory, logger.prefix, RESULTS_FILE)

    if os.path.isfile(results_pkl_path):
        logger.log_line("{:<70s} {:<30s}".format("Continuing with: ", results_pkl_path))
        self.gof_single_res_collection = dict(logger.load_pkl_log(RESULTS_FILE))
    else:  # start from scratch
        self.gof_single_res_collection = {}

    self.gof_results = GoodnessOfFitResults(self.gof_single_res_collection)
def test_metrics_prefix(setup):
    from ml_logger import logger

    logger.remove("metrics.pkl")
    with logger.Prefix(metrics="evaluate/", sep=""):
        logger.log(loss=0.5, flush=True)
    assert logger.read_metrics("evaluate/loss")[0] == 0.5
def test(setup):
    d = Color(3.1415926, 'red')
    s = "{:.1}".format(d)
    print(s)

    logger.log_params(G=dict(some_config="hey"))
    logger.log(step=0, some=Color(0.1, 'yellow'))
    logger.log(step=1, some=Color(0.28571, 'yellow', lambda v: "{:.5f}%".format(v * 100)))
    logger.log(step=2, some=Color(0.85, 'yellow', percent))
    logger.log({"some_var/smooth": 10}, some=Color(0.85, 'yellow', percent), step=3)
    logger.log(step=4, some=Color(10, 'yellow'))
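# `percent` is imported from the test helpers; a hypothetical stand-in with
# the same shape as the inline lambda on the previous lines -- an assumption,
# not the library's actual formatter.
def percent(v):
    return "{:.1f}%".format(v * 100)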
def run_benchmark_train_test_fit_cv_ml(dataset, model_dict, seed=27, n_train_valid_splits=1,
                                       shuffle_splits=True, n_jobs_outer=-1):
    if logger.log_directory is None:
        logger.configure(log_directory='/tmp/ml-logger')

    rds = np.random.RandomState(seed)

    logger.log("\n------------------ empirical cv_ml benchmark with %s ----------------------" % str(dataset))

    datasets = zip(*dataset.get_train_valid_splits(valid_portion=0.2, n_splits=n_train_valid_splits,
                                                   shuffle=shuffle_splits, random_state=rds))
    exps = list(zip(*itertools.product(model_dict.items(), datasets)))

    # manager dict/list so the parallel worker processes can write results back
    manager = Manager()
    result_dict = manager.dict()
    for estimator_key in model_dict.keys():
        result_dict[estimator_key] = manager.list()

    def _fit_by_cv_ml_eval(model_dict_tuple, train_valid_set_tuple):
        estimator_key, conf_dict = model_dict_tuple
        X_train, Y_train, X_valid, Y_valid = train_valid_set_tuple

        estimator = _initialize_model_cv_ml(conf_dict)
        estimator.fit(X_train, Y_train)
        score = estimator.score(X_valid, Y_valid)

        result_dict[estimator_key].append(score)
        logger.log('%s: score: %.4f' % (estimator_key, score))

    executor = AsyncExecutor(n_jobs=n_jobs_outer)
    executor.run(_fit_by_cv_ml_eval, *exps)

    # convert result_dict into a normal python dict containing lists
    result_dict = dict([(key, list(value)) for key, value in result_dict.items()])
    pprint(result_dict)

    # rearrange results as a pandas df
    final_results_dict = {'scores_mean': [], 'scores_std': [], 'dataset': []}
    for estimator_key, scores in result_dict.items():
        final_results_dict['scores_mean'].append(np.nanmean(scores))
        final_results_dict['scores_std'].append(np.nanstd(scores))
        final_results_dict['dataset'].append(str(dataset))

    df = pd.DataFrame.from_dict(data=final_results_dict, orient='columns')
    df.index = list(model_dict.keys())

    logger.log('\n' + str(df))
    return df
def experiment():
    logger.configure(log_directory=config.DATA_DIR, prefix=EXP_PREFIX, color='green')

    # 1) EUROSTOXX
    dataset = datasets.EuroStoxx50()
    result_df = run_benchmark_train_test_fit_cv(dataset, model_dict, n_train_valid_splits=3,
                                                n_eval_seeds=5, shuffle_splits=False, n_folds=5,
                                                seed=22, n_jobs_inner=-1, n_jobc_outer=3)

    # 2) NYC Taxi
    for n_samples in [10000]:
        dataset = datasets.NCYTaxiDropoffPredict(n_samples=n_samples)
        df = run_benchmark_train_test_fit_cv(dataset, model_dict, n_train_valid_splits=3,
                                             n_eval_seeds=5, shuffle_splits=True, n_folds=5,
                                             seed=22, n_jobs_inner=-1, n_jobc_outer=3)
        result_df = pd.concat([result_df, df], ignore_index=True)

    # 3) UCI
    for dataset_class in [datasets.BostonHousing, datasets.Conrete, datasets.Energy]:
        dataset = dataset_class()
        df = run_benchmark_train_test_fit_cv(dataset, model_dict, n_train_valid_splits=3,
                                             n_eval_seeds=5, shuffle_splits=True, n_folds=5,
                                             seed=22, n_jobs_inner=-1, n_jobc_outer=3)
        result_df = pd.concat([result_df, df], ignore_index=True)

    logger.log('\n' + str(result_df))
    logger.log('\n' + result_df.to_latex())
def run_configurations(self, dump_models=False, multiprocessing=True, n_workers=None):
    """
    Runs the given configurations, i.e.
    1) fits the estimator to the simulation and
    2) executes goodness-of-fit tests (currently e.g. kl-divergence, wasserstein-distance etc.)

    Every successful run yields a result object of type GoodnessOfFitResult which carries
    information on the estimator, the simulator and the chosen hyperparameters such as
    n_samples -- see the GoodnessOfFitResult documentation for more information.

    Args:
        dump_models: (boolean) whether to save/dump the fitted estimators
        multiprocessing: (boolean) whether to run the tasks in parallel worker processes
        n_workers: number of worker processes, passed through to AsyncExecutor

    Returns:
        None -- each finished task is appended to the results pickle (RESULTS_FILE)
        as it completes.
    """
    self.dump_models = dump_models

    ''' Asserts '''
    assert len(self.configs) > 0
    tasks = self.configs

    ''' Run the configurations '''
    logger.log("{:<70s} {:<30s}".format("Number of total tasks in pipeline:", str(len(self.configs))))
    logger.log("{:<70s} {:<30s}".format("Number of already finished tasks (found in results pickle): ",
                                        str(len(self.gof_single_res_collection))))

    iters = range(len(tasks))
    if multiprocessing:
        executor = AsyncExecutor(n_jobs=n_workers)
        executor.run(self._run_single_task, iters, tasks)
    else:
        for i, task in zip(iters, tasks):
            self._run_single_task(i, task)
def sgd_baseline(lr=0.001):
    from playground.maml.maml_torch.tasks import Sine

    task = Sine()
    model = StandardMLP(1, 1) if G.debug else FunctionalMLP(1, 1)
    adam = t.optim.Adam([p for p in model.parameters()], lr=lr)
    mse = t.nn.MSELoss()
    for ep_ind in range(1000):
        xs, labels = h.const(task.proper())
        ys = model(xs.unsqueeze(-1))
        loss = mse(ys, labels.unsqueeze(-1))
        # `silent` is truthy except every 50th epoch, so progress prints sparsely
        logger.log(ep_ind, loss=loss.item(), silent=ep_ind % 50)
        adam.zero_grad()
        loss.backward()
        adam.step()
    logger.flush()
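# `h` is a helper module imported elsewhere; a hypothetical sketch of
# `h.const`, assuming it merely wraps a tuple of numpy arrays as constant
# (no-grad) torch tensors. The dtype is an assumption.
import torch as t

def const(arrays):
    return tuple(t.tensor(a, dtype=t.float32) for a in arrays)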
def _initialize_model_cv(model_key, conf_dict, verbose=False):
    ''' make Cartesian product of listed parameters per model '''
    assert 'estimator' in conf_dict.keys()
    estimator = conf_dict.pop('estimator')

    param_dict_cv = {}
    param_dict_init = {}
    for param_key, param_value in conf_dict.items():
        if type(param_value) in (list, tuple):
            # list-valued entries go into the cv grid; their first value seeds the init
            param_dict_cv[param_key] = param_value
            param_dict_init[param_key] = param_value[0]
        else:
            param_dict_init[param_key] = param_value

    param_dict_init['name'] = model_key
    if verbose:
        logger.log('initialize %s' % model_key)
    estimator_instance = globals()[estimator](**param_dict_init)
    return estimator_instance, param_dict_cv, param_dict_init
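# To illustrate the split performed by _initialize_model_cv: list-valued
# entries become the CV search grid, while scalars (plus the first element of
# each list) seed the init kwargs. The estimator name and parameter keys
# below are placeholders, not the repo's actual configuration.
conf = {'estimator': 'MixtureDensityNetwork',
        'n_centers': [5, 10, 20],    # -> goes into param_dict_cv
        'ndim_x': 1, 'ndim_y': 1}    # -> goes into param_dict_init
# after _initialize_model_cv('mdn', conf):
#   param_dict_cv   == {'n_centers': [5, 10, 20]}
#   param_dict_init == {'n_centers': 5, 'ndim_x': 1, 'ndim_y': 1, 'name': 'mdn'}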
def regular_sgd_baseline(model, Task, n_epochs, batch_n, k_shot=100, **_):
    problem = Task()
    # simple gradient descent
    for ep_ind in trange(n_epochs, desc="Epochs", ncols=50, leave=False):
        loss = 0
        for _ in range(batch_n):
            xs, ys = problem.samples(k_shot)
            output = model(t.tensor(xs).unsqueeze(dim=-1))
            targets = t.tensor(ys).unsqueeze(dim=-1)
            loss += model.criteria(output, targets)
        loss /= batch_n
        model.zero_grad()
        loss.backward()
        model.step(lr=G.alpha)
        logger.log(ep_ind, loss=loss.item())
        if ep_ind % 100 == 0 or ep_ind == (n_epochs - 1):
            pass  # placeholder for a periodic evaluation/plotting hook
def train_maml(*, n_tasks: int, tasks: MetaRLTasks, maml: E_MAML):
    if not G.inner_alg.startswith("BC"):
        path_gen = path_gen_fn(env=tasks.envs, policy=maml.runner.policy, start_reset=G.reset_on_start)
        next(path_gen)
    meta_path_gen = path_gen_fn(env=tasks.envs, policy=maml.meta_runner.policy, start_reset=G.reset_on_start)
    next(meta_path_gen)

    if G.load_from_checkpoint:
        # todo: add variable to checkpoint
        # todo: set the epoch_ind starting point here.
        logger.load_variables(G.load_from_checkpoint)

    if G.meta_sgd:
        assert maml.alpha is not None, "Coding mistake if meta_sgd is truthy but maml.alpha is None."

    max_episode_length = tasks.spec.max_episode_steps
    sess = tf.get_default_session()

    epoch_ind, prefix = G.epoch_init - 1, ""
    while epoch_ind < G.epoch_init + G.n_epochs:
        logger.flush()
        logger.split()

        is_bc_test = (prefix != "test/" and G.eval_interval and epoch_ind % G.eval_interval == 0)
        prefix = "test/" if is_bc_test else ""
        epoch_ind += 0 if is_bc_test else 1

        if G.meta_sgd:
            alpha_lr = sess.run(maml.alpha)  # only used in the runner.
            logger.log(metrics={f"alpha_{i}/{stem(t.name, 2)}": a
                                for i, a_ in enumerate(alpha_lr)
                                for t, a in zip(maml.runner.trainables, a_)}, silent=True)
        else:
            alpha_lr = G.alpha.send(epoch_ind) if isinstance(G.alpha, Schedule) else np.array(G.alpha)
            logger.log(alpha=metrify(alpha_lr), epoch=epoch_ind, silent=True)

        beta_lr = G.beta.send(epoch_ind) if isinstance(G.beta, Schedule) else np.array(G.beta)
        clip_range = G.clip_range.send(epoch_ind) if isinstance(G.clip_range, Schedule) else np.array(G.clip_range)
        logger.log(beta=metrify(beta_lr), clip_range=metrify(clip_range), epoch=epoch_ind, silent=True)

        batch_timesteps = G.batch_timesteps.send(epoch_ind) \
            if isinstance(G.batch_timesteps, Schedule) else G.batch_timesteps

        # Compute updates for each task in the batch
        # 0. save value of variables
        # 1. sample
        # 2. gradient descent
        # 3. repeat step 1., 2. until all gradient steps are exhausted.
        batch_data = defaultdict(list)
        maml.save_weight_cache()
        load_ops = [] if DEBUG.no_weight_reset else [maml.cache.load]

        if G.checkpoint_interval and epoch_ind % G.checkpoint_interval == 0 \
                and not is_bc_test and epoch_ind >= G.start_checkpoint_after_epoch:
            cp_path = f"checkpoints/variables_{epoch_ind:04d}.pkl"
            logger.log_line(f'saving checkpoint {cp_path}')
            # note: of course I don't know that these are all of the trainables at the moment.
            logger.save_variables(tf.trainable_variables(), path=cp_path)

        feed_dict = {}
        for task_ind in range(n_tasks if is_bc_test else G.n_tasks):
            graph_branch = maml.graphs[0] if G.n_graphs == 1 else maml.graphs[task_ind]
            if G.n_graphs == 1:
                gradient_sum_op = maml.gradient_sum.set_op if task_ind == 0 else maml.gradient_sum.add_op

            print(f"task_ind {task_ind}...")
            if not DEBUG.no_task_resample:
                if not is_bc_test:
                    print(f'L250: sampling task')
                    tasks.sample()
                elif task_ind < n_tasks:
                    task_spec = dict(index=task_ind % n_tasks)
                    print(f'L254: sampling task {task_spec}')
                    tasks.sample(**task_spec)
                else:
                    raise RuntimeError('should never hit here.')

            for k in range(G.n_grad_steps + 1):  # 0 - 10 <== last one being the maml policy.
                _is_new = False
                # for imitation inner loss, we still sample trajectory for evaluation purposes, but
                # replace it with the demonstration data for learning
                if k < G.n_grad_steps:
                    if G.inner_alg.startswith("BC"):
                        p = p if G.single_sampling and k > 0 else \
                            bc.sample_demonstration_data(tasks.task_spec, key=("eval" if is_bc_test else None))
                    else:
                        p, _is_new = path_gen.send(batch_timesteps), True
                elif k == G.n_grad_steps:
                    if G.meta_alg.startswith("BC"):
                        # note: use meta bc samples.
                        p = bc.sample_demonstration_data(tasks.task_spec, key="meta")
                    else:
                        p, _is_new = meta_path_gen.send(batch_timesteps), True
                else:
                    raise Exception('Implementation error. Should never reach this line.')

                if k in G.eval_grad_steps:
                    _ = path_gen if k < G.n_grad_steps else meta_path_gen
                    p_eval = p if _is_new else _.send(G.eval_timesteps)
                    # reporting on new trajectory samples
                    avg_r = p_eval['ep_info']['reward'] if G.normalize_env else np.mean(p_eval['rewards'])
                    episode_r = avg_r * max_episode_length  # default horizon for HalfCheetah

                    if episode_r < G.term_reward_threshold:  # todo: make this batch-based instead of on single episode
                        logger.log_line("episode reward is too low: ", episode_r, "terminating training.", flush=True)
                        raise RuntimeError('AVERAGE REWARD TOO LOW. Terminating the experiment.')

                    batch_data[prefix + f"grad_{k}_step_reward"].append(avg_r if Reporting.report_mean else episode_r)
                    logger.log_key_value(prefix + f"task_{task_ind}_grad_{k}_reward", episode_r, silent=True)

                _p = {k: v for k, v in p.items() if k != "ep_info"}

                if k < G.n_grad_steps:
                    # note: under meta-SGD mode, the runner needs the k^th learning rate.
                    _lr = alpha_lr[k] if G.meta_sgd else alpha_lr
                    # clip_range is not used in BC mode, but is still passed in.
                    runner_feed_dict = \
                        path_to_feed_dict(inputs=maml.runner.inputs, paths=_p, lr=_lr, baseline=G.baseline,
                                          gamma=G.gamma, use_gae=G.use_gae, lam=G.lam,
                                          horizon=max_episode_length, clip_range=clip_range)
                    # todo: optimize `maml.meta_runner` if k >= G.n_grad_steps.
                    loss, *_, __ = maml.runner.optim.run_optimize(feed_dict=runner_feed_dict)
                    runner_feed_dict.clear()
                    for key, value in zip(maml.runner.model.reports.keys(), [loss, *_]):
                        batch_data[prefix + f"grad_{k}_step_{key}"].append(value)
                        logger.log_key_value(prefix + f"task_{task_ind}_grad_{k}_{key}", value, silent=True)

                    if loss > G.term_loss_threshold:  # todo: make this batch-based instead of on single episode
                        logger.log_line(prefix + "episode loss blew up:", loss, "terminating training.", flush=True)
                        raise RuntimeError('loss is TOO HIGH. Terminating the experiment.')

                    # done: has bug when using fixed learning rate. Needs the learning rate as input.
                    feed_dict.update(
                        # do NOT pass in the learning rate because the graph already includes those.
                        path_to_feed_dict(inputs=graph_branch.workers[k].inputs, paths=_p,
                                          lr=None if G.meta_sgd else alpha_lr,  # but do with fixed alpha
                                          horizon=max_episode_length, baseline=G.baseline, gamma=G.gamma,
                                          use_gae=G.use_gae, lam=G.lam, clip_range=clip_range))
                elif k == G.n_grad_steps:
                    yield_keys = dict(
                        movie=epoch_ind >= G.start_movie_after_epoch and epoch_ind % G.record_movie_interval == 0,
                        eval=is_bc_test
                    )
                    if np.fromiter(yield_keys.values(), bool).any():
                        yield yield_keys, epoch_ind, tasks.task_spec
                    if is_bc_test:
                        if load_ops:  # we need to reset the weights. Otherwise the world would be on fire.
                            tf.get_default_session().run(load_ops)
                        continue  # do NOT meta learn from test samples.

                    # we don't treat the meta_input the same way even though we could. This is more clear to read.
                    # note: feed in the learning rate only later.
                    feed_dict.update(  # do NOT need learning rate
                        path_to_feed_dict(inputs=graph_branch.meta.inputs, paths=_p, horizon=max_episode_length,
                                          baseline=G.baseline, gamma=G.gamma, use_gae=G.use_gae, lam=G.lam,
                                          clip_range=clip_range))

                    if G.n_graphs == 1:
                        # load from checkpoint before computing the meta gradient,
                        # then run the gradient sum operation
                        if load_ops:
                            tf.get_default_session().run(load_ops)
                        # note: meta reporting should be run here. Not supported for simplicity. (need to reduce
                        # note: across tasks, and can not be done outside individual task graphs.)
                        if G.meta_sgd is None:  # note: copied from train_supervised_maml, not tested
                            feed_dict[maml.alpha] = alpha_lr
                        tf.get_default_session().run(gradient_sum_op, feed_dict)
                        feed_dict.clear()

        if load_ops:
            tf.get_default_session().run(load_ops)

        if is_bc_test:
            continue  # do NOT meta learn from test samples.

        # note: copied from train_supervised_maml, not tested
        if G.meta_sgd is None:
            feed_dict[maml.alpha] = alpha_lr

        if G.n_graphs == 1:
            assert G.meta_n_grad_steps == 1, "ERROR: Can only run 1 meta gradient step with a single graph."
            # note: remove meta reporting b/c meta report should be in each task in this case.
            tf.get_default_session().run(maml.meta_update_ops[0], {maml.beta: beta_lr})
        else:
            assert feed_dict, "ERROR: It is likely that you jumped here from L:178."
            feed_dict[maml.beta] = beta_lr
            for i in range(G.meta_n_grad_steps):
                update_op = maml.meta_update_ops[0 if G.reuse_meta_optimizer else i]
                *reports, _ = tf.get_default_session().run(maml.meta_reporting + [update_op], feed_dict)
                if i not in (0, G.meta_n_grad_steps - 1):
                    continue
                for key, v in zip(maml.meta_reporting_keys, reports):
                    logger.log_key_value(prefix + f"grad_{G.n_grad_steps + i}_step_{key}", v, silent=True)
            feed_dict.clear()

        tf.get_default_session().run(maml.cache.save)

        # Now compute the meta gradients.
        # note: runner shares variables with the MAML graph. Reload from state_dict.
        # note: if max_grad_step is the same as n_grad_steps then no need here.

        dt = logger.split()
        logger.log_line('Timer Starts...' if dt is None else f'{dt:0.2f} sec/epoch')
        logger.log(dt_epoch=dt or np.nan, epoch=epoch_ind)

        for key, arr in batch_data.items():
            reduced = np.array(arr).mean()
            logger.log_key_value(key, reduced)

    logger.flush()
def train_supervised_maml(*, k_tasks=1, maml: E_MAML):
    # env used for evaluation purposes only.
    if G.meta_sgd:
        assert maml.alpha is not None, "Coding mistake if meta_sgd is truthy but maml.alpha is None."
    assert G.n_tasks >= k_tasks, f"Is this intended? You probably want to have " \
                                 f"meta-batch({G.n_tasks}) >= k_tasks({k_tasks})."

    sess = tf.get_default_session()

    epoch_ind, pref = -1, ""
    while epoch_ind < G.n_epochs:  # for epoch_ind in range(G.n_epochs + 1):
        logger.flush()
        logger.split()

        is_bc_test = (pref != "test/" and G.eval_interval and epoch_ind % G.eval_interval == 0)
        pref = "test/" if is_bc_test else ""
        epoch_ind += 0 if is_bc_test else 1

        if G.meta_sgd:
            alpha_lr = sess.run(maml.alpha)  # only used in the runner.
            logger.log(metrics={f"alpha_{i}/{stem(t.name, 2)}": a
                                for i, a_ in enumerate(alpha_lr)
                                for t, a in zip(maml.runner.trainables, a_)}, silent=True)
        else:
            alpha_lr = G.alpha.send(epoch_ind) if isinstance(G.alpha, Schedule) else np.array(G.alpha)
            logger.log(alpha=metrify(alpha_lr), epoch=epoch_ind, silent=True)

        beta_lr = G.beta.send(epoch_ind) if isinstance(G.beta, Schedule) else np.array(G.beta)
        logger.log(beta=metrify(beta_lr), epoch=epoch_ind, silent=True)

        if G.checkpoint_interval and epoch_ind % G.checkpoint_interval == 0:
            yield "pre-update-checkpoint", epoch_ind

        # Compute updates for each task in the batch
        # 0. save value of variables
        # 1. sample
        # 2. gradient descent
        # 3. repeat step 1., 2. until all gradient steps are exhausted.
        batch_data = defaultdict(list)
        maml.save_weight_cache()
        load_ops = [] if DEBUG.no_weight_reset else [maml.cache.load]

        feed_dict = {}
        for task_ind in range(k_tasks if is_bc_test else G.n_tasks):
            graph_branch = maml.graphs[0] if G.n_graphs == 1 else maml.graphs[task_ind]
            if G.n_graphs == 1:
                gradient_sum_op = maml.gradient_sum.set_op if task_ind == 0 else maml.gradient_sum.add_op

            """
            In BC mode, we don't have an environment. The sampling is handled here
            then fed to the sampler.
            > task_spec = dict(index=0)
            Here we make the testing more efficient.
            """
            if not DEBUG.no_task_resample:
                if not is_bc_test:
                    task_spec = dict(index=np.random.randint(0, k_tasks))
                elif task_ind < k_tasks:
                    task_spec = dict(index=task_ind % k_tasks)
                else:
                    raise RuntimeError('should never hit here.')

            for k in range(G.n_grad_steps + 1):  # 0 - 10 <== last one being the maml policy.
                # for imitation inner loss, we still sample trajectory for evaluation purposes, but
                # replace it with the demonstration data for learning
                if k < G.n_grad_steps:
                    p = p if G.single_sampling and k > 0 else \
                        bc.sample_demonstration_data(task_spec, key=("eval" if is_bc_test else None))
                elif k == G.n_grad_steps:
                    # note: use meta bc samples.
                    p = bc.sample_demonstration_data(task_spec, key="meta")
                else:
                    raise Exception('Implementation error. Should never reach this line.')

                _p = {k: v for k, v in p.items() if k != "ep_info"}

                if k < G.n_grad_steps:
                    # note: under meta-SGD mode, the runner needs the k^th learning rate.
                    _lr = alpha_lr[k] if G.meta_sgd else alpha_lr
                    runner_feed_dict = path_to_feed_dict(inputs=maml.runner.inputs, paths=_p, lr=_lr)
                    # todo: optimize `maml.meta_runner` if k >= G.n_grad_steps.
                    loss, *_, __ = maml.runner.optim.run_optimize(feed_dict=runner_feed_dict)
                    runner_feed_dict.clear()
                    for key, value in zip(maml.runner.model.reports.keys(), [loss, *_]):
                        batch_data[pref + f"grad_{k}_step_{key}"].append(value)
                        logger.log_key_value(pref + f"task_{task_ind}_grad_{k}_{key}", value, silent=True)

                    if loss > G.term_loss_threshold:  # todo: make this batch-based instead of on single episode
                        err = pref + f"episode loss blew up: {loss}, terminating training."
                        logger.log_line(colored(err, "red"), flush=True)
                        raise RuntimeError('loss is TOO HIGH. Terminating the experiment.')

                    # fixit: has bug when using fixed learning rate. Still needs to get learning rate from placeholder
                    feed_dict.update(path_to_feed_dict(inputs=graph_branch.workers[k].inputs, paths=_p))
                elif k == G.n_grad_steps:
                    yield_keys = dict(
                        movie=G.record_movie_interval and epoch_ind >= G.start_movie_after_epoch
                              and epoch_ind % G.record_movie_interval == 0,
                        eval=is_bc_test
                    )
                    if np.fromiter(yield_keys.values(), bool).any():
                        yield yield_keys, epoch_ind, task_spec
                    if is_bc_test:
                        if load_ops:
                            tf.get_default_session().run(load_ops)
                        continue  # do NOT meta learn from test samples.

                    # we don't treat the meta_input the same way even though we could. This is more clear to read.
                    # note: feed in the learning rate only later.
                    feed_dict.update(path_to_feed_dict(inputs=graph_branch.meta.inputs, paths=_p))

                    if G.n_graphs == 1:
                        # load from checkpoint before computing the meta gradient,
                        # then run the gradient sum operation
                        if load_ops:
                            tf.get_default_session().run(load_ops)
                        # note: meta reporting should be run here. Not supported for simplicity. (need to reduce
                        # note: across tasks, and can not be done outside individual task graphs.)
                        if G.meta_sgd is None:
                            feed_dict[maml.alpha] = alpha_lr
                        tf.get_default_session().run(gradient_sum_op, feed_dict)
                        feed_dict.clear()

        if load_ops:
            tf.get_default_session().run(load_ops)

        if is_bc_test:
            continue  # do NOT meta learn from test samples.

        if G.meta_sgd is None:
            feed_dict[maml.alpha] = alpha_lr

        if G.n_graphs == 1:
            assert G.meta_n_grad_steps == 1, "ERROR: Can only run 1 meta gradient step with a single graph."
            # note: remove meta reporting b/c meta report should be in each task in this case.
            tf.get_default_session().run(maml.meta_update_ops[0], {maml.beta: beta_lr})
        else:
            assert feed_dict, "ERROR: It is likely that you jumped here from L:178."
            feed_dict[maml.beta] = beta_lr
            for i in range(G.meta_n_grad_steps):
                update_op = maml.meta_update_ops[0 if G.reuse_meta_optimizer else i]
                *reports, _ = tf.get_default_session().run(maml.meta_reporting + [update_op], feed_dict)
                if i not in (0, G.meta_n_grad_steps - 1):
                    continue
                for key, v in zip(maml.meta_reporting_keys, reports):
                    logger.log_key_value(pref + f"grad_{G.n_grad_steps + i}_step_{key}", v, silent=True)
            feed_dict.clear()

        tf.get_default_session().run(maml.cache.save)

        # Now compute the meta gradients.
        # note: runner shares variables with the MAML graph. Reload from state_dict.
        # note: if max_grad_step is the same as n_grad_steps then no need here.

        dt = logger.split()
        logger.log_line('Timer Starts...' if dt is None else f'{dt:0.2f} sec/epoch')
        logger.log(dt_epoch=dt or np.nan, epoch=epoch_ind)

        for key, arr in batch_data.items():
            reduced = np.array(arr).mean()
            logger.log_key_value(key, reduced)
def train(self, *, tasks, maml: E_MAML, plot_fn=None, test_tasks=None):
    max_grad_steps = max(G.n_grad_steps, *G.eval_grad_steps)

    for epoch_ind in range(G.n_epochs):
        # note: epoch_ind ranges over [0, n_epochs), so this is never True as written.
        is_the_end = (epoch_ind == G.n_epochs)
        should_plot = (epoch_ind % Reporting.plot_interval == 0) if Reporting.plot_interval else False
        should_save = (epoch_ind % Reporting.save_interval == 0) if Reporting.save_interval else False
        should_test = (epoch_ind % G.eval_test_interval == 0) if G.eval_test_interval else False

        # linearly anneal the learning rates and the clip range
        frac = 1.0 - (epoch_ind - 1.0) / G.n_epochs
        alpha_lr = G.alpha * frac
        beta_lr = G.beta * frac
        clip_range = G.clip_range * frac

        # Compute updates for each task in the batch
        # 0. save value of variables
        # 1. sample
        # 2. gradient descent
        # 3. repeat step 1., 2. until all gradient steps are exhausted.
        batch_data = defaultdict(list)

        if DEBUG.debug_params:
            debug_tensor_key = 'runner_network/MlpPolicy/pi/b:0'
            runner_state_dict = {}
            meta_state_dict = {}
            runner_grads = defaultlist(dict)
            meta_grads = defaultlist(dict)
            all_grads = []

        if not DEBUG.no_weight_reset:
            # M.white('<--- save weights')
            maml.save_checkpoint()

        feed_dict = {}
        for task_ind, meta_branch in enumerate(maml.task_graphs):
            if not DEBUG.no_task_resample or (task_ind == 0 and epoch_ind == 0):
                # M.white('===> re-sample tasks', end='')
                env = tasks.sample()

            if task_ind != 0 and not DEBUG.no_weight_reset:
                # M.white('---> resetting weights for worker sampling')
                maml.load_checkpoint()
            else:
                # M.white('---> Do NOT reset for first worker')
                pass

            worker_paths = defaultlist(None)  # get paths for the first update.
            for k in range(max_grad_steps + 1):  # 0 - 10 <== last one being the maml policy.
                # debug code
                if DEBUG.debug_params:
                    runner_state_dict[k] = maml.runner.policy.state_dict
                    print("k =", k, debug_tensor_key, ": ", end='')
                    print(runner_state_dict[k][debug_tensor_key])

                # collect samples from the environment
                if G.single_sampling:
                    if k == 0 or k == G.n_grad_steps:
                        # M.print('$!#$@#$ sample from environment')
                        worker_paths[k] = p = self.sample_from_env(env, maml.runner.policy, render=False)
                    else:
                        # M.print('^^^^^^^ copy previous sample')
                        worker_paths[k] = p
                else:
                    # M.print('$!#$@#$ sample from environment')
                    worker_paths[k] = p = self.sample_from_env(env, maml.runner.policy, render=False)

                avg_r = np.mean(p['rewards'])
                episode_r = avg_r * tasks.spec.max_episode_steps  # default horizon for HalfCheetah
                if k in G.eval_grad_steps:
                    batch_data['grad_{}_step_reward'.format(k)].append(
                        avg_r if Reporting.report_mean else episode_r)

                if episode_r < G.term_reward_threshold:  # todo: make this based on batch instead of a single episode.
                    print(episode_r)
                    raise RuntimeError('AVERAGE REWARD TOO LOW. Terminating the experiment.')

                _p = {k: v for k, v in p.items() if k != "ep_infos"}

                # Here we gradient descent on the same data only once. In the future, we could explore cases
                # involving more updates.
                if k < max_grad_steps:
                    # M.red('....... Optimize Model')
                    runner_feed_dict = \
                        path_to_feed_dict(inputs=maml.runner.inputs, paths=_p,
                                          lr=alpha_lr, clip_range=clip_range)
                    if not DEBUG.debug_params:
                        maml.runner.optim.run_optimize(feed_dict=runner_feed_dict)
                    if DEBUG.debug_params:
                        _grads, *_ = maml.runner.model.run_grads(feed_dict=runner_feed_dict)
                        runner_grads[k] = {t.name: g for t, g in zip(maml.runner.policy.trainables, _grads)}
                        print('runner_grads:', runner_grads[k][debug_tensor_key])
                        if DEBUG.debug_apply_gradient:
                            maml.runner.optim.run_apply_grads(grads=_grads, lr=alpha_lr)
                        else:
                            maml.runner.optim.run_optimize(feed_dict=runner_feed_dict)

                if k < G.n_grad_steps:
                    feed_dict.update(path_to_feed_dict(inputs=meta_branch.workers[k].inputs, paths=_p,
                                                       lr=alpha_lr, clip_range=clip_range))
                elif k == G.n_grad_steps:
                    # we don't treat the meta_input the same way even though we could. This is more clear to read.
                    # note: feed in the learning rate only later.
                    feed_dict.update(path_to_feed_dict(inputs=meta_branch.meta.inputs, paths=_p,
                                                       clip_range=clip_range))

        # Now compute the gradients.
        # note: runner shares variables with the MAML graph. Reload from state_dict.
        # note: should use variable placeholders for these inputs.
        if not DEBUG.no_weight_reset:
            from moleskin import moleskin as M
            M.green('---> resetting weights for meta gradient')
            maml.load_checkpoint()

        feed_dict[maml.beta] = beta_lr
        maml.optim.run_optimize(feed_dict=feed_dict)

        for key in batch_data.keys():
            reduced = np.array(batch_data[key]).mean()
            logger.log_keyvalue(epoch_ind, key, reduced)

        if should_test and test_tasks is not None:
            maml.save_checkpoint()
            print(test_tasks.spec)
            test_envs = test_tasks.envs
            test_envs.reset()

            # pre-update performance on the test tasks
            p = self.sample_from_env(test_envs, maml.runner.policy,
                                     timestep_limit=test_tasks.spec.timestep_limit)
            logger.log(epoch_ind, pre_update_rewards=np.mean(p['rewards']))

            # one adaptation step, then evaluate again
            p = self.sample_from_env(test_envs, maml.runner.policy)
            runner_feed_dict = \
                path_to_feed_dict(inputs=maml.runner.inputs, paths=p, lr=alpha_lr, clip_range=clip_range)
            maml.runner.model.run_optimize(feed_dict=runner_feed_dict)
            p = self.sample_from_env(test_envs, maml.runner.policy,
                                     timestep_limit=test_tasks.spec.timestep_limit)
            logger.log(epoch_ind, post_update_rewards=np.mean(p['rewards']))

            maml.load_checkpoint()

        if should_plot and callable(plot_fn):
            plot_fn(save=True if should_save or is_the_end else False, lr=beta_lr)
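# Hypothetical stand-in for the `defaultlist` container used in `train` above,
# assuming it is a list that grows on out-of-range access/assignment and fills
# the gap with a default factory (or a plain value such as None).
class defaultlist(list):
    def __init__(self, factory=None):
        super().__init__()
        self.factory = factory

    def _grow(self, index):
        while len(self) <= index:
            self.append(self.factory() if callable(self.factory) else self.factory)

    def __getitem__(self, index):
        self._grow(index)
        return super().__getitem__(index)

    def __setitem__(self, index, value):
        self._grow(index)
        super().__setitem__(index, value)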
def _run_single_task(self, i, task):
    start_time = time.time()
    try:
        # generate SHA256 hash of task dict as identifier
        task_hash = _hash_task_dict(task)

        # skip task if it has already been completed
        if task_hash in self.gof_single_res_collection.keys():
            logger.log("Task {:<1} {:<63} {:<10} {:<1} {:<1} {:<1}".format(
                i + 1, "has already been completed:", "Estimator:",
                task['estimator_name'], " Simulator: ", task["simulator_name"]))
            return None

        # run task when it has not been completed
        else:
            logger.log("Task {:<1} {:<63} {:<10} {:<1} {:<1} {:<1}".format(
                i + 1, "running:", "Estimator:", task['estimator_name'],
                " Simulator: ", task["simulator_name"]))

            tf.reset_default_graph()

            ''' build simulator and estimator model given the specified configurations '''
            simulator = globals()[task['simulator_name']](**task['simulator_config'])

            t = time.time()
            estimator = globals()[task['estimator_name']](
                task['task_name'], simulator.ndim_x, simulator.ndim_y, **task['estimator_config'])
            time_to_initialize = time.time() - t

            # if desired hide gpu devices
            if not self.use_gpu:
                os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

            with tf.Session() as sess:
                sess.run(tf.global_variables_initializer())

                ''' train the model '''
                gof = GoodnessOfFit(estimator=estimator, probabilistic_model=simulator,
                                    X=task['X'], Y=task['Y'], n_observations=task['n_obs'],
                                    n_mc_samples=task['n_mc_samples'], x_cond=task['x_cond'],
                                    task_name=task['task_name'], tail_measures=self.tail_measures)

                t = time.time()
                gof.fit_estimator(print_fit_result=True)
                time_to_fit = time.time() - t

                if self.dump_models:
                    logger.dump_pkl(data=gof.estimator,
                                    path="model_dumps/{}.pkl".format(task['task_name']))
                    logger.dump_pkl(data=gof.probabilistic_model,
                                    path="model_dumps/{}.pkl".format(task['task_name'] + "_simulator"))

                ''' perform tests with the fitted model '''
                t = time.time()
                gof_results = gof.compute_results()
                time_to_evaluate = time.time() - t

                gof_results.task_name = task['task_name']
                gof_results.hash = task_hash

            logger.log_pkl(data=(task_hash, gof_results), path=RESULTS_FILE)
            logger.flush(file_name=RESULTS_FILE)
            del gof_results

            task_duration = time.time() - start_time
            logger.log(
                "Finished task {:<1} in {:<1.4f} {:<43} {:<10} {:<1} {:<1} {:<2} | "
                "{:<1} {:<1.2f} {:<1} {:<1.2f} {:<1} {:<1.2f}".format(
                    i + 1, task_duration, "sec:", "Estimator:", task['estimator_name'],
                    " Simulator: ", task["simulator_name"], "t_init:", time_to_initialize,
                    "t_fit:", time_to_fit, "t_eval:", time_to_evaluate))

    except Exception as e:
        logger.log("error in task: ", str(i + 1))
        logger.log(str(e))
        traceback.print_exc()
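# Hypothetical sketch of `_hash_task_dict`, assuming it hashes a deterministic
# serialization of the task dict (the comment above says it produces a SHA256
# identifier). The actual repo may serialize differently.
import hashlib

def _hash_task_dict(task):
    serialized = repr(sorted(task.items(), key=lambda kv: kv[0]))
    return hashlib.sha256(serialized.encode('utf-8')).hexdigest()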
def test_configuration(log_dir):
    logger.configure(log_dir, prefix='main_test_script', color='green')
    logger.log("This is a unittest")
    logger.log("Some stats", reward=0.05, kl=0.001)
    logger.flush()
from ml_logger import logger

### First configure the logger to log to a directory (or a server)
logger.configure('/tmp/ml-logger-debug')
# outputs ~>
# logging data to /tmp/ml-logger-debug

# We can log individual keys
for i in range(1):
    logger.log(metrics={'some_val/smooth': 10,
                        'status': f"step ({i})"},
               reward=20, timestep=i)

### flush the data, otherwise the value would be overwritten with new values in the next iteration.
logger.flush()
# outputs ~>
# ╒════════════════════╤════════════════════════════╕
# │ reward             │ 20                         │
# ├────────────────────┼────────────────────────────┤
# │ timestep           │ 0                          │
# ├────────────────────┼────────────────────────────┤
# │ some val/smooth    │ 10                         │
# ├────────────────────┼────────────────────────────┤
# │ status             │ step (0)                   │
# ├────────────────────┼────────────────────────────┤
# │ timestamp          │'2018-11-04T11:37:03.324824'│
# ╘════════════════════╧════════════════════════════╛

for i in range(100):
    # loop body reconstructed -- assumption: same logging pattern as above,
    # repeated per step
    logger.log(metrics={'some_val/smooth': 10,
                        'status': f"step ({i})"},
               reward=20, timestep=i)
    logger.flush()
def train():
    from moleskin import moleskin as M

    M.tic('Full Run')
    if G.model == "lenet":
        model = Conv2d()
    elif G.model == 'mlp':
        model = Mlp()
    else:
        raise NotImplementedError('only lenet and mlp are allowed')
    model.train()
    print(model)

    G.log_prefix = f"mnist_{type(model).__name__}"
    logger.configure(log_directory=G.log_dir, prefix=G.log_prefix)
    logger.log_params(G=vars(G), Model=dict(architecture=str(model)))

    from torchvision import datasets, transforms
    trans = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (1.0,))])
    train_set = datasets.MNIST(root=G.data_dir, train=True, transform=trans, download=True)
    test_set = datasets.MNIST(root=G.data_dir, train=False, transform=trans, download=True)

    train_loader = torch.utils.data.DataLoader(dataset=train_set, batch_size=G.batch_size, shuffle=True)
    test_loader = torch.utils.data.DataLoader(dataset=test_set, batch_size=G.batch_size, shuffle=False)

    celoss = nn.CrossEntropyLoss()
    adam = optim.SGD(model.parameters(), lr=G.learning_rate, momentum=0.9)  # note: named `adam` but is SGD with momentum

    for epoch in range(G.n_epochs):
        for it, (x, target) in enumerate(train_loader):
            adam.zero_grad()
            ys = model(x)
            loss = celoss(ys, target)
            loss.backward()
            adam.step()

            if it % G.test_interval == 0:
                with h.Eval(model), torch.no_grad():
                    accuracy = h.Average()
                    for x, label in test_loader:
                        acc = h.cast(h.one_hot_to_int(model(x).detach()) == label, float).sum() / len(x)
                        accuracy.add(acc.detach().numpy())
                    logger.log(float(epoch) + it / len(train_loader), accuracy=accuracy.value)

        M.split("epoch")
        # logger.log(epoch, it=it, loss=loss.detach().numpy())
    M.toc('Full Run')
def run_benchmark_train_test_fit_cv(dataset, model_dict, seed=27, n_jobs_inner=-1, n_jobc_outer=1,
                                    n_train_valid_splits=1, shuffle_splits=True, n_eval_seeds=1, n_folds=5):
    if logger.log_directory is None:
        logger.configure(log_directory='/tmp/ml-logger')

    rds = np.random.RandomState(seed)
    eval_seeds = list(rds.randint(0, 10**7, size=n_eval_seeds))

    logger.log("\n------------------ empirical benchmark with %s ----------------------" % str(dataset))

    for model_key in model_dict:
        model_dict[model_key].update({'ndim_x': dataset.ndim_x, 'ndim_y': dataset.ndim_y})

    # run experiments
    cv_result_dicts = []
    datasets = zip(*dataset.get_train_valid_splits(valid_portion=0.2, n_splits=n_train_valid_splits,
                                                   shuffle=shuffle_splits, random_state=rds))
    for i, (X_train, Y_train, X_valid, Y_valid) in enumerate(datasets):
        logger.log("-------- train-valid split %i --------" % i)

        manager = Manager()
        cv_result_dict = manager.dict()

        def _fit_by_cv_and_eval(estimator_key, conf_dict):
            estimator, param_grid, param_dict_init = _initialize_model_cv(estimator_key, conf_dict, verbose=True)

            # 1) perform cross-validation hyperparam search to select params
            selected_params = estimator.fit_by_cv(X_train, Y_train, param_grid=param_grid,
                                                  n_folds=n_folds, n_jobs=n_jobs_inner, random_state=rds)
            logger.log("%s selected params:" % estimator_key)
            logger.log_params(**selected_params)

            # 2) evaluate selected params with different initializations
            param_dict_init.update(selected_params)
            logger.log("evaluating %s parameters with %i seeds" % (estimator_key, len(eval_seeds)))
            scores = _evaluate_params(estimator.__class__, param_dict_init,
                                      X_train, Y_train, X_valid, Y_valid, seeds=eval_seeds)

            cv_result_dict[estimator_key] = {'selected_params': selected_params,
                                             'scores': scores, 'eval_seeds': eval_seeds}
            logger.log("evaluation scores for %s: %s" % (estimator_key, str(scores)))

        executor = AsyncExecutor(n_jobs=n_jobc_outer)
        executor.run(_fit_by_cv_and_eval, model_dict.keys(), model_dict.values())

        cv_result_dicts.append(dict(cv_result_dict))

    pprint(cv_result_dicts)

    # rearrange results as a pandas df
    final_results_dict = {'scores_mean': [], 'scores_std': [], 'dataset': []}
    for estimator_key in model_dict.keys():
        scores = []
        for result_dict in cv_result_dicts:
            scores.extend(result_dict[estimator_key]['scores'])
        final_results_dict['scores_mean'].append(np.mean(scores))
        final_results_dict['scores_std'].append(np.std(scores))
        final_results_dict['dataset'].append(str(dataset))

    df = pd.DataFrame.from_dict(data=final_results_dict, orient='columns')
    df.index = list(model_dict.keys())

    logger.log('\n' + str(df))
    return df
def maml_supervised(model, Task, n_epochs, task_batch_n, npts, k_shot, n_gradient_steps, **_):
    """
    :param model:
    :param Task:
    :param n_epochs:
    :param task_batch_n:
    :param npts: the total number of samples for the sinusoidal task
    :param k_shot:
    :param n_gradient_steps:
    :param _:
    :return:
    """
    import playground.maml.maml_torch.paper_metrics as metrics

    device = t.device('cuda' if t.cuda.is_available() else 'cpu')
    model.to(device)

    alpha = 0.01
    beta = 0.01

    ps = list(model.parameters())
    # for ep_ind in trange(n_epochs, desc='Epochs', ncols=50, leave=False):
    for ep_ind in range(n_epochs):
        M.split('epoch')
        meta_grads = defaultdict(lambda: 0)
        theta = copy.deepcopy(model.state_dict())
        tasks = [Task(npts=npts) for _ in range(task_batch_n)]
        for task_ind, task in enumerate(tasks):  # sample a new problem
            # todo: this part is highly parallelizable
            if task_ind != 0:
                model.load_state_dict(theta)

            task_grads = defaultdict(deque)
            proper = t.tensor(task.proper()).to(device)
            samples = t.tensor(task.samples(k_shot)).to(device)
            for grad_ind in range(n_gradient_steps):
                # done: ready to be repackaged
                loss, _ = metrics.comp_loss(*samples, model)
                model.zero_grad()
                # back-propagate once, retain graph.
                loss.backward(t.ones(1).to(device), retain_graph=True)

                # done: need to use gradient descent, plus creating a meta graph.
                U, grad_outputs = [], []
                for p in model.parameters():
                    U.append(p - alpha * p.grad)  # meta update
                    grad_outputs.append(t.ones(1).to(device).expand_as(p))

                # t.autograd.grad returns the sum of gradients between all U and all grad_outputs
                # note: this is the row sum of \partial theta_prime \partial theta, which is a matrix.
                dU = t.autograd.grad(outputs=U, grad_outputs=grad_outputs, inputs=model.parameters())

                # now update the parameters
                for p, updated_p, du in zip(ps, U, dU):
                    p.data = updated_p.data  # these are leaf nodes, so we can directly manipulate the data attribute.
                    task_grads[p].append(du)

                # note: evaluate the 1-grad loss
                if grad_ind == 0:
                    with t.no_grad():
                        _loss, _ = metrics.comp_loss(*proper, model)
                    logger.log_keyvalue(ep_ind, key=f"1-grad-loss-{task_ind:02d}", value=loss.item(), silent=True)

            # compute Loss_theta_prime
            samples = t.tensor(task.samples(k_shot)).to(device)  # sample from this problem
            loss, _ = metrics.comp_loss(*samples, model)
            model.zero_grad()
            loss.backward()

            for i, grad in enumerate(model.gradients()):  # now accumulate the gradient
                p = ps[i]
                task_grads[p].append(grad)
                meta_grads[p] += t.prod(
                    t.cat(list(map(lambda d: d.unsqueeze(dim=-1), task_grads[p])), dim=-1), dim=-1)

        # theta_prime = copy.deepcopy(model.state_dict())
        model.load_state_dict(theta)
        for p in ps:
            p.grad = t.tensor((meta_grads[p] / task_batch_n).detach()).to(device)
        model.meta_step(lr=beta)

        with t.no_grad():
            _loss, _ = metrics.comp_loss(*proper, model)
        logger.log(ep_ind, meta_loss=_loss.item())
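# For reference, the update implemented by `maml_supervised` above is the
# standard MAML objective, written in this code's notation (alpha = inner-loop
# rate, beta = meta rate, N = task_batch_n):
#
#   theta'_i  =  theta - alpha * grad_theta L_{task_i}(theta)                    # inner step
#   theta    <-  theta - beta * grad_theta (1/N) sum_i L_{task_i}(theta'_i)      # meta step
#
# The t.autograd.grad call over U computes the (row-summed) Jacobian
# d(theta')/d(theta) that the chain rule for the meta gradient requires.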
import os
from ml_logger import logger

logger.configure(log_directory=os.path.expanduser("~/ml-logger-debug"),
                 prefix='episodeyang/demo-project/first-run')

for i in range(100):
    logger.log(loss=0.9**i, step=i, flush=True)
logger.log_text('charts: [{"yKey": "loss", "xKey": "step"}]', ".charts.yml")
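# A hypothetical extension of the dashboard spec above with a second chart;
# only the yKey/xKey fields are taken from the original snippet, and the
# `accuracy` key is an assumption.
logger.log_text('charts: [{"yKey": "loss", "xKey": "step"}, '
                '{"yKey": "accuracy", "xKey": "step"}]', ".charts.yml")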