def log_diagnostics(itr, algo, agent, sampler):
    """Record summary statistics of the agent's ProMP weight distribution."""
    promp: ProMP = agent.model.promp
    mean_w, cov_w = promp.mu_and_cov_w
    # Per-dimension standard deviations, taken from the covariance diagonal.
    stds = cov_w.diagonal(dim1=-2, dim2=-1).sqrt().detach().numpy()
    record_tabular_misc_stat('AgentCov', stds)
    record_tabular_misc_stat('AgentMu', mean_w.detach().numpy())
def _log_infos(self, traj_infos=None):
    """
    Writes trajectory info and optimizer info into csv via the logger and
    mirrors the same statistics to wandb; for 'GameScore' also records
    human/DER/nature-normalized scores and running bests.
    Resets stored optimizer info.
    """
    if traj_infos is None:
        traj_infos = self._traj_infos
    if traj_infos:
        for k in traj_infos[0]:
            if k.startswith("_"):
                continue
            values = [info[k] for info in traj_infos]
            logger.record_tabular_misc_stat(k, values)
            # Hoisted: np.average(values) was recomputed for every use below.
            avg = np.average(values)
            wandb.run.summary[k] = avg
            self.wandb_info[k + "Average"] = avg
            self.wandb_info[k + "Std"] = np.std(values)
            self.wandb_info[k + "Min"] = np.min(values)
            self.wandb_info[k + "Max"] = np.max(values)
            self.wandb_info[k + "Median"] = np.median(values)
            if k == 'GameScore':
                game = self.sampler.env_kwargs['game']
                random_score = atari_random_scores[game]
                der_score = atari_der_scores[game]
                nature_score = atari_nature_scores[game]
                human_score = atari_human_scores[game]
                # Standard normalization: (score - random) / (baseline - random).
                normalized_score = (avg - random_score) / (
                    human_score - random_score)
                der_normalized_score = (avg - random_score) / (
                    der_score - random_score)
                nature_normalized_score = (avg - random_score) / (
                    nature_score - random_score)
                self.wandb_info[k + "Normalized"] = normalized_score
                self.wandb_info[k + "DERNormalized"] = der_normalized_score
                self.wandb_info[k + "NatureNormalized"] = nature_normalized_score
                maybe_update_summary(k + "Best", avg)
                maybe_update_summary(k + "NormalizedBest", normalized_score)
                maybe_update_summary(k + "DERNormalizedBest",
                                     der_normalized_score)
                maybe_update_summary(k + "NatureNormalizedBest",
                                     nature_normalized_score)
    if self._opt_infos:
        for k, v in self._opt_infos.items():
            logger.record_tabular_misc_stat(k, v)
            avg_v = np.average(v)
            self.wandb_info[k] = avg_v
            wandb.run.summary[k] = avg_v
    self._opt_infos = {k: list() for k in self._opt_infos}  # (reset)
def _log_infos(self, traj_infos=None):
    """Dump trajectory and accumulated optimizer stats, then clear the latter."""
    traj_infos = self._traj_infos if traj_infos is None else traj_infos
    if traj_infos:
        public_keys = [k for k in traj_infos[0] if not k.startswith("_")]
        for key in public_keys:
            logger.record_tabular_misc_stat(
                key, [info[key] for info in traj_infos])
    if self._opt_infos:
        for key, samples in self._opt_infos.items():
            logger.record_tabular_misc_stat(key, samples)
    self._opt_infos = {key: [] for key in self._opt_infos}  # reset accumulators
def _log_infos(self, player_traj_infos=None, observer_traj_infos=None):
    """
    Writes trajectory info and optimizer info into csv via the logger,
    prefixing each stat with the role ("Player"/"Observer") it belongs to.
    Resets stored optimizer info.
    """
    def _record_traj(prefix, infos):
        # Log every non-private key except the (non-scalar) ObsMap entry.
        for k in infos[0]:
            if (not k.startswith("_")) and k != "ObsMap":
                logger.record_tabular_misc_stat(
                    prefix + k, [info[k] for info in infos])

    if player_traj_infos is None:
        player_traj_infos = self._player_traj_infos
    if player_traj_infos:
        _record_traj("Player", player_traj_infos)
    if observer_traj_infos is None:
        observer_traj_infos = self._observer_traj_infos
    if observer_traj_infos:
        _record_traj("Observer", observer_traj_infos)
    if self._player_opt_infos:
        for k, v in self._player_opt_infos.items():
            logger.record_tabular_misc_stat("Player" + k, v)
    self._player_opt_infos = {k: list()
                              for k in self._player_opt_infos}  # (reset)
    if self._observer_opt_infos:
        for k, v in self._observer_opt_infos.items():
            logger.record_tabular_misc_stat("Observer" + k, v)
    self._observer_opt_infos = {k: list()
                                for k in self._observer_opt_infos}  # (reset)
def _log_infos(self, traj_infos=None):
    """
    Writes trajectory info and optimizer info into csv via the logger.
    Resets stored optimizer info.
    """
    infos = traj_infos if traj_infos is not None else self._traj_infos
    if infos:
        for name in infos[0]:
            if name.startswith("_"):
                continue
            logger.record_tabular_misc_stat(name, [i[name] for i in infos])
    if self._opt_infos:
        for name, history in self._opt_infos.items():
            logger.record_tabular_misc_stat(name, history)
    self._opt_infos = {name: [] for name in self._opt_infos}  # reset
def _log_infos(self, traj_infos=None):
    """Customised version of _log_infos that supports having different keys
    in each `TrajInfo`: stats are grouped by key across trajectories."""
    if traj_infos is None:
        traj_infos = self._traj_infos
    if traj_infos:
        grouped = defaultdict(list)
        for info in traj_infos:
            for key, val in info.items():
                if not key.startswith("_"):
                    grouped[key].append(val)
        for key, vals in grouped.items():
            logger.record_tabular_misc_stat(key, vals)
    if self._opt_infos:
        for key, vals in self._opt_infos.items():
            logger.record_tabular_misc_stat(key, vals)
    self._opt_infos = {key: [] for key in self._opt_infos}  # reset
def _log_infos(self, traj_infos=None):
    """
    Record trajectory-level statistics as well as algorithm-specific
    statistics.

    :param traj_infos: per-trajectory statistics; defaults to the
        runner's stored ``self._traj_infos``.
    """
    if traj_infos is None:
        traj_infos = self._traj_infos
    if traj_infos:
        for k in traj_infos[0]:
            if not k.startswith("_"):
                logger.record_tabular_misc_stat(
                    k, [info[k] for info in traj_infos])
    if self._opt_infos:
        # Algorithm-specific metrics — e.g. for DQN, the fields of the
        # OptInfo namedtuple defined in dqn.py.
        for k, v in self._opt_infos.items():
            logger.record_tabular_misc_stat(k, v)  # compute and record stats
    self._opt_infos = {k: list() for k in self._opt_infos}  # (reset) clear accumulators
def _log_infos(self, traj_infos=None):
    """
    Writes trajectory info and optimizer info into csv via the logger,
    mirroring average/median into self.wandb_info.
    Resets stored optimizer info.
    """
    if traj_infos is None:
        traj_infos = self._traj_infos
    for key in (traj_infos[0] if traj_infos else ()):
        if key.startswith("_"):
            continue
        samples = [info[key] for info in traj_infos]
        logger.record_tabular_misc_stat(key, samples)
        self.wandb_info[key + "Average"] = np.average(samples)
        self.wandb_info[key + "Median"] = np.median(samples)
    if self._opt_infos:
        for key, history in self._opt_infos.items():
            logger.record_tabular_misc_stat(key, history)
            self.wandb_info[key] = np.average(history)
    self._opt_infos = {key: [] for key in self._opt_infos}  # reset
def log_diagnostics(itr, algo, agent, sampler):
    """Anneal curriculum parameters in the shared sampler dict and record
    per-dimension policy stds and final object x-positions."""
    if itr > 0:
        # Linearly widen the angle bound toward 1.0 as training progresses.
        shared_sampler_dict['angle_bound_scale'] = np.minimum(
            options.angle_bound_scale + 0.004 * itr, 1.)
    if itr > 500 and not options.fixed_hpos_std:
        # After iteration 500, anneal horizontal-position noise up to 0.25.
        shared_sampler_dict['hpos_std'] = np.minimum(
            options.hpos_std + 0.0005 * (itr - 500), 0.25)
    record_tabular('agent/hpos_std', shared_sampler_dict['hpos_std'])
    record_tabular('agent/angle_bound_scale',
                   shared_sampler_dict['angle_bound_scale'])
    stds = agent.model.log_std.exp().data.cpu().numpy()
    for dim, sigma in enumerate(stds):
        record_tabular('agent/std{}'.format(dim), sigma)
    # x-coordinate (obs index 8) of the object at each episode's final step.
    record_tabular_misc_stat(
        'final_obj_position_x',
        sampler.samples_np.env.observation[sampler.samples_np.env.done, 8])
def log_diagnostics(self, itr, val_info, *args, **kwargs):
    """Snapshot parameters, log timing/throughput plus accumulated optimizer
    and validation stats, then restart the progress bar for the next round."""
    self.save_itr_snapshot(itr)
    self._cum_time = time.time() - self._start_time
    # Epochs = fraction of the (non-validation) replay buffer consumed so far.
    epochs = itr * self.algo.batch_size / (
        self.algo.replay_buffer.size * (1 - self.algo.validation_split))
    logger.record_tabular("Iteration", itr)
    logger.record_tabular("Epochs", epochs)
    logger.record_tabular("CumTime (s)", self._cum_time)
    logger.record_tabular("UpdatesPerSecond", itr / self._cum_time)
    if self._opt_infos:
        for name, history in self._opt_infos.items():
            logger.record_tabular_misc_stat(name, history)
    for name, stat in zip(val_info._fields, val_info):
        logger.record_tabular_misc_stat("val_" + name, stat)
    self._opt_infos = {name: [] for name in self._opt_infos}  # reset
    logger.dump_tabular(with_prefix=False)
    if itr < self.n_updates - 1:
        logger.log(
            f"Optimizing over {self.log_interval_updates} iterations.")
        self.pbar = ProgBarCounter(self.log_interval_updates)
def _log_infos(self, traj_infos=None):
    """
    Writes trajectory info and optimizer info into csv via the logger;
    also publishes the mean Return to the sampler's shared value.
    Resets stored optimizer info.
    """
    if traj_infos is None:
        traj_infos = self._traj_infos
    if traj_infos:
        for key in traj_infos[0]:
            if key.startswith("_"):
                continue
            samples = [info[key] for info in traj_infos]
            logger.record_tabular_misc_stat(key, samples)
            if key == 'Return':
                # Share the average return with sampler worker processes
                # via the multiprocessing Value in sampler.sync.
                self.sampler.sync.glob_average_return.value = np.average(
                    samples)
    if self._opt_infos:
        for key, history in self._opt_infos.items():
            logger.record_tabular_misc_stat(key, history)
    self._opt_infos = {key: [] for key in self._opt_infos}  # reset
def shaping(samples):
    """
    Potential-based reward-shaping term computed from a frozen "weak" agent:
    0.995 * max_a Q(s', a) - max_a Q(s, a) for each sample in the batch.

    Returns 0 (int) during the first 1e3 iterations, otherwise a tensor of
    shaped rewards.  NOTE(review): the int-vs-tensor return type difference
    is assumed to be handled by the caller — confirm.
    """
    # TODO eval/train mode here and in other places
    if logger._iteration <= 1e3:  # FIXME(1e3)
        # Warm-up period: no shaping applied yet.
        return 0
    with torch.no_grad():
        obs = (samples.agent_inputs.observation.to(device)
               )  # TODO check if maybe better to keep it on cpu
        obsprim = (samples.target_inputs.observation.to(device))
        # Q-values for the current (s) and next (s') observations from the
        # frozen weak agent.
        qs = weak_agent(obs, samples.agent_inputs.prev_action.to(device),
                        samples.agent_inputs.prev_reward.to(device))
        qsprim = weak_agent(obsprim,
                            samples.target_inputs.prev_action.to(device),
                            samples.target_inputs.prev_reward.to(device))
        # 0.995 is presumably the discount factor — TODO confirm it matches
        # the algorithm's gamma.
        vals = 0.995 * torch.max(qsprim, dim=1).values - torch.max(
            qs, dim=1).values
        if logger._iteration % 1e1 == 0:  # FIXME(1e1)
            # Periodically log the distribution of shaped rewards.
            with logger.tabular_prefix("Shaping"):
                logger.record_tabular_misc_stat(
                    'ShapedReward', vals.detach().cpu().numpy())
        return vals
def _log_infos(self, traj_infos=None):
    """Log trajectory stats; 'env_infos' entries are expanded into
    'info_final_<field>' stats taken from each trajectory's last step."""
    if traj_infos is None:
        traj_infos = self._traj_infos
    if traj_infos:
        for key in traj_infos[0]:
            if key == 'env_infos':
                # Log the final-step value of every env-info field except
                # the step counter.
                fields = traj_infos[0][key][0]._fields
                for field in fields:
                    if field == 'total_steps':
                        continue
                    logger.record_tabular_misc_stat(
                        'info_final_' + field,
                        [getattr(info[key][-1], field)
                         for info in traj_infos])
                # else:
                #     logger.record_tabular('info_total_steps', sum(getattr(info[k][-1], field) for info in traj_infos))
            elif not key.startswith("_"):
                logger.record_tabular_misc_stat(
                    key, [info[key] for info in traj_infos])
    if self._opt_infos:
        for key, history in self._opt_infos.items():
            logger.record_tabular_misc_stat(key, history)
    self._opt_infos = {key: [] for key in self._opt_infos}  # reset
def train(demos, add_preproc, seed, batch_size, total_n_batches,
          eval_every_n_batches, out_dir, run_name, gpu_idx, cpu_list,
          eval_n_traj, snapshot_gap, omit_noop, net_width_mul, net_use_bn,
          net_dropout, net_coord_conv, net_attention, net_task_spec_layers,
          load_policy, aug_mode, min_bc):
    """
    Multi-task behavioral cloning training loop: sets up seeds/devices,
    loads demo datasets and a multiplexed eval sampler, builds (or loads)
    a multi-head policy, then alternates training rounds with evaluation
    rollouts, logging per-task scores/losses and saving snapshots.
    """
    # TODO: abstract setup code. Seeds & GPUs should go in one function. Env
    # setup should go in another function (or maybe the same function). Dataset
    # loading should be simplified by having a single class that can provide
    # whatever form of data the current IL method needs, without having to do
    # unnecessary copies in memory. Maybe also just use Sacred, because YOLO.
    with contextlib.ExitStack() as exit_stack:
        # set up seeds & devices
        set_seeds(seed)
        mp.set_start_method('spawn')
        use_gpu = gpu_idx is not None and torch.cuda.is_available()
        dev = torch.device(["cpu", f"cuda:{gpu_idx}"][use_gpu])
        print(f"Using device {dev}, seed {seed}")
        if cpu_list is None:
            cpu_list = sample_cpu_list()
        affinity = dict(
            cuda_idx=gpu_idx if use_gpu else None,
            workers_cpus=cpu_list,
        )
        # register original envs
        import magical
        magical.register_envs()
        # TODO: split out part of the dataset for validation.
        demos_metas_dict = get_demos_meta(demo_paths=demos,
                                          omit_noop=omit_noop,
                                          transfer_variants=[],
                                          preproc_name=add_preproc)
        dataset_mt = demos_metas_dict['dataset_mt']
        loader_mt = make_loader_mt(dataset_mt, batch_size)
        variant_groups = demos_metas_dict['variant_groups']
        env_metas = demos_metas_dict['env_metas']
        num_demo_sources = demos_metas_dict['num_demo_sources']
        task_ids_and_demo_env_names = demos_metas_dict[
            'task_ids_and_demo_env_names']
        sampler_batch_B = batch_size
        # this doesn't really matter
        sampler_batch_T = 5
        sampler, sampler_batch_B = make_mux_sampler(
            variant_groups=variant_groups,
            num_demo_sources=num_demo_sources,
            env_metas=env_metas,
            use_gpu=use_gpu,
            batch_B=sampler_batch_B,
            batch_T=sampler_batch_T,
            # TODO: instead of doing this, try sampling in proportion to length
            # of horizon; that should get more samples from harder envs
            task_var_weights=None)
        if load_policy is not None:
            # Resume from an existing policy snapshot (latest if a dir-like
            # pattern was given, else the literal path).
            try:
                pol_path = get_latest_path(load_policy)
            except ValueError:
                pol_path = load_policy
            policy_ctor = functools.partial(
                adapt_pol_loader,
                pol_path=pol_path,
                task_ids_and_demo_env_names=task_ids_and_demo_env_names)
            policy_kwargs = {}
        else:
            policy_kwargs = {
                'env_ids_and_names': task_ids_and_demo_env_names,
                'width': net_width_mul,
                'use_bn': net_use_bn,
                'dropout': net_dropout,
                'coord_conv': net_coord_conv,
                'attention': net_attention,
                'n_task_spec_layers': net_task_spec_layers,
                **get_policy_spec_magical(env_metas),
            }
            policy_ctor = MultiHeadPolicyNet
        agent = CategoricalPgAgent(ModelCls=MuxTaskModelWrapper,
                                   model_kwargs=dict(
                                       model_ctor=policy_ctor,
                                       model_kwargs=policy_kwargs))
        sampler.initialize(agent=agent,
                           seed=np.random.randint(1 << 31),
                           affinity=affinity)
        exit_stack.callback(lambda: sampler.shutdown())
        model_mt = policy_ctor(**policy_kwargs).to(dev)
        if min_bc:
            # min-BC reweighting: jointly optimize the policy and the
            # per-(task, demo-source) weighting module.
            num_tasks = len(task_ids_and_demo_env_names)
            weight_mod = MinBCWeightingModule(num_tasks, num_demo_sources) \
                .to(dev)
            all_params = it.chain(model_mt.parameters(),
                                  weight_mod.parameters())
        else:
            weight_mod = None
            all_params = model_mt.parameters()
        # Adam mostly works fine, but in very loose informal tests it seems
        # like SGD had fewer weird failures where mean loss would jump up by a
        # factor of 2x for a period (?). (I don't think that was solely due to
        # high LR; probably an architectural issue.)
        # opt_mt = torch.optim.Adam(model_mt.parameters(), lr=3e-4)
        opt_mt = torch.optim.SGD(all_params, lr=1e-3, momentum=0.1)
        try:
            aug_opts = MILBenchAugmentations.PRESETS[aug_mode]
        except KeyError:
            raise ValueError(f"unsupported mode '{aug_mode}'")
        if aug_opts:
            print("Augmentations:", ", ".join(aug_opts))
            aug_model = MILBenchAugmentations(**{k: True for k in aug_opts}) \
                .to(dev)
        else:
            print("No augmentations")
            aug_model = None
        n_uniq_envs = len(task_ids_and_demo_env_names)
        # Hyperparameters recorded alongside the run's logs.
        log_params = {
            'n_uniq_envs': n_uniq_envs,
            'n_demos': len(demos),
            'net_use_bn': net_use_bn,
            'net_width_mul': net_width_mul,
            'net_dropout': net_dropout,
            'net_coord_conv': net_coord_conv,
            'net_attention': net_attention,
            'aug_mode': aug_mode,
            'seed': seed,
            'omit_noop': omit_noop,
            'batch_size': batch_size,
            'eval_n_traj': eval_n_traj,
            'eval_every_n_batches': eval_every_n_batches,
            'total_n_batches': total_n_batches,
            'snapshot_gap': snapshot_gap,
            'add_preproc': add_preproc,
            'net_task_spec_layers': net_task_spec_layers,
        }
        with make_logger_ctx(out_dir,
                             "mtbc",
                             f"mt{n_uniq_envs}",
                             run_name,
                             snapshot_gap=snapshot_gap,
                             log_params=log_params):
            # initial save
            torch.save(
                model_mt,
                os.path.join(logger.get_snapshot_dir(), 'full_model.pt'))
            # train for a while
            n_batches_done = 0
            n_rounds = int(np.ceil(total_n_batches / eval_every_n_batches))
            rnd = 1
            assert eval_every_n_batches > 0
            while n_batches_done < total_n_batches:
                # Train at most eval_every_n_batches before evaluating.
                batches_left_now = min(total_n_batches - n_batches_done,
                                       eval_every_n_batches)
                print(f"Done {n_batches_done}/{total_n_batches} "
                      f"({n_batches_done/total_n_batches*100:.2f}%, "
                      f"{rnd}/{n_rounds} rounds) batches; doing another "
                      f"{batches_left_now}")
                model_mt.train()
                loss_ewma, losses, per_task_losses = do_training_mt(
                    loader=loader_mt,
                    model=model_mt,
                    opt=opt_mt,
                    dev=dev,
                    aug_model=aug_model,
                    min_bc_module=weight_mod,
                    n_batches=batches_left_now)
                # TODO: record accuracy on a random subset of the train and
                # validation sets (both in eval mode, not train mode)
                print(f"Evaluating {eval_n_traj} trajectories on "
                      f"{variant_groups.num_tasks} tasks")
                record_misc_calls = []
                model_mt.eval()
                copy_model_into_agent_eval(model_mt, sampler.agent)
                scores_by_tv = eval_model(
                    sampler,
                    # shouldn't be any exploration
                    itr=0,
                    n_traj=eval_n_traj)
                for (task_id, variant_id), scores in scores_by_tv.items():
                    tv_id = (task_id, variant_id)
                    env_name = variant_groups.env_name_by_task_variant[tv_id]
                    tag = make_env_tag(strip_mb_preproc_name(env_name))
                    logger.record_tabular_misc_stat("Score%s" % tag, scores)
                    env_losses = per_task_losses.get(tv_id, [])
                    record_misc_calls.append((f"Loss{tag}", env_losses))
                # we record score AFTER loss so that losses are all in one
                # place, and scores are all in another
                for args in record_misc_calls:
                    logger.record_tabular_misc_stat(*args)
                # finish logging for this epoch
                logger.record_tabular("Round", rnd)
                logger.record_tabular("LossEWMA", loss_ewma)
                logger.record_tabular_misc_stat("Loss", losses)
                logger.dump_tabular()
                logger.save_itr_params(
                    rnd, {
                        'model_state': model_mt.state_dict(),
                        'opt_state': opt_mt.state_dict(),
                    })
                # advance ctrs
                rnd += 1
                n_batches_done += batches_left_now