def save_itr_snapshot(self, itr, save_cur):
    """
    Calls the logger to save training checkpoint/snapshot (logger itself
    may or may not save, depending on mode selected).
    """
    # logger.log("saving snapshot...")
    params = self.get_itr_snapshot(itr)
    logger.save_itr_params(itr, params, save_cur)

def save_itr_snapshot(self, itr):
    """
    Calls the logger to save training checkpoint/snapshot (logger itself
    may or may not save, depending on mode selected).
    """
    logger.log("Saving pytorch checkpoint.")
    params = self.get_itr_snapshot(itr)
    logger.save_itr_params(itr, params)
    logger.log("Pytorch checkpoint saved.")

def save_itr_snapshot(self, itr):
    """
    Saves the snapshot data for the given iteration to the log. The snapshot
    holds the model parameters etc.; keeping it in the log files helps with
    debugging.

    :param itr: the iteration index.
    """
    logger.log("saving snapshot...")
    # get the snapshot data for iteration `itr`, including the model
    # parameters etc.
    params = self.get_itr_snapshot(itr)
    # save the snapshot data for iteration `itr`
    logger.save_itr_params(itr, params)
    logger.log("saved")

def save_itr_snapshot(self, itr, sample_itr):
    logger.log("saving snapshot...")
    params = self.get_itr_snapshot(itr, sample_itr)
    logger.save_itr_params(itr, params)
    logger.log("saved")

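All four variants above leave the actual decision about whether anything gets written to `logger.save_itr_params`: as the docstrings note, the logger "may or may not save, depending on mode selected". The sketch below illustrates that interaction, assuming an rlpyt/rllab-style logger module; the import path, mode names, and gap semantics are assumptions and may differ from the logger these runners actually use.

# Illustration only: assumes an rlpyt/rllab-style logger API, not verified
# against this repo's own logger wrapper.
from rlpyt.utils.logging import logger

logger.set_snapshot_dir('/tmp/example_run')  # where itr_*.pkl files land
logger.set_snapshot_mode('gap')              # e.g. 'all', 'last', 'gap', 'none'
logger.set_snapshot_gap(10)

for itr in range(50):
    params = {'itr': itr}  # stand-in for self.get_itr_snapshot(itr)
    # With mode 'gap' this only writes a snapshot every 10th iteration; with
    # mode 'none' it is a no-op. That is why save_itr_snapshot can call it
    # unconditionally and still "may or may not save".
    logger.save_itr_params(itr, params)
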
def train(demos, add_preproc, seed, batch_size, total_n_batches,
          eval_every_n_batches, out_dir, run_name, gpu_idx, cpu_list,
          eval_n_traj, snapshot_gap, omit_noop, net_width_mul, net_use_bn,
          net_dropout, net_coord_conv, net_attention, net_task_spec_layers,
          load_policy, aug_mode, min_bc):
    # TODO: abstract setup code. Seeds & GPUs should go in one function. Env
    # setup should go in another function (or maybe the same function).
    # Dataset loading should be simplified by having a single class that can
    # provide whatever form of data the current IL method needs, without
    # having to do unnecessary copies in memory. Maybe also just use Sacred,
    # because YOLO.

    with contextlib.ExitStack() as exit_stack:
        # set up seeds & devices
        set_seeds(seed)
        mp.set_start_method('spawn')
        use_gpu = gpu_idx is not None and torch.cuda.is_available()
        dev = torch.device(["cpu", f"cuda:{gpu_idx}"][use_gpu])
        print(f"Using device {dev}, seed {seed}")
        if cpu_list is None:
            cpu_list = sample_cpu_list()
        affinity = dict(
            cuda_idx=gpu_idx if use_gpu else None,
            workers_cpus=cpu_list,
        )

        # register original envs
        import magical
        magical.register_envs()

        # TODO: split out part of the dataset for validation.
        demos_metas_dict = get_demos_meta(demo_paths=demos,
                                          omit_noop=omit_noop,
                                          transfer_variants=[],
                                          preproc_name=add_preproc)
        dataset_mt = demos_metas_dict['dataset_mt']
        loader_mt = make_loader_mt(dataset_mt, batch_size)
        variant_groups = demos_metas_dict['variant_groups']
        env_metas = demos_metas_dict['env_metas']
        num_demo_sources = demos_metas_dict['num_demo_sources']
        task_ids_and_demo_env_names = demos_metas_dict[
            'task_ids_and_demo_env_names']
        sampler_batch_B = batch_size
        # this doesn't really matter
        sampler_batch_T = 5
        sampler, sampler_batch_B = make_mux_sampler(
            variant_groups=variant_groups,
            num_demo_sources=num_demo_sources,
            env_metas=env_metas,
            use_gpu=use_gpu,
            batch_B=sampler_batch_B,
            batch_T=sampler_batch_T,
            # TODO: instead of doing this, try sampling in proportion to
            # length of horizon; that should get more samples from harder
            # envs
            task_var_weights=None)

        if load_policy is not None:
            try:
                pol_path = get_latest_path(load_policy)
            except ValueError:
                pol_path = load_policy
            policy_ctor = functools.partial(
                adapt_pol_loader,
                pol_path=pol_path,
                task_ids_and_demo_env_names=task_ids_and_demo_env_names)
            policy_kwargs = {}
        else:
            policy_kwargs = {
                'env_ids_and_names': task_ids_and_demo_env_names,
                'width': net_width_mul,
                'use_bn': net_use_bn,
                'dropout': net_dropout,
                'coord_conv': net_coord_conv,
                'attention': net_attention,
                'n_task_spec_layers': net_task_spec_layers,
                **get_policy_spec_magical(env_metas),
            }
            policy_ctor = MultiHeadPolicyNet
        agent = CategoricalPgAgent(ModelCls=MuxTaskModelWrapper,
                                   model_kwargs=dict(
                                       model_ctor=policy_ctor,
                                       model_kwargs=policy_kwargs))

        sampler.initialize(agent=agent,
                           seed=np.random.randint(1 << 31),
                           affinity=affinity)
        exit_stack.callback(lambda: sampler.shutdown())

        model_mt = policy_ctor(**policy_kwargs).to(dev)
        if min_bc:
            num_tasks = len(task_ids_and_demo_env_names)
            weight_mod = MinBCWeightingModule(num_tasks, num_demo_sources) \
                .to(dev)
            all_params = it.chain(model_mt.parameters(),
                                  weight_mod.parameters())
        else:
            weight_mod = None
            all_params = model_mt.parameters()
        # Adam mostly works fine, but in very loose informal tests it seems
        # like SGD had fewer weird failures where mean loss would jump up by
        # a factor of 2x for a period (?). (I don't think that was solely due
        # to high LR; probably an architectural issue.)
        # opt_mt = torch.optim.Adam(model_mt.parameters(), lr=3e-4)
        opt_mt = torch.optim.SGD(all_params, lr=1e-3, momentum=0.1)

        try:
            aug_opts = MILBenchAugmentations.PRESETS[aug_mode]
        except KeyError:
            raise ValueError(f"unsupported mode '{aug_mode}'")
        if aug_opts:
            print("Augmentations:", ", ".join(aug_opts))
            aug_model = MILBenchAugmentations(**{k: True for k in aug_opts}) \
                .to(dev)
        else:
            print("No augmentations")
            aug_model = None

        n_uniq_envs = len(task_ids_and_demo_env_names)
        log_params = {
            'n_uniq_envs': n_uniq_envs,
            'n_demos': len(demos),
            'net_use_bn': net_use_bn,
            'net_width_mul': net_width_mul,
            'net_dropout': net_dropout,
            'net_coord_conv': net_coord_conv,
            'net_attention': net_attention,
            'aug_mode': aug_mode,
            'seed': seed,
            'omit_noop': omit_noop,
            'batch_size': batch_size,
            'eval_n_traj': eval_n_traj,
            'eval_every_n_batches': eval_every_n_batches,
            'total_n_batches': total_n_batches,
            'snapshot_gap': snapshot_gap,
            'add_preproc': add_preproc,
            'net_task_spec_layers': net_task_spec_layers,
        }
        with make_logger_ctx(out_dir,
                             "mtbc",
                             f"mt{n_uniq_envs}",
                             run_name,
                             snapshot_gap=snapshot_gap,
                             log_params=log_params):
            # initial save
            torch.save(
                model_mt,
                os.path.join(logger.get_snapshot_dir(), 'full_model.pt'))

            # train for a while
            n_batches_done = 0
            n_rounds = int(np.ceil(total_n_batches / eval_every_n_batches))
            rnd = 1
            assert eval_every_n_batches > 0
            while n_batches_done < total_n_batches:
                batches_left_now = min(total_n_batches - n_batches_done,
                                       eval_every_n_batches)
                print(f"Done {n_batches_done}/{total_n_batches} "
                      f"({n_batches_done/total_n_batches*100:.2f}%, "
                      f"{rnd}/{n_rounds} rounds) batches; doing another "
                      f"{batches_left_now}")
                model_mt.train()
                loss_ewma, losses, per_task_losses = do_training_mt(
                    loader=loader_mt,
                    model=model_mt,
                    opt=opt_mt,
                    dev=dev,
                    aug_model=aug_model,
                    min_bc_module=weight_mod,
                    n_batches=batches_left_now)

                # TODO: record accuracy on a random subset of the train and
                # validation sets (both in eval mode, not train mode)

                print(f"Evaluating {eval_n_traj} trajectories on "
                      f"{variant_groups.num_tasks} tasks")
                record_misc_calls = []
                model_mt.eval()
                copy_model_into_agent_eval(model_mt, sampler.agent)
                scores_by_tv = eval_model(
                    sampler,
                    # shouldn't be any exploration
                    itr=0,
                    n_traj=eval_n_traj)
                for (task_id, variant_id), scores in scores_by_tv.items():
                    tv_id = (task_id, variant_id)
                    env_name = variant_groups.env_name_by_task_variant[tv_id]
                    tag = make_env_tag(strip_mb_preproc_name(env_name))
                    logger.record_tabular_misc_stat("Score%s" % tag, scores)
                    env_losses = per_task_losses.get(tv_id, [])
                    record_misc_calls.append((f"Loss{tag}", env_losses))

                # we record score AFTER loss so that losses are all in one
                # place, and scores are all in another
                for args in record_misc_calls:
                    logger.record_tabular_misc_stat(*args)

                # finish logging for this epoch
                logger.record_tabular("Round", rnd)
                logger.record_tabular("LossEWMA", loss_ewma)
                logger.record_tabular_misc_stat("Loss", losses)
                logger.dump_tabular()
                logger.save_itr_params(
                    rnd, {
                        'model_state': model_mt.state_dict(),
                        'opt_state': opt_mt.state_dict(),
                    })

                # advance ctrs
                rnd += 1
                n_batches_done += batches_left_now
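
`do_training_mt` returns a scalar `loss_ewma` that is logged as `LossEWMA` above. For reference, this is how an exponentially weighted moving average over per-batch losses is typically maintained; the helper below is a hypothetical sketch, and the smoothing constant and initialisation actually used inside `do_training_mt` are assumptions, not taken from this repo.

def update_loss_ewma(loss_ewma, batch_loss, alpha=0.9):
    # Hypothetical helper for illustration. One EWMA step: keep `alpha` of
    # the running value and blend in (1 - alpha) of the newest batch loss.
    if loss_ewma is None:
        # first batch: initialise the running average with the raw loss
        return batch_loss
    return alpha * loss_ewma + (1 - alpha) * batch_loss


# usage sketch: the EWMA follows the trend while smoothing per-batch noise
loss_ewma = None
for batch_loss in [2.0, 1.5, 1.2, 1.1]:
    loss_ewma = update_loss_ewma(loss_ewma, batch_loss)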