def logging(self):
    """Log the training information."""
    if self.params.log_tabular and self.iter <= self.params.n_itr:
        if self.step_count % self.params.log_interval == 0:
            self.iter += 1
            logger.log(' ')
            tabular.record('StepNum', self.step_count)
            record_num = 0
            if self.params.log_dir is not None:
                if self.step_count == self.params.log_interval:
                    # First time logging: start with an empty history.
                    best_actions = []
                else:
                    with open(self.params.log_dir + '/best_actions.p',
                              'rb') as f:
                        best_actions = pickle.load(f)
                best_actions.append(
                    np.array([x.get() for x in self.top_paths.pq[0][0]]))
                with open(self.params.log_dir + '/best_actions.p',
                          'wb') as f:
                    pickle.dump(best_actions, f)
            for (topi, path) in enumerate(self.top_paths):
                tabular.record('reward ' + str(topi), path[1])
                record_num += 1
            # Pad the remaining reward slots so every dump has N columns.
            for topi_left in range(record_num, self.top_paths.N):
                tabular.record('reward ' + str(topi_left), 0)
            logger.log(tabular)
            logger.dump_all(self.step_count)
            tabular.clear()
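# A minimal sketch (not part of the class above) of reading back the
# best_actions.p history that logging() writes; 'logs' is a placeholder
# standing in for params.log_dir.
import pickle

with open('logs/best_actions.p', 'rb') as f:
    best_actions = pickle.load(f)

# One numpy array of best actions per logging interval.
for interval, actions in enumerate(best_actions):
    print('interval', interval, 'action sequence shape:', actions.shape)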
def _train(self,
           n_epochs,
           n_epoch_cycles,
           batch_size,
           plot,
           store_paths,
           pause_for_plot,
           start_epoch=0):
    """Start actual training.

    Args:
        n_epochs (int): Number of epochs.
        n_epoch_cycles (int): Number of batches of samples in each epoch.
            This is only useful for off-policy algorithms. For on-policy
            algorithms this value should always be 1.
        batch_size (int): Number of steps in a batch.
        plot (bool): Visualize the policy by doing a rollout after each
            epoch.
        store_paths (bool): Save paths in snapshot.
        pause_for_plot (bool): Pause for plot.
        start_epoch (int): (internal) The starting epoch. Used for
            experiment resuming.

    Returns:
        The average return in the last epoch cycle.

    """
    assert self.has_setup, ('Use Runner.setup() to setup runner before '
                            'training.')

    # Save arguments for restore.
    self.train_args = SimpleNamespace(n_epochs=n_epochs,
                                      n_epoch_cycles=n_epoch_cycles,
                                      batch_size=batch_size,
                                      plot=plot,
                                      store_paths=store_paths,
                                      pause_for_plot=pause_for_plot,
                                      start_epoch=start_epoch)

    self.start_worker()
    self.start_time = time.time()
    itr = start_epoch * n_epoch_cycles

    last_return = None
    for epoch in range(start_epoch, n_epochs):
        self.itr_start_time = time.time()
        paths = None
        with logger.prefix('epoch #%d | ' % epoch):
            for cycle in range(n_epoch_cycles):
                paths = self.obtain_samples(itr, batch_size)
                last_return = self.algo.train_once(itr, paths)
                itr += 1
            self.save(epoch, paths if store_paths else None)
            self.log_diagnostics(pause_for_plot)
            logger.dump_all(itr)
            tabular.clear()

    self.shutdown_worker()
    return last_return
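# A usage sketch for _train(); `runner`, `algo`, and `env` are hypothetical
# placeholders from the surrounding codebase, and Runner.setup() must be
# called first (see the assertion above).
runner.setup(algo=algo, env=env)
last_return = runner._train(n_epochs=100,
                            n_epoch_cycles=1,  # 1 for on-policy algorithms
                            batch_size=4000,
                            plot=False,
                            store_paths=False,
                            pause_for_plot=False)
print('average return in the last epoch:', last_return)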
def step_epochs(self):
    """Step through each epoch.

    This function returns a magic generator. When iterated through, this
    generator automatically performs services such as snapshotting and log
    management. It is used inside train() in each algorithm.

    The generator initializes two variables: `self.step_itr` and
    `self.step_path`. To use the generator, these two have to be updated
    manually in each epoch, as the example shows below.

    Yields:
        int: The next training epoch.

    Examples:
        for epoch in runner.step_epochs():
            runner.step_path = runner.obtain_samples(...)
            self.train_once(...)
            runner.step_itr += 1

    """
    self._start_worker()
    self._start_time = time.time()
    self.step_itr = self._stats.total_itr
    self.step_path = None

    # Used by integration tests to ensure examples can run one epoch.
    n_epochs = int(
        os.environ.get('GARAGE_EXAMPLE_TEST_N_EPOCHS',
                       self._train_args.n_epochs))

    logger.log('Obtaining samples...')

    suffix = str(uuid.uuid1())
    src = Path(self._snapshotter.snapshot_dir)
    dstfile = f'{src.name}_{suffix}.tar.xz'

    for epoch in range(self._train_args.start_epoch, n_epochs):
        self._itr_start_time = time.time()
        with logger.prefix('epoch #%d | ' % epoch):
            yield epoch
            save_path = (self.step_path
                         if self._train_args.store_paths else None)
            self._stats.last_path = save_path
            self._stats.total_epoch = epoch
            self._stats.total_itr = self.step_itr
            self.save(epoch)
            self.log_diagnostics(self._train_args.pause_for_plot)
            logger.dump_all(self.step_itr)
            tabular.clear()
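# The GARAGE_EXAMPLE_TEST_N_EPOCHS override read above can be set before
# launching an example; a minimal sketch:
import os

# Cap example scripts at a single epoch, as the integration tests do.
os.environ['GARAGE_EXAMPLE_TEST_N_EPOCHS'] = '1'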
def train(self, num_iter, dump=False):
    """Run `num_iter` training iterations, optionally dumping tabular logs."""
    start = time.time()
    for i in range(num_iter):
        with logger.prefix(' | Iteration {} |'.format(i)):
            t1 = time.time()
            self.train_step()
            t2 = time.time()
            print('total time of one step', t2 - t1)
            print('iter', i, 'done')
            if dump:
                logger.log(tabular)
                logger.dump_all(i)
                tabular.clear()
def step_epochs(self):
    """Step through each epoch.

    This function returns a magic generator. When iterated through, this
    generator automatically performs services such as snapshotting and log
    management. It is used inside train() in each algorithm.

    The generator initializes two variables: `self.step_itr` and
    `self.step_path`. To use the generator, these two have to be updated
    manually in each epoch, as the example shows below.

    Yields:
        int: The next training epoch.

    Examples:
        for epoch in runner.step_epochs():
            runner.step_path = runner.obtain_samples(...)
            self.train_once(...)
            runner.step_itr += 1

    """
    try:
        self._start_worker()
        self._start_time = time.time()
        self.step_itr = (self.train_args.start_epoch *
                         self.train_args.n_epoch_cycles)
        self.step_path = None

        for epoch in range(self.train_args.start_epoch,
                           self.train_args.n_epochs):
            self._itr_start_time = time.time()
            with logger.prefix('epoch #%d | ' % epoch):
                yield epoch
                save_path = (self.step_path
                             if self.train_args.store_paths else None)
                self.save(epoch, save_path)
                self.log_diagnostics(self.train_args.pause_for_plot)
                logger.dump_all(self.step_itr)
                tabular.clear()
    finally:
        self._shutdown_worker()
def step_epochs(self):
    """Generator for training.

    This function serves as a generator. It is used to separate services
    such as snapshotting and sampler control from the actual training
    loop. It is used inside train() in each algorithm.

    The generator initializes two variables: `self.step_itr` and
    `self.step_path`. To use the generator, these two have to be updated
    manually in each epoch, as the example shows below.

    Yields:
        int: The next training epoch.

    Examples:
        for epoch in runner.step_epochs():
            runner.step_path = runner.obtain_samples(...)
            self.train_once(...)
            runner.step_itr += 1

    """
    try:
        self._start_worker()
        self._start_time = time.time()
        self.step_itr = (self.train_args.start_epoch *
                         self.train_args.n_epoch_cycles)
        self.step_path = None

        for epoch in range(self.train_args.start_epoch,
                           self.train_args.n_epochs):
            self._itr_start_time = time.time()
            with logger.prefix('epoch #%d | ' % epoch):
                yield epoch
                save_path = (self.step_path
                             if self.train_args.store_paths else None)
                self.save(epoch, save_path)
                self.log_diagnostics(self.train_args.pause_for_plot)
                logger.dump_all(self.step_itr)
                tabular.clear()
    finally:
        self._shutdown_worker()
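# Expanding the docstring's example into a full method: a sketch of an
# algorithm train() driven by step_epochs(); obtain_samples() and
# train_once() are assumed from the surrounding classes.
def train(self, runner):
    """Sketch: drive training through the step_epochs() generator."""
    last_return = None
    for epoch in runner.step_epochs():
        # The generator requires step_path and step_itr to be updated
        # manually in each epoch.
        runner.step_path = runner.obtain_samples(runner.step_itr)
        last_return = self.train_once(runner.step_itr, runner.step_path)
        runner.step_itr += 1
    return last_return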
"""This example demonstrates how to log a simple progress metric using dowel.

The metric is simultaneously sent to the screen, a CSV file, a text log file
and TensorBoard.
"""
import time

import dowel
from dowel import logger, tabular

logger.add_output(dowel.StdOutput())
logger.add_output(dowel.CsvOutput('progress.csv'))
logger.add_output(dowel.TextOutput('progress.txt'))
logger.add_output(dowel.TensorBoardOutput('tensorboard_logdir'))

logger.log('Starting up...')
for i in range(1000):
    logger.push_prefix('itr {}: '.format(i))
    logger.log('Running training step')

    time.sleep(0.01)  # TensorBoard doesn't like output to be too fast.

    tabular.record('itr', i)
    tabular.record('loss', 100.0 / (2 + i))
    logger.log(tabular)

    logger.pop_prefix()
    logger.dump_all()

logger.remove_all()
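# After the loop, progress.csv holds one row per dump_all() call; a quick
# inspection sketch (pandas is an assumption here, not a dowel dependency).
import pandas as pd

# Columns match the tabular.record() keys used above.
df = pd.read_csv('progress.csv')
print(df[['itr', 'loss']].tail())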
def main(args):
    import dowel
    from dowel import logger, tabular

    training.utility.set_up_logging()
    stages = {'500k': 'model.ckpt-2502500'}
    # stages = {'1000k': 'model.ckpt-5005000'}
    num_traj = 10
    # stages = {'100k': 'model.ckpt-500500', '500k': 'model.ckpt-2502500'}
    # stages = {'1M': 'model.ckpt-5005000'}
    # stages = {'final': 'model.ckpt-2652650'}
    # stages = {'100k': 'model.ckpt-500500', '500k': 'model.ckpt-2502500',
    #           '1M': 'model.ckpt-5005000'}
    # stages = {'100k': 'model.ckpt-600500', '500k': 'model.ckpt-3002500',
    #           'final': 'model.ckpt-3182650'}
    # methods = ['weighted_100']
    # methods = ['aug7']
    methods = ['baseline3']
    # rival_method = 'baseline3'
    rival_method = 'aug7'
    rival_runs = 5
    base_dir = 'benchmark'
    envs = ['finger_spin']
    # envs = ['cartpole_swingup']
    # envs = ['finger_spin', 'cartpole_swingup', 'cheetah_run', 'cup_catch']
    # envs = ['finger_spin', 'cartpole_swingup', 'reacher_easy', 'cheetah_run']
    # envs = ['cartpole_swingup', 'cheetah_run', 'walker_walk', 'cup_catch']
    # envs = ['finger_spin', 'cartpole_swingup', 'reacher_easy', 'cheetah_run',
    #         'walker_walk', 'cup_catch']

    if not check_finish(base_dir, stages, methods, envs, args.num_runs):
        exit()

    for pref, chkpt in stages.items():
        print(pref, 'begin')
        logger.add_output(dowel.StdOutput())
        logger.add_output(dowel.CsvOutput('benchmark_{}.csv'.format(pref)))
        for env in envs:
            tabular.record('Env', env)
            for method in methods:
                for run_id in range(rival_runs):
                    means, stds, all_scores = [], [], []
                    with args.params.unlocked:
                        args.params.chkpt = chkpt
                        args.params.tasks = [env]
                        args.params.planner_horizon = 12
                        args.params.eval_ratio = 1 / num_traj
                        # args.params.r_loss = 'contra'
                        # args.params.aug = 'rad'
                        args.params.planner = 'dual2'
                        args.params.rival = '{}/{}/00{}'.format(
                            env, rival_method, run_id + 1)
                    experiment = training.Experiment(
                        os.path.join(base_dir, env, method),
                        process_fn=functools.partial(process, args=args),
                        num_runs=args.num_runs,
                        ping_every=args.ping_every,
                        resume_runs=args.resume_runs,
                        planner=args.params.planner,
                        task_str=env)
                    for i, run in enumerate(experiment):
                        scores = []
                        # Use a separate index so the run index `i` is not
                        # shadowed by the trajectory loop.
                        for traj_i, unused_score in enumerate(run):
                            print('unused', unused_score)
                            scores.append(unused_score)
                            if traj_i == num_traj - 1:
                                break
                        means.append(np.mean(scores))
                        stds.append(np.std(scores))
                        all_scores.append(scores)
                    print(means)
                    # if args.params.planner != 'cem':
                    #     exit()
                    if args.params.planner == 'cem_eval':
                        np.save(
                            os.path.join(
                                args.logdir, env, method,
                                '00{}/scores_{}_cem.npy'.format(i, pref)),
                            np.array(all_scores))
                    mean, std = np.mean(means), np.std(means)
                    print('{} {}+/-{}'.format(method, int(mean), int(std)))
                    if mean > 0:
                        tabular.record(method,
                                       '{}+/-{}'.format(int(mean), int(std)))
                    np.save(
                        os.path.join(args.logdir, env, method,
                                     'scores_{}.npy'.format(pref)),
                        np.array(all_scores))
        logger.log(tabular)
        logger.dump_all()
        logger.remove_all()
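# A sketch of loading the score arrays saved above for later analysis; the
# values below are hypothetical and mirror the script's configuration.
import os

import numpy as np

logdir, env, method, pref = 'benchmark', 'finger_spin', 'baseline3', '500k'
all_scores = np.load(os.path.join(logdir, env, method,
                                  'scores_{}.npy'.format(pref)))
print('runs x trajectories:', all_scores.shape)
print('per-run means:', all_scores.mean(axis=1))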
def main(args):
    if args.output_folder is not None:
        if not os.path.exists(args.output_folder):
            raise ValueError(
                'The folder with the training files does not exist')

    policy_filename = os.path.join(args.output_folder, 'policy.th')
    dynamics_filename = os.path.join(args.output_folder, 'dynamics.th')
    config_filename = os.path.join(args.output_folder, 'config.json')
    # eval_filename = os.path.join(args.output_folder, 'eval.npz')
    text_log_file = os.path.join(args.output_folder, 'test_log.txt')
    tabular_log_file = os.path.join(args.output_folder, 'test_result.csv')
    output_test_folder = (args.output_folder + 'test'
                          if args.output_folder[-1] == '/' else
                          args.output_folder + '/test')
    if os.path.exists(output_test_folder):
        shutil.rmtree(output_test_folder)
    os.makedirs(output_test_folder)

    # Set up logger
    logger.add_output(dowel.StdOutput())
    logger.add_output(dowel.TextOutput(text_log_file))
    logger.add_output(dowel.CsvOutput(tabular_log_file))
    logger.add_output(
        dowel.TensorBoardOutput(output_test_folder, x_axis='Batch'))
    logger.log('Logging to {}'.format(output_test_folder))

    with open(config_filename, 'r') as f:
        config = json.load(f)

    seed = config.get('seed', args.seed)
    if seed is not None:
        torch.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
        random.seed(seed)

    # Metaworld
    if config['env-name'].startswith('Metaworld'):
        env_name = config['env-name'].replace('Metaworld-', '')
        metaworld = __import__('metaworld')
        class_ = getattr(metaworld, env_name)
        metaworld_benchmark = class_()
        for name, env_cls in metaworld_benchmark.train_classes.items():
            env = env_cls()
            env.close()
        benchmark = metaworld_benchmark
    # Other gym envs
    else:
        env_name = config['env-name']
        env = gym.make(config['env-name'], **config.get('env-kwargs', {}))
        env.close()
        benchmark = None

    # Policy
    policy = get_policy_for_env(env,
                                hidden_sizes=config['hidden-sizes'],
                                nonlinearity=config['nonlinearity'])
    with open(policy_filename, 'rb') as f:
        state_dict = torch.load(f, map_location=torch.device(args.device))
        policy.load_state_dict(state_dict)
    policy.share_memory()

    # Dynamics
    dynamics = get_dynamics_for_env(env,
                                    config['use_vime'],
                                    config['use_inv_vime'],
                                    args.device,
                                    config,
                                    benchmark=benchmark)
    inverse_dynamics = config['use_inv_vime']
    use_dynamics = config['use_vime'] or config['use_inv_vime']
    if use_dynamics:
        with open(dynamics_filename, 'rb') as f:
            state_dict = torch.load(f,
                                    map_location=torch.device(args.device))
            dynamics.load_state_dict(state_dict)
        dynamics.share_memory()

    # Eta
    if config['adapt_eta']:
        eta_value = torch.Tensor([config['adapted-eta']])
    else:
        eta_value = torch.Tensor([config['eta']])
    eta_value = torch.log(eta_value / (1 - eta_value))
    eta = EtaParameter(eta_value, adapt_eta=config['adapt_eta'])
    eta.share_memory()

    # Baseline
    baseline = LinearFeatureBaseline(get_input_size(env))

    # Sampler
    normalize_spaces = config.get('normalize-spaces', True)
    act_prev_mean = mp.Manager().list()
    obs_prev_mean = mp.Manager().list()
    if normalize_spaces:
        obs_prev_mean.append({
            'mean': torch.Tensor(config['obs_mean']),
            'std': torch.Tensor(config['obs_std'])
        })
        act_prev_mean.append({
            'mean': torch.Tensor(config['act_mean']),
            'std': torch.Tensor(config['act_std'])
        })
    epochs_counter = mp.Value('i', 100)
    sampler = MultiTaskSampler(config['env-name'],
                               env_kwargs=config.get('env-kwargs', {}),
                               batch_size=config['fast-batch-size'],  # TODO
                               policy=policy,
                               baseline=baseline,
                               dynamics=dynamics,
                               inverse_dynamics=inverse_dynamics,
                               env=env,
                               seed=args.seed,
                               num_workers=args.num_workers,
                               epochs_counter=epochs_counter,
                               act_prev_mean=act_prev_mean,
                               obs_prev_mean=obs_prev_mean,
                               # rew_prev_mean=rew_prev_mean,
                               eta=eta,
                               benchmark=benchmark,
                               normalize_spaces=normalize_spaces)

    logs = {'tasks': []}
    train_returns, valid_returns = [], []
    for batch in trange(args.num_batches):
        tasks = sampler.sample_test_tasks(
            num_tasks=config['meta-batch-size'])
        train_episodes, valid_episodes = sampler.sample(
            tasks,
            num_steps=args.num_steps,
            fast_lr=config['fast-lr'],
            gamma=config['gamma'],
            gae_lambda=config['gae-lambda'],
            device=args.device)

        logs['tasks'].extend(tasks)
        train_returns.append(get_returns(train_episodes[0]))
        valid_returns.append(get_returns(valid_episodes))
        logs['train_returns'] = np.concatenate(train_returns, axis=0)
        logs['valid_returns'] = np.concatenate(valid_returns, axis=0)

        tabular.record('Batch', batch)
        log_returns(train_episodes,
                    valid_episodes,
                    batch,
                    log_dynamics=use_dynamics,
                    benchmark=benchmark,
                    env=env,
                    env_name=env_name,
                    is_testing=True)
        log_trajectories(config['env-name'], output_test_folder,
                         train_episodes, valid_episodes, batch)
        logger.log(tabular)
        logger.dump_all()
        # with open(eval_filename + '_' + str(batch), 'wb') as f:
        #     np.savez(f, **logs)
    logger.remove_all()
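# If the commented-out np.savez dump is re-enabled, each per-batch file can
# be reloaded later; a sketch, with the filename following the
# eval_filename + '_' + str(batch) pattern above for batch 0.
import numpy as np

logs = np.load('eval.npz_0', allow_pickle=True)
print('mean train return:', logs['train_returns'].mean())
print('mean valid return:', logs['valid_returns'].mean())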
def train(self,
          n_epochs,
          batch_size=None,
          plot=False,
          store_episodes=False,
          pause_for_plot=False):
    """Start training.

    Args:
        n_epochs (int): Number of epochs.
        batch_size (int or None): Number of environment steps in one
            batch.
        plot (bool): Visualize an episode from the policy after each
            epoch.
        store_episodes (bool): Save episodes in snapshot.
        pause_for_plot (bool): Pause for plot.

    Raises:
        NotSetupError: If train() is called before setup().

    """
    self.batch_size = batch_size
    self.store_episodes = store_episodes
    self.pause_for_plot = pause_for_plot
    if not self._has_setup:
        raise NotSetupError(
            'Use setup() to setup trainer before training.')
    self._plot = plot

    for itr in range(self.start_itr, self.n_itr):
        with logger.prefix(f'itr #{itr} | '):
            # Train policy.
            self._algo.train(self)

            # Compute IRL and update the reward function.
            logger.log('Obtaining paths...')
            paths = self.obtain_samples(itr)
            logger.log('Processing paths...')
            paths = self._train_irl(paths, itr=itr)
            samples_data = self.process_samples(itr, paths)

            logger.log('Logging diagnostics...')
            logger.log('Time %.2f s' % (time.time() - self._start_time))
            logger.log('EpochTime %.2f s' %
                       (time.time() - self._itr_start_time))
            tabular.record('TotalEnvSteps', self._stats.total_env_steps)
            self.log_diagnostics(paths)

            logger.log('Optimizing policy...')
            logger.log('Saving snapshot...')
            self.save(itr, paths=paths)
            logger.log('Saved')
            tabular.record('Time', time.time() - self._start_time)
            tabular.record('ItrTime', time.time() - self._itr_start_time)
            logger.dump_all(itr)
            tabular.clear()

    self._shutdown_worker()
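# A usage sketch; `trainer`, `algo`, and `env` are hypothetical placeholders,
# and setup() must be called first or NotSetupError is raised (see above).
trainer.setup(algo=algo, env=env)
trainer.train(n_epochs=100,
              batch_size=4000,
              store_episodes=True)  # keep episodes in each snapshot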