def get_train_log(self, optim_infos, traj=None):
    """Aggregate optimizer statistics (and optional rollout stats) into a ``train/`` log dict.

    Keys containing ``'vec_'`` are treated as vector-valued and expanded via
    ``get_list_stats``; every other key is averaged across ``optim_infos``.
    Episode-return statistics are always taken from
    ``self.runner.train_ep_return``.

    Args:
        optim_infos: list of dicts of per-update statistics.
        traj: optional rollout trajectory providing ``actions`` and
            ``total_steps``.

    Returns:
        dict mapping ``train/<stat>`` to its aggregated value.
    """
    stats = {}
    # Partition every key seen across all optimizer infos.
    all_keys = {k for inf in optim_infos for k in inf}
    vec_keys = {k for k in all_keys if 'vec_' in k}
    for k in all_keys - vec_keys:
        stats[k] = np.mean([inf[k] for inf in optim_infos if k in inf])
    for k in vec_keys:
        vec_stats = get_list_stats([inf[k] for inf in optim_infos if k in inf])
        for name, value in vec_stats.items():
            stats[f'{k}/' + name] = value
    if traj is not None:
        for name, value in get_list_stats(traj.actions).items():
            stats['rollout_action/' + name] = value
        stats['rollout_steps_per_iter'] = traj.total_steps
    for name, value in get_list_stats(self.runner.train_ep_return).items():
        stats['episode_return/' + name] = value
    # Re-key everything under the train/ prefix.
    return {'train/' + key: val for key, val in stats.items()}
def get_train_log(self, optim_infos, traj):
    """Build the ``train/``-prefixed logging dict for one training iteration.

    Averages every key of ``optim_infos`` across updates, records rollout
    action statistics, optimization time and step counters, summarizes
    per-env ``final`` infos at episode boundaries, and tracks running
    episode returns in ``self.train_ep_return``.

    Fixes over the previous version: removed the dead locals ``all_finals``
    and ``t`` (assigned, never read), removed commented-out debug prints and
    dead histogram code, and replaced the episode-return append loop with
    ``list.extend``.

    Args:
        optim_infos: non-empty list of dicts of per-update statistics; the
            first dict's keys define which stats are logged.
        traj: rollout trajectory with ``actions``, ``dones``, ``infos``,
            ``traj_data``, ``total_steps`` and ``episode_returns``.

    Returns:
        dict mapping ``train/<stat>`` to its aggregated value.
    """
    log_info = dict()
    for key in optim_infos[0].keys():
        log_info[key] = np.mean([inf[key] for inf in optim_infos if key in inf])
    t1 = time.perf_counter()
    actions_stats = get_list_stats(traj.actions)
    for sk, sv in actions_stats.items():
        log_info['rollout_action/' + sk] = sv
    # self.optim_stime is set by the caller before optimization starts.
    log_info['optim_time'] = t1 - self.optim_stime
    log_info['rollout_steps_per_iter'] = traj.total_steps
    # Summarize rollout infos. Keys containing "final" hold terminal values,
    # so only the entries at episode boundaries are collected.
    dones = traj.dones
    for key in traj.infos[0][0].keys():
        if 'final' in key:
            # finals[t, i]: value of `key` for env i at rollout step t.
            finals = np.array([[step_data.info[i][key]
                                for i in range(len(step_data.info))]
                               for step_data in traj.traj_data])
            epfinals = []
            for i in range(dones.shape[1]):
                di = dones[:, i]
                if not np.any(di):
                    # Episode never terminated: use the last recorded value.
                    epfinals.append(finals[-1, i])
                else:
                    for idx in np.where(di)[0]:
                        epfinals.append(finals[idx, i])
            info_list = epfinals
        else:
            info_list = [tuple(info.get(key, 0) for info in infos)
                         for infos in traj.infos]
        try:
            info_stats = get_list_stats(info_list)
        except Exception:
            # Best-effort: skip info keys whose values cannot be summarized
            # (e.g. non-numeric entries).
            continue
        for sk, sv in info_stats.items():
            log_info['rollout_{}/'.format(key) + sk] = sv
    # Extend the running episode-return buffer and log its statistics.
    self.train_ep_return.extend(chain(*traj.episode_returns))
    ep_returns_stats = get_list_stats(self.train_ep_return)
    for sk, sv in ep_returns_stats.items():
        log_info['episode_return/' + sk] = sv
    return {'train/' + key: val for key, val in log_info.items()}
def get_train_log(self, optim_infos):
    """Summarize optimizer statistics and optimization time into a ``train/`` log dict.

    Keys containing ``'vec_'`` are treated as vector-valued and expanded
    with ``get_list_stats``; all other keys are averaged across
    ``optim_infos``.

    Args:
        optim_infos: list of dicts of per-update statistics.

    Returns:
        dict mapping ``train/<stat>`` to its aggregated value.
    """
    metrics = {}
    seen_keys = {k for inf in optim_infos for k in inf}
    for key in seen_keys:
        values = [inf[key] for inf in optim_infos if key in inf]
        if 'vec_' in key:
            for stat_name, stat_val in get_list_stats(values).items():
                metrics[f'{key}/' + stat_name] = stat_val
        else:
            metrics[key] = np.mean(values)
    # Time spent in the optimization phase, measured from self.optim_stime.
    metrics['optim_time'] = time.perf_counter() - self.optim_stime
    return {'train/' + key: val for key, val in metrics.items()}
def eval(self, render=False, save_eval_traj=False, sample=True,
         eval_num=1, sleep_time=0, smooth=True, no_tqdm=None):
    """Run evaluation rollouts and aggregate per-episode statistics.

    Performs ``eval_num`` rollouts, gathers per-env returns, episode
    lengths and last-step infos, optionally saves trajectories, and keeps
    an exponentially smoothed return used for best-checkpoint tracking.

    Args:
        render: render the environment during rollout.
        save_eval_traj: save rendered trajectories to ``cfg.alg.eval_dir``.
        sample: sample actions (combined with ``cfg.alg.sample_action``).
        eval_num: number of evaluation rollouts.
        sleep_time: delay between steps, forwarded to ``rollout_once``.
        smooth: update the exponentially smoothed return and best flag.
        no_tqdm: truthy to force-disable the progress bar; otherwise the
            bar is shown only when ``cfg.alg.test`` is set.

    Returns:
        (log_info, raw_traj_info): the ``eval/``-prefixed statistics dict
        and the raw per-episode data.
    """
    episode_lengths = []
    episode_returns = []
    final_infos = []
    # An explicit no_tqdm flag wins; otherwise show the bar only in test mode.
    hide_bar = bool(no_tqdm) if no_tqdm else not cfg.alg.test
    for _ in tqdm(range(eval_num), disable=hide_bar):
        traj, _ = self.rollout_once(
            time_steps=cfg.alg.episode_steps,
            return_on_done=True,
            sample=cfg.alg.sample_action and sample,
            render=render,
            sleep_time=sleep_time,
            render_image=save_eval_traj,
            evaluation=True,
        )
        steps = traj.steps_til_done.copy().tolist()
        rewards = traj.raw_rewards
        infos = traj.infos
        for env_id in range(traj.num_envs):
            done_step = steps[env_id]
            # Per-env return: sum of raw rewards up to the done step.
            episode_returns.append(np.sum(rewards[:done_step, env_id]))
            final_infos.append(infos[done_step - 1][env_id])
        episode_lengths.extend(steps)
        if save_eval_traj:
            save_traj(traj, cfg.alg.eval_dir)
    raw_traj_info = {
        'return': episode_returns,
        'episode_length': episode_lengths,
        'lst_step_info': final_infos,
    }
    log_info = {}
    for key, val in raw_traj_info.items():
        if 'info' in key:  # raw info objects are not summarizable
            continue
        for stat_name, stat_val in get_list_stats(val).items():
            log_info['eval/' + key + '/' + stat_name] = stat_val
    if smooth:
        mean_ret = log_info['eval/return/mean']
        if self.smooth_eval_return is None:
            self.smooth_eval_return = mean_ret
        else:
            # Exponential moving average with coefficient smooth_tau.
            self.smooth_eval_return = (self.smooth_eval_return * self.smooth_tau
                                       + (1 - self.smooth_tau) * mean_ret)
        log_info['eval/smooth_return/mean'] = self.smooth_eval_return
        self._eval_is_best = self.smooth_eval_return > self._best_eval_ret
        if self._eval_is_best:
            self._best_eval_ret = self.smooth_eval_return
    return log_info, raw_traj_info