Exemple #1
0
    def get_train_log(self, optim_infos, traj=None):
        """Aggregate optimizer (and optional rollout) statistics for logging.

        Keys containing ``'vec_'`` are summarised with ``get_list_stats``;
        every other key is averaged with ``np.mean``. In both cases only the
        entries of ``optim_infos`` that actually contain the key contribute.

        Args:
            optim_infos: sequence of dicts (it is traversed more than once).
            traj: optional trajectory. When given, statistics of
                ``traj.actions``, the value of ``traj.total_steps`` and
                statistics of ``self.runner.train_ep_return`` are added.

        Returns:
            dict mapping ``'train/<name>'`` to the aggregated value.
        """
        every_name = {name for info in optim_infos for name in info.keys()}
        vec_names = {name for name in every_name if 'vec_' in name}
        plain_names = every_name - vec_names

        stats = {}
        for name in plain_names:
            samples = [info[name] for info in optim_infos if name in info]
            stats[name] = np.mean(samples)

        for name in vec_names:
            samples = [info[name] for info in optim_infos if name in info]
            for stat_name, stat_val in get_list_stats(samples).items():
                stats[f'{name}/' + stat_name] = stat_val

        if traj is not None:
            for stat_name, stat_val in get_list_stats(traj.actions).items():
                stats['rollout_action/' + stat_name] = stat_val
            stats['rollout_steps_per_iter'] = traj.total_steps

            return_stats = get_list_stats(self.runner.train_ep_return)
            for stat_name, stat_val in return_stats.items():
                stats['episode_return/' + stat_name] = stat_val

        # Every entry is namespaced under "train/" in the returned dict.
        return {'train/' + name: value for name, value in stats.items()}
Exemple #2
0
    def get_train_log(self, optim_infos, traj):
        """Assemble the per-iteration training log from optimizer and rollout data.

        Args:
            optim_infos: non-empty sequence of dicts. Only the keys of the
                first dict are reported; each is averaged (``np.mean``) over
                the dicts that contain it.
            traj: rollout trajectory. This method reads ``actions``,
                ``total_steps``, ``dones``, ``infos``, ``traj_data`` and
                ``episode_returns`` from it.

        Returns:
            dict mapping ``'train/<name>'`` to the logged value.

        Side effects:
            Appends every value found in ``traj.episode_returns`` to
            ``self.train_ep_return``.
        """
        log_info = dict()
        for key in optim_infos[0].keys():
            log_info[key] = np.mean(
                [inf[key] for inf in optim_infos if key in inf])
        # The timestamp is taken before the rollout statistics are computed.
        t1 = time.perf_counter()
        actions_stats = get_list_stats(traj.actions)
        for sk, sv in actions_stats.items():
            log_info['rollout_action/' + sk] = sv
        log_info['optim_time'] = t1 - self.optim_stime
        log_info['rollout_steps_per_iter'] = traj.total_steps

        # Log env infos; the key set comes from the first entry of the first
        # element of traj.infos.
        dones = traj.dones
        for key in traj.infos[0][0].keys():
            if "final" in key:
                # finals[t, e]: value of `key` in entry e of the t-th element
                # of traj.traj_data.
                finals = np.array([
                    [env_info[key] for env_info in step_data.info]
                    for step_data in traj.traj_data
                ])
                epfinals = []
                for i in range(dones.shape[1]):
                    di = dones[:, i]
                    if not np.any(di):
                        # No done flag in this column: use the last row.
                        epfinals.append(finals[-1, i])
                    else:
                        # One value per done flag, read at the flagged row.
                        epfinals.extend(
                            finals[idx, i] for idx in np.where(di)[0])
                info_list = epfinals
            else:
                info_list = [
                    tuple(info.get(key, 0) for info in infos)
                    for infos in traj.infos
                ]
            try:
                info_stats = get_list_stats(info_list)
                for sk, sv in info_stats.items():
                    log_info['rollout_{}/'.format(key) + sk] = sv
            except Exception:
                # Best effort: a key whose values cannot be summarised is
                # skipped (presumably non-numeric infos -- TODO confirm).
                continue

        for epr in chain.from_iterable(traj.episode_returns):
            self.train_ep_return.append(epr)
        ep_returns_stats = get_list_stats(self.train_ep_return)
        for sk, sv in ep_returns_stats.items():
            log_info['episode_return/' + sk] = sv

        # NOTE: histogram logging of rollout actions is currently disabled:
        # histogram_log = {'histogram': {'rollout_action': traj.actions}}
        # self.tf_logger.save_dict(histogram_log, step=self.cur_step)
        return {'train/' + key: val for key, val in log_info.items()}
Exemple #3
0
    def get_train_log(self, optim_infos):
        """Summarise optimizer statistics and elapsed optimisation time.

        Values are grouped per key across ``optim_infos``. Keys containing
        ``'vec_'`` are summarised with ``get_list_stats`` (one output entry
        per returned statistic); all other keys are averaged with ``np.mean``.
        ``optim_time`` is ``time.perf_counter()`` minus ``self.optim_stime``.

        Args:
            optim_infos: iterable of dicts.

        Returns:
            dict mapping ``'train/<name>'`` to the aggregated value.
        """
        # Gather, for every key, the values of the entries that contain it.
        grouped = {}
        for info in optim_infos:
            for name, value in info.items():
                grouped.setdefault(name, []).append(value)

        summary = {}
        # Scalar keys first ...
        for name, samples in grouped.items():
            if 'vec_' not in name:
                summary[name] = np.mean(samples)
        # ... then vector keys, expanded into their individual statistics.
        for name, samples in grouped.items():
            if 'vec_' in name:
                for stat_name, stat_val in get_list_stats(samples).items():
                    summary[f'{name}/' + stat_name] = stat_val

        summary['optim_time'] = time.perf_counter() - self.optim_stime
        return {'train/' + name: value for name, value in summary.items()}
Exemple #4
0
    def eval(self, render=False, save_eval_traj=False, sample=True,
             eval_num=1, sleep_time=0, smooth=True, no_tqdm=None):
        """Run evaluation rollouts and summarise returns and episode lengths.

        Args:
            render: forwarded to ``rollout_once``.
            save_eval_traj: forwarded to ``rollout_once`` as ``render_image``;
                when truthy each trajectory is also passed to ``save_traj``
                together with ``cfg.alg.eval_dir``.
            sample: combined with ``cfg.alg.sample_action`` (logical and) to
                form the ``sample`` argument of ``rollout_once``.
            eval_num: number of ``rollout_once`` calls.
            sleep_time: forwarded to ``rollout_once``.
            smooth: when truthy, update ``self.smooth_eval_return`` (a moving
                average weighted by ``self.smooth_tau``) and the
                ``self._eval_is_best`` / ``self._best_eval_ret`` bookkeeping.
            no_tqdm: explicit switch for disabling the progress bar. ``None``
                (the default) falls back to ``not cfg.alg.test``.

        Returns:
            ``(log_info, raw_traj_info)``: ``log_info`` maps
            ``'eval/<name>/<stat>'`` to summary statistics, and
            ``raw_traj_info`` holds the per-episode ``'return'``,
            ``'episode_length'`` and ``'lst_step_info'`` lists.
        """
        time_steps = []
        rets = []
        lst_step_infos = []
        # Compare against None so that an explicit ``no_tqdm=False`` is
        # honoured instead of silently falling back to the config default.
        if no_tqdm is not None:
            disable_tqdm = bool(no_tqdm)
        else:
            disable_tqdm = not cfg.alg.test
        for _ in tqdm(range(eval_num), disable=disable_tqdm):
            traj, _ = self.rollout_once(time_steps=cfg.alg.episode_steps,
                                        return_on_done=True,
                                        sample=cfg.alg.sample_action and sample,
                                        render=render,
                                        sleep_time=sleep_time,
                                        render_image=save_eval_traj,
                                        evaluation=True)
            tsps = traj.steps_til_done.copy().tolist()
            rewards = traj.raw_rewards
            infos = traj.infos
            for ej in range(traj.num_envs):
                # Only the first tsps[ej] rewards of column ej are summed.
                ret = np.sum(rewards[:tsps[ej], ej])
                rets.append(ret)
                # Info entry at index tsps[ej] - 1 for this env.
                lst_step_infos.append(infos[tsps[ej] - 1][ej])
            time_steps.extend(tsps)
            if save_eval_traj:
                save_traj(traj, cfg.alg.eval_dir)

        raw_traj_info = {'return': rets,
                         'episode_length': time_steps,
                         'lst_step_info': lst_step_infos}
        log_info = dict()
        for key, val in raw_traj_info.items():
            # Entries whose key contains 'info' are returned raw, not summarised.
            if 'info' in key:
                continue
            val_stats = get_list_stats(val)
            for sk, sv in val_stats.items():
                log_info['eval/' + key + '/' + sk] = sv
        if smooth:
            mean_ret = log_info['eval/return/mean']
            if self.smooth_eval_return is None:
                # First evaluation: seed the moving average.
                self.smooth_eval_return = mean_ret
            else:
                self.smooth_eval_return = (
                    self.smooth_eval_return * self.smooth_tau
                    + (1 - self.smooth_tau) * mean_ret
                )
            log_info['eval/smooth_return/mean'] = self.smooth_eval_return
            if self.smooth_eval_return > self._best_eval_ret:
                self._eval_is_best = True
                self._best_eval_ret = self.smooth_eval_return
            else:
                self._eval_is_best = False
        return log_info, raw_traj_info