def summarize_episode(episode, config, datadir, writer, prefix):
    """Log summary statistics for one finished episode.

    Prints a human-readable line, appends the metrics to
    ``metrics.jsonl`` in ``config.logdir``, and writes TensorBoard
    scalars (plus a video for test episodes).

    :param episode: dict of numpy arrays with key 'reward'; may contain
        a boolean 'success' array and, for test episodes, 'image'.
    :param config: config object with ``action_repeat`` and ``logdir``.
    :param writer: TF summary writer (env may run in a different thread).
    :param prefix: metric namespace, e.g. 'train' or 'test'.
    """
    episodes, steps = tools.count_episodes(datadir)
    # First entry of 'reward' is the reset step, hence the -1.
    length = (len(episode['reward']) - 1) * config.action_repeat
    ret = float(episode['reward'].sum())  # reuse below instead of re-summing
    metrics = [
        (f'{prefix}/return', ret),
        (f'{prefix}/length', len(episode['reward']) - 1),
        ('episodes', episodes)]
    if 'success' in episode:
        success = True in episode['success']
        # `success` is already a bool; no need to compare against 1.
        success_str = "succeeded" if success else "did not succeed"
        metrics.append((f'{prefix}/success', success))
        print(
            f'{prefix.title()} episode of length {length} with return '
            f'{ret:.1f}, which {success_str}.'
        )
    else:
        print(
            f'{prefix.title()} episode of length {length} with return '
            f'{ret:.1f}.'
        )
    sys.stdout.flush()
    step = count_steps(datadir, config)
    with (config.logdir / 'metrics.jsonl').open('a') as f:
        f.write(json.dumps(dict([('step', step)] + metrics)) + '\n')
    with writer.as_default():  # Env might run in a different thread.
        tf.summary.experimental.set_step(step)
        # Plain loop: the summaries are side effects, not a list to build.
        for k, v in metrics:
            tf.summary.scalar('sim/' + k, v)
        if prefix == 'test':
            tools.video_summary(f'sim/{prefix}/video', episode['image'][None])
def _process_data_to_latent(self, num_episodes=None):
    """Encode stored raw episodes into latent features and fill the replay buffer.

    Loads saved episode files one by one, runs the encoder and dynamics
    model over each, and adds (feat_t, action_{t+1}, feat_{t+1},
    reward_{t+1}, terminal_{t+1}) transitions to ``self.latent_buffer``
    tagged as 'real' samples.

    :param num_episodes: number of episodes to process; defaults to the
        total number of episodes currently in ``self._c.datadir``.
    """
    if num_episodes is None:
        num_episodes, _ = tools.count_episodes(self._c.datadir)
    for _ in range(num_episodes):
        filename = next(self._episode_itterator)
        try:
            with filename.open('rb') as f:
                episode = np.load(f)
                # Materialize the lazy NpzFile into a plain dict.
                episode = {k: episode[k] for k in episode.keys()}
        except Exception as e:
            # Best-effort: skip unreadable files rather than aborting.
            print(f'Could not load episode: {e}')
            continue
        obs = preprocess_raw(episode, self._c)
        if not self._c.pcont:
            # Without predicted continuation, mark every step non-terminal.
            obs['terminal'] = tf.zeros_like(obs['reward'])
        # NOTE(review): tape watches no variables and its result is
        # discarded (`as _`) — presumably just scopes the forward pass
        # without tracking gradients; confirm intent.
        with tf.GradientTape(watch_accessed_variables=False) as _:
            embed = self._encode(obs)
            post, prior = self._dynamics.observe(
                tf.expand_dims(embed, 0), tf.expand_dims(obs['action'], 0))
            feat = flatten(self._dynamics.get_feat(post))
        # Shift by one step so each sample pairs state_t with the
        # action/reward/terminal that follow it.
        self.latent_buffer.add_samples(feat.numpy()[:-1], obs['action'].numpy()[1:], feat.numpy()[1:], obs['reward'].numpy()[1:], obs['terminal'].numpy()[1:], sample_type='real')
def __init__(self, config, datadir, actspace, writer):
    """Set up the agent: replay buffer, RNG, data iterators, and model.

    :param config: config object (buffer sizes, seed, lmbd, alpha,
        datadir, ...).
    :param datadir: data directory — NOTE(review): unused here;
        ``config.datadir`` is what is read below. Confirm they match.
    :param actspace: gym-style action space; discrete if it has ``n``.
    :param writer: TF summary writer, stored for later logging.
    """
    self._c = config
    self._actspace = actspace
    # Discrete spaces expose `n`; continuous ones expose `shape`.
    self._actdim = actspace.n if hasattr(actspace, 'n') else actspace.shape[0]
    episodes, steps = tools.count_episodes(config.datadir)
    # Latent state size is deterministic + stochastic parts concatenated.
    self.latent_buffer = buffer.LatentReplayBuffer(
        steps, steps, self._c.deter_size + self._c.stoch_size, self._actdim)
    self.lmbd = config.lmbd
    self.alpha = config.alpha
    self._writer = writer
    tf.summary.experimental.set_step(0)
    self._metrics = dict()
    self._agent_step = 0
    self._model_step = 0
    self._random = np.random.RandomState(config.seed)
    # Compute dtype follows the global mixed-precision policy.
    self._float = prec.global_policy().compute_dtype
    self._dataset = iter(load_dataset(config.datadir, self._c))
    self._episode_itterator = episode_itterator(config.datadir, self._c)
    self._build_model()
    # NOTE(review): 10 eval-prefixed train steps run up front —
    # presumably to build/trace model variables before use; confirm.
    for _ in range(10):
        self._model_train_step(next(self._dataset), prefix='eval')
def summarize_episode(episode_list, config, datadir, writer, prefix):
    """
    Write episode summary in tensorflow logs.
    Even if multi-agent returns multiple episodes (1 for each agent), the summary is written w.r.t. the first agent.
    :param episode_list: list of episodes (in multi-agent setting, each agent produce 1 episode)
    :param config: config dictionary
    :param datadir: dir where the episodes are stored
    :param writer: tf writer for logging
    :param prefix: either `train` or `test`
    :return:
    """
    episode = episode_list[0]
    episodes, steps = tools.count_episodes(datadir)
    # First entry of 'reward' is the reset step, hence the -1.
    episode_len = len(episode['reward']) - 1
    length = episode_len * config.action_repeat
    ret = float(episode['reward'].sum())  # reuse below instead of re-summing
    metrics = [
        (f'{prefix}/return', ret),
        (f'{prefix}/length', episode_len),
        (f'{prefix}/progress', float(max(episode['progress']))),
        ('episodes', episodes)]
    step = tools.count_steps(datadir, config)
    with writer.as_default():  # Env might run in a different thread.
        tf.summary.experimental.set_step(step)
        # Plain loop: the summaries are side effects, not a list to build.
        for k, v in metrics:
            tf.summary.scalar(k, v)
    print(f'\t[Summary] {prefix.title()} episode of length {episode_len} ({length} sim steps) with return {ret:.1f}.')
def summarize_episode(episode, config, datadir, writer, prefix):
    """Log summary statistics for one finished episode.

    Prints a human-readable line, appends the metrics to
    ``metrics.jsonl`` in ``config.logdir``, and writes TensorBoard
    scalars (plus a video for test episodes).

    :param episode: dict of numpy arrays with key 'reward' and, for
        test episodes, 'image'.
    :param config: config object with ``action_repeat`` and ``logdir``.
    :param writer: TF summary writer (env may run in a different thread).
    :param prefix: metric namespace, e.g. 'train' or 'test'.
    """
    episodes, steps = tools.count_episodes(datadir)
    # First entry of 'reward' is the reset step, hence the -1.
    length = (len(episode['reward']) - 1) * config.action_repeat
    ret = float(episode['reward'].sum())  # reuse below instead of re-summing
    print(
        f'{prefix.title()} episode of length {length} with return {ret:.1f}.')
    metrics = [
        (f'{prefix}/return', ret),
        (f'{prefix}/length', len(episode['reward']) - 1),
        ('episodes', episodes)]
    step = count_steps(datadir, config)
    with (config.logdir / 'metrics.jsonl').open('a') as f:
        f.write(json.dumps(dict([('step', step)] + metrics)) + '\n')
    with writer.as_default():  # Env might run in a different thread.
        tf.summary.experimental.set_step(step)
        # Plain loop: the summaries are side effects, not a list to build.
        for k, v in metrics:
            tf.summary.scalar('sim/' + k, v)
        if prefix == 'test':
            tools.video_summary(f'sim/{prefix}/video', episode['image'][None])
def summarize_episode(config, datadir, writer, prefix):
    """Summarize the most recently written episode file.

    Loads the newest ``.npz`` in ``datadir``, prints its stats, and
    appends the metrics to ``metrics.jsonl`` in ``config.logdir``.

    :param config: config object with ``action_repeat`` and ``logdir``.
    :param datadir: directory containing saved ``.npz`` episodes.
    :param writer: unused here — no TF summaries are written. TODO:
        confirm this is intentional or remove the parameter upstream.
    :param prefix: metric namespace, e.g. 'train' or 'test'.
    """
    list_of_files = glob.glob(str(datadir) + '/*.npz')
    # NOTE(review): raises ValueError if the directory has no .npz yet.
    latest_file = max(list_of_files, key=os.path.getctime)
    episode = np.load(latest_file)
    # Materialize the lazy NpzFile into a plain dict.
    episode = {k: episode[k] for k in episode.keys()}
    episodes, steps = tools.count_episodes(datadir)
    print(episodes, steps)
    # First entry of 'reward' is the reset step, hence the -1.
    length = (len(episode['reward']) - 1) * config.action_repeat
    ret = float(episode['reward'].sum())  # reuse below instead of re-summing
    print(
        f'{prefix.title()} episode of length {length} with return {ret:.1f}.')
    metrics = [
        (f'{prefix}/return', ret),
        (f'{prefix}/length', len(episode['reward']) - 1),
        ('episodes', episodes)]
    step = count_steps(datadir, config)
    with (config.logdir / 'metrics.jsonl').open('a') as f:
        f.write(json.dumps(dict([('step', step)] + metrics)) + '\n')
def summarize_episode(episode, config, datadir, writer, prefix):
    """Print, persist, and TF-log summary statistics for one episode.

    Appends the metrics to ``metrics.jsonl`` under ``config.logdir``
    and records TensorBoard scalars (plus a video for test episodes).
    """
    episodes, _ = tools.count_episodes(datadir)
    num_transitions = len(episode["reward"]) - 1
    length = num_transitions * config.action_repeat
    ret = episode["reward"].sum()
    print(f"{prefix.title()} episode of length {length} with return {ret:.1f}.")
    metrics = [
        (f"{prefix}/return", float(ret)),
        (f"{prefix}/length", num_transitions),
        ("episodes", episodes),
    ]
    step = count_steps(datadir, config)
    record = dict([("step", step)] + metrics)
    with (config.logdir / "metrics.jsonl").open("a") as f:
        f.write(json.dumps(record) + "\n")
    with writer.as_default():  # Env might run in a different thread.
        tf.summary.experimental.set_step(step)
        for key, value in metrics:
            tf.summary.scalar("sim/" + key, value)
        if prefix == "test":
            tools.video_summary(f"sim/{prefix}/video", episode["image"][None])
def summarize_episode(config, datadir, writer, prefix):
    """Summarize the most recently written episode file.

    Loads the newest ``.npz`` in ``datadir``, prints its stats, appends
    the metrics to ``metrics.jsonl`` in ``config.logdir``, and writes
    TensorBoard scalars plus a video summary.

    :param config: config object with ``action_repeat`` and ``logdir``.
    :param datadir: directory containing saved ``.npz`` episodes.
    :param writer: TF summary writer (env may run in a different thread).
    :param prefix: metric namespace, e.g. 'train' or 'test'.
    """
    list_of_files = glob.glob(str(datadir) + '/*.npz')
    # NOTE(review): raises ValueError if the directory has no .npz yet.
    latest_file = max(list_of_files, key=os.path.getctime)
    episode = np.load(latest_file)
    # Materialize the lazy NpzFile into a plain dict.
    episode = {k: episode[k] for k in episode.keys()}
    episodes, steps = tools.count_episodes(datadir)
    print(episodes, steps)
    # First entry of 'reward' is the reset step, hence the -1.
    length = (len(episode['reward']) - 1) * config.action_repeat
    ret = float(episode['reward'].sum())  # reuse below instead of re-summing
    print(
        f'{prefix.title()} episode of length {length} with return {ret:.1f}.')
    metrics = [
        (f'{prefix}/return', ret),
        (f'{prefix}/length', len(episode['reward']) - 1),
        ('episodes', episodes)]
    step = count_steps(datadir, config)
    with (config.logdir / 'metrics.jsonl').open('a') as f:
        f.write(json.dumps(dict([('step', step)] + metrics)) + '\n')
    with writer.as_default():  # Env might run in a different thread.
        tf.summary.experimental.set_step(step)
        # Plain loop: the summaries are side effects, not a list to build.
        for k, v in metrics:
            tf.summary.scalar('sim/' + k, v)
        tools.video_summary(f'sim/{prefix}/video', episode['image'][None])
def count_steps(datadir, config):
    """Return the total simulator steps represented by the stored episodes."""
    _, steps = tools.count_episodes(datadir)
    return steps * config.action_repeat