def summarize_episode(episode, config, datadir, writer, prefix):
    """Log summary statistics for a finished episode.

    Prints a one-line summary, appends scalar metrics to metrics.jsonl,
    writes scalars to TensorBoard, and (for test episodes) logs a video.

    Args:
        episode: Dict of numpy arrays; must contain 'reward', and may
            contain 'success' and 'image'.
        config: Config object providing `action_repeat` and `logdir`.
        datadir: Directory holding saved episodes (pathlib.Path).
        writer: TensorBoard summary writer.
        prefix: Metric namespace, e.g. 'train' or 'test'.
    """
    episodes, steps = tools.count_episodes(datadir)
    length = (len(episode['reward']) - 1) * config.action_repeat
    ret = episode['reward'].sum()
    metrics = [
        (f'{prefix}/return', float(ret)),
        (f'{prefix}/length', len(episode['reward']) - 1),
        ('episodes', episodes),
    ]
    if 'success' in episode:
        # The episode counts as a success if any step reported success.
        success = True in episode['success']
        success_str = 'succeeded' if success else 'did not succeed'
        metrics.append((f'{prefix}/success', success))
        print(
            f'{prefix.title()} episode of length {length} with return '
            f'{ret:.1f}, which {success_str}.'
        )
    else:
        print(
            f'{prefix.title()} episode of length {length} with return {ret:.1f}.'
        )
    sys.stdout.flush()
    step = count_steps(datadir, config)
    with (config.logdir / 'metrics.jsonl').open('a') as f:
        f.write(json.dumps(dict([('step', step)] + metrics)) + '\n')
    with writer.as_default():  # Env might run in a different thread.
        tf.summary.experimental.set_step(step)
        # Plain loop instead of a comprehension evaluated only for its
        # side effects (which builds a throwaway list).
        for k, v in metrics:
            tf.summary.scalar('sim/' + k, v)
        if prefix == 'test':
            tools.video_summary(f'sim/{prefix}/video', episode['image'][None])
def summarize_episode(episode, config, datadir, writer, prefix):
    """Print a one-line episode summary and log scalar metrics to
    TensorBoard, plus a video for test episodes.

    Args:
        episode: Dict of numpy arrays; must contain 'reward', and 'image'
            when `prefix == 'test'`.
        config: Config object providing `action_repeat`.
        datadir: Unused here; kept for signature parity with siblings.
        writer: TensorBoard summary writer.
        prefix: Metric namespace, e.g. 'train' or 'test'.
    """
    length = (len(episode['reward']) - 1) * config.action_repeat
    ret = episode['reward'].sum()
    print(
        f'{prefix.title()} episode of length {length} with return {ret:.1f}.')
    metrics = [
        (f'{prefix}/return', float(ret)),
        (f'{prefix}/length', len(episode['reward']) - 1),
    ]
    with writer.as_default():  # Env might run in a different thread.
        # Plain loop instead of a comprehension evaluated for side effects.
        for k, v in metrics:
            tf.summary.scalar('sim/' + k, v)
        if prefix == 'test':
            tools.video_summary(f'sim/{prefix}/video', episode['image'][None])
def post_process_episodes(self, cache, episode_name, episode):
    """Insert a finished episode into the cache, evict old episodes beyond
    the dataset step budget, and log dataset statistics plus a policy video.

    Args:
        cache: Dict mapping episode names to episode dicts; mutated in place.
        episode_name: Key under which to store the new episode.
        episode: Episode dict with 'rewards' and 'obp1s' (observation frames).
    """
    total = 0
    length = len(episode["rewards"]) - 1
    score = float(episode["rewards"].astype(np.float64).sum())
    video = episode["obp1s"]
    # Walk existing episodes newest-first: keep them while there is still
    # step budget left for the incoming episode, evict the rest. sorted()
    # materializes the items, so deleting from `cache` mid-loop is safe.
    for key, ep in sorted(cache.items(), key=lambda x: x[0], reverse=True):
        if total <= self._c.max_dataset_steps - length:
            total += len(ep["rewards"]) - 1
        else:
            del cache[key]
    cache[str(episode_name)] = episode
    step = count_steps(self.datadir)
    # All summaries are indexed by the env step count, i.e. model steps
    # scaled by action_repeat (hoisted instead of recomputed per summary).
    env_step = step * self._c.action_repeat
    with self._writer.as_default():
        tf.summary.scalar("dataset_size", total + length, step=env_step)
        tf.summary.scalar("train_episodes", len(cache), step=env_step)
        tf.summary.scalar("train_return", score, step=env_step)
        tf.summary.scalar("train_length", length, step=env_step)
        print("save train_policy!!!!")
        tools.video_summary("train_policy", np.array(video[None]), env_step)
        print("the episodes size now is:", total + length, "steps")
def summarize_episode(episode, config, datadir, writer, prefix):
    """Log summary statistics for a finished episode.

    Prints a one-line summary, appends scalar metrics to metrics.jsonl,
    writes scalars to TensorBoard, and logs a video for test episodes.

    Args:
        episode: Dict of numpy arrays; must contain 'reward', and 'image'
            when `prefix == 'test'`.
        config: Config object providing `action_repeat` and `logdir`.
        datadir: Directory holding saved episodes (pathlib.Path).
        writer: TensorBoard summary writer.
        prefix: Metric namespace, e.g. 'train' or 'test'.
    """
    episodes, steps = tools.count_episodes(datadir)
    length = (len(episode['reward']) - 1) * config.action_repeat
    ret = episode['reward'].sum()
    print(
        f'{prefix.title()} episode of length {length} with return {ret:.1f}.')
    metrics = [
        (f'{prefix}/return', float(ret)),
        (f'{prefix}/length', len(episode['reward']) - 1),
        ('episodes', episodes),
    ]
    step = count_steps(datadir, config)
    with (config.logdir / 'metrics.jsonl').open('a') as f:
        f.write(json.dumps(dict([('step', step)] + metrics)) + '\n')
    with writer.as_default():  # Env might run in a different thread.
        tf.summary.experimental.set_step(step)
        # Plain loop instead of a comprehension evaluated for side effects.
        for k, v in metrics:
            tf.summary.scalar('sim/' + k, v)
        if prefix == 'test':
            tools.video_summary(f'sim/{prefix}/video', episode['image'][None])
def summarize_episode(episode, config, datadir, writer, prefix):
    """Log summary statistics for a finished episode.

    Prints a one-line summary, appends scalar metrics to metrics.jsonl,
    writes scalars to TensorBoard, and logs a video for test episodes.

    Args:
        episode: Dict of numpy arrays; must contain 'reward', and 'image'
            when `prefix == 'test'`.
        config: Config object providing `action_repeat` and `logdir`.
        datadir: Directory holding saved episodes (pathlib.Path).
        writer: TensorBoard summary writer.
        prefix: Metric namespace, e.g. 'train' or 'test'.
    """
    episodes, steps = tools.count_episodes(datadir)
    length = (len(episode["reward"]) - 1) * config.action_repeat
    ret = episode["reward"].sum()
    print(f"{prefix.title()} episode of length {length} with return {ret:.1f}.")
    metrics = [
        (f"{prefix}/return", float(ret)),
        (f"{prefix}/length", len(episode["reward"]) - 1),
        ("episodes", episodes),
    ]
    step = count_steps(datadir, config)
    with (config.logdir / "metrics.jsonl").open("a") as f:
        f.write(json.dumps(dict([("step", step)] + metrics)) + "\n")
    with writer.as_default():  # Env might run in a different thread.
        tf.summary.experimental.set_step(step)
        # Plain loop instead of a comprehension evaluated for side effects.
        for k, v in metrics:
            tf.summary.scalar("sim/" + k, v)
        if prefix == "test":
            tools.video_summary(f"sim/{prefix}/video", episode["image"][None])
def _image_summaries(self, data, embed, image_pred, step=None, prefix='train'):
    """Log a comparison video for six sequences: ground truth,
    reconstruction, open-loop prediction, and the two error maps."""
    ground_truth = data['image'][:6] + 0.5
    reconstruction = image_pred.mode()[:6]
    # Condition on the first five observed steps, then imagine the rest
    # of the sequence from the final posterior state.
    posterior, _ = self._dynamics.observe(embed[:6, :5], data['action'][:6, :5])
    last_state = {key: value[:, -1] for key, value in posterior.items()}
    imagined = self._dynamics.imagine(data['action'][:6, 5:], last_state)
    open_loop = self._decode(self._dynamics.get_feat(imagined)).mode()
    prediction = tf.concat([reconstruction[:, :5] + 0.5, open_loop + 0.5], 1)
    # Shift error maps from [-1, 1] into [0, 1] for display.
    prior_error = (prediction - ground_truth + 1) / 2
    posterior_error = (reconstruction + 0.5 - ground_truth + 1) / 2
    panels = [ground_truth, reconstruction + 0.5, prediction,
              prior_error, posterior_error]
    video = tf.concat(panels, 2)
    with self._writer.as_default():
        tools.video_summary('agent/' + prefix, video.numpy(), step=step)
def summarize_episode(config, datadir, writer, prefix):
    """Summarize the most recently saved episode in `datadir`.

    Loads the newest .npz episode file, prints a one-line summary, appends
    scalar metrics to metrics.jsonl, and logs scalars plus a video to
    TensorBoard.

    Args:
        config: Config object providing `action_repeat` and `logdir`.
        datadir: Directory containing saved episode .npz files.
        writer: TensorBoard summary writer.
        prefix: Metric namespace, e.g. 'train' or 'test'.
    """
    list_of_files = glob.glob(str(datadir) + '/*.npz')
    latest_file = max(list_of_files, key=os.path.getctime)
    # np.load on an .npz keeps the archive open; use a context manager so
    # the file handle is closed instead of leaked.
    with np.load(latest_file) as data:
        episode = {k: data[k] for k in data}
    episodes, steps = tools.count_episodes(datadir)
    print(episodes, steps)
    length = (len(episode['reward']) - 1) * config.action_repeat
    ret = episode['reward'].sum()
    print(
        f'{prefix.title()} episode of length {length} with return {ret:.1f}.')
    metrics = [
        (f'{prefix}/return', float(ret)),
        (f'{prefix}/length', len(episode['reward']) - 1),
        ('episodes', episodes),
    ]
    step = count_steps(datadir, config)
    with (config.logdir / 'metrics.jsonl').open('a') as f:
        f.write(json.dumps(dict([('step', step)] + metrics)) + '\n')
    with writer.as_default():  # Env might run in a different thread.
        tf.summary.experimental.set_step(step)
        # Plain loop instead of a comprehension evaluated for side effects.
        for k, v in metrics:
            tf.summary.scalar('sim/' + k, v)
        tools.video_summary(f'sim/{prefix}/video', episode['image'][None])