Example #1
0
def summarize_episode(episode, config, datadir, writer, prefix):
    """Log stats for one finished episode to stdout, JSONL, and TensorBoard.

    Args:
        episode: dict of arrays; must contain 'reward', may contain
            'success' and (for test episodes) 'image'.
        config: experiment config; uses `action_repeat` and `logdir`.
        datadir: directory of saved episodes, counted via `tools`.
        writer: TF summary writer (env may run in a different thread).
        prefix: metric namespace, e.g. 'train' or 'test'.
    """
    episodes, steps = tools.count_episodes(datadir)
    # Env steps = transitions * action_repeat (first reward entry is the reset).
    length = (len(episode['reward']) - 1) * config.action_repeat
    ret = episode['reward'].sum()
    metrics = [(f'{prefix}/return', float(ret)),
               (f'{prefix}/length', len(episode['reward']) - 1),
               ('episodes', episodes)]
    if 'success' in episode:
        # `True in ...` yields a plain bool, so compare/branch on it directly.
        success = True in episode['success']
        success_str = "succeeded" if success else "did not succeed"
        metrics.append((f'{prefix}/success', success))
        print(
            f'{prefix.title()} episode of length {length} with return {ret:.1f}, which {success_str}.'
        )
    else:
        print(
            f'{prefix.title()} episode of length {length} with return {ret:.1f}.'
        )
    sys.stdout.flush()
    step = count_steps(datadir, config)
    with (config.logdir / 'metrics.jsonl').open('a') as f:
        f.write(json.dumps(dict([('step', step)] + metrics)) + '\n')
    with writer.as_default():  # Env might run in a different thread.
        tf.summary.experimental.set_step(step)
        # Plain loop: the original comprehension was used only for side effects.
        for key, value in metrics:
            tf.summary.scalar('sim/' + key, value)
        if prefix == 'test':
            tools.video_summary(f'sim/{prefix}/video', episode['image'][None])
Example #2
0
def summarize_episode(episode, config, datadir, writer, prefix):
    """Print a one-line episode summary and log return/length to TensorBoard.

    Args:
        episode: dict of arrays; must contain 'reward' and, for test
            episodes, 'image'.
        config: experiment config; uses `action_repeat`.
        datadir: unused here, kept for signature compatibility with callers.
        writer: TF summary writer (env may run in a different thread).
        prefix: metric namespace, e.g. 'train' or 'test'.
    """
    # Env steps = transitions * action_repeat (first reward entry is the reset).
    length = (len(episode['reward']) - 1) * config.action_repeat
    ret = episode['reward'].sum()
    print(
        f'{prefix.title()} episode of length {length} with return {ret:.1f}.')
    metrics = [(f'{prefix}/return', float(ret)),
               (f'{prefix}/length', len(episode['reward']) - 1)]
    with writer.as_default():  # Env might run in a different thread.
        # Plain loop: the original comprehension was used only for side effects.
        for key, value in metrics:
            tf.summary.scalar('sim/' + key, value)
        if prefix == 'test':
            tools.video_summary(f'sim/{prefix}/video', episode['image'][None])
    def post_process_episodes(self, cache, episode_name, episode):
        """Add a finished episode to the replay cache, evict stale episodes
        past the dataset step budget, and log dataset stats to TensorBoard.

        Args:
            cache: dict mapping episode names to episode dicts; mutated.
            episode_name: key under which the new episode is stored.
            episode: dict with 'rewards' and 'obp1s' (observation) arrays.
        """
        kept_steps = 0
        length = len(episode["rewards"]) - 1
        score = float(episode["rewards"].astype(np.float64).sum())
        video = episode["obp1s"]

        # Walk existing episodes newest-first; keep them while they still fit
        # in the budget after reserving room for the new episode, drop the rest.
        # sorted(...) materializes the items, so deleting during the loop is safe.
        for name, old_ep in sorted(cache.items(), key=lambda kv: kv[0], reverse=True):
            if kept_steps <= self._c.max_dataset_steps - length:
                kept_steps += len(old_ep["rewards"]) - 1
            else:
                del cache[name]

        cache[str(episode_name)] = episode

        step = count_steps(self.datadir)
        # Summaries are indexed by env steps (model steps * action_repeat).
        env_step = step * self._c.action_repeat

        with self._writer.as_default():
            tf.summary.scalar("dataset_size", kept_steps + length, step=env_step)
            tf.summary.scalar("train_episodes", len(cache), step=env_step)
            tf.summary.scalar("train_return", score, step=env_step)
            tf.summary.scalar("train_length", length, step=env_step)
            print("save train_policy!!!!")
            tools.video_summary("train_policy", np.array(video[None]),
                                env_step)

        print("the episodes size now is:", kept_steps + length, "steps")
Example #4
0
def summarize_episode(episode, config, datadir, writer, prefix):
    """Log stats for one finished episode to stdout, JSONL, and TensorBoard.

    Args:
        episode: dict of arrays; must contain 'reward' and, for test
            episodes, 'image'.
        config: experiment config; uses `action_repeat` and `logdir`.
        datadir: directory of saved episodes, counted via `tools`.
        writer: TF summary writer (env may run in a different thread).
        prefix: metric namespace, e.g. 'train' or 'test'.
    """
    episodes, steps = tools.count_episodes(datadir)
    # Env steps = transitions * action_repeat (first reward entry is the reset).
    length = (len(episode['reward']) - 1) * config.action_repeat
    ret = episode['reward'].sum()
    print(
        f'{prefix.title()} episode of length {length} with return {ret:.1f}.')
    metrics = [(f'{prefix}/return', float(ret)),
               (f'{prefix}/length', len(episode['reward']) - 1),
               ('episodes', episodes)]
    step = count_steps(datadir, config)
    with (config.logdir / 'metrics.jsonl').open('a') as f:
        f.write(json.dumps(dict([('step', step)] + metrics)) + '\n')
    with writer.as_default():  # Env might run in a different thread.
        tf.summary.experimental.set_step(step)
        # Plain loop: the original comprehension was used only for side effects.
        for key, value in metrics:
            tf.summary.scalar('sim/' + key, value)
        if prefix == 'test':
            tools.video_summary(f'sim/{prefix}/video', episode['image'][None])
Example #5
0
def summarize_episode(episode, config, datadir, writer, prefix):
    """Log stats for one finished episode to stdout, JSONL, and TensorBoard.

    Args:
        episode: dict of arrays; must contain 'reward' and, for test
            episodes, 'image'.
        config: experiment config; uses `action_repeat` and `logdir`.
        datadir: directory of saved episodes, counted via `tools`.
        writer: TF summary writer (env may run in a different thread).
        prefix: metric namespace, e.g. 'train' or 'test'.
    """
    episodes, steps = tools.count_episodes(datadir)
    # Env steps = transitions * action_repeat (first reward entry is the reset).
    length = (len(episode["reward"]) - 1) * config.action_repeat
    ret = episode["reward"].sum()
    print(f"{prefix.title()} episode of length {length} with return {ret:.1f}.")
    metrics = [
        (f"{prefix}/return", float(ret)),
        (f"{prefix}/length", len(episode["reward"]) - 1),
        ("episodes", episodes),
    ]
    step = count_steps(datadir, config)
    with (config.logdir / "metrics.jsonl").open("a") as f:
        f.write(json.dumps(dict([("step", step)] + metrics)) + "\n")
    with writer.as_default():  # Env might run in a different thread.
        tf.summary.experimental.set_step(step)
        # Plain loop: the original comprehension was used only for side effects.
        for key, value in metrics:
            tf.summary.scalar("sim/" + key, value)
        if prefix == "test":
            tools.video_summary(f"sim/{prefix}/video", episode["image"][None])
Example #6
0
 def _image_summaries(self,
                      data,
                      embed,
                      image_pred,
                      step=None,
                      prefix='train'):
     """Log a comparison video: ground truth, reconstruction, open-loop
     prediction, and the two error maps, side by side for 6 episodes.

     The first 5 frames condition the latent state; the rest are imagined
     open-loop from actions only.
     """
     ground_truth = data['image'][:6] + 0.5
     posterior_recon = image_pred.mode()[:6]
     # Condition the latent on the first 5 steps, then keep the last state.
     start, _ = self._dynamics.observe(embed[:6, :5], data['action'][:6, :5])
     start = {k: v[:, -1] for k, v in start.items()}
     # Imagine the remaining steps open-loop from actions alone.
     imagined = self._dynamics.imagine(data['action'][:6, 5:], start)
     open_loop = self._decode(self._dynamics.get_feat(imagined)).mode()
     # Model video: posterior frames for the conditioning prefix, then open-loop.
     model = tf.concat([posterior_recon[:, :5] + 0.5, open_loop + 0.5], 1)
     # Error maps rescaled from [-1, 1] into [0, 1] for display.
     error_prior = (model - ground_truth + 1) / 2
     error_posterior = (posterior_recon + 0.5 - ground_truth + 1) / 2
     panel = tf.concat(
         [ground_truth, posterior_recon + 0.5, model, error_prior,
          error_posterior], 2)
     with self._writer.as_default():
         tools.video_summary('agent/' + prefix, panel.numpy(), step=step)
def summarize_episode(config, datadir, writer, prefix):
    """Load the most recently written episode from `datadir` and log its
    stats to stdout, JSONL, and TensorBoard (including a video summary).

    Args:
        config: experiment config; uses `action_repeat` and `logdir`.
        datadir: directory of saved `.npz` episode files.
        writer: TF summary writer (env may run in a different thread).
        prefix: metric namespace, e.g. 'train' or 'test'.
    """
    list_of_files = glob.glob(str(datadir) + '/*.npz')
    latest_file = max(list_of_files, key=os.path.getctime)
    # NpzFile keeps the archive open lazily; copy arrays out and close it.
    with np.load(latest_file) as loaded:
        episode = {k: loaded[k] for k in loaded}
    episodes, steps = tools.count_episodes(datadir)
    print(episodes, steps)
    # Env steps = transitions * action_repeat (first reward entry is the reset).
    length = (len(episode['reward']) - 1) * config.action_repeat
    ret = episode['reward'].sum()
    print(
        f'{prefix.title()} episode of length {length} with return {ret:.1f}.')
    metrics = [(f'{prefix}/return', float(ret)),
               (f'{prefix}/length', len(episode['reward']) - 1),
               ('episodes', episodes)]
    step = count_steps(datadir, config)
    with (config.logdir / 'metrics.jsonl').open('a') as f:
        f.write(json.dumps(dict([('step', step)] + metrics)) + '\n')
    with writer.as_default():  # Env might run in a different thread.
        tf.summary.experimental.set_step(step)
        # Plain loop: the original comprehension was used only for side effects.
        for key, value in metrics:
            tf.summary.scalar('sim/' + key, value)
        tools.video_summary(f'sim/{prefix}/video', episode['image'][None])