コード例 #1
0
ファイル: dreamer.py プロジェクト: yuqingd/sim2real2sim
def summarize_episode(episode, config, datadir, writer, prefix):
    """Log summary statistics for a finished episode.

    Appends per-episode metrics to ``<logdir>/metrics.jsonl`` and writes them
    to the TensorBoard ``writer``; for test episodes it also logs a video.

    :param episode: dict of arrays; must contain 'reward' (and 'image' for
        test episodes); may contain a boolean 'success' array.
    :param config:  config object providing ``action_repeat`` and ``logdir``.
    :param datadir: directory holding saved episodes, used for step counting.
    :param writer:  TensorFlow summary writer.
    :param prefix:  metric namespace, e.g. 'train' or 'test'.
    """
    episodes, _ = tools.count_episodes(datadir)
    # Episode arrays include the initial transition, hence the -1.
    episode_len = len(episode['reward']) - 1
    length = episode_len * config.action_repeat
    # Compute the return once and reuse it (was recomputed for the metrics).
    ret = float(episode['reward'].sum())
    metrics = [(f'{prefix}/return', ret),
               (f'{prefix}/length', episode_len),
               (f'episodes', episodes)]
    if 'success' in episode:
        success = True in episode['success']
        # `success` is already a bool; no need to compare against 1.
        success_str = "succeeded" if success else "did not succeed"
        metrics.append((f'{prefix}/success', success))
        print(
            f'{prefix.title()} episode of length {length} with return {ret:.1f}, which {success_str}.'
        )
    else:
        print(
            f'{prefix.title()} episode of length {length} with return {ret:.1f}.'
        )
    sys.stdout.flush()  # Force output now; env may run in a different thread.
    step = count_steps(datadir, config)
    with (config.logdir / 'metrics.jsonl').open('a') as f:
        f.write(json.dumps(dict([('step', step)] + metrics)) + '\n')
    with writer.as_default():  # Env might run in a different thread.
        tf.summary.experimental.set_step(step)
        # Plain loop instead of a side-effect-only list comprehension.
        for k, v in metrics:
            tf.summary.scalar('sim/' + k, v)
        if prefix == 'test':
            tools.video_summary(f'sim/{prefix}/video', episode['image'][None])
コード例 #2
0
ファイル: lompo.py プロジェクト: rmrafailov/LOMPO
    def _process_data_to_latent(self, num_episodes=None):
        """Encode saved raw episodes into latent transitions for the replay buffer.

        :param num_episodes: number of episodes to process; defaults to every
            episode currently stored in ``self._c.datadir``.
        """
        if num_episodes is None:
            num_episodes, _ = tools.count_episodes(self._c.datadir)

        for _ in range(num_episodes):
            filename = next(self._episode_itterator)
            try:
                with filename.open('rb') as f:
                    episode = np.load(f)
                    # Materialize the lazy NpzFile into a plain dict before
                    # the file handle closes.
                    episode = {k: episode[k] for k in episode.keys()}
            except Exception as e:
                # Best-effort loading: skip unreadable/corrupt episode files.
                print(f'Could not load episode: {e}')
                continue

            obs = preprocess_raw(episode, self._c)
            if not self._c.pcont:
                # No predicted continuation: mark every step as non-terminal.
                obs['terminal'] = tf.zeros_like(obs['reward'])
            # NOTE(review): the tape watches no variables and is discarded;
            # presumably only scoping the forward pass — confirm it is needed.
            with tf.GradientTape(watch_accessed_variables=False) as _:
                embed = self._encode(obs)
                post, prior = self._dynamics.observe(
                    tf.expand_dims(embed, 0), tf.expand_dims(obs['action'], 0))
                feat = flatten(self._dynamics.get_feat(post))
                # Offset slices pair each latent feature with the following
                # step's action/reward/terminal and the next latent feature.
                self.latent_buffer.add_samples(feat.numpy()[:-1],
                                               obs['action'].numpy()[1:],
                                               feat.numpy()[1:],
                                               obs['reward'].numpy()[1:],
                                               obs['terminal'].numpy()[1:],
                                               sample_type='real')
コード例 #3
0
ファイル: lompo.py プロジェクト: rmrafailov/LOMPO
    def __init__(self, config, datadir, actspace, writer):
        """Set up configuration, replay buffer, logging, datasets, and the model.

        :param config:   config object (datadir, sizes, lmbd, alpha, seed, ...).
        :param datadir:  episode directory.  NOTE(review): unused — the code
            reads ``config.datadir`` instead; confirm the two always match.
        :param actspace: gym-style action space (discrete or continuous).
        :param writer:   TensorFlow summary writer.
        """
        self._c = config
        self._actspace = actspace
        # Discrete spaces expose .n; continuous (Box) spaces expose .shape.
        self._actdim = actspace.n if hasattr(actspace,
                                             'n') else actspace.shape[0]
        episodes, steps = tools.count_episodes(config.datadir)
        # Buffer capacity sized by total recorded steps; latent state is the
        # concatenation of deterministic and stochastic features.
        self.latent_buffer = buffer.LatentReplayBuffer(
            steps, steps, self._c.deter_size + self._c.stoch_size,
            self._actdim)
        self.lmbd = config.lmbd
        self.alpha = config.alpha

        self._writer = writer
        tf.summary.experimental.set_step(0)
        self._metrics = dict()

        # Separate step counters for agent updates and model updates.
        self._agent_step = 0
        self._model_step = 0

        self._random = np.random.RandomState(config.seed)
        # Compute dtype follows the global mixed-precision policy.
        self._float = prec.global_policy().compute_dtype
        self._dataset = iter(load_dataset(config.datadir, self._c))
        self._episode_itterator = episode_itterator(config.datadir, self._c)

        self._build_model()
        # Run a few eval-prefixed train steps up front — presumably to build
        # variables / traces before real training; confirm intent.
        for _ in range(10):
            self._model_train_step(next(self._dataset), prefix='eval')
コード例 #4
0
def summarize_episode(episode_list, config, datadir, writer, prefix):
    """
    Write episode summary in tensorflow logs. Even if multi-agent returns multiple episodes (1 for each agent),
    the summary is written w.r.t. the first agent.

    :param episode_list:    list of episodes (in multi-agent setting, each agent produce 1 episode)
    :param config:          config dictionary
    :param datadir:         dir where the episodes are stored
    :param writer:          tf writer for logging
    :param prefix:          either `train` or `test`
    :return:
    """
    episode = episode_list[0]
    episodes, _ = tools.count_episodes(datadir)
    # Episode arrays include the initial transition, hence the -1.
    episode_len = len(episode['reward']) - 1
    length = episode_len * config.action_repeat
    # Compute the return once and reuse it (was recomputed for the metrics).
    ret = float(episode['reward'].sum())
    metrics = [
        (f'{prefix}/return', ret),
        (f'{prefix}/length', episode_len),
        (f'{prefix}/progress', float(max(episode['progress']))),
        (f'episodes', episodes)]
    step = tools.count_steps(datadir, config)
    with writer.as_default():  # Env might run in a different thread.
        tf.summary.experimental.set_step(step)
        # Plain loop instead of a side-effect-only list comprehension.
        for k, v in metrics:
            tf.summary.scalar(k, v)
    print(f'\t[Summary] {prefix.title()} episode of length {episode_len} ({length} sim steps) with return {ret:.1f}.')
コード例 #5
0
def summarize_episode(episode, config, datadir, writer, prefix):
    """Log summary statistics for a finished episode.

    Appends per-episode metrics to ``<logdir>/metrics.jsonl`` and writes them
    to the TensorBoard ``writer``; for test episodes it also logs a video.

    :param episode: dict of arrays; must contain 'reward' (and 'image' for
        test episodes).
    :param config:  config object providing ``action_repeat`` and ``logdir``.
    :param datadir: directory holding saved episodes, used for step counting.
    :param writer:  TensorFlow summary writer.
    :param prefix:  metric namespace, e.g. 'train' or 'test'.
    """
    episodes, _ = tools.count_episodes(datadir)
    # Episode arrays include the initial transition, hence the -1.
    episode_len = len(episode['reward']) - 1
    length = episode_len * config.action_repeat
    # Compute the return once and reuse it (was recomputed for the metrics).
    ret = float(episode['reward'].sum())
    print(
        f'{prefix.title()} episode of length {length} with return {ret:.1f}.')
    metrics = [(f'{prefix}/return', ret),
               (f'{prefix}/length', episode_len),
               (f'episodes', episodes)]
    step = count_steps(datadir, config)
    with (config.logdir / 'metrics.jsonl').open('a') as f:
        f.write(json.dumps(dict([('step', step)] + metrics)) + '\n')
    with writer.as_default():  # Env might run in a different thread.
        tf.summary.experimental.set_step(step)
        # Plain loop instead of a side-effect-only list comprehension.
        for k, v in metrics:
            tf.summary.scalar('sim/' + k, v)
        if prefix == 'test':
            tools.video_summary(f'sim/{prefix}/video', episode['image'][None])
コード例 #6
0
def summarize_episode(config, datadir, writer, prefix):
    """Summarize the most recently written episode in *datadir*.

    Loads the newest ``.npz`` episode file, prints a short summary, and
    appends the metrics to ``<logdir>/metrics.jsonl``.

    :param config:  config object providing ``action_repeat`` and ``logdir``.
    :param datadir: directory containing saved ``.npz`` episode files.
    :param writer:  TensorFlow summary writer (kept for interface
        compatibility; not used by this variant).
    :param prefix:  metric namespace, e.g. 'train' or 'test'.
    """
    list_of_files = glob.glob(str(datadir) + '/*.npz')
    # Newest file by creation time is taken as the last finished episode.
    latest_file = max(list_of_files, key=os.path.getctime)
    # Close the NpzFile promptly instead of leaking the open handle.
    with np.load(latest_file) as data:
        episode = {k: data[k] for k in data.keys()}
    episodes, steps = tools.count_episodes(datadir)
    print(episodes, steps)
    # Episode arrays include the initial transition, hence the -1.
    episode_len = len(episode['reward']) - 1
    length = episode_len * config.action_repeat
    # Compute the return once and reuse it (was recomputed for the metrics).
    ret = float(episode['reward'].sum())
    print(
        f'{prefix.title()} episode of length {length} with return {ret:.1f}.')
    metrics = [(f'{prefix}/return', ret),
               (f'{prefix}/length', episode_len),
               (f'episodes', episodes)]
    step = count_steps(datadir, config)
    with (config.logdir / 'metrics.jsonl').open('a') as f:
        f.write(json.dumps(dict([('step', step)] + metrics)) + '\n')
コード例 #7
0
ファイル: dreamer.py プロジェクト: qingfengwuhen/RE3
def summarize_episode(episode, config, datadir, writer, prefix):
    """Log summary statistics for a finished episode.

    Appends per-episode metrics to ``<logdir>/metrics.jsonl`` and writes them
    to the TensorBoard ``writer``; for test episodes it also logs a video.

    :param episode: dict of arrays; must contain 'reward' (and 'image' for
        test episodes).
    :param config:  config object providing ``action_repeat`` and ``logdir``.
    :param datadir: directory holding saved episodes, used for step counting.
    :param writer:  TensorFlow summary writer.
    :param prefix:  metric namespace, e.g. 'train' or 'test'.
    """
    episodes, _ = tools.count_episodes(datadir)
    # Episode arrays include the initial transition, hence the -1.
    episode_len = len(episode["reward"]) - 1
    length = episode_len * config.action_repeat
    # Compute the return once and reuse it (was recomputed for the metrics).
    ret = float(episode["reward"].sum())
    print(f"{prefix.title()} episode of length {length} with return {ret:.1f}.")
    metrics = [
        (f"{prefix}/return", ret),
        (f"{prefix}/length", episode_len),
        (f"episodes", episodes),
    ]
    step = count_steps(datadir, config)
    with (config.logdir / "metrics.jsonl").open("a") as f:
        f.write(json.dumps(dict([("step", step)] + metrics)) + "\n")
    with writer.as_default():  # Env might run in a different thread.
        tf.summary.experimental.set_step(step)
        # Plain loop instead of a side-effect-only list comprehension.
        for k, v in metrics:
            tf.summary.scalar("sim/" + k, v)
        if prefix == "test":
            tools.video_summary(f"sim/{prefix}/video", episode["image"][None])
コード例 #8
0
def summarize_episode(config, datadir, writer, prefix):
    """Summarize the most recently written episode in *datadir*.

    Loads the newest ``.npz`` episode file, prints a short summary, appends
    the metrics to ``<logdir>/metrics.jsonl``, and logs scalars plus the
    episode video to the TensorBoard ``writer``.

    :param config:  config object providing ``action_repeat`` and ``logdir``.
    :param datadir: directory containing saved ``.npz`` episode files.
    :param writer:  TensorFlow summary writer.
    :param prefix:  metric namespace, e.g. 'train' or 'test'.
    """
    list_of_files = glob.glob(str(datadir) + '/*.npz')
    # Newest file by creation time is taken as the last finished episode.
    latest_file = max(list_of_files, key=os.path.getctime)
    # Close the NpzFile promptly instead of leaking the open handle.
    with np.load(latest_file) as data:
        episode = {k: data[k] for k in data.keys()}
    episodes, steps = tools.count_episodes(datadir)
    print(episodes, steps)
    # Episode arrays include the initial transition, hence the -1.
    episode_len = len(episode['reward']) - 1
    length = episode_len * config.action_repeat
    # Compute the return once and reuse it (was recomputed for the metrics).
    ret = float(episode['reward'].sum())
    print(
        f'{prefix.title()} episode of length {length} with return {ret:.1f}.')
    metrics = [(f'{prefix}/return', ret),
               (f'{prefix}/length', episode_len),
               (f'episodes', episodes)]
    step = count_steps(datadir, config)
    with (config.logdir / 'metrics.jsonl').open('a') as f:
        f.write(json.dumps(dict([('step', step)] + metrics)) + '\n')
    with writer.as_default():  # Env might run in a different thread.
        tf.summary.experimental.set_step(step)
        # Plain loop instead of a side-effect-only list comprehension.
        for k, v in metrics:
            tf.summary.scalar('sim/' + k, v)
        tools.video_summary(f'sim/{prefix}/video', episode['image'][None])
コード例 #9
0
def count_steps(datadir, config):
    """Return the total number of environment steps recorded in *datadir*.

    Each stored step corresponds to ``config.action_repeat`` simulator steps.
    """
    _, recorded_steps = tools.count_episodes(datadir)
    return recorded_steps * config.action_repeat