Example #1
    def validation_epoch_end(self, outputs):
        mean_loss = torch.stack([x['val_loss'] for x in outputs]).mean()
        mean_psnr = torch.stack([x['val_psnr'] for x in outputs]).mean()

        log = {'val/loss': mean_loss, 'val/psnr': mean_psnr}
        self.log("val/loss", mean_loss)
        wandb.log(log)

        self.hparams.scene_name = self.hparams.exp_name
        self.hparams.N_importance = 64

        ckpt_dir = os.path.join(self.hparams.log_dir, self.hparams.exp_name,
                                "ckpts")
        ckpts = [f for f in os.listdir(ckpt_dir) if "epoch" in f]
        if len(ckpts) != 0:
            ckpts.sort()

            self.hparams.eval_ckpt_path = os.path.join(ckpt_dir, ckpts[-1])
            img_gif, depth_gif = eval(self.hparams)

            wandb.log({
                "val/depth_gif":
                wandb.Video(depth_gif, fps=30, format="gif")
            })
            wandb.log(
                {"val/out_gif": wandb.Video(img_gif, fps=30, format="gif")})

        return {
            'progress_bar': {
                'val_loss': mean_loss,
                'val_psnr': mean_psnr
            },
        }
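All of the examples in this collection reduce to the same two call patterns: wandb.Video can wrap either a path (or file handle) to an already-encoded clip, or an in-memory array of frames in (time, channels, height, width) order. A minimal sketch of both forms, assuming an active run and an existing out.gif (key names and project are illustrative, not taken from the example above); encoding an array on the fly typically requires moviepy to be installed:

import numpy as np
import wandb

wandb.init(project="video-logging-demo")  # hypothetical project name

# Form 1: a clip that already exists on disk; the extension must be a
# supported container (gif, mp4, webm, ogg).
wandb.log({"val/out_gif": wandb.Video("out.gif", fps=30, format="gif")})

# Form 2: raw frames as a numpy array in (time, channels, height, width) order.
frames = np.random.randint(0, 255, size=(60, 3, 64, 64), dtype=np.uint8)
wandb.log({"val/random_clip": wandb.Video(frames, fps=30, format="gif")})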
Example #2
def vis_mse(dataset_or_dataloader, model, epoch, cfg):
    if epoch % cfg['output']['vis_every_epoch']:
        return

    # Settings
    device = cfg['training']['device']
    write_probs = cfg['output']['write_probs']
    probs_dir = cfg['paths']['predictions']
    bs, n_batches = cfg['training']['batch_size_test'], cfg['output'][
        'batches_vis']
    normalize = cfg['transforms']['test']['normalize']
    mean, std = cfg['data']['3d']['mean'][0], cfg['data']['3d']['std'][0]
    sigmoid = cfg['training']['sigmoid']
    kldiv = cfg['training']['test_criterion'] == 'kldivloss'

    wandb_dict = {'epoch': epoch}
    wandb_videos_true, wandb_videos_pred = [], []

    # Get data & predict
    if isinstance(dataset_or_dataloader, DataLoader):
        dataloader = dataset_or_dataloader
    else:
        dataloader = DataLoader(dataset_or_dataloader,
                                bs,
                                shuffle=False,
                                num_workers=1,
                                pin_memory=True)
    iter_dl = iter(dataloader)
    for i_batch in range(n_batches):
        batch_x, batch_y_true = next(iter_dl)
        with torch.no_grad():
            batch_y_pred = model(batch_x.to(device))
            if kldiv:
                batch_y_pred = torch.softmax(
                    batch_y_pred,
                    dim=1)[:, 1:]  # Get probabilities and ignore BG channel
                batch_y_true = batch_y_true[:, 1:]
            if sigmoid:
                batch_y_pred = torch.sigmoid(batch_y_pred)

        if write_probs:
            np.savez_compressed(os.path.join(probs_dir, f"{epoch}.npz"),
                                true=batch_y_true.cpu().numpy(),
                                pred=batch_y_pred.cpu().numpy())

        for i, (video, true_video, pred_video) in enumerate(
                zip(batch_x, batch_y_true, batch_y_pred)):
            if normalize:
                video = video * std + mean
            vid_true, vid_pred = create_video(video, pred_video, true_video)
            wandb_videos_true.append(
                wandb.Video(vid_true, fps=20, format="mp4"))
            wandb_videos_pred.append(
                wandb.Video(vid_pred, fps=20, format="mp4"))

    wandb_dict["videos_true"] = wandb_videos_true
    wandb_dict["videos_pred"] = wandb_videos_pred

    wandb.log(wandb_dict)
    print(f"logged: {wandb_dict}")
Example #3
def log_video_hrl(env_name, actor_low, actor_high, params):
    actor_low = copy.deepcopy(actor_low).cpu()
    actor_high = copy.deepcopy(actor_high).cpu()
    actor_high.max_goal = actor_high.max_goal.to('cpu')
    policy_params = params.policy_params
    goal_dim = params.goal_dim
    if env_name in envnames_mujoco:
        env = gym.make(env_name)
    elif env_name in envnames_ant:
        env = create_maze_env(env_name=env_name)
    print('\n    > Collecting current trajectory...')
    done = False
    step = 1
    state = torch.Tensor(env.reset())
    goal = torch.Tensor(torch.randn(goal_dim))
    episode_reward, frame_buffer = 0, []
    while not done and step < 600:
        frame_buffer.append(env.render(mode='rgb_array'))
        action = actor_low(torch.Tensor(state), torch.Tensor(goal)).detach()
        next_state, reward, done, info = env.step(action)
        if (step + 1) % policy_params.c == 0 and step > 0:
            goal = actor_high(state)
        else:
            goal = (torch.Tensor(state)[:goal_dim] + goal -
                    torch.Tensor(next_state)[:goal_dim]).float()
        state = next_state
        episode_reward += reward
        step += 1
    print(
        f'    > Finished collection, saved video. Episode reward: {float(episode_reward):.3f}\n'
    )
    frame_buffer = np.array(frame_buffer).transpose(0, 3, 1, 2)
    wandb.log({"video": wandb.Video(frame_buffer, fps=30, format="mp4")})
    env.close()
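Stripped of the HRL-specific goal bookkeeping, the logging pattern above is simply: render each step to an RGB array, stack the frames, move the channel axis in front, and hand the result to wandb.Video. A minimal sketch with a plain Gym environment and random actions (environment and project names are illustrative; assumes the pre-0.26 gym API used throughout these examples):

import gym
import numpy as np
import wandb

wandb.init(project="render-demo")  # hypothetical project name

env = gym.make("CartPole-v1")
frames, done = [], False
obs = env.reset()
while not done:
    frames.append(env.render(mode="rgb_array"))      # H x W x 3 uint8 frame
    obs, reward, done, info = env.step(env.action_space.sample())
env.close()

clip = np.stack(frames).transpose(0, 3, 1, 2)        # -> T x 3 x H x W
wandb.log({"video": wandb.Video(clip, fps=30, format="mp4")})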
Example #4
    def log_blender_weights(self, blender_weights, image=None):
        """Visualize blender weights as image or video, depending on input shape

        Args:
            blender_weights (Tensor): [T_future x ] (T+1) x H_b x W_b
                here T_future is an optional dimension. When given, the result will be visualized
                as a gif video containing T_future frames.
                T is the number of frames used for warping (same below).
            image (Optional[Tensor]): T x 3 x H x W
                when image dimensions (H, W) are not equal to blender dimensions, blender weights
                will be bilinearly interpolated to the image dimension.
        """

        if len(blender_weights.shape) == 4:
            frames = [
                self._blender_weight_on_image(b_weight,
                                              image,
                                              wandb_image=False)
                for b_weight in blender_weights
            ]
            frames = np.stack(frames, axis=0)
            frames = frames.transpose(0, 3, 1, 2)  # wandb needs T x 3 x H x W
            video = wandb.Video(frames, fps=self.video_fps, format='gif')
            self.log({'blender_weights_seq': video})
        else:
            image = self._blender_weight_on_image(blender_weights,
                                                  image,
                                                  wandb_image=True)
            self.log({
                'blender_weights':
                [wandb.Image(image, caption='weights_heatmap')]
            })
Example #5
    def encode_images(img_strs, value):
        try:
            from PIL import Image
        except ImportError:
            wandb.termwarn(
                'Install pillow if you are logging images with Tensorboard. To install, run "pip install pillow".',
                repeat=False,
            )
            return

        if len(img_strs) == 0:
            return

        images = []
        for img_str in img_strs:
            # Supports gifs from TboardX
            if img_str.startswith(b"GIF"):
                images.append(wandb.Video(six.BytesIO(img_str), format="gif"))
            else:
                images.append(wandb.Image(Image.open(six.BytesIO(img_str))))
        tag_idx = value.tag.rsplit("/", 1)
        if len(tag_idx) > 1 and tag_idx[1].isdigit():
            tag, idx = tag_idx
            values.setdefault(history_image_key(tag, namespace),
                              []).extend(images)
        else:
            values[history_image_key(value.tag, namespace)] = images
Example #6
    def _segmentation_mask_video(self,
                                 logits_or_mask,
                                 image=None,
                                 wandb_video=True):
        """
        convert input logit / mask sequence into video, with optional image as background.
        :param logits_or_mask: Tensor of shape T x C x H x W
        :param image: Tensor of shape T x 3 x H x W or None
        :param wandb_video: if True, return wandb_video; otherwise return stacked mask array in shape T x 3 x H x W
        return: wandb.Video for logging
        """
        np_imgs = []
        len_seq = logits_or_mask.shape[0]
        for t in range(len_seq):
            if image is None:
                np_imgs.append(
                    Logger._segmentation_to_numpy(logits_or_mask[t, ...]))
            else:
                np_imgs.append(
                    Logger._segmentation_to_numpy(logits_or_mask[t, ...],
                                                  image[t, ...]))
        np_imgs = np.stack(np_imgs, axis=0)

        if wandb_video:
            return wandb.Video(np_imgs, fps=self.video_fps, format="gif")

        return np_imgs
Example #7
def test_video_path_invalid():
    run = wandb.wandb_run.Run()
    with CliRunner().isolated_filesystem():
        with open("video.avi", "w") as f:
            f.write("00000")
        with pytest.raises(ValueError):
            vid = wandb.Video("video.avi")
Example #8
def wandb_log_paths_as_videos(paths,
                              step,
                              max_videos_to_save=2,
                              fps=10,
                              video_title='video'):

    # reshape the rollouts
    videos = [np.transpose(p['image_obs'], [0, 3, 1, 2]) for p in paths]

    # max rollout length
    max_videos_to_save = np.min([max_videos_to_save, len(videos)])
    max_length = videos[0].shape[0]
    for i in range(max_videos_to_save):
        if videos[i].shape[0] > max_length:
            max_length = videos[i].shape[0]

    # pad rollouts to all be same length
    for i in range(max_videos_to_save):
        if videos[i].shape[0] < max_length:
            padding = np.tile([videos[i][-1]],
                              (max_length - videos[i].shape[0], 1, 1, 1))
            videos[i] = np.concatenate([videos[i], padding], 0)

    # stack the selected rollouts and log them to wandb
    videos = np.stack(videos[:max_videos_to_save], 0)

    wandb.log({
        f'{video_title}_i{i}_s{step}':
        wandb.Video(videos[i],
                    caption=f'{video_title}_s{step}',
                    fps=fps,
                    format="gif")
        for i in range(videos.shape[0])
    })
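The dict comprehension above gives each rollout its own key (video_i0_s…, video_i1_s…). An equally valid pattern, used by Examples 20 and 29 in this collection, is a single key holding a list of clips so they appear grouped on the run page. A minimal, self-contained sketch of that variant (random data stands in for the padded rollouts):

import numpy as np
import wandb

# Two equal-length rollouts, already padded and stacked as (N, T, C, H, W).
videos = np.random.randint(0, 255, size=(2, 50, 3, 64, 64), dtype=np.uint8)
wandb.log({
    "video": [wandb.Video(v, caption="rollout", fps=10, format="gif")
              for v in videos]
})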
Example #9
def update_logs(logpath,
                loss,
                rewards,
                total_steps,
                episode_length,
                training_steps,
                send_to_wandb=False,
                videopath=None):
    log = pd.read_csv(logpath, sep=',', header=[0], index_col=0)
    log = log.append(
        pd.DataFrame({
            'updated': [datetime.now()],
            'total_steps': [total_steps],
            'training_steps': [training_steps],
            'episode_length': [episode_length],
            'loss': [loss],
            'reward': [rewards]
        }))
    log.to_csv(logpath, sep=',', header=True)
    if send_to_wandb:
        if videopath is not None:
            wandb.log(
                {
                    'loss': loss,
                    'reward': rewards,
                    'video': wandb.Video(videopath)
                },
                step=total_steps)
        else:
            wandb.log({'loss': loss, 'reward': rewards}, step=total_steps)
Example #10
def save_as_mp4(image_array, save_path, save_to_wandb: bool = True) -> None:
    dpi = 72.0
    xpixels, ypixels = image_array[0].shape[:2]
    fig = plt.figure(figsize=(ypixels / dpi, xpixels / dpi), dpi=dpi)
    im = plt.figimage(image_array[0])

    def animate(i):
        im.set_array(image_array[i])
        return (im, )

    anim = animation.FuncAnimation(fig,
                                   animate,
                                   frames=len(image_array),
                                   interval=33,
                                   repeat_delay=1,
                                   repeat=True)
    if not os.path.exists(os.path.dirname(save_path)):
        os.makedirs(os.path.dirname(save_path))

    anim.save(save_path)

    if save_to_wandb:
        wandb.log({
            'animation':
            wandb.Video(
                np.transpose(
                    np.array(image_array)[::3, :, :, :], (0, 3, 1, 2)),
                fps=16,
                format="mp4",
                caption=os.path.basename(save_path),
            )
        })
Example #11
  def _on_step(self, plot=True) -> bool:
    """Evaluate the current policy for self.eval_episodes, then take a render
    and report all stats to W&B

    Args:
      plot: Enable matplotlib plotting behavior. Should be set to True unless 
        testing. Defaults to True.

    Returns:
      True, as per API requirements
    """
    mean_rewards, std_rewards = evaluate_policy(
      self.model, self.env, n_eval_episodes=self.eval_episodes)
    
    images = []
    rewards = []
    actions = []
    obses = []
    step_cnt = 0
    done, state = False, None
    obs = self.env.reset()
    while not done:
      if step_cnt % self.render_freq == 0:
        images.append(self.env.render(mode='rgb_array'))

      action, state = self.model.predict(obs, state=state, deterministic=True)
      obs, reward, done, _ = self.env.step(action)

      rewards.append(reward)
      actions.append(action)
      obses.append(obs)
      step_cnt += 1

    render = np.array(images)
    render = np.transpose(render, (0, 3, 1, 2))

    actions = np.array(actions).flatten()
    observes = np.array(obses).flatten()

    rewards = np.array(rewards)
    if plot:
      plt.clf()
      plt.plot(np.arange(len(rewards)), rewards)
      plt.xlabel('timesteps')
      plt.ylabel('rewards')
      plt.title('Timestep {}'.format(self.num_timesteps))

    wandb.log({
      'test_reward_mean': mean_rewards, 
      'test_reward_std': std_rewards,
      'render': wandb.Video(render, format='gif', fps=self.fps),
      'global_step': self.num_timesteps,
      'evaluations': self.n_calls,
      'reward_distribution': wandb.Histogram(rewards),
      'action_distribution': wandb.Histogram(actions),
      'observation_distribution': wandb.Histogram(observes),
      'reward_vs_time': plot and wandb.Image(plt),
    }, step=self.num_timesteps)

    return True
Example #12
def training(
        env: Env,
        agent: AgentBase,
        max_steps: int,
        logging_interval: int = 1,
        evaluator: Optional[Evaluator] = None,
        recorder: Optional[Recoder] = None,
        logger: logging.Logger = logging.getLogger(__name__),
):
    def actor(state):
        return agent.act(state)

    interactions = TransitionGenerator(env, actor, max_step=max_steps)
    for steps, states, next_states, actions, rewards, dones, info in interactions:
        agent.observe(
            states=states,
            next_states=next_states,
            actions=actions,
            rewards=rewards,
            terminals=is_state_terminal(env, steps, dones, info),
            resets=dones,
        )
        with agent.eval_mode():
            # Evaluate
            if evaluator is not None:
                scores = evaluator.evaluate_if_necessary(
                    interactions.total_step.sum(), actor)
                if len(scores) > 0:
                    logger.info(
                        f"Evaluate Agent: mean_score: {mean(scores)} (stdev: {stdev(scores)})"
                    )
                    wandb.log({
                        "step": interactions.total_step.sum(),
                        "eval/mean": mean(scores),
                        "eval/stdev": stdev(scores),
                    })
            if recorder is not None:
                # Record videos
                videos = recorder.record_videos_if_necessary(
                    interactions.total_step.sum(), actor)
                for video in videos:
                    wandb.log({
                        "step":
                        interactions.total_step.sum(),
                        "video":
                        wandb.Video(video, fps=60, format="mp4"),
                    })

        if agent.just_updated and (interactions.total_step.sum() %
                                   logging_interval == 0):
            stats = agent.get_statistics()
            logger.info(stats)
            wandb.log({
                "step":
                interactions.total_step.sum(),
                **__add_header_to_dict_key(stats, "train"),
                **__add_header_to_dict_key(interactions.get_statistics(), "train"),
            })

    return agent
Example #13
    def eval(self, model):
        print(f'Evaluating {self.prefix}')
        trajs = self.sampler.collect_trajectories(n_interactions=None,
                                                  n_trajs=self.n_trajs)

        frames = []
        for traj in trajs:
            task = traj.task

            for i in range(len(traj.obs)):
                obs = traj.obs[i]
                frame = np.concatenate([obs[-1], task.obs[-1]], axis=1)
                if self.time_to_go:
                    frame = self._add_time_to_go(
                        frame, traj.policy_infos[i].time_to_go)
                frames.append(frame)

            # Freeze the video for 10 frames after a goal is achieved
            for _ in range(10):
                frames.append(frames[-1])

        ann_frames = []
        for i in range(len(frames)):
            frame_image = Image.fromarray(frames[i]).convert('RGBA')
            np_frame = np.array(frame_image)
            np_frame = np.moveaxis(np_frame, -1, 0)
            ann_frames.append(np_frame)

        ann_frames = np.array(ann_frames)

        video = wandb.Video(ann_frames, fps=10, format='mp4')

        logs = {'policy_video': video}

        return logs
Example #14
def test_video_numpy_multi():
    with CliRunner().isolated_filesystem():
        run = wandb.wandb_run.Run()
        video = np.random.random(size=(2, 10, 3, 28, 28))
        vid = wandb.Video(video)
        vid.bind_to_run(run, "videos", 0)
        assert vid.to_json(run)["path"].endswith(".gif")
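Passing a 5-D array exercises wandb's batched-video path: an array of shape (batch, time, channels, height, width) is combined into a single clip (current wandb versions tile the batch into a grid), and since no format is given the encoder falls back to gif, which is what the assertion checks. A minimal sketch outside the test harness, assuming an active run:

import numpy as np
import wandb

# Four 16-frame clips; the batch dimension is merged into one logged video.
batch = np.random.randint(0, 255, size=(4, 16, 3, 32, 32), dtype=np.uint8)
wandb.log({"rollouts": wandb.Video(batch, fps=8, format="gif")})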
Example #15
    def mutate(self, weights, record):
        """ Mutate the inputted weights and evaluate its performance against the
        weights of the previous generation. """
        recorder = VideoRecorder(self.env,
                                 path=self.video_path) if record else None
        self.elite.set_weights(weights)
        self.oponent.set_weights(weights)
        perturbations = self.oponent.mutate(self.config['mutation_power'])

        _, oponent_reward1, ts1 = self.play_game(self.elite,
                                                 self.oponent,
                                                 recorder=recorder)
        oponent_reward2, _, ts2 = self.play_game(self.oponent,
                                                 self.elite,
                                                 recorder=recorder)

        if record:
            recorder.close()

        return {
            'total_reward': np.mean([oponent_reward1, oponent_reward2]),
            'timesteps_total': ts1 + ts2,
            'video': None if not record else wandb.Video(self.video_path),
            'noise': perturbations
        }
Example #16
def test_video_path():
    with CliRunner().isolated_filesystem():
        run = wandb.wandb_run.Run()
        with open("video.mp4", "w") as f:
            f.write("00000")
        vid = wandb.Video("video.mp4")
        vid.bind_to_run(run, "videos", 0)
        assert vid.to_json(run)["path"].endswith(".mp4")
Example #17
 def close(self):
     vcr.ImageEncoder.orig_close(self)
     m = re.match(r".+(video\.\d+).+", self.output_path)
     if m:
         key = m.group(1)
     else:
         key = "videos"
     wandb.log({key: wandb.Video(self.output_path)})
Example #18
    def flush_data(self):

        if self.log_wandb:
            wandb.log({
                'vid': wandb.Video(np.stack(self.vizs).transpose((0,3,1,2)), fps=20, format='mp4')
            })
            
        self.vizs.clear()
Example #19
def evaluate(actor_critic,
             ob_rms,
             env_name,
             seed,
             num_processes,
             eval_log_dir,
             device,
             custom_gym,
             gif=False):
    eval_envs = make_vec_envs(env_name, seed + num_processes, num_processes,
                              None, eval_log_dir, device, True, custom_gym)

    vec_norm = utils.get_vec_normalize(eval_envs)
    if vec_norm is not None:
        vec_norm.eval()
        vec_norm.ob_rms = ob_rms

    eval_episode_rewards = []

    obs = eval_envs.reset()
    eval_recurrent_hidden_states = torch.zeros(
        num_processes, actor_critic.recurrent_hidden_state_size, device=device)
    eval_masks = torch.zeros(num_processes, 1, device=device)

    images = []
    while len(eval_episode_rewards) < 10:
        with torch.no_grad():
            images.append(obs[0, -3:, :].squeeze().cpu().numpy())
            _, action, _, eval_recurrent_hidden_states = actor_critic.act(
                obs,
                eval_recurrent_hidden_states,
                eval_masks,
                deterministic=True)

        # Observe reward and next obs
        obs, _, done, infos = eval_envs.step(action)

        eval_masks = torch.tensor([[0.0] if done_ else [1.0]
                                   for done_ in done],
                                  dtype=torch.float32,
                                  device=device)

        for info in infos:
            if 'episode' in info.keys():
                eval_episode_rewards.append(info['episode']['r'])

    images.append(obs[0, -3:, :].squeeze().cpu().numpy())
    eval_envs.close()
    if gif:
        array2gif.write_gif(images, 'replay.gif', fps=4)
        config.tensorboard.run.log(
            {"video": wandb.Video('replay.gif', fps=4, format="gif")},
            commit=True)
        config.tensorboard.run.history._flush()

    print(" Evaluation using {} episodes: mean reward {:.5f}\n".format(
        len(eval_episode_rewards), np.mean(eval_episode_rewards)))
Example #20
 def log_videos(self, vids, name, fps=15, step=None):
     """Logs videos to WandB in mp4 format.
     Assumes list of numpy arrays as input with [time, channels, height, width]."""
     assert len(vids[0].shape) == 4 and vids[0].shape[1] == 3
     assert isinstance(vids[0], np.ndarray)
     log_dict = {
         name: [wandb.Video(vid, fps=fps, format="mp4") for vid in vids]
     }
     wandb.log(log_dict) if step is None else wandb.log(log_dict, step=step)
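A minimal usage sketch for the helper above; `logger` stands in for whatever object carries this method (hypothetical name), and the clips are already uint8 arrays in (time, channels, height, width) order:

import numpy as np

# Two 30-frame RGB clips at 64x64, logged together under one key.
vids = [np.random.randint(0, 255, size=(30, 3, 64, 64), dtype=np.uint8)
        for _ in range(2)]
logger.log_videos(vids, name="eval/rollouts", fps=15, step=1000)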
Example #21
    def evaluate(
        self,
        checkpoint: Optional[Path] = None,
        baseline: Optional[str] = None,
        agents: Optional[Mapping[str, Agent]] = None,
        num_episodes: int = 10,
        visualize_planner: bool = False,
        seed: Optional[int] = None,
        sync_wandb: bool = True,
    ) -> Tuple[Dict[str, Dict[str, float]], Dict[str, List[wandb.Video]]]:
        """Evaluate trained model in all environments."""
        assert [checkpoint, baseline, agents].count(
            None
        ) == 2, 'Exactly one of checkpoint, baseline and agents must be provided'

        mean_metrics = {}
        videos = {}
        for task, sim in self.sims.items():
            logger.info(f'Evaluating on {task} task.')

            if seed is not None:
                # TODO: Make deterministic mode work again
                sim.seed(seed)

            save_dir = self.logdir / 'eval' / task / f'{datetime.now():%Y%m%d-%H%M%S}'

            get_distribution_strategy()
            if agents is not None:
                agent = agents[task]
            else:
                agent = self.get_agent(sim.action_space, checkpoint, baseline)
            if isinstance(agent, MPCAgent):
                agent.visualize = visualize_planner  # type: ignore[misc]  # mypy/issues/1362
            mean_metrics[task] = sim.run(agent,
                                         num_episodes,
                                         log=True,
                                         save_dir=save_dir,
                                         save_video=self.video)

            if self.video:
                videos[task] = [
                    wandb.Video(str(vid), fps=10, format="mp4")
                    for vid in save_dir.glob('*.mp4')
                ]

        if sync_wandb:
            # First delete existing summary items
            for k in list(wandb.run.summary._json_dict.keys()):
                wandb.run.summary._root_del((k, ))
            wandb.run.summary.update(mean_metrics)
            wandb.run.summary['seed'] = seed
            if self.video:
                for task, task_videos in videos.items():
                    for i, vid in enumerate(task_videos):
                        wandb.run.summary[f'{task}/video_{i}'] = vid

        return mean_metrics, videos
Example #22
 def _log_gym_to_wandb(self, filename):
     # obtain gym.env from rllab.env
     render_env(self._env,
                path=self.wandb_configs['gif_dir'],
                filename=filename)
     if self._log_wandb:
         full_fn = os.path.join(os.getcwd(), self.wandb_configs['gif_dir'],
                                filename)
         wandb.log({"video": wandb.Video(full_fn, fps=60, format="gif")})
Example #23
    def step(self):
        """ Evolve one generation using the Evolution Strategies algorithm.
        This consists of four steps:
        1. Send the current weights to a number of workers and mutate and evaluate them.
        2. Communicate the mutated weights and their fitness back to the Trainer.
        3. Update the weights using the ES update rule.
        4. Evaluate the updated weights against a random policy and log the outcome.
        """
        worker_jobs = []
        for i in range(self.config['population_size']):
            worker_id = i % self.config['num_workers']
            record = i < self.config['num_train_videos']
            worker_jobs += [
                self._workers[worker_id].mutate.remote(
                    self.weights.get_weights(), record)
            ]

        results = ray.get(worker_jobs)
        rewards = [result['total_reward'] for result in results]
        noises = [result['noise'] for result in results]

        normalized_rewards = self.normalize_rewards(rewards)
        weight_update = self.compute_weight_update(noises, normalized_rewards)
        weights = self.weights.get_perturbable_weights()
        self.weights.set_perturbable_weights(weights + weight_update)
        winner_file = self.try_save_winner(self.weights.get_weights())

        evaluate_results = self.evaluate_current_weights(
            self.weights.get_weights())
        evaluate_rewards = [
            result['total_reward'] for result in evaluate_results
        ]
        evaluate_videos = [result['video'] for result in evaluate_results]

        self.increment_metrics(results)

        summary = dict(
            timesteps_total=self.timesteps_total,
            episodes_total=self.episodes_total,
            train_reward_min=np.min(rewards),
            train_reward_mean=np.mean(rewards),
            train_reward_max=np.max(rewards),
            train_top_5_reward_avg=np.mean(np.sort(rewards)[-5:]),
            evaluate_reward_min=np.min(evaluate_rewards),
            evaluate_reward_mean=np.mean(evaluate_rewards),
            evaluate_reward_med=np.median(evaluate_rewards),
            evaluate_reward_max=np.max(evaluate_rewards),
            avg_timesteps_train=np.mean(
                [result['timesteps_total'] for result in results]),
            avg_timesteps_evaluate=np.mean(
                [result['timesteps_total'] for result in evaluate_results]),
            eval_max_video=evaluate_videos[np.argmax(evaluate_rewards)],
            eval_min_video=evaluate_videos[np.argmin(evaluate_rewards)],
            winner_file=wandb.Video(winner_file) if winner_file else None)
        self.add_videos_to_summary(results, summary)
        return summary
Example #24
    def train(self, sess=None):
        created_session = True if (sess is None) else False
        if sess is None:
            sess = tf.Session()
            sess.__enter__()

        sess.run(tf.global_variables_initializer())
        self.start_worker()
        start_time = time.time()
        for itr in range(self.start_itr, self.n_itr):
            itr_start_time = time.time()
            with logger.prefix('itr #%d | ' % itr):
                logger.log("Obtaining samples...")
                paths = self.obtain_samples(itr)
                logger.log("Processing samples...")
                samples_data = self.process_samples(itr, paths)
                logger.log("Logging diagnostics...")
                self.log_diagnostics(paths)
                logger.log("Optimizing policy...")
                self.optimize_policy(itr, samples_data, self._wandb_dict)
                logger.log("Saving snapshot...")
                params = self.get_itr_snapshot(itr,
                                               samples_data)  # , **kwargs)
                if self.store_paths:
                    params["paths"] = samples_data["paths"]
                logger.save_itr_params(itr, params)
                logger.log("Saved")
                logger.record_tabular('Time', time.time() - start_time)
                logger.record_tabular('ItrTime', time.time() - itr_start_time)
                logger.dump_tabular(with_prefix=False)
                if self.plot:
                    rollout(self.env,
                            self.policy,
                            animated=True,
                            max_path_length=self.max_path_length)
                    if self.pause_for_plot:
                        input("Plotting evaluation run: Press Enter to "
                              "continue...")
                if self._render:
                    fn = self._gif_header + str(itr) + '.gif'
                    # obtain gym.env from rllab.env
                    render_env(self.env.wrapped_env.env,
                               path=self._gif_dir,
                               filename=fn)
                    if self._log_wandb:
                        full_fn = os.path.join(os.getcwd(), self._gif_dir, fn)
                        wandb.log({
                            "video":
                            wandb.Video(full_fn, fps=60, format="gif")
                        })
                if self._log_wandb:
                    wandb.log(self._wandb_dict)

        self.shutdown_worker()
        if created_session:
            sess.close()
Example #25
 def send_wandb_video(self):
     if self.last_frames is None or len(self.last_frames) == 0:
         print("Not enough images for GIF. continuing...")
         return
     lf = np.array(self.last_frames)
     print(lf.shape)
     frames = np.swapaxes(lf, 1, 3)
     frames = np.swapaxes(frames, 2, 3)
     wandb.log({"video": wandb.Video(frames, fps=10, format="gif")})
     print("=== Logged GIF")
Example #26
    def _flow_video(self, flow_seq, wandb_video=True):
        images = [
            self._flow_image(flow_frame, wandb_image=False)
            for flow_frame in flow_seq
        ]
        images = np.stack(images, axis=0).transpose(0, 3, 1, 2)

        if wandb_video:
            return wandb.Video(images, fps=self.video_fps, format='gif')
        return images
Example #27
    def log_pre_update(self):
        """
        Initialize the info dictionary to be logged in wandb and collect base metrics
        Returns info dictionary.
        """

        # Initialize and update the info dict for logging
        info = dict()
        info["ppo/advantage_mean"] = self.buf_advantages.mean()
        info["ppo/advantage_std"] = self.buf_advantages.std()
        info["ppo/return_mean"] = self.buf_returns.mean()
        info["ppo/return_std"] = self.buf_returns.std()
        info["ppo/value_est_mean"] = self.rollout.buf_vpreds.mean()
        info["ppo/value_est_std"] = self.rollout.buf_vpreds.std()
        info["ppo/explained_variance"] = explained_variance(
            self.rollout.buf_vpreds.flatten(),  # TODO: switch to ravel if pytorch>=1.9
            self.buf_returns.flatten()  # TODO: switch to ravel if pytorch >= 1.9
        )
        info["ppo/reward_mean"] = torch.mean(self.rollout.buf_rewards)

        if self.rollout.best_ext_return is not None:
            info["performance/best_ext_return"] = self.rollout.best_ext_return
        # TODO: maybe add extra flag for detailed logging so runs are not slowed down
        if not self.debugging:
            feature_stats, stacked_act_feat = self.get_activation_stats(
                self.rollout.buf_acts_features, "activations_features/"
            )
            hidden_stats, stacked_act_pi = self.get_activation_stats(
                self.rollout.buf_acts_pi, "activations_hidden/"
            )
            info.update(feature_stats)
            info.update(hidden_stats)

            info["activations_features/raw_act_distribution"] = wandb.Histogram(
                to_numpy(stacked_act_feat)
            )
            info["activations_hidden/raw_act_distribution"] = wandb.Histogram(
                to_numpy(stacked_act_pi)
            )

            info["ppo/action_distribution"] = wandb.Histogram(
                to_numpy(self.rollout.buf_acs).flatten()
            )

            if self.vlog_freq >= 0 and self.n_updates % self.vlog_freq == 0:
                print(str(self.n_updates) + " updates - logging video.")
                # Reshape images such that they have shape [time, channels, height, width]
                sample_video = torch.moveaxis(self.rollout.buf_obs[0], 3, 1)
                # Log buffer video from first env
                info["observations"] = wandb.Video(
                    to_numpy(sample_video), fps=12, format="gif"
                )

        return info
Example #28
def main(env_id: str):
    logging.basicConfig(level=logging.INFO)
    wandb.init(project="example_rl_algos")
    env = gym.make(env_id)

    def actor(state):
        return env.action_space.sample()

    recorder = Recoder(env)
    videos = recorder.record_videos(actor)
    wandb.log({"video": wandb.Video(videos[0], fps=60, format="mp4")})
Example #29
 def log_videos(self, vids, name, step=None):
     """Logs videos to WandB in mp4 format.
     Assumes list of numpy arrays as input with [time, channels, height, width]."""
     assert len(vids[0].shape) == 4 and vids[0].shape[1] == 3
     assert isinstance(vids[0], np.ndarray)
     if vids[0].max() <= 1.0:
         vids = [np.asarray(vid * 255.0, dtype=np.uint8) for vid in vids]
     # TODO(karl) expose the FPS as a parameter
     log_dict = {
         name: [wandb.Video(vid, fps=20, format="mp4") for vid in vids]
     }
     wandb.log(log_dict) if step is None else wandb.log(log_dict, step=step)
Example #30
 def save_video(self):
     timer = Timer()
     timer.start()
     output = np.uint8(self.frames)
     vidpath = os.path.join(
         self.vidpth,
         self.env.spec.id + '-' + str(int(self.returns)) + '.mp4')
     skvideo.io.vwrite(vidpath, output, outputdict={'-pix_fmt': 'yuv420p'})
     if self.upload:
         wandb.log({'video': wandb.Video(vidpath, fps=30, format='mp4')},
                   step=self.steps)
     self.durations['saving'] += timer.stop()