def validation_epoch_end(self, outputs):
    mean_loss = torch.stack([x['val_loss'] for x in outputs]).mean()
    mean_psnr = torch.stack([x['val_psnr'] for x in outputs]).mean()
    log = {'val/loss': mean_loss, 'val/psnr': mean_psnr}
    self.log("val/loss", mean_loss)
    wandb.log(log)

    self.hparams.scene_name = self.hparams.exp_name
    self.hparams.N_importance = 64
    ckpt_dir = os.path.join(self.hparams.log_dir, self.hparams.exp_name,
                            "ckpts")
    ckpts = [f for f in os.listdir(ckpt_dir) if "epoch" in f]
    if len(ckpts) != 0:
        ckpts.sort()
        self.hparams.eval_ckpt_path = os.path.join(ckpt_dir, ckpts[-1])
        img_gif, depth_gif = eval(self.hparams)
        wandb.log({
            "val/depth_gif": wandb.Video(depth_gif, fps=30, format="gif")
        })
        # else:
        wandb.log(
            {"val/out_gif": wandb.Video(img_gif, fps=30, format="gif")})

    return {
        'progress_bar': {
            'val_loss': mean_loss,
            'val_psnr': mean_psnr
        },
    }
def vis_mse(dataset_or_dataloader, model, epoch, cfg):
    if epoch % cfg['output']['vis_every_epoch']:
        return

    # Settings
    device = cfg['training']['device']
    write_probs = cfg['output']['write_probs']
    probs_dir = cfg['paths']['predictions']
    bs, n_batches = cfg['training']['batch_size_test'], cfg['output'][
        'batches_vis']
    normalize = cfg['transforms']['test']['normalize']
    mean, std = cfg['data']['3d']['mean'][0], cfg['data']['3d']['std'][0]
    sigmoid = cfg['training']['sigmoid']
    kldiv = cfg['training']['test_criterion'] == 'kldivloss'
    wandb_dict = {'epoch': epoch}
    wandb_videos_true, wandb_videos_pred = [], []

    # Get data & predict
    if type(dataset_or_dataloader) == DataLoader:
        dataloader = dataset_or_dataloader
    else:
        dataloader = DataLoader(dataset_or_dataloader,
                                bs,
                                shuffle=False,
                                num_workers=1,
                                pin_memory=True)
    iter_dl = iter(dataloader)
    for i_batch in range(n_batches):
        batch_x, batch_y_true = next(iter_dl)
        with torch.no_grad():
            batch_y_pred = model(batch_x.to(device))
        if kldiv:
            # Get probabilities and ignore BG channel
            batch_y_pred = torch.softmax(batch_y_pred, dim=1)[:, 1:]
            batch_y_true = batch_y_true[:, 1:]
        if sigmoid:
            batch_y_pred = torch.sigmoid(batch_y_pred)
        if write_probs:
            np.savez_compressed(os.path.join(probs_dir, f"{epoch}.npz"),
                                true=batch_y_true.cpu().numpy(),
                                pred=batch_y_pred.cpu().numpy())
        for i, (video, true_video, pred_video) in enumerate(
                zip(batch_x, batch_y_true, batch_y_pred)):
            if normalize:
                video = video * std + mean
            vid_true, vid_pred = create_video(video, pred_video, true_video)
            wandb_videos_true.append(
                wandb.Video(vid_true, fps=20, format="mp4"))
            wandb_videos_pred.append(
                wandb.Video(vid_pred, fps=20, format="mp4"))

    wandb_dict["videos_true"] = wandb_videos_true
    wandb_dict["videos_pred"] = wandb_videos_pred
    wandb.log(wandb_dict)
    print(f"logged: {wandb_dict}")
def log_video_hrl(env_name, actor_low, actor_high, params):
    actor_low = copy.deepcopy(actor_low).cpu()
    actor_high = copy.deepcopy(actor_high).cpu()
    actor_high.max_goal = actor_high.max_goal.to('cpu')
    policy_params = params.policy_params
    goal_dim = params.goal_dim
    if env_name in envnames_mujoco:
        env = gym.make(env_name)
    elif env_name in envnames_ant:
        env = create_maze_env(env_name=env_name)
    print('\n > Collecting current trajectory...')
    done = False
    step = 1
    state = torch.Tensor(env.reset())
    goal = torch.Tensor(torch.randn(goal_dim))
    episode_reward, frame_buffer = 0, []
    while not done and step < 600:
        frame_buffer.append(env.render(mode='rgb_array'))
        action = actor_low(torch.Tensor(state), torch.Tensor(goal)).detach()
        next_state, reward, done, info = env.step(action)
        if (step + 1) % policy_params.c == 0 and step > 0:
            goal = actor_high(state)
        else:
            goal = (torch.Tensor(state)[:goal_dim] + goal -
                    torch.Tensor(next_state)[:goal_dim]).float()
        state = next_state
        episode_reward += reward
        step += 1
    print(
        f' > Finished collection, saved video. Episode reward: {float(episode_reward):.3f}\n'
    )
    frame_buffer = np.array(frame_buffer).transpose(0, 3, 1, 2)
    wandb.log({"video": wandb.Video(frame_buffer, fps=30, format="mp4")})
    env.close()
def log_blender_weights(self, blender_weights, image=None):
    """Visualize blender weights as an image or a video, depending on input shape.

    Args:
        blender_weights (Tensor): [T_future x ] (T+1) x H_b x W_b, where T_future
            is an optional dimension. When given, the result will be visualized
            as a gif video containing T_future frames. T is the number of frames
            used for warping (same below).
        image (Optional[Tensor]): T x 3 x H x W. When the image dimensions (H, W)
            are not equal to the blender dimensions, the blender weights will be
            bilinearly interpolated to the image dimensions.
    """
    if len(blender_weights.shape) == 4:
        frames = [
            self._blender_weight_on_image(b_weight, image, wandb_image=False)
            for b_weight in blender_weights
        ]
        frames = np.stack(frames, axis=0)
        frames = frames.transpose(0, 3, 1, 2)  # wandb needs T x 3 x H x W
        video = wandb.Video(frames, fps=self.video_fps, format='gif')
        self.log({'blender_weights_seq': video})
    else:
        image = self._blender_weight_on_image(blender_weights,
                                              image,
                                              wandb_image=True)
        self.log({
            'blender_weights':
            [wandb.Image(image, caption='weights_heatmap')]
        })
def encode_images(img_strs, value):
    try:
        from PIL import Image
    except ImportError:
        wandb.termwarn(
            'Install pillow if you are logging images with Tensorboard. To install, run "pip install pillow".',
            repeat=False,
        )
        return

    if len(img_strs) == 0:
        return

    images = []
    for img_str in img_strs:
        # Supports gifs from TboardX
        if img_str.startswith(b"GIF"):
            images.append(wandb.Video(six.BytesIO(img_str), format="gif"))
        else:
            images.append(wandb.Image(Image.open(six.BytesIO(img_str))))
    tag_idx = value.tag.rsplit("/", 1)
    if len(tag_idx) > 1 and tag_idx[1].isdigit():
        tag, idx = tag_idx
        values.setdefault(history_image_key(tag, namespace),
                          []).extend(images)
    else:
        values[history_image_key(value.tag, namespace)] = images
def _segmentation_mask_video(self, logits_or_mask, image=None,
                             wandb_video=True):
    """Convert an input logit / mask sequence into a video, with an optional image as background.

    :param logits_or_mask: Tensor of shape T x C x H x W
    :param image: Tensor of shape T x 3 x H x W, or None
    :param wandb_video: if True, return a wandb.Video; otherwise return the
        stacked mask array of shape T x 3 x H x W
    :return: wandb.Video for logging
    """
    np_imgs = []
    len_seq = logits_or_mask.shape[0]
    for t in range(len_seq):
        if image is None:
            np_imgs.append(
                Logger._segmentation_to_numpy(logits_or_mask[t, ...]))
        else:
            np_imgs.append(
                Logger._segmentation_to_numpy(logits_or_mask[t, ...],
                                              image[t, ...]))
    np_imgs = np.stack(np_imgs, axis=0)
    if wandb_video:
        return wandb.Video(np_imgs, fps=self.video_fps, format="gif")
    return np_imgs
def test_video_path_invalid():
    run = wandb.wandb_run.Run()
    with CliRunner().isolated_filesystem():
        with open("video.avi", "w") as f:
            f.write("00000")
        with pytest.raises(ValueError):
            vid = wandb.Video("video.avi")
def wandb_log_paths_as_videos(paths, step, max_videos_to_save=2, fps=10,
                              video_title='video'):
    # reshape the rollouts
    videos = [np.transpose(p['image_obs'], [0, 3, 1, 2]) for p in paths]

    # max rollout length
    max_videos_to_save = np.min([max_videos_to_save, len(videos)])
    max_length = videos[0].shape[0]
    for i in range(max_videos_to_save):
        if videos[i].shape[0] > max_length:
            max_length = videos[i].shape[0]

    # pad rollouts to all be same length
    for i in range(max_videos_to_save):
        if videos[i].shape[0] < max_length:
            padding = np.tile([videos[i][-1]],
                              (max_length - videos[i].shape[0], 1, 1, 1))
            videos[i] = np.concatenate([videos[i], padding], 0)

    # log videos to wandb
    videos = np.stack(videos[:max_videos_to_save], 0)
    wandb.log({
        f'{video_title}_i{i}_s{step}': wandb.Video(videos[i],
                                                   caption=f'{video_title}_s{step}',
                                                   fps=fps,
                                                   format="gif")
        for i in range(videos.shape[0])
    })
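# Hedged usage sketch (not from the original source): wandb_log_paths_as_videos
# above expects each rollout dict to carry an 'image_obs' array shaped
# (time, height, width, channels). The dummy rollouts and the wandb.init
# settings below are illustrative assumptions only.
import numpy as np
import wandb

if __name__ == "__main__":
    wandb.init(project="video-logging-demo", mode="offline")  # assumed project name
    dummy_paths = [
        {"image_obs": np.random.randint(0, 255, size=(40, 64, 64, 3), dtype=np.uint8)},
        {"image_obs": np.random.randint(0, 255, size=(25, 64, 64, 3), dtype=np.uint8)},
    ]
    # Rollouts of different lengths are padded to the longest one before logging.
    wandb_log_paths_as_videos(dummy_paths, step=0, max_videos_to_save=2, fps=10)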
def update_logs(logpath, loss, rewards, total_steps, episode_length,
                training_steps, send_to_wandb=False, videopath=None):
    log = pd.read_csv(logpath, sep=',', header=[0], index_col=0)
    log = log.append(
        pd.DataFrame({
            'updated': [datetime.now()],
            'total_steps': [total_steps],
            'training_steps': [training_steps],
            'episode_length': [episode_length],
            'loss': [loss],
            'reward': [rewards]
        }))
    log.to_csv(logpath, sep=',', header=True)

    if send_to_wandb:
        if videopath is not None:
            wandb.log(
                {
                    'loss': loss,
                    'reward': rewards,
                    'video': wandb.Video(videopath)
                },
                step=total_steps)
        else:
            wandb.log({'loss': loss, 'reward': rewards}, step=total_steps)
def save_as_mp4(image_array, save_path, save_to_wandb: bool = True) -> None:
    dpi = 72.0
    xpixels, ypixels = image_array[0].shape[:2]
    fig = plt.figure(figsize=(ypixels / dpi, xpixels / dpi), dpi=dpi)
    im = plt.figimage(image_array[0])

    def animate(i):
        im.set_array(image_array[i])
        return (im, )

    anim = animation.FuncAnimation(fig,
                                   animate,
                                   frames=len(image_array),
                                   interval=33,
                                   repeat_delay=1,
                                   repeat=True)
    if not os.path.exists(os.path.dirname(save_path)):
        os.makedirs(os.path.dirname(save_path))
    anim.save(save_path)
    if save_to_wandb:
        wandb.log({
            'animation':
            wandb.Video(
                np.transpose(np.array(image_array)[::3, :, :, :],
                             (0, 3, 1, 2)),
                fps=16,
                format="mp4",
                caption=os.path.basename(save_path),
            )
        })
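# Hedged usage sketch (illustrative only, not from the original source):
# save_as_mp4 above expects a sequence of (H, W, 3) RGB frames; saving to mp4
# via matplotlib.animation typically requires ffmpeg to be installed on the
# system. The output path and random frame contents below are made-up assumptions.
import numpy as np

def demo_save_as_mp4():
    frames = [np.random.randint(0, 255, size=(96, 128, 3), dtype=np.uint8)
              for _ in range(30)]
    save_as_mp4(frames, save_path="./videos/demo.mp4", save_to_wandb=False)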
def _on_step(self, plot=True) -> bool:
    """Evaluate the current policy for self.eval_episodes, then take a render
    and report all stats to W&B.

    Args:
        plot: Enable matplotlib plotting behavior. Should be set to True
            unless testing. Defaults to True.

    Returns:
        True, as per API requirements
    """
    mean_rewards, std_rewards = evaluate_policy(
        self.model, self.env, n_eval_episodes=self.eval_episodes)
    images = []
    rewards = []
    actions = []
    obses = []
    step_cnt = 0
    done, state = False, None
    obs = self.env.reset()
    while not done:
        if step_cnt % self.render_freq == 0:
            images.append(self.env.render(mode='rgb_array'))
        action, state = self.model.predict(obs,
                                           state=state,
                                           deterministic=True)
        obs, reward, done, _ = self.env.step(action)
        rewards.append(reward)
        actions.append(action)
        obses.append(obs)
        step_cnt += 1
    render = np.array(images)
    render = np.transpose(render, (0, 3, 1, 2))
    actions = np.array(actions).flatten()
    observes = np.array(obses).flatten()
    rewards = np.array(rewards)
    if plot:
        plt.clf()
        plt.plot(np.arange(len(rewards)), rewards)
        plt.xlabel('timesteps')
        plt.ylabel('rewards')
        plt.title('Timestep {}'.format(self.num_timesteps))
    wandb.log({
        'test_reward_mean': mean_rewards,
        'test_reward_std': std_rewards,
        'render': wandb.Video(render, format='gif', fps=self.fps),
        'global_step': self.num_timesteps,
        'evaluations': self.n_calls,
        'reward_distribution': wandb.Histogram(rewards),
        'action_distribution': wandb.Histogram(actions),
        'observation_distribution': wandb.Histogram(observes),
        'reward_vs_time': plot and wandb.Image(plt),
    },
              step=self.num_timesteps)
    return True
def training(
    env: Env,
    agent: AgentBase,
    max_steps: int,
    logging_interval: int = 1,
    evaluator: Optional[Evaluator] = None,
    recorder: Optional[Recoder] = None,
    logger: logging.Logger = logging.getLogger(__name__),
):
    def actor(state):
        return agent.act(state)

    interactions = TransitionGenerator(env, actor, max_step=max_steps)

    for steps, states, next_states, actions, rewards, dones, info in interactions:
        agent.observe(
            states=states,
            next_states=next_states,
            actions=actions,
            rewards=rewards,
            terminals=is_state_terminal(env, steps, dones, info),
            resets=dones,
        )

        with agent.eval_mode():
            # Evaluate
            if evaluator is not None:
                scores = evaluator.evaluate_if_necessary(
                    interactions.total_step.sum(), actor)
                if len(scores) > 0:
                    logger.info(
                        f"Evaluate Agent: mean_score: {mean(scores)} (stdev: {stdev(scores)})"
                    )
                    wandb.log({
                        "step": interactions.total_step.sum(),
                        "eval/mean": mean(scores),
                        "eval/stdev": stdev(scores),
                    })

            # Record videos
            if recorder is not None:
                videos = recorder.record_videos_if_necessary(
                    interactions.total_step.sum(), actor)
                for video in videos:
                    wandb.log({
                        "step": interactions.total_step.sum(),
                        "video": wandb.Video(video, fps=60, format="mp4"),
                    })

        if agent.just_updated and (interactions.total_step.sum() %
                                   logging_interval == 0):
            stats = agent.get_statistics()
            logger.info(stats)
            wandb.log({
                "step": interactions.total_step.sum(),
                **__add_header_to_dict_key(stats, "train"),
                **__add_header_to_dict_key(interactions.get_statistics(), "train"),
            })

    return agent
def eval(self, model):
    print(f'Evaluating {self.prefix}')
    trajs = self.sampler.collect_trajectories(n_interactions=None,
                                              n_trajs=self.n_trajs)
    frames = []
    for traj in trajs:
        task = traj.task
        for i in range(len(traj.obs)):
            obs = traj.obs[i]
            frame = np.concatenate([obs[-1], task.obs[-1]], axis=1)
            if self.time_to_go:
                frame = self._add_time_to_go(frame,
                                             traj.policy_infos[i].time_to_go)
            frames.append(frame)
        # Freeze the video for 10 frames after a goal is achieved
        for _ in range(10):
            frames.append(frames[-1])
    ann_frames = []
    for i in range(len(frames)):
        frame_image = Image.fromarray(frames[i]).convert('RGBA')
        np_frame = np.array(frame_image)
        np_frame = np.moveaxis(np_frame, -1, 0)
        ann_frames.append(np_frame)
    ann_frames = np.array(ann_frames)
    video = wandb.Video(ann_frames, fps=10, format='mp4')
    logs = {'policy_video': video}
    return logs
def test_video_numpy_multi():
    with CliRunner().isolated_filesystem():
        run = wandb.wandb_run.Run()
        video = np.random.random(size=(2, 10, 3, 28, 28))
        vid = wandb.Video(video)
        vid.bind_to_run(run, "videos", 0)
        assert vid.to_json(run)["path"].endswith(".gif")
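# Illustrative companion sketch (not part of the original test suite): wandb.Video
# accepts 4D numpy input as (time, channels, height, width) and 5D input as
# (batch, time, channels, height, width); the 5D case exercised in
# test_video_numpy_multi above is tiled into a single gif. The run settings and
# project name here are assumptions.
import numpy as np
import wandb

def demo_video_from_numpy():
    with wandb.init(project="video-demo", mode="offline") as run:  # assumed settings
        single = np.random.randint(0, 255, size=(10, 3, 28, 28), dtype=np.uint8)
        batched = np.random.randint(0, 255, size=(2, 10, 3, 28, 28), dtype=np.uint8)
        run.log({
            "single_video": wandb.Video(single, fps=4, format="gif"),
            "batched_video": wandb.Video(batched, fps=4, format="gif"),
        })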
def mutate(self, weights, record):
    """Mutate the input weights and evaluate their performance against the
    weights of the previous generation."""
    recorder = VideoRecorder(self.env,
                             path=self.video_path) if record else None
    self.elite.set_weights(weights)
    self.oponent.set_weights(weights)
    perturbations = self.oponent.mutate(self.config['mutation_power'])
    _, oponent_reward1, ts1 = self.play_game(self.elite,
                                             self.oponent,
                                             recorder=recorder)
    oponent_reward2, _, ts2 = self.play_game(self.oponent,
                                             self.elite,
                                             recorder=recorder)
    if record:
        recorder.close()
    return {
        'total_reward': np.mean([oponent_reward1, oponent_reward2]),
        'timesteps_total': ts1 + ts2,
        'video': None if not record else wandb.Video(self.video_path),
        'noise': perturbations
    }
def test_video_path():
    with CliRunner().isolated_filesystem():
        run = wandb.wandb_run.Run()
        with open("video.mp4", "w") as f:
            f.write("00000")
        vid = wandb.Video("video.mp4")
        vid.bind_to_run(run, "videos", 0)
        assert vid.to_json(run)["path"].endswith(".mp4")
def close(self):
    vcr.ImageEncoder.orig_close(self)
    m = re.match(r".+(video\.\d+).+", self.output_path)
    if m:
        key = m.group(1)
    else:
        key = "videos"
    wandb.log({key: wandb.Video(self.output_path)})
def flush_data(self):
    if self.log_wandb:
        wandb.log({
            'vid':
            wandb.Video(np.stack(self.vizs).transpose((0, 3, 1, 2)),
                        fps=20,
                        format='mp4')
        })
    self.vizs.clear()
def evaluate(actor_critic, ob_rms, env_name, seed, num_processes,
             eval_log_dir, device, custom_gym, gif=False):
    eval_envs = make_vec_envs(env_name, seed + num_processes, num_processes,
                              None, eval_log_dir, device, True, custom_gym)

    vec_norm = utils.get_vec_normalize(eval_envs)
    if vec_norm is not None:
        vec_norm.eval()
        vec_norm.ob_rms = ob_rms

    eval_episode_rewards = []

    obs = eval_envs.reset()
    eval_recurrent_hidden_states = torch.zeros(
        num_processes, actor_critic.recurrent_hidden_state_size, device=device)
    eval_masks = torch.zeros(num_processes, 1, device=device)
    images = []
    while len(eval_episode_rewards) < 10:
        with torch.no_grad():
            images.append(obs[0, -3:, :].squeeze().cpu().numpy())
            _, action, _, eval_recurrent_hidden_states = actor_critic.act(
                obs,
                eval_recurrent_hidden_states,
                eval_masks,
                deterministic=True)

        # Observe reward and next obs
        obs, _, done, infos = eval_envs.step(action)

        eval_masks = torch.tensor([[0.0] if done_ else [1.0]
                                   for done_ in done],
                                  dtype=torch.float32,
                                  device=device)

        for info in infos:
            if 'episode' in info.keys():
                eval_episode_rewards.append(info['episode']['r'])

    images.append(obs[0, -3:, :].squeeze().cpu().numpy())
    eval_envs.close()
    if gif:
        array2gif.write_gif(images, 'replay.gif', fps=4)
        config.tensorboard.run.log(
            {"video": wandb.Video('replay.gif', fps=4, format="gif")},
            commit=True)
        config.tensorboard.run.history._flush()
    print(" Evaluation using {} episodes: mean reward {:.5f}\n".format(
        len(eval_episode_rewards), np.mean(eval_episode_rewards)))
def log_videos(self, vids, name, fps=15, step=None):
    """Logs videos to WandB in mp4 format.

    Assumes a list of numpy arrays as input with [time, channels, height, width].
    """
    assert len(vids[0].shape) == 4 and vids[0].shape[1] == 3
    assert isinstance(vids[0], np.ndarray)
    log_dict = {
        name: [wandb.Video(vid, fps=fps, format="mp4") for vid in vids]
    }
    wandb.log(log_dict) if step is None else wandb.log(log_dict, step=step)
def evaluate(
    self,
    checkpoint: Optional[Path] = None,
    baseline: Optional[str] = None,
    agents: Optional[Mapping[str, Agent]] = None,
    num_episodes: int = 10,
    visualize_planner: bool = False,
    seed: Optional[int] = None,
    sync_wandb: bool = True,
) -> Tuple[Dict[str, Dict[str, float]], Dict[str, List[wandb.Video]]]:
    """Evaluate trained model in all environments."""
    assert [checkpoint, baseline, agents].count(
        None
    ) == 2, 'Exactly one of checkpoint, baseline and agents must be provided'
    mean_metrics = {}
    videos = {}
    for task, sim in self.sims.items():
        logger.info(f'Evaluating on {task} task.')
        if seed is not None:
            # TODO: Make deterministic mode work again
            sim.seed(seed)
        save_dir = self.logdir / 'eval' / task / f'{datetime.now():%Y%m%d-%H%M%S}'
        get_distribution_strategy()
        if agents is not None:
            agent = agents[task]
        else:
            agent = self.get_agent(sim.action_space, checkpoint, baseline)
        if isinstance(agent, MPCAgent):
            agent.visualize = visualize_planner  # type: ignore[misc]  # mypy/issues/1362
        mean_metrics[task] = sim.run(agent,
                                     num_episodes,
                                     log=True,
                                     save_dir=save_dir,
                                     save_video=self.video)
        if self.video:
            videos[task] = [
                wandb.Video(str(vid), fps=10, format="mp4")
                for vid in save_dir.glob('*.mp4')
            ]
    if sync_wandb:
        # First delete existing summary items
        for k in list(wandb.run.summary._json_dict.keys()):
            wandb.run.summary._root_del((k, ))
        wandb.run.summary.update(mean_metrics)
        wandb.run.summary['seed'] = seed
        if self.video:
            for task, task_videos in videos.items():
                for i, vid in enumerate(task_videos):
                    wandb.run.summary[f'{task}/video_{i}'] = vid
    return mean_metrics, videos
def _log_gym_to_wandb(self, filename):
    # obtain gym.env from rllab.env
    render_env(self._env,
               path=self.wandb_configs['gif_dir'],
               filename=filename)
    if self._log_wandb:
        full_fn = os.path.join(os.getcwd(), self.wandb_configs['gif_dir'],
                               filename)
        wandb.log({"video": wandb.Video(full_fn, fps=60, format="gif")})
def step(self):
    """Evolve one generation using the Evolution Strategies algorithm.

    This consists of four steps:
    1. Send the current weights to a number of workers and mutate and evaluate them.
    2. Communicate the mutated weights and their fitness back to the Trainer.
    3. Update the weights using the ES update rule.
    4. Evaluate the updated weights against a random policy and log the outcome.
    """
    worker_jobs = []
    for i in range(self.config['population_size']):
        worker_id = i % self.config['num_workers']
        record = i < self.config['num_train_videos']
        worker_jobs += [
            self._workers[worker_id].mutate.remote(self.weights.get_weights(),
                                                   record)
        ]
    results = ray.get(worker_jobs)
    rewards = [result['total_reward'] for result in results]
    noises = [result['noise'] for result in results]

    normalized_rewards = self.normalize_rewards(rewards)
    weight_update = self.compute_weight_update(noises, normalized_rewards)
    weights = self.weights.get_perturbable_weights()
    self.weights.set_perturbable_weights(weights + weight_update)

    winner_file = self.try_save_winner(self.weights.get_weights())
    evaluate_results = self.evaluate_current_weights(
        self.weights.get_weights())
    evaluate_rewards = [
        result['total_reward'] for result in evaluate_results
    ]
    evaluate_videos = [result['video'] for result in evaluate_results]

    self.increment_metrics(results)

    summary = dict(
        timesteps_total=self.timesteps_total,
        episodes_total=self.episodes_total,
        train_reward_min=np.min(rewards),
        train_reward_mean=np.mean(rewards),
        train_reward_max=np.max(rewards),
        train_top_5_reward_avg=np.mean(np.sort(rewards)[-5:]),
        evaluate_reward_min=np.min(evaluate_rewards),
        evaluate_reward_mean=np.mean(evaluate_rewards),
        evaluate_reward_med=np.median(evaluate_rewards),
        evaluate_reward_max=np.max(evaluate_rewards),
        avg_timesteps_train=np.mean(
            [result['timesteps_total'] for result in results]),
        avg_timesteps_evaluate=np.mean(
            [result['timesteps_total'] for result in evaluate_results]),
        eval_max_video=evaluate_videos[np.argmax(evaluate_rewards)],
        eval_min_video=evaluate_videos[np.argmin(evaluate_rewards)],
        winner_file=wandb.Video(winner_file) if winner_file else None)

    self.add_videos_to_summary(results, summary)
    return summary
def train(self, sess=None):
    created_session = True if (sess is None) else False
    if sess is None:
        sess = tf.Session()
        sess.__enter__()

    sess.run(tf.global_variables_initializer())
    self.start_worker()
    start_time = time.time()
    for itr in range(self.start_itr, self.n_itr):
        itr_start_time = time.time()
        with logger.prefix('itr #%d | ' % itr):
            logger.log("Obtaining samples...")
            paths = self.obtain_samples(itr)
            logger.log("Processing samples...")
            samples_data = self.process_samples(itr, paths)
            logger.log("Logging diagnostics...")
            self.log_diagnostics(paths)
            logger.log("Optimizing policy...")
            self.optimize_policy(itr, samples_data, self._wandb_dict)
            logger.log("Saving snapshot...")
            params = self.get_itr_snapshot(itr, samples_data)  # , **kwargs)
            if self.store_paths:
                params["paths"] = samples_data["paths"]
            logger.save_itr_params(itr, params)
            logger.log("Saved")
            logger.record_tabular('Time', time.time() - start_time)
            logger.record_tabular('ItrTime', time.time() - itr_start_time)
            logger.dump_tabular(with_prefix=False)
            if self.plot:
                rollout(self.env,
                        self.policy,
                        animated=True,
                        max_path_length=self.max_path_length)
                if self.pause_for_plot:
                    input("Plotting evaluation run: Press Enter to "
                          "continue...")
            if self._render:
                fn = self._gif_header + str(itr) + '.gif'
                # obtain gym.env from rllab.env
                render_env(self.env.wrapped_env.env,
                           path=self._gif_dir,
                           filename=fn)
                if self._log_wandb:
                    full_fn = os.path.join(os.getcwd(), self._gif_dir, fn)
                    wandb.log({
                        "video": wandb.Video(full_fn, fps=60, format="gif")
                    })
            if self._log_wandb:
                wandb.log(self._wandb_dict)
    self.shutdown_worker()
    if created_session:
        sess.close()
def send_wandb_video(self):
    if self.last_frames is None or len(self.last_frames) == 0:
        print("Not enough images for GIF. continuing...")
        return

    lf = np.array(self.last_frames)
    print(lf.shape)
    frames = np.swapaxes(lf, 1, 3)
    frames = np.swapaxes(frames, 2, 3)
    wandb.log({"video": wandb.Video(frames, fps=10, format="gif")})
    print("=== Logged GIF")
def _flow_video(self, flow_seq, wandb_video=True):
    images = [
        self._flow_image(flow_frame, wandb_image=False)
        for flow_frame in flow_seq
    ]
    images = np.stack(images, axis=0).transpose(0, 3, 1, 2)
    if wandb_video:
        return wandb.Video(images, fps=self.video_fps, format='gif')
    return images
def log_pre_update(self):
    """
    Initialize the info dictionary to be logged in wandb and collect base metrics.

    Returns info dictionary.
    """
    # Initialize and update the info dict for logging
    info = dict()
    info["ppo/advantage_mean"] = self.buf_advantages.mean()
    info["ppo/advantage_std"] = self.buf_advantages.std()
    info["ppo/return_mean"] = self.buf_returns.mean()
    info["ppo/return_std"] = self.buf_returns.std()
    info["ppo/value_est_mean"] = self.rollout.buf_vpreds.mean()
    info["ppo/value_est_std"] = self.rollout.buf_vpreds.std()
    info["ppo/explained_variance"] = explained_variance(
        self.rollout.buf_vpreds.flatten(),  # TODO: switch to ravel if pytorch>=1.9
        self.buf_returns.flatten()  # TODO: switch to ravel if pytorch >= 1.9
    )
    info["ppo/reward_mean"] = torch.mean(self.rollout.buf_rewards)

    if self.rollout.best_ext_return is not None:
        info["performance/best_ext_return"] = self.rollout.best_ext_return

    # TODO: maybe add extra flag for detailed logging so runs are not slowed down
    if not self.debugging:
        feature_stats, stacked_act_feat = self.get_activation_stats(
            self.rollout.buf_acts_features, "activations_features/")
        hidden_stats, stacked_act_pi = self.get_activation_stats(
            self.rollout.buf_acts_pi, "activations_hidden/")
        info.update(feature_stats)
        info.update(hidden_stats)

        info["activations_features/raw_act_distribution"] = wandb.Histogram(
            to_numpy(stacked_act_feat))
        info["activations_hidden/raw_act_distribution"] = wandb.Histogram(
            to_numpy(stacked_act_pi))

        info["ppo/action_distribution"] = wandb.Histogram(
            to_numpy(self.rollout.buf_acs).flatten())

    if self.vlog_freq >= 0 and self.n_updates % self.vlog_freq == 0:
        print(str(self.n_updates) + " updates - logging video.")
        # Reshape images such that they have shape [time, channels, width, height]
        sample_video = torch.moveaxis(self.rollout.buf_obs[0], 3, 1)
        # Log buffer video from first env
        info["observations"] = wandb.Video(
            to_numpy(sample_video), fps=12, format="gif")

    return info
def main(env_id: str):
    logging.basicConfig(level=logging.INFO)
    wandb.init(project="example_rl_algos")
    env = gym.make(env_id)

    def actor(state):
        return env.action_space.sample()

    recorder = Recoder(env)
    videos = recorder.record_videos(actor)
    wandb.log({"video": wandb.Video(videos[0], fps=60, format="mp4")})
def log_videos(self, vids, name, step=None):
    """Logs videos to WandB in mp4 format.

    Assumes a list of numpy arrays as input with [time, channels, height, width].
    """
    assert len(vids[0].shape) == 4 and vids[0].shape[1] == 3
    assert isinstance(vids[0], np.ndarray)
    if vids[0].max() <= 1.0:
        vids = [np.asarray(vid * 255.0, dtype=np.uint8) for vid in vids]
    # TODO(karl) expose the FPS as a parameter
    log_dict = {
        name: [wandb.Video(vid, fps=20, format="mp4") for vid in vids]
    }
    wandb.log(log_dict) if step is None else wandb.log(log_dict, step=step)
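# Hedged usage sketch (assumption, not from the original repo): both log_videos
# variants above take a list of (time, channels, height, width) numpy arrays;
# this variant additionally rescales float videos in [0, 1] to uint8 before
# logging. Since `self` is unused inside the method, it is passed as None here
# purely for illustration; the shapes, names, and step value are made up, and
# wandb.init() is assumed to have been called earlier.
import numpy as np

def demo_log_videos():
    float_vids = [np.random.rand(16, 3, 64, 64).astype(np.float32)
                  for _ in range(2)]
    log_videos(None, float_vids, name="rollout_videos", step=100)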
def save_video(self):
    timer = Timer()
    timer.start()
    output = np.uint8(self.frames)
    vidpath = os.path.join(
        self.vidpth,
        self.env.spec.id + '-' + str(int(self.returns)) + '.mp4')
    skvideo.io.vwrite(vidpath, output, outputdict={'-pix_fmt': 'yuv420p'})
    if self.upload:
        wandb.log({'video': wandb.Video(vidpath, fps=30, format='mp4')},
                  step=self.steps)
    self.durations['saving'] += timer.stop()