import time

import numpy as np
import torch

import imutil


def composite_rgb_reward_factor_image(x_t_pixels, reward_map, z, num_rewards=4):
    simulated_rgb = imutil.get_pixels(x_t_pixels * 255, 512, 512, normalize=False)
    # Split the reward map into its positive and negative parts
    reward_positive = reward_map[0] * (reward_map[0] > 0).type(torch.cuda.FloatTensor)
    reward_negative = -reward_map[0] * (reward_map[0] < 0).type(torch.cuda.FloatTensor)
    # Overlay negative reward in red and positive reward in blue
    red_map = imutil.get_pixels(reward_negative.sum(dim=0) * 255, 512, 512, normalize=False)
    red_map[:, :, 1:] = 0
    blue_map = imutil.get_pixels(reward_positive.sum(dim=0) * 255, 512, 512, normalize=False)
    blue_map[:, :, :2] = 0
    reward_overlay_simulation = np.clip(simulated_rgb + red_map + blue_map, 0, 255)
    # Latent feature maps are shown to the right of the reward overlay
    feature_maps = imutil.get_pixels(z[0], 512, 512, img_padding=4) * 255
    composite_visual = np.concatenate([reward_overlay_simulation, feature_maps], axis=1)
    return composite_visual
def convert_atari_frame(state, width=64, height=64):
    if ENV_NAME.startswith('SpaceInvaders'):
        # Crop to playable area
        state = state[20:]
    state = imutil.get_pixels(state, width, height)
    # HWC to CHW for PyTorch
    state = state.transpose((2, 0, 1))
    return state
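# Hypothetical usage sketch (not part of the original code): assumes the
# classic `gym` API where env.reset() returns a raw HWC uint8 Atari frame,
# and that ENV_NAME is the module-level constant referenced above.
def _example_convert_single_frame():
    import gym
    env = gym.make(ENV_NAME)                # e.g. 'SpaceInvaders-v0'
    raw_frame = env.reset()                 # (210, 160, 3) uint8 for Atari
    chw_state = convert_atari_frame(raw_frame)
    # imutil.get_pixels resizes to (64, 64, 3); the transpose above then
    # makes the frame channels-first for PyTorch
    assert chw_state.shape == (3, 64, 64)
    return chw_state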
def composite_aleatoric_surprise_image(x_t_pixels, surprise_map, z, num_factors=16):
    simulated_rgb = imutil.get_pixels(x_t_pixels * 255, 512, 512, normalize=False)
    # Overlay aleatoric surprise in green, averaged over the latent factors
    surprise = surprise_map[0].sum(0) / num_factors
    green_map = imutil.get_pixels(surprise * 255, 512, 512, normalize=False)
    green_map[:, :, 0] = 0
    green_map[:, :, 2] = 0
    surprise_overlay_simulation = np.clip(simulated_rgb + green_map, 0, 255)
    # Latent feature maps are shown to the right of the surprise overlay
    feature_maps = imutil.get_pixels(z[0], 512, 512, img_padding=4) * 255
    composite_visual = np.concatenate([surprise_overlay_simulation, feature_maps], axis=1)
    return composite_visual
def composite_feature_rgb_image(actual_features, actual_rgb, predicted_features, predicted_rgb):
    # Bottom row: feature maps (actual on the left, predicted on the right)
    lbot = imutil.get_pixels(actual_features[0], 384, 512, img_padding=4, normalize=False)
    rbot = imutil.get_pixels(predicted_features[0], 384, 512, img_padding=4, normalize=False)
    height, width, channels = lbot.shape
    # Top row: RGB frames, scaled to match the width of the feature grids
    ltop = imutil.get_pixels(actual_rgb[0], width, width, normalize=False)
    rtop = imutil.get_pixels(predicted_rgb[0], width, width, normalize=False)
    left = np.concatenate([ltop, lbot], axis=0)
    right = np.concatenate([rtop, rbot], axis=0)
    pixels = np.concatenate([left, right], axis=1)
    pixels = np.clip(pixels, 0, 1)
    return pixels * 255
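# Hypothetical smoke test (not from the original repo), documenting the
# expected inputs under the assumption that imutil.get_pixels accepts torch
# tensors as in the calls above: batched feature maps and RGB frames for the
# actual and predicted streams, with values already in [0, 1].
def _example_composite_feature_rgb():
    actual_features = torch.rand(1, 16, 64, 64)
    predicted_features = torch.rand(1, 16, 64, 64)
    actual_rgb = torch.rand(1, 3, 64, 64)
    predicted_rgb = torch.rand(1, 3, 64, 64)
    pixels = composite_feature_rgb_image(
        actual_features, actual_rgb, predicted_features, predicted_rgb)
    # Actual on the left, predicted on the right; RGB above, features below
    return pixels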
def convert_frame(state):
    feature_map, feature_screen, rgb_map, rgb_screen = state
    rgb_screen = imutil.get_pixels(rgb_screen)
    return feature_screen, rgb_screen
import matplotlib.pyplot as plt
import face_alignment

fig = plt.figure(figsize=(6.4, 6.4))
ax = fig.add_subplot(111, projection='3d')
ax.view_init(10, 0)
fig.tight_layout(rect=[0, 0.01, 1, 0.99])
fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._3D)
vid = imutil.Video('output_{}.mp4'.format(int(time.time())))
for img in read_images():
    start_time = time.time()
    preds = fa.get_landmarks(img)[0]
    print('Timing: {:.02f} seconds for one frame'.format(time.time() - start_time))
    #ax.set_xlim3d(-500, 500)
    #ax.set_ylim3d(-500, 500)
    #ax.set_zlim3d(-200, 200)
    ax.scatter(preds[:, 2], preds[:, 0], -preds[:, 1])
    # Show the input frame next to the 3D landmark plot
    left = imutil.get_pixels(img, 640, 640)
    right = imutil.get_pixels(plt, 640, 640)
    pixels = np.concatenate([left, right], axis=1)
    vid.write_frame(pixels)
    imutil.show(pixels, save=False)
    ax.clear()
vid.finish()
def simulate_trajectory_from_actions(z, decoder, reward_pred, transition,
                                     states, rewards, dones, actions, ftr_vid,
                                     timesteps=60, caption_tag='',
                                     num_actions=4, num_rewards=4):
    estimated_cumulative_reward = np.zeros(num_rewards)
    true_cumulative_reward = np.zeros(num_rewards)
    estimated_rewards = []
    for t in range(2, timesteps - 1):
        # Decode the current latent point into pixels and per-factor maps
        x_t, x_t_separable = decoder(z, visualize=True)
        x_t = torch.sigmoid(x_t)
        x_t_pixels = convert_ndim_image_to_rgb(x_t)
        estimated_reward, reward_map = reward_pred(z, visualize=True)
        estimated_rewards.append(estimated_reward[0])
        estimated_cumulative_reward += estimated_reward[0].data.cpu().numpy()
        true_cumulative_reward += rewards[0, t]

        # Visualize features and RGB
        caption = '{} t+{} a={} R_est={} R_true = {} '.format(
            caption_tag, t, actions[:, t],
            format_reward_vector(estimated_reward[0]),
            format_reward_vector(rewards[0, t]))
        #rgb_pixels = composite_feature_rgb_image(states[:, t], rgb_states[:, t], x_t, x_t_pixels)
        #rgb_vid.write_frame(rgb_pixels, caption=caption, normalize=False)

        # Visualize factors and reward mask
        ftr_pixels = composite_rgb_reward_factor_image(x_t_pixels, reward_map, z)
        gt_state = states[0, t].mean(0) * 255
        true_pixels = imutil.get_pixels(gt_state, 512, 512, img_padding=8, normalize=False)
        ftr_pixels = np.concatenate([true_pixels, ftr_pixels], axis=1)
        ftr_vid.write_frame(ftr_pixels, caption=caption, normalize=False)

        # Visualize each separate factor
        num_factors, num_features, height, width = x_t_separable.shape
        #for z_i in range(num_factors):
        #    factor_vis = rgb_decoder(x_t_separable[z_i].unsqueeze(0), enable_bg=False)
        #    factor_vids[z_i].write_frame(factor_vis * 255, normalize=False)

        # Predict the next latent point
        onehot_a = torch.eye(num_actions)[actions[:, t]].cuda()
        z = transition(z, onehot_a).detach()
        if dones[0, t]:
            break
    # Hold the final frame for a few extra frames with the cumulative rewards
    for _ in range(10):
        caption = 'R_est={} R_true = {} '.format(
            format_reward_vector(estimated_cumulative_reward),
            format_reward_vector(true_cumulative_reward))
        #rgb_vid.write_frame(rgb_pixels, caption=caption, normalize=False)
        ftr_vid.write_frame(ftr_pixels, caption=caption, normalize=False)
    print('True cumulative reward: {}'.format(
        format_reward_vector(true_cumulative_reward)))
    print('Estimated cumulative reward: {}'.format(
        format_reward_vector(estimated_cumulative_reward)))
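# Hypothetical call site (not from the original repo): the models and rollout
# tensors (z, decoder, reward_pred, transition, states, rewards, dones,
# actions) are assumed to come from the surrounding training code; only the
# imutil.Video construction mirrors its usage elsewhere in this file.
def _example_simulate(z, decoder, reward_pred, transition,
                      states, rewards, dones, actions):
    ftr_vid = imutil.Video('simulation_{}.mp4'.format(int(time.time())))
    simulate_trajectory_from_actions(
        z, decoder, reward_pred, transition,
        states, rewards, dones, actions, ftr_vid,
        timesteps=60, caption_tag='Simulation')
    ftr_vid.finish()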
def render(self, *args, **kwargs):
    state, reward, done, info = self.unpack_observation()
    feature_map, feature_screen, rgb_map, rgb_screen = state
    # Note: the composited pixels are built here but only the raw
    # observation tuple is returned
    visual = np.concatenate([rgb_map, rgb_screen], axis=1)
    result = imutil.get_pixels(rgb_screen, 64, 64) * 255
    return state