def cem_make_gif(env, initial_states, action_trajs, configs, save_dir, save_name, img_size=720):
    """Replay each CEM action trajectory from its initial state and save a tiled GIF of all rollouts."""
    all_frames = []
    for i in range(len(action_trajs)):
        frames = []
        env.reset(config=configs[i], initial_state=initial_states[i])
        frames.append(env.get_image(img_size, img_size))
        for action in action_trajs[i]:
            _, reward, _, info = env.step(action, record_continuous_video=True, img_size=img_size)
            frames.extend(info['flex_env_recorded_frames'])
        all_frames.append(frames)
    # Convert to T x index x C x H x W for pytorch
    all_frames = np.array(all_frames).transpose([1, 0, 4, 2, 3])
    grid_imgs = [
        torchvision.utils.make_grid(torch.from_numpy(frame), nrow=5).permute(1, 2, 0).data.cpu().numpy()
        for frame in all_frames
    ]
    save_numpy_as_gif(np.array(grid_imgs), osp.join(save_dir, save_name))
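# save_numpy_as_gif is called throughout these snippets but not defined here. A minimal
# sketch of such a helper, assuming moviepy 1.x is available and frames arrive as a
# (T, H, W, C) uint8-convertible array; the repository's actual implementation may differ.
from moviepy.editor import ImageSequenceClip

def save_numpy_as_gif(frames, filename, fps=10):
    clip = ImageSequenceClip(list(frames), fps=fps)  # One clip frame per array slice
    clip.write_gif(filename, fps=fps)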
def end_record(self, video_path=None, **kwargs):
    if not self.recording:
        print('function end_record: Error! Not recording video')
        return  # Nothing to save, and self.video_frames may not exist yet
    self.recording = False
    if video_path is not None:
        save_numpy_as_gif(np.array(self.video_frames), video_path, **kwargs)
    del self.video_frames
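# Hypothetical usage of the recording API above: start_record(), its counterpart (seen
# in the qpg visualization script below), is assumed to set self.recording = True and
# initialize self.video_frames; extra keyword arguments to end_record are forwarded
# to save_numpy_as_gif.
env.start_record()
for action in action_traj:  # action_traj: any iterable of valid actions (hypothetical name)
    env.step(action)
env.end_record(video_path='./data/rollout.gif')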
def main():
    parser = argparse.ArgumentParser(description='Generate environment variations and record a random-action rollout video.')
    # Available environments:
    # ['PassWater', 'PourWater', 'PourWaterAmount', 'RopeFlatten', 'ClothFold', 'ClothFlatten',
    #  'ClothDrop', 'ClothFoldCrumpled', 'ClothFoldDrop', 'RopeConfiguration']
    parser.add_argument('--env_name', type=str, default='ClothFlatten')
    parser.add_argument('--headless', type=int, default=0, help='Whether to run the environment with headless rendering')
    parser.add_argument('--num_variations', type=int, default=1, help='Number of environment variations to be generated')
    parser.add_argument('--save_video_dir', type=str, default='./data/', help='Path to the saved video')
    parser.add_argument('--img_size', type=int, default=256, help='Size of the recorded videos')
    args = parser.parse_args()

    env_kwargs = env_arg_dict[args.env_name]

    # Generate and save the initial states for running this environment for the first time
    env_kwargs['use_cached_states'] = False
    env_kwargs['save_cached_states'] = False
    env_kwargs['num_variations'] = args.num_variations
    env_kwargs['render'] = True
    env_kwargs['headless'] = args.headless

    if not env_kwargs['use_cached_states']:
        print('Waiting to generate environment variations. May take 1 minute for each variation...')
    env = normalize(SOFTGYM_ENVS[args.env_name](**env_kwargs))
    env.reset()
    frames = [env.get_image(args.img_size, args.img_size)]
    for i in range(env.horizon):
        action = env.action_space.sample()
        # By default, the environments apply action repetition. The record_continuous_video option
        # renders all intermediate frames. Only use it for visualization, as it increases computation.
        _, _, _, info = env.step(action, record_continuous_video=True, img_size=args.img_size)
        frames.extend(info['flex_env_recorded_frames'])

    if args.save_video_dir is not None:
        save_name = osp.join(args.save_video_dir, args.env_name + '.gif')
        save_numpy_as_gif(np.array(frames), save_name)
        print('Video generated and saved to {}'.format(save_name))
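# The script presumably ends with the standard entry-point guard; the file name and
# flags in the example invocation below are illustrative, not taken from the source.
if __name__ == '__main__':
    main()
# Example: python random_env.py --env_name ClothFlatten --headless 1 --num_variations 2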
def evaluate(self):
    average_episode_reward = 0
    infos = []
    all_frames = []
    plt.figure()
    for episode in range(self.cfg.num_eval_episodes):
        obs = self.env.reset()
        done = False
        episode_reward = 0
        episode_step = 0
        ep_info = []
        frames = [self.env.get_image(128, 128)]
        rewards = []
        while not done:
            with utils.eval_mode(self.agent):
                action = self.agent.act(obs, sample=False)
            obs, reward, done, info = self.env.step(action)
            episode_reward += reward
            episode_step += 1
            ep_info.append(info)
            frames.append(self.env.get_image(128, 128))
            rewards.append(reward)
        average_episode_reward += episode_reward
        infos.append(ep_info)
        plt.plot(range(len(rewards)), rewards)
        if len(all_frames) < 8:  # Keep at most 8 episodes for the tiled GIF
            all_frames.append(frames)
    average_episode_reward /= self.cfg.num_eval_episodes
    for key, val in get_info_stats(infos).items():
        self.logger.log('eval/info_' + key, val, self.step)
    self.logger.log('eval/episode_reward', average_episode_reward, self.step)
    self.logger.dump(self.step)
    # N x T x H x W x C -> T x N x H x W x C, then tile the N episodes into one image per step
    all_frames = np.array(all_frames).swapaxes(0, 1)
    all_frames = np.array([make_grid(np.array(frame), nrow=2, padding=3) for frame in all_frames])
    save_numpy_as_gif(all_frames, os.path.join(self.video_dir, '%d.gif' % self.step))
    plt.savefig(os.path.join(self.video_dir, '%d.png' % self.step))
    plt.close()  # Free the reward-curve figure created at the top of this method
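# make_grid here takes numpy frames of shape (N, H, W, C), unlike torchvision.utils.make_grid
# used elsewhere in this section. A minimal sketch of such a helper, interpreting nrow as the
# number of grid rows and filling empty cells with pad_value; the actual helper may differ.
def make_grid(frames, nrow=2, padding=3, pad_value=0):
    n, h, w, c = frames.shape
    ncol = int(np.ceil(n / nrow))
    grid = np.full((nrow * (h + padding) + padding, ncol * (w + padding) + padding, c),
                   pad_value, dtype=frames.dtype)
    for idx in range(n):
        row, col = idx // ncol, idx % ncol
        top = padding + row * (h + padding)
        left = padding + col * (w + padding)
        grid[top:top + h, left:left + w] = frames[idx]
    return grid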
def run_eval_loop(sample_stochastically=True):
    # env, agent, L, step, args, video_dir, num_episodes and all_ep_rewards are
    # presumably defined in the enclosing evaluation scope.
    start_time = time.time()
    prefix = 'stochastic_' if sample_stochastically else ''
    infos = []
    all_frames = []
    plt.figure()
    for i in range(num_episodes):
        obs = env.reset()
        done = False
        episode_reward = 0
        ep_info = []
        frames = [env.get_image(128, 128)]
        rewards = []
        while not done:
            # Center-crop the image observation before feeding it to the agent
            if args.encoder_type == 'pixel':
                obs = utils.center_crop_image(obs, args.image_size)
            with utils.eval_mode(agent):
                if sample_stochastically:
                    action = agent.sample_action(obs)
                else:
                    action = agent.select_action(obs)
            obs, reward, done, info = env.step(action)
            episode_reward += reward
            ep_info.append(info)
            frames.append(env.get_image(128, 128))
            rewards.append(reward)
        plt.plot(range(len(rewards)), rewards)
        if len(all_frames) < 8:
            all_frames.append(frames)
        infos.append(ep_info)
        L.log('eval/' + prefix + 'episode_reward', episode_reward, step)
        all_ep_rewards.append(episode_reward)
    plt.savefig(os.path.join(video_dir, '%d.png' % step))
    all_frames = np.array(all_frames).swapaxes(0, 1)
    all_frames = np.array([make_grid(np.array(frame), nrow=2, padding=3) for frame in all_frames])
    save_numpy_as_gif(all_frames, os.path.join(video_dir, '%d.gif' % step))
    for key, val in get_info_stats(infos).items():
        L.log('eval/info_' + prefix + key, val, step)
    L.log('eval/' + prefix + 'eval_time', time.time() - start_time, step)
    mean_ep_reward = np.mean(all_ep_rewards)
    best_ep_reward = np.max(all_ep_rewards)
    L.log('eval/' + prefix + 'mean_episode_reward', mean_ep_reward, step)
    L.log('eval/' + prefix + 'best_episode_reward', best_ep_reward, step)
def generate_video(env, env_name):
    """Roll out random actions for 8 trajectories and save them as one tiled GIF."""
    all_videos = []
    for i in range(8):
        obs = env.reset()
        obs = (obs + 0.5) * 256.  # Un-normalize observations back to pixel range
        video = [obs]
        for j in range(env.horizon):
            action = env.action_space.sample()
            obs, _, _, info = env.step(action)
            obs = (obs + 0.5) * 256.
            video.append(obs)
        all_videos.append(torch.cat(video, 0))
        print('Env: {}, Eval traj {}'.format(env_name, i))
    # Convert to T x index x C x H x W for pytorch
    all_videos = torch.stack(all_videos, 0).permute(1, 0, 2, 3, 4)
    grid_imgs = np.array([
        torchvision.utils.make_grid(frame, nrow=4, padding=2, pad_value=120).permute(1, 2, 0).data.cpu().numpy()
        for frame in all_videos
    ])
    save_numpy_as_gif(grid_imgs, osp.join(SAVE_PATH, env_name + '.gif'))
    print('Video generated and saved to {}'.format(osp.join(SAVE_PATH, env_name + '.gif')))
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('snapshot_dir', type=str)
    parser.add_argument('max_q_eval_mode', type=str)
    # type=bool does not parse CLI strings correctly (any non-empty string is True); use an int flag
    parser.add_argument('--vis', type=int, default=0)
    parser.add_argument('--save_folder', type=str, default='./data/qpg_visualization')
    args = parser.parse_args()

    snapshot_file = join(args.snapshot_dir, 'params.pkl')
    config_file = join(args.snapshot_dir, 'params.json')
    params = torch.load(snapshot_file, map_location='cpu')
    with open(config_file, 'r') as f:
        config = json.load(f)
    config['sampler']['batch_B'] = 1
    config['sampler']['eval_n_envs'] = 1
    config['sampler']['eval_max_trajectories'] = 10 if not args.vis else 1
    config['env_kwargs']['maxq'] = True

    itr, cum_steps = params['itr'], params['cum_steps']
    print(f'Loading experiment at itr {itr}, cum_steps {cum_steps}')
    agent_state_dict = params['agent_state_dict']

    config['env_kwargs']['headless'] = True
    config['env_kwargs']['horizon'] = 20

    sac_agent_module = 'rlpyt.agents.qpg.{}'.format(config['sac_agent_module'])
    sac_agent_module = importlib.import_module(sac_agent_module)
    SacAgent = sac_agent_module.SacAgent

    agent = SacAgent(max_q_eval_mode=args.max_q_eval_mode, **config["agent"])
    sampler = SerialSampler(
        EnvCls=SOFTGYM_ENVS[config['env_name']],
        env_kwargs=config["env_kwargs"],
        eval_env_kwargs=config["env_kwargs"],
        **config["sampler"])
    sampler.initialize(agent)
    agent.load_state_dict(agent_state_dict)
    agent.to_device(cuda_idx=0)
    agent.eval_mode(0)

    if args.vis:
        all_traj_infos = []
        all_video_frames = []
        for i in range(4):
            sampler.envs[0].start_record()
            traj_infos = sampler.evaluate_agent(0, include_observations=True)
            all_traj_infos.extend(traj_infos)
            raw_video_frames = sampler.envs[0].video_frames
            video_frames = []
            # Down-sample (every other frame) and resize to save memory
            for j in range(0, len(raw_video_frames), 2):
                video_frames.append(np.array(cv.resize(raw_video_frames[j].astype('float32'), (256, 256))))
            all_video_frames.append(copy.copy(video_frames))
            sampler.envs[0].end_record()
        # Pad shorter videos with their last frame so all rollouts have equal length
        max_length = max(len(video_frames) for video_frames in all_video_frames)
        for i in range(len(all_video_frames)):
            pad_length = max_length - len(all_video_frames[i])
            all_video_frames[i] = np.vstack([
                all_video_frames[i],
                np.tile(all_video_frames[i][-1][None], [pad_length, 1, 1, 1])
            ])
        all_video_frames = np.array(all_video_frames).swapaxes(0, 1)
        grid_image = np.array([make_grid(frame, 1, 4) for frame in all_video_frames])
        save_numpy_as_gif(grid_image, osp.join(args.save_folder, 'vis_{}.gif'.format(config['env_name'])))
        for i in range(6):
            traj_infos = sampler.evaluate_agent(0, include_observations=True)
            all_traj_infos.extend(traj_infos)
        traj_infos = all_traj_infos
    else:
        traj_infos = sampler.evaluate_agent(0, include_observations=True)

    returns = [traj_info.Return for traj_info in traj_infos]
    lengths = [traj_info.Length for traj_info in traj_infos]
    performance = [traj_info.env_infos[-1].normalized_performance for traj_info in traj_infos]
    print('Performance: {}, Average performance: {}'.format(performance, np.mean(np.array(performance))))
    print('Returns', returns)
    print(f'Average Return {np.mean(returns)}, Average Length {np.mean(lengths)}')

    all_performance = np.array([[info.normalized_performance for info in traj_info.env_infos]
                                for traj_info in traj_infos])
    all_steps = np.array([[info.total_steps for info in traj_info.env_infos]
                          for traj_info in traj_infos])
    with open(osp.join(args.save_folder, 'qpg_traj_{}.npy'.format(config['env_name'])), 'wb') as f:
        np.save(f, all_performance)
        np.save(f, all_steps)
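# The two arrays are written back-to-back into a single file, so they can be recovered
# in the same order with successive np.load calls on one open handle (path is hypothetical):
with open('./data/qpg_visualization/qpg_traj_ClothFlatten.npy', 'rb') as f:
    all_performance = np.load(f)
    all_steps = np.load(f)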