Example #1
def cem_make_gif(env,
                 initial_states,
                 action_trajs,
                 configs,
                 save_dir,
                 save_name,
                 img_size=720):
    all_frames = []
    for i in range(len(action_trajs)):
        frames = []
        env.reset(config=configs[i], initial_state=initial_states[i])
        frames.append(env.get_image(img_size, img_size))
        for action in action_trajs[i]:
            _, reward, _, info = env.step(action,
                                          record_continuous_video=True,
                                          img_size=img_size)
            frames.extend(info['flex_env_recorded_frames'])
        all_frames.append(frames)
    # Convert to T x index x C x H x W for pytorch
    all_frames = np.array(all_frames).transpose([1, 0, 4, 2, 3])
    grid_imgs = [
        torchvision.utils.make_grid(torch.from_numpy(frame),
                                    nrow=5).permute(1, 2,
                                                    0).data.cpu().numpy()
        for frame in all_frames
    ]
    save_numpy_as_gif(np.array(grid_imgs), osp.join(save_dir, save_name))
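Every example here calls a save_numpy_as_gif helper that the snippets themselves do not define. The sketch below is an illustration only, assuming frames arrive as a T x H x W x C array of 0-255 pixel values and that moviepy is installed; the helper in the examples' own codebase may add resizing or other options.

import numpy as np
from moviepy.editor import ImageSequenceClip

def save_numpy_as_gif(frames, filename, fps=20):
    # frames: T x H x W x C array (or list) of uint8 RGB images.
    clip = ImageSequenceClip(list(frames), fps=fps)
    clip.write_gif(filename, fps=fps)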
Example #2
    def end_record(self, video_path=None, **kwargs):
        if not self.recording:
            print('function end_record: Error! Not recording video')
        self.recording = False
        if video_path is not None:
            save_numpy_as_gif(np.array(self.video_frames), video_path, **kwargs)
        del self.video_frames
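Example #7 below pairs end_record with a start_record call that collects self.video_frames during env.step. A hypothetical rollout using that pattern (the horizon and output path are placeholders):

env.start_record()
for _ in range(env.horizon):
    env.step(env.action_space.sample())
env.end_record(video_path='./data/rollout.gif')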
Example #3
def main():
    parser = argparse.ArgumentParser(description='Run a random policy in a SoftGym environment and record a video.')
    # ['PassWater', 'PourWater', 'PourWaterAmount', 'RopeFlatten', 'ClothFold', 'ClothFlatten', 'ClothDrop', 'ClothFoldCrumpled', 'ClothFoldDrop', 'RopeConfiguration']
    parser.add_argument('--env_name', type=str, default='ClothFlatten')
    parser.add_argument(
        '--headless',
        type=int,
        default=0,
        help='Whether to run the environment with headless rendering')
    parser.add_argument(
        '--num_variations',
        type=int,
        default=1,
        help='Number of environment variations to be generated')
    parser.add_argument('--save_video_dir',
                        type=str,
                        default='./data/',
                        help='Directory in which to save the recorded video')
    parser.add_argument('--img_size',
                        type=int,
                        default=256,
                        help='Size of the recorded videos')

    args = parser.parse_args()

    env_kwargs = env_arg_dict[args.env_name]

    # Generate and save the initial states for running this environment for the first time
    env_kwargs['use_cached_states'] = False
    env_kwargs['save_cached_states'] = False
    env_kwargs['num_variations'] = args.num_variations
    env_kwargs['render'] = True
    env_kwargs['headless'] = args.headless

    if not env_kwargs['use_cached_states']:
        print(
            'Waiting to generate environment variations. May take 1 minute for each variation...'
        )

    env = normalize(SOFTGYM_ENVS[args.env_name](**env_kwargs))
    env.reset()
    frames = [env.get_image(args.img_size, args.img_size)]
    for i in range(env.horizon):
        action = env.action_space.sample()
        # By default, the environments apply action repetition. The record_continuous_video
        # option returns renders of all intermediate frames. Only use it for visualization,
        # as it increases computation.
        _, _, _, info = env.step(action,
                                 record_continuous_video=True,
                                 img_size=args.img_size)
        frames.extend(info['flex_env_recorded_frames'])
    if args.save_video_dir is not None:
        save_name = osp.join(args.save_video_dir, args.env_name + '.gif')
        save_numpy_as_gif(np.array(frames), save_name)
        print('Video generated and saved to {}'.format(save_name))
Example #4
    def evaluate(self):
        average_episode_reward = 0
        infos = []
        all_frames = []
        plt.figure()

        for episode in range(self.cfg.num_eval_episodes):
            obs = self.env.reset()
            # print(type(obs))
            # print(obs.shape)
            # print(obs)
            # exit()
            # self.video_recorder.init(enabled=(episode == 0))
            done = False
            episode_reward = 0
            episode_step = 0
            ep_info = []
            frames = [self.env.get_image(128, 128)]
            rewards = []

            while not done:
                with utils.eval_mode(self.agent):
                    action = self.agent.act(obs, sample=False)
                obs, reward, done, info = self.env.step(action)
                # self.video_recorder.record(self.env)
                episode_reward += reward
                episode_step += 1
                ep_info.append(info)
                frames.append(self.env.get_image(128, 128))
                rewards.append(reward)

            average_episode_reward += episode_reward
            # self.video_recorder.save(f'{self.step}.mp4')
            infos.append(ep_info)
            plt.plot(range(len(rewards)), rewards)
            if len(all_frames) < 8:
                all_frames.append(frames)

        average_episode_reward /= self.cfg.num_eval_episodes
        for key, val in get_info_stats(infos).items():
            self.logger.log('eval/info_' + key, val, self.step)

        self.logger.log('eval/episode_reward', average_episode_reward,
                        self.step)
        self.logger.dump(self.step)

        all_frames = np.array(all_frames).swapaxes(0, 1)
        all_frames = np.array([make_grid(np.array(frame), nrow=2, padding=3) for frame in all_frames])
        save_numpy_as_gif(all_frames, os.path.join(self.video_dir, '%d.gif' % self.step))
        plt.savefig(os.path.join(self.video_dir, '%d.png' % self.step))
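Examples #4, #5, and #7 tile NumPy image arrays with a make_grid helper, so it cannot be torchvision.utils.make_grid (which Examples #1 and #6 apply to channel-first tensors). A minimal NumPy sketch with assumed semantics (nrow = images per row, torchvision-style); the helper actually used in these codebases may differ:

import numpy as np

def make_grid(images, nrow=2, padding=3, pad_value=0):
    # images: N x H x W x C array holding one frame per episode.
    n, h, w, c = images.shape
    grid_rows = int(np.ceil(n / nrow))
    grid = np.full((grid_rows * (h + padding) + padding,
                    nrow * (w + padding) + padding, c),
                   pad_value, dtype=images.dtype)
    for idx in range(n):
        row, col = divmod(idx, nrow)
        top = padding + row * (h + padding)
        left = padding + col * (w + padding)
        grid[top:top + h, left:left + w] = images[idx]
    return grid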
Example #5
    def run_eval_loop(sample_stochastically=True):
        start_time = time.time()
        prefix = 'stochastic_' if sample_stochastically else ''
        infos = []
        all_frames = []
        all_ep_rewards = []
        plt.figure()
        for i in range(num_episodes):
            obs = env.reset()
            done = False
            episode_reward = 0
            ep_info = []
            frames = [env.get_image(128, 128)]
            rewards = []
            while not done:
                # center crop image
                if args.encoder_type == 'pixel':
                    obs = utils.center_crop_image(obs, args.image_size)
                with utils.eval_mode(agent):
                    if sample_stochastically:
                        action = agent.sample_action(obs)
                    else:
                        action = agent.select_action(obs)
                obs, reward, done, info = env.step(action)
                episode_reward += reward
                ep_info.append(info)
                frames.append(env.get_image(128, 128))
                rewards.append(reward)
            plt.plot(range(len(rewards)), rewards)
            if len(all_frames) < 8:
                all_frames.append(frames)
            infos.append(ep_info)

            L.log('eval/' + prefix + 'episode_reward', episode_reward, step)
            all_ep_rewards.append(episode_reward)
        plt.savefig(os.path.join(video_dir, '%d.png' % step))
        all_frames = np.array(all_frames).swapaxes(0, 1)
        all_frames = np.array([
            make_grid(np.array(frame), nrow=2, padding=3)
            for frame in all_frames
        ])
        save_numpy_as_gif(all_frames, os.path.join(video_dir, '%d.gif' % step))

        for key, val in get_info_stats(infos).items():
            L.log('eval/info_' + prefix + key, val, step)
        L.log('eval/' + prefix + 'eval_time', time.time() - start_time, step)
        mean_ep_reward = np.mean(all_ep_rewards)
        best_ep_reward = np.max(all_ep_rewards)
        L.log('eval/' + prefix + 'mean_episode_reward', mean_ep_reward, step)
        L.log('eval/' + prefix + 'best_episode_reward', best_ep_reward, step)
Example #6
def generate_video(env, env_name):
    all_videos = []
    for i in range(8):
        obs = env.reset()
        obs = (obs + 0.5) * 256
        video = [obs]
        for j in range(env.horizon):
            action = env.action_space.sample()
            obs, _, _, info = env.step(action)
            obs = (obs + 0.5) * 256.
            video.append(obs)
        all_videos.append(torch.cat(video, 0))
        print('Env: {}, Eval traj {}'.format(env_name, i))

    # Convert to T x index x C x H x W for pytorch
    all_videos = torch.stack(all_videos, 0).permute(1, 0, 2, 3, 4)
    grid_imgs = np.array(
        [torchvision.utils.make_grid(frame, nrow=4, padding=2, pad_value=120).permute(1, 2, 0).data.cpu().numpy()
         for frame in all_videos])
    save_numpy_as_gif(grid_imgs, osp.join(SAVE_PATH, env_name + '.gif'))
    print('Video generated and saved to {}'.format(osp.join(SAVE_PATH, env_name + '.gif')))
Example #7
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('snapshot_dir', type=str)
    parser.add_argument('max_q_eval_mode', type=str)
    parser.add_argument('--vis', type=int, default=0)  # 0/1 flag (argparse's type=bool treats any non-empty string as True)
    parser.add_argument('--save_folder',
                        type=str,
                        default='./data/qpg_visualization')
    args = parser.parse_args()

    snapshot_file = join(args.snapshot_dir, 'params.pkl')
    config_file = join(args.snapshot_dir, 'params.json')

    params = torch.load(snapshot_file, map_location='cpu')
    with open(config_file, 'r') as f:
        config = json.load(f)
    config['sampler']['batch_B'] = 1
    config['sampler']['eval_n_envs'] = 1
    config['sampler']['eval_max_trajectories'] = 10 if not args.vis else 1
    config['env_kwargs']['maxq'] = True

    itr, cum_steps = params['itr'], params['cum_steps']
    print(f'Loading experiment at itr {itr}, cum_steps {cum_steps}')

    agent_state_dict = params['agent_state_dict']

    config['env_kwargs']['headless'] = True
    config['env_kwargs']['horizon'] = 20

    sac_agent_module = 'rlpyt.agents.qpg.{}'.format(config['sac_agent_module'])
    sac_agent_module = importlib.import_module(sac_agent_module)
    SacAgent = sac_agent_module.SacAgent

    agent = SacAgent(max_q_eval_mode=args.max_q_eval_mode, **config["agent"])
    sampler = SerialSampler(EnvCls=SOFTGYM_ENVS[config['env_name']],
                            env_kwargs=config["env_kwargs"],
                            eval_env_kwargs=config["env_kwargs"],
                            **config["sampler"])

    sampler.initialize(agent)
    agent.load_state_dict(agent_state_dict)

    agent.to_device(cuda_idx=0)
    agent.eval_mode(0)

    if args.vis:
        all_traj_infos = []
        all_video_frames = []
        for i in range(4):
            sampler.envs[0].start_record()
            traj_infos = sampler.evaluate_agent(0, include_observations=True)
            all_traj_infos.extend(traj_infos)
            raw_video_frames = sampler.envs[0].video_frames
            video_frames = []
            for j in range(0, len(raw_video_frames), 2):
                # Keep every other frame and resize to 256 x 256 to save memory
                video_frames.append(
                    np.array(cv.resize(raw_video_frames[j].astype('float32'), (256, 256))))
            all_video_frames.append(copy.copy(video_frames))
            sampler.envs[0].end_record()
        max_length = max(
            len(video_frames) for video_frames in all_video_frames)
        for i in range(len(all_video_frames)):
            pad_length = max_length - len(all_video_frames[i])
            all_video_frames[i] = np.vstack([
                all_video_frames[i],
                np.tile(all_video_frames[i][-1][None], [pad_length, 1, 1, 1])
            ])
        all_video_frames = np.array(all_video_frames).swapaxes(0, 1)
        grid_image = np.array(
            [make_grid(frame, 1, 4) for frame in all_video_frames])
        save_numpy_as_gif(
            grid_image,
            osp.join(args.save_folder,
                     'vis_{}.gif'.format(config['env_name'])))
        for i in range(6):
            traj_infos = sampler.evaluate_agent(0, include_observations=True)
            all_traj_infos.extend(traj_infos)
        traj_infos = all_traj_infos
    else:
        traj_infos = sampler.evaluate_agent(0, include_observations=True)
    returns = [traj_info.Return for traj_info in traj_infos]
    lengths = [traj_info.Length for traj_info in traj_infos]
    performance = [
        traj_info.env_infos[-1].normalized_performance
        for traj_info in traj_infos
    ]
    print('Performance: {}, Average performance: {}'.format(
        performance, np.mean(np.array(performance))))
    print('Returns', returns)
    print(
        f'Average Return {np.mean(returns)}, Average Length {np.mean(lengths)}'
    )

    all_performance = np.array(
        [[info.normalized_performance for info in traj_info.env_infos]
         for traj_info in traj_infos])
    all_steps = np.array([[info.total_steps for info in traj_info.env_infos]
                          for traj_info in traj_infos])
    with open(
            osp.join(args.save_folder,
                     'qpg_traj_{}.npy'.format(config['env_name'])), 'wb') as f:
        np.save(f, all_performance)
        np.save(f, all_steps)
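Because the two np.save calls above write both arrays into a single file handle, they must be read back in the same order from one open handle. A small sketch, assuming all trajectories have equal length so the saved arrays are plain numeric arrays (the path below is a placeholder):

import numpy as np

with open('./data/qpg_visualization/qpg_traj_ClothFold.npy', 'rb') as f:
    all_performance = np.load(f)  # first array written: normalized performance per step
    all_steps = np.load(f)        # second array written: total environment steps per step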