def simulate_policy(args):
    data = torch.load(args.file)
    policy = data['evaluation/policy']
    env = data['evaluation/env']
    print("Policy and environment loaded")
    if args.gpu:
        ptu.set_gpu_mode(True)
        policy.to(ptu.device)
        print('Using GPU')
    if isinstance(env, VAEWrappedEnv) and hasattr(env, 'mode'):
        env.mode(args.mode)
        print('Set environment mode {}'.format(args.mode))
    if args.enable_render or hasattr(env, 'enable_render'):
        # some environments need to be reconfigured for visualization
        env.enable_render()
    paths = []
    while True:
        paths.append(
            multitask_rollout(
                env,
                policy,
                max_path_length=args.H,
                render=not args.hide,
                observation_key='observation',
                desired_goal_key='desired_goal',
            ))
        if hasattr(env, "log_diagnostics"):
            env.log_diagnostics(paths)
        if hasattr(env, "get_diagnostics"):
            for k, v in env.get_diagnostics(paths).items():
                logger.record_tabular(k, v)
        logger.dump_tabular()
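The script above reads several attributes from `args` (`file`, `gpu`, `mode`, `enable_render`, `hide`, `H`). Below is a minimal argparse sketch that would satisfy those accesses; the flag names and defaults are assumptions inferred from the attribute names, not the original script's parser.

import argparse

# Sketch of a CLI matching the attributes used in simulate_policy above.
# Flag names and defaults are assumed, not taken from the original script.
parser = argparse.ArgumentParser()
parser.add_argument('file', type=str, help='path to the saved snapshot')
parser.add_argument('--H', type=int, default=300, help='max path length per rollout (assumed default)')
parser.add_argument('--mode', type=str, default='video_env', help='VAEWrappedEnv mode (assumed default)')
parser.add_argument('--gpu', action='store_true')
parser.add_argument('--enable_render', action='store_true')
parser.add_argument('--hide', action='store_true')
args = parser.parse_args()
simulate_policy(args)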
Example #2
 def collect_new_paths(
     self,
     max_path_length,
     num_steps,
     discard_incomplete_paths,
 ):
     paths = []
     num_steps_collected = 0
     while num_steps_collected < num_steps:
         max_path_length_this_loop = min(  # Do not go over num_steps
             max_path_length,
             num_steps - num_steps_collected,
         )
         path = multitask_rollout(
             self._env,
             self._policy,
             max_path_length=max_path_length_this_loop,
             render=self._render,
             render_kwargs=self._render_kwargs,
             observation_key=self._observation_key,
             desired_goal_key=self._desired_goal_key,
             return_dict_obs=True,
         )
         path_len = len(path['actions'])
         if (path_len != max_path_length and not path['terminals'][-1]
                 and discard_incomplete_paths):
             break
         num_steps_collected += path_len
         paths.append(path)
     self._num_paths_total += len(paths)
     self._num_steps_total += num_steps_collected
     self._epoch_paths.extend(paths)
     return paths
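A minimal usage sketch for the collector method above. `collector` is a hypothetical instance of the class this method belongs to, and the path key 'actions' matches the one referenced in the code.

# Hypothetical usage (sketch): collect up to 1000 steps in paths of at most
# 100 steps each, then count the transitions actually gathered.
new_paths = collector.collect_new_paths(
    max_path_length=100,
    num_steps=1000,
    discard_incomplete_paths=True,
)
total_steps = sum(len(p['actions']) for p in new_paths)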
Example #3
 def obtain_samples(self, rollout_type="multitask"):
     paths = []
     n_steps_total = 0
     while n_steps_total + self.max_path_length <= self.max_samples:
         if self.randomize_env:
             self.env, env_name = self.alg.get_new_env()
             print(f"Evaluating {env_name}")
         if rollout_type == "multitask":
             path = multitask_rollout(
                 self.env,
                 self.policy,
                 max_path_length=self.max_path_length,
                 animated=False,
                 observation_key='observation',
                 desired_goal_key='desired_goal',
                 get_action_kwargs=dict(
                     return_stacked_softmax=False,
                     mask=np.ones((1, self.env.unwrapped.num_blocks)),
                     deterministic=True
                 )
             )
         else:
             path = rollout(
                 self.env, self.policy, max_path_length=self.max_path_length
             )
         paths.append(path)
         n_steps_total += len(path['observations'])
     return paths
def rollout(*args, **kwargs):
    return multitask_rollout(
        *args,
        **kwargs,
        observation_key='latent_observation',
        desired_goal_key='latent_desired_goal',
    )
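The keyword-fixing wrapper above can be expressed equivalently with functools.partial; a small sketch for comparison (identical behavior for callers that do not also pass these two keys themselves).

from functools import partial

# Equivalent to the rollout() wrapper above: pre-binds the latent-space
# observation and goal keys, forwarding everything else to multitask_rollout.
latent_rollout = partial(
    multitask_rollout,
    observation_key='latent_observation',
    desired_goal_key='latent_desired_goal',
)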
Example #5
def simulate_policy(args):
    if args.pause:
        import ipdb
        ipdb.set_trace()
    data = pickle.load(open(args.file, "rb"))
    policy = data['policy']
    env = data['env']
    print("Policy and environment loaded")
    if args.gpu:
        ptu.set_gpu_mode(True)
        policy.to(ptu.device)
    if isinstance(env, VAEWrappedEnv):
        env.mode(args.mode)
    if args.enable_render or hasattr(env, 'enable_render'):
        # some environments need to be reconfigured for visualization
        env.enable_render()
    policy.train(False)
    paths = []
    while True:
        paths.append(
            multitask_rollout(
                env,
                policy,
                max_path_length=args.H,
                animated=not args.hide,
                observation_key='observation',
                desired_goal_key='desired_goal',
            ))
        if hasattr(env, "log_diagnostics"):
            env.log_diagnostics(paths)
        if hasattr(env, "get_diagnostics"):
            for k, v in env.get_diagnostics(paths).items():
                logger.record_tabular(k, v)
        logger.dump_tabular()
Example #6
 def eval_multitask_rollout(self):
     return multitask_rollout(
         self.env,
         self.policy,
         self.max_path_length,
         observation_key=self.observation_key,
         desired_goal_key=self.desired_goal_key,
     )
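For context, a sketch of how a per-episode evaluation hook like the one above might be consumed. `evaluator` and `n_eval_episodes` are hypothetical names, and 'rewards' is the standard rlkit path key; none of this is taken from the original code.

import numpy as np

# Hypothetical evaluation loop (sketch): run several goal-conditioned
# episodes and report the mean undiscounted return.
n_eval_episodes = 10
returns = [np.sum(evaluator.eval_multitask_rollout()['rewards'])
           for _ in range(n_eval_episodes)]
print('mean eval return:', float(np.mean(returns)))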
Example #7
 def rollout_fn():
     return multitask_rollout(
         env,
         policy,
         horizon,
         render,
         observation_key="observation",
         desired_goal_key="desired_goal",
         representation_goal_key="representation_goal",
         **reset_kwargs,
     )
Example #8
def simulate_policy(args):
    if args.pause:
        import ipdb
        ipdb.set_trace()
    data = pickle.load(open(args.file, "rb"))  # joblib.load(args.file)
    if 'policy' in data:
        policy = data['policy']
    elif 'evaluation/policy' in data:
        policy = data['evaluation/policy']

    if 'env' in data:
        env = data['env']
    elif 'evaluation/env' in data:
        env = data['evaluation/env']

    if isinstance(env, RemoteRolloutEnv):
        env = env._wrapped_env
    print("Policy loaded")
    if args.gpu:
        ptu.set_gpu_mode(True)
        policy.to(ptu.device)
    else:
        ptu.set_gpu_mode(False)
        policy.to(ptu.device)
    if isinstance(env, VAEWrappedEnv):
        env.mode(args.mode)
    if args.enable_render or hasattr(env, 'enable_render'):
        # some environments need to be reconfigured for visualization
        env.enable_render()
    if args.multitaskpause:
        env.pause_on_goal = True
    if isinstance(policy, PyTorchModule):
        policy.train(False)
    paths = []
    while True:
        paths.append(
            multitask_rollout(
                env,
                policy,
                max_path_length=args.H,
                render=not args.hide,
                observation_key=data.get('evaluation/observation_key',
                                         'observation'),
                desired_goal_key=data.get('evaluation/desired_goal_key',
                                          'desired_goal'),
            ))
        if hasattr(env, "log_diagnostics"):
            env.log_diagnostics(paths)
        if hasattr(env, "get_diagnostics"):
            for k, v in env.get_diagnostics(paths).items():
                logger.record_tabular(k, v)
        logger.dump_tabular()
Example #9
    def eval_multitask_rollout(self):
        get_action_kwargs = dict()
        # if not hasattr(self, "exploration_masking") or self.exploration_masking:
        # masks = np.pad(masks, ((0,0), (0, int(self.replay_buffer.max_num_blocks - self.env.unwrapped.num_blocks))), "constant", constant_values=((0,0), (0, 0)))
        get_action_kwargs['mask'] = get_masks(
            self.env.unwrapped.num_blocks,
            self.replay_buffer.max_num_blocks,
            1,
            keepdim=True)

        return multitask_rollout(
            self.env,
            self.policy,
            self.max_path_length,
            observation_key=self.observation_key,
            desired_goal_key=self.desired_goal_key,
            get_action_kwargs=get_action_kwargs,
            max_num_blocks=self.replay_buffer.max_num_blocks,
            cur_num_blocks=self.env.unwrapped.num_blocks)
Example #10
def simulate_policy(args):
    data = torch.load(args.file)
    policy = data['evaluation/policy']
    env = data['evaluation/env']
    print("Policy loaded")
    if args.gpu:
        set_gpu_mode(True)
        policy.cuda()
    while True:
        path = multitask_rollout(
            env,
            policy,
            max_path_length=args.H,
            render=True,
            observation_key='observation',
            desired_goal_key='desired_goal',
        )
        if hasattr(env, "log_diagnostics"):
            env.log_diagnostics([path])
        logger.dump_tabular()
Example #11
 def collect_new_paths(self, max_path_length, num_steps):
     paths = []
     num_steps_collected = 0
     while num_steps_collected < num_steps:
         path = multitask_rollout(
             self._env,
             self._policy,
             max_path_length=min(  # Do not go over num_steps
                 max_path_length,
                 num_steps - num_steps_collected,
             ),
             observation_key=self._observation_key,
             desired_goal_key=self._desired_goal_key,
             return_dict_obs=True,
         )
         num_steps_collected += len(path['actions'])
         paths.append(path)
     self._num_paths_total += len(paths)
     self._num_steps_total += num_steps_collected
     self._epoch_paths.extend(paths)
     return paths
def simulate_policy(args):
    # import torch
    # torch.manual_seed(6199)
    if args.pause:
        import ipdb
        ipdb.set_trace()
    data = pickle.load(open(args.file, "rb"))
    policy = data['algorithm'].policy

    num_blocks = 6
    stack_only = True

    # env = data['env']
    env = gym.make(
        F"FetchBlockConstruction_{num_blocks}Blocks_IncrementalReward_DictstateObs_42Rendersize_{stack_only}Stackonly_AllCase-v1"
    )

    env = Monitor(env,
                  force=True,
                  directory="videos/",
                  video_callable=lambda x: x)

    print("Policy and environment loaded")
    if args.gpu:
        ptu.set_gpu_mode(True)
        policy.to(ptu.device)
    if args.enable_render or hasattr(env, 'enable_render'):
        # some environments need to be reconfigured for visualization
        env.enable_render()
    policy.train(False)
    failures = []
    successes = []
    for path_idx in range(100):
        path = multitask_rollout(
            env,
            policy,
            max_path_length=num_blocks * 50,
            animated=not args.hide,
            observation_key='observation',
            desired_goal_key='desired_goal',
            get_action_kwargs=dict(mask=np.ones((1, num_blocks)),
                                   deterministic=True),
        )

        if not is_solved(path, num_blocks):
            failures.append(path)
            print(F"Failed {path_idx}")
        else:
            print(F"Succeeded {path_idx}")
            successes.append(path)
        # if hasattr(env, "log_diagnostics"):
        #     env.log_diagnostics(paths)
        # if hasattr(env, "get_diagnostics"):
        #     for k, v in env.get_diagnostics(paths).items():
        #         logger.record_tabular(k, v)
        # logger.dump_tabular()
    print(f"Success rate {len(successes)/(len(successes) + len(failures))}")
    from rlkit.core.eval_util import get_generic_path_information
    path_info = get_generic_path_information(successes + failures,
                                             num_blocks=num_blocks)
    print(path_info)