Example #1
 def setUp(self):
     self.task = generate_task(task_generator_id="reaching")
     self.env = CausalWorld(task=self.task,
                            enable_visualization=False,
                            action_mode='joint_positions',
                            normalize_observations=False,
                            normalize_actions=False)
     return
Example #2
 def setUp(self):
     self.task = generate_task(task_generator_id="picking")
     self.env = CausalWorld(task=self.task,
                            enable_visualization=False,
                            skip_frame=1,
                            action_mode="end_effector_positions",
                            normalize_actions=False,
                            normalize_observations=False)
     return
Example #3
    def test_timing_profile(self):
        from pybullet_envs.bullet.kukaGymEnv import KukaGymEnv
        import time

        kuka_env = KukaGymEnv(renders=False,
                              isDiscrete=False)  # operates at 240 HZ
        task = generate_task(task_generator_id="pushing")
        causal_rl_env = CausalWorld(
            task=task,
            enable_visualization=False,
            seed=0,
            skip_frame=10,
            normalize_actions=False,
            normalize_observations=False)  # operates at 250 HZ
        start = time.time()
        kuka_env.reset()
        end = time.time()
        kuka_reset_time = end - start

        start = time.time()
        causal_rl_env.reset()
        end = time.time()
        causal_rl_reset_time = end - start

        self.assertLess(causal_rl_reset_time, kuka_reset_time * 1.25)

        start = time.time()
        kuka_env.step(kuka_env.action_space.sample())
        end = time.time()
        kuka_step_time = end - start

        start = time.time()
        causal_rl_env.step(causal_rl_env.action_space.sample())
        end = time.time()
        causal_rl_step_time = end - start
        print("time 1", causal_rl_step_time)
        print("time 2", kuka_step_time)
        self.assertLess(causal_rl_step_time, kuka_step_time * 10)

        start = time.time()
        kuka_env.render()
        end = time.time()
        kuka_render_time = end - start

        start = time.time()
        causal_rl_env.render()
        end = time.time()
        causal_rl_render_time = end - start
        self.assertLess(causal_rl_render_time, kuka_render_time * 1.25)

        causal_rl_env.close()
        kuka_env.close()
        return
Example #4
def goal_interventions():
    task = generate_task(task_generator_id='stacked_blocks')
    env = CausalWorld(task=task, enable_visualization=True)
    env.reset()
    for _ in range(10):
        for i in range(200):
            obs, reward, done, info = env.step(env.action_space.sample())
        goal_intervention_dict = env.sample_new_goal()
        print("new goal chosen: ", goal_intervention_dict)
        success_signal, obs = env.do_intervention(goal_intervention_dict)
        print("Goal Intervention success signal", success_signal)
    env.close()
Example #5
def train_policy(num_of_envs, log_relative_path, maximum_episode_length,
                 skip_frame, seed_num, sac_config, total_time_steps,
                 validate_every_timesteps, task_name):
    task = generate_task(task_generator_id=task_name,
                         dense_reward_weights=np.array(
                             [250, 0, 125, 0, 750, 0, 0, 0.005]),
                         fractional_reward_weight=1,
                         goal_height=0.15,
                         tool_block_mass=0.02)
    env = CausalWorld(task=task,
                      skip_frame=skip_frame,
                      enable_visualization=False,
                      seed=seed_num,
                      max_episode_length=maximum_episode_length)
    set_global_seeds(seed_num)
    policy_kwargs = dict(layers=[256, 256])
    checkpoint_callback = CheckpointCallback(save_freq=int(
        validate_every_timesteps / num_of_envs),
                                             save_path=log_relative_path,
                                             name_prefix='model')
    model = SAC(MlpPolicy,
                env,
                verbose=1,
                policy_kwargs=policy_kwargs,
                **sac_config,
                seed=seed_num)
    model.learn(total_timesteps=total_time_steps,
                tb_log_name="sac",
                callback=checkpoint_callback)
    return
Example #6
def train_policy(num_of_envs, log_relative_path, maximum_episode_length,
                 skip_frame, seed_num, her_config, total_time_steps,
                 validate_every_timesteps, task_name):
    task = generate_task(task_generator_id=task_name,
                         dense_reward_weights=np.array([100000, 0, 0, 0]),
                         fractional_reward_weight=0)
    env = CausalWorld(task=task,
                      skip_frame=skip_frame,
                      enable_visualization=False,
                      seed=seed_num,
                      max_episode_length=maximum_episode_length)
    env = HERGoalEnvWrapper(env)
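    # curriculum sketch: a single goal intervention actor, active from episode 0
    # to 1e9, every episode, at time step 0 (i.e. a new goal at each reset)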
    env = CurriculumWrapper(
        env,
        intervention_actors=[GoalInterventionActorPolicy()],
        actives=[(0, 1000000000, 1, 0)])
    set_global_seeds(seed_num)
    checkpoint_callback = CheckpointCallback(save_freq=int(
        validate_every_timesteps / num_of_envs),
                                             save_path=log_relative_path,
                                             name_prefix='model')
    model = HER(MlpPolicy,
                env,
                SAC,
                verbose=1,
                policy_kwargs=dict(layers=[256, 256, 256]),
                **her_config,
                seed=seed_num)
    model.learn(total_timesteps=total_time_steps,
                tb_log_name="her_sac",
                callback=checkpoint_callback)
    return
Example #7
 def _init():
     task = generate_task(task_generator_id=task_name)
     env = CausalWorld(task=task,
                       skip_frame=skip_frame,
                       enable_visualization=False,
                       seed=seed_num + rank,
                       max_episode_length=maximum_episode_length)
     return env
Example #8
def example():
    task = generate_task(task_generator_id='pick_and_place')
    env = CausalWorld(task=task, enable_visualization=True)
    env.reset()
    intervention_space = env.get_variable_space_used()
    for _ in range(100):
        for i in range(200):
            obs, reward, done, info = env.step(env.action_space.low)
        intervention = {
            'tool_block': {
                'size':
                np.random.uniform(intervention_space['tool_block']['size'][0],
                                  intervention_space['tool_block']['size'][1])
            }
        }
        env.do_intervention(intervention)
    env.close()
Example #9
def example():
    task = MyOwnTask()
    env = CausalWorld(task=task, enable_visualization=True)
    env.reset()
    for _ in range(2000):
        for _ in range(10):
            obs, reward, done, info = \
                env.step(env.action_space.sample())
        random_intervention_dict = env.do_single_random_intervention()
    env.close()
Example #10
def simulate_policy():
    file = './her-sac-fetch-experiment/her-sac-fetch-experiment_2020_07_07_11_11_14_0000--s-0/params.pkl'
    data = torch.load(file)
    policy = data['evaluation/policy']
    policy.reset()

    def policy_func(obs):
        # new_obs = np.hstack((obs['observation'], obs['desired_goal']))
        a, agent_info = policy.get_action(obs)
        return a

    task = generate_task(task_generator_id='reaching')
    env = CausalWorld(task=task,
                      enable_visualization=True,
                      skip_frame=1,
                      seed=0,
                      max_episode_length=2500)
    env = CurriculumWrapper(env,
                            intervention_actors=[GoalInterventionActorPolicy()],
                            actives=[(0, 1000000000, 1, 0)])
    # env = HERGoalEnvWrapper(env)

    for _ in range(100):
        total_reward = 0
        o = env.reset()
        for _ in range(2500):
            o, reward, done, info = env.step(policy_func(o))
            total_reward += reward
        print("total reward is :", total_reward)
    env.close()
Example #11
def example():
    #initialize env
    task_gen = generate_task(task_generator_id='pushing')
    env = CausalWorld(task_gen, skip_frame=10, enable_visualization=True)

    # define a custom curriculum of interventions:

    # No intervention actor is defined until episode number 5
    # Goal intervention actor from episode number 5 to 10 after reset at time step 0
    # Visual intervention actor from episode number 10 to 20 every two episodes after reset at time step 0
    # Random intervention actor from episode number 20 to 25 after reset at time step 0
    # Goal intervention actor from episode number 25 to 30 each at time step 50
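    # each entry in actives is a tuple of
    # (episode_start, episode_end, episode_periodicity, intervention_time_step),
    # paired in order with the intervention actors passed below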

    env = CurriculumWrapper(env,
                            intervention_actors=[
                                GoalInterventionActorPolicy(),
                                VisualInterventionActorPolicy(),
                                RandomInterventionActorPolicy(),
                                GoalInterventionActorPolicy()
                            ],
                            actives=[(5, 10, 1, 0), (10, 20, 2, 0),
                                     (20, 25, 1, 0), (25, 30, 1, 50)])

    for reset_idx in range(30):
        obs = env.reset()
        for time in range(100):
            desired_action = env.action_space.sample()
            obs, reward, done, info = env.step(action=desired_action)
    env.close()
Example #12
def simulate_policy():
    task = generate_task(task_generator_id='picking')
    env = CausalWorld(task=task,
                      enable_visualization=True,
                      skip_frame=3,
                      seed=0,
                      max_episode_length=600)
    env = GymEnvWrapper(env)
    file = './itr_1097499.pkl'
    data = torch.load(file)
    agent_state_dict = data['agent_state_dict']
    agent = SacAgent(initial_model_state_dict=agent_state_dict)
    agent.initialize(env_spaces=env.spaces)
    agent.eval_mode(itr=data['itr'])

    def policy_func(obs):
        # new_obs = np.hstack((obs['observation'], obs['desired_goal']))
        agent_info = agent.step(torchify_buffer(obs),
                                prev_action=None,
                                prev_reward=None)
        return agent_info.action.numpy()

    # env = HERGoalEnvWrapper(env)
    for _ in range(100):
        total_reward = 0
        o = env.reset()
        for _ in range(600):
            o, reward, done, info = env.step(policy_func(o))
            total_reward += reward
        print("total reward is :", total_reward)
    env.close()
Example #13
def _make_env(rank):
    task = generate_task(task_generator_id='reaching')
    env = CausalWorld(task=task,
                      skip_frame=10,
                      enable_visualization=False,
                      seed=0 + rank,
                      max_episode_length=600)
    env = GymEnvWrapper(env)
    return env
Example #14
def end_effector_pos():
    task = generate_task(task_generator_id='reaching')
    env = CausalWorld(task=task,
                      enable_visualization=True,
                      action_mode="joint_positions",
                      normalize_actions=False,
                      normalize_observations=False)
    obs = env.reset()
    for _ in range(100):
        goal_dict = env.sample_new_goal()
        success_signal, obs = env.do_intervention(goal_dict)
        obs, reward, done, info = env.step(control_policy(env, obs))
        for _ in range(250):
            obs, reward, done, info = env.step(control_policy(env, obs))
    env.close()
Example #15
def example():
    task = generate_task(task_generator_id='picking')
    env = CausalWorld(task=task, enable_visualization=True)
    env.reset()
    for _ in range(50):
        random_intervention_dict, success_signal, obs = \
            env.do_single_random_intervention()
        print("The random intervention performed is ",
              random_intervention_dict)
        for i in range(100):
            obs, reward, done, info = env.step(env.action_space.sample())
    env.close()
Example #16
def without_intervention_split():
    task = generate_task(task_generator_id='pushing')
    env = CausalWorld(task=task, enable_visualization=True)
    env.reset()
    for _ in range(2):
        for i in range(200):
            obs, reward, done, info = env.step(env.action_space.sample())
        success_signal, obs = env.do_intervention(
            {'stage_color': np.random.uniform(0, 1, [3])})
        print("Intervention success signal", success_signal)
    env.close()
Example #17
File: ppo.py Project: thias15/CausalWorld
 def _init():
     task = generate_task(task_generator_id=task_name,
                          dense_reward_weights=np.array(
                              [250, 0, 125, 0, 750, 0, 0, 0.005]),
                          fractional_reward_weight=1,
                          goal_height=0.15,
                          tool_block_mass=0.02)
     env = CausalWorld(task=task,
                       skip_frame=skip_frame,
                       enable_visualization=False,
                       seed=seed_num + rank,
                       max_episode_length=maximum_episode_length)
     return env
Example #18
def with_intervention_split_2():
    task = generate_task(task_generator_id='pushing',
                         variables_space='space_b')
    env = CausalWorld(task=task, enable_visualization=False)
    interventions_space = task.get_intervention_space_a()
    env.reset()
    for _ in range(2):
        for i in range(200):
            obs, reward, done, info = env.step(env.action_space.sample())
        success_signal, obs = env.do_intervention({
            'stage_color':
                np.random.uniform(interventions_space['stage_color'][0],
                                  interventions_space['stage_color'][1])
        })
        print("Intervention success signal", success_signal)
    env.close()
Example #19
def _make_env(rank):
    task = generate_task(task_generator_id='picking',
                         dense_reward_weights=np.array(
                             [250, 0, 125, 0, 750, 0, 0, 0.005]),
                         fractional_reward_weight=1,
                         goal_height=0.15,
                         tool_block_mass=0.02)
    env = CausalWorld(task=task,
                      skip_frame=3,
                      enable_visualization=False,
                      seed=0,
                      max_episode_length=600)
    env = GymEnvWrapper(env)
    return env
Example #20
def get_world(task_generator_id,
              task_params,
              world_params,
              enable_visualization=False,
              env_wrappers=np.array([]),
              env_wrappers_args=np.array([])):
    """
    Returns a particular CausalWorld instance with optional wrappers

    :param task_generator_id: (str) id of the task of the environment
    :param task_params: (dict) task params of the environment
    :param world_params: (dict) world_params of the environment
    :param enable_visualization: (bool) if GUI visualization is enabled
    :param env_wrappers: (list) a list of gym wrappers
    :param env_wrappers_args: (list) a list of kwargs for the gym wrappers
    :return: (CausalWorld) a CausalWorld environment instance
    """
    world_params["skip_frame"] = 1
    if task_params is None:
        task = generate_task(task_generator_id)
    else:
        if "task_name" in task_params:
            del task_params["task_name"]
        task = generate_task(task_generator_id, **task_params)
    if "enable_visualization" in world_params.keys():
        world_params_temp = dict(world_params)
        del world_params_temp["enable_visualization"]
        env = CausalWorld(task,
                          **world_params_temp,
                          enable_visualization=enable_visualization)
    else:
        env = CausalWorld(task,
                          **world_params,
                          enable_visualization=enable_visualization)
    for i in range(len(env_wrappers)):
        env = env_wrappers[i](env, **env_wrappers_args[i])
    return env
Example #21
def _make_env(rank):
    task = generate_task('pushing',
                         dense_reward_weights=np.array([2500, 2500, 0]),
                         variables_space='space_a',
                         fractional_reward_weight=100)
    env = CausalWorld(task=task,
                      skip_frame=3,
                      enable_visualization=False,
                      seed=0 + rank)
    env = CurriculumWrapper(
        env,
        intervention_actors=[GoalInterventionActorPolicy()],
        actives=[(0, 1e9, 2, 0)])
    env = GymEnvWrapper(env)
    return env
Example #22
File: ppo.py Project: thias15/CausalWorld
        def _init():
            task = generate_task(task_generator_id=task_name,
                                 dense_reward_weights=np.array(
                                     [100000, 0, 0, 0]),
                                 fractional_reward_weight=0)
            env = CausalWorld(task=task,
                              skip_frame=skip_frame,
                              enable_visualization=False,
                              seed=seed_num + rank,
                              max_episode_length=maximum_episode_length)
            env = CurriculumWrapper(
                env,
                intervention_actors=[GoalInterventionActorPolicy()],
                actives=[(0, 1000000000, 1, 0)])

            return env
Example #23
 def _init():
     task = generate_task(
         task_generator_id='picking',
         joint_positions=[-0.21737874, 0.55613149,
                          -1.09308519, -0.12868997,
                          0.52551013, -1.08006493,
                          -0.00221536, 0.46163487,
                          -1.00948735],
         tool_block_position=[0.0, 0, 0.035],
         fractional_reward_weight=1,
         dense_reward_weights=np.array([0, 10, 0,
                                        1, 1, 0, 0,
                                        0]))
     env = CausalWorld(task=task,
                       skip_frame=skip_frame,
                       enable_visualization=False,
                       seed=seed)
     return env
Example #24
def test_pd_gains():
    #control the robot using pd controller
    np.random.seed(0)
    task = generate_task(task_generator_id='pushing')
    skip_frame = 1
    env = CausalWorld(task=task,
                      enable_visualization=False,
                      skip_frame=skip_frame,
                      normalize_observations=False,
                      normalize_actions=False,
                      seed=0)
    zero_hold = int(5000 / skip_frame)  #reach desired position in 4 secs?
    obs = env.reset()
    #test bounds first

    for _ in range(zero_hold):
        chosen_action = np.zeros(9, )
        obs, reward, done, info = env.step(chosen_action)
    current_joint_positions = obs[1:10]
    if (((current_joint_positions - chosen_action) > 0.1).any()):
        raise AssertionError(
            "The pd controller failed to reach these values {} but reached instead {}"
            .format(chosen_action, current_joint_positions))

    for _ in range(zero_hold):
        chosen_action = env.action_space.high
        obs, reward, done, info = env.step(chosen_action)
    current_joint_positions = obs[1:10]
    if (((current_joint_positions - chosen_action) > 0.1).any()):
        raise AssertionError(
            "The pd controller failed to reach these values {} but reached instead {}"
            .format(chosen_action, current_joint_positions))

    # for i in range(200):
    #     #check for first finger
    #     chosen_action = np.random.uniform(env.action_space.low, env.action_space.high)
    #     chosen_action[3:] = env.action_space.low[3:]
    #     chosen_action[1] = 0
    #     chosen_action[2] = 0
    #     for _ in range(zero_hold):
    #         chosen_action = chosen_action
    #         obs, reward, done, info = env.step(chosen_action)
    #     current_joint_positions = obs[:9]
    #     if(((current_joint_positions - chosen_action) > 0.1).any()):
    #         raise AssertionError("The pd controller failed to reach these values {} but reached instead {}".
    #                              format(chosen_action, current_joint_positions))
    env.close()
Example #25
def smooth_action():
    task = generate_task(task_generator_id='reaching')
    env = CausalWorld(task=task,
                      enable_visualization=True,
                      action_mode="joint_positions",
                      normalize_actions=True,
                      normalize_observations=True,
                      skip_frame=1)
    env = MovingAverageActionEnvWrapper(env)
    for _ in range(50):
        obs = env.reset()
        for _ in range(1000):
            desired_action = np.zeros([9])
            obs, reward, done, info = env.step(desired_action)
    env.close()
Example #26
def example():
    task = generate_task(task_generator_id='stacked_blocks')
    env = CausalWorld(task=task,
                      skip_frame=10,
                      enable_visualization=True,
                      seed=0,
                      action_mode="joint_positions",
                      observation_mode="pixel",
                      camera_indicies=[0, 1, 2])
    env.reset()
    for _ in range(5):
        obs, reward, done, info = env.step(env.action_space.sample())
    #show last images
    for i in range(6):
        plt.imshow(obs[i])
        plt.show()
    env.close()
Example #27
def example():
    #initialize env
    task = generate_task(task_generator_id='pick_and_place')
    env = CausalWorld(task, skip_frame=10, enable_visualization=True)

    # define a custom curriculum of interventions:
    # Goal intervention actor each episode after reset

    env = CurriculumWrapper(
        env,
        intervention_actors=[GoalInterventionActorPolicy()],
        actives=[(0, 1000000000, 1, 0)])

    for reset_idx in range(30):
        obs = env.reset()
        for time in range(300):
            obs, reward, done, info = env.step(env.action_space.low)
    env.close()
Example #28
def get_single_process_env(model_settings, model_path, ckpt_step):
    task = generate_task(model_settings['benchmarks']['task_generator_id'],
                         **model_settings['task_configs'])
    env = CausalWorld(task=task,
                      **model_settings['world_params'],
                      seed=model_settings['world_seed'])
    env = CurriculumWrapper(
        env,
        intervention_actors=model_settings["intervention_actors"],
        actives=model_settings["actives"])
    if ckpt_step is None:
        prefix = 0
    else:
        prefix = ckpt_step
    monitor_file = os.path.join(model_path, str(prefix))
    env = Monitor(env,
                  filename=monitor_file,
                  info_keywords=('fractional_success', ))

    return env
Example #29
def load_world(tracker_relative_path, enable_visualization=False):
    """
    Loads a world again at the same state as when it was saved.

    :param tracker_relative_path: (str) path specifying where the tracker
                                        saved.
    :param enable_visualization: (bool) True if enabling visualization is
                                        needed.
    :return: (causal_world.CausalWorld) loaded CausalWorld env instance.
    """
    tracker = Tracker(file_path=os.path.join(tracker_relative_path, 'tracker'))
    task_stats = tracker.task_stats_log[0]
    wrapper_dict = copy.deepcopy(tracker.world_params['wrappers'])
    del tracker.world_params['wrappers']
    if 'task_name' in task_stats.task_params:
        del task_stats.task_params['task_name']
    task = generate_task(task_generator_id=task_stats.task_name,
                         **task_stats.task_params)
    env = CausalWorld(task,
                      **tracker.world_params,
                      enable_visualization=enable_visualization)
    for wrapper in wrapper_dict:
        if wrapper == 'object_selector':
            env = ObjectSelectorWrapper(env, **wrapper_dict[wrapper])
        elif wrapper == 'delta_action':
            env = DeltaActionEnvWrapper(env, **wrapper_dict[wrapper])
        elif wrapper == 'moving_average_action':
            env = MovingAverageActionEnvWrapper(env, **wrapper_dict[wrapper])
        elif wrapper == 'her_environment':
            env = HERGoalEnvWrapper(env, **wrapper_dict[wrapper])
        elif wrapper == 'curriculum_environment':
            #first initialize actors
            intervention_actors = \
                initialize_intervention_actors(wrapper_dict[wrapper]['actor_params'])
            #initialize intervention curriculum
            env = CurriculumWrapper(env,
                                    intervention_actors=intervention_actors,
                                    actives=wrapper_dict[wrapper]['actives'])
        else:
            raise Exception("wrapper is not known to be loaded")
    return env
Example #30
File: ddpg.py Project: thias15/CausalWorld
def train_policy(num_of_envs, log_relative_path, maximum_episode_length,
                 skip_frame, seed_num, ddpg_config, total_time_steps,
                 validate_every_timesteps, task_name):
    print("Using MPI for multiprocessing with {} workers".format(
        MPI.COMM_WORLD.Get_size()))
    rank = MPI.COMM_WORLD.Get_rank()
    print("Worker rank: {}".format(rank))
    task = generate_task(task_generator_id=task_name,
                         dense_reward_weights=np.array(
                             [250, 0, 125, 0, 750, 0, 0, 0.005]),
                         fractional_reward_weight=1,
                         goal_height=0.15,
                         tool_block_mass=0.02)
    env = CausalWorld(task=task,
                      skip_frame=skip_frame,
                      enable_visualization=False,
                      seed=0,
                      max_episode_length=maximum_episode_length,
                      normalize_actions=False,
                      normalize_observations=False)
    n_actions = env.action_space.shape[-1]
    param_noise = None
    action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions),
                                                sigma=float(0.5) *
                                                np.ones(n_actions))
    policy_kwargs = dict(layers=[256, 256])
    checkpoint_callback = CheckpointCallback(save_freq=int(
        validate_every_timesteps / num_of_envs),
                                             save_path=log_relative_path,
                                             name_prefix='model')
    model = DDPG(MlpPolicy,
                 env,
                 verbose=2,
                 param_noise=param_noise,
                 action_noise=action_noise,
                 policy_kwargs=policy_kwargs,
                 **ddpg_config)
    model.learn(total_timesteps=total_time_steps,
                tb_log_name="ddpg",
                callback=checkpoint_callback)
    return