Example 1
    def test_determinism(self):
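        # Two runs with the same seed and the same action sequence must produce
        # identical observations and rewards; a third run with a different seed
        # is also expected to match, since stepping is deterministic for a
        # fixed action sequence.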
        task = generate_task(task_generator_id="stacked_blocks")
        observations_v1 = []
        observations_v2 = []
        observations_v3 = []
        rewards_v1 = []
        rewards_v2 = []
        rewards_v3 = []
        horizon = 30

        env_v1 = CausalWorld(task=task, enable_visualization=False, seed=27)

        obs = env_v1.reset()
        observations_v1.append(obs)
        for _ in range(horizon):
            obs, reward, done, info = env_v1.step(env_v1.action_space.low)
            observations_v1.append(obs)
            rewards_v1.append(reward)
        env_v1.close()

        task = generate_task(task_generator_id="stacked_blocks")
        env_v2 = CausalWorld(task=task, enable_visualization=False, seed=27)

        obs = env_v2.reset()
        observations_v2.append(obs)
        for _ in range(horizon):
            obs, reward, done, info = env_v2.step(env_v2.action_space.low)
            observations_v2.append(obs)
            rewards_v2.append(reward)
        env_v2.close()

        task = generate_task(task_generator_id="stacked_blocks")
        env_v3 = CausalWorld(task=task, enable_visualization=False, seed=54)

        obs = env_v3.reset()
        observations_v3.append(obs)
        for _ in range(horizon):
            obs, reward, done, info = env_v3.step(env_v3.action_space.low)
            observations_v3.append(obs)
            rewards_v3.append(reward)
        env_v3.close()

        assert all(
            np.array_equal(observations_v1[i], observations_v2[i])
            for i in range(horizon))
        assert rewards_v1 == rewards_v2
        assert all(
            np.array_equal(observations_v1[i], observations_v3[i])
            for i in range(horizon))
        assert rewards_v1 == rewards_v3
Example 2
 def test_parallelism(self):
     task = generate_task(task_generator_id="stacked_blocks")
     env1 = CausalWorld(task=task, enable_visualization=False, seed=0)
     env1.reset()
     task2 = generate_task(task_generator_id="stacked_blocks")
     env2 = CausalWorld(task=task2, enable_visualization=False, seed=0)
     observations_env1_v1, rewards_env1_v1, _, _ = env1.step(
         env1.action_space.low)
     env2.reset()
     observations_env2_v1, rewards_env2_v1, _, _ = env2.step(
         env2.action_space.low)
     env1.close()
     env2.close()
     assert np.array_equal(observations_env2_v1, observations_env1_v1)
     return
Example 3
def train_policy(num_of_envs, log_relative_path, maximum_episode_length,
                 skip_frame, seed_num, sac_config, total_time_steps,
                 validate_every_timesteps, task_name):
    task = generate_task(task_generator_id=task_name,
                         dense_reward_weights=np.array(
                             [250, 0, 125, 0, 750, 0, 0, 0.005]),
                         fractional_reward_weight=1,
                         goal_height=0.15,
                         tool_block_mass=0.02)
    env = CausalWorld(task=task,
                      skip_frame=skip_frame,
                      enable_visualization=False,
                      seed=seed_num,
                      max_episode_length=maximum_episode_length)
    set_global_seeds(seed_num)
    policy_kwargs = dict(layers=[256, 256])
    checkpoint_callback = CheckpointCallback(save_freq=int(
        validate_every_timesteps / num_of_envs),
                                             save_path=log_relative_path,
                                             name_prefix='model')
    model = SAC(MlpPolicy,
                env,
                verbose=1,
                policy_kwargs=policy_kwargs,
                **sac_config,
                seed=seed_num)
    model.learn(total_timesteps=total_time_steps,
                tb_log_name="sac",
                callback=checkpoint_callback)
    return
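
# A minimal invocation sketch for the function above; every argument value here
# is an illustrative assumption, not a value taken from the original experiments.
train_policy(num_of_envs=1,
             log_relative_path='./sac_picking',
             maximum_episode_length=600,
             skip_frame=3,
             seed_num=0,
             sac_config=dict(gamma=0.98,
                             learning_rate=2.5e-4,
                             buffer_size=1000000,
                             batch_size=256),
             total_time_steps=1000000,
             validate_every_timesteps=500000,
             task_name='picking')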
Example 4
def simulate_policy():
    task = generate_task(task_generator_id='picking')
    env = CausalWorld(task=task,
                      enable_visualization=True,
                      skip_frame=3,
                      seed=0,
                      max_episode_length=600)
    env = GymEnvWrapper(env)
    file = './itr_1097499.pkl'
    data = torch.load(file)
    agent_state_dict = data['agent_state_dict']
    agent = SacAgent(initial_model_state_dict=agent_state_dict)
    agent.initialize(env_spaces=env.spaces)
    agent.eval_mode(itr=data['itr'])

    def policy_func(obs):
        # new_obs = np.hstack((obs['observation'], obs['desired_goal']))
        agent_info = agent.step(torchify_buffer(obs),
                                prev_action=None,
                                prev_reward=None)
        return agent_info.action.numpy()

    # env = HERGoalEnvWrapper(env)
    for _ in range(100):
        total_reward = 0
        o = env.reset()
        for _ in range(600):
            o, reward, done, info = env.step(policy_func(o))
            total_reward += reward
        print("total reward is :", total_reward)
    env.close()
Example 5
def example():
    # This tutorial shows how to view policies of trained actors

    task = generate_task(task_generator_id='picking')
    world_params = dict()
    world_params["skip_frame"] = 3
    world_params["seed"] = 0
    stable_baselines_policy_path = "./model_2000000_steps.zip"
    model = SAC.load(stable_baselines_policy_path)

    # define a method for the policy fn of your trained model
    def policy_fn(obs):
        return model.predict(obs, deterministic=True)[0]

    # Recording a video of the policy is done in one line
    viewer.record_video_of_policy(task=task,
                                  world_params=world_params,
                                  policy_fn=policy_fn,
                                  file_name="pushing_video",
                                  number_of_resets=10,
                                  max_time_steps=10 * 100)

    # Similarly for interactive visualization in the GUI
    viewer.view_policy(task=task,
                       world_params=world_params,
                       policy_fn=policy_fn,
                       max_time_steps=40 * 600,
                       number_of_resets=40)
Example 6
def simulate_policy():
    file = './her-sac-fetch-experiment/her-sac-fetch-experiment_2020_07_07_11_11_14_0000--s-0/params.pkl'
    data = torch.load(file)
    policy = data['evaluation/policy']
    policy.reset()

    def policy_func(obs):
        # new_obs = np.hstack((obs['observation'], obs['desired_goal']))
        a, agent_info = policy.get_action(obs)
        return a

    task = generate_task(task_generator_id='reaching')
    env = CausalWorld(task=task,
                      enable_visualization=True,
                      skip_frame=1,
                      seed=0,
                      max_episode_length=2500)
    env = CurriculumWrapper(env,
                            intervention_actors=[GoalInterventionActorPolicy()],
                            actives=[(0, 1000000000, 1, 0)])
    # env = HERGoalEnvWrapper(env)

    for _ in range(100):
        total_reward = 0
        o = env.reset()
        for _ in range(2500):
            o, reward, done, info = env.step(policy_func(o))
            total_reward += reward
        print("total reward is :", total_reward)
    env.close()
Example 7
def example():
    # This tutorial shows how to view policies of trained actors

    task = generate_task(task_generator_id='pick_and_place')
    world_params = dict()
    world_params["skip_frame"] = 3
    world_params["seed"] = 0
    stable_baselines_policy_path = "./model_100000000_steps.zip"
    model = PPO2.load(stable_baselines_policy_path)

    # define a method for the policy fn of your trained model
    def policy_fn(obs):
        return model.predict(obs, deterministic=True)[0]

    # Similarly for interactive visualization in the GUI
    viewer.view_policy(task=task,
                       world_params=world_params,
                       policy_fn=policy_fn,
                       max_time_steps=40 * 600,
                       number_of_resets=40,
                       env_wrappers=[CurriculumWrapper],
                       env_wrappers_args=[{
                           'intervention_actors':
                           [GoalInterventionActorPolicy()],
                           'actives': [(0, 1000000000, 1, 0)]
                       }])
Example 8
def example():
    # Initialize the environment
    task_gen = generate_task(task_generator_id='pushing')
    env = CausalWorld(task_gen, skip_frame=10, enable_visualization=True)

    # define a custom curriculum of interventions:

    # No intervention actor is defined until episode number 5
    # Goal intervention actor from episode number 5 to 10 after reset at time step 0
    # Visual intervention actor from episode number 10 to 20 every two episodes after reset at time step 0
    # Random intervention actor from episode number 20 to 25 after reset at time step 0
    # Goal intervention actor from episode number 25 to 30 each at time step 50
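    # Each tuple in actives below reads
    # (episode_start, episode_end, episode_periodicity, intervention_time_step)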

    env = CurriculumWrapper(env,
                            intervention_actors=[
                                GoalInterventionActorPolicy(),
                                VisualInterventionActorPolicy(),
                                RandomInterventionActorPolicy(),
                                GoalInterventionActorPolicy()
                            ],
                            actives=[(5, 10, 1, 0), (10, 20, 2, 0),
                                     (20, 25, 1, 0), (25, 30, 1, 50)])

    for reset_idx in range(30):
        obs = env.reset()
        for time in range(100):
            desired_action = env.action_space.sample()
            obs, reward, done, info = env.step(action=desired_action)
    env.close()
Example 9
def train_policy(num_of_envs, log_relative_path, maximum_episode_length,
                 skip_frame, seed_num, her_config, total_time_steps,
                 validate_every_timesteps, task_name):
    task = generate_task(task_generator_id=task_name,
                         dense_reward_weights=np.array([100000, 0, 0, 0]),
                         fractional_reward_weight=0)
    env = CausalWorld(task=task,
                      skip_frame=skip_frame,
                      enable_visualization=False,
                      seed=seed_num,
                      max_episode_length=maximum_episode_length)
    env = HERGoalEnvWrapper(env)
    env = CurriculumWrapper(
        env,
        intervention_actors=[GoalInterventionActorPolicy()],
        actives=[(0, 1000000000, 1, 0)])
    set_global_seeds(seed_num)
    checkpoint_callback = CheckpointCallback(save_freq=int(
        validate_every_timesteps / num_of_envs),
                                             save_path=log_relative_path,
                                             name_prefix='model')
    model = HER(MlpPolicy,
                env,
                SAC,
                verbose=1,
                policy_kwargs=dict(layers=[256, 256, 256]),
                **her_config,
                seed=seed_num)
    model.learn(total_timesteps=total_time_steps,
                tb_log_name="her_sac",
                callback=checkpoint_callback)
    return
Example 10
 def setUp(self):
     self.task = generate_task(task_generator_id="reaching")
     self.env = CausalWorld(task=self.task,
                            enable_visualization=False,
                            action_mode='joint_positions',
                            normalize_observations=False,
                            normalize_actions=False)
     return
Example 11
 def _init():
     task = generate_task(task_generator_id=task_name)
     env = CausalWorld(task=task,
                       skip_frame=skip_frame,
                       enable_visualization=False,
                       seed=seed_num + rank,
                       max_episode_length=maximum_episode_length)
     return env
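
# For context: _init closes over task_name, skip_frame, seed_num, rank and
# maximum_episode_length from an enclosing factory that is not shown here.
# A sketch of how such factories are commonly handed to a vectorized
# environment; SubprocVecEnv and make_env(rank) below are assumptions,
# not part of the original snippet.
# from stable_baselines.common.vec_env import SubprocVecEnv
# env = SubprocVecEnv([make_env(rank=i) for i in range(num_of_envs)])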
Example 12
 def setUp(self):
     self.task = generate_task(task_generator_id="picking")
     self.env = CausalWorld(task=self.task,
                            enable_visualization=False,
                            skip_frame=1,
                            action_mode="end_effector_positions",
                            normalize_actions=False,
                            normalize_observations=False)
     return
Example 13
def _make_env(rank):
    task = generate_task(task_generator_id='reaching')
    env = CausalWorld(task=task,
                      skip_frame=10,
                      enable_visualization=False,
                      seed=0 + rank,
                      max_episode_length=600)
    env = GymEnvWrapper(env)
    return env
Example 14
    def test_timing_profile(self):
        from pybullet_envs.bullet.kukaGymEnv import KukaGymEnv
        import time

        kuka_env = KukaGymEnv(renders=False,
                              isDiscrete=False)  # operates at 240 Hz
        task = generate_task(task_generator_id="pushing")
        causal_rl_env = CausalWorld(
            task=task,
            enable_visualization=False,
            seed=0,
            skip_frame=10,
            normalize_actions=False,
            normalize_observations=False)  # operates at 250 Hz
        start = time.time()
        kuka_env.reset()
        end = time.time()
        kuka_reset_time = end - start

        start = time.time()
        causal_rl_env.reset()
        end = time.time()
        causal_rl_reset_time = end - start

        self.assertLess(causal_rl_reset_time, kuka_reset_time * 1.25)

        start = time.time()
        kuka_env.step(kuka_env.action_space.sample())
        end = time.time()
        kuka_step_time = end - start

        start = time.time()
        causal_rl_env.step(causal_rl_env.action_space.sample())
        end = time.time()
        causal_rl_step_time = end - start
        print("time 1", causal_rl_step_time)
        print("time 2", kuka_step_time)
        self.assertLess(causal_rl_step_time, kuka_step_time * 10)

        start = time.time()
        kuka_env.render()
        end = time.time()
        kuka_render_time = end - start

        start = time.time()
        causal_rl_env.render()
        end = time.time()
        causal_rl_render_time = end - start
        self.assertLess(causal_rl_render_time, kuka_render_time * 1.25)

        causal_rl_env.close()
        kuka_env.close()
        return
Example 15
def goal_interventions():
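    # sample_new_goal() returns an intervention dictionary describing a new
    # goal, which is then applied through do_intervention().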
    task = generate_task(task_generator_id='stacked_blocks')
    env = CausalWorld(task=task, enable_visualization=True)
    env.reset()
    for _ in range(10):
        for i in range(200):
            obs, reward, done, info = env.step(env.action_space.sample())
        goal_intervention_dict = env.sample_new_goal()
        print("new goal chosen: ", goal_intervention_dict)
        success_signal, obs = env.do_intervention(goal_intervention_dict)
        print("Goal Intervention success signal", success_signal)
    env.close()
Example 16
def example():
    task = generate_task(task_generator_id='picking')
    env = CausalWorld(task=task, enable_visualization=True)
    env.reset()
    for _ in range(50):
        random_intervention_dict, success_signal, obs = \
            env.do_single_random_intervention()
        print("The random intervention performed is ",
              random_intervention_dict)
        for i in range(100):
            obs, reward, done, info = env.step(env.action_space.sample())
    env.close()
Example 17
def example():
    # This tutorial shows how to record a random policy on the picking task

    task = generate_task(task_generator_id='picking')
    world_params = dict()
    world_params["skip_frame"] = 3
    world_params["seed"] = 200

    viewer.record_video_of_random_policy(task=task,
                                         world_params=world_params,
                                         file_name="picking_video",
                                         number_of_resets=1,
                                         max_time_steps=300)
Example 18
 def _init():
     task = generate_task(task_generator_id=task_name,
                          dense_reward_weights=np.array(
                              [250, 0, 125, 0, 750, 0, 0, 0.005]),
                          fractional_reward_weight=1,
                          goal_height=0.15,
                          tool_block_mass=0.02)
     env = CausalWorld(task=task,
                       skip_frame=skip_frame,
                       enable_visualization=False,
                       seed=seed_num + rank,
                       max_episode_length=maximum_episode_length)
     return env
Example 19
def without_intervention_split():
    task = generate_task(task_generator_id='pushing')
    env = CausalWorld(task=task, enable_visualization=True)
    env.reset()
    for _ in range(2):
        for i in range(200):
            obs, reward, done, info = env.step(env.action_space.sample())
        success_signal, obs = env.do_intervention(
            {'stage_color': np.random.uniform(0, 1, [3])})
        print("Intervention success signal", success_signal)
    env.close()
Example 20
def example():
    # Here you learn how to record/log entire episodes into a directory
    # so they can be reused later, e.g. for reviewing logged episodes or
    # for pre-training policies on this data.

    # Construct a data_recorder that keeps track of every change in the environment.
    # We set the dump frequency of recorded episodes into log files to 11 (the default is 100).
    data_recorder = DataRecorder(output_directory='pushing_episodes',
                                 rec_dumb_frequency=11)

    # Pass the data recorder to the World
    task = generate_task(task_generator_id='pushing')
    env = CausalWorld(task=task,
                      enable_visualization=True,
                      data_recorder=data_recorder)

    # Record some episodes
    for _ in range(23):
        env.reset()
        for _ in range(50):
            env.step(env.action_space.sample())
    env.close()

    # Load the logged episodes
    data = DataLoader(episode_directory='pushing_episodes')
    episode = data.get_episode(14)

    # Initialize a new environment according to a specific episode and replay it
    task = generate_task(episode.task_name, **episode.task_params)
    env = CausalWorld(task, **episode.world_params, enable_visualization=True)
    env.set_starting_state(episode.initial_full_state,
                           check_bounds=False)
    for action in episode.robot_actions:
        env.step(action)
    env.close()

    # You can achieve the same by using the viewer module in one line
    viewer.view_episode(episode)
Example 21
def _make_env(rank):
    task = generate_task(task_generator_id='picking',
                         dense_reward_weights=np.array(
                             [250, 0, 125, 0, 750, 0, 0, 0.005]),
                         fractional_reward_weight=1,
                         goal_height=0.15,
                         tool_block_mass=0.02)
    env = CausalWorld(task=task,
                      skip_frame=3,
                      enable_visualization=False,
                      seed=0,
                      max_episode_length=600)
    env = GymEnvWrapper(env)
    return env
Example 22
def get_world(task_generator_id,
              task_params,
              world_params,
              enable_visualization=False,
              env_wrappers=np.array([]),
              env_wrappers_args=np.array([])):
    """
    Returns a particular CausalWorld instance with optional wrappers

    :param task_generator_id: (str) id of the task of the environment
    :param task_params: (dict) task params of the environment
    :param world_params: (dict) world_params of the environment
    :param enable_visualization: (bool) if GUI visualization is enabled
    :param env_wrappers: (list) a list of gym wrappers
    :param env_wrappers_args: (list) a list of kwargs for the gym wrappers
    :return: (CausalWorld) a CausalWorld environment instance
    """
    world_params["skip_frame"] = 1
    if task_params is None:
        task = generate_task(task_generator_id)
    else:
        if "task_name" in task_params:
            del task_params["task_name"]
        task = generate_task(task_generator_id, **task_params)
    if "enable_visualization" in world_params.keys():
        world_params_temp = dict(world_params)
        del world_params_temp["enable_visualization"]
        env = CausalWorld(task,
                          **world_params_temp,
                          enable_visualization=enable_visualization)
    else:
        env = CausalWorld(task,
                          **world_params,
                          enable_visualization=enable_visualization)
    for i in range(len(env_wrappers)):
        env = env_wrappers[i](env, **env_wrappers_args[i])
    return env
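
# A minimal call sketch for get_world; the task and world parameter values
# here are illustrative assumptions (note that get_world forces skip_frame to 1).
env = get_world('pushing',
                task_params={'tool_block_mass': 0.02},
                world_params={'seed': 0},
                enable_visualization=False)
env.reset()
env.close()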
Example 23
def _make_env(rank):
    task = generate_task('pushing',
                         dense_reward_weights=np.array([2500, 2500, 0]),
                         variables_space='space_a',
                         fractional_reward_weight=100)
    env = CausalWorld(task=task,
                      skip_frame=3,
                      enable_visualization=False,
                      seed=0 + rank)
    env = CurriculumWrapper(
        env,
        intervention_actors=[GoalInterventionActorPolicy()],
        actives=[(0, 1e9, 2, 0)])
    env = GymEnvWrapper(env)
    return env
Example 24
def end_effector_pos():
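    # control_policy(env, obs) is assumed to be defined elsewhere in the
    # original script (a hand-crafted controller mapping observations to
    # joint positions).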
    task = generate_task(task_generator_id='reaching')
    env = CausalWorld(task=task,
                      enable_visualization=True,
                      action_mode="joint_positions",
                      normalize_actions=False,
                      normalize_observations=False)
    obs = env.reset()
    for _ in range(100):
        goal_dict = env.sample_new_goal()
        success_signal, obs = env.do_intervention(goal_dict)
        obs, reward, done, info = env.step(control_policy(env, obs))
        for _ in range(250):
            obs, reward, done, info = env.step(control_policy(env, obs))
    env.close()
Example 25
def example():
    task = generate_task(task_generator_id='picking')
    env = CausalWorld(task=task, enable_visualization=True)
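    # Start episodes from a custom state; reset_default_state() below restores
    # the default starting state.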
    env.set_starting_state(
        {'goal_block': {
            'cartesian_position': [0.1, 0.1, 0.1]
        }})
    for _ in range(500):
        obs, reward, done, info = env.step(env.action_space.sample())
    env.reset_default_state()
    for _ in range(500):
        obs, reward, done, info = env.step(env.action_space.sample())
    env.reset()
    for _ in range(500):
        obs, reward, done, info = env.step(env.action_space.sample())
    env.close()
Example 26
        def _init():
            task = generate_task(task_generator_id=task_name,
                                 dense_reward_weights=np.array(
                                     [100000, 0, 0, 0]),
                                 fractional_reward_weight=0)
            env = CausalWorld(task=task,
                              skip_frame=skip_frame,
                              enable_visualization=False,
                              seed=seed_num + rank,
                              max_episode_length=maximum_episode_length)
            env = CurriculumWrapper(
                env,
                intervention_actors=[GoalInterventionActorPolicy()],
                actives=[(0, 1000000000, 1, 0)])

            return env
Example 27
def with_intervention_split_2():
    task = generate_task(task_generator_id='pushing',
                         variables_space='space_b')
    env = CausalWorld(task=task, enable_visualization=False)
    interventions_space = task.get_intervention_space_a()
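    # Note that the intervention value is drawn from the space A bounds even
    # though the environment was created with variables_space='space_b'.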
    env.reset()
    for _ in range(2):
        for i in range(200):
            obs, reward, done, info = env.step(env.action_space.sample())
        success_signal, obs = env.do_intervention({
            'stage_color':
                np.random.uniform(interventions_space['stage_color'][0],
                                  interventions_space['stage_color'][1])
        })
        print("Intervention success signal", success_signal)
    env.close()
Example 28
def test_pd_gains():
    # Control the robot using a PD controller
    np.random.seed(0)
    task = generate_task(task_generator_id='pushing')
    skip_frame = 1
    env = CausalWorld(task=task,
                      enable_visualization=False,
                      skip_frame=skip_frame,
                      normalize_observations=False,
                      normalize_actions=False,
                      seed=0)
    zero_hold = int(5000 / skip_frame)  # reach desired position in 4 secs?
    obs = env.reset()
    # Test the action-space bounds first

    for _ in range(zero_hold):
        chosen_action = np.zeros(9, )
        obs, reward, done, info = env.step(chosen_action)
    current_joint_positions = obs[1:10]
    if (((current_joint_positions - chosen_action) > 0.1).any()):
        raise AssertionError(
            "The pd controller failed to reach these values {} but reached instead {}"
            .format(chosen_action, current_joint_positions))

    for _ in range(zero_hold):
        chosen_action = env.action_space.high
        obs, reward, done, info = env.step(chosen_action)
    current_joint_positions = obs[1:10]
    if (((current_joint_positions - chosen_action) > 0.1).any()):
        raise AssertionError(
            "The pd controller failed to reach these values {} but reached instead {}"
            .format(chosen_action, current_joint_positions))

    # for i in range(200):
    #     #check for first finger
    #     chosen_action = np.random.uniform(env.action_space.low, env.action_space.high)
    #     chosen_action[3:] = env.action_space.low[3:]
    #     chosen_action[1] = 0
    #     chosen_action[2] = 0
    #     for _ in range(zero_hold):
    #         chosen_action = chosen_action
    #         obs, reward, done, info = env.step(chosen_action)
    #     current_joint_positions = obs[:9]
    #     if(((current_joint_positions - chosen_action) > 0.1).any()):
    #         raise AssertionError("The pd controller failed to reach these values {} but reached instead {}".
    #                              format(chosen_action, current_joint_positions))
    env.close()
Example 29
def example():
    task = generate_task(task_generator_id='pick_and_place')
    env = CausalWorld(task=task, enable_visualization=True)
    env.reset()
    intervention_space = env.get_variable_space_used()
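    # intervention_space maps each exposed variable to its [low, high] bounds
    # in the currently active variables space.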
    for _ in range(100):
        for i in range(200):
            obs, reward, done, info = env.step(env.action_space.low)
        intervention = {
            'tool_block': {
                'size':
                np.random.uniform(intervention_space['tool_block']['size'][0],
                                  intervention_space['tool_block']['size'][1])
            }
        }
        env.do_intervention(intervention)
    env.close()
Example 30
def smooth_action():
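    # MovingAverageActionEnvWrapper smooths the executed actions by averaging
    # over a window of recently applied actions.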
    task = generate_task(task_generator_id='reaching')
    env = CausalWorld(task=task,
                      enable_visualization=True,
                      action_mode="joint_positions",
                      normalize_actions=True,
                      normalize_observations=True,
                      skip_frame=1)
    env = MovingAverageActionEnvWrapper(env)
    for _ in range(50):
        obs = env.reset()
        for _ in range(1000):
            desired_action = np.zeros(9)
            obs, reward, done, info = env.step(desired_action)
    env.close()