Example 1
def example():
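    # MyOwnTask is a user-defined task (not shown here); random actions are taken,
    # with a random intervention every 10 steps.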
    task = MyOwnTask()
    env = CausalWorld(task=task, enable_visualization=True)
    env.reset()
    for _ in range(2000):
        for _ in range(10):
            obs, reward, done, info = \
                env.step(env.action_space.sample())
        random_intervention_dict = env.do_single_random_intervention()
    env.close()
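The snippets in this section omit their import lines. A minimal set, assuming the import paths used in the CausalWorld tutorials (exact module paths may vary between versions), is:

import numpy as np
from causal_world.task_generators.task import generate_task
from causal_world.envs.causalworld import CausalWorld

Later examples additionally rely on wrappers, intervention actors, data-recording utilities and third-party packages (torch, matplotlib, rlkit/rlpyt-style agents) that are likewise imported in the full scripts.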
Example 2
def example():
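    # Picking task: perform a random intervention, print it, then take 100 random actions; repeated 50 times.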
    task = generate_task(task_generator_id='picking')
    env = CausalWorld(task=task, enable_visualization=True)
    env.reset()
    for _ in range(50):
        random_intervention_dict, success_signal, obs = \
            env.do_single_random_intervention()
        print("The random intervention performed is ",
              random_intervention_dict)
        for i in range(100):
            obs, reward, done, info = env.step(env.action_space.sample())
    env.close()
Example 3
def goal_interventions():
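    # Sample a new goal for the stacked_blocks task every 200 random-action steps and apply it as a goal intervention.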
    task = generate_task(task_generator_id='stacked_blocks')
    env = CausalWorld(task=task, enable_visualization=True)
    env.reset()
    for _ in range(10):
        for i in range(200):
            obs, reward, done, info = env.step(env.action_space.sample())
        goal_intervention_dict = env.sample_new_goal()
        print("new goal chosen: ", goal_intervention_dict)
        success_signal, obs = env.do_intervention(goal_intervention_dict)
        print("Goal Intervention success signal", success_signal)
    env.close()
Example 4
    def test_timing_profile(self):
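        # Rough timing comparison: CausalWorld's reset, step and render should not be much slower than pybullet's KukaGymEnv.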
        from pybullet_envs.bullet.kukaGymEnv import KukaGymEnv
        import time

        kuka_env = KukaGymEnv(renders=False,
                              isDiscrete=False)  # operates at 240 Hz
        task = generate_task(task_generator_id="pushing")
        causal_rl_env = CausalWorld(
            task=task,
            enable_visualization=False,
            seed=0,
            skip_frame=10,
            normalize_actions=False,
            normalize_observations=False)  # operates at 250 Hz
        start = time.time()
        kuka_env.reset()
        end = time.time()
        kuka_reset_time = end - start

        start = time.time()
        causal_rl_env.reset()
        end = time.time()
        causal_rl_reset_time = end - start

        self.assertLess(causal_rl_reset_time, kuka_reset_time * 1.25)

        start = time.time()
        kuka_env.step(kuka_env.action_space.sample())
        end = time.time()
        kuka_step_time = end - start

        start = time.time()
        causal_rl_env.step(causal_rl_env.action_space.sample())
        end = time.time()
        causal_rl_step_time = end - start
        print("time 1", causal_rl_step_time)
        print("time 2", kuka_step_time)
        self.assertLess(causal_rl_step_time, kuka_step_time * 10)

        start = time.time()
        kuka_env.render()
        end = time.time()
        kuka_render_time = end - start

        start = time.time()
        causal_rl_env.render()
        end = time.time()
        causal_rl_render_time = end - start
        self.assertLess(causal_rl_render_time, kuka_render_time * 1.25)

        causal_rl_env.close()
        kuka_env.close()
        return
Example 5
def without_intervention_split():
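    # Intervene on the stage color with a random RGB value, without restricting interventions to a particular variable space.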
    task = generate_task(task_generator_id='pushing')
    env = CausalWorld(task=task, enable_visualization=True)
    env.reset()
    for _ in range(2):
        for i in range(200):
            obs, reward, done, info = env.step(env.action_space.sample())
        success_signal, obs = env.do_intervention(
            {'stage_color': np.random.uniform(0, 1, [3])})
        print("Intervention success signal", success_signal)
    env.close()
Example 6
    def test_determinism(self):
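        # Environments constructed with the same task must replay identical observation and reward sequences.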
        task = generate_task(task_generator_id="stacked_blocks")
        observations_v1 = []
        observations_v2 = []
        observations_v3 = []
        rewards_v1 = []
        rewards_v2 = []
        rewards_v3 = []
        horizon = 30

        env_v1 = CausalWorld(task=task, enable_visualization=False, seed=27)

        obs = env_v1.reset()
        observations_v1.append(obs)
        for _ in range(horizon):
            obs, reward, done, info = env_v1.step(env_v1.action_space.low)
            observations_v1.append(obs)
            rewards_v1.append(reward)
        env_v1.close()

        task = generate_task(task_generator_id="stacked_blocks")
        env_v2 = CausalWorld(task=task, enable_visualization=False, seed=27)

        obs = env_v2.reset()
        observations_v2.append(obs)
        for _ in range(horizon):
            obs, reward, done, info = env_v2.step(env_v2.action_space.low)
            observations_v2.append(obs)
            rewards_v2.append(reward)
        env_v2.close()

        task = generate_task(task_generator_id="stacked_blocks")
        env_v3 = CausalWorld(task=task, enable_visualization=False, seed=54)

        obs = env_v3.reset()
        observations_v3.append(obs)
        for _ in range(horizon):
            obs, reward, done, info = env_v3.step(env_v3.action_space.low)
            observations_v3.append(obs)
            rewards_v3.append(reward)
        env_v3.close()

        assert all(
            np.array_equal(observations_v1[i], observations_v2[i])
            for i in range(horizon))
        assert rewards_v1 == rewards_v2
        assert all(
            np.array_equal(observations_v1[i], observations_v3[i])
            for i in range(horizon))
        assert rewards_v1 == rewards_v3
Example 7
    def test_parallelism(self):
        task = generate_task(task_generator_id="stacked_blocks")
        env1 = CausalWorld(task=task, enable_visualization=False, seed=0)
        env1.reset()
        task2 = generate_task(task_generator_id="stacked_blocks")
        env2 = CausalWorld(task=task2, enable_visualization=False, seed=0)
        observations_env1_v1, rewards_env1_v1, _, _ = env1.step(
            env1.action_space.low)
        env2.reset()
        observations_env2_v1, rewards_env2_v1, _, _ = env2.step(
            env2.action_space.low)
        env1.close()
        env2.close()
        assert np.array_equal(observations_env2_v1, observations_env1_v1)
        return
Example 8
def simulate_policy():
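    # Roll out a saved policy on the reaching task with a goal intervention at every reset;
    # the 'evaluation/policy' entry suggests an rlkit-style snapshot.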
    file = './her-sac-fetch-experiment/her-sac-fetch-experiment_2020_07_07_11_11_14_0000--s-0/params.pkl'
    data = torch.load(file)
    policy = data['evaluation/policy']
    policy.reset()

    def policy_func(obs):
        # new_obs = np.hstack((obs['observation'], obs['desired_goal']))
        a, agent_info = policy.get_action(obs)
        return a

    task = generate_task(task_generator_id='reaching')
    env = CausalWorld(task=task,
                      enable_visualization=True,
                      skip_frame=1,
                      seed=0,
                      max_episode_length=2500)
    env = CurriculumWrapper(env,
                            intervention_actors=[GoalInterventionActorPolicy()],
                            actives=[(0, 1000000000, 1, 0)])
    # env = HERGoalEnvWrapper(env)

    for _ in range(100):
        total_reward = 0
        o = env.reset()
        for _ in range(2500):
            o, reward, done, info = env.step(policy_func(o))
            total_reward += reward
        print("total reward is :", total_reward)
    env.close()
Example 9
def example():
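    # Start episodes from a custom goal-block position via set_starting_state, then restore the
    # default starting state and finally do a plain reset.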
    task = generate_task(task_generator_id='picking')
    env = CausalWorld(task=task, enable_visualization=True)
    env.set_starting_state(
        {'goal_block': {
            'cartesian_position': [0.1, 0.1, 0.1]
        }})
    for _ in range(500):
        obs, reward, done, info = env.step(env.action_space.sample())
    env.reset_default_state()
    for _ in range(500):
        obs, reward, done, info = env.step(env.action_space.sample())
    env.reset()
    for _ in range(500):
        obs, reward, done, info = env.step(env.action_space.sample())
    env.close()
Example 10
def with_intervention_split_2():
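    # The task uses variable space B, while the stage_color intervention is sampled from the bounds returned for space A.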
    task = generate_task(task_generator_id='pushing',
                          variables_space='space_b')
    env = CausalWorld(task=task, enable_visualization=False)
    interventions_space = task.get_intervention_space_a()
    env.reset()
    for _ in range(2):
        for i in range(200):
            obs, reward, done, info = env.step(env.action_space.sample())
        success_signal, obs = env.do_intervention({
            'stage_color':
                np.random.uniform(interventions_space['stage_color'][0],
                                  interventions_space['stage_color'][1])
        })
        print("Intervention success signal", success_signal)
    env.close()
Example 11
def example():
    #initialize env
    task_gen = generate_task(task_generator_id='pushing')
    env = CausalWorld(task_gen, skip_frame=10, enable_visualization=True)

    # define a custom curriculum of interventions:

    # No intervention actor is defined until episode number 5
    # Goal intervention actor from episode number 5 to 10 after reset at time step 0
    # Visual intervention actor from episode number 10 to 20 every two episodes after reset at time step 0
    # Random intervention actor from episode number 20 to 25 after reset at time step 0
    # Goal intervention actor from episode number 25 to 30 each at time step 50

    env = CurriculumWrapper(env,
                            intervention_actors=[
                                GoalInterventionActorPolicy(),
                                VisualInterventionActorPolicy(),
                                RandomInterventionActorPolicy(),
                                GoalInterventionActorPolicy()
                            ],
                            actives=[(5, 10, 1, 0), (10, 20, 2, 0),
                                     (20, 25, 1, 0), (25, 30, 1, 50)])

    for reset_idx in range(30):
        obs = env.reset()
        for time in range(100):
            desired_action = env.action_space.sample()
            obs, reward, done, info = env.step(action=desired_action)
    env.close()
Example 12
def simulate_policy():
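    # Roll out a saved SAC agent on the picking task; SacAgent, GymEnvWrapper and torchify_buffer
    # are assumed to come from rlpyt here.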
    task = generate_task(task_generator_id='picking')
    env = CausalWorld(task=task,
                      enable_visualization=True,
                      skip_frame=3,
                      seed=0,
                      max_episode_length=600)
    env = GymEnvWrapper(env)
    file = './itr_1097499.pkl'
    data = torch.load(file)
    agent_state_dict = data['agent_state_dict']
    agent = SacAgent(initial_model_state_dict=agent_state_dict)
    agent.initialize(env_spaces=env.spaces)
    agent.eval_mode(itr=data['itr'])

    def policy_func(obs):
        # new_obs = np.hstack((obs['observation'], obs['desired_goal']))
        agent_info = agent.step(torchify_buffer(obs),
                                prev_action=None,
                                prev_reward=None)
        return agent_info.action.numpy()

    # env = HERGoalEnvWrapper(env)
    for _ in range(100):
        total_reward = 0
        o = env.reset()
        for _ in range(600):
            o, reward, done, info = env.step(policy_func(o))
            total_reward += reward
        print("total reward is :", total_reward)
    env.close()
Example 13
def example():
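    # Pixel observation mode with three cameras; the images contained in the last observation are shown with matplotlib.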
    task = generate_task(task_generator_id='stacked_blocks')
    env = CausalWorld(task=task,
                      skip_frame=10,
                      enable_visualization=True,
                      seed=0,
                      action_mode="joint_positions",
                      observation_mode="pixel",
                      camera_indicies=[0, 1, 2])
    env.reset()
    for _ in range(5):
        obs, reward, done, info = env.step(env.action_space.sample())
    #show last images
    for i in range(6):
        plt.imshow(obs[i])
        plt.show()
    env.close()
Example 14
def example():
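    # Every 200 steps, sample a new tool block size uniformly within the task's variable space and apply it as an intervention.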
    task = generate_task(task_generator_id='pick_and_place')
    env = CausalWorld(task=task, enable_visualization=True)
    env.reset()
    intervention_space = env.get_variable_space_used()
    for _ in range(100):
        for i in range(200):
            obs, reward, done, info = env.step(env.action_space.low)
        intervention = {
            'tool_block': {
                'size':
                np.random.uniform(intervention_space['tool_block']['size'][0],
                                  intervention_space['tool_block']['size'][1])
            }
        }
        env.do_intervention(intervention)
    env.close()
Example 15
def example():
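    # Save the full simulator state at step 100, restore it later, and check that replaying the same actions reproduces the trajectory.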
    task = generate_task(task_generator_id="creative_stacked_blocks")
    env = CausalWorld(task=task, enable_visualization=False, seed=0)
    actions = [env.action_space.sample() for _ in range(200)]
    env.reset()
    observations_1 = []
    rewards_1 = []
    for i in range(200):
        observations, rewards, _, _ = env.step(actions[i])
        if i == 100:
            state = env.get_state()
        observations_1.append(observations)
        rewards_1.append(rewards)
    env.set_state(state)
    for i in range(101, 200):
        observations, rewards, _, _ = env.step(actions[i])
        assert np.array_equal(observations_1[i], observations)
    env.close()
Example 16
def privileged_information():
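    # Expose the potential partial solution so that info carries a 'possible_solution_intervention',
    # which is then applied directly to help solve the goal.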
    task = generate_task(task_generator_id='pushing')
    env = CausalWorld(task=task, enable_visualization=True)
    env.expose_potential_partial_solution()
    env.reset()
    for _ in range(10):
        goal_intervention_dict = env.sample_new_goal()
        success_signal, obs = env.do_intervention(goal_intervention_dict)
        print("Goal Intervention success signal", success_signal)
        for i in range(1000):
            obs, reward, done, info = env.step(env.action_space.low)
        print("now we solve it with privileged info")
        success_signal, obs = env.do_intervention(
            info['possible_solution_intervention'], check_bounds=False)
        print("Partial Solution Setting Intervention Succes Signal",
              success_signal)
        for i in range(500):
            obs, reward, done, info = env.step(env.action_space.low)
    env.close()
Example 17
def example():
    # Here you learn how to record/log entire episodes into a directory,
    # to reuse them later, e.g. for reviewing logged episodes or using the
    # data for pre-training policies.

    # Construct a data_recorder that keeps track of every change in the environment.
    # Recorded episodes are dumped to log files every rec_dumb_frequency episodes, here 11 (the default is 100).
    data_recorder = DataRecorder(output_directory='pushing_episodes',
                                 rec_dumb_frequency=11)

    # Pass the data recorder to the World
    task = generate_task(task_generator_id='pushing')
    env = CausalWorld(task=task,
                      enable_visualization=True,
                      data_recorder=data_recorder)

    # Record some episodes
    for _ in range(23):
        env.reset()
        for _ in range(50):
            env.step(env.action_space.sample())
    env.close()

    # Load the logged episodes
    data = DataLoader(episode_directory='pushing_episodes')
    episode = data.get_episode(14)

    # Initialize a new environment according to a specific episode and replay it
    task = generate_task(episode.task_name, **episode.task_params)
    env = CausalWorld(task, **episode.world_params, enable_visualization=True)
    env.set_starting_state(episode.initial_full_state,
                           check_bounds=False)
    for action in episode.robot_actions:
        env.step(action)
    env.close()

    # You can achieve the same by using the viewer module in one line
    viewer.view_episode(episode)
Example 18
    def test_reset_default_state(self):
        task = generate_task(task_generator_id="picking")
        env = CausalWorld(task=task, enable_visualization=False, seed=0)
        actions = [env.action_space.sample() for _ in range(200)]
        observations_1 = []
        rewards_1 = []
        env.reset()
        for i in range(200):
            observations, rewards, _, _ = env.step(actions[i])
            observations_1.append(observations)
            rewards_1.append(rewards)
        env.set_starting_state(
            {'goal_block': {
                'cylindrical_position': [0.1, np.pi, 0.1]
            }})
        for i in range(200):
            observations, rewards, _, _ = env.step(actions[i])
        env.reset_default_state()
        for i in range(200):
            observations, rewards, _, _ = env.step(actions[i])
            assert np.array_equal(observations_1[i], observations)
        env.close()
        return
Example 19
def example():
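    # Move the goal block across the arena every 50 steps via interventions with check_bounds=False, changing its color as well.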
    task = generate_task(task_generator_id='pushing')
    env = CausalWorld(task=task, enable_visualization=True)
    env.reset()
    counter = 0
    for _ in range(1):
        for i in range(210):
            obs, reward, done, info = env.step(env.action_space.low)
            if i % 50 == 0 and i > 0:
                print(i)
                intervention = {'goal_block': {
                    'cartesian_position': [0, -0.08 + (0.04 * counter), 0.0325],
                    'color': [0, 0, 1]}}
                env.do_intervention(intervention, check_bounds=False)
                counter += 1
                print("intervention")
            if i == 201:
                intervention = {'goal_block': {
                    'cartesian_position': [0, 0.08,  0.0325],
                    'color': [0, 1, 0]}}
                env.do_intervention(intervention, check_bounds=False)
    env.close()
Example 20
def example():
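    # Wrap the env with delta actions and a visual-intervention curriculum, save the world tracker
    # with save_world, then rebuild the environment with load_world.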
    #initialize env
    task_gen = generate_task(task_generator_id='pushing')
    env = CausalWorld(task_gen, skip_frame=1, enable_visualization=True)
    env = DeltaActionEnvWrapper(env)
    env = CurriculumWrapper(
        env,
        intervention_actors=[VisualInterventionActorPolicy()],
        actives=[(0, 20, 1, 0)])

    for reset_idx in range(10):
        obs = env.reset()
        for time in range(15):
            obs, reward, done, info = env.step(action=np.zeros(9,))
    env.save_world('./')
    env.close()
    #now load it again

    env = load_world(tracker_relative_path='./', enable_visualization=True)
    for reset_idx in range(10):
        obs = env.reset()
        for time in range(15):
            obs, reward, done, info = env.step(action=np.zeros(9,))
Example 21
def run_mpc():
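    # Plan a picking trajectory with the cross-entropy method over a simulator model, then replay it while recording a video.
    # NOTE: seed, skip_frame, horizon_length, max_iterations, num_of_particles, num_elite,
    # parallel_agents and _make_env are assumed to be defined elsewhere in the full script.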
    task = generate_task(
        task_generator_id='picking',
        joint_positions=[-0.21737874, 0.55613149,
                         -1.09308519, -0.12868997,
                         0.52551013, -1.08006493,
                         -0.00221536, 0.46163487,
                         -1.00948735],
        tool_block_position=[0.0, 0, 0.035],
        fractional_reward_weight=1,
        dense_reward_weights=np.array([0, 10, 0,
                                       1, 1, 0, 0,
                                       0]))
    env = CausalWorld(task=task,
                      skip_frame=1,
                      enable_visualization=False,
                      seed=seed)
    true_model = SimulatorModel(_make_env, parallel_agents=parallel_agents)
    optimizer = CrossEntropyMethod(
        planning_horizon=horizon_length,
        max_iterations=max_iterations,
        population_size=num_of_particles,
        num_elite=num_elite,
        action_upper_bound=np.array(env.action_space.high),
        action_lower_bound=np.array(env.action_space.low),
        model=true_model)
    env.reset()
    actions = optimizer.get_actions()
    true_model.end_sim()
    recorder = VideoRecorder(env, 'picking.mp4')
    for i in range(horizon_length):
        for _ in range(skip_frame):
            recorder.capture_frame()
            obs, reward, done, info = env.step(actions[i])
    recorder.capture_frame()
    recorder.close()
    env.close()
Example 22
    def test_goal_intervention(self):
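        # After a valid goal intervention, the sampled goal position should appear in the unnormalized observation.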
        task = generate_task(task_generator_id='picking')
        env = CausalWorld(task=task,
                          enable_visualization=False,
                          normalize_observations=False)
        for _ in range(10):
            invalid_interventions_before = env.get_tracker(
            ).invalid_intervention_steps
            new_goal = env.sample_new_goal()
            env.set_starting_state(interventions_dict=new_goal)
            invalid_interventions_after = env.get_tracker(
            ).invalid_intervention_steps
            for _ in range(2):
                for _ in range(100):
                    obs, reward, done, info = env.step(env.action_space.low)
                    #TODO: this shouldn't be the case when the benchmark is complete
                    #It's a hack for now
                    if invalid_interventions_before == invalid_interventions_after:
                        assert np.array_equal(
                            cyl2cart(new_goal['goal_block']
                                     ['cylindrical_position']), obs[-7:-4])
                env.reset()

        env.close()
Example 23
def end_effector_pos():
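    # Repeatedly sample new reaching goals and drive the robot with control_policy, which is assumed to be defined elsewhere in the script.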
    task = generate_task(task_generator_id='reaching')
    env = CausalWorld(task=task,
                      enable_visualization=True,
                      action_mode="joint_positions",
                      normalize_actions=False,
                      normalize_observations=False)
    obs = env.reset()
    for _ in range(100):
        goal_dict = env.sample_new_goal()
        success_signal, obs = env.do_intervention(goal_dict)
        obs, reward, done, info = env.step(control_policy(env, obs))
        for _ in range(250):
            obs, reward, done, info = env.step(control_policy(env, obs))
    env.close()
Example 24
def test_pd_gains():
    #control the robot using pd controller
    np.random.seed(0)
    task = generate_task(task_generator_id='pushing')
    skip_frame = 1
    env = CausalWorld(task=task,
                      enable_visualization=False,
                      skip_frame=skip_frame,
                      normalize_observations=False,
                      normalize_actions=False,
                      seed=0)
    zero_hold = int(5000 / skip_frame)  #reach desired position in 4 secs?
    obs = env.reset()
    #test bounds first

    for _ in range(zero_hold):
        chosen_action = np.zeros(9, )
        obs, reward, done, info = env.step(chosen_action)
    current_joint_positions = obs[1:10]
    if (((current_joint_positions - chosen_action) > 0.1).any()):
        raise AssertionError(
            "The pd controller failed to reach these values {} but reached instead {}"
            .format(chosen_action, current_joint_positions))

    for _ in range(zero_hold):
        chosen_action = env.action_space.high
        obs, reward, done, info = env.step(chosen_action)
    current_joint_positions = obs[1:10]
    if (((current_joint_positions - chosen_action) > 0.1).any()):
        raise AssertionError(
            "The pd controller failed to reach these values {} but reached instead {}"
            .format(chosen_action, current_joint_positions))

    # for i in range(200):
    #     #check for first finger
    #     chosen_action = np.random.uniform(env.action_space.low, env.action_space.high)
    #     chosen_action[3:] = env.action_space.low[3:]
    #     chosen_action[1] = 0
    #     chosen_action[2] = 0
    #     for _ in range(zero_hold):
    #         chosen_action = chosen_action
    #         obs, reward, done, info = env.step(chosen_action)
    #     current_joint_positions = obs[:9]
    #     if(((current_joint_positions - chosen_action) > 0.1).any()):
    #         raise AssertionError("The pd controller failed to reach these values {} but reached instead {}".
    #                              format(chosen_action, current_joint_positions))
    env.close()
Example 25
def smooth_action():
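    # Smooth the commanded joint positions by wrapping the environment with MovingAverageActionEnvWrapper.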
    task = generate_task(task_generator_id='reaching')
    env = CausalWorld(task=task,
                      enable_visualization=True,
                      action_mode="joint_positions",
                      normalize_actions=True,
                      normalize_observations=True,
                      skip_frame=1)
    env = MovingAverageActionEnvWrapper(env)
    for _ in range(50):
        obs = env.reset()
        for _ in range(1000):
            desired_action = np.zeros([9])
            obs, reward, done, info = env.step(desired_action)
    env.close()
Example 26
def example():
    #initialize env
    task = generate_task(task_generator_id='pick_and_place')
    env = CausalWorld(task, skip_frame=10, enable_visualization=True)

    # define a custom curriculum of interventions:
    # Goal intervention actor each episode after reset

    env = CurriculumWrapper(
        env,
        intervention_actors=[GoalInterventionActorPolicy()],
        actives=[(0, 1000000000, 1, 0)])

    for reset_idx in range(30):
        obs = env.reset()
        for time in range(300):
            obs, reward, done, info = env.step(env.action_space.low)
    env.close()
Example 27
def example():
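    # Control the picking task through the ObjectSelectorWrapper's low-dimensional action interface
    # (the inline comments hint at lifting and yaw rotation).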
    task = generate_task(task_generator_id='picking')
    env = CausalWorld(task=task, enable_visualization=True)
    env = ObjectSelectorWrapper(env)
    for _ in range(50):
        obs = env.reset()
        #go up
        for i in range(70):
            obs, reward, done, info = env.step([0, 1, 0])
        # rotate yaw
        for i in range(20):
            obs, reward, done, info = env.step([0, 0, 1])
        for i in range(50):
            obs, reward, done, info = env.step([0, 5, 0])
        for i in range(20):
            obs, reward, done, info = env.step([0, 0, 1])
            # print(obs)
        for i in range(50):
            obs, reward, done, info = env.step([0, 2, 0])
            # print(obs)
    env.close()
Example 28
class TestPicking(unittest.TestCase):
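    # Picking-task tests: determinism (plain, with interventions, with in-episode interventions)
    # and grasping/lifting the block under different masses.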
    def setUp(self):
        self.task = generate_task(task_generator_id="picking")
        self.env = CausalWorld(task=self.task,
                               enable_visualization=False,
                               skip_frame=1,
                               action_mode="end_effector_positions",
                               normalize_actions=False,
                               normalize_observations=False)
        return

    def tearDown(self):
        self.env.close()
        return

    def test_determinism(self):
        self.env.set_action_mode('joint_positions')
        observations_1 = []
        rewards_1 = []
        horizon = 2000
        actions = [self.env.action_space.sample() for _ in range(horizon)]
        actions = np.array(actions)
        obs = self.env.reset()
        observations_1.append(obs)
        for i in range(horizon):
            obs, reward, done, info = self.env.step(actions[i])
            observations_1.append(obs)
            rewards_1.append(reward)

        for _ in range(10):
            observations_2 = []
            rewards_2 = []
            obs = self.env.reset()
            observations_2.append(obs)
            for i in range(horizon):
                obs, reward, done, info = self.env.step(actions[i])
                observations_2.append(obs)
                rewards_2.append(reward)
                if not np.array_equal(observations_1[i], observations_2[i]):
                    print(observations_1[i] - observations_2[i])
                assert np.array_equal(observations_1[i], observations_2[i])
            assert rewards_1 == rewards_2

    def lift_last_finger_first(self, current_obs):
        desired_action = current_obs[19:19 + 9]
        desired_action[6:] = [-0, -0.08, 0.4]
        for _ in range(250):
            obs, reward, done, info = self.env.step(desired_action)
        return desired_action

    def move_first_two_fingers(self, current_obs):
        desired_action = current_obs[19:19 + 9]
        desired_action[:6] = [
            0., 0.15313708, 0.05586292, 0.13262061, -0.07656854, 0.05586292
        ]
        for _ in range(250):
            obs, reward, done, info = self.env.step(desired_action)
        return obs

    def grip_block(self):
        grip_locations = get_suggested_grip_locations(
            self.env._task._stage.get_object('tool_block').get_size(),
            self.env._task._stage.get_object(
                'tool_block').world_to_cube_r_matrix())
        desired_action = np.zeros(9)
        desired_action[6:] = [-0, -0.08, 0.4]
        desired_action[:3] = grip_locations[0]
        desired_action[3:6] = grip_locations[1]
        # grasp the block now
        for _ in range(250):
            obs, reward, done, info = self.env.step(desired_action)
        return desired_action

    def lift_block(self, desired_grip):
        desired_action = desired_grip
        for _ in range(40):
            desired_action[2] += 0.005
            desired_action[5] += 0.005
            for _ in range(10):
                obs, reward, done, info = self.env.step(desired_action)
        return obs

    def test_02_mass(self):
        self.env.set_action_mode('end_effector_positions')
        intervention = {'tool_block': {'mass': 0.02}}
        self.env.do_intervention(interventions_dict=intervention)
        for _ in range(1):
            obs = self.env.reset()
            obs = self.move_first_two_fingers(obs)
            self.lift_last_finger_first(obs)
            desired_grip = self.grip_block()
            self.assertEqual(self.env.get_robot().get_tip_contact_states(),
                             [1, 1, 0], "contact states are not closed")
            final_obs = self.lift_block(desired_grip)
            self.assertGreater(final_obs[-22], 0.2,
                               "the block didn't get lifted")

    def test_08_mass(self):
        self.env.set_action_mode('end_effector_positions')
        intervention = {'tool_block': {'mass': 0.08}}
        self.env.do_intervention(interventions_dict=intervention)
        for _ in range(1):
            obs = self.env.reset()
            obs = self.move_first_two_fingers(obs)
            self.lift_last_finger_first(obs)
            desired_grip = self.grip_block()
            self.assertEqual(self.env.get_robot().get_tip_contact_states(),
                             [1, 1, 0], "contact states are not closed")
            final_obs = self.lift_block(desired_grip)
            self.assertGreater(final_obs[-22], 0.2,
                               "the block didn't get lifted")

    def test_1_mass(self):
        self.env.set_action_mode('end_effector_positions')
        intervention = {'tool_block': {'mass': 0.1}}
        self.env.do_intervention(interventions_dict=intervention)
        for _ in range(1):
            obs = self.env.reset()
            obs = self.move_first_two_fingers(obs)
            self.lift_last_finger_first(obs)
            desired_grip = self.grip_block()
            self.assertEqual(self.env.get_robot().get_tip_contact_states(),
                             [1, 1, 0], "contact states are not closed")
            final_obs = self.lift_block(desired_grip)
            self.assertGreater(final_obs[-22], 0.2,
                               "the block didn't get lifted")

    def test_determinism_w_interventions(self):
        self.env.set_action_mode('joint_positions')
        observations_1 = []
        rewards_1 = []
        horizon = 100
        actions = [self.env.action_space.sample() for _ in range(horizon)]
        actions = np.array(actions)
        new_goal = self.env.sample_new_goal()
        self.env.set_starting_state(interventions_dict=new_goal)
        for i in range(horizon):
            obs, reward, done, info = self.env.step(actions[i])
            observations_1.append(obs)
            rewards_1.append(reward)

        for _ in range(10):
            observations_2 = []
            rewards_2 = []
            self.env.reset()
            for i in range(horizon):
                obs, reward, done, info = self.env.step(actions[i])
                observations_2.append(obs)
                rewards_2.append(reward)
                assert np.array_equal(observations_1[i], observations_2[i])
            assert rewards_1 == rewards_2

    def test_determinism_w_in_episode_interventions(self):
        self.env.set_action_mode('joint_positions')
        observations_1 = []
        rewards_1 = []
        horizon = 100
        actions = [self.env.action_space.sample() for _ in range(horizon)]
        actions = np.array(actions)
        self.env.reset()
        for i in range(horizon):
            obs, reward, done, info = self.env.step(actions[i])
            observations_1.append(obs)
            rewards_1.append(reward)
        #now we will restart again and perform an in-episode intervention
        self.env.reset()
        for i in range(horizon):
            obs, reward, done, info = self.env.step(actions[i])
            if i == 50:
                success_signal = self.env.do_intervention({
                    'tool_block': {
                        'cylindrical_position': [0.1, np.pi / 2, 0.0325]
                    }
                })
        observations_2 = []
        rewards_2 = []
        self.env.reset()
        for i in range(horizon):
            obs, reward, done, info = self.env.step(actions[i])
            observations_2.append(obs)
            rewards_2.append(reward)
            if not np.array_equal(observations_1[i], observations_2[i]):
                print(observations_1[i] - observations_2[i])
            assert np.array_equal(observations_1[i], observations_2[i])
        assert rewards_1 == rewards_2

    def test_goal_intervention(self):
        task = generate_task(task_generator_id='picking')
        env = CausalWorld(task=task,
                          enable_visualization=False,
                          normalize_observations=False)
        for _ in range(10):
            invalid_interventions_before = env.get_tracker(
            ).invalid_intervention_steps
            new_goal = env.sample_new_goal()
            env.set_starting_state(interventions_dict=new_goal)
            invalid_interventions_after = env.get_tracker(
            ).invalid_intervention_steps
            for _ in range(2):
                for _ in range(100):
                    obs, reward, done, info = env.step(env.action_space.low)
                    #TODO: this shouldn't be the case when the benchmark is complete
                    #It's a hack for now
                    if invalid_interventions_before == invalid_interventions_after:
                        assert np.array_equal(
                            cyl2cart(new_goal['goal_block']
                                     ['cylindrical_position']), obs[-7:-4])
                env.reset()

        env.close()
Example 29
class TestCreativeStackedBlocks(unittest.TestCase):
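    # Determinism tests for the creative_stacked_blocks task, with and without (in-episode) interventions.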

    def setUp(self):
        self.task = generate_task(task_generator_id="creative_stacked_blocks")
        self.env = CausalWorld(task=self.task, enable_visualization=False)
        return

    def tearDown(self):
        self.env.close()
        return

    def test_determinism(self):
        observations_1 = []
        rewards_1 = []
        horizon = 100
        actions = [self.env.action_space.sample() for _ in range(horizon)]
        actions = np.array(actions)
        obs = self.env.reset()
        observations_1.append(obs)
        for i in range(horizon):
            obs, reward, done, info = self.env.step(actions[i])
            observations_1.append(obs)
            rewards_1.append(reward)

        for _ in range(10):
            observations_2 = []
            rewards_2 = []
            obs = self.env.reset()
            observations_2.append(obs)
            for i in range(horizon):
                obs, reward, done, info = self.env.step(actions[i])
                observations_2.append(obs)
                rewards_2.append(reward)
                if not np.array_equal(observations_1[i], observations_2[i]):
                    print(
                        np.array(observations_1[i]) -
                        np.array(observations_2[i]))
                assert np.array_equal(observations_1[i], observations_2[i])
            assert rewards_1 == rewards_2

    def test_determinism_w_interventions(self):
        observations_1 = []
        rewards_1 = []
        horizon = 100
        actions = [self.env.action_space.sample() for _ in range(horizon)]
        actions = np.array(actions)
        new_goal = self.env.sample_new_goal()
        self.env.set_starting_state(interventions_dict=new_goal)
        for i in range(horizon):
            obs, reward, done, info = self.env.step(actions[i])
            observations_1.append(obs)
            rewards_1.append(reward)

        for _ in range(10):
            observations_2 = []
            rewards_2 = []
            self.env.reset()
            for i in range(horizon):
                obs, reward, done, info = self.env.step(actions[i])
                observations_2.append(obs)
                rewards_2.append(reward)
                if not np.array_equal(observations_1[i], observations_2[i]):
                    print(
                        np.array(observations_1[i]) -
                        np.array(observations_2[i]))
                assert np.array_equal(observations_1[i], observations_2[i])
            assert rewards_1 == rewards_2

    def test_determinism_w_in_episode_interventions(self):
        observations_1 = []
        rewards_1 = []
        horizon = 100
        actions = [self.env.action_space.sample() for _ in range(horizon)]
        actions = np.array(actions)
        self.env.reset()
        for i in range(horizon):
            obs, reward, done, info = self.env.step(actions[i])
            observations_1.append(obs)
            rewards_1.append(reward)
        #now we will restart again and perform an in-episode intervention
        self.env.reset()
        for i in range(horizon):
            obs, reward, done, info = self.env.step(actions[i])
            if i == 50:
                success_signal = self.env.do_intervention(
                    {'tool_level_0_num_1': {
                        'cylindrical_position': [0, 0, 0.2]
                    }})
        observations_2 = []
        rewards_2 = []
        self.env.reset()
        for i in range(horizon):
            obs, reward, done, info = self.env.step(actions[i])
            observations_2.append(obs)
            rewards_2.append(reward)
            assert np.array_equal(observations_1[i], observations_2[i])
        assert rewards_1 == rewards_2
Example 30
class TestReaching(unittest.TestCase):
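    # Determinism tests for the reaching task, with and without goal interventions.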

    def setUp(self):
        self.task = generate_task(task_generator_id="reaching")
        self.env = CausalWorld(task=self.task,
                               enable_visualization=False,
                               action_mode='joint_positions',
                               normalize_observations=False,
                               normalize_actions=False)
        return

    def tearDown(self):
        self.env.close()
        return

    # def test_forward_kinemetics(self):
    #     horizon = 100
    #     obs = self.env.reset()
    #     desired_goal = obs[-9:]
    #     for i in range(horizon):
    #         obs, reward, done, info = self.env.step(desired_goal)

    # print(obs[19:28])

    def test_determinism(self):
        observations_1 = []
        rewards_1 = []
        horizon = 100
        actions = [self.env.action_space.sample() for _ in range(horizon)]
        actions = np.array(actions)
        obs = self.env.reset()
        observations_1.append(obs)
        for i in range(horizon):
            obs, reward, done, info = self.env.step(actions[i])
            observations_1.append(obs)
            rewards_1.append(reward)

        for _ in range(10):
            observations_2 = []
            rewards_2 = []
            obs = self.env.reset()
            observations_2.append(obs)
            for i in range(horizon):
                obs, reward, done, info = self.env.step(actions[i])
                observations_2.append(obs)
                rewards_2.append(reward)
                if not np.array_equal(observations_1[i], observations_2[i]):
                    print(
                        np.array(observations_1[i]) -
                        np.array(observations_2[i]))
                assert np.array_equal(observations_1[i], observations_2[i])
            assert rewards_1 == rewards_2

    def test_determinism_w_interventions(self):
        observations_1 = []
        rewards_1 = []
        horizon = 100
        actions = [self.env.action_space.sample() for _ in range(horizon)]
        actions = np.array(actions)
        new_goal = self.env.sample_new_goal()
        self.env.set_starting_state(interventions_dict=new_goal)
        for i in range(horizon):
            obs, reward, done, info = self.env.step(actions[i])
            observations_1.append(obs)
            rewards_1.append(reward)

        for _ in range(10):
            observations_2 = []
            rewards_2 = []
            self.env.reset()
            for i in range(horizon):
                obs, reward, done, info = self.env.step(actions[i])
                observations_2.append(obs)
                rewards_2.append(reward)
                assert np.array_equal(observations_1[i], observations_2[i])
            assert rewards_1 == rewards_2

    def test_determinism_w_in_episode_interventions(self):
        observations_1 = []
        rewards_1 = []
        horizon = 100
        actions = [self.env.action_space.sample() for _ in range(horizon)]
        actions = np.array(actions)
        self.env.reset()
        for i in range(horizon):
            obs, reward, done, info = self.env.step(actions[i])
            observations_1.append(obs)
            rewards_1.append(reward)
        #now we will restart again and perform an in-episode intervention
        self.env.reset()
        for i in range(horizon):
            obs, reward, done, info = self.env.step(actions[i])
            if i == 50:
                new_goal = self.env.sample_new_goal()
                success_signal = self.env.do_intervention(new_goal)
        observations_2 = []
        rewards_2 = []
        self.env.reset()
        for i in range(horizon):
            obs, reward, done, info = self.env.step(actions[i])
            observations_2.append(obs)
            rewards_2.append(reward)
            assert np.array_equal(observations_1[i], observations_2[i])
        assert rewards_1 == rewards_2