def example():
    # initialize env
    task_gen = generate_task(task_generator_id='pushing')
    env = CausalWorld(task_gen, skip_frame=10, enable_visualization=True)
    # define a custom curriculum of interventions:
    # - no intervention actor is active until episode number 5
    # - goal intervention actor from episode 5 to 10, after reset at time step 0
    # - visual intervention actor from episode 10 to 20, every two episodes, after reset at time step 0
    # - random intervention actor from episode 20 to 25, after reset at time step 0
    # - goal intervention actor from episode 25 to 30, each episode at time step 50
    env = CurriculumWrapper(env,
                            intervention_actors=[
                                GoalInterventionActorPolicy(),
                                VisualInterventionActorPolicy(),
                                RandomInterventionActorPolicy(),
                                GoalInterventionActorPolicy()
                            ],
                            actives=[(5, 10, 1, 0), (10, 20, 2, 0),
                                     (20, 25, 1, 0), (25, 30, 1, 50)])
    for reset_idx in range(30):
        obs = env.reset()
        for time in range(100):
            desired_action = env.action_space.sample()
            obs, reward, done, info = env.step(action=desired_action)
    env.close()
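# Imports assumed by the curriculum example above (module paths as shown in
# the causal_world README; a sketch, adjust to your installation). Each
# actives tuple reads (episode_start, episode_end, episode_periodicity,
# time_step), matching the schedule described in the comments above:
from causal_world.task_generators import generate_task
from causal_world.envs import CausalWorld
from causal_world.intervention_actors import GoalInterventionActorPolicy, \
    VisualInterventionActorPolicy, RandomInterventionActorPolicy
from causal_world.wrappers.curriculum_wrappers import CurriculumWrapper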
def simulate_policy():
    file = './her-sac-fetch-experiment/her-sac-fetch-experiment_2020_07_07_11_11_14_0000--s-0/params.pkl'
    data = torch.load(file)
    policy = data['evaluation/policy']
    policy.reset()

    def policy_func(obs):
        # new_obs = np.hstack((obs['observation'], obs['desired_goal']))
        a, agent_info = policy.get_action(obs)
        return a

    task = generate_task(task_generator_id='reaching')
    env = CausalWorld(task=task,
                      enable_visualization=True,
                      skip_frame=1,
                      seed=0,
                      max_episode_length=2500)
    env = CurriculumWrapper(env,
                            intervention_actors=[GoalInterventionActorPolicy()],
                            actives=[(0, 1000000000, 1, 0)])
    # env = HERGoalEnvWrapper(env)
    for _ in range(100):
        total_reward = 0
        o = env.reset()
        for _ in range(2500):
            o, reward, done, info = env.step(policy_func(o))
            total_reward += reward
        print("total reward is :", total_reward)
    env.close()
def simulate_policy():
    task = generate_task(task_generator_id='picking')
    env = CausalWorld(task=task,
                      enable_visualization=True,
                      skip_frame=3,
                      seed=0,
                      max_episode_length=600)
    env = GymEnvWrapper(env)
    file = './itr_1097499.pkl'
    data = torch.load(file)
    agent_state_dict = data['agent_state_dict']
    agent = SacAgent(initial_model_state_dict=agent_state_dict)
    agent.initialize(env_spaces=env.spaces)
    agent.eval_mode(itr=data['itr'])

    def policy_func(obs):
        # new_obs = np.hstack((obs['observation'], obs['desired_goal']))
        agent_info = agent.step(torchify_buffer(obs),
                                prev_action=None,
                                prev_reward=None)
        return agent_info.action.numpy()

    # env = HERGoalEnvWrapper(env)
    for _ in range(100):
        total_reward = 0
        o = env.reset()
        for _ in range(600):
            o, reward, done, info = env.step(policy_func(o))
            total_reward += reward
        print("total reward is :", total_reward)
    env.close()
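# Imports assumed by the two policy-evaluation snippets above (an rlkit-style
# checkpoint and an rlpyt-style checkpoint). The rlpyt paths below follow the
# public rlpyt package layout; treat them as a sketch and verify against your
# installed version:
import torch
from rlpyt.agents.qpg.sac_agent import SacAgent
from rlpyt.utils.buffer import torchify_buffer
from rlpyt.envs.gym import GymEnvWrapper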
def example():
    task = MyOwnTask()
    env = CausalWorld(task=task, enable_visualization=True)
    env.reset()
    for _ in range(2000):
        for _ in range(10):
            obs, reward, done, info = \
                env.step(env.action_space.sample())
        random_intervention_dict = env.do_single_random_intervention()
    env.close()
def goal_interventions():
    task = generate_task(task_generator_id='stacked_blocks')
    env = CausalWorld(task=task, enable_visualization=True)
    env.reset()
    for _ in range(10):
        for i in range(200):
            obs, reward, done, info = env.step(env.action_space.sample())
        goal_intervention_dict = env.sample_new_goal()
        print("new goal chosen: ", goal_intervention_dict)
        success_signal, obs = env.do_intervention(goal_intervention_dict)
        print("Goal Intervention success signal", success_signal)
    env.close()
def example():
    task = generate_task(task_generator_id='picking')
    env = CausalWorld(task=task, enable_visualization=True)
    env.reset()
    for _ in range(50):
        random_intervention_dict, success_signal, obs = \
            env.do_single_random_intervention()
        print("The random intervention performed is ",
              random_intervention_dict)
        for i in range(100):
            obs, reward, done, info = env.step(env.action_space.sample())
    env.close()
def test_timing_profile(self):
    from pybullet_envs.bullet.kukaGymEnv import KukaGymEnv
    import time
    kuka_env = KukaGymEnv(renders=False, isDiscrete=False)  # operates at 240 Hz
    task = generate_task(task_generator_id="pushing")
    causal_rl_env = CausalWorld(task=task,
                                enable_visualization=False,
                                seed=0,
                                skip_frame=10,
                                normalize_actions=False,
                                normalize_observations=False)  # operates at 250 Hz
    start = time.time()
    kuka_env.reset()
    end = time.time()
    kuka_reset_time = end - start

    start = time.time()
    causal_rl_env.reset()
    end = time.time()
    causal_rl_reset_time = end - start
    self.assertLess(causal_rl_reset_time, kuka_reset_time * 1.25)

    start = time.time()
    kuka_env.step(kuka_env.action_space.sample())
    end = time.time()
    kuka_step_time = end - start

    start = time.time()
    causal_rl_env.step(causal_rl_env.action_space.sample())
    end = time.time()
    causal_rl_step_time = end - start
    print("causal_rl step time", causal_rl_step_time)
    print("kuka step time", kuka_step_time)
    self.assertLess(causal_rl_step_time, kuka_step_time * 10)

    start = time.time()
    kuka_env.render()
    end = time.time()
    kuka_render_time = end - start

    start = time.time()
    causal_rl_env.render()
    end = time.time()
    causal_rl_render_time = end - start
    self.assertLess(causal_rl_render_time, kuka_render_time * 1.25)
    causal_rl_env.close()
    kuka_env.close()
    return
def without_intervention_split():
    task = generate_task(task_generator_id='pushing')
    env = CausalWorld(task=task, enable_visualization=True)
    env.reset()
    for _ in range(2):
        for i in range(200):
            obs, reward, done, info = env.step(env.action_space.sample())
        success_signal, obs = env.do_intervention(
            {'stage_color': np.random.uniform(0, 1, [3])})
        print("Intervention success signal", success_signal)
    env.close()
def test_parallelism(self):
    task = generate_task(task_generator_id="stacked_blocks")
    env1 = CausalWorld(task=task, enable_visualization=False, seed=0)
    env1.reset()
    task2 = generate_task(task_generator_id="stacked_blocks")
    env2 = CausalWorld(task=task2, enable_visualization=False, seed=0)
    observations_env1_v1, rewards_env1_v1, _, _ = env1.step(
        env1.action_space.low)
    env2.reset()
    observations_env2_v1, rewards_env2_v1, _, _ = env2.step(
        env2.action_space.low)
    env1.close()
    env2.close()
    assert np.array_equal(observations_env2_v1, observations_env1_v1)
    return
def end_effector_pos():
    task = generate_task(task_generator_id='reaching')
    env = CausalWorld(task=task,
                      enable_visualization=True,
                      action_mode="joint_positions",
                      normalize_actions=False,
                      normalize_observations=False)
    obs = env.reset()
    for _ in range(100):
        goal_dict = env.sample_new_goal()
        success_signal, obs = env.do_intervention(goal_dict)
        obs, reward, done, info = env.step(control_policy(env, obs))
        for _ in range(250):
            obs, reward, done, info = env.step(control_policy(env, obs))
    env.close()
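# The loop above calls a control_policy helper that is not shown in this
# snippet. A minimal sketch, assuming the robot exposes an inverse-kinematics
# helper get_joint_positions_from_tip_positions (verify the name against your
# causal_world version) and that, with normalize_observations=False, obs[-9:]
# holds the goal tip positions and obs[1:10] the current joint positions for
# the reaching task -- treat both index assumptions as illustrative:
def control_policy(env, obs):
    # map desired end-effector (tip) positions to joint-position actions
    return env.get_robot().get_joint_positions_from_tip_positions(
        obs[-9:], list(obs[1:10]))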
def example():
    task = generate_task(task_generator_id='picking')
    env = CausalWorld(task=task, enable_visualization=True)
    env.set_starting_state(
        {'goal_block': {
            'cartesian_position': [0.1, 0.1, 0.1]
        }})
    for _ in range(500):
        obs, reward, done, info = env.step(env.action_space.sample())
    env.reset_default_state()
    for _ in range(500):
        obs, reward, done, info = env.step(env.action_space.sample())
    env.reset()
    for _ in range(500):
        obs, reward, done, info = env.step(env.action_space.sample())
    env.close()
def with_intervention_split_2():
    task = generate_task(task_generator_id='pushing',
                         variables_space='space_b')
    env = CausalWorld(task=task, enable_visualization=False)
    interventions_space = task.get_intervention_space_a()
    env.reset()
    for _ in range(2):
        for i in range(200):
            obs, reward, done, info = env.step(env.action_space.sample())
        success_signal, obs = env.do_intervention({
            'stage_color':
                np.random.uniform(interventions_space['stage_color'][0],
                                  interventions_space['stage_color'][1])
        })
        print("Intervention success signal", success_signal)
    env.close()
def test_pd_gains():
    # control the robot using a pd controller
    np.random.seed(0)
    task = generate_task(task_generator_id='pushing')
    skip_frame = 1
    env = CausalWorld(task=task,
                      enable_visualization=False,
                      skip_frame=skip_frame,
                      normalize_observations=False,
                      normalize_actions=False,
                      seed=0)
    zero_hold = int(5000 / skip_frame)  # reach desired position in 4 secs?
    obs = env.reset()
    # test the bounds first: hold the zero action, then check convergence
    for _ in range(zero_hold):
        chosen_action = np.zeros(9)
        obs, reward, done, info = env.step(chosen_action)
    current_joint_positions = obs[1:10]
    if ((current_joint_positions - chosen_action) > 0.1).any():
        raise AssertionError(
            "The pd controller failed to reach these values {} but reached "
            "instead {}".format(chosen_action, current_joint_positions))
    # hold the upper bound of the action space, then check convergence
    for _ in range(zero_hold):
        chosen_action = env.action_space.high
        obs, reward, done, info = env.step(chosen_action)
    current_joint_positions = obs[1:10]
    if ((current_joint_positions - chosen_action) > 0.1).any():
        raise AssertionError(
            "The pd controller failed to reach these values {} but reached "
            "instead {}".format(chosen_action, current_joint_positions))
    # for i in range(200):
    #     # check for first finger
    #     chosen_action = np.random.uniform(env.action_space.low,
    #                                       env.action_space.high)
    #     chosen_action[3:] = env.action_space.low[3:]
    #     chosen_action[1] = 0
    #     chosen_action[2] = 0
    #     for _ in range(zero_hold):
    #         obs, reward, done, info = env.step(chosen_action)
    #     current_joint_positions = obs[:9]
    #     if ((current_joint_positions - chosen_action) > 0.1).any():
    #         raise AssertionError(
    #             "The pd controller failed to reach these values {} but "
    #             "reached instead {}".format(chosen_action,
    #                                         current_joint_positions))
    env.close()
def example():
    task = generate_task(task_generator_id='stacked_blocks')
    env = CausalWorld(task=task,
                      skip_frame=10,
                      enable_visualization=True,
                      seed=0,
                      action_mode="joint_positions",
                      observation_mode="pixel",
                      camera_indicies=[0, 1, 2])
    env.reset()
    for _ in range(5):
        obs, reward, done, info = env.step(env.action_space.sample())
    # show the six images of the last observation (with three cameras
    # selected, the current camera views are followed by the goal images)
    for i in range(6):
        plt.imshow(obs[i])
        plt.show()
    env.close()
def example():
    task = generate_task(task_generator_id='pick_and_place')
    env = CausalWorld(task=task, enable_visualization=True)
    env.reset()
    intervention_space = env.get_variable_space_used()
    for _ in range(100):
        for i in range(200):
            obs, reward, done, info = env.step(env.action_space.low)
        intervention = {
            'tool_block': {
                'size':
                    np.random.uniform(
                        intervention_space['tool_block']['size'][0],
                        intervention_space['tool_block']['size'][1])
            }
        }
        env.do_intervention(intervention)
    env.close()
def smooth_action():
    task = generate_task(task_generator_id='reaching')
    env = CausalWorld(task=task,
                      enable_visualization=True,
                      action_mode="joint_positions",
                      normalize_actions=True,
                      normalize_observations=True,
                      skip_frame=1)
    env = MovingAverageActionEnvWrapper(env)
    for _ in range(50):
        obs = env.reset()
        for _ in range(1000):
            desired_action = np.zeros([9])
            obs, reward, done, info = env.step(desired_action)
    env.close()
def example():
    # initialize env
    task = generate_task(task_generator_id='pick_and_place')
    env = CausalWorld(task, skip_frame=10, enable_visualization=True)
    # define a custom curriculum of interventions:
    # a goal intervention actor each episode after reset
    env = CurriculumWrapper(
        env,
        intervention_actors=[GoalInterventionActorPolicy()],
        actives=[(0, 1000000000, 1, 0)])
    for reset_idx in range(30):
        obs = env.reset()
        for time in range(300):
            obs, reward, done, info = env.step(env.action_space.low)
    env.close()
def example(): task = generate_task(task_generator_id="creative_stacked_blocks") env = CausalWorld(task=task, enable_visualization=False, seed=0) actions = [env.action_space.sample() for _ in range(200)] env.reset() observations_1 = [] rewards_1 = [] for i in range(200): observations, rewards, _, _ = env.step(actions[i]) if i == 100: state = env.get_state() observations_1.append(observations) rewards_1.append(rewards) env.set_state(state) for i in range(101, 200): observations, rewards, _, _ = env.step(actions[i]) assert np.array_equal(observations_1[i], observations) env.close()
def privileged_information():
    task = generate_task(task_generator_id='pushing')
    env = CausalWorld(task=task, enable_visualization=True)
    env.expose_potential_partial_solution()
    env.reset()
    for _ in range(10):
        goal_intervention_dict = env.sample_new_goal()
        success_signal, obs = env.do_intervention(goal_intervention_dict)
        print("Goal Intervention success signal", success_signal)
        for i in range(1000):
            obs, reward, done, info = env.step(env.action_space.low)
        print("now we solve it with privileged info")
        success_signal, obs = env.do_intervention(
            info['possible_solution_intervention'], check_bounds=False)
        print("Partial Solution Setting Intervention Success Signal",
              success_signal)
        for i in range(500):
            obs, reward, done, info = env.step(env.action_space.low)
    env.close()
def example():
    task = generate_task(task_generator_id='picking')
    env = CausalWorld(task=task, enable_visualization=True)
    env = ObjectSelectorWrapper(env)
    for _ in range(50):
        obs = env.reset()
        # go up
        for i in range(70):
            obs, reward, done, info = env.step([0, 1, 0])
        # rotate yaw
        for i in range(20):
            obs, reward, done, info = env.step([0, 0, 1])
        for i in range(50):
            obs, reward, done, info = env.step([0, 5, 0])
        for i in range(20):
            obs, reward, done, info = env.step([0, 0, 1])
        # print(obs)
        for i in range(50):
            obs, reward, done, info = env.step([0, 2, 0])
        # print(obs)
    env.close()
def example():
    # Here you learn how to record/log entire episodes into a directory
    # to reuse them later, e.g. for reviewing logged episodes or using the
    # data for pre-training policies.

    # Construct a data recorder that keeps track of every change in the
    # environment. We set the dump frequency of recorded episodes into log
    # files to 11 (the default is 100); the parameter spelling below follows
    # the causal_world API.
    data_recorder = DataRecorder(output_directory='pushing_episodes',
                                 rec_dumb_frequency=11)

    # Pass the data recorder to the World
    task = generate_task(task_generator_id='pushing')
    env = CausalWorld(task=task,
                      enable_visualization=True,
                      data_recorder=data_recorder)

    # Record some episodes
    for _ in range(23):
        env.reset()
        for _ in range(50):
            env.step(env.action_space.sample())
    env.close()

    # Load the logged episodes
    data = DataLoader(episode_directory='pushing_episodes')
    episode = data.get_episode(14)

    # Initialize a new environment according to a specific episode and replay it
    task = generate_task(episode.task_name, **episode.task_params)
    env = CausalWorld(task, **episode.world_params, enable_visualization=True)
    env.set_starting_state(episode.initial_full_state, check_bounds=False)
    for action in episode.robot_actions:
        env.step(action)
    env.close()

    # You can achieve the same by using the viewer module in one line
    viewer.view_episode(episode)
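# Imports assumed by the recording/replay example above (module paths as in
# the causal_world package docs; a sketch, verify against your installation):
from causal_world.loggers.data_recorder import DataRecorder
from causal_world.loggers.data_loader import DataLoader
import causal_world.viewers.task_viewer as viewer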
def test_reset_default_state(self):
    task = generate_task(task_generator_id="picking")
    env = CausalWorld(task=task, enable_visualization=False, seed=0)
    actions = [env.action_space.sample() for _ in range(200)]
    observations_1 = []
    rewards_1 = []
    env.reset()
    for i in range(200):
        observations, rewards, _, _ = env.step(actions[i])
        observations_1.append(observations)
        rewards_1.append(rewards)
    env.set_starting_state(
        {'goal_block': {
            'cylindrical_position': [0.1, np.pi, 0.1]
        }})
    for i in range(200):
        observations, rewards, _, _ = env.step(actions[i])
    env.reset_default_state()
    for i in range(200):
        observations, rewards, _, _ = env.step(actions[i])
        assert np.array_equal(observations_1[i], observations)
    env.close()
    return
def example():
    task = generate_task(task_generator_id='pushing')
    env = CausalWorld(task=task, enable_visualization=True)
    env.reset()
    counter = 0
    for _ in range(1):
        for i in range(210):
            obs, reward, done, info = env.step(env.action_space.low)
            if i % 50 == 0 and i > 0:
                print(i)
                intervention = {
                    'goal_block': {
                        'cartesian_position':
                            [0, -0.08 + (0.04 * counter), 0.0325],
                        'color': [0, 0, 1]
                    }
                }
                env.do_intervention(intervention, check_bounds=False)
                counter += 1
                print("intervention")
            if i == 201:
                intervention = {
                    'goal_block': {
                        'cartesian_position': [0, 0.08, 0.0325],
                        'color': [0, 1, 0]
                    }
                }
                env.do_intervention(intervention, check_bounds=False)
    env.close()
def example():
    # initialize env
    task_gen = generate_task(task_generator_id='pushing')
    env = CausalWorld(task_gen, skip_frame=1, enable_visualization=True)
    env = DeltaActionEnvWrapper(env)
    env = CurriculumWrapper(
        env,
        intervention_actors=[VisualInterventionActorPolicy()],
        actives=[(0, 20, 1, 0)])
    for reset_idx in range(10):
        obs = env.reset()
        for time in range(15):
            obs, reward, done, info = env.step(action=np.zeros(9))
    env.save_world('./')
    env.close()

    # now load it again
    env = load_world(tracker_relative_path='./', enable_visualization=True)
    for reset_idx in range(10):
        obs = env.reset()
        for time in range(15):
            obs, reward, done, info = env.step(action=np.zeros(9))
    env.close()
def run_mpc():
    task = generate_task(
        task_generator_id='picking',
        joint_positions=[
            -0.21737874, 0.55613149, -1.09308519, -0.12868997, 0.52551013,
            -1.08006493, -0.00221536, 0.46163487, -1.00948735
        ],
        tool_block_position=[0.0, 0, 0.035],
        fractional_reward_weight=1,
        dense_reward_weights=np.array([0, 10, 0, 1, 1, 0, 0, 0]))
    env = CausalWorld(task=task,
                      skip_frame=1,
                      enable_visualization=False,
                      seed=seed)
    true_model = SimulatorModel(_make_env, parallel_agents=parallel_agents)
    optimizer = CrossEntropyMethod(
        planning_horizon=horizon_length,
        max_iterations=max_iterations,
        population_size=num_of_particles,
        num_elite=num_elite,
        action_upper_bound=np.array(env.action_space.high),
        action_lower_bound=np.array(env.action_space.low),
        model=true_model)
    env.reset()
    actions = optimizer.get_actions()
    true_model.end_sim()
    recorder = VideoRecorder(env, 'picking.mp4')
    for i in range(horizon_length):
        for _ in range(skip_frame):
            recorder.capture_frame()
            obs, reward, done, info = env.step(actions[i])
    recorder.capture_frame()
    recorder.close()
    env.close()
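# The MPC snippet above references names defined elsewhere in the original
# script (seed, parallel_agents, horizon_length, max_iterations,
# num_of_particles, num_elite, skip_frame). A minimal sketch of the _make_env
# factory that SimulatorModel consumes, assuming it should build the same
# picking environment as above (illustrative, not the authors' exact code):
def _make_env():
    task = generate_task(
        task_generator_id='picking',
        joint_positions=[
            -0.21737874, 0.55613149, -1.09308519, -0.12868997, 0.52551013,
            -1.08006493, -0.00221536, 0.46163487, -1.00948735
        ],
        tool_block_position=[0.0, 0, 0.035],
        fractional_reward_weight=1,
        dense_reward_weights=np.array([0, 10, 0, 1, 1, 0, 0, 0]))
    # fixed seed here for the sketch; the script's `seed` variable is not shown
    return CausalWorld(task=task,
                       skip_frame=1,
                       enable_visualization=False,
                       seed=0)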
def test_goal_intervention(self):
    task = generate_task(task_generator_id='picking')
    env = CausalWorld(task=task,
                      enable_visualization=False,
                      normalize_observations=False)
    for _ in range(10):
        invalid_interventions_before = env.get_tracker(
        ).invalid_intervention_steps
        new_goal = env.sample_new_goal()
        env.set_starting_state(interventions_dict=new_goal)
        invalid_interventions_after = env.get_tracker(
        ).invalid_intervention_steps
        for _ in range(2):
            for _ in range(100):
                obs, reward, done, info = env.step(env.action_space.low)
            # TODO: this shouldn't be the case when the benchmark is
            # complete; it's a hack for now
            if invalid_interventions_before == invalid_interventions_after:
                assert np.array_equal(
                    cyl2cart(new_goal['goal_block']['cylindrical_position']),
                    obs[-7:-4])
            env.reset()
    env.close()
def test_determinism(self):
    task = generate_task(task_generator_id="stacked_blocks")
    observations_v1 = []
    observations_v2 = []
    observations_v3 = []
    rewards_v1 = []
    rewards_v2 = []
    rewards_v3 = []
    horizon = 30
    env_v1 = CausalWorld(task=task, enable_visualization=False, seed=27)
    obs = env_v1.reset()
    observations_v1.append(obs)
    for _ in range(horizon):
        obs, reward, done, info = env_v1.step(env_v1.action_space.low)
        observations_v1.append(obs)
        rewards_v1.append(reward)
    env_v1.close()

    task = generate_task(task_generator_id="stacked_blocks")
    env_v2 = CausalWorld(task=task, enable_visualization=False, seed=27)
    obs = env_v2.reset()
    observations_v2.append(obs)
    for _ in range(horizon):
        obs, reward, done, info = env_v2.step(env_v2.action_space.low)
        observations_v2.append(obs)
        rewards_v2.append(reward)
    env_v2.close()

    task = generate_task(task_generator_id="stacked_blocks")
    env_v3 = CausalWorld(task=task, enable_visualization=False, seed=54)
    obs = env_v3.reset()
    observations_v3.append(obs)
    for _ in range(horizon):
        obs, reward, done, info = env_v3.step(env_v3.action_space.low)
        observations_v3.append(obs)
        rewards_v3.append(reward)
    env_v3.close()

    assert all(
        np.array_equal(observations_v1[i], observations_v2[i])
        for i in range(horizon))
    assert rewards_v1 == rewards_v2
    assert all(
        np.array_equal(observations_v1[i], observations_v3[i])
        for i in range(horizon))
    assert rewards_v1 == rewards_v3
class TestPicking(unittest.TestCase):

    def setUp(self):
        self.task = generate_task(task_generator_id="picking")
        self.env = CausalWorld(task=self.task,
                               enable_visualization=False,
                               skip_frame=1,
                               action_mode="end_effector_positions",
                               normalize_actions=False,
                               normalize_observations=False)
        return

    def tearDown(self):
        self.env.close()
        return

    def test_determinism(self):
        self.env.set_action_mode('joint_positions')
        observations_1 = []
        rewards_1 = []
        horizon = 2000
        actions = [self.env.action_space.sample() for _ in range(horizon)]
        actions = np.array(actions)
        obs = self.env.reset()
        observations_1.append(obs)
        for i in range(horizon):
            obs, reward, done, info = self.env.step(actions[i])
            observations_1.append(obs)
            rewards_1.append(reward)
        for _ in range(10):
            observations_2 = []
            rewards_2 = []
            obs = self.env.reset()
            observations_2.append(obs)
            for i in range(horizon):
                obs, reward, done, info = self.env.step(actions[i])
                observations_2.append(obs)
                rewards_2.append(reward)
                if not np.array_equal(observations_1[i], observations_2[i]):
                    print(observations_1[i] - observations_2[i])
                assert np.array_equal(observations_1[i], observations_2[i])
            assert rewards_1 == rewards_2

    def lift_last_finger_first(self, current_obs):
        desired_action = current_obs[19:19 + 9]
        desired_action[6:] = [-0, -0.08, 0.4]
        for _ in range(250):
            obs, reward, done, info = self.env.step(desired_action)
        return desired_action

    def move_first_two_fingers(self, current_obs):
        desired_action = current_obs[19:19 + 9]
        desired_action[:6] = [
            0., 0.15313708, 0.05586292, 0.13262061, -0.07656854, 0.05586292
        ]
        for _ in range(250):
            obs, reward, done, info = self.env.step(desired_action)
        return obs

    def grip_block(self):
        grip_locations = get_suggested_grip_locations(
            self.env._task._stage.get_object('tool_block').get_size(),
            self.env._task._stage.get_object(
                'tool_block').world_to_cube_r_matrix())
        desired_action = np.zeros(9)
        desired_action[6:] = [-0, -0.08, 0.4]
        desired_action[:3] = grip_locations[0]
        desired_action[3:6] = grip_locations[1]
        # grasp the block now
        for _ in range(250):
            obs, reward, done, info = self.env.step(desired_action)
        return desired_action

    def lift_block(self, desired_grip):
        desired_action = desired_grip
        for _ in range(40):
            desired_action[2] += 0.005
            desired_action[5] += 0.005
            for _ in range(10):
                obs, reward, done, info = self.env.step(desired_action)
        return obs

    def test_02_mass(self):
        self.env.set_action_mode('end_effector_positions')
        intervention = {'tool_block': {'mass': 0.02}}
        self.env.do_intervention(interventions_dict=intervention)
        for _ in range(1):
            obs = self.env.reset()
            obs = self.move_first_two_fingers(obs)
            self.lift_last_finger_first(obs)
            desired_grip = self.grip_block()
            self.assertEqual(self.env.get_robot().get_tip_contact_states(),
                             [1, 1, 0], "contact states are not closed")
            final_obs = self.lift_block(desired_grip)
            self.assertGreater(final_obs[-22], 0.2,
                               "the block didn't get lifted")

    def test_08_mass(self):
        self.env.set_action_mode('end_effector_positions')
        intervention = {'tool_block': {'mass': 0.08}}
        self.env.do_intervention(interventions_dict=intervention)
        for _ in range(1):
            obs = self.env.reset()
            obs = self.move_first_two_fingers(obs)
            self.lift_last_finger_first(obs)
            desired_grip = self.grip_block()
            self.assertEqual(self.env.get_robot().get_tip_contact_states(),
                             [1, 1, 0], "contact states are not closed")
            final_obs = self.lift_block(desired_grip)
            self.assertGreater(final_obs[-22], 0.2,
                               "the block didn't get lifted")

    def test_1_mass(self):
        self.env.set_action_mode('end_effector_positions')
        intervention = {'tool_block': {'mass': 0.1}}
        self.env.do_intervention(interventions_dict=intervention)
        for _ in range(1):
            obs = self.env.reset()
            obs = self.move_first_two_fingers(obs)
            self.lift_last_finger_first(obs)
            desired_grip = self.grip_block()
            self.assertEqual(self.env.get_robot().get_tip_contact_states(),
                             [1, 1, 0], "contact states are not closed")
            final_obs = self.lift_block(desired_grip)
            self.assertGreater(final_obs[-22], 0.2,
                               "the block didn't get lifted")

    def test_determinism_w_interventions(self):
        self.env.set_action_mode('joint_positions')
        observations_1 = []
        rewards_1 = []
        horizon = 100
        actions = [self.env.action_space.sample() for _ in range(horizon)]
        actions = np.array(actions)
        new_goal = self.env.sample_new_goal()
        self.env.set_starting_state(interventions_dict=new_goal)
        for i in range(horizon):
            obs, reward, done, info = self.env.step(actions[i])
            observations_1.append(obs)
            rewards_1.append(reward)
        for _ in range(10):
            observations_2 = []
            rewards_2 = []
            self.env.reset()
            for i in range(horizon):
                obs, reward, done, info = self.env.step(actions[i])
                observations_2.append(obs)
                rewards_2.append(reward)
                assert np.array_equal(observations_1[i], observations_2[i])
            assert rewards_1 == rewards_2

    def test_determinism_w_in_episode_interventions(self):
        self.env.set_action_mode('joint_positions')
        observations_1 = []
        rewards_1 = []
        horizon = 100
        actions = [self.env.action_space.sample() for _ in range(horizon)]
        actions = np.array(actions)
        self.env.reset()
        for i in range(horizon):
            obs, reward, done, info = self.env.step(actions[i])
            observations_1.append(obs)
            rewards_1.append(reward)
        # now we will restart again and perform an in-episode intervention
        self.env.reset()
        for i in range(horizon):
            obs, reward, done, info = self.env.step(actions[i])
            if i == 50:
                success_signal = self.env.do_intervention({
                    'tool_block': {
                        'cylindrical_position': [0.1, np.pi / 2, 0.0325]
                    }
                })
        observations_2 = []
        rewards_2 = []
        self.env.reset()
        for i in range(horizon):
            obs, reward, done, info = self.env.step(actions[i])
            observations_2.append(obs)
            rewards_2.append(reward)
            if not np.array_equal(observations_1[i], observations_2[i]):
                print(observations_1[i] - observations_2[i])
            assert np.array_equal(observations_1[i], observations_2[i])
        assert rewards_1 == rewards_2

    def test_goal_intervention(self):
        task = generate_task(task_generator_id='picking')
        env = CausalWorld(task=task,
                          enable_visualization=False,
                          normalize_observations=False)
        for _ in range(10):
            invalid_interventions_before = env.get_tracker(
            ).invalid_intervention_steps
            new_goal = env.sample_new_goal()
            env.set_starting_state(interventions_dict=new_goal)
            invalid_interventions_after = env.get_tracker(
            ).invalid_intervention_steps
            for _ in range(2):
                for _ in range(100):
                    obs, reward, done, info = env.step(env.action_space.low)
                # TODO: this shouldn't be the case when the benchmark is
                # complete; it's a hack for now
                if invalid_interventions_before == invalid_interventions_after:
                    assert np.array_equal(
                        cyl2cart(
                            new_goal['goal_block']['cylindrical_position']),
                        obs[-7:-4])
                env.reset()
        env.close()
class TestCreativeStackedBlocks(unittest.TestCase):

    def setUp(self):
        self.task = generate_task(
            task_generator_id="creative_stacked_blocks")
        self.env = CausalWorld(task=self.task, enable_visualization=False)
        return

    def tearDown(self):
        self.env.close()
        return

    def test_determinism(self):
        observations_1 = []
        rewards_1 = []
        horizon = 100
        actions = [self.env.action_space.sample() for _ in range(horizon)]
        actions = np.array(actions)
        obs = self.env.reset()
        observations_1.append(obs)
        for i in range(horizon):
            obs, reward, done, info = self.env.step(actions[i])
            observations_1.append(obs)
            rewards_1.append(reward)
        for _ in range(10):
            observations_2 = []
            rewards_2 = []
            obs = self.env.reset()
            observations_2.append(obs)
            for i in range(horizon):
                obs, reward, done, info = self.env.step(actions[i])
                observations_2.append(obs)
                rewards_2.append(reward)
                if not np.array_equal(observations_1[i], observations_2[i]):
                    print(
                        np.array(observations_1[i]) -
                        np.array(observations_2[i]))
                assert np.array_equal(observations_1[i], observations_2[i])
            assert rewards_1 == rewards_2

    def test_determinism_w_interventions(self):
        observations_1 = []
        rewards_1 = []
        horizon = 100
        actions = [self.env.action_space.sample() for _ in range(horizon)]
        actions = np.array(actions)
        new_goal = self.env.sample_new_goal()
        self.env.set_starting_state(interventions_dict=new_goal)
        for i in range(horizon):
            obs, reward, done, info = self.env.step(actions[i])
            observations_1.append(obs)
            rewards_1.append(reward)
        for _ in range(10):
            observations_2 = []
            rewards_2 = []
            self.env.reset()
            for i in range(horizon):
                obs, reward, done, info = self.env.step(actions[i])
                observations_2.append(obs)
                rewards_2.append(reward)
                if not np.array_equal(observations_1[i], observations_2[i]):
                    print(
                        np.array(observations_1[i]) -
                        np.array(observations_2[i]))
                assert np.array_equal(observations_1[i], observations_2[i])
            assert rewards_1 == rewards_2

    def test_determinism_w_in_episode_interventions(self):
        observations_1 = []
        rewards_1 = []
        horizon = 100
        actions = [self.env.action_space.sample() for _ in range(horizon)]
        actions = np.array(actions)
        self.env.reset()
        for i in range(horizon):
            obs, reward, done, info = self.env.step(actions[i])
            observations_1.append(obs)
            rewards_1.append(reward)
        # now we will restart again and perform an in-episode intervention
        self.env.reset()
        for i in range(horizon):
            obs, reward, done, info = self.env.step(actions[i])
            if i == 50:
                success_signal = self.env.do_intervention(
                    {'tool_level_0_num_1': {
                        'cylindrical_position': [0, 0, 0.2]
                    }})
        observations_2 = []
        rewards_2 = []
        self.env.reset()
        for i in range(horizon):
            obs, reward, done, info = self.env.step(actions[i])
            observations_2.append(obs)
            rewards_2.append(reward)
            assert np.array_equal(observations_1[i], observations_2[i])
        assert rewards_1 == rewards_2
class TestReaching(unittest.TestCase):

    def setUp(self):
        self.task = generate_task(task_generator_id="reaching")
        self.env = CausalWorld(task=self.task,
                               enable_visualization=False,
                               action_mode='joint_positions',
                               normalize_observations=False,
                               normalize_actions=False)
        return

    def tearDown(self):
        self.env.close()
        return

    # def test_forward_kinemetics(self):
    #     horizon = 100
    #     obs = self.env.reset()
    #     desired_goal = obs[-9:]
    #     for i in range(horizon):
    #         obs, reward, done, info = self.env.step(desired_goal)
    #         print(obs[19:28])

    def test_determinism(self):
        observations_1 = []
        rewards_1 = []
        horizon = 100
        actions = [self.env.action_space.sample() for _ in range(horizon)]
        actions = np.array(actions)
        obs = self.env.reset()
        observations_1.append(obs)
        for i in range(horizon):
            obs, reward, done, info = self.env.step(actions[i])
            observations_1.append(obs)
            rewards_1.append(reward)
        for _ in range(10):
            observations_2 = []
            rewards_2 = []
            obs = self.env.reset()
            observations_2.append(obs)
            for i in range(horizon):
                obs, reward, done, info = self.env.step(actions[i])
                observations_2.append(obs)
                rewards_2.append(reward)
                if not np.array_equal(observations_1[i], observations_2[i]):
                    print(
                        np.array(observations_1[i]) -
                        np.array(observations_2[i]))
                assert np.array_equal(observations_1[i], observations_2[i])
            assert rewards_1 == rewards_2

    def test_determinism_w_interventions(self):
        observations_1 = []
        rewards_1 = []
        horizon = 100
        actions = [self.env.action_space.sample() for _ in range(horizon)]
        actions = np.array(actions)
        new_goal = self.env.sample_new_goal()
        self.env.set_starting_state(interventions_dict=new_goal)
        for i in range(horizon):
            obs, reward, done, info = self.env.step(actions[i])
            observations_1.append(obs)
            rewards_1.append(reward)
        for _ in range(10):
            observations_2 = []
            rewards_2 = []
            self.env.reset()
            for i in range(horizon):
                obs, reward, done, info = self.env.step(actions[i])
                observations_2.append(obs)
                rewards_2.append(reward)
                assert np.array_equal(observations_1[i], observations_2[i])
            assert rewards_1 == rewards_2

    def test_determinism_w_in_episode_interventions(self):
        observations_1 = []
        rewards_1 = []
        horizon = 100
        actions = [self.env.action_space.sample() for _ in range(horizon)]
        actions = np.array(actions)
        self.env.reset()
        for i in range(horizon):
            obs, reward, done, info = self.env.step(actions[i])
            observations_1.append(obs)
            rewards_1.append(reward)
        # now we will restart again and perform an in-episode intervention
        self.env.reset()
        for i in range(horizon):
            obs, reward, done, info = self.env.step(actions[i])
            if i == 50:
                new_goal = self.env.sample_new_goal()
                success_signal = self.env.do_intervention(new_goal)
        observations_2 = []
        rewards_2 = []
        self.env.reset()
        for i in range(horizon):
            obs, reward, done, info = self.env.step(actions[i])
            observations_2.append(obs)
            rewards_2.append(reward)
            assert np.array_equal(observations_1[i], observations_2[i])
        assert rewards_1 == rewards_2