def test(self):
    """Round-trip saveToFile/loadFromFile mid-episode on house_building_3.

    Saves after three planner steps, restores into a brand-new env, finishes
    the episode, then verifies a finished episode can also be saved/loaded.
    """
    self.env_config['render'] = False
    self.env_config['seed'] = 2
    self.env_config['random_orientation'] = False
    env = env_factory.createEnvs(1, 'rl', 'pybullet', 'house_building_3', self.env_config)
    env.reset()
    # First half of the episode: remaining-step counter counts 5 -> 3.
    for expected_left in (5, 4, 3):
        planned = env.getNextAction()
        states_, in_hands_, obs_, rewards, dones = env.step(planned, auto_reset=False)
        self.assertEqual(env.getStepLeft(), expected_left)
    env.saveToFile('save')
    env.close()
    # Restore into a fresh env and play out the rest of the episode.
    env = env_factory.createEnvs(1, 'rl', 'pybullet', 'house_building_3', self.env_config)
    env.reset()
    env.loadFromFile('save')
    for expected_left in (2, 1, 0):
        planned = env.getNextAction()
        states_, in_hands_, obs_, rewards, dones = env.step(planned, auto_reset=False)
        self.assertEqual(env.getStepLeft(), expected_left)
    env.saveToFile('save')
    env.close()
    # A completed episode must load cleanly as well.
    env = env_factory.createEnvs(1, 'rl', 'pybullet', 'house_building_3', self.env_config)
    env.reset()
    env.loadFromFile('save')
    env.close()
def testPlanner(self):
    """Manual visual check: endlessly reset a rendered improvise_house_building_4 env.

    NOTE(review): a commented-out 1000-episode planner benchmark (success rate
    plus plan/step timing reported through tqdm) used to live here; recover it
    from version control if throughput numbers are needed again.
    """
    self.env_config['render'] = True
    env = env_factory.createEnvs(1, 'rl', 'pybullet', 'improvise_house_building_4', self.env_config, {})
    # Intentional infinite loop: this "test" exists only to eyeball reset layouts.
    while True:
        env.reset()
def testPlanner(self):
    """Benchmark the planner on house_building_1 (5 objects) for 1000 episodes.

    Reports success ratio, plan time, action time, and mean step time via tqdm.
    """
    self.env_config['render'] = False
    self.env_config['random_orientation'] = True
    self.env_config['num_objects'] = 5
    env = env_factory.createEnvs(1, 'pybullet', 'house_building_1', self.env_config, self.planner_config)
    episodes_done = 0
    successes = 0
    durations = []
    env.reset()
    pbar = tqdm(total=1000)
    while episodes_done < 1000:
        tic = time.time()
        planned = env.getNextAction()
        plan_time = time.time() - tic
        (states_, in_hands_, obs_), rewards, dones = env.step(planned, auto_reset=True)
        act_time = time.time() - tic - plan_time
        durations.append(time.time() - tic)
        successes += rewards.sum()
        if dones.sum():
            episodes_done += dones.sum()
            pbar.set_description(
                '{:.3f}, plan time: {:.2f}, action time: {:.2f}, avg step time: {:.2f}'
                .format(float(successes) / episodes_done if episodes_done != 0 else 0,
                        plan_time, act_time, np.mean(durations)))
        pbar.update(dones.sum())
    env.close()
def testPlanner(self):
    """Planner solves brick_stacking in four steps with a fixed seed."""
    self.env_config['render'] = True
    self.env_config['seed'] = 1
    env = env_factory.createEnvs(1, 'rl', 'pybullet', 'brick_stacking', self.env_config)
    env.reset()
    # The remaining-step counter must count down 3, 2, 1, 0.
    for expected_left in range(3, -1, -1):
        planned = env.getNextAction()
        states_, in_hands_, obs_, rewards, dones = env.step(planned, auto_reset=False)
        self.assertEqual(env.getStepLeft(), expected_left)
    env.close()

# NOTE(review): a commented-out multi-process sparse-reward benchmark
# (testPlanner2) previously followed this method; see version control.
def testPlanner(self):
    """Benchmark the random_picking planner for 1000 episodes with timing stats."""
    self.env_config['render'] = True
    env = env_factory.createEnvs(1, 'rl', 'pybullet', 'random_picking', self.env_config, {})
    episodes_done = 0
    successes = 0
    durations = []
    env.reset()
    pbar = tqdm(total=1000)
    while episodes_done < 1000:
        tic = time.time()
        planned = env.getNextAction()
        plan_time = time.time() - tic
        states_, in_hands_, obs_, rewards, dones = env.step(planned)
        act_time = time.time() - tic - plan_time
        durations.append(time.time() - tic)
        # Rewards are accumulated every step, not only on episode end.
        successes += rewards.sum().int().item()
        if dones.sum():
            episodes_done += dones.sum().int().item()
            pbar.set_description(
                '{:.3f}, plan time: {:.2f}, action time: {:.2f}, avg step time: {:.2f}'
                .format(float(successes) / episodes_done if episodes_done != 0 else 0,
                        plan_time, act_time, np.mean(durations))
            )
            pbar.update(dones.sum().int().item())
    env.close()
def testPlanner(self):
    """Numpy block_stacking: display the heightmap after reset and each planner step."""
    env = env_factory.createEnvs(1, 'rl', 'numpy', 'block_stacking', self.env_config)
    states_, in_hands_, obs_ = env.reset()
    plt.imshow(obs_.squeeze())
    plt.show()
    for expected_left in range(5, -1, -1):
        planned = env.getNextAction()
        states_, in_hands_, obs_, rewards, dones = env.step(planned, auto_reset=False)
        plt.imshow(obs_.squeeze())
        plt.show()
        self.assertEqual(env.getStepLeft(), expected_left)
    env.close()

# NOTE(review): a commented-out sparse-reward house_building_1 benchmark
# (testPlanner2) previously followed this method; see version control.
def testPlanner2(self):
    """Sparse-reward block_stacking success-rate benchmark across 20 parallel envs.

    Prints cumulative successes/episodes whenever at least one env finishes.
    """
    self.env_config['render'] = False
    self.env_config['reward_type'] = 'sparse'
    self.env_config['random_orientation'] = True
    self.env_config['num_objects'] = 4
    env = env_factory.createEnvs(20, 'rl', 'pybullet', 'block_stacking', self.env_config, {'half_rotation': True})
    total = 0
    s = 0
    states, in_hands, obs = env.reset()
    while total < 1000:
        action = env.getNextAction()
        states_, in_hands_, obs_, rewards, dones = env.step(action)
        # (Commented-out pixel-coordinate heightmap debugging removed; see history.)
        obs = obs_
        if dones.sum():
            s += rewards.sum().int().item()
            total += dones.sum().int().item()
            print('{}/{}'.format(s, total))
    # Fix: the env was never closed, leaking the 20 simulator worker processes.
    env.close()
def testPlanner2(self):
    """Benchmark multi-process block_stacking planner success rate with timing.

    Runs 5 parallel envs until 1000 episodes finish and reports success ratio,
    plan time, action time, and average step time through tqdm.
    """
    self.env_config['render'] = True
    self.env_config['seed'] = 0
    self.env_config['physics_mode'] = 'fast'
    num_processes = 5
    env = env_factory.createEnvs(num_processes, 'pybullet', 'block_stacking', self.env_config, self.planner_config)
    total = 0
    s = 0
    step_times = []
    env.reset()
    pbar = tqdm(total=1000)
    while total < 1000:
        t0 = time.time()
        action = env.getNextAction()
        t_plan = time.time() - t0
        (states_, in_hands_, obs_), rewards, dones = env.step(action)
        s += rewards.sum()
        total += dones.sum()
        t_action = time.time() - t0 - t_plan
        step_times.append(time.time() - t0)
        pbar.set_description(
            '{}/{}, SR: {:.3f}, plan time: {:.2f}, action time: {:.2f}, avg step time: {:.2f}'
            .format(s, total, float(s) / total if total != 0 else 0,
                    t_plan, t_action, np.mean(step_times))
        )
        # Fix: advance the progress bar; previously only the description was set,
        # so the bar stayed at 0 for the whole run.
        pbar.update(dones.sum())
    env.close()
def testPlanner2(self):
    """Sparse-reward house_building_4 benchmark with the kuka arm.

    Historical success rates recorded by the original author:
    0.837 kuka, 0.951 ur5, 0.950 ur5_robotiq.
    """
    self.env_config['render'] = False
    self.env_config['reward_type'] = 'sparse'
    self.env_config['random_orientation'] = True
    self.env_config['robot'] = 'kuka'
    env = env_factory.createEnvs(10, 'rl', 'pybullet', 'house_building_4', self.env_config, {})
    total = 0
    s = 0
    env.reset()
    while total < 1000:
        # (Unused t0 timing scaffolding and commented-out debug prints removed.)
        action = env.getNextAction()
        states_, in_hands_, obs_, rewards, dones = env.step(action)
        if dones.sum():
            s += rewards.sum().int().item()
            total += dones.sum().int().item()
            print('{}/{}'.format(s, total))
    # Fix: close the 10 simulator worker processes instead of leaking them.
    env.close()
def testSuccess(self):
    """house_building_3 with two distractor objects is solved in six planner steps."""
    self.env_config['seed'] = 0
    self.env_config['random_orientation'] = False
    num_random_o = 2
    self.env_config['num_random_objects'] = num_random_o
    self.env_config['render'] = True
    env = env_factory.createEnvs(1, 'rl', 'pybullet', 'house_building_3', self.env_config)
    env.reset()
    # Remaining-step counter must count down 5, 4, 3, 2, 1, 0.
    for expected_left in range(5, -1, -1):
        planned = env.getNextAction()
        states_, in_hands_, obs_, rewards, dones = env.step(planned, auto_reset=False)
        self.assertEqual(env.getStepLeft(), expected_left)
    env.close()
def testReset(self):
    """Manual visual check: repeatedly reset a rendered ramp_block_stacking env."""
    self.env_config['render'] = True
    num_processes = 1
    env = env_factory.createEnvs(num_processes, 'pybullet', 'ramp_block_stacking',
                                 self.env_config, self.planner_config)
    # Intentional infinite loop: this test is for eyeballing reset layouts.
    while True:
        states, hand_obs, depths = env.reset()
        print(1)
def testPlanner(self):
    """Planner solves house_building_3 in six steps, counting down the steps left."""
    env = env_factory.createEnvs(1, 'rl', 'pybullet', 'house_building_3', self.env_config)
    env.reset()
    for expected_left in range(5, -1, -1):
        planned = env.getNextAction()
        states_, in_hands_, obs_, rewards, dones = env.step(planned, auto_reset=False)
        self.assertEqual(env.getStepLeft(), expected_left)
    env.close()
def testPlanner(self):
    """Manual visual check: repeatedly reset a rendered ramp_house_building_1 env."""
    self.env_config['render'] = True
    num_processes = 1
    env = env_factory.createEnvs(num_processes, 'pybullet', 'ramp_house_building_1',
                                 self.env_config, self.planner_config)
    # Intentional infinite loop: this test is for eyeballing reset layouts.
    while True:
        env.reset()
        print(1)
def testPlanner(self):
    """Run six planner steps on improvise_house_building_3_deconstruct.

    Per the original assertions, every step is expected to report done == 1.
    """
    self.env_config['render'] = True
    self.env_config['seed'] = 0
    env = env_factory.createEnvs(1, 'pybullet', 'improvise_house_building_3_deconstruct',
                                 self.env_config, self.planner_config)
    env.reset()
    for _ in range(6):
        planned = env.getNextAction()
        (states_, in_hands_, obs_), rewards, dones = env.step(planned, auto_reset=False)
        self.assertEqual(dones, 1)
    env.close()
def testBlockValidBrickOrRoofOnBlock(self):
    """Check step counting around save/restore on house_building_3 with distractors.

    After two planner steps the env is saved; scripted misplacements are applied
    and the remaining-step counter is checked; then the env is restored and a
    different scripted sequence is checked against the same saved state.
    """
    self.env_config['seed'] = 1
    self.env_config['random_orientation'] = False
    num_random_o = 2
    self.env_config['num_random_objects'] = num_random_o
    self.env_config['render'] = True
    env = env_factory.createEnvs(1, 'rl', 'pybullet', 'house_building_3', self.env_config)
    env.reset()

    def planned_step(expected_left):
        # One planner-chosen step; remaining steps must match.
        states_, in_hands_, obs_, rewards, dones = env.step(env.getNextAction(), auto_reset=False)
        self.assertEqual(env.getStepLeft(), expected_left)

    def manual_step(primitive, target, expected_left):
        # Scripted pick (0) / place (1) at a previously fetched object position.
        act = torch.tensor([primitive, target[0], target[1], 0]).unsqueeze(0)
        states_, in_hands_, obs_, rewards, dones = env.step(act, auto_reset=False)
        self.assertEqual(env.getStepLeft(), expected_left)

    planned_step(5)
    planned_step(4)
    env.save()
    # Positions are fetched once and deliberately reused for both actions.
    position = list(env.getObjPositions())[0]
    manual_step(0, position[3 + num_random_o], 3)
    manual_step(1, position[0 + num_random_o], 4)
    planned_step(3)
    planned_step(2)
    env.restore()
    position = list(env.getObjPositions())[0]
    manual_step(0, position[2 + num_random_o], 5)
    manual_step(1, position[0 + num_random_o], 6)
    planned_step(5)
    planned_step(4)
    env.close()
def testStepLeft(self):
    """Planner solves house_building_4 in ten steps, counting down the steps left."""
    self.env_config['seed'] = 1
    env = env_factory.createEnvs(1, 'pybullet', 'house_building_4', self.env_config, self.planner_config)
    env.reset()
    for expected_left in range(9, -1, -1):
        planned = env.getNextAction()
        (states_, in_hands_, obs_), rewards, dones = env.step(planned, auto_reset=False)
        self.assertEqual(env.getStepsLeft(), expected_left)
    env.close()
def testPlanner(self):
    """Planner runs block_stacking for six steps.

    Per the original assertions, every step is expected to report reward == 1
    and done == 1 while the steps-left counter counts down.
    """
    self.env_config['render'] = True
    env = env_factory.createEnvs(1, 'pybullet', 'block_stacking', self.env_config, self.planner_config)
    env.reset()
    for expected_left in range(5, -1, -1):
        planned = env.getNextAction()
        (states_, in_hands_, obs_), rewards, dones = env.step(planned, auto_reset=False)
        self.assertEqual(env.getStepsLeft(), expected_left)
        self.assertEqual(rewards, 1)
        self.assertEqual(dones, 1)
    env.close()
def testStepLeft(self):
    """house_building_2: scripted picks interleaved with planner steps.

    Verifies the steps-left counter and done flag after each action.
    """
    num_random_o = 0
    self.env_config['num_random_objects'] = num_random_o
    self.env_config['render'] = True
    env = env_factory.createEnvs(1, 'pybullet', 'house_building_2', self.env_config, self.planner_config)
    env.reset()

    def scripted_pick(obj_idx, expected_left, expected_done):
        # Pick (primitive 0) at the current position of object obj_idx.
        positions = env.getObjectPositions()[0]
        target = positions[obj_idx + num_random_o]
        act = np.array([[0, target[0], target[1], 0]])
        (states_, in_hands_, obs_), rewards, dones = env.step(act, auto_reset=False)
        self.assertEqual(env.getStepsLeft(), expected_left)
        self.assertEqual(dones, expected_done)

    def planner_step(expected_left, expected_done):
        (states_, in_hands_, obs_), rewards, dones = env.step(env.getNextAction(), auto_reset=False)
        self.assertEqual(env.getStepsLeft(), expected_left)
        self.assertEqual(dones, expected_done)

    scripted_pick(2, 5, 0)   # pick up the roof
    planner_step(4, 0)
    scripted_pick(1, 3, 0)
    planner_step(2, 0)
    planner_step(1, 0)
    planner_step(0, 1)
    env.close()
def testBlockNotValidRoofOnBrickOnBlock(self):
    """Scripted invalid placements on house_building_3 increase steps left;
    the planner then recovers, decreasing the counter step by step."""
    self.env_config['seed'] = 0
    self.env_config['random_orientation'] = False
    self.env_config['render'] = True
    env = env_factory.createEnvs(1, 'pybullet', 'house_building_3', self.env_config, self.planner_config)
    env.reset()
    env.save()

    def scripted(primitive, target, expected_left):
        # Pick (0) / place (1) at a previously fetched object position.
        act = np.array([[primitive, target[0], target[1], 0]])
        (states_, in_hands_, obs_), rewards, dones = env.step(act, auto_reset=False)
        self.assertEqual(env.getStepsLeft(), expected_left)

    def planned(expected_left):
        (states_, in_hands_, obs_), rewards, dones = env.step(env.getNextAction(), auto_reset=False)
        self.assertEqual(env.getStepsLeft(), expected_left)

    # Positions fetched once and reused for the following pick/place pair.
    position = list(env.getObjectPositions())[0]
    scripted(0, position[1], 7)
    scripted(1, position[2], 8)
    position = list(env.getObjectPositions())[0]
    scripted(0, position[0], 9)
    scripted(1, position[2], 10)
    planned(9)
    planned(8)
    planned(7)
    planned(6)
    env.close()
def test(self):
    """house_building_5 reward/done sequence under a scripted pick/place plan.

    Each entry of the script is (primitive, object index, expected done,
    expected reward); object positions are re-fetched before every action.
    A reward of -1 is expected when a previously placed object is picked back up.
    """
    self.env_config['render'] = True
    env = env_factory.createEnvs(1, 'rl', 'pybullet', 'house_building_5', self.env_config, {})
    env.reset()
    script = [
        (0, 0, 0, 0),    # pick first block
        (1, 2, 0, 1),    # place -> progress reward
        (0, 0, 0, -1),   # pick it back up -> progress undone
        (1, 2, 0, 1),    # place again
        (0, 1, 0, 0),
        (1, 3, 0, 1),
        (0, 2, 0, 0),
        (1, 4, 1, 1),    # final placement completes the episode
    ]
    for primitive, obj_idx, expected_done, expected_reward in script:
        target = env.getObjPositions()[0][obj_idx]
        act = torch.tensor([primitive, target[0], target[1], 0]).unsqueeze(0)
        states_, in_hands_, obs_, rewards, dones = env.step(act, auto_reset=False)
        self.assertEqual(dones, expected_done)
        self.assertEqual(rewards, expected_reward)
    # Fix: the env was never closed, leaking the simulator process.
    env.close()
def testPlanner(self):
    """improvise_house_building_2 planner success rate over 1000 episodes."""
    self.env_config['render'] = True
    env = env_factory.createEnvs(1, 'rl', 'pybullet', 'improvise_house_building_2', self.env_config, {})
    episodes_done = 0
    successes = 0
    env.reset()
    while episodes_done < 1000:
        states_, in_hands_, obs_, rewards, dones = env.step(env.getNextAction())
        if dones.sum():
            successes += rewards.sum().int().item()
            episodes_done += dones.sum().int().item()
            print('{}/{}'.format(successes, episodes_done))
    env.close()
def testPlanner(self):
    """house_building_4 with two distractors: ten planner steps, done only on the last."""
    self.env_config['seed'] = 0
    num_random_o = 2
    self.env_config['num_random_objects'] = num_random_o
    self.env_config['render'] = True
    env = env_factory.createEnvs(1, 'rl', 'pybullet', 'house_building_4', self.env_config)
    env.reset()
    for expected_left in range(9, -1, -1):
        planned = env.getNextAction()
        states_, in_hands_, obs_, rewards, dones = env.step(planned, auto_reset=False)
        # done must be set only on the final step of the episode.
        if expected_left == 0:
            self.assertTrue(dones)
        else:
            self.assertFalse(dones)
        self.assertEqual(env.getStepLeft(), expected_left)
    env.close()
def testPlanner2(self):
    """Sparse-reward house_building_3 benchmark with the ur5_robotiq gripper.

    Prints cumulative successes/episodes whenever at least one env finishes.
    """
    self.env_config['render'] = False
    self.env_config['reward_type'] = 'sparse'
    self.env_config['random_orientation'] = True
    self.env_config['robot'] = 'ur5_robotiq'
    env = env_factory.createEnvs(10, 'rl', 'pybullet', 'house_building_3', self.env_config, {})
    total = 0
    s = 0
    env.reset()
    while total < 1000:
        states_, in_hands_, obs_, rewards, dones = env.step(env.getNextAction())
        # (Commented-out in-hand image debugging removed; see history.)
        if dones.sum():
            s += rewards.sum().int().item()
            total += dones.sum().int().item()
            print('{}/{}'.format(s, total))
    # Fix: close the 10 simulator worker processes instead of leaking them.
    env.close()
def testPlanner(self):
    """Deconstruction benchmark over 20 parallel envs.

    An episode counts as a success only when it finishes in exactly
    2 * (num_objects - 1) steps, i.e. one pick and one place per object moved.
    """
    self.env_config['render'] = False
    num_processes = 20
    env = env_factory.createEnvs(
        num_processes, 'pybullet', 'ramp_improvise_house_building_3_deconstruct',
        self.env_config, self.planner_config)
    total = 0
    s = 0
    step_times = []
    env.reset()
    pbar = tqdm(total=1000)
    steps = [0] * num_processes
    while total < 1000:
        tic = time.time()
        planned = env.getNextAction()
        plan_time = time.time() - tic
        (states_, in_hands_, obs_), rewards, dones = env.step(planned, auto_reset=False)
        act_time = time.time() - tic - plan_time
        step_times.append(time.time() - tic)
        steps = [count + 1 for count in steps]
        num_objects = [len(p) for p in env.getObjectPositions()]
        for idx in range(num_processes):
            if dones[idx]:
                if steps[idx] == 2 * (num_objects[idx] - 1):
                    s += 1
                total += 1
                steps[idx] = 0
        # Manually reset only the finished envs (auto_reset is disabled above).
        finished = np.nonzero(dones)[0]
        if finished.shape[0] != 0:
            env.reset_envs(finished)
        pbar.set_description(
            '{}/{}, SR: {:.3f}, plan time: {:.2f}, action time: {:.2f}, avg step time: {:.2f}'
            .format(s, total, float(s) / total if total != 0 else 0,
                    plan_time, act_time, np.mean(step_times)))
        pbar.update(total - pbar.n)
    env.close()
def testPlanner2(self):
    """Sparse-reward house_building_1 benchmark with 5 objects and 3 distractors.

    Prints cumulative successes/episodes whenever at least one env finishes.
    """
    self.env_config['render'] = False
    self.env_config['reward_type'] = 'sparse'
    self.env_config['random_orientation'] = True
    self.env_config['num_objects'] = 5
    self.env_config['num_random_objects'] = 3
    env = env_factory.createEnvs(10, 'rl', 'pybullet', 'house_building_1',
                                 self.env_config, self.planner_config)
    total = 0
    s = 0
    env.reset()
    while total < 1000:
        states_, in_hands_, obs_, rewards, dones = env.step(env.getNextAction())
        if dones.sum():
            s += rewards.sum().int().item()
            total += dones.sum().int().item()
            print('{}/{}'.format(s, total))
    # Fix: close the 10 simulator worker processes instead of leaking them.
    env.close()
import unittest
import time
import numpy as np
import torch
import matplotlib.pyplot as plt

from helping_hands_rl_envs.envs.house_building_3_env import createHouseBuilding3Env
from helping_hands_rl_envs.envs.pybullet_env import PyBulletEnv
from helping_hands_rl_envs import env_factory

# Stand-alone benchmark: run the house_building_3 planner for 1000 episodes
# and print the cumulative success ratio after each finished episode.

# Workspace bounds (x, y, z ranges) for the arm.
workspace = np.asarray([[0.35, 0.65], [-0.15, 0.15], [0, 0.50]])
env_config = {'workspace': workspace, 'max_steps': 10, 'obs_size': 90, 'render': True,
              'fast_mode': True, 'seed': 0, 'action_sequence': 'pxyr', 'num_objects': 4,
              'random_orientation': True, 'reward_type': 'sparse', 'simulate_grasp': True,
              'perfect_grasp': False, 'robot': 'ur5_robotiq', 'workspace_check': 'point',
              'in_hand_mode': 'raw'}

env = env_factory.createEnvs(1, 'rl', 'pybullet', 'house_building_3', env_config, {})
total = 0
s = 0
env.reset()
while total < 1000:
    states_, in_hands_, obs_, rewards, dones = env.step(env.getNextAction())
    # (Commented-out in-hand image debugging removed; see history.)
    if dones.sum():
        s += rewards.sum().int().item()
        total += dones.sum().int().item()
        print('{}/{}'.format(s, total))
# Fix: shut the simulator process down when the benchmark finishes.
env.close()
def testStepLeft(self):
    """house_building_2 with distractors: scripted picks interleaved with planner steps.

    Verifies the remaining-step counter and done flag after each action.
    """
    num_random_o = 2
    self.env_config['num_random_objects'] = num_random_o
    self.env_config['render'] = True
    env = env_factory.createEnvs(1, 'rl', 'pybullet', 'house_building_2', self.env_config, {})
    env.reset()

    def scripted_pick(obj_idx, expected_left, expected_done):
        # Pick (primitive 0) at the current position of object obj_idx.
        positions = env.getObjPositions()[0]
        target = positions[obj_idx + num_random_o]
        act = torch.tensor([0, target[0], target[1], 0]).unsqueeze(0)
        states_, in_hands_, obs_, rewards, dones = env.step(act, auto_reset=False)
        self.assertEqual(env.getStepLeft(), expected_left)
        self.assertEqual(dones, expected_done)

    def planner_step(expected_left, expected_done):
        states_, in_hands_, obs_, rewards, dones = env.step(env.getNextAction(), auto_reset=False)
        self.assertEqual(env.getStepLeft(), expected_left)
        self.assertEqual(dones, expected_done)

    scripted_pick(2, 5, 0)   # pick up the roof
    planner_step(4, 0)
    scripted_pick(1, 3, 0)
    planner_step(2, 0)
    planner_step(1, 0)
    planner_step(0, 1)
    env.close()

# NOTE(review): a commented-out sparse-reward benchmark (testPlanner2) previously
# followed this method; recover it from version control if needed.
import time
import numpy as np
import matplotlib.pyplot as plt
import torch

import helping_hands_rl_envs.env_factory as env_factory

# Stand-alone visual walkthrough: step the brick_stacking data env with the
# planner, showing the heightmap before each action and once after the episode.

# Workspace bounds (x, y, z ranges) for the arm.
workspace = np.array([[0.35, 0.65], [-0.15, 0.15], [0, 1]])
env_config = {'workspace': workspace, 'max_steps': 10, 'obs_size': 128,
              'action_sequence': 'pxy', 'num_cubes': 2, 'render': True,
              'fast_mode': True, 'random_orientation': True}
planner_config = {'pos_noise': 0.0}

envs = env_factory.createEnvs(1, 'data', 'pybullet', 'brick_stacking', env_config,
                              planner_config=planner_config)
state, hand_obs, obs = envs.reset()
done = False
while not done:
    plt.imshow(obs.squeeze(), cmap='gray', vmin=0.0, vmax=0.1)
    plt.show()
    action = envs.getNextAction()
    state_, hand_obs_, obs_, reward, done, valid = envs.step(action)
    obs = obs_
    hand_obs = hand_obs_
plt.imshow(obs.squeeze(), cmap='gray', vmin=0.0, vmax=0.1)
plt.show()
import time
import numpy as np
import matplotlib.pyplot as plt
import torch

import helping_hands_rl_envs.env_factory as env_factory

# Stand-alone visual walkthrough for the numpy block_stacking data env:
# show the heightmap before each planner action and once after the episode.

# Workspace bounds in pixel units for the numpy simulator.
workspace = np.array([[0, 128], [0, 128], [0, 100]])
env_config = {
    'workspace': workspace,
    'max_steps': 10,
    'obs_size': 128,
    'action_sequence': 'pxy',
    'num_objects': 2,
    'render': False
}

envs = env_factory.createEnvs(1, 'data', 'numpy', 'block_stacking', env_config)
state, obs = envs.reset()
done = False
while not done:
    plt.imshow(obs.squeeze(), cmap='gray', vmin=0.0, vmax=15)
    plt.show()
    action = envs.getNextAction()
    state_, obs_, reward, done = envs.step(action)
    obs = obs_
plt.imshow(obs.squeeze(), cmap='gray', vmin=0.0, vmax=15)
plt.show()
def testStepLeft(self):
    """house_building_1 with distractors: scripted picks/places move the
    remaining-step counter up and down; planner steps finish the episode.

    Fix: the second step originally called env.step(env.getNextAction())
    WITHOUT auto_reset=False, unlike every sibling call in this test. Had that
    step unexpectedly ended the episode, the env would have silently reset and
    masked the assertion; auto_reset=False is now passed consistently.
    """
    num_random_o = 2
    self.env_config['num_random_objects'] = num_random_o
    self.env_config['render'] = True
    env = env_factory.createEnvs(1, 'rl', 'pybullet', 'house_building_1',
                                 self.env_config, self.planner_config)
    env.reset()

    def scripted(primitive, obj_idx, expected_left):
        # Pick (0) / place (1) at the current position of object obj_idx.
        position = env.getObjPositions()[0]
        target = position[obj_idx + num_random_o]
        act = torch.tensor([primitive, target[0], target[1], 0]).unsqueeze(0)
        states_, in_hands_, obs_, rewards, dones = env.step(act, auto_reset=False)
        self.assertEqual(env.getStepLeft(), expected_left)
        self.assertEqual(dones, 0)

    scripted(0, 0, 5)
    # Planner step (auto_reset disabled -- see docstring).
    states_, in_hands_, obs_, rewards, dones = env.step(env.getNextAction(), auto_reset=False)
    self.assertEqual(env.getStepLeft(), 4)
    self.assertEqual(dones, 0)
    scripted(0, 1, 3)
    scripted(1, 1, 2)
    # Undoing and redoing the same placement moves the counter back and forth.
    scripted(0, 1, 3)
    scripted(1, 1, 2)
    scripted(0, 0, 1)
    states_, in_hands_, obs_, rewards, dones = env.step(env.getNextAction(), auto_reset=False)
    self.assertEqual(env.getStepLeft(), 0)
    self.assertEqual(dones, 1)
    env.close()