def test(self):
        self.env_config['render'] = False
        self.env_config['seed'] = 2
        self.env_config['random_orientation'] = False

        env = env_factory.createEnvs(1, 'rl', 'pybullet', 'house_building_3',
                                     self.env_config)
        env.reset()

        action = env.getNextAction()
        states_, in_hands_, obs_, rewards, dones = env.step(action,
                                                            auto_reset=False)
        self.assertEqual(env.getStepLeft(), 5)

        action = env.getNextAction()
        states_, in_hands_, obs_, rewards, dones = env.step(action,
                                                            auto_reset=False)
        self.assertEqual(env.getStepLeft(), 4)

        action = env.getNextAction()
        states_, in_hands_, obs_, rewards, dones = env.step(action,
                                                            auto_reset=False)
        self.assertEqual(env.getStepLeft(), 3)

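        # Save the env state to disk mid-episode, tear the env down, then
        # recreate it and load the saved state before continuing the plan.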
        env.saveToFile('save')
        env.close()
        env = env_factory.createEnvs(1, 'rl', 'pybullet', 'house_building_3',
                                     self.env_config)
        env.reset()
        env.loadFromFile('save')

        action = env.getNextAction()
        states_, in_hands_, obs_, rewards, dones = env.step(action,
                                                            auto_reset=False)
        self.assertEqual(env.getStepLeft(), 2)

        action = env.getNextAction()
        states_, in_hands_, obs_, rewards, dones = env.step(action,
                                                            auto_reset=False)
        self.assertEqual(env.getStepLeft(), 1)

        action = env.getNextAction()
        states_, in_hands_, obs_, rewards, dones = env.step(action,
                                                            auto_reset=False)
        self.assertEqual(env.getStepLeft(), 0)

        env.saveToFile('save')
        env.close()
        env = env_factory.createEnvs(1, 'rl', 'pybullet', 'house_building_3',
                                     self.env_config)
        env.reset()
        env.loadFromFile('save')

        env.close()
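
A note on context: the test methods collected here reference self.env_config and self.planner_config without defining them. The following is a minimal sketch of a harness they could run under; the class name and setUp values are assumptions, with the env_config keys borrowed from Example #26 and the planner_config from Example #28.

import unittest

import numpy as np

from helping_hands_rl_envs import env_factory


class EnvTestCase(unittest.TestCase):
  # Hypothetical base class for the snippets in this listing.
  workspace = np.asarray([[0.35, 0.65], [-0.15, 0.15], [0, 0.50]])

  def setUp(self):
    # Config keys taken from the standalone script in Example #26; individual
    # tests override entries such as 'render', 'seed', and 'num_objects'.
    self.env_config = {'workspace': self.workspace, 'max_steps': 10,
                       'obs_size': 90, 'render': False, 'fast_mode': True,
                       'seed': 0, 'action_sequence': 'pxyr', 'num_objects': 4,
                       'random_orientation': True, 'reward_type': 'sparse',
                       'simulate_grasp': True, 'perfect_grasp': False,
                       'robot': 'ur5_robotiq', 'workspace_check': 'point',
                       'in_hand_mode': 'raw'}
    # Planner config as in Example #28; several of the examples pass {} instead.
    self.planner_config = {'pos_noise': 0.0}


if __name__ == '__main__':
  unittest.main()
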
Example #2
  def testPlanner(self):
    # self.env_config['render'] = True
    #
    # env = env_factory.createEnvs(1, 'rl', 'pybullet', 'improvise_house_building_4', self.env_config, {})
    # total = 0
    # s = 0
    # step_times = []
    # env.reset()
    # pbar = tqdm(total=1000)
    # while total < 1000:
    #   t0 = time.time()
    #   action = env.getNextAction()
    #   t_plan = time.time() - t0
    #   states_, in_hands_, obs_, rewards, dones = env.step(action)
    #   t_action = time.time() - t0 - t_plan
    #   t = time.time() - t0
    #   step_times.append(t)
    #
    #   if dones.sum():
    #     s += rewards.sum().int().item()
    #     total += dones.sum().int().item()
    #
    #   pbar.set_description(
    #     '{}/{}, SR: {:.3f}, plan time: {:.2f}, action time: {:.2f}, avg step time: {:.2f}'
    #       .format(s, total, float(s) / total if total != 0 else 0, t_plan, t_action, np.mean(step_times))
    #   )
    #   pbar.update(dones.sum().int().item())
    # env.close()

    self.env_config['render'] = True

    env = env_factory.createEnvs(1, 'rl', 'pybullet', 'improvise_house_building_4', self.env_config, {})
    while True:
      env.reset()
    def testPlanner(self):
        self.env_config['render'] = False
        self.env_config['random_orientation'] = True
        self.env_config['num_objects'] = 5

        env = env_factory.createEnvs(1, 'pybullet', 'house_building_1',
                                     self.env_config, self.planner_config)
        total = 0
        s = 0
        step_times = []
        env.reset()
        pbar = tqdm(total=1000)
        while total < 1000:
            t0 = time.time()
            action = env.getNextAction()
            t_plan = time.time() - t0
            (states_, in_hands_,
             obs_), rewards, dones = env.step(action, auto_reset=True)
            t_action = time.time() - t0 - t_plan
            t = time.time() - t0
            step_times.append(t)

            s += rewards.sum()

            if dones.sum():
                total += dones.sum()

                pbar.set_description(
                    '{:.3f}, plan time: {:.2f}, action time: {:.2f}, avg step time: {:.2f}'
                    .format(
                        float(s) / total if total != 0 else 0, t_plan,
                        t_action, np.mean(step_times)))
            pbar.update(dones.sum())
        env.close()
    def testPlanner(self):
        self.env_config['render'] = True
        self.env_config['seed'] = 1
        env = env_factory.createEnvs(1, 'rl', 'pybullet', 'brick_stacking',
                                     self.env_config)
        env.reset()
        for i in range(3, -1, -1):
            action = env.getNextAction()
            states_, in_hands_, obs_, rewards, dones = env.step(
                action, auto_reset=False)
            self.assertEqual(env.getStepLeft(), i)
        env.close()

    # def testPlanner2(self):
    # self.env_config['render'] = False
    # self.env_config['reward_type'] = 'sparse'
    # self.env_config['random_orientation'] = True
    #
    # env = env_factory.createEnvs(10, 'rl', 'pybullet', 'brick_stacking', self.env_config, {})
    # total = 0
    # s = 0
    # env.reset()
    # while total < 1000:
    #   states_, in_hands_, obs_, rewards, dones = env.step(env.getNextAction())
    #   if dones.sum():
    #     s += rewards.sum().int().item()
    #     total += dones.sum().int().item()
    #     print('{}/{}'.format(s, total))
Example #5
  def testPlanner(self):
    self.env_config['render'] = True

    env = env_factory.createEnvs(1, 'rl', 'pybullet', 'random_picking', self.env_config, {})
    total = 0
    s = 0
    step_times = []
    env.reset()
    pbar = tqdm(total=1000)
    while total < 1000:
      t0 = time.time()
      action = env.getNextAction()
      t_plan = time.time() - t0
      states_, in_hands_, obs_, rewards, dones = env.step(action)
      t_action = time.time() - t0 - t_plan
      t = time.time() - t0
      step_times.append(t)

      s += rewards.sum().int().item()

      if dones.sum():
        total += dones.sum().int().item()

        pbar.set_description(
          '{:.3f}, plan time: {:.2f}, action time: {:.2f}, avg step time: {:.2f}'
            .format(float(s) / total if total != 0 else 0, t_plan, t_action, np.mean(step_times))
        )
      pbar.update(dones.sum().int().item())
    env.close()
    def testPlanner(self):
        env = env_factory.createEnvs(1, 'rl', 'numpy', 'block_stacking',
                                     self.env_config)
        states_, in_hands_, obs_ = env.reset()
        plt.imshow(obs_.squeeze())
        plt.show()
        for i in range(5, -1, -1):
            action = env.getNextAction()
            states_, in_hands_, obs_, rewards, dones = env.step(
                action, auto_reset=False)
            plt.imshow(obs_.squeeze())
            plt.show()
            self.assertEqual(env.getStepLeft(), i)
        env.close()

    # def testPlanner2(self):
    #   self.env_config['render'] = False
    #   self.env_config['reward_type'] = 'sparse'
    #   self.env_config['random_orientation'] = True
    #   self.env_config['num_objects'] = 4
    #
    #   env = env_factory.createEnvs(10, 'rl', 'pybullet', 'house_building_1', self.env_config, {})
    #   total = 0
    #   s = 0
    #   env.reset()
    #   while total < 1000:
    #     states_, in_hands_, obs_, rewards, dones = env.step(env.getNextAction())
    #     if dones.sum():
    #       s += rewards.sum().int().item()
    #       total += dones.sum().int().item()
    #       print('{}/{}'.format(s, total))
Example #7
    def testPlanner2(self):
        self.env_config['render'] = False
        self.env_config['reward_type'] = 'sparse'
        self.env_config['random_orientation'] = True
        self.env_config['num_objects'] = 4

        env = env_factory.createEnvs(20, 'rl', 'pybullet', 'block_stacking',
                                     self.env_config, {'half_rotation': True})
        total = 0
        s = 0
        states, in_hands, obs = env.reset()
        while total < 1000:
            action = env.getNextAction()
            states_, in_hands_, obs_, rewards, dones = env.step(action)

            # pixel_x = ((action[0, 1] - self.workspace[0][0]) / self.heightmap_resolution).long()
            # pixel_y = ((action[0, 2] - self.workspace[1][0]) / self.heightmap_resolution).long()
            # pixel_x = torch.clamp(pixel_x, 0, 90 - 1).item()
            # pixel_y = torch.clamp(pixel_y, 0, 90 - 1).item()
            # fig, axs = plt.subplots(1, 2, figsize=(10,5))
            # axs[0].imshow(obs.squeeze())
            # axs[1].imshow(obs_.squeeze())
            # axs[0].scatter(pixel_y, pixel_x, c='r')
            # axs[1].scatter(pixel_y, pixel_x, c='r')
            # fig.show()

            obs = obs_
            if dones.sum():
                s += rewards.sum().int().item()
                total += dones.sum().int().item()
                print('{}/{}'.format(s, total))
  def testPlanner2(self):
    self.env_config['render'] = True
    self.env_config['seed'] = 0
    self.env_config['physics_mode'] = 'fast'
    num_processes = 5
    env = env_factory.createEnvs(num_processes, 'pybullet', 'block_stacking', self.env_config, self.planner_config)
    total = 0
    s = 0
    step_times = []
    env.reset()
    pbar = tqdm(total=1000)
    while total < 1000:
      t0 = time.time()
      action = env.getNextAction()
      t_plan = time.time() - t0
      (states_, in_hands_, obs_), rewards, dones = env.step(action)
      s += rewards.sum()
      total += dones.sum()
      t_action = time.time() - t0 - t_plan
      t = time.time() - t0
      step_times.append(t)

      pbar.set_description(
        '{}/{}, SR: {:.3f}, plan time: {:.2f}, action time: {:.2f}, avg step time: {:.2f}'
          .format(s, total, float(s) / total if total != 0 else 0, t_plan, t_action, np.mean(step_times))
      )
    env.close()
  def testPlanner2(self):
    self.env_config['render'] = False
    self.env_config['reward_type'] = 'sparse'
    self.env_config['random_orientation'] = True
    self.env_config['robot'] = 'kuka'
    env = env_factory.createEnvs(10, 'rl', 'pybullet', 'house_building_4', self.env_config, {})
    total = 0
    s = 0
    env.reset()
    while total < 1000:
      t0 = time.time()
      action = env.getNextAction()
      # print('plan time: {}'.format(time.time()-t0))
      t0 = time.time()
      states_, in_hands_, obs_, rewards, dones = env.step(action)
      # print('step time: {}'.format(time.time()-t0))
      # plt.imshow(in_hands_.squeeze())
      # plt.colorbar()
      # plt.show()
      if dones.sum():
        s += rewards.sum().int().item()
        total += dones.sum().int().item()
        print('{}/{}'.format(s, total))

    ## 0.837 kuka
    ## 0.951 ur5
    ## 0.950 ur5 robotiq
Example #10
  def testSuccess(self):
    self.env_config['seed'] = 0
    self.env_config['random_orientation'] = False
    num_random_o = 2
    self.env_config['num_random_objects'] = num_random_o
    self.env_config['render'] = True

    env = env_factory.createEnvs(1, 'rl', 'pybullet', 'house_building_3', self.env_config)
    env.reset()

    action = env.getNextAction()
    states_, in_hands_, obs_, rewards, dones = env.step(action, auto_reset=False)
    self.assertEqual(env.getStepLeft(), 5)

    action = env.getNextAction()
    states_, in_hands_, obs_, rewards, dones = env.step(action, auto_reset=False)
    self.assertEqual(env.getStepLeft(), 4)

    action = env.getNextAction()
    states_, in_hands_, obs_, rewards, dones = env.step(action, auto_reset=False)
    self.assertEqual(env.getStepLeft(), 3)

    action = env.getNextAction()
    states_, in_hands_, obs_, rewards, dones = env.step(action, auto_reset=False)
    self.assertEqual(env.getStepLeft(), 2)

    action = env.getNextAction()
    states_, in_hands_, obs_, rewards, dones = env.step(action, auto_reset=False)
    self.assertEqual(env.getStepLeft(), 1)

    action = env.getNextAction()
    states_, in_hands_, obs_, rewards, dones = env.step(action, auto_reset=False)
    self.assertEqual(env.getStepLeft(), 0)

    env.close()
 def testReset(self):
     self.env_config['render'] = True
     num_processes = 1
     env = env_factory.createEnvs(num_processes, 'pybullet',
                                  'ramp_block_stacking', self.env_config,
                                  self.planner_config)
     while True:
         states, hand_obs, depths = env.reset()
         print(1)
Example #12
 def testPlanner(self):
   # env = createHouseBuilding3Env(PyBulletEnv, self.env_config)()
   env = env_factory.createEnvs(1, 'rl', 'pybullet', 'house_building_3', self.env_config)
   env.reset()
   for i in range(5, -1, -1):
     action = env.getNextAction()
     states_, in_hands_, obs_, rewards, dones = env.step(action, auto_reset=False)
     self.assertEqual(env.getStepLeft(), i)
   env.close()
 def testPlanner(self):
     self.env_config['render'] = True
     num_processes = 1
     env = env_factory.createEnvs(num_processes, 'pybullet',
                                  'ramp_house_building_1', self.env_config,
                                  self.planner_config)
     while True:
         env.reset()
         print(1)
 def testPlanner(self):
   self.env_config['render'] = True
   self.env_config['seed'] = 0
   env = env_factory.createEnvs(1, 'pybullet', 'improvise_house_building_3_deconstruct', self.env_config, self.planner_config)
   env.reset()
   for i in range(5, -1, -1):
     action = env.getNextAction()
     (states_, in_hands_, obs_), rewards, dones = env.step(action, auto_reset=False)
   self.assertEqual(dones, 1)
   env.close()
Example #15
  def testBlockValidBrickOrRoofOnBlock(self):
    self.env_config['seed'] = 1
    self.env_config['random_orientation'] = False
    num_random_o = 2
    self.env_config['num_random_objects'] = num_random_o
    self.env_config['render'] = True

    env = env_factory.createEnvs(1, 'rl', 'pybullet', 'house_building_3', self.env_config)
    env.reset()

    action = env.getNextAction()
    states_, in_hands_, obs_, rewards, dones = env.step(action, auto_reset=False)
    self.assertEqual(env.getStepLeft(), 5)

    action = env.getNextAction()
    states_, in_hands_, obs_, rewards, dones = env.step(action, auto_reset=False)
    self.assertEqual(env.getStepLeft(), 4)

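    # Snapshot the simulator so the pick/place sequence below can be rolled
    # back with env.restore() and an alternative sequence tried afterwards.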
    env.save()
    position = list(env.getObjPositions())[0]
    action = torch.tensor([0, position[3+num_random_o][0], position[3+num_random_o][1], 0]).unsqueeze(0)
    states_, in_hands_, obs_, rewards, dones = env.step(action, auto_reset=False)
    self.assertEqual(env.getStepLeft(), 3)

    action = torch.tensor([1, position[0+num_random_o][0], position[0+num_random_o][1], 0]).unsqueeze(0)
    states_, in_hands_, obs_, rewards, dones = env.step(action, auto_reset=False)
    self.assertEqual(env.getStepLeft(), 4)

    action = env.getNextAction()
    states_, in_hands_, obs_, rewards, dones = env.step(action, auto_reset=False)
    self.assertEqual(env.getStepLeft(), 3)

    action = env.getNextAction()
    states_, in_hands_, obs_, rewards, dones = env.step(action, auto_reset=False)
    self.assertEqual(env.getStepLeft(), 2)
    env.restore()

    position = list(env.getObjPositions())[0]
    action = torch.tensor([0, position[2+num_random_o][0], position[2+num_random_o][1], 0]).unsqueeze(0)
    states_, in_hands_, obs_, rewards, dones = env.step(action, auto_reset=False)
    self.assertEqual(env.getStepLeft(), 5)

    action = torch.tensor([1, position[0+num_random_o][0], position[0+num_random_o][1], 0]).unsqueeze(0)
    states_, in_hands_, obs_, rewards, dones = env.step(action, auto_reset=False)
    self.assertEqual(env.getStepLeft(), 6)

    action = env.getNextAction()
    states_, in_hands_, obs_, rewards, dones = env.step(action, auto_reset=False)
    self.assertEqual(env.getStepLeft(), 5)

    action = env.getNextAction()
    states_, in_hands_, obs_, rewards, dones = env.step(action, auto_reset=False)
    self.assertEqual(env.getStepLeft(), 4)

    env.close()
Example #16
 def testStepLeft(self):
     self.env_config['seed'] = 1
     env = env_factory.createEnvs(1, 'pybullet', 'house_building_4',
                                  self.env_config, self.planner_config)
     env.reset()
     for i in range(9, -1, -1):
         action = env.getNextAction()
         (states_, in_hands_,
          obs_), rewards, dones = env.step(action, auto_reset=False)
         self.assertEqual(env.getStepsLeft(), i)
     env.close()
 def testPlanner(self):
   self.env_config['render'] = True
   env = env_factory.createEnvs(1, 'pybullet', 'block_stacking', self.env_config, self.planner_config)
   env.reset()
   for i in range(5, -1, -1):
     action = env.getNextAction()
     (states_, in_hands_, obs_), rewards, dones = env.step(action, auto_reset=False)
     self.assertEqual(env.getStepsLeft(), i)
   self.assertEqual(rewards, 1)
   self.assertEqual(dones, 1)
   env.close()
Example #18
    def testStepLeft(self):
        num_random_o = 0
        self.env_config['num_random_objects'] = num_random_o
        self.env_config['render'] = True
        env = env_factory.createEnvs(1, 'pybullet', 'house_building_2',
                                     self.env_config, self.planner_config)
        env.reset()

        positions = env.getObjectPositions()[0]
        # pick up the roof
        action = [[
            0, positions[2 + num_random_o][0], positions[2 + num_random_o][1],
            0
        ]]
        (states_, in_hands_, obs_), rewards, dones = env.step(np.array(action),
                                                              auto_reset=False)
        self.assertEqual(env.getStepsLeft(), 5)
        self.assertEqual(dones, 0)

        (states_, in_hands_,
         obs_), rewards, dones = env.step(env.getNextAction(),
                                          auto_reset=False)
        self.assertEqual(env.getStepsLeft(), 4)
        self.assertEqual(dones, 0)

        positions = env.getObjectPositions()[0]
        action = [[
            0, positions[1 + num_random_o][0], positions[1 + num_random_o][1],
            0
        ]]
        (states_, in_hands_, obs_), rewards, dones = env.step(np.array(action),
                                                              auto_reset=False)
        self.assertEqual(env.getStepsLeft(), 3)
        self.assertEqual(dones, 0)

        (states_, in_hands_,
         obs_), rewards, dones = env.step(env.getNextAction(),
                                          auto_reset=False)
        self.assertEqual(env.getStepsLeft(), 2)
        self.assertEqual(dones, 0)

        (states_, in_hands_,
         obs_), rewards, dones = env.step(env.getNextAction(),
                                          auto_reset=False)
        self.assertEqual(env.getStepsLeft(), 1)
        self.assertEqual(dones, 0)

        (states_, in_hands_,
         obs_), rewards, dones = env.step(env.getNextAction(),
                                          auto_reset=False)
        self.assertEqual(env.getStepsLeft(), 0)
        self.assertEqual(dones, 1)

        env.close()
    def testBlockNotValidRoofOnBrickOnBlock(self):
        self.env_config['seed'] = 0
        self.env_config['random_orientation'] = False
        self.env_config['render'] = True

        env = env_factory.createEnvs(1, 'pybullet', 'house_building_3',
                                     self.env_config, self.planner_config)
        env.reset()

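        # Snapshot the simulator state before issuing the out-of-order
        # pick/place actions below, which should increase the steps left.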
        env.save()
        position = list(env.getObjectPositions())[0]
        action = np.array([[0, position[1][0], position[1][1], 0]])
        (states_, in_hands_, obs_), rewards, dones = env.step(action,
                                                              auto_reset=False)
        self.assertEqual(env.getStepsLeft(), 7)

        action = np.array([[1, position[2][0], position[2][1], 0]])
        (states_, in_hands_, obs_), rewards, dones = env.step(action,
                                                              auto_reset=False)
        self.assertEqual(env.getStepsLeft(), 8)

        position = list(env.getObjectPositions())[0]
        action = np.array([[0, position[0][0], position[0][1], 0]])
        (states_, in_hands_, obs_), rewards, dones = env.step(action,
                                                              auto_reset=False)
        self.assertEqual(env.getStepsLeft(), 9)

        action = np.array([[1, position[2][0], position[2][1], 0]])
        (states_, in_hands_, obs_), rewards, dones = env.step(action,
                                                              auto_reset=False)
        self.assertEqual(env.getStepsLeft(), 10)

        action = env.getNextAction()
        (states_, in_hands_, obs_), rewards, dones = env.step(action,
                                                              auto_reset=False)
        self.assertEqual(env.getStepsLeft(), 9)

        action = env.getNextAction()
        (states_, in_hands_, obs_), rewards, dones = env.step(action,
                                                              auto_reset=False)
        self.assertEqual(env.getStepsLeft(), 8)

        action = env.getNextAction()
        (states_, in_hands_, obs_), rewards, dones = env.step(action,
                                                              auto_reset=False)
        self.assertEqual(env.getStepsLeft(), 7)

        action = env.getNextAction()
        (states_, in_hands_, obs_), rewards, dones = env.step(action,
                                                              auto_reset=False)
        self.assertEqual(env.getStepsLeft(), 6)

        env.close()
Example #20
  def test(self):
    self.env_config['render'] = True

    env = env_factory.createEnvs(1, 'rl', 'pybullet', 'house_building_5', self.env_config, {})
    env.reset()
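    # Hand-build the structure by alternating pick (primitive 0) and place
    # (primitive 1) actions at object positions, checking the reward and the
    # done flag after every step.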
    position = env.getObjPositions()[0]
    action = [0, position[0][0], position[0][1], 0]
    states_, in_hands_, obs_, rewards, dones = env.step(torch.tensor(action).unsqueeze(0), auto_reset=False)
    self.assertEqual(dones, 0)
    self.assertEqual(rewards, 0)

    position = env.getObjPositions()[0]
    action = [1, position[2][0], position[2][1], 0]
    states_, in_hands_, obs_, rewards, dones = env.step(torch.tensor(action).unsqueeze(0), auto_reset=False)
    self.assertEqual(dones, 0)
    self.assertEqual(rewards, 1)

    position = env.getObjPositions()[0]
    action = [0, position[0][0], position[0][1], 0]
    states_, in_hands_, obs_, rewards, dones = env.step(torch.tensor(action).unsqueeze(0), auto_reset=False)
    self.assertEqual(dones, 0)
    self.assertEqual(rewards, -1)

    position = env.getObjPositions()[0]
    action = [1, position[2][0], position[2][1], 0]
    states_, in_hands_, obs_, rewards, dones = env.step(torch.tensor(action).unsqueeze(0), auto_reset=False)
    self.assertEqual(dones, 0)
    self.assertEqual(rewards, 1)

    position = env.getObjPositions()[0]
    action = [0, position[1][0], position[1][1], 0]
    states_, in_hands_, obs_, rewards, dones = env.step(torch.tensor(action).unsqueeze(0), auto_reset=False)
    self.assertEqual(dones, 0)
    self.assertEqual(rewards, 0)

    position = env.getObjPositions()[0]
    action = [1, position[3][0], position[3][1], 0]
    states_, in_hands_, obs_, rewards, dones = env.step(torch.tensor(action).unsqueeze(0), auto_reset=False)
    self.assertEqual(dones, 0)
    self.assertEqual(rewards, 1)

    position = env.getObjPositions()[0]
    action = [0, position[2][0], position[2][1], 0]
    states_, in_hands_, obs_, rewards, dones = env.step(torch.tensor(action).unsqueeze(0), auto_reset=False)
    self.assertEqual(dones, 0)
    self.assertEqual(rewards, 0)

    position = env.getObjPositions()[0]
    action = [1, position[4][0], position[4][1], 0]
    states_, in_hands_, obs_, rewards, dones = env.step(torch.tensor(action).unsqueeze(0), auto_reset=False)
    self.assertEqual(dones, 1)
    self.assertEqual(rewards, 1)
Example #21
  def testPlanner(self):
    self.env_config['render'] = True

    env = env_factory.createEnvs(1, 'rl', 'pybullet', 'improvise_house_building_2', self.env_config, {})
    total = 0
    s = 0
    env.reset()
    while total < 1000:
      states_, in_hands_, obs_, rewards, dones = env.step(env.getNextAction())
      if dones.sum():
        s += rewards.sum().int().item()
        total += dones.sum().int().item()
        print('{}/{}'.format(s, total))
    env.close()
 def testPlanner(self):
   self.env_config['seed'] = 0
   num_random_o = 2
   self.env_config['num_random_objects'] = num_random_o
   self.env_config['render'] = True
   env = env_factory.createEnvs(1, 'rl', 'pybullet', 'house_building_4', self.env_config)
   env.reset()
   for i in range(9, -1, -1):
     action = env.getNextAction()
     states_, in_hands_, obs_, rewards, dones = env.step(action, auto_reset=False)
     if i == 0:
       self.assertTrue(dones)
     else:
       self.assertFalse(dones)
     self.assertEqual(env.getStepLeft(), i)
   env.close()
Example #23
 def testPlanner2(self):
   self.env_config['render'] = False
   self.env_config['reward_type'] = 'sparse'
   self.env_config['random_orientation'] = True
   self.env_config['robot'] = 'ur5_robotiq'
   env = env_factory.createEnvs(10, 'rl', 'pybullet', 'house_building_3', self.env_config, {})
   total = 0
   s = 0
   env.reset()
   while total < 1000:
     states_, in_hands_, obs_, rewards, dones = env.step(env.getNextAction())
     # plt.imshow(in_hands_.squeeze())
     # plt.show()
     if dones.sum():
       s += rewards.sum().int().item()
       total += dones.sum().int().item()
       print('{}/{}'.format(s, total))
Example #24
    def testPlanner(self):
        self.env_config['render'] = False
        num_processes = 20
        env = env_factory.createEnvs(
            num_processes, 'pybullet',
            'ramp_improvise_house_building_3_deconstruct', self.env_config,
            self.planner_config)
        total = 0
        s = 0
        step_times = []
        env.reset()
        pbar = tqdm(total=1000)
        steps = [0 for i in range(num_processes)]
        while total < 1000:
            t0 = time.time()
            action = env.getNextAction()
            t_plan = time.time() - t0
            (states_, in_hands_,
             obs_), rewards, dones = env.step(action, auto_reset=False)
            t_action = time.time() - t0 - t_plan
            t = time.time() - t0
            step_times.append(t)

            steps = list(map(lambda x: x + 1, steps))
            num_objects = [len(p) for p in env.getObjectPositions()]

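            # An episode counts as a success only if it finished in exactly
            # 2 * (num_objects - 1) steps, i.e. one pick and one place per
            # object removed during deconstruction.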
            for i in range(num_processes):
                if dones[i]:
                    if steps[i] == 2 * (num_objects[i] - 1):
                        s += 1
                    total += 1
                    steps[i] = 0
            done_idxes = np.nonzero(dones)[0]
            if done_idxes.shape[0] != 0:
                env.reset_envs(done_idxes)

            pbar.set_description(
                '{}/{}, SR: {:.3f}, plan time: {:.2f}, action time: {:.2f}, avg step time: {:.2f}'
                .format(s, total,
                        float(s) / total if total != 0 else 0, t_plan,
                        t_action, np.mean(step_times)))
            pbar.update(total - pbar.n)
        env.close()
    def testPlanner2(self):
        self.env_config['render'] = False
        self.env_config['reward_type'] = 'sparse'
        self.env_config['random_orientation'] = True
        self.env_config['num_objects'] = 5
        self.env_config['num_random_objects'] = 3

        env = env_factory.createEnvs(10, 'rl', 'pybullet', 'house_building_1',
                                     self.env_config, self.planner_config)
        total = 0
        s = 0
        env.reset()
        while total < 1000:
            states_, in_hands_, obs_, rewards, dones = env.step(
                env.getNextAction())
            if dones.sum():
                s += rewards.sum().int().item()
                total += dones.sum().int().item()
                print('{}/{}'.format(s, total))
Example #26
import unittest
import time
import numpy as np
import torch
import matplotlib.pyplot as plt

from helping_hands_rl_envs.envs.house_building_3_env import createHouseBuilding3Env
from helping_hands_rl_envs.envs.pybullet_env import PyBulletEnv
from helping_hands_rl_envs import env_factory

workspace = np.asarray([[0.35, 0.65],
                        [-0.15, 0.15],
                        [0, 0.50]])
env_config = {'workspace': workspace, 'max_steps': 10, 'obs_size': 90, 'render': True, 'fast_mode': True,
              'seed': 0, 'action_sequence': 'pxyr', 'num_objects': 4, 'random_orientation': True,
              'reward_type': 'sparse', 'simulate_grasp': True, 'perfect_grasp': False, 'robot': 'ur5_robotiq',
              'workspace_check': 'point', 'in_hand_mode': 'raw'}

env = env_factory.createEnvs(1, 'rl', 'pybullet', 'house_building_3', env_config, {})
total = 0
s = 0
env.reset()
while total < 1000:
  states_, in_hands_, obs_, rewards, dones = env.step(env.getNextAction())
  # plt.imshow(in_hands_.squeeze())
  # plt.show()
  if dones.sum():
    s += rewards.sum().int().item()
    total += dones.sum().int().item()
    print('{}/{}'.format(s, total))
Example #27
    def testStepLeft(self):
        num_random_o = 2
        self.env_config['num_random_objects'] = num_random_o
        self.env_config['render'] = True
        env = env_factory.createEnvs(1, 'rl', 'pybullet', 'house_building_2',
                                     self.env_config, {})
        env.reset()

        positions = env.getObjPositions()[0]
        # pick up the roof
        action = [
            0, positions[2 + num_random_o][0], positions[2 + num_random_o][1],
            0
        ]
        states_, in_hands_, obs_, rewards, dones = env.step(
            torch.tensor(action).unsqueeze(0), auto_reset=False)
        self.assertEqual(env.getStepLeft(), 5)
        self.assertEqual(dones, 0)

        states_, in_hands_, obs_, rewards, dones = env.step(
            env.getNextAction(), auto_reset=False)
        self.assertEqual(env.getStepLeft(), 4)
        self.assertEqual(dones, 0)

        positions = env.getObjPositions()[0]
        action = [
            0, positions[1 + num_random_o][0], positions[1 + num_random_o][1],
            0
        ]
        states_, in_hands_, obs_, rewards, dones = env.step(
            torch.tensor(action).unsqueeze(0), auto_reset=False)
        self.assertEqual(env.getStepLeft(), 3)
        self.assertEqual(dones, 0)

        states_, in_hands_, obs_, rewards, dones = env.step(
            env.getNextAction(), auto_reset=False)
        self.assertEqual(env.getStepLeft(), 2)
        self.assertEqual(dones, 0)

        states_, in_hands_, obs_, rewards, dones = env.step(
            env.getNextAction(), auto_reset=False)
        self.assertEqual(env.getStepLeft(), 1)
        self.assertEqual(dones, 0)

        states_, in_hands_, obs_, rewards, dones = env.step(
            env.getNextAction(), auto_reset=False)
        self.assertEqual(env.getStepLeft(), 0)
        self.assertEqual(dones, 1)

        env.close()

    # def testPlanner2(self):
    #   self.env_config['render'] = False
    #   self.env_config['reward_type'] = 'sparse'
    #   self.env_config['random_orientation'] = True
    #   env = env_factory.createEnvs(10, 'rl', 'pybullet', 'house_building_2', self.env_config, {})
    #   total = 0
    #   s = 0
    #   env.reset()
    #   while total < 1000:
    #     states_, in_hands_, obs_, rewards, dones = env.step(env.getNextAction())
    #     if dones.sum():
    #       s += rewards.sum().int().item()
    #       total += dones.sum().int().item()
    #       print('{}/{}'.format(s, total))
Example #28
import time
import numpy as np
import matplotlib.pyplot as plt
import torch

import helping_hands_rl_envs.env_factory as env_factory

workspace = np.array([[0.35, 0.65], [-0.15, 0.15], [0, 1]])
env_config = {'workspace': workspace, 'max_steps': 10, 'obs_size': 128, 'action_sequence': 'pxy',
              'num_cubes': 2, 'render': True, 'fast_mode': True, 'random_orientation': True}
planner_config = {'pos_noise': 0.0}
envs = env_factory.createEnvs(1, 'data', 'pybullet', 'brick_stacking', env_config, planner_config=planner_config)

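# Roll out the planner to the end of one episode, showing the heightmap
# observation before each action.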
state, hand_obs, obs = envs.reset()
done = False
while not done:
  plt.imshow(obs.squeeze(), cmap='gray', vmin=0.0, vmax=0.1); plt.show()
  action = envs.getNextAction()
  state_, hand_obs_, obs_, reward, done, valid = envs.step(action)

  obs = obs_
  hand_obs = hand_obs_

plt.imshow(obs.squeeze(), cmap='gray', vmin=0.0, vmax=0.1); plt.show()
Example #29
import time
import numpy as np
import matplotlib.pyplot as plt
import torch

import helping_hands_rl_envs.env_factory as env_factory

workspace = np.array([[0, 128], [0, 128], [0, 100]])
env_config = {
    'workspace': workspace,
    'max_steps': 10,
    'obs_size': 128,
    'action_sequence': 'pxy',
    'num_objects': 2,
    'render': False
}
envs = env_factory.createEnvs(1, 'data', 'numpy', 'block_stacking', env_config)

state, obs = envs.reset()
done = False
while not done:
    plt.imshow(obs.squeeze(), cmap='gray', vmin=0.0, vmax=15)
    plt.show()
    action = envs.getNextAction()
    state_, obs_, reward, done = envs.step(action)

    obs = obs_
plt.imshow(obs.squeeze(), cmap='gray', vmin=0.0, vmax=15)
plt.show()
    def testStepLeft(self):
        num_random_o = 2
        self.env_config['num_random_objects'] = num_random_o
        self.env_config['render'] = True
        env = env_factory.createEnvs(1, 'rl', 'pybullet', 'house_building_1',
                                     self.env_config, self.planner_config)
        env.reset()

        position = env.getObjPositions()[0]
        action = [
            0, position[0 + num_random_o][0], position[0 + num_random_o][1], 0
        ]
        states_, in_hands_, obs_, rewards, dones = env.step(
            torch.tensor(action).unsqueeze(0), auto_reset=False)
        self.assertEqual(env.getStepLeft(), 5)
        self.assertEqual(dones, 0)

        states_, in_hands_, obs_, rewards, dones = env.step(
            env.getNextAction())
        self.assertEqual(env.getStepLeft(), 4)
        self.assertEqual(dones, 0)

        position = env.getObjPositions()[0]
        action = [
            0, position[1 + num_random_o][0], position[1 + num_random_o][1], 0
        ]
        states_, in_hands_, obs_, rewards, dones = env.step(
            torch.tensor(action).unsqueeze(0), auto_reset=False)
        self.assertEqual(env.getStepLeft(), 3)
        self.assertEqual(dones, 0)

        position = env.getObjPositions()[0]
        action = [
            1, position[1 + num_random_o][0], position[1 + num_random_o][1], 0
        ]
        states_, in_hands_, obs_, rewards, dones = env.step(
            torch.tensor(action).unsqueeze(0), auto_reset=False)
        self.assertEqual(env.getStepLeft(), 2)
        self.assertEqual(dones, 0)

        position = env.getObjPositions()[0]
        action = [
            0, position[1 + num_random_o][0], position[1 + num_random_o][1], 0
        ]
        states_, in_hands_, obs_, rewards, dones = env.step(
            torch.tensor(action).unsqueeze(0), auto_reset=False)
        self.assertEqual(env.getStepLeft(), 3)
        self.assertEqual(dones, 0)

        position = env.getObjPositions()[0]
        action = [
            1, position[1 + num_random_o][0], position[1 + num_random_o][1], 0
        ]
        states_, in_hands_, obs_, rewards, dones = env.step(
            torch.tensor(action).unsqueeze(0), auto_reset=False)
        self.assertEqual(env.getStepLeft(), 2)
        self.assertEqual(dones, 0)

        position = env.getObjPositions()[0]
        action = [
            0, position[0 + num_random_o][0], position[0 + num_random_o][1], 0
        ]
        states_, in_hands_, obs_, rewards, dones = env.step(
            torch.tensor(action).unsqueeze(0), auto_reset=False)
        self.assertEqual(env.getStepLeft(), 1)
        self.assertEqual(dones, 0)

        states_, in_hands_, obs_, rewards, dones = env.step(
            env.getNextAction(), auto_reset=False)
        self.assertEqual(env.getStepLeft(), 0)
        self.assertEqual(dones, 1)
        env.close()