Ejemplo n.º 1
0
    def reward(self):
        """Get delta rewards for current timestep.

        Returns:
          A tuple consisting of the scalar (delta) reward, plus `extras`
            dict which has extra task-dependent info from the process of
            computing rewards that gives us finer-grained details. Use
            `extras` for further data analysis.
        """
        reward, info = 0, {}

        # Unpack next goal step.
        objs, matches, targs, _, _, metric, params, max_reward = self.goals[0]

        # Evaluate by matching object poses.
        if metric == 'pose':
            step_reward = 0
            for i in range(len(objs)):
                object_id, (symmetry, _) = objs[i]
                pose = p.getBasePositionAndOrientation(object_id)
                targets_i = np.argwhere(matches[i, :]).reshape(-1)
                for j in targets_i:
                    target_pose = targs[j]
                    if self.is_match(pose, target_pose, symmetry):
                        step_reward += max_reward / len(objs)
                        break

        # Evaluate by measuring object intersection with zone.
        elif metric == 'zone':
            zone_pts, total_pts = 0, 0
            obj_pts, zones = params
            for zone_pose, zone_size in zones:

                # Count valid points in zone.
                for obj_id in obj_pts:
                    pts = obj_pts[obj_id]
                    obj_pose = p.getBasePositionAndOrientation(obj_id)
                    world_to_zone = utils.invert(zone_pose)
                    obj_to_zone = utils.multiply(world_to_zone, obj_pose)
                    pts = np.float32(utils.apply(obj_to_zone, pts))
                    if len(zone_size) > 1:
                        valid_pts = np.logical_and.reduce([
                            pts[0, :] > -zone_size[0] / 2, pts[0, :] < zone_size[0] / 2,
                            pts[1, :] > -zone_size[1] / 2, pts[1, :] < zone_size[1] / 2,
                            pts[2, :] < self.bounds[2, 1]])

                    zone_pts += np.sum(np.float32(valid_pts))
                    total_pts += pts.shape[1]
            step_reward = max_reward * (zone_pts / total_pts)

        # Get cumulative rewards and return delta.
        reward = self.progress + step_reward - self._rewards
        self._rewards = self.progress + step_reward

        # Move to next goal step if current goal step is complete.
        if np.abs(max_reward - step_reward) < 0.01:
            self.progress += max_reward  # Update task progress.
            self.goals.pop(0)

        return reward, info
Ejemplo n.º 2
0
    def reset(self, env):
        super().reset(env)

        # Add stand.
        base_size = (0.12, 0.36, 0.01)
        base_urdf = 'hanoi/stand.urdf'
        base_pose = self.get_random_pose(env, base_size)
        env.add_object(base_urdf, base_pose, 'fixed')

        # Rod positions in base coordinates.
        rod_pos = ((0, -0.12, 0.03), (0, 0, 0.03), (0, 0.12, 0.03))

        # Add disks.
        disks = []
        n_disks = 3
        for i in range(n_disks):
            disk_urdf = 'hanoi/disk%d.urdf' % i
            pos = utils.apply(base_pose, rod_pos[0])
            z = 0.015 * (n_disks - i - 2)
            pos = (pos[0], pos[1], pos[2] + z)
            disks.append(env.add_object(disk_urdf, (pos, base_pose[1])))

        # Solve Hanoi sequence with dynamic programming.
        hanoi_steps = []  # [[object index, from rod, to rod], ...]

        def solve_hanoi(n, t0, t1, t2):
            if n == 0:
                hanoi_steps.append([n, t0, t1])
                return
            solve_hanoi(n - 1, t0, t2, t1)
            hanoi_steps.append([n, t0, t1])
            solve_hanoi(n - 1, t2, t1, t0)

        solve_hanoi(n_disks - 1, 0, 2, 1)

        # Goal: pick and place disks using Hanoi sequence.
        for step in hanoi_steps:
            disk_id = disks[step[0]]
            targ_pos = rod_pos[step[2]]
            targ_pos = utils.apply(base_pose, targ_pos)
            targ_pose = (targ_pos, (0, 0, 0, 1))
            self.goals.append(
                ([(disk_id, (0, None))], np.int32([[1]]), [targ_pose], False,
                 True, 'pose', None, 1 / len(hanoi_steps)))
Ejemplo n.º 3
0
    def reset(self, env):
        super().reset(env)

        # Add base.
        base_size = (0.05, 0.15, 0.005)
        base_urdf = 'stacking/stand.urdf'
        base_pose = self.get_random_pose(env, base_size)
        env.add_object(base_urdf, base_pose, 'fixed')

        # Block colors.
        colors = [
            utils.COLORS['purple'], utils.COLORS['blue'],
            utils.COLORS['green'], utils.COLORS['yellow'],
            utils.COLORS['orange'], utils.COLORS['red']
        ]

        # Add blocks.
        objs = []
        # sym = np.pi / 2
        block_size = (0.04, 0.04, 0.04)
        block_urdf = 'stacking/block.urdf'
        for i in range(6):
            block_pose = self.get_random_pose(env, block_size)
            block_id = env.add_object(block_urdf, block_pose)
            p.changeVisualShape(block_id, -1, rgbaColor=colors[i] + [1])
            objs.append((block_id, (np.pi / 2, None)))

        # Associate placement locations for goals.
        place_pos = [(0, -0.05, 0.03), (0, 0, 0.03), (0, 0.05, 0.03),
                     (0, -0.025, 0.08), (0, 0.025, 0.08), (0, 0, 0.13)]
        targs = [(utils.apply(base_pose, i), base_pose[1]) for i in place_pos]

        # Goal: blocks are stacked in a pyramid (bottom row: green, blue, purple).
        self.goals.append((objs[:3], np.ones(
            (3, 3)), targs[:3], False, True, 'pose', None, 1 / 2))

        # Goal: blocks are stacked in a pyramid (middle row: yellow, orange).
        self.goals.append((objs[3:5], np.ones(
            (2, 2)), targs[3:5], False, True, 'pose', None, 1 / 3))

        # Goal: blocks are stacked in a pyramid (top row: red).
        self.goals.append((objs[5:], np.ones(
            (1, 1)), targs[5:], False, True, 'pose', None, 1 / 6))
Ejemplo n.º 4
0
    def reset(self, env):
        super().reset(env)

        # Add kit.
        kit_size = (0.39, 0.3, 0.0005)
        kit_urdf = 'kitting/kit.urdf'
        kit_pose = self.get_pose(env, kit_size)
        env.add_object(kit_urdf, kit_pose, 'fixed')

        if self.mode == 'train':
            n_objects = 3
            obj_shapes = np.random.choice(self.train_set, n_objects)
        else:
            if self.homogeneous:
                n_objects = 2
                obj_shapes = [np.random.choice(self.test_set)] * n_objects
            else:
                n_objects = 2
                obj_shapes = np.random.choice(self.test_set, n_objects)

        colors = [
            utils.COLORS['purple'], utils.COLORS['blue'], utils.COLORS['green'],
            utils.COLORS['yellow'], utils.COLORS['red']
        ]

        symmetry = [
            2 * np.pi, 2 * np.pi, 2 * np.pi / 3, np.pi / 2, np.pi / 2, 2 * np.pi,
            np.pi, 2 * np.pi / 5, np.pi, np.pi / 2, 2 * np.pi / 5, 0, 2 * np.pi,
            2 * np.pi, 2 * np.pi, 2 * np.pi, 0, 2 * np.pi / 6, 2 * np.pi, 2 * np.pi
        ]

        # Build kit.
        targets = []
        if self.mode == 'test':
            targ_pos = [[0, -0.05, -0.001],
                        [0, 0.05, -0.001]]
        else:
            targ_pos = [[-0.125, 0.12, -0.001],
                        [0.072, 0.07, -0.001],
                        [0.125, -0.13, -0.001]]

        template = 'kitting/object-template.urdf'
        for i in range(n_objects):
            shape = os.path.join(self.assets_root, 'kitting',
                                 f'{obj_shapes[i]:02d}.obj')
            if obj_shapes[i] == 7:
                scale = [0.006, 0.006, 0.0007]
            else:
                scale = [0.006, 0.006, 0.002]  # .0005
            pos = utils.apply(kit_pose, targ_pos[i])
            if self.mode == 'train':
                theta = np.random.rand() * 2 * np.pi
                rot = utils.eulerXYZ_to_quatXYZW((0, 0, theta))
            else:
                rot = utils.eulerXYZ_to_quatXYZW((0, 0, 1.57))
            replace = {'FNAME': (shape,), 'SCALE': scale, 'COLOR': (0.2, 0.2, 0.2)}
            urdf = self.fill_template(template, replace)
            env.add_object(urdf, (pos, rot), 'fixed')
            os.remove(urdf)
            targets.append((pos, rot))

        # Add objects.
        objects = []
        matches = []
        for i in range(n_objects):
            shape = obj_shapes[i]
            size = (0.08, 0.08, 0.02)
            pose = self.get_random_pose(env, size)
            fname = f'{shape:02d}.obj'
            fname = os.path.join(self.assets_root, 'kitting', fname)
            scale = [0.006, 0.006, 0.002]
            replace = {'FNAME': (fname,), 'SCALE': scale, 'COLOR': colors[i]}
            urdf = self.fill_template(template, replace)
            block_id = env.add_object(urdf, pose)
            os.remove(urdf)
            objects.append((block_id, (symmetry[shape], None)))
            match = np.zeros(len(targets))
            match[np.argwhere(obj_shapes == shape).reshape(-1)] = 1
            matches.append(match)
        matches = np.int32(matches)
        self.goals.append((objects, matches, targets, False, True, 'pose', None, 1))
Ejemplo n.º 5
0
  def reset(self, env):
    super().reset(env)

    n_parts = 20
    radius = 0.005
    length = 2 * radius * n_parts * np.sqrt(2)

    # Add 3-sided square.
    square_size = (length, length, 0)
    square_pose = self.get_random_pose(env, square_size)
    square_template = 'square/square-template.urdf'
    replace = {'DIM': (length,), 'HALF': (length / 2 - 0.005,)}
    urdf = self.fill_template(square_template, replace)
    env.add_object(urdf, square_pose, 'fixed')
    os.remove(urdf)

    # Get corner points of square.
    corner0 = (length / 2, length / 2, 0.001)
    corner1 = (-length / 2, length / 2, 0.001)
    corner0 = utils.apply(square_pose, corner0)
    corner1 = utils.apply(square_pose, corner1)

    # Add cable (series of articulated small blocks).
    increment = (np.float32(corner1) - np.float32(corner0)) / n_parts
    position, _ = self.get_random_pose(env, (0.1, 0.1, 0.1))
    position = np.float32(position)
    part_shape = p.createCollisionShape(p.GEOM_BOX, halfExtents=[radius] * 3)
    part_visual = p.createVisualShape(p.GEOM_SPHERE, radius=radius * 1.5)
    parent_id = -1
    targets = []
    objects = []
    for i in range(n_parts):
      position[2] += np.linalg.norm(increment)
      part_id = p.createMultiBody(0.1, part_shape, part_visual,
                                  basePosition=position)
      if parent_id > -1:
        constraint_id = p.createConstraint(
            parentBodyUniqueId=parent_id,
            parentLinkIndex=-1,
            childBodyUniqueId=part_id,
            childLinkIndex=-1,
            jointType=p.JOINT_POINT2POINT,
            jointAxis=(0, 0, 0),
            parentFramePosition=(0, 0, np.linalg.norm(increment)),
            childFramePosition=(0, 0, 0))
        p.changeConstraint(constraint_id, maxForce=100)
      if (i > 0) and (i < n_parts - 1):
        color = utils.COLORS['red'] + [1]
        p.changeVisualShape(part_id, -1, rgbaColor=color)
      env.obj_ids['rigid'].append(part_id)
      parent_id = part_id
      target_xyz = np.float32(corner0) + i * increment + increment / 2
      objects.append((part_id, (0, None)))
      targets.append((target_xyz, (0, 0, 0, 1)))

    matches = np.clip(np.eye(n_parts) + np.eye(n_parts)[::-1], 0, 1)

    self.goals.append((objects, matches, targets,
                       False, False, 'pose', None, 1))

    for i in range(480):
      p.stepSimulation()
Ejemplo n.º 6
0
    def reset(self, env):
        super().reset(env)

        # Add kit.
        kit_size = (0.28, 0.2, 0.005)
        kit_urdf = 'kitting/kit.urdf'
        kit_pose = self.get_random_pose(env, kit_size)
        env.add_object(kit_urdf, kit_pose, 'fixed')

        n_objects = 5
        if self.mode == 'train':
            obj_shapes = np.random.choice(self.train_set, n_objects)
        else:
            if self.homogeneous:
                obj_shapes = [np.random.choice(self.test_set)] * n_objects
            else:
                obj_shapes = np.random.choice(self.test_set, n_objects)

        colors = [
            utils.COLORS['purple'], utils.COLORS['blue'],
            utils.COLORS['green'], utils.COLORS['yellow'], utils.COLORS['red']
        ]

        symmetry = [
            2 * np.pi, 2 * np.pi, 2 * np.pi / 3, np.pi / 2, np.pi / 2,
            2 * np.pi, np.pi, 2 * np.pi / 5, np.pi, np.pi / 2, 2 * np.pi / 5,
            0, 2 * np.pi, 2 * np.pi, 2 * np.pi, 2 * np.pi, 0, 2 * np.pi / 6,
            2 * np.pi, 2 * np.pi
        ]

        # Build kit.
        targets = []
        targ_pos = [[-0.09, 0.045, 0.0014], [0, 0.045, 0.0014],
                    [0.09, 0.045, 0.0014], [-0.045, -0.045, 0.0014],
                    [0.045, -0.045, 0.0014]]
        template = 'kitting/object-template.urdf'
        for i in range(n_objects):
            shape = os.path.join(self.assets_root, 'kitting',
                                 f'{obj_shapes[i]:02d}.obj')
            scale = [0.003, 0.003, 0.0001]  # .0005
            pos = utils.apply(kit_pose, targ_pos[i])
            theta = np.random.rand() * 2 * np.pi
            rot = utils.eulerXYZ_to_quatXYZW((0, 0, theta))
            replace = {
                'FNAME': (shape, ),
                'SCALE': scale,
                'COLOR': (0.2, 0.2, 0.2)
            }
            urdf = self.fill_template(template, replace)
            env.add_object(urdf, (pos, rot), 'fixed')
            os.remove(urdf)
            targets.append((pos, rot))

        # Add objects.
        objects = []
        matches = []
        # objects, syms, matcheses = [], [], []
        for i in range(n_objects):
            shape = obj_shapes[i]
            size = (0.08, 0.08, 0.02)
            pose = self.get_random_pose(env, size)
            fname = f'{shape:02d}.obj'
            fname = os.path.join(self.assets_root, 'kitting', fname)
            scale = [0.003, 0.003, 0.001]  # .0005
            replace = {'FNAME': (fname, ), 'SCALE': scale, 'COLOR': colors[i]}
            urdf = self.fill_template(template, replace)
            block_id = env.add_object(urdf, pose)
            os.remove(urdf)
            objects.append((block_id, (symmetry[shape], None)))
            # objects[block_id] = symmetry[shape]
            match = np.zeros(len(targets))
            match[np.argwhere(obj_shapes == shape).reshape(-1)] = 1
            matches.append(match)
            # print(targets)
            # exit()
            # matches.append(list(np.argwhere(obj_shapes == shape).reshape(-1)))
        matches = np.int32(matches)
        # print(matcheses)
        # exit()

        # Add goal.
        # self.goals.append((objects, syms, targets, 'matches', 'pose', 1.))

        # Goal: objects are placed in their respective kit locations.
        # print(objects)
        # print(matches)
        # print(targets)
        # exit()
        self.goals.append(
            (objects, matches, targets, False, True, 'pose', None, 1))