def reward(self): """Get delta rewards for current timestep. Returns: A tuple consisting of the scalar (delta) reward, plus `extras` dict which has extra task-dependent info from the process of computing rewards that gives us finer-grained details. Use `extras` for further data analysis. """ reward, info = 0, {} # Unpack next goal step. objs, matches, targs, _, _, metric, params, max_reward = self.goals[0] # Evaluate by matching object poses. if metric == 'pose': step_reward = 0 for i in range(len(objs)): object_id, (symmetry, _) = objs[i] pose = p.getBasePositionAndOrientation(object_id) targets_i = np.argwhere(matches[i, :]).reshape(-1) for j in targets_i: target_pose = targs[j] if self.is_match(pose, target_pose, symmetry): step_reward += max_reward / len(objs) break # Evaluate by measuring object intersection with zone. elif metric == 'zone': zone_pts, total_pts = 0, 0 obj_pts, zones = params for zone_pose, zone_size in zones: # Count valid points in zone. for obj_id in obj_pts: pts = obj_pts[obj_id] obj_pose = p.getBasePositionAndOrientation(obj_id) world_to_zone = utils.invert(zone_pose) obj_to_zone = utils.multiply(world_to_zone, obj_pose) pts = np.float32(utils.apply(obj_to_zone, pts)) if len(zone_size) > 1: valid_pts = np.logical_and.reduce([ pts[0, :] > -zone_size[0] / 2, pts[0, :] < zone_size[0] / 2, pts[1, :] > -zone_size[1] / 2, pts[1, :] < zone_size[1] / 2, pts[2, :] < self.bounds[2, 1]]) zone_pts += np.sum(np.float32(valid_pts)) total_pts += pts.shape[1] step_reward = max_reward * (zone_pts / total_pts) # Get cumulative rewards and return delta. reward = self.progress + step_reward - self._rewards self._rewards = self.progress + step_reward # Move to next goal step if current goal step is complete. if np.abs(max_reward - step_reward) < 0.01: self.progress += max_reward # Update task progress. self.goals.pop(0) return reward, info
def reset(self, env): super().reset(env) # Add stand. base_size = (0.12, 0.36, 0.01) base_urdf = 'hanoi/stand.urdf' base_pose = self.get_random_pose(env, base_size) env.add_object(base_urdf, base_pose, 'fixed') # Rod positions in base coordinates. rod_pos = ((0, -0.12, 0.03), (0, 0, 0.03), (0, 0.12, 0.03)) # Add disks. disks = [] n_disks = 3 for i in range(n_disks): disk_urdf = 'hanoi/disk%d.urdf' % i pos = utils.apply(base_pose, rod_pos[0]) z = 0.015 * (n_disks - i - 2) pos = (pos[0], pos[1], pos[2] + z) disks.append(env.add_object(disk_urdf, (pos, base_pose[1]))) # Solve Hanoi sequence with dynamic programming. hanoi_steps = [] # [[object index, from rod, to rod], ...] def solve_hanoi(n, t0, t1, t2): if n == 0: hanoi_steps.append([n, t0, t1]) return solve_hanoi(n - 1, t0, t2, t1) hanoi_steps.append([n, t0, t1]) solve_hanoi(n - 1, t2, t1, t0) solve_hanoi(n_disks - 1, 0, 2, 1) # Goal: pick and place disks using Hanoi sequence. for step in hanoi_steps: disk_id = disks[step[0]] targ_pos = rod_pos[step[2]] targ_pos = utils.apply(base_pose, targ_pos) targ_pose = (targ_pos, (0, 0, 0, 1)) self.goals.append( ([(disk_id, (0, None))], np.int32([[1]]), [targ_pose], False, True, 'pose', None, 1 / len(hanoi_steps)))
def reset(self, env): super().reset(env) # Add base. base_size = (0.05, 0.15, 0.005) base_urdf = 'stacking/stand.urdf' base_pose = self.get_random_pose(env, base_size) env.add_object(base_urdf, base_pose, 'fixed') # Block colors. colors = [ utils.COLORS['purple'], utils.COLORS['blue'], utils.COLORS['green'], utils.COLORS['yellow'], utils.COLORS['orange'], utils.COLORS['red'] ] # Add blocks. objs = [] # sym = np.pi / 2 block_size = (0.04, 0.04, 0.04) block_urdf = 'stacking/block.urdf' for i in range(6): block_pose = self.get_random_pose(env, block_size) block_id = env.add_object(block_urdf, block_pose) p.changeVisualShape(block_id, -1, rgbaColor=colors[i] + [1]) objs.append((block_id, (np.pi / 2, None))) # Associate placement locations for goals. place_pos = [(0, -0.05, 0.03), (0, 0, 0.03), (0, 0.05, 0.03), (0, -0.025, 0.08), (0, 0.025, 0.08), (0, 0, 0.13)] targs = [(utils.apply(base_pose, i), base_pose[1]) for i in place_pos] # Goal: blocks are stacked in a pyramid (bottom row: green, blue, purple). self.goals.append((objs[:3], np.ones( (3, 3)), targs[:3], False, True, 'pose', None, 1 / 2)) # Goal: blocks are stacked in a pyramid (middle row: yellow, orange). self.goals.append((objs[3:5], np.ones( (2, 2)), targs[3:5], False, True, 'pose', None, 1 / 3)) # Goal: blocks are stacked in a pyramid (top row: red). self.goals.append((objs[5:], np.ones( (1, 1)), targs[5:], False, True, 'pose', None, 1 / 6))
def reset(self, env): super().reset(env) # Add kit. kit_size = (0.39, 0.3, 0.0005) kit_urdf = 'kitting/kit.urdf' kit_pose = self.get_pose(env, kit_size) env.add_object(kit_urdf, kit_pose, 'fixed') if self.mode == 'train': n_objects = 3 obj_shapes = np.random.choice(self.train_set, n_objects) else: if self.homogeneous: n_objects = 2 obj_shapes = [np.random.choice(self.test_set)] * n_objects else: n_objects = 2 obj_shapes = np.random.choice(self.test_set, n_objects) colors = [ utils.COLORS['purple'], utils.COLORS['blue'], utils.COLORS['green'], utils.COLORS['yellow'], utils.COLORS['red'] ] symmetry = [ 2 * np.pi, 2 * np.pi, 2 * np.pi / 3, np.pi / 2, np.pi / 2, 2 * np.pi, np.pi, 2 * np.pi / 5, np.pi, np.pi / 2, 2 * np.pi / 5, 0, 2 * np.pi, 2 * np.pi, 2 * np.pi, 2 * np.pi, 0, 2 * np.pi / 6, 2 * np.pi, 2 * np.pi ] # Build kit. targets = [] if self.mode == 'test': targ_pos = [[0, -0.05, -0.001], [0, 0.05, -0.001]] else: targ_pos = [[-0.125, 0.12, -0.001], [0.072, 0.07, -0.001], [0.125, -0.13, -0.001]] template = 'kitting/object-template.urdf' for i in range(n_objects): shape = os.path.join(self.assets_root, 'kitting', f'{obj_shapes[i]:02d}.obj') if obj_shapes[i] == 7: scale = [0.006, 0.006, 0.0007] else: scale = [0.006, 0.006, 0.002] # .0005 pos = utils.apply(kit_pose, targ_pos[i]) if self.mode == 'train': theta = np.random.rand() * 2 * np.pi rot = utils.eulerXYZ_to_quatXYZW((0, 0, theta)) else: rot = utils.eulerXYZ_to_quatXYZW((0, 0, 1.57)) replace = {'FNAME': (shape,), 'SCALE': scale, 'COLOR': (0.2, 0.2, 0.2)} urdf = self.fill_template(template, replace) env.add_object(urdf, (pos, rot), 'fixed') os.remove(urdf) targets.append((pos, rot)) # Add objects. objects = [] matches = [] for i in range(n_objects): shape = obj_shapes[i] size = (0.08, 0.08, 0.02) pose = self.get_random_pose(env, size) fname = f'{shape:02d}.obj' fname = os.path.join(self.assets_root, 'kitting', fname) scale = [0.006, 0.006, 0.002] replace = {'FNAME': (fname,), 'SCALE': scale, 'COLOR': colors[i]} urdf = self.fill_template(template, replace) block_id = env.add_object(urdf, pose) os.remove(urdf) objects.append((block_id, (symmetry[shape], None))) match = np.zeros(len(targets)) match[np.argwhere(obj_shapes == shape).reshape(-1)] = 1 matches.append(match) matches = np.int32(matches) self.goals.append((objects, matches, targets, False, True, 'pose', None, 1))
def reset(self, env): super().reset(env) n_parts = 20 radius = 0.005 length = 2 * radius * n_parts * np.sqrt(2) # Add 3-sided square. square_size = (length, length, 0) square_pose = self.get_random_pose(env, square_size) square_template = 'square/square-template.urdf' replace = {'DIM': (length,), 'HALF': (length / 2 - 0.005,)} urdf = self.fill_template(square_template, replace) env.add_object(urdf, square_pose, 'fixed') os.remove(urdf) # Get corner points of square. corner0 = (length / 2, length / 2, 0.001) corner1 = (-length / 2, length / 2, 0.001) corner0 = utils.apply(square_pose, corner0) corner1 = utils.apply(square_pose, corner1) # Add cable (series of articulated small blocks). increment = (np.float32(corner1) - np.float32(corner0)) / n_parts position, _ = self.get_random_pose(env, (0.1, 0.1, 0.1)) position = np.float32(position) part_shape = p.createCollisionShape(p.GEOM_BOX, halfExtents=[radius] * 3) part_visual = p.createVisualShape(p.GEOM_SPHERE, radius=radius * 1.5) parent_id = -1 targets = [] objects = [] for i in range(n_parts): position[2] += np.linalg.norm(increment) part_id = p.createMultiBody(0.1, part_shape, part_visual, basePosition=position) if parent_id > -1: constraint_id = p.createConstraint( parentBodyUniqueId=parent_id, parentLinkIndex=-1, childBodyUniqueId=part_id, childLinkIndex=-1, jointType=p.JOINT_POINT2POINT, jointAxis=(0, 0, 0), parentFramePosition=(0, 0, np.linalg.norm(increment)), childFramePosition=(0, 0, 0)) p.changeConstraint(constraint_id, maxForce=100) if (i > 0) and (i < n_parts - 1): color = utils.COLORS['red'] + [1] p.changeVisualShape(part_id, -1, rgbaColor=color) env.obj_ids['rigid'].append(part_id) parent_id = part_id target_xyz = np.float32(corner0) + i * increment + increment / 2 objects.append((part_id, (0, None))) targets.append((target_xyz, (0, 0, 0, 1))) matches = np.clip(np.eye(n_parts) + np.eye(n_parts)[::-1], 0, 1) self.goals.append((objects, matches, targets, False, False, 'pose', None, 1)) for i in range(480): p.stepSimulation()
def reset(self, env): super().reset(env) # Add kit. kit_size = (0.28, 0.2, 0.005) kit_urdf = 'kitting/kit.urdf' kit_pose = self.get_random_pose(env, kit_size) env.add_object(kit_urdf, kit_pose, 'fixed') n_objects = 5 if self.mode == 'train': obj_shapes = np.random.choice(self.train_set, n_objects) else: if self.homogeneous: obj_shapes = [np.random.choice(self.test_set)] * n_objects else: obj_shapes = np.random.choice(self.test_set, n_objects) colors = [ utils.COLORS['purple'], utils.COLORS['blue'], utils.COLORS['green'], utils.COLORS['yellow'], utils.COLORS['red'] ] symmetry = [ 2 * np.pi, 2 * np.pi, 2 * np.pi / 3, np.pi / 2, np.pi / 2, 2 * np.pi, np.pi, 2 * np.pi / 5, np.pi, np.pi / 2, 2 * np.pi / 5, 0, 2 * np.pi, 2 * np.pi, 2 * np.pi, 2 * np.pi, 0, 2 * np.pi / 6, 2 * np.pi, 2 * np.pi ] # Build kit. targets = [] targ_pos = [[-0.09, 0.045, 0.0014], [0, 0.045, 0.0014], [0.09, 0.045, 0.0014], [-0.045, -0.045, 0.0014], [0.045, -0.045, 0.0014]] template = 'kitting/object-template.urdf' for i in range(n_objects): shape = os.path.join(self.assets_root, 'kitting', f'{obj_shapes[i]:02d}.obj') scale = [0.003, 0.003, 0.0001] # .0005 pos = utils.apply(kit_pose, targ_pos[i]) theta = np.random.rand() * 2 * np.pi rot = utils.eulerXYZ_to_quatXYZW((0, 0, theta)) replace = { 'FNAME': (shape, ), 'SCALE': scale, 'COLOR': (0.2, 0.2, 0.2) } urdf = self.fill_template(template, replace) env.add_object(urdf, (pos, rot), 'fixed') os.remove(urdf) targets.append((pos, rot)) # Add objects. objects = [] matches = [] # objects, syms, matcheses = [], [], [] for i in range(n_objects): shape = obj_shapes[i] size = (0.08, 0.08, 0.02) pose = self.get_random_pose(env, size) fname = f'{shape:02d}.obj' fname = os.path.join(self.assets_root, 'kitting', fname) scale = [0.003, 0.003, 0.001] # .0005 replace = {'FNAME': (fname, ), 'SCALE': scale, 'COLOR': colors[i]} urdf = self.fill_template(template, replace) block_id = env.add_object(urdf, pose) os.remove(urdf) objects.append((block_id, (symmetry[shape], None))) # objects[block_id] = symmetry[shape] match = np.zeros(len(targets)) match[np.argwhere(obj_shapes == shape).reshape(-1)] = 1 matches.append(match) # print(targets) # exit() # matches.append(list(np.argwhere(obj_shapes == shape).reshape(-1))) matches = np.int32(matches) # print(matcheses) # exit() # Add goal. # self.goals.append((objects, syms, targets, 'matches', 'pose', 1.)) # Goal: objects are placed in their respective kit locations. # print(objects) # print(matches) # print(targets) # exit() self.goals.append( (objects, matches, targets, False, True, 'pose', None, 1))