def act(self, obs, info): """Run inference and return best action.""" act = {'camera_config': self.camera_config, 'primitive': None} # Get observations and run pick prediction gt_obs = self.info_to_gt_obs(info) pick_prediction = self.pick_model(gt_obs[None, Ellipsis]) if self.use_mdn: pi, mu, var = pick_prediction # prediction = mdn_utils.pick_max_mean(pi, mu, var) pick_prediction = mdn_utils.sample_from_pdf(pi, mu, var) pick_prediction = pick_prediction[:, 0, :] pick_prediction = pick_prediction[0] # unbatch # Get observations and run place prediction obs_with_pick = np.hstack((gt_obs, pick_prediction)) # since the pick at train time is always 0.0, # the predictions are unstable if not exactly 0 obs_with_pick[-1] = 0.0 place_prediction = self.place_model(obs_with_pick[None, Ellipsis]) if self.use_mdn: pi, mu, var = place_prediction # prediction = mdn_utils.pick_max_mean(pi, mu, var) place_prediction = mdn_utils.sample_from_pdf(pi, mu, var) place_prediction = place_prediction[:, 0, :] place_prediction = place_prediction[0] prediction = np.hstack((pick_prediction, place_prediction)) # just go exactly to objects, from observations # p0_position = np.hstack((gt_obs[3:5], 0.02)) # p0_rotation = utils.eulerXYZ_to_quatXYZW( # (0, 0, -gt_obs[5]*self.theta_scale)) # p1_position = np.hstack((gt_obs[0:2], 0.02)) # p1_rotation = utils.eulerXYZ_to_quatXYZW( # (0, 0, -gt_obs[2]*self.theta_scale)) # just go exactly to objects, predicted p0_position = np.hstack((prediction[0:2], 0.02)) p0_rotation = utils.eulerXYZ_to_quatXYZW( (0, 0, -prediction[2] * self.theta_scale)) p1_position = np.hstack((prediction[3:5], 0.02)) p1_rotation = utils.eulerXYZ_to_quatXYZW( (0, 0, -prediction[5] * self.theta_scale)) # Select task-specific motion primitive. act['primitive'] = 'pick_place' if self.task == 'sweeping': act['primitive'] = 'sweep' elif self.task == 'pushing': act['primitive'] = 'push' params = { 'pose0': (p0_position, p0_rotation), 'pose1': (p1_position, p1_rotation) } act['params'] = params return act
def act(self, obs, info=None, goal=None):  # pylint: disable=unused-argument
  """Run inference and return best action given visual observations."""
  tf.keras.backend.set_learning_phase(0)

  # Get heightmap from RGB-D images.
  img = self.get_image(obs)

  # Attention model forward pass.
  pick_conf = self.attention.forward(img)
  argmax = np.argmax(pick_conf)
  argmax = np.unravel_index(argmax, shape=pick_conf.shape)
  p0_pix = argmax[:2]
  p0_theta = argmax[2] * (2 * np.pi / pick_conf.shape[2])

  # Transport model forward pass.
  place_conf = self.transport.forward(img, p0_pix)
  argmax = np.argmax(place_conf)
  argmax = np.unravel_index(argmax, shape=place_conf.shape)
  p1_pix = argmax[:2]
  p1_theta = argmax[2] * (2 * np.pi / place_conf.shape[2])

  # Pixels to end effector poses.
  hmap = img[:, :, 3]
  p0_xyz = utils.pix_to_xyz(p0_pix, hmap, self.bounds, self.pix_size)
  p1_xyz = utils.pix_to_xyz(p1_pix, hmap, self.bounds, self.pix_size)
  p0_xyzw = utils.eulerXYZ_to_quatXYZW((0, 0, -p0_theta))
  p1_xyzw = utils.eulerXYZ_to_quatXYZW((0, 0, -p1_theta))

  return {
      'pose0': (p0_xyz, p0_xyzw),
      'pose1': (p1_xyz, p1_xyzw)
  }
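# Toy, self-contained illustration of the argmax decoding used above (not part
# of the codebase): the flat argmax over an (H, W, n_rotations) confidence map
# is unraveled into a pixel plus a rotation bin, and the bin index is scaled to
# an angle in radians.
import numpy as np

pick_conf = np.zeros((320, 160, 36), np.float32)  # hypothetical map shape
pick_conf[100, 40, 9] = 1.0                       # pretend peak activation
argmax = np.unravel_index(np.argmax(pick_conf), shape=pick_conf.shape)
p0_pix = argmax[:2]                               # (100, 40)
p0_theta = argmax[2] * (2 * np.pi / pick_conf.shape[2])  # bin 9 of 36 -> pi/2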
def act(self, obs, gt_act, info):
  """Run inference and return best action given visual observations."""
  del gt_act
  del info

  self.regression_model.set_batch_size(1)

  act = {'camera_config': self.camera_config, 'primitive': None}
  if not obs:
    return act

  # Get heightmap from RGB-D images.
  colormap, heightmap = self.get_heightmap(obs, self.camera_config)

  # Concatenate color with depth images.
  input_image = np.concatenate(
      (colormap, heightmap[Ellipsis, None], heightmap[Ellipsis, None],
       heightmap[Ellipsis, None]),
      axis=2)[None, Ellipsis]

  # Or just use RGB:
  # input_image = colormap[None, ...]

  # Regression.
  prediction = self.regression_model.forward(input_image)

  if self.use_mdn:
    mdn_prediction = prediction
    pi, mu, var = mdn_prediction
    # prediction = mdn_utils.pick_max_mean(pi, mu, var)
    prediction = mdn_utils.sample_from_pdf(pi, mu, var)
    prediction = prediction[:, 0, :]

  prediction = prediction[0]

  p0_position = np.hstack((prediction[0:2], 0.02))
  p1_position = np.hstack((prediction[3:5], 0.02))
  p0_rotation = utils.eulerXYZ_to_quatXYZW(
      (0, 0, -prediction[2] * self.theta_scale))
  p1_rotation = utils.eulerXYZ_to_quatXYZW(
      (0, 0, -prediction[5] * self.theta_scale))

  act['primitive'] = 'pick_place'
  if self.task == 'sweeping':
    act['primitive'] = 'sweep'
  elif self.task == 'pushing':
    act['primitive'] = 'push'
  params = {
      'pose0': (p0_position, p0_rotation),
      'pose1': (p1_position, p1_rotation)
  }
  act['params'] = params

  self.regression_model.set_batch_size(self.batch_size)
  return act
def act(self, obs, info): """Run inference and return best action.""" del obs act = {'camera_config': self.camera_config, 'primitive': None} # Get observations and run predictions. gt_obs = self.info_to_gt_obs(info) # just for visualization gt_act_center = self.info_to_gt_obs(info) # pylint: disable=unused-variable prediction = self.model(gt_obs[None, Ellipsis]) if self.use_mdn: mdn_prediction = prediction pi, mu, var = mdn_prediction # prediction = mdn_utils.pick_max_mean(pi, mu, var) prediction = mdn_utils.sample_from_pdf(pi, mu, var) prediction = prediction[:, 0, :] prediction = prediction[0] # unbatch # self.plot_act_mdn(gt_act_center[None, ...], mdn_prediction) # just go exactly to objects, from observations # p0_position = np.hstack((gt_obs[3:5], 0.02)) # p0_rotation = utils.eulerXYZ_to_quatXYZW( # (0, 0, -gt_obs[5] * self.theta_scale)) # p1_position = np.hstack((gt_obs[0:2], 0.02)) # p1_rotation = utils.eulerXYZ_to_quatXYZW( # (0, 0, -gt_obs[2] * self.theta_scale)) # just go exactly to objects, predicted p0_position = np.hstack((prediction[0:2], 0.02)) p0_rotation = utils.eulerXYZ_to_quatXYZW( (0, 0, -prediction[2] * self.theta_scale)) p1_position = np.hstack((prediction[3:5], 0.02)) p1_rotation = utils.eulerXYZ_to_quatXYZW( (0, 0, -prediction[5] * self.theta_scale)) # Select task-specific motion primitive. act['primitive'] = 'pick_place' if self.task == 'sweeping': act['primitive'] = 'sweep' elif self.task == 'pushing': act['primitive'] = 'push' params = { 'pose0': (p0_position, p0_rotation), 'pose1': (p1_position, p1_rotation) } act['params'] = params return act
def reset(self, env):
  super().reset(env)

  # Add goal zone.
  zone_size = (0.12, 0.12, 0)
  zone_pose = self.get_random_pose(env, zone_size)
  env.add_object('assets/zone/zone.urdf', zone_pose, 'fixed')

  # Add pile of small blocks.
  obj_pts = {}
  obj_ids = []
  for _ in range(50):
    rx = self.bounds[0, 0] + 0.15 + np.random.rand() * 0.2
    ry = self.bounds[1, 0] + 0.4 + np.random.rand() * 0.2
    xyz = (rx, ry, 0.01)
    theta = np.random.rand() * 2 * np.pi
    xyzw = utils.eulerXYZ_to_quatXYZW((0, 0, theta))
    obj_id = env.add_object('assets/block/small.urdf', (xyz, xyzw))
    obj_pts[obj_id] = self.get_object_points(obj_id)
    obj_ids.append((obj_id, (0, None)))

  # Goal: all small blocks must be in zone.
  # goal = Goal(list(obj_pts.keys()), [0] * len(obj_pts), [zone_pose])
  # metric = Metric('zone', (obj_pts, [(zone_pose, zone_size)]), 1.)
  # self.goals.append((goal, metric))
  self.goals.append((obj_ids, np.ones((50, 1)), [zone_pose], True, False,
                     'zone', (obj_pts, [(zone_pose, zone_size)]), 1))
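# The 8-field goal tuples appended in these reset() methods are unpacked by the
# oracle further below as (objs, matches, targs, replace, rotations, _, _, _);
# judging from how they are built and consumed in this section, the trailing
# three fields are the metric name ('zone' or 'pose'), its parameters, and a
# max progress count. These field names are descriptive guesses inferred from
# the surrounding code, not identifiers taken from the codebase.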
def act(self, obs, info): """Run inference and return best action given visual observations.""" del info act = {'camera_config': self.camera_config, 'primitive': None} if not obs: return act # [Optional] Get heightmap from RGB-D images. colormap, heightmap = self.get_heightmap(obs, self.camera_config) # pylint: disable=unused-variable # Do something here. # Dummy behavior: move to the middle of the workspace. p0_position = (self.bounds[:, 1] - self.bounds[:, 0]) / 2 p0_position += self.bounds[:, 0] p1_position = p0_position rotation = utils.eulerXYZ_to_quatXYZW((0, 0, 0)) # Select task-specific motion primitive. act['primitive'] = 'pick_place' if self.task == 'sweeping': act['primitive'] = 'sweep' elif self.task == 'pushing': act['primitive'] = 'push' params = { 'pose0': (p0_position, rotation), 'pose1': (p1_position, rotation) } act['params'] = params return act
def __call__(self, movej, movep, ee, pose0, pose1):
  """Execute pick and place primitive.

  Args:
    movej: function to move robot joints.
    movep: function to move robot end effector pose.
    ee: robot end effector.
    pose0: SE(3) picking pose.
    pose1: SE(3) placing pose.

  Returns:
    timeout: True if the robot movement timed out.
  """
  pick_pose, place_pose = pose0, pose1

  # Execute picking primitive.
  prepick_to_pick = ((0, 0, 0.32), (0, 0, 0, 1))
  postpick_to_pick = ((0, 0, self.height), (0, 0, 0, 1))
  prepick_pose = utils.multiply(pick_pose, prepick_to_pick)
  postpick_pose = utils.multiply(pick_pose, postpick_to_pick)
  timeout = movep(prepick_pose)

  # Move towards pick pose until contact is detected.
  delta = (np.float32([0, 0, -0.001]),
           utils.eulerXYZ_to_quatXYZW((0, 0, 0)))
  targ_pose = prepick_pose
  while not ee.detect_contact():  # and targ_pose[2] > 0:
    targ_pose = utils.multiply(targ_pose, delta)
    timeout |= movep(targ_pose)
    if timeout:
      return True

  # Activate end effector, move up, and check picking success.
  ee.activate()
  timeout |= movep(postpick_pose, self.speed)
  pick_success = ee.check_grasp()

  # Execute placing primitive if pick is successful.
  if pick_success:
    preplace_to_place = ((0, 0, self.height), (0, 0, 0, 1))
    postplace_to_place = ((0, 0, 0.32), (0, 0, 0, 1))
    preplace_pose = utils.multiply(place_pose, preplace_to_place)
    postplace_pose = utils.multiply(place_pose, postplace_to_place)
    targ_pose = preplace_pose
    while not ee.detect_contact():
      targ_pose = utils.multiply(targ_pose, delta)
      timeout |= movep(targ_pose, self.speed)
      if timeout:
        return True
    ee.release()
    timeout |= movep(postplace_pose)

  # Move to prepick pose if pick is not successful.
  else:
    ee.release()
    timeout |= movep(prepick_pose)

  return timeout
def get_random_pose_6dof(self, env, obj_size):
  pos, rot = super(BlockInsertionSixDof, self).get_random_pose(env, obj_size)
  z = (np.random.rand() / 10) + 0.03
  pos = (pos[0], pos[1], obj_size[2] / 2 + z)
  roll = (np.random.rand() - 0.5) * np.pi / 2
  pitch = (np.random.rand() - 0.5) * np.pi / 2
  yaw = np.random.rand() * 2 * np.pi
  rot = utils.eulerXYZ_to_quatXYZW((roll, pitch, yaw))
  return pos, rot
def reset(self, env):
  super().reset(env)

  # Generate randomly shaped box.
  box_size = self.get_random_size(0.05, 0.15, 0.05, 0.15, 0.01, 0.06)

  # Add corner.
  dimx = (box_size[0] / 2 - 0.025 + 0.0025, box_size[0] / 2 + 0.0025)
  dimy = (box_size[1] / 2 + 0.0025, box_size[1] / 2 - 0.025 + 0.0025)
  corner_template = 'assets/corner/corner-template.urdf'
  replace = {'DIMX': dimx, 'DIMY': dimy}
  corner_urdf = self.fill_template(corner_template, replace)
  corner_size = (box_size[0], box_size[1], 0)
  corner_pose = self.get_random_pose(env, corner_size)
  env.add_object(corner_urdf, corner_pose, 'fixed')
  os.remove(corner_urdf)

  # Add possible placing poses.
  theta = utils.quatXYZW_to_eulerXYZ(corner_pose[1])[2]
  flip_rot = utils.eulerXYZ_to_quatXYZW((0, 0, theta + np.pi))
  pose1 = (corner_pose[0], flip_rot)
  alt_x = (box_size[0] / 2) - (box_size[1] / 2)
  alt_y = (box_size[1] / 2) - (box_size[0] / 2)
  alt_pos = (alt_x, alt_y, 0)
  alt_rot0 = utils.eulerXYZ_to_quatXYZW((0, 0, np.pi / 2))
  alt_rot1 = utils.eulerXYZ_to_quatXYZW((0, 0, 3 * np.pi / 2))
  pose2 = utils.multiply(corner_pose, (alt_pos, alt_rot0))
  pose3 = utils.multiply(corner_pose, (alt_pos, alt_rot1))

  # Add box.
  box_template = 'assets/box/box-template.urdf'
  box_urdf = self.fill_template(box_template, {'DIM': box_size})
  box_pose = self.get_random_pose(env, box_size)
  box_id = env.add_object(box_urdf, box_pose)
  os.remove(box_urdf)
  self.color_random_brown(box_id)

  # Goal: box is aligned with corner (1 of 4 possible poses).
  self.goals.append(([(box_id, (2 * np.pi, None))], np.int32([[1, 1, 1, 1]]),
                     [corner_pose, pose1, pose2, pose3], False, True, 'pose',
                     None, 1))
def act(self, obs, gt_act, info):
  """Run inference and return best action."""
  del gt_act

  assert False, 'this needs to have the ordering switched for act inference -- is now xytheta, rpz'  # pylint: disable=line-too-long

  act = {'camera_config': self.camera_config, 'primitive': None}

  # Get observations and run predictions.
  gt_obs = self.info_to_gt_obs(info)
  prediction = self.model(gt_obs[None, Ellipsis])

  if self.use_mdn:
    mdn_prediction = prediction
    pi, mu, var = mdn_prediction
    # prediction = mdn_utils.pick_max_mean(pi, mu, var)
    prediction = mdn_utils.sample_from_pdf(pi, mu, var)
    prediction = prediction[:, 0, :]

  prediction = prediction[0]  # unbatch

  p0_position = np.hstack((prediction[0:2], 0.02))
  p0_rotation = utils.eulerXYZ_to_quatXYZW(
      (0, 0, -prediction[2] * self.theta_scale))

  p1_position = prediction[3:6]
  p1_rotation = utils.eulerXYZ_to_quatXYZW(
      (prediction[6] * self.theta_scale,
       prediction[7] * self.theta_scale,
       -prediction[8] * self.theta_scale))

  # Select task-specific motion primitive.
  act['primitive'] = 'pick_place_6dof'
  params = {
      'pose0': (p0_position, p0_rotation),
      'pose1': (p1_position, p1_rotation)
  }
  act['params'] = params
  return act
def push(movej, movep, ee, pose0, pose1):  # pylint: disable=unused-argument
  """Execute pushing primitive.

  Args:
    movej: function to move robot joints.
    movep: function to move robot end effector pose.
    ee: robot end effector.
    pose0: SE(3) starting pose.
    pose1: SE(3) ending pose.

  Returns:
    timeout: True if the robot movement timed out.
  """
  # Adjust push start and end positions.
  pos0 = np.float32((pose0[0][0], pose0[0][1], 0.005))
  pos1 = np.float32((pose1[0][0], pose1[0][1], 0.005))
  vec = np.float32(pos1) - np.float32(pos0)
  length = np.linalg.norm(vec)
  vec = vec / length
  pos0 -= vec * 0.02
  pos1 -= vec * 0.05

  # Align spatula against push direction.
  theta = np.arctan2(vec[1], vec[0])
  rot = utils.eulerXYZ_to_quatXYZW((0, 0, theta))

  over0 = (pos0[0], pos0[1], 0.31)
  over1 = (pos1[0], pos1[1], 0.31)

  # Execute push.
  timeout = movep((over0, rot))
  timeout |= movep((pos0, rot))
  n_push = np.int32(np.floor(np.linalg.norm(pos1 - pos0) / 0.01))
  for _ in range(n_push):
    target = pos0 + vec * n_push * 0.01
    timeout |= movep((target, rot), speed=0.003)
  timeout |= movep((pos1, rot), speed=0.003)
  timeout |= movep((over1, rot))
  return timeout
def spawn_box(self):
  """Palletizing: spawn another box in the workspace if it is empty."""
  workspace_empty = True
  if self.goals:
    for obj, _ in self.goals[0][0]:
      obj_pose = p.getBasePositionAndOrientation(obj)
      workspace_empty = workspace_empty and ((obj_pose[0][1] < -0.5) or
                                             (obj_pose[0][1] > 0))
    if not self.steps:
      self.goals = []
      print('Palletized boxes toppled. Terminating episode.')
      return
    if workspace_empty:
      obj = self.steps[0]
      theta = np.random.random() * 2 * np.pi
      rotation = utils.eulerXYZ_to_quatXYZW((0, 0, theta))
      p.resetBasePositionAndOrientation(obj, [0.5, -0.25, 0.1], rotation)
      self.steps.pop(0)

  # Wait until spawned box settles.
  for _ in range(480):
    p.stepSimulation()
def get_random_pose(self, env, obj_size): """Get random collision-free object pose within workspace bounds.""" # Get erosion size of object in pixels. max_size = np.sqrt(obj_size[0]**2 + obj_size[1]**2) erode_size = int(np.round(max_size / self.pix_size)) _, hmap, obj_mask = self.get_true_image(env) # Randomly sample an object pose within free-space pixels. free = np.ones(obj_mask.shape, dtype=np.uint8) for obj_ids in env.obj_ids.values(): for obj_id in obj_ids: free[obj_mask == obj_id] = 0 free[0, :], free[:, 0], free[-1, :], free[:, -1] = 0, 0, 0, 0 free = cv2.erode(free, np.ones((erode_size, erode_size), np.uint8)) if np.sum(free) == 0: return pix = utils.sample_distribution(np.float32(free)) pos = utils.pix_to_xyz(pix, hmap, self.bounds, self.pix_size) pos = (pos[0], pos[1], obj_size[2] / 2) theta = np.random.rand() * 2 * np.pi rot = utils.eulerXYZ_to_quatXYZW((0, 0, theta)) return pos, rot
def get_random_pose(self, env, obj_size):
  pose = super(BlockInsertionTranslation, self).get_random_pose(env, obj_size)
  pos, rot = pose
  rot = utils.eulerXYZ_to_quatXYZW((0, 0, np.pi / 2))
  return pos, rot
def act(self, obs, info): """Run inference and return best action.""" act = {'camera_config': self.camera_config, 'primitive': None} # Get observations and run pick prediction gt_obs = self.info_to_gt_obs(info) pick_prediction = self.pick_model(gt_obs[None, Ellipsis]) if self.use_mdn: pi, mu, var = pick_prediction # prediction = mdn_utils.pick_max_mean(pi, mu, var) pick_prediction = mdn_utils.sample_from_pdf(pi, mu, var) pick_prediction = pick_prediction[:, 0, :] pick_prediction = pick_prediction[0] # unbatch # Get observations and run place prediction obs_with_pick = np.hstack((gt_obs, pick_prediction)).astype(np.float32) # since the pick at train time is always 0.0, # the predictions are unstable if not exactly 0 obs_with_pick[-1] = 0.0 place_se2_prediction = self.place_se2_model(obs_with_pick[None, Ellipsis]) if self.use_mdn: pi, mu, var = place_se2_prediction # prediction = mdn_utils.pick_max_mean(pi, mu, var) place_se2_prediction = mdn_utils.sample_from_pdf(pi, mu, var) place_se2_prediction = place_se2_prediction[:, 0, :] place_se2_prediction = place_se2_prediction[0] # Get observations and run rpz prediction obs_with_pick_place_se2 = np.hstack( (obs_with_pick, place_se2_prediction)).astype(np.float32) place_rpz_prediction = self.place_rpz_model(obs_with_pick_place_se2[None, Ellipsis]) if self.use_mdn: pi, mu, var = place_rpz_prediction # prediction = mdn_utils.pick_max_mean(pi, mu, var) place_rpz_prediction = mdn_utils.sample_from_pdf(pi, mu, var) place_rpz_prediction = place_rpz_prediction[:, 0, :] place_rpz_prediction = place_rpz_prediction[0] p0_position = np.hstack((pick_prediction[0:2], 0.02)) p0_rotation = utils.eulerXYZ_to_quatXYZW((0, 0, 0)) p1_position = np.hstack( (place_se2_prediction[0:2], place_rpz_prediction[2])) p1_rotation = utils.eulerXYZ_to_quatXYZW( (place_rpz_prediction[0] * self.theta_scale, place_rpz_prediction[1] * self.theta_scale, -place_se2_prediction[2] * self.theta_scale)) # Select task-specific motion primitive. act['primitive'] = 'pick_place_6dof' params = { 'pose0': (p0_position, p0_rotation), 'pose1': (p1_position, p1_rotation) } act['params'] = params return act
def act(self, obs, info): """Run inference and return best action given visual observations.""" del info act = {'camera_config': self.camera_config, 'primitive': None} if not obs: return act # Get heightmap from RGB-D images. colormap, heightmap = self.get_heightmap(obs, self.camera_config) # Concatenate color with depth images. input_image = np.concatenate( (colormap, heightmap[Ellipsis, None], heightmap[Ellipsis, None], heightmap[Ellipsis, None]), axis=2) # Get top-k pixels from pick and place heatmaps. k = 100 pick_heatmap = self.pick_model.forward( input_image, apply_softmax=True).squeeze() place_heatmap = self.place_model.forward( input_image, apply_softmax=True).squeeze() descriptors = np.float32(self.match_model.forward(input_image)) # V4 pick_heatmap = cv2.GaussianBlur(pick_heatmap, (49, 49), 0) place_heatmap = cv2.GaussianBlur(place_heatmap, (49, 49), 0) pick_topk = np.int32( np.unravel_index( np.argsort(pick_heatmap.reshape(-1))[-k:], pick_heatmap.shape)).T pick_pixel = pick_topk[-1, :] from skimage.feature import peak_local_max # pylint: disable=g-import-not-at-top place_peaks = peak_local_max(place_heatmap, num_peaks=1) distances = np.ones((place_peaks.shape[0], self.num_rotations)) * 10 pick_descriptor = descriptors[0, pick_pixel[0], pick_pixel[1], :].reshape(1, -1) for i in range(place_peaks.shape[0]): peak = place_peaks[i, :] place_descriptors = descriptors[:, peak[0], peak[1], :] distances[i, :] = np.linalg.norm( place_descriptors - pick_descriptor, axis=1) ibest = np.unravel_index(np.argmin(distances), shape=distances.shape) p0_pixel = pick_pixel p0_theta = 0 p1_pixel = place_peaks[ibest[0], :] p1_theta = ibest[1] * (2 * np.pi / self.num_rotations) # # V3 # pick_heatmap = cv2.GaussianBlur(pick_heatmap, (49, 49), 0) # place_heatmap = cv2.GaussianBlur(place_heatmap, (49, 49), 0) # pick_topk = np.int32( # np.unravel_index( # np.argsort(pick_heatmap.reshape(-1))[-k:], pick_heatmap.shape)).T # place_topk = np.int32( # np.unravel_index( # np.argsort(place_heatmap.reshape(-1))[-k:], # place_heatmap.shape)).T # pick_pixel = pick_topk[-1, :] # place_pixel = place_topk[-1, :] # pick_descriptor = descriptors[0, pick_pixel[0], # pick_pixel[1], :].reshape(1, -1) # place_descriptor = descriptors[:, place_pixel[0], place_pixel[1], :] # distances = np.linalg.norm(place_descriptor - pick_descriptor, axis=1) # irotation = np.argmin(distances) # p0_pixel = pick_pixel # p0_theta = 0 # p1_pixel = place_pixel # p1_theta = irotation * (2 * np.pi / self.num_rotations) # # V2 # pick_topk = np.int32( # np.unravel_index( # np.argsort(pick_heatmap.reshape(-1))[-k:], pick_heatmap.shape)).T # place_topk = np.int32( # np.unravel_index( # np.argsort(place_heatmap.reshape(-1))[-k:], # place_heatmap.shape)).T # pick_pixel = pick_topk[-1, :] # pick_descriptor = descriptors[0, pick_pixel[0], # pick_pixel[1], :].reshape(1, 1, 1, -1) # distances = np.linalg.norm(descriptors - pick_descriptor, axis=3) # distances = np.transpose(distances, [1, 2, 0]) # max_distance = int(np.round(np.max(distances))) # for i in range(self.num_rotations): # distances[:, :, i] = cv2.circle(distances[:, :, i], # (pick_pixel[1], pick_pixel[0]), 50, # max_distance, -1) # ibest = np.unravel_index(np.argmin(distances), shape=distances.shape) # p0_pixel = pick_pixel # p0_theta = 0 # p1_pixel = ibest[:2] # p1_theta = ibest[2] * (2 * np.pi / self.num_rotations) # # V1 # pick_topk = np.int32( # np.unravel_index( # np.argsort(pick_heatmap.reshape(-1))[-k:], pick_heatmap.shape)).T # place_topk = np.int32( # np.unravel_index( # 
np.argsort(place_heatmap.reshape(-1))[-k:], # place_heatmap.shape)).T # distances = np.zeros((k, k, self.num_rotations)) # for ipick in range(k): # pick_descriptor = descriptors[0, pick_topk[ipick, 0], # pick_topk[ipick, 1], :].reshape(1, -1) # for iplace in range(k): # place_descriptors = descriptors[:, place_topk[iplace, 0], # place_topk[iplace, 1], :] # distances[ipick, iplace, :] = np.linalg.norm( # place_descriptors - pick_descriptor, axis=1) # ibest = np.unravel_index(np.argmin(distances), shape=distances.shape) # p0_pixel = pick_topk[ibest[0], :] # p0_theta = 0 # p1_pixel = place_topk[ibest[1], :] # p1_theta = ibest[2] * (2 * np.pi / self.num_rotations) # Pixels to end effector poses. p0_position = utils.pix_to_xyz(p0_pixel, heightmap, self.bounds, self.pixel_size) p1_position = utils.pix_to_xyz(p1_pixel, heightmap, self.bounds, self.pixel_size) p0_rotation = utils.eulerXYZ_to_quatXYZW((0, 0, -p0_theta)) p1_rotation = utils.eulerXYZ_to_quatXYZW((0, 0, -p1_theta)) act['primitive'] = 'pick_place' if self.task == 'sweeping': act['primitive'] = 'sweep' elif self.task == 'pushing': act['primitive'] = 'push' params = { 'pose0': (p0_position, p0_rotation), 'pose1': (p1_position, p1_rotation) } act['params'] = params return act
def reset(self, env):
  super().reset(env)

  # Add pallet.
  zone_size = (0.3, 0.25, 0.25)
  zone_urdf = 'assets/pallet/pallet.urdf'
  rotation = utils.eulerXYZ_to_quatXYZW((0, 0, 0))
  zone_pose = ((0.5, 0.25, 0.02), rotation)
  env.add_object(zone_urdf, zone_pose, 'fixed')

  # Add stack of boxes on pallet.
  margin = 0.01
  object_ids = []
  object_points = {}
  stack_size = (0.19, 0.19, 0.19)
  box_template = 'assets/box/box-template.urdf'
  stack_dim = np.int32([2, 3, 3])
  # stack_dim = np.random.randint(low=2, high=4, size=3)
  box_size = (stack_size - (stack_dim - 1) * margin) / stack_dim
  for z in range(stack_dim[2]):

    # Transpose every layer.
    stack_dim[0], stack_dim[1] = stack_dim[1], stack_dim[0]
    box_size[0], box_size[1] = box_size[1], box_size[0]

    for y in range(stack_dim[1]):
      for x in range(stack_dim[0]):
        position = (x + 0.5, y + 0.5, z + 0.5) * box_size
        position[0] += x * margin - stack_size[0] / 2
        position[1] += y * margin - stack_size[1] / 2
        position[2] += z * margin + 0.03
        pose = (position, (0, 0, 0, 1))
        pose = utils.multiply(zone_pose, pose)
        urdf = self.fill_template(box_template, {'DIM': box_size})
        box_id = env.add_object(urdf, pose)
        os.remove(urdf)
        object_ids.append((box_id, (0, None)))
        self.color_random_brown(box_id)
        object_points[box_id] = self.get_object_points(box_id)

  # Randomly select top box on pallet and save ground truth pose.
  targets = []
  self.steps = []
  boxes = [i[0] for i in object_ids]
  while boxes:
    _, height, object_mask = self.get_true_image(env)
    top = np.argwhere(height > (np.max(height) - 0.03))
    rpixel = top[int(np.floor(np.random.random() * len(top)))]  # y, x
    box_id = int(object_mask[rpixel[0], rpixel[1]])
    if box_id in boxes:
      position, rotation = p.getBasePositionAndOrientation(box_id)
      rposition = np.float32(position) + np.float32([0, -10, 0])
      p.resetBasePositionAndOrientation(box_id, rposition, rotation)
      self.steps.append(box_id)
      targets.append((position, rotation))
      boxes.remove(box_id)
  self.steps.reverse()  # Time-reversed depalletizing.

  self.goals.append(
      (object_ids, np.eye(len(object_ids)), targets, False, True, 'zone',
       (object_points, [(zone_pose, zone_size)]), 1))

  self.spawn_box()
def act(self, obs, info, compute_error=False, gt_act=None):
  """Run inference and return best action given visual observations."""

  # Get heightmap from RGB-D images.
  colormap, heightmap = self.get_heightmap(obs, self.camera_config)

  # Concatenate color with depth images.
  input_image = np.concatenate(
      (colormap, heightmap[Ellipsis, None], heightmap[Ellipsis, None],
       heightmap[Ellipsis, None]),
      axis=2)

  # Attention model forward pass.
  attention = self.attention_model.forward(input_image)
  argmax = np.argmax(attention)
  argmax = np.unravel_index(argmax, shape=attention.shape)
  p0_pixel = argmax[:2]
  p0_theta = argmax[2] * (2 * np.pi / attention.shape[2])

  # Transport model forward pass.
  transport = self.transport_model.forward(input_image, p0_pixel)
  _, z, roll, pitch = self.rpz_model.forward(input_image, p0_pixel)

  argmax = np.argmax(transport)
  argmax = np.unravel_index(argmax, shape=transport.shape)

  # Index into 3D discrete tensor, grab z, roll, pitch activations.
  z_best = z[:, argmax[0], argmax[1], argmax[2]][Ellipsis, None]
  roll_best = roll[:, argmax[0], argmax[1], argmax[2]][Ellipsis, None]
  pitch_best = pitch[:, argmax[0], argmax[1], argmax[2]][Ellipsis, None]

  # Send through regressors for each of z, roll, pitch.
  z_best = self.rpz_model.z_regressor(z_best)[0, 0]
  roll_best = self.rpz_model.roll_regressor(roll_best)[0, 0]
  pitch_best = self.rpz_model.pitch_regressor(pitch_best)[0, 0]

  p1_pixel = argmax[:2]
  p1_theta = argmax[2] * (2 * np.pi / transport.shape[2])

  # Pixels to end effector poses.
  p0_position = utils.pix_to_xyz(p0_pixel, heightmap, self.bounds,
                                 self.pixel_size)
  p1_position = utils.pix_to_xyz(p1_pixel, heightmap, self.bounds,
                                 self.pixel_size)
  p1_position = (p1_position[0], p1_position[1], z_best)

  p0_rotation = utils.eulerXYZ_to_quatXYZW((0, 0, -p0_theta))
  p1_rotation = utils.eulerXYZ_to_quatXYZW(
      (roll_best, pitch_best, -p1_theta))

  if compute_error:
    gt_p0_position, gt_p0_rotation = gt_act['params']['pose0']
    gt_p1_position, gt_p1_rotation = gt_act['params']['pose1']

    gt_p0_pixel = np.array(
        utils.xyz_to_pix(gt_p0_position, self.bounds, self.pixel_size))
    gt_p1_pixel = np.array(
        utils.xyz_to_pix(gt_p1_position, self.bounds, self.pixel_size))

    self.p0_pixel_error(np.linalg.norm(gt_p0_pixel - np.array(p0_pixel)))
    self.p1_pixel_error(np.linalg.norm(gt_p1_pixel - np.array(p1_pixel)))

    gt_p0_theta = -np.float32(utils.quatXYZW_to_eulerXYZ(gt_p0_rotation)[2])
    gt_p1_theta = -np.float32(utils.quatXYZW_to_eulerXYZ(gt_p1_rotation)[2])

    self.p0_theta_error(
        abs((np.rad2deg(gt_p0_theta - p0_theta) + 180) % 360 - 180))
    self.p1_theta_error(
        abs((np.rad2deg(gt_p1_theta - p1_theta) + 180) % 360 - 180))

    return None

  return {
      'pose0': (p0_position, p0_rotation),
      'pose1': (p1_position, p1_rotation)
  }
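# Quick numeric check of the angular-error formula above (illustrative only):
# the raw degree difference is wrapped into [-180, 180) before abs(), so a
# 350-degree ground truth vs. a 10-degree prediction counts as 20 degrees
# rather than 340.
import numpy as np

gt_theta = np.deg2rad(350.0)
pred_theta = np.deg2rad(10.0)
err = abs((np.rad2deg(gt_theta - pred_theta) + 180) % 360 - 180)
print(err)  # ~20.0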
def reset(self, env):
  super().reset(env)

  # Add kit.
  kit_size = (0.28, 0.2, 0.005)
  kit_urdf = 'assets/kitting/kit.urdf'
  kit_pose = self.get_random_pose(env, kit_size)
  env.add_object(kit_urdf, kit_pose, 'fixed')

  n_objects = 5
  if self.mode == 'train':
    obj_shapes = np.random.choice(self.train_set, n_objects)
  else:
    if self.homogeneous:
      obj_shapes = [np.random.choice(self.test_set)] * n_objects
    else:
      obj_shapes = np.random.choice(self.test_set, n_objects)

  colors = [
      utils.COLORS['purple'], utils.COLORS['blue'], utils.COLORS['green'],
      utils.COLORS['yellow'], utils.COLORS['red']
  ]

  symmetry = [
      2 * np.pi, 2 * np.pi, 2 * np.pi / 3, np.pi / 2, np.pi / 2, 2 * np.pi,
      np.pi, 2 * np.pi / 5, np.pi, np.pi / 2, 2 * np.pi / 5, 0, 2 * np.pi,
      2 * np.pi, 2 * np.pi, 2 * np.pi, 0, 2 * np.pi / 6, 2 * np.pi, 2 * np.pi
  ]

  # Build kit.
  targets = []
  targ_pos = [[-0.09, 0.045, 0.0014], [0, 0.045, 0.0014],
              [0.09, 0.045, 0.0014], [-0.045, -0.045, 0.0014],
              [0.045, -0.045, 0.0014]]
  template = 'assets/kitting/object-template.urdf'
  for i in range(n_objects):
    shape = f'{obj_shapes[i]:02d}.obj'
    scale = [0.003, 0.003, 0.0001]  # .0005
    pos = utils.apply(kit_pose, targ_pos[i])
    theta = np.random.rand() * 2 * np.pi
    rot = utils.eulerXYZ_to_quatXYZW((0, 0, theta))
    replace = {'FNAME': (shape,), 'SCALE': scale, 'COLOR': (0.2, 0.2, 0.2)}
    urdf = self.fill_template(template, replace)
    env.add_object(urdf, (pos, rot), 'fixed')
    os.remove(urdf)
    targets.append((pos, rot))

  # Add objects.
  objects = []
  matches = []
  # objects, syms, matcheses = [], [], []
  for i in range(n_objects):
    shape = obj_shapes[i]
    size = (0.08, 0.08, 0.02)
    pose = self.get_random_pose(env, size)
    fname = f'{shape:02d}.obj'
    scale = [0.003, 0.003, 0.001]  # .0005
    replace = {'FNAME': (fname,), 'SCALE': scale, 'COLOR': colors[i]}
    urdf = self.fill_template(template, replace)
    block_id = env.add_object(urdf, pose)
    os.remove(urdf)
    objects.append((block_id, (symmetry[shape], None)))
    # objects[block_id] = symmetry[shape]
    match = np.zeros(len(targets))
    match[np.argwhere(obj_shapes == shape).reshape(-1)] = 1
    matches.append(match)
    # matches.append(list(np.argwhere(obj_shapes == shape).reshape(-1)))
  matches = np.int32(matches)

  # Add goal.
  # self.goals.append((objects, syms, targets, 'matches', 'pose', 1.))

  # Goal: objects are placed in their respective kit locations.
  self.goals.append((objects, matches, targets, False, True, 'pose', None, 1))
def act(obs, info):  # pylint: disable=unused-argument
  """Calculate action."""

  # Oracle uses perfect RGB-D orthographic images and segmentation masks.
  _, hmap, obj_mask = self.get_true_image(env)

  # Unpack next goal step.
  objs, matches, targs, replace, rotations, _, _, _ = self.goals[0]

  # Match objects to targets without replacement.
  if not replace:

    # Modify a copy of the match matrix.
    matches = matches.copy()

    # Ignore already matched objects.
    for i in range(len(objs)):
      object_id, (symmetry, _) = objs[i]
      pose = p.getBasePositionAndOrientation(object_id)
      targets_i = np.argwhere(matches[i, :]).reshape(-1)
      for j in targets_i:
        if self.is_match(pose, targs[j], symmetry):
          matches[i, :] = 0
          matches[:, j] = 0

  # Get objects to be picked (prioritize farthest from nearest neighbor).
  nn_dists = []
  nn_targets = []
  for i in range(len(objs)):
    object_id, (symmetry, _) = objs[i]
    xyz, _ = p.getBasePositionAndOrientation(object_id)
    targets_i = np.argwhere(matches[i, :]).reshape(-1)
    if len(targets_i) > 0:  # pylint: disable=g-explicit-length-test
      targets_xyz = np.float32([targs[j][0] for j in targets_i])
      dists = np.linalg.norm(
          targets_xyz - np.float32(xyz).reshape(1, 3), axis=1)
      nn = np.argmin(dists)
      nn_dists.append(dists[nn])
      nn_targets.append(targets_i[nn])

    # Handle ignored objects.
    else:
      nn_dists.append(0)
      nn_targets.append(-1)
  order = np.argsort(nn_dists)[::-1]

  # Filter out matched objects.
  order = [i for i in order if nn_dists[i] > 0]

  pick_mask = None
  for pick_i in order:
    pick_mask = np.uint8(obj_mask == objs[pick_i][0])

    # Erode to avoid picking on edges.
    # pick_mask = cv2.erode(pick_mask, np.ones((3, 3), np.uint8))

    if np.sum(pick_mask) > 0:
      break

  # Trigger task reset if no object is visible.
  if pick_mask is None or np.sum(pick_mask) == 0:
    self.goals = []
    print('Object for pick is not visible. Skipping demonstration.')
    return

  # Get picking pose.
  pick_prob = np.float32(pick_mask)
  pick_pix = utils.sample_distribution(pick_prob)
  # For "deterministic" demonstrations on insertion-easy, use this:
  # pick_pix = (160, 80)
  pick_pos = utils.pix_to_xyz(pick_pix, hmap, self.bounds, self.pix_size)
  pick_pose = (pick_pos, (0, 0, 0, 1))

  # Get placing pose.
  targ_pose = targs[nn_targets[pick_i]]  # pylint: disable=undefined-loop-variable
  obj_pose = p.getBasePositionAndOrientation(objs[pick_i][0])  # pylint: disable=undefined-loop-variable
  if not self.sixdof:
    obj_euler = utils.quatXYZW_to_eulerXYZ(obj_pose[1])
    obj_quat = utils.eulerXYZ_to_quatXYZW((0, 0, obj_euler[2]))
    obj_pose = (obj_pose[0], obj_quat)
  world_to_pick = utils.invert(pick_pose)
  obj_to_pick = utils.multiply(world_to_pick, obj_pose)
  pick_to_obj = utils.invert(obj_to_pick)
  place_pose = utils.multiply(targ_pose, pick_to_obj)

  # Rotate end effector?
  if not rotations:
    place_pose = (place_pose[0], (0, 0, 0, 1))

  return {'pose0': pick_pose, 'pose1': place_pose}