def Csv_6D_pose(rgb_img, depth_img):
    iteration = 4
    bs = 1
    # knn = KNearestNeighbor(1)
    points, choose, img = testdataset.getitem_by_array(rgb_img, depth_img)
    if choose.ndim < 3:
        return []
    # print("choose.ndim =", choose.ndim)
    obj_id = torch.LongTensor([0]).unsqueeze(0)
    points, choose, img, obj_id = Variable(points).cuda(), Variable(choose).cuda(), \
        Variable(img).cuda(), Variable(obj_id).cuda()

    pred_r, pred_t, pred_c, emb = estimator(img, points, choose, obj_id)
    pred_r = pred_r / torch.norm(pred_r, dim=2).view(1, num_points, 1)
    pred_c = pred_c.view(bs, num_points)
    how_max, which_max = torch.max(pred_c, 1)
    pred_t = pred_t.view(bs * num_points, 1, 3)
    my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy()
    my_t = (points.view(bs * num_points, 1, 3) + pred_t)[which_max[0]].view(-1).cpu().data.numpy()
    my_pred = np.append(my_r, my_t)

    for ite in range(0, iteration):
        T = Variable(torch.from_numpy(my_t.astype(np.float32))).cuda().view(1, 3).repeat(
            num_points, 1).contiguous().view(1, num_points, 3)
        my_mat = quaternion_matrix(my_r)
        R = Variable(torch.from_numpy(my_mat[:3, :3].astype(np.float32))).cuda().view(1, 3, 3)
        my_mat[0:3, 3] = my_t

        new_points = torch.bmm((points - T), R).contiguous()
        pred_r, pred_t = refiner(new_points, emb, obj_id)
        pred_r = pred_r.view(1, 1, -1)
        pred_r = pred_r / (torch.norm(pred_r, dim=2).view(1, 1, 1))
        my_r_2 = pred_r.view(-1).cpu().data.numpy()
        my_t_2 = pred_t.view(-1).cpu().data.numpy()
        my_mat_2 = quaternion_matrix(my_r_2)
        my_mat_2[0:3, 3] = my_t_2

        my_mat_final = np.dot(my_mat, my_mat_2)
        my_r_final = copy.deepcopy(my_mat_final)
        my_r_final[0:3, 3] = 0
        my_r_final = quaternion_from_matrix(my_r_final, True)
        my_t_final = np.array([my_mat_final[0][3], my_mat_final[1][3], my_mat_final[2][3]])

        my_pred = np.append(my_r_final, my_t_final)
        my_r = my_r_final
        my_t = my_t_final

    print("final prediction: quaternion + translation")
    my_pred = np.asarray(my_pred, dtype='float')
    print(list(my_pred))
    return list(my_pred)
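# The refinement loop above repeatedly composes the current pose with the delta
# pose predicted by the refiner network. Below is a minimal, self-contained
# sketch of that composition step using only numpy and scipy; the original code
# relies on quaternion_matrix / quaternion_from_matrix from transformations.py,
# which use the [w, x, y, z] quaternion order. compose_pose and _pose_to_mat are
# hypothetical helpers for illustration, not part of the original code.
import numpy as np
from scipy.spatial.transform import Rotation


def _pose_to_mat(quat_wxyz, t):
    """Build a 4x4 homogeneous matrix from a [w, x, y, z] quaternion and a translation."""
    mat = np.eye(4)
    # scipy expects [x, y, z, w], so roll the scalar term to the back
    mat[:3, :3] = Rotation.from_quat(np.roll(quat_wxyz, -1)).as_matrix()
    mat[:3, 3] = t
    return mat


def compose_pose(quat_wxyz, t, delta_quat_wxyz, delta_t):
    """Right-multiply the current pose by the predicted delta and split the result
    back into a [w, x, y, z] quaternion and a translation, mirroring the loop above."""
    final = _pose_to_mat(quat_wxyz, t) @ _pose_to_mat(delta_quat_wxyz, delta_t)
    quat_xyzw = Rotation.from_matrix(final[:3, :3]).as_quat()
    return np.roll(quat_xyzw, 1), final[:3, 3]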
def update_PROJ(self, ned, yaw_rad, pitch_rad, roll_rad):
    cam_yaw, cam_pitch, cam_roll = self.get_ypr()
    body2cam = transformations.quaternion_from_euler(
        cam_yaw * d2r, cam_pitch * d2r, cam_roll * d2r, 'rzyx')

    # this function modifies the parameters you pass in, so to avoid getting our
    # data changed out from under us, force copies
    # (a = b wasn't sufficient, but a = float(b) forced a copy).
    tmp_yaw = float(yaw_rad)
    tmp_pitch = float(pitch_rad)
    tmp_roll = float(roll_rad)
    ned2body = transformations.quaternion_from_euler(
        tmp_yaw, tmp_pitch, tmp_roll, 'rzyx')
    #body2ned = transformations.quaternion_inverse(ned2body)
    #print('ned2body(q):', ned2body)

    ned2cam_q = transformations.quaternion_multiply(ned2body, body2cam)
    ned2cam = np.matrix(
        transformations.quaternion_matrix(np.array(ned2cam_q))[:3, :3]).T
    #print('ned2cam:', ned2cam)
    R = ned2proj.dot(ned2cam)
    rvec, jac = cv2.Rodrigues(R)
    tvec = -np.matrix(R) * np.matrix(ned).T
    # converting back from the rotation vector recovers the same rotation,
    # re-orthonormalized by OpenCV
    R, jac = cv2.Rodrigues(rvec)
    self.PROJ = np.concatenate((R, tvec), axis=1)
    #print('PROJ:', PROJ)
    #print(lat_deg, lon_deg, altitude, ref[0], ref[1], ref[2])
    #print(ned)
    return self.PROJ
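# update_PROJ() assembles a 3x4 extrinsic matrix [R | t] that maps NED
# coordinates into the camera frame. A minimal sketch of how such a matrix is
# typically combined with a pinhole intrinsic matrix K to project a world point
# to pixel coordinates; K, fx/fy/cx/cy, and the helper name are assumptions for
# illustration and are not part of this class.
import numpy as np


def project_ned_point(PROJ, K, p_ned):
    """Project a 3D NED point through extrinsics PROJ (3x4) and intrinsics K (3x3)."""
    P = np.asarray(PROJ, dtype=float)
    p_h = np.append(np.asarray(p_ned, dtype=float), 1.0)  # homogeneous point
    uvw = np.asarray(K, dtype=float) @ (P @ p_h)           # camera frame -> image plane
    return uvw[:2] / uvw[2]                                 # perspective divide


# usage sketch (assumed intrinsics):
# K = np.array([[fx, 0, cx], [0, fy, cy], [0, 0, 1]])
# u, v = project_ned_point(self.PROJ, K, [north, east, down])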
def refine(self, obj_id, pose, emb, cloud, iterations=1):
    class_id = self.obj_list.index(obj_id)
    index = torch.LongTensor([class_id])
    index = Variable(index).cuda()
    pose_ref = pose.copy()
    for _ in range(iterations):
        # transform cloud according to pose estimate
        R = Variable(torch.from_numpy(pose_ref[:3, :3].astype(np.float32))).cuda().view(1, 3, 3)
        ts = Variable(torch.from_numpy(pose_ref[:3, 3].astype(np.float32))).cuda().view(1, 3)\
            .repeat(self.num_points, 1).contiguous().view(1, self.num_points, 3)
        new_cloud = torch.bmm((cloud - ts), R).contiguous()

        # predict delta pose
        pred_q, pred_t = self.refiner(new_cloud, emb, index)
        pred_q = pred_q.view(1, 1, -1)
        pred_q = pred_q / (torch.norm(pred_q, dim=2).view(1, 1, 1))
        pred_q = pred_q.view(-1).cpu().data.numpy()
        pred_t = pred_t.view(-1).cpu().data.numpy()

        # apply delta to get new pose estimate
        pose_delta = quaternion_matrix(pred_q)
        pose_delta[:3, 3] = pred_t
        pose_ref = pose_ref @ pose_delta

    return pose_ref
def run_epoch(model, loss_fn, loader, dtype, rot_repr, loss_type="regression",
              train=True, model_points=None, optimizer=None):
    """
    Run the model for one epoch: optimize when train=True, otherwise evaluate.
    """
    total_distance, total_loss, num_samples = 0.0, 0.0, 0.0
    if train:
        model.train()
    else:
        model.eval()
    avg_dists = []
    for i, data in enumerate(loader, 0):
        img, depth, boxes2d, boxes2d_proj, boxes3d, label, pose_r, pose_t, pose, cam, idx = data
        x_var = Variable(img.type(dtype))
        scores = model(x_var)
        gts = (pose_r.type(dtype), pose_t.type(dtype), pose.type(dtype),
               boxes2d.type(dtype), boxes2d_proj.type(dtype), boxes3d.type(dtype))
        loss = calc_loss(scores, loss_fn, rot_repr, dtype, gts, loss_type=loss_type)
        total_loss += loss.item()
        if train:
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        else:
            preds_rot = rotReprToRotMat(scores, rot_repr)
            for j in range(preds_rot.shape[0]):
                rot1 = preds_rot[j].detach().cpu().numpy()
                rot1_4 = np.eye(4)
                rot1_4[:3, :3] = rot1
                rot2 = quaternion_matrix(pose_r[j])
                dist = compare_rotation(model_points, rot1_4[:3, :4], rot2[:3, :4])
                avg_dists.append(dist)
                num_samples += 1
    avg_loss = total_loss / len(loader)  # average loss over the number of batches
    if train:
        return avg_loss
    else:
        avg_dist = np.sum(avg_dists) / len(avg_dists)
        return avg_dist, avg_loss
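# run_epoch() scores each predicted rotation against the ground truth with
# compare_rotation(). A plausible minimal implementation of such a comparison is
# the ADD-style mean distance between the model points transformed by the two
# 3x4 poses; this is an assumption for illustration, the project's real helper
# may compute the distance differently.
import numpy as np


def compare_rotation_sketch(model_points, pose1_3x4, pose2_3x4):
    """Mean Euclidean distance between model points transformed by two [R | t] poses."""
    pts = np.asarray(model_points, dtype=float)            # (N, 3) model points
    pts_h = np.hstack([pts, np.ones((pts.shape[0], 1))])   # (N, 4) homogeneous points
    p1 = pts_h @ np.asarray(pose1_3x4).T                    # (N, 3) under pose 1
    p2 = pts_h @ np.asarray(pose2_3x4).T                    # (N, 3) under pose 2
    return np.mean(np.linalg.norm(p1 - p2, axis=1))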
def get_pose(root_dset, obj_category, item, art_index, frame_order, mode='train', num_parts=5):
    # pose infos
    parts_world_pos = [None] * num_parts
    parts_world_orn = [None] * num_parts
    parts_model2world = [None] * num_parts
    if mode == 'demo':
        sub_dir0 = root_dset + '/demo/' + obj_category + '/' + item + '/' + art_index
    else:
        sub_dir0 = root_dset + '/render/' + obj_category + '/' + item + '/' + art_index
    with open(sub_dir0 + '/gt.yml', 'r') as meta_file:
        meta_instance = yaml.safe_load(meta_file)
    pose_dict = meta_instance['frame_{}'.format(frame_order)]
    viewMat = np.array(pose_dict['viewMat']).reshape(4, 4).T
    projMat = np.array(pose_dict['projMat']).reshape(4, 4).T

    parts_world_pos[0] = np.array([0, 0, 0])
    parts_world_orn[0] = np.array([0, 0, 0, 1])
    for link in range(0, num_parts):
        # parts_urdf_box[link] = np.array(urdf_dict['link']['box'][link])
        if link > 0:
            parts_world_pos[link] = np.array(
                pose_dict['obj'][link - 1][4]).astype(np.float32)
            parts_world_orn[link] = np.array(
                pose_dict['obj'][link - 1][5]).astype(np.float32)

    # manual correction
    if obj_category == 'bike':
        parts_world_pos[1], parts_world_pos[2] = parts_world_pos[2], parts_world_pos[1]
        parts_world_orn[1], parts_world_orn[2] = parts_world_orn[2], parts_world_orn[1]

    # target rotation, translation, and target points
    for k in range(num_parts):
        # matrix computation: reorder the quaternion to [w, x, y, z] before
        # building the 4x4 rotation matrix
        center_world_orn = parts_world_orn[k]
        center_world_orn = np.array([
            center_world_orn[3], center_world_orn[0], center_world_orn[1],
            center_world_orn[2]
        ])
        my_model2world_r = quaternion_matrix(center_world_orn)[:4, :4]
        my_model2world_t = parts_world_pos[k]
        my_model2world_mat = my_model2world_r
        for m in range(3):
            my_model2world_mat[m, 3] = my_model2world_t[m]
        my_world2camera_mat = viewMat
        my_camera2clip_mat = projMat
        my_model2camera_mat = np.dot(my_world2camera_mat, my_model2world_mat)
        parts_model2world[k] = my_model2world_mat

    return parts_model2world, viewMat, projMat
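# get_pose() returns per-part model->world matrices together with the renderer's
# view and projection matrices. A small sketch of carrying canonical part points
# into the camera frame with those outputs; the helper name and usage are
# assumptions for illustration, not taken from the original file.
import numpy as np


def model_points_to_camera(points_model, model2world, viewMat):
    """Map (N, 3) canonical part points into the camera frame."""
    pts_h = np.hstack([points_model, np.ones((points_model.shape[0], 1))])  # (N, 4)
    model2camera = viewMat @ model2world                                     # chain the transforms
    return (model2camera @ pts_h.T).T[:, :3]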
def refinePose(self, emb, cloud, object_label, init_t, init_r, iterations=2):
    init_t = init_t.cpu().data.numpy()
    init_r = init_r.cpu().data.numpy()
    for ite in range(0, iterations):
        T = Variable(torch.from_numpy(init_t.astype(np.float32))).cuda().view(1, 3).repeat(
            self.num_points, 1).contiguous().view(1, self.num_points, 3)
        init_mat = quaternion_matrix(init_r)
        R = Variable(torch.from_numpy(init_mat[:3, :3].astype(np.float32))).cuda().view(1, 3, 3)
        init_mat[0:3, 3] = init_t

        # transform the cloud into the frame of the current pose estimate
        new_cloud = torch.bmm((cloud - T), R).contiguous()
        pred_r, pred_t = self.refiner(new_cloud, emb, object_label)
        pred_r = pred_r.view(1, 1, -1)
        pred_r = pred_r / (torch.norm(pred_r, dim=2).view(1, 1, 1))
        delta_r = pred_r.view(-1).cpu().data.numpy()
        delta_t = pred_t.view(-1).cpu().data.numpy()
        delta_mat = quaternion_matrix(delta_r)
        delta_mat[0:3, 3] = delta_t

        # compose the predicted delta with the current estimate
        refined_mat = np.dot(init_mat, delta_mat)
        refined_r = copy.deepcopy(refined_mat)
        refined_r[0:3, 3] = 0
        refined_r = quaternion_from_matrix(refined_r, True)
        refined_t = np.array(
            [refined_mat[0][3], refined_mat[1][3], refined_mat[2][3]])

        # feed the refined estimate into the next iteration
        init_r = refined_r
        init_t = refined_t
    # return the refined translation and rotation (quaternion)
    return refined_t, refined_r
def refine_posenet(self, refine_args): iteration = refine_args.iteration my_t, my_r = refine_args.t, refine_args.r num_points = refine_args.num_points cloud = refine_args.cloud refiner = refine_args.refiner_network emb = refine_args.emb index = refine_args.index for ite in range(0, iteration): T = Variable(torch.from_numpy( my_t.astype(np.float32))).cuda().view(1, 3).repeat( num_points, 1).contiguous().view(1, num_points, 3) my_mat = quaternion_matrix(my_r) R = Variable(torch.from_numpy(my_mat[:3, :3].astype( np.float32))).cuda().view(1, 3, 3) my_mat[0:3, 3] = my_t new_cloud = torch.bmm((cloud - T), R).contiguous() time_refiner = time.time() pred_r, pred_t = refiner(new_cloud, emb, index) print("--- RE %s seconds ---" % (time.time() - time_refiner)) pred_r = pred_r.view(1, 1, -1) pred_r = pred_r / (torch.norm(pred_r, dim=2).view(1, 1, 1)) my_r_2 = pred_r.view(-1).cpu().data.numpy() my_t_2 = pred_t.view(-1).cpu().data.numpy() my_mat_2 = quaternion_matrix(my_r_2) my_mat_2[0:3, 3] = my_t_2 my_mat_final = np.dot(my_mat, my_mat_2) my_r_final = copy.deepcopy(my_mat_final) my_r_final[0:3, 3] = 0 my_r_final = quaternion_from_matrix(my_r_final, True) my_t_final = np.array( [my_mat_final[0][3], my_mat_final[1][3], my_mat_final[2][3]]) my_pred = np.append(my_r_final, my_t_final) my_r = my_r_final my_t = my_t_final
def quatToRotRepr(quat, rot_repr, input=None):
    # build a scipy Rotation from the quaternion for the rotvec/euler branches
    rot = Rotation.from_quat(quat.reshape(-1))
    if rot_repr == "quat":
        return quat.reshape(-1).numpy()
    elif rot_repr == "mat":
        return quaternion_matrix(quat)[:3, :3].reshape(-1)
    elif rot_repr == "bbox":
        return input.reshape(-1).numpy()
    elif rot_repr == "rodr":
        rotvec = rot.as_rotvec().reshape(-1)
        return rotvec
    elif rot_repr == "euler":
        angles = rot.as_euler('xzy', degrees=False)
        angles = normalizeEulerAngle(angles)
        angles = angles.copy()
        return angles.reshape(-1)
    else:
        raise ValueError("Unknown rot_repr: %s" % rot_repr)
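# quatToRotRepr() converts a quaternion into the representation the network is
# trained on. For reference, a minimal single-sample sketch of the reverse
# mapping; the project's own rotReprToRotMat operates on batched network outputs
# and may differ, and the "bbox" representation is omitted here because it needs
# the box geometry. This helper is an assumption for illustration only.
import numpy as np
from scipy.spatial.transform import Rotation


def rot_repr_to_mat_sketch(repr_vec, rot_repr):
    """Convert a single rotation representation back to a 3x3 rotation matrix."""
    repr_vec = np.asarray(repr_vec, dtype=float).reshape(-1)
    if rot_repr == "quat":
        return Rotation.from_quat(repr_vec).as_matrix()
    elif rot_repr == "mat":
        return repr_vec.reshape(3, 3)
    elif rot_repr == "rodr":
        return Rotation.from_rotvec(repr_vec).as_matrix()
    elif rot_repr == "euler":
        # same 'xzy' axis order as quatToRotRepr above
        return Rotation.from_euler('xzy', repr_vec, degrees=False).as_matrix()
    else:
        raise ValueError("Unknown rot_repr: %s" % rot_repr)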
pred_c = pred_c.view(bs, num_points) how_max, which_max = torch.max(pred_c, 1) pred_t = pred_t.view(bs * num_points, 1, 3) points = cloud.view(bs * num_points, 1, 3) my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy() my_t = (points + pred_t)[which_max[0]].view(-1).cpu().data.numpy() my_pred = np.append(my_r, my_t) my_result_wo_refine.append(my_pred.tolist()) for ite in range(0, iteration): T = Variable(torch.from_numpy( my_t.astype(np.float32))).cuda().view(1, 3).repeat( num_points, 1).contiguous().view(1, num_points, 3) my_mat = quaternion_matrix(my_r) R = Variable(torch.from_numpy(my_mat[:3, :3].astype( np.float32))).cuda().view(1, 3, 3) my_mat[0:3, 3] = my_t new_cloud = torch.bmm((cloud - T), R).contiguous() pred_r, pred_t = refiner(new_cloud, emb, index) pred_r = pred_r.view(1, 1, -1) pred_r = pred_r / (torch.norm(pred_r, dim=2).view(1, 1, 1)) my_r_2 = pred_r.view(-1).cpu().data.numpy() my_t_2 = pred_t.view(-1).cpu().data.numpy() my_mat_2 = quaternion_matrix(my_r_2) my_mat_2[0:3, 3] = my_t_2 my_mat_final = np.dot(my_mat, my_mat_2) my_r_final = copy.deepcopy(my_mat_final) my_r_final[0:3, 3] = 0
def __preprocess_and_save__(self, index): obj_category = self.list_obj[index] ins = self.list_instance[index] obj = self.objnamelist.index(obj_category) art_status = self.list_status[index] frame_order = self.list_rank[index] label = self.list_label[index] h5_save_path = (self.root_dir + "/hdf5/" + obj_category + "/" + ins + "/" + art_status) if not os.path.exists(h5_save_path): os.makedirs(h5_save_path) h5_save_name = h5_save_path + "/{}.h5".format(frame_order) num_parts = self.urdf_dict[obj_category][ins]["num_links"] new_num_parts = num_parts - 1 model_offsets = self.urdf_dict[obj_category][ins]["link"] joint_offsets = self.urdf_dict[obj_category][ins]["joint"] parts_model_point = [None] * new_num_parts parts_world_point = [None] * new_num_parts parts_target_point = [None] * new_num_parts parts_cloud_cam = [None] * new_num_parts parts_cloud_world = [None] * new_num_parts parts_cloud_canon = [None] * new_num_parts parts_cloud_urdf = [None] * new_num_parts parts_cloud_norm = [None] * new_num_parts parts_world_pos = [None] * new_num_parts parts_world_orn = [None] * new_num_parts parts_urdf_pos = [None] * new_num_parts parts_urdf_orn = [None] * new_num_parts parts_urdf_box = [None] * new_num_parts parts_model2world = [None] * new_num_parts parts_canon2urdf = [None] * new_num_parts parts_target_r = [None] * new_num_parts parts_target_t = [None] * new_num_parts parts_mask = [None] * new_num_parts choose_x = [None] * new_num_parts choose_y = [None] * new_num_parts choose_to_whole = [None] * new_num_parts # rgb/depth/label print("current image: ", self.list_rgb[index]) img = Image.open(self.list_rgb[index]) img = np.array(img) # .astype(np.uint8) depth = np.array(h5py.File(self.list_depth[index], "r")["data"]) label = np.array(Image.open(self.list_label[index])) # pose infos pose_dict = self.meta_dict[obj_category][ins][art_status][ "frame_{}".format(frame_order)] urdf_dict = self.urdf_dict[obj_category][ins] viewMat = np.array(pose_dict["viewMat"]).reshape(4, 4).T projMat = np.array(pose_dict["projMat"]).reshape(4, 4).T parts_world_pos[0] = np.array([0, 0, 0]) parts_world_orn[0] = np.array([0, 0, 0, 1]) # SAPIEN: skip the virtual base part for link in range(1, num_parts): if link > 0: parts_world_pos[link - 1] = np.array( pose_dict["obj"][link - 1][4]).astype(np.float32) parts_world_orn[link - 1] = np.array( pose_dict["obj"][link - 1][5]).astype(np.float32) for link in range(1, num_parts): if link == 1 and num_parts == 2: parts_urdf_pos[link] = np.array(urdf_dict["joint"]["xyz"][ link - 1]) # todo, accumulate joints offsets != link offsets else: # Only change this branch for SAPIEN data parts_urdf_pos[link - 1] = -np.array( urdf_dict["link"]["xyz"][link][0]) parts_urdf_orn[link - 1] = np.array( urdf_dict["link"]["rpy"][link][0]) for k in range(1, num_parts): center_world_orn = parts_world_orn[k - 1] center_world_orn = np.array([ center_world_orn[3], center_world_orn[0], center_world_orn[1], center_world_orn[2], ]) my_model2world_r = quaternion_matrix( center_world_orn)[:4, :4] # [w, x, y, z] my_model2world_t = parts_world_pos[k - 1] my_model2world_mat = np.copy(my_model2world_r) for m in range(3): my_model2world_mat[m, 3] = my_model2world_t[m] my_world2camera_mat = viewMat my_camera2clip_mat = projMat my_model2camera_mat = np.dot(my_world2camera_mat, my_model2world_mat) parts_model2world[k - 1] = my_model2world_mat # depth to cloud data # SAPIEN, throw away the label 0 for virtual base link mask = np.array((label[:, :] < num_parts) & (label[:, :] > 0)).astype( np.uint8) mask_whole = 
np.copy(mask) for n in range(1, num_parts): parts_mask[n - 1] = np.array((label[:, :] == (n))).astype(np.uint8) choose_to_whole[n - 1] = np.where(parts_mask[n - 1] > 0) # >>>>>>>>>>------- rendering target pcloud from depth image --------<<<<<<<<<# # first get projected map # ymap = self.ymap # xmap = self.xmap # h = self.height # w = self.width # u_map = ymap * 2 / w - 1 # v_map = (512 - xmap) * 2 / h - 1 # v1_map = xmap * 2 / h - 1 # w_channel = -depth # projected_map = np.stack( # [u_map * w_channel, v_map * w_channel, depth, w_channel] # ).transpose([1, 2, 0]) # projected_map1 = np.stack( # [u_map * w_channel, v1_map * w_channel, depth, w_channel] # ).transpose([1, 2, 0]) for s in range(1, num_parts): x_set, y_set = choose_to_whole[s - 1] if len(x_set) < 10: print(ins, art_status, frame_order) print( "data is empty, skipping!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" ) return None else: choose_x[s - 1] = x_set choose_y[s - 1] = y_set # ---------------> from projected map into target part_cloud # order: cam->world->canon) # MotionNet Modification point_num = np.shape(choose_x[s - 1])[0] cloud_cam = [] cloud_cam_real = [] for i in range(point_num): x = choose_x[s - 1][i] y = choose_y[s - 1][i] y_real = 512 - y cloud_cam.append([ (y + projMat[0, 2]) * depth[x, y] / projMat[0, 0], -(x + projMat[1, 2]) * depth[x, y] / (-projMat[1, 1]), -depth[x, y], ]) cloud_cam_real.append([ (y + projMat[0, 2]) * depth[x, y] / projMat[0, 0], -(x + projMat[1, 2]) * depth[x, y] / (-projMat[1, 1]), -depth[x, y], ]) # cloud_cam.append( # [ # (x + projMat[0, 2]) * depth[x, y_real] / projMat[0, 0], # -(y_real + projMat[1, 2]) * depth[x, y_real] / (-projMat[1, 1]), # -depth[x, y_real], # ] # ) # pdb.set_trace() # print(cloud_cam) # break cloud_cam = np.array(cloud_cam) cloud_cam_real = np.array(cloud_cam_real) # pdb.set_trace() # MotionNet Modification # projected_points = projected_map[choose_x[s-1][:].astype(np.uint16), choose_y[s-1][:].astype(np.uint16), :] # projected_points = np.reshape(projected_points, [-1, 4]) # depth_channel = - projected_points[:, 3:4] # cloud_cam = np.dot(projected_points[:, 0:2] - np.dot(depth_channel, projMat[0:2, 2:3].T), np.linalg.pinv(projMat[:2, :2].T)) # projected_points1 = projected_map1[choose_x[s-1][:].astype(np.uint16), choose_y[s-1][:].astype(np.uint16), :] # projected_points1 = np.reshape(projected_points1, [-1, 4]) # cloud_cam_real = np.dot(projected_points1[:, 0:2] - np.dot(depth_channel, projMat[0:2, 2:3].T), np.linalg.pinv(projMat[:2, :2].T)) # cloud_cam_real = np.concatenate((cloud_cam_real, depth_channel), axis=1) # cloud_cam = np.concatenate((cloud_cam, depth_channel), axis=1) cloud_cam_full = np.concatenate( (cloud_cam, np.ones((cloud_cam.shape[0], 1))), axis=1) # modify, todo camera_pose_mat = np.linalg.pinv(viewMat.T) # MotionNet modification # camera_pose_mat[:3, :] = -camera_pose_mat[:3, :] cloud_world = np.dot(cloud_cam_full, camera_pose_mat) cloud_canon = np.dot(cloud_world, np.linalg.pinv(parts_model2world[s - 1].T)) # canon points should be points coordinates centered in the inertial frame parts_cloud_cam[s - 1] = cloud_cam_real[:, :3] parts_cloud_world[s - 1] = cloud_world[:, :3] parts_cloud_canon[s - 1] = cloud_canon[:, :3] for k in range(1, num_parts): center_joint_orn = parts_urdf_orn[k - 1] my_canon2urdf_r = euler_matrix( center_joint_orn[0], center_joint_orn[1], center_joint_orn[2])[:4, :4] # [w, x, y, z] my_canon2urdf_t = parts_urdf_pos[k - 1] my_canon2urdf_mat = my_canon2urdf_r for m in range(3): my_canon2urdf_mat[m, 3] = 
my_canon2urdf_t[m] part_points_space = np.concatenate( ( parts_cloud_canon[k - 1], np.ones((parts_cloud_canon[k - 1].shape[0], 1)), ), axis=1, ) parts_cloud_urdf[k - 1] = np.dot(part_points_space, my_canon2urdf_mat.T) # >>>>>>>>>>>>>>> go to NPCS space # NPCS here is not stored into hdf5 for link in range(1, num_parts): tight_w = max(parts_cloud_urdf[link - 1][:, 0]) - min( parts_cloud_urdf[link - 1][:, 0]) tight_l = max(parts_cloud_urdf[link - 1][:, 1]) - min( parts_cloud_urdf[link - 1][:, 1]) tight_h = max(parts_cloud_urdf[link - 1][:, 2]) - min( parts_cloud_urdf[link - 1][:, 2]) norm_factor = np.sqrt(1) / np.sqrt(tight_w**2 + tight_l**2 + tight_h**2) base_p = np.array([ min(parts_cloud_urdf[link - 1][:, 0]), min(parts_cloud_urdf[link - 1][:, 1]), min(parts_cloud_urdf[link - 1][:, 2]), ]).reshape(1, 3) extre_p = np.array([ max(parts_cloud_urdf[link - 1][:, 0]), max(parts_cloud_urdf[link - 1][:, 1]), max(parts_cloud_urdf[link - 1][:, 2]), ]).reshape(1, 3) center_p = (extre_p - base_p) / 2 * norm_factor parts_cloud_norm[link - 1] = ( (parts_cloud_urdf[link - 1][:, :3] - base_p) * norm_factor + np.array([0.5, 0.5, 0.5]).reshape(1, 3) - center_p.reshape(1, 3)) x_set_pcloud = np.concatenate(choose_x, axis=0) y_set_pcloud = np.concatenate(choose_y, axis=0) # save into h5 for rgb_img, input_pts, mask, correpsonding urdf_points print("Writing to ", h5_save_name) hf = h5py.File(h5_save_name, "w") hf.create_dataset("rgb", data=img) hf.create_dataset("mask", data=mask) cloud_cam = hf.create_group("gt_points") for part_i, points in enumerate(parts_cloud_cam): cloud_cam.create_dataset(str(part_i), data=points) coord_gt = hf.create_group("gt_coords") for part_i, points in enumerate(parts_cloud_urdf): coord_gt.create_dataset(str(part_i), data=points) hf.close() ################# for debug only, let me know if you have questions ################# if self.is_debug: figure = plt.figure(dpi=200) ax = plt.subplot(121) plt.imshow(img) plt.title('RGB image') ax1 = plt.subplot(122) plt.imshow(depth) plt.title('depth image') plt.show() plot3d_pts( [parts_cloud_cam], [['part {}'.format(i) for i in range(len(parts_cloud_cam))]], s=5, title_name=['camera coords']) plot3d_pts( [parts_cloud_world], [['part {}'.format(i) for i in range(len(parts_cloud_world))]], s=5, title_name=['world coords']) plot3d_pts( [parts_cloud_canon], [['part {}'.format(i) for i in range(len(parts_cloud_canon))]], s=5, title_name=['canon coords']) plot3d_pts( [parts_cloud_urdf], [['part {}'.format(i) for i in range(len(parts_cloud_urdf))]], s=5, title_name=['urdf coords']) return None
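# __preprocess_and_save__() writes one HDF5 file per frame containing the "rgb"
# image, the part "mask", and per-part point sets under the "gt_points" and
# "gt_coords" groups. A small sketch of reading such a file back; the helper
# name and file path are assumptions for illustration.
import h5py
import numpy as np


def load_preprocessed_frame(h5_path):
    """Read back the rgb image, mask, and per-part point sets written above."""
    with h5py.File(h5_path, "r") as hf:
        rgb = np.array(hf["rgb"])
        mask = np.array(hf["mask"])
        # per-part camera-frame points and URDF-frame coordinates, ordered by part index
        gt_points = [np.array(hf["gt_points"][k]) for k in sorted(hf["gt_points"], key=int)]
        gt_coords = [np.array(hf["gt_coords"][k]) for k in sorted(hf["gt_coords"], key=int)]
    return rgb, mask, gt_points, gt_coords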
Variable(model_points).cuda(), \ Variable(idx).cuda() pred_r, pred_t, pred_c, emb = estimator(img, points, choose, idx) pred_r = pred_r / torch.norm(pred_r, dim=2).view(1, num_points, 1) pred_c = pred_c.view(bs, num_points) how_max, which_max = torch.max(pred_c, 1) pred_t = pred_t.view(bs * num_points, 1, 3) my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy() my_t = (points.view(bs * num_points, 1, 3) + pred_t)[which_max[0]].view(-1).cpu().data.numpy() my_pred = np.append(my_r, my_t) for ite in range(0, iteration): T = Variable(torch.from_numpy(my_t.astype(np.float32))).cuda().view(1, 3).repeat(num_points, 1).contiguous().view(1, num_points, 3) my_mat = quaternion_matrix(my_r) R = Variable(torch.from_numpy(my_mat[:3, :3].astype(np.float32))).cuda().view(1, 3, 3) my_mat[0:3, 3] = my_t new_points = torch.bmm((points - T), R).contiguous() pred_r, pred_t = refiner(new_points, emb, idx) pred_r = pred_r.view(1, 1, -1) pred_r = pred_r / (torch.norm(pred_r, dim=2).view(1, 1, 1)) my_r_2 = pred_r.view(-1).cpu().data.numpy() my_t_2 = pred_t.view(-1).cpu().data.numpy() my_mat_2 = quaternion_matrix(my_r_2) my_mat_2[0:3, 3] = my_t_2 my_mat_final = np.dot(my_mat, my_mat_2) my_r_final = copy.deepcopy(my_mat_final) my_r_final[0:3, 3] = 0
def pose_predict(img, depth, rois): label_pub = rospy.Publisher('/label', Image, queue_size=10) img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) class_list = [ '002_master_chef_can', '003_cracker_box', '004_sugar_box', '005_tomato_soup_can', '006_mustard_bottle', '007_tuna_fish_can', '008_pudding_box', '009_gelatin_box', '010_potted_meat_can', '011_banana', '019_pitcher_base', '025_mug', '021_bleach_cleanser', '024_bowl', '035_power_drill', '036_wood_block', '037_scissors', '040_large_marker', '051_large_clamp', '052_extra_large_clamp', '061_foam_brick' ] try: object_number = len(rois) #lst = posecnn_rois[:,0:1].flatten() #lst = np.unique(label) my_result_wo_refine = [] my_result = [] for idx in range(object_number): #itemid = lst[idx] itemid = class_list.index(rois[idx].Class) + 1 #itemid = class_list.index(rois[idx].Class) +3 print(object_number, itemid, rois[idx]) try: label, pub_label = seg_predict(img) pub_label = pub_label * 50 label_pub.publish(bridge.cv2_to_imgmsg(pub_label, '8UC1')) ####################### with Detection algorithm ################################# # rmin, rmax, cmin,cmax = get_bbox(rois,idx) ##################################################################################### mask_depth = ma.getmaskarray(ma.masked_not_equal(depth, 0)) mask_label = ma.getmaskarray(ma.masked_equal(label, itemid)) mask = mask_label * mask_depth rmin, rmax, cmin, cmax = get_bbox(mask_label) choose = mask[rmin:rmax, cmin:cmax].flatten().nonzero()[0] if len(choose) > num_points: c_mask = np.zeros(len(choose), dtype=int) c_mask[:num_points] = 1 np.random.shuffle(c_mask) choose = choose[c_mask.nonzero()] else: choose = np.pad(choose, (0, num_points - len(choose)), 'wrap') depth_masked = depth[ rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) xmap_masked = xmap[ rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) ymap_masked = ymap[ rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) choose = np.array([choose]) pt2 = depth_masked / cam_scale pt0 = (ymap_masked - cam_cx) * pt2 / cam_fx pt1 = (xmap_masked - cam_cy) * pt2 / cam_fy cloud = np.concatenate((pt0, pt1, pt2), axis=1) img_masked = np.array(img)[:, :, :3] img_masked = np.transpose(img_masked, (2, 0, 1)) img_masked = img_masked[:, rmin:rmax, cmin:cmax] cloud = torch.from_numpy(cloud.astype(np.float32)) choose = torch.LongTensor(choose.astype(np.int32)) img_masked = norm( torch.from_numpy(img_masked.astype(np.float32))) index = torch.LongTensor([itemid - 1]) cloud = Variable(cloud).cuda() choose = Variable(choose).cuda() img_masked = Variable(img_masked).cuda() index = Variable(index).cuda() cloud = cloud.view(1, num_points, 3) img_masked = img_masked.view(1, 3, img_masked.size()[1], img_masked.size()[2]) pred_r, pred_t, pred_c, emb = estimator( img_masked, cloud, choose, index) pred_r = pred_r / torch.norm(pred_r, dim=2).view( 1, num_points, 1) pred_c = pred_c.view(bs, num_points) how_max, which_max = torch.max(pred_c, 1) pred_t = pred_t.view(bs * num_points, 1, 3) points = cloud.view(bs * num_points, 1, 3) my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy() my_t = (points + pred_t)[which_max[0]].view(-1).cpu().data.numpy() my_pred = np.append(my_r, my_t) # making pose matrix rot_to_angle = rotationMatrixToEulerAngles(dof[:3, :3]) rot_to_angle = rot_to_angle.reshape(1, 3) my_t = my_t.reshape(1, 3) rot_t = np.concatenate([rot_to_angle, my_t], axis=0) # cam_mat = cv2.UMat(np.matrix([[cam_fx, 0, cam_cx], [0, cam_fy, cam_cy], # [0, 0, 1]])) #tl = np.array([100,100,100]) 
#cam_mat = cv2.UMat(np.matrix([[960.14238289, 0, 252.43270692], [0, 960.14238289, 317.39366696], # [0, 0, 1]])) for ite in range(0, iteration): T = Variable(torch.from_numpy( my_t.astype(np.float32))).cuda().view(1, 3).repeat( num_points, 1).contiguous().view(1, num_points, 3) my_mat = quaternion_matrix(my_r) R = Variable( torch.from_numpy(my_mat[:3, :3].astype( np.float32))).cuda().view(1, 3, 3) my_mat[0:3, 3] = my_t new_cloud = torch.bmm((cloud - T), R).contiguous() pred_r, pred_t = refiner(new_cloud, emb, index) pred_r = pred_r.view(1, 1, -1) pred_r = pred_r / (torch.norm(pred_r, dim=2).view(1, 1, 1)) my_r_2 = pred_r.view(-1).cpu().data.numpy() my_t_2 = pred_t.view(-1).cpu().data.numpy() my_mat_2 = quaternion_matrix(my_r_2) my_mat_2[0:3, 3] = my_t_2 my_mat_final = np.dot(my_mat, my_mat_2) my_r_final = copy.deepcopy(my_mat_final) my_r_final[0:3, 3] = 0 my_r_final = quaternion_from_matrix(my_r_final, True) my_t_final = np.array([ my_mat_final[0][3], my_mat_final[1][3], my_mat_final[2][3] ]) my_pred = np.append(my_r_final, my_t_final) my_r = my_r_final my_t = my_t_final open_cv_image = img.copy() open_cv_image = cv2.cvtColor(open_cv_image, cv2.COLOR_RGB2BGR) dof = quaternion_matrix(my_r) dof[0:3, 3] = my_t object_poses = { 'tx': my_t[0][0], 'ty': my_t[0][1], 'tz': my_t[0][2], 'qx': my_r[0], 'qy': my_r[1], 'qz': my_r[2], 'qw': my_r[3] } my_result.append(object_poses) open_cv_image = img.copy() open_cv_image = cv2.cvtColor(open_cv_image, cv2.COLOR_RGB2BGR) imgpts, jac = cv2.projectPoints(cld[itemid], dof[0:3, 0:3], dof[0:3, 3], cam_mat, dist) # 13 = mug open_cv_image = draw(open_cv_image, imgpts, itemid) except ZeroDivisionError: open_cv_image = None print('Fail') except CvBridgeError as e: print(e) return my_result, open_cv_image
def __preprocess_and_save__(self, index): obj_category = self.list_obj[index] ins = self.list_instance[index] obj = self.objnamelist.index(obj_category) art_status = self.list_status[index] frame_order = self.list_rank[index] label = self.list_label[index] h5_save_path = self.root_dir + '/hdf5/' + obj_category + '/' + ins + '/' + art_status if (not os.path.exists(h5_save_path)): os.makedirs(h5_save_path) h5_save_name = h5_save_path + '/{}.h5'.format(frame_order) num_parts = self.urdf_dict[obj_category][ins]['num_links'] model_offsets = self.urdf_dict[obj_category][ins]['link'] joint_offsets = self.urdf_dict[obj_category][ins]['joint'] parts_model_point = [None] * num_parts parts_world_point = [None] * num_parts parts_target_point = [None] * num_parts parts_cloud_cam = [None] * num_parts parts_cloud_world = [None] * num_parts parts_cloud_canon = [None] * num_parts parts_cloud_urdf = [None] * num_parts parts_cloud_norm = [None] * num_parts parts_world_pos = [None] * num_parts parts_world_orn = [None] * num_parts parts_urdf_pos = [None] * num_parts parts_urdf_orn = [None] * num_parts parts_urdf_box = [None] * num_parts parts_model2world = [None] * num_parts parts_canon2urdf = [None] * num_parts parts_target_r = [None] * num_parts parts_target_t = [None] * num_parts parts_mask = [None] * num_parts choose_x = [None] * num_parts choose_y = [None] * num_parts choose_to_whole = [None] * num_parts # rgb/depth/label print('current image: ', self.list_rgb[index]) img = Image.open(self.list_rgb[index]) img = np.array(img) #.astype(np.uint8) depth = np.array(h5py.File(self.list_depth[index], 'r')['data']) label = np.array(Image.open(self.list_label[index])) # pose infos pose_dict = self.meta_dict[obj_category][ins][art_status][ 'frame_{}'.format(frame_order)] urdf_dict = self.urdf_dict[obj_category][ins] viewMat = np.array(pose_dict['viewMat']).reshape(4, 4).T projMat = np.array(pose_dict['projMat']).reshape(4, 4).T parts_world_pos[0] = np.array([0, 0, 0]) parts_world_orn[0] = np.array([0, 0, 0, 1]) for link in range(0, num_parts): if link > 0: parts_world_pos[link] = np.array( pose_dict['obj'][link - 1][4]).astype(np.float32) parts_world_orn[link] = np.array( pose_dict['obj'][link - 1][5]).astype(np.float32) for link in range(num_parts): if link == 1 and num_parts == 2: parts_urdf_pos[link] = np.array(urdf_dict['joint']['xyz'][ link - 1]) # todo, accumulate joints pffsets != link offsets else: parts_urdf_pos[link] = -np.array( urdf_dict['link']['xyz'][link]) parts_urdf_orn[link] = np.array(urdf_dict['link']['rpy'][link]) for k in range(num_parts): center_world_orn = parts_world_orn[k] center_world_orn = np.array([ center_world_orn[3], center_world_orn[0], center_world_orn[1], center_world_orn[2] ]) my_model2world_r = quaternion_matrix( center_world_orn)[:4, :4] # [w, x, y, z] my_model2world_t = parts_world_pos[k] my_model2world_mat = np.copy(my_model2world_r) for m in range(3): my_model2world_mat[m, 3] = my_model2world_t[m] my_world2camera_mat = viewMat my_camera2clip_mat = projMat my_model2camera_mat = np.dot(my_world2camera_mat, my_model2world_mat) parts_model2world[k] = my_model2world_mat # depth to cloud data mask = np.array((label[:, :] < num_parts) & (label[:, :] > -1)).astype( np.uint8) mask_whole = np.copy(mask) for n in range(num_parts): parts_mask[n] = np.array((label[:, :] == (n))).astype(np.uint8) choose_to_whole[n] = np.where(parts_mask[n] > 0) #>>>>>>>>>>------- rendering target pcloud from depth image --------<<<<<<<<<# # first get projected map ymap = self.ymap xmap = self.xmap h = 
self.height w = self.width u_map = ymap * 2 / w - 1 v_map = (512 - xmap) * 2 / h - 1 v1_map = xmap * 2 / h - 1 w_channel = -depth projected_map = np.stack( [u_map * w_channel, v_map * w_channel, depth, w_channel]).transpose([1, 2, 0]) projected_map1 = np.stack( [u_map * w_channel, v1_map * w_channel, depth, w_channel]).transpose([1, 2, 0]) for s in range(num_parts): x_set, y_set = choose_to_whole[s] if len(x_set) < 10: print('data is empty, skipping!!!') return None else: choose_x[s] = x_set choose_y[s] = y_set # ---------------> from projected map into target part_cloud # order: cam->world->canon) projected_points = projected_map[ choose_x[s][:].astype(np.uint16), choose_y[s][:].astype(np.uint16), :] projected_points = np.reshape(projected_points, [-1, 4]) depth_channel = -projected_points[:, 3:4] cloud_cam = np.dot( projected_points[:, 0:2] - np.dot(depth_channel, projMat[0:2, 2:3].T), np.linalg.pinv(projMat[:2, :2].T)) projected_points1 = projected_map1[ choose_x[s][:].astype(np.uint16), choose_y[s][:].astype(np.uint16), :] projected_points1 = np.reshape(projected_points1, [-1, 4]) cloud_cam_real = np.dot( projected_points1[:, 0:2] - np.dot(depth_channel, projMat[0:2, 2:3].T), np.linalg.pinv(projMat[:2, :2].T)) cloud_cam_real = np.concatenate((cloud_cam_real, depth_channel), axis=1) cloud_cam = np.concatenate((cloud_cam, depth_channel), axis=1) cloud_cam_full = np.concatenate( (cloud_cam, np.ones((cloud_cam.shape[0], 1))), axis=1) # modify, todo camera_pose_mat = np.linalg.pinv(viewMat.T) camera_pose_mat[:3, :] = -camera_pose_mat[:3, :] cloud_world = np.dot(cloud_cam_full, camera_pose_mat) cloud_canon = np.dot(cloud_world, np.linalg.pinv(parts_model2world[s].T)) # canon points should be points coordinates centered in the inertial frame parts_cloud_cam[s] = cloud_cam_real[:, :3] parts_cloud_world[s] = cloud_world[:, :3] parts_cloud_canon[s] = cloud_canon[:, :3] for k in range(num_parts): center_joint_orn = parts_urdf_orn[k] my_canon2urdf_r = euler_matrix( center_joint_orn[0], center_joint_orn[1], center_joint_orn[2])[:4, :4] # [w, x, y, z] my_canon2urdf_t = parts_urdf_pos[k] my_canon2urdf_mat = my_canon2urdf_r for m in range(3): my_canon2urdf_mat[m, 3] = my_canon2urdf_t[m] part_points_space = np.concatenate( (parts_cloud_canon[k], np.ones((parts_cloud_canon[k].shape[0], 1))), axis=1) parts_cloud_urdf[k] = np.dot(part_points_space, my_canon2urdf_mat.T) #>>>>>>>>>>>>>>> go to PNCS space for link in range(num_parts): tight_w = max(parts_cloud_urdf[link][:, 0]) - min( parts_cloud_urdf[link][:, 0]) tight_l = max(parts_cloud_urdf[link][:, 1]) - min( parts_cloud_urdf[link][:, 1]) tight_h = max(parts_cloud_urdf[link][:, 2]) - min( parts_cloud_urdf[link][:, 2]) norm_factor = np.sqrt(1) / np.sqrt(tight_w**2 + tight_l**2 + tight_h**2) base_p = np.array([ min(parts_cloud_urdf[link][:, 0]), min(parts_cloud_urdf[link][:, 1]), min(parts_cloud_urdf[link][:, 2]) ]).reshape(1, 3) extre_p = np.array([ max(parts_cloud_urdf[link][:, 0]), max(parts_cloud_urdf[link][:, 1]), max(parts_cloud_urdf[link][:, 2]) ]).reshape(1, 3) center_p = (extre_p - base_p) / 2 * norm_factor parts_cloud_norm[link] = (parts_cloud_urdf[link][:, :3] - base_p) * norm_factor + np.array( [0.5, 0.5, 0.5]).reshape( 1, 3) - center_p.reshape(1, 3) x_set_pcloud = np.concatenate(choose_x, axis=0) y_set_pcloud = np.concatenate(choose_y, axis=0) # save into h5 for rgb_img, input_pts, mask, correpsonding urdf_points print('Writing to ', h5_save_name) hf = h5py.File(h5_save_name, 'w') hf.create_dataset('rgb', data=img) hf.create_dataset('mask', 
data=mask) cloud_cam = hf.create_group('gt_points') for part_i, points in enumerate(parts_cloud_cam): cloud_cam.create_dataset(str(part_i), data=points) coord_gt = hf.create_group('gt_coords') for part_i, points in enumerate(parts_cloud_urdf): coord_gt.create_dataset(str(part_i), data=points) hf.close() ################# for debug only, let me know if you have questions ################# if self.is_debug: figure = plt.figure(dpi=200) ax = plt.subplot(121) plt.imshow(img) plt.title('RGB image') ax1 = plt.subplot(122) plt.imshow(depth) plt.title('depth image') plt.show() plot3d_pts( [parts_cloud_cam], [['part {}'.format(i) for i in range(len(parts_cloud_cam))]], s=5, title_name=['camera coords']) plot3d_pts( [parts_cloud_world], [['part {}'.format(i) for i in range(len(parts_cloud_world))]], s=5, title_name=['world coords']) plot3d_pts( [parts_cloud_canon], [['part {}'.format(i) for i in range(len(parts_cloud_canon))]], s=5, title_name=['canon coords']) plot3d_pts( [parts_cloud_urdf], [['part {}'.format(i) for i in range(len(parts_cloud_urdf))]], s=5, title_name=['urdf coords']) return None
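# The normalization loop above rescales each part's URDF-frame points so that
# the part's tight bounding box has unit diagonal and is centred at
# (0.5, 0.5, 0.5). The same computation as a standalone helper, written as a
# sketch; the dataset code keeps this logic inline.
import numpy as np


def normalize_part_points(points_urdf):
    """Map (N, 3) points into a unit-diagonal box centred at (0.5, 0.5, 0.5)."""
    base_p = points_urdf.min(axis=0, keepdims=True)       # per-axis minimum corner
    extre_p = points_urdf.max(axis=0, keepdims=True)       # per-axis maximum corner
    norm_factor = 1.0 / np.linalg.norm(extre_p - base_p)   # 1 / bounding-box diagonal
    center_p = (extre_p - base_p) / 2 * norm_factor
    return (points_urdf - base_p) * norm_factor + 0.5 - center_p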
pred_r, pred_t, pred_c, emb = estimator(img, points, choose, idx) ''' pred_r = pred_r / torch.norm(pred_r, dim=2).view(1, num_points, 1) pred_c = pred_c.view(bs, num_points) how_max, which_max = torch.max(pred_c, 1) pred_t = pred_t.view(bs * num_points, 1, 3) my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy() my_t = (points.view(bs * num_points, 1, 3) + pred_t)[which_max[0]].view(-1).cpu().data.numpy() my_pred = np.append(my_r, my_t) #SACHIT TRY LEFT MULTIPLY T = Variable(torch.from_numpy(my_t.astype(np.float32))).cuda().view( 1, 3).repeat(num_points, 1).contiguous().view(1, num_points, 3) my_mat = quaternion_matrix(my_r) R = Variable(torch.from_numpy(my_mat[:3, :3].astype( np.float32))).cuda().view(1, 3, 3) my_mat[0:3, 3] = my_t my_r_unrefined = quaternion_matrix(my_r)[:3, :3] my_t_unrefined = my_t + 0 if istracked: my_r = my_r_last my_t = my_t_last my_r_unrefined = quaternion_matrix(my_r)[:3, :3] my_t_unrefined = my_t + 0 emb = last_emb ICP = False if ICP:
def pose(self): # get mask and segmentation mask, bbox, viz = self.draw_seg(self.batch_predict()) pred = mask pred = pred * 255 pred = np.transpose(pred, (1, 2, 0)) # (CxHxW)->(HxWxC) # convert img into tensor rgb_original = self.rgb norm = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) self.rgb = Variable(norm(torch.from_numpy(self.rgb.astype( np.float32)))).cuda() all_masks = [] mask_depth = ma.getmaskarray(ma.masked_not_equal(self.depth, 0)) mask_label = ma.getmaskarray(ma.masked_equal(pred, np.array(255))) for b in range(len(bbox)): mask = mask_depth * mask_label[:, :, b] rmin = int(bbox[b, 0]) rmax = int(bbox[b, 1]) cmin = int(bbox[b, 2]) cmax = int(bbox[b, 3]) img = np.transpose(rgb_original, (0, 1, 2)) #CxHxW img_masked = img[:, rmin:rmax, cmin:cmax] choose = mask[rmin:rmax, cmin:cmax].flatten().nonzero()[0] if len(choose) == 0: cc = torch.LongTensor([0]) return (cc, cc, cc, cc, cc, cc) if len(choose) > num_points: c_mask = np.zeros(len(choose), dtype=int) c_mask[:num_points] = 1 np.random.shuffle(c_mask) choose = choose[c_mask.nonzero()] else: choose = np.pad(choose, (0, num_points - len(choose)), 'wrap') # visualize each masks # plt.imshow(mask), plt.show() depth_masked = self.depth[ rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) xmap_masked = self.xmap[ rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) ymap_masked = self.ymap[ rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) choose = np.array([choose]) cam_scale = 1.0 pt2 = depth_masked / cam_scale pt0 = (ymap_masked - self.cam_cx) * pt2 / self.cam_fx pt1 = (xmap_masked - self.cam_cy) * pt2 / self.cam_fy cloud = np.concatenate((pt0, pt1, pt2), axis=1) cloud = cloud / 1000 points = torch.from_numpy(cloud.astype(np.float32)) choose = torch.LongTensor(choose.astype(np.int32)) img_ = norm(torch.from_numpy(img_masked.astype(np.float32))) idx = torch.LongTensor([self.object_index]) img_ = Variable(img_).cuda().unsqueeze(0) points = Variable(points).cuda().unsqueeze(0) choose = Variable(choose).cuda().unsqueeze(0) idx = Variable(idx).cuda().unsqueeze(0) pred_r, pred_t, pred_c, emb = self.estimator( img_, points, choose, idx) pred_r = pred_r / torch.norm(pred_r, dim=2).view(1, num_points, 1) pred_c = pred_c.view(bs, num_points) how_max, which_max = torch.max(pred_c, 0) #1 pred_t = pred_t.view(bs * num_points, 1, 3) # print("max confidence", how_max) my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy() my_t = (points.view(bs * num_points, 1, 3) + pred_t)[which_max[0]].view(-1).cpu().data.numpy() my_pred = np.append(my_r, my_t) for ite in range(0, iteration): T = Variable(torch.from_numpy( my_t.astype(np.float32))).cuda().view(1, 3).repeat( num_points, 1).contiguous().view(1, num_points, 3) my_mat = quaternion_matrix(my_r) R = Variable( torch.from_numpy(my_mat[:3, :3].astype( np.float32))).cuda().view(1, 3, 3) my_mat[0:3, 3] = my_t new_points = torch.bmm((points - T), R).contiguous() pred_r, pred_t = self.refiner(new_points, emb, idx) pred_r = pred_r.view(1, 1, -1) pred_r = pred_r / (torch.norm(pred_r, dim=2).view(1, 1, 1)) my_r_2 = pred_r.view(-1).cpu().data.numpy() my_t_2 = pred_t.view(-1).cpu().data.numpy() my_mat_2 = quaternion_matrix(my_r_2) my_mat_2[0:3, 3] = my_t_2 my_mat_final = np.dot( my_mat, my_mat_2) # refine pose means two matrix multiplication my_r_final = copy.deepcopy(my_mat_final) my_r_final[0:3, 3] = 0 my_r_final = quaternion_from_matrix(my_r_final, True) my_t_final = np.array([ my_mat_final[0][3], 
my_mat_final[1][3], my_mat_final[2][3] ]) my_pred = np.append(my_r_final, my_t_final) my_r = my_r_final my_t = my_t_final # POSITION # ndds has cm units my_t = np.array(my_t) # my_t = np.array([my_t[0], my_t[1], 1-my_t[2]]) # print('estimated translation is:{0}'.format(my_t)) # ROTATION my_r = quaternion_matrix(my_r)[:3, :3] # my_r = np.dot(my_r, np.array([[1, 0, 0], [0, 0, -1], [0, -1, 0]])) # print('estimated rotation is\n:{0}'.format(my_r)) # Draw estimated pose 3Dbox target = np.dot(self.scaled, my_r.T) #my_r.T target = np.add(target, my_t) self.draw_cube(target, viz) # Norm pose NormPos = np.linalg.norm((my_t), ord=1) print("Pos:{0}".format(my_t)) plt.figure(figsize=(10, 10)), plt.imshow(viz), plt.show() return viz
def pose_predict(img, depth, rois): class_list = [ '002_master_chef_can', '003_cracker_box', '004_sugar_box', '005_tomato_soup_can', '006_mustard_bottle', '007_tuna_fish_can', '008_pudding_box', '009_gelatin_box', '010_potted_meat_can', '011_banana', '019_pitcher_base', '025_mug', '021_bleach_cleanser', '024_bowl', '035_power_drill', '036_wood_block', '037_scissors', '040_large_marker', '051_large_clamp', '052_extra_large_clamp', '061_foam_brick' ] try: img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) object_number = len(rois) bridge = CvBridge() #lst = posecnn_rois[:,0:1].flatten() #lst = np.unique(label) my_result_wo_refine = [] my_result = [] for idx in range(object_number): #itemid = lst[idx] #itemid = class_list.index(rois[idx].Class) +1 itemid = class_list.index(rois[idx].Class) + 3 try: label = seg_predict(img) cv2.imwrite( '/root/catkin_ws/src/dnsefusion/scripts/experiments/scripts/segmentation_image.png', label) rmin, rmax, cmin, cmax = get_bbox(rois, idx) # bounding box cutting #label = seg_predict(img[rmin:rmax,cmin:cmax,:]) #mask_depth = ma.getmaskarray(ma.masked_not_equal(depth[rmin:rmax, cmin:cmax], 0)) #mask_label = ma.getmaskarray(ma.masked_equal(label, itemid)) #mask = mask_label * mask_depth # only image mask_depth = ma.getmaskarray(ma.masked_not_equal(depth, 0)) mask_label = ma.getmaskarray(ma.masked_equal(label, itemid)) mask = mask_label * mask_depth #rmin, rmax, cmin, cmax = get_bbox(mask_label) choose = mask[rmin:rmax, cmin:cmax].flatten().nonzero()[0] print(choose) if len(choose) > num_points: c_mask = np.zeros(len(choose), dtype=int) c_mask[:num_points] = 1 np.random.shuffle(c_mask) choose = choose[c_mask.nonzero()] else: choose = np.pad(choose, (0, num_points - len(choose)), 'wrap') depth_masked = depth[ rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) xmap_masked = xmap[ rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) ymap_masked = ymap[ rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) choose = np.array([choose]) pt2 = depth_masked / cam_scale pt0 = (ymap_masked - cam_cx) * pt2 / cam_fx pt1 = (xmap_masked - cam_cy) * pt2 / cam_fy cloud = np.concatenate((pt0, pt1, pt2), axis=1) img_masked = np.array(img)[:, :, :3] img_masked = np.transpose(img_masked, (2, 0, 1)) img_masked = img_masked[:, rmin:rmax, cmin:cmax] cloud = torch.from_numpy(cloud.astype(np.float32)) choose = torch.LongTensor(choose.astype(np.int32)) img_masked = norm( torch.from_numpy(img_masked.astype(np.float32))) index = torch.LongTensor([itemid - 1]) cloud = Variable(cloud).cuda() choose = Variable(choose).cuda() img_masked = Variable(img_masked).cuda() index = Variable(index).cuda() cloud = cloud.view(1, num_points, 3) img_masked = img_masked.view(1, 3, img_masked.size()[1], img_masked.size()[2]) pred_r, pred_t, pred_c, emb = estimator( img_masked, cloud, choose, index) pred_r = pred_r / torch.norm(pred_r, dim=2).view( 1, num_points, 1) pred_c = pred_c.view(bs, num_points) how_max, which_max = torch.max(pred_c, 1) pred_t = pred_t.view(bs * num_points, 1, 3) points = cloud.view(bs * num_points, 1, 3) my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy() my_t = (points + pred_t)[which_max[0]].view(-1).cpu().data.numpy() my_pred = np.append(my_r, my_t) # making pose matrix dof = quaternion_matrix(my_r) dof[0:3, 3] = my_t rot_to_angle = rotationMatrixToEulerAngles(dof[:3, :3]) rot_to_angle = rot_to_angle.reshape(1, 3) my_t = my_t.reshape(1, 3) rot_t = np.concatenate([rot_to_angle, my_t], axis=0) object_poses = { 'tx': 
my_t[0][0], 'ty': my_t[0][1], 'tz': my_t[0][2], 'qx': my_r[0], 'qy': my_r[1], 'qz': my_r[2], 'qw': my_r[3] } my_result.append(object_poses) open_cv_image = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR) cam_mat = cv2.UMat( np.matrix([[cam_fx, 0, cam_cx], [0, cam_fy, cam_cy], [0, 0, 1]])) imgpts, jac = cv2.projectPoints(cld[14], dof[0:3, 0:3], dof[0:3, 3], cam_mat, dist) # 14 mugcup open_cv_image = draw(open_cv_image, imgpts.get(), itemid) my_result_wo_refine.append(my_pred.tolist()) except ZeroDivisionError: # my_result_wo_refine.append([0.0 for i in range(7)]) # my_result.append([0.0 for i in range(7)]) open_cv_image = None print('Fail') except CvBridgeError as e: print(e) return my_result, open_cv_image
def callback(self, rgb, depth): if DEBUG: print('received depth image of type: ' + depth.encoding) print('received rgb image of type: ' + rgb.encoding) #https://answers.ros.org/question/64318/how-do-i-convert-an-ros-image-into-a-numpy-array/ depth = np.frombuffer(depth.data, dtype=np.uint16).reshape(depth.height, depth.width, -1) rgb = np.frombuffer(rgb.data, dtype=np.uint8).reshape(rgb.height, rgb.width, -1) rgb_original = rgb #cv2.imshow('depth', depth) #time1 = time.time() rgb = np.transpose(rgb, (2, 0, 1)) rgb = norm(torch.from_numpy(rgb.astype(np.float32))) rgb = Variable(rgb).cuda() semantic = self.model(rgb.unsqueeze(0)) _, pred = torch.max(semantic, dim=1) pred = pred * 255 pred = np.transpose(pred, (1, 2, 0)) # (CxHxW)->(HxWxC) #print(pred.shape) #ret, threshold = cv2.threshold(pred.cpu().numpy(), 1, 255, cv2.THRESH_BINARY) #pred is already binary, therefore, this line is unnecessary contours, hierarchy = cv2.findContours(np.uint8(pred), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) cnt = max(contours, key=cv2.contourArea) x, y, w, h = cv2.boundingRect(cnt) rmin, rmax, cmin, cmax = get_bbox([x, y, w, h]) #cv2.rectangle(rgb_original,(cmin,rmin), (cmax,rmax) , (0,255,0),2) #cv2.imwrite('depth.png', depth) #save depth image mask_depth = ma.getmasksarray(ma.masked_not_equal(depth, 0)) mask_label = ma.getmaskarray(ma.masked_equal(pred, np.array(255))) mask = mask_depth * mask_label #print(rgb.shape) #torch.Size([3, 480, 640]) #print(rgb_original.shape) #(480, 640, 3) img = np.transpose(rgb_original, (2, 0, 1)) img_masked = img[:, rmin:rmax, cmin:cmax] choose = mask[rmin:rmax, cmin:cmax].flatten().nonzero()[0] #print("length of choose is :{0}".format(len(choose))) if len(choose) == 0: cc = torch.LongTensor([0]) return (cc, cc, cc, cc, cc, cc) if len(choose) > num_points: c_mask = np.zeros(len(choose), dtype=int) c_mask[: num_points] = 1 # if number of object pixels are bigger than 500, we select just 500 np.random.shuffle(c_mask) choose = choose[c_mask.nonzero()] # now len(choose) = 500 else: choose = np.pad(choose, (0, num_points - len(choose)), 'wrap') depth_masked = depth[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype( np.float32) xmap_masked = self.xmap[ rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) ymap_masked = self.ymap[ rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) choose = np.array([choose]) pt2 = depth_masked #print(pt2) pt0 = (ymap_masked - self.cam_cx) * pt2 / self.cam_fx pt1 = (xmap_masked - self.cam_cy) * pt2 / self.cam_fy cloud = np.concatenate((pt0, pt1, pt2), axis=1) cloud = cloud / 1000 points = torch.from_numpy(cloud.astype(np.float32)) choose = torch.LongTensor(choose.astype(np.int32)) img = norm(torch.from_numpy(img_masked.astype(np.float32))) idx = torch.LongTensor([self.object_index]) img = Variable(img).cuda().unsqueeze(0) points = Variable(points).cuda().unsqueeze(0) choose = Variable(choose).cuda().unsqueeze(0) idx = Variable(idx).cuda().unsqueeze(0) pred_r, pred_t, pred_c, emb = self.estimator(img, points, choose, idx) pred_r = pred_r / torch.norm(pred_r, dim=2).view(1, num_points, 1) pred_c = pred_c.view(bs, num_points) how_max, which_max = torch.max(pred_c, 1) pred_t = pred_t.view(bs * num_points, 1, 3) my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy() my_t = (points.view(bs * num_points, 1, 3) + pred_t)[which_max[0]].view(-1).cpu().data.numpy() my_pred = np.append(my_r, my_t) for ite in range(0, iteration): T = Variable(torch.from_numpy( my_t.astype(np.float32))).cuda().view(1, 
3).repeat( num_points, 1).contiguous().view(1, num_points, 3) my_mat = quaternion_matrix(my_r) R = Variable(torch.from_numpy(my_mat[:3, :3].astype( np.float32))).cuda().view(1, 3, 3) my_mat[0:3, 3] = my_t new_points = torch.bmm((points - T), R).contiguous() pred_r, pred_t = self.refiner(new_points, emb, idx) pred_r = pred_r.view(1, 1, -1) pred_r = pred_r / (torch.norm(pred_r, dim=2).view(1, 1, 1)) my_r_2 = pred_r.view(-1).cpu().data.numpy() my_t_2 = pred_t.view(-1).cpu().data.numpy() my_mat_2 = quaternion_matrix(my_r_2) my_mat_2[0:3, 3] = my_t_2 my_mat_final = np.dot( my_mat, my_mat_2) # refine pose means two matrix multiplication my_r_final = copy.deepcopy(my_mat_final) my_r_final[0:3, 3] = 0 my_r_final = quaternion_from_matrix(my_r_final, True) my_t_final = np.array( [my_mat_final[0][3], my_mat_final[1][3], my_mat_final[2][3]]) my_pred = np.append(my_r_final, my_t_final) my_r = my_r_final my_t = my_t_final my_r = quaternion_matrix(my_r)[:3, :3] #print(my_t.shape) my_t = np.array(my_t) #print(my_t.shape) #print(my_r.shape) target = np.dot(self.scaled, my_r.T) target = np.add(target, my_t) p0 = (int((target[0][0] / target[0][2]) * self.cam_fx + self.cam_cx), int((target[0][1] / target[0][2]) * self.cam_fy + self.cam_cy)) p1 = (int((target[1][0] / target[1][2]) * self.cam_fx + self.cam_cx), int((target[1][1] / target[1][2]) * self.cam_fy + self.cam_cy)) p2 = (int((target[2][0] / target[2][2]) * self.cam_fx + self.cam_cx), int((target[2][1] / target[2][2]) * self.cam_fy + self.cam_cy)) p3 = (int((target[3][0] / target[3][2]) * self.cam_fx + self.cam_cx), int((target[3][1] / target[3][2]) * self.cam_fy + self.cam_cy)) p4 = (int((target[4][0] / target[4][2]) * self.cam_fx + self.cam_cx), int((target[4][1] / target[4][2]) * self.cam_fy + self.cam_cy)) p5 = (int((target[5][0] / target[5][2]) * self.cam_fx + self.cam_cx), int((target[5][1] / target[5][2]) * self.cam_fy + self.cam_cy)) p6 = (int((target[6][0] / target[6][2]) * self.cam_fx + self.cam_cx), int((target[6][1] / target[6][2]) * self.cam_fy + self.cam_cy)) p7 = (int((target[7][0] / target[7][2]) * self.cam_fx + self.cam_cx), int((target[7][1] / target[7][2]) * self.cam_fy + self.cam_cy)) cv2.line(rgb_original, p0, p1, (255, 255, 255), 2) cv2.line(rgb_original, p0, p3, (255, 255, 255), 2) cv2.line(rgb_original, p0, p4, (255, 255, 255), 2) cv2.line(rgb_original, p1, p2, (255, 255, 255), 2) cv2.line(rgb_original, p1, p5, (255, 255, 255), 2) cv2.line(rgb_original, p2, p3, (255, 255, 255), 2) cv2.line(rgb_original, p2, p6, (255, 255, 255), 2) cv2.line(rgb_original, p3, p7, (255, 255, 255), 2) cv2.line(rgb_original, p4, p5, (255, 255, 255), 2) cv2.line(rgb_original, p4, p7, (255, 255, 255), 2) cv2.line(rgb_original, p5, p6, (255, 255, 255), 2) cv2.line(rgb_original, p6, p7, (255, 255, 255), 2) #print('estimated rotation is :{0}'.format(my_r)) #print('estimated translation is :{0}'.format(my_t)) #time2 = time.time() #print('inference time is :{0}'.format(time2-time1)) cv2.imshow('rgb', cv2.cvtColor(rgb_original, cv2.COLOR_BGR2RGB)) # OpenCV uses BGR model cv2.waitKey( 1 ) # pass any integr except 0, as 0 will freeze the display windows
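# The callback above projects the eight corners of the object's 3D box one by
# one (p0 ... p7) with the pinhole model u = x/z * fx + cx, v = y/z * fy + cy.
# The same projection can be written once over all corners; this vectorized
# helper is a sketch, its name is an assumption, and the edge list in the usage
# comment mirrors the cv2.line calls above.
import numpy as np


def project_corners(target, fx, fy, cx, cy):
    """Project (8, 3) camera-frame corner points to integer pixel coordinates."""
    target = np.asarray(target, dtype=float)
    u = target[:, 0] / target[:, 2] * fx + cx
    v = target[:, 1] / target[:, 2] * fy + cy
    return np.stack([u, v], axis=1).astype(int)   # (8, 2) pixel coordinates


# usage sketch:
# corners = project_corners(target, self.cam_fx, self.cam_fy, self.cam_cx, self.cam_cy)
# for a, b in [(0, 1), (0, 3), (0, 4), (1, 2), (1, 5), (2, 3),
#              (2, 6), (3, 7), (4, 5), (4, 7), (5, 6), (6, 7)]:
#     cv2.line(rgb_original, tuple(corners[a]), tuple(corners[b]), (255, 255, 255), 2)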
def estimation_callback(self, rgb,depth,rois): try: img = self.bridge.imgmsg_to_cv2(rgb,'bgr8') depth = self.bridge.imgmsg_to_cv2(depth,'32SC1') rois = rois.bounding_boxes print(img, depth,posecnn_rois) class_list = ['002_master_chef_can', '003_cracker_box', '004_sugar_box', '005_tomato_soup_can', '006_mustard_bottle', '007_tuna_fish_can', '008_pudding_box', '009_gelatin_box', '010_potted_meat_can', '011_banana',#'019_pitcher_base', '025_mug', '021_bleach_cleanser', '024_bowl', '035_power_drill', '036_wood_block', '037_scissors', '040_large_marker', '051_large_clamp', '052_extra_large_clamp', '061_foam_brick'] object_number = len(rois) #lst = posecnn_rois[:,0:1].flatten() #lst = np.unique(label) my_result_wo_refine = [] my_result = [] for idx in range(object_number): #itemid = lst[idx] itemid = class_list.index(rois[idx].Class) +1 print(itemid, rois[idx]) try: label = seg_predict(img) rmin, rmax, cmin,cmax = get_bbox(rois,idx) # bounding box cutting #label = seg_predict(img[rmin:rmax,cmin:cmax,:]) #mask_depth = ma.getmaskarray(ma.masked_not_equal(depth[rmin:rmax, cmin:cmax], 0)) #mask_label = ma.getmaskarray(ma.masked_equal(label, itemid)) #mask = mask_label * mask_depth # only image mask_depth = ma.getmaskarray(ma.masked_not_equal(depth, 0)) mask_label = ma.getmaskarray(ma.masked_equal(label, itemid)) mask = mask_label * mask_depth #rmin, rmax, cmin, cmax = get_bbox(mask_label) choose = mask[rmin:rmax, cmin:cmax].flatten().nonzero()[0] if len(choose) > num_points: c_mask = np.zeros(len(choose), dtype=int) c_mask[:num_points] = 1 np.random.shuffle(c_mask) choose = choose[c_mask.nonzero()] else: choose = np.pad(choose, (0, num_points - len(choose)), 'wrap') depth_masked = depth[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) xmap_masked = xmap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) ymap_masked = ymap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) choose = np.array([choose]) pt2 = depth_masked / cam_scale pt0 = (ymap_masked - cam_cx) * pt2 / cam_fx pt1 = (xmap_masked - cam_cy) * pt2 / cam_fy cloud = np.concatenate((pt0, pt1, pt2), axis=1) img_masked = np.array(img)[:, :, :3] img_masked = np.transpose(img_masked, (2, 0, 1)) img_masked = img_masked[:, rmin:rmax, cmin:cmax] cloud = torch.from_numpy(cloud.astype(np.float32)) choose = torch.LongTensor(choose.astype(np.int32)) img_masked = norm(torch.from_numpy(img_masked.astype(np.float32))) index = torch.LongTensor([itemid - 1]) cloud = Variable(cloud).cuda() choose = Variable(choose).cuda() img_masked = Variable(img_masked).cuda() index = Variable(index).cuda() cloud = cloud.view(1, num_points, 3) img_masked = img_masked.view(1, 3, img_masked.size()[1], img_masked.size()[2]) pred_r, pred_t, pred_c, emb = estimator(img_masked, cloud, choose, index) pred_r = pred_r / torch.norm(pred_r, dim=2).view(1, num_points, 1) pred_c = pred_c.view(bs, num_points) how_max, which_max = torch.max(pred_c, 1) pred_t = pred_t.view(bs * num_points, 1, 3) points = cloud.view(bs * num_points, 1, 3) my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy() my_t = (points + pred_t)[which_max[0]].view(-1).cpu().data.numpy() my_pred = np.append(my_r, my_t) # making pose matrix dof = quaternion_matrix(my_r) dof[0:3,3] = my_t rot_to_angle = rotationMatrixToEulerAngles(dof[:3,:3]) rot_to_angle = rot_to_angle.reshape(1,3) my_t = my_t.reshape(1,3) rot_t = np.concatenate([rot_to_angle,my_t], axis= 0) object_poses = { 'tx':my_t[0][0], 'ty':my_t[0][1], 'tz':my_t[0][2], 'qx':my_r[0], 
'qy':my_r[1], 'qz':my_r[2], 'qw':my_r[3]} my_result.append(object_poses) open_cv_image = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR) cam_mat = cv2.UMat(np.matrix([[cam_fx, 0, cam_cx], [0, cam_fy, cam_cy], [0, 0, 1]])) imgpts, jac = cv2.projectPoints(cld[13], dof[0:3,0:3],dof[0:3,3],cam_mat,dist) open_cv_image = draw(open_cv_image,imgpts.get(), itemid) my_result_wo_refine.append(my_pred.tolist()) pose_array = PoseArray() pose_msg = Pose() pose_pub.publish(pose_array) pose_fit_image.publish(bridge.cv2_to_imgmsg(fit_image, 'bgr8')) """ for ite in range(0, iteration): T = Variable(torch.from_numpy(my_t.astype(np.float32))).cuda().view(1, 3).repeat(num_points, 1).contiguous().view(1, num_points, 3) my_mat = quaternion_matrix(my_r) R = Variable(torch.from_numpy(my_mat[:3, :3].astype(np.float32))).cuda().view(1, 3, 3) my_mat[0:3, 3] = my_t new_cloud = torch.bmm((cloud - T), R).contiguous() pred_r, pred_t = refiner(new_cloud, emb, index) pred_r = pred_r.view(1, 1, -1) pred_r = pred_r / (torch.norm(pred_r, dim=2).view(1, 1, 1)) my_r_2 = pred_r.view(-1).cpu().data.numpy() my_t_2 = pred_t.view(-1).cpu().data.numpy() my_mat_2 = quaternion_matrix(my_r_2) my_mat_2[0:3, 3] = my_t_2 my_mat_final = np.dot(my_mat, my_mat_2) my_r_final = copy.deepcopy(my_mat_final) my_r_final[0:3, 3] = 0 my_r_final = quaternion_from_matrix(my_r_final, True) my_t_final = np.array([my_mat_final[0][3], my_mat_final[1][3], my_mat_final[2][3]]) my_pred = np.append(my_r_final, my_t_final) my_r = my_r_final my_t = my_t_final """ # Here 'my_pred' is the final pose estimation result after refinement ('my_r': quaternion, 'my_t': translation) #my_result.append(my_pred.tolist()) except ZeroDivisionError: # my_result_wo_refine.append([0.0 for i in range(7)]) # my_result.append([0.0 for i in range(7)]) print('Fail') except CvBridgeError as e: print(e)
def __getitem__(self, index): img = Image.open('{0}/{1}.png'.format(self.root, self.list[index])) depth = np.array(Image.open('{0}/{1}_depth.png'.format(self.root, self.list[index]))) #label = np.array(Image.open('{0}/{1}-label.png'.format(self.root, self.list[index]))) with open('{0}/{1}.yaml'.format(self.root, self.list[index]), 'r') as s: meta = yaml.safe_load(s) #mask_back = ma.getmaskarray(ma.masked_equal(label, 0)) self.add_noise=False add_front = False if self.add_noise: for k in range(5): seed = random.choice(self.syn) front = np.array(self.trancolor(Image.open('{0}/{1}.png'.format(self.root, seed)).convert("RGB"))) front = np.transpose(front, (2, 0, 1)) f_label = np.array(Image.open('{0}/{1}-label.png'.format(self.root, seed))) front_label = np.unique(f_label).tolist()[1:] if len(front_label) < self.front_num: continue front_label = random.sample(front_label, self.front_num) for f_i in front_label: mk = ma.getmaskarray(ma.masked_not_equal(f_label, f_i)) if f_i == front_label[0]: mask_front = mk else: mask_front = mask_front * mk t_label = label * mask_front if len(t_label.nonzero()[0]) > 1000: label = t_label add_front = True break ''' obj=[k for k in meta] n=[int(meta[k]['num_pixels']) for k in meta] labels,nb=np.unique(label,return_counts=True) labels=labels[1:] nb=nb[1:] l=list(np.where(nb>self.num_pt)) idx = l[0][np.random.randint(0, len(l[0]))] it=obj[n.index(nb[idx])] mask_depth = ma.getmaskarray(ma.masked_not_equal(depth, 0)) mask_label = ma.getmaskarray(ma.masked_equal(label, labels[idx])) mask = mask_label * mask_depth ''' it='sd' mask = ma.getmaskarray(ma.masked_not_equal(depth, 0)) if self.add_noise: img = self.trancolor(img) #TODO #rmin, rmax, cmin, cmax = get_bbox(mask_label) rmin, rmax, cmin, cmax = [250.0,250.0,250.0,250.0] img = np.transpose(np.array(img)[:, :, :3], (2, 0, 1))[:, rmin:rmax, cmin:cmax] # if self.list[index][:8] == 'data_syn': # seed = random.choice(self.real) # back = np.array(self.trancolor(Image.open('{0}/{1}-color.png'.format(self.root, seed)).convert("RGB"))) # back = np.transpose(back, (2, 0, 1))[:, rmin:rmax, cmin:cmax] # img_masked = back * mask_back[rmin:rmax, cmin:cmax] + img # else: img_masked = img if self.add_noise and add_front: img_masked = img_masked * mask_front[rmin:rmax, cmin:cmax] + front[:, rmin:rmax, cmin:cmax] * ~(mask_front[rmin:rmax, cmin:cmax]) # if self.list[index][:8] == 'data_syn': # img_masked = img_masked + np.random.normal(loc=0.0, scale=7.0, size=img_masked.shape) # p_img = np.transpose(img_masked, (1, 2, 0)) # scipy.misc.imsave('temp/{0}_input.png'.format(index), p_img) # scipy.misc.imsave('temp/{0}_label.png'.format(index), mask[rmin:rmax, cmin:cmax].astype(np.int32)) #print([k for k in meta[obj[idx]]]) target_r = quaternion_matrix(meta[it]['pose'][1])[:3,:3] target_t = np.array([meta[it]['pose'][0]]) add_t = np.array([random.uniform(-self.noise_trans, self.noise_trans) for i in range(3)]) choose = mask[rmin:rmax, cmin:cmax].flatten().nonzero()[0] if len(choose) > self.num_pt: c_mask = np.zeros(len(choose), dtype=int) c_mask[:self.num_pt] = 1 np.random.shuffle(c_mask) choose = choose[c_mask.nonzero()] else: choose = np.pad(choose, (0, self.num_pt - len(choose)), 'wrap') depth_masked = depth[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) xmap_masked = self.xmap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) ymap_masked = self.ymap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) choose = np.array([choose]) #cam_scale = 
meta['factor_depth'][0][0] #TODO cam_scale = 1.0 pt2 = depth_masked / cam_scale pt0 = (ymap_masked - self.cam_cx) * pt2 / self.cam_fx pt1 = (xmap_masked - self.cam_cy) * pt2 / self.cam_fy cloud = np.concatenate((pt0, pt1, pt2), axis=1) #/ 1000.0 if self.add_noise: cloud = np.add(cloud, add_t) # fw = open('temp/{0}_cld.xyz'.format(index), 'w') # for it in cloud: # fw.write('{0} {1} {2}\n'.format(it[0], it[1], it[2])) # fw.close() dellist = [j for j in range(0, len(self.cld[meta[it]['label']+1]))] if self.refine: dellist = random.sample(dellist, len(self.cld[meta[it]['label']+1]) - self.num_pt_mesh_large) else: dellist = random.sample(dellist, len(self.cld[meta[it]['label']+1]) - self.num_pt_mesh_small) model_points = np.delete(self.cld[meta[it]['label']+1], dellist, axis=0) # fw = open('temp/{0}_model_points.xyz'.format(index), 'w') # for it in model_points: # fw.write('{0} {1} {2}\n'.format(it[0], it[1], it[2])) # fw.close() target = np.dot(model_points, target_r.T) if self.add_noise: target = np.add(target, target_t + add_t) #target_t/1000.0 else: target = np.add(target, target_t) #target_t/1000.0 # fw = open('temp/{0}_tar.xyz'.format(index), 'w') # for it in target: # fw.write('{0} {1} {2}\n'.format(it[0], it[1], it[2])) # fw.close() return torch.from_numpy(cloud.astype(np.float32)), \ torch.LongTensor(choose.astype(np.int32)), \ self.norm(torch.from_numpy(img_masked.astype(np.float32))), \ torch.from_numpy(target.astype(np.float32)), \ torch.from_numpy(model_points.astype(np.float32)), \ torch.LongTensor([int(meta[it]['label']+1)])
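# --- Illustrative sketch (not from the original dataset code): the fixed-size point sampling
# --- used by __getitem__ above and by the ROS callbacks. If the mask has more than num_pt
# --- foreground pixels a random subset is kept; if it has fewer, the index list is wrapped
# --- (repeated) to pad it to exactly num_pt entries.
import numpy as np

def sample_choose(mask_crop, num_pt, rng=np.random):
    """mask_crop: boolean crop of the object mask; returns exactly num_pt flat indices."""
    choose = mask_crop.flatten().nonzero()[0]
    if len(choose) == 0:
        return None  # nothing visible; the callers above skip or return early in this case
    if len(choose) > num_pt:
        c_mask = np.zeros(len(choose), dtype=int)
        c_mask[:num_pt] = 1
        rng.shuffle(c_mask)                 # random subset of size num_pt
        choose = choose[c_mask.nonzero()]
    else:
        choose = np.pad(choose, (0, num_pt - len(choose)), 'wrap')  # repeat indices to pad
    return choose

if __name__ == "__main__":
    mask = np.zeros((40, 40), dtype=bool)
    mask[10:20, 10:20] = True               # 100 foreground pixels
    print(len(sample_choose(mask, 500)))    # 500 (padded by wrapping)
    print(len(sample_choose(mask, 50)))     # 50  (random subset)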
def main(): # g13: parameter setting ------------------- batch_id = 1 opt.dataset ='linemod' opt.dataset_root = './datasets/linemod/Linemod_preprocessed' estimator_path = 'trained_checkpoints/linemod/pose_model_9_0.01310166542980859.pth' refiner_path = 'trained_checkpoints/linemod/pose_refine_model_493_0.006761023565178073.pth' opt.resume_posenet = estimator_path opt.resume_posenet = refiner_path dataset_config_dir = 'datasets/linemod/dataset_config' output_result_dir = 'experiments/eval_result/linemod' bs = 1 #fixed because of the default setting in torch.utils.data.DataLoader opt.iteration = 2 #default is 4 in eval_linemod.py t1_idx = 0 t1_total_eval_num = 3 axis_range = 0.1 # the length of X, Y, and Z axis in 3D vimg_dir = 'verify_img' if not os.path.exists(vimg_dir): os.makedirs(vimg_dir) #------------------------------------------- if opt.dataset == 'ycb': opt.num_objects = 21 #number of object classes in the dataset opt.num_points = 1000 #number of points on the input pointcloud opt.outf = 'trained_models/ycb' #folder to save trained models opt.log_dir = 'experiments/logs/ycb' #folder to save logs opt.repeat_epoch = 1 #number of repeat times for one epoch training elif opt.dataset == 'linemod': opt.num_objects = 13 opt.num_points = 500 opt.outf = 'trained_models/linemod' opt.log_dir = 'experiments/logs/linemod' opt.repeat_epoch = 20 else: print('Unknown dataset') return estimator = PoseNet(num_points = opt.num_points, num_obj = opt.num_objects) estimator.cuda() refiner = PoseRefineNet(num_points = opt.num_points, num_obj = opt.num_objects) refiner.cuda() if opt.resume_posenet != '': estimator.load_state_dict(torch.load(estimator_path)) if opt.resume_refinenet != '': refiner.load_state_dict(torch.load(refiner_path)) opt.refine_start = True opt.decay_start = True opt.lr *= opt.lr_rate opt.w *= opt.w_rate opt.batch_size = int(opt.batch_size / opt.iteration) optimizer = optim.Adam(refiner.parameters(), lr=opt.lr) else: opt.refine_start = False opt.decay_start = False optimizer = optim.Adam(estimator.parameters(), lr=opt.lr) if opt.dataset == 'ycb': test_dataset = PoseDataset_ycb('test', opt.num_points, False, opt.dataset_root, 0.0, opt.refine_start) elif opt.dataset == 'linemod': test_dataset = PoseDataset_linemod('test', opt.num_points, False, opt.dataset_root, 0.0, opt.refine_start) testdataloader = torch.utils.data.DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=opt.workers) print('complete loading testing loader\n') opt.sym_list = test_dataset.get_sym_list() opt.num_points_mesh = test_dataset.get_num_points_mesh() print('>>>>>>>>----------Dataset loaded!---------<<<<<<<<\n\ length of the testing set: {0}\nnumber of sample points on mesh: {1}\n\ symmetry object list: {2}'\ .format( len(test_dataset), opt.num_points_mesh, opt.sym_list)) #load pytorch model estimator.eval() refiner.eval() criterion = Loss(opt.num_points_mesh, opt.sym_list) criterion_refine = Loss_refine(opt.num_points_mesh, opt.sym_list) fw = open('{0}/t1_eval_result_logs.txt'.format(output_result_dir), 'w') #Pose estimation for j, data in enumerate(testdataloader, 0): # g13: modify this part for evaluation target-------------------- if j == t1_total_eval_num: break #---------------------------------------------------------------- points, choose, img, target, model_points, idx = data if len(points.size()) == 2: print('No.{0} NOT Pass! Lost detection!'.format(j)) fw.write('No.{0} NOT Pass! 
Lost detection!\n'.format(j)) continue points, choose, img, target, model_points, idx = Variable(points).cuda(), \ Variable(choose).cuda(), \ Variable(img).cuda(), \ Variable(target).cuda(), \ Variable(model_points).cuda(), \ Variable(idx).cuda() pred_r, pred_t, pred_c, emb = estimator(img, points, choose, idx) _, dis, new_points, new_target = criterion(pred_r, pred_t, pred_c, target, model_points, idx, points, opt.w, opt.refine_start) #if opt.refine_start: #iterative poserefinement # for ite in range(0, opt.iteration): # pred_r, pred_t = refiner(new_points, emb, idx) # dis, new_points, new_target = criterion_refine(pred_r, pred_t, new_target, model_points, idx, new_points) pred_r = pred_r / torch.norm(pred_r, dim=2).view(1, opt.num_points, 1) pred_c = pred_c.view(bs, opt.num_points) how_max, which_max = torch.max(pred_c, 1) pred_t = pred_t.view(bs * opt.num_points, 1, 3) my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy() my_t = (points.view(bs * opt.num_points, 1, 3) + pred_t)[which_max[0]].view(-1).cpu().data.numpy() my_pred = np.append(my_r, my_t) for ite in range(0, opt.iteration): T = Variable(torch.from_numpy(my_t.astype(np.float32))).cuda().view(1, 3).repeat(opt.num_points, 1).contiguous().view(1, opt.num_points, 3) my_mat = quaternion_matrix(my_r) R = Variable(torch.from_numpy(my_mat[:3, :3].astype(np.float32))).cuda().view(1, 3, 3) my_mat[0:3, 3] = my_t new_points = torch.bmm((points - T), R).contiguous() pred_r, pred_t = refiner(new_points, emb, idx) pred_r = pred_r.view(1, 1, -1) pred_r = pred_r / (torch.norm(pred_r, dim=2).view(1, 1, 1)) my_r_2 = pred_r.view(-1).cpu().data.numpy() my_t_2 = pred_t.view(-1).cpu().data.numpy() my_mat_2 = quaternion_matrix(my_r_2) my_mat_2[0:3, 3] = my_t_2 my_mat_final = np.dot(my_mat, my_mat_2) my_r_final = copy.deepcopy(my_mat_final) my_r_final[0:3, 3] = 0 my_r_final = quaternion_from_matrix(my_r_final, True) my_t_final = np.array([my_mat_final[0][3], my_mat_final[1][3], my_mat_final[2][3]]) my_pred = np.append(my_r_final, my_t_final) my_r = my_r_final my_t = my_t_final # g13: start drawing pose on image------------------------------------ # pick up image print("index {0}: {1}".format(j, test_dataset.list_rgb[j])) img = Image.open(test_dataset.list_rgb[j]) # pick up center position by bbox meta_file = open('{0}/data/{1}/gt.yml'.format(opt.dataset_root, '%02d' % test_dataset.list_obj[j]), 'r') meta = {} meta = yaml.load(meta_file) which_item = test_dataset.list_rank[j] bbx = meta[which_item][0]['obj_bb'] draw = ImageDraw.Draw(img) # draw box (ensure this is the right object) draw.line((bbx[0],bbx[1], bbx[0], bbx[1]+bbx[3]), fill=(255,0,0), width=5) draw.line((bbx[0],bbx[1], bbx[0]+bbx[2], bbx[1]), fill=(255,0,0), width=5) draw.line((bbx[0],bbx[1]+bbx[3], bbx[0]+bbx[2], bbx[1]+bbx[3]), fill=(255,0,0), width=5) draw.line((bbx[0]+bbx[2],bbx[1], bbx[0]+bbx[2], bbx[1]+bbx[3]), fill=(255,0,0), width=5) #get center c_x = bbx[0]+int(bbx[2]/2) c_y = bbx[1]+int(bbx[3]/2) draw.point((c_x,c_y), fill=(255,255,0)) #get the 3D position of center cam_intrinsic = np.zeros((3,3)) cam_intrinsic.itemset(0, test_dataset.cam_fx) cam_intrinsic.itemset(4, test_dataset.cam_fy) cam_intrinsic.itemset(2, test_dataset.cam_cx) cam_intrinsic.itemset(5, test_dataset.cam_cy) cam_intrinsic.itemset(8, 1) cam_extrinsic = my_mat_final[0:3, :] cam2d_3d = np.matmul(cam_intrinsic, cam_extrinsic) cen_3d = np.matmul(np.linalg.pinv(cam2d_3d), [[c_x],[c_y],[1]]) # replace img.show() with plt.imshow(img) #transpose three 3D axis point into 2D x_3d = cen_3d + 
[[axis_range],[0],[0],[0]] y_3d = cen_3d + [[0],[axis_range],[0],[0]] z_3d = cen_3d + [[0],[0],[axis_range],[0]] x_2d = np.matmul(cam2d_3d, x_3d) y_2d = np.matmul(cam2d_3d, y_3d) z_2d = np.matmul(cam2d_3d, z_3d) #draw the axis on 2D draw.line((c_x, c_y, x_2d[0], x_2d[1]), fill=(255,255,0), width=5) draw.line((c_x, c_y, y_2d[0], y_2d[1]), fill=(0,255,0), width=5) draw.line((c_x, c_y, z_2d[0], z_2d[1]), fill=(0,0,255), width=5) #g13: show image #img.show() #save file under file img_file_name = '{0}/pred_obj{1}_pic{2}.png'.format(vimg_dir, test_dataset.list_obj[j], which_item) img.save( img_file_name, "PNG" ) img.close()
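# --- Illustrative sketch (not from the original script): projecting the 3-D axis endpoints
# --- used above into the image. K @ [R|t] @ X gives homogeneous pixel coordinates, so the
# --- product should be divided by its last component before drawing; the draw.line calls
# --- above use the raw product. The intrinsics below are placeholder values.
import numpy as np

def project_point(cam_intrinsic, cam_extrinsic, point_3d_h):
    """cam_intrinsic: 3x3 K, cam_extrinsic: 3x4 [R|t], point_3d_h: homogeneous 4-vector."""
    proj = cam_intrinsic @ cam_extrinsic          # 3x4 projection matrix
    uvw = proj @ np.asarray(point_3d_h).reshape(4, 1)
    return float(uvw[0] / uvw[2]), float(uvw[1] / uvw[2])

if __name__ == "__main__":
    K = np.array([[572.4, 0.0, 325.3],
                  [0.0, 573.6, 242.0],
                  [0.0, 0.0, 1.0]])
    Rt = np.hstack([np.eye(3), np.array([[0.0], [0.0], [0.5]])])  # object 0.5 m in front
    axis_range = 0.1
    center = np.array([0.0, 0.0, 0.0, 1.0])
    x_axis = center + np.array([axis_range, 0.0, 0.0, 0.0])
    print(project_point(K, Rt, center))   # projected object centre
    print(project_point(K, Rt, x_axis))   # end of the X axis, offset along u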
def main(): # g13: parameter setting ------------------- ''' posemodel is trained_checkpoints/linemod/pose_model_9_0.01310166542980859.pth refine model is trained_checkpoints/linemod/pose_refine_model_493_0.006761023565178073.pth ''' objlist = [1, 2, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15] knn = KNearestNeighbor(1) opt.dataset ='linemod' opt.dataset_root = './datasets/linemod/Linemod_preprocessed' estimator_path = 'trained_checkpoints/linemod/pose_model_9_0.01310166542980859.pth' refiner_path = 'trained_checkpoints/linemod/pose_refine_model_493_0.006761023565178073.pth' opt.model = estimator_path opt.refine_model = refiner_path dataset_config_dir = 'datasets/linemod/dataset_config' output_result_dir = 'experiments/eval_result/linemod' opt.refine_start = True bs = 1 #fixed because of the default setting in torch.utils.data.DataLoader opt.iteration = 2 #default is 4 in eval_linemod.py t1_start = True t1_idx = 0 t1_total_eval_num = 3 t2_start = False t2_target_list = [22, 30, 172, 187, 267, 363, 410, 471, 472, 605, 644, 712, 1046, 1116, 1129, 1135, 1263] #t2_target_list = [0, 1] axis_range = 0.1 # the length of X, Y, and Z axis in 3D vimg_dir = 'verify_img' diameter = [] meta_file = open('{0}/models_info.yml'.format(dataset_config_dir), 'r') meta_d = yaml.load(meta_file) for obj in objlist: diameter.append(meta_d[obj]['diameter'] / 1000.0 * 0.1) print(diameter) if not os.path.exists(vimg_dir): os.makedirs(vimg_dir) #------------------------------------------- if opt.dataset == 'ycb': opt.num_objects = 21 #number of object classes in the dataset opt.num_points = 1000 #number of points on the input pointcloud opt.outf = 'trained_models/ycb' #folder to save trained models opt.log_dir = 'experiments/logs/ycb' #folder to save logs opt.repeat_epoch = 1 #number of repeat times for one epoch training elif opt.dataset == 'linemod': opt.num_objects = 13 opt.num_points = 500 opt.outf = 'trained_models/linemod' opt.log_dir = 'experiments/logs/linemod' opt.repeat_epoch = 20 else: print('Unknown dataset') return estimator = PoseNet(num_points = opt.num_points, num_obj = opt.num_objects) estimator.cuda() refiner = PoseRefineNet(num_points = opt.num_points, num_obj = opt.num_objects) refiner.cuda() estimator.load_state_dict(torch.load(estimator_path)) refiner.load_state_dict(torch.load(refiner_path)) opt.refine_start = True test_dataset = PoseDataset_linemod('test', opt.num_points, False, opt.dataset_root, 0.0, opt.refine_start) testdataloader = torch.utils.data.DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=opt.workers) opt.sym_list = test_dataset.get_sym_list() opt.num_points_mesh = test_dataset.get_num_points_mesh() print('>>>>>>>>----------Dataset loaded!---------<<<<<<<<\n\ length of the testing set: {0}\nnumber of sample points on mesh: {1}\n\ symmetry object list: {2}'\ .format( len(test_dataset), opt.num_points_mesh, opt.sym_list)) #load pytorch model estimator.eval() refiner.eval() criterion = Loss(opt.num_points_mesh, opt.sym_list) criterion_refine = Loss_refine(opt.num_points_mesh, opt.sym_list) fw = open('{0}/t1_eval_result_logs.txt'.format(output_result_dir), 'w') #Pose estimation for j, data in enumerate(testdataloader, 0): # g13: modify this part for evaluation target-------------------- if t1_start and j == t1_total_eval_num: break if t2_start and not (j in t2_target_list): continue #---------------------------------------------------------------- points, choose, img, target, model_points, idx = data if len(points.size()) == 2: print('No.{0} NOT Pass! 
Lost detection!'.format(j)) fw.write('No.{0} NOT Pass! Lost detection!\n'.format(j)) continue points, choose, img, target, model_points, idx = Variable(points).cuda(), \ Variable(choose).cuda(), \ Variable(img).cuda(), \ Variable(target).cuda(), \ Variable(model_points).cuda(), \ Variable(idx).cuda() pred_r, pred_t, pred_c, emb = estimator(img, points, choose, idx) _, dis, new_points, new_target = criterion(pred_r, pred_t, pred_c, target, model_points, idx, points, opt.w, opt.refine_start) #if opt.refine_start: #iterative poserefinement # for ite in range(0, opt.iteration): # pred_r, pred_t = refiner(new_points, emb, idx) # dis, new_points, new_target = criterion_refine(pred_r, pred_t, new_target, model_points, idx, new_points) pred_r = pred_r / torch.norm(pred_r, dim=2).view(1, opt.num_points, 1) pred_c = pred_c.view(bs, opt.num_points) how_max, which_max = torch.max(pred_c, 1) pred_t = pred_t.view(bs * opt.num_points, 1, 3) my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy() my_t = (points.view(bs * opt.num_points, 1, 3) + pred_t)[which_max[0]].view(-1).cpu().data.numpy() my_pred = np.append(my_r, my_t) for ite in range(0, opt.iteration): T = Variable(torch.from_numpy(my_t.astype(np.float32))).cuda().view(1, 3).repeat(opt.num_points, 1).contiguous().view(1, opt.num_points, 3) my_mat = quaternion_matrix(my_r) R = Variable(torch.from_numpy(my_mat[:3, :3].astype(np.float32))).cuda().view(1, 3, 3) my_mat[0:3, 3] = my_t new_points = torch.bmm((points - T), R).contiguous() pred_r, pred_t = refiner(new_points, emb, idx) pred_r = pred_r.view(1, 1, -1) pred_r = pred_r / (torch.norm(pred_r, dim=2).view(1, 1, 1)) my_r_2 = pred_r.view(-1).cpu().data.numpy() my_t_2 = pred_t.view(-1).cpu().data.numpy() my_mat_2 = quaternion_matrix(my_r_2) my_mat_2[0:3, 3] = my_t_2 my_mat_final = np.dot(my_mat, my_mat_2) my_r_final = copy.deepcopy(my_mat_final) my_r_final[0:3, 3] = 0 my_r_final = quaternion_from_matrix(my_r_final, True) my_t_final = np.array([my_mat_final[0][3], my_mat_final[1][3], my_mat_final[2][3]]) my_pred = np.append(my_r_final, my_t_final) my_r = my_r_final my_t = my_t_final # Here 'my_pred' is the final pose estimation result after refinement ('my_r': quaternion, 'my_t': translation) #g13: checking the dis value success_count = [0 for i in range(opt.num_objects)] num_count = [0 for i in range(opt.num_objects)] model_points = model_points[0].cpu().detach().numpy() my_r = quaternion_matrix(my_r)[:3, :3] pred = np.dot(model_points, my_r.T) + my_t target = target[0].cpu().detach().numpy() if idx[0].item() in opt.sym_list: pred = torch.from_numpy(pred.astype(np.float32)).cuda().transpose(1, 0).contiguous() target = torch.from_numpy(target.astype(np.float32)).cuda().transpose(1, 0).contiguous() inds = knn(target.unsqueeze(0), pred.unsqueeze(0)) target = torch.index_select(target, 1, inds.view(-1) - 1) dis = torch.mean(torch.norm((pred.transpose(1, 0) - target.transpose(1, 0)), dim=1), dim=0).item() else: dis = np.mean(np.linalg.norm(pred - target, axis=1)) if dis < diameter[idx[0].item()]: success_count[idx[0].item()] += 1 print('No.{0} Pass! Distance: {1}'.format(j, dis)) fw.write('No.{0} Pass! Distance: {1}\n'.format(j, dis)) else: print('No.{0} NOT Pass! Distance: {1}'.format(j, dis)) fw.write('No.{0} NOT Pass! 
Distance: {1}\n'.format(j, dis)) num_count[idx[0].item()] += 1 # g13: start drawing pose on image------------------------------------ # pick up image print('{0}:\nmy_r is {1}\nmy_t is {2}\ndis:{3}'.format(j, my_r, my_t, dis.item())) print("index {0}: {1}".format(j, test_dataset.list_rgb[j])) img = Image.open(test_dataset.list_rgb[j]) # pick up center position by bbox meta_file = open('{0}/data/{1}/gt.yml'.format(opt.dataset_root, '%02d' % test_dataset.list_obj[j]), 'r') meta = {} meta = yaml.load(meta_file) which_item = test_dataset.list_rank[j] which_obj = test_dataset.list_obj[j] which_dict = 0 dict_leng = len(meta[which_item]) #print('get meta[{0}][{1}][obj_bb]'.format(which_item, which_obj)) k_idx = 0 while 1: if meta[which_item][k_idx]['obj_id'] == which_obj: which_dict = k_idx break k_idx = k_idx+1 bbx = meta[which_item][which_dict]['obj_bb'] draw = ImageDraw.Draw(img) # draw box (ensure this is the right object) draw.line((bbx[0],bbx[1], bbx[0], bbx[1]+bbx[3]), fill=(255,0,0), width=5) draw.line((bbx[0],bbx[1], bbx[0]+bbx[2], bbx[1]), fill=(255,0,0), width=5) draw.line((bbx[0],bbx[1]+bbx[3], bbx[0]+bbx[2], bbx[1]+bbx[3]), fill=(255,0,0), width=5) draw.line((bbx[0]+bbx[2],bbx[1], bbx[0]+bbx[2], bbx[1]+bbx[3]), fill=(255,0,0), width=5) #get center c_x = bbx[0]+int(bbx[2]/2) c_y = bbx[1]+int(bbx[3]/2) draw.point((c_x,c_y), fill=(255,255,0)) print('center:({0},{1})'.format(c_x, c_y)) #get the 3D position of center cam_intrinsic = np.zeros((3,3)) cam_intrinsic.itemset(0, test_dataset.cam_fx) cam_intrinsic.itemset(4, test_dataset.cam_fy) cam_intrinsic.itemset(2, test_dataset.cam_cx) cam_intrinsic.itemset(5, test_dataset.cam_cy) cam_intrinsic.itemset(8, 1) cam_extrinsic = my_mat_final[0:3, :] cam2d_3d = np.matmul(cam_intrinsic, cam_extrinsic) cen_3d = np.matmul(np.linalg.pinv(cam2d_3d), [[c_x],[c_y],[1]]) # replace img.show() with plt.imshow(img) #transpose three 3D axis point into 2D x_3d = cen_3d + [[axis_range],[0],[0],[0]] y_3d = cen_3d + [[0],[axis_range],[0],[0]] z_3d = cen_3d + [[0],[0],[axis_range],[0]] x_2d = np.matmul(cam2d_3d, x_3d) y_2d = np.matmul(cam2d_3d, y_3d) z_2d = np.matmul(cam2d_3d, z_3d) #draw the axis on 2D draw.line((c_x, c_y, x_2d[0], x_2d[1]), fill=(255,255,0), width=5) draw.line((c_x, c_y, y_2d[0], y_2d[1]), fill=(0,255,0), width=5) draw.line((c_x, c_y, z_2d[0], z_2d[1]), fill=(0,0,255), width=5) #g13: draw the estimate pred obj for pti in pred: pti.transpose() pti_2d = np.matmul(cam_intrinsic, pti) #print('({0},{1})\n'.format(int(pti_2d[0]),int(pti_2d[1]))) draw.point([int(pti_2d[0]),int(pti_2d[1])], fill=(255,255,0)) #g13: show image #img.show() #save file under file img_file_name = '{0}/batch{1}_pred_obj{2}_pic{3}.png'.format(vimg_dir, j, test_dataset.list_obj[j], which_item) img.save( img_file_name, "PNG" ) img.close() # plot ground true ---------------------------- img = Image.open(test_dataset.list_rgb[j]) draw = ImageDraw.Draw(img) draw.line((bbx[0],bbx[1], bbx[0], bbx[1]+bbx[3]), fill=(255,0,0), width=5) draw.line((bbx[0],bbx[1], bbx[0]+bbx[2], bbx[1]), fill=(255,0,0), width=5) draw.line((bbx[0],bbx[1]+bbx[3], bbx[0]+bbx[2], bbx[1]+bbx[3]), fill=(255,0,0), width=5) draw.line((bbx[0]+bbx[2],bbx[1], bbx[0]+bbx[2], bbx[1]+bbx[3]), fill=(255,0,0), width=5) target_r = np.resize(np.array(meta[which_item][k_idx]['cam_R_m2c']), (3, 3)) target_t = np.array(meta[which_item][k_idx]['cam_t_m2c']) target_t = target_t[np.newaxis, :] cam_extrinsic_GT = np.concatenate((target_r, target_t.T), axis=1) #get center 3D cam2d_3d_GT = np.matmul(cam_intrinsic, cam_extrinsic_GT) 
cen_3d_GT = np.matmul(np.linalg.pinv(cam2d_3d_GT), [[c_x],[c_y],[1]]) #transpose three 3D axis point into 2D x_3d = cen_3d_GT + [[axis_range],[0],[0],[0]] y_3d = cen_3d_GT + [[0],[axis_range],[0],[0]] z_3d = cen_3d_GT + [[0],[0],[axis_range],[0]] x_2d = np.matmul(cam2d_3d_GT, x_3d) y_2d = np.matmul(cam2d_3d_GT, y_3d) z_2d = np.matmul(cam2d_3d_GT, z_3d) #draw the axis on 2D draw.line((c_x, c_y, x_2d[0], x_2d[1]), fill=(255,255,0), width=5) draw.line((c_x, c_y, y_2d[0], y_2d[1]), fill=(0,255,0), width=5) draw.line((c_x, c_y, z_2d[0], z_2d[1]), fill=(0,0,255), width=5) print('pred:\n{0}\nGT:\n{1}\n'.format(cam_extrinsic,cam_extrinsic_GT)) print('pred 3D:{0}\nGT 3D:{1}\n'.format(cen_3d, cen_3d_GT)) img_file_name = '{0}/batch{1}_pred_obj{2}_pic{3}_gt.png'.format(vimg_dir, j, test_dataset.list_obj[j], which_item) img.save( img_file_name, "PNG" ) img.close() meta_file.close() print('\nplot_result_img.py completed the task\n')
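# --- Illustrative sketch (not from the eval script): the pass/fail test applied above for
# --- non-symmetric objects. The model points are transformed by the predicted pose, compared
# --- point-by-point with the ground-truth-transformed points, and the mean distance (ADD) is
# --- accepted when it is below 10% of the object diameter, as in the diameter list above.
import numpy as np

def add_metric(model_points, pred_R, pred_t, gt_R, gt_t):
    pred = model_points @ pred_R.T + pred_t
    target = model_points @ gt_R.T + gt_t
    return np.mean(np.linalg.norm(pred - target, axis=1))

if __name__ == "__main__":
    rng = np.random.default_rng(0)
    model_points = rng.uniform(-0.05, 0.05, size=(500, 3))     # toy 10 cm object
    gt_R, gt_t = np.eye(3), np.array([0.0, 0.0, 0.5])
    pred_R, pred_t = np.eye(3), np.array([0.002, 0.0, 0.501])  # a few millimetres of error
    dis = add_metric(model_points, pred_R, pred_t, gt_R, gt_t)
    diameter = 0.10
    print(dis, dis < 0.1 * diameter)   # passes: error well under 1 cm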
how_max, which_max = torch.max(pred_c, 1)
pred_t = pred_t.view(bs * num_points, 1, 3)
points = cloud.view(bs * num_points, 1, 3)
my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy()
my_t = (points + pred_t)[which_max[0]].view(-1).cpu().data.numpy()
my_pred = np.append(my_r, my_t)
### print("my_pred w/o refinement: \n", my_pred)

for ite in range(0, iteration):
    T = Variable(torch.from_numpy(my_t.astype(np.float32))).cuda().view(1, 3).repeat(
        num_points, 1).contiguous().view(1, num_points, 3)
    my_mat = quaternion_matrix(my_r)
    R = Variable(torch.from_numpy(my_mat[:3, :3].astype(np.float32))).cuda().view(1, 3, 3)
    my_mat[0:3, 3] = my_t
    new_cloud = torch.bmm((cloud - T), R).contiguous()
    pred_r, pred_t = refiner(new_cloud, emb, index)
    pred_r = pred_r.view(1, 1, -1)
    pred_r = pred_r / (torch.norm(pred_r, dim=2).view(1, 1, 1))
    my_r_2 = pred_r.view(-1).cpu().data.numpy()
    my_t_2 = pred_t.view(-1).cpu().data.numpy()
    my_mat_2 = quaternion_matrix(my_r_2)
    my_mat_2[0:3, 3] = my_t_2
# my_mat_2 = quaternion_matrix(my_r_2)
# my_mat_2[0:3, 3] = my_t_2
# my_mat_final = np.dot(my_mat, my_mat_2)
# my_r_final = copy.deepcopy(my_mat_final)
# my_r_final[0:3, 3] = 0
# my_r_final = quaternion_from_matrix(my_r_final, True)
# my_t_final = np.array([my_mat_final[0][3], my_mat_final[1][3], my_mat_final[2][3]])
# my_pred = np.append(my_r_final, my_t_final)
# my_r = my_r_final
# my_t = my_t_final
# Here 'my_pred' is the final pose estimation result after refinement ('my_r': quaternion, 'my_t': translation)

model_points = model_points[0].cpu().detach().numpy()
my_r = quaternion_matrix(my_r)[:3, :3]
pred = np.dot(model_points, my_r.T) + my_t
target = target[0].cpu().detach().numpy()

if idx[0].item() in sym_list:
    # symmetric objects (ADD-S): match each predicted point to its nearest target point
    pred = torch.from_numpy(pred.astype(np.float32)).cuda().transpose(1, 0).contiguous()
    target = torch.from_numpy(target.astype(np.float32)).cuda().transpose(1, 0).contiguous()
    inds = knn(target.unsqueeze(0), pred.unsqueeze(0))
    target = torch.index_select(target, 1, inds.view(-1) - 1)
    dis = torch.mean(torch.norm((pred.transpose(1, 0) - target.transpose(1, 0)), dim=1), dim=0).item()
else:
    # asymmetric objects (ADD): point-to-point distance in model-point order
    dis = np.mean(np.linalg.norm(pred - target, axis=1))
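# --- Illustrative sketch (an alternative, not the repository's code path): the symmetric
# --- branch above matches every predicted point to its nearest ground-truth point with a
# --- CUDA KNearestNeighbor module. The same ADD-S distance can be computed on CPU with
# --- scipy's cKDTree, which is convenient when no GPU KNN extension is built.
import numpy as np
from scipy.spatial import cKDTree

def adds_metric(pred, target):
    """pred, target: (N, 3) transformed model points; returns mean nearest-neighbour distance."""
    tree = cKDTree(target)
    nn_dist, _ = tree.query(pred, k=1)
    return float(np.mean(nn_dist))

if __name__ == "__main__":
    rng = np.random.default_rng(1)
    target = rng.uniform(-0.05, 0.05, size=(500, 3))
    pred = target + rng.normal(scale=0.001, size=target.shape)  # ~1 mm of noise
    print(adds_metric(pred, target))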
def callback(self): time1 = time.time() rgb_original = self.rgb self.rgb = np.transpose(self.rgb, (2, 0, 1)) norm = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) self.rgb = norm(torch.from_numpy(self.rgb.astype(np.float32))) self.rgb = Variable(self.rgb).cuda() semantic = self.model(self.rgb.unsqueeze(0)) _, pred = torch.max(semantic, dim=1) pred = pred *255 if IMGSAVE: torchvision.utils.save_image(pred, path + '/seg_result/out/' + 'torchpred.png') pred = np.transpose(pred.cpu().numpy(), (1, 2, 0)) # (CxHxW)->(HxWxC) if IMGSAVE: cv2.imwrite(path + '/seg_result/out/' + 'numpypred.png', pred) _, contours, _ = cv2.findContours(np.uint8(pred),cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) cnt = max(contours, key=cv2.contourArea) x,y,w,h = cv2.boundingRect(cnt) rmin, rmax, cmin, cmax = get_bbox([x,y,w,h ]) print(get_bbox([x,y,w,h ])) if IMGSAVE: img_bbox = np.array(rgb_original.copy()) cv2.rectangle(img_bbox, (cmin, rmin), (cmax, rmax), (255, 0, 0), 2) cv2.imwrite(path + '/seg_result/out/' + 'bbox.png', img_bbox) mask_depth = ma.getmaskarray(ma.masked_not_equal(self.depth,0)) mask_label = ma.getmaskarray(ma.masked_equal(pred, np.array(255))) # print(mask_depth.shape, mask_label.shape) mask = mask_depth * mask_label.reshape(480, 640) img = np.transpose(rgb_original, (2, 0, 1)) img_masked = img[:, rmin:rmax, cmin:cmax] choose = mask[rmin:rmax, cmin:cmax].flatten().nonzero()[0] #print("length of choose is :{0}".format(len(choose))) if len(choose) == 0: cc = torch.LongTensor([0]) return(cc, cc, cc, cc, cc, cc) if len(choose) > num_points: c_mask = np.zeros(len(choose), dtype=int) c_mask[:num_points] = 1 # if number of object pixels are bigger than 500, we select just 500 np.random.shuffle(c_mask) choose = choose[c_mask.nonzero()] # now len(choose) = 500 else: choose = np.pad(choose, (0, num_points - len(choose)), 'wrap') depth_masked = self.depth[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) xmap_masked = self.xmap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) ymap_masked = self.ymap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) choose = np.array([choose]) pt2 = depth_masked pt0 = (ymap_masked - self.cam_cx) * pt2 / self.cam_fx pt1 = (xmap_masked - self.cam_cy) * pt2 / self.cam_fy cloud = np.concatenate((pt0, pt1, pt2), axis=1) cloud = cloud /1000 points = torch.from_numpy(cloud.astype(np.float32)) choose = torch.LongTensor(choose.astype(np.int32)) img = norm(torch.from_numpy(img_masked.astype(np.float32))) idx = torch.LongTensor([self.object_index]) img = Variable(img).cuda().unsqueeze(0) points = Variable(points).cuda().unsqueeze(0) choose = Variable(choose).cuda().unsqueeze(0) idx = Variable(idx).cuda().unsqueeze(0) pred_r, pred_t, pred_c, emb = self.estimator(img, points, choose, idx) pred_r = pred_r / torch.norm(pred_r, dim=2).view(1, num_points, 1) pred_c = pred_c.view(bs, num_points) how_max, which_max = torch.max(pred_c, 1) pred_t = pred_t.view(bs * num_points, 1, 3) my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy() my_t = (points.view(bs * num_points, 1, 3) + pred_t)[which_max[0]].view(-1).cpu().data.numpy() my_pred = np.append(my_r, my_t) for ite in range(0, iteration): T = Variable(torch.from_numpy(my_t.astype(np.float32))).cuda().view(1, 3).repeat(num_points, 1).contiguous().view(1, num_points, 3) my_mat = quaternion_matrix(my_r) R = Variable(torch.from_numpy(my_mat[:3, :3].astype(np.float32))).cuda().view(1, 3, 3) my_mat[0:3, 3] = my_t new_points = torch.bmm((points - 
T), R).contiguous() pred_r, pred_t = self.refiner(new_points, emb, idx) pred_r = pred_r.view(1, 1, -1) pred_r = pred_r / (torch.norm(pred_r, dim=2).view(1, 1, 1)) my_r_2 = pred_r.view(-1).cpu().data.numpy() my_t_2 = pred_t.view(-1).cpu().data.numpy() my_mat_2 = quaternion_matrix(my_r_2) my_mat_2[0:3, 3] = my_t_2 my_mat_final = np.dot(my_mat, my_mat_2) # refine pose means two matrix multiplication my_r_final = copy.deepcopy(my_mat_final) my_r_final[0:3, 3] = 0 my_r_final = quaternion_from_matrix(my_r_final, True) my_t_final = np.array([my_mat_final[0][3], my_mat_final[1][3], my_mat_final[2][3]]) my_pred = np.append(my_r_final, my_t_final) my_r = my_r_final my_t = my_t_final my_r = quaternion_matrix(my_r)[:3, :3] my_t = np.array(my_t) print('estimated rotation is\n:{0}'.format(my_r)) print('estimated translation is\n :{0}'.format(my_t)) ## custom scaling for 3Dbox col = [2,0,1] new_col = np.zeros((len(col), len(col))) for idx, i in enumerate(col): new_col[idx, i] = 1 self.scaled = np.dot(self.scaled, new_col) target = np.dot(self.scaled, my_r.T) target = np.add(target, my_t) p0 = (int((target[0][0]/ target[0][2])*self.cam_fx + self.cam_cx), int((target[0][1]/ target[0][2])*self.cam_fy + self.cam_cy)) p1 = (int((target[1][0]/ target[1][2])*self.cam_fx + self.cam_cx), int((target[1][1]/ target[1][2])*self.cam_fy + self.cam_cy)) p2 = (int((target[2][0]/ target[2][2])*self.cam_fx + self.cam_cx), int((target[2][1]/ target[2][2])*self.cam_fy + self.cam_cy)) p3 = (int((target[3][0]/ target[3][2])*self.cam_fx + self.cam_cx), int((target[3][1]/ target[3][2])*self.cam_fy + self.cam_cy)) p4 = (int((target[4][0]/ target[4][2])*self.cam_fx + self.cam_cx), int((target[4][1]/ target[4][2])*self.cam_fy + self.cam_cy)) p5 = (int((target[5][0]/ target[5][2])*self.cam_fx + self.cam_cx), int((target[5][1]/ target[5][2])*self.cam_fy + self.cam_cy)) p6 = (int((target[6][0]/ target[6][2])*self.cam_fx + self.cam_cx), int((target[6][1]/ target[6][2])*self.cam_fy + self.cam_cy)) p7 = (int((target[7][0]/ target[7][2])*self.cam_fx + self.cam_cx), int((target[7][1]/ target[7][2])*self.cam_fy + self.cam_cy)) cv2.line(rgb_original, p0,p1,(0,0,255), 2) cv2.line(rgb_original, p0,p3,(0,0,255), 2) cv2.line(rgb_original, p0,p4,(0,0,255), 2) cv2.line(rgb_original, p1,p2,(0,0,255), 2) cv2.line(rgb_original, p1,p5,(0,0,255), 2) cv2.line(rgb_original, p2,p3,(0,0,255), 2) cv2.line(rgb_original, p2,p6,(0,0,255), 2) cv2.line(rgb_original, p3,p7,(0,0,255), 2) cv2.line(rgb_original, p4,p5,(0,0,255), 2) cv2.line(rgb_original, p4,p7,(0,0,255), 2) cv2.line(rgb_original, p5,p6,(0,0,255), 2) cv2.line(rgb_original, p6,p7,(0,0,255), 2) """ Do not support live-view like cv.imshow """ plt.figure(figsize = (10,10)) plt.imshow(rgb_original, cmap = 'gray', interpolation = 'nearest', aspect='auto') plt.xticks([]), plt.yticks([]) # to hide tick values on X and Y axis plt.show() """ need python3.x """ ## https://stackoverflow.com/questions/14655969/opencv-error-the-function-is-not-implemented # cv2.imshow('rgb', cv2.cvtColor(rgb_original, cv2.COLOR_BGR2RGB)) # OpenCV uses BGR model # # cv2.waitKey(1) # key = cv2.waitKey(1) & 0xFF # if key == 27: # print("stopping streaming...") # break time2 = time.time() print('inference time is :{0}'.format(time2-time1))
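# --- Illustrative sketch (not from the original node): the 3-D box drawing above projects the
# --- eight transformed corners one by one (p0..p7) and then issues twelve cv2.line calls. The
# --- same thing can be written as a loop over an edge list; the edge list below mirrors the
# --- p0..p7 pairs used above, and the corner ordering of the example cube is an assumption.
import cv2
import numpy as np

EDGES = [(0, 1), (0, 3), (0, 4), (1, 2), (1, 5), (2, 3),
         (2, 6), (3, 7), (4, 5), (4, 7), (5, 6), (6, 7)]

def draw_box(image, corners_cam, fx, fy, cx, cy, color=(0, 0, 255)):
    """corners_cam: (8, 3) box corners in the camera frame (metres, z > 0)."""
    pts = [(int(X / Z * fx + cx), int(Y / Z * fy + cy)) for X, Y, Z in corners_cam]
    for a, b in EDGES:
        cv2.line(image, pts[a], pts[b], color, 2)
    return image

if __name__ == "__main__":
    img = np.zeros((480, 640, 3), dtype=np.uint8)
    # axis-aligned 10 cm cube, 0.5 m in front of the camera, corners ordered as two face loops
    s, z0, z1 = 0.05, 0.45, 0.55
    corners = np.array([[-s, -s, z0], [s, -s, z0], [s, s, z0], [-s, s, z0],
                        [-s, -s, z1], [s, -s, z1], [s, s, z1], [-s, s, z1]])
    draw_box(img, corners, fx=615.0, fy=615.0, cx=320.0, cy=240.0)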
def forward(self, img, x, choose, obj, focal_length, principal_point, motion, is_train): if img.shape[1] == 0 and is_train: if self.last_R_total[int(obj)] is None: return None, None, None, None last_pose = torch.cat([ torch.cat([ self.last_R_total[int(obj)], (self.last_t_total[int(obj)] + self.last_x_total[int(obj)]).reshape(1000, 3, 1) ], dim=2), torch.as_tensor([[0, 0, 0, 1]], dtype=torch.float32).repeat( 1000, 1, 1).cuda() ], dim=1) init_pose = motion[0].cuda().repeat(1000, 1, 1) transformed_pose = torch.bmm(init_pose, last_pose) self.last_R_total[int(obj)] = transformed_pose[:, 0:3, 0:3] self.last_t_total[int(obj)] = transformed_pose[:, 0:3, 3].reshape( 1, 1000, 3) - self.last_x_total[int(obj)] out_rx = self.last_R_total[int(obj)] out_tx = self.last_t_total[int(obj)] out_cx = self.last_c_total[int(obj)] out_x = self.last_x_total[int(obj)] return out_rx, out_tx, out_cx, out_x out_img = self.cnn(img) bs, di, _, _ = out_img.size() choose_label = choose.repeat(1, di, 1) label_img = out_img.view(bs, di, -1) x_label = torch.gather(label_img, 2, choose_label).transpose(1, 2) x_six = torch.cat([x, x_label], 2) pred_r_from_last = torch.as_tensor([1., 0, 0, 0]).view(1, 4, 1).cuda() pred_t_from_last = torch.as_tensor([1., 0, 0]).view(1, 3, 1).cuda() if self.last_R[int(obj)] is not None: init_pose = motion[0].cuda() last_R_matrix = quaternion_matrix(self.last_R[int(obj)])[0:3, 0:3].T last_pose = torch.cat([ torch.cat([ torch.as_tensor(last_R_matrix, dtype=torch.float32), self.last_t[int(obj)].view(3, 1) ], dim=1), torch.as_tensor([[0, 0, 0, 1]], dtype=torch.float32) ], dim=0) x_six, pred_r_from_last, pred_t_from_last = merge_pc( x_six, last_pose, init_pose) temporal_x = self.temporal_feat(x_six.transpose(1, 2), pred_r_from_last, pred_t_from_last) rx = F.relu(self.conv1_r(temporal_x)) tx = F.relu(self.conv1_t(temporal_x)) cx = F.relu(self.conv1_c(temporal_x)) rx = F.relu(self.conv2_r(rx)) tx = F.relu(self.conv2_t(tx)) cx = F.relu(self.conv2_c(cx)) rx = F.relu(self.conv3_r(rx)) tx = F.relu(self.conv3_t(tx)) cx = F.relu(self.conv3_c(cx)) rx = self.conv4_r(rx).view(bs, self.num_obj, 4, -1) tx = self.conv4_t(tx).view(bs, self.num_obj, 3, -1) cx = torch.sigmoid(self.conv4_c(cx)).view(bs, self.num_obj, 1, -1) b = 0 out_rx = torch.index_select(rx[b], 0, obj[b]) out_tx = torch.index_select(tx[b], 0, obj[b]) out_cx = torch.index_select(cx[b], 0, obj[b]) out_rx = out_rx.contiguous().transpose(2, 1).contiguous() out_cx = out_cx.contiguous().transpose(2, 1).contiguous() out_tx = out_tx.contiguous().transpose(2, 1).contiguous() pred_r, pred_t = getMaxRt(out_rx, out_cx, out_tx, x_six) self.last_R[int(obj)] = pred_r self.last_t[int(obj)] = pred_t out_rx = self.Q2R(out_rx) out_x = x_six[:, :, 0:3] # save current state if self.last_R_total[int(obj)] is not None: last_pose = torch.cat([ torch.cat([ self.last_R_total[int(obj)], (self.last_t_total[int(obj)] + self.last_x_total[int(obj)]).reshape(1000, 3, 1) ], dim=2), torch.as_tensor([[0, 0, 0, 1]], dtype=torch.float32).repeat( 1000, 1, 1).cuda() ], dim=1) init_pose = motion[0].cuda().repeat(1000, 1, 1) transformed_pose = torch.bmm(init_pose, last_pose) self.last_R_total[int(obj)] = transformed_pose[:, 0:3, 0:3] self.last_t_total[int(obj)] = transformed_pose[:, 0:3, 3].reshape( 1, 1000, 3) - self.last_x_total[int(obj)] lrt = torch.cat([self.last_R_total[int(obj)], out_rx], dim=0) ltt = torch.cat([self.last_t_total[int(obj)], out_tx], dim=1) lct = torch.cat([self.last_c_total[int(obj)], out_cx], dim=1) lxt = torch.cat([self.last_x_total[int(obj)], x_six[:, :, 0:3]], 
dim=1)
    out_rx = torch.cat([self.last_R_total[int(obj)], out_rx], dim=0)
    out_tx = torch.cat([self.last_t_total[int(obj)], out_tx], dim=1)
    out_cx = torch.cat([self.last_c_total[int(obj)], out_cx], dim=1)
    out_x = torch.cat([self.last_x_total[int(obj)], x_six[:, :, 0:3]], dim=1)
    # keep a fixed-size (1000-point) memory: randomly subsample the concatenated state
    mask = np.zeros(lrt.shape[0], dtype=int)
    mask[:1000] = 1
    np.random.shuffle(mask)
    lrt = lrt[mask.nonzero(), :, :][0]
    ltt = ltt[0, mask.nonzero(), :]
    lct = lct[0, mask.nonzero(), :]
    lxt = lxt[0, mask.nonzero(), :]
    self.last_R_total[int(obj)] = lrt
    self.last_t_total[int(obj)] = ltt
    self.last_c_total[int(obj)] = lct
    self.last_x_total[int(obj)] = lxt
else:
    self.last_R_total[int(obj)] = out_rx
    self.last_t_total[int(obj)] = out_tx
    self.last_c_total[int(obj)] = out_cx
    self.last_x_total[int(obj)] = x_six[:, :, 0:3]
return out_rx, out_tx, out_cx, out_x
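# --- Illustrative sketch (not the model's code): the temporal branch above keeps the last
# --- per-point pose of each object as rotation/translation blocks, lifts them to 4x4
# --- homogeneous matrices, and left-multiplies the inter-frame motion. A minimal version:
import torch

def propagate_pose(last_R, last_t, motion):
    """last_R: (N, 3, 3), last_t: (N, 3), motion: (4, 4) transform between frames."""
    n = last_R.shape[0]
    last_pose = torch.cat([
        torch.cat([last_R, last_t.reshape(n, 3, 1)], dim=2),             # (N, 3, 4)
        torch.tensor([[0.0, 0.0, 0.0, 1.0]]).repeat(n, 1, 1)             # (N, 1, 4)
    ], dim=1)                                                            # (N, 4, 4)
    new_pose = torch.bmm(motion.unsqueeze(0).repeat(n, 1, 1), last_pose) # (N, 4, 4)
    return new_pose[:, :3, :3], new_pose[:, :3, 3]

if __name__ == "__main__":
    R = torch.eye(3).repeat(5, 1, 1)
    t = torch.zeros(5, 3)
    motion = torch.eye(4)
    motion[0, 3] = 0.1                       # moved 10 cm along x between frames
    new_R, new_t = propagate_pose(R, t, motion)
    print(new_t[0])                          # tensor([0.1000, 0.0000, 0.0000])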
# this function modifies the parameters you pass in, so avoid getting our data changed out
# from under us by forcing copies (a = b wasn't sufficient, but a = float(b) forced a copy).
tmp_yaw = float(yaw_rad)
tmp_pitch = float(pitch_rad)
tmp_roll = float(roll_rad)
ned2body = transformations.quaternion_from_euler(tmp_yaw, tmp_pitch, tmp_roll, 'rzyx')
body2ned = transformations.quaternion_inverse(ned2body)
#print 'ned2body(q):', ned2body
ned2cam_q = transformations.quaternion_multiply(ned2body, body2cam)
ned2cam = np.matrix(transformations.quaternion_matrix(np.array(ned2cam_q))[:3, :3]).T
#print 'ned2cam:', ned2cam
R = ned2proj.dot(ned2cam)
rvec, jac = cv2.Rodrigues(R)
ned = navpy.lla2ned(lat_deg, lon_deg, filt_alt, ref[0], ref[1], ref[2])
if args.correction:
    ned[0] += correction.north_interp(time)
    ned[1] += correction.east_interp(time)
    ned[2] += correction.down_interp(time)
#print 'ned:', ned
tvec = -np.matrix(R) * np.matrix(ned).T
R, jac = cv2.Rodrigues(rvec)  # is this R the same as the earlier R?
PROJ = np.concatenate((R, tvec), axis=1)
#print 'PROJ:', PROJ
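# --- Illustrative sketch (not the original pipeline): the fragment above assembles the
# --- camera extrinsic PROJ = [R | t] with t = -R @ camera_position, so that a world point
# --- equal to the camera position maps to the camera-frame origin. A minimal NumPy check:
import numpy as np

def build_extrinsic(R, cam_pos_world):
    tvec = -R @ np.asarray(cam_pos_world).reshape(3, 1)
    return np.hstack([R, tvec])          # 3x4 [R | t]

if __name__ == "__main__":
    R = np.eye(3)                        # placeholder world-to-camera rotation
    ned = np.array([10.0, 5.0, -2.0])    # placeholder camera position (NED)
    PROJ = build_extrinsic(R, ned)
    point_h = np.append(ned, 1.0)        # the camera position in homogeneous coordinates
    print(PROJ @ point_h)                # -> [0. 0. 0.]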
pred_r, pred_t, pred_c, emb = estimator(img_masked, cloud, choose, index)
pred_r = pred_r / torch.norm(pred_r, dim=2).view(1, num_points, 1)
pred_c = pred_c.view(bs, num_points)
how_max, which_max = torch.max(pred_c, 1)
pred_t = pred_t.view(bs * num_points, 1, 3)
points = cloud.view(bs * num_points, 1, 3)
my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy()
my_t = (points + pred_t)[which_max[0]].view(-1).cpu().data.numpy()
my_pred = np.append(my_r, my_t)
my_result_wo_refine.append(my_pred.tolist())

for ite in range(0, iteration):
    T = Variable(torch.from_numpy(my_t.astype(np.float32))).cuda().view(1, 3).repeat(
        num_points, 1).contiguous().view(1, num_points, 3)
    my_mat = quaternion_matrix(my_r)
    R = Variable(torch.from_numpy(my_mat[:3, :3].astype(np.float32))).cuda().view(1, 3, 3)
    my_mat[0:3, 3] = my_t
    new_cloud = torch.bmm((cloud - T), R).contiguous()
    pred_r, pred_t = refiner(new_cloud, emb, index)
    pred_r = pred_r.view(1, 1, -1)
    pred_r = pred_r / (torch.norm(pred_r, dim=2).view(1, 1, 1))
    my_r_2 = pred_r.view(-1).cpu().data.numpy()
    my_t_2 = pred_t.view(-1).cpu().data.numpy()
    my_mat_2 = quaternion_matrix(my_r_2)
    my_mat_2[0:3, 3] = my_t_2
    my_mat_final = np.dot(my_mat, my_mat_2)
    my_r_final = copy.deepcopy(my_mat_final)
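# --- Illustrative sketch (not the original loop): how one refinement iteration above folds the
# --- refiner's delta pose into the running estimate. quaternion_matrix / quaternion_from_matrix
# --- are the same (w, x, y, z) helpers used elsewhere in this code; the import path here is an
# --- assumption for a standalone script.
import copy
import numpy as np
from transformations import quaternion_matrix, quaternion_from_matrix  # assumed import path

def compose_pose(q, t, dq, dt):
    """Current pose (q, t) and refiner delta (dq, dt) -> refined (q, t)."""
    pose = quaternion_matrix(q)
    pose[0:3, 3] = t
    delta = quaternion_matrix(dq)
    delta[0:3, 3] = dt
    refined = pose @ delta                       # apply the delta in the current object frame
    rot_only = copy.deepcopy(refined)
    rot_only[0:3, 3] = 0
    q_new = quaternion_from_matrix(rot_only, True)
    t_new = refined[0:3, 3].copy()
    return q_new, t_new

if __name__ == "__main__":
    q0, t0 = np.array([1.0, 0.0, 0.0, 0.0]), np.array([0.0, 0.0, 0.5])
    dq, dt = np.array([1.0, 0.0, 0.0, 0.0]), np.array([0.01, 0.0, 0.0])
    print(compose_pose(q0, t0, dq, dt))          # identity rotation, t = [0.01, 0, 0.5]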
# [0.03678917 0.85402822 0.51892423 0.]
# [0. 0. 0. 1.]]
# x=np.dot(my_rotation_matrix_from_euler,np.array([[1,0,0,1]]).transpose()).flatten()
# y=np.dot(my_rotation_matrix_from_euler,np.array([[0,1,0,1]]).transpose()).flatten()
# z=np.dot(my_rotation_matrix_from_euler,np.array([[0,0,1,1]]).transpose()).flatten()
# newvs=[]
# grasp_vector=[math.cos(my_euler_yaw),math.sin(my_euler_yaw),0]
#
# mx=my_rotation_matrix_from_euler[0,0:3]
# my=my_rotation_matrix_from_euler[1,0:3]
# mz=my_rotation_matrix_from_euler[2,0:3]
# newvs.append([0,0,0,mx[0],mx[1],mx[2]])
# newvs.append([0,0,0,my[0],my[1],my[2]])
# newvs.append([0,0,0,mz[0],mz[1],mz[2]])
# vector_ploter.addvs(newvs)

if __name__ == "__main__":
    posecnn = mask_cnn_segmentor()
    dfyw = DenseFusion_YW(posecnn=posecnn)
    # Notice: Change this to zed
    test_one_img, test_one_depth = dfyw.posecnn.get_an_test_img_and_depth()
    seg_res = posecnn.get_eval_res_by_name(TF.to_tensor(test_one_img), "banana")
    pred_r, pred_t = dfyw.DenseFusion(img=test_one_img, depth=test_one_depth, posecnn_res=seg_res)
    pred_r_matrix = quaternion_matrix(pred_r)
    print(pred_r, pred_t)
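# --- Illustrative sketch (not the original; the related code is commented out above): turning
# --- the returned (w, x, y, z) quaternion into a planar grasp direction, roughly what the
# --- grasp_vector lines in the commented block were computing. quaternion_matrix is the same
# --- helper used elsewhere in this code (import path assumed); the yaw extraction assumes a
# --- Z-up convention.
import math
import numpy as np
from transformations import quaternion_matrix  # assumed import path

def grasp_vector_from_quaternion(q_wxyz):
    R = quaternion_matrix(q_wxyz)[:3, :3]
    yaw = math.atan2(R[1, 0], R[0, 0])          # rotation of the object's x axis about z
    return np.array([math.cos(yaw), math.sin(yaw), 0.0]), yaw

if __name__ == "__main__":
    q = np.array([math.cos(math.pi / 8), 0.0, 0.0, math.sin(math.pi / 8)])  # 45 deg about z
    vec, yaw = grasp_vector_from_quaternion(q)
    print(np.degrees(yaw), vec)                  # ~45.0, [0.707, 0.707, 0.0]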
def main(): cfg = setup_config() pipeline = rs.pipeline() realsense_cfg = setup_realsense() pipeline.start(realsense_cfg) # Start streaming visualizer = predictor.VisualizationDemo(cfg) ref_frame_axies = [] ref_frame_label = [] min_distance = 0.9 label_cnt = 0 frameth = 0 my_t_pool = {} my_r_pool = {} while True: frameth += 1 cur_frame_axies = [] cur_frame_label = [] my_t_per_frame = [] my_r_per_frame = [] align = rs.align(rs.stream.color) frames = pipeline.wait_for_frames() aligned_frames = align.process(frames) rgb = aligned_frames.get_color_frame() rgb = np.asanyarray(rgb.get_data()) frame = rgb.copy() # Do instance segmentation start = time.time() segmentation, vis = visualizer.run_on_image(frame) #print("Time = " + str(time.time()-start)) cv2.imshow('Mask', vis) cv2.waitKey(1) # Get segmentation mask ori_label = segmentation['instances'].pred_masks.cpu().numpy() label = np.sum(ori_label, axis=0).astype(np.uint8) label = np.where(label != 0, 255, label) label = Image.fromarray(label).convert("L") label = np.asarray(label.convert('RGB')).astype(np.uint8) bboxes = segmentation['instances'].pred_boxes.tensor.cpu().numpy() xyxy_bboxes = bboxes bboxes = bbox_convert(bboxes) if len(bboxes) > 0: #depth_frames = frames.get_depth_frame() depth_frames = aligned_frames.get_depth_frame() video_profile = depth_frames.profile.as_video_stream_profile() intr = video_profile.get_intrinsics() depth = np.asanyarray(depth_frames.get_data()) #centers = segmentation['instances'].pred_boxes.get_centers() if len(my_t_pool) > 0: last_key = list(my_t_pool.keys())[-1] for i in range(0, len(bboxes)): bbox_xyxy = np.array(list(xyxy_bboxes[i])) bbox = list(bboxes[i]) print("Bounding Box:" + str(bbox)) #center = bboxes[i].get_centers() #center = centers[i].cpu().numpy() num_idx = float('nan') max_value = 0 label_of_object = ori_label[i].astype(np.uint8) label_of_object = np.where(label_of_object != 0, 255, label_of_object) label_of_object = Image.fromarray(label_of_object).convert("L") label_of_object = np.asarray( label_of_object.convert('RGB')).astype(np.uint8) if len(ref_frame_label) > 0: iou_list = [] b = bbox_xyxy a = np.array(ref_frame_axies) for k in range(len(ref_frame_axies)): iou = iou_score(a[k], b) iou_list.append(iou) iou_list = np.array(iou_list) max_value = iou_list.max() if (max_value > min_distance): min_idx = np.where(iou_list == max_value)[0][0] num_idx = ref_frame_label[min_idx] if (math.isnan(num_idx)): num_idx = label_cnt label_cnt += 1 cur_frame_label.append(num_idx) cur_frame_axies.append(bbox_xyxy) print(max_value) if (frameth == 1) or (max_value < 0.9) or ( i > len(my_t_pool[last_key]) - 1) or (frameth % 20 == 0): pos_text = (bbox[0], bbox[1]) class_id = segmentation['instances'].pred_classes[i].cpu( ).data.numpy() print("Class: " + str(class_id)) #idx = class_id if class_id == 0: idx = 0 if class_id == 2: idx = 1 model_points = model_points_list[idx] mask_depth = ma.getmaskarray(ma.masked_not_equal(depth, 0)) #mask_label = ma.getmaskarray(ma.masked_equal(label, np.array(255))) mask_label = ma.getmaskarray( ma.masked_equal(label_of_object, np.array([255, 255, 255])))[:, :, 0] mask = mask_label * mask_depth rmin, rmax, cmin, cmax = posenet_deploy.get_bbox(bbox) # choose choose = mask[rmin:rmax, cmin:cmax].flatten().nonzero()[0] if len(choose) == 0: choose = torch.LongTensor([0]) if len(choose) > num_points: c_mask = np.zeros(len(choose), dtype=int) c_mask[:num_points] = 1 np.random.shuffle(c_mask) choose = choose[c_mask.nonzero()] else: choose = np.pad(choose, (0, num_points - len(choose)), 
'wrap') depth_masked = depth[ rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype( np.float32) xmap_masked = xmap[ rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype( np.float32) ymap_masked = ymap[ rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype( np.float32) choose = np.array([choose]) # point cloud pt2 = depth_masked / cam_scale pt0 = (ymap_masked - cam_cx) * pt2 / cam_fx pt1 = (xmap_masked - cam_cy) * pt2 / cam_fy cloud = np.concatenate((pt0, pt1, pt2), axis=1) cloud = cloud / 1000.0 # print(cloud.shape) # cropped img #img_masked = rgb[:, :, :3] img_masked = rgb[:, :, ::-1] # bgr to rgb img_masked = np.transpose(img_masked, (2, 0, 1)) img_masked = img_masked[:, rmin:rmax, cmin:cmax] my_mask = np.transpose(label_of_object, (2, 0, 1)) my_mask = my_mask[:, rmin:rmax, cmin: cmax] ## Added by me to crop the mask mask_img = np.transpose(my_mask, (1, 2, 0)) img_rgb = np.transpose(img_masked, (1, 2, 0)) croped_img_mask = cv2.bitwise_and(img_rgb, mask_img) crop_image_to_check = croped_img_mask.copy() cv2.imshow("mask_crop", croped_img_mask) croped_img_mask = np.transpose(croped_img_mask, (2, 0, 1)) # Variables cloud = torch.from_numpy(cloud.astype( np.float32)).unsqueeze(0) choose = torch.LongTensor(choose.astype( np.int32)).unsqueeze(0) #img_masked = torch.from_numpy(img_masked.astype(np.float32)).unsqueeze(0) img_masked = torch.from_numpy( croped_img_mask.astype(np.float32)).unsqueeze(0) index = torch.LongTensor([idx]).unsqueeze( 0) # Specify which object cloud = Variable(cloud).cuda() choose = Variable(choose).cuda() img_masked = Variable(img_masked).cuda() index = Variable(index).cuda() # Deploy with torch.no_grad(): pred_r, pred_t, pred_c, emb = estimator( img_masked, cloud, choose, index) pred_r = pred_r / torch.norm(pred_r, dim=2).view( 1, num_points, 1) pred_c = pred_c.view(bs, num_points) how_max, which_max = torch.max(pred_c, 1) pred_t = pred_t.view(bs * num_points, 1, 3) points = cloud.view(bs * num_points, 1, 3) my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy() my_t = (points.view(bs * num_points, 1, 3) + pred_t)[which_max[0]].view(-1).cpu().data.numpy() my_pred = np.append(my_r, my_t) # Refinement for ite in range(0, iteration): T = Variable(torch.from_numpy(my_t.astype( np.float32))).cuda().view(1, 3).repeat( num_points, 1).contiguous().view(1, num_points, 3) my_mat = quaternion_matrix(my_r) R = Variable( torch.from_numpy(my_mat[:3, :3].astype( np.float32))).cuda().view(1, 3, 3) my_mat[0:3, 3] = my_t new_cloud = torch.bmm((cloud - T), R).contiguous() pred_r, pred_t = refiner(new_cloud, emb, index) pred_r = pred_r.view(1, 1, -1) pred_r = pred_r / (torch.norm(pred_r, dim=2).view( 1, 1, 1)) my_r_2 = pred_r.view(-1).cpu().data.numpy() my_t_2 = pred_t.view(-1).cpu().data.numpy() my_mat_2 = quaternion_matrix(my_r_2) my_mat_2[0:3, 3] = my_t_2 my_mat_final = np.dot(my_mat, my_mat_2) my_r_final = copy.deepcopy(my_mat_final) my_r_final[0:3, 3] = 0 my_r_final = quaternion_from_matrix(my_r_final, True) my_t_final = np.array([ my_mat_final[0][3], my_mat_final[1][3], my_mat_final[2][3] ]) my_pred = np.append(my_r_final, my_t_final) my_r = my_r_final my_t = my_t_final my_r_matrix = quaternion_matrix(my_r)[:3, :3] #print("Time = " + str(time.time()-start)) my_t_per_frame.append(my_t) my_r_per_frame.append(my_r_matrix) #rotation = Rot.from_matrix(my_r_matrix) #angle = rotation.as_euler('xyz', degrees=True) my_t = np.around(my_t, 5) #print("translation vector = " + str(my_t)) #print("rotation angles = " + str(my_r)) frame = 
posenet_deploy.get_3d_bbox(frame, model_points, my_r_matrix, my_t) frame = posenet_deploy.draw_axes(frame, my_r_matrix, my_t) if check_inverted(crop_image_to_check): cv2.putText(frame, str(num_idx) + "_inverted", pos_text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2, cv2.LINE_AA) else: cv2.putText(frame, str(num_idx), pos_text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2, cv2.LINE_AA) #cv2.putText(frame, str(num_idx), pos_text, cv2.FONT_HERSHEY_SIMPLEX, # 0.5, (0,255,0), 2, cv2.LINE_AA) posenet_deploy.putText(frame, i, num_idx, class_id, my_t) #cv2.imshow('Result', rgb) #cv2.waitKey(1) else: rmin, rmax, cmin, cmax = posenet_deploy.get_bbox(bbox) img_masked = rgb[:, :, ::-1] # bgr to rgb img_masked = np.transpose(img_masked, (2, 0, 1)) img_masked = img_masked[:, rmin:rmax, cmin:cmax] my_mask = np.transpose(label_of_object, (2, 0, 1)) my_mask = my_mask[:, rmin:rmax, cmin: cmax] ## Added by me to crop the mask mask_img = np.transpose(my_mask, (1, 2, 0)) img_rgb = np.transpose(img_masked, (1, 2, 0)) croped_img_mask = cv2.bitwise_and(img_rgb, mask_img) crop_image_to_check = croped_img_mask.copy() pos_text = (bbox[0], bbox[1]) last_key = list(my_t_pool.keys())[-1] print("POOL: " + str(my_t_pool[last_key])) class_id = segmentation['instances'].pred_classes[i].cpu( ).data.numpy() my_t = my_t_pool[last_key][min_idx] my_r_matrix = my_r_pool[last_key][min_idx] my_t_per_frame.append(my_t) my_r_per_frame.append(my_r_matrix) frame = posenet_deploy.get_3d_bbox(frame, model_points, my_r_matrix, my_t) frame = posenet_deploy.draw_axes(frame, my_r_matrix, my_t) if check_inverted(crop_image_to_check): cv2.putText(frame, str(num_idx) + "_inverted", pos_text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2, cv2.LINE_AA) else: cv2.putText(frame, str(num_idx), pos_text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2, cv2.LINE_AA) #cv2.putText(frame, str(num_idx), pos_text, cv2.FONT_HERSHEY_SIMPLEX, # 0.5, (0,255,0), 2, cv2.LINE_AA) posenet_deploy.putText(frame, i, num_idx, class_id, my_t) if len(my_t_per_frame) > 0: my_t_pool[frameth] = my_t_per_frame my_r_pool[frameth] = my_r_per_frame ref_frame_label = cur_frame_label ref_frame_axies = cur_frame_axies end = time.time() - start cv2.putText(frame, "Time processing: " + str(round(end, 3)) + " seconds", (100, 700), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, cv2.LINE_AA) cv2.imshow('Result', frame) cv2.waitKey(1) else: # Show images #video_writer.write(rgb) cv2.imshow('Result', rgb) cv2.waitKey(1) pipeline.stop()
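# --- Illustrative sketch (the real iou_score comes from the tracking utilities of this repo):
# --- the frame loop above re-identifies objects across frames by the IoU of their
# --- (x1, y1, x2, y2) boxes against the previous frame and reuses the cached pose when the
# --- overlap is above the min_distance threshold (0.9).
import numpy as np

def iou_xyxy(a, b):
    """a, b: boxes as [x1, y1, x2, y2]; returns intersection-over-union in [0, 1]."""
    x1, y1 = max(a[0], b[0]), max(a[1], b[1])
    x2, y2 = min(a[2], b[2]), min(a[3], b[3])
    inter = max(0.0, x2 - x1) * max(0.0, y2 - y1)
    area_a = (a[2] - a[0]) * (a[3] - a[1])
    area_b = (b[2] - b[0]) * (b[3] - b[1])
    return inter / (area_a + area_b - inter) if inter > 0 else 0.0

if __name__ == "__main__":
    prev_boxes = np.array([[100, 100, 200, 200], [300, 50, 380, 150]], dtype=float)
    new_box = np.array([102, 101, 201, 202], dtype=float)
    ious = [iou_xyxy(b, new_box) for b in prev_boxes]
    match = int(np.argmax(ious))
    print(ious, "matched previous object", match if max(ious) > 0.9 else None)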
def DenseFusion(self, img, depth, posecnn_res): my_result_wo_refine = [] itemid = 1 # this is simplified for single label decttion, if multi-label used, check DFYW3.py for more depth = np.array(depth) # img = img seg_res = posecnn_res x1, y1, x2, y2 = seg_res["box"] banana_bbox_draw = self.posecnn.get_box_rcwh(seg_res["box"]) rmin, rmax, cmin, cmax = int(x1), int(x2), int(y1), int(y2) depth = depth[:, :, 1] # because depth has 3 dimensions RGB but they are the all the same with each other mask_depth = ma.getmaskarray(ma.masked_not_equal(depth, 0)) # ok label_banana = np.squeeze(seg_res["mask"]) label_banana = ma.getmaskarray(ma.masked_greater(label_banana, 0.5)) label_banana_nonzeros = label_banana.flatten().nonzero() mask_label = ma.getmaskarray(ma.masked_equal( label_banana, itemid)) # label from banana label mask = mask_label * mask_depth mask_nonzeros = mask[:].flatten().nonzero() choose = mask[rmin:rmax, cmin:cmax].flatten().nonzero()[0] if len(choose) > self.num_points: c_mask = np.zeros(len(choose), dtype=int) c_mask[:self.num_points] = 1 np.random.shuffle(c_mask) choose = choose[c_mask.nonzero()] else: print("len of choose is 0, check error") choose = np.pad(choose, (0, self.num_points - len(choose)), 'wrap') depth_masked = depth[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype( np.float32) xmap_masked = self.xmap[ rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) ymap_masked = self.ymap[ rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) choose = np.array([choose]) pt2 = depth_masked / self.cam_scale pt0 = (ymap_masked - self.cam_cx) * pt2 / self.cam_fx pt1 = (xmap_masked - self.cam_cy) * pt2 / self.cam_fy cloud = np.concatenate((pt0, pt1, pt2), axis=1) img_np = np.array(img) img_masked = np.array(img)[:, :, :3] img_masked = np.transpose(img_masked, (2, 0, 1)) img_masked = img_masked[:, rmin:rmax, cmin:cmax] cloud = torch.from_numpy(cloud.astype(np.float32)) choose = torch.LongTensor(choose.astype(np.int32)) img_masked = self.norm(torch.from_numpy(img_masked.astype(np.float32))) index = torch.LongTensor([itemid - 1]) cloud = Variable(cloud).cuda() choose = Variable(choose).cuda() img_masked = Variable(img_masked).cuda() index = Variable(index).cuda() cloud = cloud.view(1, self.num_points, 3) img_masked = img_masked.view(1, 3, img_masked.size()[1], img_masked.size()[2]) pred_r, pred_t, pred_c, emb = self.estimator(img_masked, cloud, choose, index) pred_r = pred_r / torch.norm(pred_r, dim=2).view(1, self.num_points, 1) pred_c = pred_c.view(self.bs, self.num_points) how_max, which_max = torch.max(pred_c, 1) pred_t = pred_t.view(self.bs * self.num_points, 1, 3) points = cloud.view(self.bs * self.num_points, 1, 3) my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy() my_t = (points + pred_t)[which_max[0]].view(-1).cpu().data.numpy() my_pred = np.append(my_r, my_t) my_result_wo_refine.append(my_pred.tolist()) my_result = [] for ite in range(0, self.iteration): T = Variable(torch.from_numpy( my_t.astype(np.float32))).cuda().view(1, 3).repeat( self.num_points, 1).contiguous().view(1, self.num_points, 3) my_mat = quaternion_matrix(my_r) R = Variable(torch.from_numpy(my_mat[:3, :3].astype( np.float32))).cuda().view(1, 3, 3) my_mat[0:3, 3] = my_t new_cloud = torch.bmm((cloud - T), R).contiguous() pred_r, pred_t = self.refiner(new_cloud, emb, index) pred_r = pred_r.view(1, 1, -1) pred_r = pred_r / (torch.norm(pred_r, dim=2).view(1, 1, 1)) my_r_2 = pred_r.view(-1).cpu().data.numpy() my_t_2 = pred_t.view(-1).cpu().data.numpy() 
    my_mat_2 = quaternion_matrix(my_r_2)
    my_mat_2[0:3, 3] = my_t_2
    my_mat_final = np.dot(my_mat, my_mat_2)
    my_r_final = copy.deepcopy(my_mat_final)
    my_r_final[0:3, 3] = 0
    my_r_final = quaternion_from_matrix(my_r_final, True)
    my_t_final = np.array([my_mat_final[0][3], my_mat_final[1][3], my_mat_final[2][3]])
    my_pred = np.append(my_r_final, my_t_final)
    my_result.append(my_pred.tolist())

# average the per-iteration refinements; note this averages raw quaternion components,
# which is only a rough approximation of averaging rotations
my_result_np = np.array(my_result)
my_result_mean = np.mean(my_result_np, axis=0)
my_r = my_result_mean[:4]
my_t = my_result_mean[4:]
my_r_quaternion = my_r
return my_r_quaternion, my_t
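# --- Illustrative sketch (a caveat on the averaging above, not the original code): averaging
# --- raw quaternion components does not yield a unit quaternion. At minimum the quaternions
# --- should be sign-aligned first (q and -q encode the same rotation) and the mean
# --- re-normalized before it is used as a rotation.
import numpy as np

def average_quaternions(quats):
    """quats: (N, 4) array of (w, x, y, z) unit quaternions from the refinement iterations."""
    quats = np.asarray(quats, dtype=np.float64)
    ref = quats[0]
    # flip any quaternion that lies in the opposite hemisphere of the first one
    signs = np.where(quats @ ref < 0.0, -1.0, 1.0)[:, np.newaxis]
    mean = np.mean(quats * signs, axis=0)
    return mean / np.linalg.norm(mean)

if __name__ == "__main__":
    q = np.array([0.924, 0.0, 0.0, 0.383])        # ~45 deg about z
    print(average_quaternions([q, -q, q]))        # sign-aligned mean ~ q, unit norm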