def sample_rotations_60():
    """ icosahedral_group: 60 rotations """
    phi = (1 + math.sqrt(5)) / 2
    R1 = np.array([[-phi/2, 1/(2*phi), -0.5],
                   [-1/(2*phi), 0.5, phi/2],
                   [0.5, phi/2, -1/(2*phi)]])
    R2 = np.array([[phi/2, 1/(2*phi), -0.5],
                   [1/(2*phi), 0.5, phi/2],
                   [0.5, -phi/2, 1/(2*phi)]])
    group = [np.eye(3, dtype=float)]
    n = 0
    while len(group) > n:
        n = len(group)
        set_so_far = group
        for rot in set_so_far:
            for R in [R1, R2]:
                new_R = np.matmul(rot, R)
                new = True
                for item in set_so_far:
                    if np.sum(np.absolute(item - new_R)) < 1e-6:
                        new = False
                        break
                if new:
                    group.append(new_R)
                    break
            if new:
                break
    # return np.array(group)
    group = np.array(group)
    quaternion_group = np.zeros((60, 4))
    for i in range(60):
        quaternion_group[i] = quaternion_from_matrix(group[i])
    return quaternion_group.astype(float)
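# Hedged usage sketch (the helper below is introduced here for illustration and
# is not part of the original code): a quick self-check that the icosahedral
# sampling returns 60 unit quaternions. Assumes numpy (np), math, and
# quaternion_from_matrix are imported as the functions above already require.
def _check_rotation_sampling():
    quats = sample_rotations_60()
    assert quats.shape == (60, 4)
    # quaternion_from_matrix returns unit quaternions, so each row should have
    # norm 1 up to numerical tolerance
    assert np.allclose(np.linalg.norm(quats, axis=1), 1.0, atol=1e-6)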
def evaluate_R_t(R_gt, t_gt, R_est, t_est, q_gt=None):
    t = t_est.flatten()
    t_gt = t_gt.flatten()
    eps = 1e-15
    if q_gt is None:
        q_gt = quaternion_from_matrix(R_gt)
    q = quaternion_from_matrix(R_est)
    q = q / (np.linalg.norm(q) + eps)
    q_gt = q_gt / (np.linalg.norm(q_gt) + eps)
    loss_q = np.maximum(eps, (1.0 - np.sum(q * q_gt)**2))
    err_q = np.arccos(1 - 2 * loss_q)
    # absolute distance error on t
    err_t = np.linalg.norm(t_gt - t)
    if np.sum(np.isnan(err_q)) or np.sum(np.isnan(err_t)):
        # This should never happen! Debug here
        err_q = np.pi
        err_t = np.inf
    return err_q, err_t
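# Hedged sanity-check sketch (an illustrative example, not from the original
# repo): for a pure 10-degree rotation about Z, evaluate_R_t should report an
# angular error of roughly 10 degrees and zero translation error. Assumes
# quaternion_from_matrix accepts the same 3x3 rotation matrices that
# evaluate_R_t already passes it.
def _evaluate_R_t_example():
    angle = np.deg2rad(10.0)
    R_est = np.array([[np.cos(angle), -np.sin(angle), 0.0],
                      [np.sin(angle),  np.cos(angle), 0.0],
                      [0.0, 0.0, 1.0]])
    err_q, err_t = evaluate_R_t(np.eye(3), np.zeros(3), R_est, np.zeros(3))
    print(np.rad2deg(err_q), err_t)  # approximately 10.0 and 0.0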
def Csv_6D_pose(rgb_img, depth_img):
    iteration = 4
    bs = 1
    # knn = KNearestNeighbor(1)
    points, choose, img = testdataset.getitem_by_array(rgb_img, depth_img)
    if choose.ndim < 3:
        return []
    # print("choose.ndim =", choose.ndim)
    obj_id = torch.LongTensor([0]).unsqueeze(0)
    points, choose, img, obj_id = Variable(points).cuda(), Variable(choose).cuda(), \
        Variable(img).cuda(), Variable(obj_id).cuda()
    pred_r, pred_t, pred_c, emb = estimator(img, points, choose, obj_id)
    pred_r = pred_r / torch.norm(pred_r, dim=2).view(1, num_points, 1)
    pred_c = pred_c.view(bs, num_points)
    how_max, which_max = torch.max(pred_c, 1)
    pred_t = pred_t.view(bs * num_points, 1, 3)
    my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy()
    my_t = (points.view(bs * num_points, 1, 3) + pred_t)[which_max[0]].view(-1).cpu().data.numpy()
    my_pred = np.append(my_r, my_t)
    for ite in range(0, iteration):
        T = Variable(torch.from_numpy(my_t.astype(np.float32))).cuda().view(1, 3).repeat(
            num_points, 1).contiguous().view(1, num_points, 3)
        my_mat = quaternion_matrix(my_r)
        R = Variable(torch.from_numpy(my_mat[:3, :3].astype(np.float32))).cuda().view(1, 3, 3)
        my_mat[0:3, 3] = my_t
        new_points = torch.bmm((points - T), R).contiguous()
        pred_r, pred_t = refiner(new_points, emb, obj_id)
        pred_r = pred_r.view(1, 1, -1)
        pred_r = pred_r / (torch.norm(pred_r, dim=2).view(1, 1, 1))
        my_r_2 = pred_r.view(-1).cpu().data.numpy()
        my_t_2 = pred_t.view(-1).cpu().data.numpy()
        my_mat_2 = quaternion_matrix(my_r_2)
        my_mat_2[0:3, 3] = my_t_2
        my_mat_final = np.dot(my_mat, my_mat_2)
        my_r_final = copy.deepcopy(my_mat_final)
        my_r_final[0:3, 3] = 0
        my_r_final = quaternion_from_matrix(my_r_final, True)
        my_t_final = np.array([my_mat_final[0][3], my_mat_final[1][3], my_mat_final[2][3]])
        my_pred = np.append(my_r_final, my_t_final)
        my_r = my_r_final
        my_t = my_t_final
    print("final prediction: quaternion + translation")
    my_pred = np.asarray(my_pred, dtype='float')
    print(list(my_pred))
    return list(my_pred)
def refinePose(self, emb, cloud, object_label, init_t, init_r, iterations=2):
    init_t = init_t.cpu().data.numpy()
    init_r = init_r.cpu().data.numpy()
    for ite in range(0, iterations):
        T = Variable(torch.from_numpy(
            init_t.astype(np.float32))).cuda().view(1, 3).repeat(
                self.num_points, 1).contiguous().view(1, self.num_points, 3)
        init_mat = quaternion_matrix(init_r)
        R = Variable(torch.from_numpy(init_mat[:3, :3].astype(
            np.float32))).cuda().view(1, 3, 3)
        init_mat[0:3, 3] = init_t
        # express the cloud in the frame of the current pose estimate
        new_cloud = torch.bmm((cloud - T), R).contiguous()
        pred_r, pred_t = self.refiner(new_cloud, emb, object_label)
        pred_r = pred_r.view(1, 1, -1)
        pred_r = pred_r / (torch.norm(pred_r, dim=2).view(1, 1, 1))
        delta_r = pred_r.view(-1).cpu().data.numpy()
        delta_t = pred_t.view(-1).cpu().data.numpy()
        delta_mat = quaternion_matrix(delta_r)
        delta_mat[0:3, 3] = delta_t
        # compose the predicted delta with the current estimate
        refined_mat = np.dot(init_mat, delta_mat)
        refined_r = copy.deepcopy(refined_mat)
        refined_r[0:3, 3] = 0
        refined_r = quaternion_from_matrix(refined_r, True)
        refined_t = np.array(
            [refined_mat[0][3], refined_mat[1][3], refined_mat[2][3]])
        init_r = refined_r
        init_t = refined_t
    return refined_r, refined_t
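# Hedged illustration (compose_pose is a helper introduced here, not part of
# the original code): each refinement iteration above composes the current pose
# (quaternion + translation) with the network's predicted delta through 4x4
# homogeneous matrices. The same arithmetic written as a standalone function,
# assuming quaternion_matrix / quaternion_from_matrix are available:
def compose_pose(q, t, delta_q, delta_t):
    mat = quaternion_matrix(q)              # 4x4 homogeneous matrix from quaternion
    mat[0:3, 3] = t
    delta_mat = quaternion_matrix(delta_q)
    delta_mat[0:3, 3] = delta_t
    final_mat = np.dot(mat, delta_mat)      # right-multiply: delta acts in the current frame
    rot_only = final_mat.copy()
    rot_only[0:3, 3] = 0                    # zero the translation before extracting rotation
    final_q = quaternion_from_matrix(rot_only, True)
    final_t = final_mat[0:3, 3]
    return final_q, final_t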
def refine_posenet(self, refine_args):
    iteration = refine_args.iteration
    my_t, my_r = refine_args.t, refine_args.r
    num_points = refine_args.num_points
    cloud = refine_args.cloud
    refiner = refine_args.refiner_network
    emb = refine_args.emb
    index = refine_args.index
    for ite in range(0, iteration):
        T = Variable(torch.from_numpy(
            my_t.astype(np.float32))).cuda().view(1, 3).repeat(
                num_points, 1).contiguous().view(1, num_points, 3)
        my_mat = quaternion_matrix(my_r)
        R = Variable(torch.from_numpy(my_mat[:3, :3].astype(
            np.float32))).cuda().view(1, 3, 3)
        my_mat[0:3, 3] = my_t
        new_cloud = torch.bmm((cloud - T), R).contiguous()
        time_refiner = time.time()
        pred_r, pred_t = refiner(new_cloud, emb, index)
        print("--- RE %s seconds ---" % (time.time() - time_refiner))
        pred_r = pred_r.view(1, 1, -1)
        pred_r = pred_r / (torch.norm(pred_r, dim=2).view(1, 1, 1))
        my_r_2 = pred_r.view(-1).cpu().data.numpy()
        my_t_2 = pred_t.view(-1).cpu().data.numpy()
        my_mat_2 = quaternion_matrix(my_r_2)
        my_mat_2[0:3, 3] = my_t_2
        my_mat_final = np.dot(my_mat, my_mat_2)
        my_r_final = copy.deepcopy(my_mat_final)
        my_r_final[0:3, 3] = 0
        my_r_final = quaternion_from_matrix(my_r_final, True)
        my_t_final = np.array(
            [my_mat_final[0][3], my_mat_final[1][3], my_mat_final[2][3]])
        my_pred = np.append(my_r_final, my_t_final)
        my_r = my_r_final
        my_t = my_t_final
    return my_r, my_t
def sample_rotations_24():
    """ octahedral_group: 24 rotations """
    group = np.array([[[1, 0, 0], [0, 1, 0], [0, 0, 1]],
                      [[1, 0, 0], [0, -1, 0], [0, 0, -1]],
                      [[-1, 0, 0], [0, 1, 0], [0, 0, -1]],
                      [[-1, 0, 0], [0, -1, 0], [0, 0, 1]],
                      [[1, 0, 0], [0, 0, 1], [0, 1, 0]],
                      [[1, 0, 0], [0, 0, -1], [0, -1, 0]],
                      [[-1, 0, 0], [0, 0, 1], [0, -1, 0]],
                      [[-1, 0, 0], [0, 0, -1], [0, 1, 0]],
                      [[0, 1, 0], [1, 0, 0], [0, 0, 1]],
                      [[0, 1, 0], [-1, 0, 0], [0, 0, -1]],
                      [[0, -1, 0], [1, 0, 0], [0, 0, -1]],
                      [[0, -1, 0], [-1, 0, 0], [0, 0, 1]],
                      [[0, 1, 0], [0, 0, 1], [1, 0, 0]],
                      [[0, 1, 0], [0, 0, -1], [-1, 0, 0]],
                      [[0, -1, 0], [0, 0, 1], [-1, 0, 0]],
                      [[0, -1, 0], [0, 0, -1], [1, 0, 0]],
                      [[0, 0, 1], [1, 0, 0], [0, 1, 0]],
                      [[0, 0, 1], [-1, 0, 0], [0, -1, 0]],
                      [[0, 0, -1], [1, 0, 0], [0, -1, 0]],
                      [[0, 0, -1], [-1, 0, 0], [0, 1, 0]],
                      [[0, 0, 1], [0, 1, 0], [1, 0, 0]],
                      [[0, 0, 1], [0, -1, 0], [-1, 0, 0]],
                      [[0, 0, -1], [0, 1, 0], [-1, 0, 0]],
                      [[0, 0, -1], [0, -1, 0], [1, 0, 0]]])
    # return group.astype(float)
    quaternion_group = np.zeros((24, 4))
    for i in range(24):
        quaternion_group[i] = quaternion_from_matrix(group[i])
    return quaternion_group.astype(float)
def sample_rotations_12():
    """ tetrahedral_group: 12 rotations """
    group = np.array([[[1, 0, 0], [0, 1, 0], [0, 0, 1]],
                      [[1, 0, 0], [0, -1, 0], [0, 0, -1]],
                      [[-1, 0, 0], [0, 1, 0], [0, 0, -1]],
                      [[-1, 0, 0], [0, -1, 0], [0, 0, 1]],
                      [[0, 1, 0], [0, 0, 1], [1, 0, 0]],
                      [[0, 1, 0], [0, 0, -1], [-1, 0, 0]],
                      [[0, -1, 0], [0, 0, 1], [-1, 0, 0]],
                      [[0, -1, 0], [0, 0, -1], [1, 0, 0]],
                      [[0, 0, 1], [1, 0, 0], [0, 1, 0]],
                      [[0, 0, 1], [-1, 0, 0], [0, -1, 0]],
                      [[0, 0, -1], [1, 0, 0], [0, -1, 0]],
                      [[0, 0, -1], [-1, 0, 0], [0, 1, 0]]])
    # return group.astype(float)
    quaternion_group = np.zeros((12, 4))
    for i in range(12):
        quaternion_group[i] = quaternion_from_matrix(group[i])
    return quaternion_group.astype(float)
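# Hedged usage sketch (illustrative helper, not in the original code): the
# quaternions returned by the sample_rotations_* functions are typically used
# as discrete rotation anchors. Converting one back to a 3x3 matrix and
# applying it to an (N, 3) point array looks like this, assuming
# quaternion_matrix from transformations is available.
def apply_anchor_rotation(points_xyz, anchor_quat):
    R = quaternion_matrix(anchor_quat)[:3, :3]   # 4x4 transform -> rotation block
    return np.dot(points_xyz, R.T)               # rotated points, shape (N, 3)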
def upload_file(): global refiner if flask.request.method == 'POST': file1 = flask.request.files['file1'] file2 = flask.request.files['file2'] if file1 and allowedFile(file1.filename) and file2 and allowedFile( file2.filename): # Gets filenames, paths, and saves them fname1 = wz.secure_filename(file1.filename) fpath1 = os.path.join(app.config['UPLOAD_FOLDER'], fname1) fname2 = wz.secure_filename(file2.filename) fpath2 = os.path.join(app.config['UPLOAD_FOLDER'], fname2) # print(fname1, fname2) file1.save(fpath1) file2.save(fpath2) # Gets labels, bbox, and masks retUrl = upload(FULLDOMAIN, fpath1) objDict = downloadZip(retUrl, UPLOAD_FOLDER) # DEBUG 1 # print('objDict: \n', objDict) # retUrl = FULLDOMAIN + UPLOAD_FOLDER_REL + 'tmp.zip' # return retUrl # DEBUG 2 # retCsv = createCSV(objDict) # retStr = str() # with open(os.path.join(UPLOAD_FOLDER, 'pose.csv'), 'w') as of: # for line in retCsv: # retStr += line + '\n' # of.write(line + '\n') # return retStr # Starts shit bbList, maskList, scoreList, labelList = getLists(objDict) img = Image.open(fpath1) depth = np.array(Image.open(fpath2)) print('depth:\n', depth[:10, :10]) print('max depth:', depth.max()) my_result_wo_refine = [] my_result = [] itemid = 1 # Original Network # posecnn_meta = scio.loadmat('mycode/samples/input/000000.mat') # label = np.array(posecnn_meta['labels']) # posecnn_rois = np.array(posecnn_meta['rois']) # lst = posecnn_rois[:, 1:2].flatten() # for idx in range(len(lst)): # itemid = lst[idx] # # try: # # cmin, rmin, cmax, rmax = int(posecnn_rois[idx][2]), int(posecnn_rois[idx][3]), int(posecnn_rois[idx][4]), int(posecnn_rois[idx][5]) # rmin, rmax, cmin, cmax = get_bbox(posecnn_rois, idx) # print(cmin, rmin, cmax, rmax) # mask_depth = ma.getmaskarray(ma.masked_not_equal(depth, 0)) # mask_label = ma.getmaskarray(ma.masked_equal(label, itemid)) # mask = mask_label * mask_depth for bb, mask, score, label in zip(bbList, maskList, scoreList, labelList): # cmin, rmin, cmax, rmax = bb # print(cmin, rmin, cmax, rmax) rmin, rmax, cmin, cmax = get_bbox(bb, None) # print(cmin, rmin, cmax, rmax) mask_depth = ma.getmaskarray(ma.masked_not_equal(depth, 0)) mask_label = ma.getmaskarray(ma.masked_equal(mask, 1)) mask = mask_label * mask_depth choose = mask[rmin:rmax, cmin:cmax].flatten().nonzero()[0] # print(mask.shape) # print(len(choose)) # for i in range(rmin, rmax): # for j in range(cmin, cmax): # val = mask[i,j] # print(val, end=' ') # print() # print(mask[rmin:rmax, cmin:cmax]) if len(choose) >= num_points: c_mask = np.zeros(len(choose), dtype=int) c_mask[:num_points] = 1 np.random.shuffle(c_mask) choose = choose[c_mask.nonzero()] else: # print(choose) choose = np.pad(choose, (0, num_points - len(choose)), 'wrap') depth_masked = depth[ rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) xmap_masked = xmap[ rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) ymap_masked = ymap[ rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) choose = np.array([choose]) pt2 = depth_masked / cam_scale pt0 = (ymap_masked - cam_cx) * pt2 / cam_fx pt1 = (xmap_masked - cam_cy) * pt2 / cam_fy cloud = np.concatenate((pt0, pt1, pt2), axis=1) img_masked = np.array(img)[:, :, :3] img_masked = np.transpose(img_masked, (2, 0, 1)) img_masked = img_masked[:, rmin:rmax, cmin:cmax] cloud = torch.from_numpy(cloud.astype(np.float32)) choose = torch.LongTensor(choose.astype(np.int32)) img_masked = norm( torch.from_numpy(img_masked.astype(np.float32))) index = torch.LongTensor([itemid - 1]) cloud = 
Variable(cloud).cuda() choose = Variable(choose).cuda() img_masked = Variable(img_masked).cuda() index = Variable(index).cuda() # print('DEBUG') cloud = cloud.view(1, num_points, 3) img_masked = img_masked.view(1, 3, img_masked.size()[1], img_masked.size()[2]) pred_r, pred_t, pred_c, emb = estimator( img_masked, cloud, choose, index) pred_r = pred_r / torch.norm(pred_r, dim=2).view( 1, num_points, 1) pred_c = pred_c.view(bs, num_points) how_max, which_max = torch.max(pred_c, 1) pred_t = pred_t.view(bs * num_points, 1, 3) points = cloud.view(bs * num_points, 1, 3) my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy() my_t = (points + pred_t)[which_max[0]].view(-1).cpu().data.numpy() my_pred = np.append(my_r, my_t) my_result_wo_refine.append(my_pred.tolist()) for ite in range(0, iteration): T = Variable(torch.from_numpy( my_t.astype(np.float32))).cuda().view(1, 3).repeat( num_points, 1).contiguous().view(1, num_points, 3) my_mat = quaternion_matrix(my_r) R = Variable( torch.from_numpy(my_mat[:3, :3].astype( np.float32))).cuda().view(1, 3, 3) my_mat[0][3] = my_t[0] my_mat[1][3] = my_t[1] my_mat[2][3] = my_t[2] new_cloud = torch.bmm((cloud - T), R).contiguous() pred_r, pred_t = refiner(new_cloud, emb, index) pred_r = pred_r.view(1, 1, -1) pred_r = pred_r / (torch.norm(pred_r, dim=2).view(1, 1, 1)) my_r_2 = pred_r.view(-1).cpu().data.numpy() my_t_2 = pred_t.view(-1).cpu().data.numpy() my_mat_2 = quaternion_matrix(my_r_2) my_mat_2[0][3] = my_t_2[0] my_mat_2[1][3] = my_t_2[1] my_mat_2[2][3] = my_t_2[2] my_mat_final = np.dot(my_mat, my_mat_2) my_r_final = copy.deepcopy(my_mat_final) my_r_final[0][3] = 0 my_r_final[1][3] = 0 my_r_final[2][3] = 0 my_r_final = quaternion_from_matrix(my_r_final, True) my_t_final = np.array([ my_mat_final[0][3], my_mat_final[1][3], my_mat_final[2][3] ]) my_pred = np.append(my_r_final, my_t_final) my_r = my_r_final my_t = my_t_final my_result.append(my_pred.tolist()) itemid += 1 # except ZeroDivisionError: # # print("PoseCNN Detector Lost {0} at No.{1} keyframe".format(itemid, now)) # print('divide by zero error') # # my_result_wo_refine.append([0.0 for i in range(7)]) # my_result.append([0.0 for i in range(7)]) # DEBUG # print(my_result) # Creates return csv retCsv = createCSV(objDict, my_result) retStr = str() with open(os.path.join(UPLOAD_FOLDER, 'pose.csv'), 'w') as of: for line in retCsv: retStr += line + '\n' of.write(line + '\n') # retStr = str() # with open(os.path.join(UPLOAD_FOLDER, 'pose.csv'), 'w') as of: # for line in my_result: # lineStr = ','.join([str(l) for l in line]) # retStr += ','.join([str(l) for l in line]) + '\n' # of.write(lineStr + '\n') return retStr
print('scales, translation for part 0 and part {} is {}, {}'.format(
    1, scales, translation))
print('ransac with coordinate descent takes {} seconds for part 0, {}'.format(
    tend - tstart, 1))
aligned_RT = compose_rt(rotation[0], translation[0])
rt_dict['pred_it'][0] = aligned_RT
scale_dict['pred_it'][0] = scales
aligned_RT = compose_rt(rotation[1], translation[1])
rt_dict['pred_it'][1] = aligned_RT
scale_dict['pred_it'][1] = scales

# final evaluation per part
for j in range(num_parts - 1):
    q_pred = quaternion_from_matrix(rt_dict['pred'][j][:3, :3])
    q_pred_it = quaternion_from_matrix(rt_dict['pred_it'][j][:3, :3])
    q_gt = quaternion_from_matrix(rt_dict['gt'][j][:3, :3])
    q_pred_list = [q_pred, q_pred_it, q_gt]
    # # how to deal with err
    rt_pred_list = [rt_dict['pred'][j], rt_dict['pred_it'][j]]
    methods = ['vanilla SVD', 'coords descent']
    for m in range(2):
        ang_dis = 2 * np.arccos(sum(q_pred_list[m] * q_gt)) * 180 / np.pi
        xyz_dis = np.linalg.norm(rt_pred_list[m][:3, 3] - rt_dict['gt'][j][:3, 3])
        if args.verbose:
            print('Angular distance is : {} for part {} with {}'.format(
def DenseFusion(self, img, depth, posecnn_res): my_result_wo_refine = [] itemid = 1 # this is simplified for single label decttion, if multi-label used, check DFYW3.py for more depth = np.array(depth) # img = img seg_res = posecnn_res x1, y1, x2, y2 = seg_res["box"] banana_bbox_draw = self.posecnn.get_box_rcwh(seg_res["box"]) rmin, rmax, cmin, cmax = int(x1), int(x2), int(y1), int(y2) depth = depth[:, :, 1] # because depth has 3 dimensions RGB but they are the all the same with each other mask_depth = ma.getmaskarray(ma.masked_not_equal(depth, 0)) # ok label_banana = np.squeeze(seg_res["mask"]) label_banana = ma.getmaskarray(ma.masked_greater(label_banana, 0.5)) label_banana_nonzeros = label_banana.flatten().nonzero() mask_label = ma.getmaskarray(ma.masked_equal( label_banana, itemid)) # label from banana label mask = mask_label * mask_depth mask_nonzeros = mask[:].flatten().nonzero() choose = mask[rmin:rmax, cmin:cmax].flatten().nonzero()[0] if len(choose) > self.num_points: c_mask = np.zeros(len(choose), dtype=int) c_mask[:self.num_points] = 1 np.random.shuffle(c_mask) choose = choose[c_mask.nonzero()] else: print("len of choose is 0, check error") choose = np.pad(choose, (0, self.num_points - len(choose)), 'wrap') depth_masked = depth[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype( np.float32) xmap_masked = self.xmap[ rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) ymap_masked = self.ymap[ rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) choose = np.array([choose]) pt2 = depth_masked / self.cam_scale pt0 = (ymap_masked - self.cam_cx) * pt2 / self.cam_fx pt1 = (xmap_masked - self.cam_cy) * pt2 / self.cam_fy cloud = np.concatenate((pt0, pt1, pt2), axis=1) img_np = np.array(img) img_masked = np.array(img)[:, :, :3] img_masked = np.transpose(img_masked, (2, 0, 1)) img_masked = img_masked[:, rmin:rmax, cmin:cmax] cloud = torch.from_numpy(cloud.astype(np.float32)) choose = torch.LongTensor(choose.astype(np.int32)) img_masked = self.norm(torch.from_numpy(img_masked.astype(np.float32))) index = torch.LongTensor([itemid - 1]) cloud = Variable(cloud).cuda() choose = Variable(choose).cuda() img_masked = Variable(img_masked).cuda() index = Variable(index).cuda() cloud = cloud.view(1, self.num_points, 3) img_masked = img_masked.view(1, 3, img_masked.size()[1], img_masked.size()[2]) pred_r, pred_t, pred_c, emb = self.estimator(img_masked, cloud, choose, index) pred_r = pred_r / torch.norm(pred_r, dim=2).view(1, self.num_points, 1) pred_c = pred_c.view(self.bs, self.num_points) how_max, which_max = torch.max(pred_c, 1) pred_t = pred_t.view(self.bs * self.num_points, 1, 3) points = cloud.view(self.bs * self.num_points, 1, 3) my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy() my_t = (points + pred_t)[which_max[0]].view(-1).cpu().data.numpy() my_pred = np.append(my_r, my_t) my_result_wo_refine.append(my_pred.tolist()) my_result = [] for ite in range(0, self.iteration): T = Variable(torch.from_numpy( my_t.astype(np.float32))).cuda().view(1, 3).repeat( self.num_points, 1).contiguous().view(1, self.num_points, 3) my_mat = quaternion_matrix(my_r) R = Variable(torch.from_numpy(my_mat[:3, :3].astype( np.float32))).cuda().view(1, 3, 3) my_mat[0:3, 3] = my_t new_cloud = torch.bmm((cloud - T), R).contiguous() pred_r, pred_t = self.refiner(new_cloud, emb, index) pred_r = pred_r.view(1, 1, -1) pred_r = pred_r / (torch.norm(pred_r, dim=2).view(1, 1, 1)) my_r_2 = pred_r.view(-1).cpu().data.numpy() my_t_2 = pred_t.view(-1).cpu().data.numpy() 
my_mat_2 = quaternion_matrix(my_r_2) my_mat_2[0:3, 3] = my_t_2 my_mat_final = np.dot(my_mat, my_mat_2) my_r_final = copy.deepcopy(my_mat_final) my_r_final[0:3, 3] = 0 my_r_final = quaternion_from_matrix(my_r_final, True) my_t_final = np.array( [my_mat_final[0][3], my_mat_final[1][3], my_mat_final[2][3]]) my_pred = np.append(my_r_final, my_t_final) my_result.append(my_pred.tolist()) my_result_np = np.array(my_result) my_result_mean = np.mean(my_result, axis=0) my_r = my_result_mean[:4] my_t = my_result_mean[4:] my_r_quaternion = my_r return my_r_quaternion, my_t
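# Hedged note with an illustrative helper (not from the original code): the
# DenseFusion method above averages the per-iteration [quaternion, translation]
# vectors before returning. The mean of unit quaternions is generally not a
# unit quaternion, so a caller would typically re-normalize before converting
# the result to a rotation matrix; the helper name below is an assumption.
def pose_from_densefusion_output(my_r_quaternion, my_t):
    q = np.asarray(my_r_quaternion, dtype=np.float64)
    q = q / np.linalg.norm(q)          # re-normalize the averaged quaternion
    pose = quaternion_matrix(q)        # 4x4 homogeneous transform
    pose[0:3, 3] = np.asarray(my_t)
    return pose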
def callback(self): time1 = time.time() rgb_original = self.rgb self.rgb = np.transpose(self.rgb, (2, 0, 1)) norm = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) self.rgb = norm(torch.from_numpy(self.rgb.astype(np.float32))) self.rgb = Variable(self.rgb).cuda() semantic = self.model(self.rgb.unsqueeze(0)) _, pred = torch.max(semantic, dim=1) pred = pred *255 if IMGSAVE: torchvision.utils.save_image(pred, path + '/seg_result/out/' + 'torchpred.png') pred = np.transpose(pred.cpu().numpy(), (1, 2, 0)) # (CxHxW)->(HxWxC) if IMGSAVE: cv2.imwrite(path + '/seg_result/out/' + 'numpypred.png', pred) _, contours, _ = cv2.findContours(np.uint8(pred),cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) cnt = max(contours, key=cv2.contourArea) x,y,w,h = cv2.boundingRect(cnt) rmin, rmax, cmin, cmax = get_bbox([x,y,w,h ]) print(get_bbox([x,y,w,h ])) if IMGSAVE: img_bbox = np.array(rgb_original.copy()) cv2.rectangle(img_bbox, (cmin, rmin), (cmax, rmax), (255, 0, 0), 2) cv2.imwrite(path + '/seg_result/out/' + 'bbox.png', img_bbox) mask_depth = ma.getmaskarray(ma.masked_not_equal(self.depth,0)) mask_label = ma.getmaskarray(ma.masked_equal(pred, np.array(255))) # print(mask_depth.shape, mask_label.shape) mask = mask_depth * mask_label.reshape(480, 640) img = np.transpose(rgb_original, (2, 0, 1)) img_masked = img[:, rmin:rmax, cmin:cmax] choose = mask[rmin:rmax, cmin:cmax].flatten().nonzero()[0] #print("length of choose is :{0}".format(len(choose))) if len(choose) == 0: cc = torch.LongTensor([0]) return(cc, cc, cc, cc, cc, cc) if len(choose) > num_points: c_mask = np.zeros(len(choose), dtype=int) c_mask[:num_points] = 1 # if number of object pixels are bigger than 500, we select just 500 np.random.shuffle(c_mask) choose = choose[c_mask.nonzero()] # now len(choose) = 500 else: choose = np.pad(choose, (0, num_points - len(choose)), 'wrap') depth_masked = self.depth[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) xmap_masked = self.xmap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) ymap_masked = self.ymap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) choose = np.array([choose]) pt2 = depth_masked pt0 = (ymap_masked - self.cam_cx) * pt2 / self.cam_fx pt1 = (xmap_masked - self.cam_cy) * pt2 / self.cam_fy cloud = np.concatenate((pt0, pt1, pt2), axis=1) cloud = cloud /1000 points = torch.from_numpy(cloud.astype(np.float32)) choose = torch.LongTensor(choose.astype(np.int32)) img = norm(torch.from_numpy(img_masked.astype(np.float32))) idx = torch.LongTensor([self.object_index]) img = Variable(img).cuda().unsqueeze(0) points = Variable(points).cuda().unsqueeze(0) choose = Variable(choose).cuda().unsqueeze(0) idx = Variable(idx).cuda().unsqueeze(0) pred_r, pred_t, pred_c, emb = self.estimator(img, points, choose, idx) pred_r = pred_r / torch.norm(pred_r, dim=2).view(1, num_points, 1) pred_c = pred_c.view(bs, num_points) how_max, which_max = torch.max(pred_c, 1) pred_t = pred_t.view(bs * num_points, 1, 3) my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy() my_t = (points.view(bs * num_points, 1, 3) + pred_t)[which_max[0]].view(-1).cpu().data.numpy() my_pred = np.append(my_r, my_t) for ite in range(0, iteration): T = Variable(torch.from_numpy(my_t.astype(np.float32))).cuda().view(1, 3).repeat(num_points, 1).contiguous().view(1, num_points, 3) my_mat = quaternion_matrix(my_r) R = Variable(torch.from_numpy(my_mat[:3, :3].astype(np.float32))).cuda().view(1, 3, 3) my_mat[0:3, 3] = my_t new_points = torch.bmm((points - 
T), R).contiguous() pred_r, pred_t = self.refiner(new_points, emb, idx) pred_r = pred_r.view(1, 1, -1) pred_r = pred_r / (torch.norm(pred_r, dim=2).view(1, 1, 1)) my_r_2 = pred_r.view(-1).cpu().data.numpy() my_t_2 = pred_t.view(-1).cpu().data.numpy() my_mat_2 = quaternion_matrix(my_r_2) my_mat_2[0:3, 3] = my_t_2 my_mat_final = np.dot(my_mat, my_mat_2) # refine pose means two matrix multiplication my_r_final = copy.deepcopy(my_mat_final) my_r_final[0:3, 3] = 0 my_r_final = quaternion_from_matrix(my_r_final, True) my_t_final = np.array([my_mat_final[0][3], my_mat_final[1][3], my_mat_final[2][3]]) my_pred = np.append(my_r_final, my_t_final) my_r = my_r_final my_t = my_t_final my_r = quaternion_matrix(my_r)[:3, :3] my_t = np.array(my_t) print('estimated rotation is\n:{0}'.format(my_r)) print('estimated translation is\n :{0}'.format(my_t)) ## custom scaling for 3Dbox col = [2,0,1] new_col = np.zeros((len(col), len(col))) for idx, i in enumerate(col): new_col[idx, i] = 1 self.scaled = np.dot(self.scaled, new_col) target = np.dot(self.scaled, my_r.T) target = np.add(target, my_t) p0 = (int((target[0][0]/ target[0][2])*self.cam_fx + self.cam_cx), int((target[0][1]/ target[0][2])*self.cam_fy + self.cam_cy)) p1 = (int((target[1][0]/ target[1][2])*self.cam_fx + self.cam_cx), int((target[1][1]/ target[1][2])*self.cam_fy + self.cam_cy)) p2 = (int((target[2][0]/ target[2][2])*self.cam_fx + self.cam_cx), int((target[2][1]/ target[2][2])*self.cam_fy + self.cam_cy)) p3 = (int((target[3][0]/ target[3][2])*self.cam_fx + self.cam_cx), int((target[3][1]/ target[3][2])*self.cam_fy + self.cam_cy)) p4 = (int((target[4][0]/ target[4][2])*self.cam_fx + self.cam_cx), int((target[4][1]/ target[4][2])*self.cam_fy + self.cam_cy)) p5 = (int((target[5][0]/ target[5][2])*self.cam_fx + self.cam_cx), int((target[5][1]/ target[5][2])*self.cam_fy + self.cam_cy)) p6 = (int((target[6][0]/ target[6][2])*self.cam_fx + self.cam_cx), int((target[6][1]/ target[6][2])*self.cam_fy + self.cam_cy)) p7 = (int((target[7][0]/ target[7][2])*self.cam_fx + self.cam_cx), int((target[7][1]/ target[7][2])*self.cam_fy + self.cam_cy)) cv2.line(rgb_original, p0,p1,(0,0,255), 2) cv2.line(rgb_original, p0,p3,(0,0,255), 2) cv2.line(rgb_original, p0,p4,(0,0,255), 2) cv2.line(rgb_original, p1,p2,(0,0,255), 2) cv2.line(rgb_original, p1,p5,(0,0,255), 2) cv2.line(rgb_original, p2,p3,(0,0,255), 2) cv2.line(rgb_original, p2,p6,(0,0,255), 2) cv2.line(rgb_original, p3,p7,(0,0,255), 2) cv2.line(rgb_original, p4,p5,(0,0,255), 2) cv2.line(rgb_original, p4,p7,(0,0,255), 2) cv2.line(rgb_original, p5,p6,(0,0,255), 2) cv2.line(rgb_original, p6,p7,(0,0,255), 2) """ Do not support live-view like cv.imshow """ plt.figure(figsize = (10,10)) plt.imshow(rgb_original, cmap = 'gray', interpolation = 'nearest', aspect='auto') plt.xticks([]), plt.yticks([]) # to hide tick values on X and Y axis plt.show() """ need python3.x """ ## https://stackoverflow.com/questions/14655969/opencv-error-the-function-is-not-implemented # cv2.imshow('rgb', cv2.cvtColor(rgb_original, cv2.COLOR_BGR2RGB)) # OpenCV uses BGR model # # cv2.waitKey(1) # key = cv2.waitKey(1) & 0xFF # if key == 27: # print("stopping streaming...") # break time2 = time.time() print('inference time is :{0}'.format(time2-time1))
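# Hedged helper sketch (the function name is introduced here for illustration):
# the eight corner projections p0..p7 above all apply the same pinhole model
# u = fx * X / Z + cx, v = fy * Y / Z + cy. Written once over an (N, 3) array
# of camera-frame points, assuming numpy as np:
def project_points(points_cam, fx, fy, cx, cy):
    u = points_cam[:, 0] / points_cam[:, 2] * fx + cx
    v = points_cam[:, 1] / points_cam[:, 2] * fy + cy
    return np.stack([u, v], axis=1).astype(int)   # integer pixel coordinates, (N, 2)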
def getRANSACInliersCoords(SourceHom0, TargetHom0, \ SourceHom1, TargetHom1, joints=None, rt_ref=[None, None], rt_pre=[None, None], MaxIterations=100, PassThreshold=[200, 200], StopThreshold=[1, 1], \ viz=False, viz_ransac=False, viz_sample=False, viz_normal=False, verbose=False, \ use_jt_pts=False, use_ext_rot=False, \ eval_rts=False): """ joints: [position, axis, pts] position: [1, 3] axis : 3 pts : [N, 3] """ BestResidual0 = 1e10 BestResidual1 = 1e10 BestInlierRatio0 = 0 BestInlierRatio1 = 0 BestInlierIdx0 = np.arange(SourceHom0.shape[1]) BestInlierIdx1 = np.arange(SourceHom1.shape[1]) # if viz_ransac: # todo # plot3d_pts([[SourceHom0[:3].transpose(), SourceHom1[:3].transpose(), TargetHom0[:3].transpose(), TargetHom1[:3].transpose()]], [['source0', 'source1', 'target0', 'target1']], s=5, title_name=['points to ransac'], color_channel=None, save_fig=False, sub_name='default') position, joint_axis, joint_pts = get_joint_features(joints) assert joint_pts.shape[0] == 4 ang_dis_list = [[], []] inliers_ratio = [[], []] select_index = [0] * 2 for i in range(0, MaxIterations): if i > 5: verbose = False RandIdx0 = np.random.randint(SourceHom0.shape[1], size=5) RandIdx1 = np.random.randint(SourceHom1.shape[1], size=5) scale, Rs, Ts, OutTrans = estimateSimilarityUmeyamaCoords(SourceHom0[:, RandIdx0], TargetHom0[:, RandIdx0],\ SourceHom1[:, RandIdx1], TargetHom1[:, RandIdx1], joint_axis, joint_pts=joint_pts, rt_ref=rt_ref, rt_pre=rt_pre, \ viz=viz, viz_ransac=viz_ransac, viz_sample=viz_sample, use_jt_pts=use_jt_pts, use_ext_rot=use_ext_rot, verbose=verbose, index=i+1) # evaluate per part pts if eval_rts: # print('evaluating inliers using rts for pair 0') Residual0, InlierRatio0, InlierIdx0 = evaluateModel( OutTrans[0], SourceHom0, TargetHom0, PassThreshold[0]) else: Residual0, InlierRatio0, InlierIdx0 = evaluateModelRotation( Rs[0].T, SourceHom0, TargetHom0, 0.05 * PassThreshold[0], rt_ref=rt_ref[0], viz_normal=viz_normal) # if Residual0 < BestResidual0: # todo # if InlierRatio0 > BestInlierRatio0 and Residual0 < BestResidual0: if eval_rts: # print('evaluating inliers using rts for pair 1') Residual1, InlierRatio1, InlierIdx1 = evaluateModel( OutTrans[1], SourceHom1, TargetHom1, PassThreshold[1]) else: Residual1, InlierRatio1, InlierIdx1 = evaluateModelRotation( Rs[1].T, SourceHom1, TargetHom1, 0.05 * PassThreshold[1], rt_ref=rt_ref[1], viz_normal=viz_normal) if viz_ransac: inliers_ratio[0].append(InlierRatio0) inliers_ratio[1].append(InlierRatio1) for j in range(2): q_gt = quaternion_from_matrix(rt_ref[j][:3, :3]) q_iter = quaternion_from_matrix(Rs[j].T) ang_dis = 2 * np.arccos(sum(q_iter * q_gt)) * 180 / np.pi if ang_dis > 180: ang_dis = 360 - ang_dis ang_dis_list[j].append(ang_dis) if InlierRatio0 > BestInlierRatio0: select_index[0] = i BestResidual0 = Residual0 BestInlierRatio0 = InlierRatio0 BestInlierIdx0 = InlierIdx0 # if Residual1 < BestResidual1: # todo # if InlierRatio1 > BestInlierRatio1 and Residual1 < BestResidual1: if InlierRatio1 > BestInlierRatio1: select_index[1] = i BestResidual1 = Residual1 BestInlierRatio1 = InlierRatio1 BestInlierIdx1 = InlierIdx1 # print('Iteration: ', i, '\n Residual: ', [Residual0, Residual1], 'Inlier ratio: ', [InlierRatio0, InlierRatio1]) if BestResidual0 < StopThreshold[0] and BestResidual1 < StopThreshold[ 1]: break # if viz_ransac: # fig = plt.figure(dpi=200) # for j in range(2): # ax = plt.subplot(1, 2, j+1) # plt.plot(range(len(ang_dis_list[j])), ang_dis_list[j], label='rotation err') # plt.plot(range(len(inliers_ratio[j])), inliers_ratio[j], 
label='inliers ratio') # plt.plot([select_index[j]], [ang_dis_list[j][select_index[j]]], 'bo') # plt.plot([select_index[0]], [ang_dis_list[j][select_index[0]]], 'ro') # plt.xlabel('Ransac sampling order') # plt.ylabel('value') # ax.text(0.55, 0.80, 'Select {0}th inliers with {1:0.4f} rotation error'.format(select_index[j], ang_dis_list[j][select_index[j]]), transform=ax.transAxes, color='blue', fontsize=6) # plt.grid(True) # plt.legend() # plt.title('part {}'.format(j)) # plt.show() inliers = [ SourceHom0[:, BestInlierIdx0], TargetHom0[:, BestInlierIdx0], BestInlierRatio0, SourceHom1[:, BestInlierIdx1], TargetHom1[:, BestInlierIdx1], BestInlierRatio1 ] return inliers, [ang_dis_list, inliers_ratio, select_index]
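# Hedged helper sketch (an assumed utility, not part of the original code): the
# rotation error used throughout this RANSAC loop is the quaternion angular
# distance 2 * arccos(q1 . q2) in degrees, folded into [0, 180], with
# quaternion_from_matrix applied to 3x3 rotation blocks as above.
def quaternion_angular_distance_deg(R_a, R_b):
    q_a = quaternion_from_matrix(R_a)
    q_b = quaternion_from_matrix(R_b)
    ang = 2 * np.arccos(np.clip(np.sum(q_a * q_b), -1.0, 1.0)) * 180 / np.pi
    return 360 - ang if ang > 180 else ang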
def main(): # g13: parameter setting ------------------- batch_id = 1 opt.dataset ='linemod' opt.dataset_root = './datasets/linemod/Linemod_preprocessed' estimator_path = 'trained_checkpoints/linemod/pose_model_9_0.01310166542980859.pth' refiner_path = 'trained_checkpoints/linemod/pose_refine_model_493_0.006761023565178073.pth' opt.resume_posenet = estimator_path opt.resume_posenet = refiner_path dataset_config_dir = 'datasets/linemod/dataset_config' output_result_dir = 'experiments/eval_result/linemod' bs = 1 #fixed because of the default setting in torch.utils.data.DataLoader opt.iteration = 2 #default is 4 in eval_linemod.py t1_idx = 0 t1_total_eval_num = 3 axis_range = 0.1 # the length of X, Y, and Z axis in 3D vimg_dir = 'verify_img' if not os.path.exists(vimg_dir): os.makedirs(vimg_dir) #------------------------------------------- if opt.dataset == 'ycb': opt.num_objects = 21 #number of object classes in the dataset opt.num_points = 1000 #number of points on the input pointcloud opt.outf = 'trained_models/ycb' #folder to save trained models opt.log_dir = 'experiments/logs/ycb' #folder to save logs opt.repeat_epoch = 1 #number of repeat times for one epoch training elif opt.dataset == 'linemod': opt.num_objects = 13 opt.num_points = 500 opt.outf = 'trained_models/linemod' opt.log_dir = 'experiments/logs/linemod' opt.repeat_epoch = 20 else: print('Unknown dataset') return estimator = PoseNet(num_points = opt.num_points, num_obj = opt.num_objects) estimator.cuda() refiner = PoseRefineNet(num_points = opt.num_points, num_obj = opt.num_objects) refiner.cuda() if opt.resume_posenet != '': estimator.load_state_dict(torch.load(estimator_path)) if opt.resume_refinenet != '': refiner.load_state_dict(torch.load(refiner_path)) opt.refine_start = True opt.decay_start = True opt.lr *= opt.lr_rate opt.w *= opt.w_rate opt.batch_size = int(opt.batch_size / opt.iteration) optimizer = optim.Adam(refiner.parameters(), lr=opt.lr) else: opt.refine_start = False opt.decay_start = False optimizer = optim.Adam(estimator.parameters(), lr=opt.lr) if opt.dataset == 'ycb': test_dataset = PoseDataset_ycb('test', opt.num_points, False, opt.dataset_root, 0.0, opt.refine_start) elif opt.dataset == 'linemod': test_dataset = PoseDataset_linemod('test', opt.num_points, False, opt.dataset_root, 0.0, opt.refine_start) testdataloader = torch.utils.data.DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=opt.workers) print('complete loading testing loader\n') opt.sym_list = test_dataset.get_sym_list() opt.num_points_mesh = test_dataset.get_num_points_mesh() print('>>>>>>>>----------Dataset loaded!---------<<<<<<<<\n\ length of the testing set: {0}\nnumber of sample points on mesh: {1}\n\ symmetry object list: {2}'\ .format( len(test_dataset), opt.num_points_mesh, opt.sym_list)) #load pytorch model estimator.eval() refiner.eval() criterion = Loss(opt.num_points_mesh, opt.sym_list) criterion_refine = Loss_refine(opt.num_points_mesh, opt.sym_list) fw = open('{0}/t1_eval_result_logs.txt'.format(output_result_dir), 'w') #Pose estimation for j, data in enumerate(testdataloader, 0): # g13: modify this part for evaluation target-------------------- if j == t1_total_eval_num: break #---------------------------------------------------------------- points, choose, img, target, model_points, idx = data if len(points.size()) == 2: print('No.{0} NOT Pass! Lost detection!'.format(j)) fw.write('No.{0} NOT Pass! 
Lost detection!\n'.format(j)) continue points, choose, img, target, model_points, idx = Variable(points).cuda(), \ Variable(choose).cuda(), \ Variable(img).cuda(), \ Variable(target).cuda(), \ Variable(model_points).cuda(), \ Variable(idx).cuda() pred_r, pred_t, pred_c, emb = estimator(img, points, choose, idx) _, dis, new_points, new_target = criterion(pred_r, pred_t, pred_c, target, model_points, idx, points, opt.w, opt.refine_start) #if opt.refine_start: #iterative poserefinement # for ite in range(0, opt.iteration): # pred_r, pred_t = refiner(new_points, emb, idx) # dis, new_points, new_target = criterion_refine(pred_r, pred_t, new_target, model_points, idx, new_points) pred_r = pred_r / torch.norm(pred_r, dim=2).view(1, opt.num_points, 1) pred_c = pred_c.view(bs, opt.num_points) how_max, which_max = torch.max(pred_c, 1) pred_t = pred_t.view(bs * opt.num_points, 1, 3) my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy() my_t = (points.view(bs * opt.num_points, 1, 3) + pred_t)[which_max[0]].view(-1).cpu().data.numpy() my_pred = np.append(my_r, my_t) for ite in range(0, opt.iteration): T = Variable(torch.from_numpy(my_t.astype(np.float32))).cuda().view(1, 3).repeat(opt.num_points, 1).contiguous().view(1, opt.num_points, 3) my_mat = quaternion_matrix(my_r) R = Variable(torch.from_numpy(my_mat[:3, :3].astype(np.float32))).cuda().view(1, 3, 3) my_mat[0:3, 3] = my_t new_points = torch.bmm((points - T), R).contiguous() pred_r, pred_t = refiner(new_points, emb, idx) pred_r = pred_r.view(1, 1, -1) pred_r = pred_r / (torch.norm(pred_r, dim=2).view(1, 1, 1)) my_r_2 = pred_r.view(-1).cpu().data.numpy() my_t_2 = pred_t.view(-1).cpu().data.numpy() my_mat_2 = quaternion_matrix(my_r_2) my_mat_2[0:3, 3] = my_t_2 my_mat_final = np.dot(my_mat, my_mat_2) my_r_final = copy.deepcopy(my_mat_final) my_r_final[0:3, 3] = 0 my_r_final = quaternion_from_matrix(my_r_final, True) my_t_final = np.array([my_mat_final[0][3], my_mat_final[1][3], my_mat_final[2][3]]) my_pred = np.append(my_r_final, my_t_final) my_r = my_r_final my_t = my_t_final # g13: start drawing pose on image------------------------------------ # pick up image print("index {0}: {1}".format(j, test_dataset.list_rgb[j])) img = Image.open(test_dataset.list_rgb[j]) # pick up center position by bbox meta_file = open('{0}/data/{1}/gt.yml'.format(opt.dataset_root, '%02d' % test_dataset.list_obj[j]), 'r') meta = {} meta = yaml.load(meta_file) which_item = test_dataset.list_rank[j] bbx = meta[which_item][0]['obj_bb'] draw = ImageDraw.Draw(img) # draw box (ensure this is the right object) draw.line((bbx[0],bbx[1], bbx[0], bbx[1]+bbx[3]), fill=(255,0,0), width=5) draw.line((bbx[0],bbx[1], bbx[0]+bbx[2], bbx[1]), fill=(255,0,0), width=5) draw.line((bbx[0],bbx[1]+bbx[3], bbx[0]+bbx[2], bbx[1]+bbx[3]), fill=(255,0,0), width=5) draw.line((bbx[0]+bbx[2],bbx[1], bbx[0]+bbx[2], bbx[1]+bbx[3]), fill=(255,0,0), width=5) #get center c_x = bbx[0]+int(bbx[2]/2) c_y = bbx[1]+int(bbx[3]/2) draw.point((c_x,c_y), fill=(255,255,0)) #get the 3D position of center cam_intrinsic = np.zeros((3,3)) cam_intrinsic.itemset(0, test_dataset.cam_fx) cam_intrinsic.itemset(4, test_dataset.cam_fy) cam_intrinsic.itemset(2, test_dataset.cam_cx) cam_intrinsic.itemset(5, test_dataset.cam_cy) cam_intrinsic.itemset(8, 1) cam_extrinsic = my_mat_final[0:3, :] cam2d_3d = np.matmul(cam_intrinsic, cam_extrinsic) cen_3d = np.matmul(np.linalg.pinv(cam2d_3d), [[c_x],[c_y],[1]]) # replace img.show() with plt.imshow(img) #transpose three 3D axis point into 2D x_3d = cen_3d + 
[[axis_range],[0],[0],[0]] y_3d = cen_3d + [[0],[axis_range],[0],[0]] z_3d = cen_3d + [[0],[0],[axis_range],[0]] x_2d = np.matmul(cam2d_3d, x_3d) y_2d = np.matmul(cam2d_3d, y_3d) z_2d = np.matmul(cam2d_3d, z_3d) #draw the axis on 2D draw.line((c_x, c_y, x_2d[0], x_2d[1]), fill=(255,255,0), width=5) draw.line((c_x, c_y, y_2d[0], y_2d[1]), fill=(0,255,0), width=5) draw.line((c_x, c_y, z_2d[0], z_2d[1]), fill=(0,0,255), width=5) #g13: show image #img.show() #save file under file img_file_name = '{0}/pred_obj{1}_pic{2}.png'.format(vimg_dir, test_dataset.list_obj[j], which_item) img.save( img_file_name, "PNG" ) img.close()
def estimateSimilarityTransformCoords(source: np.array, target: np.array, source1=None, target1=None, joints=None, rt_ref=[None, None], rt_pre=[None, None],\ viz=False, viz_ransac=False, viz_sample=False, viz_normal=False, use_jt_pts=False, eval_rts=False, use_ext_rot=False, verbose=False, index=0): nIter = 100 # [4, N], [4, N] SourceHom, TargetHom, TargetNorm, SourceNorm, RatioTS, RatioST, PassT, StopT = set_config( source, target, verbose) SourceHom1, TargetHom1, TargetNorm1, SourceNorm1, RatioTS1, RatioST1, PassT1, StopT1 = set_config( source1, target1, verbose) # 1. find inliers inliers, records = getRANSACInliersCoords(SourceHom, TargetHom, SourceHom1, TargetHom1, joints=joints, rt_ref=rt_ref, rt_pre=rt_pre, \ MaxIterations=nIter, PassThreshold=[PassT, PassT1], StopThreshold=[StopT, StopT1], \ viz=viz, viz_ransac=viz_ransac, viz_sample=viz_sample, viz_normal=viz_normal, use_jt_pts=use_jt_pts, eval_rts=eval_rts, use_ext_rot=use_ext_rot, verbose=verbose) SourceInliersHom, TargetInliersHom, BestInlierRatio0, SourceInliersHom1, TargetInliersHom1, BestInlierRatio1 = inliers ang_dis_list, inliers_ratio, select_index = records if (BestInlierRatio0 < 0.05) or (BestInlierRatio1 < 0.05): print('[ WARN ] - Something is wrong. Small BestInlierRatio: ', [BestInlierRatio0, BestInlierRatio1]) return None, None, None, None # 2. further use inlier points and joints to decide the final pose position, joint_axis, joint_pts = get_joint_features(joints) assert joint_pts.shape[0] == 4 Scale, Rotations, Translations, OutTransforms = estimateSimilarityUmeyamaCoords(SourceInliersHom, TargetInliersHom, SourceInliersHom1, TargetInliersHom1, joint_axis, rt_ref=rt_ref, joint_pts=joint_pts, \ viz=viz, viz_ransac=viz_ransac, viz_sample=viz_sample, use_jt_pts=use_jt_pts, use_ext_rot=use_ext_rot, verbose=verbose) if verbose: print('BestInlierRatio:', BestInlierRatio0) if viz_ransac: fig = plt.figure(dpi=200) for j in range(2): q_gt = quaternion_from_matrix(rt_ref[j][:3, :3]) q_iter = quaternion_from_matrix(Rotations[j].T) ang_dis = 2 * np.arccos(sum(q_iter * q_gt)) * 180 / np.pi if ang_dis > 180: ang_dis = 360 - ang_dis ax = plt.subplot(1, 2, j + 1) plt.plot(range(len(ang_dis_list[j])), ang_dis_list[j], label='rotation err') plt.plot(range(len(inliers_ratio[j])), inliers_ratio[j], label='inliers ratio') plt.plot([select_index[j]], [ang_dis_list[j][select_index[j]]], 'bo') plt.plot([select_index[0]], [ang_dis_list[j][select_index[0]]], 'ro') plt.plot([select_index[j]], [ang_dis], 'yo', label='final rotation error') plt.xlabel('Ransac sampling order') plt.ylabel('value') ax.text(0.55, 0.80, 'Select {0}th inliers with {1:0.4f} rotation error'.format( select_index[j], ang_dis_list[j][select_index[j]]), transform=ax.transAxes, color='blue', fontsize=6) plt.grid(True) plt.legend() plt.title('part {}'.format(j)) plt.show() save_path = '/home/lxiaol9/Downloads/ARCwork/6DPOSE/results/test_pred/images' fig.savefig('{}/{}_{}.png'.format(save_path, index, 'coord_descent'), pad_inches=0) return Scale, Rotations, Translations, OutTransforms
def pose(self): # get mask and segmentation mask, bbox, viz = self.draw_seg(self.batch_predict()) pred = mask pred = pred * 255 pred = np.transpose(pred, (1, 2, 0)) # (CxHxW)->(HxWxC) # convert img into tensor rgb_original = self.rgb norm = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) self.rgb = Variable(norm(torch.from_numpy(self.rgb.astype( np.float32)))).cuda() all_masks = [] mask_depth = ma.getmaskarray(ma.masked_not_equal(self.depth, 0)) mask_label = ma.getmaskarray(ma.masked_equal(pred, np.array(255))) for b in range(len(bbox)): mask = mask_depth * mask_label[:, :, b] rmin = int(bbox[b, 0]) rmax = int(bbox[b, 1]) cmin = int(bbox[b, 2]) cmax = int(bbox[b, 3]) img = np.transpose(rgb_original, (0, 1, 2)) #CxHxW img_masked = img[:, rmin:rmax, cmin:cmax] choose = mask[rmin:rmax, cmin:cmax].flatten().nonzero()[0] if len(choose) == 0: cc = torch.LongTensor([0]) return (cc, cc, cc, cc, cc, cc) if len(choose) > num_points: c_mask = np.zeros(len(choose), dtype=int) c_mask[:num_points] = 1 np.random.shuffle(c_mask) choose = choose[c_mask.nonzero()] else: choose = np.pad(choose, (0, num_points - len(choose)), 'wrap') # visualize each masks # plt.imshow(mask), plt.show() depth_masked = self.depth[ rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) xmap_masked = self.xmap[ rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) ymap_masked = self.ymap[ rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) choose = np.array([choose]) cam_scale = 1.0 pt2 = depth_masked / cam_scale pt0 = (ymap_masked - self.cam_cx) * pt2 / self.cam_fx pt1 = (xmap_masked - self.cam_cy) * pt2 / self.cam_fy cloud = np.concatenate((pt0, pt1, pt2), axis=1) cloud = cloud / 1000 points = torch.from_numpy(cloud.astype(np.float32)) choose = torch.LongTensor(choose.astype(np.int32)) img_ = norm(torch.from_numpy(img_masked.astype(np.float32))) idx = torch.LongTensor([self.object_index]) img_ = Variable(img_).cuda().unsqueeze(0) points = Variable(points).cuda().unsqueeze(0) choose = Variable(choose).cuda().unsqueeze(0) idx = Variable(idx).cuda().unsqueeze(0) pred_r, pred_t, pred_c, emb = self.estimator( img_, points, choose, idx) pred_r = pred_r / torch.norm(pred_r, dim=2).view(1, num_points, 1) pred_c = pred_c.view(bs, num_points) how_max, which_max = torch.max(pred_c, 0) #1 pred_t = pred_t.view(bs * num_points, 1, 3) # print("max confidence", how_max) my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy() my_t = (points.view(bs * num_points, 1, 3) + pred_t)[which_max[0]].view(-1).cpu().data.numpy() my_pred = np.append(my_r, my_t) for ite in range(0, iteration): T = Variable(torch.from_numpy( my_t.astype(np.float32))).cuda().view(1, 3).repeat( num_points, 1).contiguous().view(1, num_points, 3) my_mat = quaternion_matrix(my_r) R = Variable( torch.from_numpy(my_mat[:3, :3].astype( np.float32))).cuda().view(1, 3, 3) my_mat[0:3, 3] = my_t new_points = torch.bmm((points - T), R).contiguous() pred_r, pred_t = self.refiner(new_points, emb, idx) pred_r = pred_r.view(1, 1, -1) pred_r = pred_r / (torch.norm(pred_r, dim=2).view(1, 1, 1)) my_r_2 = pred_r.view(-1).cpu().data.numpy() my_t_2 = pred_t.view(-1).cpu().data.numpy() my_mat_2 = quaternion_matrix(my_r_2) my_mat_2[0:3, 3] = my_t_2 my_mat_final = np.dot( my_mat, my_mat_2) # refine pose means two matrix multiplication my_r_final = copy.deepcopy(my_mat_final) my_r_final[0:3, 3] = 0 my_r_final = quaternion_from_matrix(my_r_final, True) my_t_final = np.array([ my_mat_final[0][3], 
my_mat_final[1][3], my_mat_final[2][3] ]) my_pred = np.append(my_r_final, my_t_final) my_r = my_r_final my_t = my_t_final # POSITION # ndds has cm units my_t = np.array(my_t) # my_t = np.array([my_t[0], my_t[1], 1-my_t[2]]) # print('estimated translation is:{0}'.format(my_t)) # ROTATION my_r = quaternion_matrix(my_r)[:3, :3] # my_r = np.dot(my_r, np.array([[1, 0, 0], [0, 0, -1], [0, -1, 0]])) # print('estimated rotation is\n:{0}'.format(my_r)) # Draw estimated pose 3Dbox target = np.dot(self.scaled, my_r.T) #my_r.T target = np.add(target, my_t) self.draw_cube(target, viz) # Norm pose NormPos = np.linalg.norm((my_t), ord=1) print("Pos:{0}".format(my_t)) plt.figure(figsize=(10, 10)), plt.imshow(viz), plt.show() return viz
def __getitem__(self, index): try: img = np.array( cv2.imread('{0}/{1}_color.png'.format( self.root, self.list[index]))) / 255. depth = np.array( cv2.imread( '{0}/{1}_depth.png'.format(self.root, self.list[index]), -1)) if len(depth.shape) == 3: depth = np.uint16(depth[:, :, 1] * 256) + \ np.uint16(depth[:, :, 2]) label = np.array( cv2.imread('{0}/{1}_mask.png'.format(self.root, self.list[index]))[:, :, 2]) meta = dict() with open("{0}/{1}_meta.txt".format(self.root, self.list[index]), "r") as f: for line in f: line = line.replace("\n", "") line = line.split(" ") if int(line[1]) == 0: # mask out background continue d = {"cls_id": line[1], "inst_name": line[2]} if "real_train" in self.list[index]: d["inst_dir"] = os.path.join( self.root, "obj_models", "real_train", line[2] + "_{}.ply".format(self.num_pt)) d["ori_inst_dir"] = os.path.join( self.root, "obj_models", "real_train", line[2] + ".obj") elif "real_test" in self.list[index]: d["inst_dir"] = os.path.join( self.root, "obj_models", "real_test", line[2] + "_{}.ply".format(self.num_pt)) d["ori_inst_dir"] = os.path.join( self.root, "obj_models", "real_test", line[2] + ".obj") else: d["inst_dir"] = os.path.join( self.root, "obj_models", "train", *line[2:], "model_{}.ply".format(self.num_pt)) d["ori_inst_dir"] = os.path.join( self.root, "obj_models", "train", *line[2:], "model.obj") meta[int(line[0])] = d if not self.list[index].startswith("real"): cam_cx = self.cam_cx_2 cam_cy = self.cam_cy_2 cam_fx = self.cam_fx_2 cam_fy = self.cam_fy_2 else: cam_cx = self.cam_cx_1 cam_cy = self.cam_cy_1 cam_fx = self.cam_fx_1 cam_fy = self.cam_fy_1 obj = list(meta.keys()) iidx = np.arange(len(obj)) np.random.shuffle(iidx) for idx in iidx: mask_depth = ma.getmaskarray(ma.masked_not_equal(depth, 0)) mask_label = ma.getmaskarray(ma.masked_equal(label, obj[idx])) mask = mask_label * mask_depth if len(mask.nonzero()[0]) > self.minimum_num_pt: break else: print("Can't find any valid training object in {}".format( self.list[index])) raise ValueError # A method to load target_r and target_t if os.path.isfile("{}/gts/{}_poses.txt".format( self.root, self.list[index])) and os.path.isfile( "{}/gts/{}_scales.txt".format(self.root, self.list[index])): meta["poses"] = np.loadtxt("{}/gts/{}_poses.txt".format( self.root, self.list[index])).reshape(-1, 4, 4) meta["scales"] = np.loadtxt("{}/gts/{}_scales.txt".format( self.root, self.list[index])).reshape(-1, 3) else: coord = cv2.imread('{0}/{1}_coord.png'.format( self.root, self.list[index]))[:, :, :3][:, :, (2, 1, 0)] coord = np.array(coord, dtype=np.float32) / 255. 
coord[:, :, 2] = 1.0 - coord[:, :, 2] intr = np.array([[cam_fx, 0, cam_cx], [0, cam_fy, cam_cy], [0., 0., 1.]]) poses, scales = align(obj, label, coord, depth, intr) os.makedirs(os.path.dirname("{}/gts/{}_poses.txt".format( self.root, self.list[index])), exist_ok=True) np.savetxt( "{}/gts/{}_poses.txt".format(self.root, self.list[index]), poses.reshape(-1, 4)) np.savetxt( "{}/gts/{}_scales.txt".format(self.root, self.list[index]), scales.reshape(-1, 3)) meta["poses"] = poses meta["scales"] = scales rmin, rmax, cmin, cmax = get_bbox(mask_label) img_masked = np.transpose(img, (2, 0, 1))[:, rmin:rmax, cmin:cmax] target_r = meta['poses'][idx][:3, 0:3] target_t = np.array([meta['poses'][idx][:3, 3:4].flatten()]) choose = mask[rmin:rmax, cmin:cmax].flatten().nonzero()[0] if len(choose) > self.num_pt: c_mask = np.zeros(len(choose), dtype=int) c_mask[:self.num_pt] = 1 np.random.shuffle(c_mask) choose = choose[c_mask.nonzero()] else: choose = np.pad(choose, (0, self.num_pt - len(choose)), 'wrap') depth_masked = depth[ rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) xmap_masked = self.xmap[ rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) ymap_masked = self.ymap[ rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) choose = np.array([choose]) cam_scale = 1000.0 pt2 = depth_masked / cam_scale pt0 = (ymap_masked - cam_cx) * pt2 / cam_fx pt1 = (xmap_masked - cam_cy) * pt2 / cam_fy cloud = np.concatenate((-pt0, -pt1, pt2), axis=1) model_points = load_obj(path=meta[obj[idx]]["inst_dir"], ori_path=meta[obj[idx]]["ori_inst_dir"], num_points=self.num_pt) model_points = model_points * meta["scales"][idx] target = np.dot(model_points, target_r.T) target = np.add(target, target_t) matrix = np.eye(4) matrix[:3, :3] = target_r quat = quaternion_from_matrix(matrix) return torch.from_numpy(cloud.astype(np.float32)), \ torch.LongTensor(choose.astype(np.int32)), \ self.norm(torch.from_numpy(img_masked.astype(np.float32))), \ torch.from_numpy(target.astype(np.float32)), \ torch.from_numpy(model_points.astype(np.float32)), \ torch.LongTensor([int(meta[obj[idx]]["cls_id"])-1]), \ torch.from_numpy(quat.astype(np.float32)), \ torch.from_numpy(target_t.astype(np.float32)) except: return self.__getitem__(index // 2)
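# Hedged helper sketch (illustrative only): the pt0 / pt1 / pt2 construction in
# __getitem__ above is the standard pinhole depth back-projection. For a pixel
# at column u, row v with raw depth d and intrinsics (fx, fy, cx, cy):
# z = d / depth_scale, x = (u - cx) * z / fx, y = (v - cy) * z / fy. The
# depth_scale default of 1000.0 mirrors the cam_scale used above.
def backproject_pixel(u, v, d, fx, fy, cx, cy, depth_scale=1000.0):
    z = d / depth_scale                 # depth in millimetres -> metres
    x = (u - cx) * z / fx
    y = (v - cy) * z / fy
    return np.array([x, y, z], dtype=np.float32)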
init_cloud, max_iterations=20000, tolerance=0.000001) t_itr.append(iterations) # pcd_src = o3d.geometry.PointCloud() # pcd_target = o3d.geometry.PointCloud() # pcd_src.points = o3d.utility.Vector3dVector(init_cloud) # pcd_target.points = o3d.utility.Vector3dVector(original_cloud) # t_itr.append(0) # reg_p2p = o3d.pipelines.registration.registration_icp(pcd_target, pcd_src, 0.2, np.eye(4),o3d.pipelines.registration.TransformationEstimationPointToPoint(), o3d.pipelines.registration.ICPConvergenceCriteria(max_iteration = 20000, relative_rmse = 1.0e-10, relative_fitness=1.000000e-10)) # delta_T = reg_p2p.transformation my_mat_final = np.dot(my_mat, delta_T) my_r_final = copy.deepcopy(my_mat_final) my_r_final[0:3, 3] = 0 my_r_final = quaternion_from_matrix(my_r_final, True) my_t_final = np.array( [my_mat_final[0][3], my_mat_final[1][3], my_mat_final[2][3]]) my_pred = np.append(my_r_final, my_t_final) my_r = my_r_final my_t = my_t_final else: for ite in range(0, iteration): T = Variable(torch.from_numpy( my_t.astype(np.float32))).cuda().view(1, 3).repeat( num_points, 1).contiguous().view(1, num_points, 3) my_mat = quaternion_matrix(my_r) R = Variable(torch.from_numpy(my_mat[:3, :3].astype( np.float32))).cuda().view(1, 3, 3)
def pose_predict(img, depth, rois): label_pub = rospy.Publisher('/label', Image, queue_size=10) img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) class_list = [ '002_master_chef_can', '003_cracker_box', '004_sugar_box', '005_tomato_soup_can', '006_mustard_bottle', '007_tuna_fish_can', '008_pudding_box', '009_gelatin_box', '010_potted_meat_can', '011_banana', '019_pitcher_base', '025_mug', '021_bleach_cleanser', '024_bowl', '035_power_drill', '036_wood_block', '037_scissors', '040_large_marker', '051_large_clamp', '052_extra_large_clamp', '061_foam_brick' ] try: object_number = len(rois) #lst = posecnn_rois[:,0:1].flatten() #lst = np.unique(label) my_result_wo_refine = [] my_result = [] for idx in range(object_number): #itemid = lst[idx] itemid = class_list.index(rois[idx].Class) + 1 #itemid = class_list.index(rois[idx].Class) +3 print(object_number, itemid, rois[idx]) try: label, pub_label = seg_predict(img) pub_label = pub_label * 50 label_pub.publish(bridge.cv2_to_imgmsg(pub_label, '8UC1')) ####################### with Detection algorithm ################################# # rmin, rmax, cmin,cmax = get_bbox(rois,idx) ##################################################################################### mask_depth = ma.getmaskarray(ma.masked_not_equal(depth, 0)) mask_label = ma.getmaskarray(ma.masked_equal(label, itemid)) mask = mask_label * mask_depth rmin, rmax, cmin, cmax = get_bbox(mask_label) choose = mask[rmin:rmax, cmin:cmax].flatten().nonzero()[0] if len(choose) > num_points: c_mask = np.zeros(len(choose), dtype=int) c_mask[:num_points] = 1 np.random.shuffle(c_mask) choose = choose[c_mask.nonzero()] else: choose = np.pad(choose, (0, num_points - len(choose)), 'wrap') depth_masked = depth[ rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) xmap_masked = xmap[ rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) ymap_masked = ymap[ rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) choose = np.array([choose]) pt2 = depth_masked / cam_scale pt0 = (ymap_masked - cam_cx) * pt2 / cam_fx pt1 = (xmap_masked - cam_cy) * pt2 / cam_fy cloud = np.concatenate((pt0, pt1, pt2), axis=1) img_masked = np.array(img)[:, :, :3] img_masked = np.transpose(img_masked, (2, 0, 1)) img_masked = img_masked[:, rmin:rmax, cmin:cmax] cloud = torch.from_numpy(cloud.astype(np.float32)) choose = torch.LongTensor(choose.astype(np.int32)) img_masked = norm( torch.from_numpy(img_masked.astype(np.float32))) index = torch.LongTensor([itemid - 1]) cloud = Variable(cloud).cuda() choose = Variable(choose).cuda() img_masked = Variable(img_masked).cuda() index = Variable(index).cuda() cloud = cloud.view(1, num_points, 3) img_masked = img_masked.view(1, 3, img_masked.size()[1], img_masked.size()[2]) pred_r, pred_t, pred_c, emb = estimator( img_masked, cloud, choose, index) pred_r = pred_r / torch.norm(pred_r, dim=2).view( 1, num_points, 1) pred_c = pred_c.view(bs, num_points) how_max, which_max = torch.max(pred_c, 1) pred_t = pred_t.view(bs * num_points, 1, 3) points = cloud.view(bs * num_points, 1, 3) my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy() my_t = (points + pred_t)[which_max[0]].view(-1).cpu().data.numpy() my_pred = np.append(my_r, my_t) # making pose matrix rot_to_angle = rotationMatrixToEulerAngles(dof[:3, :3]) rot_to_angle = rot_to_angle.reshape(1, 3) my_t = my_t.reshape(1, 3) rot_t = np.concatenate([rot_to_angle, my_t], axis=0) # cam_mat = cv2.UMat(np.matrix([[cam_fx, 0, cam_cx], [0, cam_fy, cam_cy], # [0, 0, 1]])) #tl = np.array([100,100,100]) 
#cam_mat = cv2.UMat(np.matrix([[960.14238289, 0, 252.43270692], [0, 960.14238289, 317.39366696], # [0, 0, 1]])) for ite in range(0, iteration): T = Variable(torch.from_numpy( my_t.astype(np.float32))).cuda().view(1, 3).repeat( num_points, 1).contiguous().view(1, num_points, 3) my_mat = quaternion_matrix(my_r) R = Variable( torch.from_numpy(my_mat[:3, :3].astype( np.float32))).cuda().view(1, 3, 3) my_mat[0:3, 3] = my_t new_cloud = torch.bmm((cloud - T), R).contiguous() pred_r, pred_t = refiner(new_cloud, emb, index) pred_r = pred_r.view(1, 1, -1) pred_r = pred_r / (torch.norm(pred_r, dim=2).view(1, 1, 1)) my_r_2 = pred_r.view(-1).cpu().data.numpy() my_t_2 = pred_t.view(-1).cpu().data.numpy() my_mat_2 = quaternion_matrix(my_r_2) my_mat_2[0:3, 3] = my_t_2 my_mat_final = np.dot(my_mat, my_mat_2) my_r_final = copy.deepcopy(my_mat_final) my_r_final[0:3, 3] = 0 my_r_final = quaternion_from_matrix(my_r_final, True) my_t_final = np.array([ my_mat_final[0][3], my_mat_final[1][3], my_mat_final[2][3] ]) my_pred = np.append(my_r_final, my_t_final) my_r = my_r_final my_t = my_t_final open_cv_image = img.copy() open_cv_image = cv2.cvtColor(open_cv_image, cv2.COLOR_RGB2BGR) dof = quaternion_matrix(my_r) dof[0:3, 3] = my_t object_poses = { 'tx': my_t[0][0], 'ty': my_t[0][1], 'tz': my_t[0][2], 'qx': my_r[0], 'qy': my_r[1], 'qz': my_r[2], 'qw': my_r[3] } my_result.append(object_poses) open_cv_image = img.copy() open_cv_image = cv2.cvtColor(open_cv_image, cv2.COLOR_RGB2BGR) imgpts, jac = cv2.projectPoints(cld[itemid], dof[0:3, 0:3], dof[0:3, 3], cam_mat, dist) # 13 = mug open_cv_image = draw(open_cv_image, imgpts, itemid) except ZeroDivisionError: open_cv_image = None print('Fail') except CvBridgeError as e: print(e) return my_result, open_cv_image
def tcplink(sock, addr):
    print("Accept a new connection from %s:%s..." % addr)
    sock.send(b'Welcome!')
    # flag = 1
    # cv2.namedWindow('color label')
    while True:
        # receive the color image (16-byte length header, then the encoded payload)
        length = recvall(sock, 16)
        if not length:
            break
        stringData = recvall(sock, int(length))
        if not stringData:
            break
        data = np.frombuffer(stringData, dtype='uint8')  # np.fromstring is deprecated
        color_image = cv2.imdecode(data, cv2.IMREAD_COLOR)
        color_image = np.asanyarray(color_image)

        # receive the depth image
        length2 = recvall(sock, 16)
        if not length2:
            break
        stringData2 = recvall(sock, int(length2))
        if not stringData2:  # was re-checking stringData, which ignored a failed depth transfer
            break
        data2 = np.frombuffer(stringData2, dtype='uint8')
        depth_image = cv2.imdecode(data2, -1)
        depth_image = np.asanyarray(depth_image)

        rgb2 = copy.deepcopy(color_image)
        rgb3 = Image.fromarray(rgb2.astype('uint8')).convert('RGB')
        rgb3 = ImageEnhance.Brightness(rgb3).enhance(1.4)
        # rgb3 = ImageEnhance.Contrast(rgb3).enhance(1.4)
        # rgb3 = rgb3.filter(ImageFilter.GaussianBlur(radius=2))
        # rgb3 = copy.deepcopy(rgb2)

        # semantic segmentation: per-pixel class labels
        rgb = np.array(rgb2).astype(np.float32)
        rgb = torch.from_numpy(rgb).cuda().permute(2, 0, 1).contiguous()
        rgb = rgb_norm(rgb).view(1, 3, 480, 640)
        semantic = model(rgb)
        semantic = semantic.view(4, 480, 640).permute(1, 2, 0).contiguous()
        max_values, labels = torch.max(semantic, 2)
        labels = labels.cpu().detach().numpy().astype(np.uint8)

        # send the label image back to the client
        encode_labels = cv2.imencode('.jpg', labels)[1]
        # cv2.waitKey()
        label_encode = np.array(encode_labels)
        str_label = label_encode.tobytes()
        label_length = str.encode(str(len(str_label)).ljust(16))
        sock.send(label_length)
        sock.send(str_label)

        # pose prediction
        obj_ids = np.unique(labels)[1:]
        print(obj_ids)
        posenetlist = [1, 2, 3]
        zero_mat = np.zeros((4, 4))
        pose_result = []
        for obj in posenetlist:
            # binary mask for the current object id
            arr = copy.deepcopy(labels)
            arr = np.where(arr != obj, 0, arr)
            arr = np.where(arr == obj, 255, arr)
            contours, hierarchy = cv2.findContours(arr, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
            contour = 0
            x, y, w, h = 0, 0, 0, 0
            if len(contours) == 0:
                continue
            continue_flag = 0
            for i in range(len(contours)):
                area = cv2.contourArea(contours[i])
                if area > 2500:
                    contour = contours[i]
                    x, y, w, h = cv2.boundingRect(contour)
                    continue_flag = 0
                    break
                else:
                    continue_flag = 1
            if continue_flag == 1:
                pose_result.append(zero_mat)
                continue
            idx = posenetlist.index(obj)
            bbx = [y, y + h, x, x + w]
            rmin, rmax, cmin, cmax = get_bbox(bbx)
            # img = copy.deepcopy(color_image)
            img_masked = np.transpose(np.array(rgb3)[:, :, :3], (2, 0, 1))[:, rmin:rmax, cmin:cmax]
            img_masked_shape = img_masked.shape
            mask_label = ma.getmaskarray(ma.masked_equal(labels, np.array(obj)))
            choose = mask_label[rmin:rmax, cmin:cmax].flatten().nonzero()[0]
            if len(choose) > num_points:
                c_mask = np.zeros(len(choose), dtype=int)
                c_mask[:num_points] = 1
                np.random.shuffle(c_mask)
                choose = choose[c_mask.nonzero()]
            else:
                choose = np.pad(choose, (0, num_points - len(choose)), 'wrap')
            choose_shape = choose.shape

            # back-project the selected pixels into a point cloud (metres)
            xmap = np.array([[j for i in range(640)] for j in range(480)])
            ymap = np.array([[i for i in range(640)] for j in range(480)])
            depth = copy.deepcopy(depth_image)
            depth_masked = depth[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
            xmap_masked = xmap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
            ymap_masked = ymap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
            choose = np.array([choose])
            cam_scale = 1.0
            pt2 = depth_masked / cam_scale
            pt0 = (ymap_masked - cam_cx) * pt2 / cam_fx
            pt1 = (xmap_masked - cam_cy) * pt2 / cam_fy
            cloud = np.concatenate((pt0, pt1, pt2), axis=1)
            cloud /= 1000
            cloud_shape = cloud.shape
            # points = cloud.reshape((1,cloud_shape[0,cloud_shape[1]]))
            # print(cloud_shape)
            # points = cloud.view(1,cloud_shape[0],cloud_shape[1])
            # choose = choose.view(1,choose_shape[0],choose[1])
            # img_masked = img_masked.reshape((1,img_masked_shape[0],img_masked_shape[1],img_masked_shape[2]))
            if cloud.shape[0] < 2:
                print('Lost detection!')
                # fw.write('No.{0} NOT Pass! Lost detection!\n'.format(i))
                pose_result.append(zero_mat)  # keep a placeholder so the result layout stays fixed
                continue

            # initial pose estimation
            points = torch.from_numpy(cloud.astype(np.float32)).cuda()
            choose = torch.LongTensor(choose.astype(np.int32)).cuda()
            img = rgb_norm(torch.from_numpy(img_masked.astype(np.float32))).cuda()
            idx = torch.LongTensor([idx]).cuda()
            points = points.view(1, cloud_shape[0], cloud_shape[1]).contiguous()
            img = img.view(1, img_masked_shape[0], img_masked_shape[1], img_masked_shape[2]).contiguous()
            pred_r, pred_t, pred_c, emb = estimator(img, points, choose, idx)
            pred_r = pred_r / torch.norm(pred_r, dim=2).view(1, num_points, 1)
            pred_c = pred_c.view(bs, num_points)
            how_max, which_max = torch.max(pred_c, 1)
            pred_t = pred_t.view(bs * num_points, 1, 3)
            my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy()
            my_t = (points.view(bs * num_points, 1, 3) + pred_t)[which_max[0]].view(-1).cpu().data.numpy()
            my_pred = np.append(my_r, my_t)

            # iterative pose refinement
            for ite in range(0, iteration):
                T = Variable(torch.from_numpy(my_t.astype(np.float32))).cuda().view(1, 3).repeat(num_points, 1).contiguous().view(1, num_points, 3)
                my_mat = quaternion_matrix(my_r)
                R = Variable(torch.from_numpy(my_mat[:3, :3].astype(np.float32))).cuda().view(1, 3, 3)
                my_mat[0:3, 3] = my_t
                new_points = torch.bmm((points - T), R).contiguous()
                pred_r, pred_t = refiner(new_points, emb, idx)
                pred_r = pred_r.view(1, 1, -1)
                pred_r = pred_r / (torch.norm(pred_r, dim=2).view(1, 1, 1))
                my_r_2 = pred_r.view(-1).cpu().data.numpy()
                my_t_2 = pred_t.view(-1).cpu().data.numpy()
                my_mat_2 = quaternion_matrix(my_r_2)
                my_mat_2[0:3, 3] = my_t_2
                my_mat_final = np.dot(my_mat, my_mat_2)
                my_r_final = copy.deepcopy(my_mat_final)
                my_r_final[0:3, 3] = 0
                my_r_final = quaternion_from_matrix(my_r_final, True)
                my_t_final = np.array([my_mat_final[0][3], my_mat_final[1][3], my_mat_final[2][3]])
                my_pred = np.append(my_r_final, my_t_final)
                my_r = my_r_final
                my_t = my_t_final

            my_mat_final[:3, :3] = quaternion_matrix(my_r)[:3, :3]
            my_mat_final[:3, 3] = my_t
            # pose_mat[obj-1,3,:] = np.array([0,0,0,1])
            # pose_mat[:,:,obj-1] = my_mat_final
            pose_result.append(my_mat_final)
            if obj == posenetlist[-1]:
                break

        # send the stacked 4x4 poses back to the client
        pose_result = np.array(pose_result)
        print(pose_result)
        my_mat_str = pose_result.tobytes()
        length = str.encode(str(len(my_mat_str)).ljust(16))
        sock.send(length)
        sock.send(my_mat_str)
        print()
        print(my_mat_final)
        print()
    sock.close()
    print('connection from %s:%s is closed' % addr)
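# A minimal, self-contained sketch of the depth back-projection performed inside the loop
# above: each selected pixel (row, col) with depth d is lifted to a 3D camera-frame point
# with the pinhole model. The intrinsics (fx, fy, cx, cy) and the millimetre-to-metre
# scale are placeholders here, not values taken from this file.
import numpy as np

def backproject_pixels(depth_mm, rows, cols, fx, fy, cx, cy, depth_scale=1000.0):
    """Lift pixels (rows[i], cols[i]) with depths depth_mm[i] into 3D points (metres)."""
    z = depth_mm.astype(np.float32) / depth_scale       # depth in metres
    x = (cols.astype(np.float32) - cx) * z / fx         # X from the column offset
    y = (rows.astype(np.float32) - cy) * z / fy         # Y from the row offset
    return np.stack([x, y, z], axis=1)                  # (N, 3) point cloud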
def merge_pc(cur_cloud, last_pose, init_pose):
    # Chain the initial pose with the previous frame's pose to predict the current pose.
    pred_pose = torch.mm(init_pose.cpu(), last_pose)
    # Split the predicted transform into a quaternion and a translation for the network.
    pred_r = torch.as_tensor(quaternion_from_matrix(pred_pose[0:3, 0:3]).T,
                             dtype=torch.float32).view(1, 4, 1).cuda()
    pred_t = pred_pose[0:3, 3].view(1, 3, 1).cuda()
    return cur_cloud[0].reshape(1, -1, 35), pred_r, pred_t
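# For reference, the same bookkeeping merge_pc does on tensors can be written with plain
# numpy: compose two 4x4 homogeneous poses, then split the result into a quaternion and a
# translation. quaternion_from_matrix is assumed to be the helper already used in this
# file; this is only an illustrative sketch, not the function used by the pipeline.
import numpy as np

def compose_pose(init_pose, last_pose):
    """Return (quaternion, translation) of init_pose @ last_pose for 4x4 numpy arrays."""
    pred_pose = init_pose @ last_pose            # chain the two rigid transforms
    quat = quaternion_from_matrix(pred_pose)     # rotation part as a quaternion
    trans = pred_pose[:3, 3].copy()              # translation column
    return quat, trans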
def main(): # g13: parameter setting ------------------- ''' posemodel is trained_checkpoints/linemod/pose_model_9_0.01310166542980859.pth refine model is trained_checkpoints/linemod/pose_refine_model_493_0.006761023565178073.pth ''' objlist = [1, 2, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15] knn = KNearestNeighbor(1) opt.dataset ='linemod' opt.dataset_root = './datasets/linemod/Linemod_preprocessed' estimator_path = 'trained_checkpoints/linemod/pose_model_9_0.01310166542980859.pth' refiner_path = 'trained_checkpoints/linemod/pose_refine_model_493_0.006761023565178073.pth' opt.model = estimator_path opt.refine_model = refiner_path dataset_config_dir = 'datasets/linemod/dataset_config' output_result_dir = 'experiments/eval_result/linemod' opt.refine_start = True bs = 1 #fixed because of the default setting in torch.utils.data.DataLoader opt.iteration = 2 #default is 4 in eval_linemod.py t1_start = True t1_idx = 0 t1_total_eval_num = 3 t2_start = False t2_target_list = [22, 30, 172, 187, 267, 363, 410, 471, 472, 605, 644, 712, 1046, 1116, 1129, 1135, 1263] #t2_target_list = [0, 1] axis_range = 0.1 # the length of X, Y, and Z axis in 3D vimg_dir = 'verify_img' diameter = [] meta_file = open('{0}/models_info.yml'.format(dataset_config_dir), 'r') meta_d = yaml.load(meta_file) for obj in objlist: diameter.append(meta_d[obj]['diameter'] / 1000.0 * 0.1) print(diameter) if not os.path.exists(vimg_dir): os.makedirs(vimg_dir) #------------------------------------------- if opt.dataset == 'ycb': opt.num_objects = 21 #number of object classes in the dataset opt.num_points = 1000 #number of points on the input pointcloud opt.outf = 'trained_models/ycb' #folder to save trained models opt.log_dir = 'experiments/logs/ycb' #folder to save logs opt.repeat_epoch = 1 #number of repeat times for one epoch training elif opt.dataset == 'linemod': opt.num_objects = 13 opt.num_points = 500 opt.outf = 'trained_models/linemod' opt.log_dir = 'experiments/logs/linemod' opt.repeat_epoch = 20 else: print('Unknown dataset') return estimator = PoseNet(num_points = opt.num_points, num_obj = opt.num_objects) estimator.cuda() refiner = PoseRefineNet(num_points = opt.num_points, num_obj = opt.num_objects) refiner.cuda() estimator.load_state_dict(torch.load(estimator_path)) refiner.load_state_dict(torch.load(refiner_path)) opt.refine_start = True test_dataset = PoseDataset_linemod('test', opt.num_points, False, opt.dataset_root, 0.0, opt.refine_start) testdataloader = torch.utils.data.DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=opt.workers) opt.sym_list = test_dataset.get_sym_list() opt.num_points_mesh = test_dataset.get_num_points_mesh() print('>>>>>>>>----------Dataset loaded!---------<<<<<<<<\n\ length of the testing set: {0}\nnumber of sample points on mesh: {1}\n\ symmetry object list: {2}'\ .format( len(test_dataset), opt.num_points_mesh, opt.sym_list)) #load pytorch model estimator.eval() refiner.eval() criterion = Loss(opt.num_points_mesh, opt.sym_list) criterion_refine = Loss_refine(opt.num_points_mesh, opt.sym_list) fw = open('{0}/t1_eval_result_logs.txt'.format(output_result_dir), 'w') #Pose estimation for j, data in enumerate(testdataloader, 0): # g13: modify this part for evaluation target-------------------- if t1_start and j == t1_total_eval_num: break if t2_start and not (j in t2_target_list): continue #---------------------------------------------------------------- points, choose, img, target, model_points, idx = data if len(points.size()) == 2: print('No.{0} NOT Pass! 
Lost detection!'.format(j)) fw.write('No.{0} NOT Pass! Lost detection!\n'.format(j)) continue points, choose, img, target, model_points, idx = Variable(points).cuda(), \ Variable(choose).cuda(), \ Variable(img).cuda(), \ Variable(target).cuda(), \ Variable(model_points).cuda(), \ Variable(idx).cuda() pred_r, pred_t, pred_c, emb = estimator(img, points, choose, idx) _, dis, new_points, new_target = criterion(pred_r, pred_t, pred_c, target, model_points, idx, points, opt.w, opt.refine_start) #if opt.refine_start: #iterative poserefinement # for ite in range(0, opt.iteration): # pred_r, pred_t = refiner(new_points, emb, idx) # dis, new_points, new_target = criterion_refine(pred_r, pred_t, new_target, model_points, idx, new_points) pred_r = pred_r / torch.norm(pred_r, dim=2).view(1, opt.num_points, 1) pred_c = pred_c.view(bs, opt.num_points) how_max, which_max = torch.max(pred_c, 1) pred_t = pred_t.view(bs * opt.num_points, 1, 3) my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy() my_t = (points.view(bs * opt.num_points, 1, 3) + pred_t)[which_max[0]].view(-1).cpu().data.numpy() my_pred = np.append(my_r, my_t) for ite in range(0, opt.iteration): T = Variable(torch.from_numpy(my_t.astype(np.float32))).cuda().view(1, 3).repeat(opt.num_points, 1).contiguous().view(1, opt.num_points, 3) my_mat = quaternion_matrix(my_r) R = Variable(torch.from_numpy(my_mat[:3, :3].astype(np.float32))).cuda().view(1, 3, 3) my_mat[0:3, 3] = my_t new_points = torch.bmm((points - T), R).contiguous() pred_r, pred_t = refiner(new_points, emb, idx) pred_r = pred_r.view(1, 1, -1) pred_r = pred_r / (torch.norm(pred_r, dim=2).view(1, 1, 1)) my_r_2 = pred_r.view(-1).cpu().data.numpy() my_t_2 = pred_t.view(-1).cpu().data.numpy() my_mat_2 = quaternion_matrix(my_r_2) my_mat_2[0:3, 3] = my_t_2 my_mat_final = np.dot(my_mat, my_mat_2) my_r_final = copy.deepcopy(my_mat_final) my_r_final[0:3, 3] = 0 my_r_final = quaternion_from_matrix(my_r_final, True) my_t_final = np.array([my_mat_final[0][3], my_mat_final[1][3], my_mat_final[2][3]]) my_pred = np.append(my_r_final, my_t_final) my_r = my_r_final my_t = my_t_final # Here 'my_pred' is the final pose estimation result after refinement ('my_r': quaternion, 'my_t': translation) #g13: checking the dis value success_count = [0 for i in range(opt.num_objects)] num_count = [0 for i in range(opt.num_objects)] model_points = model_points[0].cpu().detach().numpy() my_r = quaternion_matrix(my_r)[:3, :3] pred = np.dot(model_points, my_r.T) + my_t target = target[0].cpu().detach().numpy() if idx[0].item() in opt.sym_list: pred = torch.from_numpy(pred.astype(np.float32)).cuda().transpose(1, 0).contiguous() target = torch.from_numpy(target.astype(np.float32)).cuda().transpose(1, 0).contiguous() inds = knn(target.unsqueeze(0), pred.unsqueeze(0)) target = torch.index_select(target, 1, inds.view(-1) - 1) dis = torch.mean(torch.norm((pred.transpose(1, 0) - target.transpose(1, 0)), dim=1), dim=0).item() else: dis = np.mean(np.linalg.norm(pred - target, axis=1)) if dis < diameter[idx[0].item()]: success_count[idx[0].item()] += 1 print('No.{0} Pass! Distance: {1}'.format(j, dis)) fw.write('No.{0} Pass! Distance: {1}\n'.format(j, dis)) else: print('No.{0} NOT Pass! Distance: {1}'.format(j, dis)) fw.write('No.{0} NOT Pass! 
Distance: {1}\n'.format(j, dis)) num_count[idx[0].item()] += 1 # g13: start drawing pose on image------------------------------------ # pick up image print('{0}:\nmy_r is {1}\nmy_t is {2}\ndis:{3}'.format(j, my_r, my_t, dis.item())) print("index {0}: {1}".format(j, test_dataset.list_rgb[j])) img = Image.open(test_dataset.list_rgb[j]) # pick up center position by bbox meta_file = open('{0}/data/{1}/gt.yml'.format(opt.dataset_root, '%02d' % test_dataset.list_obj[j]), 'r') meta = {} meta = yaml.load(meta_file) which_item = test_dataset.list_rank[j] which_obj = test_dataset.list_obj[j] which_dict = 0 dict_leng = len(meta[which_item]) #print('get meta[{0}][{1}][obj_bb]'.format(which_item, which_obj)) k_idx = 0 while 1: if meta[which_item][k_idx]['obj_id'] == which_obj: which_dict = k_idx break k_idx = k_idx+1 bbx = meta[which_item][which_dict]['obj_bb'] draw = ImageDraw.Draw(img) # draw box (ensure this is the right object) draw.line((bbx[0],bbx[1], bbx[0], bbx[1]+bbx[3]), fill=(255,0,0), width=5) draw.line((bbx[0],bbx[1], bbx[0]+bbx[2], bbx[1]), fill=(255,0,0), width=5) draw.line((bbx[0],bbx[1]+bbx[3], bbx[0]+bbx[2], bbx[1]+bbx[3]), fill=(255,0,0), width=5) draw.line((bbx[0]+bbx[2],bbx[1], bbx[0]+bbx[2], bbx[1]+bbx[3]), fill=(255,0,0), width=5) #get center c_x = bbx[0]+int(bbx[2]/2) c_y = bbx[1]+int(bbx[3]/2) draw.point((c_x,c_y), fill=(255,255,0)) print('center:({0},{1})'.format(c_x, c_y)) #get the 3D position of center cam_intrinsic = np.zeros((3,3)) cam_intrinsic.itemset(0, test_dataset.cam_fx) cam_intrinsic.itemset(4, test_dataset.cam_fy) cam_intrinsic.itemset(2, test_dataset.cam_cx) cam_intrinsic.itemset(5, test_dataset.cam_cy) cam_intrinsic.itemset(8, 1) cam_extrinsic = my_mat_final[0:3, :] cam2d_3d = np.matmul(cam_intrinsic, cam_extrinsic) cen_3d = np.matmul(np.linalg.pinv(cam2d_3d), [[c_x],[c_y],[1]]) # replace img.show() with plt.imshow(img) #transpose three 3D axis point into 2D x_3d = cen_3d + [[axis_range],[0],[0],[0]] y_3d = cen_3d + [[0],[axis_range],[0],[0]] z_3d = cen_3d + [[0],[0],[axis_range],[0]] x_2d = np.matmul(cam2d_3d, x_3d) y_2d = np.matmul(cam2d_3d, y_3d) z_2d = np.matmul(cam2d_3d, z_3d) #draw the axis on 2D draw.line((c_x, c_y, x_2d[0], x_2d[1]), fill=(255,255,0), width=5) draw.line((c_x, c_y, y_2d[0], y_2d[1]), fill=(0,255,0), width=5) draw.line((c_x, c_y, z_2d[0], z_2d[1]), fill=(0,0,255), width=5) #g13: draw the estimate pred obj for pti in pred: pti.transpose() pti_2d = np.matmul(cam_intrinsic, pti) #print('({0},{1})\n'.format(int(pti_2d[0]),int(pti_2d[1]))) draw.point([int(pti_2d[0]),int(pti_2d[1])], fill=(255,255,0)) #g13: show image #img.show() #save file under file img_file_name = '{0}/batch{1}_pred_obj{2}_pic{3}.png'.format(vimg_dir, j, test_dataset.list_obj[j], which_item) img.save( img_file_name, "PNG" ) img.close() # plot ground true ---------------------------- img = Image.open(test_dataset.list_rgb[j]) draw = ImageDraw.Draw(img) draw.line((bbx[0],bbx[1], bbx[0], bbx[1]+bbx[3]), fill=(255,0,0), width=5) draw.line((bbx[0],bbx[1], bbx[0]+bbx[2], bbx[1]), fill=(255,0,0), width=5) draw.line((bbx[0],bbx[1]+bbx[3], bbx[0]+bbx[2], bbx[1]+bbx[3]), fill=(255,0,0), width=5) draw.line((bbx[0]+bbx[2],bbx[1], bbx[0]+bbx[2], bbx[1]+bbx[3]), fill=(255,0,0), width=5) target_r = np.resize(np.array(meta[which_item][k_idx]['cam_R_m2c']), (3, 3)) target_t = np.array(meta[which_item][k_idx]['cam_t_m2c']) target_t = target_t[np.newaxis, :] cam_extrinsic_GT = np.concatenate((target_r, target_t.T), axis=1) #get center 3D cam2d_3d_GT = np.matmul(cam_intrinsic, cam_extrinsic_GT) 
cen_3d_GT = np.matmul(np.linalg.pinv(cam2d_3d_GT), [[c_x],[c_y],[1]]) #transpose three 3D axis point into 2D x_3d = cen_3d_GT + [[axis_range],[0],[0],[0]] y_3d = cen_3d_GT + [[0],[axis_range],[0],[0]] z_3d = cen_3d_GT + [[0],[0],[axis_range],[0]] x_2d = np.matmul(cam2d_3d_GT, x_3d) y_2d = np.matmul(cam2d_3d_GT, y_3d) z_2d = np.matmul(cam2d_3d_GT, z_3d) #draw the axis on 2D draw.line((c_x, c_y, x_2d[0], x_2d[1]), fill=(255,255,0), width=5) draw.line((c_x, c_y, y_2d[0], y_2d[1]), fill=(0,255,0), width=5) draw.line((c_x, c_y, z_2d[0], z_2d[1]), fill=(0,0,255), width=5) print('pred:\n{0}\nGT:\n{1}\n'.format(cam_extrinsic,cam_extrinsic_GT)) print('pred 3D:{0}\nGT 3D:{1}\n'.format(cen_3d, cen_3d_GT)) img_file_name = '{0}/batch{1}_pred_obj{2}_pic{3}_gt.png'.format(vimg_dir, j, test_dataset.list_obj[j], which_item) img.save( img_file_name, "PNG" ) img.close() meta_file.close() print('\nplot_result_img.py completed the task\n')
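# The pass/fail check in the evaluation loop above is the ADD metric: transform the model
# points with the predicted pose and average their distance to the ground-truth points;
# symmetric objects use the closest-point variant (ADD-S). A small numpy/scipy sketch,
# assuming (N, 3) arrays; it is not the exact code path above, which runs a CUDA KNN.
import numpy as np
from scipy.spatial import cKDTree

def add_metric(model_points, R_pred, t_pred, target_points, symmetric=False):
    pred = model_points @ R_pred.T + t_pred          # model points under the predicted pose
    if symmetric:
        # ADD-S: distance from each predicted point to its nearest ground-truth point
        dists, _ = cKDTree(target_points).query(pred, k=1)
        return float(np.mean(dists))
    # ADD: one-to-one correspondence between predicted and ground-truth points
    return float(np.mean(np.linalg.norm(pred - target_points, axis=1)))

# A pose would then count as correct when add_metric(...) < 0.1 * object_diameter,
# matching the diameter threshold used above.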
def callback(self, rgb, depth): if DEBUG: print('received depth image of type: ' + depth.encoding) print('received rgb image of type: ' + rgb.encoding) #https://answers.ros.org/question/64318/how-do-i-convert-an-ros-image-into-a-numpy-array/ depth = np.frombuffer(depth.data, dtype=np.uint16).reshape(depth.height, depth.width, -1) rgb = np.frombuffer(rgb.data, dtype=np.uint8).reshape(rgb.height, rgb.width, -1) rgb_original = rgb #cv2.imshow('depth', depth) #time1 = time.time() rgb = np.transpose(rgb, (2, 0, 1)) rgb = norm(torch.from_numpy(rgb.astype(np.float32))) rgb = Variable(rgb).cuda() semantic = self.model(rgb.unsqueeze(0)) _, pred = torch.max(semantic, dim=1) pred = pred * 255 pred = np.transpose(pred, (1, 2, 0)) # (CxHxW)->(HxWxC) #print(pred.shape) #ret, threshold = cv2.threshold(pred.cpu().numpy(), 1, 255, cv2.THRESH_BINARY) #pred is already binary, therefore, this line is unnecessary contours, hierarchy = cv2.findContours(np.uint8(pred), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) cnt = max(contours, key=cv2.contourArea) x, y, w, h = cv2.boundingRect(cnt) rmin, rmax, cmin, cmax = get_bbox([x, y, w, h]) #cv2.rectangle(rgb_original,(cmin,rmin), (cmax,rmax) , (0,255,0),2) #cv2.imwrite('depth.png', depth) #save depth image mask_depth = ma.getmasksarray(ma.masked_not_equal(depth, 0)) mask_label = ma.getmaskarray(ma.masked_equal(pred, np.array(255))) mask = mask_depth * mask_label #print(rgb.shape) #torch.Size([3, 480, 640]) #print(rgb_original.shape) #(480, 640, 3) img = np.transpose(rgb_original, (2, 0, 1)) img_masked = img[:, rmin:rmax, cmin:cmax] choose = mask[rmin:rmax, cmin:cmax].flatten().nonzero()[0] #print("length of choose is :{0}".format(len(choose))) if len(choose) == 0: cc = torch.LongTensor([0]) return (cc, cc, cc, cc, cc, cc) if len(choose) > num_points: c_mask = np.zeros(len(choose), dtype=int) c_mask[: num_points] = 1 # if number of object pixels are bigger than 500, we select just 500 np.random.shuffle(c_mask) choose = choose[c_mask.nonzero()] # now len(choose) = 500 else: choose = np.pad(choose, (0, num_points - len(choose)), 'wrap') depth_masked = depth[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype( np.float32) xmap_masked = self.xmap[ rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) ymap_masked = self.ymap[ rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) choose = np.array([choose]) pt2 = depth_masked #print(pt2) pt0 = (ymap_masked - self.cam_cx) * pt2 / self.cam_fx pt1 = (xmap_masked - self.cam_cy) * pt2 / self.cam_fy cloud = np.concatenate((pt0, pt1, pt2), axis=1) cloud = cloud / 1000 points = torch.from_numpy(cloud.astype(np.float32)) choose = torch.LongTensor(choose.astype(np.int32)) img = norm(torch.from_numpy(img_masked.astype(np.float32))) idx = torch.LongTensor([self.object_index]) img = Variable(img).cuda().unsqueeze(0) points = Variable(points).cuda().unsqueeze(0) choose = Variable(choose).cuda().unsqueeze(0) idx = Variable(idx).cuda().unsqueeze(0) pred_r, pred_t, pred_c, emb = self.estimator(img, points, choose, idx) pred_r = pred_r / torch.norm(pred_r, dim=2).view(1, num_points, 1) pred_c = pred_c.view(bs, num_points) how_max, which_max = torch.max(pred_c, 1) pred_t = pred_t.view(bs * num_points, 1, 3) my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy() my_t = (points.view(bs * num_points, 1, 3) + pred_t)[which_max[0]].view(-1).cpu().data.numpy() my_pred = np.append(my_r, my_t) for ite in range(0, iteration): T = Variable(torch.from_numpy( my_t.astype(np.float32))).cuda().view(1, 
3).repeat( num_points, 1).contiguous().view(1, num_points, 3) my_mat = quaternion_matrix(my_r) R = Variable(torch.from_numpy(my_mat[:3, :3].astype( np.float32))).cuda().view(1, 3, 3) my_mat[0:3, 3] = my_t new_points = torch.bmm((points - T), R).contiguous() pred_r, pred_t = self.refiner(new_points, emb, idx) pred_r = pred_r.view(1, 1, -1) pred_r = pred_r / (torch.norm(pred_r, dim=2).view(1, 1, 1)) my_r_2 = pred_r.view(-1).cpu().data.numpy() my_t_2 = pred_t.view(-1).cpu().data.numpy() my_mat_2 = quaternion_matrix(my_r_2) my_mat_2[0:3, 3] = my_t_2 my_mat_final = np.dot( my_mat, my_mat_2) # refine pose means two matrix multiplication my_r_final = copy.deepcopy(my_mat_final) my_r_final[0:3, 3] = 0 my_r_final = quaternion_from_matrix(my_r_final, True) my_t_final = np.array( [my_mat_final[0][3], my_mat_final[1][3], my_mat_final[2][3]]) my_pred = np.append(my_r_final, my_t_final) my_r = my_r_final my_t = my_t_final my_r = quaternion_matrix(my_r)[:3, :3] #print(my_t.shape) my_t = np.array(my_t) #print(my_t.shape) #print(my_r.shape) target = np.dot(self.scaled, my_r.T) target = np.add(target, my_t) p0 = (int((target[0][0] / target[0][2]) * self.cam_fx + self.cam_cx), int((target[0][1] / target[0][2]) * self.cam_fy + self.cam_cy)) p1 = (int((target[1][0] / target[1][2]) * self.cam_fx + self.cam_cx), int((target[1][1] / target[1][2]) * self.cam_fy + self.cam_cy)) p2 = (int((target[2][0] / target[2][2]) * self.cam_fx + self.cam_cx), int((target[2][1] / target[2][2]) * self.cam_fy + self.cam_cy)) p3 = (int((target[3][0] / target[3][2]) * self.cam_fx + self.cam_cx), int((target[3][1] / target[3][2]) * self.cam_fy + self.cam_cy)) p4 = (int((target[4][0] / target[4][2]) * self.cam_fx + self.cam_cx), int((target[4][1] / target[4][2]) * self.cam_fy + self.cam_cy)) p5 = (int((target[5][0] / target[5][2]) * self.cam_fx + self.cam_cx), int((target[5][1] / target[5][2]) * self.cam_fy + self.cam_cy)) p6 = (int((target[6][0] / target[6][2]) * self.cam_fx + self.cam_cx), int((target[6][1] / target[6][2]) * self.cam_fy + self.cam_cy)) p7 = (int((target[7][0] / target[7][2]) * self.cam_fx + self.cam_cx), int((target[7][1] / target[7][2]) * self.cam_fy + self.cam_cy)) cv2.line(rgb_original, p0, p1, (255, 255, 255), 2) cv2.line(rgb_original, p0, p3, (255, 255, 255), 2) cv2.line(rgb_original, p0, p4, (255, 255, 255), 2) cv2.line(rgb_original, p1, p2, (255, 255, 255), 2) cv2.line(rgb_original, p1, p5, (255, 255, 255), 2) cv2.line(rgb_original, p2, p3, (255, 255, 255), 2) cv2.line(rgb_original, p2, p6, (255, 255, 255), 2) cv2.line(rgb_original, p3, p7, (255, 255, 255), 2) cv2.line(rgb_original, p4, p5, (255, 255, 255), 2) cv2.line(rgb_original, p4, p7, (255, 255, 255), 2) cv2.line(rgb_original, p5, p6, (255, 255, 255), 2) cv2.line(rgb_original, p6, p7, (255, 255, 255), 2) #print('estimated rotation is :{0}'.format(my_r)) #print('estimated translation is :{0}'.format(my_t)) #time2 = time.time() #print('inference time is :{0}'.format(time2-time1)) cv2.imshow('rgb', cv2.cvtColor(rgb_original, cv2.COLOR_BGR2RGB)) # OpenCV uses BGR model cv2.waitKey( 1 ) # pass any integr except 0, as 0 will freeze the display windows
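# The eight p0..p7 assignments in the callback above repeat the same pinhole projection by
# hand. A compact equivalent, offered as a sketch: it assumes the corners are already
# expressed in the camera frame and that fx, fy, cx, cy are the intrinsics used above.
import numpy as np

def project_points(points_3d, fx, fy, cx, cy):
    """Project (N, 3) camera-frame points to integer pixel coordinates [(u, v), ...]."""
    pts = np.asarray(points_3d, dtype=np.float64)
    u = pts[:, 0] / pts[:, 2] * fx + cx      # perspective divide, then scale and offset
    v = pts[:, 1] / pts[:, 2] * fy + cy
    return [(int(ui), int(vi)) for ui, vi in zip(u, v)]

# e.g. p0, p1, p2, p3, p4, p5, p6, p7 = project_points(target[:8], cam_fx, cam_fy, cam_cx, cam_cy)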
def estimateSimilarityUmeyamaCoords(SourceHom0, TargetHom0, SourceHom1, TargetHom1, joint_axis, joint_pts=None, rt_ref=[None, None], rt_pre=[None, None], \ viz=False, viz_ransac=False, viz_sample=False, use_jt_pts=False, use_ext_rot=False, verbose=False, index=0): """ SourceHom0: [4, 5] joint_pts : [4, 5] joint_axis: [4, 1] """ U, D0, Vh = svd_pts(SourceHom0, TargetHom0) # R0 = np.matmul(U, Vh).T # Transpose is the one that works U, D1, Vh = svd_pts(SourceHom1, TargetHom1) # R1 = np.matmul(U, Vh).T # # begin EM max_iter = 100 # max_iter = 1 # todo StopThreshold = 2 * np.cos(0.5 / 180 * np.pi) if viz_sample: plot3d_pts([[ SourceHom0[:3].transpose(), SourceHom1[:3].transpose(), TargetHom0[:3].transpose(), TargetHom1[:3].transpose(), joint_pts[:3].transpose() ]], [['source0', 'source1', 'target0', 'target1', 'joint_points']], s=100, title_name=['sampled points'], color_channel=None, save_fig=False, sub_name='default') joint_axis_tiled0 = np.tile(joint_axis, (1, int(SourceHom0.shape[1] / 5))) joint_axis_tiled1 = np.tile(joint_axis, (1, int(SourceHom1.shape[1] / 5))) # joint_axis_tiled0 = np.tile(joint_axis, (1, int(SourceHom0.shape[1]))) # joint_axis_tiled1 = np.tile(joint_axis, (1, int(SourceHom1.shape[1]))) if use_ext_rot and rt_pre[0] is not None: # print('using external rotation') R0 = rt_pre[0][:3, :3].T R1 = rt_pre[1][:3, :3].T else: r_list = [[R0], [R1]] for i in range(max_iter): rotated_axis = np.matmul(R0.T, joint_axis_tiled1[:3]) # [3, 1] U, D1, Vh = svd_pts(SourceHom1, TargetHom1, joint_axis_tiled1, rotated_axis, viz_sample=viz_sample, index=2 * i) R1_new = np.matmul(U, Vh).T rotated_axis = np.matmul(R1_new.T, joint_axis_tiled0[:3]) U, D0, Vh = svd_pts(SourceHom0, TargetHom0, joint_axis_tiled0, rotated_axis, viz_sample=viz_sample, index=2 * i + 1) R0_new = np.matmul(U, Vh).T eigen_sum0 = np.trace(np.matmul(R0_new.T, R0)) - 1 eigen_sum1 = np.trace(np.matmul(R1_new.T, R1)) - 1 R0 = R0_new R1 = R1_new r_list[0].append(R0) r_list[1].append(R1) if eigen_sum0 > StopThreshold and eigen_sum1 > StopThreshold: # if verbose: # print('Algorithm converges at {}th iteration for Coordinate Descent'.format(i)) break if viz_ransac and index < 10: # and SourceHom0.shape[1]>5: ang_dis_list = [[], []] for j in range(2): q_gt = quaternion_from_matrix(rt_ref[j][:3, :3]) for rot_iter in r_list[j]: q_iter = quaternion_from_matrix(rot_iter.T) ang_dis = 2 * np.arccos(sum(q_iter * q_gt)) * 180 / np.pi if ang_dis > 180: ang_dis = 360 - ang_dis ang_dis_list[j].append(ang_dis) fig = plt.figure(dpi=200) for j in range(2): ax = plt.subplot(1, 2, j + 1) plt.plot(range(len(ang_dis_list[j])), ang_dis_list[j]) plt.xlabel('iteration') plt.ylabel('rotation error') plt.title('{}th sampling part {}'.format(index, j)) plt.show() Rs = [R0, R1] if use_jt_pts: if viz_sample: plot3d_pts([[ SourceHom0[:3].transpose(), SourceHom1[:3].transpose(), TargetHom0[:3].transpose(), TargetHom1[:3].transpose(), joint_pts[:3].transpose() ]], [['source0', 'source1', 'target0', 'target1', 'joint_points']], s=100, title_name=['sampled points'], color_channel=None, save_fig=False, sub_name='default') final_scale, Ts, OutTrans = compute_scale_translation( [SourceHom0, SourceHom1], [TargetHom0, TargetHom1], Rs, joint_pts) if verbose: print("scale by adding joints are \n: {}".format(final_scale)) else: if viz_sample: plot3d_pts([[ SourceHom0[:3].transpose(), SourceHom1[:3].transpose(), TargetHom0[:3].transpose(), TargetHom1[:3].transpose() ]], [['source0', 'source1', 'target0', 'target1']], s=100, title_name=['points after sampling'], 
color_channel=None, save_fig=False, sub_name='default') final_scale0, T0, OutTrans0 = est_ST(SourceHom0, TargetHom0, D0, Rs[0]) final_scale1, T1, OutTrans1 = est_ST(SourceHom1, TargetHom1, D1, Rs[1]) final_scale = [final_scale0, final_scale1] Ts = [T0, T1] OutTrans = [OutTrans0, OutTrans1] if verbose: print("scale by direct solving per part are \n: {}".format( final_scale)) return final_scale, Rs, Ts, OutTrans
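# Each call to svd_pts above boils down to a Kabsch/Umeyama rotation estimate: centre both
# point sets, form their covariance, and take its SVD. A minimal single-part sketch with
# homogeneous 4xN inputs as in this file; it ignores the joint-axis constraint and
# reflection handling, so treat it as illustration only. Note that the function above
# applies an extra transpose to match its own convention.
import numpy as np

def estimate_rotation(source_hom, target_hom):
    """Rotation aligning source to target; both are 4xN homogeneous point arrays."""
    src = source_hom[:3] - source_hom[:3].mean(axis=1, keepdims=True)   # centre source
    tgt = target_hom[:3] - target_hom[:3].mean(axis=1, keepdims=True)   # centre target
    cov = tgt @ src.T                                                   # 3x3 covariance
    U, D, Vh = np.linalg.svd(cov)
    R = U @ Vh                                                          # best-fit rotation (up to reflection)
    return R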
def main(): cfg = setup_config() pipeline = rs.pipeline() realsense_cfg = setup_realsense() pipeline.start(realsense_cfg) # Start streaming visualizer = predictor.VisualizationDemo(cfg) ref_frame_axies = [] ref_frame_label = [] min_distance = 0.9 label_cnt = 0 frameth = 0 my_t_pool = {} my_r_pool = {} while True: frameth += 1 cur_frame_axies = [] cur_frame_label = [] my_t_per_frame = [] my_r_per_frame = [] align = rs.align(rs.stream.color) frames = pipeline.wait_for_frames() aligned_frames = align.process(frames) rgb = aligned_frames.get_color_frame() rgb = np.asanyarray(rgb.get_data()) frame = rgb.copy() # Do instance segmentation start = time.time() segmentation, vis = visualizer.run_on_image(frame) #print("Time = " + str(time.time()-start)) cv2.imshow('Mask', vis) cv2.waitKey(1) # Get segmentation mask ori_label = segmentation['instances'].pred_masks.cpu().numpy() label = np.sum(ori_label, axis=0).astype(np.uint8) label = np.where(label != 0, 255, label) label = Image.fromarray(label).convert("L") label = np.asarray(label.convert('RGB')).astype(np.uint8) bboxes = segmentation['instances'].pred_boxes.tensor.cpu().numpy() xyxy_bboxes = bboxes bboxes = bbox_convert(bboxes) if len(bboxes) > 0: #depth_frames = frames.get_depth_frame() depth_frames = aligned_frames.get_depth_frame() video_profile = depth_frames.profile.as_video_stream_profile() intr = video_profile.get_intrinsics() depth = np.asanyarray(depth_frames.get_data()) #centers = segmentation['instances'].pred_boxes.get_centers() if len(my_t_pool) > 0: last_key = list(my_t_pool.keys())[-1] for i in range(0, len(bboxes)): bbox_xyxy = np.array(list(xyxy_bboxes[i])) bbox = list(bboxes[i]) print("Bounding Box:" + str(bbox)) #center = bboxes[i].get_centers() #center = centers[i].cpu().numpy() num_idx = float('nan') max_value = 0 label_of_object = ori_label[i].astype(np.uint8) label_of_object = np.where(label_of_object != 0, 255, label_of_object) label_of_object = Image.fromarray(label_of_object).convert("L") label_of_object = np.asarray( label_of_object.convert('RGB')).astype(np.uint8) if len(ref_frame_label) > 0: iou_list = [] b = bbox_xyxy a = np.array(ref_frame_axies) for k in range(len(ref_frame_axies)): iou = iou_score(a[k], b) iou_list.append(iou) iou_list = np.array(iou_list) max_value = iou_list.max() if (max_value > min_distance): min_idx = np.where(iou_list == max_value)[0][0] num_idx = ref_frame_label[min_idx] if (math.isnan(num_idx)): num_idx = label_cnt label_cnt += 1 cur_frame_label.append(num_idx) cur_frame_axies.append(bbox_xyxy) print(max_value) if (frameth == 1) or (max_value < 0.9) or ( i > len(my_t_pool[last_key]) - 1) or (frameth % 20 == 0): pos_text = (bbox[0], bbox[1]) class_id = segmentation['instances'].pred_classes[i].cpu( ).data.numpy() print("Class: " + str(class_id)) #idx = class_id if class_id == 0: idx = 0 if class_id == 2: idx = 1 model_points = model_points_list[idx] mask_depth = ma.getmaskarray(ma.masked_not_equal(depth, 0)) #mask_label = ma.getmaskarray(ma.masked_equal(label, np.array(255))) mask_label = ma.getmaskarray( ma.masked_equal(label_of_object, np.array([255, 255, 255])))[:, :, 0] mask = mask_label * mask_depth rmin, rmax, cmin, cmax = posenet_deploy.get_bbox(bbox) # choose choose = mask[rmin:rmax, cmin:cmax].flatten().nonzero()[0] if len(choose) == 0: choose = torch.LongTensor([0]) if len(choose) > num_points: c_mask = np.zeros(len(choose), dtype=int) c_mask[:num_points] = 1 np.random.shuffle(c_mask) choose = choose[c_mask.nonzero()] else: choose = np.pad(choose, (0, num_points - len(choose)), 
'wrap') depth_masked = depth[ rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype( np.float32) xmap_masked = xmap[ rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype( np.float32) ymap_masked = ymap[ rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype( np.float32) choose = np.array([choose]) # point cloud pt2 = depth_masked / cam_scale pt0 = (ymap_masked - cam_cx) * pt2 / cam_fx pt1 = (xmap_masked - cam_cy) * pt2 / cam_fy cloud = np.concatenate((pt0, pt1, pt2), axis=1) cloud = cloud / 1000.0 # print(cloud.shape) # cropped img #img_masked = rgb[:, :, :3] img_masked = rgb[:, :, ::-1] # bgr to rgb img_masked = np.transpose(img_masked, (2, 0, 1)) img_masked = img_masked[:, rmin:rmax, cmin:cmax] my_mask = np.transpose(label_of_object, (2, 0, 1)) my_mask = my_mask[:, rmin:rmax, cmin: cmax] ## Added by me to crop the mask mask_img = np.transpose(my_mask, (1, 2, 0)) img_rgb = np.transpose(img_masked, (1, 2, 0)) croped_img_mask = cv2.bitwise_and(img_rgb, mask_img) crop_image_to_check = croped_img_mask.copy() cv2.imshow("mask_crop", croped_img_mask) croped_img_mask = np.transpose(croped_img_mask, (2, 0, 1)) # Variables cloud = torch.from_numpy(cloud.astype( np.float32)).unsqueeze(0) choose = torch.LongTensor(choose.astype( np.int32)).unsqueeze(0) #img_masked = torch.from_numpy(img_masked.astype(np.float32)).unsqueeze(0) img_masked = torch.from_numpy( croped_img_mask.astype(np.float32)).unsqueeze(0) index = torch.LongTensor([idx]).unsqueeze( 0) # Specify which object cloud = Variable(cloud).cuda() choose = Variable(choose).cuda() img_masked = Variable(img_masked).cuda() index = Variable(index).cuda() # Deploy with torch.no_grad(): pred_r, pred_t, pred_c, emb = estimator( img_masked, cloud, choose, index) pred_r = pred_r / torch.norm(pred_r, dim=2).view( 1, num_points, 1) pred_c = pred_c.view(bs, num_points) how_max, which_max = torch.max(pred_c, 1) pred_t = pred_t.view(bs * num_points, 1, 3) points = cloud.view(bs * num_points, 1, 3) my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy() my_t = (points.view(bs * num_points, 1, 3) + pred_t)[which_max[0]].view(-1).cpu().data.numpy() my_pred = np.append(my_r, my_t) # Refinement for ite in range(0, iteration): T = Variable(torch.from_numpy(my_t.astype( np.float32))).cuda().view(1, 3).repeat( num_points, 1).contiguous().view(1, num_points, 3) my_mat = quaternion_matrix(my_r) R = Variable( torch.from_numpy(my_mat[:3, :3].astype( np.float32))).cuda().view(1, 3, 3) my_mat[0:3, 3] = my_t new_cloud = torch.bmm((cloud - T), R).contiguous() pred_r, pred_t = refiner(new_cloud, emb, index) pred_r = pred_r.view(1, 1, -1) pred_r = pred_r / (torch.norm(pred_r, dim=2).view( 1, 1, 1)) my_r_2 = pred_r.view(-1).cpu().data.numpy() my_t_2 = pred_t.view(-1).cpu().data.numpy() my_mat_2 = quaternion_matrix(my_r_2) my_mat_2[0:3, 3] = my_t_2 my_mat_final = np.dot(my_mat, my_mat_2) my_r_final = copy.deepcopy(my_mat_final) my_r_final[0:3, 3] = 0 my_r_final = quaternion_from_matrix(my_r_final, True) my_t_final = np.array([ my_mat_final[0][3], my_mat_final[1][3], my_mat_final[2][3] ]) my_pred = np.append(my_r_final, my_t_final) my_r = my_r_final my_t = my_t_final my_r_matrix = quaternion_matrix(my_r)[:3, :3] #print("Time = " + str(time.time()-start)) my_t_per_frame.append(my_t) my_r_per_frame.append(my_r_matrix) #rotation = Rot.from_matrix(my_r_matrix) #angle = rotation.as_euler('xyz', degrees=True) my_t = np.around(my_t, 5) #print("translation vector = " + str(my_t)) #print("rotation angles = " + str(my_r)) frame = 
posenet_deploy.get_3d_bbox(frame, model_points, my_r_matrix, my_t) frame = posenet_deploy.draw_axes(frame, my_r_matrix, my_t) if check_inverted(crop_image_to_check): cv2.putText(frame, str(num_idx) + "_inverted", pos_text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2, cv2.LINE_AA) else: cv2.putText(frame, str(num_idx), pos_text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2, cv2.LINE_AA) #cv2.putText(frame, str(num_idx), pos_text, cv2.FONT_HERSHEY_SIMPLEX, # 0.5, (0,255,0), 2, cv2.LINE_AA) posenet_deploy.putText(frame, i, num_idx, class_id, my_t) #cv2.imshow('Result', rgb) #cv2.waitKey(1) else: rmin, rmax, cmin, cmax = posenet_deploy.get_bbox(bbox) img_masked = rgb[:, :, ::-1] # bgr to rgb img_masked = np.transpose(img_masked, (2, 0, 1)) img_masked = img_masked[:, rmin:rmax, cmin:cmax] my_mask = np.transpose(label_of_object, (2, 0, 1)) my_mask = my_mask[:, rmin:rmax, cmin: cmax] ## Added by me to crop the mask mask_img = np.transpose(my_mask, (1, 2, 0)) img_rgb = np.transpose(img_masked, (1, 2, 0)) croped_img_mask = cv2.bitwise_and(img_rgb, mask_img) crop_image_to_check = croped_img_mask.copy() pos_text = (bbox[0], bbox[1]) last_key = list(my_t_pool.keys())[-1] print("POOL: " + str(my_t_pool[last_key])) class_id = segmentation['instances'].pred_classes[i].cpu( ).data.numpy() my_t = my_t_pool[last_key][min_idx] my_r_matrix = my_r_pool[last_key][min_idx] my_t_per_frame.append(my_t) my_r_per_frame.append(my_r_matrix) frame = posenet_deploy.get_3d_bbox(frame, model_points, my_r_matrix, my_t) frame = posenet_deploy.draw_axes(frame, my_r_matrix, my_t) if check_inverted(crop_image_to_check): cv2.putText(frame, str(num_idx) + "_inverted", pos_text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2, cv2.LINE_AA) else: cv2.putText(frame, str(num_idx), pos_text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2, cv2.LINE_AA) #cv2.putText(frame, str(num_idx), pos_text, cv2.FONT_HERSHEY_SIMPLEX, # 0.5, (0,255,0), 2, cv2.LINE_AA) posenet_deploy.putText(frame, i, num_idx, class_id, my_t) if len(my_t_per_frame) > 0: my_t_pool[frameth] = my_t_per_frame my_r_pool[frameth] = my_r_per_frame ref_frame_label = cur_frame_label ref_frame_axies = cur_frame_axies end = time.time() - start cv2.putText(frame, "Time processing: " + str(round(end, 3)) + " seconds", (100, 700), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, cv2.LINE_AA) cv2.imshow('Result', frame) cv2.waitKey(1) else: # Show images #video_writer.write(rgb) cv2.imshow('Result', rgb) cv2.waitKey(1) pipeline.stop()
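# The per-frame track association above keys on iou_score() between axis-aligned
# [x1, y1, x2, y2] boxes. That helper is defined elsewhere; a standard IoU sketch that is
# assumed (not guaranteed) to match its behaviour:
def iou_xyxy(box_a, box_b):
    """Intersection-over-union of two [x1, y1, x2, y2] boxes."""
    x1 = max(box_a[0], box_b[0])
    y1 = max(box_a[1], box_b[1])
    x2 = min(box_a[2], box_b[2])
    y2 = min(box_a[3], box_b[3])
    inter = max(0.0, x2 - x1) * max(0.0, y2 - y1)
    area_a = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
    area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
    return inter / (area_a + area_b - inter + 1e-12)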
def find_essential(i1, i2):
    # quick sanity checks
    if i1 == i2:
        return None
    if not i2.name in i1.match_list:
        return None
    if len(i1.match_list[i2.name]) == 0:
        return None
    if not i1.kp_list or not len(i1.kp_list):
        i1.load_features()
    if not i2.kp_list or not len(i2.kp_list):
        i2.load_features()

    # camera calibration
    K = camera.get_K()
    IK = np.linalg.inv(K)

    # set up data structures for the cv2 call
    uv1 = []
    uv2 = []
    indices = []
    for pair in i1.match_list[i2.name]:
        uv1.append(i1.kp_list[pair[0]].pt)
        uv2.append(i2.kp_list[pair[1]].pt)
    uv1 = np.float32(uv1)
    uv2 = np.float32(uv2)
    E, mask = cv2.findEssentialMat(uv1, uv2, K, method=method)
    print(i1.name, 'vs', i2.name)
    print("E:\n", E)
    print()
    (n, R, tvec, mask) = cv2.recoverPose(E, uv1, uv2, K)
    print('  inliers:', n, 'of', len(uv1))
    print('  R:', R)
    print('  tvec:', tvec)

    # convert R to homogeneous
    #Rh = np.concatenate((R, np.zeros((3,1))), axis=1)
    #Rh = np.concatenate((Rh, np.zeros((1,4))), axis=0)
    #Rh[3,3] = 1

    # extract the equivalent quaternion, and invert
    q = transformations.quaternion_from_matrix(R)
    q_inv = transformations.quaternion_inverse(q)

    # compare the GPS-derived course between the two cameras with the course implied
    # by the recovered relative pose
    (ned1, ypr1, quat1) = i1.get_camera_pose()
    (ned2, ypr2, quat2) = i2.get_camera_pose()
    diff = np.array(ned2) - np.array(ned1)
    dist = np.linalg.norm(diff)
    dir = diff / dist
    print('dist:', dist, 'ned dir:', dir[0], dir[1], dir[2])
    crs_gps = 90 - math.atan2(dir[0], dir[1]) * r2d
    if crs_gps < 0: crs_gps += 360
    if crs_gps > 360: crs_gps -= 360
    print('crs_gps: %.1f' % crs_gps)

    Rbody2ned = i1.get_body2ned()
    cam2body = i1.get_cam2body()
    body2cam = i1.get_body2cam()
    est_dir = Rbody2ned.dot(cam2body).dot(R).dot(tvec)
    est_dir = est_dir / np.linalg.norm(est_dir)  # normalize
    print('est dir:', est_dir.tolist())
    crs_fit = 90 - math.atan2(-est_dir[0], -est_dir[1]) * r2d
    if crs_fit < 0: crs_fit += 360
    if crs_fit > 360: crs_fit -= 360
    print('est crs_fit: %.1f' % crs_fit)
    print("est yaw error: %.1f" % (crs_fit - crs_gps))
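# Both crs_gps and crs_fit above convert a NED (north, east, down) direction into a
# compass course in degrees and wrap it into [0, 360). The same arithmetic as a small
# helper, using the r2d = 180 / pi convention assumed above:
import math

def ned_to_course_deg(north, east):
    """Compass course (degrees clockwise from north) of a NED-plane direction."""
    r2d = 180.0 / math.pi
    crs = 90.0 - math.atan2(north, east) * r2d   # atan2 measures from east, course from north
    return crs % 360.0                           # wrap into [0, 360)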