def match():
    global lock, rgb, depth
    if lock:
        matches = detector.match([rgb, depth], 65.0, objIds, masks=[])

        # Collect [x1, y1, x2, y2, score] boxes for all template matches
        dets = np.zeros(shape=(len(matches), 5))
        for i in range(len(matches)):
            match = matches[i]
            templateInfo = infos[match.class_id]
            info = templateInfo[match.template_id]
            dets[i, 0] = match.x
            dets[i, 1] = match.y
            dets[i, 2] = match.x + info['width']
            dets[i, 3] = match.y + info['height']
            dets[i, 4] = match.similarity

        # Suppress overlapping detections
        idx = nms(dets, 0.5)

        # ts holds one 3-vector per kept detection, so it must be 2D
        ts = np.zeros(shape=(len(idx), 3))
        ts_scores = np.zeros(shape=(len(idx),))
        Rs = []
        ids = []
        confidences = []
        for i in range(len(idx)):
            match = matches[idx[i]]
            templateInfo = infos[match.class_id]
            info = templateInfo[match.template_id]
            model = models[match.class_id]
            K_match = info['cam_K']
            R_match = info['cam_R_w2c']
            t_match = info['cam_t_w2c']

            # Render the model at the template pose and refine it against the
            # observed depth image
            depth_ren = render(model, depth.shape, K_match, R_match, t_match,
                               mode='depth')
            poseRefine.process(depth.astype(np.uint16),
                               depth_ren.astype(np.uint16),
                               K_cam.astype(np.float32),
                               K_match.astype(np.float32),
                               R_match.astype(np.float32),
                               t_match.astype(np.float32),
                               match.x, match.y)

            ts[i, :] = np.reshape(poseRefine.getT(), newshape=(3,))
            Rs.append(poseRefine.getR())
            ids.append(match.class_id)
            confidences.append(match.similarity)
            ts_scores[i] = -poseRefine.getResidual()

        # Second suppression pass in translation space
        idx = nms_norms(ts, ts_scores, 40.0)

        results = []
        for i in idx:
            result = {}
            result['id'] = ids[i]
            result['R'] = Rs[i]
            result['t'] = ts[i, :]
            result['s'] = confidences[i]
            results.append(result)

        publishResults(results)
        lock = False
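# The `nms` helper used above is provided by the surrounding project; its exact
# implementation is not shown here. Below is only an illustrative greedy
# IoU-based sketch, assuming each row of `dets` is [x1, y1, x2, y2, score] as
# built in match(); it is not the project's implementation.
import numpy as np

def nms_sketch(dets, iou_thresh):
    """Greedy non-maximum suppression; returns indices of kept boxes."""
    x1, y1, x2, y2 = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3]
    scores = dets[:, 4]
    areas = (x2 - x1) * (y2 - y1)
    order = scores.argsort()[::-1]  # highest score first
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # Intersection of the top-scoring box with all remaining boxes
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        inter = np.maximum(0.0, xx2 - xx1) * np.maximum(0.0, yy2 - yy1)
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        # Drop boxes overlapping the kept one by more than the threshold
        order = order[1:][iou <= iou_thresh]
    return keep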
    color = (1, 1, 1)
else:
    color = tuple(colors[(obj_id - 1) % len(colors)])
color_uint8 = tuple([int(255 * c) for c in color])

model = models[gt['obj_id']]
K = scene_info[im_id]['cam_K']
R = gt['cam_R_m2c']
t = gt['cam_t_m2c']

# Rendering
if vis_rgb:
    if vis_orig_color:
        m_rgb = renderer.render(model, im_size, K, R, t, mode='rgb')
    else:
        m_rgb = renderer.render(model, im_size, K, R, t, mode='rgb',
                                surf_color=color)

if vis_depth or (vis_rgb and vis_rgb_resolve_visib):
    m_depth = renderer.render(model, im_size, K,
seg_mask = seg_result == 3
seg_test_cloud = cxx_3d_seg.depth2cloud(depth, seg_mask.astype(np.uint8),
                                        K.astype(np.float32))
test_pose = cxx_3d_seg.pose_estimation(seg_test_cloud, model_path)
render_R = test_pose[0:3, 0:3]
render_t = test_pose[0:3, 3:4]

elapsed_time = time.time() - start_time
# print("pose refine time: {}s".format(elapsed_time))

render_rgb, render_depth = render(model, im_size, render_K, render_R, render_t,
                                  surf_color=[0, 1, 0])
visible_mask = render_depth < depth
mask = render_depth > 0
mask = mask.astype(np.uint8)
rgb_mask = np.dstack([mask] * 3)
render_rgb = render_rgb * rgb_mask
render_rgb = rgb * (1 - rgb_mask) + render_rgb
draw_axis(rgb, render_R, render_t, render_K)

visual = True
# visual = False
if visual:
    cv2.namedWindow('rgb_render')
gt_stats = {}
for im_id in im_ids:
    print('dataset: {}, scene/obj: {}, im: {}'.format(
        dataset, data_id, im_id))

    K = info[im_id]['cam_K']
    depth_path = dp[depth_mpath_key].format(data_id, im_id)
    depth_im = inout.load_depth(depth_path)
    depth_im *= dp['cam']['depth_scale']  # to [mm]
    im_size = (depth_im.shape[1], depth_im.shape[0])

    gt_stats[im_id] = []
    for gt_id, gt in enumerate(gts[im_id]):
        depth_gt = renderer.render(models[gt['obj_id']], im_size, K,
                                   gt['cam_R_m2c'], gt['cam_t_m2c'],
                                   mode='depth')

        # Get distance images
        dist_gt = misc.depth_im_to_dist_im(depth_gt, K)
        dist_im = misc.depth_im_to_dist_im(depth_im, K)

        # Estimation of visibility mask
        visib_gt = visibility.estimate_visib_mask_gt(
            dist_im, dist_gt, delta)

        # Visible surface fraction
        obj_mask_gt = dist_gt > 0
        px_count_valid = np.sum(dist_im[obj_mask_gt] > 0)
        px_count_visib = visib_gt.sum()
view_sampler.save_vis(out_views_vis_mpath.format(str(radius)),
                      views, views_level)

# Render the object model from all the views
for view_id, view in enumerate(views):
    if view_id % 10 == 0:
        print('obj,radius,view: ' + str(obj_id) + ',' +
              str(radius) + ',' + str(view_id))

    # Render RGB image
    rgb = renderer.render(model, im_size_rgb, K_rgb, view['R'], view['t'],
                          clip_near, clip_far, texture=model_texture,
                          ambient_weight=ambient_weight, shading=shading,
                          mode='rgb')

    # The OpenCV function was used for rendering of the training images
    # provided for the SIXD Challenge 2017.
    rgb = cv2.resize(rgb, par['cam']['im_size'], interpolation=cv2.INTER_AREA)
    # rgb = scipy.misc.imresize(rgb, par['cam']['im_size'][::-1], 'bicubic')

    # Save the rendered images
    inout.save_im(out_rgb_mpath.format(obj_id, im_id), rgb)
ren_depth = np.zeros(depth.shape, float)  # np.float is deprecated in recent NumPy
gt_ids_curr = range(len(scene_gt[im_id]))
if gt_ids:
    gt_ids_curr = set(gt_ids_curr).intersection(gt_ids)

for gt_id in gt_ids_curr:
    gt = scene_gt[im_id][gt_id]
    model = models[gt['obj_id']]
    K = scene_info[im_id]['cam_K']
    R = gt['cam_R_m2c']
    t = gt['cam_t_m2c']

    # Rendering
    if vis_rgb:
        m_rgb = renderer.render(model, im_size, K, R, t, mode='rgb')

    if vis_depth or (vis_rgb and vis_rgb_resolve_visib):
        m_depth = renderer.render(model, im_size, K, R, t, mode='depth')

        # Get mask of the surface parts that are closer than the
        # surfaces rendered before
        visible_mask = np.logical_or(ren_depth == 0, m_depth < ren_depth)
        mask = np.logical_and(m_depth != 0, visible_mask)
        ren_depth[mask] = m_depth[mask].astype(ren_depth.dtype)
# Sample views
views, views_level = view_sampler.sample_views(
    min_n_views, radius, azimuth_range, elev_range)
print('Sampled views: ' + str(len(views)))

# Render the object model from all the views
for view_id, view in enumerate(views):
    if view_id % 10 == 0:
        print('obj,radius,view: ' + str(obj_id) + ',' +
              str(radius) + ',' + str(view_id))

    # Render depth image
    depth = render(model, dp['cam']['im_size'], dp['cam']['K'],
                   view['R'], view['t'], clip_near, clip_far, mode='depth')

    # Convert depth so it is in the same units as the real test images
    depth /= dp['cam']['depth_scale']
    depth = depth.astype(np.uint16)

    # Render RGB image
    rgb = render(model, im_size_rgb, K_rgb, view['R'], view['t'], clip_near,
file_name = os.path.join(p0, '{:06d}'.format(i) + '-color.png')
print(file_name)
rgb = cv2.imread(file_name, cv2.IMREAD_UNCHANGED)
im_size = [rgb.shape[1], rgb.shape[0]]
cv2.imshow("rgb", rgb)
cv2.waitKey(0)

meta_file = os.path.join(p0, '{:06d}'.format(i) + '-meta.mat')
meta = scipy.io.loadmat(meta_file)
# print('meta keys', meta.keys())

K = meta['intrinsic_matrix']
print('K', K)
poses = meta['poses']
R = poses[:, :3]
print('R', R)
t = poses[:, 3]
print('t', t)

mdl_proj = renderer.render(model, im_size, K, R, t, mode='rgb',
                           clip_near=0, clip_far=2000, shading='flat')
print("dtype", mdl_proj.dtype)
print("max min", np.amax(mdl_proj), np.amin(mdl_proj))
cv2.imshow('model', mdl_proj)
cv2.waitKey(0)
# Test just one segmentation result; this may break because the selected
# segment is not guaranteed to be an object mask.
seg_mask = (indices == 3)
seg_test_cloud = np.zeros_like(cloud)
seg_test_cloud[seg_mask] = cloud[seg_mask]
test_pose = cxx_3d_seg_pybind.pose_estimation(seg_test_cloud, model_path)
render_R = test_pose[0:3, 0:3]
render_t = test_pose[0:3, 3:4]

elapsed_time = time.time() - start_time
# print("pose refine time: {}s".format(elapsed_time))

render_rgb, render_depth = render(model, im_size, render_K, render_R, render_t,
                                  surf_color=[0, 1, 0])
visible_mask = render_depth < depth
mask = render_depth > 0
mask = mask.astype(np.uint8)
rgb_mask = np.dstack([mask] * 3)

# Overlay the rendered model on the input image and draw the pose axes
render_rgb = render_rgb * rgb_mask
render_rgb = rgb * (1 - rgb_mask) + render_rgb
draw_axis(rgb, render_R, render_t, render_K)

visual = True
# visual = False
if visual:
    cv2.namedWindow('rgb')
    cv2.imshow('rgb', rgb)
    cv2.namedWindow('rgb_render')
for radius in radii:
    # Sample views
    views, views_level = view_sampler.sample_views(
        min_n_views, radius, azimuth_range, elev_range,
        tilt_range=(-math.pi / 2, math.pi / 2), tilt_step=0.2 * math.pi)
    print('Sampled views: ' + str(len(views)))

    # Render the object model from all the views
    for view_id, view in enumerate(views):
        if view_id % 10 == 0:
            print('obj,radius,view: ' + str(obj_id) + ',' +
                  str(radius) + ',' + str(view_id))

        # Render depth image
        depth = render(model, dp['cam']['im_size'], dp['cam']['K'],
                       view['R'], view['t'], clip_near, clip_far, mode='depth')

        # Convert depth so it is in the same units as the real test images
        depth /= dp['cam']['depth_scale']
        depth = depth.astype(np.uint16)

        # Render RGB image
        rgb = render(model, im_size_rgb, K_rgb, view['R'], view['t'],
                     clip_near, clip_far, texture=model_texture,
                     ambient_weight=ambient_weight, shading=shading, mode='rgb')
        rgb = cv2.resize(rgb, dp['cam']['im_size'], interpolation=cv2.INTER_AREA)

        K = dp['cam']['K']
        R = view['R']
poses = np.array(meta['poses']).reshape(4, 4)
R = poses[:3, :3]
# print('R', R)
t = poses[:3, 3]
t /= 1000.  # to meters
# print('t', t)

# Update with tuning
Rt44 = np.eye(4)
Rt44[:3, :3] = R
Rt44[:3, 3] = t
Rt44 = np.dot(Rt44, TT)
R = Rt44[:3, :3]
t = Rt44[:3, 3]

mdl_proj, mdl_proj_depth = renderer.render(model, im_size, K, R, t,
                                           mode='rgb+depth',
                                           clip_near=.3, clip_far=6.,
                                           shading='flat')
# print("dtype", mdl_proj.dtype)
# print("max min", np.amax(mdl_proj), np.amin(mdl_proj))
# cv2.imshow('model', mdl_proj)
# cv2.waitKey(1)

# Depth output is stored as int16; convert the normalized depth buffer to
# metric depth (see PCNN train_net.py)
factor_depth = 10000
zfar = 6.0
znear = 0.25
im_depth_raw = factor_depth * 2 * zfar * znear / (
    zfar + znear - (zfar - znear) * (2 * mdl_proj_depth - 1))
I = np.where(mdl_proj_depth == 1)
im_depth_raw[I[0], I[1]] = 0  # pixels at the far plane carry no depth
    n_top_curr = n_gt
else:
    n_top_curr = n_top
ests_sorted = ests_sorted[slice(0, n_top_curr)]

for est_id, est in ests_sorted:
    est_errs = []
    R_e = est['R']
    t_e = est['t']
    score = est['score']

    # Rendering
    model = models[obj_id]
    if vis_rgb:
        if vis_orig_color:
            m_rgb = renderer.render(
                model, im_size, K, R_e, t_e, mode='rgb')
        else:
            m_rgb = renderer.render(
                model, im_size, K, R_e, t_e, mode='rgb', surf_color=color)

    if vis_depth or (vis_rgb and vis_rgb_resolve_visib):
        m_depth = renderer.render(
            model, im_size, K, R_e, t_e, mode='depth')

        # Get mask of the surface parts that are closer than the
        # surfaces rendered before
        visible_mask = np.logical_or(ren_depth == 0, m_depth < ren_depth)
        mask = np.logical_and(m_depth != 0, visible_mask)
def augmentAcPData(params):
    """Augment AcP data.

    params.DATA_ROOT
    params.PLY_MODEL
    params.pose_tuning = [tx, ty, tz, rz] -> translation in meters, rotation in degrees
    params.frame_num
    """
    # DATA_ROOT = r'D:\SL\PoseCNN\Loc_data\DUCK\POSE_iPBnet'
    # DATA_ROOT = '/media/shawnle/Data0/YCB_Video_Dataset/SLM_datasets/Exhibition/DUCK'
    DATA_ROOT = params.DATA_ROOT
    p0 = os.path.abspath(DATA_ROOT)

    # GEN_ROOT = r'D:\SL\Summer_2019\original_sixd_toolkit\sixd_toolkit\data\gen_data'
    GEN_ROOT = DATA_ROOT

    # model = inout.load_ply(r'D:\SL\Summer_2019\sixd_toolkit\data\sheep\textured.ply')
    # model = inout.load_ply(r'D:\SL\Summer_2019\sixd_toolkit\data\ply\rotated.ply')
    # model = inout.load_ply(r'D:\SL\PoseCNN\Loc_data\DUCK\015_duck_toy\textured_m_text.ply')
    # model = inout.load_ply('/media/shawnle/Data0/YCB_Video_Dataset/YCB_Video_Dataset/data_syn_LOV/models/015_duck_toy/textured_dense.ply')
    # model = inout.load_ply('/home/shawnle/Downloads/textured.ply')
    model = inout.load_ply(params.PLY_MODEL)
    print('model keys', model.keys())

    # Sanity check: the model is expected to be in meters
    pts_max = np.amax(model['pts'], axis=0)
    pts_min = np.amin(model['pts'], axis=0)
    extents = np.abs(pts_max) + np.abs(pts_min)
    max_all_dim = np.amax(extents)
    assert max_all_dim < 1., 'Model extents suggest millimeters; meters should be used instead.'
    # exit()  # debug stop; if left enabled, the frames below would never be processed

    # meta_file = os.path.join(p0, '{:06d}'.format(0) + '-meta.json')
    # print('opening ', meta_file)
    # with open(meta_file, 'r') as f:
    #     meta_json = json.load(f)
    # print('keys ', meta_json.keys())
    # print('poses ')
    # pose = np.array(meta_json['poses']).reshape(4, 4)
    # print(pose)
    # print('intrinsic_matrix ')
    # print(np.array(meta_json['intrinsic_matrix']).reshape(3, 3))

    # Pose tuning: small corrective translation plus a rotation about z
    tx = params.pose_tuning[0]  # -.001  # m
    ty = params.pose_tuning[1]  # -.005
    tz = params.pose_tuning[2]  # -.001
    rz = params.pose_tuning[3] / 180. * math.pi  # 2./180.*math.pi  # rad
    xaxis, yaxis, zaxis = [1, 0, 0], [0, 1, 0], [0, 0, 1]
    Tt = tf.translation_matrix([tx, ty, tz])
    Rt = tf.rotation_matrix(rz, zaxis)
    TT = np.eye(4)
    TT[:3, :3] = Rt[:3, :3]
    TT[:3, 3] = Tt[:3, 3]
    # print('Tt = ')
    # print(Tt)
    # print('Rt = ')
    # print(Rt)
    print('TT = ')
    print(TT)
    # TT1 = np.dot(Tt, Rt)
    # print('TT1 = ')
    # print(TT1)

    for i in range(params.frame_num):
        file_name = os.path.join(p0, '{:06d}'.format(i) + '-color.png')
        print(file_name)
        rgb = cv2.imread(file_name, cv2.IMREAD_UNCHANGED)
        im_size = [rgb.shape[1], rgb.shape[0]]
        # cv2.imshow("rgb", rgb)
        # cv2.waitKey(1)

        # meta_file = os.path.join(p0, '{:06d}'.format(i) + '-meta.mat')
        # meta = scipy.io.loadmat(meta_file)
        meta_file = os.path.join(p0, '{:06d}'.format(i) + '-meta.json')
        print('opening ', meta_file)
        with open(meta_file, 'r') as f:
            meta = json.load(f)

        K = np.array(meta['intrinsic_matrix']).reshape(3, 3)
        # print('K', K)
        poses = np.array(meta['poses']).reshape(4, 4)
        R = poses[:3, :3]
        # print('R', R)
        t = poses[:3, 3]
        t /= 1000.  # to meters
        # print('t', t)

        # Update with tuning
        Rt44 = np.eye(4)
        Rt44[:3, :3] = R
        Rt44[:3, 3] = t
        Rt44 = np.dot(Rt44, TT)
        R = Rt44[:3, :3]
        t = Rt44[:3, 3]

        mdl_proj, mdl_proj_depth = renderer.render(model, im_size, K, R, t,
                                                   mode='rgb+depth',
                                                   clip_near=.3, clip_far=6.,
                                                   shading='flat')
        # print("dtype", mdl_proj.dtype)
        # print("max min", np.amax(mdl_proj), np.amin(mdl_proj))
        # cv2.imshow('model', mdl_proj)
        # cv2.waitKey(1)

        # Depth output is stored as int16; convert the normalized depth buffer
        # to metric depth (see PCNN train_net.py)
        factor_depth = 10000
        zfar = 6.0
        znear = 0.25
        im_depth_raw = factor_depth * 2 * zfar * znear / (
            zfar + znear - (zfar - znear) * (2 * mdl_proj_depth - 1))
        I = np.where(mdl_proj_depth == 1)
        im_depth_raw[I[0], I[1]] = 0

        depth_file = os.path.join(GEN_ROOT, '{:06d}-depth.png'.format(i))
        cv2.imwrite(depth_file, im_depth_raw.astype(np.uint16))
        print('writing depth ' + depth_file)

        label_file = os.path.join(GEN_ROOT, '{:06d}-label.png'.format(i))
        # Build the label image, i.e. take the nonzero depth pixels and set
        # them to the class id value
        I = np.where(mdl_proj_depth > 0)
        # print('I shape', I.shape)
        label = np.zeros((rgb.shape[0], rgb.shape[1]))
        if len(I[0]) > 0:
            print('len I0', len(I[0]))
            print('label is exported')
            label[I[0], I[1]] = 1
        cv2.imwrite(label_file, label.astype(np.uint8))
        print('writing label ' + label_file)

        blend_name = os.path.join(GEN_ROOT, "{:06d}-blend.png".format(i))
        gray = cv2.cvtColor(rgb, cv2.COLOR_BGR2GRAY)
        mdl_proj_g = cv2.cvtColor(mdl_proj, cv2.COLOR_BGR2GRAY)
        alf = .5
        bet = 1 - alf
        bld = cv2.addWeighted(mdl_proj_g, alf, gray, bet, 0.)
        cv2.imwrite(blend_name, bld)
        cv2.imshow('blend', bld)
        cv2.waitKey(1)
        print('writing blend ' + blend_name)

        # Revise the pose json (the pose is now in meters) and save the meta data
        meta_file_rev = os.path.join(p0, '{:06d}'.format(i) + '-meta_rev.json')
        meta['poses'] = Rt44.flatten().tolist()
        with open(meta_file_rev, 'w') as fp:
            json.dump(meta, fp)
        print('writing meta ', meta_file_rev)
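# A hypothetical invocation of augmentAcPData (not part of the original script):
# a simple namespace is assumed to be enough for `params`, since the function
# only reads DATA_ROOT, PLY_MODEL, pose_tuning and frame_num. The paths below
# are placeholders.
if __name__ == '__main__':
    from types import SimpleNamespace

    demo_params = SimpleNamespace(
        DATA_ROOT='/path/to/acp_data',             # folder with NNNNNN-color.png / NNNNNN-meta.json
        PLY_MODEL='/path/to/model_in_meters.ply',  # model must be in meters (see assert above)
        pose_tuning=[0.0, 0.0, 0.0, 0.0],          # [tx (m), ty (m), tz (m), rz (deg)]
        frame_num=1)
    augmentAcPData(demo_params)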
# Sample views
views, views_level = view_sampler.sample_views(min_n_views, radius,
                                               azimuth_range, elev_range)
print('Sampled views: ' + str(len(views)))
view_sampler.save_vis(out_views_vis_mpath.format(str(radius)),
                      views, views_level)

# Render the object model from all the views
for view_id, view in enumerate(views):
    if view_id % 10 == 0:
        print('obj,radius,view: ' + str(obj_id) + ',' +
              str(radius) + ',' + str(view_id))

    # Render depth image
    depth = renderer.render(model, par['cam']['im_size'], par['cam']['K'],
                            view['R'], view['t'], clip_near, clip_far,
                            mode='depth')

    # Convert depth so it is in the same units as the real test images
    depth /= par['cam']['depth_scale']

    # Render RGB image
    rgb = renderer.render(model, im_size_rgb, K_rgb, view['R'], view['t'],
                          clip_near, clip_far, texture=model_texture,
                          ambient_weight=ambient_weight, shading=shading,
                          mode='rgb')

    # The OpenCV function was used for rendering of the training images
    # provided for the SIXD Challenge 2017.
    rgb = cv2.resize(rgb, par['cam']['im_size'], interpolation=cv2.INTER_AREA)
    # rgb = scipy.misc.imresize(rgb, par['cam']['im_size'][::-1], 'bicubic')
im_ids = sorted(gts.keys())
gt_stats = {}
for im_id in im_ids:
    print('dataset: {}, scene/obj: {}, im: {}'.format(dataset, data_id, im_id))

    K = info[im_id]['cam_K']
    depth_path = dp[depth_mpath_key].format(data_id, im_id)
    depth_im = inout.load_depth(depth_path)
    depth_im *= dp['cam']['depth_scale']  # to [mm]
    im_size = (depth_im.shape[1], depth_im.shape[0])

    gt_stats[im_id] = []
    for gt_id, gt in enumerate(gts[im_id]):
        depth_gt = renderer.render(models[gt['obj_id']], im_size, K,
                                   gt['cam_R_m2c'], gt['cam_t_m2c'],
                                   mode='depth')

        # Get distance images
        dist_gt = misc.depth_im_to_dist_im(depth_gt, K)
        dist_im = misc.depth_im_to_dist_im(depth_im, K)

        # Estimation of visibility mask
        visib_gt = visibility.estimate_visib_mask_gt(dist_im, dist_gt, delta)

        # Visible surface fraction
        obj_mask_gt = dist_gt > 0
        px_count_valid = np.sum(dist_im[obj_mask_gt] > 0)
        px_count_visib = visib_gt.sum()
        px_count_all = obj_mask_gt.sum()
        if px_count_all > 0:
if pose['R'].size != 0 and pose['t'].size != 0:
    # Transform the GT pose
    R_m2c = pose['R'].dot(R_conv)
    t_m2c = pose['t'] * 1000  # from [m] to [mm]

    # Get 2D bounding box of the object model at the ground truth pose
    obj_bb = misc.calc_pose_2d_bbox(model, par['cam']['im_size'],
                                    par['cam']['K'], R_m2c, t_m2c)

    # Visualisation
    if False:
        rgb = inout.load_im(rgb_mpath.format(im_id, im_id))
        ren_rgb = renderer.render(model, par['cam']['im_size'],
                                  par['cam']['K'], R_m2c, t_m2c, mode='rgb')
        vis_rgb = 0.4 * rgb.astype(np.float32) + 0.6 * ren_rgb.astype(np.float32)
        vis_rgb = vis_rgb.astype(np.uint8)
        vis_rgb = misc.draw_rect(vis_rgb, obj_bb)
        plt.imshow(vis_rgb)
        plt.show()

    scene_gt.setdefault(im_id, []).append({
        'obj_id': obj_id,
        'cam_R_m2c': R_m2c.flatten().tolist(),
        'cam_t_m2c':
for i in range(len(gt_poses)):
    RT = np.array(gt_poses[i]).reshape(4, 4)
    R = RT[:3, :3]
    t = RT[:3, 3] * .001  # to meters
    Rs.append(R)
    ts.append(t)
    print(R)
    print(t)

size = (im_size[1], im_size[0])
rgb, dpt, lbl = renderer.render(model, size, K, Rs, ts,
                                mode='rgb+depth+label',
                                clip_near=.3, clip_far=6., shading='flat')
np.save('lbl.npy', lbl)
print('lbl.npy is saved to disk.')

im_rescale_factor = cfg.IMG_RESCALE_FACTOR
rgb = cv2.resize(rgb, None, fx=im_rescale_factor, fy=im_rescale_factor,
                 interpolation=cv2.INTER_LINEAR)
dpt = cv2.resize(dpt,