def __init__(self, path, background_path, data_indices, img_height=480,
             img_width=640, n_class=9, gaussian_noise=False,
             gamma_augmentation=False, avaraging=False,
             salt_pepper_noise=False, contrast=False, random_iteration=False,
             bg_flip=True, channel_swap=False, metric_filter=1.0):
    self.base_path = path
    self.n_class = n_class
    self.data_indices = data_indices
    self.img_height = img_height
    self.img_width = img_width
    ## augmentation options
    self.gaussian_noise = gaussian_noise
    self.gamma_augmentation = gamma_augmentation
    self.avaraging = avaraging
    self.salt_pepper_noise = salt_pepper_noise
    self.contrast = contrast
    if self.contrast:
        self.contrast_server = preprocess_utils.ContrastAugmentation()
    self.bg_flip = bg_flip
    self.bg_fpaths = glob.glob(os.path.join(background_path, '*.jpg'))
    self.random_iteration = random_iteration

    ## load object models
    model_fpath_mask = os.path.join(path, 'models_ply', '{0}.ply')
    dummy_model_fpath_mask = os.path.join(path, 'dummy_models_ply', '{0}.ply')
    self.objs = ['Ape', 'Can', 'Cat', 'Driller', 'Duck', 'Eggbox', 'Glue',
                 'Holepuncher']
    self.dummy_objs = ['benchviseblue', 'cam', 'iron', 'lamp', 'phone']
    self.models = []
    self.dummy_models = []
    for obj in self.objs:
        print 'Loading data:', obj
        model_fpath = model_fpath_mask.format(obj)
        self.models.append(inout.load_ply(model_fpath))
    for obj in self.dummy_objs:
        print 'Loading data:', obj
        model_fpath = dummy_model_fpath_mask.format(obj)
        self.dummy_models.append(inout.load_ply(model_fpath))

    # Camera intrinsics; K[2, 2] must be 1 for a valid pinhole model
    # (the original had 0 here, which makes projection degenerate)
    self.K = np.array([[572.41140, 0, 325.26110],
                       [0, 573.57043, 242.04899],
                       [0, 0, 1]])
    self.channel_swap = channel_swap
    self.metric_filter = metric_filter
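# --- Illustrative sketch (not part of the dataset class) ---
# Why K[2, 2] has to be 1: the intrinsic matrix maps a 3D point in the
# camera frame to homogeneous pixel coordinates. `project_point` is a
# hypothetical helper for illustration only, not part of this repo's API.
def project_point(K, pt_3d):
    # pt_3d: (3,) point in the camera frame
    uvw = K.dot(pt_3d)
    u = uvw[0] / uvw[2]  # pixel x
    v = uvw[1] / uvw[2]  # pixel y
    return u, v

# e.g. a point 0.5 m straight ahead projects to the principal point:
# project_point(K, np.array([0.0, 0.0, 0.5])) -> (~325.26, ~242.05)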
def main():
    parser = argparse.ArgumentParser(description='Data Augmentation Samples')
    parser.add_argument(
        '--path', '-p',
        default="../../train_data/OcclusionChallengeICCV2015/models_ply",
        help='Path to ply data')
    parser.add_argument('--out', '-o', default="output.png",
                        help='output image name')
    parser.add_argument(
        '--objs',
        default=['Ape', 'Can', 'Cat', 'Driller', 'Duck', 'Eggbox', 'Glue',
                 'Holepuncher'])
    args = parser.parse_args()

    # load object ply models
    models = []
    for obj_name in args.objs:
        print 'load ply : ', obj_name
        models.append(
            inout.load_ply(os.path.join(args.path, '{}.ply'.format(obj_name))))

    # Camera parameters
    K = np.eye(3)
    K[0, 0] = 500.0  # fx
    K[1, 1] = 500.0  # fy
    K[0, 2] = 250.0  # cx
    K[1, 2] = 250.0  # cy
    im_size = (500, 500)

    # pose: three rotations about different axes, fixed translation
    R = np.zeros((3, 3, 3))
    R[0] = transform.rotation_matrix(np.pi, (1, 0, 0))[:3, :3]
    R[1] = transform.rotation_matrix(np.pi, (1, 0.5, 0.5))[:3, :3]
    R[2] = transform.rotation_matrix(np.pi, (1, 0, 1))[:3, :3]
    t = np.array([[0, 0, 0.3]]).T

    # output fig: a 3 x len(models) grid of rendered depth images
    fig = plt.figure(figsize=(len(models), 3))
    for i in range(len(models)):
        for j in range(3):
            depth = renderer.render(models[i], im_size, K, R[j], t,
                                    0.1, 2.0, mode='depth')
            ax = fig.add_subplot(3, len(models), i + len(models) * j + 1)
            ax.imshow(depth)
    plt.show()
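# --- Illustrative sketch ---
# Assuming `transform.rotation_matrix` follows the common transformations.py
# convention (angle in radians, axis direction, returns a 4x4 homogeneous
# matrix), the [:3, :3] slice above extracts the pure 3x3 rotation.
# A quick sanity check that the slice is a proper rotation matrix:
Rx = transform.rotation_matrix(np.pi / 2, (1, 0, 0))[:3, :3]
assert np.allclose(Rx.dot(Rx.T), np.eye(3))  # orthonormal
assert np.isclose(np.linalg.det(Rx), 1.0)    # right-handed, no reflection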
def __init__(self, path, objs_indices, background_path, models_path=None,
             img_height=480, img_width=640, gaussian_noise=False,
             gamma_augmentation=False, avaraging=False,
             salt_pepper_noise=False, contrast=False, bg_flip=True,
             channel_swap=True, random_iteration=False, mode='test',
             interval=1, resize_rate=0.5, iteration_per_epoch=1000,
             metric_filter=1.0):
    # pass the augmentation flags through to the base class
    # (the original hard-coded avaraging=False, ignoring the argument)
    super(LinemodSIXDRenderingDataset, self).__init__(
        path, objs_indices, background_path,
        img_height=img_height, img_width=img_width,
        gaussian_noise=gaussian_noise,
        gamma_augmentation=gamma_augmentation,
        avaraging=avaraging,
        salt_pepper_noise=salt_pepper_noise,
        contrast=contrast,
        mode=mode, interval=interval, resize_rate=resize_rate,
        random_iteration=random_iteration,
        iteration_per_epoch=iteration_per_epoch,
        bg_flip=bg_flip, channel_swap=channel_swap,
        load_poses=False, metric_filter=metric_filter)
    if models_path is None:
        models_path = os.path.join(path, 'models')
    models_fpath_mask = os.path.join(models_path, 'obj_{0:0>2}.ply')
    self.models = []
    for obj in objs_indices:
        print 'Loading data: obj_{0:0>2}'.format(obj)
        model_fpath = models_fpath_mask.format(obj)
        self.models.append(inout.load_ply(model_fpath))
    # Camera intrinsics; K[2, 2] must be 1 (was 0 in the original)
    self.K = np.array([[572.41140, 0, 325.26110],
                       [0, 573.57043, 242.04899],
                       [0, 0, 1]])
    self.iteration_per_epoch = iteration_per_epoch
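# --- Illustrative usage (hypothetical paths) ---
# A minimal sketch of constructing the rendering dataset, assuming the
# linemodSIXD2017 layout used elsewhere in this repo:
# dataset = LinemodSIXDRenderingDataset(
#     '../../train_data/linemodSIXD2017',
#     np.arange(15) + 1,
#     '../../train_data/VOCdevkit/VOC2012/JPEGImages',
#     mode='train', iteration_per_epoch=1000)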
def main():
    # Camera parameters; K[2, 2] must be 1 (was 0 in the original)
    im_size = (640, 480)
    K = np.array([[572.41140, 0, 325.26110],
                  [0, 573.57043, 242.04899],
                  [0, 0, 1]])
    objs = ['Ape', 'Can', 'Cat', 'Driller', 'Duck', 'Eggbox', 'Glue',
            'Holepuncher']
    model_fpath_mask = os.path.join(
        '../../train_data/OcclusionChallengeICCV2015', 'models_ply', '{0}.ply')
    pose_fpath_mask = os.path.join(
        '../../train_data/OcclusionChallengeICCV2015', 'poses', '{0}', '*.txt')

    models = []
    gt_poses = []
    for obj in objs:
        print 'Loading data:', obj
        model_fpath = model_fpath_mask.format(obj)
        models.append(inout.load_ply(model_fpath))
        gt_fpaths = sorted(glob.glob(pose_fpath_mask.format(obj)))
        gt_poses_obj = []
        for gt_fpath in gt_fpaths:
            gt_poses_obj.append(inout.load_gt_pose_dresden(gt_fpath))
        gt_poses.append(gt_poses_obj)

    fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(16, 10))
    repeats = 100
    for i in range(repeats):
        print "multi object rendering {} / {}".format(i, repeats)
        rot_list = []
        pos_list = []
        model_list = []
        labels = []
        for obj_id, obj_name in enumerate(objs):
            # skip objects whose gt pose is empty for this frame
            pose = gt_poses[obj_id][i]
            if pose['R'].size != 0 and pose['t'].size != 0:
                pos_list.append(pose['t'])
                rot_list.append(pose['R'])
                model_list.append(models[obj_id])
                labels.append(obj_id + 1)
        rgb_ren, depth_ren, label_ren = multi_object_renderer.render(
            model_list, im_size, K, rot_list, pos_list, 0.1, 4.0,
            labels=labels, mode='rgb+depth+label')

        # colorize the label image
        label_img = np.zeros((im_size[1], im_size[0], 3))
        n_colors = len(DEFAULT_COLORS)
        for lbl_id in labels:
            color = color_dict[DEFAULT_COLORS[lbl_id % n_colors]]
            label_img[(label_ren == lbl_id), :] = color

        # Clear axes
        for ax in axes.flatten():
            ax.clear()
        axes[0].imshow(rgb_ren.astype(np.uint8))
        axes[0].set_title('RGB image')
        axes[1].imshow(depth_ren)
        axes[1].set_title('Depth image')
        axes[2].imshow(label_img)
        axes[2].set_title('Label image')
        fig.subplots_adjust(left=0.05, bottom=0.05, right=0.95, top=0.95,
                            hspace=0.15, wspace=0.15)
        plt.draw()
        plt.waitforbuttonpress()
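# --- Illustrative sketch ---
# What the multi-object render achieves conceptually: composite per-object
# renders by keeping, at every pixel, the surface nearest to the camera.
# `composite_by_depth` is a hypothetical helper, assuming single-object
# renders come as (rgb, depth) pairs with depth == 0 on the background:
def composite_by_depth(renders):
    # renders: list of (rgb, depth) tuples of identical image size
    h, w = renders[0][1].shape
    out_rgb = np.zeros((h, w, 3), dtype=np.uint8)
    out_depth = np.full((h, w), np.inf)
    for rgb, depth in renders:
        valid = depth > 0
        nearer = np.logical_and(valid, depth < out_depth)  # z-buffer test
        out_rgb[nearer] = rgb[nearer]
        out_depth[nearer] = depth[nearer]
    out_depth[np.isinf(out_depth)] = 0  # background back to 0
    return out_rgb, out_depth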
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('model_file')
    parser.add_argument('--auto-data', dest='auto_data', action='store_true')
    parser.set_defaults(auto_data=False)
    parser.add_argument('-g', '--gpu', default=-1, type=int,
                        help='if -1, use cpu only (default: 0)')
    args = parser.parse_args()

    alpha = 0.3
    image_alpha = 1
    render_eps = 0.015
    data_path = '../../train_data/linemodSIXD2017'
    bg_path = '../../train_data/VOCdevkit/VOC2012/JPEGImages'

    ## delta for visibility correspondence
    delta = 0.015  # [m]
    # objs = ['background', 'Ape', 'Can', 'Cat', 'Driller', 'Duck', 'Eggbox',
    #         'Glue', 'Holepuncher']
    objs = np.arange(15) + 1
    n_class = len(objs) + 1

    distance_sanity = 0.05
    min_distance = 0.005
    output_scale = 0.14
    prob_eps = 0.4
    eps = 0.05
    im_size = (640, 480)
    interval = 15

    K_orig = np.array([[572.41140000, 0.00000000, 325.26110000],
                       [0.00000000, 573.57043000, 242.04899000],
                       [0.00000000, 0.00000000, 1.00000000]])

    ## load object models
    obj_model_fpath_mask = os.path.join(data_path, 'models', 'obj_{0:0>2}.ply')
    obj_models = []
    for obj in objs:
        if obj != 'background':
            print 'Loading data: obj_{0}'.format(obj)
            obj_model_fpath = obj_model_fpath_mask.format(obj)
            obj_models.append(inout.load_ply(obj_model_fpath))

    ## load network model
    model = DualCenterProposalNetworkRes50_predict7(n_class=n_class)
    chainer.serializers.load_npz(args.model_file, model)
    if args.gpu >= 0:
        cuda.get_device(args.gpu).use()
        model.to_gpu()

    test = LinemodSIXDExtendedDataset(data_path, objs, mode='test',
                                      interval=interval,
                                      metric_filter=output_scale + eps)

    fig, axes = plt.subplots(nrows=3, ncols=2, figsize=(16, 10))
    im_ids = range(test.__len__())

    # pose estimator instance
    # pei = SimplePoseEstimationInterface(distance_sanity=distance_sanity,
    #                                     base_path=data_path,
    #                                     min_distance=min_distance, eps=eps,
    #                                     im_size=im_size)
    pei = PoseEstimationInterface(
        objs=['obj_{0:0>2}'.format(i) for i in objs],
        base_path=data_path, distance_sanity=distance_sanity,
        model_scale=1000.0, model_partial=1,
        min_distance=min_distance, eps=prob_eps, im_size=im_size)

    imagenet_mean = np.array(
        [103.939, 116.779, 123.68], dtype=np.float32)[np.newaxis, np.newaxis, :]
    colors = ('grey', 'red', 'blue', 'yellow', 'magenta', 'green', 'indigo',
              'darkorange', 'cyan', 'pink', 'yellowgreen', 'blueviolet',
              'brown', 'darkmagenta', 'aqua', 'crimson')

    # save output img
    save_image = False
    if save_image:
        save_dir = os.path.join("bvise_demo_data")
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)

    for im_id in im_ids:
        print "executing {0} / {1}".format(im_id, test.__len__())
        img_rgb, label, img_depth, img_cp, img_ocp, pos, rot, pc, obj_mask, \
            nonnan_mask, K = test.get_example(im_id)
        x_data = np.expand_dims(img_rgb, axis=0)
        with chainer.no_backprop_mode():
            if args.gpu >= 0:
                x_data = cuda.to_gpu(x_data)
            x = chainer.Variable(x_data)
            with chainer.using_config('train', False):
                y_cls_d, y_cp_d, y_ocp_d = model(x)
        cls_pred = chainer.functions.argmax(y_cls_d, axis=1)[0]
        cls_prob = chainer.functions.max(y_cls_d, axis=1)[0]
        cls_pred = chainer.cuda.to_cpu(cls_pred.data)
        cls_prob = chainer.cuda.to_cpu(cls_prob.data)
        y_cls = chainer.cuda.to_cpu(y_cls_d.data)[0]
        y_cp = chainer.cuda.to_cpu(y_cp_d.data)[0]
        y_ocp = chainer.cuda.to_cpu(y_ocp_d.data)[0]

        # suppress low-confidence class predictions
        cls_pred[cls_prob < prob_eps] = 0

        y_pos, y_rot = pei.execute(y_cls, y_cp * output_scale,
                                   y_ocp * output_scale, img_depth, K)

        # undo ImageNet mean subtraction for display
        img_rgb = (img_rgb.transpose(1, 2, 0) * 255.0 + imagenet_mean).astype(np.uint8)
        img_rgb_resize = cv2.resize(
            img_rgb, (img_rgb.shape[1] / 2, img_rgb.shape[0] / 2))
        # gray = img_as_float(rgb2gray(img_rgb_resize))
        gray = img_as_float(rgb2gray(img_rgb))
        gray = gray2rgb(gray) * image_alpha + (1 - image_alpha)
        cls_gt = np.zeros_like(img_rgb_resize)
        cls_mask = np.zeros_like(img_rgb_resize)

        # cls ground truth
        for cls_id, cls in enumerate(objs):
            if cls_id != 0:
                color = color_dict[colors[cls_id]]
                acls = (cls_pred == cls_id)[:, :, np.newaxis] * color
                acls_gt = (label == cls_id)[:, :, np.newaxis] * color
                cls_mask = cls_mask + acls
                cls_gt = cls_gt + acls_gt
        cls_mask = cv2.resize((cls_mask * 255).astype(np.uint8), im_size,
                              interpolation=cv2.INTER_NEAREST)
        cls_gt = cv2.resize((cls_gt * 255).astype(np.uint8), im_size,
                            interpolation=cv2.INTER_NEAREST)
        cls_vis_gt = cls_gt * alpha + gray * (1 - alpha)
        cls_vis_gt = (cls_vis_gt * 255).astype(np.uint8)
        cls_vis = cls_mask * alpha + gray * (1 - alpha)
        cls_vis = (cls_vis * 255).astype(np.uint8)

        pose_vis_gt = (gray.copy() * 255).astype(np.uint8)
        pose_vis_pred = (gray.copy() * 255).astype(np.uint8)

        vis_render = True
        for i in six.moves.range(n_class - 1):
            if np.sum(pos[i]) != 0 and np.sum(rot[i]) != 0:
                pos_diff = np.linalg.norm(y_pos[i] - pos[i])
                # relative rotation as a quaternion; angle = 2 * arccos(|w|)
                quat = quaternion.from_rotation_matrix(np.dot(y_rot[i].T, rot[i]))
                quat_w = min(1, abs(quat.w))
                diff_angle = np.rad2deg(np.arccos(quat_w)) * 2
                print "obj_{0:0>2} : position_diff = {1}, rotation_diff = {2}".format(
                    i + 1, pos_diff, diff_angle)
            if not vis_render:
                continue
            # Render the object model (model units are mm, depth image is m)
            img_depth_resize = cv2.resize(img_depth, im_size)
            if np.sum(rot[i]) != 0:
                ren_gt = renderer.render(
                    obj_models[i], im_size, K_orig, rot[i], (pos[i].T * 1000),
                    100, 4000, mode='rgb+depth')
                mask = np.logical_and(
                    (ren_gt[1] != 0),
                    np.logical_or((ren_gt[1] / 1000.0 < img_depth_resize + render_eps),
                                  (img_depth_resize == 0)))
                pose_vis_gt[mask, :] = ren_gt[0][mask]
            if np.sum(y_rot[i]) != 0:
                ren_pred = renderer.render(
                    obj_models[i], im_size, K_orig, y_rot[i], (y_pos[i].T * 1000),
                    100, 4000, mode='rgb+depth')
                mask = np.logical_and(
                    (ren_pred[1] != 0),
                    np.logical_or((ren_pred[1] / 1000.0 < img_depth_resize + render_eps),
                                  (img_depth_resize == 0)))
                pose_vis_pred[mask, :] = ren_pred[0][mask]

        if save_image:
            cv2.imwrite(os.path.join(save_dir, "rgb_{0:0>4}.png".format(im_id)),
                        img_rgb.astype(np.uint8))
            cv2.imwrite(os.path.join(save_dir, "mask_vis_{0:0>4}.png".format(im_id)),
                        cls_vis[:, :, ::-1].astype(np.uint8))
            cv2.imwrite(os.path.join(save_dir, "pose_{0:0>4}.png".format(im_id)),
                        pose_vis_pred[:, :, ::-1].astype(np.uint8))
            cv2.imwrite(os.path.join(save_dir, "gt_pose_{0:0>4}.png".format(im_id)),
                        pose_vis_gt[:, :, ::-1].astype(np.uint8))

        # Clear axes
        for ax in axes.flatten():
            ax.clear()
        axes[0, 0].imshow(img_rgb[:, :, ::-1].astype(np.uint8))
        axes[0, 0].set_title('RGB image')
        axes[0, 1].imshow(img_depth)
        axes[0, 1].set_title('Depth image')
        axes[1, 0].imshow(cls_vis_gt)
        axes[1, 0].set_title('class gt')
        axes[1, 1].imshow(cls_vis)
        axes[1, 1].set_title('class pred')
        axes[2, 0].imshow(pose_vis_gt)
        axes[2, 0].set_title('pose gt vis')
        axes[2, 1].imshow(pose_vis_pred)
        axes[2, 1].set_title('pose pred vis')
        fig.subplots_adjust(left=0.05, bottom=0.05, right=0.95, top=0.95,
                            hspace=0.15, wspace=0.15)
        plt.draw()
        plt.pause(0.01)
        plt.waitforbuttonpress()
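# --- Illustrative sketch ---
# The rotation error used above, factored into a helper for clarity.
# Assumes the numpy-quaternion package (imported as `quaternion`); for a
# relative rotation R_rel, the rotation angle is 2 * arccos(|w|), where w
# is the scalar part of the corresponding unit quaternion.
def rotation_diff_deg(R_pred, R_gt):
    R_rel = np.dot(R_pred.T, R_gt)
    q = quaternion.from_rotation_matrix(R_rel)
    w = min(1.0, abs(q.w))  # clamp against numerical noise before arccos
    return np.rad2deg(np.arccos(w)) * 2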
def main():
    parser = argparse.ArgumentParser(
        description='generate mask image of LinemodSIXD dataset')
    parser.add_argument('--mode', default='train', help='select train or test')
    args = parser.parse_args()

    mode = args.mode
    if mode not in ['train', 'test']:
        sys.stderr.write(
            "Error: mode should be 'train' or 'test', but mode is {}\n".format(mode))
        sys.exit()

    objs = np.arange(15) + 1
    im_size = (640, 480)
    data_basepath = '../train_data/linemodSIXD2017'
    model_fpath_mask = os.path.join(data_basepath, 'models', 'obj_{0:0>2}.ply')
    rgb_fpath_mask = os.path.join(data_basepath, mode, '{0:0>2}', 'rgb', '{1:0>4}.png')
    depth_fpath_mask = os.path.join(data_basepath, mode, '{0:0>2}', 'depth', '{1:0>4}.png')
    gt_poses_mask = os.path.join(data_basepath, mode, '{0:0>2}', 'gt.yml')
    info_mask = os.path.join(data_basepath, mode, '{0:0>2}', 'info.yml')
    gt_mask_dir = os.path.join(data_basepath, mode, '{0:0>2}', 'mask')
    mask_fpath_mask = os.path.join(data_basepath, mode, '{0:0>2}', 'mask',
                                   '{1:0>4}_{2:0>2}.png')

    # load object models (only needed for the visibility test in test mode)
    models = []
    if mode == 'test':
        for obj in objs:
            print 'Loading data: {0:0>2}'.format(obj)
            model_fpath = model_fpath_mask.format(obj)
            models.append(inout.load_ply(model_fpath))

    ## delta for visibility correspondence
    delta = 15  # [mm]

    for scene_id in six.moves.range(len(objs)):
        gt_poses = yaml.load(open(gt_poses_mask.format(objs[scene_id])))
        info = yaml.load(open(info_mask.format(objs[scene_id])))
        if not os.path.exists(gt_mask_dir.format(objs[scene_id])):
            os.makedirs(gt_mask_dir.format(objs[scene_id]))
        n_imgs = len(glob.glob(rgb_fpath_mask.format(objs[scene_id], '****')))
        for im_id in six.moves.range(n_imgs):
            print "scene : obj_{0:0>2}, image id : {1}".format(objs[scene_id], im_id)
            if mode == 'test':
                depth_fpath = depth_fpath_mask.format(objs[scene_id], im_id)
                depth = cv2.imread(depth_fpath, cv2.IMREAD_UNCHANGED).astype(np.float32)
                depth = preprocess_utils.depth_inpainting(depth)
                K = np.asarray(info[im_id]['cam_K']).reshape(3, 3)
                # Convert the input depth image to a distance image
                dist = depth_im_to_dist_im(depth, K)
                poses = gt_poses[im_id]
                for pose in poses:
                    obj_id = pose['obj_id']
                    rot = np.asarray(pose['cam_R_m2c']).reshape(3, 3)
                    trans = np.asarray(pose['cam_t_m2c']).T
                    # Render the object model and keep only its visible part
                    depth_ren_gt = renderer.render(
                        models[obj_id - 1], im_size, K, rot, trans,
                        100, 2500, mode='depth')
                    dist_ren_gt = depth_im_to_dist_im(depth_ren_gt, K)
                    visib_mask = estimate_visib_mask(dist, dist_ren_gt, delta)
                    cv2.imwrite(mask_fpath_mask.format(objs[scene_id], im_id, obj_id),
                                visib_mask * 255)
            elif mode == 'train':
                # train images contain a single object; any valid depth is fg
                depth_fpath = depth_fpath_mask.format(objs[scene_id], im_id)
                depth = cv2.imread(depth_fpath, cv2.IMREAD_UNCHANGED).astype(np.float32)
                mask = (depth > 0) * 255
                cv2.imwrite(mask_fpath_mask.format(objs[scene_id], im_id, objs[scene_id]),
                            mask)
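# --- Illustrative sketch ---
# Plausible implementations of the two helpers used above, stated as
# assumptions about their semantics (they follow the SIXD-toolkit idea of
# comparing per-pixel ray distances rather than raw z-depth):
def depth_im_to_dist_im_sketch(depth_im, K):
    # Back-project each pixel and take its Euclidean distance to the camera
    ys, xs = np.indices(depth_im.shape)
    Xs = (xs - K[0, 2]) * depth_im / K[0, 0]
    Ys = (ys - K[1, 2]) * depth_im / K[1, 1]
    return np.sqrt(Xs ** 2 + Ys ** 2 + depth_im.astype(np.float64) ** 2)

def estimate_visib_mask_sketch(d_test, d_model, delta):
    # A rendered pixel counts as visible when the model surface lies at most
    # `delta` behind the observed surface, i.e. it is not occluded there
    valid = np.logical_and(d_test > 0, d_model > 0)
    return np.logical_and(d_model - d_test <= delta, valid)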
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('model_file')
    parser.add_argument('--auto-data', dest='auto_data', action='store_true')
    parser.set_defaults(auto_data=False)
    parser.add_argument('-g', '--gpu', default=-1, type=int,
                        help='if -1, use cpu only (default: 0)')
    args = parser.parse_args()

    alpha = 0.3
    image_alpha = 1
    render_eps = 0.015
    data_path = '../../train_data/OcclusionChallengeICCV2015'
    bg_path = '../../train_data/VOCdevkit/VOC2012/JPEGImages'

    ## delta for visibility correspondence
    delta = 0.015  # [m]

    objs = ['background', 'Ape', 'Can', 'Cat', 'Driller', 'Duck', 'Eggbox',
            'Glue', 'Holepuncher']
    n_class = len(objs)
    distance_sanity = 0.05
    min_distance = 0.005
    output_scale = 0.12
    eps = 0.2
    im_size = (640, 480)
    # im_size = (512, 384)

    ## load object models
    obj_model_fpath_mask = os.path.join(data_path, 'models_ply', '{0}.ply')
    obj_models = []
    for obj in objs:
        if obj != 'background':
            print 'Loading data:', obj
            obj_model_fpath = obj_model_fpath_mask.format(obj)
            obj_models.append(inout.load_ply(obj_model_fpath))

    ## load network model
    model = DualCenterProposalNetworkRes50_predict7(n_class=n_class)
    chainer.serializers.load_npz(args.model_file, model)
    if args.gpu >= 0:
        cuda.get_device(args.gpu).use()
        model.to_gpu()

    if args.auto_data:
        test_range = np.arange(100)
        test = DualCPNetAutoGenerateDataset(data_path, bg_path, test_range,
                                            random_iteration=True,
                                            metric_filter=output_scale + eps)
    else:
        train_range, test_range = train_test_split(np.arange(1213),
                                                   test_size=100,
                                                   random_state=1234)
        test = DualCPNetDataset(data_path, test_range, img_height=im_size[1],
                                img_width=im_size[0])

    # pose estimator instance
    pei = SimplePoseEstimationInterface(distance_sanity=distance_sanity,
                                        base_path=data_path,
                                        min_distance=min_distance, eps=eps,
                                        im_size=im_size)

    fig, axes = plt.subplots(nrows=3, ncols=2, figsize=(16, 10))
    n_colors = len(DEFAULT_COLORS)
    im_ids = range(test.__len__())
    imagenet_mean = np.array(
        [103.939, 116.779, 123.68], dtype=np.float32)[np.newaxis, np.newaxis, :]

    for im_id in im_ids:
        print "executing {0} / {1}".format(im_id, test.__len__())
        img_rgb, label, img_depth, img_cp, img_ocp, pos, rot, pc, obj_mask, \
            nonnan_mask, K = test.get_example(im_id)
        x_data = np.expand_dims(img_rgb, axis=0)
        with chainer.no_backprop_mode():
            if args.gpu >= 0:
                x_data = cuda.to_gpu(x_data)
            x = chainer.Variable(x_data)
            with chainer.using_config('train', False):
                y_cls_d, y_cp_d, y_ocp_d = model(x)
        cls_pred = chainer.functions.argmax(y_cls_d, axis=1)[0]
        cls_pred = chainer.cuda.to_cpu(cls_pred.data)
        y_cls = chainer.cuda.to_cpu(y_cls_d.data)[0]
        y_cp = chainer.cuda.to_cpu(y_cp_d.data)[0]
        y_ocp = chainer.cuda.to_cpu(y_ocp_d.data)[0]

        estimated_ocp, estimated_R = pei.execute(y_cls, y_cp * output_scale,
                                                 y_ocp * output_scale,
                                                 img_depth, K)

        # undo ImageNet mean subtraction for display
        img_rgb = (img_rgb.transpose(1, 2, 0) * 255.0 + imagenet_mean).astype(np.uint8)
        img_rgb_resize = cv2.resize(
            img_rgb, (img_rgb.shape[1] / 2, img_rgb.shape[0] / 2))
        gray = img_as_float(rgb2gray(img_rgb_resize))
        gray = gray2rgb(gray) * image_alpha + (1 - image_alpha)
        cls_gt = np.zeros_like(img_rgb_resize)
        cls_mask = np.zeros_like(img_rgb_resize)

        # cls ground truth
        for cls_id, cls in enumerate(objs):
            if cls_id != 0:
                color = color_dict[DEFAULT_COLORS[cls_id % n_colors]]
                acls = (cls_pred == cls_id)[:, :, np.newaxis] * color
                acls_gt = (label == cls_id)[:, :, np.newaxis] * color
                cls_mask = cls_mask + acls
                cls_gt = cls_gt + acls_gt
        cls_vis_gt = cls_gt * alpha + gray * (1 - alpha)
        cls_vis_gt = (cls_vis_gt * 255).astype(np.uint8)
        cls_vis = cls_mask * alpha + gray * (1 - alpha)
        cls_vis = (cls_vis * 255).astype(np.uint8)
        pose_mask_gt = np.zeros_like(img_rgb_resize)
        pose_mask_pred = np.zeros_like(img_rgb_resize)
        pose_vis_gt = (gray.copy() * 255).astype(np.uint8)
        pose_vis_pred = (gray.copy() * 255).astype(np.uint8)

        for obj_id, obj_name in enumerate(objs):
            # (the original tested the stale loop variable `obj` here)
            if obj_name == 'background':
                continue
            # Render the object model at the gt and the predicted pose
            if np.sum(rot[obj_id - 1]) != 0:
                ren_gt = renderer.render(
                    obj_models[obj_id - 1], (im_size[0] / 2, im_size[1] / 2),
                    K, rot[obj_id - 1], pos[obj_id - 1].T, 0.1, 4.0,
                    mode='rgb+depth')
                mask = np.logical_and(
                    (ren_gt[1] != 0),
                    np.logical_or((ren_gt[1] < img_depth + render_eps),
                                  (img_depth == 0)))
                pose_vis_gt[mask, :] = ren_gt[0][mask]
            if np.sum(estimated_R[obj_id - 1]) != 0:
                ren_pred = renderer.render(
                    obj_models[obj_id - 1], (im_size[0] / 2, im_size[1] / 2),
                    K, estimated_R[obj_id - 1], estimated_ocp[obj_id - 1].T,
                    0.1, 4.0, mode='rgb+depth')
                mask = np.logical_and(
                    (ren_pred[1] != 0),
                    np.logical_or((ren_pred[1] < img_depth + render_eps),
                                  (img_depth == 0)))
                pose_vis_pred[mask, :] = ren_pred[0][mask]

        # Clear axes
        for ax in axes.flatten():
            ax.clear()
        axes[0, 0].imshow(img_rgb[:, :, ::-1].astype(np.uint8))
        axes[0, 0].set_title('RGB image')
        axes[0, 1].imshow(img_depth)
        axes[0, 1].set_title('Depth image')
        axes[1, 0].imshow(cls_vis_gt)
        axes[1, 0].set_title('class gt')
        axes[1, 1].imshow(cls_vis)
        axes[1, 1].set_title('class pred')
        axes[2, 0].imshow(pose_vis_gt)
        axes[2, 0].set_title('pose gt vis')
        axes[2, 1].imshow(pose_vis_pred)
        axes[2, 1].set_title('pose pred vis')
        fig.subplots_adjust(left=0.05, bottom=0.05, right=0.95, top=0.95,
                            hspace=0.15, wspace=0.15)
        plt.draw()
        plt.waitforbuttonpress()
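# --- Illustrative sketch ---
# The occlusion-aware overlay used in both demo scripts, factored into a
# hypothetical helper: a rendered pixel is drawn only where the render is in
# front of the observed depth (within `eps`) or the sensor has no reading.
def overlay_render(canvas, ren_rgb, ren_depth, obs_depth, eps=0.015):
    mask = np.logical_and(
        ren_depth != 0,
        np.logical_or(ren_depth < obs_depth + eps, obs_depth == 0))
    canvas[mask, :] = ren_rgb[mask]
    return canvas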
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('model_file')
    parser.add_argument('-g', '--gpu', default=0, type=int,
                        help='if -1, use cpu only (default: 0)')
    args = parser.parse_args()

    alpha = 0.3
    image_alpha = 1
    render_eps = 0.015
    data_path = '../../train_data/linemodSIXD2017'

    ## delta for visibility correspondence
    delta = 0.015  # [m]

    objs = np.arange(15) + 1
    n_class = len(objs) + 1
    distance_sanity = 0.02
    min_distance = 0.005
    output_scale = 0.14
    prob_eps = 0.4
    eps = 0.02
    im_size = (640, 480)
    interval = 15

    ## load object models
    obj_model_fpath_mask = os.path.join(data_path, 'models', 'obj_{0:0>2}.ply')
    obj_models = []
    for obj in objs:
        if obj != 'background':
            print 'Loading data: obj_{0}'.format(obj)
            obj_model_fpath = obj_model_fpath_mask.format(obj)
            obj_models.append(inout.load_ply(obj_model_fpath))

    ## load network model
    model = DualCenterProposalNetworkRes50_predict7(n_class=n_class)
    chainer.serializers.load_npz(args.model_file, model)
    if args.gpu >= 0:
        cuda.get_device(args.gpu).use()
        model.to_gpu()

    # pose estimator instance
    # pei = SimplePoseEstimationInterface(distance_sanity=distance_sanity,
    #                                     min_distance=min_distance, eps=prob_eps)
    pei = PoseEstimationInterface(
        objs=['obj_{0:0>2}'.format(i) for i in objs],
        base_path=data_path, n_ransac=500,
        distance_sanity=distance_sanity, model_scale=1000.0, model_partial=1,
        min_distance=min_distance, eps=prob_eps, im_size=im_size)

    scores_pos = []
    scores_rot = []
    ids_arr = []
    scores_5cm5deg = []
    scores_6dpose = []

    # evaluate all objects except indices 2 and 6 (Eggbox and Glue)
    test_objs = np.delete(np.arange(15) + 1, [2, 6])
    for obj_id in test_objs:
        test = LinemodSIXDSingleInstanceDataset(data_path, obj_id,
                                                mode='test', interval=interval,
                                                metric_filter=output_scale + eps)
        im_ids = test.__len__()
        detect_cnt = 0
        cnt_5cm5deg = 0
        cnt_6dpose = 0
        sum_pos = 0
        sum_rot = 0
        for im_id in tqdm.trange(im_ids):
            img_rgb, img_depth, pos, rot, K = test.get_example(im_id)
            x_data = np.expand_dims(img_rgb, axis=0)
            with chainer.no_backprop_mode():
                if args.gpu >= 0:
                    x_data = cuda.to_gpu(x_data)
                x = chainer.Variable(x_data)
                with chainer.using_config('train', False):
                    y_cls_d, y_cp_d, y_ocp_d = model(x)
            y_cls = chainer.cuda.to_cpu(y_cls_d.data)[0]
            y_cp = chainer.cuda.to_cpu(y_cp_d.data)[0]
            y_ocp = chainer.cuda.to_cpu(y_ocp_d.data)[0]
            y_pos, y_rot = pei.execute(y_cls, y_cp * output_scale,
                                       y_ocp * output_scale, img_depth, K,
                                       estimate_idx=[obj_id - 1])
            for i in six.moves.range(n_class - 1):
                if np.sum(pos[i]) != 0 and np.sum(rot[i]) != 0:
                    # 5cm 5deg metric
                    pos_diff = np.linalg.norm(y_pos[i] - pos[i])
                    quat = quaternion.from_rotation_matrix(
                        np.dot(y_rot[i].T, rot[i]))
                    quat_w = min(1, abs(quat.w))
                    diff_angle = np.rad2deg(np.arccos(quat_w)) * 2
                    if i + 1 == obj_id and pos_diff < 1.0:
                        sum_pos += pos_diff
                        sum_rot += diff_angle
                        detect_cnt += 1
                        if pos_diff < 0.05 and diff_angle < 5:
                            cnt_5cm5deg += 1
                    # 6d pose (ADD) metric: mean distance between model points
                    # under the gt pose and under the predicted pose
                    # (the original used the gt translation for pred_proj too,
                    # which reduces the metric to rotation-only error)
                    model_pts = obj_models[i]['pts'].transpose(1, 0)
                    gt_proj = np.dot(rot[i].T, model_pts) + pos[i, :, np.newaxis]
                    pred_proj = np.dot(y_rot[i].T, model_pts) + y_pos[i][:, np.newaxis]
                    diff_mean = np.mean(
                        np.linalg.norm((gt_proj - pred_proj), axis=0))
                    # object_diameters is assumed to be defined at module level
                    # (e.g. loaded from the dataset's models_info.yml)
                    if i + 1 == obj_id and pos_diff < 1.0 and \
                            diff_mean < 0.1 * object_diameters[i]:
                        cnt_6dpose += 1
        print "obj_{0:0>2} : detection_rate = {1},\n position_diff = {2}, rotation_diff = {3}, 5cm5deg = {4}, 6d pose = {5}" \
            .format(obj_id, detect_cnt / (1.0 * im_ids),
                    sum_pos / detect_cnt, sum_rot / detect_cnt,
                    cnt_5cm5deg / (1.0 * im_ids), cnt_6dpose / (1.0 * im_ids))
        scores_pos.append(sum_pos / im_ids)
        scores_rot.append(sum_rot / im_ids)
        scores_5cm5deg.append(cnt_5cm5deg)
        scores_6dpose.append(cnt_6dpose)
        ids_arr.append(im_ids)

    print "-- results --"
    print scores_pos
    print scores_rot
    print "-- total results --"
    ids_arr = np.asarray(ids_arr)
    scores_5cm5deg = np.asarray(scores_5cm5deg)
    ave_5cm5deg = np.sum(scores_5cm5deg) / (1.0 * np.sum(ids_arr))
    ave_6dpose = np.sum(scores_6dpose) / (1.0 * np.sum(ids_arr))
    print "5cm5deg metric : {}".format(ave_5cm5deg)
    print "6d pose metric : {}".format(ave_6dpose)

    # scores_pos = scores_pos * 1000  # m -> mm
    # per-object rates; use float division (integer '/' of the int arrays
    # would truncate every rate below 1.0 down to 0)
    scores_5cm5deg = scores_5cm5deg / (1.0 * ids_arr)
    scores_6dpose = np.asarray(scores_6dpose) / (1.0 * ids_arr)
    scores_ave = np.array([ave_5cm5deg, ave_6dpose])

    if not os.path.exists('eval_results'):
        os.makedirs('eval_results')
    np.save('eval_results/scores_pos.npy', scores_pos)
    np.save('eval_results/scores_rot.npy', scores_rot)
    np.save('eval_results/scores_5cm5deg.npy', scores_5cm5deg)
    np.save('eval_results/scores_6dpose.npy', scores_6dpose)
    np.save('eval_results/scores_ave.npy', scores_ave)
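# --- Illustrative sketch ---
# The two evaluation criteria above in isolation. `diameter` is the object
# diameter from the dataset's model info; the `object_diameters` lookup used
# in the evaluation loop is assumed, not defined in this file. The R.T
# convention matches how poses are applied elsewhere in these scripts.
def is_5cm5deg(pos_diff_m, angle_diff_deg):
    # correct if translation error < 5 cm and rotation error < 5 degrees
    return pos_diff_m < 0.05 and angle_diff_deg < 5

def is_6dpose(model_pts, R_gt, t_gt, R_pred, t_pred, diameter):
    # ADD metric: mean distance between corresponding model points under the
    # gt and predicted poses, accepted below 10% of the object diameter
    gt = np.dot(R_gt.T, model_pts) + t_gt[:, np.newaxis]
    pred = np.dot(R_pred.T, model_pts) + t_pred[:, np.newaxis]
    return np.mean(np.linalg.norm(gt - pred, axis=0)) < 0.1 * diameter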