def _write_background_images(self):
    cache_file = os.path.join(self._cache_path, self._name + '_backgrounds.pkl')
    if os.path.exists(cache_file):
        with open(cache_file, 'rb') as fid:
            self._backgrounds = cPickle.load(fid)
        if self._name != 'lov_train':
            cache_file_lov = os.path.join(self._cache_path, 'lov_train_backgrounds.pkl')
            if os.path.exists(cache_file_lov):
                with open(cache_file_lov, 'rb') as fid:
                    backgrounds_lov = cPickle.load(fid)
                self._backgrounds = self._backgrounds + backgrounds_lov
        print '{} backgrounds loaded from {}, {} images'.format(self._name, cache_file, len(self._backgrounds))
        return

    print 'building background images'
    outdir = os.path.join(self._cache_path, self._name + '_backgrounds')
    if not os.path.exists(outdir):
        os.mkdir(outdir)

    # sample up to 1000 images from the roidb
    num = 1000
    perm = np.random.permutation(np.arange(len(self._roidb)))
    perm = perm[:num]
    print len(perm)

    backgrounds = [None] * num
    kernel = np.ones((50, 50), np.uint8)
    for i in xrange(num):
        index = perm[i]

        # rgba
        rgba = pad_im(cv2.imread(self._roidb[index]['image'], cv2.IMREAD_UNCHANGED), 16)
        if rgba.shape[2] == 4:
            im = np.copy(rgba[:, :, :3])
            alpha = rgba[:, :, 3]
            I = np.where(alpha == 0)
            im[I[0], I[1], :] = 0
        else:
            im = rgba

        # generate background image: dilate the foreground mask and inpaint
        mask = pad_im(cv2.imread(self._roidb[index]['label'], cv2.IMREAD_UNCHANGED), 16)
        index = np.where(mask > 0)
        mask[index[0], index[1]] = 1
        mask = cv2.dilate(mask, kernel)
        background = cv2.inpaint(im, mask, 3, cv2.INPAINT_TELEA)

        # write the image
        filename = os.path.join(outdir, '%04d.jpg' % i)
        cv2.imwrite(filename, background)
        backgrounds[i] = filename

    self._backgrounds = backgrounds
    print 'build background images finished'
    with open(cache_file, 'wb') as fid:
        cPickle.dump(backgrounds, fid, cPickle.HIGHEST_PROTOCOL)
    print 'wrote backgrounds to {}'.format(cache_file)
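# Illustrative sketch of the dilate-then-inpaint background synthesis used in
# _write_background_images, on purely synthetic data; only numpy and cv2 are
# assumed. Dilating the binary object mask with a large kernel makes the
# inpainted region cover the object boundary as well.
import cv2
import numpy as np

toy_im = np.full((128, 128, 3), 180, dtype=np.uint8)   # fake scene
cv2.circle(toy_im, (64, 64), 20, (0, 0, 255), -1)      # fake foreground object
toy_mask = np.zeros((128, 128), dtype=np.uint8)
cv2.circle(toy_mask, (64, 64), 20, 1, -1)              # binary object mask

kernel = np.ones((15, 15), np.uint8)
toy_mask = cv2.dilate(toy_mask, kernel)

# fill the masked region from surrounding pixels (Telea inpainting, radius 3)
toy_background = cv2.inpaint(toy_im, toy_mask, 3, cv2.INPAINT_TELEA)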
def callback_rgbd(self, rgb, depth):
    if depth.encoding == '32FC1':
        depth_cv = self.cv_bridge.imgmsg_to_cv2(depth)
    elif depth.encoding == '16UC1':
        depth_cv = self.cv_bridge.imgmsg_to_cv2(depth).copy().astype(np.float32)
        depth_cv /= 1000.0
    else:
        rospy.logerr_throttle(
            1, 'Unsupported depth type. Expected 16UC1 or 32FC1, got {}'.format(
                depth.encoding))
        return

    im = self.cv_bridge.imgmsg_to_cv2(rgb, 'bgr8')

    # rescale image if necessary
    if cfg.TEST.SCALES_BASE[0] != 1:
        im_scale = cfg.TEST.SCALES_BASE[0]
        im = pad_im(cv2.resize(im, None, None, fx=im_scale, fy=im_scale,
                               interpolation=cv2.INTER_LINEAR), 16)
        depth_cv = pad_im(cv2.resize(depth_cv, None, None, fx=im_scale, fy=im_scale,
                                     interpolation=cv2.INTER_NEAREST), 16)

    with lock:
        self.im = im.copy()
        self.depth = depth_cv.copy()
        self.rgb_frame_id = rgb.header.frame_id
        self.rgb_frame_stamp = rgb.header.stamp
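# Standalone illustration of the depth-encoding convention handled above:
# a 16UC1 ROS depth image stores millimeters in uint16 and is converted to
# float32 meters, while 32FC1 already holds meters. Values are made up.
import numpy as np

depth_16uc1 = np.array([[0, 500], [1000, 2500]], dtype=np.uint16)  # millimeters
depth_m = depth_16uc1.astype(np.float32) / 1000.0                  # meters
assert depth_m[1, 1] == 2.5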
def _get_image_blob(self, color_file, depth_file, scale_ind):
    # rgba
    rgba = pad_im(cv2.imread(color_file, cv2.IMREAD_UNCHANGED), 16)
    if rgba.shape[2] == 4:
        im = np.copy(rgba[:, :, :3])
        alpha = rgba[:, :, 3]
        I = np.where(alpha == 0)
        im[I[0], I[1], :] = 0
    else:
        im = rgba

    im_scale = cfg.TRAIN.SCALES_BASE[scale_ind]
    if im_scale != 1.0:
        im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale,
                        interpolation=cv2.INTER_LINEAR)
    height = im.shape[0]
    width = im.shape[1]

    # chromatic transform
    if cfg.TRAIN.CHROMATIC and cfg.MODE == 'TRAIN' and np.random.rand(1) > 0.1:
        im = chromatic_transform(im)
    if cfg.TRAIN.ADD_NOISE and cfg.MODE == 'TRAIN' and np.random.rand(1) > 0.1:
        im = add_noise(im)

    im_tensor = torch.from_numpy(im) / 255.0
    im_tensor -= self._pixel_mean
    image_blob = im_tensor.permute(2, 0, 1).float()

    # depth image
    im_depth = pad_im(cv2.imread(depth_file, cv2.IMREAD_UNCHANGED), 16)
    if im_scale != 1.0:
        im_depth = cv2.resize(im_depth, None, None, fx=im_scale, fy=im_scale,
                              interpolation=cv2.INTER_NEAREST)
    im_depth = im_depth.astype('float') / 1000.0

    return image_blob, im_depth, im_scale, height, width
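# Minimal sketch of the color-blob conversion above: scale to [0, 1], subtract
# a per-channel mean, and move to CHW layout. The mean values here are an
# assumption for illustration, not necessarily the configured _pixel_mean.
import numpy as np
import torch

fake_im = np.random.randint(0, 256, (480, 640, 3), dtype=np.uint8)
pixel_mean = torch.tensor([102.9801, 115.9465, 122.7717]) / 255.0

im_tensor = torch.from_numpy(fake_im) / 255.0      # HWC, float in [0, 1]
im_tensor -= pixel_mean                            # broadcast over channels
image_blob = im_tensor.permute(2, 0, 1).float()    # CHW, as the network expects
assert image_blob.shape == (3, 480, 640)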
def read_input_data(src_path_prefix, color, depth):
    rgba = pad_im(cv2.imread(src_path_prefix + color, cv2.IMREAD_UNCHANGED), 16)
    if rgba.shape[2] == 4:
        im = np.copy(rgba[:, :, :3])
        alpha = rgba[:, :, 3]
        I = np.where(alpha == 0)
        im[I[0], I[1], :] = 0
    else:
        im = rgba
    depth_cv = cv2.imread(src_path_prefix + depth, cv2.IMREAD_ANYDEPTH)
    return im, depth_cv
def _get_label_blob(roidb, intrinsic_matrix, num_classes, db_inds_syn, im_scales, extents, is_syn):
    """ build the label blob """
    num_images = len(roidb)
    processed_depth = []
    processed_label = []
    processed_meta_data = []
    if cfg.TRAIN.VERTEX_REG_2D or cfg.TRAIN.VERTEX_REG_3D:
        processed_vertex_targets = []
        processed_vertex_weights = []
        pose_blob = np.zeros((0, 13), dtype=np.float32)
    else:
        pose_blob = []

    if not cfg.TRAIN.SEGMENTATION:
        assert len(im_scales) == 1, "Single batch only"
        assert len(roidb) == 1, "Single batch only"
        # gt boxes: (x1, y1, x2, y2, cls)
        gt_boxes = np.zeros((0, 5), dtype=np.float32)
        pose_blob = np.zeros((0, 13), dtype=np.float32)
    else:
        gt_boxes = []

    for i in xrange(num_images):
        im_scale = im_scales[i]
        if is_syn:
            filename = cfg.TRAIN.SYNROOT + '{:06d}-meta.mat'.format(db_inds_syn[i])
            meta_data = scipy.io.loadmat(filename)

            filename = cfg.TRAIN.SYNROOT + '{:06d}-depth.png'.format(db_inds_syn[i])
            im_depth = pad_im(cv2.imread(filename, cv2.IMREAD_UNCHANGED), 16)

            # read label image
            filename = cfg.TRAIN.SYNROOT + '{:06d}-label.png'.format(db_inds_syn[i])
            im = pad_im(cv2.imread(filename, cv2.IMREAD_UNCHANGED), 16)
        else:
            meta_data = scipy.io.loadmat(roidb[i]['meta_data'])
            im_depth = pad_im(cv2.imread(roidb[i]['depth'], cv2.IMREAD_UNCHANGED), 16)
            # read label image
            im = pad_im(cv2.imread(roidb[i]['label'], cv2.IMREAD_UNCHANGED), 16)

        meta_data['cls_indexes'] = meta_data['cls_indexes'].flatten()
        height = im.shape[0]
        width = im.shape[1]

        # mask the label image according to depth
        if cfg.INPUT == 'DEPTH':
            I = np.where(im_depth == 0)
            if len(im.shape) == 2:
                im[I[0], I[1]] = 0
            else:
                im[I[0], I[1], :] = 0

        if roidb[i]['flipped']:
            if len(im.shape) == 2:
                im = im[:, ::-1]
            else:
                im = im[:, ::-1, :]

        im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale,
                        interpolation=cv2.INTER_NEAREST)

        # process annotation if training for two classes
        if num_classes == 2:
            I = np.where(im == roidb[i]['cls_index'])
            im[:, :] = 0
            im[I[0], I[1]] = 1
            ind = np.where(meta_data['cls_indexes'] == roidb[i]['cls_index'])[0]
            meta_data['cls_indexes'] = np.ones((1,), dtype=np.float32)
            if len(meta_data['poses'].shape) == 3:
                meta_data['poses'] = meta_data['poses'][:, :, ind]
            meta_data['center'] = meta_data['center'][ind, :]
            meta_data['box'] = meta_data['box'][ind, :]

        im_cls, im_labels = _process_label_image(im, roidb[i]['class_colors'],
                                                 roidb[i]['class_weights'])
        processed_label.append(im_cls)

        # bounding boxes
        if not cfg.TRAIN.SEGMENTATION:
            boxes = meta_data['box'].copy()
            if roidb[i]['flipped']:
                oldx1 = boxes[:, 0].copy()
                oldx2 = boxes[:, 2].copy()
                boxes[:, 0] = width - oldx2 - 1
                boxes[:, 2] = width - oldx1 - 1
            gt_box = np.concatenate((boxes * im_scales[0],
                                     meta_data['cls_indexes'][:, np.newaxis]), axis=1)
            gt_boxes = np.concatenate((gt_boxes, gt_box), axis=0)

            poses = meta_data['poses']
            if len(poses.shape) == 2:
                poses = np.reshape(poses, (3, 4, 1))
            if roidb[i]['flipped']:
                poses = _flip_poses(poses, meta_data['intrinsic_matrix'], width)

            num = poses.shape[2]
            qt = np.zeros((num, 13), dtype=np.float32)
            for j in xrange(num):
                R = poses[:, :3, j]
                T = poses[:, 3, j]
                qt[j, 0] = i
                qt[j, 1] = meta_data['cls_indexes'][j]
                qt[j, 2:6] = 0  # fill box later
                qt[j, 6:10] = mat2quat(R)
                qt[j, 10:] = T
            pose_blob = np.concatenate((pose_blob, qt), axis=0)

        # vertex regression targets and weights
        if cfg.TRAIN.VERTEX_REG_2D or cfg.TRAIN.VERTEX_REG_3D:
            poses = meta_data['poses']
            if len(poses.shape) == 2:
                poses = np.reshape(poses, (3, 4, 1))
            if roidb[i]['flipped']:
                poses = _flip_poses(poses, meta_data['intrinsic_matrix'], width)

            if cfg.TRAIN.VERTEX_REG_3D:
                vertmap = meta_data['vertmap']
                if roidb[i]['flipped']:
                    vertmap = vertmap[:, ::-1, :]
                vertmap = cv2.resize(vertmap, None, None, fx=im_scale, fy=im_scale,
                                     interpolation=cv2.INTER_LINEAR)
            else:
                vertmap = []

            center = meta_data['center']
            if roidb[i]['flipped']:
                center[:, 0] = width - center[:, 0]

            vertex_targets, vertex_weights = \
                _generate_vertex_targets(im, meta_data['cls_indexes'], im_scale * center,
                                         poses, num_classes, vertmap, extents)
            processed_vertex_targets.append(vertex_targets)
            processed_vertex_weights.append(vertex_weights)

            num = poses.shape[2]
            qt = np.zeros((num, 13), dtype=np.float32)
            for j in xrange(num):
                R = poses[:, :3, j]
                T = poses[:, 3, j]
                qt[j, 0] = i
                qt[j, 1] = meta_data['cls_indexes'][j]
                qt[j, 2:6] = 0  # fill box later
                qt[j, 6:10] = mat2quat(R)
                qt[j, 10:] = T
            pose_blob = np.concatenate((pose_blob, qt), axis=0)

        # depth
        if roidb[i]['flipped']:
            im_depth = im_depth[:, ::-1]
        depth = im_depth.astype(np.float32, copy=True) / float(meta_data['factor_depth'])
        depth = cv2.resize(depth, None, None, fx=im_scale, fy=im_scale,
                           interpolation=cv2.INTER_LINEAR)
        processed_depth.append(depth)

        # voxelization (disabled)
        # points = voxelizer.backproject_camera(im_depth, meta_data)
        # voxelizer.voxelized = False
        # voxelizer.voxelize(points)
        # RT_world = meta_data['rotation_translation_matrix']

        # compute camera poses (disabled)
        # RT_live = meta_data['rotation_translation_matrix']
        # pose_world2live = se3_mul(RT_live, se3_inverse(RT_world))
        # pose_live2world = se3_inverse(pose_world2live)

        # construct the meta data
        """
        format of the meta_data
        intrinsic matrix: meta_data[0 ~ 8]
        inverse intrinsic matrix: meta_data[9 ~ 17]
        pose_world2live: meta_data[18 ~ 29]
        pose_live2world: meta_data[30 ~ 41]
        voxel step size: meta_data[42, 43, 44]
        voxel min value: meta_data[45, 46, 47]
        """
        K = np.matrix(meta_data['intrinsic_matrix']) * im_scale
        K[2, 2] = 1
        Kinv = np.linalg.pinv(K)
        mdata = np.zeros(48, dtype=np.float32)
        mdata[0:9] = K.flatten()
        mdata[9:18] = Kinv.flatten()
        # mdata[18:30] = pose_world2live.flatten()
        # mdata[30:42] = pose_live2world.flatten()
        # mdata[42] = voxelizer.step_x
        # mdata[43] = voxelizer.step_y
        # mdata[44] = voxelizer.step_z
        # mdata[45] = voxelizer.min_x
        # mdata[46] = voxelizer.min_y
        # mdata[47] = voxelizer.min_z
        if cfg.FLIP_X:
            mdata[0] = -1 * mdata[0]
            mdata[9] = -1 * mdata[9]
            mdata[11] = -1 * mdata[11]
        processed_meta_data.append(mdata)

    # construct the blobs
    height = processed_depth[0].shape[0]
    width = processed_depth[0].shape[1]
    depth_blob = np.zeros((num_images, height, width, 1), dtype=np.float32)
    height = processed_label[0].shape[0]
    width = processed_label[0].shape[1]
    label_blob = np.zeros((num_images, height, width, num_classes), dtype=np.float32)
    meta_data_blob = np.zeros((num_images, 1, 1, 48), dtype=np.float32)
    if cfg.TRAIN.VERTEX_REG_2D or cfg.TRAIN.VERTEX_REG_3D:
        vertex_target_blob = np.zeros((num_images, height, width, 3 * num_classes), dtype=np.float32)
        vertex_weight_blob = np.zeros((num_images, height, width, 3 * num_classes), dtype=np.float32)
    else:
        vertex_target_blob = []
        vertex_weight_blob = []

    for i in xrange(num_images):
        depth_blob[i, :, :, 0] = processed_depth[i]
        label_blob[i, :, :, :] = processed_label[i]
        meta_data_blob[i, 0, 0, :] = processed_meta_data[i]
        if cfg.TRAIN.VERTEX_REG_2D or cfg.TRAIN.VERTEX_REG_3D:
            vertex_target_blob[i, :, :, :] = processed_vertex_targets[i]
            vertex_weight_blob[i, :, :, :] = processed_vertex_weights[i]

    # filter bad boxes
    if not cfg.TRAIN.SEGMENTATION:
        gt_widths = gt_boxes[:, 2] - gt_boxes[:, 0] + 1.0
        gt_heights = gt_boxes[:, 3] - gt_boxes[:, 1] + 1.0
        ind = np.where((gt_widths > 0) & (gt_heights > 0))[0]
        gt_boxes = gt_boxes[ind, :]

    return depth_blob, label_blob, meta_data_blob, vertex_target_blob, vertex_weight_blob, pose_blob, gt_boxes
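# Sketch of the 13-float pose row packed in the loops above, using
# transforms3d's mat2quat (assumed to match the mat2quat imported here):
# [image index, class index, 4 box slots (filled later), quaternion wxyz,
# translation xyz]. The pose below is made up.
import numpy as np
from transforms3d.quaternions import mat2quat

fake_pose = np.hstack([np.eye(3), np.array([[0.1], [0.0], [0.8]])])  # 3x4 [R | T]
qt = np.zeros(13, dtype=np.float32)
qt[0] = 0                              # image index within the batch
qt[1] = 5                              # class index (example value)
qt[2:6] = 0                            # bounding box, filled in later
qt[6:10] = mat2quat(fake_pose[:, :3])  # unit quaternion (w, x, y, z)
qt[10:] = fake_pose[:, 3]              # translation in camera frame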
def _get_image_blob(roidb, scale_ind):
    """Builds an input blob from the images in the roidb at the specified scales."""
    num_images = len(roidb)
    processed_ims = []
    processed_ims_depth = []
    processed_ims_normal = []
    im_scales = []
    if cfg.TRAIN.GAN:
        processed_ims_rescale = []

    for i in range(num_images):
        # meta data
        meta_data = scipy.io.loadmat(roidb[i]['meta_data'])
        K = meta_data['intrinsic_matrix'].astype(np.float32, copy=True)
        fx = K[0, 0]
        fy = K[1, 1]
        cx = K[0, 2]
        cy = K[1, 2]

        # depth raw
        im_depth_raw = pad_im(cv2.imread(roidb[i]['depth'], cv2.IMREAD_UNCHANGED), 16)
        height = im_depth_raw.shape[0]
        width = im_depth_raw.shape[1]

        # rgba
        rgba = pad_im(cv2.imread(roidb[i]['image'], cv2.IMREAD_UNCHANGED), 16)
        if rgba.shape[2] == 4:
            im = np.copy(rgba[:, :, :3])
            alpha = rgba[:, :, 3]
            I = np.where(alpha == 0)
            im[I[0], I[1], :] = 0
        else:
            im = rgba

        # chromatic transform
        if cfg.TRAIN.CHROMATIC:
            label = pad_im(cv2.imread(roidb[i]['label'], cv2.IMREAD_UNCHANGED), 16)
            im = chromatic_transform(im, label)

        # mask the color image according to depth
        if cfg.EXP_DIR == 'rgbd_scene':
            I = np.where(im_depth_raw == 0)
            im[I[0], I[1], :] = 0

        if roidb[i]['flipped']:
            im = im[:, ::-1, :]

        if cfg.TRAIN.GAN:
            im_orig = im.astype(np.float32, copy=True) / 127.5 - 1
            im_scale = cfg.TRAIN.SCALES_BASE[scale_ind]
            im_rescale = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale,
                                    interpolation=cv2.INTER_LINEAR)
            processed_ims_rescale.append(im_rescale)

        im_orig = im.astype(np.float32, copy=True)
        im_orig -= cfg.PIXEL_MEANS
        im_scale = cfg.TRAIN.SCALES_BASE[scale_ind]
        im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale,
                        interpolation=cv2.INTER_LINEAR)
        im_scales.append(im_scale)
        processed_ims.append(im)

        # depth
        im_depth = im_depth_raw.astype(np.float32, copy=True) / float(im_depth_raw.max()) * 255
        im_depth = np.tile(im_depth[:, :, np.newaxis], (1, 1, 3))
        if roidb[i]['flipped']:
            im_depth = im_depth[:, ::-1]
        im_orig = im_depth.astype(np.float32, copy=True)
        im_orig -= cfg.PIXEL_MEANS
        im_depth = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale,
                              interpolation=cv2.INTER_LINEAR)
        processed_ims_depth.append(im_depth)

        # normals
        depth = im_depth_raw.astype(np.float32, copy=True) / float(meta_data['factor_depth'])
        nmap = gpu_normals.gpu_normals(depth, fx, fy, cx, cy, 20.0, cfg.GPU_ID)
        im_normal = 127.5 * nmap + 127.5
        im_normal = im_normal.astype(np.uint8)
        im_normal = im_normal[:, :, (2, 1, 0)]
        if roidb[i]['flipped']:
            im_normal = im_normal[:, ::-1, :]
        im_orig = im_normal.astype(np.float32, copy=True)
        im_orig -= cfg.PIXEL_MEANS
        im_normal = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale,
                               interpolation=cv2.INTER_LINEAR)
        processed_ims_normal.append(im_normal)

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims, 3)
    blob_depth = im_list_to_blob(processed_ims_depth, 3)
    blob_normal = im_list_to_blob(processed_ims_normal, 3)
    if cfg.TRAIN.GAN:
        blob_rescale = im_list_to_blob(processed_ims_rescale, 3)
    else:
        blob_rescale = []

    return blob, blob_rescale, blob_depth, blob_normal, im_scales
def _get_label_blob(roidb, voxelizer, im_scales):
    """ build the label blob """
    num_images = len(roidb)
    num_classes = voxelizer.num_classes
    processed_depth = []
    processed_label = []
    processed_meta_data = []
    if cfg.TRAIN.VERTEX_REG:
        processed_vertex_targets = []
        processed_vertex_weights = []
        pose_blob = np.zeros((0, 13), dtype=np.float32)
    else:
        pose_blob = []

    for i in range(num_images):
        im_scale = im_scales[i]

        # load meta data
        meta_data = scipy.io.loadmat(roidb[i]['meta_data'])
        im_depth = pad_im(cv2.imread(roidb[i]['depth'], cv2.IMREAD_UNCHANGED), 16)

        # read label image
        im = pad_im(cv2.imread(roidb[i]['label'], cv2.IMREAD_UNCHANGED), 16)
        height = im.shape[0]
        width = im.shape[1]

        # mask the label image according to depth
        if cfg.INPUT == 'DEPTH':
            I = np.where(im_depth == 0)
            if len(im.shape) == 2:
                im[I[0], I[1]] = 0
            else:
                im[I[0], I[1], :] = 0

        if roidb[i]['flipped']:
            if len(im.shape) == 2:
                im = im[:, ::-1]
            else:
                im = im[:, ::-1, :]

        im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale,
                        interpolation=cv2.INTER_NEAREST)

        if num_classes == 2:
            I = np.where(im > 0)
            im[I[0], I[1]] = 1
            for j in range(len(meta_data['cls_indexes'])):
                meta_data['cls_indexes'][j] = 1

        im_cls, im_labels = _process_label_image(im, roidb[i]['class_colors'],
                                                 roidb[i]['class_weights'])
        processed_label.append(im_cls)

        # vertex regression targets and weights
        if cfg.TRAIN.VERTEX_REG:
            poses = meta_data['poses']
            if len(poses.shape) == 2:
                poses = np.reshape(poses, (3, 4, 1))
            center_targets, center_weights = _vote_centers(
                im, meta_data['cls_indexes'], im_scale * meta_data['center'],
                poses, num_classes)
            processed_vertex_targets.append(center_targets)
            processed_vertex_weights.append(center_weights)

            num = poses.shape[2]
            qt = np.zeros((num, 13), dtype=np.float32)
            for j in range(num):
                R = poses[:, :3, j]
                T = poses[:, 3, j]
                qt[j, 0] = i
                qt[j, 1] = meta_data['cls_indexes'][j, 0]
                qt[j, 2:6] = 0  # fill box later, roidb[i]['boxes'][j, :]
                qt[j, 6:10] = mat2quat(R)
                qt[j, 10:] = T
            pose_blob = np.concatenate((pose_blob, qt), axis=0)

        # depth
        if roidb[i]['flipped']:
            im_depth = im_depth[:, ::-1]
        depth = im_depth.astype(np.float32, copy=True) / float(meta_data['factor_depth'])
        depth = cv2.resize(depth, None, None, fx=im_scale, fy=im_scale,
                           interpolation=cv2.INTER_LINEAR)
        processed_depth.append(depth)

        # voxelization (disabled)
        # points = voxelizer.backproject_camera(im_depth, meta_data)
        # voxelizer.voxelized = False
        # voxelizer.voxelize(points)
        # RT_world = meta_data['rotation_translation_matrix']

        # compute camera poses (disabled)
        # RT_live = meta_data['rotation_translation_matrix']
        # pose_world2live = se3_mul(RT_live, se3_inverse(RT_world))
        # pose_live2world = se3_inverse(pose_world2live)

        # construct the meta data
        """
        format of the meta_data
        intrinsic matrix: meta_data[0 ~ 8]
        inverse intrinsic matrix: meta_data[9 ~ 17]
        pose_world2live: meta_data[18 ~ 29]
        pose_live2world: meta_data[30 ~ 41]
        voxel step size: meta_data[42, 43, 44]
        voxel min value: meta_data[45, 46, 47]
        """
        K = np.matrix(meta_data['intrinsic_matrix']) * im_scale
        K[2, 2] = 1
        Kinv = np.linalg.pinv(K)
        mdata = np.zeros(48, dtype=np.float32)
        mdata[0:9] = K.flatten()
        mdata[9:18] = Kinv.flatten()
        # mdata[18:30] = pose_world2live.flatten()
        # mdata[30:42] = pose_live2world.flatten()
        # mdata[42] = voxelizer.step_x
        # mdata[43] = voxelizer.step_y
        # mdata[44] = voxelizer.step_z
        # mdata[45] = voxelizer.min_x
        # mdata[46] = voxelizer.min_y
        # mdata[47] = voxelizer.min_z
        if cfg.FLIP_X:
            mdata[0] = -1 * mdata[0]
            mdata[9] = -1 * mdata[9]
            mdata[11] = -1 * mdata[11]
        processed_meta_data.append(mdata)

    # construct the blobs
    height = processed_depth[0].shape[0]
    width = processed_depth[0].shape[1]
    depth_blob = np.zeros((num_images, height, width, 1), dtype=np.float32)
    label_blob = np.zeros((num_images, height, width, num_classes), dtype=np.float32)
    meta_data_blob = np.zeros((num_images, 1, 1, 48), dtype=np.float32)
    if cfg.TRAIN.VERTEX_REG:
        vertex_target_blob = np.zeros((num_images, height, width, 3 * num_classes), dtype=np.float32)
        vertex_weight_blob = np.zeros((num_images, height, width, 3 * num_classes), dtype=np.float32)
    else:
        vertex_target_blob = []
        vertex_weight_blob = []

    if cfg.TRAIN.GAN:
        gan_z_blob = np.random.uniform(-1, 1, [num_images, 100]).astype(np.float32)
    else:
        gan_z_blob = []

    for i in range(num_images):
        depth_blob[i, :, :, 0] = processed_depth[i]
        label_blob[i, :, :, :] = processed_label[i]
        meta_data_blob[i, 0, 0, :] = processed_meta_data[i]
        if cfg.TRAIN.VERTEX_REG:
            vertex_target_blob[i, :, :, :] = processed_vertex_targets[i]
            vertex_weight_blob[i, :, :, :] = processed_vertex_weights[i]

    return depth_blob, label_blob, meta_data_blob, vertex_target_blob, vertex_weight_blob, pose_blob, gan_z_blob
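# How the intrinsics are rescaled with the image in the meta-data blocks
# above: fx, fy, cx, cy scale linearly with the resize factor while the
# homogeneous entry stays 1. The intrinsics here are example values.
import numpy as np

K = np.array([[572.4, 0.0, 325.3],
              [0.0, 573.6, 242.0],
              [0.0, 0.0, 1.0]])
im_scale = 0.5
K_scaled = K * im_scale
K_scaled[2, 2] = 1.0                 # restore the homogeneous 1
Kinv = np.linalg.pinv(K_scaled)      # inverse intrinsics, used for backprojection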
def run_network(sess, net, imdb, images, meta_data):
    """
    :param sess: TensorFlow session
    :param net: Pretrained neural network to run model over.
    :param imdb: TODO: Find out essential features of this object.
    :param images: [(rgb_image[0], depth_image[0]), ...]
    :param meta_data: Dictionary including camera intrinsics under
        'intrinsic_matrix', and scale factor under 'factor_depth'
        (default is 10,000).
    """
    n_images = len(images)
    segmentations = [[] for _ in range(n_images)]

    # timers
    _t = {'im_segment': Timer(), 'misc': Timer()}

    # voxelizer
    voxelizer = Voxelizer(cfg.TEST.GRID_SIZE, imdb.num_classes)
    voxelizer.setup(-3, -3, -3, 3, 3, 4)

    # construct colors
    colors = np.zeros((3 * imdb.num_classes), dtype=np.uint8)
    for i in range(imdb.num_classes):
        colors[i * 3 + 0] = imdb._class_colors[i][0]
        colors[i * 3 + 1] = imdb._class_colors[i][1]
        colors[i * 3 + 2] = imdb._class_colors[i][2]

    perm = list(range(n_images))

    if (cfg.TEST.VERTEX_REG_2D and cfg.TEST.POSE_REFINE) or \
            (cfg.TEST.VERTEX_REG_3D and cfg.TEST.POSE_REG):
        import libsynthesizer
        synthesizer = libsynthesizer.Synthesizer(cfg.CAD, cfg.POSE)
        synthesizer.setup(cfg.TRAIN.SYN_WIDTH, cfg.TRAIN.SYN_HEIGHT)

    batched_detections = []
    for i in perm:
        raw_rgb, raw_depth = images[i]

        # read color image
        rgba = pad_im(raw_rgb, 16)
        if rgba.shape[2] == 4:
            im = np.copy(rgba[:, :, :3])
            alpha = rgba[:, :, 3]
            I = np.where(alpha == 0)
            im[I[0], I[1], :] = 0
        else:
            im = rgba

        im_depth = pad_im(raw_depth, 16)

        _t['im_segment'].tic()
        labels, probs, vertex_pred, rois, poses = im_segment_single_frame(
            sess, net, im, im_depth, meta_data, voxelizer, imdb._extents,
            imdb._points_all, imdb._symmetry, imdb.num_classes)

        detections = []
        for j in range(rois.shape[0]):
            cls_idx = int(rois[j, 1])
            if cls_idx > 0:
                # projection
                # RT = np.zeros((3, 4), dtype=np.float32)
                # RT[:3, :3] = quat2mat(poses[j, :4])
                # RT[:, 3] = poses[j, 4:7]

                # transform to world pose
                pose_t = np.zeros((6,), dtype=np.float32)
                pose_t[:3] = poses[j, 4:7]
                # pose_t[[0, 2]] = pose_t[[2, 0]]
                # flip z-axis to match renderer
                pose_t[2] = -pose_t[2]
                poses[j, [1, 2]] = -poses[j, [1, 2]]
                pose_t[3:] = quat2euler(poses[j, :4], axes='sxyz')
                cls = imdb._classes[cls_idx]
                detections.append((cls, pose_t))
        batched_detections.append(detections)

        labels = unpad_im(labels, 16)
        im_scale = cfg.TEST.SCALES_BASE[0]

        # build the label image
        im_label = imdb.labels_to_image(im, labels)

        poses_new = []
        poses_icp = []
        if cfg.TEST.VERTEX_REG_2D:
            if cfg.TEST.POSE_REG:
                # pose refinement
                fx = meta_data['intrinsic_matrix'][0, 0] * im_scale
                fy = meta_data['intrinsic_matrix'][1, 1] * im_scale
                px = meta_data['intrinsic_matrix'][0, 2] * im_scale
                py = meta_data['intrinsic_matrix'][1, 2] * im_scale
                factor = meta_data['factor_depth']
                znear = 0.25
                zfar = 6.0
                poses_new = np.zeros((poses.shape[0], 7), dtype=np.float32)
                poses_icp = np.zeros((poses.shape[0], 7), dtype=np.float32)
                error_threshold = 0.01
                if cfg.TEST.POSE_REFINE:
                    labels_icp = labels.copy()
                    rois_icp = rois
                    if imdb.num_classes == 2:
                        I = np.where(labels_icp > 0)
                        labels_icp[I[0], I[1]] = imdb._cls_index
                        rois_icp = rois.copy()
                        rois_icp[:, 1] = imdb._cls_index
                    im_depth = cv2.resize(im_depth, None, None, fx=im_scale, fy=im_scale,
                                          interpolation=cv2.INTER_LINEAR)

                    parameters = np.zeros((7,), dtype=np.float32)
                    parameters[0] = fx
                    parameters[1] = fy
                    parameters[2] = px
                    parameters[3] = py
                    parameters[4] = znear
                    parameters[5] = zfar
                    parameters[6] = factor

                    height = labels_icp.shape[0]
                    width = labels_icp.shape[1]
                    num_roi = rois_icp.shape[0]
                    channel_roi = rois_icp.shape[1]
                    synthesizer.icp_python(labels_icp, im_depth, parameters,
                                           height, width, num_roi, channel_roi,
                                           rois_icp, poses, poses_new, poses_icp,
                                           error_threshold)
        _t['im_segment'].toc()

        _t['misc'].tic()
        labels_new = cv2.resize(labels, None, None, fx=1.0 / im_scale, fy=1.0 / im_scale,
                                interpolation=cv2.INTER_NEAREST)
        seg = {
            'labels': labels_new,
            'rois': rois,
            'poses': poses,
            'poses_refined': poses_new,
            'poses_icp': poses_icp
        }
        segmentations[i] = seg
        _t['misc'].toc()

        print('im_segment: {:d}/{:d} {:.3f}s {:.3f}s'
              .format(i, n_images, _t['im_segment'].diff, _t['misc'].diff))

        if cfg.TEST.VISUALIZE:
            img_dir = os.path.join("output", "vis")
            os.makedirs(img_dir, exist_ok=True)
            vertmap = _extract_vertmap(labels, vertex_pred, imdb._extents, imdb.num_classes)
            vis_segmentations_vertmaps_detection(
                im, im_depth, im_label, imdb._class_colors, vertmap, labels,
                rois, poses, poses_icp, meta_data['intrinsic_matrix'],
                imdb.num_classes, imdb._classes, imdb._points_all,
                f_name=os.path.join(img_dir, "%i.png" % i))

    return batched_detections
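# Sketch of the world-pose repacking in run_network: translation plus Euler
# angles recovered from a (w, x, y, z) quaternion via transforms3d, which is
# assumed to provide the quat2euler used above. Values are made up.
import numpy as np
from transforms3d.euler import quat2euler

quat = np.array([1.0, 0.0, 0.0, 0.0])   # identity rotation, (w, x, y, z)
trans = np.array([0.1, -0.2, 0.9])      # example translation

pose_t = np.zeros(6, dtype=np.float32)
pose_t[:3] = trans
pose_t[3:] = quat2euler(quat, axes='sxyz')  # static-xyz Euler convention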
def read_label_data(src_path_prefix, meta_data, num_classes, im_scales, extents,
                    blob_height, blob_width, depth, cls, instance, objects):
    """ build the label blob """
    num_images = 1
    processed_depth = []
    processed_label = []
    processed_meta_data = []
    vertex_target_blob = np.zeros((num_images, blob_height, blob_width, 3 * num_classes),
                                  dtype=np.float32)
    vertex_weight_blob = np.zeros((num_images, blob_height, blob_width, 3 * num_classes),
                                  dtype=np.float32)
    pose_blob = np.zeros((0, 13), dtype=np.float32)
    gt_boxes = []

    for i in xrange(num_images):
        im_scale = im_scales[i]
        meta_data['cls_indexes'] = meta_data['cls_indexes'].flatten()

        if os.path.exists(src_path_prefix + depth):
            im_depth = pad_im(cv2.imread(src_path_prefix + depth, cv2.IMREAD_UNCHANGED), 16)
        else:
            im_depth = np.zeros((blob_height, blob_width), dtype=np.float32)

        # read label image
        im = pad_im(cv2.imread(src_path_prefix + cls, cv2.IMREAD_UNCHANGED), 16)
        im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale,
                        interpolation=cv2.INTER_NEAREST)

        # process annotation if training for two classes
        I = np.where(im == 1)
        im[:, :] = 0
        im[I[0], I[1]] = 1
        ind = np.where(meta_data['cls_indexes'] == 1)[0]
        cls_indexes_old = ind
        meta_data['cls_indexes'] = np.ones((len(ind),), dtype=np.float32)
        if len(meta_data['poses'].shape) == 2:
            meta_data['poses'] = np.reshape(meta_data['poses'], (3, 4, 1))
        meta_data['poses'] = meta_data['poses'][:, :, ind]
        meta_data['center'] = meta_data['center'][ind, :]

        im_labels = im.copy()
        processed_label.append(im_labels.astype(np.int32))

        # vertex regression targets and weights
        poses = meta_data['poses']
        if len(poses.shape) == 2:
            poses = np.reshape(poses, (3, 4, 1))
        vertmap = []
        center = meta_data['center']

        # check if there are multiple instances of the same class
        cls_indexes = meta_data['cls_indexes']
        if len(np.unique(cls_indexes)) < len(cls_indexes):
            is_multi_instances = 1
            # read mask image
            mask_img = cv2.imread(src_path_prefix + instance, cv2.IMREAD_UNCHANGED)
            if objects:
                mask_img = linear_instance_segmentation_mask_image(objects, mask_img)
            try:
                # The mask image needs to be cropped to one channel for
                # simulation/dope data, because their masks are color masks
                # rather than black/white.
                mask_img = mask_img[:, :, 0]
            except IndexError:
                pass
            mask = pad_im(mask_img, 16)
        else:
            is_multi_instances = 0
            mask = []

        vertex_target_blob[i, :, :, :], vertex_weight_blob[i, :, :, :] = \
            _generate_vertex_targets(im, meta_data['cls_indexes'], im_scale * center,
                                     poses, num_classes, vertmap, extents, mask,
                                     is_multi_instances, cls_indexes_old,
                                     vertex_target_blob[i, :, :, :],
                                     vertex_weight_blob[i, :, :, :])

        num = poses.shape[2]
        qt = np.zeros((num, 13), dtype=np.float32)
        for j in xrange(num):
            R = poses[:, :3, j]
            T = poses[:, 3, j]
            qt[j, 0] = i
            qt[j, 1] = meta_data['cls_indexes'][j]
            qt[j, 2:6] = 0  # fill box later
            qt[j, 6:10] = mat2quat(R)
            qt[j, 10:] = T
        pose_blob = np.concatenate((pose_blob, qt), axis=0)

        # construct the meta data
        """
        format of the meta_data
        intrinsic matrix: meta_data[0 ~ 8]
        inverse intrinsic matrix: meta_data[9 ~ 17]
        pose_world2live: meta_data[18 ~ 29]
        pose_live2world: meta_data[30 ~ 41]
        voxel step size: meta_data[42, 43, 44]
        voxel min value: meta_data[45, 46, 47]
        """
        K = np.matrix(meta_data['intrinsic_matrix']) * im_scale
        K[2, 2] = 1
        Kinv = np.linalg.pinv(K)
        mdata = np.zeros(48, dtype=np.float32)
        mdata[0:9] = K.flatten()
        mdata[9:18] = Kinv.flatten()
        processed_meta_data.append(mdata)

        # depth
        depth = im_depth.astype(np.float32, copy=True) / float(meta_data['factor_depth'])
        depth = cv2.resize(depth, None, None, fx=im_scale, fy=im_scale,
                           interpolation=cv2.INTER_LINEAR)
        processed_depth.append(depth)

    # construct the blobs
    depth_blob = np.zeros((num_images, blob_height, blob_width, 1), dtype=np.float32)
    meta_data_blob = np.zeros((num_images, 1, 1, 48), dtype=np.float32)
    for i in xrange(num_images):
        depth_blob[i, :, :, 0] = processed_depth[i]
        meta_data_blob[i, 0, 0, :] = processed_meta_data[i]

    label_blob = np.zeros((num_images, blob_height, blob_width), dtype=np.int32)
    for i in xrange(num_images):
        label_blob[i, :, :] = processed_label[i]

    return depth_blob, label_blob, meta_data_blob, vertex_target_blob, vertex_weight_blob, pose_blob, gt_boxes, mask
def _get_label_blob(roidb, voxelizer):
    """ build the label blob """
    num_images = len(roidb)
    num_classes = voxelizer.num_classes
    processed_depth = []
    processed_label = []
    processed_meta_data = []
    if cfg.TRAIN.VERTEX_REG:
        processed_vertex_targets = []
        processed_vertex_weights = []
        processed_vertex_images = []

    for i in xrange(num_images):
        # load meta data
        meta_data = scipy.io.loadmat(roidb[i]['meta_data'])
        im_depth = pad_im(cv2.imread(roidb[i]['depth'], cv2.IMREAD_UNCHANGED), 16)

        # read label image
        im = pad_im(cv2.imread(roidb[i]['label'], cv2.IMREAD_UNCHANGED), 16)
        height = im.shape[0]
        width = im.shape[1]

        # mask the label image according to depth
        if cfg.INPUT == 'DEPTH':
            I = np.where(im_depth == 0)
            if len(im.shape) == 2:
                im[I[0], I[1]] = 0
            else:
                im[I[0], I[1], :] = 0

        if roidb[i]['flipped']:
            if len(im.shape) == 2:
                im = im[:, ::-1]
            else:
                im = im[:, ::-1, :]

        im_cls, im_labels = _process_label_image(im, roidb[i]['class_colors'],
                                                 roidb[i]['class_weights'])
        processed_label.append(im_cls)

        # vertex regression targets and weights
        if cfg.TRAIN.VERTEX_REG:
            # read vertmap image
            vertmap = pad_im(cv2.imread(roidb[i]['vertmap'], cv2.IMREAD_UNCHANGED), 16)
            if roidb[i]['flipped']:
                vertmap = vertmap[:, ::-1, :]
            vertmap = vertmap[:, :, (2, 1, 0)]
            vertmap = vertmap.astype(np.float32) / 255.0
            vertex_targets, vertex_weights = _get_vertex_regression_labels(
                im_labels, vertmap, roidb[i]['class_extents'], num_classes)
            processed_vertex_targets.append(vertex_targets)
            processed_vertex_weights.append(vertex_weights)
            processed_vertex_images.append(vertmap)
            # center_targets, center_weights = _vote_centers(im, meta_data['cls_indexes'], meta_data['center'], num_classes)
            # processed_vertex_targets.append(np.concatenate((center_targets, vertex_targets), axis=2))
            # processed_vertex_weights.append(np.concatenate((center_weights, vertex_weights), axis=2))

        # depth
        if roidb[i]['flipped']:
            im_depth = im_depth[:, ::-1]
        depth = im_depth.astype(np.float32, copy=True) / float(meta_data['factor_depth'])
        processed_depth.append(depth)

        # voxelization
        points = voxelizer.backproject_camera(im_depth, meta_data)
        voxelizer.voxelized = False
        voxelizer.voxelize(points)
        RT_world = meta_data['rotation_translation_matrix']

        # compute camera poses
        RT_live = meta_data['rotation_translation_matrix']
        pose_world2live = se3_mul(RT_live, se3_inverse(RT_world))
        pose_live2world = se3_inverse(pose_world2live)

        # construct the meta data
        """
        format of the meta_data
        intrinsic matrix: meta_data[0 ~ 8]
        inverse intrinsic matrix: meta_data[9 ~ 17]
        pose_world2live: meta_data[18 ~ 29]
        pose_live2world: meta_data[30 ~ 41]
        voxel step size: meta_data[42, 43, 44]
        voxel min value: meta_data[45, 46, 47]
        """
        K = np.matrix(meta_data['intrinsic_matrix'])
        Kinv = np.linalg.pinv(K)
        mdata = np.zeros(48, dtype=np.float32)
        mdata[0:9] = K.flatten()
        mdata[9:18] = Kinv.flatten()
        mdata[18:30] = pose_world2live.flatten()
        mdata[30:42] = pose_live2world.flatten()
        mdata[42] = voxelizer.step_x
        mdata[43] = voxelizer.step_y
        mdata[44] = voxelizer.step_z
        mdata[45] = voxelizer.min_x
        mdata[46] = voxelizer.min_y
        mdata[47] = voxelizer.min_z
        if cfg.FLIP_X:
            mdata[0] = -1 * mdata[0]
            mdata[9] = -1 * mdata[9]
            mdata[11] = -1 * mdata[11]
        processed_meta_data.append(mdata)

    # construct the blobs
    height = processed_depth[0].shape[0]
    width = processed_depth[0].shape[1]
    depth_blob = np.zeros((num_images, height, width, 1), dtype=np.float32)
    label_blob = np.zeros((num_images, height, width, num_classes), dtype=np.float32)
    meta_data_blob = np.zeros((num_images, 1, 1, 48), dtype=np.float32)
    if cfg.TRAIN.VERTEX_REG:
        vertex_target_blob = np.zeros((num_images, height, width, 2 * num_classes), dtype=np.float32)
        vertex_weight_blob = np.zeros((num_images, height, width, 2 * num_classes), dtype=np.float32)
        vertex_image_blob = np.zeros((num_images, height, width, 3), dtype=np.float32)
    else:
        vertex_target_blob = []
        vertex_weight_blob = []
        vertex_image_blob = []

    for i in xrange(num_images):
        depth_blob[i, :, :, 0] = processed_depth[i]
        label_blob[i, :, :, :] = processed_label[i]
        meta_data_blob[i, 0, 0, :] = processed_meta_data[i]
        if cfg.TRAIN.VERTEX_REG:
            vertex_target_blob[i, :, :, :] = processed_vertex_targets[i]
            vertex_weight_blob[i, :, :, :] = processed_vertex_weights[i]
            vertex_image_blob[i, :, :, :] = processed_vertex_images[i]

    return depth_blob, label_blob, meta_data_blob, vertex_target_blob, vertex_weight_blob, vertex_image_blob
def test_net(net, imdb):
    """Test a Fast R-CNN network on an image database."""
    output_dir = get_output_dir(imdb, net)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    seg_file = os.path.join(output_dir, 'segmentations.pkl')
    print imdb.name
    if os.path.exists(seg_file):
        with open(seg_file, 'rb') as fid:
            segmentations = cPickle.load(fid)
        imdb.evaluate_segmentations(segmentations, output_dir)
        return

    num_images = len(imdb.image_index)
    segmentations = [[] for _ in xrange(num_images)]

    # timers
    _t = {'im_segment': Timer(), 'misc': Timer()}

    if cfg.TEST.VISUALIZE:
        perm = np.random.permutation(np.arange(num_images))
    else:
        perm = xrange(num_images)

    for i in perm:
        # read color image
        rgba = pad_im(cv2.imread(imdb.image_path_at(i), cv2.IMREAD_UNCHANGED), 16)
        if rgba.shape[2] == 4:
            im = np.copy(rgba[:, :, :3])
            alpha = rgba[:, :, 3]
            I = np.where(alpha == 0)
            im[I[0], I[1], :] = 255
        else:
            im = rgba

        # read depth image
        im_depth = cv2.imread(imdb.depth_path_at(i), cv2.IMREAD_UNCHANGED)

        _t['im_segment'].tic()
        labels = im_segment(net, im, im_depth, imdb.num_classes)
        _t['im_segment'].toc()

        # build the label image
        im_label = imdb.labels_to_image(im, labels)

        _t['misc'].tic()
        seg = {'labels': labels}
        segmentations[i] = seg
        _t['misc'].toc()

        # read label image
        labels_gt = pad_im(cv2.imread(imdb.label_path_at(i), cv2.IMREAD_UNCHANGED), 16)
        if len(labels_gt.shape) == 2:
            im_label_gt = imdb.labels_to_image(im, labels_gt)
        else:
            im_label_gt = np.copy(labels_gt[:, :, :3])
            im_label_gt[:, :, 0] = labels_gt[:, :, 2]
            im_label_gt[:, :, 2] = labels_gt[:, :, 0]

        if cfg.TEST.VISUALIZE:
            vis_segmentations(im, im_depth, im_label, im_label_gt, imdb._class_colors)

        print 'im_segment: {:d}/{:d} {:.3f}s {:.3f}s' \
            .format(i + 1, num_images, _t['im_segment'].average_time, _t['misc'].average_time)

    seg_file = os.path.join(output_dir, 'segmentations.pkl')
    with open(seg_file, 'wb') as f:
        cPickle.dump(segmentations, f, cPickle.HIGHEST_PROTOCOL)

    # evaluation
    imdb.evaluate_segmentations(segmentations, output_dir)
def _get_label_blob(roidb, voxelizer):
    """ build the label blob """
    num_images = len(roidb)
    num_classes = voxelizer.num_classes
    processed_depth = []
    processed_label = []
    processed_meta_data = []
    if cfg.TRAIN.VERTEX_REG:
        processed_vertex_targets = []
        processed_vertex_weights = []

    for i in xrange(num_images):
        # load meta data
        meta_data = scipy.io.loadmat(roidb[i]['meta_data'])
        im_depth = pad_im(cv2.imread(roidb[i]['depth'], cv2.IMREAD_UNCHANGED), 16)

        # read label image
        im = pad_im(cv2.imread(roidb[i]['label'], cv2.IMREAD_UNCHANGED), 16)
        height = im.shape[0]
        width = im.shape[1]

        # mask the label image according to depth
        if cfg.INPUT == 'DEPTH':
            I = np.where(im_depth == 0)
            if len(im.shape) == 2:
                im[I[0], I[1]] = 0
            else:
                im[I[0], I[1], :] = 0

        if roidb[i]['flipped']:
            if len(im.shape) == 2:
                im = im[:, ::-1]
            else:
                im = im[:, ::-1, :]

        im_cls = _process_label_image(im, roidb[i]['class_colors'], roidb[i]['class_weights'])
        processed_label.append(im_cls)

        # vertex regression targets and weights
        if cfg.TRAIN.VERTEX_REG:
            vertmap = meta_data['vertmap']
            if roidb[i]['flipped']:
                vertmap = vertmap[:, ::-1, :]
            vertex_targets, vertex_weights = _get_vertex_regression_labels(im, vertmap, num_classes)
            processed_vertex_targets.append(vertex_targets)
            processed_vertex_weights.append(vertex_weights)

        # depth
        if roidb[i]['flipped']:
            im_depth = im_depth[:, ::-1]
        depth = im_depth.astype(np.float32, copy=True) / float(meta_data['factor_depth'])
        processed_depth.append(depth)

        # voxelization
        points = voxelizer.backproject_camera(im_depth, meta_data)
        voxelizer.voxelized = False
        voxelizer.voxelize(points)
        RT_world = meta_data['rotation_translation_matrix']

        # compute camera poses
        RT_live = meta_data['rotation_translation_matrix']
        pose_world2live = se3_mul(RT_live, se3_inverse(RT_world))
        pose_live2world = se3_inverse(pose_world2live)

        # construct the meta data
        """
        format of the meta_data
        intrinsic matrix: meta_data[0 ~ 8]
        inverse intrinsic matrix: meta_data[9 ~ 17]
        pose_world2live: meta_data[18 ~ 29]
        pose_live2world: meta_data[30 ~ 41]
        voxel step size: meta_data[42, 43, 44]
        voxel min value: meta_data[45, 46, 47]
        """
        K = np.matrix(meta_data['intrinsic_matrix'])
        Kinv = np.linalg.pinv(K)
        mdata = np.zeros(48, dtype=np.float32)
        mdata[0:9] = K.flatten()
        mdata[9:18] = Kinv.flatten()
        mdata[18:30] = pose_world2live.flatten()
        mdata[30:42] = pose_live2world.flatten()
        mdata[42] = voxelizer.step_x
        mdata[43] = voxelizer.step_y
        mdata[44] = voxelizer.step_z
        mdata[45] = voxelizer.min_x
        mdata[46] = voxelizer.min_y
        mdata[47] = voxelizer.min_z
        if cfg.FLIP_X:
            mdata[0] = -1 * mdata[0]
            mdata[9] = -1 * mdata[9]
            mdata[11] = -1 * mdata[11]
        processed_meta_data.append(mdata)

    # construct the blobs
    height = processed_depth[0].shape[0]
    width = processed_depth[0].shape[1]
    depth_blob = np.zeros((num_images, height, width, 1), dtype=np.float32)
    label_blob = np.zeros((num_images, height, width, 1), dtype=np.float32)
    meta_data_blob = np.zeros((num_images, 1, 1, 48), dtype=np.float32)
    if cfg.TRAIN.VERTEX_REG:
        vertex_target_blob = np.zeros((num_images, height, width, 3 * num_classes), dtype=np.float32)
        vertex_weight_blob = np.zeros((num_images, height, width, 3 * num_classes), dtype=np.float32)
    else:
        vertex_target_blob = []
        vertex_weight_blob = []

    for i in xrange(num_images):
        depth_blob[i, :, :, 0] = processed_depth[i]
        label_blob[i, :, :, :] = processed_label[i]
        meta_data_blob[i, 0, 0, :] = processed_meta_data[i]
        if cfg.TRAIN.VERTEX_REG:
            vertex_target_blob[i, :, :, :] = processed_vertex_targets[i]
            vertex_weight_blob[i, :, :, :] = processed_vertex_weights[i]

    # convert from NHWC to NCHW
    channel_swap = (0, 3, 1, 2)
    depth_blob = depth_blob.transpose(channel_swap)
    label_blob = label_blob.transpose(channel_swap)
    meta_data_blob = meta_data_blob.transpose(channel_swap)
    if cfg.TRAIN.VERTEX_REG:
        vertex_target_blob = vertex_target_blob.transpose(channel_swap)
        vertex_weight_blob = vertex_weight_blob.transpose(channel_swap)

    return depth_blob, label_blob, meta_data_blob, vertex_target_blob, vertex_weight_blob
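# The channel_swap above converts blobs from NHWC (TensorFlow layout) to
# NCHW (Caffe layout); a minimal self-contained demonstration:
import numpy as np

blob_nhwc = np.zeros((2, 480, 640, 3), dtype=np.float32)
blob_nchw = blob_nhwc.transpose((0, 3, 1, 2))
assert blob_nchw.shape == (2, 3, 480, 640)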
def _get_image_blob(roidb, scale_ind, num_classes, backgrounds, intrinsic_matrix,
                    db_inds_syn, is_syn):
    """Builds an input blob from the images in the roidb at the specified scales."""
    num_images = len(roidb)
    processed_ims = []
    processed_ims_depth = []
    processed_ims_normal = []
    im_scales = []
    roidb_syn = []

    for i in xrange(num_images):
        if is_syn:
            # depth raw
            filename = cfg.TRAIN.SYNROOT + '{:06d}-depth.png'.format(db_inds_syn[i])
            im_depth_raw = pad_im(cv2.imread(filename, cv2.IMREAD_UNCHANGED), 16)

            # rgba
            filename = cfg.TRAIN.SYNROOT + '{:06d}-color.png'.format(db_inds_syn[i])
            rgba = pad_im(cv2.imread(filename, cv2.IMREAD_UNCHANGED), 16)

            # sample a background image
            ind = np.random.randint(len(backgrounds), size=1)[0]
            filename = backgrounds[ind]
            background = cv2.imread(filename, cv2.IMREAD_UNCHANGED)
            try:
                background = cv2.resize(background, (rgba.shape[1], rgba.shape[0]),
                                        interpolation=cv2.INTER_LINEAR)
            except:
                if cfg.INPUT == 'DEPTH' or cfg.INPUT == 'NORMAL':
                    background = np.zeros((rgba.shape[0], rgba.shape[1]), dtype=np.uint16)
                else:
                    background = np.zeros((rgba.shape[0], rgba.shape[1], 3), dtype=np.uint8)
                print 'bad background image'

            if cfg.INPUT != 'DEPTH' and cfg.INPUT != 'NORMAL' and len(background.shape) != 3:
                background = np.zeros((rgba.shape[0], rgba.shape[1], 3), dtype=np.uint8)
                print 'bad background image'

            # add background
            im = np.copy(rgba[:, :, :3])
            alpha = rgba[:, :, 3]
            I = np.where(alpha == 0)
            if cfg.INPUT == 'DEPTH' or cfg.INPUT == 'NORMAL':
                im_depth_raw[I[0], I[1]] = background[I[0], I[1]] / 10
            else:
                im[I[0], I[1], :] = background[I[0], I[1], :3]
        else:
            # depth raw
            im_depth_raw = pad_im(cv2.imread(roidb[i]['depth'], cv2.IMREAD_UNCHANGED), 16)

            # rgba
            rgba = pad_im(cv2.imread(roidb[i]['image'], cv2.IMREAD_UNCHANGED), 16)
            if rgba.shape[2] == 4:
                im = np.copy(rgba[:, :, :3])
                alpha = rgba[:, :, 3]
                I = np.where(alpha == 0)
                im[I[0], I[1], :] = 0
            else:
                im = rgba

        # chromatic transform
        if cfg.TRAIN.CHROMATIC:
            im = chromatic_transform(im)
        if cfg.TRAIN.ADD_NOISE:
            im = add_noise(im)

        if roidb[i]['flipped']:
            im = im[:, ::-1, :]

        im_orig = im.astype(np.float32, copy=True)
        im_orig -= cfg.PIXEL_MEANS
        im_scale = cfg.TRAIN.SCALES_BASE[scale_ind]
        im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale,
                        interpolation=cv2.INTER_LINEAR)
        im_scales.append(im_scale)
        processed_ims.append(im)

        # depth
        im_depth = im_depth_raw.astype(np.float32, copy=True) / float(im_depth_raw.max()) * 255
        im_depth = np.tile(im_depth[:, :, np.newaxis], (1, 1, 3))
        if cfg.TRAIN.ADD_NOISE:
            im_depth = add_noise(im_depth)
        if roidb[i]['flipped']:
            im_depth = im_depth[:, ::-1]
        im_orig = im_depth.astype(np.float32, copy=True)
        im_orig -= cfg.PIXEL_MEANS
        im_depth = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale,
                              interpolation=cv2.INTER_LINEAR)
        processed_ims_depth.append(im_depth)

        # normals
        if cfg.INPUT == 'NORMAL':
            depth = im_depth_raw.astype(np.float32, copy=True) / 1000.0
            fx = intrinsic_matrix[0, 0] * im_scale
            fy = intrinsic_matrix[1, 1] * im_scale
            cx = intrinsic_matrix[0, 2] * im_scale
            cy = intrinsic_matrix[1, 2] * im_scale
            nmap = gpu_normals.gpu_normals(depth, fx, fy, cx, cy, 20.0, cfg.GPU_ID)
            im_normal = 127.5 * nmap + 127.5
            im_normal = im_normal.astype(np.uint8)
            im_normal = im_normal[:, :, (2, 1, 0)]
            im_normal = cv2.bilateralFilter(im_normal, 9, 75, 75)
            if roidb[i]['flipped']:
                im_normal = im_normal[:, ::-1, :]
            im_orig = im_normal.astype(np.float32, copy=True)
            im_orig -= cfg.PIXEL_MEANS
            im_normal = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale,
                                   interpolation=cv2.INTER_LINEAR)
            processed_ims_normal.append(im_normal)

    if cfg.INPUT == 'NORMAL':
        blob_normal = im_list_to_blob(processed_ims_normal, 3)
    else:
        blob_normal = []

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims, 3)
    blob_depth = im_list_to_blob(processed_ims_depth, 3)

    return blob, blob_depth, blob_normal, im_scales
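# Self-contained sketch of the background compositing used for synthetic
# images above: wherever the rendered alpha channel is zero, the pixel is
# taken from the sampled background. All arrays here are random stand-ins.
import numpy as np

fake_rgba = np.random.randint(0, 256, (64, 64, 4), dtype=np.uint8)  # rendered RGBA
fake_background = np.random.randint(0, 256, (64, 64, 3), dtype=np.uint8)

im = np.copy(fake_rgba[:, :, :3])
alpha = fake_rgba[:, :, 3]
I = np.where(alpha == 0)
im[I[0], I[1], :] = fake_background[I[0], I[1], :3]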
def test_net_single_frame(sess, net, imdb, weights_filename, rig_filename, is_kfusion):
    """Test a FCN on an image database."""
    output_dir = get_output_dir(imdb, weights_filename)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    seg_file = os.path.join(output_dir, 'segmentations.pkl')
    print imdb.name
    if os.path.exists(seg_file):
        with open(seg_file, 'rb') as fid:
            segmentations = cPickle.load(fid)
        imdb.evaluate_segmentations(segmentations, output_dir)
        return

    num_images = len(imdb.image_index)
    segmentations = [[] for _ in xrange(num_images)]

    # timers
    _t = {'im_segment': Timer(), 'misc': Timer()}

    # kinect fusion
    if is_kfusion:
        KF = kfusion.PyKinectFusion(rig_filename)

    # pose estimation
    if cfg.TEST.VERTEX_REG and cfg.TEST.RANSAC:
        RANSAC = ransac.PyRansac3D()

    # construct colors
    colors = np.zeros((3 * imdb.num_classes), dtype=np.uint8)
    for i in range(imdb.num_classes):
        colors[i * 3 + 0] = imdb._class_colors[i][0]
        colors[i * 3 + 1] = imdb._class_colors[i][1]
        colors[i * 3 + 2] = imdb._class_colors[i][2]

    if cfg.TEST.VISUALIZE:
        # perm = np.random.permutation(np.arange(num_images))
        perm = xrange(0, num_images, 5)
    else:
        perm = xrange(num_images)

    video_index = ''
    have_prediction = False
    for i in perm:
        # parse image name
        image_index = imdb.image_index[i]
        pos = image_index.find('/')
        if video_index == '':
            video_index = image_index[:pos]
            have_prediction = False
        else:
            if video_index != image_index[:pos]:
                have_prediction = False
                video_index = image_index[:pos]
                print 'start video {}'.format(video_index)

        # read color image
        rgba = pad_im(cv2.imread(imdb.image_path_at(i), cv2.IMREAD_UNCHANGED), 16)
        if rgba.shape[2] == 4:
            im = np.copy(rgba[:, :, :3])
            alpha = rgba[:, :, 3]
            I = np.where(alpha == 0)
            im[I[0], I[1], :] = 0
        else:
            im = rgba

        # read depth image
        im_depth = pad_im(cv2.imread(imdb.depth_path_at(i), cv2.IMREAD_UNCHANGED), 16)

        # load meta data
        meta_data = scipy.io.loadmat(imdb.metadata_path_at(i))

        # read label image
        labels_gt = pad_im(cv2.imread(imdb.label_path_at(i), cv2.IMREAD_UNCHANGED), 16)
        if len(labels_gt.shape) == 2:
            im_label_gt = imdb.labels_to_image(im, labels_gt)
        else:
            im_label_gt = np.copy(labels_gt[:, :, :3])
            im_label_gt[:, :, 0] = labels_gt[:, :, 2]
            im_label_gt[:, :, 2] = labels_gt[:, :, 0]

        _t['im_segment'].tic()
        labels, probs, vertex_pred = im_segment_single_frame(sess, net, im, im_depth,
                                                             meta_data, imdb.num_classes)
        if cfg.TEST.VERTEX_REG:
            vertmap = _extract_vertmap(labels, vertex_pred, imdb._extents, imdb.num_classes)
            if cfg.TEST.RANSAC:
                # pose estimation using RANSAC
                fx = meta_data['intrinsic_matrix'][0, 0]
                fy = meta_data['intrinsic_matrix'][1, 1]
                px = meta_data['intrinsic_matrix'][0, 2]
                py = meta_data['intrinsic_matrix'][1, 2]
                depth_factor = meta_data['factor_depth'][0, 0]
                poses = RANSAC.estimate_pose(im_depth, probs,
                                             vertex_pred[0, :, :, :] / cfg.TRAIN.VERTEX_W,
                                             imdb._extents, fx, fy, px, py, depth_factor)
                # print gt poses
                # cls_indexes = meta_data['cls_indexes']
                # poses_gt = meta_data['poses']
                # for j in xrange(len(cls_indexes)):
                #     print 'object {}'.format(cls_indexes[j])
                #     print poses_gt[:, :, j]
            else:
                poses = []
        _t['im_segment'].toc()

        _t['misc'].tic()
        labels = unpad_im(labels, 16)

        # build the label image
        im_label = imdb.labels_to_image(im, labels)

        if not have_prediction:
            if is_kfusion:
                KF.set_voxel_grid(-3, -3, -3, 6, 6, 7)

        # run kinect fusion
        if is_kfusion:
            height = im.shape[0]
            width = im.shape[1]
            labels_kfusion = np.zeros((height, width), dtype=np.int32)

            im_rgb = np.copy(im)
            im_rgb[:, :, 0] = im[:, :, 2]
            im_rgb[:, :, 2] = im[:, :, 0]
            KF.feed_data(im_depth, im_rgb, im.shape[1], im.shape[0],
                         float(meta_data['factor_depth']))
            KF.back_project()
            if have_prediction:
                pose_world2live, pose_live2world = KF.solve_pose()
            KF.feed_label(im_label, probs, colors)
            KF.fuse_depth()
            labels_kfusion = KF.extract_surface(labels_kfusion)
            im_label_kfusion = imdb.labels_to_image(im, labels_kfusion)
            KF.render()
            filename = os.path.join(output_dir, 'images', '{:04d}'.format(i))
            KF.draw(filename, 0)
        have_prediction = True

        if is_kfusion:
            seg = {'labels': labels_kfusion}
        else:
            seg = {'labels': labels}
        segmentations[i] = seg
        _t['misc'].toc()

        print 'im_segment {}: {:d}/{:d} {:.3f}s {:.3f}s' \
            .format(video_index, i + 1, num_images, _t['im_segment'].diff, _t['misc'].diff)

        if cfg.TEST.VISUALIZE:
            if cfg.TEST.VERTEX_REG:
                # centers_gt = _vote_centers(labels_gt, meta_data['cls_indexes'], meta_data['center'], imdb.num_classes)
                vertmap_gt = pad_im(cv2.imread(imdb.vertmap_path_at(i), cv2.IMREAD_UNCHANGED), 16)
                vertmap_gt = vertmap_gt[:, :, (2, 1, 0)]
                vertmap_gt = vertmap_gt.astype(np.float32) / 255.0
                vertmap_gt = _unscale_vertmap(vertmap_gt, imdb._process_label_image(labels_gt),
                                              imdb._extents, imdb.num_classes)
                print 'visualization'
                vis_segmentations_vertmaps(im, im_depth, im_label, im_label_gt,
                                           imdb._class_colors, vertmap_gt, vertmap,
                                           labels, labels_gt, poses,
                                           meta_data['intrinsic_matrix'])
            else:
                vis_segmentations(im, im_depth, im_label, im_label_gt, imdb._class_colors)

    seg_file = os.path.join(output_dir, 'segmentations.pkl')
    with open(seg_file, 'wb') as f:
        cPickle.dump(segmentations, f, cPickle.HIGHEST_PROTOCOL)

    # evaluation
    imdb.evaluate_segmentations(segmentations, output_dir)
def test_net(sess, net, imdb, weights_filename, rig_filename, is_kfusion):
    """Test a FCN on an image database."""
    output_dir = get_output_dir(imdb, weights_filename)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    seg_file = os.path.join(output_dir, 'segmentations.pkl')
    print imdb.name
    if os.path.exists(seg_file):
        with open(seg_file, 'rb') as fid:
            segmentations = cPickle.load(fid)
        imdb.evaluate_segmentations(segmentations, output_dir)
        return

    num_images = len(imdb.image_index)
    segmentations = [[] for _ in xrange(num_images)]

    # timers
    _t = {'im_segment': Timer(), 'misc': Timer()}

    # voxelizer
    voxelizer = Voxelizer(cfg.TEST.GRID_SIZE, imdb.num_classes)
    voxelizer.setup(-3, -3, -3, 3, 3, 4)
    # voxelizer.setup(-2, -2, -2, 2, 2, 2)

    # kinect fusion
    if is_kfusion:
        KF = kfusion.PyKinectFusion(rig_filename)

    # construct colors
    colors = np.zeros((3 * imdb.num_classes), dtype=np.uint8)
    for i in range(imdb.num_classes):
        colors[i * 3 + 0] = imdb._class_colors[i][0]
        colors[i * 3 + 1] = imdb._class_colors[i][1]
        colors[i * 3 + 2] = imdb._class_colors[i][2]

    if cfg.TEST.VISUALIZE:
        perm = np.random.permutation(np.arange(num_images))
    else:
        perm = xrange(num_images)

    video_index = ''
    have_prediction = False
    for i in perm:
        rgba = pad_im(cv2.imread(imdb.image_path_at(i), cv2.IMREAD_UNCHANGED), 16)
        height = rgba.shape[0]
        width = rgba.shape[1]

        # parse image name; reset the recurrent state at each new video
        image_index = imdb.image_index[i]
        pos = image_index.find('/')
        if video_index == '':
            video_index = image_index[:pos]
            have_prediction = False
            state = np.zeros((1, height, width, cfg.TRAIN.NUM_UNITS), dtype=np.float32)
            weights = np.ones((1, height, width, cfg.TRAIN.NUM_UNITS), dtype=np.float32)
            points = np.zeros((1, height, width, 3), dtype=np.float32)
        else:
            if video_index != image_index[:pos]:
                have_prediction = False
                video_index = image_index[:pos]
                state = np.zeros((1, height, width, cfg.TRAIN.NUM_UNITS), dtype=np.float32)
                weights = np.ones((1, height, width, cfg.TRAIN.NUM_UNITS), dtype=np.float32)
                points = np.zeros((1, height, width, 3), dtype=np.float32)
                print 'start video {}'.format(video_index)

        # read color image
        if rgba.shape[2] == 4:
            im = np.copy(rgba[:, :, :3])
            alpha = rgba[:, :, 3]
            I = np.where(alpha == 0)
            im[I[0], I[1], :] = 0
        else:
            im = rgba

        # read depth image
        im_depth = pad_im(cv2.imread(imdb.depth_path_at(i), cv2.IMREAD_UNCHANGED), 16)

        # load meta data
        meta_data = scipy.io.loadmat(imdb.metadata_path_at(i))

        # backprojection for the first frame
        if not have_prediction:
            if is_kfusion:
                # KF.set_voxel_grid(-3, -3, -3, 6, 6, 7)
                KF.set_voxel_grid(voxelizer.min_x, voxelizer.min_y, voxelizer.min_z,
                                  voxelizer.max_x - voxelizer.min_x,
                                  voxelizer.max_y - voxelizer.min_y,
                                  voxelizer.max_z - voxelizer.min_z)
                # identity transformation
                RT_world = np.zeros((3, 4), dtype=np.float32)
                RT_world[0, 0] = 1
                RT_world[1, 1] = 1
                RT_world[2, 2] = 1
            else:
                # store the RT for the first frame
                RT_world = meta_data['rotation_translation_matrix']

        # run kinect fusion
        if is_kfusion:
            im_rgb = np.copy(im)
            im_rgb[:, :, 0] = im[:, :, 2]
            im_rgb[:, :, 2] = im[:, :, 0]
            KF.feed_data(im_depth, im_rgb, im.shape[1], im.shape[0],
                         float(meta_data['factor_depth']))
            KF.back_project()
            if have_prediction:
                pose_world2live, pose_live2world = KF.solve_pose()
                RT_live = pose_world2live
            else:
                RT_live = RT_world
        else:
            # compute camera poses
            RT_live = meta_data['rotation_translation_matrix']

        pose_world2live = se3_mul(RT_live, se3_inverse(RT_world))
        pose_live2world = se3_inverse(pose_world2live)

        _t['im_segment'].tic()
        labels, probs, state, weights, points = im_segment(sess, net, im, im_depth,
                                                           state, weights, points,
                                                           meta_data, voxelizer,
                                                           pose_world2live, pose_live2world)
        _t['im_segment'].toc()
        # time.sleep(3)

        _t['misc'].tic()
        labels = unpad_im(labels, 16)

        # build the label image
        im_label = imdb.labels_to_image(im, labels)

        if is_kfusion:
            labels_kfusion = np.zeros((height, width), dtype=np.int32)
            if probs.shape[2] < 10:
                probs_new = np.zeros((probs.shape[0], probs.shape[1], 10), dtype=np.float32)
                probs_new[:, :, :imdb.num_classes] = probs
                probs = probs_new
            KF.feed_label(im_label, probs, colors)
            KF.fuse_depth()
            labels_kfusion = KF.extract_surface(labels_kfusion)
            im_label_kfusion = imdb.labels_to_image(im, labels_kfusion)
            KF.render()
            filename = os.path.join(output_dir, 'images', '{:04d}'.format(i))
            KF.draw(filename, 0)
        have_prediction = True

        # compute the delta transformation between frames
        RT_world = RT_live

        if is_kfusion:
            seg = {'labels': labels_kfusion}
        else:
            seg = {'labels': labels}
        segmentations[i] = seg
        _t['misc'].toc()

        if cfg.TEST.VISUALIZE:
            # read label image
            labels_gt = pad_im(cv2.imread(imdb.label_path_at(i), cv2.IMREAD_UNCHANGED), 16)
            if len(labels_gt.shape) == 2:
                im_label_gt = imdb.labels_to_image(im, labels_gt)
            else:
                im_label_gt = np.copy(labels_gt[:, :, :3])
                im_label_gt[:, :, 0] = labels_gt[:, :, 2]
                im_label_gt[:, :, 2] = labels_gt[:, :, 0]
            vis_segmentations(im, im_depth, im_label, im_label_gt, imdb._class_colors)

        print 'im_segment: {:d}/{:d} {:.3f}s {:.3f}s' \
            .format(i + 1, num_images, _t['im_segment'].diff, _t['misc'].diff)

    if is_kfusion:
        KF.draw(filename, 1)

    seg_file = os.path.join(output_dir, 'segmentations.pkl')
    with open(seg_file, 'wb') as f:
        cPickle.dump(segmentations, f, cPickle.HIGHEST_PROTOCOL)

    # evaluation
    imdb.evaluate_segmentations(segmentations, output_dir)
def _get_label_blob(roidb, voxelizer):
    """ build the label blob """
    num_images = len(roidb)
    processed_depth = []
    processed_label = []
    processed_meta_data = []

    for i in xrange(num_images):
        # load meta data
        meta_data = scipy.io.loadmat(roidb[i]['meta_data'])
        im_depth = pad_im(cv2.imread(roidb[i]['depth'], cv2.IMREAD_UNCHANGED), 16)

        # read label image
        im = pad_im(cv2.imread(roidb[i]['label'], cv2.IMREAD_UNCHANGED), 16)

        # mask the label image according to depth
        if cfg.INPUT == 'DEPTH':
            I = np.where(im_depth == 0)
            if len(im.shape) == 2:
                im[I[0], I[1]] = 0
            else:
                im[I[0], I[1], :] = 0

        if roidb[i]['flipped']:
            im = im[:, ::-1, :]

        im_cls = _process_label_image(im, roidb[i]['class_colors'], roidb[i]['class_weights'])
        processed_label.append(im_cls)

        # depth
        if roidb[i]['flipped']:
            im_depth = im_depth[:, ::-1]
        depth = im_depth.astype(np.float32, copy=True) / float(meta_data['factor_depth'])
        processed_depth.append(depth)

        # voxelization: only at the first frame of each sequence
        if i % cfg.TRAIN.NUM_STEPS == 0:
            points = voxelizer.backproject_camera(im_depth, meta_data)
            voxelizer.voxelized = False
            voxelizer.voxelize(points)
            # store the RT for the first frame
            RT_world = meta_data['rotation_translation_matrix']

        # compute camera poses
        RT_live = meta_data['rotation_translation_matrix']
        pose_world2live = se3_mul(RT_live, se3_inverse(RT_world))
        pose_live2world = se3_inverse(pose_world2live)

        # construct the meta data
        """
        format of the meta_data
        intrinsic matrix: meta_data[0 ~ 8]
        inverse intrinsic matrix: meta_data[9 ~ 17]
        pose_world2live: meta_data[18 ~ 29]
        pose_live2world: meta_data[30 ~ 41]
        voxel step size: meta_data[42, 43, 44]
        voxel min value: meta_data[45, 46, 47]
        """
        K = np.matrix(meta_data['intrinsic_matrix'])
        Kinv = np.linalg.pinv(K)
        mdata = np.zeros(48, dtype=np.float32)
        mdata[0:9] = K.flatten()
        mdata[9:18] = Kinv.flatten()
        mdata[18:30] = pose_world2live.flatten()
        mdata[30:42] = pose_live2world.flatten()
        mdata[42] = voxelizer.step_x
        mdata[43] = voxelizer.step_y
        mdata[44] = voxelizer.step_z
        mdata[45] = voxelizer.min_x
        mdata[46] = voxelizer.min_y
        mdata[47] = voxelizer.min_z
        if cfg.FLIP_X:
            mdata[0] = -1 * mdata[0]
            mdata[9] = -1 * mdata[9]
            mdata[11] = -1 * mdata[11]
        processed_meta_data.append(mdata)

        # compute the delta transformation between frames
        RT_world = RT_live

    # construct the blobs
    height = processed_depth[0].shape[0]
    width = processed_depth[0].shape[1]
    num_classes = voxelizer.num_classes
    depth_blob = np.zeros((num_images, height, width, 1), dtype=np.float32)
    label_blob = np.zeros((num_images, height, width, num_classes), dtype=np.float32)
    meta_data_blob = np.zeros((num_images, 1, 1, 48), dtype=np.float32)
    for i in xrange(num_images):
        depth_blob[i, :, :, 0] = processed_depth[i]
        label_blob[i, :, :, :] = processed_label[i]
        meta_data_blob[i, 0, 0, :] = processed_meta_data[i]

    state_blob = np.zeros((cfg.TRAIN.IMS_PER_BATCH, height, width, cfg.TRAIN.NUM_UNITS), dtype=np.float32)
    weights_blob = np.ones((cfg.TRAIN.IMS_PER_BATCH, height, width, cfg.TRAIN.NUM_UNITS), dtype=np.float32)
    points_blob = np.zeros((cfg.TRAIN.IMS_PER_BATCH, height, width, 3), dtype=np.float32)

    return depth_blob, label_blob, meta_data_blob, state_blob, weights_blob, points_blob
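# Sketch of the relative-pose computation above, with se3_inverse and se3_mul
# reimplemented here for illustration (assuming 3x4 [R | t] matrices, matching
# how they are used with 'rotation_translation_matrix').
import numpy as np

def se3_inverse_demo(RT):
    R, t = RT[:, :3], RT[:, 3]
    return np.hstack([R.T, (-R.T.dot(t))[:, None]])

def se3_mul_demo(RT1, RT2):
    R = RT1[:, :3].dot(RT2[:, :3])
    t = RT1[:, :3].dot(RT2[:, 3]) + RT1[:, 3]
    return np.hstack([R, t[:, None]])

RT_world = np.hstack([np.eye(3), np.zeros((3, 1))])                # first frame
RT_live = np.hstack([np.eye(3), np.array([[0.1], [0.0], [0.0]])])  # current frame
pose_world2live = se3_mul_demo(RT_live, se3_inverse_demo(RT_world))
pose_live2world = se3_inverse_demo(pose_world2live)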
def test_net(sess, net, imdb, weights_filename, rig_filename, is_kfusion):
    """Test a FCN on an image database."""

    output_dir = get_output_dir(imdb, weights_filename)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    print 'the output dir is:', output_dir

    num_images = len(imdb.image_index)

    # timers
    _t = {'im_segment': Timer(), 'misc': Timer()}

    # voxelizer
    voxelizer = Voxelizer(cfg.TEST.GRID_SIZE, imdb.num_classes)
    voxelizer.setup(-3, -3, -3, 3, 3, 4)

    # construct colors
    colors = np.zeros((3 * imdb.num_classes), dtype=np.uint8)
    for i in range(imdb.num_classes):
        colors[i * 3 + 0] = imdb._class_colors[i][0]
        colors[i * 3 + 1] = imdb._class_colors[i][1]
        colors[i * 3 + 2] = imdb._class_colors[i][2]

    if cfg.TEST.VISUALIZE:
        perm = np.random.permutation(np.arange(num_images))
    else:
        perm = xrange(num_images)

    video_index = ''
    have_prediction = False
    i = 0
    # loop over frames streamed from the RGB-D getter
    while True:
        print i

        data_chunk = rgbd_getter.data_getter()
        im = data_chunk['rgb_image']
        im_depth = data_chunk['depth_image']

        # convert BGR to RGB and pad to a multiple of 16
        rgba = im[..., [2, 1, 0]]
        rgba = rgba.astype(np.uint8)
        rgba = pad_im(rgba, 16)
        height = rgba.shape[0]
        width = rgba.shape[1]

        # parse image name
        image_index = imdb.image_index[i]

        # initialize the recurrent state for the first frame
        if i == 0:
            have_prediction = False
            state = np.zeros((1, height, width, cfg.TRAIN.NUM_UNITS), dtype=np.float32)
            weights = np.ones((1, height, width, cfg.TRAIN.NUM_UNITS), dtype=np.float32)
            points = np.zeros((1, height, width, 3), dtype=np.float32)

        # read color image
        if rgba.shape[2] == 4:
            im = np.copy(rgba[:, :, :3])
            alpha = rgba[:, :, 3]
            I = np.where(alpha == 0)
            im[I[0], I[1], :] = 0
        else:
            im = rgba

        # read depth image
        im_depth = pad_im(im_depth, 16)

        # load meta data
        meta_data = data_chunk['meta_data']

        # store the reference RT for the first frame
        if not have_prediction:
            RT_world = meta_data['rotation_translation_matrix']

        # compute camera poses
        RT_live = meta_data['rotation_translation_matrix']
        pose_world2live = se3_mul(RT_live, se3_inverse(RT_world))
        pose_live2world = se3_inverse(pose_world2live)

        _t['im_segment'].tic()
        labels, probs, state, weights, points = im_segment(sess, net, im, im_depth, state, weights, points,
            meta_data, voxelizer, pose_world2live, pose_live2world)
        _t['im_segment'].toc()

        _t['misc'].tic()
        labels = unpad_im(labels, 16)

        # build the label image
        im_label = imdb.labels_to_image(im, labels)

        # post-process the labels with the point cloud
        im_label_post, lbl_pcd_color = post_proc_da.post_proc(im, data_chunk['point_cloud_array'],
            im_label, data_chunk['camera_info'], data_chunk['rgb_image'])

        have_prediction = True

        # remember the live pose as the reference for the next frame
        RT_world = RT_live
        _t['misc'].toc()

        print 'im_segment: {:d}/{:d} {:.3f}s {:.3f}s' \
            .format(i + 1, num_images, _t['im_segment'].diff, _t['misc'].diff)

        if cfg.TEST.VISUALIZE:
            # read ground-truth label image
            labels_gt = pad_im(cv2.imread(imdb.label_path_at(i), cv2.IMREAD_UNCHANGED), 16)
            if len(labels_gt.shape) == 2:
                im_label_gt = imdb.labels_to_image(im, labels_gt)
            else:
                im_label_gt = np.copy(labels_gt[:, :, :3])
                im_label_gt[:, :, 0] = labels_gt[:, :, 2]
                im_label_gt[:, :, 2] = labels_gt[:, :, 0]
            vis_segmentations(im, im_depth, im_label, im_label_post, imdb._class_colors)

        i += 1
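The pose bookkeeping above relies on composing and inverting 3x4 rigid transforms. The sketch below is an assumed implementation of `se3_mul` and `se3_inverse` using the standard identities for [R|t] matrices (the inverse of [R|t] is [R^T | -R^T t]); the project's own helpers may differ in detail.

import numpy as np

def se3_inverse(RT):
    """Inverse of a 3x4 rigid transform [R|t], i.e. [R^T | -R^T t]."""
    R = RT[:, :3]
    t = RT[:, 3]
    RT_inv = np.zeros((3, 4), dtype=RT.dtype)
    RT_inv[:, :3] = R.T
    RT_inv[:, 3] = -R.T.dot(t)
    return RT_inv

def se3_mul(RT1, RT2):
    """Compose two 3x4 rigid transforms, returning RT1 * RT2."""
    RT = np.zeros((3, 4), dtype=RT1.dtype)
    RT[:, :3] = RT1[:, :3].dot(RT2[:, :3])
    RT[:, 3] = RT1[:, :3].dot(RT2[:, 3]) + RT1[:, 3]
    return RT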
def _get_image_blob(roidb, scale_ind):
    """Builds an input blob from the images in the roidb at the specified scales."""
    num_images = len(roidb)
    processed_ims = []
    processed_ims_depth = []
    processed_ims_normal = []
    im_scales = []
    for i in xrange(num_images):
        # meta data
        meta_data = scipy.io.loadmat(roidb[i]['meta_data'])
        K = meta_data['intrinsic_matrix'].astype(np.float32, copy=True)
        fx = K[0, 0]
        fy = K[1, 1]
        cx = K[0, 2]
        cy = K[1, 2]

        # raw depth
        im_depth_raw = pad_im(cv2.imread(roidb[i]['depth'], cv2.IMREAD_UNCHANGED), 16)
        height = im_depth_raw.shape[0]
        width = im_depth_raw.shape[1]

        # rgba
        rgba = pad_im(cv2.imread(roidb[i]['image'], cv2.IMREAD_UNCHANGED), 16)
        if rgba.shape[2] == 4:
            im = np.copy(rgba[:, :, :3])
            alpha = rgba[:, :, 3]
            I = np.where(alpha == 0)
            im[I[0], I[1], :] = 255
        else:
            im = rgba

        # chromatic transform
        if cfg.TRAIN.CHROMATIC:
            im = chromatic_transform(im)

        # mask the color image according to depth
        if cfg.EXP_DIR == 'rgbd_scene':
            I = np.where(im_depth_raw == 0)
            im[I[0], I[1], :] = 0

        if roidb[i]['flipped']:
            im = im[:, ::-1, :]

        im_orig = im.astype(np.float32, copy=True)
        im_orig -= cfg.PIXEL_MEANS
        im_scale = cfg.TRAIN.SCALES_BASE[scale_ind]
        im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale, interpolation=cv2.INTER_LINEAR)
        im_scales.append(im_scale)
        processed_ims.append(im)

        # depth, normalized to [0, 255] and replicated to 3 channels
        im_depth = im_depth_raw.astype(np.float32, copy=True) / float(im_depth_raw.max()) * 255
        im_depth = np.tile(im_depth[:, :, np.newaxis], (1, 1, 3))
        if roidb[i]['flipped']:
            im_depth = im_depth[:, ::-1]
        im_orig = im_depth.astype(np.float32, copy=True)
        im_orig -= cfg.PIXEL_MEANS
        im_depth = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale, interpolation=cv2.INTER_LINEAR)
        processed_ims_depth.append(im_depth)

        # normals, mapped from [-1, 1] to [0, 255]
        depth = im_depth_raw.astype(np.float32, copy=True) / float(meta_data['factor_depth'])
        nmap = gpu_normals.gpu_normals(depth, fx, fy, cx, cy, 20.0, cfg.GPU_ID)
        im_normal = 127.5 * nmap + 127.5
        im_normal = im_normal.astype(np.uint8)
        im_normal = im_normal[:, :, (2, 1, 0)]
        if roidb[i]['flipped']:
            im_normal = im_normal[:, ::-1, :]
        im_orig = im_normal.astype(np.float32, copy=True)
        im_orig -= cfg.PIXEL_MEANS
        im_normal = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale, interpolation=cv2.INTER_LINEAR)
        processed_ims_normal.append(im_normal)

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims, 3)
    blob_depth = im_list_to_blob(processed_ims_depth, 3)
    blob_normal = im_list_to_blob(processed_ims_normal, 3)

    return blob, blob_depth, blob_normal, im_scales
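The three `im_list_to_blob` calls above stack per-image arrays into batch blobs. A minimal sketch of such a helper follows, assuming the common Fast R-CNN convention of zero-padding every image to the largest height/width in the list; the actual project utility may differ, so this is illustrative only.

import numpy as np

def im_list_to_blob_sketch(ims, num_channels):
    """Pad a list of (H, W, C) images to a common size and stack into (N, H_max, W_max, C)."""
    max_shape = np.array([im.shape for im in ims]).max(axis=0)
    num_images = len(ims)
    blob = np.zeros((num_images, max_shape[0], max_shape[1], num_channels), dtype=np.float32)
    for i in range(num_images):
        im = ims[i]
        blob[i, 0:im.shape[0], 0:im.shape[1], :] = im  # top-left aligned, zero padding elsewhere
    return blob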
    print('loading 3D models')
    cfg.renderer = RendererAdapter(width=cfg.TRAIN.SYN_WIDTH, height=cfg.TRAIN.SYN_HEIGHT)
    cfg.renderer.load_object(int(obj))

    # initialize tensors for testing
    test_data = init_tensors()

    result_file = f'/cvlabdata2/cvlab/datasets_protopap/linemod/test/{int(obj):06d}/scene_gt.json'
    print(f'fetching poses from {result_file}')
    with open(result_file, 'r') as f:
        results = json.load(f)

    # for each image
    for i in index_images:
        im = pad_im(cv2.imread(images_color[i], cv2.IMREAD_COLOR), 16)
        print(images_color[i])
        if len(images_depth) > 0 and osp.exists(images_depth[i]):
            depth = pad_im(cv2.imread(images_depth[i], cv2.IMREAD_UNCHANGED), 16)
            depth = depth.astype('float') / 1000.0
            print(images_depth[i])
        else:
            depth = None
            print('no depth image')

        # rescale image if necessary
        if cfg.TEST.SCALES_BASE[0] != 1:
            im_scale = cfg.TEST.SCALES_BASE[0]
            im = pad_im(cv2.resize(im, None, None, fx=im_scale, fy=im_scale, interpolation=cv2.INTER_LINEAR), 16)
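The `scene_gt.json` loaded above follows the BOP dataset convention: it maps each image id to a list of entries with a row-major 3x3 rotation `cam_R_m2c`, a translation `cam_t_m2c` in millimeters, and an `obj_id`. Assuming that layout, a sketch for extracting one ground-truth pose as a 3x4 matrix looks like this; the helper name is hypothetical.

import numpy as np

def gt_pose_from_scene_gt(results, image_id, obj_id):
    """Return the 3x4 [R|t] pose (t in meters) of obj_id in image_id, or None."""
    for entry in results[str(image_id)]:
        if entry['obj_id'] == obj_id:
            RT = np.zeros((3, 4), dtype=np.float32)
            RT[:, :3] = np.array(entry['cam_R_m2c'], dtype=np.float32).reshape(3, 3)
            RT[:, 3] = np.array(entry['cam_t_m2c'], dtype=np.float32) / 1000.0  # mm -> m
            return RT
    return None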
def _get_label_blob(self, roidb, num_classes, im_scale, height, width):
    """ build the label blob """

    meta_data = scipy.io.loadmat(roidb['meta_data'])
    meta_data['cls_indexes'] = meta_data['cls_indexes'].flatten()
    classes = np.array(cfg.TRAIN.CLASSES)

    # read label image
    im_label = pad_im(cv2.imread(roidb['label'], cv2.IMREAD_UNCHANGED), 16)
    if roidb['flipped']:
        if len(im_label.shape) == 2:
            im_label = im_label[:, ::-1]
        else:
            im_label = im_label[:, ::-1, :]
    if im_scale != 1.0:
        im_label = cv2.resize(im_label, None, None, fx=im_scale, fy=im_scale, interpolation=cv2.INTER_NEAREST)

    # one-hot label blob; channel 0 is background
    label_blob = np.zeros((num_classes, height, width), dtype=np.float32)
    label_blob[0, :, :] = 1.0
    for i in range(1, num_classes):
        I = np.where(im_label == classes[i])
        if len(I[0]) > 0:
            label_blob[i, I[0], I[1]] = 1.0
            label_blob[0, I[0], I[1]] = 0.0

    # foreground mask
    seg = torch.from_numpy((im_label != 0).astype(np.float32))
    mask = seg.unsqueeze(0).repeat((3, 1, 1)).float()

    # poses
    poses = meta_data['poses']
    if len(poses.shape) == 2:
        poses = np.reshape(poses, (3, 4, 1))
    if roidb['flipped']:
        poses = _flip_poses(poses, meta_data['intrinsic_matrix'], width)

    num = poses.shape[2]
    pose_blob = np.zeros((num_classes, 9), dtype=np.float32)
    gt_boxes = np.zeros((num_classes, 5), dtype=np.float32)
    count = 0
    for i in range(num):
        cls = int(meta_data['cls_indexes'][i])
        ind = np.where(classes == cls)[0]
        if len(ind) > 0:
            R = poses[:, :3, i]
            T = poses[:, 3, i]

            pose_blob[count, 0] = 1
            pose_blob[count, 1] = ind
            qt = mat2quat(R)

            # egocentric to allocentric
            qt_allocentric = egocentric2allocentric(qt, T)
            if qt_allocentric[0] < 0:
                qt_allocentric = -1 * qt_allocentric
            pose_blob[count, 2:6] = qt_allocentric
            pose_blob[count, 6:] = T

            # compute box by projecting the 3D model points
            x3d = np.ones((4, self._points_all.shape[1]), dtype=np.float32)
            x3d[0, :] = self._points_all[ind, :, 0]
            x3d[1, :] = self._points_all[ind, :, 1]
            x3d[2, :] = self._points_all[ind, :, 2]
            RT = np.zeros((3, 4), dtype=np.float32)
            RT[:3, :3] = quat2mat(qt)
            RT[:, 3] = T
            x2d = np.matmul(meta_data['intrinsic_matrix'], np.matmul(RT, x3d))
            x2d[0, :] = np.divide(x2d[0, :], x2d[2, :])
            x2d[1, :] = np.divide(x2d[1, :], x2d[2, :])

            gt_boxes[count, 0] = np.min(x2d[0, :]) * im_scale
            gt_boxes[count, 1] = np.min(x2d[1, :]) * im_scale
            gt_boxes[count, 2] = np.max(x2d[0, :]) * im_scale
            gt_boxes[count, 3] = np.max(x2d[1, :]) * im_scale
            gt_boxes[count, 4] = ind
            count += 1

    # construct the meta data
    """
    format of the meta_data
    intrinsic matrix: meta_data[0 ~ 8]
    inverse intrinsic matrix: meta_data[9 ~ 17]
    """
    K = np.matrix(meta_data['intrinsic_matrix']) * im_scale
    K[2, 2] = 1
    Kinv = np.linalg.pinv(K)
    meta_data_blob = np.zeros(18, dtype=np.float32)
    meta_data_blob[0:9] = K.flatten()
    meta_data_blob[9:18] = Kinv.flatten()

    # vertex regression target
    if cfg.TRAIN.VERTEX_REG:
        center = meta_data['center']
        if roidb['flipped']:
            center[:, 0] = width - center[:, 0]
        vertex_targets, vertex_weights = self._generate_vertex_targets(im_label,
            meta_data['cls_indexes'], center, poses, classes, num_classes)
    else:
        vertex_targets = []
        vertex_weights = []

    return label_blob, mask, meta_data_blob, pose_blob, gt_boxes, vertex_targets, vertex_weights
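For context on the `_generate_vertex_targets` call above: in the PoseCNN-style center-voting scheme, each foreground pixel regresses the unit direction towards its object's 2D center together with the object depth. The sketch below is an assumption about that scheme, not the project's own implementation, and omits any loss-weighting factors the real code may apply.

import numpy as np

def generate_vertex_targets_sketch(im_label, cls_indexes, center, poses, classes, num_classes):
    """Per-pixel targets: (unit x, unit y) towards the object center, plus depth z."""
    height, width = im_label.shape[:2]
    vertex_targets = np.zeros((3 * num_classes, height, width), dtype=np.float32)
    vertex_weights = np.zeros_like(vertex_targets)
    c = np.zeros((2, 1), dtype=np.float32)
    for i in range(1, num_classes):
        y, x = np.where(im_label == classes[i])
        if len(x) == 0:
            continue
        ind = np.where(cls_indexes == classes[i])[0]
        c[0] = center[ind, 0]
        c[1] = center[ind, 1]
        z = poses[2, 3, ind]
        # direction from each foreground pixel to the object center, normalized
        R = np.tile(c, (1, len(x))) - np.vstack((x, y)).astype(np.float32)
        R = R / (np.linalg.norm(R, axis=0) + 1e-10)
        vertex_targets[3 * i + 0, y, x] = R[0, :]
        vertex_targets[3 * i + 1, y, x] = R[1, :]
        vertex_targets[3 * i + 2, y, x] = z
        vertex_weights[3 * i:3 * i + 3, y, x] = 1.0
    return vertex_targets, vertex_weights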