def get_image_blob(self, im, im_depth, meta_data):
    """Converts a color/depth image pair into network input blobs.

    All settings are read from ``self.cfg`` so that a single configuration
    object drives the whole conversion (color, depth and normals).

    Arguments:
        im (ndarray): a color image in BGR order
        im_depth (ndarray): depth image; it is indexed as a 2-D array and
            tiled to three channels
        meta_data (dict): only read when ``self.cfg.INPUT == 'NORMAL'``; it
            must then provide 'intrinsic_matrix' and 'factor_depth'

    Returns:
        blob: blob built from the mean-subtracted, rescaled color image
        blob_rescale: blob built from the color image mapped through
            ``im / 127.5 - 1`` and rescaled
        blob_depth: blob built from the depth image (clipped, tiled to three
            channels, mean-subtracted, rescaled)
        blob_normal: blob built from the surface-normal image when
            ``INPUT == 'NORMAL'``, otherwise an empty list
        im_scale_factors (ndarray): the single image scale (relative to im)
            that was applied
    """
    cfg = self.cfg

    # RGB
    im_color = im.astype(np.float32, copy=True)

    # mask the color image according to depth
    if cfg.EXP_DIR == 'rgbd_scene':
        missing = np.where(im_depth == 0)
        im_color[missing[0], missing[1], :] = 0

    im_scale = cfg.TEST.SCALES_BASE[0]

    # The rescaled copy must be taken before the means are subtracted in place.
    im_rescale = cv2.resize(im_color / 127.5 - 1, None, None,
                            fx=im_scale, fy=im_scale,
                            interpolation=cv2.INTER_LINEAR)
    processed_ims_rescale = [im_rescale]

    im_color -= cfg.PIXEL_MEANS

    # Only a single test scale is supported.
    assert len(cfg.TEST.SCALES_BASE) == 1
    im_resized = cv2.resize(im_color, None, None,
                            fx=im_scale, fy=im_scale,
                            interpolation=cv2.INTER_LINEAR)
    im_scale_factors = [im_scale]
    processed_ims = [im_resized]

    # depth: divided by a fixed 2000 and clipped to [0, 1] (not normalized by
    # the per-image maximum), then stretched to [0, 255]
    depth_vis = im_depth.astype(np.float32, copy=True)
    depth_vis = np.clip(depth_vis / 2000.0, 0, 1) * 255
    depth_vis = np.tile(depth_vis[:, :, np.newaxis], (1, 1, 3))
    depth_vis -= cfg.PIXEL_MEANS
    depth_resized = cv2.resize(depth_vis, None, None,
                               fx=im_scale, fy=im_scale,
                               interpolation=cv2.INTER_LINEAR)
    processed_ims_depth = [depth_resized]

    if cfg.INPUT == 'NORMAL':
        # meta data
        K = meta_data['intrinsic_matrix'].astype(np.float32, copy=True)
        fx = K[0, 0]
        fy = K[1, 1]
        cx = K[0, 2]
        cy = K[1, 2]

        # normals
        depth = im_depth.astype(np.float32, copy=True) / float(
            meta_data['factor_depth'])
        # NOTE(review): the meaning of the 20.0 argument is defined by
        # gpu_normals, which is not visible here -- confirm.
        nmap = gpu_normals.gpu_normals(depth, fx, fy, cx, cy, 20.0,
                                       cfg.GPU_ID)
        im_normal = 127.5 * nmap + 127.5
        im_normal = im_normal.astype(np.uint8)
        im_normal = im_normal[:, :, (2, 1, 0)]  # reverse the channel order
        im_normal = cv2.bilateralFilter(im_normal, 9, 75, 75)

        normal_f = im_normal.astype(np.float32, copy=True)
        normal_f -= cfg.PIXEL_MEANS
        normal_resized = cv2.resize(normal_f, None, None,
                                    fx=im_scale, fy=im_scale,
                                    interpolation=cv2.INTER_LINEAR)
        blob_normal = im_list_to_blob([normal_resized], 3)
    else:
        blob_normal = []

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims, 3)
    blob_rescale = im_list_to_blob(processed_ims_rescale, 3)
    blob_depth = im_list_to_blob(processed_ims_depth, 3)

    return blob, blob_rescale, blob_depth, blob_normal, np.array(
        im_scale_factors)
def _get_image_blob(roidb, scale_ind, num_classes, backgrounds,
                    intrinsic_matrix, db_inds_syn, is_syn):
    """Builds input blobs from the images in the roidb at the specified scale.

    Arguments:
        roidb (sequence of dict): one entry per image; 'flipped' is always
            read, 'depth' and 'image' (file paths) only when ``is_syn`` is
            false
        scale_ind (int): index into ``cfg.TRAIN.SCALES_BASE``
        num_classes: unused, kept for interface compatibility
        backgrounds (sequence of str): background image paths; one is drawn
            at random per synthetic image
        intrinsic_matrix (ndarray): camera intrinsics, read only when
            ``cfg.INPUT == 'NORMAL'``
        db_inds_syn (sequence of int): indices of the synthetic images under
            ``cfg.TRAIN.SYNROOT``, read only when ``is_syn`` is true
        is_syn (bool): load synthetic images composited over a random
            background instead of the roidb images

    Returns:
        blob: blob built from the processed color images
        blob_depth: blob built from the processed depth images
        blob_normal: blob built from the processed normal images when
            ``cfg.INPUT == 'NORMAL'``, otherwise an empty list
        im_scales (list): the scale applied to each image
    """
    processed_ims = []
    processed_ims_depth = []
    processed_ims_normal = []
    im_scales = []

    # In these modes the random background is composited into the depth
    # image rather than into the color image.
    depth_input = cfg.INPUT == 'DEPTH' or cfg.INPUT == 'NORMAL'
    normal_input = cfg.INPUT == 'NORMAL'
    im_scale = cfg.TRAIN.SCALES_BASE[scale_ind]

    for i, entry in enumerate(roidb):
        if is_syn:
            # depth raw
            filename = cfg.TRAIN.SYNROOT + '{:06d}-depth.png'.format(
                db_inds_syn[i])
            im_depth_raw = pad_im(
                cv2.imread(filename, cv2.IMREAD_UNCHANGED), 16)

            # rgba
            filename = cfg.TRAIN.SYNROOT + '{:06d}-color.png'.format(
                db_inds_syn[i])
            rgba = pad_im(cv2.imread(filename, cv2.IMREAD_UNCHANGED), 16)
            height, width = rgba.shape[0], rgba.shape[1]

            # sample a background image
            ind = np.random.randint(len(backgrounds), size=1)[0]
            background = cv2.imread(backgrounds[ind], cv2.IMREAD_UNCHANGED)
            try:
                background = cv2.resize(background, (width, height),
                                        interpolation=cv2.INTER_LINEAR)
            except Exception:
                # Best effort: an unreadable background is replaced by an
                # all-zero one instead of aborting the whole batch.
                if depth_input:
                    background = np.zeros((height, width), dtype=np.uint16)
                else:
                    background = np.zeros((height, width, 3), dtype=np.uint8)
                print('bad background image')

            # A background without a channel axis cannot fill a color image.
            if not depth_input and len(background.shape) != 3:
                background = np.zeros((height, width, 3), dtype=np.uint8)
                print('bad background image')

            # add background wherever the rendering is fully transparent
            im = np.copy(rgba[:, :, :3])
            alpha = rgba[:, :, 3]
            transparent = np.where(alpha == 0)
            if depth_input:
                im_depth_raw[transparent[0], transparent[1]] = \
                    background[transparent[0], transparent[1]] / 10
            else:
                im[transparent[0], transparent[1], :] = \
                    background[transparent[0], transparent[1], :3]
        else:
            # depth raw
            im_depth_raw = pad_im(
                cv2.imread(entry['depth'], cv2.IMREAD_UNCHANGED), 16)

            # rgba
            rgba = pad_im(cv2.imread(entry['image'], cv2.IMREAD_UNCHANGED),
                          16)
            if rgba.shape[2] == 4:
                im = np.copy(rgba[:, :, :3])
                alpha = rgba[:, :, 3]
                transparent = np.where(alpha == 0)
                im[transparent[0], transparent[1], :] = 0
            else:
                im = rgba

        # chromatic transform
        if cfg.TRAIN.CHROMATIC:
            im = chromatic_transform(im)

        if cfg.TRAIN.ADD_NOISE:
            im = add_noise(im)

        if entry['flipped']:
            im = im[:, ::-1, :]

        im_orig = im.astype(np.float32, copy=True)
        im_orig -= cfg.PIXEL_MEANS
        im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale,
                        interpolation=cv2.INTER_LINEAR)
        im_scales.append(im_scale)
        processed_ims.append(im)

        # depth, stretched so that the per-image maximum maps to 255
        im_depth = im_depth_raw.astype(np.float32, copy=True)
        depth_max = float(im_depth_raw.max())
        if depth_max != 0:
            # A zero maximum (for non-negative depth: an all-zero image) is
            # left unscaled; dividing by it would fill the blob with NaN/inf.
            im_depth = im_depth / depth_max * 255
        im_depth = np.tile(im_depth[:, :, np.newaxis], (1, 1, 3))

        if cfg.TRAIN.ADD_NOISE:
            im_depth = add_noise(im_depth)

        if entry['flipped']:
            im_depth = im_depth[:, ::-1]

        im_orig = im_depth.astype(np.float32, copy=True)
        im_orig -= cfg.PIXEL_MEANS
        im_depth = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale,
                              interpolation=cv2.INTER_LINEAR)
        processed_ims_depth.append(im_depth)

        # normals
        if normal_input:
            depth = im_depth_raw.astype(np.float32, copy=True) / 1000.0
            # NOTE(review): the intrinsics are multiplied by im_scale although
            # the depth map handed to gpu_normals is not resized -- confirm
            # this is intended.
            fx = intrinsic_matrix[0, 0] * im_scale
            fy = intrinsic_matrix[1, 1] * im_scale
            cx = intrinsic_matrix[0, 2] * im_scale
            cy = intrinsic_matrix[1, 2] * im_scale
            nmap = gpu_normals.gpu_normals(depth, fx, fy, cx, cy, 20.0,
                                           cfg.GPU_ID)
            im_normal = 127.5 * nmap + 127.5
            im_normal = im_normal.astype(np.uint8)
            im_normal = im_normal[:, :, (2, 1, 0)]  # reverse channel order
            im_normal = cv2.bilateralFilter(im_normal, 9, 75, 75)
            if entry['flipped']:
                im_normal = im_normal[:, ::-1, :]
            im_orig = im_normal.astype(np.float32, copy=True)
            im_orig -= cfg.PIXEL_MEANS
            im_normal = cv2.resize(im_orig, None, None,
                                   fx=im_scale, fy=im_scale,
                                   interpolation=cv2.INTER_LINEAR)
            processed_ims_normal.append(im_normal)

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims, 3)
    blob_depth = im_list_to_blob(processed_ims_depth, 3)
    # Built once from the complete list instead of once per loop iteration.
    blob_normal = im_list_to_blob(processed_ims_normal, 3) \
        if normal_input else []

    return blob, blob_depth, blob_normal, im_scales
def get_image_blob(im, im_depth, meta_data, cfg):
    """Converts an image (and optional depth image) into network inputs.

    Arguments:
        im (ndarray): a color image in BGR order
        im_depth (ndarray or None): depth image, indexed as a 2-D array; may
            be None unless ``cfg.INPUT == 'NORMAL'``
        meta_data (dict): only read when ``cfg.INPUT == 'NORMAL'``; it must
            then provide 'intrinsic_matrix' and 'factor_depth'
        cfg: configuration providing PIXEL_MEANS, TEST.SCALES_BASE, INPUT
            and GPU_ID

    Returns:
        blob: blob built from the mean-subtracted, rescaled color image
        blob_depth: blob built from the depth image, or None when
            ``im_depth`` is None
        blob_normal: blob built from the surface-normal image when
            ``cfg.INPUT == 'NORMAL'``, otherwise an empty list
        im_scale_factors (ndarray): the single image scale (relative to im)
            that was applied
        height (int): number of rows of the rescaled color image
        width (int): number of columns of the rescaled color image

    Raises:
        ValueError: if ``cfg.INPUT == 'NORMAL'`` but ``im_depth`` is None
    """
    # RGB
    im_color = im.astype(np.float32, copy=True)
    im_color -= cfg.PIXEL_MEANS

    # Only a single test scale is supported.
    assert len(cfg.TEST.SCALES_BASE) == 1
    im_scale = cfg.TEST.SCALES_BASE[0]

    im_resized = cv2.resize(im_color, None, None, fx=im_scale, fy=im_scale,
                            interpolation=cv2.INTER_LINEAR)
    im_scale_factors = [im_scale]
    processed_ims = [im_resized]

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims, 3)
    height = im_resized.shape[0]
    width = im_resized.shape[1]

    # depth, stretched so that the per-image maximum maps to 255
    if im_depth is not None:
        depth_vis = im_depth.astype(np.float32, copy=True)
        depth_max = depth_vis.max()
        if depth_max != 0:
            # A zero maximum (for non-negative depth: an all-zero image) is
            # left unscaled; dividing by it would fill the blob with NaN/inf.
            depth_vis = depth_vis / depth_max * 255
        depth_vis = np.tile(depth_vis[:, :, np.newaxis], (1, 1, 3))
        depth_vis -= cfg.PIXEL_MEANS
        depth_resized = cv2.resize(depth_vis, None, None,
                                   fx=im_scale, fy=im_scale,
                                   interpolation=cv2.INTER_LINEAR)
        blob_depth = im_list_to_blob([depth_resized], 3)
    else:
        blob_depth = None

    if cfg.INPUT == 'NORMAL':
        if im_depth is None:
            # Normals are derived from depth; fail with a clear message
            # instead of an AttributeError on None further down.
            raise ValueError(
                "im_depth is required when cfg.INPUT == 'NORMAL'")

        # meta data
        K = meta_data['intrinsic_matrix'].astype(np.float32, copy=True)
        fx = K[0, 0]
        fy = K[1, 1]
        cx = K[0, 2]
        cy = K[1, 2]

        # normals
        depth = im_depth.astype(np.float32, copy=True) / float(
            meta_data['factor_depth'])
        # NOTE(review): the meaning of the 20.0 argument is defined by
        # gpu_normals, which is not visible here -- confirm.
        nmap = gpu_normals.gpu_normals(depth, fx, fy, cx, cy, 20.0,
                                       cfg.GPU_ID)
        im_normal = 127.5 * nmap + 127.5
        im_normal = im_normal.astype(np.uint8)
        im_normal = im_normal[:, :, (2, 1, 0)]  # reverse the channel order
        im_normal = cv2.bilateralFilter(im_normal, 9, 75, 75)

        normal_f = im_normal.astype(np.float32, copy=True)
        normal_f -= cfg.PIXEL_MEANS
        normal_resized = cv2.resize(normal_f, None, None,
                                    fx=im_scale, fy=im_scale,
                                    interpolation=cv2.INTER_LINEAR)
        blob_normal = im_list_to_blob([normal_resized], 3)
    else:
        blob_normal = []

    return blob, blob_depth, blob_normal, np.array(
        im_scale_factors), height, width
def _get_image_blob(roidb, scale_ind):
    """Builds input blobs from the images in the roidb at the specified scale.

    Arguments:
        roidb (sequence of dict): one entry per image; the keys 'meta_data',
            'depth', 'image' and 'flipped' are read, plus 'label' when
            ``cfg.TRAIN.CHROMATIC`` is set
        scale_ind (int): index into ``cfg.TRAIN.SCALES_BASE``

    Returns:
        blob: blob built from the mean-subtracted, rescaled color images
        blob_rescale: blob built from the color images mapped through
            ``im / 127.5 - 1`` when ``cfg.TRAIN.GAN`` is set, otherwise an
            empty list
        blob_depth: blob built from the processed depth images
        blob_normal: blob built from the processed normal images
        im_scales (list): the scale applied to each image
    """
    processed_ims = []
    processed_ims_depth = []
    processed_ims_normal = []
    processed_ims_rescale = []  # only filled when cfg.TRAIN.GAN is set
    im_scales = []
    im_scale = cfg.TRAIN.SCALES_BASE[scale_ind]

    for entry in roidb:
        # meta data
        meta_data = scipy.io.loadmat(entry['meta_data'])
        K = meta_data['intrinsic_matrix'].astype(np.float32, copy=True)
        fx = K[0, 0]
        fy = K[1, 1]
        cx = K[0, 2]
        cy = K[1, 2]

        # depth raw
        im_depth_raw = pad_im(
            cv2.imread(entry['depth'], cv2.IMREAD_UNCHANGED), 16)

        # rgba
        rgba = pad_im(cv2.imread(entry['image'], cv2.IMREAD_UNCHANGED), 16)
        if rgba.shape[2] == 4:
            # zero out the fully transparent pixels
            im = np.copy(rgba[:, :, :3])
            alpha = rgba[:, :, 3]
            transparent = np.where(alpha == 0)
            im[transparent[0], transparent[1], :] = 0
        else:
            im = rgba

        # chromatic transform
        if cfg.TRAIN.CHROMATIC:
            label = pad_im(
                cv2.imread(entry['label'], cv2.IMREAD_UNCHANGED), 16)
            im = chromatic_transform(im, label)

        # mask the color image according to depth
        if cfg.EXP_DIR == 'rgbd_scene':
            missing = np.where(im_depth_raw == 0)
            im[missing[0], missing[1], :] = 0

        if entry['flipped']:
            im = im[:, ::-1, :]

        if cfg.TRAIN.GAN:
            im_gan = im.astype(np.float32, copy=True) / 127.5 - 1
            im_rescale = cv2.resize(im_gan, None, None,
                                    fx=im_scale, fy=im_scale,
                                    interpolation=cv2.INTER_LINEAR)
            processed_ims_rescale.append(im_rescale)

        im_orig = im.astype(np.float32, copy=True)
        im_orig -= cfg.PIXEL_MEANS
        im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale,
                        interpolation=cv2.INTER_LINEAR)
        im_scales.append(im_scale)
        processed_ims.append(im)

        # depth, stretched so that the per-image maximum maps to 255
        im_depth = im_depth_raw.astype(np.float32, copy=True)
        depth_max = float(im_depth_raw.max())
        if depth_max != 0:
            # A zero maximum (for non-negative depth: an all-zero image) is
            # left unscaled; dividing by it would fill the blob with NaN/inf.
            im_depth = im_depth / depth_max * 255
        im_depth = np.tile(im_depth[:, :, np.newaxis], (1, 1, 3))

        if entry['flipped']:
            im_depth = im_depth[:, ::-1]

        im_orig = im_depth.astype(np.float32, copy=True)
        im_orig -= cfg.PIXEL_MEANS
        im_depth = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale,
                              interpolation=cv2.INTER_LINEAR)
        processed_ims_depth.append(im_depth)

        # normals
        depth = im_depth_raw.astype(np.float32, copy=True) / float(
            meta_data['factor_depth'])
        # NOTE(review): the meaning of the 20.0 argument is defined by
        # gpu_normals, which is not visible here -- confirm.
        nmap = gpu_normals.gpu_normals(depth, fx, fy, cx, cy, 20.0,
                                       cfg.GPU_ID)
        im_normal = 127.5 * nmap + 127.5
        im_normal = im_normal.astype(np.uint8)
        im_normal = im_normal[:, :, (2, 1, 0)]  # reverse the channel order
        if entry['flipped']:
            im_normal = im_normal[:, ::-1, :]

        im_orig = im_normal.astype(np.float32, copy=True)
        im_orig -= cfg.PIXEL_MEANS
        im_normal = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale,
                               interpolation=cv2.INTER_LINEAR)
        processed_ims_normal.append(im_normal)

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims, 3)
    blob_depth = im_list_to_blob(processed_ims_depth, 3)
    blob_normal = im_list_to_blob(processed_ims_normal, 3)
    if cfg.TRAIN.GAN:
        blob_rescale = im_list_to_blob(processed_ims_rescale, 3)
    else:
        blob_rescale = []

    return blob, blob_rescale, blob_depth, blob_normal, im_scales
def _get_image_blob(roidb, scale_ind):
    """Builds input blobs from the images in the roidb at the specified scale.

    Arguments:
        roidb (sequence of dict): one entry per image; the keys 'meta_data',
            'depth', 'image' and 'flipped' are read
        scale_ind (int): index into ``cfg.TRAIN.SCALES_BASE``

    Returns:
        blob: blob built from the mean-subtracted, rescaled color images
        blob_depth: blob built from the processed depth images
        blob_normal: blob built from the processed normal images
        im_scales (list): the scale applied to each image
    """
    processed_ims = []
    processed_ims_depth = []
    processed_ims_normal = []
    im_scales = []
    im_scale = cfg.TRAIN.SCALES_BASE[scale_ind]

    # Direct iteration replaces the Python-2-only xrange index loop.
    for entry in roidb:
        # meta data
        meta_data = scipy.io.loadmat(entry['meta_data'])
        K = meta_data['intrinsic_matrix'].astype(np.float32, copy=True)
        fx = K[0, 0]
        fy = K[1, 1]
        cx = K[0, 2]
        cy = K[1, 2]

        # depth raw
        im_depth_raw = pad_im(
            cv2.imread(entry['depth'], cv2.IMREAD_UNCHANGED), 16)

        # rgba
        rgba = pad_im(cv2.imread(entry['image'], cv2.IMREAD_UNCHANGED), 16)
        if rgba.shape[2] == 4:
            # fully transparent pixels are painted white (255)
            im = np.copy(rgba[:, :, :3])
            alpha = rgba[:, :, 3]
            transparent = np.where(alpha == 0)
            im[transparent[0], transparent[1], :] = 255
        else:
            im = rgba

        # chromatic transform
        if cfg.TRAIN.CHROMATIC:
            im = chromatic_transform(im)

        # mask the color image according to depth
        if cfg.EXP_DIR == 'rgbd_scene':
            missing = np.where(im_depth_raw == 0)
            im[missing[0], missing[1], :] = 0

        if entry['flipped']:
            im = im[:, ::-1, :]

        im_orig = im.astype(np.float32, copy=True)
        im_orig -= cfg.PIXEL_MEANS
        im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale,
                        interpolation=cv2.INTER_LINEAR)
        im_scales.append(im_scale)
        processed_ims.append(im)

        # depth, stretched so that the per-image maximum maps to 255
        im_depth = im_depth_raw.astype(np.float32, copy=True)
        depth_max = float(im_depth_raw.max())
        if depth_max != 0:
            # A zero maximum (for non-negative depth: an all-zero image) is
            # left unscaled; dividing by it would fill the blob with NaN/inf.
            im_depth = im_depth / depth_max * 255
        im_depth = np.tile(im_depth[:, :, np.newaxis], (1, 1, 3))

        if entry['flipped']:
            im_depth = im_depth[:, ::-1]

        im_orig = im_depth.astype(np.float32, copy=True)
        im_orig -= cfg.PIXEL_MEANS
        im_depth = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale,
                              interpolation=cv2.INTER_LINEAR)
        processed_ims_depth.append(im_depth)

        # normals
        depth = im_depth_raw.astype(np.float32, copy=True) / float(
            meta_data['factor_depth'])
        # NOTE(review): the meaning of the 20.0 argument is defined by
        # gpu_normals, which is not visible here -- confirm.
        nmap = gpu_normals.gpu_normals(depth, fx, fy, cx, cy, 20.0,
                                       cfg.GPU_ID)
        im_normal = 127.5 * nmap + 127.5
        im_normal = im_normal.astype(np.uint8)
        im_normal = im_normal[:, :, (2, 1, 0)]  # reverse the channel order
        if entry['flipped']:
            im_normal = im_normal[:, ::-1, :]

        im_orig = im_normal.astype(np.float32, copy=True)
        im_orig -= cfg.PIXEL_MEANS
        im_normal = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale,
                               interpolation=cv2.INTER_LINEAR)
        processed_ims_normal.append(im_normal)

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims, 3)
    blob_depth = im_list_to_blob(processed_ims_depth, 3)
    blob_normal = im_list_to_blob(processed_ims_normal, 3)

    return blob, blob_depth, blob_normal, im_scales