def _get_image_blob(roidb, target_size):
    """Builds an input blob from the images in the roidb at the specified
    scales.
    """
    num_images = len(roidb)
    processed_ims = []
    im_scales = []
    for i in range(num_images):
        # im = cv2.imread(roidb[i]['image'])
        im = imread(roidb[i]['image'])

        if len(im.shape) == 2:
            im = im[:, :, np.newaxis]
            im = np.concatenate((im, im, im), axis=2)
        # flip the channels, since the original code used cv2 (BGR order)
        # rgb -> bgr
        im = im[:, :, ::-1]

        if roidb[i]['flipped']:
            im = im[:, ::-1, :]
        im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size[i],
                                        cfg.TRAIN.MAX_SIZE)
        im_scales.append(im_scale)
        processed_ims.append(im)

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)

    return blob, im_scales
def _get_image_blob(im):
    """Converts an image into a network input.

    Arguments:
        im (ndarray): a color image in BGR order

    Returns:
        blob (ndarray): a data blob holding an image pyramid
        im_scale_factors (list): list of image scales (relative to im) used
            in the image pyramid
    """
    im_orig = im.astype(np.float32, copy=True)
    im_orig -= cfg.PIXEL_MEANS

    im_shape = im_orig.shape
    im_size_min = np.min(im_shape[0:2])
    im_size_max = np.max(im_shape[0:2])

    processed_ims = []
    im_scale_factors = []

    for target_size in cfg.TEST.SCALES:
        im_scale = float(target_size) / float(im_size_min)
        # Prevent the biggest axis from being more than MAX_SIZE
        if np.round(im_scale * im_size_max) > cfg.TEST.MAX_SIZE:
            im_scale = float(cfg.TEST.MAX_SIZE) / float(im_size_max)
        im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale,
                        interpolation=cv2.INTER_LINEAR)
        im_scale_factors.append(im_scale)
        processed_ims.append(im)

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)

    return blob, np.array(im_scale_factors)
def _get_image_blob(im):
    """Converts an image into a network input.

    Arguments:
        im (ndarray): a color image in BGR order

    Returns:
        blob (ndarray): a data blob holding an image pyramid
        im_scale_factors (list): list of image scales (relative to im) used
            in the image pyramid
    """
    im_orig = im[:, :, :].astype(np.float32, copy=True)
    # im_orig -= cfg.PIXEL_MEANS
    # changed to use pytorch models
    im_orig /= 255.  # convert range to [0, 1]
    pixel_means = [0.485, 0.456, 0.406]
    im_orig -= pixel_means  # subtract mean
    pixel_stdens = [0.229, 0.224, 0.225]
    im_orig /= pixel_stdens  # divide by stddev
    # im_orig = im

    im_shape = im_orig.shape
    im_size_min = np.min(im_shape[0:2])
    im_size_max = np.max(im_shape[0:2])

    processed_ims = []
    im_scale_factors = []

    for target_size in cfg.TEST.SCALES:
        im_scale = float(target_size) / float(im_size_min)
        # Prevent the biggest axis from being more than MAX_SIZE
        if np.round(im_scale * im_size_max) > cfg.TEST.MAX_SIZE:
            im_scale = float(cfg.TEST.MAX_SIZE) / float(im_size_max)
        im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale,
                        interpolation=cv2.INTER_LINEAR)
        im_scale_factors.append(im_scale)
        processed_ims.append(im)

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)

    return blob, np.array(im_scale_factors)
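# --- Added note (not part of the original code) ------------------------------
# The variant above rescales pixels to [0, 1] and normalizes with the ImageNet
# mean/std expected by torchvision backbones. A minimal sketch applying the same
# normalization constants through torchvision; the random stand-in image and the
# torchvision dependency are assumptions for illustration only.
import numpy as np
import torchvision.transforms as T

_im_bgr = np.random.randint(0, 256, (480, 640, 3), dtype=np.uint8)  # stand-in BGR image
_im_rgb = _im_bgr[:, :, ::-1].copy()                                # BGR -> RGB
_normalize = T.Compose([
    T.ToTensor(),                                # HWC uint8 [0, 255] -> CHW float [0, 1]
    T.Normalize(mean=[0.485, 0.456, 0.406],      # same constants as pixel_means above
                std=[0.229, 0.224, 0.225]),      # same constants as pixel_stdens above
])
_tensor = _normalize(_im_rgb)                    # CHW float tensor for a torchvision backbone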
def _get_image_seg_blob(roidb, scale_inds):
    """Builds an input blob from the images in the roidb at the specified
    scales.
    """
    num_images = len(roidb)
    processed_ims = []
    processed_seglabel = []
    im_scales = []
    for i in range(num_images):
        # im = cv2.imread(roidb[i]['image'])
        im = imread(roidb[i]['image'])
        seg_label_name = (roidb[i]['image']).replace(
            'JPEGImages', 'SegmentationClass').replace('.jpg', '.png')
        seg_label = imread(seg_label_name, mode='P')

        if len(im.shape) == 2:
            im = im[:, :, np.newaxis]
            im = np.concatenate((im, im, im), axis=2)
        # flip the channels, since the original code used cv2 (BGR order)
        # rgb -> bgr
        im = im[:, :, ::-1]

        if roidb[i]['flipped']:
            im = im[:, ::-1, :]
            seg_label = seg_label[:, ::-1]
        target_size = cfg.TRAIN.SCALES[scale_inds[i]]
        im, seg_label, im_scale = prep_im_seg_for_blob(im, seg_label,
                                                       cfg.PIXEL_MEANS,
                                                       target_size,
                                                       cfg.TRAIN.MAX_SIZE)
        im_scales.append(im_scale)
        processed_ims.append(im)
        processed_seglabel.append(seg_label)

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)
    # Build the segmentation-label blob from the collected labels (not the images)
    seg_blob = seg_list_to_blob(processed_seglabel)

    return blob, seg_blob, im_scales
def _get_image_blob(roidb, scale_inds, depth=False):
    """Builds an input blob from the images in the roidb at the specified
    scales.
    """
    num_images = len(roidb)
    processed_ims = []
    im_scales = []
    for i in range(num_images):
        # im = cv2.imread(roidb[i]['image'])
        im = imread(roidb[i]['image'])
        depth_name = roidb[i]['image'].replace("JPEGImages", "DepthImages")
        depth_val = imread(depth_name)
        depth_val = np.expand_dims(depth_val, -1)
        # st()
        # DepthImages/
        if len(im.shape) == 2:
            im = im[:, :, np.newaxis]
            im = np.concatenate((im, im, im), axis=2)
        # flip the channels, since the original code used cv2 (BGR order)
        # rgb -> bgr
        im = im[:, :, ::-1]
        # st()
        if depth:
            im = np.concatenate([im, depth_val], -1)

        if roidb[i]['flipped']:
            im = im[:, ::-1, :]
        target_size = cfg.TRAIN.SCALES[scale_inds[i]]
        if depth:
            im, im_scale = prep_im_for_blob(im, cfg.DEPTH_MEANS, target_size,
                                            cfg.TRAIN.MAX_SIZE)
        else:
            im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
                                            cfg.TRAIN.MAX_SIZE)
        im_scales.append(im_scale)
        processed_ims.append(im)

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)

    return blob, im_scales
def _get_image_blob(roidb, scale_inds, training):
    """Builds an input blob from the images in the roidb at the specified
    scales.
    """
    num_images = len(roidb)
    processed_ims = []
    im_scales = []
    for i in range(num_images):
        '''
        This part might need to be changed:
        delete the cv2-related code and switch to a 2D pipeline if possible / preferable
        '''
        # im = cv2.imread(roidb[i]['image'])
        im = imageio.imread(roidb[i]['image'])

        if len(im.shape) == 2:
            im = im[:, :, np.newaxis]
            im = np.concatenate((im, im, im), axis=2)  # 2D image to 3-channel image
        # flip the channels, since the original code used cv2 (BGR order)
        # rgb -> bgr
        im = im[:, :, ::-1]

        if roidb[i]['flipped']:
            im = im[:, ::-1, :]  # flip horizontally (width-wise)
        target_size = cfg.TRAIN.SCALES[scale_inds[i]]  # 1 is always expected
        # im is resized by the im_scale ratio
        im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
                                        cfg.TRAIN.MAX_SIZE, training)
        im_scales.append(im_scale)
        processed_ims.append(im)

    # Create a blob to hold the input images
    # change the image list into a blob
    blob = im_list_to_blob(processed_ims)

    return blob, im_scales
def _get_image_blob(roidb, scale_inds, transfrom):
    """Builds an input blob from the images in the roidb at the specified
    scales.
    """
    num_images = len(roidb)
    processed_ims = []
    im_scales = []
    for i in range(num_images):
        im = cv2.imread(roidb[i]['image'])
        target_size = cfg.TRAIN.SCALES[scale_inds[i]]
        im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
                                        cfg.TRAIN.MAX_SIZE, transfrom)
        im_scales.append(im_scale)
        processed_ims.append(im)

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)

    return blob, im_scales
def _get_image_blob(im):
    """Converts an image into a network input.

    Arguments:
        im (ndarray): a color image in BGR order

    Returns:
        blob (ndarray): a data blob holding an image pyramid
        im_scale_factors (list): list of image scales (relative to im) used
            in the image pyramid
    """
    im_orig = im.astype(np.float32, copy=True)
    im_orig -= cfg.PIXEL_MEANS

    im_shape = im_orig.shape
    im_size_min = np.min(im_shape[0:2])
    im_size_max = np.max(im_shape[0:2])

    processed_ims = []
    im_scale_factors = []

    for target_size in cfg.TEST.SCALES:
        im_scale = float(target_size) / float(im_size_min)
        # Prevent the biggest axis from being more than MAX_SIZE
        if np.round(im_scale * im_size_max) > cfg.TEST.MAX_SIZE:
            im_scale = float(cfg.TEST.MAX_SIZE) / float(im_size_max)
        im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale,
                        interpolation=cv2.INTER_LINEAR)
        im_scale_factors.append(im_scale)
        processed_ims.append(im)

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)
    # print blob.shape
    # print blob
    # print im_scale_factors
    # raw_input('Continue?')

    return blob, np.array(im_scale_factors)
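# --- Added usage sketch (not part of the original code) ----------------------
# Illustrates how the test-time _get_image_blob above is typically called; the
# image path is a placeholder, and the shapes in the comments assume the usual
# defaults (cfg.TEST.SCALES = (600,), cfg.TEST.MAX_SIZE = 1000) and an
# im_list_to_blob that returns a channels-last (N, H, W, C) array.
if __name__ == '__main__':
    import cv2
    import numpy as np

    im = cv2.imread('example.jpg')                 # BGR image, as the docstring expects
    blob, im_scale_factors = _get_image_blob(im)
    # With a single test scale, blob has shape (1, H, W, 3) and
    # im_scale_factors holds one resize factor relative to the original image.
    im_info = np.array([[blob.shape[1], blob.shape[2], im_scale_factors[0]]],
                       dtype=np.float32)           # [height, width, scale], as in the video variant below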
def _get_image_blob(roidb, scale_inds):
    """Builds an input blob from the images in the roidb at the specified
    scales.
    """
    num_images = len(roidb)
    processed_ims = []
    im_scales = []
    for i in range(num_images):
        # im = cv2.imread(roidb[i]['image'])
        im = imread(roidb[i]['image'], mode='RGB')
        if len(im.shape) == 0:
            im = np.zeros((roidb[i]['height'], roidb[i]['width'], 3))
        if len(im.shape) != 3:
            pdb.set_trace()
        if len(im.shape) == 2:
            im = im[:, :, np.newaxis]
            im = np.concatenate((im, im, im), axis=2)
        if im.shape[2] > 3:
            im = im[:, :, :3]
        # flip the channels, since the original code used cv2 (BGR order)
        # rgb -> bgr
        im = im[:, :, ::-1]

        if roidb[i]['flipped']:
            im = im[:, ::-1, :]
        target_size = cfg.TRAIN.SCALES[scale_inds[i]]
        im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
                                        cfg.TRAIN.MAX_SIZE)
        im_scales.append(im_scale)
        processed_ims.append(im)

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)

    return blob, im_scales
def _get_image_blob(roidb, scale_inds=-1):
    """Builds an input blob from the images in the roidb at the specified
    scales.
    """
    num_images = len(roidb)
    processed_ims = []
    im_scales = []
    for i in range(num_images):
        # im = cv2.imread(roidb[i]['image'])
        im = imread(roidb[i]['image'])

        if len(im.shape) == 2:
            im = im[:, :, np.newaxis]
            im = np.concatenate((im, im, im), axis=2)
        # rgb -> bgr
        # flip the channels, since the original code used cv2 (BGR order)
        im = im[:, :, ::-1]

        im = np.rot90(im, roidb[i]['rotated'])

        if roidb[i]['flipped']:
            im = im[:, ::-1, :]

        if scale_inds == -1:
            # original code
            target_size = cfg.TRAIN.COMMON.INPUT_SIZE
            im, im_scale = prep_im_for_blob_fixed_size(im, cfg.PIXEL_MEANS,
                                                       target_size)
        else:
            # original code
            target_size = cfg.TRAIN.RCNN_COMMON.SCALES[scale_inds[i]]
            im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
                                            cfg.TRAIN.COMMON.MAX_SIZE)
        im_scales.append(im_scale)
        processed_ims.append(im)

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)

    return blob, im_scales
def _get_image_blob(roidb, scale_inds):
    """Builds an input blob from the images in the roidb at the specified
    scales.
    """
    num_images = len(roidb)
    # print('num images, ', num_images)
    processed_ims = []
    im_scales = []
    for i in range(num_images):
        # im = cv2.imread(roidb[i]['image'])
        im = imread(roidb[i]['image'])

        if len(im.shape) == 2:
            im = im[:, :, np.newaxis]
            im = np.concatenate((im, im, im), axis=2)
        # if there are four channels, remove the alpha channel
        if im.shape[-1] == 4:
            im = im[:, :, :-1]
        # flip the channels, since the original code used cv2 (BGR order)
        # rgb -> bgr
        im = im[:, :, ::-1]
        # print('in get image blob')
        # print(im)
        # print(im.shape)

        target_size = cfg.TRAIN.SCALES[scale_inds[i]]
        im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
                                        cfg.TRAIN.MAX_SIZE)
        im_scales.append(im_scale)
        processed_ims.append(im)

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)

    return blob, im_scales
def _get_image_blob(self, im, frame_id):
    '''Convert image into network input.

    :param im: BGR nd.array
    :param frame_id: frame number in the given video
    :return: image (frame) blob
    '''
    im_orig = im.astype(np.float32, copy=True)
    im_orig -= cfg.PIXEL_MEANS

    im_shape = im_orig.shape
    im_size_min = np.min(im_shape[0:2])
    im_size_max = np.max(im_shape[0:2])

    processed_ims = []
    im_scale_factors = []

    for target_size in cfg.TEST.SCALES:
        im_scale = float(target_size) / float(im_size_min)
        # Prevent the biggest axis from being more than MAX_SIZE
        if np.round(im_scale * im_size_max) > cfg.TEST.MAX_SIZE:
            im_scale = float(cfg.TEST.MAX_SIZE) / float(im_size_max)
        im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale,
                        interpolation=cv2.INTER_LINEAR)
        im_scale_factors.append(im_scale)
        processed_ims.append(im)

    blob = im_list_to_blob(processed_ims)
    scales = np.array(im_scale_factors)

    blobs = {'data': blob}
    blobs['im_info'] = np.array(
        [[blob.shape[1], blob.shape[2], scales[0]]], dtype=np.float32)
    blobs['frame_number'] = np.array([[frame_id]])

    return blobs
def get_image_blob(im):
    """Converts an image into a network input.

    Arguments:
        im (ndarray): input image data

    Returns:
        blob (ndarray): a data blob holding an image pyramid
        im_scale_factors (list): list of image scales (relative to im) used
            in the image pyramid
    """
    im_scales = []
    processed_ims = []

    scale_inds = np.random.randint(0, high=len(cfg.TRAIN.SCALES), size=1)
    target_size = cfg.TRAIN.SCALES[scale_inds[0]]
    im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, cfg.PIXEL_STDS,
                                    target_size, cfg.TRAIN.MAX_SIZE)
    im_scales.append(im_scale)
    processed_ims.append(im)

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)

    return blob, im_scales
def _get_image_blob(roidb, scale_inds):
    """Builds an input blob from the images in the roidb at the specified
    scales.
    """
    num_images = len(roidb)
    processed_ims = []
    im_scales = []
    for i in range(num_images):
        im = cv2.imread(roidb[i]['image'], cv2.IMREAD_GRAYSCALE)
        # im = imread(roidb[i]['image'])
        # im = cv2.imread(roidb[i]['image'], -1)
        # print('im_shape')
        # print(im.shape)
        if len(im.shape) == 2:
            # print('im.shape==2')
            im = im[:, :, np.newaxis]
            im = np.concatenate((im, im, im), axis=2)
        # flip the channels, since the original code used cv2 (BGR order)
        # rgb -> bgr
        # print(im.shape)
        im = im[:, :, ::-1]
        # print('image before mean subtraction')
        # print(im[:, :, 0])

        if roidb[i]['flipped']:
            im = im[:, ::-1, :]
        target_size = cfg.TRAIN.SCALES[scale_inds[i]]
        im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
                                        cfg.TRAIN.MAX_SIZE)
        im_scales.append(im_scale)
        processed_ims.append(im)

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)

    return blob, im_scales
def _get_image_blob(im):
    """
    Given an image, normalise and reshape it to size (600, x) where x <= 800
    @param img: BGR image (nd array)
    @return: blob, 4D array, (num_images, h_max, w_max, 3)
             im_scale_factors, 1D array of image scale factors
    """
    im_orig = im.astype(np.float32, copy=True)
    im_orig -= cfg.PIXEL_MEANS

    im_shape = im_orig.shape  # (h, w, 3)
    im_size_min = np.min(im_shape[0:2])  # w or h
    im_size_max = np.max(im_shape[0:2])  # w or h

    processed_ims = []
    im_scale_factors = []

    # reshape img size to (600, x) where x <= 800
    for target_size in cfg.TEST.SCALES:
        im_scale = float(target_size) / float(im_size_min)  # scale = 600 / shorter_side (w or h)
        if np.round(im_scale * im_size_max) > cfg.TEST.MAX_SIZE:  # make sure the longer side <= 1000
            im_scale = float(cfg.TEST.MAX_SIZE) / float(im_size_max)
        im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale,
                        interpolation=cv2.INTER_LINEAR)
        im_scale_factors.append(im_scale)
        processed_ims.append(im)

    # Create a blob (container) to hold the input images
    blob = im_list_to_blob(processed_ims)

    return blob, np.array(im_scale_factors)
def _get_image_blob(roidb, scale_inds):
    """Builds an input blob from the images in the roidb at the specified
    scales.
    """
    num_images = len(roidb)
    processed_ims = []
    im_scales = []
    for i in range(num_images):
        # im = cv2.imread(roidb[i]['image'])
        # im = imread(roidb[i]['image'])
        # imageio is used because of: ImportError: cannot import name 'imread' from 'scipy.misc'
        im = imageio.imread(roidb[i]['image'])

        if len(im.shape) == 2:
            im = im[:, :, np.newaxis]
            im = np.concatenate((im, im, im), axis=2)
        # flip the channels, since the original code used cv2 (BGR order)
        # rgb -> bgr
        im = im[:, :, ::-1]

        if roidb[i]['flipped']:
            im = im[:, ::-1, :]
        if roidb[i]['ver_flipped']:
            im = im[::-1, :, :]
        target_size = cfg.TRAIN.SCALES[scale_inds[i]]
        im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
                                        cfg.TRAIN.MAX_SIZE)
        im_scales.append(im_scale)
        processed_ims.append(im)

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)

    return blob, im_scales
def _get_image_blob(roidb, scale_inds):
    """Builds an input blob from the images in the roidb at the specified
    scales.
    """
    num_images = len(roidb)
    processed_ims = []
    im_scales = []
    for i in range(num_images):
        # im = cv2.imread(roidb[i]['image'])  # BGR
        im = imread(roidb[i]['image'])  # RGB

        # tile channels for 1-channel images
        if len(im.shape) == 2:
            im = im[:, :, np.newaxis]
            im = np.concatenate((im, im, im), axis=2)
        # drop the last channel for 4-channel images
        if im.shape[-1] == 4:
            im = im[:, :, :-1]
        # rgb -> bgr
        im = im[:, :, ::-1]

        if roidb[i]['flipped']:
            im = im[:, ::-1, :]
        target_size = cfg.TRAIN.SCALES[scale_inds[i]]
        im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
                                        cfg.TRAIN.MAX_SIZE)
        im_scales.append(im_scale)
        processed_ims.append(im)

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)

    return blob, im_scales
def _get_image_blob(roidb, scale_inds):
    """
    Load the image from a local path, subtract the pixel mean and resize the image.

    :param roidb: annotation list [{}] for one image; the {} contains all labels
    :param scale_inds: [0]
    :return blob: an image 4D array (1, 3, h, w)
            im_scales: a float number
    """
    num_images = len(roidb)  # 1
    processed_ims = []
    im_scales = []
    for i in range(num_images):
        im = cv2.imread(roidb[i]['image'])
        # im = imread(roidb[i]['image'])
        # if len(im.shape) == 2:
        #     im = im[:,:,np.newaxis]
        #     im = np.concatenate((im,im,im), axis=2)
        # flip the channels, since the original code used cv2 (BGR order)
        # rgb -> bgr
        # im = im[:,:,::-1]

        if roidb[i]['flipped']:
            im = im[:, ::-1, :]
        # subtract the pixel mean and resize the image
        target_size = cfg.TRAIN.SCALES[scale_inds[i]]  # 600
        im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
                                        cfg.TRAIN.MAX_SIZE)
        im_scales.append(im_scale)
        processed_ims.append(im)

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)

    return blob, im_scales
def _get_image_blob(roidb, scale_inds, RGB, NIR, DEPTH):
    """Builds an input blob from the images in the roidb at the specified
    scales.
    """
    num_images = len(roidb)
    processed_ims = []
    im_scales = []
    for i in range(num_images):
        # im = cv2.imread(roidb[i]['image'])
        if RGB:
            im = imread(roidb[i]['image'])

            if len(im.shape) == 2:
                im = im[:, :, np.newaxis]
                im = np.concatenate((im, im, im), axis=2)
            # flip the channels, since the original code used cv2 (BGR order)
            # rgb -> bgr
            im = im[:, :, ::-1]

            if NIR | DEPTH:
                # I_D = scipy.io.loadmat(roidb[i]['image'][:-8] + '_intensity_depth.mat')
                I_D = scipy.io.loadmat(roidb[i]['image'][:87] + '_intensity_depth.mat')
                if NIR:
                    im = np.concatenate((im, I_D['NIR_DEPTH_res_crop'][:, :, :1]), axis=2)
                if DEPTH:
                    im = np.concatenate((im, I_D['NIR_DEPTH_res_crop'][:, :, 1:]), axis=2)
        elif NIR:
            if not DEPTH:
                # I_D = scipy.io.loadmat(roidb[i]['image'][:-8] + '_intensity_depth.mat')
                I_D = scipy.io.loadmat(roidb[i]['image'][:87] + '_intensity_depth.mat')
                im = I_D['NIR_DEPTH_res_crop'][:, :, :1]
                im = np.concatenate((im, im, im), axis=2)
            else:
                # I_D = scipy.io.loadmat(roidb[i]['image'][:-8] + '_intensity_depth.mat')
                I_D = scipy.io.loadmat(roidb[i]['image'][:87] + '_intensity_depth.mat')
                im = I_D['NIR_DEPTH_res_crop']
                im = np.concatenate((im, im), axis=2)
        elif DEPTH:
            # I_D = scipy.io.loadmat(roidb[i]['image'][:-8] + '_intensity_depth.mat')
            I_D = scipy.io.loadmat(roidb[i]['image'][:87] + '_intensity_depth.mat')
            im = I_D['NIR_DEPTH_res_crop'][:, :, 1:]
            im = np.concatenate((im, im, im), axis=2)
        else:
            print('No color space was selected')
        # I_D = scipy.io.loadmat(roidb[i]['image'][:-8] + '_intensity_depth.mat')
        # NIR = imread(roidb[i]['image'][:-11] + '_intensity_' + roidb[i]['image'][-6:-4] + '.jpg')
        # depth = imread(roidb[i]['image'][:-11] + '_depth_' + roidb[i]['image'][-6:-4] + '.jpg')
        # im = np.concatenate((im, I_D['NIR_DEPTH_res_crop']), axis=2)
        # im = np.concatenate((im, NIR[:,:,:1], depth[:,:,:1]), axis=2)

        if roidb[i]['flipped']:
            im = im[:, ::-1, :]
        target_size = cfg.TRAIN.SCALES[scale_inds[i]]
        im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
                                        cfg.TRAIN.MAX_SIZE, RGB, NIR, DEPTH)
        im_scales.append(im_scale)
        processed_ims.append(im)

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims, RGB, NIR, DEPTH)

    return blob, im_scales
def _get_image_blob_with_aug(roidb, scale_inds=-1, training=True):
    """Builds an input blob from the images in the roidb at the specified
    scales.
    """
    num_images = len(roidb)
    processed_ims = []
    im_scales = []

    gt_boxes = None
    gt_boxes_keep = None
    gt_classes = None
    gt_grasps = None
    gt_grasps_keep = None

    if 'boxes' in roidb[0]:
        gt_boxes = []
        gt_classes = []
        gt_boxes_keep = []
    if 'grasps' in roidb[0] and roidb[0]['grasps'].size > 0:
        gt_grasps = []
        gt_grasps_keep = []

    for i in range(num_images):
        # im = cv2.imread(roidb[i]['image'])
        im = imread(roidb[i]['image'])

        boxes = None
        cls = None
        boxes_keep = None
        if 'boxes' in roidb[i]:
            boxes = np.array(roidb[i]['boxes'], dtype=np.int32)
            cls = roidb[i]['gt_classes']
            boxes_keep = np.array(range(boxes.shape[0]), dtype=np.int32)

        grasps = None
        grasps_keep = None
        # grasps should be floats
        if 'grasps' in roidb[i] and roidb[i]['grasps'].size > 0:
            grasps = np.array(roidb[i]['grasps'], dtype=np.int32)
            grasps_keep = np.array(range(grasps.shape[0]), dtype=np.int32)

        # flip the channels, since the original code used cv2 (BGR order)
        im = np.rot90(im, roidb[i]['rotated'])

        if roidb[i]['flipped']:
            im = im[:, ::-1, :]

        im, boxes, cls, grasps, boxes_keep, grasps_keep = \
            prep_im_for_blob_aug(im, boxes, cls, grasps, boxes_keep, grasps_keep, training)

        if len(im.shape) == 2:
            im = im[:, :, np.newaxis]
            im = np.concatenate((im, im, im), axis=2)
        # rgb -> bgr
        im = im[:, :, ::-1]

        # original code
        if scale_inds == -1:
            target_size = cfg.TRAIN.COMMON.INPUT_SIZE
            im, im_scale = prep_im_for_blob_fixed_size(im, cfg.PIXEL_MEANS, target_size)
        else:
            target_size = cfg.TRAIN.RCNN_COMMON.SCALES[scale_inds[i]]
            im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
                                            cfg.TRAIN.COMMON.MAX_SIZE)

        processed_ims.append(im)
        im_scales.append(im_scale)
        if gt_boxes is not None:
            gt_boxes.append(np.array(boxes, dtype=np.int32))
            gt_classes.append(cls)
            gt_boxes_keep.append(np.array(boxes_keep, dtype=np.uint16))
        if gt_grasps is not None:
            gt_grasps.append(np.array(grasps, dtype=np.int32))
            gt_grasps_keep.append(np.array(grasps_keep, dtype=np.uint16))

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)

    return blob, im_scales, gt_boxes, gt_classes, gt_grasps, gt_boxes_keep, gt_grasps_keep
def __getitem__(self, index):
    """ get item by index of segment """
    # parse segment index into video id and video-level segment id
    vid_index, vid_id, seg_ind = self.parse_index(index)

    # get segment entities
    if len(self.entity_type) == 2:
        entities = self.word_dict[vid_id][self.entity_type[0]][seg_ind]
        sent = self.word_dict[vid_id][self.entity_type[1]][seg_ind][:20]
    else:
        entities = self.word_dict[vid_id][self.entity_type[0]][seg_ind]

    # get segment images (read n images in series)
    vid_path = self.vid_paths[vid_index]
    image_list = glob.glob(os.path.join(vid_path, '*.jpg'))
    image_lists = self.div_imglst_by_name(image_list)
    # for now we can only parse frames by name (reconstruct the name)
    image_list = image_lists[seg_ind]
    f_inds = self.get_frm_inds(image_list)
    image_list = [image_list[f_ind] for f_ind in f_inds]

    imgs = []
    img_paths = []

    ###################
    #Box from OIdetect#
    ###################
    DetectBox_path = []
    DetectBox_class = []
    DetectBox_score = []
    DetectBox = []

    if self.phase == 'train':
        for i, img_path in enumerate(image_list):
            # read image
            img = cv2.imread(img_path)
            img = img.astype(np.float32, copy=True)
            img -= 127.5
            # resize image
            if img.shape[0] != self.args.img_h or img.shape[1] != self.args.img_w:
                img = cv2.resize(img, (self.args.img_h, self.args.img_w))
            # append image to imgs
            imgs.append(img)
            img_paths.append(img_path)

            # get box info
            box_path = img_path.split('.')[0] + ".txt"
            # print("loading ", box_path)
            DetectBox_path.append(box_path)
            with open(box_path, 'rb') as handle:
                info = pickle.load(handle)
            # print(info)
            temp_class = []
            temp_score = []
            temp_box = []
            if len(info) > 0:
                for eachinfo in info:
                    temp_class.append(eachinfo[0].lower())
                    temp_score.append(eachinfo[1])
                    temp_box.append(eachinfo[2])
            DetectBox_class.append(temp_class)
            DetectBox_score.append(temp_score)
            DetectBox.append(temp_box)

        # transfer to blob (batch, 3, h, w)
        # preclude the condition that no entity exists in such an action
        blob = im_list_to_blob(imgs)
        # blob = blob.transpose(0, 3, 1, 2)[0]

        # new video: true if the current seg_ind is the last segment in a video, else false
        new_vid = True if self.seg_accumulate_num[vid_index] + seg_ind in self.seg_accumulate_num else False

        # get action_length
        action_length = self.actions_length[vid_index]
        action_ind = seg_ind

        # yield image blob, word entities, image paths and the new-video flag
        if len(self.entity_type) == 2:
            return blob, entities, sent, img_paths, new_vid, action_length, action_ind, \
                DetectBox_path, DetectBox_class, DetectBox_score, DetectBox
        elif len(self.entity_type) == 1:
            return blob, entities, img_paths, new_vid, action_length, action_ind, \
                DetectBox_path, DetectBox_class, DetectBox_score, DetectBox
    else:
        for i, img_path in enumerate(image_list):
            # read image
            img = cv2.imread(img_path)
            img = img.astype(np.float32, copy=True)
            img -= 127.5
            # resize image
            if img.shape[0] != self.args.img_h or img.shape[1] != self.args.img_w:
                img = cv2.resize(img, (self.args.img_h, self.args.img_w))
            # append image to imgs
            imgs.append(img)
            img_paths.append(img_path)

        # transfer to blob (batch, 3, h, w)
        # preclude the condition that no entity exists in such an action
        blob = im_list_to_blob(imgs)
        # blob = blob.transpose(0, 3, 1, 2)[0]

        # new video: true if the current seg_ind is the last segment in a video, else false
        new_vid = True if self.seg_accumulate_num[vid_index] + seg_ind in self.seg_accumulate_num else False

        # get action_length
        action_length = self.actions_length[vid_index]
        action_ind = seg_ind

        # yield image blob, word entities, image paths and the new-video flag
        if len(self.entity_type) == 2:
            return blob, entities, sent, img_paths, new_vid, action_length, action_ind
        elif len(self.entity_type) == 1:
            return blob, entities, img_paths, new_vid, action_length, action_ind
def _get_image_blob(im, RGB, NIR, DEPTH):
    """Converts an image into a network input.

    Arguments:
        im (ndarray): a color image in BGR order

    Returns:
        blob (ndarray): a data blob holding an image pyramid
        im_scale_factors (list): list of image scales (relative to im) used
            in the image pyramid
    """
    im_orig = im.astype(np.float32, copy=True)
    # im_orig -= cfg.PIXEL_MEANS
    pixel_means = cfg.PIXEL_MEANS
    if RGB:
        p_means = pixel_means[:, :, :3]
        if NIR:
            p_means = np.concatenate((p_means, pixel_means[:, :, 3:4]), axis=2)
        if DEPTH:
            p_means = np.concatenate((p_means, pixel_means[:, :, 4:5]), axis=2)
    elif NIR:
        if not DEPTH:
            p_means = np.concatenate(
                (pixel_means[:, :, 3:4], pixel_means[:, :, 3:4],
                 pixel_means[:, :, 3:4]), axis=2)
        else:
            p_means = np.concatenate(
                (pixel_means[:, :, 3:5], pixel_means[:, :, 3:5]), axis=2)
    elif DEPTH:
        p_means = np.concatenate(
            (pixel_means[:, :, 4:5], pixel_means[:, :, 4:5],
             pixel_means[:, :, 4:5]), axis=2)
    else:
        print('No color space was selected')

    im_orig -= p_means

    im_shape = im_orig.shape
    im_size_min = np.min(im_shape[0:2])
    im_size_max = np.max(im_shape[0:2])

    processed_ims = []
    im_scale_factors = []

    for target_size in cfg.TEST.SCALES:
        im_scale = float(target_size) / float(im_size_min)
        # Prevent the biggest axis from being more than MAX_SIZE
        if np.round(im_scale * im_size_max) > cfg.TEST.MAX_SIZE:
            im_scale = float(cfg.TEST.MAX_SIZE) / float(im_size_max)
        im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale,
                        interpolation=cv2.INTER_LINEAR)
        im_scale_factors.append(im_scale)
        processed_ims.append(im)

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims, RGB, NIR, DEPTH)

    return blob, np.array(im_scale_factors)
def load_query(self, choice, id=0):

    if self.training:
        # Randomly choose a query image of the chosen category
        all_data = self._query[choice]
        # data = random.choice(all_data)
        # todo: check whether the changed code is acceptable.
        while True:
            data = random.choice(all_data)
            if int(data['boxes'][1]) == int(data['boxes'][3]) or int(
                    data['boxes'][0]) == int(data['boxes'][2]):
                continue
            else:
                break
    else:
        # Take out the target category for testing
        catgory = self.cat_list[choice]
        # list all the candidate images
        all_data = self._query[catgory]

        # Use image_id to determine the random seed
        # The list l is the candidate sequence, randomized by image_id
        random.seed(id)
        l = list(range(len(all_data)))
        random.shuffle(l)

        # choose the candidate sequence and take out the data information
        position = l[self.query_position % len(l)]
        data = all_data[position]

    # Get image
    path = data['image_path']
    im = imread(path)
    # todo: check whether the changed code is acceptable.
    # check_zero = True
    # while check_zero:
    #     path = data['image_path']
    #     im = imread(path)
    #     if 0 not in im.shape[0:3]:
    #         check_zero = False
    #         break
    #     elif 0 in im.shape[0:3]:
    #         data = random.choice(all_data)

    if len(im.shape) == 2:
        im = im[:, :, np.newaxis]
        im = np.concatenate((im, im, im), axis=2)

    im = crop(im, data['boxes'], cfg.TRAIN.query_size)

    # flip the channels, since the original code used cv2 (BGR order)
    # rgb -> bgr
    # im = im[:,:,::-1]

    if random.randint(0, 99) / 100 > 0.5 and self.training:
        im = im[:, ::-1, :]

    im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, cfg.TRAIN.query_size,
                                    cfg.TRAIN.MAX_SIZE)

    query = im_list_to_blob([im])

    return query
def _get_image_blob(roidb, scale_inds):
    """Builds an input blob from the images in the roidb at the specified
    scales.
    """
    num_images = len(roidb)
    # print("num images {}".format(num_images))
    processed_ims = []
    im_scales = []
    for i in range(num_images):
        # im = cv2.imread(roidb[i]['image'])
        im = imread(roidb[i]['image'])

        if len(im.shape) == 2:
            im = im[:, :, np.newaxis]
            im = np.concatenate((im, im, im), axis=2)
        # flip the channels, since the original code used cv2 (BGR order)
        # rgb -> bgr
        im = im[:, :, ::-1]

        if roidb[i]['flipped']:
            im = im[:, ::-1, :]
        # im = Random_crop(roidb, im, i)
        target_size = cfg.TRAIN.SCALES[scale_inds[i]]
        im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
                                        cfg.TRAIN.MAX_SIZE)
        im_scales.append(im_scale)
        processed_ims.append(im)

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)

    return blob, im_scales


# def Random_crop(roidb, im, index):
#     image = im
#     annots = roidb[index]["boxes"]
#     if not annots.shape[0]:
#         return image
#     if random.choice([0, 1]):
#         return image
#     else:
#         rows, cols, cns = image.shape
#         flag = 0
#         while True:
#             flag += 1
#             if flag > 10:
#                 return image
#             crop_ratio = random.uniform(0.5, 1)
#             rows_zero = int(rows * random.uniform(0, 1 - crop_ratio))
#             cols_zero = int(cols * random.uniform(0, 1 - crop_ratio))
#             crop_rows = int(rows * crop_ratio)
#             crop_cols = int(cols * crop_ratio)
#             '''
#             new_image = image[rows_zero:rows_zero+crop_rows, cols_zero:cols_zero+crop_cols, :]
#             new_image = cv2.resize(new_image, (cols, rows))
#             #new_image = skimage.transform.resize(new_image, (rows, cols))
#             new_annots = np.zeros((0, 5))
#             for i in range(annots.shape[0]):
#                 x1 = max(annots[i, 0] - cols_zero, 0)
#                 y1 = max(annots[i, 1] - rows_zero, 0)
#                 x2 = min(annots[i, 2] - cols_zero, crop_cols)
#                 y2 = min(annots[i, 3] - rows_zero, crop_rows)
#                 label = annots[i, 4]
#                 if x1 + 10 < x2 and y1 + 10 < y2:
#                     x1 /= crop_ratio
#                     y1 /= crop_ratio
#                     x2 /= crop_ratio
#                     y2 /= crop_ratio
#                     new_annots = np.append(new_annots, np.array([[x1, y1, x2, y2, label]]), axis=0)
#             if not new_annots.shape[0]:
#                 continue
#             '''
#             new_image = np.zeros((rows, cols, cns))
#             new_image[rows_zero:rows_zero+crop_rows, cols_zero:cols_zero+crop_cols, :] = \
#                 image[rows_zero:rows_zero+crop_rows, cols_zero:cols_zero+crop_cols, :]
#             im = new_image
#
#             # new_annots = np.zeros((0, 4))
#             NUM_CLASS = 2
#             new_annots = np.zeros((0, 4), dtype=np.uint16)
#             gt_classes = np.zeros((0), dtype=np.int32)
#             overlaps = np.zeros((0, NUM_CLASS), dtype=np.float32)
#             max_classes = np.zeros((0), dtype=np.int64)
#             max_overlaps = np.zeros((0), dtype=np.float32)
#             # seg_areas = np.zeros((0), dtype=np.float32)
#             for i in range(annots.shape[0]):
#                 x1 = max(cols_zero, annots[i, 0])
#                 y1 = max(rows_zero, annots[i, 1])
#                 x2 = min(cols_zero+crop_cols, annots[i, 2])
#                 y2 = min(rows_zero+crop_rows, annots[i, 3])
#                 if x1+10 < x2 and y1+10 < y2:
#                     new_annots = np.append(new_annots, np.array([[x1, y1, x2, y2]]), axis=0)
#                     gt_classes = np.append(gt_classes, roidb[index]['gt_classes'][i])
#                     if roidb[index]['gt_overlaps'].data[i] <= 0:
#                         # Set overlap to -1 for all classes for crowd objects
#                         # so they will be excluded during training
#                         tmp_overlap = np.zeros((1, NUM_CLASS), dtype=np.float32)
#                         tmp_overlap[0, :] = -1.0
#                         overlaps = np.append(overlaps, tmp_overlap)
#                         # overlaps[ix, :] = -1.0
#                     else:
#                         tmp_overlap = np.zeros((1, NUM_CLASS), dtype=np.float32)
#                         tmp_overlap[0, gt_classes] = 1.0
#                         overlaps = np.append(overlaps, tmp_overlap)
#                         # overlaps[ix, cls] = 1.0
#                     max_classes = np.append(max_classes, roidb[index]['max_classes'][i])
#                     max_overlaps = np.append(max_overlaps, roidb[index]['max_overlaps'][i])
#             if not new_annots.shape[0]:
#                 continue
#             overlaps = scipy.sparse.csr_matrix(overlaps)
#             roidb[index]['boxes'] = new_annots
#             roidb[index]['gt_classes'] = gt_classes
#             roidb[index]['gt_overlaps'] = overlaps
#             roidb[index]['max_classes'] = max_classes
#             roidb[index]['max_overlaps'] = max_overlaps
#             roidb[index]['height'] = new_image.shape[0]
#             roidb[index]['width'] = new_image.shape[1]
#             return new_image
def _get_image_blob(roidb, scale_inds):
    """Builds an input blob from the images in the roidb at the specified
    scales.
    """
    num_images = len(roidb)
    processed_ims = []
    im_scales = []

    assert isinstance(cfg.SHIFT_X, int) and isinstance(cfg.SHIFT_Y, int), \
        'wrong shift number, please check'

    for i in range(num_images):
        im = []
        # the reference and sensed modality
        for j in range(2):
            im.append(imread(roidb[i]['image'][j]))

            if len(im[j].shape) == 2:
                im[j] = im[j][:, :, np.newaxis]
                im[j] = np.concatenate((im[j], im[j], im[j]), axis=2)
            # flip the channels, since the original code used cv2 (BGR order)
            # rgb -> bgr
            im[j] = im[j][:, :, ::-1]

            # shift the sensed image by (SHIFT_X, SHIFT_Y), padding the vacated pixels with zeros
            if j == 1 and (cfg.SHIFT_X != 0 or cfg.SHIFT_Y != 0):
                new_img = np.zeros(im[j].shape)
                if cfg.SHIFT_X > 0:
                    if cfg.SHIFT_Y > 0:
                        new_img[:-cfg.SHIFT_Y, cfg.SHIFT_X:, :] = im[j][cfg.SHIFT_Y:, :-cfg.SHIFT_X, :]
                    elif cfg.SHIFT_Y < 0:
                        new_img[-cfg.SHIFT_Y:, cfg.SHIFT_X:, :] = im[j][:cfg.SHIFT_Y, :-cfg.SHIFT_X, :]
                    else:
                        new_img[:, cfg.SHIFT_X:, :] = im[j][:, :-cfg.SHIFT_X, :]
                elif cfg.SHIFT_X < 0:
                    if cfg.SHIFT_Y > 0:
                        new_img[:-cfg.SHIFT_Y, :cfg.SHIFT_X, :] = im[j][cfg.SHIFT_Y:, -cfg.SHIFT_X:, :]
                    elif cfg.SHIFT_Y < 0:
                        new_img[-cfg.SHIFT_Y:, :cfg.SHIFT_X, :] = im[j][:cfg.SHIFT_Y, -cfg.SHIFT_X:, :]
                    else:
                        new_img[:, :cfg.SHIFT_X, :] = im[j][:, -cfg.SHIFT_X:, :]
                else:
                    if cfg.SHIFT_Y > 0:
                        new_img[:-cfg.SHIFT_Y, :, :] = im[j][cfg.SHIFT_Y:, :, :]
                    elif cfg.SHIFT_Y < 0:
                        new_img[-cfg.SHIFT_Y:, :, :] = im[j][:cfg.SHIFT_Y, :, :]
                    else:
                        pass
                im[j] = new_img

            if roidb[i]['flipped']:
                im[j] = im[j][:, ::-1, :]
            target_size = cfg.TRAIN.SCALES[scale_inds[i]]
            im[j], im_scale = prep_im_for_blob(im[j], cfg.PIXEL_MEANS, target_size,
                                               cfg.TRAIN.MAX_SIZE)

        im_scales.append(im_scale)
        processed_ims.append(im)

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)

    return blob, im_scales
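# --- Added note (not part of the original code) ------------------------------
# The nested SHIFT_X / SHIFT_Y branches above translate the sensed image and
# fill the vacated pixels with zeros. A compact sketch of the same translation
# using scipy.ndimage.shift, under the assumption that positive SHIFT_X moves
# content right and positive SHIFT_Y moves content up, matching the branch logic.
import numpy as np
from scipy import ndimage


def shift_image(im, shift_x, shift_y):
    # ndimage.shift moves content toward larger indices for positive shifts,
    # so "up" along axis 0 is expressed as -shift_y; zero padding via cval=0.
    return ndimage.shift(im, (-shift_y, shift_x, 0), order=0, mode='constant', cval=0.0)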