def _get_image_blob(roidb, scale_inds): """Builds an input blob from the images in the roidb at the specified scales. """ num_images = len(roidb) processed_ims_left = [] processed_ims_right = [] im_scales = [] for i in range(num_images): img_left = cv2.imread(roidb[i]['img_left']) img_right = cv2.imread(roidb[i]['img_right']) if roidb[i]['flipped']: img_left_flip = img_right[:, ::-1, :].copy() img_right = img_left[:, ::-1, :].copy() img_left = img_left_flip target_size = cfg.TRAIN.SCALES[scale_inds[i]] img_left, img_right, im_scale = prep_im_for_blob(img_left, img_right, cfg.PIXEL_MEANS, target_size, cfg.TRAIN.MAX_SIZE) im_scales.append(im_scale) processed_ims_left.append(img_left) processed_ims_right.append(img_right) # Create a blob to hold the input images blob_left, blob_right = im_list_to_blob(processed_ims_left, processed_ims_right) return blob_left, blob_right, im_scales
def _get_image_blob(roidb, scale_inds): """Builds an input blob from the images in the roidb at the specified scales. """ num_images = len(roidb) processed_ims = [] im_scales = [] for i in range(num_images): #im = cv2.imread(roidb[i]['image']) im = imread(roidb[i]['image']) if len(im.shape) == 2: im = im[:, :, np.newaxis] im = np.concatenate((im, im, im), axis=2) # flip the channel, since the original one using cv2 # rgb -> bgr im = im[:, :, ::-1] if roidb[i]['flipped']: im = im[:, ::-1, :] target_size = cfg.TRAIN.SCALES[scale_inds[i]] im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size, cfg.TRAIN.MAX_SIZE) im_scales.append(im_scale) processed_ims.append(im) # Create a blob to hold the input images blob = im_list_to_blob(processed_ims) return blob, im_scales
def _imagePreprocess(self, blob, fix_size=False):
    assert not fix_size, "When grasp labels are included, the input image cannot be fixed-size."
    keep_b = np.arange(blob['gt_boxes'].shape[0])
    keep_g = np.arange(blob['gt_grasps'].shape[0])
    if self.augmentation:
        blob['data'] = self.augImageOnly(blob['data'])
        blob['data'], blob['gt_boxes'], blob['gt_grasps'], keep_b, keep_g = \
            self.augObjdet(image=blob['data'], boxes=blob['gt_boxes'],
                           grasps=blob['gt_grasps'], boxes_keep=keep_b,
                           grasps_keep=keep_g)
    # choose one predefined size, TODO: support multi-instance batch
    random_scale_ind = np.random.randint(0, high=len(cfg.SCALES))
    blob['data'], im_scale = prep_im_for_blob(blob['data'],
                                              cfg.SCALES[random_scale_ind],
                                              cfg.TRAIN.COMMON.MAX_SIZE,
                                              fix_size)
    blob['im_info'][:2] = (blob['data'].shape[0], blob['data'].shape[1])
    blob['im_info'][2:4] = (im_scale['y'], im_scale['x'])
    # modify bounding boxes and grasps according to the resize parameters
    blob['gt_boxes'][:, :-1][:, 0::2] *= im_scale['x']
    blob['gt_boxes'][:, :-1][:, 1::2] *= im_scale['y']
    blob['gt_grasps'][:, 0::2] *= im_scale['x']
    blob['gt_grasps'][:, 1::2] *= im_scale['y']
    blob['gt_grasp_inds'] = blob['gt_grasp_inds'][keep_g]
    blob['data'] = image_normalize(blob['data'], mean=cfg.PIXEL_MEANS,
                                   std=cfg.PIXEL_STDS)
    blob['node_inds'] = blob['node_inds'][keep_b]
    blob['parent_lists'] = [blob['parent_lists'][p_ind] for p_ind in list(keep_b)]
    blob['child_lists'] = [blob['child_lists'][c_ind] for c_ind in list(keep_b)]
    return blob

def _get_image_blob(roidb, target_size):
    """Builds an input blob from the images in the roidb at the specified
    scales.
    """
    num_images = len(roidb)
    processed_ims = []
    im_scales = []
    for i in range(num_images):
        # im = cv2.imread(roidb[i]['image'])
        im = imread(roidb[i]['image'])

        if len(im.shape) == 2:
            im = im[:, :, np.newaxis]
            im = np.concatenate((im, im, im), axis=2)
        # flip the channels, since the original code used cv2 (RGB -> BGR)
        im = im[:, :, ::-1]

        if roidb[i]['flipped']:
            im = im[:, ::-1, :]
        im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size[i],
                                        cfg.TRAIN.MAX_SIZE)
        im_scales.append(im_scale)
        processed_ims.append(im)

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)

    return blob, im_scales

def _get_image_blob(roidb, scale_inds): """Builds an input blob from the images in the roidb at the specified scales. """ num_images = len(roidb) processed_ims = [] im_scales = [] im_shapes = np.zeros((0, 2), dtype=np.float32) for i in range(num_images): img_path = roidb[i]['image'] im = cv2.imread(roidb[i]['image']) target_size = cfg.TRAIN.SCALES[scale_inds[i]] if roidb[i]['flipped']: im = im[:, ::-1, :] im, im_scale, im_shape = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size, cfg.TRAIN.MAX_SIZE) im_scales.append(im_scale) processed_ims.append(im) im_shapes = np.vstack((im_shapes, im_shape)) # Create a blob to hold the input images blob = im_list_to_blob(processed_ims) return blob, im_scales, im_shapes
def _get_image_blob(roidb, scale_inds): """Builds an input blob from the images in the roidb at the specified scales. """ num_images = len(roidb) processed_ims = [] im_scales = [] for i in range(num_images): #im = cv2.imread(roidb[i]['image']) im = imread(roidb[i]['image']) if len(im.shape) == 2: im = im[:,:,np.newaxis] im = np.concatenate((im,im,im), axis=2) # flip the channel, since the original one using cv2 # rgb -> bgr im = im[:,:,::-1] if roidb[i]['flipped']: im = im[:, ::-1, :] target_size = cfg.TRAIN.SCALES[scale_inds[i]] # normalize (minus the mean) and scale, return the scaled_img & scale im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size, cfg.TRAIN.MAX_SIZE) im_scales.append(im_scale) processed_ims.append(im) # Create a blob to hold the input images # im_list_to_blob has zero padding for different size of imgs blob = im_list_to_blob(processed_ims) # a np.array image of [B, H, W, C] return blob, im_scales # batch of imgs, list of scales
def support_im_preprocess(im_list, cfg, support_im_size):
    n_of_shot = len(im_list)
    support_data_all = np.zeros((n_of_shot, 3, support_im_size, support_im_size),
                                dtype=np.float32)
    for i, im in enumerate(im_list):
        im = im[:, :, ::-1]  # rgb -> bgr
        target_size = np.min(im.shape[0:2])  # don't change the size
        im, _ = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
                                 cfg.TRAIN.MAX_SIZE)
        _h, _w = im.shape[0], im.shape[1]
        # resize so the longer side matches support_im_size, keeping the
        # aspect ratio; the shorter side is zero-padded below
        if _h > _w:
            resize_scale = float(support_im_size) / float(_h)
            unfit_size = int(_w * resize_scale)
            im = cv2.resize(im, (unfit_size, support_im_size),
                            interpolation=cv2.INTER_LINEAR)
        else:
            resize_scale = float(support_im_size) / float(_w)
            unfit_size = int(_h * resize_scale)
            im = cv2.resize(im, (support_im_size, unfit_size),
                            interpolation=cv2.INTER_LINEAR)
        h, w = im.shape[0], im.shape[1]
        support_data_all[i, :, :h, :w] = np.transpose(im, (2, 0, 1))
    support_data = torch.from_numpy(support_data_all).unsqueeze(0)

    return support_data

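# Hypothetical usage of support_im_preprocess (the file names are
# placeholders, and imread is assumed to return RGB arrays):
#   ims = [imread('shot_0.jpg'), imread('shot_1.jpg')]
#   support_data = support_im_preprocess(ims, cfg, support_im_size=320)
# support_data is then a FloatTensor of shape (1, 2, 3, 320, 320); each
# support image keeps its aspect ratio and is zero-padded to the square size.
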
def __getitem__(self, index):
    # testing
    index_ratio = index
    # though it is called a minibatch, in fact it contains only one image here
    minibatch_db = [self._roidb[index_ratio]]
    # load query
    blobs = get_minibatch(minibatch_db)
    data = torch.from_numpy(blobs['data'])
    im_info = torch.from_numpy(blobs['im_info'])  # (H, W, scale)
    data_height, data_width = data.size(1), data.size(2)
    data = data.permute(0, 3, 1, 2).contiguous().view(3, data_height, data_width)
    im_info = im_info.view(3)
    gt_boxes = torch.from_numpy(blobs['gt_boxes'])
    num_boxes = gt_boxes.size(0)

    # collect the classes present in the image and sample the episode's ways
    all_cls_in_im = []
    for i in range(num_boxes):
        _cls = int(gt_boxes[i, 4])
        all_cls_in_im.append(_cls)
    all_cls_in_im = list(set(all_cls_in_im))
    if len(all_cls_in_im) > self.num_way:
        random.seed(self.epi_random_seed)  # fixed seed for reproducibility
        selected_ways = random.sample(all_cls_in_im, k=self.num_way)
    else:
        # pad with negative classes that are not present in the image
        other_cls = list(range(self._num_classes))
        other_cls.remove(0)  # drop background
        for _cls_ind in all_cls_in_im:
            other_cls.remove(_cls_ind)
        random.seed(self.epi_random_seed)  # fixed seed for reproducibility
        random_neg_cls = random.sample(other_cls,
                                       k=(self.num_way - len(all_cls_in_im)))
        selected_ways = all_cls_in_im
        selected_ways.extend(random_neg_cls)

    # get supports
    support_data_all = np.zeros((self.testing_shot * self.num_way, 3,
                                 self.support_im_size, self.support_im_size),
                                dtype=np.float32)
    for n in range(self.num_way):
        selected_supports = self.support_pool[selected_ways[n]]
        for i, _path in enumerate(selected_supports):
            support_im = imread(_path)[:, :, ::-1]  # rgb -> bgr
            target_size = np.min(support_im.shape[0:2])  # don't change the size
            support_im, _ = prep_im_for_blob(support_im, cfg.PIXEL_MEANS,
                                             target_size, cfg.TRAIN.MAX_SIZE)
            _h, _w = support_im.shape[0], support_im.shape[1]
            if _h > _w:
                resize_scale = float(self.support_im_size) / float(_h)
                unfit_size = int(_w * resize_scale)
                support_im = cv2.resize(support_im,
                                        (unfit_size, self.support_im_size),
                                        interpolation=cv2.INTER_LINEAR)
            else:
                resize_scale = float(self.support_im_size) / float(_w)
                unfit_size = int(_h * resize_scale)
                support_im = cv2.resize(support_im,
                                        (self.support_im_size, unfit_size),
                                        interpolation=cv2.INTER_LINEAR)
            h, w = support_im.shape[0], support_im.shape[1]
            support_data_all[self.testing_shot * n + i, :, :h, :w] = \
                np.transpose(support_im, (2, 0, 1))
    supports = torch.from_numpy(support_data_all)

    return data, im_info, gt_boxes, num_boxes, supports, selected_ways

def _get_image_blob(roidb, scale_inds, RGB, NIR, DEPTH):
    """Builds an input blob from the images in the roidb at the specified
    scales.
    """
    num_images = len(roidb)
    processed_ims = []
    im_scales = []
    for i in range(num_images):
        if RGB:
            im = imread(roidb[i]['image'])
            if len(im.shape) == 2:
                im = im[:, :, np.newaxis]
                im = np.concatenate((im, im, im), axis=2)
            # flip the channels, since the original code used cv2 (RGB -> BGR)
            im = im[:, :, ::-1]
            if NIR or DEPTH:
                I_D = scipy.io.loadmat(roidb[i]['image'][:87] +
                                       '_intensity_depth.mat')
                if NIR:
                    im = np.concatenate(
                        (im, I_D['NIR_DEPTH_res_crop'][:, :, :1]), axis=2)
                if DEPTH:
                    im = np.concatenate(
                        (im, I_D['NIR_DEPTH_res_crop'][:, :, 1:]), axis=2)
        elif NIR:
            if not DEPTH:
                I_D = scipy.io.loadmat(roidb[i]['image'][:87] +
                                       '_intensity_depth.mat')
                im = I_D['NIR_DEPTH_res_crop'][:, :, :1]
                im = np.concatenate((im, im, im), axis=2)
            else:
                I_D = scipy.io.loadmat(roidb[i]['image'][:87] +
                                       '_intensity_depth.mat')
                im = I_D['NIR_DEPTH_res_crop']
                im = np.concatenate((im, im), axis=2)
        elif DEPTH:
            I_D = scipy.io.loadmat(roidb[i]['image'][:87] +
                                   '_intensity_depth.mat')
            im = I_D['NIR_DEPTH_res_crop'][:, :, 1:]
            im = np.concatenate((im, im, im), axis=2)
        else:
            print('No color space was selected')

        if roidb[i]['flipped']:
            im = im[:, ::-1, :]
        target_size = cfg.TRAIN.SCALES[scale_inds[i]]
        im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
                                        cfg.TRAIN.MAX_SIZE, RGB, NIR, DEPTH)
        im_scales.append(im_scale)
        processed_ims.append(im)

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims, RGB, NIR, DEPTH)

    return blob, im_scales

def _get_clip_blob(roidb, scale_inds):
    """Builds an input blob from the clips in the roidb at the specified
    scales.
    """
    clip_len = 8
    num_center_images = len(roidb)
    processed_clips = []
    im_scales = []
    for i in range(num_center_images):
        numf = roidb[i]['numf']
        key_frame = roidb[i]['image']
        key_frame_root_dir = key_frame[:-16]
        key_frame = key_frame.split('/')[-1]
        center_index = int(key_frame[5:-4])
        clip = []
        for j in range(clip_len):
            # clamp frame indices to the valid range [1, numf]
            frame_index = center_index - clip_len // 2 + j
            if frame_index > 0 and frame_index < int(numf):
                im_path = os.path.join(key_frame_root_dir,
                                       "frame{:06d}.jpg".format(frame_index))
            elif frame_index <= 0:
                im_path = os.path.join(key_frame_root_dir,
                                       "frame{:06d}.jpg".format(1))
            else:
                im_path = os.path.join(key_frame_root_dir,
                                       "frame{:06d}.jpg".format(int(numf)))
            im = imread(im_path)

            if len(im.shape) == 2:
                im = im[:, :, np.newaxis]
                im = np.concatenate((im, im, im), axis=2)
            # flip the channels, since the original code used cv2 (RGB -> BGR)
            im = im[:, :, ::-1]

            if roidb[i]['flipped']:
                im = im[:, ::-1, :]
            target_size = cfg.TRAIN.SCALES[scale_inds[i]]
            im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
                                            cfg.TRAIN.MAX_SIZE)
            clip.append(im)
        im_scales.append(im_scale)
        processed_clips.append(clip)

    # Create a blob to hold the input clips
    blob = clip_list_to_blob(processed_clips, clip_len)

    return blob, im_scales

def _get_image_blob(roidb, scale_inds, depth=False):
    """Builds an input blob from the images in the roidb at the specified
    scales.
    """
    num_images = len(roidb)
    processed_ims = []
    im_scales = []
    for i in range(num_images):
        # im = cv2.imread(roidb[i]['image'])
        im = imread(roidb[i]['image'])
        depth_name = roidb[i]['image'].replace("JPEGImages", "DepthImages")
        depth_val = imread(depth_name)
        depth_val = np.expand_dims(depth_val, -1)

        if len(im.shape) == 2:
            im = im[:, :, np.newaxis]
            im = np.concatenate((im, im, im), axis=2)
        # flip the channels, since the original code used cv2 (RGB -> BGR)
        im = im[:, :, ::-1]

        if depth:
            # append the depth map as a fourth channel
            im = np.concatenate([im, depth_val], -1)

        if roidb[i]['flipped']:
            im = im[:, ::-1, :]
        target_size = cfg.TRAIN.SCALES[scale_inds[i]]
        if depth:
            im, im_scale = prep_im_for_blob(im, cfg.DEPTH_MEANS, target_size,
                                            cfg.TRAIN.MAX_SIZE)
        else:
            im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
                                            cfg.TRAIN.MAX_SIZE)
        im_scales.append(im_scale)
        processed_ims.append(im)

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)

    return blob, im_scales

def query_im_preprocess(im_data, cfg):
    target_size = cfg.TRAIN.SCALES[0]
    im_data, im_scale = prep_im_for_blob(im_data, cfg.PIXEL_MEANS, target_size,
                                         cfg.TRAIN.MAX_SIZE)
    im_data = torch.from_numpy(im_data)
    im_info = np.array([[im_data.shape[0], im_data.shape[1], im_scale]],
                       dtype=np.float32)
    im_info = torch.from_numpy(im_info)
    gt_boxes = torch.from_numpy(np.array([0]))
    num_boxes = torch.from_numpy(np.array([0]))
    query = im_data.permute(2, 0, 1).contiguous().unsqueeze(0)

    return query, im_info, gt_boxes, num_boxes

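# Hypothetical usage of query_im_preprocess; 'query.jpg' is a placeholder
# path, and the channel flip mirrors the RGB -> BGR convention used above:
#   im_data = imread('query.jpg')[:, :, ::-1]
#   query, im_info, gt_boxes, num_boxes = query_im_preprocess(im_data, cfg)
# query: (1, 3, H', W') tensor; im_info: (1, 3) tensor of (H', W', scale).
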
def _get_video_blob(roidb, scale_inds):
    """Builds an input blob from the videos in the roidb at the specified
    scales.
    """
    processed_videos = []
    for i, item in enumerate(roidb):
        # just one scale implemented
        video_length = cfg.TRAIN.LENGTH[scale_inds[0]]
        video = np.zeros((video_length, cfg.TRAIN.CROP_SIZE,
                          cfg.TRAIN.CROP_SIZE, 3))
        # if cfg.INPUT == 'video':
        j = 0
        # random_idx = [np.random.randint(cfg.TRAIN.FRAME_SIZE[1]-cfg.TRAIN.CROP_SIZE),
        #               np.random.randint(cfg.TRAIN.FRAME_SIZE[0]-cfg.TRAIN.CROP_SIZE)]
        image_w, image_h, crop_w, crop_h = (cfg.TRAIN.FRAME_SIZE[1],
                                            cfg.TRAIN.FRAME_SIZE[0],
                                            cfg.TRAIN.CROP_SIZE,
                                            cfg.TRAIN.CROP_SIZE)
        offsets = GroupMultiScaleCrop.fill_fix_offset(False, image_w, image_h,
                                                      crop_w, crop_h)
        random_idx = offsets[npr.choice(len(offsets))]
        if DEBUG:
            print("offsets: {}, random_idx: {}".format(offsets, random_idx))

        for video_info in item['frames']:
            prefix = item['fg_name'] if video_info[0] else item['bg_name']
            step = video_info[3] if cfg.INPUT == 'video' else 1
            for idx in range(video_info[1], video_info[2], step):
                frame = cv2.imread('%s/image_%s.jpg'
                                   % (prefix, str(idx + 1).zfill(5)))
                frame = prep_im_for_blob(frame, cfg.PIXEL_MEANS,
                                         tuple(cfg.TRAIN.FRAME_SIZE[::-1]),
                                         cfg.TRAIN.CROP_SIZE, random_idx)

                if item['flipped']:
                    frame = frame[:, ::-1, :]

                if DEBUG:
                    cv2.imshow('frame', frame / 255.0)
                    cv2.waitKey(0)
                    cv2.destroyAllWindows()

                video[j] = frame
                j = j + 1

        # pad the tail with the last frame so every clip has the same length
        while j < video_length:
            video[j] = frame
            j = j + 1
        processed_videos.append(video)

    # Create a blob to hold the input images
    blob = video_list_to_blob(processed_videos)

    return blob

def _get_image_blob(roidb, scale_inds): """Builds an input blob from the images in the roidb at the specified scales. """ num_images = len(roidb) im_scales = [] processed_ims = [] processed_dps = [] for i in range(num_images): im = imread(roidb[i]['image']) dp = np.load(roidb[i]['depth']) # dp = np.zeros((im.shape[0], im.shape[1], 7)) if len(im.shape) == 2: im = im[:,:,np.newaxis] im = np.concatenate((im,im,im), axis=2) # flip the channel, since the original one using cv2 # rgb -> bgr im = im[:,:,::-1] if roidb[i]['flipped']: im = im[:, ::-1, :] dp = dp[:, ::-1, :] target_size = cfg.TRAIN.SCALES[scale_inds[i]] im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size, cfg.TRAIN.MAX_SIZE) dp, de_scale = prep_im_for_blob(dp, cfg.DEPTH_MEANS, target_size, cfg.TRAIN.MAX_SIZE) im_scales.append(im_scale) processed_ims.append(im) processed_dps.append(dp) # Create a blob to hold the input images im_blob = im_list_to_blob(processed_ims, 3) dp_blob = im_list_to_blob(processed_dps, 7) return im_blob, dp_blob, im_scales
def _get_image_blob(roidb, scale_inds, augment=False, seed=2020):
    """Builds an input blob from the images in the roidb at the specified
    scales.
    """
    assert len(roidb) == 1, "Single batch only"

    # gt boxes: (x1, y1, x2, y2, cls)
    if cfg.TRAIN.USE_ALL_GT:
        # Include all ground truth boxes
        gt_inds = np.where(roidb[0]['gt_classes'] != 0)[0]
    else:
        # For the COCO ground truth boxes, exclude the ones that are ''iscrowd''
        gt_inds = np.where((roidb[0]['gt_classes'] != 0) & np.all(
            roidb[0]['gt_overlaps'].toarray() > -1.0, axis=1))[0]
    gt_boxes = np.empty((len(gt_inds), 5), dtype=np.float32)
    gt_boxes[:, 0:4] = roidb[0]['boxes'][gt_inds, :]
    # gt_boxes[:, 0:4] = roidb[0]['boxes'][gt_inds, :] * im_scales[0]
    gt_boxes[:, 4] = roidb[0]['gt_classes'][gt_inds]

    num_images = len(roidb)
    processed_ims = []
    im_scales = []
    for i in range(num_images):
        # im = cv2.imread(roidb[i]['image'])
        im = imread(roidb[i]['image'])

        if len(im.shape) == 2:
            im = im[:, :, np.newaxis]
            im = np.concatenate((im, im, im), axis=2)
        # flip the channels, since the original code used cv2 (RGB -> BGR)
        im = im[:, :, ::-1]

        if roidb[i]['flipped']:
            im = im[:, ::-1, :]

        # data augmentation
        if augment:
            im, gt_boxes = augmentor(im, gt_boxes, seed=seed)
            # imsave("target_aug.jpg", im[:, :, ::-1])

        target_size = cfg.TRAIN.SCALES[scale_inds[i]]
        im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
                                        cfg.TRAIN.MAX_SIZE)
        im_scales.append(im_scale)
        processed_ims.append(im)
        # rescale the ground-truth boxes to match the resized image
        gt_boxes[:, 0:4] = gt_boxes[:, 0:4] * im_scale

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)

    return blob, im_scales, gt_boxes

def get_video_blob(roidb):
    """Builds an input blob from the videos in the roidb at the specified
    scales.
    """
    processed_videos = []
    item = roidb
    # debug: dump the roidb entry
    for key in item:
        print(key, ": ", item[key])

    video_length = cfg.TRAIN.LENGTH[0]
    video = np.zeros((video_length, cfg.TRAIN.CROP_SIZE,
                      cfg.TRAIN.CROP_SIZE, 3))
    j = 0
    # center crop
    random_idx = [
        int((cfg.TRAIN.FRAME_SIZE[1] - cfg.TRAIN.CROP_SIZE) / 2),
        int((cfg.TRAIN.FRAME_SIZE[0] - cfg.TRAIN.CROP_SIZE) / 2)
    ]
    for video_info in item['frames']:
        step = video_info[3] if cfg.INPUT == 'video' else 1
        prefix = item['fg_name'] if video_info[0] else item['bg_name']
        for idx in range(video_info[1], video_info[2], step):
            frame = cv2.imread('%s/image_%s.jpg'
                               % (prefix, str(idx + 1).zfill(5)))
            frame = prep_im_for_blob(frame, cfg.PIXEL_MEANS,
                                     tuple(cfg.TRAIN.FRAME_SIZE[::-1]),
                                     cfg.TRAIN.CROP_SIZE, random_idx)

            if item['flipped']:
                frame = frame[:, ::-1, :]

            if DEBUG:
                cv2.imshow('frame', frame / 255.0)
                cv2.waitKey(0)
                cv2.destroyAllWindows()

            video[j] = frame
            j = j + 1

    # pad the tail with the last frame so the clip has the required length
    while j < video_length:
        video[j] = frame
        j = j + 1
    processed_videos.append(video)

    # Create a blob to hold the input images
    blob = video_list_to_blob(processed_videos)

    return torch.from_numpy(blob)

def _imagePreprocess(self, blob, fix_size=True):
    keep = np.arange(blob['gt_grasps'].shape[0])
    if self.augmentation:
        blob['data'] = self.augImageOnly(blob['data'])
        blob['data'], _, blob['gt_grasps'], _, _ = \
            self.augmGraspdet(image=blob['data'], grasps=blob['gt_grasps'],
                              grasps_keep=keep)
    # choose one predefined size, TODO: support multi-instance batch
    random_scale_ind = np.random.randint(0, high=len(cfg.SCALES))
    blob['data'], im_scale = prep_im_for_blob(blob['data'],
                                              cfg.SCALES[random_scale_ind],
                                              cfg.TRAIN.COMMON.MAX_SIZE,
                                              fix_size)
    blob['im_info'][:2] = (blob['data'].shape[0], blob['data'].shape[1])
    blob['im_info'][2:4] = (im_scale['y'], im_scale['x'])
    # modify grasps according to the resize parameters
    blob['gt_grasps'][:, 0::2] *= im_scale['x']
    blob['gt_grasps'][:, 1::2] *= im_scale['y']
    blob['data'] = image_normalize(blob['data'], mean=cfg.PIXEL_MEANS,
                                   std=cfg.PIXEL_STDS)
    return blob

def _get_image_blob(roidb, scale_inds): """Builds an input blob from the images in the roidb at the specified scales. """ num_images = len(roidb) processed_ims = [] processed_dls = [] # processed drive line segmentation GT im_scales = [] # dl_scales = [] for i in range(num_images): #im = cv2.imread(roidb[i]['image']) im = imread(roidb[i]['image']) # print("_get_image_blob() roidb[i]['image']", roidb[i]['image']) # Add by Jie, Read drive line mask dl = imread(roidb[i]['gt_line_mask']) dl = dl.astype(np.uint8) # print("_get_image_blob() roidb[i]['gt_line_mask']", roidb[i]['gt_line_mask']) if len(im.shape) == 2: im = im[:, :, np.newaxis] im = np.concatenate((im, im, im), axis=2) # flip the channel, since the original one using cv2 # rgb -> bgr im = im[:, :, ::-1] if roidb[i]['flipped']: im = im[:, ::-1, :] dl = dl[:, ::-1] target_size = cfg.TRAIN.SCALES[scale_inds[i]] im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size, cfg.TRAIN.MAX_SIZE) dl, dl_scale = prep_dl_for_blob(dl, cfg.PIXEL_MEANS, target_size, cfg.TRAIN.MAX_SIZE) im_scales.append(im_scale) # dl_scales.append(dl_scale) processed_ims.append(im) processed_dls.append(dl) # Create a blob to hold the input images blob_im = im_list_to_blob(processed_ims) blob_dl = dl_list_to_blob(processed_dls) return blob_im, im_scales, blob_dl # , dl_scales
def __getitem__(self, index):
    # testing
    index_ratio = index
    # though it is called a minibatch, in fact it contains only one image here
    minibatch_db = [self._roidb[index_ratio]]
    # load query
    blobs = get_minibatch(minibatch_db)
    data = torch.from_numpy(blobs['data'])
    im_info = torch.from_numpy(blobs['im_info'])  # (H, W, scale)
    data_height, data_width = data.size(1), data.size(2)
    data = data.permute(0, 3, 1, 2).contiguous().view(3, data_height, data_width)
    im_info = im_info.view(3)
    gt_boxes = torch.from_numpy(blobs['gt_boxes'])
    num_boxes = gt_boxes.size(0)

    # get supports
    support_data_all = np.zeros((self.testing_shot, 3, self.support_im_size,
                                 self.support_im_size), dtype=np.float32)
    current_gt_class_id = int(gt_boxes[0][4])
    selected_supports = self.support_pool[current_gt_class_id]
    for i, _path in enumerate(selected_supports):
        support_im = imread(_path)[:, :, ::-1]  # rgb -> bgr
        target_size = np.min(support_im.shape[0:2])  # don't change the size
        support_im, _ = prep_im_for_blob(support_im, cfg.PIXEL_MEANS,
                                         target_size, cfg.TRAIN.MAX_SIZE)
        _h, _w = support_im.shape[0], support_im.shape[1]
        if _h > _w:
            resize_scale = float(self.support_im_size) / float(_h)
            unfit_size = int(_w * resize_scale)
            support_im = cv2.resize(support_im,
                                    (unfit_size, self.support_im_size),
                                    interpolation=cv2.INTER_LINEAR)
        else:
            resize_scale = float(self.support_im_size) / float(_w)
            unfit_size = int(_h * resize_scale)
            support_im = cv2.resize(support_im,
                                    (self.support_im_size, unfit_size),
                                    interpolation=cv2.INTER_LINEAR)
        h, w = support_im.shape[0], support_im.shape[1]
        support_data_all[i, :, :h, :w] = np.transpose(support_im, (2, 0, 1))
    supports = torch.from_numpy(support_data_all)

    return data, im_info, gt_boxes, num_boxes, supports

def load_query(self, choice, id=0):
    if self.training:
        # randomly choose a query image from the chosen category
        all_data = self._query[choice]
        data = random.choice(all_data)
    else:
        # take out the target category for testing
        catgory = self.cat_list[choice]
        # list all the candidate images
        all_data = self._query[catgory]
        # Use image_id to determine the random seed;
        # the list l is the candidate sequence, shuffled by image_id
        random.seed(id)
        l = list(range(len(all_data)))
        random.shuffle(l)

        # choose the candidate sequence and take out the data information
        # position = l[self.query_position % len(l)]
        position = l[0]
        data = all_data[position]

    # Get image
    path = data['image_path']
    im = imread(path)

    if len(im.shape) == 2:
        im = im[:, :, np.newaxis]
        im = np.concatenate((im, im, im), axis=2)

    im = crop(im, data['boxes'], cfg.TRAIN.query_size)

    # flip the channels, since the original code used cv2 (RGB -> BGR)
    # im = im[:,:,::-1]

    # random horizontal flip during training
    if random.randint(0, 99) / 100 > 0.5 and self.training:
        im = im[:, ::-1, :]

    im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, cfg.TRAIN.query_size,
                                    cfg.TRAIN.MAX_SIZE)

    query = im_list_to_blob([im])

    return query

def _get_image_blob(roidb, scale_inds): """Builds an input blob from the images in the roidb at the specified scales. """ num_images = len(roidb) processed_ims = [] im_scales = [] for i in range(num_images): im = fits.open(roidb[i]['image'], ignore_missing_end=True)[0].data ### use log transpoze # im = np.log(1 + np.abs(im)) ### make normalization by liuqiang max_value = np.max(im) min_value = np.min(im) mean_value = np.mean(im) im = (im - mean_value)/(max_value - min_value) H = im.shape[0] W = im.shape[1] if len(im.shape) == 2: im = im[:,:,np.newaxis] im_empty = np.zeros((H,W),dtype=float) im_empty = im_empty[:,:,np.newaxis] im = np.concatenate((im,im,im),axis=2) # flip the channel, since the original one using cv2 # rgb -> bgr im = im[:,:,::-1] if roidb[i]['flipped']: im = im[:, ::-1, :] target_size = cfg.TRAIN.SCALES[scale_inds[i]] im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size, cfg.TRAIN.MAX_SIZE) im_scales.append(im_scale) processed_ims.append(im) # Create a blob to hold the input images blob = im_list_to_blob(processed_ims) return blob, im_scales
def _get_image_blob(roidb, scale_inds): """Builds an input blob from the images in the roidb at the specified scales. """ # 有几张图片,根据输入->只有一张 num_images = len(roidb) processed_ims = [] im_scales = [] # 只有一张 for i in range(num_images): #im = cv2.imread(roidb[i]['image']) 因为版本问题进行修改 # 读取字典中image的键值 -> 文件的路径,读取图片 im = imageio.imread(roidb[i]['image']) # 如果图像是二维(无色彩信息) if len(im.shape) == 2: # 增加了第三个维度 im = im[:, :, np.newaxis] #对第三个维度进行扩展(为了程序兼容2维图像) im = np.concatenate((im, im, im), axis=2) # flip the channel, since the original one using cv2 # rgb -> bgr # 使im倒叙(对第三个通道),(特殊用法[i:j:s(步长)]) # 为了兼容cv2 im = im[:, :, ::-1] # 如果需要反转对第二通道进行倒叙 if roidb[i]['flipped']: im = im[:, ::-1, :] # 获取短边像素 target_size = cfg.TRAIN.SCALES[scale_inds[i]] #(cfg.PIXEL_MEANS)是像素均值,(cfg.TRAIN.MAX_SIZE)是长边像素 # 返回缩放后的图片和缩放比 im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size, cfg.TRAIN.MAX_SIZE) # 形成缩放列表 im_scales.append(im_scale) # 形成图片表 processed_ims.append(im) # 其实这里列表中也就只有一个元素,这么做可能是为了兼容性?? # Create a blob to hold the input images # 得到图片的np数组 blob = im_list_to_blob(processed_ims) # 返回图片的np数组,和缩放比 return blob, im_scales
def _get_image_blob(roidb, scale_inds):
    num_images = len(roidb)
    processed_ims = []
    im_scales = []
    for i in range(num_images):
        im = cv2.imread(roidb[i]['file_path'])
        if roidb[i]['flipped']:
            im = im[:, ::-1, :]
        target_size = cfg.TRAIN.SCALES[scale_inds[i]]
        im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
                                        cfg.TRAIN.MAX_SIZE)
        im_scales.append(im_scale)
        processed_ims.append(im)
    blob = im_list_to_blob(processed_ims)
    return blob, im_scales

def _imagePreprocess(self, blob, fix_size=True):
    keep = np.arange(blob['gt_boxes'].shape[0])
    if self.augmentation:
        blob['data'] = self.augImageOnly(blob['data'])
        blob['data'], blob['gt_boxes'], _, keep, _ = \
            self.augObjdet(image=blob['data'], boxes=blob['gt_boxes'],
                           boxes_keep=keep)
    # choose one predefined size, TODO: support multi-instance batch
    random_scale_ind = np.random.randint(0, high=len(cfg.SCALES))
    blob['data'], im_scale = prep_im_for_blob(blob['data'],
                                              cfg.SCALES[random_scale_ind],
                                              cfg.TRAIN.COMMON.MAX_SIZE,
                                              fix_size)
    # modify bounding boxes according to the resize parameters
    blob['im_info'][:2] = (blob['data'].shape[0], blob['data'].shape[1])
    blob['im_info'][2:4] = (im_scale['y'], im_scale['x'])
    blob['gt_boxes'][:, :-1][:, 0::2] *= im_scale['x']
    blob['gt_boxes'][:, :-1][:, 1::2] *= im_scale['y']
    blob['data'] = image_normalize(blob['data'], mean=cfg.PIXEL_MEANS,
                                   std=cfg.PIXEL_STDS)
    blob['node_inds'] = blob['node_inds'][keep]
    blob['parent_lists'] = [blob['parent_lists'][p_ind] for p_ind in list(keep)]
    blob['child_lists'] = [blob['child_lists'][c_ind] for c_ind in list(keep)]
    return blob

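# NOTE: the _imagePreprocess variants above call a different prep_im_for_blob:
# no pixel means (normalization happens later in image_normalize), and the
# returned scale is a dict with per-axis entries. A sketch consistent with
# how im_scale['x'] / im_scale['y'] are consumed above, assuming fix_size
# warps the image to a target_size square:
def prep_im_for_blob(im, target_size, max_size, fix_size=False):
    """Resize only; return the image and per-axis scale factors (sketch)."""
    im = im.astype(np.float32, copy=False)
    h, w = im.shape[:2]
    if fix_size:
        # warp to an exact square; x and y scales differ in general
        scale_x = float(target_size) / float(w)
        scale_y = float(target_size) / float(h)
        im = cv2.resize(im, (target_size, target_size),
                        interpolation=cv2.INTER_LINEAR)
    else:
        # uniform scaling: short side -> target_size, long side <= max_size
        scale = float(target_size) / float(min(h, w))
        if np.round(scale * max(h, w)) > max_size:
            scale = float(max_size) / float(max(h, w))
        im = cv2.resize(im, None, fx=scale, fy=scale,
                        interpolation=cv2.INTER_LINEAR)
        scale_x = scale_y = scale
    return im, {'x': scale_x, 'y': scale_y}
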
def _get_image_blob(roidb, scale_inds, training):
    """Builds an input blob from the images in the roidb at the specified
    scales.
    """
    num_images = len(roidb)
    processed_ims = []
    im_scales = []
    for i in range(num_images):
        # This part might need to be changed: delete the cv2-related code and
        # switch to imageio where possible / preferable.
        # im = cv2.imread(roidb[i]['image'])
        im = imageio.imread(roidb[i]['image'])

        if len(im.shape) == 2:
            im = im[:, :, np.newaxis]
            im = np.concatenate((im, im, im), axis=2)  # 2-D image to 3-D image
        # flip the channels, since the original code used cv2 (RGB -> BGR)
        im = im[:, :, ::-1]

        if roidb[i]['flipped']:
            im = im[:, ::-1, :]  # horizontal flip (width axis)
        target_size = cfg.TRAIN.SCALES[scale_inds[i]]
        # im is resized by the im_scale ratio
        im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
                                        cfg.TRAIN.MAX_SIZE, training)
        im_scales.append(im_scale)
        processed_ims.append(im)

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)

    return blob, im_scales

def _get_image_blob(roidb, scale_inds, transfrom):
    """Builds an input blob from the images in the roidb at the specified
    scales.
    """
    num_images = len(roidb)
    processed_ims = []
    im_scales = []
    for i in range(num_images):
        im = cv2.imread(roidb[i]['image'])
        target_size = cfg.TRAIN.SCALES[scale_inds[i]]
        im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
                                        cfg.TRAIN.MAX_SIZE, transfrom)
        im_scales.append(im_scale)
        processed_ims.append(im)

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)

    return blob, im_scales

def prepare_im_func(prefix, random_idx, frame_idx, flipped):
    frame_path = os.path.join(prefix, 'image_' + str(frame_idx).zfill(5) + '.jpg')
    frame = cv2.imread(frame_path)
    # process the boundary frame
    if frame is None:
        frames = sorted(os.listdir(prefix))
        frame_path = os.path.join(prefix, frames[-1])
        frame = cv2.imread(frame_path)

    frame = prep_im_for_blob(frame, cfg.PIXEL_MEANS,
                             tuple(cfg.TRAIN.FRAME_SIZE[::-1]),
                             cfg.TRAIN.CROP_SIZE, random_idx)

    if flipped:
        frame = frame[:, ::-1, :]

    if DEBUG:
        cv2.imshow('frame', frame / 255.0)
        cv2.waitKey(0)
        cv2.destroyAllWindows()
    return frame

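# NOTE: the video loaders call prep_im_for_blob with a fixed frame size, a
# crop size, and a crop offset. A sketch of that variant, assuming
# frame_size is (width, height) as passed via cfg.TRAIN.FRAME_SIZE[::-1]
# and offsets is [x, y] as built by the callers above:
def prep_im_for_blob(im, pixel_means, frame_size, crop_size, offsets):
    """Mean-subtract, resize to a fixed frame size, then crop (sketch)."""
    im = im.astype(np.float32, copy=False)
    im -= pixel_means
    im = cv2.resize(im, frame_size, interpolation=cv2.INTER_LINEAR)
    x, y = int(offsets[0]), int(offsets[1])
    # fixed-size crop at the given offset ("crop to 112" in the callers)
    return im[y:y + crop_size, x:x + crop_size, :]
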
def _imagePreprocess(self, blob, fix_size=True):
    keep = np.arange(blob['gt_boxes'].shape[0])
    if self.augmentation:
        if self.augImageOnly is not None:
            blob['data'] = self.augImageOnly(blob['data'])
        if self.augObjdet is not None:
            blob['data'], blob['gt_boxes'], _, _, _ = \
                self.augObjdet(image=blob['data'], boxes=blob['gt_boxes'],
                               boxes_keep=keep)
    # choose one predefined size, TODO: support multi-instance batch
    random_scale_ind = np.random.randint(0, high=len(cfg.SCALES))
    blob['data'], im_scale = prep_im_for_blob(blob['data'],
                                              cfg.SCALES[random_scale_ind],
                                              cfg.TRAIN.COMMON.MAX_SIZE,
                                              fix_size)
    # modify bounding boxes according to the resize parameters
    blob['im_info'][:2] = (blob['data'].shape[0], blob['data'].shape[1])
    blob['im_info'][2:4] = (im_scale['y'], im_scale['x'])
    blob['gt_boxes'][:, :-1][:, 0::2] *= im_scale['x']
    blob['gt_boxes'][:, :-1][:, 1::2] *= im_scale['y']
    blob['data'] = image_normalize(blob['data'], mean=self.pixel_means,
                                   std=self.pixel_stds)
    return blob

def get_image_blob(im):
    """Converts an image into a network input.
    Arguments:
        im: data of image
    Returns:
        blob (ndarray): a data blob holding an image pyramid
        im_scale_factors (list): list of image scales (relative to im) used
            in the image pyramid
    """
    im_scales = []
    processed_ims = []
    scale_inds = np.random.randint(0, high=len(cfg.TRAIN.SCALES), size=1)
    target_size = cfg.TRAIN.SCALES[scale_inds[0]]
    im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, cfg.PIXEL_STDS,
                                    target_size, cfg.TRAIN.MAX_SIZE)
    im_scales.append(im_scale)
    processed_ims.append(im)

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)

    return blob, im_scales

def _get_image_blob(roidb, scale_inds): """ load the image from local path, subtract pixel mean and resize the image :param roidb: annotation list [{}] for one image, the {} contains all labels :param scale_inds: [0] :return blob: an image 4D array (1, 3, h, w) im_scales: a float number """ num_images = len(roidb) # 1 processed_ims = [] im_scales = [] for i in range(num_images): im = cv2.imread(roidb[i]['image']) # im = imread(roidb[i]['image']) # if len(im.shape) == 2: # im = im[:,:,np.newaxis] # im = np.concatenate((im,im,im), axis=2) # flip the channel, since the original one using cv2 # rgb -> bgr # im = im[:,:,::-1] if roidb[i]['flipped']: im = im[:, ::-1, :] # subtract pixel mean and resize the image target_size = cfg.TRAIN.SCALES[scale_inds[i]] # 600 im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size, cfg.TRAIN.MAX_SIZE) im_scales.append(im_scale) processed_ims.append(im) # Create a blob to hold the input images blob = im_list_to_blob(processed_ims) return blob, im_scales
def _get_video_blob(roidb, scale_inds, phase='train', step_frame=1,
                    length_support=768):
    """Builds an input blob from the videos in the roidb at the specified
    scales.
    """
    processed_videos = []
    for i, item in enumerate(roidb):
        # just one scale implemented
        video_length = length_support
        video = np.zeros((video_length, cfg.TRAIN.CROP_SIZE,
                          cfg.TRAIN.CROP_SIZE, 3))
        j = 0

        if phase == 'train':
            # random crop offset; TODO: data augmentation
            random_idx = [np.random.randint(cfg.TRAIN.FRAME_SIZE[1] - cfg.TRAIN.CROP_SIZE),
                          np.random.randint(cfg.TRAIN.FRAME_SIZE[0] - cfg.TRAIN.CROP_SIZE)]
            # image_w, image_h, crop_w, crop_h = cfg.TRAIN.FRAME_SIZE[1], cfg.TRAIN.FRAME_SIZE[0], cfg.TRAIN.CROP_SIZE, cfg.TRAIN.CROP_SIZE
            # offsets = GroupMultiScaleCrop.fill_fix_offset(False, image_w, image_h, crop_w, crop_h)
            # random_idx = offsets[npr.choice(len(offsets))]
        else:
            # center crop
            random_idx = [int((cfg.TRAIN.FRAME_SIZE[1] - cfg.TRAIN.CROP_SIZE) / 2),
                          int((cfg.TRAIN.FRAME_SIZE[0] - cfg.TRAIN.CROP_SIZE) / 2)]
        if DEBUG:
            print("random_idx: {}".format(random_idx))

        video_info = item['frames'][0]
        # for video_info in item['frames']:
        step = step_frame
        prefix = item['fg_name'] if video_info[0] else item['bg_name']
        if cfg.TEMP_SPARSE_SAMPLING:
            if phase == 'train':
                segment_offsets = npr.randint(step, size=len(range(video_info[1], video_info[2], step)))
            else:
                segment_offsets = np.zeros(len(range(video_info[1], video_info[2], step))) + step // 2
        else:
            segment_offsets = np.zeros(len(range(video_info[1], video_info[2], step)))

        times = math.ceil((video_info[2] - video_info[1]) / length_support)
        for k, idx in enumerate(range(video_info[1], video_info[2], times * step)):
            frame_idx = int(segment_offsets[k] + idx + 1)
            frame_path = os.path.join(prefix, 'image_' + str(frame_idx).zfill(5) + '.jpg')
            frame = cv2.imread(frame_path)
            # process the boundary frame
            if frame is None:
                frames = sorted(os.listdir(prefix))
                frame_path = os.path.join(prefix, frames[-1])
                frame = cv2.imread(frame_path)

            # crop to CROP_SIZE (e.g. 112) with the offset chosen above
            frame = prep_im_for_blob(frame, cfg.PIXEL_MEANS,
                                     tuple(cfg.TRAIN.FRAME_SIZE[::-1]),
                                     cfg.TRAIN.CROP_SIZE, random_idx)

            if item['flipped']:
                frame = frame[:, ::-1, :]

            if DEBUG:
                cv2.imshow('frame', frame / 255.0)
                cv2.waitKey(0)
                cv2.destroyAllWindows()

            video[j] = frame
            j = j + 1
        # pad the tail with the last frame
        video[j:video_length] = video[j - 1]
        processed_videos.append(video)

    # Create a blob to hold the input videos
    blob = video_list_to_blob(processed_videos)

    return blob