def _get_image_blob(roidb, scale_inds):
    """Assemble the paired T/RGB input blobs for a minibatch.

    For every roidb entry the images stored under ``'imageT'`` and
    ``'imageRGB'`` are read with OpenCV, mirrored along the width axis when
    the entry is marked ``'flipped'``, and prepared with ``prep_im_for_blob``
    at the scale selected by ``scale_inds``.

    Returns:
        (blobT, blobRGB, im_scales): the two values unpacked from
        ``im_list_to_blob`` and one scale factor per roidb entry.
    """
    interleaved = []  # [T_0, RGB_0, T_1, RGB_1, ...]
    im_scales = []
    for idx, entry in enumerate(roidb):
        im_t = cv2.imread(entry['imageT'])
        im_rgb = cv2.imread(entry['imageRGB'])
        if entry['flipped']:
            im_t = im_t[:, ::-1, :]
            im_rgb = im_rgb[:, ::-1, :]

        target_size = cfg.TRAIN.SCALES[scale_inds[idx]]
        im_t, im_scale = prep_im_for_blob(
            im_t, cfg.PIXEL_MEANS_T, target_size, cfg.TRAIN.MAX_SIZE)
        # The scale reported for the RGB image is the one that is recorded.
        im_rgb, im_scale = prep_im_for_blob(
            im_rgb, cfg.PIXEL_MEANS_RGB, target_size, cfg.TRAIN.MAX_SIZE)

        im_scales.append(im_scale)
        interleaved.extend((im_t, im_rgb))

    # im_list_to_blob is expected to hand back one blob per modality.
    blobT, blobRGB = im_list_to_blob(interleaved)
    return blobT, blobRGB, im_scales
def _get_image_blob_joint(roidb_s, roidb_ws, net_name, scale_inds, is_training):
    """Build one input blob from two roidbs processed back to back.

    Entries of ``roidb_s`` are prepared with ``is_ws=False`` and entries of
    ``roidb_ws`` with ``is_ws=True``. ``scale_inds`` is indexed first by the
    ``roidb_s`` entries and then, offset by ``len(roidb_s)``, by the
    ``roidb_ws`` entries. Every image is read with ``skimage.io.imread`` and
    stacked three times along the last axis before preprocessing.

    Returns:
        (blob, im_scales): the blob from ``im_list_to_blob`` and the scale
        factors in processing order.
    """
    num_images_s = len(roidb_s)
    num_images_ws = len(roidb_ws)
    processed_ims = []
    im_scales = []

    def _process(entry, scale_pos, ws_flag):
        # Shared per-image pipeline for both roidbs.
        raw = skimage.io.imread(entry['image'])
        im = np.dstack((raw, raw, raw))
        if entry['flipped']:
            im = im[:, ::-1, :]
        target_size = cfg.TRAIN.SCALES[scale_inds[scale_pos]]
        im, im_scale = prep_im_for_blob(
            im, net_name, cfg.PIXEL_MEANS, cfg.PIXEL_STDS, target_size,
            cfg.TRAIN.MAX_SIZE, is_training, is_ws=ws_flag)
        im_scales.append(im_scale)
        processed_ims.append(im)

    for pos in range(num_images_s):
        _process(roidb_s[pos], pos, False)
    for pos in range(num_images_ws):
        _process(roidb_ws[pos], pos + num_images_s, True)

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)
    return blob, im_scales
def _get_image_blob_2in(roidb, scale_inds):
    """Build two parallel input blobs from the image pair of each entry.

    Each roidb entry supplies two paths, ``'image_1'`` and ``'image_2'``.
    Both images are read with OpenCV, mirrored together when the entry is
    flagged ``'flipped'``, and prepared with the same target size.

    Returns:
        (blob_1, blob_2, im_scales): one blob per image stream and one
        scale factor per roidb entry.
    """
    stream_1 = []
    stream_2 = []
    im_scales = []
    for idx, entry in enumerate(roidb):
        first = cv2.imread(entry['image_1'])
        second = cv2.imread(entry['image_2'])
        if entry['flipped']:
            first = first[:, ::-1, :]
            second = second[:, ::-1, :]

        target_size = cfg.TRAIN.SCALES[scale_inds[idx]]
        first, im_scale = prep_im_for_blob(
            first, cfg.PIXEL_MEANS, target_size, cfg.TRAIN.MAX_SIZE)
        # The scale reported for the second image is the one that is kept.
        second, im_scale = prep_im_for_blob(
            second, cfg.PIXEL_MEANS, target_size, cfg.TRAIN.MAX_SIZE)

        im_scales.append(im_scale)
        stream_1.append(first)
        stream_2.append(second)

    # One blob per stream.
    blob_1 = im_list_to_blob(stream_1)
    blob_2 = im_list_to_blob(stream_2)
    return blob_1, blob_2, im_scales
def _get_image_blob(roidb, scale_inds, aug, target_size, iter=None):
    """Build an input blob holding every image together with its mirror.

    For each roidb entry the image is read with OpenCV and, once ``iter``
    has reached ``cfg.TRAIN.STEP_ITERS - cfg.TRAIN.SNAPSHOT_ITERS``, passed
    through ``aug``. The image in the orientation requested by the entry's
    ``'flipped'`` flag and its mirrored counterpart are then both prepared
    and appended, in that order.

    Args:
        roidb: sequence of dicts with ``'image'`` and ``'flipped'`` keys.
        scale_inds: accepted for interface compatibility; not used here.
        aug: callable invoked as ``aug(im, None, None)``; the first element
            of its 3-tuple result replaces the image.
        target_size: target size forwarded to ``prep_im_for_blob``.
        iter: current iteration number, or None to skip augmentation.

    Returns:
        (blob, im_scales): ``blob`` comes from ``im_list_to_blob`` and holds
        two images per entry; ``im_scales`` holds one scale per entry.
    """
    # ``iter`` defaults to None. On Python 3 ``None >= int`` raises TypeError,
    # on Python 2 it is False; the explicit guard keeps the Python 2 result
    # (no augmentation) on both. The condition is loop-invariant.
    apply_aug = (
        iter is not None
        and iter >= (cfg.TRAIN.STEP_ITERS - cfg.TRAIN.SNAPSHOT_ITERS)
    )

    processed_ims = []
    im_scales = []
    for entry in roidb:
        im = cv2.imread(entry['image'])
        if apply_aug:
            im, _, _ = aug(im, None, None)

        mirrored = im[:, ::-1, :]
        if entry['flipped']:
            # The entry wants the mirrored view; the raw image is its twin.
            im, im_flip = mirrored, im
        else:
            im_flip = mirrored

        im, im_scale = prep_im_for_blob(
            im, cfg.PIXEL_MEANS, target_size, cfg.TRAIN.MAX_SIZE)
        im_scales.append(im_scale)
        im_flip, _ = prep_im_for_blob(
            im_flip, cfg.PIXEL_MEANS, target_size, cfg.TRAIN.MAX_SIZE)
        processed_ims.append(im)
        processed_ims.append(im_flip)

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)
    return blob, im_scales
def _get_video_blob(roidb, vocab):
    """Build an input blob from the frames of a single video entry.

    Frames listed in ``roidb['frames']`` are read from
    ``<prefix>/image_<5-digit index>.jpg``, prepared with
    ``prep_im_for_blob`` using a centred crop offset, optionally mirrored,
    and written into a fixed-length buffer of ``cfg.TEST.LENGTH[0]``
    frames. Unfilled trailing slots are padded with the last frame read.

    Args:
        roidb: a single entry (dict) with ``'frames'``, ``'fg_name'``,
            ``'bg_name'`` and ``'flipped'`` keys. Each ``frames`` item is
            indexed as ``(is_foreground, start, stop[, step])``.
        vocab: accepted for interface compatibility; not used here.

    Returns:
        The blob produced by ``video_list_to_blob`` for the single video.

    Raises:
        ValueError: if padding is required but no frame could be read
            (previously this surfaced as an unbound-local error).
    """
    item = roidb
    video_length = cfg.TEST.LENGTH[0]
    video = np.zeros((video_length, cfg.TEST.CROP_SIZE, cfg.TEST.CROP_SIZE, 3))

    # Floor division keeps the crop offsets integral on Python 3 as well;
    # on Python 2 the result is unchanged.
    random_idx = [
        int(cfg.TEST.FRAME_SIZE[1] - cfg.TEST.CROP_SIZE) // 2,
        int(cfg.TEST.FRAME_SIZE[0] - cfg.TEST.CROP_SIZE) // 2,
    ]

    is_video_input = cfg.INPUT == 'video'
    if is_video_input:
        # NOTE(review): this branch uses the TRAIN frame/crop sizes while the
        # buffer and crop offsets above use TEST; kept as in the original.
        frame_size = tuple(cfg.TRAIN.FRAME_SIZE[::-1])
        crop_size = cfg.TRAIN.CROP_SIZE
    else:
        frame_size = tuple(cfg.TEST.FRAME_SIZE[::-1])
        crop_size = cfg.TEST.CROP_SIZE

    frame = None
    j = 0
    for video_info in item['frames']:
        prefix = item['fg_name'] if video_info[0] else item['bg_name']
        if is_video_input:
            # Only the 'video' input mode honours the per-segment stride.
            indices = range(video_info[1], video_info[2], video_info[3])
        else:
            indices = range(video_info[1], video_info[2])
        for idx in indices:
            frame = cv2.imread('%s/image_%s.jpg' % (prefix, str(idx + 1).zfill(5)))
            frame = prep_im_for_blob(frame, cfg.PIXEL_MEANS, frame_size,
                                     crop_size, random_idx)
            if item['flipped']:
                frame = frame[:, ::-1, :]
            video[j] = frame
            j = j + 1

    # Pad the remaining slots by repeating the last frame.
    if j < video_length:
        if frame is None:
            raise ValueError('no frames were read; cannot pad the video blob')
        video[j:] = frame

    # Create a blob to hold the input images
    blob = video_list_to_blob([video])
    return blob
def get_dicom_image_blob(roidb):
    """Build an input blob from each entry's image and companion image.

    A random scale index is drawn per entry. The images under ``'image'``
    and ``'other_image'`` are read with OpenCV, mirrored when the entry is
    flagged ``'flipped'``, and prepared with
    ``blob_utils.prep_im_for_blob`` using a colour-jitter transform. Both
    prepared images are appended to the blob list, primary image first.

    Args:
        roidb: sequence of dicts with ``'image'``, ``'other_image'`` and
            ``'flipped'`` keys.

    Returns:
        (blob, im_scales): ``blob`` from ``blob_utils.im_list_to_blob``
        (two images per entry) and one scale per entry, taken from the
        primary image.

    Raises:
        AssertionError: if either image of an entry cannot be read.
    """
    num_images = len(roidb)
    # Sample random scales to use for each image in this batch
    scale_inds = np.random.randint(0, high=len(cfg.TRAIN.SCALES), size=num_images)

    # The transform does not depend on the entry, so build it once.
    transform_cv = cv_transforms.Compose(
        [cv_transforms.ColorJitter(brightness=0.5, contrast=0.25)])

    processed_ims = []
    im_scales = []
    for i in range(num_images):
        entry = roidb[i]
        im = cv2.imread(entry['image'])
        other_im = cv2.imread(entry['other_image'])
        assert im is not None, \
            'Failed to read image \'{}\''.format(entry['image'])
        assert other_im is not None, \
            'Failed to read image \'{}\''.format(entry['other_image'])

        if entry['flipped']:
            im = im[:, ::-1, :]
            # Mirror the companion image itself. The original assigned a
            # re-flipped copy of ``im`` here, discarding ``other_image``.
            other_im = other_im[:, ::-1, :]

        target_size = cfg.TRAIN.SCALES[scale_inds[i]]
        im, im_scale = blob_utils.prep_im_for_blob(
            im, cfg.PIXEL_MEANS, transform_cv, [target_size], cfg.TRAIN.MAX_SIZE)
        other_im, other_im_scale = blob_utils.prep_im_for_blob(
            other_im, cfg.PIXEL_MEANS, transform_cv, [target_size],
            cfg.TRAIN.MAX_SIZE)

        im_scales.append(im_scale[0])
        processed_ims.append(im[0])
        processed_ims.append(other_im[0])

    # Create a blob to hold the input images
    blob = blob_utils.im_list_to_blob(processed_ims)
    return blob, im_scales
def _get_rprocessed_image_blob(roidb, scale_inds, angles):
    """Build an input blob, applying the flip and rotation of each entry.

    Images are read with OpenCV. An entry flagged ``'flipped'`` is mirrored
    along the width axis; an entry flagged ``'rotated'`` is then rotated
    about the image centre by ``angles[i]`` without rescaling, keeping the
    original canvas size.

    Returns:
        (blob, im_scales): blob from ``im_list_to_blob`` and one scale
        factor per roidb entry.
    """
    processed_ims = []
    im_scales = []
    for idx, entry in enumerate(roidb):
        im = cv2.imread(entry['image'])
        if entry['flipped']:
            im = im[:, ::-1, :]

        if entry['rotated']:
            height, width = im.shape[:2]
            pivot = (width / 2, height / 2)
            # Positive angles rotate anti-clockwise; the scale factor stays 1.
            rotation = cv2.getRotationMatrix2D(pivot, angles[idx], 1.0)
            im = cv2.warpAffine(im, rotation, (width, height))

        target_size = cfg.TRAIN.SCALES[scale_inds[idx]]
        im, im_scale = prep_im_for_blob(
            im, cfg.PIXEL_MEANS, target_size, cfg.TRAIN.MAX_SIZE)
        im_scales.append(im_scale)
        processed_ims.append(im)

    blob = im_list_to_blob(processed_ims)
    return blob, im_scales
def _get_image_blob(imdb, roidb, scale_inds):
    """Build an input blob from images stored as encoded bytes in ``imdb``.

    For each roidb entry the record returned by
    ``imdb.get_proto_at(entry['image'])`` is decoded from its ``data``
    bytes with ``io.imread``. The channel order is reversed, the image is
    mirrored when the entry is flagged ``'flipped'``, and it is prepared
    with ``prep_im_for_blob`` (including ``cfg.TRAIN.SCALE_MULTIPLE_OF``).

    Returns:
        (blob, im_scales): blob from ``im_list_to_blob`` and one scale
        factor per roidb entry.
    """
    processed_ims = []
    im_scales = []
    for idx, entry in enumerate(roidb):
        record = imdb.get_proto_at(entry['image'])
        decoded = io.imread(BytesIO(record.data))
        # Reverse the channel axis (presumably RGB -> BGR; confirm with the reader).
        im = decoded[:, :, ::-1]
        if entry['flipped']:
            im = im[:, ::-1, :]

        target_size = cfg.TRAIN.SCALES[scale_inds[idx]]
        im, im_scale = prep_im_for_blob(
            im, cfg.PIXEL_MEANS, target_size, cfg.TRAIN.MAX_SIZE,
            cfg.TRAIN.SCALE_MULTIPLE_OF)
        im_scales.append(im_scale)
        processed_ims.append(im)

    blob = im_list_to_blob(processed_ims)
    return blob, im_scales
def __call__(self, sample):
    """Resize the sample image and attach the RoI tensors for the model.

    The image in ``sample['image']`` is rescaled by ``prep_im_for_blob``
    and replaced by a ``torch.FloatTensor`` blob. The first scale factor
    and the original image shape are stored next to it. Depending on
    ``self.sample_proposals_for_training``, the boxes in
    ``sample['dbentry']`` are either scaled into test proposals or used to
    sample training RoIs. ``'dbentry'`` is removed before returning.

    Returns:
        dict: the processed sample. In the training branch this is a new
        dict that merges the sample with the sampled RoI tensors.
    """
    original_im_size = sample['image'].shape
    im_list, im_scales = prep_im_for_blob(
        sample['image'],
        pixel_means=self.mean,
        target_sizes=self.target_sizes,
        max_size=self.max_size,
    )
    # Original note: im_list_to_blob swaps channels and adds stride in case of FPN.
    image_blob = im_list_to_blob(im_list, self.fpn_on)
    sample['image'] = torch.FloatTensor(image_blob)
    sample['scaling_factors'] = im_scales[0]
    sample['original_im_size'] = torch.FloatTensor(original_im_size)

    has_boxes = len(sample['dbentry']['boxes']) != 0
    if has_boxes and not self.sample_proposals_for_training:
        # Fast R-CNN test: scale the stored boxes into proposals.
        proposals = sample['dbentry']['boxes'] * im_scales[0]
        if self.remove_dup_proposals:
            proposals, _ = self.remove_dup_prop(proposals)
        if self.fpn_on == False:
            sample['rois'] = torch.FloatTensor(proposals)
        else:
            per_level = add_multilevel_rois_for_test({'rois': proposals}, 'rois')
            for name, rois in per_level.items():
                sample[name] = torch.FloatTensor(rois)
    elif self.sample_proposals_for_training:
        # Fast R-CNN training; batch_idx=0 is fine while batch_size stays 1.
        sampled = fast_rcnn_sample_rois(
            roidb=sample['dbentry'], im_scale=im_scales[0], batch_idx=0)
        as_tensors = {
            name: torch.FloatTensor(array) for name, array in sampled.items()
        }
        # Merge into a fresh dict; the incoming dict is not updated in place.
        sample = {**sample, **as_tensors}

    # The raw database entry is not passed on.
    del sample['dbentry']
    return sample
def _get_image_blob(roidb, scale_inds):
    """Build a four-channel input blob: image plus deformed mask.

    For each roidb entry the image is read with OpenCV and the file under
    ``'deformed_mask'`` is read as a single-channel image. Both are packed
    into a float array whose first three channels are the image and whose
    fourth channel is the mask, then flipped and scaled as usual.

    Returns:
        (blob, im_scales): blob from ``im_list_to_blob`` and one scale
        factor per roidb entry.
    """
    processed_ims = []
    im_scales = []
    for idx, entry in enumerate(roidb):
        im_bgr = cv2.imread(entry['image'])
        if cfg.DEBUG:
            print(im_bgr.shape)

        # Append the deformed mask as an extra input channel.
        deformed_mask = cv2.imread(entry['deformed_mask'], 0)
        rows, cols = im_bgr.shape[0], im_bgr.shape[1]
        im = np.zeros((rows, cols, 4))
        im[:, :, 0:3] = im_bgr
        im[:, :, 3] = deformed_mask

        if entry['flipped']:
            im = im[:, ::-1, :]

        target_size = cfg.TRAIN.SCALES[scale_inds[idx]]
        im, im_scale = prep_im_for_blob(
            im, cfg.PIXEL_MEANS, target_size, cfg.TRAIN.MAX_SIZE)
        im_scales.append(im_scale)
        processed_ims.append(im)

    blob = im_list_to_blob(processed_ims)
    return blob, im_scales
def _get_image_blob(roidb, scale_inds):
    """Build an input blob from colour or grayscale images.

    When ``cfg.TRAIN.IS_COLOR`` equals True the images are read with the
    default OpenCV flags; otherwise they are read as single-channel
    grayscale. Entries flagged ``'flipped'`` are mirrored along the width
    axis before being prepared with ``prep_im_for_blob``.

    Args:
        roidb: sequence of dicts with ``'image'`` and ``'flipped'`` keys.
        scale_inds: per-entry indices into ``cfg.TRAIN.SCALES``.

    Returns:
        (blob, im_scales): blob from ``im_list_to_blob`` and one scale
        factor per roidb entry.
    """
    # Same comparison as the original flag test; hoisted because it does
    # not depend on the entry.
    read_color = cfg.TRAIN.IS_COLOR == True  # noqa: E712

    processed_ims = []
    im_scales = []
    for idx, entry in enumerate(roidb):
        if read_color:
            im = cv2.imread(entry['image'])
        else:
            # cv2.CV_LOAD_IMAGE_GRAYSCALE was removed in OpenCV 3;
            # IMREAD_GRAYSCALE is the supported name for the same flag.
            im = cv2.imread(entry['image'], flags=cv2.IMREAD_GRAYSCALE)

        if entry['flipped']:
            # Two-index slicing mirrors both 2-D and 3-D arrays.
            im = im[:, ::-1]

        target_size = cfg.TRAIN.SCALES[scale_inds[idx]]
        im, im_scale = prep_im_for_blob(
            im, cfg.PIXEL_MEANS, target_size, cfg.TRAIN.MAX_SIZE)
        im_scales.append(im_scale)
        processed_ims.append(im)

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)
    return blob, im_scales
def _get_image_blob(roidb, scale_inds):
    """Build the minibatch input blob from the roidb images.

    Each image is read with OpenCV, mirrored along the width axis if its
    entry is flagged ``'flipped'``, and handed to ``prep_im_for_blob`` with
    the target size chosen through ``scale_inds``.

    Returns:
        (blob, im_scales): blob from ``im_list_to_blob`` and the scaling
        ratio reported for every image.
    """
    prepared = []
    im_scales = []
    for idx, entry in enumerate(roidb):
        im = cv2.imread(entry['image'])
        if entry['flipped']:
            im = im[:, ::-1, :]

        target_size = cfg.TRAIN.SCALES[scale_inds[idx]]
        # prep_im_for_blob returns the processed image and its scaling ratio.
        im, im_scale = prep_im_for_blob(
            im, cfg.PIXEL_MEANS, target_size, cfg.TRAIN.MAX_SIZE)
        im_scales.append(im_scale)
        prepared.append(im)

    # Pack all processed images into a single blob.
    blob = im_list_to_blob(prepared)
    return blob, im_scales
def _get_image_blob(roidb, scale_inds):
    """Build an input blob from arrays stored as ``.npy`` files.

    Each entry's ``'image'`` path is loaded with ``np.load``, so any number
    of channels is supported. Arrays that are not 3-D get a trailing
    channel axis before the usual flip and scaling steps.

    Returns:
        (blob, im_scales): blob from ``im_list_to_blob`` and one scale
        factor per roidb entry.
    """
    processed_ims = []
    im_scales = []
    for idx, entry in enumerate(roidb):
        loaded = np.load(entry['image'])
        # Guarantee a channel axis for the slicing below.
        im = loaded if loaded.ndim == 3 else np.expand_dims(loaded, axis=2)
        if entry['flipped']:
            im = im[:, ::-1, :]

        target_size = cfg.TRAIN.SCALES[scale_inds[idx]]
        im, im_scale = prep_im_for_blob(
            im, cfg.PIXEL_MEANS, target_size, cfg.TRAIN.MAX_SIZE)
        im_scales.append(im_scale)
        processed_ims.append(im)

    blob = im_list_to_blob(processed_ims)
    return blob, im_scales
def _get_image_blob(roidb, scale_inds):
    """Build an input blob from images concatenated with stored heatmaps.

    For each roidb entry the image is read with OpenCV and a heatmap
    ``.npy`` file is located from the last two components of the image
    path. The heatmap is loaded as float32, resized to the image size and
    concatenated to the image along the channel axis before flipping and
    scaling.

    Returns:
        (blob, im_scales): blob from ``im_list_to_blob`` and one scale
        factor per roidb entry.

    Raises:
        AssertionError: if the heatmap file does not exist.
    """
    processed_ims = []
    im_scales = []
    for idx, entry in enumerate(roidb):
        im = cv2.imread(entry['image'])

        # Locate the heatmap that belongs to this image.
        name_parts = entry['image'].split('/')
        parent_dir = name_parts[-2]
        stem = name_parts[-1].replace('.jpg', '')
        file_name = '/disks/data4/zyli/Faster-RCNN-AlphaPose/heatmap/%s/%s.npy' % (
            parent_dir, stem)
        assert (os.path.exists(file_name))

        hm = np.array(np.load(file_name), np.float32)
        # cv2.resize takes the destination size as (width, height).
        dest_size = (im.shape[1], im.shape[0])
        hm = cv2.resize(hm, dest_size, interpolation=cv2.INTER_LINEAR)
        im = np.concatenate([im, hm], axis=2)

        if entry['flipped']:
            im = im[:, ::-1, :]

        target_size = cfg.TRAIN.SCALES[scale_inds[idx]]
        im, im_scale = prep_im_for_blob(
            im, cfg.PIXEL_MEANS, target_size, cfg.TRAIN.MAX_SIZE)
        im_scales.append(im_scale)
        processed_ims.append(im)

    blob = im_list_to_blob(processed_ims)
    return blob, im_scales
def _get_image_blob(roidb, scale_inds):
    """Build an input blob from image files or gzip-compressed pickles.

    When ``cfg.TRAIN.FORMAT`` is ``"pickle"`` each entry's ``'image'`` path
    is opened with ``gzip.open`` and unpickled; otherwise it is read with
    OpenCV. Entries flagged ``'flipped'`` are mirrored along the width axis
    before being prepared with ``prep_im_for_blob``.

    Args:
        roidb: sequence of dicts with ``'image'`` and ``'flipped'`` keys.
        scale_inds: per-entry indices into ``cfg.TRAIN.SCALES``.

    Returns:
        (blob, im_scales): blob from ``im_list_to_blob`` and one scale
        factor per roidb entry.
    """
    use_pickle = cfg.TRAIN.FORMAT == "pickle"

    processed_ims = []
    im_scales = []
    for idx, entry in enumerate(roidb):
        if use_pickle:
            # The original called the ``gzip`` module itself and indexed
            # with an undefined name ``image``; both failed at runtime.
            # NOTE(security): unpickling runs arbitrary code; only load
            # files from a trusted source.
            with gzip.open(entry['image'], 'rb') as f:
                im = cPickle.load(f)
        else:
            im = cv2.imread(entry['image'])

        if entry['flipped']:
            im = im[:, ::-1, :]

        target_size = cfg.TRAIN.SCALES[scale_inds[idx]]
        im, im_scale = prep_im_for_blob(
            im, cfg.PIXEL_MEANS, target_size, cfg.TRAIN.MAX_SIZE)
        im_scales.append(im_scale)
        processed_ims.append(im)

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)
    return blob, im_scales
def _get_image_blob(roidb):
    """Build an input blob with random scales and optional inversion.

    A random index into ``cfg.TRAIN.SCALES`` is drawn for every entry.
    Images are read with OpenCV and mirrored when flagged ``'flipped'``.
    When ``cfg.TRAIN.USE_INVERSE`` is set, a coin flip decides whether the
    image is replaced by ``255 - im``.

    Returns:
        (blob, im_scales): blob from ``blob_utils.im_list_to_blob`` and one
        scale factor per roidb entry.
    """
    num_images = len(roidb)
    # One random scale index per image in the batch.
    scale_inds = np.random.randint(
        0, high=len(cfg.TRAIN.SCALES), size=num_images)

    processed_ims = []
    im_scales = []
    for idx, entry in enumerate(roidb):
        im = cv2.imread(entry['image'])
        if entry['flipped']:
            im = im[:, ::-1, :]

        # The coin is only tossed when inversion is enabled.
        if cfg.TRAIN.USE_INVERSE and random.choice([True, False]):
            im = 255 - im

        target_size = cfg.TRAIN.SCALES[scale_inds[idx]]
        ims, scales = blob_utils.prep_im_for_blob(
            im, cfg.PIXEL_MEANS, [target_size], cfg.TRAIN.MAX_SIZE)
        # A single target size was requested, so take the single result.
        im_scales.append(scales[0])
        processed_ims.append(ims[0])

    blob = blob_utils.im_list_to_blob(processed_ims)
    return blob, im_scales
def _get_image_blob(roidb):
    """Build an input blob from the frame list of every roidb entry.

    A random index into ``cfg.TRAIN.SCALES`` is drawn per entry.
    ``image_utils.read_image_video`` supplies the list of frames of an
    entry; every frame is mirrored when the entry is flagged ``'flipped'``,
    prepared at the entry's target size, and written back into that list.

    Returns:
        (blob, im_scales): blob from ``blob_utils.im_list_to_blob`` over
        all frames and one scale factor per roidb entry.
    """
    num_images = len(roidb)
    # One random scale index per entry in the batch.
    scale_inds = np.random.randint(
        0, high=len(cfg.TRAIN.SCALES), size=num_images)

    processed_ims = []
    im_scales = []
    for entry_idx, entry in enumerate(roidb):
        ims = image_utils.read_image_video(entry)
        for frame_idx, frame in enumerate(ims):
            if entry['flipped']:
                frame = frame[:, ::-1, :]
            target_size = cfg.TRAIN.SCALES[scale_inds[entry_idx]]
            prepared, im_scale = blob_utils.prep_im_for_blob(
                frame, cfg.PIXEL_MEANS, [target_size], cfg.TRAIN.MAX_SIZE)
            # Replace the frame in place with its processed version.
            ims[frame_idx] = prepared[0]
        # All frames of an entry share the same scale; keep the last one.
        im_scales.append(im_scale[0])
        processed_ims += ims

    blob = blob_utils.im_list_to_blob(processed_ims)
    return blob, im_scales
def _get_image_blob(roidb):
    """Create the input blob for a batch of multi-frame roidb entries.

    Scale indices are sampled at random, one per entry. The frames of an
    entry come from ``image_utils.read_image_video``; each is flipped if
    the entry says so, preprocessed at the entry's target size, and stored
    back into the frame list, which is then appended to the batch.

    Returns:
        (blob, im_scales): the blob built by ``blob_utils.im_list_to_blob``
        and a list holding one scale per roidb entry.
    """
    num_images = len(roidb)
    # Sample random scales to use for each entry in this batch.
    scale_inds = np.random.randint(0, high=len(cfg.TRAIN.SCALES),
                                   size=num_images)
    processed_ims, im_scales = [], []

    for i in range(num_images):
        ims = image_utils.read_image_video(roidb[i])
        for im_id, im in enumerate(ims):
            flipped = roidb[i]['flipped']
            source = im[:, ::-1, :] if flipped else im
            target_size = cfg.TRAIN.SCALES[scale_inds[i]]
            outputs, im_scale = blob_utils.prep_im_for_blob(
                source, cfg.PIXEL_MEANS, [target_size], cfg.TRAIN.MAX_SIZE)
            ims[im_id] = outputs[0]
        # The scale of the last frame stands in for the whole entry.
        im_scales.append(im_scale[0])
        processed_ims += ims

    # Create a blob to hold the input images.
    blob = blob_utils.im_list_to_blob(processed_ims)
    return blob, im_scales
def _get_image_blob(roidb, scale_inds):
    """Build the input blob and record the pre-scaling image shapes.

    Each image is read with OpenCV and mirrored when its entry is flagged
    ``'flipped'``. Its shape is recorded before ``prep_im_for_blob``
    rescales it.

    Returns:
        (blob, im_scales, im_shapes): blob from ``im_list_to_blob``, one
        scale factor per entry, and the ``ndarray.shape`` of every image
        before scaling.
    """
    processed_ims = []
    im_scales = []
    im_shapes = []
    for idx, entry in enumerate(roidb):
        im = cv2.imread(entry['image'])
        if entry['flipped']:
            im = im[:, ::-1, :]

        # Shape of the image as read: (height, width, channels).
        im_shapes.append(im.shape)

        target_size = cfg.TRAIN.SCALES[scale_inds[idx]]
        im, im_scale = prep_im_for_blob(
            im, cfg.PIXEL_MEANS, target_size, cfg.TRAIN.MAX_SIZE)
        im_scales.append(im_scale)
        processed_ims.append(im)

    blob = im_list_to_blob(processed_ims)
    return blob, im_scales, im_shapes
def _get_image_blob(roidb):
    """Build an input blob from the roidb images at randomly drawn scales.

    A random index into ``cfg.TRAIN.SCALES`` is sampled for every entry.
    Images are read with OpenCV (a failed read trips an assertion),
    mirrored when flagged ``'flipped'``, and prepared with
    ``blob_utils.prep_im_for_blob``.

    Returns:
        (blob, im_scales): blob from ``blob_utils.im_list_to_blob`` and one
        scale factor per roidb entry.
    """
    num_images = len(roidb)
    # One random scale index per image in the batch.
    scale_inds = np.random.randint(
        0, high=len(cfg.TRAIN.SCALES), size=num_images)

    processed_ims = []
    im_scales = []
    for idx, entry in enumerate(roidb):
        im = cv2.imread(entry['image'])
        assert im is not None, \
            'Failed to read image \'{}\''.format(entry['image'])
        # NOTE: when images are not read with OpenCV, 2-D arrays must be
        # expanded to three channels and converted RGB -> BGR at this point.
        if entry['flipped']:
            im = im[:, ::-1, :]

        target_size = cfg.TRAIN.SCALES[scale_inds[idx]]
        ims, scales = blob_utils.prep_im_for_blob(
            im, cfg.PIXEL_MEANS, [target_size], cfg.TRAIN.MAX_SIZE)
        im_scales.append(scales[0])
        processed_ims.append(ims[0])

    # Pack the processed images into a single blob.
    blob = blob_utils.im_list_to_blob(processed_ims)
    return blob, im_scales
def _get_image_blob(roidb):
    """Create the minibatch image blob, sampling one scale per image.

    Every entry gets a random index into ``cfg.TRAIN.SCALES``. The image is
    read with OpenCV, mirrored if the entry is flagged ``'flipped'``, and
    preprocessed for that single target size.

    Returns:
        (blob, im_scales): blob from ``blob_utils.im_list_to_blob`` and the
        scale factor applied to each image.
    """
    num_images = len(roidb)
    scale_inds = np.random.randint(
        0, high=len(cfg.TRAIN.SCALES), size=num_images
    )

    processed_ims, im_scales = [], []
    for i in range(num_images):
        entry = roidb[i]
        raw = cv2.imread(entry['image'])
        oriented = raw[:, ::-1, :] if entry['flipped'] else raw
        size = cfg.TRAIN.SCALES[scale_inds[i]]
        outputs, scales = blob_utils.prep_im_for_blob(
            oriented, cfg.PIXEL_MEANS, [size], cfg.TRAIN.MAX_SIZE
        )
        # Exactly one target size was requested.
        im_scales.append(scales[0])
        processed_ims.append(outputs[0])

    blob = blob_utils.im_list_to_blob(processed_ims)
    return blob, im_scales
def _get_image_blob(im):
    """Convert a list of frames into a network input blob.

    Arguments:
        im (list of ndarray): a list of color images in BGR order. For
            video it is the list of frames; otherwise a list of length 1.

    Returns:
        blob (ndarray): a data blob holding an image (or video) pyramid
        im_scale_factors (ndarray): array of image scales (relative to im)
            used in the image pyramid
    """
    # One (processed images, scale factors) pair per frame, each covering
    # every test scale.
    per_frame = [
        blob_utils.prep_im_for_blob(
            frame, cfg.PIXEL_MEANS, cfg.TEST.SCALES, cfg.TEST.MAX_SIZE)
        for frame in im
    ]
    all_processed_ims = [ims for ims, _ in per_frame]
    all_im_scale_factors = [scales for _, scales in per_frame]

    # Every frame must report the same scale factors; keep the first set.
    im_scale_factors = all_im_scale_factors[0]
    for el in all_im_scale_factors:
        assert(im_scale_factors == el)

    # Group frames by scale:
    # [frame1_scale1, frame2_scale1, ..., frame1_scale2, frame2_scale2, ...]
    num_scales = len(all_processed_ims[0])
    processed_ims = [
        frame_ims[scale_idx]
        for scale_idx in range(num_scales)
        for frame_ims in all_processed_ims
    ]

    blob = blob_utils.im_list_to_blob(processed_ims)
    return blob, np.array(im_scale_factors)
def _get_image_blob(roidb, net_name, scale_inds, is_training, is_ws):
    """Build an input blob from images replicated to three channels.

    Each image is read with ``skimage.io.imread`` and stacked three times
    along the last axis. It is mirrored when the entry is flagged
    ``'flipped'`` and prepared by ``prep_im_for_blob``, which also receives
    the network name, pixel means and stds, the training flag and
    ``is_ws`` coerced to a bool.

    Returns:
        (blob, im_scales): blob from ``im_list_to_blob`` and one scale
        factor per roidb entry.
    """
    processed_ims = []
    im_scales = []
    for idx, entry in enumerate(roidb):
        raw = skimage.io.imread(entry['image'])
        im = np.dstack((raw, raw, raw))
        if entry['flipped']:
            im = im[:, ::-1, :]

        target_size = cfg.TRAIN.SCALES[scale_inds[idx]]
        im, im_scale = prep_im_for_blob(
            im, net_name, cfg.PIXEL_MEANS, cfg.PIXEL_STDS, target_size,
            cfg.TRAIN.MAX_SIZE, is_training, is_ws=bool(is_ws))
        im_scales.append(im_scale)
        processed_ims.append(im)

    blob = im_list_to_blob(processed_ims)
    return blob, im_scales
def _get_image_blob(roidb, scale_inds):
    """Build the input blob and scaled ground-truth boxes for one image.

    Only ``roidb[0]`` is processed. The image is read with OpenCV,
    optionally mirrored, and passed with the non-background boxes
    (columns 0-7) through ``_augment_data``. The augmented image is then
    prepared with ``prep_im_for_blob`` and the boxes are multiplied by the
    resulting scale.

    Args:
        roidb: sequence whose first element is a dict with ``'image'``,
            ``'flipped'``, ``'gt_classes'`` and ``'boxes'`` keys.
        scale_inds: indices into ``cfg.TRAIN.SCALES``; only element 0 is used.

    Returns:
        (blob, im_scales, gt_boxes): blob from ``im_list_to_blob``, a
        one-element list with the image scale, and a float32 array with
        nine columns: the eight scaled box coordinates from
        ``_augment_data`` and the class label.

    Raises:
        IOError: if the image cannot be read. The original only printed a
            message and then failed later on the ``None`` image.
    """
    entry = roidb[0]

    im = cv2.imread(entry['image'])
    if im is None:
        raise IOError("Read image failed: %s" % entry['image'])
    if entry['flipped']:
        im = im[:, ::-1, :]

    # Keep only foreground boxes; class 0 is treated as background here.
    gt_inds = np.where(entry['gt_classes'] != 0)[0]
    im, new_gt_boxes = _augment_data(im, entry['boxes'][gt_inds, 0:8])

    gt_boxes = np.empty((len(new_gt_boxes), 9), dtype=np.float32)
    gt_boxes[:, 0:8] = new_gt_boxes

    target_size = cfg.TRAIN.SCALES[scale_inds[0]]
    im, im_scale = prep_im_for_blob(
        im, cfg.PIXEL_MEANS, target_size, cfg.TRAIN.MAX_SIZE)
    im_scales = [im_scale]

    # Create a blob to hold the input images
    blob = im_list_to_blob([im])

    # Scale by the scalar itself rather than by the one-element list.
    gt_boxes[:, 0:8] *= im_scale
    # _augment_data may return fewer boxes than it was given; labels are
    # taken from the leading indices, as in the original.
    gt_boxes[:, 8] = entry['gt_classes'][gt_inds[:len(new_gt_boxes)]]
    return blob, im_scales, gt_boxes
def _get_image_blob(roidb, scale_inds):
    """Build an input blob using per-entry or configured target sizes.

    Images are read with OpenCV (BGR) and mirrored when flagged
    ``'flipped'``. With ``cfg.TRAIN.MULTI_SCALE`` enabled the target size
    comes from the entry's own ``'scale'`` field; otherwise it is looked up
    in ``cfg.TRAIN.SCALES`` through ``scale_inds``.

    Returns:
        (blob, im_scales): blob from ``im_list_to_blob`` and one scale
        factor per roidb entry.
    """
    processed_ims = []
    im_scales = []
    for idx, entry in enumerate(roidb):
        im = cv2.imread(entry['image'])
        if entry['flipped']:
            im = im[:, ::-1, :]

        target_size = (
            entry['scale'] if cfg.TRAIN.MULTI_SCALE
            else cfg.TRAIN.SCALES[scale_inds[idx]]
        )

        # Mean subtraction and rescaling.
        im, im_scale = prep_im_for_blob(
            im, cfg.PIXEL_MEANS, target_size, cfg.TRAIN.MAX_SIZE)
        im_scales.append(im_scale)
        processed_ims.append(im)

    blob = im_list_to_blob(processed_ims)
    return blob, im_scales
def _get_image_blob(roidb, scale_inds):
    """Build the network blob plus a second blob via the 'original' path.

    The roidb is walked twice. The first pass prepares every image with
    ``prep_im_for_blob`` and packs the results with ``im_list_to_blob``.
    The second pass re-reads the images, prepares them with
    ``prep_im_original_for_blob`` and packs them with
    ``im_list_to_blob_without_channel_transpose``.

    Returns:
        (blob, blob_original, im_scales): the two blobs and the scale
        factors recorded during the first pass.
    """
    def _load(entry):
        # Read an image and apply the entry's horizontal flip, if any.
        image = cv2.imread(entry['image'])
        return image[:, ::-1, :] if entry['flipped'] else image

    processed_ims = []
    im_scales = []
    for idx, entry in enumerate(roidb):
        im = _load(entry)
        target_size = cfg.TRAIN.SCALES[scale_inds[idx]]
        im, im_scale = prep_im_for_blob(
            im, cfg.PIXEL_MEANS, target_size, cfg.TRAIN.MAX_SIZE)
        im_scales.append(im_scale)
        processed_ims.append(im)
    blob = im_list_to_blob(processed_ims)

    # Second pass; the scales it reports are intentionally not recorded.
    original_ims = []
    for idx, entry in enumerate(roidb):
        im_ori = _load(entry)
        target_size = cfg.TRAIN.SCALES[scale_inds[idx]]
        im_ori, im_scale = prep_im_original_for_blob(
            im_ori, cfg.PIXEL_MEANS, target_size, cfg.TRAIN.MAX_SIZE)
        original_ims.append(im_ori)
    blob_original = im_list_to_blob_without_channel_transpose(original_ims)

    return blob, blob_original, im_scales
def _get_image_blob(roidb, max_read_attempts=10):
    """Build an input blob, retrying image reads that return nothing.

    A random index into ``cfg.SCALES`` is sampled for every entry. When
    ``cv2.imread`` returns None the read is retried and each retry is
    logged. Images are mirrored when flagged ``'flipped'`` and prepared
    with ``blob_utils.prep_im_for_blob``.

    Args:
        roidb: sequence of dicts with ``'image'`` and ``'flipped'`` keys.
        max_read_attempts: maximum number of reads per image, counting the
            first one. The original retried without bound, which hangs on
            a permanently unreadable file.

    Returns:
        (blob, im_scales): blob from ``blob_utils.im_list_to_blob`` and one
        scale factor per roidb entry.

    Raises:
        IOError: if an image is still unreadable after
            ``max_read_attempts`` reads.
    """
    num_images = len(roidb)
    # Sample random scales to use for each image in this batch
    scale_inds = np.random.randint(0, high=len(cfg.SCALES), size=num_images)

    processed_ims = []
    im_scales = []
    for i in range(num_images):
        image_path = roidb[i]['image']
        im = cv2.imread(image_path)
        cnt = 1
        while im is None:
            if cnt >= max_read_attempts:
                raise IOError(
                    'Failed to read image \'{}\' after {:d} attempts'.format(
                        image_path, cnt))
            cnt += 1
            logger.info(
                'NoneType image found. Trying to read for {:d} times'.format(
                    cnt))
            im = cv2.imread(image_path)

        if roidb[i]['flipped']:
            im = im[:, ::-1, :]

        target_size = cfg.SCALES[scale_inds[i]]
        im, im_scale = blob_utils.prep_im_for_blob(
            im, cfg.PIXEL_MEANS, target_size, cfg.MAX_SIZE)
        im_scales.append(im_scale)
        processed_ims.append(im)

    # Create a blob to hold the input images
    blob = blob_utils.im_list_to_blob(processed_ims)
    return blob, im_scales
def _get_image_blob(roidb, scale_inds):
    """Build an input blob from three- or four-channel images.

    With ``cfg.TRAIN.FOURCHANNELS`` set, images are read with
    ``cv2.IMREAD_UNCHANGED``; otherwise the default read is used. The same
    flag is forwarded to ``im_list_to_blob`` as ``four_channels``. When the
    module-level ``DEBUG`` flag is on, each path is printed before loading.

    Returns:
        (blob, im_scales): blob from ``im_list_to_blob`` and one scale
        factor per roidb entry.
    """
    processed_ims = []
    im_scales = []
    for idx, entry in enumerate(roidb):
        if DEBUG:
            print(' '.join(('Loading:', entry['image'])))

        if cfg.TRAIN.FOURCHANNELS:
            # Keep the file's channels as stored instead of the default read.
            im = cv2.imread(entry['image'], cv2.IMREAD_UNCHANGED)
        else:
            im = cv2.imread(entry['image'])

        if entry['flipped']:
            im = im[:, ::-1, :]

        target_size = cfg.TRAIN.SCALES[scale_inds[idx]]
        im, im_scale = prep_im_for_blob(
            im, cfg.PIXEL_MEANS, target_size, cfg.TRAIN.MAX_SIZE)
        im_scales.append(im_scale)
        processed_ims.append(im)

    blob = im_list_to_blob(processed_ims,
                           four_channels=cfg.TRAIN.FOURCHANNELS)
    return blob, im_scales
def _prepare_mnc_args(self, image):
    """Prepare the forward-pass inputs of the MNC network for one image.

    Adapted from https://github.com/daijifeng001/MNC/blob/master/tools/demo.py.
    The image is preprocessed into a ``data`` blob, an ``im_info`` row is
    built from two dimensions of that blob and the image scale, and the
    network's ``data`` and ``im_info`` input blobs are reshaped to match.

    :param image: An image (numpy array) of shape (height, width, 3).
    :return: ``(forward_kwargs, im_scales)``. ``forward_kwargs`` maps
        ``'data'`` and ``'im_info'`` to float32 arrays; ``im_scales`` is a
        one-element list holding the scale factor as a numpy array.
    """
    # Image data blob; note the TEST scale is combined with TRAIN.MAX_SIZE.
    prepared, im_scale_factors = prep_im_for_blob(
        image, cfg.PIXEL_MEANS, cfg.TEST.SCALES[0], cfg.TRAIN.MAX_SIZE)
    data_blob = im_list_to_blob([prepared])

    # Image info blob: [blob dim 2, blob dim 3, scale]
    # (presumably height and width; confirm against im_list_to_blob).
    im_scales = [np.array(im_scale_factors)]
    assert len(im_scales) == 1, 'Only single-image batch implemented'
    im_info = np.array(
        [[data_blob.shape[2], data_blob.shape[3], im_scales[0]]],
        dtype=np.float32)

    # Resize the network inputs to fit this image.
    net_blobs = self._net.blobs
    net_blobs['data'].reshape(*data_blob.shape)
    net_blobs['im_info'].reshape(*im_info.shape)

    forward_kwargs = {
        'data': data_blob.astype(np.float32, copy=False),
        'im_info': im_info.astype(np.float32, copy=False),
    }
    return forward_kwargs, im_scales
def _get_image_blob(roidb, scale_inds, is_reshape=True):
    """Build an input blob, optionally skipping the rescaling step.

    Images are read with OpenCV and mirrored when flagged ``'flipped'``.
    With ``is_reshape`` true they go through ``prep_im_for_blob``;
    otherwise they are only converted to float32 and have
    ``cfg.PIXEL_MEANS`` subtracted, with a recorded scale of 1.0. In both
    cases the channel order is reversed before the image is stored.

    Returns:
        (blob, im_scales): blob from ``im_list_to_blob`` and one scale
        factor per roidb entry.
    """
    processed_ims = []
    im_scales = []
    for idx, entry in enumerate(roidb):
        im = cv2.imread(entry['image'])
        if entry['flipped']:
            im = im[:, ::-1, :]

        target_size = cfg.TRAIN.SCALES[scale_inds[idx]]
        if not is_reshape:
            # Mean subtraction only; the image keeps its size.
            im = im.astype(np.float32, copy=False)
            im -= cfg.PIXEL_MEANS
            im_scale = 1.0
        else:
            im, im_scale = prep_im_for_blob(
                im, cfg.PIXEL_MEANS, target_size, cfg.TRAIN.MAX_SIZE)

        im_scales.append(im_scale)
        # Swap the first and last channel (BGR <-> RGB).
        processed_ims.append(im[:, :, [2, 1, 0]])

    blob = im_list_to_blob(processed_ims)
    return blob, im_scales
def _get_image_blob(roidb, scale_inds, data_i):
    """Build a six-channel input blob from an image and its ``_norm`` twin.

    For each entry, ``entry["image"][data_i]`` names the first image and
    the same path with ``"_norm.png"`` appended names the second. When the
    entry is flagged ``"flipped"`` both are mirrored and channel 2 of the
    second image is replaced by ``255 - value``. Both images then have
    their channel order reversed and are stacked into one uint8 array,
    which is prepared with a pixel mean of 127.5.

    Returns:
        (blob, im_scales): blob from ``im_list_to_blob`` and one scale
        factor per roidb entry.
    """
    processed_ims = []
    im_scales = []
    for idx, entry in enumerate(roidb):
        base_name = entry["image"][data_i]
        norm_name = base_name + "_norm.png"
        first = cv2.imread(base_name)
        second = cv2.imread(norm_name)

        if entry["flipped"]:
            first = first[:, ::-1, :]
            second = second[:, ::-1, :]
            # Mirroring also inverts channel 2 of the second image.
            second[:, :, 2] = 255 - second[:, :, 2]

        stacked = np.zeros((first.shape[0], first.shape[1], 6)).astype("uint8")
        # Reverse the channel order of both images before stacking.
        stacked[:, :, 0:3] = first[:, :, ::-1]
        stacked[:, :, 3:6] = second[:, :, ::-1]

        target_size = cfg.TRAIN.SCALES[scale_inds[idx]]
        im, im_scale = prep_im_for_blob(
            stacked, 127.5, target_size, cfg.TRAIN.MAX_SIZE)
        im_scales.append(im_scale)
        processed_ims.append(im)

    blob = im_list_to_blob(processed_ims)
    return blob, im_scales
def _get_image_blob(roidb, scale_inds):
    """Build a four-channel input blob whose extra channel is left zero.

    Each image is read with OpenCV, mirrored when flagged ``'flipped'``,
    and copied into the first three channels of a float array with four
    channels. The fourth channel stays zero: the original code read a
    symmetry image from a sibling ``phasesym`` directory for it, but the
    line that stored it was commented out. That unused disk read has been
    removed.

    Args:
        roidb: sequence of dicts with ``'image'`` and ``'flipped'`` keys.
        scale_inds: per-entry indices into ``cfg.TRAIN.SCALES``.

    Returns:
        (blob, im_scales): blob from ``im_list_to_blob`` and one scale
        factor per roidb entry.
    """
    processed_ims = []
    im_scales = []
    for idx, entry in enumerate(roidb):
        im = cv2.imread(entry['image'])
        if entry['flipped']:
            im = im[:, ::-1, :]

        # Unpacking three values still requires a 3-D image, as before.
        rows, cols, _ = im.shape
        padded = np.zeros([rows, cols, 4])
        padded[:, :, 0:3] = im
        # Channel 3 is reserved for the symmetry map and is currently zero.
        im = padded

        target_size = cfg.TRAIN.SCALES[scale_inds[idx]]
        im, im_scale = prep_im_for_blob(
            im, cfg.PIXEL_MEANS, target_size, cfg.TRAIN.MAX_SIZE)
        im_scales.append(im_scale)
        processed_ims.append(im)

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)
    return blob, im_scales
def _get_image_blob(roidb):
    """Create the image blob for a batch, drawing a random scale per image.

    For every entry a random index into ``cfg.TRAIN.SCALES`` is sampled.
    The image is read with OpenCV and asserted to be readable, mirrored
    when flagged ``'flipped'``, and prepared for the single chosen size.

    Returns:
        (blob, im_scales): blob from ``blob_utils.im_list_to_blob`` and the
        scale factor of each image.
    """
    num_images = len(roidb)
    scale_inds = np.random.randint(0, high=len(cfg.TRAIN.SCALES),
                                   size=num_images)
    processed_ims, im_scales = [], []

    for i in range(num_images):
        image_path = roidb[i]['image']
        im = cv2.imread(image_path)
        assert im is not None, \
            'Failed to read image \'{}\''.format(image_path)
        # If a reader other than OpenCV is used, grayscale arrays must be
        # replicated to three channels and RGB converted to BGR here.
        if roidb[i]['flipped']:
            im = im[:, ::-1, :]

        chosen_size = cfg.TRAIN.SCALES[scale_inds[i]]
        outputs, scales = blob_utils.prep_im_for_blob(
            im, cfg.PIXEL_MEANS, [chosen_size], cfg.TRAIN.MAX_SIZE)
        im_scales.append(scales[0])
        processed_ims.append(outputs[0])

    # Pack the processed images into one blob.
    blob = blob_utils.im_list_to_blob(processed_ims)
    return blob, im_scales
def _get_image_blob(roidb, scale_inds):
    """Build the input blob for the images of a minibatch.

    ``roidb`` holds one dict per image in the batch. A flipped entry keeps
    the path of the unflipped file under ``'image'`` and only carries the
    ``'flipped'`` flag, so the mirroring is done here after reading.

    Returns:
        (blob, im_scales): blob from ``im_list_to_blob``, holding the
        mean-subtracted and rescaled images, and the scale factor of each
        image.
    """
    # Number of images passed in one call: one roidb dict per image.
    num_images = len(roidb)
    processed_ims = []
    im_scales = []
    for i in range(num_images):
        entry = roidb[i]
        im = cv2.imread(entry['image'])
        if entry['flipped']:
            # The stored file is the unflipped image; mirror it now.
            im = im[:, ::-1, :]

        # Target size picked from cfg.TRAIN.SCALES by this image's index.
        target_size = cfg.TRAIN.SCALES[scale_inds[i]]
        # Rescale the image; returns the scaled image and the scale used.
        im, im_scale = prep_im_for_blob(
            im, cfg.PIXEL_MEANS, target_size, cfg.TRAIN.MAX_SIZE)

        # Record scale and image in matching order.
        im_scales.append(im_scale)
        processed_ims.append(im)

    # Pack the rescaled images into one blob.
    blob = im_list_to_blob(processed_ims)
    return blob, im_scales
def _get_image_blob(roidb, scale_inds):
    """Build an input blob from the roidb images at the requested scales.

    Images are always read from disk with ``cv2.imread``; no cache is
    consulted.

    Args:
        roidb: sequence of entries with the keys ``'image'`` and
            ``'flipped'``.
        scale_inds: per-image indices into ``cfg.TRAIN.SCALES``.

    Returns:
        ``(blob, im_scales)``: the blob from ``im_list_to_blob`` and the
        list of per-image scale factors.
    """
    max_size = cfg.TRAIN.MAX_SIZE
    pixel_means = cfg.PIXEL_MEANS

    processed_ims, im_scales = [], []
    for position, record in enumerate(roidb):
        image = cv2.imread(record['image'])
        if record['flipped']:
            # Mirror along the width axis.
            image = image[:, ::-1, :]
        size = cfg.TRAIN.SCALES[scale_inds[position]]
        image, factor = prep_im_for_blob(image, pixel_means, size, max_size)
        im_scales.append(factor)
        processed_ims.append(image)

    # Create a blob to hold the input images.
    blob = im_list_to_blob(processed_ims)
    return blob, im_scales
def _get_image_blob(roidb, scale_inds):
    """Build an input blob, randomly decimating or super-resolving images.

    For every entry the image is read, optionally mirrored, and then:

    * with probability ``cfg.TRAIN.DEC_PROB`` it is passed through
      ``im_to_lr(im, 2)`` ("decimated");
    * otherwise, when ``cfg.TRAIN.USE_SR`` is set, it is prepared with
      ``prep_im_for_blob_sr`` with probability ``cfg.TRAIN.SR_PROB``;
    * in all remaining cases it is prepared with ``prep_im_for_blob``.

    When ``cfg.DEBUG`` is truthy an ``ipdb`` breakpoint is entered first.

    Args:
        roidb: sequence of entries with the keys ``'image'`` and
            ``'flipped'``.
        scale_inds: per-image indices into ``cfg.TRAIN.SCALES``.

    Returns:
        ``(blob, im_scales)``: the blob from ``im_list_to_blob`` and the
        list of per-image scale factors.
    """
    if cfg.DEBUG:
        from ipdb import set_trace
        set_trace()

    processed_ims = []
    im_scales = []
    for idx, entry in enumerate(roidb):
        im = cv2.imread(entry['image'])
        if entry['flipped']:
            im = im[:, ::-1, :]

        # First random draw: decide whether to decimate this image.
        decimated = np.random.rand() < cfg.TRAIN.DEC_PROB
        if decimated:
            im = im_to_lr(im, 2)

        target_size = cfg.TRAIN.SCALES[scale_inds[idx]]

        # The second random draw only happens for non-decimated images when
        # SR is enabled (short-circuit evaluation keeps the RNG stream the
        # same as a nested check would).
        use_sr = (not decimated and cfg.TRAIN.USE_SR
                  and np.random.rand() < cfg.TRAIN.SR_PROB)
        prepare = prep_im_for_blob_sr if use_sr else prep_im_for_blob
        im, im_scale = prepare(im, cfg.PIXEL_MEANS, target_size,
                               cfg.TRAIN.MAX_SIZE)

        im_scales.append(im_scale)
        processed_ims.append(im)

    # Create a blob to hold the input images.
    blob = im_list_to_blob(processed_ims)
    return blob, im_scales
def _get_image_blob(roidb, scale_inds):
    """Build an input blob from the roidb images at the requested scales.

    Args:
        roidb: sequence of entries; ``entry['image']`` is a path that is
            loaded with ``cv2.imread`` and ``entry['flipped']`` requests a
            horizontal mirror.
        scale_inds: per-image indices into ``cfg.TRAIN.SCALES``.

    Returns:
        ``(blob, im_scales)``: the blob from ``im_list_to_blob`` and the
        per-image scale factors.
    """
    blobs_in = []
    scale_factors = []
    for n, item in enumerate(roidb):
        # 'image' holds a file path; the image is indexed as a 3-D array.
        img = cv2.imread(item['image'])

        # Mirror the picture horizontally if the entry was flipped.
        if item['flipped']:
            img = img[:, ::-1, :]

        # Original author's note: the config sets TRAIN.SCALES = (800,), so
        # every target size ends up being 800 -- TODO confirm against cfg.
        target_size = cfg.TRAIN.SCALES[scale_inds[n]]

        # Rescale the image; the helper (noted as living in blob.py) returns
        # the resized image and the scale ratio.  PIXEL_MEANS is noted as
        # [[[102.9801, 115.9465, 122.7717]]] -- TODO confirm.
        img, ratio = prep_im_for_blob(img, cfg.PIXEL_MEANS, target_size,
                                      cfg.TRAIN.MAX_SIZE)

        # Remember the scale ratio alongside the processed image.
        scale_factors.append(ratio)
        blobs_in.append(img)

    # im_list_to_blob packs the rescaled images into a single blob.
    blob = im_list_to_blob(blobs_in)
    return blob, scale_factors
def _get_image_blob(roidb, scale_inds):
    """Build an input blob from the roidb images at the requested scales.

    Args:
        roidb: sequence of entries with the keys ``'image'`` (path read by
            ``cv2.imread``) and ``'flipped'``.
        scale_inds: per-image indices into ``cfg.TRAIN.SCALES``.

    Returns:
        ``(blob, im_scales)``: the blob from ``im_list_to_blob`` and the
        list of scale factors reported by ``prep_im_for_blob``.
    """
    results = []
    for i, entry in enumerate(roidb):
        im = cv2.imread(entry['image'])
        if entry['flipped']:
            im = im[:, ::-1, :]
        # Each result is the (processed image, scale factor) pair.
        results.append(
            prep_im_for_blob(im, cfg.PIXEL_MEANS,
                             cfg.TRAIN.SCALES[scale_inds[i]],
                             cfg.TRAIN.MAX_SIZE))

    processed_ims = [image for image, _ in results]
    im_scales = [scale for _, scale in results]

    # Create a blob to hold the input images.
    blob = im_list_to_blob(processed_ims)
    return blob, im_scales
def _get_image_blob(roidb, cfg=cfg):
    """Build an input blob from the roidb images at randomly chosen scales.

    One scale index per entry is sampled with a single
    ``np.random.randint`` call before any image is loaded.  Each image is
    read with OpenCV, mirrored when flagged, and prepared through
    ``blob_utils.prep_im_for_blob`` using the pixel mean / divisor / std
    taken from ``cfg``.

    Args:
        roidb: sequence of entries with the keys ``'image'`` and
            ``'flipped'``.
        cfg: configuration object providing ``TRAIN.SCALES``,
            ``TRAIN.MAX_SIZE``, ``PIXEL_MEANS``, ``PIXEL_DIV`` and
            ``PIXEL_STD``; defaults to the module-level ``cfg``.

    Returns:
        ``(blob, im_scales)``: the blob from ``blob_utils.im_list_to_blob``
        and the list of per-image scale factors.

    Raises:
        AssertionError: if ``cv2.imread`` returns ``None`` for an image.
    """
    num_images = len(roidb)
    # Sample random scales to use for each image in this batch.
    scale_inds = np.random.randint(
        0, high=len(cfg.TRAIN.SCALES), size=num_images)

    processed_ims = []
    im_scales = []
    for i, entry in enumerate(roidb):
        im = cv2.imread(entry['image'])
        assert im is not None, \
            'Failed to read image \'{}\''.format(entry['image'])
        if entry['flipped']:
            im = im[:, ::-1, :]

        target_size = cfg.TRAIN.SCALES[scale_inds[i]]
        # A one-element size list is passed, so the helper's list results
        # are unwrapped with [0].
        im, im_scale = blob_utils.prep_im_for_blob(
            im, cfg.PIXEL_MEANS, cfg.PIXEL_DIV, cfg.PIXEL_STD,
            [target_size], cfg.TRAIN.MAX_SIZE)
        im_scales.append(im_scale[0])
        processed_ims.append(im[0])

    # Create a blob to hold the input images.
    blob = blob_utils.im_list_to_blob(processed_ims)
    return blob, im_scales
def _get_image_blob(roidb, scale_inds):
    """Build a 4-channel input blob: BGR image plus a deformed mask.

    For every entry the colour image and the grayscale ``'deformed_mask'``
    image are read, stacked into an ``(H, W, 4)`` array (channels 0-2 hold
    the colour image, channel 3 the mask), optionally mirrored, and prepared
    with ``prep_im_for_blob``.

    Args:
        roidb: sequence of entries with the keys ``'image'``,
            ``'deformed_mask'`` and ``'flipped'``.
        scale_inds: per-image indices into ``cfg.TRAIN.SCALES``.

    Returns:
        ``(blob, im_scales)``: the blob from ``im_list_to_blob`` and the
        list of per-image scale factors.
    """
    processed_ims = []
    im_scales = []
    for idx, entry in enumerate(roidb):
        im_bgr = cv2.imread(entry['image'])
        if cfg.DEBUG:
            print(im_bgr.shape)

        # Flag 0 makes OpenCV load the mask as a single-channel image.
        deformed_mask = cv2.imread(entry['deformed_mask'], 0)

        # Add the deformed mask to the input as a fourth channel.
        rows, cols = im_bgr.shape[0], im_bgr.shape[1]
        im = np.zeros((rows, cols, 4))
        im[:, :, 0:3] = im_bgr
        im[:, :, 3] = deformed_mask

        if entry['flipped']:
            im = im[:, ::-1, :]

        # NOTE(review): the 4-channel array is combined with
        # cfg.PIXEL_MEANS inside prep_im_for_blob -- presumably the means
        # cover four channels; verify against the config.
        target_size = cfg.TRAIN.SCALES[scale_inds[idx]]
        im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
                                        cfg.TRAIN.MAX_SIZE)
        im_scales.append(im_scale)
        processed_ims.append(im)

    # Create a blob to hold the input images.
    blob = im_list_to_blob(processed_ims)
    return blob, im_scales
def _get_rprocessed_image_blob(roidb, scale_inds, angles):
    """Build an input blob, rotating images whose entry is marked rotated.

    Each image is read, mirrored when ``'flipped'`` is set and, when
    ``'rotated'`` is set, rotated about its centre by ``angles[i]`` with no
    change of scale and the same output size.

    Args:
        roidb: sequence of entries with the keys ``'image'``, ``'flipped'``
            and ``'rotated'``.
        scale_inds: per-image indices into ``cfg.TRAIN.SCALES``.
        angles: per-image rotation angles passed to
            ``cv2.getRotationMatrix2D`` (anti-clockwise, per the original
            author's note).

    Returns:
        ``(blob, im_scales)``: the blob from ``im_list_to_blob`` and the
        list of per-image scale factors.
    """
    processed_ims = []
    im_scales = []
    for idx, entry in enumerate(roidb):
        im = cv2.imread(entry['image'])
        if entry['flipped']:
            im = im[:, ::-1, :]

        if entry['rotated']:
            height, width = im.shape[:2]
            # Rotate about the image centre; scale 1.0 leaves the size
            # unchanged.
            rotation = cv2.getRotationMatrix2D(
                (width / 2, height / 2), angles[idx], 1.0)
            im = cv2.warpAffine(im, rotation, (width, height))

        target_size = cfg.TRAIN.SCALES[scale_inds[idx]]
        im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
                                        cfg.TRAIN.MAX_SIZE)
        im_scales.append(im_scale)
        processed_ims.append(im)

    # Create a blob to hold the input images.
    blob = im_list_to_blob(processed_ims)
    return blob, im_scales
def _get_image_blob(roidb, scale_inds):
    """Build an input blob from images scaled to [0, 1] before preparation.

    Pixel values are divided by 255.0 right after loading, and
    ``prep_im_for_blob`` receives hard-coded per-channel ``mean``/``std``
    arrays instead of ``cfg.PIXEL_MEANS``.

    Args:
        roidb: sequence of entries with the keys ``'image'`` and
            ``'flipped'``.
        scale_inds: per-image indices into ``cfg.TRAIN.SCALES``.

    Returns:
        ``(blob, im_scales)``: the blob from ``im_list_to_blob`` and the
        list of per-image scale factors.
    """
    # Normalisation constants, shaped (1, 1, 3).
    mean = np.array([[[0.485, 0.456, 0.406]]])
    std = np.array([[[0.229, 0.224, 0.225]]])

    processed_ims = []
    im_scales = []
    for idx, entry in enumerate(roidb):
        raw = cv2.imread(entry['image'])
        im = raw / 255.0
        if entry['flipped']:
            im = im[:, ::-1, :]

        target_size = cfg.TRAIN.SCALES[scale_inds[idx]]
        im, im_scale = prep_im_for_blob(
            im, target_size, cfg.TRAIN.MAX_SIZE, mean=mean, std=std)
        im_scales.append(im_scale)
        processed_ims.append(im)

    # Create a blob to hold the input images.
    blob = im_list_to_blob(processed_ims)
    return blob, im_scales
def _get_image_blob(roidb):
    """Build a single-image blob for one roidb entry at a random scale.

    Args:
        roidb: a single entry (not a list) with the keys ``'image'`` and
            ``'flipped'``.

    Returns:
        ``(blob, im_scale)``: the blob from ``im_list_to_blob`` for the one
        prepared image, and the scale factor returned by
        ``prep_im_for_blob``.
    """
    image = cv2.imread(roidb['image'])
    if roidb['flipped']:
        # Mirror along the width axis.
        image = image[:, ::-1, :]

    # The target size is picked at random from the configured scales.
    chosen_size = np.random.choice(cfg.TRAIN.SCALES)
    prepared, im_scale = prep_im_for_blob(
        image, cfg.PIXEL_MEANS, chosen_size, cfg.TRAIN.MAX_SIZE)

    return im_list_to_blob([prepared]), im_scale
def _get_image_blob(roidb, scale_inds):
    """Build an input blob from stacks of optical-flow images.

    ``entry["image"]`` is expected to be a path whose last component is a
    frame number without extension (e.g. ``path/000000``).  Starting from
    that frame, 10 consecutive frames are loaded as ``<id>_x.jpg`` /
    ``<id>_y.jpg`` grayscale pairs and interleaved into a 20-channel uint8
    image (x, y, x, y, ...), which is then prepared with
    ``prep_im_for_blob``.

    Args:
        roidb: sequence of entries with the keys ``"image"`` and
            ``"flipped"``.
        scale_inds: per-image indices into ``cfg.TRAIN.SCALES``.

    Returns:
        ``(blob, im_scales)``: the blob from ``im_list_to_blob`` and the
        list of per-image scale factors.
    """
    processed_ims = []
    im_scales = []
    for idx, entry in enumerate(roidb):
        imname = entry["image"]
        frame_name = imname.split("/")[-1]
        first_id = int(frame_name)
        # Everything before the frame number, including the trailing "/".
        srcdir = imname[0:-len(frame_name)]

        for offset in range(10):
            prefix = srcdir + "{0:06d}".format(first_id + offset)
            # NOTE(review): CV_LOAD_IMAGE_GRAYSCALE is a legacy constant
            # name; newer OpenCV releases expose cv2.IMREAD_GRAYSCALE --
            # confirm the targeted OpenCV version.
            imx = cv2.imread(prefix + "_x.jpg", cv2.CV_LOAD_IMAGE_GRAYSCALE)
            imy = cv2.imread(prefix + "_y.jpg", cv2.CV_LOAD_IMAGE_GRAYSCALE)

            if entry["flipped"]:
                # Mirror the x-flow and invert its values.
                # NOTE(review): imy is not mirrored here, so for flipped
                # entries the two flow channels are no longer spatially
                # aligned -- confirm whether that is intended.
                imx = 255 - imx[:, ::-1]

            if offset == 0:
                # Allocate the stacked image once the frame size is known.
                im = np.zeros((imx.shape[0], imx.shape[1], 20))
                im = im.astype("uint8")

            im[:, :, offset * 2] = imx
            im[:, :, offset * 2 + 1] = imy

        target_size = cfg.TRAIN.SCALES[scale_inds[idx]]
        im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
                                        cfg.TRAIN.MAX_SIZE)
        im_scales.append(im_scale)
        processed_ims.append(im)

    # Create a blob to hold the input images.
    blob = im_list_to_blob(processed_ims)
    return blob, im_scales
def _get_image_blob(self,sample):
    """Build an image blob and the matching label list for one batch.

    The first ``self.batch_size`` items of ``sample`` are used.  The image
    path is ``cfg.IMAGEPATH + item['picname']``; the label is looked up in
    ``self._data._sample_label`` using the part of ``picname`` before the
    first ``'/'``.

    Args:
        sample: indexable collection of items with the keys ``'picname'``
            and ``'flipped'``.

    Returns:
        ``(blob, labels_blob)``: the blob from ``im_list_to_blob`` and the
        list of labels, in sample order.
    """
    images = []
    labels = []
    for position in range(self.batch_size):
        item = sample[position]
        picname = item['picname']

        image = cv2.imread(cfg.IMAGEPATH + picname)
        if item['flipped']:
            image = image[:, ::-1, :]

        # The leading path component names the person the label belongs to.
        person = picname.split('/')[0]
        labels.append(self._data._sample_label[person])

        images.append(prep_im_for_blob(image))

    # Create a blob to hold the input images.
    blob = im_list_to_blob(images)
    return blob, labels
def _get_image_blob(im):
    """Convert an image into a network input.

    Arguments:
        im (ndarray): a color image in BGR order

    Returns:
        blob (ndarray): a data blob holding an image pyramid
        im_scale_factors (ndarray): array of image scales (relative to im)
            used in the image pyramid
    """
    # One processed image (and one scale factor) per configured test scale.
    pyramid, factors = blob_utils.prep_im_for_blob(
        im, cfg.PIXEL_MEANS, cfg.TEST.SCALES, cfg.TEST.MAX_SIZE)
    return blob_utils.im_list_to_blob(pyramid), np.array(factors)
def _get_image_blob(im, roidb, scale_inds):
    """Build an input blob from ``im`` at the scales selected for ``roidb``.

    The image is supplied by the caller; ``roidb`` is only used for its
    length, i.e. the number of scaled copies to produce.

    Every copy is prepared from the original ``im``.  The previous version
    rebound ``im`` to the processed result inside the loop, so from the
    second iteration on it re-processed an already mean-subtracted,
    rescaled image instead of the input.  Results for a single-entry
    ``roidb`` are unchanged.

    Args:
        im: the source image passed to ``prep_im_for_blob``.
        roidb: sequence whose length gives the number of images in the
            batch.
        scale_inds: per-image indices into ``cfg.TRAIN.SCALES``.

    Returns:
        ``(blob, im_scales)``: the blob from ``im_list_to_blob`` and the
        list of per-image scale factors.
    """
    num_images = len(roidb)
    processed_ims = []
    im_scales = []
    for i in range(num_images):
        target_size = cfg.TRAIN.SCALES[scale_inds[i]]
        # Use a separate name so the caller-supplied image is never
        # overwritten by a processed copy.
        scaled_im, im_scale = prep_im_for_blob(
            im, cfg.PIXEL_MEANS, target_size, cfg.TRAIN.MAX_SIZE)
        im_scales.append(im_scale)
        processed_ims.append(scaled_im)

    # Create a blob to hold the input images.
    blob = im_list_to_blob(processed_ims)
    return blob, im_scales
def proposal_locate_anchors_single_scale(im, target_size, anchordb):
    """Generate the anchors for one image at a single scale.

    The image is rescaled with ``prep_im_for_blob``; its resulting height
    and width are mapped to output-map sizes through ``anchordb``.  Every
    output-map position yields a shift ``index / cfg.DEDUP_BOXES`` that is
    added to each base anchor.

    Args:
        im: source image.
        target_size: target size forwarded to ``prep_im_for_blob``.
        anchordb: mapping with the keys ``'output_height_map'`` and
            ``'output_width_map'`` (indexed by image height / width) and
            ``'anchors'`` (2-D array of base anchors, one per row).

    Returns:
        ``(final_anchors, im_scale)``: an array with one row per
        (shift, base anchor) pair -- all base anchors for the first shift,
        then all for the second, and so on -- and the scale factor returned
        by ``prep_im_for_blob``.
    """
    im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
                                    cfg.TRAIN.MAX_SIZE)
    scaled_h, scaled_w = im.shape[0], im.shape[1]
    out_h = anchordb['output_height_map'][scaled_h]
    out_w = anchordb['output_width_map'][scaled_w]

    # Per-position offsets along each axis of the output map.
    xs = np.array([col / cfg.DEDUP_BOXES for col in range(out_w)])
    ys = np.array([row / cfg.DEDUP_BOXES for row in range(out_h)])
    grid_x, grid_y = np.meshgrid(xs, ys)

    base_anchors = anchordb['anchors']

    # Column-major flattening, so the y index varies fastest.
    flat_x = grid_x.flatten('F')
    flat_y = grid_y.flatten('F')
    # One (x, y, x, y) row per position: the same shift is applied to both
    # corners of a box.
    shift_x_y = np.array([flat_x, flat_y, flat_x, flat_y]).T

    num_shifts = shift_x_y.shape[0]
    num_base = base_anchors.shape[0]
    tiled_anchors = np.tile(base_anchors, (num_shifts, 1))
    repeated_shifts = np.repeat(shift_x_y, num_base, axis=0)
    final_anchors = tiled_anchors + repeated_shifts
    return final_anchors, im_scale
def _get_image_blob(roidb, scale_inds):
    """Build an input blob from the roidb images at the requested scales.

    Images are read with the explicit ``cv2.IMREAD_COLOR`` flag.

    Args:
        roidb: sequence of entries with the keys ``'image'`` and
            ``'flipped'``.
        scale_inds: per-image indices into ``cfg.TRAIN.SCALES``.

    Returns:
        ``(blob, im_scales)``: the blob from ``im_list_to_blob`` and the
        list of per-image scale factors.
    """
    im_scales = []
    processed_ims = []
    for k, rec in enumerate(roidb):
        frame = cv2.imread(rec['image'], cv2.IMREAD_COLOR)
        # Horizontal mirror for entries marked as flipped.
        frame = frame[:, ::-1, :] if rec['flipped'] else frame

        wanted = cfg.TRAIN.SCALES[scale_inds[k]]
        frame, ratio = prep_im_for_blob(frame, cfg.PIXEL_MEANS, wanted,
                                        cfg.TRAIN.MAX_SIZE)
        im_scales.append(ratio)
        processed_ims.append(frame)

    # Create a blob to hold the input images.
    return im_list_to_blob(processed_ims), im_scales
def _get_image_blob(self, roidb, scale_inds, im_names):
    """Build an input blob from the named images at the requested scales.

    Image paths come from ``im_names`` rather than from the roidb entries;
    ``roidb`` supplies the batch size and the per-image flip flag.

    Args:
        roidb: sequence of entries with a ``'Flip'`` field.
        scale_inds: per-image indices into ``cfg.TRAIN.SCALES``.
        im_names: per-image file paths, aligned with ``roidb``.

    Returns:
        ``(blob, im_scales)``: the blob from ``im_list_to_blob`` and the
        list of per-image scale factors.
    """
    processed_ims = []
    im_scales = []
    for idx, entry in enumerate(roidb):
        im = cv2.imread(im_names[idx])

        # Per the original author: scipy.io.savemat stores True/False as
        # [[1]] / [[0]] with shape (1, 1), hence the [0][0] unwrapping to
        # get at the flip flag.
        flip = entry['Flip'][0][0]
        if flip:
            im = im[:, ::-1, :]

        target_size = cfg.TRAIN.SCALES[scale_inds[idx]]
        im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
                                        cfg.TRAIN.MAX_SIZE)
        im_scales.append(im_scale)
        processed_ims.append(im)

    # Create a blob to hold the input images.
    blob = im_list_to_blob(processed_ims)
    return blob, im_scales
def prepare_mnc_args(im, net):
    """Prepare the forward-pass inputs of ``net`` for a single image.

    The image is prepared at ``cfg.TEST.SCALES[0]``, packed into a data
    blob, and paired with an ``im_info`` row built from the blob's shape
    entries at indices 2 and 3 and the scale factor.  As a side effect
    ``net.blobs['data']`` and ``net.blobs['im_info']`` are reshaped to
    match.

    Args:
        im: source image.
        net: network object exposing ``blobs['data']`` and
            ``blobs['im_info']`` with a ``reshape`` method.

    Returns:
        ``(forward_kwargs, im_scales)``: a dict with float32 ``'data'`` and
        ``'im_info'`` arrays, and a one-element list holding the scale
        factor as a NumPy array.
    """
    # NOTE(review): the test scale is combined with cfg.TRAIN.MAX_SIZE (not
    # a TEST setting) -- confirm this is intentional.
    im, im_scale_factors = prep_im_for_blob(
        im, cfg.PIXEL_MEANS, cfg.TEST.SCALES[0], cfg.TRAIN.MAX_SIZE)
    data_blob = im_list_to_blob([im])

    # Image info: two blob dimensions followed by the scale factor.
    im_scales = [np.array(im_scale_factors)]
    assert len(im_scales) == 1, 'Only single-image batch implemented'
    im_info = np.array(
        [[data_blob.shape[2], data_blob.shape[3], im_scales[0]]],
        dtype=np.float32)

    # Reshape network inputs to fit this image.
    net.blobs['data'].reshape(*data_blob.shape)
    net.blobs['im_info'].reshape(*im_info.shape)

    forward_kwargs = {
        'data': data_blob.astype(np.float32, copy=False),
        'im_info': im_info.astype(np.float32, copy=False),
    }
    return forward_kwargs, im_scales
def _convert_sample(sample, scale_indx):
    """Turn one sample into a network blob plus scaled ground-truth boxes.

    Steps, in order:

    1. Pick ``sample.scales[scale_indx]`` and optionally jitter the target
       size and aspect ratio (percent ranges from ``cfg.TRAIN``).
    2. Copy the BGR data and apply the enabled photometric jitterings
       (Gaussian blur, illumination, contrast, Gaussian noise).
    3. When ``cfg.TRAIN.DEBUG_VIZ`` is set, dump the augmented image into
       ``<exp_dir>/debug_viz`` under a file index that wraps to 0 on
       reaching 20 (module-level ``DEBUG_VIZ_INDX``).
    4. Prepare the image with ``prep_im_for_blob`` and recompute the
       aspect-ratio multiplier from the actual result shape.
    5. Build ``[x1, y1, x2, y2, class]`` boxes for markings with
       ``class >= 1``; x coordinates are multiplied by the aspect-ratio
       multiplier and all four coordinates by the image scale.

    Args:
        sample: object exposing ``scales``, ``bgr_data``, ``max_size`` and
            ``marking`` (iterable of dicts with the keys ``'x'``, ``'y'``,
            ``'w'``, ``'h'``, ``'class'`` and ``'ignore'``).
        scale_indx: index into ``sample.scales``.

    Returns:
        ``(blob, gt_boxes, ignored_boxes, im_scale)``; both box arrays are
        float32 with 5 columns (shape ``(0, 5)`` when empty).
    """
    global DEBUG_VIZ_INDX

    # --- geometric jitter --------------------------------------------------
    target_size = sample.scales[scale_indx]
    if cfg.TRAIN.SCALE_JITTERING > 0:
        jitter = cfg.TRAIN.SCALE_JITTERING
        r = (2 * jitter * random.random() - jitter) / 100.0
        target_size = int(target_size * (1 + r))

    ar_mult = 1.0
    if cfg.TRAIN.RATIO_JITTERING > 0:
        jitter = cfg.TRAIN.RATIO_JITTERING
        ar_mult += (2 * jitter * random.random() - jitter) / 100.0

    # --- photometric jitter (applied to a copy of the source image) --------
    sample_image = sample.bgr_data.copy()

    if (cfg.TRAIN.GAUSS_BLUR_JITTERING > 0
            and random.random() < cfg.TRAIN.GAUSS_BLUR_JITTERING):
        # Odd kernel size, either 3 or 5.
        ksize = 2 * np.random.randint(1, 3) + 1
        sample_image = cv2.GaussianBlur(sample_image, (ksize, ksize), 0)

    if cfg.TRAIN.ILLUMINATION_JITTERING > 0:
        sample_image = illumination_jittering(
            sample_image, cfg.TRAIN.ILLUMINATION_JITTERING)

    if cfg.TRAIN.CONTRAST_JITTERING > 0:
        sample_image = contrast_jittering(
            sample_image, cfg.TRAIN.CONTRAST_JITTERING)

    # Noise is only added half of the time, even when enabled.
    if cfg.TRAIN.GAUSS_NOISE_JITTERING > 0 and random.random() < 0.5:
        sample_image = gauss_noise_jittering(
            sample_image, cfg.TRAIN.GAUSS_NOISE_JITTERING)

    # --- optional debug dump -----------------------------------------------
    if cfg.TRAIN.DEBUG_VIZ:
        debug_viz_dir = Path(get_exp_dir()) / 'debug_viz'
        if not debug_viz_dir.exists():
            debug_viz_dir.mkdir()
        dump_path = debug_viz_dir / ('%02d.jpg' % DEBUG_VIZ_INDX)
        cv2.imwrite(str(dump_path), sample_image)
        DEBUG_VIZ_INDX += 1
        if DEBUG_VIZ_INDX == 20:
            DEBUG_VIZ_INDX = 0

    # --- rescale and derive the effective aspect-ratio change --------------
    im, im_scale = prep_im_for_blob(sample_image, cfg.PIXEL_MEANS,
                                    target_size, sample.max_size, ar_mult)

    source_shape = sample.bgr_data.shape
    old_ratio = source_shape[1] / source_shape[0]
    new_ratio = im.shape[1] / im.shape[0]
    ar_mult = new_ratio / old_ratio

    # --- boxes ---------------------------------------------------------------
    kept = []
    ignored = []
    for mark in sample.marking:
        if mark['class'] < 1:
            continue
        x_min = mark['x'] * ar_mult
        y_min = mark['y']
        x_max = (mark['x'] + mark['w'] - 1) * ar_mult
        y_max = mark['y'] + mark['h'] - 1
        box = [x_min, y_min, x_max, y_max, mark['class']]
        if mark['ignore']:
            ignored.append(box)
        else:
            kept.append(box)

    if kept:
        gt_boxes = np.array(kept, dtype=np.float32)
    else:
        gt_boxes = np.empty(shape=(0, 5), dtype=np.float32)

    if ignored:
        ignored_boxes = np.array(ignored, dtype=np.float32)
    else:
        ignored_boxes = np.empty(shape=(0, 5), dtype=np.float32)

    # Coordinates follow the image rescaling; the class column is untouched.
    gt_boxes[:, 0:4] *= im_scale
    ignored_boxes[:, 0:4] *= im_scale

    blob = im_list_to_blob([im])
    return blob, gt_boxes, ignored_boxes, im_scale