def _get_image_blob(roidb, scale_inds):
    """Build an input blob from the roidb images at the selected base scales.

    Returns the packed image blob and the list of scale factors applied.
    """
    processed_ims = []
    im_scales = []
    for idx in xrange(len(roidb)):
        entry = roidb[idx]
        img = cv2.imread(entry['image'])
        if entry['flipped']:
            img = img[:, ::-1, :]
        # Mean-subtract on a float copy so the decoded image is untouched.
        img = img.astype(np.float32, copy=True)
        img -= cfg.PIXEL_MEANS
        factor = cfg.TRAIN.SCALES_BASE[scale_inds[idx]]
        img = cv2.resize(img, None, None, fx=factor, fy=factor,
                         interpolation=cv2.INTER_LINEAR)
        im_scales.append(factor)
        processed_ims.append(img)
    # Pack the per-image arrays into a single network input blob.
    blob = im_list_to_blob(processed_ims)
    return blob, im_scales
def _get_image_blob(roidb):
    """Build an input blob from the roidb images, sampling one random
    training scale per image.

    Returns the blob and the per-image scale factors.
    """
    n = len(roidb)
    # One random scale index per image in this minibatch.
    scale_inds = np.random.randint(0, high=len(cfg.TRAIN.SCALES), size=n)
    processed_ims, im_scales = [], []
    for idx, entry in enumerate(roidb):
        img = cv2.imread(entry['image'])
        if entry['flipped']:
            img = img[:, ::-1, :]
        target = cfg.TRAIN.SCALES[scale_inds[idx]]
        imgs, scales = blob_utils.prep_im_for_blob(
            img, cfg.PIXEL_MEANS, [target], cfg.TRAIN.MAX_SIZE
        )
        # prep_im_for_blob returns lists; a single target size was given.
        im_scales.append(scales[0])
        processed_ims.append(imgs[0])
    # Pack the processed images into one input blob.
    return blob_utils.im_list_to_blob(processed_ims), im_scales
def __call__(self, sample):
    """Resize the image in `sample`, record scaling info, and attach ROI
    tensors for either Fast R-CNN testing or training.

    Mutates and returns `sample`; the 'dbentry' key is removed at the end.
    NOTE(review): batch_idx=0 below assumes batch_size == 1 -- confirm.
    """
    # resizes image and returns scale factors
    original_im_size = sample['image'].shape
    im_list, im_scales = prep_im_for_blob(sample['image'],
                                          pixel_means=self.mean,
                                          target_sizes=self.target_sizes,
                                          max_size=self.max_size)
    # im_list_to_blob swaps channels and adds stride in case of fpn
    sample['image'] = torch.FloatTensor(im_list_to_blob(im_list, self.fpn_on))
    sample['scaling_factors'] = im_scales[0]
    sample['original_im_size'] = torch.FloatTensor(original_im_size)
    if len(sample['dbentry']['boxes']) != 0 and not self.sample_proposals_for_training:
        # Fast RCNN test: scale the precomputed proposals to match the
        # resized image.
        proposals = sample['dbentry']['boxes'] * im_scales[0]
        if self.remove_dup_proposals:
            proposals, _ = self.remove_dup_prop(proposals)
        if self.fpn_on == False:
            sample['rois'] = torch.FloatTensor(proposals)
        else:
            # Distribute the proposals over FPN levels for testing.
            multiscale_proposals = add_multilevel_rois_for_test(
                {'rois': proposals}, 'rois')
            for k in multiscale_proposals.keys():
                sample[k] = torch.FloatTensor(multiscale_proposals[k])
    elif self.sample_proposals_for_training:
        # Fast RCNN training: sample rois plus labels/regression targets.
        sampled_rois_labels_and_targets = fast_rcnn_sample_rois(
            roidb=sample['dbentry'],
            im_scale=im_scales[0],
            batch_idx=0)  # ok as long as we keep batch_size=1
        sampled_rois_labels_and_targets = {
            key: torch.FloatTensor(value)
            for key, value in sampled_rois_labels_and_targets.items()}
        # add to sample
        sample = {**sample, **sampled_rois_labels_and_targets}
    # remove dbentry from sample
    del sample['dbentry']
    return sample
def _get_image_blob(im):
    """Convert a BGR image into a test-time image pyramid blob.

    Arguments:
        im (ndarray): a color image in BGR order

    Returns:
        blob plus an ndarray of the scale factors used, one per level.
    """
    base = im.astype(np.float32, copy=True)
    base -= cfg.PIXEL_MEANS
    pyramid, factors = [], []
    # One pyramid level per configured base scale.
    for s in cfg.TEST.SCALES_BASE:
        resized = cv2.resize(base, None, None, fx=s, fy=s,
                             interpolation=cv2.INTER_LINEAR)
        factors.append(s)
        pyramid.append(resized)
    return im_list_to_blob(pyramid), np.array(factors)
def _get_image_blob(im):
    """Converts an image into a network input.

    Arguments:
        im (ndarray): a color image in BGR order

    Returns:
        blob (ndarray): a data blob holding the single-scale image
        im_info (ndarray): 1 x 3 array of (height, width, scale)
    """
    im_orig = im.astype(np.float32, copy=True)
    im_orig -= cfg.PIXEL_MEANS
    processed_ims = []
    # Single test scale only; downstream code assumes one image.
    assert len(cfg.TEST.SCALES_BASE) == 1
    # BUG FIX: read the scale from the TEST config that the assert above
    # validates, not cfg.TRAIN (train/test base scales can differ).
    im_scale = cfg.TEST.SCALES_BASE[0]
    im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale,
                    interpolation=cv2.INTER_LINEAR)
    # im_info row is (height, width, scale) for the downstream network.
    im_info = np.hstack((im.shape[:2], im_scale))[np.newaxis, :]
    processed_ims.append(im)
    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)
    return blob, im_info
def _get_image_blob(roidb, scale_inds):
    """Builds an input blob from the images in the roidb at the specified
    scales, stacking a deformed mask as a 4th input channel.
    """
    num_images = len(roidb)
    processed_ims = []
    im_scales = []
    for i in xrange(num_images):
        im_bgr = cv2.imread(roidb[i]['image'])
        if cfg.DEBUG:
            print im_bgr.shape
        #******************************
        # Add deformed mask to input
        #******************************
        # Mask is read as a single grayscale channel (flag 0) and appended
        # after the three BGR channels.
        deformed_mask = cv2.imread(roidb[i]['deformed_mask'], 0)
        im = np.zeros((im_bgr.shape[0], im_bgr.shape[1], 4))
        im[:, :, 0:3] = im_bgr
        im[:, :, 3] = deformed_mask
        if roidb[i]['flipped']:
            im = im[:, ::-1, :]
        target_size = cfg.TRAIN.SCALES[scale_inds[i]]
        # NOTE(review): cfg.PIXEL_MEANS presumably covers 4 channels here to
        # match the BGR+mask input -- confirm against the config.
        im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
                                        cfg.TRAIN.MAX_SIZE)
        im_scales.append(im_scale)
        processed_ims.append(im)
    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)
    return blob, im_scales
def _get_image_blob(im):
    """Build a test-time image pyramid blob from a BGR image.

    Each entry in cfg.TEST.SCALES resizes the short side to that target
    while capping the long side at cfg.TEST.MAX_SIZE.

    Returns the blob and an ndarray of the scale factors used.
    """
    base = im.astype(np.float32, copy=True)
    base -= cfg.PIXEL_MEANS
    short_side = np.min(base.shape[0:2])
    long_side = np.max(base.shape[0:2])
    pyramid = []
    factors = []
    for target in cfg.TEST.SCALES:
        factor = float(target) / float(short_side)
        # Clamp so the long side never exceeds MAX_SIZE.
        if np.round(factor * long_side) > cfg.TEST.MAX_SIZE:
            factor = float(cfg.TEST.MAX_SIZE) / float(long_side)
        pyramid.append(cv2.resize(base, None, None, fx=factor, fy=factor,
                                  interpolation=cv2.INTER_LINEAR))
        factors.append(factor)
    return im_list_to_blob(pyramid), np.array(factors)
def _get_image_blob(roidb):
    """Build an input blob from (possibly multi-frame) roidb entries, using
    one randomly sampled training scale per entry.

    Returns the blob and one scale factor per roidb entry.
    """
    n = len(roidb)
    # Draw a random scale index for every entry in the batch.
    scale_inds = np.random.randint(0, high=len(cfg.TRAIN.SCALES), size=n)
    processed_ims = []
    im_scales = []
    for i, entry in enumerate(roidb):
        frames = image_utils.read_image_video(entry)
        target = cfg.TRAIN.SCALES[scale_inds[i]]
        for j, frame in enumerate(frames):
            if entry['flipped']:
                frame = frame[:, ::-1, :]
            frame, frame_scale = blob_utils.prep_im_for_blob(
                frame, cfg.PIXEL_MEANS, [target], cfg.TRAIN.MAX_SIZE)
            frames[j] = frame[0]
        # All frames of one entry share a size, so the last frame's scale
        # is valid for the whole entry.
        im_scales.append(frame_scale[0])
        processed_ims += frames
    return blob_utils.im_list_to_blob(processed_ims), im_scales
def _get_image_blob(roidb, scale_inds):
    """Build an input blob from .npy image arrays (multi-channel supported)
    at the specified training scales.
    """
    processed_ims = []
    im_scales = []
    for i in xrange(len(roidb)):
        entry = roidb[i]
        # Images are stored as raw numpy arrays rather than encoded files so
        # arbitrary channel counts are supported.
        img = np.load(entry['image'])
        if img.ndim != 3:
            # Promote 2-D arrays to H x W x 1.
            img = np.expand_dims(img, axis=2)
        if entry['flipped']:
            img = img[:, ::-1, :]
        img, factor = prep_im_for_blob(img, cfg.PIXEL_MEANS,
                                       cfg.TRAIN.SCALES[scale_inds[i]],
                                       cfg.TRAIN.MAX_SIZE)
        im_scales.append(factor)
        processed_ims.append(img)
    return im_list_to_blob(processed_ims), im_scales
def _get_image_blob(roidb, scale_inds):
    """Builds an input blob from the images in the roidb at the specified
    scales, reading color or grayscale per cfg.TRAIN.IS_COLOR.
    """
    num_images = len(roidb)
    processed_ims = []
    im_scales = []
    for i in xrange(num_images):
        if cfg.TRAIN.IS_COLOR == True:
            im = cv2.imread(roidb[i]['image'])
            if roidb[i]['flipped']:
                im = im[:, ::-1, :]
        else:
            # NOTE(review): cv2.CV_LOAD_IMAGE_GRAYSCALE is the legacy
            # OpenCV 2.x constant (IMREAD_GRAYSCALE in 3.x+) -- confirm the
            # installed OpenCV still exposes it.
            im = cv2.imread(roidb[i]['image'],
                            flags=cv2.CV_LOAD_IMAGE_GRAYSCALE)
            #im = cv2.cvtColor(gim, cv2.COLOR_GRAY2BGR)
            # Grayscale arrays are 2-D, so flip only the width axis.
            if roidb[i]['flipped']:
                im = im[:, ::-1]
        target_size = cfg.TRAIN.SCALES[scale_inds[i]]
        # NOTE(review): in the grayscale branch prep_im_for_blob receives a
        # 2-D image; presumably cfg.PIXEL_MEANS broadcasts there -- confirm.
        im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
                                        cfg.TRAIN.MAX_SIZE)
        im_scales.append(im_scale)
        processed_ims.append(im)
    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)
    return blob, im_scales
def _get_image_blob(im):
    """Converts an image into a network input.

    Arguments:
        im (list of ndarray): a list of color images in BGR order. In case
            of video it is a list of frames, else it is a list with len = 1.

    Returns:
        blob (ndarray): a data blob holding an image pyramid (or video
            pyramid)
        im_scale_factors (ndarray): array of image scales (relative to im)
            used in the image pyramid
    """
    all_processed_ims = []  # contains a list for each frame, for each scale
    all_im_scale_factors = []
    for frame in im:
        processed_ims, im_scale_factors = blob_utils.prep_im_for_blob(
            frame, cfg.PIXEL_MEANS, cfg.TEST.SCALES, cfg.TEST.MAX_SIZE)
        all_processed_ims.append(processed_ims)
        all_im_scale_factors.append(im_scale_factors)
    # All the im_scale_factors will be the same (frames of one video share a
    # size), so just take the first one after asserting they match.
    for el in all_im_scale_factors:
        assert(all_im_scale_factors[0] == el)
    im_scale_factors = all_im_scale_factors[0]
    # Now get all frames with corresponding scale next to each other:
    # iterate scales in the outer loop and frames in the inner loop.
    processed_ims = []
    for i in range(len(all_processed_ims[0])):
        for frames_at_specific_scale in all_processed_ims:
            processed_ims.append(frames_at_specific_scale[i])
    # Now processed_ims contains
    # [frame1_scale1, frame2_scale1..., frame1_scale2, frame2_scale2...] etc
    blob = blob_utils.im_list_to_blob(processed_ims)
    return blob, np.array(im_scale_factors)
def _get_image_blob(roidb):
    """Build an input blob from roidb images, one random training scale per
    image.

    Returns the [n, c, h, w] blob and the per-image scale factors.
    """
    n = len(roidb)
    # Pick a random scale index per image for this minibatch.
    chosen = np.random.randint(0, high=len(cfg.TRAIN.SCALES), size=n)
    ims = []
    scales = []
    for i, entry in enumerate(roidb):
        img = cv2.imread(entry['image'])
        assert img is not None, \
            'Failed to read image \'{}\''.format(entry['image'])
        # If images were read without opencv, grayscale inputs would need
        # channel stacking and an RGB->BGR flip here (not needed with cv2).
        if entry['flipped']:
            img = img[:, ::-1, :]
        prepped, factors = blob_utils.prep_im_for_blob(
            img, cfg.PIXEL_MEANS, [cfg.TRAIN.SCALES[chosen[i]]],
            cfg.TRAIN.MAX_SIZE)
        scales.append(factors[0])
        ims.append(prepped[0])
    # Pack into a single [n, c, h, w] blob.
    return blob_utils.im_list_to_blob(ims), scales
def _get_rprocessed_image_blob(roidb, scale_inds, angles):
    """Build an input blob from roidb images, applying the per-image
    rotation (for entries marked 'rotated') before scaling.
    """
    processed_ims = []
    im_scales = []
    for i in xrange(len(roidb)):
        entry = roidb[i]
        img = cv2.imread(entry['image'])
        if entry['flipped']:
            img = img[:, ::-1, :]
        if entry['rotated']:
            h, w = img.shape[:2]
            # Rotate anti-clockwise about the image center, scale unchanged.
            rot = cv2.getRotationMatrix2D((w / 2, h / 2), angles[i], 1.0)
            img = cv2.warpAffine(img, rot, (w, h))
        img, factor = prep_im_for_blob(img, cfg.PIXEL_MEANS,
                                       cfg.TRAIN.SCALES[scale_inds[i]],
                                       cfg.TRAIN.MAX_SIZE)
        im_scales.append(factor)
        processed_ims.append(img)
    return im_list_to_blob(processed_ims), im_scales
def _get_image_blob(roidb, scale_inds):
    """Build an input blob from roidb images at the specified scales.

    Also records each original image shape (height, width, channels).
    """
    processed_ims = []
    im_scales = []
    im_shapes = []
    for i in xrange(len(roidb)):
        entry = roidb[i]
        img = cv2.imread(entry['image'])
        if entry['flipped']:
            img = img[:, ::-1, :]
        # Shape of the un-resized image, reported back to the caller.
        im_shapes.append(img.shape)
        img, factor = prep_im_for_blob(img, cfg.PIXEL_MEANS,
                                       cfg.TRAIN.SCALES[scale_inds[i]],
                                       cfg.TRAIN.MAX_SIZE)
        im_scales.append(factor)
        processed_ims.append(img)
    return im_list_to_blob(processed_ims), im_scales, im_shapes
def _get_image_blob(roidb, scale_inds, data_i):
    """Builds a 6-channel input blob: each image stacked with its normal
    map ("<image>_norm.png") at the specified scales.
    """
    num_images = len(roidb)
    processed_ims = []
    im_scales = []
    for i in xrange(num_images):
        imname1 = roidb[i]["image"][data_i]
        imname2 = imname1 + "_norm.png"
        im1 = cv2.imread(imname1)
        im2 = cv2.imread(imname2)
        if roidb[i]["flipped"]:
            im1 = im1[:, ::-1, :]
            im2 = im2[:, ::-1, :]
            # NOTE(review): channel 2 of the second image is inverted on
            # flip -- presumably the x component of a normal map, which
            # negates under horizontal mirroring; confirm.
            im2[:, :, 2] = 255 - im2[:, :, 2]
        im = np.zeros((im1.shape[0], im1.shape[1], 6))
        im = im.astype("uint8")
        # Reverse channel order of both images before stacking.
        im1 = im1[:, :, ::-1]
        im2 = im2[:, :, ::-1]
        im[:, :, 0:3] = im1
        im[:, :, 3:6] = im2
        target_size = cfg.TRAIN.SCALES[scale_inds[i]]
        # Scalar mean 127.5 broadcasts over all 6 channels.
        im, im_scale = prep_im_for_blob(im, 127.5, target_size,
                                        cfg.TRAIN.MAX_SIZE)
        im_scales.append(im_scale)
        processed_ims.append(im)
    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)
    return blob, im_scales
def _get_image_blob(imdb, roidb, scale_inds):
    """Build an input blob from images stored as serialized protos in imdb,
    at the specified training scales.
    """
    processed_ims = []
    im_scales = []
    for i in xrange(len(roidb)):
        entry = roidb[i]
        # Decode the image from its serialized proto payload.
        proto = imdb.get_proto_at(entry['image'])
        img = io.imread(BytesIO(proto.data))
        # Reverse the channel order for the network (RGB -> BGR presumably).
        img = img[:, :, ::-1]
        if entry['flipped']:
            img = img[:, ::-1, :]
        img, factor = prep_im_for_blob(img, cfg.PIXEL_MEANS,
                                       cfg.TRAIN.SCALES[scale_inds[i]],
                                       cfg.TRAIN.MAX_SIZE,
                                       cfg.TRAIN.SCALE_MULTIPLE_OF)
        im_scales.append(factor)
        processed_ims.append(img)
    return im_list_to_blob(processed_ims), im_scales
def _get_image_blob(roidb):
    """Build a single-image blob at one randomly chosen training scale.

    Returns the blob and the scale factor applied.
    """
    img = cv2.imread(roidb['image'])
    if roidb['flipped']:
        img = img[:, ::-1, :]
    # Pick one of the configured training scales at random.
    chosen = np.random.choice(cfg.TRAIN.SCALES)
    img, factor = prep_im_for_blob(
        img, cfg.PIXEL_MEANS, chosen, cfg.TRAIN.MAX_SIZE)
    return im_list_to_blob([img]), factor
def _get_image_blob(im, im_scales):
    """
    :param im: input image
    :param im_scales: a list of scale coefficients
    :return: A list of network blobs each containing a resized ver. of the
        image
    """
    # Subtract the pixel means on a float copy.
    demeaned = im.astype(np.float32, copy=True) - cfg.PIXEL_MEANS
    blobs = []
    for factor in im_scales:
        if factor == 1.0:
            # No resize needed at unit scale.
            scaled = demeaned
        else:
            scaled = cv2.resize(demeaned, None, None, fx=factor, fy=factor,
                                interpolation=cv2.INTER_LINEAR)
        blobs.append({'data': im_list_to_blob([scaled])})
    return blobs
def _get_image_blob(roidb, scale_ind):
    """Builds paired RGB and depth input blobs from the roidb at the base
    scale selected by scale_ind.

    Returns (image blob, depth blob, list of scale factors).
    """
    num_images = len(roidb)
    processed_ims = []
    processed_ims_depth = []
    im_scales = []
    for i in xrange(num_images):
        # rgba
        rgba = cv2.imread(roidb[i]['image'], cv2.IMREAD_UNCHANGED)
        im = rgba[:, :, :3]
        alpha = rgba[:, :, 3]
        # Paint fully transparent pixels white.
        I = np.where(alpha == 0)
        im[I[0], I[1], :] = 255
        if roidb[i]['flipped']:
            im = im[:, ::-1, :]
        im_orig = im.astype(np.float32, copy=True)
        im_orig -= cfg.PIXEL_MEANS
        im_scale = cfg.TRAIN.SCALES_BASE[scale_ind]
        im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale,
                        interpolation=cv2.INTER_LINEAR)
        im_scales.append(im_scale)
        processed_ims.append(im)
        # depth: normalize to [0, 255] and replicate to 3 channels so the
        # same mean subtraction can be applied as for the color image.
        im_depth = cv2.imread(roidb[i]['depth'],
                              cv2.IMREAD_UNCHANGED).astype(np.float32)
        im_depth = im_depth / im_depth.max() * 255
        im_depth = np.tile(im_depth[:, :, np.newaxis], (1, 1, 3))
        if roidb[i]['flipped']:
            im_depth = im_depth[:, ::-1]
        im_orig = im_depth.astype(np.float32, copy=True)
        im_orig -= cfg.PIXEL_MEANS
        # Depth reuses the same scale factor as the color image.
        im_depth = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale,
                              interpolation=cv2.INTER_LINEAR)
        processed_ims_depth.append(im_depth)
    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims, 3)
    blob_depth = im_list_to_blob(processed_ims_depth, 3)
    return blob, blob_depth, im_scales
def _get_image_blob(im, im_depth): """Converts an image into a network input. Arguments: im (ndarray): a color image in BGR order Returns: blob (ndarray): a data blob holding an image pyramid im_scale_factors (list): list of image scales (relative to im) used in the image pyramid """ # RGB im_orig = im.astype(np.float32, copy=True) im_orig -= cfg.PIXEL_MEANS processed_ims = [] im_scale_factors = [] assert len(cfg.TEST.SCALES_BASE) == 1 im_scale = cfg.TEST.SCALES_BASE[0] im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale, interpolation=cv2.INTER_LINEAR) im_scale_factors.append(im_scale) processed_ims.append(im) # im_info im_info = np.hstack((im.shape[:2], im_scale))[np.newaxis, :] # depth im_orig = im_depth.astype(np.float32, copy=True) im_orig = im_orig / im_orig.max() * 255 im_orig = np.tile(im_orig[:,:,np.newaxis], (1,1,3)) im_orig -= cfg.PIXEL_MEANS processed_ims_depth = [] im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale, interpolation=cv2.INTER_LINEAR) processed_ims_depth.append(im) # Create a blob to hold the input images blob = im_list_to_blob(processed_ims, 3) blob_depth = im_list_to_blob(processed_ims_depth, 3) return blob, blob_depth, im_info, np.array(im_scale_factors)
def _get_image_blob(self, im):
    """Mean-subtract a single image and pack it into a network blob."""
    demeaned = im.astype(np.float32, copy=True)
    demeaned -= cfg.PIXEL_MEANS
    # A one-element list: the blob holds exactly this image.
    return im_list_to_blob([demeaned])
def _get_roi_blob(roidb, pid):
    """Crop the ground-truth box of person `pid` from the roidb image,
    mean-subtract, resize to 64x160, and pack into a blob.
    """
    img = cv2.imread(roidb['image'])
    if roidb['flipped']:
        img = img[:, ::-1, :]
    img = img.astype(np.float32, copy=False)
    # Locate the first ground-truth box whose pid matches the query.
    idx = np.where(roidb['gt_pids'] == pid)[0][0]
    x1, y1, x2, y2 = roidb['boxes'][idx]
    crop = img[y1:y2 + 1, x1:x2 + 1, :]
    crop -= cfg.PIXEL_MEANS
    crop = cv2.resize(crop, (64, 160), interpolation=cv2.INTER_LINEAR)
    return im_list_to_blob([crop])
def _get_image_blob(roidb, scale_inds): """Builds an input blob from the images in the roidb at the specified scales. """ # change to read flow images, assuming the names are without ".jpg" # path/000000 num_images = len(roidb) processed_ims = [] im_scales = [] for i in xrange(num_images): imname = roidb[i]["image"] # print imname imnames = imname.split("/") imname2 = imnames[-1] imid = int(imname2) srcdir = imname[0 : -len(imname2)] im_scale = 1 im = 0 for j in range(10): nowimid = imid + j nowname = "{0:06d}".format(nowimid) nowname = srcdir + nowname xname = nowname + "_x.jpg" yname = nowname + "_y.jpg" imx = cv2.imread(xname, cv2.CV_LOAD_IMAGE_GRAYSCALE) imy = cv2.imread(yname, cv2.CV_LOAD_IMAGE_GRAYSCALE) if roidb[i]["flipped"]: imx = imx[:, ::-1] imx = 255 - imx # target_size = cfg.TRAIN.SCALES[scale_inds[i]] # imx, im_scale = prep_im_for_blob(imx, cfg.PIXEL_MEANS, target_size, cfg.TRAIN.MAX_SIZE) # imy, im_scale = prep_im_for_blob(imy, cfg.PIXEL_MEANS, target_size, cfg.TRAIN.MAX_SIZE) if j == 0: im = np.zeros((imx.shape[0], imx.shape[1], 20)) im = im.astype("uint8") im[:, :, j * 2] = imx im[:, :, j * 2 + 1] = imy target_size = cfg.TRAIN.SCALES[scale_inds[i]] im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size, cfg.TRAIN.MAX_SIZE) im_scales.append(im_scale) processed_ims.append(im) # Create a blob to hold the input images blob = im_list_to_blob(processed_ims) return blob, im_scales
def _get_image_blob(self, sample):
    """Build image and label blobs for a batch of person samples.

    Returns (image_blob, labels) where each label comes from the
    per-person lookup table.
    """
    ims = []
    labels = []
    for i in range(self.batch_size):
        entry = sample[i]
        img = cv2.imread(cfg.IMAGEPATH + entry['picname'])
        if entry['flipped']:
            img = img[:, ::-1, :]
        # Person identity is encoded as the leading path component.
        person = entry['picname'].split('/')[0]
        labels.append(self._data._sample_label[person])
        ims.append(prep_im_for_blob(img))
    return im_list_to_blob(ims), labels
def _get_image_blob(im):
    """Convert a BGR image into a test-time pyramid blob.

    Returns the blob plus an ndarray of the scale factors used.
    """
    # prep_im_for_blob handles mean subtraction and multi-scale resizing.
    pyramid, factors = blob_utils.prep_im_for_blob(
        im, cfg.PIXEL_MEANS, cfg.TEST.SCALES, cfg.TEST.MAX_SIZE)
    return blob_utils.im_list_to_blob(pyramid), np.array(factors)
def _get_label_blob(roidb, im_scales, num_classes):
    """ build the label blob

    Reads each per-pixel label image, maps its class colors to class ids,
    rescales it with the image's scale factor, then resizes the whole blob
    to a reduced resolution.
    NOTE(review): num_classes is currently unused here.
    """
    num_images = len(roidb)
    processed_ims_cls = []
    for i in xrange(num_images):
        # read label image
        im = cv2.imread(roidb[i]['label'], cv2.IMREAD_UNCHANGED)
        if roidb[i]['flipped']:
            im = im[:, ::-1, :]
        im_cls = _process_label_image(im, roidb[i]['class_colors'])
        # rescale image; nearest-neighbor keeps class ids discrete
        im_scale = im_scales[i]
        im = cv2.resize(im_cls, None, None, fx=im_scale, fy=im_scale,
                        interpolation=cv2.INTER_NEAREST)
        processed_ims_cls.append(im)
    # Create a blob to hold the input images
    blob_cls = im_list_to_blob(processed_ims_cls, 1)
    #"""
    # blob image size
    image_height = blob_cls.shape[2]
    image_width = blob_cls.shape[3]
    # height and width of the heatmap: emulate four rounded halvings then
    # multiply back by 8 -- presumably matching the downstream network's
    # output resolution; confirm against the model definition.
    height = np.floor(image_height / 2.0 + 0.5)
    height = np.floor(height / 2.0 + 0.5)
    height = np.floor(height / 2.0 + 0.5)
    height = np.floor(height / 2.0 + 0.5)
    height = int(height * 8)
    width = np.floor(image_width / 2.0 + 0.5)
    width = np.floor(width / 2.0 + 0.5)
    width = np.floor(width / 2.0 + 0.5)
    width = np.floor(width / 2.0 + 0.5)
    width = int(width * 8)
    # rescale the blob
    blob_cls_rescale = np.zeros((num_images, 1, height, width),
                                dtype=np.float32)
    for i in xrange(num_images):
        blob_cls_rescale[i, 0, :, :] = cv2.resize(
            blob_cls[i, 0, :, :], dsize=(width, height),
            interpolation=cv2.INTER_NEAREST)
    return blob_cls_rescale
def _get_image_blob(im, roidb, scale_inds):
    """
    Builds an input blob from the given image at the scales specified for
    each roidb entry.
    """
    num_images = len(roidb)
    processed_ims = []
    im_scales = []
    for i in xrange(num_images):
        target_size = cfg.TRAIN.SCALES[scale_inds[i]]
        # NOTE(review): `im` is overwritten each iteration, so for
        # num_images > 1 later iterations re-process the already-resized
        # image rather than the original -- presumably only ever called
        # with a single roidb entry; confirm with callers.
        im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
                                        cfg.TRAIN.MAX_SIZE)
        im_scales.append(im_scale)
        processed_ims.append(im)
    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)
    return blob, im_scales
def _get_image_blob(im):
    """Build a test-time pyramid blob, optionally zero-padding each level so
    its height/width are multiples of cfg.TRAIN.IMAGE_STRIDE.

    Returns the blob and an ndarray of the scale factors used.
    """
    base = im.astype(np.float32, copy=True)
    base -= cfg.PIXEL_MEANS
    short_side = np.min(base.shape[0:2])
    long_side = np.max(base.shape[0:2])
    stride = cfg.TRAIN.IMAGE_STRIDE
    pyramid = []
    factors = []
    for target in cfg.TEST.SCALES:
        factor = float(target) / float(short_side)
        # Keep the scaled long side within MAX_SIZE.
        if np.round(factor * long_side) > cfg.TEST.MAX_SIZE:
            factor = float(cfg.TEST.MAX_SIZE) / float(long_side)
        resized = cv2.resize(base, None, None, fx=factor, fy=factor,
                             interpolation=cv2.INTER_LINEAR)
        if stride != 0:
            # Zero-pad bottom/right up to the next multiple of stride.
            ph = int(np.ceil(resized.shape[0] / float(stride)) * stride)
            pw = int(np.ceil(resized.shape[1] / float(stride)) * stride)
            padded = np.zeros((ph, pw, resized.shape[2]))
            padded[:resized.shape[0], :resized.shape[1], :] = resized
            resized = padded
        factors.append(factor)
        pyramid.append(resized)
    return im_list_to_blob(pyramid), np.array(factors)
def _get_image_blob(image):
    """Converts an image into a network input.

    Arguments:
        image: a PIL Image.

    Returns:
        blob (ndarray): a data blob holding an image pyramid
        im_scale_factors (ndarray): image scales (relative to the input)
            used in the image pyramid
    """
    width, height = image.size
    im_size_min = min(width, height)
    im_size_max = max(width, height)
    processed_ims = []
    im_scale_factors = []
    # scale the image to net input
    for target_size in cfg.TEST.SCALES:
        im_scale = float(target_size) / float(im_size_min)
        # Prevent the biggest axis from being more than MAX_SIZE.
        # BUG FIX: the cap must test the *scaled* long side; the original
        # compared the raw size, so the clamp fired for any image whose
        # unscaled long side exceeded MAX_SIZE even when the scaled one
        # would not have.
        if np.round(im_scale * im_size_max) > cfg.TEST.MAX_SIZE:
            im_scale = float(cfg.TEST.MAX_SIZE) / float(im_size_max)
        # BUG FIX: resize from the original image each iteration (the
        # original rebound `image`, so later pyramid levels resized an
        # already-resized ndarray and `image.size` would have failed).
        resized = image.resize((int(round(width * im_scale)),
                                int(round(height * im_scale))),
                               PIL.Image.BILINEAR)
        im_scale_factors.append(im_scale)
        # from PIL.Image to Numpy (non-in-place ops: PIL-backed arrays can
        # be read-only)
        arr = np.asarray(resized, dtype=np.float32)
        # RGB to BGR
        arr = np.flip(arr, axis=2)
        arr = arr - cfg.PIXEL_MEANS
        processed_ims.append(arr)
    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)
    return blob, np.array(im_scale_factors)
def _get_image_blob(roidb, scale_inds):
    """Build an input blob from roidb images (forced 3-channel color read)
    at the specified training scales.
    """
    processed_ims = []
    im_scales = []
    for i in xrange(len(roidb)):
        entry = roidb[i]
        # IMREAD_COLOR forces a 3-channel BGR image even for gray sources.
        img = cv2.imread(entry['image'], cv2.IMREAD_COLOR)
        if entry['flipped']:
            img = img[:, ::-1, :]
        img, factor = prep_im_for_blob(img, cfg.PIXEL_MEANS,
                                       cfg.TRAIN.SCALES[scale_inds[i]],
                                       cfg.TRAIN.MAX_SIZE)
        im_scales.append(factor)
        processed_ims.append(img)
    return im_list_to_blob(processed_ims), im_scales
def _get_image_blob(roidb, scale_inds):
    """Build an input blob from roidb images at the specified scales.

    Returns the blob, per-image scale factors, and per-image resized
    [height, width] pairs.
    """
    processed_ims = []
    im_scales = []
    im_info = []
    for i in xrange(len(roidb)):
        entry = roidb[i]
        img = cv2.imread(entry['image'])
        if entry['flipped']:
            img = img[:, ::-1, :]
        img, factor = prep_im_for_blob(img, cfg.PIXEL_MEANS,
                                       cfg.TRAIN.SCALES[scale_inds[i]],
                                       cfg.TRAIN.MAX_SIZE)
        im_scales.append(factor)
        # Height/width after resizing, before any blob padding.
        im_info.append([img.shape[0], img.shape[1]])
        processed_ims.append(img)
    return im_list_to_blob(processed_ims), im_scales, im_info
def _get_image_blob(ims, target_size):
    """Resize each BGR image to target_size x target_size (axes scaled
    independently), mean-subtract, and pack into one blob.
    """
    processed = []
    for img in ims:
        img = img.astype(np.float32, copy=False)
        img = img - cfg.PIXEL_MEANS
        h, w = img.shape[0:2]
        # Stretch both axes independently to the square target size.
        img = cv2.resize(img, None, None,
                         fx=float(target_size) / w,
                         fy=float(target_size) / h,
                         interpolation=cv2.INTER_LINEAR)
        processed.append(img)
    return im_list_to_blob(processed)
def _get_image_blob(roidb, scale_inds):
    """Builds an input blob from the images in the roidb at the specified
    scales.
    """
    num_images = len(roidb)
    processed_ims = []
    im_scales = []
    for i in xrange(num_images):
        im = cv2.imread(roidb[i]['image'])
        if roidb[i]['flipped']:
            im = im[:, ::-1, :]
        # Read the pyramid target size from cfg.TRAIN.SCALES (the config
        # typically provides only one size).
        target_size = cfg.TRAIN.SCALES[scale_inds[i]]
        # Mean-subtract and resize; returns the image and its scale factor.
        im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
                                        cfg.TRAIN.MAX_SIZE)
        im_scales.append(im_scale)
        processed_ims.append(im)
    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)
    # Return the image blob and the list of scale factors used.
    return blob, im_scales
def _get_image_blob(roidb, scale_inds):
    """Build an input blob from roidb images at the specified scales.

    Returns the blob, per-image scale factors, and a stacked float array of
    the shapes reported by prep_im_for_blob.
    """
    processed_ims = []
    im_scales = []
    im_shapes = np.zeros((0, 2), dtype=np.float32)
    for i in xrange(len(roidb)):
        entry = roidb[i]
        img = cv2.imread(entry['image'])
        if entry['flipped']:
            img = img[:, ::-1, :]
        img, factor, shape = prep_im_for_blob(
            img, cfg.PIXEL_MEANS, cfg.TRAIN.SCALES[scale_inds[i]])
        im_scales.append(factor)
        processed_ims.append(img)
        # Accumulate each reported shape as a new row.
        im_shapes = np.vstack((im_shapes, shape))
    return im_list_to_blob(processed_ims), im_scales, im_shapes
def get_image_blob(self, im):
    """Build a normalized multi-scale blob from a BGR image.

    Pixels are scaled to [0, 1] and standardized with fixed channel
    mean/std before resizing at each configured scale.

    Returns the blob and an ndarray of scale factors.
    """
    base = im.astype(np.float32, copy=True) / 255.0
    # Fixed per-channel normalization statistics.
    mean = np.array([[[0.485, 0.456, 0.406]]])
    std = np.array([[[0.229, 0.224, 0.225]]])
    pyramid = []
    factors = []
    for target in self.SCALES:
        scaled, factor = prep_im_for_blob(base, target, self.MAX_SIZE,
                                          mean=mean, std=std)
        factors.append(factor)
        pyramid.append(scaled)
    return im_list_to_blob(pyramid), np.array(factors)
def prepare_mnc_args(im, net):
    """Prepare forward-pass inputs (data + im_info blobs) for an MNC net.

    Reshapes the network's input blobs in place and returns the kwargs for
    net.forward together with the list of scale factors.
    """
    # Prepare image data blob
    blobs = {'data': None}
    processed_ims = []
    # NOTE(review): mixes cfg.TEST.SCALES with cfg.TRAIN.MAX_SIZE --
    # presumably a shared max size across phases; confirm.
    im, im_scale_factors = \
        prep_im_for_blob(im, cfg.PIXEL_MEANS, cfg.TEST.SCALES[0],
                         cfg.TRAIN.MAX_SIZE)
    processed_ims.append(im)
    blobs['data'] = im_list_to_blob(processed_ims)
    # Prepare image info blob
    im_scales = [np.array(im_scale_factors)]
    assert len(im_scales) == 1, 'Only single-image batch implemented'
    im_blob = blobs['data']
    # im_info row is (blob height, blob width, scale).
    blobs['im_info'] = np.array(
        [[im_blob.shape[2], im_blob.shape[3], im_scales[0]]],
        dtype=np.float32)
    # Reshape network inputs and do forward (reshape mutates `net` in place)
    net.blobs['data'].reshape(*blobs['data'].shape)
    net.blobs['im_info'].reshape(*blobs['im_info'].shape)
    forward_kwargs = {
        'data': blobs['data'].astype(np.float32, copy=False),
        'im_info': blobs['im_info'].astype(np.float32, copy=False)
    }
    return forward_kwargs, im_scales
def _get_image_blob(roidb, scale_inds):
    """Builds an input blob from the images in the roidb at the specified
    scales, applying any recorded data-augmentation steps (flips,
    brightness, rotation, shift, zoom, crop) before resizing.
    """
    num_images = len(roidb)  # number of images fed per batch
    processed_ims = []
    im_scales = []
    for i in range(num_images):
        im = cv2.imread(roidb[i]['image'])  # load the image
        if 'hor_flipped' in roidb[i] and roidb[i]['hor_flipped']:
            im = im[:, ::-1, :]  # horizontal flip
        if 'ver_flipped' in roidb[i] and roidb[i]['ver_flipped']:
            im = im[::-1, :, :]  # vertical flip
        if 'bright_scala' in roidb[i] and roidb[i]['bright_scala'] != 1:
            # brightness adjustment
            im = data_augment._bright_adjuest(im, roidb[i]['bright_scala'])
        if 'rotate_angle' in roidb[i] and roidb[i]['rotate_angle'] != 0:
            # rotation augmentation
            im = data_augment._rotate_image(im, roidb[i]['rotate_angle'])
        if 'shift_x' in roidb[i] and 'shift_y' in roidb[i]:
            # translation augmentation
            offset = (int(roidb[i]['shift_x']), int(roidb[i]['shift_y']))
            im = data_augment._shift_image(im, offset)
        if 'zoom_x' in roidb[i] and 'zoom_y' in roidb[i]:
            # zoom augmentation
            factor_x, factor_y = roidb[i]['zoom_x'], roidb[i]['zoom_y']
            im = data_augment._zoom_image(im, factor_x, factor_y)
        if 'position' in roidb[i] and 'crop_size_width' in roidb[i] and 'crop_size_height' in roidb[i]:
            # cropping augmentation
            crop_size = (roidb[i]['crop_size_width'], roidb[i]['crop_size_height'])
            scale = cfg.TRAIN.RESIZE_SCALE
            position = roidb[i]['position']
            im = data_augment.random_crop_image(im, crop_size, scale, position)
        target_size = cfg.TRAIN.SCALES[scale_inds[i]]  # chosen target size
        # Mean-subtract and resize; returns the image and its scale factor.
        im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
                                        cfg.TRAIN.MAX_SIZE)
        im_scales.append(im_scale)  # record the scale factor
        processed_ims.append(im)  # record the resized image
    # Pack the processed images into a single input blob.
    blob = im_list_to_blob(processed_ims)
    return blob, im_scales
def _get_image_blob(roidb, scale_inds):
    """Build an input blob from roidb images, honoring per-entry blur,
    pre-scale, and crop annotations before the standard resize.
    """
    processed_ims = []
    im_scales = []
    for i in xrange(len(roidb)):
        entry = roidb[i]
        img = cv2.imread(entry['image'])
        if entry['flipped']:
            img = img[:, ::-1, :]
        if entry['blurred']:
            img = cv2.GaussianBlur(img, (7, 7), 20)
        if 'scale' in entry.keys():
            if entry['scale'] != 1:
                # Pre-scale (floor-rounded) before the target-size resize.
                new_w = np.floor(img.shape[1] * entry['scale']).astype(np.int32)
                new_h = np.floor(img.shape[0] * entry['scale']).astype(np.int32)
                img = cv2.resize(img, (new_w, new_h))
        if 'crop_box' in entry.keys():
            # crop_box is (x1, y1, x2, y2).
            img = img[entry['crop_box'][1]:entry['crop_box'][3],
                      entry['crop_box'][0]:entry['crop_box'][2]]
        img, factor = prep_im_for_blob(img, cfg.PIXEL_MEANS,
                                       cfg.TRAIN.SCALES[scale_inds[i]],
                                       cfg.TRAIN.MAX_SIZE)
        im_scales.append(factor)
        processed_ims.append(img)
    return im_list_to_blob(processed_ims), im_scales
def _get_image_blob(roidb, scale_inds):
    """Builds an input blob from the images in the roidb, choosing each
    target size so the longer image side comes out near 600 pixels.
    """
    num_images = len(roidb)
    processed_ims = []
    im_scales = []
    for i in range(num_images):
        #im = cv2.imread(roidb[i]['image'])
        im = imread(roidb[i]['image'])
        # Grayscale images: replicate to 3 channels.
        if len(im.shape) == 2:
            im = im[:, :, np.newaxis]
            im = np.concatenate((im, im, im), axis=2)
        # flip the channel, since the original one using cv2
        # rgb -> bgr
        im = im[:, :, ::-1]
        if roidb[i]['flipped']:
            im = im[:, ::-1, :]
        # target_size = cfg.TRAIN.SCALES[scale_inds[i]]
        height, width, channel = im.shape
        max_side = max(height, width)
        small_side = min(height, width)
        # NOTE(review): 600 is hard-coded and scale_inds is ignored here --
        # presumably prep_im_for_blob scales by target/short-side (as in
        # the standard helper), which would put the long side at 600 px;
        # confirm against cfg.TRAIN.SCALES.
        target_size = 600 / max_side * small_side
        im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
                                        cfg.TRAIN.MAX_SIZE)
        im_scales.append(im_scale)
        processed_ims.append(im)
    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)
    return blob, im_scales
def _get_image_blob(roidb, scale_inds):
    """Builds an input blob from the images in the roidb at the specified
    scales.

    Args:
        roidb: list of dicts with 'image' and 'flipped' keys (typically a
            single entry per minibatch here).
        scale_inds: per-image indices into cfg.TRAIN.SCALES.

    Returns:
        blob: N x C x H x W input blob holding all processed images.
        im_scales: list of the scale factor applied to each image.
    """
    num_images = len(roidb)
    processed_ims = []
    im_scales = []
    # range instead of the Python-2-only xrange: compatible with both
    # Python 2 and 3, identical behavior in this loop.
    for i in range(num_images):
        im = cv2.imread(roidb[i]['image'])  # load the image (BGR)
        if roidb[i]['flipped']:
            im = im[:, ::-1, :]  # horizontal flip: reverse the width axis
        target_size = cfg.TRAIN.SCALES[scale_inds[i]]  # e.g. 600
        # prep_im_for_blob returns the mean-subtracted, resized image and
        # the scale factor that was applied.
        im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
                                        cfg.TRAIN.MAX_SIZE)
        im_scales.append(im_scale)
        processed_ims.append(im)
    # im_list_to_blob packs the processed images into a single
    # N x C x H x W blob.
    blob = im_list_to_blob(processed_ims)
    return blob, im_scales
def _get_image_blob_multiscale(roidb):
    """Builds an input blob from the images in the roidb at multiscales.

    Every image is emitted once per base scale in cfg.TRAIN.SCALES_BASE,
    so the blob holds len(roidb) * len(scales) images.
    """
    processed_ims = []
    im_scales = []
    scales = cfg.TRAIN.SCALES_BASE
    for entry in roidb:
        image = cv2.imread(entry['image'])
        if entry['flipped']:
            image = image[:, ::-1, :]
        # Mean-subtract once, then resize the same base image to every
        # configured scale.
        base = image.astype(np.float32, copy=True)
        base -= cfg.PIXEL_MEANS
        for factor in scales:
            resized = cv2.resize(base, None, None, fx=factor, fy=factor,
                                 interpolation=cv2.INTER_LINEAR)
            im_scales.append(factor)
            processed_ims.append(resized)
    # Pack all scaled copies into one input blob.
    blob = im_list_to_blob(processed_ims)
    return blob, im_scales
def _get_image_blob(roidb, scale_inds):
    """Builds an input blob from the images in the roidb at the specified
    scales.

    NOTE: only the FIRST roidb entry is processed — num_images is
    deliberately fixed to 1 rather than len(roidb).
    """
    processed_ims = []
    im_scales = []
    num_images = 1
    for i in range(num_images):
        image = cv2.imread(roidb[i]['image'])
        if roidb[i]['flipped']:
            image = image[:, ::-1, :]  # horizontal flip
        target_size = config.SCALES[scale_inds[i]]
        # Mean-subtract and rescale; returns image plus applied factor.
        image, factor = prep_im_for_blob(image, config.PIXEL_MEANS,
                                         target_size, config.TRAIN_MAX_SIZE)
        im_scales.append(factor)
        processed_ims.append(image)
    # Pack the processed image(s) into a single input blob.
    blob = im_list_to_blob(processed_ims)
    return blob, im_scales
def _get_image_blob(roidb, scale_inds):
    """Builds an input blob from the images in the roidb at the specified
    scales.

    Args:
        roidb: list of dicts with 'image' and 'flipped' keys.
        scale_inds: per-image indices into cfg.TRAIN.SCALES.

    Returns:
        blob: input blob holding all processed images.
        im_scales: list of the scale factor applied to each image.
    """
    # Removed: a dead tile-cropping experiment that survived only as a
    # no-op triple-quoted string (and contained copy-pasted duplicate
    # `j == 1` branches). It never executed.
    num_images = len(roidb)
    processed_ims = []
    im_scales = []
    # range (not the Python-2-only xrange) works on Python 2 and 3 alike.
    for i in range(num_images):
        im = cv2.imread(roidb[i]['image'])
        if roidb[i]['flipped']:
            im = im[:, ::-1, :]
        target_size = cfg.TRAIN.SCALES[scale_inds[i]]
        # This variant also passes IMAGE_STRIDE so the output is padded to
        # a stride multiple.
        im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
                                        cfg.TRAIN.MAX_SIZE,
                                        cfg.TRAIN.IMAGE_STRIDE)
        im_scales.append(im_scale)
        processed_ims.append(im)
    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)
    return blob, im_scales
def _get_image_blob(roidb, scale_inds, data_i):
    """Builds an input blob from the images in the roidb at the specified
    scales.

    Builds a 6-channel input: channels 0-2 are the color image, channels
    3-5 come from a companion '<name>_norm.png' file (presumably a surface
    normal map — TODO confirm against the dataset layout).

    Args:
        roidb: list of dicts; roidb[i]['image'] is indexable by data_i.
        scale_inds: per-image indices into cfg.TRAIN.SCALES.
        data_i: index selecting which image path to use from the entry.
    """
    num_images = len(roidb)
    processed_ims = []
    im_scales = []
    for i in xrange(num_images):
        imname1 = roidb[i]['image'][data_i]
        imname2 = imname1 + '_norm.png'  # companion normal-map file
        im1 = cv2.imread(imname1)
        im2 = cv2.imread(imname2)
        if roidb[i]['flipped']:
            im1 = im1[:, ::-1, :]
            im2 = im2[:, ::-1, :]
            # Negate channel 2 of the normal map on flip — presumably the
            # x-component of the normals stored in [0, 255]; verify.
            im2[:, :, 2] = 255 - im2[:, :, 2]
        # Stack both images into one 6-channel uint8 array.
        im = np.zeros((im1.shape[0], im1.shape[1], 6))
        im = im.astype('uint8')
        # BGR -> RGB for both inputs before stacking.
        im1 = im1[:, :, ::-1]
        im2 = im2[:, :, ::-1]
        im[:, :, 0:3] = im1
        im[:, :, 3:6] = im2
        target_size = cfg.TRAIN.SCALES[scale_inds[i]]
        # A scalar mean of 127.5 is subtracted from all six channels.
        im, im_scale = prep_im_for_blob(im, 127.5, target_size,
                                        cfg.TRAIN.MAX_SIZE)
        im_scales.append(im_scale)
        processed_ims.append(im)
    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)
    return blob, im_scales
def _get_image_blob(roidb, scale_inds, args):
    """Builds an input blob from the images in the roidb at the specified
    scales.

    PIL-based variant: images are loaded with Image.open (RGB order) and
    prep_im_for_blob additionally returns a crop box per image.

    Returns:
        blob, im_scales, crop_box (list of per-image crop boxes).
    """
    num_images = len(roidb)
    processed_ims = []
    im_scales = []
    crop_box = []
    for i in range(num_images):
        im = Image.open(roidb[i]['image'])
        im = np.array(im, dtype=np.float32)
        if "VOC2012" in roidb[i]['image']:
            # NOTE(review): (0, 1, 2) is a NO-OP channel selection; the
            # nearby "1 2 0" comment suggests a permutation was intended —
            # confirm whether this should be im[:, :, (1, 2, 0)].
            im = im[:, :, (0, 1, 2)]
        # Subtract the (RGB-order) channel means when requested; note
        # args.substract_mean is compared as the STRING "True".
        if args.substract_mean == "True":
            mean = (122.67891434, 116.66876762, 104.00698793)
            im -= mean
        #im = im.transpose((2, 0, 1))
        if roidb[i]['flipped']:
            im = im[:, ::-1, :]
        global_scale = args.scale_list[scale_inds[i]]
        # This prep_im_for_blob variant also returns the crop box applied.
        im, im_scale, im_crop_box = prep_im_for_blob(im, global_scale, args)
        crop_box.append(im_crop_box)
        im_scales.append(im_scale)
        processed_ims.append(im)
    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)
    return blob, im_scales, crop_box
def _get_image_blob(roidb, scale_inds):
    """Builds an input blob from the images in the roidb at the specified
    scales.

    Args:
        roidb: list of dicts with 'image' and 'flipped' keys.
        scale_inds: per-image indices into cfg.TRAIN.SCALES.

    Returns:
        blob: input blob holding all processed images.
        im_scales: list of the scale factor applied to each image.
    """
    num_images = len(roidb)
    processed_ims = []
    im_scales = []
    # range rather than the Python-2-only xrange: same behavior here,
    # compatible with Python 3.
    for i in range(num_images):
        im = cv2.imread(roidb[i]['image'])
        if roidb[i]['flipped']:
            # Horizontal flip: reverse the width axis.
            im = im[:, ::-1, :]
        target_size = cfg.TRAIN.SCALES[scale_inds[i]]  # SCALES, e.g. 600
        # Subtract pixel means and rescale; returns the resized image and
        # the scale factor applied.
        im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
                                        cfg.TRAIN.MAX_SIZE)  # MAX_SIZE 1000
        im_scales.append(im_scale)
        processed_ims.append(im)
    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)
    return blob, im_scales
def _get_image_blob(roidb, scale_inds):
    """Builds an input blob from the images in the roidb at the specified
    scales.

    Args:
        roidb: list of dicts with 'image' and 'flipped' keys.
        scale_inds: per-image indices into cfg.TRAIN.SCALES.

    Returns:
        blob: input blob holding all processed images.
        im_scales: list of the scale factor applied to each image.
    """
    # Removed: a large block of dead, commented-out code that rebuilt an
    # "original_ims" blob via prep_im_original_for_blob — it never ran and
    # only obscured the live return path.
    num_images = len(roidb)
    processed_ims = []
    im_scales = []
    # range (not the Python-2-only xrange) keeps this py2/py3 compatible.
    for i in range(num_images):
        im = cv2.imread(roidb[i]['image'])
        if roidb[i]['flipped']:
            im = im[:, ::-1, :]
        target_size = cfg.TRAIN.SCALES[scale_inds[i]]
        im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
                                        cfg.TRAIN.MAX_SIZE)
        im_scales.append(im_scale)
        processed_ims.append(im)
    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)
    return blob, im_scales
def _get_image_blob(roidb, scale_inds):
    """Builds an input blob from the images in the roidb at the specified
    scales.
    """
    processed_ims = []
    im_scales = []
    num_images = len(roidb)
    for idx in xrange(num_images):
        entry = roidb[idx]
        # im.shape is (rows, cols, channels), i.e. (height, width, channels).
        image = cv2.imread(entry['image'])
        if entry['flipped']:
            image = image[:, ::-1, :]
        size = cfg.TRAIN.SCALES[scale_inds[idx]]
        # Mean-subtract and rescale; factor is the applied scale.
        image, factor = prep_im_for_blob(image, cfg.PIXEL_MEANS, size,
                                         cfg.TRAIN.MAX_SIZE)
        im_scales.append(factor)
        processed_ims.append(image)
    # Pack the processed images into a single network-input blob.
    return im_list_to_blob(processed_ims), im_scales
def _get_image_blob(roidb, scale_inds):
    """Builds an input blob from the images in the roidb at the specified
    scales.
    """
    processed_ims = []
    im_scales = []
    for idx, entry in enumerate(roidb):
        image = cv2.imread(entry['image'])
        if entry['flipped']:
            image = image[:, ::-1, :]
        # prep_im_for_blob preprocesses one image at a time: subtract the
        # pixel means, then rescale toward the target size.
        image, factor = prep_im_for_blob(image, cfg.PIXEL_MEANS,
                                         cfg.TRAIN.SCALES[scale_inds[idx]],
                                         cfg.TRAIN.MAX_SIZE)
        im_scales.append(factor)
        processed_ims.append(image)
    # A single blob carries the whole (here: single-image) batch.
    blob = im_list_to_blob(processed_ims)
    return blob, im_scales
def _get_image_blob(im):
    """Converts an image into a network input.

    Arguments:
        im (ndarray): a color image in BGR order

    Returns:
        blob: a single-element LIST containing one blob for the fixed
            417x417 resize (not a bare ndarray — note for callers).
        im_scale_factors: NOTE(review) — despite the name and the original
            docstring, this holds the ORIGINAL (height, width) of the
            image, not scale ratios; confirm what callers expect before
            changing it.
    """
    im_orig = im.astype(np.float32, copy=True)
    im_orig -= cfg.PIXEL_MEANS
    im_shape = im_orig.shape
    im_size_max = np.max(im_shape[0:2])  # NOTE: computed but never used
    processed_ims = []
    im_scale_factors = []
    # Fixed-size network input; cv2.resize dsize is (width, height).
    im = cv2.resize(im_orig, (417, 417), interpolation=cv2.INTER_LINEAR)
    im_scale_factors.append(im_orig.shape[:2])
    processed_ims.append(im_list_to_blob([im]))
    blob = processed_ims
    return blob, np.array(im_scale_factors)
def _get_image_blob(roidb, scale_inds):
    """Builds an input blob from the images in the roidb at the specified
    scales.

    Typical config values (per the original author's notes):
    cfg.TRAIN.SCALES=(600,), cfg.TRAIN.MAX_SIZE=1000,
    cfg.PIXEL_MEANS=np.array([[[102.9801, 115.9465, 122.7717]]]).
    """
    processed_ims = []
    im_scales = []
    num_images = len(roidb)
    for idx in xrange(num_images):
        entry = roidb[idx]
        image = cv2.imread(entry['image'])
        if entry['flipped']:
            image = image[:, ::-1, :]  # horizontal flip
        size = cfg.TRAIN.SCALES[scale_inds[idx]]
        image, factor = prep_im_for_blob(image, cfg.PIXEL_MEANS, size,
                                         cfg.TRAIN.MAX_SIZE)
        im_scales.append(factor)
        processed_ims.append(image)
    # Assemble the final input blob from the processed images.
    return im_list_to_blob(processed_ims), im_scales
def _get_image_blob(roidb, scale_inds):
    """Builds an input blob from the images in the roidb at the specified
    scales.
    """
    processed_ims = []
    im_scales = []
    num_images = len(roidb)
    for i in xrange(num_images):
        image = sio.imread(roidb[i]['image'])
        # Promote grayscale inputs (2-D or single-channel) to RGB.
        if len(image.shape) == 2 or image.shape[2] == 1:
            image = skimage.color.gray2rgb(image)
        if roidb[i]['flipped']:
            image = image[:, ::-1, :]
        image, factor = prep_im_for_blob(image, cfg.PIXEL_MEANS,
                                         cfg.TRAIN.SCALES[scale_inds[i]],
                                         cfg.TRAIN.MAX_SIZE)
        im_scales.append(factor)
        processed_ims.append(image)
    # Pack the processed images into one input blob.
    return im_list_to_blob(processed_ims), im_scales
def _get_image_blob(roidb):
    """Builds an input blob from the images in the roidb at the specified
    scales.
    """
    num_images = len(roidb)
    # Draw one random training scale per image in the batch.
    scale_inds = np.random.randint(0, high=len(cfg.TRAIN.SCALES),
                                   size=num_images)
    processed_ims = []
    im_scales = []
    for i in range(num_images):
        entry = roidb[i]
        im = cv2.imread(entry['image'])
        assert im is not None, \
            'Failed to read image \'{}\''.format(entry['image'])
        # If NOT using opencv to read in images, grayscale promotion and
        # an RGB->BGR channel flip would be needed here.
        if entry['flipped']:
            im = im[:, ::-1, :]
        target_size = cfg.TRAIN.SCALES[scale_inds[i]]
        # blob_utils.prep_im_for_blob takes a LIST of target sizes and
        # returns parallel lists of processed images and scales; only one
        # size is used here, hence the [0] indexing below.
        ims, scales = blob_utils.prep_im_for_blob(
            im, cfg.PIXEL_MEANS, cfg.PIXEL_VARS, [target_size],
            cfg.TRAIN.MAX_SIZE)
        im_scales.append(scales[0])
        processed_ims.append(ims[0])
    # Create a blob [n, c, h, w] to hold the input images.
    blob = blob_utils.im_list_to_blob(processed_ims)
    return blob, im_scales
def _get_image_blob(roidb, scale_inds, images_subdir):
    """Builds an input blob from the images in the roidb at the specified
    scales, reading images from an alternate subdirectory.
    """
    processed_ims = []
    im_scales = []
    num_images = len(roidb)
    for i in xrange(num_images):
        path = roidb[i]['image']
        # Swap the penultimate path component (the default subdirectory)
        # for images_subdir before reading.
        default_subdir = path.split("/")[-2:-1][0]
        image = cv2.imread(path.replace(default_subdir, images_subdir))
        if roidb[i]['flipped']:
            image = image[:, ::-1, :]
        image, factor = prep_im_for_blob(image, cfg.PIXEL_MEANS,
                                         cfg.TRAIN.SCALES[scale_inds[i]],
                                         cfg.TRAIN.MAX_SIZE)
        im_scales.append(factor)
        processed_ims.append(image)
    # Pack the processed images into one input blob.
    return im_list_to_blob(processed_ims), im_scales
def _get_image_blob(roidb, scale_ind, num_classes, backgrounds, intrinsic_matrix, db_inds_syn, is_syn):
    """Builds an input blob from the images in the roidb at the specified
    scales.

    Python 2 code (uses `print` statements and `xrange`). Produces three
    blobs — color, depth, and (when cfg.INPUT == 'NORMAL') surface
    normals. When is_syn is true, synthetic renders from cfg.TRAIN.SYNROOT
    are composited over randomly sampled real backgrounds.

    Args:
        roidb: list of dicts with 'image', 'depth', 'flipped' keys.
        scale_ind: index into cfg.TRAIN.SCALES_BASE (one scale for all).
        num_classes: unused here.
        backgrounds: list of background image paths for synthetic data.
        intrinsic_matrix: 3x3 camera intrinsics used for normal estimation.
        db_inds_syn: per-image indices of the synthetic renders.
        is_syn: whether this batch is synthetic.

    Returns:
        blob, blob_depth, blob_normal, im_scales.
    """
    num_images = len(roidb)
    processed_ims = []
    processed_ims_depth = []
    processed_ims_normal = []
    im_scales = []
    roidb_syn = []  # NOTE: never populated or used below
    for i in xrange(num_images):
        if is_syn:
            # depth raw (16-bit PNG, padded to a multiple of 16)
            filename = cfg.TRAIN.SYNROOT + '{:06d}-depth.png'.format(db_inds_syn[i])
            im_depth_raw = pad_im(cv2.imread(filename, cv2.IMREAD_UNCHANGED), 16)
            # rgba render of the same synthetic frame
            filename = cfg.TRAIN.SYNROOT + '{:06d}-color.png'.format(db_inds_syn[i])
            rgba = pad_im(cv2.imread(filename, cv2.IMREAD_UNCHANGED), 16)
            # sample a background image to composite behind the render
            ind = np.random.randint(len(backgrounds), size=1)[0]
            filename = backgrounds[ind]
            background = cv2.imread(filename, cv2.IMREAD_UNCHANGED)
            try:
                background = cv2.resize(background, (rgba.shape[1], rgba.shape[0]), interpolation=cv2.INTER_LINEAR)
            except:
                # Unreadable/None background: fall back to a black image of
                # the matching dtype for the configured input modality.
                if cfg.INPUT == 'DEPTH' or cfg.INPUT == 'NORMAL':
                    background = np.zeros((rgba.shape[0], rgba.shape[1]), dtype=np.uint16)
                else:
                    background = np.zeros((rgba.shape[0], rgba.shape[1], 3), dtype=np.uint8)
                print 'bad background image'
            # A color background must be 3-channel; otherwise replace it.
            if cfg.INPUT != 'DEPTH' and cfg.INPUT != 'NORMAL' and len(background.shape) != 3:
                background = np.zeros((rgba.shape[0], rgba.shape[1], 3), dtype=np.uint8)
                print 'bad background image'
            # add background wherever the render's alpha is zero
            im = np.copy(rgba[:, :, :3])
            alpha = rgba[:, :, 3]
            I = np.where(alpha == 0)
            if cfg.INPUT == 'DEPTH' or cfg.INPUT == 'NORMAL':
                # Background depth scaled by 1/10 — unit conversion,
                # presumably; TODO confirm against the dataset's depth units.
                im_depth_raw[I[0], I[1]] = background[I[0], I[1]] / 10
            else:
                im[I[0], I[1], :] = background[I[0], I[1], :3]
        else:
            # Real data: read the stored depth and color images.
            im_depth_raw = pad_im(cv2.imread(roidb[i]['depth'], cv2.IMREAD_UNCHANGED), 16)
            rgba = pad_im(cv2.imread(roidb[i]['image'], cv2.IMREAD_UNCHANGED), 16)
            if rgba.shape[2] == 4:
                # Zero out fully transparent pixels.
                im = np.copy(rgba[:, :, :3])
                alpha = rgba[:, :, 3]
                I = np.where(alpha == 0)
                im[I[0], I[1], :] = 0
            else:
                im = rgba
        # chromatic transform (color augmentation)
        if cfg.TRAIN.CHROMATIC:
            im = chromatic_transform(im)
        if cfg.TRAIN.ADD_NOISE:
            im = add_noise(im)
        if roidb[i]['flipped']:
            im = im[:, ::-1, :]
        # Mean-subtract and rescale the color image.
        im_orig = im.astype(np.float32, copy=True)
        im_orig -= cfg.PIXEL_MEANS
        im_scale = cfg.TRAIN.SCALES_BASE[scale_ind]
        im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale, interpolation=cv2.INTER_LINEAR)
        im_scales.append(im_scale)
        processed_ims.append(im)
        # depth: normalize to [0, 255] by the frame's max, tile to 3 channels
        im_depth = im_depth_raw.astype(np.float32, copy=True) / float(im_depth_raw.max()) * 255
        im_depth = np.tile(im_depth[:, :, np.newaxis], (1, 1, 3))
        if cfg.TRAIN.ADD_NOISE:
            im_depth = add_noise(im_depth)
        if roidb[i]['flipped']:
            im_depth = im_depth[:, ::-1]
        im_orig = im_depth.astype(np.float32, copy=True)
        im_orig -= cfg.PIXEL_MEANS
        im_depth = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale, interpolation=cv2.INTER_LINEAR)
        processed_ims_depth.append(im_depth)
        # normals: estimated on the GPU from the raw depth + intrinsics
        if cfg.INPUT == 'NORMAL':
            # Depth in meters — assumes raw depth is millimeters; TODO confirm.
            depth = im_depth_raw.astype(np.float32, copy=True) / 1000.0
            fx = intrinsic_matrix[0, 0] * im_scale
            fy = intrinsic_matrix[1, 1] * im_scale
            cx = intrinsic_matrix[0, 2] * im_scale
            cy = intrinsic_matrix[1, 2] * im_scale
            nmap = gpu_normals.gpu_normals(depth, fx, fy, cx, cy, 20.0, cfg.GPU_ID)
            # Map normals from [-1, 1] into [0, 255] image range.
            im_normal = 127.5 * nmap + 127.5
            im_normal = im_normal.astype(np.uint8)
            im_normal = im_normal[:, :, (2, 1, 0)]
            im_normal = cv2.bilateralFilter(im_normal, 9, 75, 75)
            if roidb[i]['flipped']:
                im_normal = im_normal[:, ::-1, :]
            im_orig = im_normal.astype(np.float32, copy=True)
            im_orig -= cfg.PIXEL_MEANS
            im_normal = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale, interpolation=cv2.INTER_LINEAR)
            processed_ims_normal.append(im_normal)
            blob_normal = im_list_to_blob(processed_ims_normal, 3)
        else:
            blob_normal = []
    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims, 3)
    blob_depth = im_list_to_blob(processed_ims_depth, 3)
    return blob, blob_depth, blob_normal, im_scales
def _get_image_blob(roidb, scale_inds):
    """Builds an input blob from the images in the roidb at the specified
    scales.

    Optional augmentations (config-driven): HSV saturation/exposure
    distortion and random cropping. Also returns the crop boxes and the
    pre-crop image shapes so callers can map coordinates back.

    Returns:
        blob, im_scales, im_crops, im_shapes.
    """
    num_images = len(roidb)
    processed_ims = []
    im_scales = []
    im_crops = []
    im_shapes = []
    for i in xrange(num_images):
        im = cv2.imread(roidb[i]['image'])
        if roidb[i]['flipped']:
            im = im[:, ::-1, :]
        im_shapes.append(im.shape)  # shape BEFORE distortion/cropping
        if cfg.TRAIN.USE_DISTORTION:
            # Random saturation (s0) and exposure (s1) jitter in HSV space;
            # each factor is inverted with probability 0.5.
            hsv = cv2.cvtColor(im, cv2.COLOR_BGR2HSV)
            s0 = npr.random() * (cfg.TRAIN.SATURATION - 1) + 1
            s1 = npr.random() * (cfg.TRAIN.EXPOSURE - 1) + 1
            s0 = s0 if npr.random() > 0.5 else 1.0 / s0
            s1 = s1 if npr.random() > 0.5 else 1.0 / s1
            hsv = np.array(hsv, dtype=np.float)
            hsv[:, :, 1] = np.minimum(s0 * hsv[:, :, 1], 255)
            hsv[:, :, 2] = np.minimum(s1 * hsv[:, :, 2], 255)
            hsv = np.array(hsv, dtype=np.uint8)
            im = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)
        if cfg.TRAIN.USE_CROP:
            # Random crop of size cfg.TRAIN.CROP * (h, w), anchored at a
            # uniformly random offset; box stored as (y1, x1, y2, x2).
            im_shape = np.array(im.shape)
            crop_dims = im_shape[:2] * cfg.TRAIN.CROP
            r0 = npr.random()
            r1 = npr.random()
            s = im_shape[:2] - crop_dims
            s[0] *= r0
            s[1] *= r1
            im_crop = np.array([s[0], s[1], s[0] + crop_dims[0] - 1, s[1] + crop_dims[1] - 1], dtype=np.uint16)
            im = im[im_crop[0]:im_crop[2] + 1, im_crop[1]:im_crop[3] + 1, :]
        else:
            # Full-image "crop" so downstream code has a uniform box.
            im_crop = np.array([0, 0, im.shape[0] - 1, im.shape[1] - 1], dtype=np.uint16)
        if cfg.CSC_DEBUG:
            im_save = im  # keep the pre-mean-subtraction image for dumping
        target_size = cfg.TRAIN.SCALES[scale_inds[i]]
        im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size, cfg.TRAIN.MAX_SIZE)
        if cfg.CSC_DEBUG:
            # Debug path: write the resized (but not mean-subtracted) image
            # and bump the global pass counter stored in the config.
            im_save = cv2.resize(im_save, None, None, fx=im_scale, fy=im_scale, interpolation=cv2.INTER_LINEAR)
            cv2.imwrite('tmp/' + str(cfg.TRAIN.PASS_IM) + '_.png', im_save)
            cfg.TRAIN.PASS_IM = cfg.TRAIN.PASS_IM + 1
        im_scales.append(im_scale)
        im_crops.append(im_crop)
        processed_ims.append(im)
    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)
    return blob, im_scales, im_crops, im_shapes
def _get_image_blob(roidb, scale_inds):
    """Builds an input blob from the images in the roidb at the specified
    scales.

    Two modes controlled by cfg.USE_MASK:
      * mask mode: also loads and binarizes a per-image mask and returns a
        mask blob plus the pre-resize mask shapes;
      * plain mode: color + noise blobs only.
    Both modes support per-box Gaussian-noise and JPEG-recompression
    augmentation of the annotated regions.

    Returns:
        mask mode: blob, noise_blob, im_scales, mask_blob, mask_shapes.
        plain mode: blob, noise_blob, im_scales.
    """
    num_images = len(roidb)
    processed_ims = []
    processed_mask = []
    processed_noise = []
    im_scales = []
    mask_shapes = []
    if cfg.USE_MASK is True:
        for i in range(num_images):
            im = cv2.imread(roidb[i]['image'])
            # Binarize the mask at 127 after converting to grayscale.
            mask = cv2.imread(roidb[i]['mask'])
            mask = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)
            ret, mask = cv2.threshold(mask, 127, 255, cv2.THRESH_BINARY)
            mask_shape = im.shape[0:2]  # original (h, w) before resizing
            mask = np.expand_dims(mask, 2)
            if roidb[i]['flipped']:
                im = im[:, ::-1, :]
                mask = mask[:, ::-1, :]
            if roidb[i]['noised']:
                # Add Gaussian noise (sigma = sqrt(5)) inside each box.
                row, col, ch = im.shape
                for bb in roidb[i]['boxes']:
                    bcol = bb[2] - bb[0]
                    brow = bb[3] - bb[1]
                    mean = 0
                    var = 5
                    sigma = var**0.5
                    gauss = np.random.normal(mean, sigma, (brow, bcol, ch))
                    gauss = gauss.reshape(brow, bcol, ch)
                    im = im.astype(np.float32, copy=False)
                    im[bb[1]:bb[3], bb[0]:bb[2], :] = im[bb[1]:bb[3], bb[0]:bb[2], :] + gauss
            if roidb[i]['JPGed']:
                # Re-encode each box region at JPEG quality 70 via a temp
                # file on disk ('JPGed.jpg' in the working directory).
                for bb in roidb[i]['boxes']:
                    cv2.imwrite('JPGed.jpg', im[bb[1]:bb[3], bb[0]:bb[2], :], [cv2.IMWRITE_JPEG_QUALITY, 70])
                    bb_jpged = cv2.imread('JPGed.jpg')
                    im[bb[1]:bb[3], bb[0]:bb[2], :] = bb_jpged
            target_size = cfg.TRAIN.SCALES[scale_inds[i]]
            # This prep_im_for_blob variant also resizes the mask.
            im, im_scale, mask = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size, cfg.TRAIN.MAX_SIZE, mask)
            mask = np.expand_dims(mask, 2)
            im_scales.append(im_scale)
            mask_shapes.append(mask_shape)
            processed_ims.append(im)
            processed_mask.append(mask)
            # Noise-residual input derived from the processed image.
            noise, im_scale = prep_noise_for_blob(im, cfg.PIXEL_MEANS, target_size, cfg.TRAIN.MAX_SIZE)
            processed_noise.append(noise)
        # Create a blob to hold the input images
        blob = im_list_to_blob(processed_ims)
        noise_blob = im_list_to_blob(processed_noise)
        mask_blob = mask_list_to_blob(processed_mask)
        return blob, noise_blob, im_scales, mask_blob, mask_shapes
    else:
        for i in range(num_images):
            im = cv2.imread(roidb[i]['image'])
            if roidb[i]['flipped']:
                im = im[:, ::-1, :]
            if roidb[i]['noised']:
                # Same per-box Gaussian-noise augmentation as mask mode.
                row, col, ch = im.shape
                for bb in roidb[i]['boxes']:
                    bcol = bb[2] - bb[0]
                    brow = bb[3] - bb[1]
                    mean = 0
                    var = 5
                    sigma = var**0.5
                    gauss = np.random.normal(mean, sigma, (brow, bcol, ch))
                    gauss = gauss.reshape(brow, bcol, ch)
                    im = im.astype(np.float32, copy=False)
                    im[bb[1]:bb[3], bb[0]:bb[2], :] = im[bb[1]:bb[3], bb[0]:bb[2], :] + gauss
            if roidb[i]['JPGed']:
                for bb in roidb[i]['boxes']:
                    cv2.imwrite('JPGed.jpg', im[bb[1]:bb[3], bb[0]:bb[2], :], [cv2.IMWRITE_JPEG_QUALITY, 70])
                    bb_jpged = cv2.imread('JPGed.jpg')
                    im[bb[1]:bb[3], bb[0]:bb[2], :] = bb_jpged
            target_size = cfg.TRAIN.SCALES[scale_inds[i]]
            # Third return value (mask) is ignored in plain mode —
            # presumably the mask argument defaults to None; verify.
            im, im_scale, _ = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size, cfg.TRAIN.MAX_SIZE)
            im_scales.append(im_scale)
            processed_ims.append(im)
            noise, im_scale = prep_noise_for_blob(im, cfg.PIXEL_MEANS, target_size, cfg.TRAIN.MAX_SIZE)
            processed_noise.append(noise)
        # Create a blob to hold the input images
        blob = im_list_to_blob(processed_ims)
        noise_blob = im_list_to_blob(processed_noise)
        return blob, noise_blob, im_scales
def get_image_blob(self, im, im_depth, meta_data):
    """Converts an image into a network input.

    Arguments:
        im (ndarray): a color image in BGR order
        im_depth (ndarray): the aligned depth image
        meta_data (dict): provides 'intrinsic_matrix' and 'factor_depth'
            when cfg.INPUT == 'NORMAL'

    Returns:
        blob: mean-subtracted color blob
        blob_rescale: color blob normalized to [-1, 1] instead
        blob_depth: depth blob (clipped at 2000, scaled to [0, 255])
        blob_normal: surface-normal blob, or [] unless cfg.INPUT=='NORMAL'
        im_scale_factors (ndarray): the scale factor(s) applied

    NOTE(review): the body mixes `self.cfg` and the module-level `cfg` —
    presumably the same config object; confirm before relying on either.
    """
    # RGB
    im_orig = im.astype(np.float32, copy=True)
    # mask the color image according to depth: zero out pixels with no
    # depth reading in the rgbd_scene experiment
    if self.cfg.EXP_DIR == 'rgbd_scene':
        I = np.where(im_depth == 0)
        im_orig[I[0], I[1], :] = 0
    # Rescaled variant normalized to [-1, 1] (x/127.5 - 1).
    processed_ims_rescale = []
    im_scale = self.cfg.TEST.SCALES_BASE[0]
    im_rescale = cv2.resize(im_orig / 127.5 - 1, None, None, fx=im_scale, fy=im_scale, interpolation=cv2.INTER_LINEAR)
    processed_ims_rescale.append(im_rescale)
    # Mean-subtracted variant.
    im_orig -= self.cfg.PIXEL_MEANS
    processed_ims = []
    im_scale_factors = []
    assert len(self.cfg.TEST.SCALES_BASE) == 1
    im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale, interpolation=cv2.INTER_LINEAR)
    im_scale_factors.append(im_scale)
    processed_ims.append(im)
    # depth: clip at 2000 raw units, scale to [0, 255], tile to 3 channels
    im_orig = im_depth.astype(np.float32, copy=True)
    # im_orig = im_orig / im_orig.max() * 255
    im_orig = np.clip(im_orig / 2000.0, 0, 1) * 255
    im_orig = np.tile(im_orig[:, :, np.newaxis], (1, 1, 3))
    im_orig -= self.cfg.PIXEL_MEANS
    processed_ims_depth = []
    im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale, interpolation=cv2.INTER_LINEAR)
    processed_ims_depth.append(im)
    if cfg.INPUT == 'NORMAL':
        # meta data: camera intrinsics for normal estimation
        K = meta_data['intrinsic_matrix'].astype(np.float32, copy=True)
        fx = K[0, 0]
        fy = K[1, 1]
        cx = K[0, 2]
        cy = K[1, 2]
        # normals computed on the GPU from metric depth
        depth = im_depth.astype(np.float32, copy=True) / float(meta_data['factor_depth'])
        nmap = gpu_normals.gpu_normals(depth, fx, fy, cx, cy, 20.0, cfg.GPU_ID)
        # Map normals from [-1, 1] into image range [0, 255].
        im_normal = 127.5 * nmap + 127.5
        im_normal = im_normal.astype(np.uint8)
        im_normal = im_normal[:, :, (2, 1, 0)]
        im_normal = cv2.bilateralFilter(im_normal, 9, 75, 75)
        processed_ims_normal = []
        im_orig = im_normal.astype(np.float32, copy=True)
        im_orig -= cfg.PIXEL_MEANS
        im_normal = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale, interpolation=cv2.INTER_LINEAR)
        processed_ims_normal.append(im_normal)
        blob_normal = im_list_to_blob(processed_ims_normal, 3)
    else:
        blob_normal = []
    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims, 3)
    blob_rescale = im_list_to_blob(processed_ims_rescale, 3)
    blob_depth = im_list_to_blob(processed_ims_depth, 3)
    return blob, blob_rescale, blob_depth, blob_normal, np.array(im_scale_factors)
def _get_image_blob(self, im, im_dim):
    """Converts an image into a network input.

    8-bit-quantization variant: after building the pyramid blob the values
    are truncated toward int and clipped to [-128, 127]; the pre/post-clip
    blobs are dumped via self.build_json. Heavy diagnostic printing is
    left in place (removing it would change runtime output).

    Arguments:
        im (ndarray): a color image in BGR order
        im_dim: im_dim[1] overrides cfg.TEST.MAX_SIZE as the max-size cap

    Returns:
        blob (ndarray): a data blob holding an image pyramid
        im_scale_factors (ndarray): image scales used in the pyramid
    """
    im = im.astype(np.float32, copy=True)
    print('Image im.shape = {}'.format(im.shape))
    # Normalize: subtract per-channel means, divide by per-channel stds.
    im = im - cfg.PIXEL_MEANS
    im /= cfg.PIXEL_STDS
    print('\nAfter substract mean')
    im_shape = im.shape
    im_size_min = np.min(im_shape[0:2])
    im_size_max = np.max(im_shape[0:2])
    print("im_size_min: %d, im_size_max: %d\n" % (im_size_min, im_size_max))
    processed_ims = []
    im_scale_factors = []
    print('cfg.TEST.SCALES = {},cfg.TEST.MAX_SIZE = {}, im_dim[1] = {}'.format(cfg.TEST.SCALES, cfg.TEST.MAX_SIZE, im_dim[1]))
    # The max-size cap comes from im_dim, not cfg.TEST.MAX_SIZE.
    TEST_MAX_SIZE = im_dim[1]
    for target_size in cfg.TEST.SCALES:
        im_scale = float(target_size) / float(im_size_min)
        # Prevent the biggest axis from being more than MAX_SIZE
        if np.round(im_scale * im_size_max) > TEST_MAX_SIZE:
            im_scale = float(TEST_MAX_SIZE) / float(im_size_max)
        multiple = cfg.TEST.SCALE_MULTIPLE_OF
        if multiple > 1:
            # Snap each output dimension down to a multiple of `multiple`
            # by computing separate x/y scale factors.
            im_scale_x = np.floor(im.shape[1] * im_scale / multiple) * multiple / im.shape[1]
            im_scale_y = np.floor(im.shape[0] * im_scale / multiple) * multiple / im.shape[0]
            print('im_scale = {}, multiple = {}'.format(im_scale, multiple))
            print('im.shape[0] = {}, im.shape[1] = {}'.format(im.shape[0], im.shape[1]))
            print('im_scale_x = {}, im_scale_y = {}\n'.format(im_scale_x, im_scale_y))
            im = cv2.resize(im, None, None, fx=im_scale_x, fy=im_scale_y, interpolation=cv2.INTER_LINEAR)
        else:
            im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale, interpolation=cv2.INTER_LINEAR)
            print('im_scale = {}\n'.format(im_scale))
        # NOTE(review): when multiple > 1, only im_scale (not the actual
        # im_scale_x/y) is recorded — confirm downstream expectations.
        im_scale_factors.append(im_scale)
        processed_ims.append(im)
    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)
    # Truncate to integer values (int32 round-trip), then clip to the
    # signed 8-bit range for the quantized network.
    blob = blob.astype(np.int32, copy=True)
    blob = blob.astype(np.float32, copy=True)
    self.build_json("blob_pre_clip", blob)
    blob = np.clip(blob, -128, 127)
    self.build_json("blob_post_clip", blob)
    print('pvanet_8bit_ob_roip8 blob.shape = {}'.format(blob.shape))
    print('pvanet_8bit_ob_roip8 im.shape = {}'.format(im.shape))
    return blob, np.array(im_scale_factors)