def get_minibatch(self):
    """Assemble one minibatch of image pairs and their keypoint correspondences.

    Pairs are drawn from ``self.corrdb.get_set`` for the current phase until
    ``self.batch_size`` usable pairs have been collected or the generator is
    exhausted.  A pair is skipped when either keypoint mask is empty or when
    the two masks have no keypoint in common.

    Returns:
        dict: blobs keyed by ``'image_1'``, ``'image_2'``,
        ``'correspondence'``, ``'num_coord'``, ``'img_size'``,
        ``'image_1_azimuth'`` and ``'image_2_azimuth'``.
    """
    imgt = ImageCorrespondenceTransformer(self.actual_phase)
    ims1 = []
    ims2 = []
    img1_azimuths = []
    img2_azimuths = []
    coords1 = []
    coords2 = []
    similarities = []

    pair_gen = self.corrdb.get_set(self.actual_phase.lower(),
                                   repeat=self.repeat)
    for (imf1, img1_azimuth, (kp1, mask1)), \
            (imf2, img2_azimuth, (kp2, mask2)) in pair_gen:
        if len(mask1) == 0 or len(mask2) == 0:
            # Some may lack keypoint labels.
            continue
        mask = (mask1 & mask2).astype(bool)
        if not mask.any():
            # Must have at least one overlapping keypoint.
            continue

        # Decode the images only once the pair is known to be usable, so
        # rejected pairs cost no disk I/O.
        im1 = scipy.misc.imread(imf1).astype(float)
        im2 = scipy.misc.imread(imf2).astype(float)

        # Keep only the keypoints present in both images; the transpose puts
        # one keypoint per row.
        kp1, kp2 = kp1[:, mask].T, kp2[:, mask].T
        im1, im2, kp1, kp2, sim = imgt.transform(im1, im2, kp1, kp2,
                                                 None, True)

        ims1.append(im1)
        ims2.append(im2)
        # Quantize each azimuth (degrees) into one of 16 bins of 22.5 degrees,
        # giving an index in 0..15.
        img1_azimuths.append(int((img1_azimuth % 360) / (360.0 / 16)))
        img2_azimuths.append(int((img2_azimuth % 360) / (360.0 / 16)))
        coords1.append(kp1)
        coords2.append(kp2)
        similarities.append(sim)

        if len(ims1) >= self.batch_size:
            break

    coord, num_coord = coord_list_to_blob(coords1, coords2, similarities)
    blobs = {
        'image_1': im_list_to_blob(ims1),
        'image_2': im_list_to_blob(ims2),
        'correspondence': coord,
        'num_coord': num_coord,
        'img_size': np.array([img.shape[:2] for img in ims2],
                             dtype='float32', order='C'),
        'image_1_azimuth': np.array(img1_azimuths),
        'image_2_azimuth': np.array(img2_azimuths),
    }
    return blobs
def _get_image_blob(roidb, scale_inds):
    """Build the input blob for the roidb images at the selected scales.

    Args:
        roidb: sequence of entries with an ``'image'`` path and a
            ``'flipped'`` flag.
        scale_inds: per-image indices into ``cfg.FLAGS2["scales"]``.

    Returns:
        tuple: ``(blob, im_scales)``.  Per the original notes, ``blob`` is a
        4-D array whose first axis indexes the image within the minibatch,
        and ``im_scales`` lists the scale factor applied to each image.
    """
    prepared_images = []
    scale_factors = []
    for idx, entry in enumerate(roidb):
        image = cv2.imread(entry['image'])
        # An entry that was horizontally flipped needs its image mirrored
        # too; the axes are [y, x, channel].
        if entry['flipped']:
            image = image[:, ::-1, :]
        target_size = cfg.FLAGS2["scales"][scale_inds[idx]]
        image, scale = prep_im_for_blob(image, cfg.FLAGS2["pixel_means"],
                                        target_size, cfg.FLAGS.max_size)
        scale_factors.append(scale)
        prepared_images.append(image)

    # Pack the list of images into a single array for the network.
    return im_list_to_blob(prepared_images), scale_factors
def _get_image_blob(roidb, scale_inds):
    """Build the input blob for the roidb images at the selected scales.

    Args:
        roidb: sequence of entries with an ``'image'`` path and a
            ``'flipped'`` flag.
        scale_inds: per-image indices into ``cfg.TRAIN.SCALES``.

    Returns:
        tuple: ``(blob, im_scales)`` where ``blob`` is the output of
        ``im_list_to_blob`` and ``im_scales`` is the list of per-image scale
        factors returned by ``prep_im_for_blob``.
    """
    processed_ims = []
    im_scales = []
    for i, entry in enumerate(roidb):
        im = cv2.imread(entry['image'])
        # Honour the 'flipped' flag by mirroring the image horizontally.
        # NOTE(review): the flip had been commented out and replaced by a
        # debug print; confirm no caller relies on flipped entries being
        # left unmirrored.
        if entry['flipped']:
            im = im[:, ::-1, :]
        target_size = cfg.TRAIN.SCALES[scale_inds[i]]
        # Per the original notes: PIXEL_MEANS is
        # [[[102.9801, 115.9465, 122.7717]]] and MAX_SIZE is 1000.
        # prep_im_for_blob returns the prepared image and the scale used.
        im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
                                        cfg.TRAIN.MAX_SIZE)
        im_scales.append(im_scale)
        processed_ims.append(im)

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)
    return blob, im_scales
def _get_image_blob(roidb, scale_inds):
    """Build the input blob for the roidb images at the selected scales.

    Args:
        roidb: sequence of entries with an ``'image'`` path and a
            ``'flipped'`` flag.
        scale_inds: per-image indices into ``cfg.FLAGS2["scales"]``.

    Returns:
        tuple: ``(blob, im_scales)`` where ``blob`` is the output of
        ``im_list_to_blob`` and ``im_scales`` is the list of per-image scale
        factors returned by ``prep_im_for_blob``.

    Raises:
        IOError: if ``cv2.imread`` returns ``None`` for an image path.
    """
    processed_ims = []
    im_scales = []
    for i, entry in enumerate(roidb):
        im = cv2.imread(entry['image'])
        if im is None:
            # Fail loudly: continuing with a truncated batch would hand the
            # caller a blob that no longer lines up with roidb.
            raise IOError(
                'cv2.imread could not read image: {}'.format(entry['image']))
        if entry['flipped']:
            im = im[:, ::-1, :]
        target_size = cfg.FLAGS2["scales"][scale_inds[i]]
        im, im_scale = prep_im_for_blob(im, cfg.FLAGS2["pixel_means"],
                                        target_size, cfg.FLAGS.max_size)
        im_scales.append(im_scale)
        processed_ims.append(im)

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)
    return blob, im_scales
def _get_image_blob(roidb, scale_inds):
    """Build the input blob for the roidb images at the selected scales.

    Args:
        roidb: sequence of entries with an ``'image'`` path and a
            ``'flipped'`` flag.  When ``cfg.LIMIT_RAM`` is set it is instead
            the path of a pickle file holding a single entry.
        scale_inds: per-image indices into ``cfg.TRAIN.SCALES``; its length
            determines how many images are processed.

    Returns:
        tuple: ``(blob, im_scales, roidb)`` where ``roidb`` is the list of
        entries actually used (the unpickled one in LIMIT_RAM mode).
    """
    num_images = len(scale_inds)
    processed_ims = []
    im_scales = []
    if cfg.LIMIT_RAM:
        # roidb is the pickle file path
        assert num_images == 1, "LIMIT_RAM version, it has to be one image."
        # NOTE(review): unpickling can execute arbitrary code; this path must
        # only ever point at files written by this project.
        with open(roidb, 'rb') as f:
            roidb = [cPickle.load(f)]

    # enumerate() replaces xrange(), which does not exist on Python 3.
    for i, scale_ind in enumerate(scale_inds):
        im = cv2.imread(roidb[i]['image'])
        if roidb[i]['flipped']:
            im = im[:, ::-1, :]
        target_size = cfg.TRAIN.SCALES[scale_ind]
        im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
                                        cfg.TRAIN.MAX_SIZE)
        im_scales.append(im_scale)
        processed_ims.append(im)

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)
    return blob, im_scales, roidb
def _get_image_blob(roidb, scale_inds):
    """Build the input blob for the roidb images at the selected scales.

    Args:
        roidb: sequence of entries with an ``'image'`` path and a
            ``'flipped'`` flag.
        scale_inds: per-image indices into ``cfg.FLAGS2["scales"]``.

    Returns:
        tuple: ``(blob, im_scales)`` where ``blob`` is the output of
        ``im_list_to_blob`` over the prepared images and ``im_scales`` is the
        list of per-image scale factors.

    Raises:
        IOError: if ``cv2.imread`` returns ``None`` for an image path.
    """
    processed_ims = []
    im_scales = []
    for i, entry in enumerate(roidb):
        im = cv2.imread(entry['image'])
        if im is None:
            # Report the offending file instead of failing later with an
            # unrelated attribute/indexing error on None.
            raise IOError(
                'cv2.imread could not read image: {}'.format(entry['image']))
        if entry['flipped']:
            im = im[:, ::-1, :]
        target_size = cfg.FLAGS2["scales"][scale_inds[i]]
        im, im_scale = prep_im_for_blob(im, cfg.FLAGS2["pixel_means"],
                                        target_size, cfg.FLAGS.max_size)
        im_scales.append(im_scale)
        processed_ims.append(im)

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)
    return blob, im_scales
def _get_image_blob(roidb, scale_inds):
    """Build the input blob for the roidb images at the selected scales.

    ``None`` entries in ``roidb`` are skipped, so the blob may hold fewer
    images than ``roidb`` has entries.

    Args:
        roidb: sequence of entries (or ``None``) with an ``'image'`` path and
            a ``'flipped'`` flag.
        scale_inds: per-entry indices into ``cfg.FLAGS2["scales"]``, aligned
            with ``roidb`` positions.

    Returns:
        tuple: ``(blob, im_scales)``, or ``([], [])`` when no image was
        processed.
    """
    processed_ims = []
    im_scales = []
    for i, entry in enumerate(roidb):
        # Identity test: ``== None`` would defer to the entry's own __eq__.
        if entry is None:
            continue
        im = cv2.imread(entry['image'])
        if entry['flipped']:
            im = im[:, ::-1, :]
        target_size = cfg.FLAGS2["scales"][scale_inds[i]]
        im, im_scale = prep_im_for_blob(im, cfg.FLAGS2["pixel_means"],
                                        target_size, cfg.FLAGS.max_size)
        im_scales.append(im_scale)
        processed_ims.append(im)

    if not processed_ims:
        return [], []

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)
    return blob, im_scales
def _get_image_blob(roidb, scale_inds):
    """Build the input blob for the roidb images at the selected scales.

    A failed read is retried a bounded number of times rather than forever.

    Args:
        roidb: sequence of entries with an ``'image'`` path and a
            ``'flipped'`` flag.
        scale_inds: per-image indices into ``cfg.ZLRM.TRAIN.SCALES``.

    Returns:
        tuple: ``(blob, im_scales)`` where ``blob`` is the output of
        ``im_list_to_blob`` and ``im_scales`` is the list of per-image scale
        factors returned by ``prep_im_for_blob``.

    Raises:
        IOError: if an image path does not exist, or the image still cannot
            be read after all attempts.
    """
    max_read_attempts = 3
    processed_ims = []
    im_scales = []
    for i, entry in enumerate(roidb):
        path = entry['image']
        im = imread(path)
        attempts = 1
        while im is None:
            print('roidb', i, 'image', path, '为空')
            # Retrying cannot help when the file is simply not there.
            if not os.path.exists(path):
                raise IOError('image path does not exist: {}'.format(path))
            # Give up eventually instead of spinning forever on a file that
            # exists but cannot be decoded.
            if attempts >= max_read_attempts:
                raise IOError(
                    'could not read image after {} attempts: {}'.format(
                        attempts, path))
            im = imread(path)
            attempts += 1

        if entry['flipped']:
            im = im[:, ::-1, :]
        target_size = cfg.ZLRM.TRAIN.SCALES[scale_inds[i]]
        im, im_scale = prep_im_for_blob(im, cfg.ZLRM.PIXEL_MEANS, target_size,
                                        cfg.ZLRM.TRAIN.MAX_SIZE)
        im_scales.append(im_scale)
        processed_ims.append(im)

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)
    return blob, im_scales
def _get_image_blob(im):
    """Turn one image into a blob of mean-subtracted, rescaled copies.

    One copy is produced per entry of ``cfg.TEST.SCALES``.

    Args:
        im (ndarray): input image; only its first two axes are used for
            sizing.

    Returns:
        tuple: ``(blob, scale_factors)`` where ``blob`` is the output of
        ``im_list_to_blob`` and ``scale_factors`` is an ndarray holding the
        scale used for each copy.
    """
    mean_subtracted = im.astype(np.float32, copy=True)
    mean_subtracted -= cfg.PIXEL_MEANS

    height_width = mean_subtracted.shape[0:2]
    shortest_side = np.min(height_width)
    longest_side = np.max(height_width)

    pyramid = []
    scale_factors = []
    for target_size in cfg.TEST.SCALES:
        # Scale so the shortest side reaches target_size ...
        scale = float(target_size) / float(shortest_side)
        # ... unless that pushes the longest side beyond MAX_SIZE.
        if np.round(scale * longest_side) > cfg.TEST.MAX_SIZE:
            scale = float(cfg.TEST.MAX_SIZE) / float(longest_side)
        resized = cv2.resize(mean_subtracted, None, None, fx=scale, fy=scale,
                             interpolation=cv2.INTER_LINEAR)
        scale_factors.append(scale)
        pyramid.append(resized)

    # Pack the rescaled copies into a single blob.
    return im_list_to_blob(pyramid), np.array(scale_factors)
def _get_image_blob(im):
    """Turn one image into a blob of mean-subtracted, rescaled copies.

    One copy is produced per entry of ``cfg.FLAGS2["test_scales"]``.

    Args:
        im (ndarray): input image; only its first two axes are used for
            sizing.

    Returns:
        tuple: ``(blob, scale_factors)`` where ``blob`` is the output of
        ``im_list_to_blob`` and ``scale_factors`` is an ndarray holding the
        scale used for each copy.
    """
    # Subtract the pixel means from a float32 copy of the image.
    centered = im.astype(np.float32, copy=True)
    centered -= cfg.FLAGS2["pixel_means"]

    # Shortest and longest of the first two image dimensions.
    dims = centered.shape[0:2]
    short_side = np.min(dims)
    long_side = np.max(dims)

    # Collected as (scale, image) pairs and split afterwards.
    scaled = []
    # Per the original notes the test scale matches training (600); more
    # scales may be configured.
    for target_size in cfg.FLAGS2["test_scales"]:
        # Ratio that brings the shortest side to target_size.
        ratio = float(target_size) / float(short_side)
        # If the longest side would exceed the configured maximum, scale by
        # the longest side instead.
        if np.round(ratio * long_side) > cfg.FLAGS.test_max_size:
            ratio = float(cfg.FLAGS.test_max_size) / float(long_side)
        resized = cv2.resize(centered, None, None, fx=ratio, fy=ratio,
                             interpolation=cv2.INTER_LINEAR)
        scaled.append((ratio, resized))

    im_scale_factors = [ratio for ratio, _ in scaled]
    processed_ims = [resized for _, resized in scaled]

    # Merge the differently sized copies into one blob.
    blob = im_list_to_blob(processed_ims)
    return blob, np.array(im_scale_factors)
def _get_image_blob(im):
    """Convert an image into a network input.

    Arguments:
        im (ndarray): a color image in BGR order

    Returns:
        blob (ndarray): a data blob holding an image pyramid
        im_scale_factors (ndarray): image scales (relative to im) used in
            the image pyramid
    """
    base = im.astype(np.float32, copy=True)
    base -= cfg.PIXEL_MEANS

    side_lengths = base.shape[0:2]
    min_side = np.min(side_lengths)
    max_side = np.max(side_lengths)

    def _scale_for(target_size):
        """Return the resize factor for one target size."""
        factor = float(target_size) / float(min_side)
        # Prevent the biggest axis from being more than MAX_SIZE
        if np.round(factor * max_side) > cfg.TEST.MAX_SIZE:
            factor = float(cfg.TEST.MAX_SIZE) / float(max_side)
        return factor

    factors = [_scale_for(target_size) for target_size in cfg.TEST.SCALES]
    levels = [
        cv2.resize(base, None, None, fx=factor, fy=factor,
                   interpolation=cv2.INTER_LINEAR)
        for factor in factors
    ]

    # Create a blob to hold the input images
    return im_list_to_blob(levels), np.array(factors)
def _get_image_blob(roidb, scale_inds):
    """Build the input blob for the roidb images at the selected scales.

    Each entry carries three flip flags, ``'flippedh'``, ``'flippedv'`` and
    ``'flippedb'``; every flag that is set is applied in that order.

    Args:
        roidb: sequence of entries with an ``'image'`` path and the three
            flip flags.
        scale_inds: per-image indices into ``cfg.FLAGS2["scales"]``.

    Returns:
        tuple: ``(blob, im_scales)`` where ``blob`` is the output of
        ``im_list_to_blob`` and ``im_scales`` is the list of per-image scale
        factors returned by ``prep_im_for_blob``.
    """
    keep = slice(None)
    reverse = slice(None, None, -1)
    # Flag name -> index applied to the image when that flag is set.
    flip_indexers = (
        ('flippedh', (keep, reverse, keep)),     # reverse the second axis
        ('flippedv', (reverse, keep, keep)),     # reverse the first axis
        ('flippedb', (reverse, reverse, keep)),  # reverse both
    )

    processed_ims = []
    im_scales = []
    for position, entry in enumerate(roidb):
        image = cv2.imread(entry['image'])
        for flag, indexer in flip_indexers:
            if entry[flag]:
                image = image[indexer]
        target_size = cfg.FLAGS2["scales"][scale_inds[position]]
        image, scale = prep_im_for_blob(image, cfg.FLAGS2["pixel_means"],
                                        target_size, cfg.FLAGS.max_size)
        im_scales.append(scale)
        processed_ims.append(image)

    # Create a blob to hold the input images
    return im_list_to_blob(processed_ims), im_scales
def _get_image_blob(roidb, scale_inds):
    """Build the input blob for the roidb images at the selected scales.

    Per the original notes, the images have the mean subtracted, are brought
    to a common size and are converted to a form suitable as network input.

    Returns
    -------
    blob:
        the images in network-input form (output of ``im_list_to_blob``)
    im_scales: list of float
        per the original notes, each is ``target_size / im_min_size`` or
        ``cfg.FLAGS.max_size / im_max_size``
    """
    def _prepare(index):
        """Load, optionally mirror and rescale the image at ``index``."""
        entry = roidb[index]
        image = cv2.imread(entry['image'])
        if entry['flipped']:
            image = image[:, ::-1, :]
        target_size = cfg.FLAGS2["scales"][scale_inds[index]]
        return prep_im_for_blob(image, cfg.FLAGS2["pixel_means"],
                                target_size, cfg.FLAGS.max_size)

    prepared = [_prepare(index) for index in range(len(roidb))]
    processed_ims = [image for image, _ in prepared]
    im_scales = [scale for _, scale in prepared]

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)
    return blob, im_scales
def _get_image_blob(im):
    """Rescale one image once per configured test scale.

    Returns the packed image array together with the matching scale values.

    :param im: input image (ndarray)
    :return: ``(blob, im_scale_factors)``; the blob from ``im_list_to_blob``
        and an ndarray with one scale factor per rescaled copy
    """
    # Subtract the pixel means; per the original notes these are per-channel
    # means computed over all training images.
    shifted = im.astype(np.float32, copy=True)
    shifted -= cfg.PIXEL_MEANS

    spatial = shifted.shape[0:2]
    shorter = np.min(spatial)
    longer = np.max(spatial)

    rescaled_ims = []
    factors = []
    for target_size in cfg.TEST.SCALES:
        uncapped = float(target_size) / float(shorter)
        # Prevent the biggest axis from being more than MAX_SIZE
        too_large = np.round(uncapped * longer) > cfg.TEST.MAX_SIZE
        factor = (float(cfg.TEST.MAX_SIZE) / float(longer)
                  if too_large else uncapped)
        rescaled = cv2.resize(shifted, None, None, fx=factor, fy=factor,
                              interpolation=cv2.INTER_LINEAR)
        factors.append(factor)
        rescaled_ims.append(rescaled)

    # Create a blob to hold the input images
    blob = im_list_to_blob(rescaled_ims)
    return blob, np.array(factors)
def _get_image_blob(roidb):
    """Build an input blob holding every roidb image at every base scale.

    Each image is read, mirrored when its entry is flagged ``'flipped'``,
    mean-subtracted, and then resized once per factor in
    ``cfg.ZLRM.TRAIN.SCALES_BASE``.

    Args:
        roidb: sequence of entries with an ``'image'`` path and a
            ``'flipped'`` flag.

    Returns:
        The output of ``im_list_to_blob`` over all resized images, ordered
        image by image and, within an image, scale by scale.
    """
    pyramid_levels = []
    for entry in roidb:
        # Read the image and mirror it if the entry asks for it.
        raw = cv2.imread(entry['image'])
        if entry['flipped']:
            raw = raw[:, ::-1, :]

        centered = raw.astype(np.float32, copy=True)
        centered -= cfg.ZLRM.PIXEL_MEANS

        # Build the image pyramid: one resized copy per base scale.
        pyramid_levels.extend(
            cv2.resize(centered, None, None, fx=factor, fy=factor,
                       interpolation=cv2.INTER_LINEAR)
            for factor in cfg.ZLRM.TRAIN.SCALES_BASE
        )

    # Create a blob to hold the input images
    return im_list_to_blob(pyramid_levels)
def get_image_blob(db, pixel_means):
    """Crop every sample box from its mean-subtracted image and batch the crops.

    Args:
        db: iterable of dicts, each with an ``'img'`` array and a
            ``'samples'`` list whose items carry a ``'box'``.
        pixel_means: value subtracted from each image before cropping.

    Returns:
        The output of ``im_list_to_blob`` over all prepared crops, in db
        order and, within an image, in sample order.
    """
    processed_ims = []
    for data in db:
        # Work on a private float32 copy.  With copy=False an image that is
        # already float32 is modified in place, so the caller's db would have
        # the mean subtracted again on every call.
        img = data['img'].astype(np.float32, copy=True)
        img -= pixel_means
        for sample in data['samples']:
            # box[0]/box[1] are the column/row offsets and box[2]/box[3] the
            # column/row extents, as used by the slice below.
            box = [int(value) for value in sample['box']]
            im = img[box[1]:box[1] + box[3], box[0]:box[0] + box[2]]
            im = prep_im_for_blob(im, cfg.TRAIN.INPUT_SIZE,
                                  cfg.TRAIN.INPUT_SIZE)
            processed_ims.append(im)

    blob = im_list_to_blob(processed_ims)
    return blob
def _get_image_blob(im):
    """Convert an image into a network input.

    Arguments:
        im (ndarray): a color image in BGR order

    Returns:
        blob (ndarray): a data blob holding an image pyramid
        im_scale_factors (ndarray): image scales (relative to im) used in
            the image pyramid
    """
    source = im.astype(np.float32, copy=True)
    source -= cfg.FLAGS2["pixel_means"]

    extent = source.shape[0:2]
    smallest = np.min(extent)
    largest = np.max(extent)

    pyramid = []
    scales = []
    # Resize so the short side matches the target (per the original notes,
    # FLAGS2["test_scales"] = (600,)) while the long side stays within
    # test_max_size.
    for target_size in cfg.FLAGS2["test_scales"]:
        scale = float(target_size) / float(smallest)
        # Prevent the biggest axis from being more than MAX_SIZE
        if np.round(scale * largest) > cfg.FLAGS.test_max_size:
            scale = float(cfg.FLAGS.test_max_size) / float(largest)
        level = cv2.resize(source, None, None, fx=scale, fy=scale,
                           interpolation=cv2.INTER_LINEAR)
        scales.append(scale)
        pyramid.append(level)

    # Create a blob to hold the input images: the list of pyramid levels is
    # turned into a single array.  Per the original notes, the levels differ
    # in size, so the array is sized by the largest height and width among
    # them.
    return im_list_to_blob(pyramid), np.array(scales)
def _get_image_blob(roidb, scale_inds):
    """Build the input blob for the roidb images at the selected scales.

    Args:
        roidb: sequence of entries with an ``'image'`` path and a
            ``'flipped'`` flag.
        scale_inds: per-image indices into ``cfg.FLAGS2["scales"]``.

    Returns:
        tuple: ``(blob, im_scales)`` where ``blob`` is the output of
        ``im_list_to_blob`` and ``im_scales`` is the list of per-image scale
        factors returned by ``prep_im_for_blob``.
    """
    blob_inputs = []
    scale_list = []
    for n, record in enumerate(roidb):
        # Read the image data (H x W x 3 per the original notes).
        pixels = cv2.imread(record['image'])
        # If the ground truth was flipped, flip the image as well.
        pixels = pixels[:, ::-1, :] if record['flipped'] else pixels
        # 600 per the original notes.
        target_size = cfg.FLAGS2["scales"][scale_inds[n]]
        # Per the original notes: yields the mean-subtracted, rescaled pixels
        # and the scale ratio.
        pixels, ratio = prep_im_for_blob(pixels, cfg.FLAGS2["pixel_means"],
                                         target_size, cfg.FLAGS.max_size)
        scale_list.append(ratio)
        blob_inputs.append(pixels)

    # Merge all image pixels into a single blob.
    merged = im_list_to_blob(blob_inputs)
    return merged, scale_list
def _get_image_blob(roidb, scale_inds):
    """Build the input blob for the roidb images at the selected scales.

    Args:
        roidb: sequence of entries with an ``'image'`` path and a
            ``'flipped'`` flag.
        scale_inds: per-image indices into ``cfg.TRAIN.SCALES``.

    Returns:
        tuple: ``(blob, im_scales)`` where ``blob`` is the output of
        ``im_list_to_blob`` and ``im_scales`` is the list of per-image scale
        factors returned by ``prep_im_for_blob``.
    """
    processed_ims = []
    im_scales = []
    # enumerate() replaces xrange(), which does not exist on Python 3; it
    # behaves the same on Python 2.
    for i, entry in enumerate(roidb):
        im = cv2.imread(entry['image'])
        if entry['flipped']:
            im = im[:, ::-1, :]
        target_size = cfg.TRAIN.SCALES[scale_inds[i]]
        im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
                                        cfg.TRAIN.MAX_SIZE)
        im_scales.append(im_scale)
        processed_ims.append(im)

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)
    return blob, im_scales
def _get_image_blob(roidb, scale_inds):
    """Build the input blob for the roidb images at the selected scales.

    Args:
        roidb: sequence of entries with an ``'image'`` path and a
            ``'flipped'`` flag.
        scale_inds: per-image indices into ``cfg.TRAIN.SCALES``.

    Returns:
        tuple: ``(blob, im_scales)`` where ``blob`` is the output of
        ``im_list_to_blob`` and ``im_scales`` is the list of per-image scale
        factors returned by ``prep_im_for_blob``.
    """
    def _read(entry):
        """Read an entry's image, mirrored when the entry is flipped."""
        image = cv2.imread(entry['image'])
        if entry['flipped']:
            return image[:, ::-1, :]
        return image

    images = []
    scales = []
    for slot, entry in enumerate(roidb):
        raw = _read(entry)
        target_size = cfg.TRAIN.SCALES[scale_inds[slot]]
        prepared, scale = prep_im_for_blob(raw, cfg.PIXEL_MEANS, target_size,
                                           cfg.TRAIN.MAX_SIZE)
        scales.append(scale)
        images.append(prepared)

    # Create a blob to hold the input images
    return im_list_to_blob(images), scales
def _get_image_blob(roidb, scale_inds):
    """Build the input blob for the roidb images at the selected scales.

    Args:
        roidb: sequence of entries with an ``'image'`` path and a
            ``'flipped'`` flag.
        scale_inds: per-image indices into ``cfg.TRAIN.SCALES``.

    Returns:
        tuple: ``(blob, im_scales)`` where ``blob`` is the output of
        ``im_list_to_blob`` and ``im_scales`` is the list of per-image scale
        factors returned by ``prep_im_for_blob``.
    """
    # Each element is the (image, scale) pair produced for one roidb entry.
    results = []
    for k, item in enumerate(roidb):
        # Read the image as an array.
        frame = cv2.imread(item['image'])
        # For a horizontally flipped entry, reverse the second axis of the
        # 3-D array.
        if item['flipped']:
            frame = frame[:, ::-1, :]
        # Pick the target size (per the original notes, of the shortest
        # side) for this image.
        target_size = cfg.TRAIN.SCALES[scale_inds[k]]
        # Rescale the image and obtain the scale that was applied.
        frame, frame_scale = prep_im_for_blob(frame, cfg.PIXEL_MEANS,
                                              target_size,
                                              cfg.TRAIN.MAX_SIZE)
        results.append((frame, frame_scale))

    im_scales = [frame_scale for _, frame_scale in results]
    processed_ims = [frame for frame, _ in results]

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)
    return blob, im_scales
def _get_image_blob(roidb, scale_inds):
    """Build the input blob for the roidb images at the selected scales.

    The roidb images are rescaled; the blob and the scale factors are
    returned.

    Args:
        roidb: sequence of entries with an ``'image'`` path and a
            ``'flipped'`` flag.
        scale_inds: per-image indices into ``cfg.FLAGS2["scales"]``.

    Returns:
        tuple: ``(blob, im_scales)`` where ``blob`` is the output of
        ``im_list_to_blob`` and ``im_scales`` is the list of per-image scale
        factors returned by ``prep_im_for_blob``.
    """
    scaled_images, scale_values = [], []
    for pos, rec in enumerate(roidb):
        # Load the image from its path.
        img = cv2.imread(rec['image'])
        # Mirror it horizontally when the entry was flipped earlier.
        if rec['flipped']:
            img = img[:, ::-1, :]
        # Per the original notes cfg.FLAGS2["scales"] is (600,), so every
        # target size is 600.
        target_size = cfg.FLAGS2["scales"][scale_inds[pos]]
        # Rescale the image and keep the scale that was used.
        img, img_scale = prep_im_for_blob(img, cfg.FLAGS2["pixel_means"],
                                          target_size, cfg.FLAGS.max_size)
        scale_values.append(img_scale)
        scaled_images.append(img)

    # Put the rescaled images into a blob.
    return im_list_to_blob(scaled_images), scale_values