Exemple #1
0
def _get_image_blob(roidb, scale_inds):
    """Assemble one input blob from every image referenced by ``roidb``.

    Each entry is read from ``entry['image']``, expanded to three channels
    when it is single-channel, channel-reversed, mirrored left-right when
    ``entry['flipped']`` is truthy, and finally passed to
    ``prep_im_for_blob`` together with the training scale selected by the
    matching item of ``scale_inds``.

    Arguments:
      roidb: sequence of entries providing the 'image' and 'flipped' keys
      scale_inds: per-image indices into ``cfg.TRAIN.SCALES``
    Returns:
      blob: whatever ``im_list_to_blob`` builds from the prepared images
      im_scales (list): one scale per image, as returned by
        ``prep_im_for_blob``
    """
    prepared = []
    im_scales = []

    for idx, entry in enumerate(roidb):
        image = imread(entry['image'])

        # A 2-D array has no channel axis: replicate it three times.
        if image.ndim == 2:
            image = np.concatenate([image[:, :, np.newaxis]] * 3, axis=2)

        # Reverse the channel axis (rgb -> bgr); the rest of the pipeline
        # was written against cv2-style channel ordering.
        image = image[:, :, ::-1]

        # Horizontal mirror for entries marked as flipped.
        if entry['flipped']:
            image = image[:, ::-1, :]

        scale_target = cfg.TRAIN.SCALES[scale_inds[idx]]
        image, scale = prep_im_for_blob(image, cfg.PIXEL_MEANS,
                                        scale_target, cfg.TRAIN.MAX_SIZE)
        im_scales.append(scale)
        prepared.append(image)

    # Pack all prepared images into a single blob.
    return im_list_to_blob(prepared), im_scales
Exemple #2
0
def _get_image_blob(im):
    """Turn one image into a square, fixed-size network input.

    The image is copied to float32, ``cfg.PIXEL_MEANS`` is subtracted in
    place, and the result is resized to ``cfg.TEST.SIZE`` by
    ``cfg.TEST.SIZE`` with bilinear interpolation.

    Arguments:
      im (ndarray): a color image in BGR order
    Returns:
      blob (ndarray): output of ``im_list_to_blob`` for the single resized
        image
      im_scale_factors (ndarray): two values, ``[width_scale,
        height_scale]``, each equal to ``cfg.TEST.SIZE`` divided by the
        corresponding extent of the input image
    """
    centered = im.astype(np.float32, copy=True)
    centered -= cfg.PIXEL_MEANS

    extents = centered.shape
    side = cfg.TEST.SIZE

    # Width and height get separate factors: the square resize below does
    # not keep the aspect ratio.
    width_scale = float(side) / float(extents[1])
    height_scale = float(side) / float(extents[0])

    resized = cv2.resize(centered, (side, side),
                         interpolation=cv2.INTER_LINEAR)

    # Pack the single resized image into a blob.
    blob = im_list_to_blob([resized])
    return blob, np.array([width_scale, height_scale])
Exemple #3
0
def _get_image_blob(im):
    """Build the multi-scale network input for a single image.

    The image is copied to float32 and ``cfg.PIXEL_MEANS`` is subtracted.
    For every entry of ``cfg.TEST.SCALES`` the mean-subtracted image is
    resized uniformly so its shorter side matches that entry, unless the
    longer side would then round to more than ``cfg.TEST.MAX_SIZE``; in
    that case the longer side is scaled to ``cfg.TEST.MAX_SIZE`` instead.

    Arguments:
      im (ndarray): a color image in BGR order
    Returns:
      blob (ndarray): output of ``im_list_to_blob`` over the resized images
      im_scale_factors (ndarray): the scale applied for each entry of
        ``cfg.TEST.SCALES`` (relative to im)
    """
    source = im.astype(np.float32, copy=True)
    source -= cfg.PIXEL_MEANS

    height_width = source.shape[0:2]
    short_side = np.min(height_width)
    long_side = np.max(height_width)

    resized_images = []
    scales = []
    for target_size in cfg.TEST.SCALES:
        scale = float(target_size) / float(short_side)
        # Cap the scale so the longer side does not exceed MAX_SIZE.
        long_side_too_big = np.round(scale * long_side) > cfg.TEST.MAX_SIZE
        if long_side_too_big:
            scale = float(cfg.TEST.MAX_SIZE) / float(long_side)
        resized = cv2.resize(source, None, None, fx=scale, fy=scale,
                             interpolation=cv2.INTER_LINEAR)
        scales.append(scale)
        resized_images.append(resized)

    # Pack every resized copy into one blob.
    return im_list_to_blob(resized_images), np.array(scales)
def _get_image_blob(roidb, scale_inds):
    """Create the training input blob for the images listed in ``roidb``.

    Arguments:
      roidb: sequence of entries providing the 'image' and 'flipped' keys
      scale_inds: per-image indices into ``cfg.TRAIN.SCALES``
    Returns:
      blob: result of ``im_list_to_blob`` over the prepared images
        (NOTE(review): the original comment describes it as [1, w, h, c];
        confirm against ``im_list_to_blob``)
      im_scales (list): per-image resize factor returned by
        ``prep_im_for_blob``
    """
    images_ready = []
    im_scales = []
    # NOTE(review): the original comment states num_images = 1 here; the
    # loop itself handles any number of entries.
    for position, record in enumerate(roidb):
        picture = imread(record['image'])
        if len(picture.shape) == 2:
            # Single-channel input: repeat the plane to obtain 3 channels.
            plane = picture[:, :, np.newaxis]
            picture = np.concatenate((plane, plane, plane), axis=2)
        # Reverse the channel order (rgb -> bgr), since the original
        # pipeline used cv2.
        picture = picture[:, :, ::-1]
        if record['flipped']:
            # Flip the image horizontally.
            picture = picture[:, ::-1, :]
        wanted_size = cfg.TRAIN.SCALES[scale_inds[position]]
        # Presumably im_scale is target_size / shorter image side, i.e. the
        # factor mapping the original short side to the training size
        # (per the original comment) -- TODO confirm in prep_im_for_blob.
        picture, factor = prep_im_for_blob(picture, cfg.PIXEL_MEANS,
                                           wanted_size, cfg.TRAIN.MAX_SIZE)
        im_scales.append(factor)
        images_ready.append(picture)

    # Create a blob to hold the input images.
    blob = im_list_to_blob(images_ready)
    return blob, im_scales
  def get_evaluate_batch(self, im_path, index):
      """Build the blob dictionary for evaluating a single image file.

      The image at ``im_path`` is read, expanded to three channels when it
      is single-channel, channel-reversed, and prepared with
      ``prep_im_for_blob`` using ``cfg.TRAIN.SCALES[0]`` and
      ``cfg.TRAIN.MAX_SIZE``.

      Arguments:
        im_path: location of the image passed to ``imread``
        index: identifier stored unchanged under 'img_id'
      Returns:
        dict with the keys
          'data': ``im_list_to_blob`` output for the prepared image
          'gt_boxes': empty float32 array of shape (0, 5); the original
            comment gives the column layout as (x1, y1, x2, y2, cls)
          'im_info': float32 array ``[[rows, cols, im_scale]]`` taken from
            the prepared image
          'img_id': ``index``
      """
      picture = imread(im_path)

      # A 2-D array has no channel axis: replicate it three times.
      if picture.ndim == 2:
          picture = np.concatenate([picture[:, :, None]] * 3, axis=-1)

      # Reverse the channel axis (rgb -> bgr); the original pipeline
      # used cv2.
      picture = picture[:, :, ::-1]

      picture, scale = prep_im_for_blob(picture, cfg.PIXEL_MEANS,
                                        cfg.TRAIN.SCALES[0],
                                        cfg.TRAIN.MAX_SIZE)

      return {
          'data': im_list_to_blob([picture]),
          # No ground-truth boxes are supplied at evaluation time.
          'gt_boxes': np.empty((0, 5), dtype=np.float32),
          'im_info': np.array([[picture.shape[0], picture.shape[1], scale]],
                              dtype=np.float32),
          'img_id': index,
      }
def _get_image_blob(im):
    """Convert one image into the input expected by the network.

    Arguments:
      im (ndarray): a color image in BGR order
    Returns:
      blob (ndarray): a data blob holding an image pyramid, one level per
        entry of ``cfg.TEST.SCALES``
      im_scale_factors (ndarray): image scales (relative to im) used in
        the image pyramid
    """
    base = im.astype(np.float32, copy=True)
    # Subtract the dataset mean so the shared component is removed and the
    # differences between individual images stand out.
    base -= cfg.PIXEL_MEANS

    dims = base.shape
    shortest = np.min(dims[0:2])
    longest = np.max(dims[0:2])

    def _choose_scale(target_size):
        # Scale the shorter side to target_size, but let the limit on the
        # longer side take priority. Example, assuming target_size = 600
        # and MAX_SIZE = 1000:
        #   input 375*500*3 -> resized to 600*800*3
        #   input 375*800*3 -> resized to 469*1000*3
        factor = float(target_size) / float(shortest)
        # np.round rounds to the nearest value before the comparison.
        if np.round(factor * longest) > cfg.TEST.MAX_SIZE:
            factor = float(cfg.TEST.MAX_SIZE) / float(longest)
        return factor

    pyramid = []
    factors = []
    for target_size in cfg.TEST.SCALES:
        factor = _choose_scale(target_size)
        # cv2.resize arguments: input image, output image, output size,
        # scale factor along w, scale factor along h, interpolation method.
        level = cv2.resize(base,
                           None,
                           None,
                           fx=factor,
                           fy=factor,
                           interpolation=cv2.INTER_LINEAR)
        factors.append(factor)
        pyramid.append(level)

    # Create a blob to hold the input images.
    return im_list_to_blob(pyramid), np.array(factors)