Beispiel #1
0
def _get_image_blob(roidb, target_size):
  """Load every roidb image and pack the prepared images into one blob.

  Args:
    roidb: sequence of dicts; each entry provides an 'image' path and a
      'flipped' flag.
    target_size: sequence indexed like ``roidb``; entry ``i`` is handed to
      ``prep_im_for_blob`` as the target size for image ``i``.

  Returns:
    Tuple ``(blob, im_scales)``: the result of ``im_list_to_blob`` on the
    prepared images, and the list of scale values returned by
    ``prep_im_for_blob`` (one per image).
  """
  prepared = []
  im_scales = []
  for idx, entry in enumerate(roidb):
    image = imread(entry['image'])

    # Replicate a single-channel image into three identical channels.
    if image.ndim == 2:
      image = np.stack((image, image, image), axis=2)

    # The downstream code expects cv2-style channel order: rgb -> bgr.
    image = image[:, :, ::-1]

    # Mirror along the width axis when the entry is marked as flipped.
    if entry['flipped']:
      image = image[:, ::-1, :]

    image, scale = prep_im_for_blob(image, cfg.PIXEL_MEANS, target_size[idx],
                                    cfg.TRAIN.MAX_SIZE)
    im_scales.append(scale)
    prepared.append(image)

  return im_list_to_blob(prepared), im_scales
Beispiel #2
0
def _get_image_blob(im):
    """Turn one image into a multi-scale network input.

    Arguments:
      im (ndarray): a color image in BGR order
    Returns:
      blob (ndarray): result of ``im_list_to_blob`` over one resized copy of
        the mean-subtracted image per entry of ``cfg.TEST.SCALES``
      im_scale_factors (ndarray): the scale (relative to im) used for each
        resized copy
    """
    centered = im.astype(np.float32, copy=True)
    centered -= cfg.PIXEL_MEANS

    spatial_dims = centered.shape[0:2]
    short_side = np.min(spatial_dims)
    long_side = np.max(spatial_dims)

    def _scale_for(target_size):
        # Scale the short side to target_size, unless that would push the
        # long side beyond MAX_SIZE; then the long side dictates the scale.
        scale = float(target_size) / float(short_side)
        if np.round(scale * long_side) > cfg.TEST.MAX_SIZE:
            scale = float(cfg.TEST.MAX_SIZE) / float(long_side)
        return scale

    im_scale_factors = [_scale_for(size) for size in cfg.TEST.SCALES]
    processed_ims = [
        cv2.resize(centered, None, None, fx=scale, fy=scale,
                   interpolation=cv2.INTER_LINEAR)
        for scale in im_scale_factors
    ]

    # Pack all resized copies into a single blob.
    return im_list_to_blob(processed_ims), np.array(im_scale_factors)
def _get_image_blob(im):
    """Turn one image into a multi-scale network input.

    The image is converted to float32, divided by 255, then normalised per
    channel with fixed mean / standard-deviation triples (the original note
    says this replaces ``cfg.PIXEL_MEANS`` "to use pytorch models").

    Arguments:
      im (ndarray): a 3-D color image; the original docstring states BGR
        order.  NOTE(review): the mean/std triples look like the usual
        RGB-ordered ImageNet statistics -- confirm the channel order.
    Returns:
      blob (ndarray): result of ``im_list_to_blob`` over one resized copy
        per entry of ``cfg.TEST.SCALES``
      im_scale_factors (ndarray): the scale (relative to im) of each copy
    """
    normalised = im[:, :, :].astype(np.float32, copy=True)
    normalised /= 255.  # map pixel range to [0, 1]
    channel_means = [0.485, 0.456, 0.406]
    normalised -= channel_means
    channel_stds = [0.229, 0.224, 0.225]
    normalised /= channel_stds

    height_width = normalised.shape[0:2]
    shorter = np.min(height_width)
    longer = np.max(height_width)

    resized_copies = []
    scales = []
    for wanted in cfg.TEST.SCALES:
        factor = float(wanted) / float(shorter)
        # Cap the scale so the longer side never exceeds MAX_SIZE.
        if np.round(factor * longer) > cfg.TEST.MAX_SIZE:
            factor = float(cfg.TEST.MAX_SIZE) / float(longer)
        resized_copies.append(
            cv2.resize(normalised, None, None, fx=factor, fy=factor,
                       interpolation=cv2.INTER_LINEAR))
        scales.append(factor)

    # Pack all resized copies into a single blob.
    return im_list_to_blob(resized_copies), np.array(scales)
def _get_image_seg_blob(roidb, scale_inds):
    """Builds image and segmentation-label blobs from the roidb entries.

    For every entry the image is read from ``entry['image']`` and the label
    map from the same path with 'JPEGImages' replaced by 'SegmentationClass'
    and '.jpg' replaced by '.png' (read with ``mode='P'``).

    Args:
      roidb: sequence of dicts providing an 'image' path and a 'flipped' flag.
      scale_inds: per-image indices into ``cfg.TRAIN.SCALES``.

    Returns:
      Tuple ``(blob, seg_blob, im_scales)``: ``im_list_to_blob`` of the
      prepared images, ``seg_list_to_blob`` of the prepared label maps, and
      the list of scale values returned by ``prep_im_seg_for_blob``.
    """
    num_images = len(roidb)

    processed_ims = []
    processed_seglabel = []
    im_scales = []

    for i in range(num_images):
        im = imread(roidb[i]['image'])
        seg_label_name = (roidb[i]['image']).replace(
            'JPEGImages', 'SegmentationClass').replace('.jpg', '.png')
        seg_label = imread(seg_label_name, mode='P')

        # Replicate a single-channel image into three identical channels.
        if len(im.shape) == 2:
            im = im[:, :, np.newaxis]
            im = np.concatenate((im, im, im), axis=2)
        # flip the channel, since the original one using cv2
        # rgb -> bgr
        im = im[:, :, ::-1]

        # Mirror image and label map together so they stay aligned.
        if roidb[i]['flipped']:
            im = im[:, ::-1, :]
            seg_label = seg_label[:, ::-1]

        target_size = cfg.TRAIN.SCALES[scale_inds[i]]
        im, seg_label, im_scale = prep_im_seg_for_blob(im, seg_label,
                                                       cfg.PIXEL_MEANS,
                                                       target_size,
                                                       cfg.TRAIN.MAX_SIZE)
        im_scales.append(im_scale)
        processed_ims.append(im)
        processed_seglabel.append(seg_label)

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)
    # The label blob must be built from the label maps; the previous code
    # passed the images here, so the collected labels were never used.
    seg_blob = seg_list_to_blob(processed_seglabel)

    return blob, seg_blob, im_scales
Beispiel #5
0
def _get_image_blob(roidb, scale_inds,depth=False):
  """Builds an input blob from the images in the roidb at the specified
  scales, optionally appending a depth channel.

  Args:
    roidb: sequence of dicts providing an 'image' path and a 'flipped' flag.
    scale_inds: per-image indices into ``cfg.TRAIN.SCALES``.
    depth: when truthy, the depth image found by replacing "JPEGImages" with
      "DepthImages" in the image path is appended as an extra last channel
      and ``cfg.DEPTH_MEANS`` is used instead of ``cfg.PIXEL_MEANS``.

  Returns:
    Tuple ``(blob, im_scales)``: ``im_list_to_blob`` of the prepared images
    and the list of scale values returned by ``prep_im_for_blob``.
  """
  num_images = len(roidb)
  processed_ims = []
  im_scales = []
  for i in range(num_images):
    im = imread(roidb[i]['image'])

    # Replicate a single-channel image into three identical channels.
    if len(im.shape) == 2:
      im = im[:,:,np.newaxis]
      im = np.concatenate((im,im,im), axis=2)
    # flip the channel, since the original one using cv2
    # rgb -> bgr
    im = im[:,:,::-1]

    if depth:
      # Only touch the depth file when it is actually requested, so the
      # default (depth=False) path works on datasets without DepthImages
      # and does no wasted I/O.
      depth_name = roidb[i]['image'].replace("JPEGImages","DepthImages")
      depth_val = np.expand_dims(imread(depth_name),-1)
      im = np.concatenate([im,depth_val],-1)
      means = cfg.DEPTH_MEANS
    else:
      means = cfg.PIXEL_MEANS

    # The flip is applied after concatenation so the depth channel is
    # mirrored together with the colour channels.
    if roidb[i]['flipped']:
      im = im[:, ::-1, :]
    target_size = cfg.TRAIN.SCALES[scale_inds[i]]
    im, im_scale = prep_im_for_blob(im, means, target_size,
                                    cfg.TRAIN.MAX_SIZE)
    im_scales.append(im_scale)
    processed_ims.append(im)

  # Create a blob to hold the input images
  blob = im_list_to_blob(processed_ims)

  return blob, im_scales
Beispiel #6
0
def _get_image_blob(roidb, scale_inds, training):
    """Load every roidb image and pack the prepared images into one blob.

    Args:
      roidb: sequence of dicts providing an 'image' path and a 'flipped'
        flag.
      scale_inds: per-image indices into ``cfg.TRAIN.SCALES``.
      training: forwarded unchanged as the last argument of
        ``prep_im_for_blob``.

    Returns:
      Tuple ``(blob, im_scales)``: ``im_list_to_blob`` of the prepared
      images and the list of scale values returned by ``prep_im_for_blob``.
    """
    prepared = []
    im_scales = []
    for idx, entry in enumerate(roidb):
        # Original author's note: this part might need to change (drop the
        # cv2-related handling, move to 2-D input if possible).
        image = imageio.imread(entry['image'])

        # Promote a 2-D (single-channel) image to three identical channels.
        if image.ndim == 2:
            image = np.repeat(image[:, :, np.newaxis], 3, axis=2)

        # Reverse the channel axis (rgb -> bgr), matching cv2's ordering.
        image = image[:, :, ::-1]

        # Horizontal mirror: reverses the second (width) axis.
        if entry['flipped']:
            image = image[:, ::-1, :]

        wanted_size = cfg.TRAIN.SCALES[scale_inds[idx]]
        image, scale = prep_im_for_blob(image, cfg.PIXEL_MEANS, wanted_size,
                                        cfg.TRAIN.MAX_SIZE, training)
        im_scales.append(scale)
        prepared.append(image)

    # Convert the list of prepared images into a single blob.
    return im_list_to_blob(prepared), im_scales
Beispiel #7
0
def _get_image_blob(roidb, scale_inds, transfrom):
    """Read the roidb images with cv2 and pack them into one blob.

    Args:
      roidb: sequence of dicts providing an 'image' path.
      scale_inds: per-image indices into ``cfg.TRAIN.SCALES``.
      transfrom: forwarded unchanged as the last argument of
        ``prep_im_for_blob`` (parameter name kept as spelled by callers).

    Returns:
      Tuple ``(blob, im_scales)``: ``im_list_to_blob`` of the prepared
      images and the list of scale values returned by ``prep_im_for_blob``.
    """
    prepared = []
    im_scales = []
    for idx, entry in enumerate(roidb):
        raw = cv2.imread(entry['image'])
        wanted_size = cfg.TRAIN.SCALES[scale_inds[idx]]
        image, scale = prep_im_for_blob(raw, cfg.PIXEL_MEANS, wanted_size,
                                        cfg.TRAIN.MAX_SIZE, transfrom)
        im_scales.append(scale)
        prepared.append(image)

    # Pack the prepared images into a single blob.
    return im_list_to_blob(prepared), im_scales
Beispiel #8
0
def _get_image_blob(im):
    """Turn one image into a multi-scale network input.

    Arguments:
      im (ndarray): a color image in BGR order
    Returns:
      blob (ndarray): result of ``im_list_to_blob`` over one resized copy of
        the mean-subtracted image per entry of ``cfg.TEST.SCALES``
      im_scale_factors (ndarray): the scale (relative to im) of each copy
    """
    mean_free = im.astype(np.float32, copy=True)
    mean_free -= cfg.PIXEL_MEANS

    rows_cols = mean_free.shape[0:2]
    min_side = np.min(rows_cols)
    max_side = np.max(rows_cols)

    # First decide every scale factor ...
    scale_list = []
    for wanted in cfg.TEST.SCALES:
        ratio = float(wanted) / float(min_side)
        # ... never letting the larger side grow past MAX_SIZE.
        if np.round(ratio * max_side) > cfg.TEST.MAX_SIZE:
            ratio = float(cfg.TEST.MAX_SIZE) / float(max_side)
        scale_list.append(ratio)

    # ... then produce one bilinearly resized copy per factor.
    resized = [
        cv2.resize(mean_free, None, None, fx=ratio, fy=ratio,
                   interpolation=cv2.INTER_LINEAR)
        for ratio in scale_list
    ]

    return im_list_to_blob(resized), np.array(scale_list)
def _get_image_blob(roidb, scale_inds):
  """Builds an input blob from the images in the roidb at the specified
  scales.

  Args:
    roidb: sequence of dicts providing an 'image' path, a 'flipped' flag and
      'height' / 'width' (used only for the placeholder image below).
    scale_inds: per-image indices into ``cfg.TRAIN.SCALES``.

  Returns:
    Tuple ``(blob, im_scales)``: ``im_list_to_blob`` of the prepared images
    and the list of scale values returned by ``prep_im_for_blob``.

  Raises:
    ValueError: if a loaded image is neither 0-, 2- nor 3-dimensional.
  """
  num_images = len(roidb)

  processed_ims = []
  im_scales = []
  for i in range(num_images):
    im = imread(roidb[i]['image'], mode='RGB')

    # A 0-d result is replaced by an all-zero placeholder of the annotated
    # size so the rest of the pipeline can proceed.
    if len(im.shape) == 0:
      im = np.zeros((roidb[i]['height'], roidb[i]['width'], 3))

    # Replicate a single-channel image into three identical channels.  This
    # must run before the rank check; it used to sit behind a debugger
    # breakpoint and could not be reached without one.
    if len(im.shape) == 2:
      im = im[:,:,np.newaxis]
      im = np.concatenate((im,im,im), axis=2)

    # Fail loudly instead of dropping into pdb, which blocks non-interactive
    # runs and data-loader workers.
    if len(im.shape) != 3:
      raise ValueError('unexpected image shape %s for %s'
                       % (im.shape, roidb[i]['image']))

    # Keep only the first three channels (drops e.g. an alpha channel).
    if im.shape[2] > 3:
      im = im[:,:,:3]

    # flip the channel, since the original one using cv2
    # rgb -> bgr
    im = im[:,:,::-1]

    if roidb[i]['flipped']:
      im = im[:, ::-1, :]
    target_size = cfg.TRAIN.SCALES[scale_inds[i]]
    im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
                    cfg.TRAIN.MAX_SIZE)
    im_scales.append(im_scale)
    processed_ims.append(im)

  # Create a blob to hold the input images
  blob = im_list_to_blob(processed_ims)

  return blob, im_scales
Beispiel #10
0
def _get_image_blob(roidb, scale_inds = -1):
    """Builds an input blob from the images in the roidb at the specified
    scales.

    Args:
      roidb: sequence of dicts providing an 'image' path, a 'rotated' count
        (passed to ``np.rot90``) and a 'flipped' flag.
      scale_inds: ``-1`` (default) to prepare every image with
        ``prep_im_for_blob_fixed_size`` at ``cfg.TRAIN.COMMON.INPUT_SIZE``;
        otherwise per-image indices into ``cfg.TRAIN.RCNN_COMMON.SCALES``.

    Returns:
      Tuple ``(blob, im_scales)``: ``im_list_to_blob`` of the prepared
      images and the list of scale values returned by the prep function.
    """
    num_images = len(roidb)

    # ``scale_inds == -1`` yields an array when scale_inds is an ndarray, and
    # the truth value of a multi-element array is ambiguous (ValueError).
    # Treat only a single-valued comparison as the "fixed size" sentinel;
    # scalars, lists and one-element arrays behave exactly as before.
    sentinel_check = scale_inds == -1
    use_fixed_size = (np.size(sentinel_check) == 1
                      and bool(np.all(sentinel_check)))

    processed_ims = []
    im_scales = []
    for i in range(num_images):
        im = imread(roidb[i]['image'])

        # Replicate a single-channel image into three identical channels.
        if len(im.shape) == 2:
            im = im[:,:,np.newaxis]
            im = np.concatenate((im,im,im), axis=2)

        # flip the channel, since the original one using cv2
        # rgb -> bgr
        im = im[:, :, ::-1]

        # Apply the augmentation recorded in the roidb entry: rotation by
        # multiples of 90 degrees, then an optional horizontal mirror.
        im = np.rot90(im, roidb[i]['rotated'])
        if roidb[i]['flipped']:
            im = im[:, ::-1, :]

        if use_fixed_size:
            target_size = cfg.TRAIN.COMMON.INPUT_SIZE
            im, im_scale = prep_im_for_blob_fixed_size(im, cfg.PIXEL_MEANS, target_size)
        else:
            target_size = cfg.TRAIN.RCNN_COMMON.SCALES[scale_inds[i]]
            im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
                    cfg.TRAIN.COMMON.MAX_SIZE)
        im_scales.append(im_scale)
        processed_ims.append(im)

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)

    return blob, im_scales
def _get_image_blob(roidb, scale_inds):
    """Load every roidb image and pack the prepared images into one blob.

    Unlike sibling variants, this one does not consult a 'flipped' flag.

    Args:
      roidb: sequence of dicts providing an 'image' path.
      scale_inds: per-image indices into ``cfg.TRAIN.SCALES``.

    Returns:
      Tuple ``(blob, im_scales)``: ``im_list_to_blob`` of the prepared
      images and the list of scale values returned by ``prep_im_for_blob``.
    """
    prepared = []
    im_scales = []
    for idx, entry in enumerate(roidb):
        image = imread(entry['image'])

        # Single-channel input: replicate it into three channels.
        if image.ndim == 2:
            image = np.stack((image, image, image), axis=2)

        # Four-channel input: discard the last (alpha) channel.
        if image.shape[-1] == 4:
            image = image[:, :, :-1]

        # Reverse the channel axis (rgb -> bgr), matching cv2's ordering.
        image = image[:, :, ::-1]

        wanted_size = cfg.TRAIN.SCALES[scale_inds[idx]]
        image, scale = prep_im_for_blob(image, cfg.PIXEL_MEANS, wanted_size,
                                        cfg.TRAIN.MAX_SIZE)
        im_scales.append(scale)
        prepared.append(image)

    # Pack the prepared images into a single blob.
    return im_list_to_blob(prepared), im_scales
Beispiel #12
0
    def _get_image_blob(self, im, frame_id):
        '''Build the network input dictionary for one video frame.

        :param im: BGR nd.array
        :param frame_id: frame number in the given video
        :return dict with keys 'data' (blob from ``im_list_to_blob``),
            'im_info' (1x3 float32 array: blob.shape[1], blob.shape[2] and
            the first scale factor) and 'frame_number' (1x1 array)
        '''
        centered = im.astype(np.float32, copy=True)
        centered -= cfg.PIXEL_MEANS

        spatial = centered.shape[0:2]
        short_side = np.min(spatial)
        long_side = np.max(spatial)

        resized_frames = []
        factors = []
        for wanted in cfg.TEST.SCALES:
            factor = float(wanted) / float(short_side)
            # Keep the longer side within MAX_SIZE.
            if np.round(factor * long_side) > cfg.TEST.MAX_SIZE:
                factor = float(cfg.TEST.MAX_SIZE) / float(long_side)
            resized_frames.append(
                cv2.resize(centered, None, None, fx=factor, fy=factor,
                           interpolation=cv2.INTER_LINEAR))
            factors.append(factor)

        blob = im_list_to_blob(resized_frames)
        scales = np.array(factors)

        # Only the first scale is reported in im_info.
        im_info = np.array([[blob.shape[1], blob.shape[2], scales[0]]],
                           dtype=np.float32)
        return {
            'data': blob,
            'im_info': im_info,
            'frame_number': np.array([[frame_id]]),
        }
Beispiel #13
0
def get_image_blob(im):
  """Prepare one image at a randomly chosen training scale.

  A single index into ``cfg.TRAIN.SCALES`` is drawn with
  ``np.random.randint`` and the image is passed to ``prep_im_for_blob``
  together with ``cfg.PIXEL_MEANS``, ``cfg.PIXEL_STDS`` and
  ``cfg.TRAIN.MAX_SIZE``.

  Arguments:
    im: data of image
  Returns:
    blob (ndarray): result of ``im_list_to_blob`` on the one prepared image
    im_scales (list): one-element list with the scale returned by
      ``prep_im_for_blob``
  """
  chosen = np.random.randint(0, high=len(cfg.TRAIN.SCALES), size=1)
  wanted_size = cfg.TRAIN.SCALES[chosen[0]]

  prepared, scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, cfg.PIXEL_STDS,
                                     wanted_size, cfg.TRAIN.MAX_SIZE)

  # Wrap the single image so it can go through the list-based blob builder.
  return im_list_to_blob([prepared]), [scale]
Beispiel #14
0
def _get_image_blob(roidb, scale_inds):
    """Read the roidb images as grayscale and pack them into one blob.

    Args:
      roidb: sequence of dicts providing an 'image' path and a 'flipped'
        flag.
      scale_inds: per-image indices into ``cfg.TRAIN.SCALES``.

    Returns:
      Tuple ``(blob, im_scales)``: ``im_list_to_blob`` of the prepared
      images and the list of scale values returned by ``prep_im_for_blob``.
    """
    prepared = []
    im_scales = []
    for idx, entry in enumerate(roidb):
        image = cv2.imread(entry['image'], cv2.IMREAD_GRAYSCALE)

        # The grayscale read is 2-D; replicate it into three channels.
        if image.ndim == 2:
            image = np.repeat(image[:, :, np.newaxis], 3, axis=2)

        # Channel-axis reversal kept from the colour pipeline (rgb -> bgr).
        image = image[:, :, ::-1]

        # Horizontal mirror when the entry is marked as flipped.
        if entry['flipped']:
            image = image[:, ::-1, :]

        wanted_size = cfg.TRAIN.SCALES[scale_inds[idx]]
        image, scale = prep_im_for_blob(image, cfg.PIXEL_MEANS, wanted_size,
                                        cfg.TRAIN.MAX_SIZE)
        im_scales.append(scale)
        prepared.append(image)

    # Pack the prepared images into a single blob.
    return im_list_to_blob(prepared), im_scales
Beispiel #15
0
def _get_image_blob(im):
    """
    Subtract the pixel means from an image and resize it once per test scale.

    The shorter side is scaled to each value in ``cfg.TEST.SCALES`` unless
    that would make the longer side exceed ``cfg.TEST.MAX_SIZE``, in which
    case the longer side is scaled to ``cfg.TEST.MAX_SIZE`` instead.

    @param im: BGR image (nd array)
    @return: blob, the result of ``im_list_to_blob`` on the resized copies
             im_scale_factors, 1D array with the scale factor of each copy
    """
    shifted = im.astype(np.float32, copy=True)
    shifted -= cfg.PIXEL_MEANS

    sides = shifted.shape[0:2]  # (h, w)
    smaller_side = np.min(sides)
    larger_side = np.max(sides)

    def _pick_scale(target_size):
        # Default: bring the shorter side to target_size.
        candidate = float(target_size) / float(smaller_side)
        # Fallback: bring the longer side to MAX_SIZE when it would overshoot.
        if np.round(candidate * larger_side) > cfg.TEST.MAX_SIZE:
            candidate = float(cfg.TEST.MAX_SIZE) / float(larger_side)
        return candidate

    processed_ims = []
    im_scale_factors = []
    for target_size in cfg.TEST.SCALES:
        chosen = _pick_scale(target_size)
        processed_ims.append(
            cv2.resize(shifted, None, None, fx=chosen, fy=chosen,
                       interpolation=cv2.INTER_LINEAR))
        im_scale_factors.append(chosen)

    # Pack the resized copies into a single blob.
    return im_list_to_blob(processed_ims), np.array(im_scale_factors)
Beispiel #16
0
def _get_image_blob(roidb, scale_inds):
    """Load every roidb image and pack the prepared images into one blob.

    Args:
      roidb: sequence of dicts providing an 'image' path plus 'flipped' and
        'ver_flipped' flags.
      scale_inds: per-image indices into ``cfg.TRAIN.SCALES``.

    Returns:
      Tuple ``(blob, im_scales)``: ``im_list_to_blob`` of the prepared
      images and the list of scale values returned by ``prep_im_for_blob``.
    """
    prepared = []
    im_scales = []
    for idx, entry in enumerate(roidb):
        # imageio is used because scipy.misc no longer provides imread.
        image = imageio.imread(entry['image'])

        # Single-channel input: replicate it into three channels.
        if image.ndim == 2:
            image = np.stack((image, image, image), axis=2)

        # Reverse the channel axis (rgb -> bgr), matching cv2's ordering.
        image = image[:, :, ::-1]

        # Horizontal mirror (reverse the width axis).
        if entry['flipped']:
            image = image[:, ::-1, :]
        # Vertical mirror (reverse the height axis).
        if entry['ver_flipped']:
            image = image[::-1, :, :]

        wanted_size = cfg.TRAIN.SCALES[scale_inds[idx]]
        image, scale = prep_im_for_blob(image, cfg.PIXEL_MEANS, wanted_size,
                                        cfg.TRAIN.MAX_SIZE)
        im_scales.append(scale)
        prepared.append(image)

    # Pack the prepared images into a single blob.
    return im_list_to_blob(prepared), im_scales
Beispiel #17
0
def _get_image_blob(roidb, scale_inds):
    """Load every roidb image and pack the prepared images into one blob.

    Args:
      roidb: sequence of dicts providing an 'image' path and a 'flipped'
        flag.
      scale_inds: per-image indices into ``cfg.TRAIN.SCALES``.

    Returns:
      Tuple ``(blob, im_scales)``: ``im_list_to_blob`` of the prepared
      images and the list of scale values returned by ``prep_im_for_blob``.
    """
    prepared = []
    im_scales = []
    for idx, entry in enumerate(roidb):
        image = imread(entry['image'])  # RGB

        # 1-channel image: tile it into three channels.
        if image.ndim == 2:
            image = np.repeat(image[:, :, np.newaxis], 3, axis=2)

        # 4-channel image: keep everything but the last channel.
        if image.shape[-1] == 4:
            image = image[:, :, :-1]

        # rgb -> bgr
        image = image[:, :, ::-1]

        # Horizontal mirror when the entry is marked as flipped.
        if entry['flipped']:
            image = image[:, ::-1, :]

        wanted_size = cfg.TRAIN.SCALES[scale_inds[idx]]
        image, scale = prep_im_for_blob(image, cfg.PIXEL_MEANS, wanted_size,
                                        cfg.TRAIN.MAX_SIZE)
        im_scales.append(scale)
        prepared.append(image)

    # Pack the prepared images into a single blob.
    return im_list_to_blob(prepared), im_scales
def _get_image_blob(roidb, scale_inds):
    """
    Read the roidb images with cv2, prepare them and pack them into a blob.

    No channel reordering is done here because cv2.imread is used directly.

    :param roidb: list of annotation dicts providing an 'image' path and a
                  'flipped' flag (the original note says it holds one entry)
    :param scale_inds: per-image indices into cfg.TRAIN.SCALES
    :return blob: result of im_list_to_blob on the prepared images
                  (original note: a 4D array (1, 3, h, w) -- TODO confirm)
            im_scales: list of scale values returned by prep_im_for_blob
    """
    prepared = []
    im_scales = []

    for idx, entry in enumerate(roidb):
        image = cv2.imread(entry['image'])

        # Horizontal mirror when the entry is marked as flipped.
        if entry['flipped']:
            image = image[:, ::-1, :]

        # Hand over to prep_im_for_blob with the pixel means and size limits.
        wanted_size = cfg.TRAIN.SCALES[scale_inds[idx]]
        image, scale = prep_im_for_blob(image, cfg.PIXEL_MEANS, wanted_size,
                                        cfg.TRAIN.MAX_SIZE)
        im_scales.append(scale)
        prepared.append(image)

    # Pack the prepared images into a single blob.
    return im_list_to_blob(prepared), im_scales
def _get_image_blob(roidb, scale_inds, RGB, NIR, DEPTH):
    """Builds an input blob from the roidb images for the selected modalities.

    The near-infrared / depth data is read from a ``.mat`` file (variable
    'NIR_DEPTH_res_crop'); channel 0 is used as NIR and channels 1 onward as
    depth.  Resulting channel layout:

      * RGB (+ NIR) (+ DEPTH): BGR image followed by the NIR and/or depth
        channels.
      * NIR only: the NIR channel repeated three times.
      * NIR + DEPTH: the whole array concatenated with itself.
      * DEPTH only: the depth slice repeated three times.

    Args:
      roidb: sequence of dicts providing an 'image' path and a 'flipped'
        flag.
      scale_inds: per-image indices into ``cfg.TRAIN.SCALES``.
      RGB, NIR, DEPTH: truthy flags selecting the modalities; they are also
        forwarded to ``prep_im_for_blob`` and ``im_list_to_blob``.

    Returns:
      Tuple ``(blob, im_scales)``: ``im_list_to_blob`` of the prepared
      images and the list of scale values returned by ``prep_im_for_blob``.

    Raises:
      ValueError: if no modality is selected while there are images to
        process.  The previous code printed a message and then failed on an
        unbound ``im``.
    """

    def _load_nir_depth(image_path):
        # NOTE(review): the .mat path is derived from the first 87 characters
        # of the image path -- a hard-coded prefix length that only fits one
        # dataset layout (an older variant used [:-8]); confirm before reuse.
        mat = scipy.io.loadmat(image_path[:87] + '_intensity_depth.mat')
        return mat['NIR_DEPTH_res_crop']

    num_images = len(roidb)

    processed_ims = []
    im_scales = []
    for i in range(num_images):
        image_path = roidb[i]['image']
        if RGB:
            im = imread(image_path)
            # Replicate a single-channel image into three channels.
            if len(im.shape) == 2:
                im = im[:, :, np.newaxis]
                im = np.concatenate((im, im, im), axis=2)
            # flip the channel, since the original one using cv2
            # rgb -> bgr
            im = im[:, :, ::-1]
            if NIR or DEPTH:
                nir_depth = _load_nir_depth(image_path)
                if NIR:
                    im = np.concatenate((im, nir_depth[:, :, :1]), axis=2)
                if DEPTH:
                    im = np.concatenate((im, nir_depth[:, :, 1:]), axis=2)
        elif NIR:
            nir_depth = _load_nir_depth(image_path)
            if not DEPTH:
                nir = nir_depth[:, :, :1]
                im = np.concatenate((nir, nir, nir), axis=2)
            else:
                im = np.concatenate((nir_depth, nir_depth), axis=2)
        elif DEPTH:
            depth = _load_nir_depth(image_path)[:, :, 1:]
            im = np.concatenate((depth, depth, depth), axis=2)
        else:
            raise ValueError('No color space was selected')

        if roidb[i]['flipped']:
            im = im[:, ::-1, :]
        target_size = cfg.TRAIN.SCALES[scale_inds[i]]
        im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
                                        cfg.TRAIN.MAX_SIZE, RGB, NIR, DEPTH)
        im_scales.append(im_scale)
        processed_ims.append(im)

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims, RGB, NIR, DEPTH)

    return blob, im_scales
Beispiel #20
0
def _get_image_blob_with_aug(roidb, scale_inds = -1, training = True):
    """Builds an input blob from the images in the roidb at the specified
    scales, applying augmentation to the image and its annotations.

    Args:
      roidb: non-empty sequence of dicts providing an 'image' path, a
        'rotated' count (passed to ``np.rot90``), a 'flipped' flag and
        optionally 'boxes' with 'gt_classes' and/or 'grasps'.
      scale_inds: ``-1`` (default) to prepare every image with
        ``prep_im_for_blob_fixed_size`` at ``cfg.TRAIN.COMMON.INPUT_SIZE``;
        otherwise per-image indices into ``cfg.TRAIN.RCNN_COMMON.SCALES``.
      training: forwarded unchanged to ``prep_im_for_blob_aug``.

    Returns:
      Tuple ``(blob, im_scales, gt_boxes, gt_classes, gt_grasps,
      gt_boxes_keep, gt_grasps_keep)``.  The box-related lists are ``None``
      when the first entry has no 'boxes'; the grasp-related lists are
      ``None`` when the first entry has no non-empty 'grasps'.
    """
    num_images = len(roidb)
    processed_ims = []
    im_scales = []
    gt_boxes = None
    gt_boxes_keep = None
    gt_classes = None
    gt_grasps = None
    gt_grasps_keep = None

    # Whether annotations are collected is decided from the first entry.
    if 'boxes' in roidb[0]:
        gt_boxes = []
        gt_classes = []
        gt_boxes_keep = []

    if 'grasps' in roidb[0] and roidb[0]['grasps'].size > 0 :
        gt_grasps = []
        gt_grasps_keep=[]

    # ``scale_inds == -1`` yields an array when scale_inds is an ndarray, and
    # the truth value of a multi-element array is ambiguous (ValueError).
    # Treat only a single-valued comparison as the "fixed size" sentinel;
    # scalars, lists and one-element arrays behave exactly as before.
    sentinel_check = scale_inds == -1
    use_fixed_size = (np.size(sentinel_check) == 1
                      and bool(np.all(sentinel_check)))

    for i in range(num_images):
        im = imread(roidb[i]['image'])

        boxes = None
        cls = None
        boxes_keep = None
        if 'boxes' in roidb[i]:
            boxes = np.array(roidb[i]['boxes'], dtype=np.int32)
            cls = roidb[i]['gt_classes']
            boxes_keep = np.array(range(boxes.shape[0]), dtype=np.int32)

        grasps = None
        grasps_keep = None
        # TODO(review): the original note says "grasps should be floats", yet
        # they are cast to int32 here and below; dtype left unchanged so the
        # returned arrays keep their current type.
        if 'grasps' in roidb[i] and roidb[i]['grasps'].size > 0:
            grasps = np.array(roidb[i]['grasps'], dtype=np.int32)
            grasps_keep = np.array(range(grasps.shape[0]), dtype=np.int32)

        # Apply the augmentation recorded in the roidb entry.  The mirror
        # uses two-axis indexing so it also works for 2-D grayscale images,
        # which are only expanded to three channels further down (the old
        # three-axis index raised IndexError for them).
        im = np.rot90(im, roidb[i]['rotated'])
        if roidb[i]['flipped']:
            im = im[:, ::-1]

        im, boxes, cls, grasps, boxes_keep, grasps_keep = \
            prep_im_for_blob_aug(im, boxes, cls, grasps, boxes_keep, grasps_keep, training)

        # Replicate a single-channel image into three identical channels.
        if len(im.shape) == 2:
            im = im[:,:,np.newaxis]
            im = np.concatenate((im,im,im), axis=2)

        # rgb -> bgr
        im = im[:, :, ::-1]

        if use_fixed_size:
            target_size = cfg.TRAIN.COMMON.INPUT_SIZE
            im, im_scale = prep_im_for_blob_fixed_size(im, cfg.PIXEL_MEANS, target_size)
        else:
            target_size = cfg.TRAIN.RCNN_COMMON.SCALES[scale_inds[i]]
            im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
                    cfg.TRAIN.COMMON.MAX_SIZE)

        processed_ims.append(im)
        im_scales.append(im_scale)

        if gt_boxes is not None:
            gt_boxes.append(np.array(boxes, dtype=np.int32))
            gt_classes.append(cls)
            gt_boxes_keep.append(np.array(boxes_keep,dtype=np.uint16))

        if gt_grasps is not None:
            gt_grasps.append(np.array(grasps, dtype=np.int32))
            gt_grasps_keep.append(np.array(grasps_keep,dtype=np.uint16))

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)

    return blob, im_scales, gt_boxes, gt_classes, gt_grasps, gt_boxes_keep, gt_grasps_keep
Beispiel #21
0
    def __getitem__(self, index):
        """Return the frames and annotations of one video segment.

        ``index`` is resolved by ``self.parse_index`` into a video index, a
        video id and a segment index.  The segment's '*.jpg' frames are
        globbed from the video directory, grouped by
        ``self.div_imglst_by_name`` and sub-sampled by ``self.get_frm_inds``.
        Each frame is read with cv2, converted to float32, shifted by -127.5
        and resized to ``args.img_h`` x ``args.img_w`` when its shape
        differs.

        Returns:
            When ``len(self.entity_type) == 2``:
                ``(blob, entities, sent, img_paths, new_vid, action_length,
                action_ind)``
            when ``len(self.entity_type) == 1`` the same tuple without
            ``sent``.  In the 'train' phase four more items are appended:
            ``DetectBox_path, DetectBox_class, DetectBox_score, DetectBox``
            (one list entry per frame, read from the pickled '.txt' file
            next to each frame).  For any other ``entity_type`` length the
            method returns ``None``, as before.
        """
        # parse segment index to video id and video-level segment id.
        vid_index, vid_id, seg_ind = self.parse_index(index)
        # get segment entities (and, with two entity types, the sentence
        # truncated to its first 20 items)
        has_sentence = len(self.entity_type) == 2
        entities = self.word_dict[vid_id][self.entity_type[0]][seg_ind]
        if has_sentence:
            sent = self.word_dict[vid_id][self.entity_type[1]][seg_ind][:20]

        # get segment images (read n images in series)
        vid_path = self.vid_paths[vid_index]
        image_list = glob.glob(os.path.join(vid_path, '*.jpg'))
        image_lists = self.div_imglst_by_name(image_list)
        # now we only can parse frame by name (reconstruct the name)
        image_list = image_lists[seg_ind]
        f_inds = self.get_frm_inds(image_list)
        image_list = [image_list[f_ind] for f_ind in f_inds]

        is_train = self.phase == 'train'
        target_h = self.args.img_h
        target_w = self.args.img_w

        def _load_frame(path):
            # Read one frame as float32, centre it around zero and bring it
            # to the configured size.
            frame = cv2.imread(path)
            frame = frame.astype(np.float32, copy=True)
            frame -= 127.5
            if frame.shape[0] != target_h or frame.shape[1] != target_w:
                # cv2.resize expects dsize as (width, height); passing
                # (img_h, img_w) swapped the axes for non-square targets.
                frame = cv2.resize(frame, (target_w, target_h))
            return frame

        imgs = []
        img_paths = []
        # Boxes produced by the external detector (train phase only).
        DetectBox_path = []
        DetectBox_class = []
        DetectBox_score = []
        DetectBox = []

        for img_path in image_list:
            imgs.append(_load_frame(img_path))
            img_paths.append(img_path)

            if not is_train:
                continue

            # The box file sits next to the frame: same name up to the first
            # dot of the *file name*, with a '.txt' suffix.  Splitting the
            # whole path on '.' broke for directories containing a dot
            # (e.g. './data/...').
            frame_name = os.path.basename(img_path)
            parent = img_path[:len(img_path) - len(frame_name)]
            box_path = parent + frame_name.split('.')[0] + ".txt"
            DetectBox_path.append(box_path)
            # SECURITY NOTE: pickle.load executes arbitrary code from the
            # file; only use detection files from a trusted source.
            with open(box_path, 'rb') as handle:
                info = pickle.load(handle)
            # Each record is indexed as (class name, score, box).
            DetectBox_class.append([record[0].lower() for record in info])
            DetectBox_score.append([record[1] for record in info])
            DetectBox.append([record[2] for record in info])

        # transfer to blob
        blob = im_list_to_blob(imgs)
        # new video flag: true when the accumulated segment count of this
        # video plus seg_ind is itself one of the accumulated counts
        new_vid = (self.seg_accumulate_num[vid_index] + seg_ind
                   in self.seg_accumulate_num)
        # get action_length
        action_length = self.actions_length[vid_index]
        action_ind = seg_ind

        if is_train:
            detections = (DetectBox_path, DetectBox_class, DetectBox_score,
                          DetectBox)
        else:
            detections = ()

        if has_sentence:
            return (blob, entities, sent, img_paths, new_vid, action_length,
                    action_ind) + detections
        elif len(self.entity_type) == 1:
            return (blob, entities, img_paths, new_vid, action_length,
                    action_ind) + detections
def _get_image_blob(im, RGB, NIR, DEPTH):
    """Converts an image into a network input.
  Arguments:
    im (ndarray): a color image in BGR order
  Returns:
    blob (ndarray): a data blob holding an image pyramid
    im_scale_factors (list): list of image scales (relative to im) used
      in the image pyramid
  """
    im_orig = im.astype(np.float32, copy=True)
    #im_orig -= cfg.PIXEL_MEANS
    pixel_means = cfg.PIXEL_MEANS
    if RGB:
        p_means = pixel_means[:, :, :3]
        if NIR:
            p_means = np.concatenate((p_means, pixel_means[:, :, 3:4]), axis=2)
        if DEPTH:
            p_means = np.concatenate((p_means, pixel_means[:, :, 4:5]), axis=2)
    elif NIR:
        if not DEPTH:
            p_means = np.concatenate(
                (pixel_means[:, :, 3:4], pixel_means[:, :, 3:4],
                 pixel_means[:, :, 3:4]),
                axis=2)
        else:
            p_means = np.concatenate(
                (pixel_means[:, :, 3:5], pixel_means[:, :, 3:5]), axis=2)
    elif DEPTH:
        p_means = np.concatenate(
            (pixel_means[:, :, 4:5], pixel_means[:, :, 4:5], pixel_means[:, :,
                                                                         4:5]),
            axis=2)
    else:
        print('Any color space was selected')

    im_orig -= p_means

    im_shape = im_orig.shape
    im_size_min = np.min(im_shape[0:2])
    im_size_max = np.max(im_shape[0:2])

    processed_ims = []
    im_scale_factors = []

    for target_size in cfg.TEST.SCALES:
        im_scale = float(target_size) / float(im_size_min)
        # Prevent the biggest axis from being more than MAX_SIZE
        if np.round(im_scale * im_size_max) > cfg.TEST.MAX_SIZE:
            im_scale = float(cfg.TEST.MAX_SIZE) / float(im_size_max)
        im = cv2.resize(im_orig,
                        None,
                        None,
                        fx=im_scale,
                        fy=im_scale,
                        interpolation=cv2.INTER_LINEAR)
        im_scale_factors.append(im_scale)
        processed_ims.append(im)

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims, RGB, NIR, DEPTH)

    return blob, np.array(im_scale_factors)
Beispiel #23
0
    def load_query(self, choice, id=0):
        """Load one query image for a category and return it as a blob.

        In training mode, ``choice`` indexes ``self._query`` directly and a
        record is drawn at random until one with a non-degenerate box is
        found. Otherwise ``choice`` indexes ``self.cat_list`` to obtain the
        category, the module-level RNG is re-seeded with ``id``, and the
        record at ``self.query_position`` of the shuffled candidate order
        is taken.

        Arguments:
          choice: key into ``self._query`` (training) or index into
            ``self.cat_list`` (testing)
          id: seed for the candidate shuffle in testing mode
        Returns:
          the output of ``im_list_to_blob`` for the single cropped and
          preprocessed query image
        """
        if self.training:
            candidates = self._query[choice]
            # Rejection sampling: redraw while the box has zero height or
            # zero width after integer truncation.
            # NOTE(review): never terminates if every candidate is degenerate.
            while True:
                data = random.choice(candidates)
                box = data['boxes']
                zero_height = int(box[1]) == int(box[3])
                if not (zero_height or int(box[0]) == int(box[2])):
                    break
        else:
            # Resolve the category under test and list its candidates.
            candidates = self._query[self.cat_list[choice]]

            # Seeding with the image id makes the candidate order
            # reproducible for a given image.
            random.seed(id)
            order = list(range(len(candidates)))
            random.shuffle(order)

            data = candidates[order[self.query_position % len(order)]]

        # Read the query image; grayscale input is expanded to 3 channels.
        im = imread(data['image_path'])
        if im.ndim == 2:
            im = np.concatenate([im[:, :, np.newaxis]] * 3, axis=2)

        im = crop(im, data['boxes'], cfg.TRAIN.query_size)

        # The random draw happens in both modes; only training flips.
        flip_draw = random.randint(0, 99) / 100
        if flip_draw > 0.5 and self.training:
            im = im[:, ::-1, :]

        im, _ = prep_im_for_blob(im, cfg.PIXEL_MEANS, cfg.TRAIN.query_size,
                                 cfg.TRAIN.MAX_SIZE)

        return im_list_to_blob([im])
Beispiel #24
0
def _get_image_blob(roidb, scale_inds):
    """Assemble an input blob from the roidb images at the chosen scales.

    Arguments:
      roidb: sequence of entries, each providing 'image' (passed to
        imread) and 'flipped'
      scale_inds: per-image indices into cfg.TRAIN.SCALES
    Returns:
      blob: output of im_list_to_blob over the preprocessed images
      im_scales (list): the scale prep_im_for_blob reported per image
    """
    im_scales = []
    processed_ims = []

    for idx, entry in enumerate(roidb):
        image = imread(entry['image'])

        # Expand a single-channel image to three identical channels.
        if image.ndim == 2:
            image = np.concatenate([image[:, :, np.newaxis]] * 3, axis=2)

        # Reverse the channel axis (rgb -> bgr), since the original
        # pipeline read images with cv2.
        image = image[:, :, ::-1]

        # Mirror horizontally for entries marked as flipped.
        if entry['flipped']:
            image = image[:, ::-1, :]

        size = cfg.TRAIN.SCALES[scale_inds[idx]]
        image, scale = prep_im_for_blob(image, cfg.PIXEL_MEANS, size,
                                        cfg.TRAIN.MAX_SIZE)
        im_scales.append(scale)
        processed_ims.append(image)

    # Pack all preprocessed images into a single blob.
    return im_list_to_blob(processed_ims), im_scales


# def Random_crop(roidb, im, index):
#   image = im
#   annots = roidb[index]["boxes"]

#   if not annots.shape[0]:
#       return image
#   if random.choice([0, 1]):
#       return image
#   else:
#       rows, cols, cns = image.shape
#       flag = 0
#       while True:
#           flag += 1
#           if flag > 10:
#               return image

#           crop_ratio = random.uniform(0.5, 1)
#           rows_zero = int(rows * random.uniform(0, 1 - crop_ratio))
#           cols_zero = int(cols * random.uniform(0, 1 - crop_ratio))
#           crop_rows = int(rows * crop_ratio)
#           crop_cols = int(cols * crop_ratio)
#           '''
#           new_image = image[rows_zero:rows_zero+crop_rows, cols_zero:cols_zero+crop_cols, :]
#           new_image = cv2.resize(new_image, (cols, rows))
#           #new_image = skimage.transform.resize(new_image, (rows, cols))

#           new_annots = np.zeros((0, 5))
#           for i in range(annots.shape[0]):
#               x1 = max(annots[i, 0] - cols_zero, 0)
#               y1 = max(annots[i, 1] - rows_zero, 0)
#               x2 = min(annots[i, 2] - cols_zero, crop_cols)
#               y2 = min(annots[i, 3] - rows_zero, crop_rows)
#               label = annots[i, 4]
#               if x1 + 10 < x2 and y1 + 10 < y2:
#                   x1 /= crop_ratio
#                   y1 /= crop_ratio
#                   x2 /= crop_ratio
#                   y2 /= crop_ratio
#                   new_annots = np.append(new_annots, np.array([[x1, y1, x2, y2, label]]), axis=0)

#           if not new_annots.shape[0]:
#               continue
#           '''
#           new_image = np.zeros((rows , cols , cns))
#           new_image[rows_zero:rows_zero+crop_rows, cols_zero:cols_zero+crop_cols, :] = image[rows_zero:rows_zero+crop_rows, cols_zero:cols_zero+crop_cols, :]
#           im = new_image

#           # new_annots = np.zeros((0, 4))

#           NUM_CLASS = 2
#           new_annots = np.zeros((0, 4), dtype=np.uint16)
#           gt_classes = np.zeros((0), dtype=np.int32)
#           overlaps = np.zeros((0, NUM_CLASS), dtype=np.float32)
#           max_classes = np.zeros((0), dtype=np.int64)
#           max_overlaps = np.zeros((0), dtype=np.float32)
#           # seg_areas = np.zeros((0), dtype=np.float32)

#           for i in range(annots.shape[0]):
#               x1 = max(cols_zero, annots[i, 0])
#               y1 = max(rows_zero, annots[i, 1])
#               x2 = min(cols_zero+crop_cols, annots[i, 2])
#               y2 = min(rows_zero+crop_rows, annots[i, 3])
#               if x1+10 < x2 and y1+10 < y2:
#                   new_annots = np.append(new_annots, np.array([[x1,y1,x2,y2]]), axis=0)
#                   gt_classes = np.append(gt_classes,roidb[index]['gt_classes'][i])

#                   if roidb[index]['gt_overlaps'].data[i] <= 0:
#                     # Set overlap to -1 for all classes for crowd objects
#                     # so they will be excluded during training
#                     tmp_overlap = np.zeros((1, NUM_CLASS), dtype=np.float32)
#                     tmp_overlap[0,:] = -1.0
#                     overlaps = np.append(overlaps, tmp_overlap)
#                     # overlaps[ix, :] = -1.0
#                   else:
#                     tmp_overlap = np.zeros((1, NUM_CLASS), dtype=np.float32)
#                     tmp_overlap[0,gt_classes] = 1.0
#                     overlaps = np.append(overlaps, tmp_overlap)

#                     # overlaps[ix, cls] = 1.0

#                   max_classes = np.append(max_classes, roidb[index]['max_classes'][i])
#                   max_overlaps = np.append(max_overlaps, roidb[index]['max_overlaps'][i])

#           if not new_annots.shape[0]:
#               continue

#           overlaps = scipy.sparse.csr_matrix(overlaps)
#           roidb[index]['boxes'] = new_annots
#           roidb[index]['gt_classes'] = gt_classes
#           roidb[index]['gt_overlaps'] = overlaps
#           roidb[index]['max_classes'] = max_classes
#           roidb[index]['max_overlaps'] = max_overlaps
#           roidb[index]['height'] = new_image.shape[0]
#           roidb[index]['width'] = new_image.shape[1]

#           return new_image
Beispiel #25
0
def _shift_image(img, shift_x, shift_y):
    """Return a zero-filled copy of img translated by (shift_x, shift_y).

    A positive shift_x moves content toward higher column indices; a
    positive shift_y moves content toward lower row indices. Pixels that
    are vacated stay zero. The result has the shape of img and the default
    dtype of np.zeros.
    """

    def _spans(shift):
        # (destination, source) slices that move content toward higher
        # indices by `shift` (toward lower indices when negative).
        if shift > 0:
            return slice(shift, None), slice(None, -shift)
        if shift < 0:
            return slice(None, shift), slice(-shift, None)
        return slice(None), slice(None)

    dst_cols, src_cols = _spans(shift_x)
    # Rows use the opposite sign convention: positive shift_y moves up.
    dst_rows, src_rows = _spans(-shift_y)

    shifted = np.zeros(img.shape)
    shifted[dst_rows, dst_cols, :] = img[src_rows, src_cols, :]
    return shifted


def _get_image_blob(roidb, scale_inds):
    """Builds an input blob from the image pairs in the roidb at the
    specified scales.

    Each roidb entry provides two image paths under 'image' (the reference
    and sensed modality). The second image of each pair is translated by
    (cfg.SHIFT_X, cfg.SHIFT_Y) when either shift is non-zero.

    Arguments:
      roidb: sequence of entries, each providing 'image' (two paths) and
        'flipped'
      scale_inds: per-entry indices into cfg.TRAIN.SCALES
    Returns:
      blob: output of im_list_to_blob over the list of image pairs
      im_scales (list): per entry, the scale prep_im_for_blob reported for
        the second image of the pair
    """
    processed_ims = []
    im_scales = []
    # Both shifts must be integers because they are used as slice bounds.
    assert isinstance(cfg.SHIFT_X, int) and isinstance(cfg.SHIFT_Y, int), \
           'wrong shift number, please check'
    shift_needed = cfg.SHIFT_X != 0 or cfg.SHIFT_Y != 0

    for i, entry in enumerate(roidb):
        # The target size is the same for both images of the pair.
        target_size = cfg.TRAIN.SCALES[scale_inds[i]]
        pair = []
        # the reference and sensed modality
        for j in range(2):
            im = imread(entry['image'][j])
            if len(im.shape) == 2:
                im = im[:, :, np.newaxis]
                im = np.concatenate((im, im, im), axis=2)
            # flip the channel, since the original one using cv2
            # rgb -> bgr
            im = im[:, :, ::-1]

            # Only the second image of the pair is shifted.
            if j == 1 and shift_needed:
                im = _shift_image(im, cfg.SHIFT_X, cfg.SHIFT_Y)

            if entry['flipped']:
                im = im[:, ::-1, :]
            im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS,
                                            target_size, cfg.TRAIN.MAX_SIZE)
            pair.append(im)

        im_scales.append(im_scale)
        processed_ims.append(pair)

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)

    return blob, im_scales