Esempio n. 1
0
def _get_image_blob(roidb, scale_inds):
    """Load, normalise and rescale every roidb image, then pack them in a blob.

    Each image is read from ``roidb[i]['image']``, mirrored horizontally when
    ``roidb[i]['flipped']`` is set, converted to float32, has
    ``cfg.PIXEL_MEANS`` subtracted and is resized by the factor
    ``cfg.TRAIN.SCALES_BASE[scale_inds[i]]``.

    Returns:
        (blob, im_scales): the output of ``im_list_to_blob`` for the processed
        images and the list of scale factors that were applied, one per image.
    """
    scaled_images = []
    im_scales = []
    for idx, entry in enumerate(roidb):
        raw = cv2.imread(entry['image'])
        if entry['flipped']:
            # Reversing the column axis mirrors the image left-to-right.
            raw = raw[:, ::-1, :]

        centered = raw.astype(np.float32, copy=True)
        centered -= cfg.PIXEL_MEANS

        factor = cfg.TRAIN.SCALES_BASE[scale_inds[idx]]
        resized = cv2.resize(centered, None, None, fx=factor, fy=factor,
                             interpolation=cv2.INTER_LINEAR)

        im_scales.append(factor)
        scaled_images.append(resized)

    return im_list_to_blob(scaled_images), im_scales
Esempio n. 2
0
def _get_image_blob(roidb):
    """Assemble the image blob for a training batch at randomly chosen scales.

    One index into ``cfg.TRAIN.SCALES`` is drawn per roidb entry.  Every image
    is read from ``entry['image']``, mirrored horizontally when
    ``entry['flipped']`` is set and passed to ``blob_utils.prep_im_for_blob``
    with a single target size.

    Returns:
        (blob, im_scales): the output of ``blob_utils.im_list_to_blob`` and
        one scale value per image.
    """
    # Sample one random scale index per image in the batch.
    scale_inds = np.random.randint(
        0, high=len(cfg.TRAIN.SCALES), size=len(roidb)
    )
    processed_ims = []
    im_scales = []
    for entry, scale_idx in zip(roidb, scale_inds):
        image = cv2.imread(entry['image'])
        if entry['flipped']:
            image = image[:, ::-1, :]
        # A one-element size list is passed in, so only element 0 of each
        # returned sequence is used.
        resized, scales = blob_utils.prep_im_for_blob(
            image, cfg.PIXEL_MEANS, [cfg.TRAIN.SCALES[scale_idx]],
            cfg.TRAIN.MAX_SIZE
        )
        im_scales.append(scales[0])
        processed_ims.append(resized[0])

    return blob_utils.im_list_to_blob(processed_ims), im_scales
Esempio n. 3
0
    def __call__(self, sample):
        """Resize the sample image and attach scale information and RoIs.

        The image in ``sample['image']`` is replaced by a float tensor built
        from ``prep_im_for_blob`` + ``im_list_to_blob``; the first returned
        scale is stored under ``'scaling_factors'`` and the pre-resize shape
        under ``'original_im_size'``.

        * Test mode (``sample_proposals_for_training`` falsy and the db entry
          has boxes): the boxes are multiplied by the scale, optionally
          de-duplicated, and stored under ``'rois'`` or, with FPN, under the
          keys returned by ``add_multilevel_rois_for_test``.
        * Training mode: the output of ``fast_rcnn_sample_rois`` is converted
          to float tensors and merged into a new sample dict.

        ``'dbentry'`` is removed from the returned dict.
        """
        input_shape = sample['image'].shape
        resized_ims, scales = prep_im_for_blob(sample['image'],
                                               pixel_means=self.mean,
                                               target_sizes=self.target_sizes,
                                               max_size=self.max_size)
        # NOTE(review): per the original comment im_list_to_blob swaps
        # channels and pads to the stride when FPN is on - not visible here.
        sample['image'] = torch.FloatTensor(
            im_list_to_blob(resized_ims, self.fpn_on))
        scale = scales[0]
        sample['scaling_factors'] = scale
        sample['original_im_size'] = torch.FloatTensor(input_shape)

        entry = sample['dbentry']
        if len(entry['boxes']) != 0 and not self.sample_proposals_for_training:
            # Fast R-CNN test: use the stored boxes as proposals.
            proposals = entry['boxes'] * scale
            if self.remove_dup_proposals:
                proposals, _ = self.remove_dup_prop(proposals)

            if self.fpn_on == False:
                sample['rois'] = torch.FloatTensor(proposals)
            else:
                per_level = add_multilevel_rois_for_test(
                    {'rois': proposals}, 'rois')
                for name, rois in per_level.items():
                    sample[name] = torch.FloatTensor(rois)

        elif self.sample_proposals_for_training:
            # Fast R-CNN training; batch_idx=0 is only valid for batch_size=1.
            sampled = fast_rcnn_sample_rois(roidb=entry,
                                            im_scale=scale,
                                            batch_idx=0)
            as_tensors = {}
            for name, value in sampled.items():
                as_tensors[name] = torch.FloatTensor(value)
            # Merging builds a new dict; the caller's dict keeps 'dbentry'.
            sample = {**sample, **as_tensors}

        del sample['dbentry']
        return sample
Esempio n. 4
0
def _get_image_blob(im):
    """Turn one image into a multi-scale network input.

    Arguments:
        im (ndarray): a color image in BGR order

    Returns:
        blob (ndarray): output of ``im_list_to_blob`` for the mean-subtracted
            image resized by every factor in ``cfg.TEST.SCALES_BASE``
        im_scale_factors (ndarray): the scale factors that were applied
    """
    centered = im.astype(np.float32, copy=True)
    centered -= cfg.PIXEL_MEANS

    im_scale_factors = list(cfg.TEST.SCALES_BASE)
    pyramid = [
        cv2.resize(centered, None, None, fx=factor, fy=factor,
                   interpolation=cv2.INTER_LINEAR)
        for factor in im_scale_factors
    ]

    return im_list_to_blob(pyramid), np.array(im_scale_factors)
Esempio n. 5
0
def _get_image_blob(im):
    """Converts an image into a single-scale network input.

    Arguments:
        im (ndarray): a color image in BGR order

    Returns:
        blob (ndarray): output of ``im_list_to_blob`` for the mean-subtracted,
            rescaled image
        im_info (ndarray): array of shape (1, 3) holding
            ``[resized_height, resized_width, im_scale]``
    """
    im_orig = im.astype(np.float32, copy=True)
    im_orig -= cfg.PIXEL_MEANS

    # Only a single test scale is supported.  The scale is read from the same
    # TEST list that the assertion checks; the original validated
    # cfg.TEST.SCALES_BASE but then used cfg.TRAIN.SCALES_BASE[0].
    assert len(cfg.TEST.SCALES_BASE) == 1
    im_scale = cfg.TEST.SCALES_BASE[0]

    im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale,
                    interpolation=cv2.INTER_LINEAR)
    im_info = np.hstack((im.shape[:2], im_scale))[np.newaxis, :]

    # Create a blob to hold the input image
    blob = im_list_to_blob([im])

    return blob, im_info
Esempio n. 6
0
def _get_image_blob(roidb, scale_inds):
    """Builds an input blob of 4-channel images (image + deformed mask).

    For every roidb entry the image at ``entry['image']`` and the mask at
    ``entry['deformed_mask']`` (read with imread flag 0, i.e. grayscale) are
    stacked into one ``(H, W, 4)`` float array: channels 0-2 hold the image,
    channel 3 the mask.  The stack is mirrored horizontally when
    ``entry['flipped']`` is set and then handed to ``prep_im_for_blob`` with
    the target size ``cfg.TRAIN.SCALES[scale_inds[i]]``.

    NOTE(review): ``cfg.PIXEL_MEANS`` is passed along with a 4-channel image,
    so it presumably has to broadcast over four channels - confirm against
    the config.

    Returns:
        (blob, im_scales): output of ``im_list_to_blob`` and the per-image
        scale values returned by ``prep_im_for_blob``.
    """
    processed_ims = []
    im_scales = []
    for i, entry in enumerate(roidb):
        im_bgr = cv2.imread(entry['image'])
        if cfg.DEBUG:
            # Call form prints the same text under Python 2 and Python 3.
            print(im_bgr.shape)

        # Add the deformed mask to the input as a fourth channel.
        deformed_mask = cv2.imread(entry['deformed_mask'], 0)
        im = np.zeros((im_bgr.shape[0], im_bgr.shape[1], 4))
        im[:, :, 0:3] = im_bgr
        im[:, :, 3] = deformed_mask

        if entry['flipped']:
            # Image and mask are mirrored together.
            im = im[:, ::-1, :]

        target_size = cfg.TRAIN.SCALES[scale_inds[i]]
        im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
                                        cfg.TRAIN.MAX_SIZE)
        im_scales.append(im_scale)
        processed_ims.append(im)

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)

    return blob, im_scales
Esempio n. 7
0
def _get_image_blob(im):
    """Build a multi-scale network input from one image.

    For every target size in ``cfg.TEST.SCALES`` the scale is chosen so the
    shorter image side matches the target, unless the longer side would then
    exceed ``cfg.TEST.MAX_SIZE``; in that case the longer side is matched to
    ``cfg.TEST.MAX_SIZE`` instead.

    Arguments:
        im (ndarray): a color image in BGR order

    Returns:
        blob (ndarray): output of ``im_list_to_blob`` for the image pyramid
        im_scale_factors (ndarray): scales (relative to im) used in the
            pyramid
    """
    centered = im.astype(np.float32, copy=True)
    centered -= cfg.PIXEL_MEANS

    spatial_shape = centered.shape[0:2]
    short_side = np.min(spatial_shape)
    long_side = np.max(spatial_shape)

    pyramid = []
    factors = []
    for target_size in cfg.TEST.SCALES:
        factor = float(target_size) / float(short_side)
        # Cap the scale so the longer side stays within MAX_SIZE.
        if np.round(factor * long_side) > cfg.TEST.MAX_SIZE:
            factor = float(cfg.TEST.MAX_SIZE) / float(long_side)
        factors.append(factor)
        pyramid.append(
            cv2.resize(centered, None, None, fx=factor, fy=factor,
                       interpolation=cv2.INTER_LINEAR))

    return im_list_to_blob(pyramid), np.array(factors)
Esempio n. 8
0
def _get_image_blob(roidb):
    """Assemble the input blob for a batch whose entries may hold many frames.

    One index into ``cfg.TRAIN.SCALES`` is drawn per roidb entry.  All frames
    returned by ``image_utils.read_image_video`` for an entry are mirrored
    when ``entry['flipped']`` is set and prepared at that entry's scale.

    Returns:
        (blob, im_scales): output of ``blob_utils.im_list_to_blob`` over all
        frames of all entries, and one scale value per roidb entry.
    """
    # Sample one random scale index per roidb entry.
    scale_inds = np.random.randint(
        0, high=len(cfg.TRAIN.SCALES), size=len(roidb))
    processed_ims = []
    im_scales = []
    for entry, scale_idx in zip(roidb, scale_inds):
        frames = image_utils.read_image_video(entry)
        target_size = cfg.TRAIN.SCALES[scale_idx]
        for frame_idx, frame in enumerate(frames):
            if entry['flipped']:
                frame = frame[:, ::-1, :]
            resized, scales = blob_utils.prep_im_for_blob(
                frame, cfg.PIXEL_MEANS, [target_size], cfg.TRAIN.MAX_SIZE)
            # The frame list is overwritten in place with the processed frame.
            frames[frame_idx] = resized[0]
        # The scale of the last frame is recorded; per the original author
        # all frames of an entry share the same scale.
        im_scales.append(scales[0])
        processed_ims.extend(frames)

    return blob_utils.im_list_to_blob(processed_ims), im_scales
Esempio n. 9
0
def _get_image_blob(roidb, scale_inds):
    """Build the input blob from images stored as NumPy array files.

    Images are loaded with ``np.load`` (instead of ``cv2.imread``) so that an
    arbitrary number of channels is supported.  An array that is not
    3-dimensional gets a trailing channel axis.  Entries flagged
    ``'flipped'`` are mirrored horizontally before ``prep_im_for_blob`` is
    called with the target size ``cfg.TRAIN.SCALES[scale_inds[i]]``.

    Returns:
        (blob, im_scales): output of ``im_list_to_blob`` and the per-image
        scale values returned by ``prep_im_for_blob``.
    """
    prepared = []
    im_scales = []
    for idx, entry in enumerate(roidb):
        image = np.load(entry['image'])
        if image.ndim != 3:
            image = np.expand_dims(image, axis=2)
        if entry['flipped']:
            image = image[:, ::-1, :]
        image, scale = prep_im_for_blob(
            image, cfg.PIXEL_MEANS, cfg.TRAIN.SCALES[scale_inds[idx]],
            cfg.TRAIN.MAX_SIZE)
        im_scales.append(scale)
        prepared.append(image)

    return im_list_to_blob(prepared), im_scales
Esempio n. 10
0
def _get_image_blob(roidb, scale_inds):
    """Builds an input blob from the images in the roidb at the specified
    scales.

    Images are read in colour when ``cfg.TRAIN.IS_COLOR`` is truthy and as
    single-channel grayscale otherwise.  Entries flagged ``'flipped'`` are
    mirrored horizontally.

    Returns:
        (blob, im_scales): output of ``im_list_to_blob`` and the per-image
        scale values returned by ``prep_im_for_blob``.
    """
    # cv2.IMREAD_GRAYSCALE has the same value as the legacy
    # cv2.CV_LOAD_IMAGE_GRAYSCALE constant, which OpenCV 3+ no longer exposes.
    # cv2.IMREAD_COLOR is imread's default flag.
    if cfg.TRAIN.IS_COLOR:
        read_flag = cv2.IMREAD_COLOR
    else:
        read_flag = cv2.IMREAD_GRAYSCALE

    processed_ims = []
    im_scales = []
    for i, entry in enumerate(roidb):
        im = cv2.imread(entry['image'], read_flag)
        if entry['flipped']:
            # Reversing axis 1 mirrors both 2-D (gray) and 3-D (colour) arrays.
            im = im[:, ::-1]

        target_size = cfg.TRAIN.SCALES[scale_inds[i]]
        im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
                                        cfg.TRAIN.MAX_SIZE)
        im_scales.append(im_scale)
        processed_ims.append(im)

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)

    return blob, im_scales
Esempio n. 11
0
def _get_image_blob(im):
    """Convert a list of frames into a network input.

    Arguments:
        im (list of ndarray): a list of color images in BGR order. For video
        it is a list of frames, otherwise a list with len = 1.

    Returns:
        blob (ndarray): output of ``blob_utils.im_list_to_blob`` for the
            frames, ordered scale-major:
            [frame1_scale1, frame2_scale1, ..., frame1_scale2, ...]
        im_scale_factors (ndarray): array of image scales (relative to im)
            used in the pyramid
    """
    # One entry per frame; each entry holds that frame at every scale.
    ims_by_frame = []
    scales_by_frame = []
    for frame in im:
        scaled_ims, scale_factors = blob_utils.prep_im_for_blob(
            frame, cfg.PIXEL_MEANS, cfg.TEST.SCALES, cfg.TEST.MAX_SIZE)
        ims_by_frame.append(scaled_ims)
        scales_by_frame.append(scale_factors)

    # Every frame must have produced the same scale factors; keep the first.
    for scale_factors in scales_by_frame:
        assert(scales_by_frame[0] == scale_factors)
    im_scale_factors = scales_by_frame[0]

    # Regroup so that all frames at one scale sit next to each other.
    num_scales = len(ims_by_frame[0])
    processed_ims = [
        one_frame_ims[scale_idx]
        for scale_idx in range(num_scales)
        for one_frame_ims in ims_by_frame
    ]

    blob = blob_utils.im_list_to_blob(processed_ims)
    return blob, np.array(im_scale_factors)
Esempio n. 12
0
def _get_image_blob(roidb):
    """Assemble the image blob for a training batch at randomly chosen scales.

    One index into ``cfg.TRAIN.SCALES`` is drawn per roidb entry.  Each image
    is read with ``cv2.imread`` (an AssertionError is raised when that
    returns None), mirrored when ``entry['flipped']`` is set and prepared by
    ``blob_utils.prep_im_for_blob`` with a single target size.

    Returns:
        (blob, im_scales): output of ``blob_utils.im_list_to_blob`` and one
        scale value per image.
    """
    # Sample one random scale index per image in the batch.
    scale_inds = np.random.randint(
        0, high=len(cfg.TRAIN.SCALES), size=len(roidb))
    processed_ims = []
    im_scales = []
    for entry, scale_idx in zip(roidb, scale_inds):
        image = cv2.imread(entry['image'])
        assert image is not None, \
            'Failed to read image \'{}\''.format(entry['image'])
        # NOTE: a reader other than OpenCV would need grayscale images
        # expanded to three channels and RGB converted to BGR at this point.
        if entry['flipped']:
            image = image[:, ::-1, :]
        resized, scales = blob_utils.prep_im_for_blob(
            image, cfg.PIXEL_MEANS, [cfg.TRAIN.SCALES[scale_idx]],
            cfg.TRAIN.MAX_SIZE)
        im_scales.append(scales[0])
        processed_ims.append(resized[0])

    # Per the original comment the blob layout is [n, c, h, w].
    return blob_utils.im_list_to_blob(processed_ims), im_scales
Esempio n. 13
0
def _get_rprocessed_image_blob(roidb, scale_inds, angles):
    """Builds an input blob from the images in the roidb at the specified
    scales, optionally mirroring and rotating each image first.

    Arguments:
        roidb: sequence of entries with keys ``'image'`` (path), ``'flipped'``
            and ``'rotated'``
        scale_inds: per-image indices into ``cfg.TRAIN.SCALES``
        angles: per-image rotation angles passed to
            ``cv2.getRotationMatrix2D``; only used for entries flagged
            ``'rotated'``

    Returns:
        (blob, im_scales): output of ``im_list_to_blob`` and the per-image
        scale values returned by ``prep_im_for_blob``.
    """
    processed_ims = []
    im_scales = []
    for i, entry in enumerate(roidb):
        im = cv2.imread(entry['image'])
        if entry['flipped']:
            im = im[:, ::-1, :]

        if entry['rotated']:
            h, w = im.shape[:2]
            # Rotate about the image centre.  Floor division reproduces the
            # integer centre of the original Python 2 code on any version.
            center = (w // 2, h // 2)
            # Scale 1.0: rotation only.  Per the original comment the angle
            # is interpreted as anti-clockwise.
            M = cv2.getRotationMatrix2D(center, angles[i], 1.0)
            # The output canvas keeps the input size (w, h).
            im = cv2.warpAffine(im, M, (w, h))

        target_size = cfg.TRAIN.SCALES[scale_inds[i]]
        im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
                                        cfg.TRAIN.MAX_SIZE)
        im_scales.append(im_scale)
        processed_ims.append(im)

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)

    return blob, im_scales
def _get_image_blob(roidb, scale_inds):
  """
  Build the input blob for the roidb images and record their original shapes.

  Each image is read from ``entry['image']`` and mirrored horizontally when
  ``entry['flipped']`` is set.  Its shape (height, width, channels) is
  recorded before ``prep_im_for_blob`` is called with the target size
  ``cfg.TRAIN.SCALES[scale_inds[i]]``.

  Returns:
    (blob, im_scales, im_shapes): output of ``im_list_to_blob``, the
    per-image scale values and the per-image pre-resize shapes.
  """
  prepared = []
  im_scales = []
  im_shapes = []

  for idx, entry in enumerate(roidb):
    image = cv2.imread(entry['image'])
    if entry['flipped']:
      image = image[:, ::-1, :]
    # Shape is taken after flipping but before any rescaling.
    im_shapes.append(image.shape)

    image, scale = prep_im_for_blob(
        image, cfg.PIXEL_MEANS, cfg.TRAIN.SCALES[scale_inds[idx]],
        cfg.TRAIN.MAX_SIZE)
    im_scales.append(scale)
    prepared.append(image)

  return im_list_to_blob(prepared), im_scales, im_shapes
Esempio n. 15
0
def _get_image_blob(roidb, scale_inds, data_i):
    """Build a blob of 6-channel images from an image and its ``_norm.png``.

    For every entry the path ``entry["image"][data_i]`` and the same path
    with ``"_norm.png"`` appended are read.  For flipped entries both images
    are mirrored horizontally and channel 2 of the second image is replaced
    by ``255 - value``.  The channel order of both images is then reversed
    and they are stacked into one uint8 ``(H, W, 6)`` array, which is passed
    to ``prep_im_for_blob`` with a fixed mean of 127.5.

    NOTE(review): the ``_norm.png`` file presumably holds surface normals,
    which would explain negating one component on flip - confirm.

    Returns:
        (blob, im_scales): output of ``im_list_to_blob`` and the per-image
        scale values returned by ``prep_im_for_blob``.
    """
    prepared = []
    im_scales = []
    for idx, entry in enumerate(roidb):
        primary_path = entry["image"][data_i]
        primary = cv2.imread(primary_path)
        secondary = cv2.imread(primary_path + "_norm.png")
        if entry["flipped"]:
            primary = primary[:, ::-1, :]
            secondary = secondary[:, ::-1, :]
            secondary[:, :, 2] = 255 - secondary[:, :, 2]

        stacked = np.zeros((primary.shape[0], primary.shape[1], 6))
        stacked = stacked.astype("uint8")
        # Reverse the channel axis of both inputs before stacking.
        stacked[:, :, 0:3] = primary[:, :, ::-1]
        stacked[:, :, 3:6] = secondary[:, :, ::-1]

        stacked, scale = prep_im_for_blob(
            stacked, 127.5, cfg.TRAIN.SCALES[scale_inds[idx]],
            cfg.TRAIN.MAX_SIZE)
        im_scales.append(scale)
        prepared.append(stacked)

    return im_list_to_blob(prepared), im_scales
Esempio n. 16
0
def _get_image_blob(imdb, roidb, scale_inds):
    """Build the input blob from images stored as encoded bytes in ``imdb``.

    For each entry ``imdb.get_proto_at(entry['image'])`` is fetched and its
    ``data`` bytes are decoded with ``io.imread``.  The channel axis is
    reversed, flipped entries are mirrored horizontally, and the image is
    passed to ``prep_im_for_blob`` together with
    ``cfg.TRAIN.SCALE_MULTIPLE_OF``.

    Returns:
        (blob, im_scales): output of ``im_list_to_blob`` and the per-image
        scale values returned by ``prep_im_for_blob``.
    """
    prepared = []
    im_scales = []
    for idx, entry in enumerate(roidb):
        record = imdb.get_proto_at(entry['image'])
        image = io.imread(BytesIO(record.data))
        # Reverse channel order (presumably RGB -> BGR; confirm io.imread).
        image = image[:, :, ::-1]

        if entry['flipped']:
            image = image[:, ::-1, :]
        image, scale = prep_im_for_blob(
            image, cfg.PIXEL_MEANS, cfg.TRAIN.SCALES[scale_inds[idx]],
            cfg.TRAIN.MAX_SIZE, cfg.TRAIN.SCALE_MULTIPLE_OF)
        im_scales.append(scale)
        prepared.append(image)

    return im_list_to_blob(prepared), im_scales
Esempio n. 17
0
def _get_image_blob(roidb):
    """Build a one-image blob for a single roidb entry at a random scale.

    The image is read from ``roidb['image']``, mirrored horizontally when
    ``roidb['flipped']`` is set, and prepared by ``prep_im_for_blob`` with a
    target size drawn from ``cfg.TRAIN.SCALES`` via ``np.random.choice``.

    Returns:
        (blob, im_scale): output of ``im_list_to_blob`` for the single image
        and the scale value returned by ``prep_im_for_blob``.
    """
    image = cv2.imread(roidb['image'])
    if roidb['flipped']:
        image = image[:, ::-1, :]
    prepared, im_scale = prep_im_for_blob(
        image, cfg.PIXEL_MEANS, np.random.choice(cfg.TRAIN.SCALES),
        cfg.TRAIN.MAX_SIZE)
    return im_list_to_blob([prepared]), im_scale
def _get_image_blob(im,im_scales):
    """
    Build one network blob per requested scale.

    :param im: input image
    :param im_scales: a list of scale coefficients
    :return: A list of dicts ``{'data': blob}``, one per scale, each holding
             the mean-subtracted image resized by that coefficient
    """
    # Float copy of the image with the pixel means removed.
    centered = im.astype(np.float32, copy=True) - cfg.PIXEL_MEANS

    blobs = []
    for scale in im_scales:
        if scale == 1.0:
            # Scale 1.0 needs no resampling.
            scaled = centered
        else:
            scaled = cv2.resize(centered, None, None, fx=scale, fy=scale,
                                interpolation=cv2.INTER_LINEAR)
        blobs.append({'data': im_list_to_blob([scaled])})
    return blobs
Esempio n. 19
0
def _get_image_blob(roidb, scale_ind):
    """Build colour and depth blobs for the roidb images at one scale.

    Colour: the image at ``entry['image']`` is read unchanged with four
    channels; pixels whose 4th (alpha) channel is 0 are set to 255 in the
    first three channels.  Depth: the image at ``entry['depth']`` is divided
    by its maximum, multiplied by 255 and replicated to three channels.  Both
    are mirrored for flipped entries, have ``cfg.PIXEL_MEANS`` subtracted and
    are resized by ``cfg.TRAIN.SCALES_BASE[scale_ind]``.

    Returns:
        (blob, blob_depth, im_scales): the two ``im_list_to_blob`` outputs and
        the list of scale factors (one per image).
    """
    def _center_and_resize(array, factor):
        # Float copy, mean subtraction, then bilinear rescale.
        centered = array.astype(np.float32, copy=True)
        centered -= cfg.PIXEL_MEANS
        return cv2.resize(centered, None, None, fx=factor, fy=factor,
                          interpolation=cv2.INTER_LINEAR)

    color_ims = []
    depth_ims = []
    im_scales = []
    for entry in roidb:
        # Colour image with alpha channel.
        rgba = cv2.imread(entry['image'], cv2.IMREAD_UNCHANGED)
        color = rgba[:, :, :3]
        # Fully transparent pixels become 255 in every colour channel.
        color[rgba[:, :, 3] == 0] = 255

        if entry['flipped']:
            color = color[:, ::-1, :]

        im_scale = cfg.TRAIN.SCALES_BASE[scale_ind]
        color_ims.append(_center_and_resize(color, im_scale))
        im_scales.append(im_scale)

        # Depth image, stretched so its maximum maps to 255.
        depth = cv2.imread(entry['depth'], cv2.IMREAD_UNCHANGED)
        depth = depth.astype(np.float32)
        depth = depth / depth.max() * 255
        depth = np.tile(depth[:, :, np.newaxis], (1, 1, 3))
        if entry['flipped']:
            depth = depth[:, ::-1]

        depth_ims.append(_center_and_resize(depth, im_scale))

    blob = im_list_to_blob(color_ims, 3)
    blob_depth = im_list_to_blob(depth_ims, 3)

    return blob, blob_depth, im_scales
Esempio n. 20
0
def _get_image_blob(im, im_depth):
    """Convert a colour image and a depth image into network inputs.

    Arguments:
        im (ndarray): a color image in BGR order
        im_depth (ndarray): a 2-D depth image; it is divided by its maximum,
            multiplied by 255 and replicated to three channels

    Returns:
        blob (ndarray): ``im_list_to_blob`` output for the colour image
        blob_depth (ndarray): ``im_list_to_blob`` output for the depth image
        im_info (ndarray): shape (1, 3) array
            ``[resized_height, resized_width, im_scale]``
        im_scale_factors (ndarray): one-element array holding the scale used
    """
    # Colour input: float copy with the pixel means removed.
    color = im.astype(np.float32, copy=True)
    color -= cfg.PIXEL_MEANS

    # Only a single test scale is supported.
    assert len(cfg.TEST.SCALES_BASE) == 1
    im_scale = cfg.TEST.SCALES_BASE[0]

    color = cv2.resize(color, None, None, fx=im_scale, fy=im_scale,
                       interpolation=cv2.INTER_LINEAR)

    # Size of the resized colour image plus the scale that produced it.
    im_info = np.hstack((color.shape[:2], im_scale))[np.newaxis, :]

    # Depth input: stretch to a maximum of 255, then treat like the colour
    # image.
    depth = im_depth.astype(np.float32, copy=True)
    depth = depth / depth.max() * 255
    depth = np.tile(depth[:, :, np.newaxis], (1, 1, 3))
    depth -= cfg.PIXEL_MEANS
    depth = cv2.resize(depth, None, None, fx=im_scale, fy=im_scale,
                       interpolation=cv2.INTER_LINEAR)

    blob = im_list_to_blob([color], 3)
    blob_depth = im_list_to_blob([depth], 3)

    return blob, blob_depth, im_info, np.array([im_scale])
Esempio n. 21
0
    def _get_image_blob(self, im):
        """Return the blob for a single image with the pixel means removed.

        The image is copied to float32, ``cfg.PIXEL_MEANS`` is subtracted and
        the result is wrapped in a one-element list for ``im_list_to_blob``.
        No resizing is performed.
        """
        centered = im.astype(np.float32, copy=True)
        centered -= cfg.PIXEL_MEANS

        return im_list_to_blob([centered])
Esempio n. 22
0
def _get_roi_blob(roidb, pid):
    """Crop the box of one person id out of an image and return it as a blob.

    The first box whose entry in ``roidb['gt_pids']`` equals ``pid`` is
    cropped (both box corners inclusive) from the image, mirrored beforehand
    when ``roidb['flipped']`` is set.  ``cfg.PIXEL_MEANS`` is subtracted and
    the crop is resized with ``dsize=(64, 160)``.

    NOTE(review): the box is presumably already expressed in the coordinates
    of the flipped image, and must hold integers usable as slice bounds -
    confirm against the roidb builder.
    """
    image = cv2.imread(roidb['image'])
    if roidb['flipped']:
        image = image[:, ::-1, :]
    image = image.astype(np.float32, copy=False)

    # Index of the first ground-truth box carrying the requested id.
    box_index = np.where(roidb['gt_pids'] == pid)[0][0]
    x1, y1, x2, y2 = roidb['boxes'][box_index]

    crop = image[y1:y2+1, x1:x2+1, :]
    crop -= cfg.PIXEL_MEANS
    crop = cv2.resize(crop, (64, 160), interpolation=cv2.INTER_LINEAR)
    return im_list_to_blob([crop])
Esempio n. 23
0
def _get_image_blob(roidb, scale_inds):
    """Builds an input blob of stacked flow images at the specified scales.

    ``roidb[i]["image"]`` is a path without extension whose last component is
    a frame number, e.g. ``path/000000``.  Starting at that frame, the
    grayscale images ``<frame>_x.jpg`` and ``<frame>_y.jpg`` of 10
    consecutive frames are interleaved into one uint8 ``(H, W, 20)`` array
    (x at even channels, y at odd channels), which is then passed to
    ``prep_im_for_blob``.

    For flipped entries both images are mirrored horizontally and the x image
    is additionally replaced by ``255 - value``.  The original mirrored only
    the x image, which left the y channels misaligned with the x channels.

    NOTE(review): ``255 - imx`` presumably negates a horizontal flow
    component stored around mid-gray - confirm against the flow encoding.
    NOTE(review): ``cfg.PIXEL_MEANS`` has to be compatible with a 20-channel
    image - confirm against the config.

    Returns:
        (blob, im_scales): output of ``im_list_to_blob`` and the per-image
        scale values returned by ``prep_im_for_blob``.
    """
    num_frames = 10
    processed_ims = []
    im_scales = []
    for i, entry in enumerate(roidb):
        imname = entry["image"]
        # Split "dir/000123" into the directory prefix and the frame number.
        first_frame = imname.split("/")[-1]
        first_id = int(first_frame)
        srcdir = imname[0 : -len(first_frame)]

        im = None
        for j in range(num_frames):
            prefix = srcdir + "{0:06d}".format(first_id + j)
            # IMREAD_GRAYSCALE has the same value as the legacy
            # CV_LOAD_IMAGE_GRAYSCALE constant removed in OpenCV 3.
            imx = cv2.imread(prefix + "_x.jpg", cv2.IMREAD_GRAYSCALE)
            imy = cv2.imread(prefix + "_y.jpg", cv2.IMREAD_GRAYSCALE)
            if entry["flipped"]:
                imx = 255 - imx[:, ::-1]
                imy = imy[:, ::-1]
            if im is None:
                # Allocate once the frame size is known.
                im = np.zeros((imx.shape[0], imx.shape[1], 2 * num_frames),
                              dtype="uint8")
            im[:, :, j * 2] = imx
            im[:, :, j * 2 + 1] = imy

        target_size = cfg.TRAIN.SCALES[scale_inds[i]]
        im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
                                        cfg.TRAIN.MAX_SIZE)

        im_scales.append(im_scale)
        processed_ims.append(im)

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)

    return blob, im_scales
Esempio n. 24
0
    def _get_image_blob(self,sample):
        """Build the image blob and label list for one batch.

        The first ``self.batch_size`` entries of ``sample`` are used.  Each
        image is read from ``cfg.IMAGEPATH + entry['picname']``, mirrored when
        ``entry['flipped']`` is set and passed to ``prep_im_for_blob``.  The
        label is looked up in ``self._data._sample_label`` using the part of
        ``picname`` before the first ``'/'``.

        Returns:
            (blob, labels_blob): output of ``im_list_to_blob`` and the list of
            labels, one per image.
        """
        prepared = []
        labels_blob = []
        for index in range(self.batch_size):
            entry = sample[index]
            image = cv2.imread(cfg.IMAGEPATH + entry['picname'])
            if entry['flipped']:
                image = image[:, ::-1, :]
            # The leading path component of picname is the label key.
            person = entry['picname'].split('/')[0]
            labels_blob.append(self._data._sample_label[person])
            prepared.append(prep_im_for_blob(image))

        return im_list_to_blob(prepared), labels_blob
Esempio n. 25
0
def _get_image_blob(im):
    """Turn an image into a network input.

    The work is delegated to ``blob_utils.prep_im_for_blob`` (called with
    ``cfg.PIXEL_MEANS``, ``cfg.TEST.SCALES`` and ``cfg.TEST.MAX_SIZE``) and
    ``blob_utils.im_list_to_blob``.

    Arguments:
        im (ndarray): a color image in BGR order

    Returns:
        blob (ndarray): a data blob holding an image pyramid
        im_scale_factors (ndarray): array of image scales (relative to im)
            used in the image pyramid
    """
    scaled_ims, scale_factors = blob_utils.prep_im_for_blob(
        im, cfg.PIXEL_MEANS, cfg.TEST.SCALES, cfg.TEST.MAX_SIZE
    )
    return blob_utils.im_list_to_blob(scaled_ims), np.array(scale_factors)
Esempio n. 26
0
def _get_label_blob(roidb, im_scales, num_classes):
    """Build the label blob, resampled to the heatmap resolution.

    Each label image is read unchanged from ``entry['label']``, mirrored when
    ``entry['flipped']`` is set, converted by ``_process_label_image`` using
    ``entry['class_colors']`` and resized by ``im_scales[i]`` with
    nearest-neighbour interpolation.  After ``im_list_to_blob`` the blob is
    resized once more (nearest neighbour) to the heatmap size, obtained by
    halving each side four times with round-half-up and multiplying by 8.

    ``num_classes`` is accepted but not used.

    Returns:
        float32 ndarray of shape (num_images, 1, height, width).
    """
    def _heatmap_side(side):
        # Four round-half-up halvings followed by an 8x upscale.
        for _ in range(4):
            side = np.floor(side / 2.0 + 0.5)
        return int(side * 8)

    num_images = len(roidb)
    label_ims = []
    for idx, entry in enumerate(roidb):
        label = cv2.imread(entry['label'], cv2.IMREAD_UNCHANGED)
        if entry['flipped']:
            label = label[:, ::-1, :]

        class_map = _process_label_image(label, entry['class_colors'])

        factor = im_scales[idx]
        label_ims.append(
            cv2.resize(class_map, None, None, fx=factor, fy=factor,
                       interpolation=cv2.INTER_NEAREST))

    blob_cls = im_list_to_blob(label_ims, 1)

    # Axes 2 and 3 of the blob are its height and width.
    height = _heatmap_side(blob_cls.shape[2])
    width = _heatmap_side(blob_cls.shape[3])

    blob_cls_rescale = np.zeros((num_images, 1, height, width),
                                dtype=np.float32)
    for idx in range(num_images):
        blob_cls_rescale[idx, 0, :, :] = cv2.resize(
            blob_cls[idx, 0, :, :], dsize=(width, height),
            interpolation=cv2.INTER_NEAREST)

    return blob_cls_rescale
Esempio n. 27
0
def _get_image_blob(im, roidb, scale_inds):
  """
  Builds an input blob from the given image, prepared once per roidb entry
  at that entry's scale.

  Arguments:
    im: the source image; the same image is used for every roidb entry
    roidb: sequence whose length determines how many copies are prepared
    scale_inds: per-entry indices into ``cfg.TRAIN.SCALES``

  Returns:
    (blob, im_scales): output of ``im_list_to_blob`` and the per-entry scale
    values returned by ``prep_im_for_blob``.
  """
  processed_ims = []
  im_scales = []
  for i in range(len(roidb)):
    target_size = cfg.TRAIN.SCALES[scale_inds[i]]
    # The result gets its own name so `im` stays the untouched source image.
    # The original rebound `im`, so from the second entry on the already
    # processed image was fed back into prep_im_for_blob.
    processed_im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS,
                                              target_size, cfg.TRAIN.MAX_SIZE)
    im_scales.append(im_scale)
    processed_ims.append(processed_im)

  # Create a blob to hold the input images
  blob = im_list_to_blob(processed_ims)

  return blob, im_scales
Esempio n. 28
0
def _get_image_blob(im):
    """Build a multi-scale network input, optionally padded to a stride.

    For every target size in ``cfg.TEST.SCALES`` the scale is chosen so the
    shorter side matches the target, capped so the longer side does not
    exceed ``cfg.TEST.MAX_SIZE``.  When ``cfg.TRAIN.IMAGE_STRIDE`` is
    non-zero, each resized image is copied into the top-left corner of a
    zero array whose height and width are rounded up to a multiple of the
    stride.

    Arguments:
        im (ndarray): a color image in BGR order

    Returns:
        blob (ndarray): output of ``im_list_to_blob`` for the image pyramid
        im_scale_factors (ndarray): scales (relative to im) used in the
            pyramid
    """
    centered = im.astype(np.float32, copy=True)
    centered -= cfg.PIXEL_MEANS

    spatial_shape = centered.shape[0:2]
    short_side = np.min(spatial_shape)
    long_side = np.max(spatial_shape)
    stride = cfg.TRAIN.IMAGE_STRIDE

    pyramid = []
    factors = []
    for target_size in cfg.TEST.SCALES:
        factor = float(target_size) / float(short_side)
        # Cap the scale so the longer side stays within MAX_SIZE.
        if np.round(factor * long_side) > cfg.TEST.MAX_SIZE:
            factor = float(cfg.TEST.MAX_SIZE) / float(long_side)
        level = cv2.resize(centered, None, None, fx=factor, fy=factor,
                           interpolation=cv2.INTER_LINEAR)
        if stride != 0:
            # Zero-pad at the bottom/right up to a multiple of the stride.
            rows, cols, channels = level.shape[0], level.shape[1], level.shape[2]
            padded_rows = int(np.ceil(rows / float(stride)) * stride)
            padded_cols = int(np.ceil(cols / float(stride)) * stride)
            padded = np.zeros((padded_rows, padded_cols, channels))
            padded[:rows, :cols, :] = level
            level = padded
        factors.append(factor)
        pyramid.append(level)

    return im_list_to_blob(pyramid), np.array(factors)
Esempio n. 29
0
    def _get_image_blob(image):
        """Converts an image into a network input.

        For every target size in ``cfg.TEST.SCALES`` the scale is chosen so
        the shorter side matches the target, unless the resized longer side
        would exceed ``cfg.TEST.MAX_SIZE``; then the longer side is matched to
        ``cfg.TEST.MAX_SIZE``.  Each pyramid level is resized from the
        original image, converted to a float32 array, has its channel order
        reversed (RGB to BGR) and ``cfg.PIXEL_MEANS`` subtracted.

        Arguments:
          image: a PIL Image.

        Returns:
          blob (ndarray): a data blob holding an image pyramid
          im_scale_factors (ndarray): image scales (relative to the input)
            used in the image pyramid, one per entry of cfg.TEST.SCALES
        """
        width, height = image.size
        im_size_min = min(width, height)
        im_size_max = max(width, height)

        processed_ims = []
        im_scale_factors = []

        for target_size in cfg.TEST.SCALES:
            im_scale = float(target_size) / float(im_size_min)

            # Prevent the biggest axis from being more than MAX_SIZE after
            # resizing.  The original compared the unscaled size, which
            # capped large images even when the resized one would have fit.
            if np.round(im_scale * im_size_max) > cfg.TEST.MAX_SIZE:
                im_scale = float(cfg.TEST.MAX_SIZE) / float(im_size_max)

            # Always resize from the original image.  The original overwrote
            # `image` on every pass and appended one result after the loop,
            # so only the last scale was returned.
            new_size = (int(round(width * im_scale)),
                        int(round(height * im_scale)))
            resized = image.resize(new_size, PIL.Image.BILINEAR)

            # from PIL.Image to Numpy
            pixels = np.asarray(resized, dtype=np.float32)

            # RGB to BGR
            pixels = np.flip(pixels, axis=2)
            pixels -= cfg.PIXEL_MEANS

            im_scale_factors.append(im_scale)
            processed_ims.append(pixels)

        # Create a blob to hold the input images
        blob = im_list_to_blob(processed_ims)

        return blob, np.array(im_scale_factors)
Esempio n. 30
0
def _get_image_blob(roidb, scale_inds):
    """Build the input blob for the roidb images at the requested scales.

    Each image is read from ``entry['image']`` with ``cv2.IMREAD_COLOR``,
    mirrored horizontally when ``entry['flipped']`` is set, and passed to
    ``prep_im_for_blob`` with the target size
    ``cfg.TRAIN.SCALES[scale_inds[i]]``.

    Returns:
        (blob, im_scales): output of ``im_list_to_blob`` and the per-image
        scale values returned by ``prep_im_for_blob``.
    """
    prepared = []
    im_scales = []
    for idx, entry in enumerate(roidb):
        image = cv2.imread(entry['image'], cv2.IMREAD_COLOR)
        if entry['flipped']:
            image = image[:, ::-1, :]
        image, scale = prep_im_for_blob(
            image, cfg.PIXEL_MEANS, cfg.TRAIN.SCALES[scale_inds[idx]],
            cfg.TRAIN.MAX_SIZE)
        im_scales.append(scale)
        prepared.append(image)

    return im_list_to_blob(prepared), im_scales
Esempio n. 31
0
def _get_image_blob(roidb, scale_inds):
    """Build the input blob and record the size of every prepared image.

    Each image is read from ``entry['image']``, mirrored horizontally when
    ``entry['flipped']`` is set, and passed to ``prep_im_for_blob`` with the
    target size ``cfg.TRAIN.SCALES[scale_inds[i]]``.

    Returns:
        (blob, im_scales, im_info): output of ``im_list_to_blob``, the
        per-image scale values, and a list of ``[height, width]`` pairs taken
        from the images returned by ``prep_im_for_blob``.
    """
    prepared = []
    im_scales = []
    im_info = []
    for idx, entry in enumerate(roidb):
        image = cv2.imread(entry['image'])
        if entry['flipped']:
            image = image[:, ::-1, :]
        image, scale = prep_im_for_blob(
            image, cfg.PIXEL_MEANS, cfg.TRAIN.SCALES[scale_inds[idx]],
            cfg.TRAIN.MAX_SIZE)
        im_scales.append(scale)
        # Size after preparation, i.e. of the image that goes into the blob.
        im_info.append([image.shape[0], image.shape[1]])
        prepared.append(image)

    return im_list_to_blob(prepared), im_scales, im_info
Esempio n. 32
0
def _get_image_blob(ims, target_size):
    """Convert a collection of images into a single network input blob.

    Every image has ``cfg.PIXEL_MEANS`` subtracted and is then resized with a
    separate factor per axis (``target_size / width`` horizontally and
    ``target_size / height`` vertically), so the aspect ratio is not
    preserved.

    Arguments:
        ims (iterable of ndarray): images to convert
        target_size (number): side length both axes are scaled towards

    Returns:
        blob: the value returned by im_list_to_blob for the resized images
    """
    resized_ims = []
    for raw in ims:
        centered = raw.astype(np.float32, copy=False) - cfg.PIXEL_MEANS
        extent = centered.shape[0:2]
        scale_x = float(target_size) / extent[1]
        scale_y = float(target_size) / extent[0]
        resized_ims.append(
            cv2.resize(centered, None, None, fx=scale_x, fy=scale_y,
                       interpolation=cv2.INTER_LINEAR))

    # Pack the resized images into one blob.
    return im_list_to_blob(resized_ims)
Esempio n. 33
0
def _get_image_blob(roidb, scale_inds):
    """Create the image blob for the given roidb entries.

    Args:
        roidb: sequence of entries exposing the 'image' path and the
            'flipped' flag.
        scale_inds: per-entry indices into ``cfg.TRAIN.SCALES``.

    Returns:
        ``(blob, im_scales)``: the output of ``im_list_to_blob`` and the list
        of scales returned by ``prep_im_for_blob``.
    """

    def _prepare(entry, scale_ind):
        # Read the image and mirror it if the entry asks for that.
        image = cv2.imread(entry['image'])
        if entry['flipped']:
            image = image[:, ::-1, :]
        # Target size comes from cfg.TRAIN.SCALES (original note: the config
        # file supplies only a single size -- not verifiable from here).
        size = cfg.TRAIN.SCALES[scale_ind]
        # cfg.PIXEL_MEANS is passed along for mean removal (per the original
        # comment on this call).
        return prep_im_for_blob(image, cfg.PIXEL_MEANS, size,
                                cfg.TRAIN.MAX_SIZE)

    processed_ims = []
    im_scales = []
    for i in range(len(roidb)):
        prepared, scale = _prepare(roidb[i], scale_inds[i])
        im_scales.append(scale)
        processed_ims.append(prepared)

    # Returns the packed image blob together with the list of scales used.
    return im_list_to_blob(processed_ims), im_scales
Esempio n. 34
0
def _get_image_blob(roidb, scale_inds):
    """Build the image blob for a minibatch, also collecting image shapes.

    Args:
        roidb: sequence of entries exposing the 'image' path and the
            'flipped' flag.
        scale_inds: per-entry indices into ``cfg.TRAIN.SCALES``.

    Returns:
        ``(blob, im_scales, im_shapes)`` where ``im_shapes`` stacks, row by
        row, the third value returned by ``prep_im_for_blob`` for each image
        (it starts as an empty ``(0, 2)`` float32 array).
    """
    prepared = []
    scales = []
    shapes = np.zeros((0, 2), dtype=np.float32)
    for idx, entry in enumerate(roidb):
        image = cv2.imread(entry['image'])
        if entry['flipped']:
            image = image[:, ::-1, :]
        size = cfg.TRAIN.SCALES[scale_inds[idx]]
        image, scale, shape = prep_im_for_blob(image, cfg.PIXEL_MEANS, size)
        scales.append(scale)
        prepared.append(image)
        # Append this image's shape as a new row.
        shapes = np.vstack((shapes, shape))

    return im_list_to_blob(prepared), scales, shapes
Esempio n. 35
0
    def get_image_blob(self, im):
        """Convert one image into a blob holding one entry per configured scale.

        The image is converted to float32 and divided by 255, then passed to
        ``prep_im_for_blob`` once for every target size in ``self.SCALES``
        together with ``self.MAX_SIZE`` and fixed per-channel ``mean`` /
        ``std`` arrays.

        Args:
            im: ndarray image; it is copied, the caller's array is not
                modified here.

        Returns:
            ``(blob, im_scale_factors)``: the output of ``im_list_to_blob``
            and a numpy array with the scale returned by
            ``prep_im_for_blob`` for each target size.
        """
        im_orig = im.astype(np.float32, copy=True) / 255.0

        # Per-channel statistics, shaped (1, 1, 3).
        mean = np.array([[[0.485, 0.456, 0.406]]])
        std = np.array([[[0.229, 0.224, 0.225]]])

        processed_ims = []
        im_scale_factors = []
        for target_size in self.SCALES:
            # Use a fresh name so the ``im`` argument is not shadowed.
            scaled_im, im_scale = prep_im_for_blob(im_orig,
                                                   target_size,
                                                   self.MAX_SIZE,
                                                   mean=mean,
                                                   std=std)
            im_scale_factors.append(im_scale)
            processed_ims.append(scaled_im)

        # Create a blob to hold the input images
        blob = im_list_to_blob(processed_ims)

        return blob, np.array(im_scale_factors)
Esempio n. 36
0
File: demo.py Progetto: vbillys/MNC
def prepare_mnc_args(im, net):
    """Prepare the keyword arguments for a forward pass on a single image.

    The image goes through ``prep_im_for_blob`` (using ``cfg.TEST.SCALES[0]``
    and ``cfg.TRAIN.MAX_SIZE``) and ``im_list_to_blob``; an ``im_info`` array
    is built from the data blob's axes 2 and 3 plus the scale. The ``data``
    and ``im_info`` blobs of ``net`` are reshaped to match. No forward pass
    is run here.

    Args:
        im: input image.
        net: network object exposing ``blobs['data']`` and
            ``blobs['im_info']`` with a ``reshape`` method.

    Returns:
        ``(forward_kwargs, im_scales)``: a dict with float32 'data' and
        'im_info' arrays, and a one-element list holding the scale as a
        numpy array.
    """
    # Prepare image data blob
    resized, scale = prep_im_for_blob(
        im, cfg.PIXEL_MEANS, cfg.TEST.SCALES[0], cfg.TRAIN.MAX_SIZE)
    data_blob = im_list_to_blob([resized])

    # Prepare image info blob
    im_scales = [np.array(scale)]
    assert len(im_scales) == 1, 'Only single-image batch implemented'
    info_row = [data_blob.shape[2], data_blob.shape[3], im_scales[0]]
    info_blob = np.array([info_row], dtype=np.float32)

    # Resize the network inputs to match the new blobs.
    net.blobs['data'].reshape(*data_blob.shape)
    net.blobs['im_info'].reshape(*info_blob.shape)

    forward_kwargs = dict()
    forward_kwargs['data'] = data_blob.astype(np.float32, copy=False)
    forward_kwargs['im_info'] = info_blob.astype(np.float32, copy=False)
    return forward_kwargs, im_scales
Esempio n. 37
0
def _get_image_blob(im):
    """Turn an image into a network input holding one entry per test scale.

    Arguments:
      im (ndarray): a color image in BGR order

    Returns:
      blob (ndarray): output of im_list_to_blob for the resized copies
      im_scale_factors (ndarray): the scale (relative to im) used for each
        entry of cfg.TEST.SCALES
    """
    centered = im.astype(np.float32, copy=True)
    centered -= cfg.PIXEL_MEANS

    extent = centered.shape[0:2]
    short_side = np.min(extent)
    long_side = np.max(extent)

    pyramid = []
    factors = []
    for target in cfg.TEST.SCALES:
        # Scale so that the shorter side reaches the target size ...
        factor = float(target) / float(short_side)
        # ... unless that would push the longer side past MAX_SIZE.
        if np.round(factor * long_side) > cfg.TEST.MAX_SIZE:
            factor = float(cfg.TEST.MAX_SIZE) / float(long_side)
        resized = cv2.resize(centered, None, None, fx=factor, fy=factor,
                             interpolation=cv2.INTER_LINEAR)
        factors.append(factor)
        pyramid.append(resized)

    # Pack all resized copies into one blob.
    return im_list_to_blob(pyramid), np.array(factors)
Esempio n. 38
0
def _get_image_blob(roidb, scale_inds):
    """Build the image blob for a minibatch, applying per-entry augmentation.

    Every augmentation is optional and is triggered by keys present in the
    roidb entry. They run in this fixed order: horizontal flip, vertical
    flip, brightness, rotation, shift, zoom, crop. The result then goes
    through ``prep_im_for_blob``.

    Args:
        roidb: sequence of dict-like entries; 'image' is required, all
            augmentation keys are optional.
        scale_inds: per-entry indices into ``cfg.TRAIN.SCALES``.

    Returns:
        ``(blob, im_scales)``: the output of ``im_list_to_blob`` and the list
        of scales returned by ``prep_im_for_blob``.
    """
    processed_ims = []
    im_scales = []
    for idx, entry in enumerate(roidb):
        # Read the image from disk.
        im = cv2.imread(entry['image'])

        # Mirror along the column axis.
        if 'hor_flipped' in entry and entry['hor_flipped']:
            im = im[:, ::-1, :]
        # Mirror along the row axis.
        if 'ver_flipped' in entry and entry['ver_flipped']:
            im = im[::-1, :, :]
        # A brightness factor of 1 means "leave unchanged".
        if 'bright_scala' in entry and entry['bright_scala'] != 1:
            im = data_augment._bright_adjuest(im, entry['bright_scala'])
        # A rotation angle of 0 means "leave unchanged".
        if 'rotate_angle' in entry and entry['rotate_angle'] != 0:
            im = data_augment._rotate_image(im, entry['rotate_angle'])
        if 'shift_x' in entry and 'shift_y' in entry:
            offset = (int(entry['shift_x']), int(entry['shift_y']))
            im = data_augment._shift_image(im, offset)
        if 'zoom_x' in entry and 'zoom_y' in entry:
            im = data_augment._zoom_image(im, entry['zoom_x'],
                                          entry['zoom_y'])
        has_crop = ('position' in entry and 'crop_size_width' in entry
                    and 'crop_size_height' in entry)
        if has_crop:
            crop_size = (entry['crop_size_width'], entry['crop_size_height'])
            scale = cfg.TRAIN.RESIZE_SCALE
            position = entry['position']
            im = data_augment.random_crop_image(im, crop_size, scale,
                                                position)

        # Pick the target size, then get the resized image and its scale.
        target_size = cfg.TRAIN.SCALES[scale_inds[idx]]
        im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
                                        cfg.TRAIN.MAX_SIZE)
        im_scales.append(im_scale)
        processed_ims.append(im)

    # Pack the resized images into a single blob.
    return im_list_to_blob(processed_ims), im_scales
Esempio n. 39
0
def _get_image_blob(roidb, scale_inds):
    """Build the image blob for a minibatch with optional blur/scale/crop.

    Per entry: read the image, mirror it if 'flipped', apply a 7x7 Gaussian
    blur if 'blurred', rescale by 'scale' when that key is present and not 1,
    crop to 'crop_box' when that key is present, then run
    ``prep_im_for_blob``.

    Args:
        roidb: sequence of dict-like entries with 'image', 'flipped' and
            'blurred'; 'scale' and 'crop_box' are optional.
        scale_inds: per-entry indices into ``cfg.TRAIN.SCALES``.

    Returns:
        ``(blob, im_scales)``: the output of ``im_list_to_blob`` and the list
        of scales returned by ``prep_im_for_blob``.
    """
    prepared = []
    scales = []
    for idx, entry in enumerate(roidb):
        image = cv2.imread(entry['image'])

        if entry['flipped']:
            image = image[:, ::-1, :]

        if entry['blurred']:
            image = cv2.GaussianBlur(image, (7, 7), 20)

        if 'scale' in entry.keys() and entry['scale'] != 1:
            factor = entry['scale']
            # cv2.resize takes the destination size as (width, height).
            new_w = np.floor(image.shape[1] * factor).astype(np.int32)
            new_h = np.floor(image.shape[0] * factor).astype(np.int32)
            image = cv2.resize(image, (new_w, new_h))

        if 'crop_box' in entry.keys():
            box = entry['crop_box']
            # box[1]:box[3] selects rows, box[0]:box[2] selects columns.
            image = image[box[1]:box[3], box[0]:box[2]]

        size = cfg.TRAIN.SCALES[scale_inds[idx]]
        image, scale = prep_im_for_blob(image, cfg.PIXEL_MEANS, size,
                                        cfg.TRAIN.MAX_SIZE)
        scales.append(scale)
        prepared.append(image)

    # Pack the prepared images into one blob.
    return im_list_to_blob(prepared), scales
Esempio n. 40
0
def _get_image_blob(roidb, scale_inds):
    """Builds an input blob from the images in the roidb.

    Images are read with ``imread`` (not cv2), expanded to three channels
    when they are 2-D, and have their channel order reversed before use. The
    target size is derived from the image itself so that the longer side
    maps to 600.

    Args:
        roidb: sequence of entries exposing the 'image' path and the
            'flipped' flag.
        scale_inds: unused here (the cfg.TRAIN.SCALES lookup is disabled);
            kept so existing callers keep working.

    Returns:
        ``(blob, im_scales)``: the output of ``im_list_to_blob`` and the list
        of scales returned by ``prep_im_for_blob``.
    """
    processed_ims = []
    im_scales = []
    for entry in roidb:
        im = imread(entry['image'])

        if len(im.shape) == 2:
            # Replicate a 2-D image into three identical channels.
            im = im[:, :, np.newaxis]
            im = np.concatenate((im, im, im), axis=2)
        # flip the channel, since the original one using cv2
        # rgb -> bgr
        im = im[:, :, ::-1]

        if entry['flipped']:
            im = im[:, ::-1, :]

        height, width = im.shape[0], im.shape[1]
        max_side = max(height, width)
        small_side = min(height, width)
        # Force true division: under Python 2 the integer expression
        # ``600 / max_side`` truncates to 0 whenever max_side > 600.
        target_size = 600 / float(max_side) * small_side

        im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
                                        cfg.TRAIN.MAX_SIZE)

        im_scales.append(im_scale)
        processed_ims.append(im)

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)

    return blob, im_scales
Esempio n. 41
0
def _get_image_blob(roidb, scale_inds):
    """Create the image blob for the given roidb entries.

    Args:
        roidb: sequence of entries exposing the 'image' path and the
            'flipped' flag.
        scale_inds: per-entry indices into ``cfg.TRAIN.SCALES``.

    Returns:
        ``(blob, im_scales)``: the output of ``im_list_to_blob`` and the list
        of scales returned by ``prep_im_for_blob``.
    """
    prepared_ims = []
    scale_factors = []
    # Original note: in practice this loop runs only once (presumably one
    # image per batch -- confirm against the caller).
    for idx, entry in enumerate(roidb):
        # Read the image from disk.
        img = cv2.imread(entry['image'])
        if entry['flipped']:
            # Mirror the image by reversing its column axis.
            img = img[:, ::-1, :]
        # Original note: this is effectively 600 (depends on the config in
        # use -- not verifiable from here).
        size = cfg.TRAIN.SCALES[scale_inds[idx]]

        # prep_im_for_blob yields the resized image and the scaling ratio.
        img, factor = prep_im_for_blob(img, cfg.PIXEL_MEANS, size,
                                       cfg.TRAIN.MAX_SIZE)
        scale_factors.append(factor)
        prepared_ims.append(img)

    # Convert the prepared images into the blob structure; per the original
    # note this is a 4-D N * C * H * W layout as used by Caffe (TODO confirm
    # against im_list_to_blob).
    return im_list_to_blob(prepared_ims), scale_factors
Esempio n. 42
0
def _get_image_blob_multiscale(roidb):
    """Build an input blob holding every roidb image at every base scale.

    Each image is read, mirrored if 'flipped', has ``cfg.PIXEL_MEANS``
    subtracted, and is then resized once per entry of
    ``cfg.TRAIN.SCALES_BASE``.

    Args:
        roidb: sequence of entries exposing the 'image' path and the
            'flipped' flag.

    Returns:
        ``(blob, im_scales)``: the output of ``im_list_to_blob`` and a list
        with one scale per resized copy, ordered image by image.
    """
    base_scales = cfg.TRAIN.SCALES_BASE
    pyramid = []
    used_scales = []
    for entry in roidb:
        raw = cv2.imread(entry['image'])
        if entry['flipped']:
            raw = raw[:, ::-1, :]

        centered = raw.astype(np.float32, copy=True)
        centered -= cfg.PIXEL_MEANS

        for factor in base_scales:
            resized = cv2.resize(centered, None, None, fx=factor, fy=factor,
                                 interpolation=cv2.INTER_LINEAR)
            used_scales.append(factor)
            pyramid.append(resized)

    # Pack all resized copies into one blob.
    return im_list_to_blob(pyramid), used_scales
Esempio n. 43
0
def _get_image_blob(roidb, scale_inds):
    """Build an input blob from the first roidb entry only.

    The batch size is pinned to one image, so only ``roidb[0]`` and
    ``scale_inds[0]`` are consulted, whatever the length of ``roidb``.

    Args:
        roidb: sequence of entries exposing the 'image' path and the
            'flipped' flag.
        scale_inds: indices into ``config.SCALES``.

    Returns:
        ``(blob, im_scales)``: the output of ``im_list_to_blob`` and a
        one-element list with the scale returned by ``prep_im_for_blob``.
    """
    # Deliberately fixed at 1 instead of len(roidb).
    batch_size = 1
    prepared = []
    scales = []
    for k in range(batch_size):
        entry = roidb[k]
        image = cv2.imread(entry['image'])
        if entry['flipped']:
            image = image[:, ::-1, :]
        size = config.SCALES[scale_inds[k]]
        image, scale = prep_im_for_blob(image, config.PIXEL_MEANS, size,
                                        config.TRAIN_MAX_SIZE)
        scales.append(scale)
        prepared.append(image)

    # Pack the prepared image into a blob.
    return im_list_to_blob(prepared), scales
Esempio n. 44
0
def _get_image_blob(roidb, scale_inds):
    """Builds an input blob from the images in the roidb at the specified
    scales.

    Args:
        roidb: sequence of entries exposing the 'image' path and the
            'flipped' flag.
        scale_inds: per-entry indices into ``cfg.TRAIN.SCALES``.

    Returns:
        ``(blob, im_scales)``: the output of ``im_list_to_blob`` and the list
        of scales returned by ``prep_im_for_blob``.
    """
    processed_ims = []
    im_scales = []
    for i, entry in enumerate(roidb):
        im = cv2.imread(entry['image'])
        if entry['flipped']:
            im = im[:, ::-1, :]
        # NOTE: an abandoned quadrant-cropping experiment used to sit here as
        # a bare string literal; it never executed and has been removed.

        target_size = cfg.TRAIN.SCALES[scale_inds[i]]
        # This variant also forwards cfg.TRAIN.IMAGE_STRIDE.
        im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
                                        cfg.TRAIN.MAX_SIZE,
                                        cfg.TRAIN.IMAGE_STRIDE)
        im_scales.append(im_scale)
        processed_ims.append(im)

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)

    return blob, im_scales
Esempio n. 45
0
def _get_image_blob(roidb, scale_inds, data_i):
    """Build a blob of six-channel images, each made from two image files.

    For every entry the file at ``entry['image'][data_i]`` and its companion
    ``<path>_norm.png`` are read. Both have their channel order reversed and
    are stacked into a uint8 array with six channels (primary image in
    channels 0-2, companion in 3-5). ``prep_im_for_blob`` is called with the
    scalar 127.5 in place of ``cfg.PIXEL_MEANS``.

    Args:
        roidb: sequence of entries; 'image' is indexable by ``data_i``.
        scale_inds: per-entry indices into ``cfg.TRAIN.SCALES``.
        data_i: index selecting which path of ``entry['image']`` to load.

    Returns:
        ``(blob, im_scales)``: the output of ``im_list_to_blob`` and the list
        of scales returned by ``prep_im_for_blob``.
    """
    prepared = []
    scales = []
    for idx, entry in enumerate(roidb):
        primary_path = entry['image'][data_i]
        companion_path = primary_path + '_norm.png'
        primary = cv2.imread(primary_path)
        companion = cv2.imread(companion_path)
        if entry['flipped']:
            primary = primary[:, ::-1, :]
            companion = companion[:, ::-1, :]
            # Invert channel 2 of the companion after mirroring (presumably
            # the x component of a normal map -- TODO confirm).
            companion[:, :, 2] = 255 - companion[:, :, 2]

        stacked = np.zeros((primary.shape[0], primary.shape[1], 6))
        stacked = stacked.astype('uint8')
        # Reverse the channel order of both inputs while stacking.
        stacked[:, :, 0:3] = primary[:, :, ::-1]
        stacked[:, :, 3:6] = companion[:, :, ::-1]

        size = cfg.TRAIN.SCALES[scale_inds[idx]]
        stacked, scale = prep_im_for_blob(stacked, 127.5, size,
                                          cfg.TRAIN.MAX_SIZE)
        scales.append(scale)
        prepared.append(stacked)

    # Pack the stacked images into one blob.
    return im_list_to_blob(prepared), scales
Esempio n. 46
0
def _get_image_blob(roidb, scale_inds, args):
    """Build the image blob for a minibatch using PIL-loaded images.

    Images are opened with ``Image.open`` and converted to float32 arrays.
    For paths containing "VOC2012" the first three channels are selected and,
    when ``args.substract_mean`` equals the string "True", a fixed
    per-channel mean is subtracted.

    Args:
        roidb: sequence of entries exposing the 'image' path and the
            'flipped' flag.
        scale_inds: per-entry indices into ``args.scale_list``.
        args: options object; ``scale_list`` and ``substract_mean`` are read
            here and the whole object is forwarded to ``prep_im_for_blob``.

    Returns:
        ``(blob, im_scales, crop_box)``: the output of ``im_list_to_blob``
        plus the per-image scales and crop boxes returned by
        ``prep_im_for_blob``.
    """
    prepared = []
    scales = []
    boxes = []

    for idx, entry in enumerate(roidb):
        path = entry['image']
        pixels = np.array(Image.open(path), dtype=np.float32)

        if "VOC2012" in path:
            # Keep channels 0, 1, 2 in their existing order.
            pixels = pixels[:, :, (0, 1, 2)]
            # The flag is compared as a string, not a bool.
            if args.substract_mean == "True":
                pixels -= (122.67891434, 116.66876762, 104.00698793)

        if entry['flipped']:
            pixels = pixels[:, ::-1, :]

        chosen_scale = args.scale_list[scale_inds[idx]]
        pixels, scale, box = prep_im_for_blob(pixels, chosen_scale, args)

        boxes.append(box)
        scales.append(scale)
        prepared.append(pixels)

    # Pack the prepared images into one blob.
    return im_list_to_blob(prepared), scales, boxes
Esempio n. 47
0
def _get_image_blob(roidb, scale_inds):
    """Convert the roidb images into a blob at the selected scales.

    Args:
        roidb: sequence of entries exposing the 'image' path and the
            'flipped' flag.
        scale_inds: per-entry indices into ``cfg.TRAIN.SCALES``.

    Returns:
        ``(blob, im_scales)``: the output of ``im_list_to_blob`` and the list
        of scales returned by ``prep_im_for_blob``.
    """
    blob_inputs = []
    factors = []
    for k, record in enumerate(roidb):
        picture = cv2.imread(record['image'])
        if record['flipped']:
            # Horizontal flip: reverse the order along the width axis.
            picture = picture[:, ::-1, :]
        # Original note: SCALES is 600 (config dependent -- TODO confirm).
        wanted = cfg.TRAIN.SCALES[scale_inds[k]]
        # Per the original comment this removes the mean and rescales the
        # image, returning the rescaled image and the factor used. Original
        # note: MAX_SIZE is 1000 (config dependent -- TODO confirm).
        picture, factor = prep_im_for_blob(picture, cfg.PIXEL_MEANS, wanted,
                                           cfg.TRAIN.MAX_SIZE)
        factors.append(factor)
        blob_inputs.append(picture)

    # Pack the prepared images into one blob.
    return im_list_to_blob(blob_inputs), factors
Esempio n. 48
0
def _get_image_blob(roidb, scale_inds):
    """Create the image blob for the given roidb entries.

    Args:
        roidb: sequence of entries exposing the 'image' path and the
            'flipped' flag.
        scale_inds: per-entry indices into ``cfg.TRAIN.SCALES``.

    Returns:
        ``(blob, im_scales)``: the output of ``im_list_to_blob`` and the list
        of scales returned by ``prep_im_for_blob``.
    """

    def _load(entry, scale_ind):
        # Read, optionally mirror, then mean-subtract/resize one image.
        image = cv2.imread(entry['image'])
        if entry['flipped']:
            image = image[:, ::-1, :]
        size = cfg.TRAIN.SCALES[scale_ind]
        return prep_im_for_blob(image, cfg.PIXEL_MEANS, size,
                                cfg.TRAIN.MAX_SIZE)

    prepared = []
    scales = []
    for pos in range(len(roidb)):
        image, scale = _load(roidb[pos], scale_inds[pos])
        scales.append(scale)
        prepared.append(image)

    # Only the mean-subtracted blob is produced; a disabled experiment that
    # also returned an "original" blob is intentionally not reproduced here.
    return im_list_to_blob(prepared), scales
Esempio n. 49
0
def _get_image_blob(roidb, scale_inds):
    """Assemble the input blob for a minibatch of roidb entries.

    Args:
        roidb: sequence of entries exposing the 'image' path and the
            'flipped' flag.
        scale_inds: per-entry indices into ``cfg.TRAIN.SCALES``.

    Returns:
        ``(blob, im_scales)``: the output of ``im_list_to_blob`` and the list
        of scales returned by ``prep_im_for_blob``.
    """
    images_out = []
    scales_out = []
    for n, item in enumerate(roidb):
        # cv2 images are indexed (rows, columns, channel), i.e. height first.
        frame = cv2.imread(item['image'])
        if item['flipped']:
            frame = frame[:, ::-1, :]
        frame, ratio = prep_im_for_blob(frame, cfg.PIXEL_MEANS,
                                        cfg.TRAIN.SCALES[scale_inds[n]],
                                        cfg.TRAIN.MAX_SIZE)
        scales_out.append(ratio)
        images_out.append(frame)

    # Pack the prepared images into one blob.
    packed = im_list_to_blob(images_out)
    return packed, scales_out
Esempio n. 50
0
def _get_image_blob(roidb, scale_inds):
    """Create the training input blob for the given roidb entries.

    Args:
        roidb: sequence of entries exposing the 'image' path and the
            'flipped' flag.
        scale_inds: per-entry indices into ``cfg.TRAIN.SCALES``.

    Returns:
        ``(blob, im_scales)``: the output of ``im_list_to_blob`` and the list
        of scales returned by ``prep_im_for_blob``.
    """
    ready = []
    ratios = []
    for pos, sample in enumerate(roidb):
        pic = cv2.imread(sample['image'])
        if sample['flipped']:
            pic = pic[:, ::-1, :]
        size = cfg.TRAIN.SCALES[scale_inds[pos]]

        # Preprocessing for training; note that prep_im_for_blob is given a
        # single picture at a time.
        pic, ratio = prep_im_for_blob(pic, cfg.PIXEL_MEANS, size,
                                      cfg.TRAIN.MAX_SIZE)
        ratios.append(ratio)
        ready.append(pic)

    # One blob is built from the whole list (original note: the list holds
    # just one image in practice -- depends on the caller).
    return im_list_to_blob(ready), ratios
Esempio n. 51
0
def _get_image_blob(im):
    """Converts an image into a fixed-size (417x417) network input.

    Arguments:
        im (ndarray): a color image in BGR order
    Returns:
        blob (list): a one-element list holding the result of
            im_list_to_blob for the resized image
        im_scale_factors (ndarray): despite the name, this holds the
            (height, width) of the input image before resizing, as a single
            row -- not a scale factor
    """
    im_orig = im.astype(np.float32, copy=True)
    im_orig -= cfg.PIXEL_MEANS

    # Resize to a fixed 417x417 input regardless of the original aspect ratio.
    resized = cv2.resize(im_orig, (417, 417), interpolation=cv2.INTER_LINEAR)

    # Record the original spatial size, presumably so callers can map
    # results back to it -- TODO confirm.
    im_scale_factors = [im_orig.shape[:2]]

    blob = [im_list_to_blob([resized])]
    return blob, np.array(im_scale_factors)
Esempio n. 52
0
def _get_image_blob(roidb, scale_inds):
    """Assemble the input blob for a minibatch of roidb entries.

    Config values noted by the original author (not verifiable from here):
    cfg.TRAIN.SCALES=(600,),
    cfg.PIXEL_MEANS=np.array([[[102.9801, 115.9465, 122.7717]]]),
    cfg.TRAIN.MAX_SIZE=1000.

    Args:
        roidb: sequence of entries exposing the 'image' path and the
            'flipped' flag.
        scale_inds: per-entry indices into ``cfg.TRAIN.SCALES``.

    Returns:
        ``(blob, im_scales)``: the output of ``im_list_to_blob`` and the list
        of scales returned by ``prep_im_for_blob``.
    """
    max_size = cfg.TRAIN.MAX_SIZE
    outputs = []
    scale_log = []
    for j, rec in enumerate(roidb):
        data = cv2.imread(rec['image'])
        if rec['flipped']:
            data = data[:, ::-1, :]
        goal = cfg.TRAIN.SCALES[scale_inds[j]]
        data, used = prep_im_for_blob(data, cfg.PIXEL_MEANS, goal, max_size)
        scale_log.append(used)
        outputs.append(data)

    # Pack the prepared images into one blob.
    return im_list_to_blob(outputs), scale_log
Esempio n. 53
0
def _get_image_blob(roidb, scale_inds):
    """Build the image blob for a minibatch from images read with ``sio``.

    Images that are 2-D or have a single channel are expanded with
    ``skimage.color.gray2rgb`` before the usual flip and
    ``prep_im_for_blob`` steps.

    Args:
        roidb: sequence of entries exposing the 'image' path and the
            'flipped' flag.
        scale_inds: per-entry indices into ``cfg.TRAIN.SCALES``.

    Returns:
        ``(blob, im_scales)``: the output of ``im_list_to_blob`` and the list
        of scales returned by ``prep_im_for_blob``.
    """
    prepared = []
    scales = []
    for idx, entry in enumerate(roidb):
        image = sio.imread(entry['image'])
        is_gray = len(image.shape) == 2 or image.shape[2] == 1
        if is_gray:
            image = skimage.color.gray2rgb(image)
        if entry['flipped']:
            image = image[:, ::-1, :]
        # NOTE(review): no channel reordering happens here, unlike the
        # cv2-based variants -- confirm cfg.PIXEL_MEANS matches the channel
        # order this reader produces.
        size = cfg.TRAIN.SCALES[scale_inds[idx]]
        image, scale = prep_im_for_blob(image, cfg.PIXEL_MEANS, size,
                                        cfg.TRAIN.MAX_SIZE)
        scales.append(scale)
        prepared.append(image)

    # Pack the prepared images into one blob.
    return im_list_to_blob(prepared), scales
def _get_image_blob(roidb):
    """Build an input blob from the roidb images at randomly sampled scales.

    One index into ``cfg.TRAIN.SCALES`` is drawn per image with
    ``np.random.randint`` before any image is read.

    Args:
        roidb: sequence of entries exposing the 'image' path and the
            'flipped' flag.

    Returns:
        ``(blob, im_scales)``: the output of ``blob_utils.im_list_to_blob``
        and, per image, the first scale returned by
        ``blob_utils.prep_im_for_blob``.
    """
    count = len(roidb)
    # Sample a random scale index for each image in this batch.
    picked = np.random.randint(0, high=len(cfg.TRAIN.SCALES), size=count)

    prepared = []
    scales = []
    for idx in range(count):
        entry = roidb[idx]
        image = cv2.imread(entry['image'])
        assert image is not None, \
            'Failed to read image \'{}\''.format(entry['image'])
        # When images are NOT read with OpenCV, 2-D inputs must first be
        # replicated to three channels and the channel order reversed
        # (rgb -> bgr); with cv2.imread neither step is performed here.
        if entry['flipped']:
            image = image[:, ::-1, :]
        size = cfg.TRAIN.SCALES[picked[idx]]
        # prep_im_for_blob receives a list of sizes and its results are
        # indexed, so only element 0 of each result is kept.
        ims, im_scale = blob_utils.prep_im_for_blob(
            image, cfg.PIXEL_MEANS, cfg.PIXEL_VARS, [size],
            cfg.TRAIN.MAX_SIZE)
        scales.append(im_scale[0])
        prepared.append(ims[0])

    # Create a blob to hold the input images [n, c, h, w]
    return blob_utils.im_list_to_blob(prepared), scales
Esempio n. 55
0
def _get_image_blob(roidb, scale_inds, images_subdir):
    """Build the image blob, reading images from an alternative sub-directory.

    The parent directory name of each entry's image path (the second-to-last
    "/"-separated component) is replaced by ``images_subdir`` before the file
    is read. ``str.replace`` is used, so every occurrence of that name in the
    path is substituted.

    Args:
        roidb: sequence of entries exposing the 'image' path and the
            'flipped' flag.
        scale_inds: per-entry indices into ``cfg.TRAIN.SCALES``.
        images_subdir: directory name to substitute into the image path.

    Returns:
        ``(blob, im_scales)``: the output of ``im_list_to_blob`` and the list
        of scales returned by ``prep_im_for_blob``.
    """
    prepared = []
    scales = []
    for idx, entry in enumerate(roidb):
        original_path = entry['image']
        parent_name = original_path.split("/")[-2]
        redirected = original_path.replace(parent_name, images_subdir)
        image = cv2.imread(redirected)
        if entry['flipped']:
            image = image[:, ::-1, :]
        size = cfg.TRAIN.SCALES[scale_inds[idx]]
        image, scale = prep_im_for_blob(image, cfg.PIXEL_MEANS, size,
                                        cfg.TRAIN.MAX_SIZE)
        scales.append(scale)
        prepared.append(image)

    # Pack the prepared images into one blob.
    return im_list_to_blob(prepared), scales
Esempio n. 56
0
def _get_image_blob(roidb, scale_ind, num_classes, backgrounds,
                    intrinsic_matrix, db_inds_syn, is_syn):
    """Builds colour, depth and (optionally) normal input blobs from the
    images in the roidb at the specified scale.

    Args:
        roidb: sequence of entries; 'flipped' is always read, 'image' and
            'depth' are read only when ``is_syn`` is false.
        scale_ind: single index into ``cfg.TRAIN.SCALES_BASE`` shared by all
            images.
        num_classes: not used in this function.
        backgrounds: sequence of background image paths (synthetic branch).
        intrinsic_matrix: indexable at [0, 0], [1, 1], [0, 2], [1, 2]; read
            only when ``cfg.INPUT == 'NORMAL'``.
        db_inds_syn: per-image indices used to format synthetic file names.
        is_syn: when true, images come from ``cfg.TRAIN.SYNROOT`` and are
            composited over a randomly picked background.

    Returns:
        ``(blob, blob_depth, blob_normal, im_scales)``. ``blob_normal`` is an
        empty list unless ``cfg.INPUT == 'NORMAL'``.

    NOTE(review): ``blob_normal`` is only assigned inside the loop, so an
    empty ``roidb`` would raise at the return statement.
    """
    num_images = len(roidb)
    processed_ims = []
    processed_ims_depth = []
    processed_ims_normal = []
    im_scales = []
    # NOTE(review): roidb_syn is never used below.
    roidb_syn = []

    for i in xrange(num_images):

        if is_syn:
            # depth raw
            filename = cfg.TRAIN.SYNROOT + '{:06d}-depth.png'.format(
                db_inds_syn[i])
            im_depth_raw = pad_im(cv2.imread(filename, cv2.IMREAD_UNCHANGED),
                                  16)

            # rgba
            filename = cfg.TRAIN.SYNROOT + '{:06d}-color.png'.format(
                db_inds_syn[i])
            rgba = pad_im(cv2.imread(filename, cv2.IMREAD_UNCHANGED), 16)

            # sample a background image
            ind = np.random.randint(len(backgrounds), size=1)[0]
            filename = backgrounds[ind]
            background = cv2.imread(filename, cv2.IMREAD_UNCHANGED)
            # Resize the background to the rendered image; on any failure
            # fall back to an all-zero background of the appropriate type.
            # NOTE(review): the bare except also hides unrelated errors.
            try:
                background = cv2.resize(background,
                                        (rgba.shape[1], rgba.shape[0]),
                                        interpolation=cv2.INTER_LINEAR)
            except:
                if cfg.INPUT == 'DEPTH' or cfg.INPUT == 'NORMAL':
                    background = np.zeros((rgba.shape[0], rgba.shape[1]),
                                          dtype=np.uint16)
                else:
                    background = np.zeros((rgba.shape[0], rgba.shape[1], 3),
                                          dtype=np.uint8)
                print 'bad background image'

            # A colour input needs a 3-D background; replace anything else
            # with zeros.
            if cfg.INPUT != 'DEPTH' and cfg.INPUT != 'NORMAL' and len(
                    background.shape) != 3:
                background = np.zeros((rgba.shape[0], rgba.shape[1], 3),
                                      dtype=np.uint8)
                print 'bad background image'

            # add background
            im = np.copy(rgba[:, :, :3])
            alpha = rgba[:, :, 3]
            # Pixels with zero alpha are filled from the background.
            I = np.where(alpha == 0)
            if cfg.INPUT == 'DEPTH' or cfg.INPUT == 'NORMAL':
                # Background values are divided by 10 before being written
                # into the depth map (presumably a unit conversion -- TODO
                # confirm).
                im_depth_raw[I[0], I[1]] = background[I[0], I[1]] / 10
            else:
                im[I[0], I[1], :] = background[I[0], I[1], :3]
        else:
            # depth raw
            im_depth_raw = pad_im(
                cv2.imread(roidb[i]['depth'], cv2.IMREAD_UNCHANGED), 16)

            # rgba
            rgba = pad_im(cv2.imread(roidb[i]['image'], cv2.IMREAD_UNCHANGED),
                          16)
            if rgba.shape[2] == 4:
                # Zero out the colour wherever alpha is zero.
                im = np.copy(rgba[:, :, :3])
                alpha = rgba[:, :, 3]
                I = np.where(alpha == 0)
                im[I[0], I[1], :] = 0
            else:
                im = rgba

        # chromatic transform
        if cfg.TRAIN.CHROMATIC:
            im = chromatic_transform(im)

        if cfg.TRAIN.ADD_NOISE:
            im = add_noise(im)

        if roidb[i]['flipped']:
            im = im[:, ::-1, :]

        # Colour image: subtract the pixel means, then resize by the scale.
        im_orig = im.astype(np.float32, copy=True)
        im_orig -= cfg.PIXEL_MEANS
        im_scale = cfg.TRAIN.SCALES_BASE[scale_ind]
        im = cv2.resize(im_orig,
                        None,
                        None,
                        fx=im_scale,
                        fy=im_scale,
                        interpolation=cv2.INTER_LINEAR)
        im_scales.append(im_scale)
        processed_ims.append(im)

        # depth
        # Normalise by the map's own maximum to the 0..255 range, then
        # replicate into three channels.
        # NOTE(review): an all-zero depth map would divide by zero here.
        im_depth = im_depth_raw.astype(np.float32, copy=True) / float(
            im_depth_raw.max()) * 255
        im_depth = np.tile(im_depth[:, :, np.newaxis], (1, 1, 3))

        if cfg.TRAIN.ADD_NOISE:
            im_depth = add_noise(im_depth)

        if roidb[i]['flipped']:
            im_depth = im_depth[:, ::-1]

        im_orig = im_depth.astype(np.float32, copy=True)
        im_orig -= cfg.PIXEL_MEANS
        im_depth = cv2.resize(im_orig,
                              None,
                              None,
                              fx=im_scale,
                              fy=im_scale,
                              interpolation=cv2.INTER_LINEAR)
        processed_ims_depth.append(im_depth)

        # normals
        if cfg.INPUT == 'NORMAL':
            # Raw depth divided by 1000 (presumably millimetres to metres --
            # TODO confirm).
            depth = im_depth_raw.astype(np.float32, copy=True) / 1000.0
            # Intrinsics are multiplied by the image scale.
            # NOTE(review): `depth` itself is not resized before this call --
            # confirm this is intended.
            fx = intrinsic_matrix[0, 0] * im_scale
            fy = intrinsic_matrix[1, 1] * im_scale
            cx = intrinsic_matrix[0, 2] * im_scale
            cy = intrinsic_matrix[1, 2] * im_scale
            nmap = gpu_normals.gpu_normals(depth, fx, fy, cx, cy, 20.0,
                                           cfg.GPU_ID)
            # Affine map x -> 127.5 * x + 127.5, then cast to uint8.
            im_normal = 127.5 * nmap + 127.5
            im_normal = im_normal.astype(np.uint8)
            # Reverse the channel order.
            im_normal = im_normal[:, :, (2, 1, 0)]
            im_normal = cv2.bilateralFilter(im_normal, 9, 75, 75)
            if roidb[i]['flipped']:
                im_normal = im_normal[:, ::-1, :]

            im_orig = im_normal.astype(np.float32, copy=True)
            im_orig -= cfg.PIXEL_MEANS
            im_normal = cv2.resize(im_orig,
                                   None,
                                   None,
                                   fx=im_scale,
                                   fy=im_scale,
                                   interpolation=cv2.INTER_LINEAR)
            processed_ims_normal.append(im_normal)
            # Rebuilt on every iteration from all normals collected so far;
            # only the value from the last iteration is returned.
            blob_normal = im_list_to_blob(processed_ims_normal, 3)
        else:
            blob_normal = []

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims, 3)
    blob_depth = im_list_to_blob(processed_ims_depth, 3)

    return blob, blob_depth, blob_normal, im_scales
Esempio n. 57
0
def _get_image_blob(roidb, scale_inds):
    """Build an input blob from the roidb images at the requested scales.

    Every image is read from disk, optionally mirrored, optionally
    colour-distorted and randomly cropped (both switched by ``cfg.TRAIN``),
    then handed to ``prep_im_for_blob`` and finally stacked into one blob.

    Arguments:
        roidb (list): entries providing an 'image' path and a 'flipped' flag
        scale_inds (sequence): per image, an index into cfg.TRAIN.SCALES

    Returns:
        blob: result of im_list_to_blob over the processed images
        im_scales (list): scale returned by prep_im_for_blob for each image
        im_crops (list): per image, a uint16 array
            [row0, col0, row1, col1] (inclusive) of the region that was kept
        im_shapes (list): per image, its shape after flipping and before any
            distortion or cropping
    """
    num_images = len(roidb)
    processed_ims = []
    im_scales = []
    im_crops = []
    im_shapes = []
    # range() exists on both Python 2 and 3, unlike xrange().
    for i in range(num_images):
        im = cv2.imread(roidb[i]['image'])
        if roidb[i]['flipped']:
            im = im[:, ::-1, :]

        im_shapes.append(im.shape)

        if cfg.TRAIN.USE_DISTORTION:
            # Jitter saturation (channel 1) and exposure (channel 2) in HSV
            # space. Each factor is drawn from [1, limit) and inverted with
            # probability 0.5 so brightening and darkening are both sampled.
            hsv = cv2.cvtColor(im, cv2.COLOR_BGR2HSV)
            s0 = npr.random() * (cfg.TRAIN.SATURATION - 1) + 1
            s1 = npr.random() * (cfg.TRAIN.EXPOSURE - 1) + 1
            s0 = s0 if npr.random() > 0.5 else 1.0 / s0
            s1 = s1 if npr.random() > 0.5 else 1.0 / s1
            # np.float was removed in NumPy 1.24; np.float64 is the dtype it
            # used to alias.
            hsv = np.array(hsv, dtype=np.float64)
            # Clamp at 255 so the cast back to uint8 cannot wrap around.
            hsv[:, :, 1] = np.minimum(s0 * hsv[:, :, 1], 255)
            hsv[:, :, 2] = np.minimum(s1 * hsv[:, :, 2], 255)
            hsv = np.array(hsv, dtype=np.uint8)
            im = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)

        if cfg.TRAIN.USE_CROP:
            im_shape = np.array(im.shape)
            # Crop extent along rows and columns; cfg.TRAIN.CROP is
            # presumably a fraction in (0, 1] -- confirm against the config.
            crop_dims = im_shape[:2] * cfg.TRAIN.CROP

            # Pick a random top-left corner inside the available slack.
            r0 = npr.random()
            r1 = npr.random()
            s = im_shape[:2] - crop_dims
            s[0] *= r0
            s[1] *= r1
            im_crop = np.array(
                [s[0], s[1], s[0] + crop_dims[0] - 1, s[1] + crop_dims[1] - 1],
                dtype=np.uint16)

            im = im[im_crop[0]:im_crop[2] + 1, im_crop[1]:im_crop[3] + 1, :]
        else:
            # No cropping: the kept region is the whole image.
            im_crop = np.array([0, 0, im.shape[0] - 1, im.shape[1] - 1],
                               dtype=np.uint16)

        if cfg.CSC_DEBUG:
            # Keep the augmented image (before mean subtraction) for dumping.
            im_save = im

        target_size = cfg.TRAIN.SCALES[scale_inds[i]]
        im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
                                        cfg.TRAIN.MAX_SIZE)

        if cfg.CSC_DEBUG:
            # Dump the augmented image at network scale; cfg.TRAIN.PASS_IM is
            # used as a running file counter.
            im_save = cv2.resize(im_save,
                                 None,
                                 None,
                                 fx=im_scale,
                                 fy=im_scale,
                                 interpolation=cv2.INTER_LINEAR)
            cv2.imwrite('tmp/' + str(cfg.TRAIN.PASS_IM) + '_.png', im_save)
            cfg.TRAIN.PASS_IM = cfg.TRAIN.PASS_IM + 1

        im_scales.append(im_scale)
        im_crops.append(im_crop)
        processed_ims.append(im)

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)

    return blob, im_scales, im_crops, im_shapes
Esempio n. 58
0
def _perturb_boxes(im, entry):
    """Apply the optional per-box noise / JPEG perturbations of a roidb entry.

    Arguments:
        im (ndarray): image as loaded (and possibly flipped)
        entry (dict): roidb entry; reads 'noised', 'JPGed' and 'boxes'

    Returns:
        The perturbed image. It is converted to float32 once noise has been
        added to at least one box; otherwise the input array is returned
        (JPEG patches are written into it in place).
    """
    if entry['noised']:
        _, _, ch = im.shape
        sigma = 5 ** 0.5  # zero-mean Gaussian noise with variance 5
        for bb in entry['boxes']:
            bcol = bb[2] - bb[0]
            brow = bb[3] - bb[1]
            gauss = np.random.normal(0, sigma, (brow, bcol, ch))
            # A no-op once the image is already float32.
            im = im.astype(np.float32, copy=False)
            im[bb[1]:bb[3], bb[0]:bb[2], :] = (
                im[bb[1]:bb[3], bb[0]:bb[2], :] + gauss)

    if entry['JPGed']:
        for bb in entry['boxes']:
            # Round-trip the box through a quality-70 JPEG file to inject
            # compression artifacts. NOTE(review): the fixed file name in the
            # working directory is shared by every caller -- confirm this is
            # never run from several processes in the same directory.
            cv2.imwrite('JPGed.jpg', im[bb[1]:bb[3], bb[0]:bb[2], :],
                        [cv2.IMWRITE_JPEG_QUALITY, 70])
            bb_jpged = cv2.imread('JPGed.jpg')
            im[bb[1]:bb[3], bb[0]:bb[2], :] = bb_jpged

    return im


def _get_image_blob(roidb, scale_inds):
    """Build input blobs from the roidb images at the requested scales.

    Arguments:
        roidb (list): entries providing 'image', 'flipped', 'noised', 'JPGed'
            and 'boxes'; additionally 'mask' when cfg.USE_MASK is True
        scale_inds (sequence): per image, an index into cfg.TRAIN.SCALES

    Returns:
        When cfg.USE_MASK is True:
            (blob, noise_blob, im_scales, mask_blob, mask_shapes)
        otherwise:
            (blob, noise_blob, im_scales)
        ``mask_shapes`` holds the (height, width) of each image as loaded.
    """
    num_images = len(roidb)
    processed_ims = []
    processed_noise = []
    im_scales = []

    if cfg.USE_MASK is True:
        processed_mask = []
        mask_shapes = []
        for i in range(num_images):
            entry = roidb[i]
            im = cv2.imread(entry['image'])
            # Binarise the mask at 127 and give it an explicit channel axis
            # so it can be flipped together with the image.
            mask = cv2.imread(entry['mask'])
            mask = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)
            _, mask = cv2.threshold(mask, 127, 255, cv2.THRESH_BINARY)
            mask_shape = im.shape[0:2]
            mask = np.expand_dims(mask, 2)
            if entry['flipped']:
                im = im[:, ::-1, :]
                mask = mask[:, ::-1, :]

            im = _perturb_boxes(im, entry)

            target_size = cfg.TRAIN.SCALES[scale_inds[i]]
            im, im_scale, mask = prep_im_for_blob(im, cfg.PIXEL_MEANS,
                                                  target_size,
                                                  cfg.TRAIN.MAX_SIZE, mask)
            mask = np.expand_dims(mask, 2)
            im_scales.append(im_scale)
            mask_shapes.append(mask_shape)
            processed_ims.append(im)
            processed_mask.append(mask)
            # The noise stream is derived from the already preprocessed image.
            noise, _ = prep_noise_for_blob(im, cfg.PIXEL_MEANS, target_size,
                                           cfg.TRAIN.MAX_SIZE)
            processed_noise.append(noise)

        # Create a blob to hold the input images
        blob = im_list_to_blob(processed_ims)
        noise_blob = im_list_to_blob(processed_noise)
        mask_blob = mask_list_to_blob(processed_mask)
        return blob, noise_blob, im_scales, mask_blob, mask_shapes

    for i in range(num_images):
        entry = roidb[i]
        im = cv2.imread(entry['image'])
        if entry['flipped']:
            im = im[:, ::-1, :]

        im = _perturb_boxes(im, entry)

        target_size = cfg.TRAIN.SCALES[scale_inds[i]]
        im, im_scale, _ = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
                                           cfg.TRAIN.MAX_SIZE)
        im_scales.append(im_scale)
        processed_ims.append(im)

        # The noise stream is derived from the already preprocessed image.
        noise, _ = prep_noise_for_blob(im, cfg.PIXEL_MEANS, target_size,
                                       cfg.TRAIN.MAX_SIZE)
        processed_noise.append(noise)

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)
    noise_blob = im_list_to_blob(processed_noise)

    return blob, noise_blob, im_scales
Esempio n. 59
0
    def get_image_blob(self, im, im_depth, meta_data):
        """Build the network input blobs for one color + depth frame.

        Arguments:
            im (ndarray): a color image in BGR order
            im_depth (ndarray): depth image; indexed here by (row, col)
            meta_data (dict): read only when cfg.INPUT == 'NORMAL', in which
                case 'intrinsic_matrix' and 'factor_depth' are required

        Returns:
            blob (ndarray): blob of the mean-subtracted color image
            blob_rescale (ndarray): blob of the color image mapped through
                x / 127.5 - 1
            blob_depth (ndarray): blob of the depth image after dividing by
                2000, clipping to [0, 1], scaling by 255, replicating to
                three channels and subtracting the pixel means
            blob_normal: blob of the surface-normal image, or an empty list
                when cfg.INPUT != 'NORMAL'
            im_scale_factors (ndarray): array holding the single scale used
        """

        # --- color ---------------------------------------------------------
        color = im.astype(np.float32, copy=True)
        if self.cfg.EXP_DIR == 'rgbd_scene':
            # Black out color pixels that have no depth reading.
            missing = np.where(im_depth == 0)
            color[missing[0], missing[1], :] = 0

        scale = self.cfg.TEST.SCALES_BASE[0]

        def _resize(arr):
            # Every modality is resized with the same isotropic scale.
            return cv2.resize(arr,
                              None,
                              None,
                              fx=scale,
                              fy=scale,
                              interpolation=cv2.INTER_LINEAR)

        # Must be computed before the in-place mean subtraction below.
        rescale_ims = [_resize(color / 127.5 - 1)]

        color -= self.cfg.PIXEL_MEANS
        assert len(self.cfg.TEST.SCALES_BASE) == 1

        color_ims = [_resize(color)]
        scale_factors = [scale]

        # --- depth ---------------------------------------------------------
        depth_vis = im_depth.astype(np.float32, copy=True)
        depth_vis = np.clip(depth_vis / 2000.0, 0, 1) * 255
        depth_vis = np.tile(depth_vis[:, :, np.newaxis], (1, 1, 3))
        depth_vis -= self.cfg.PIXEL_MEANS
        depth_ims = [_resize(depth_vis)]

        # --- normals -------------------------------------------------------
        # NOTE(review): this branch reads the module-level ``cfg`` while the
        # code above reads ``self.cfg`` -- confirm both are the same config.
        if cfg.INPUT == 'NORMAL':
            intrinsics = meta_data['intrinsic_matrix'].astype(np.float32,
                                                              copy=True)
            fx = intrinsics[0, 0]
            fy = intrinsics[1, 1]
            cx = intrinsics[0, 2]
            cy = intrinsics[1, 2]

            # Normals are computed at the original resolution from depth
            # divided by meta_data['factor_depth'].
            depth = im_depth.astype(np.float32, copy=True) / float(
                meta_data['factor_depth'])
            nmap = gpu_normals.gpu_normals(depth, fx, fy, cx, cy, 20.0,
                                           cfg.GPU_ID)
            # Affine map 127.5 * n + 127.5, cast to uint8, reverse the
            # channel order, then smooth.
            normal_im = (127.5 * nmap + 127.5).astype(np.uint8)
            normal_im = normal_im[:, :, (2, 1, 0)]
            normal_im = cv2.bilateralFilter(normal_im, 9, 75, 75)

            normal_f = normal_im.astype(np.float32, copy=True)
            normal_f -= cfg.PIXEL_MEANS
            blob_normal = im_list_to_blob([_resize(normal_f)], 3)
        else:
            blob_normal = []

        # Create a blob to hold the input images
        blob = im_list_to_blob(color_ims, 3)
        blob_rescale = im_list_to_blob(rescale_ims, 3)
        blob_depth = im_list_to_blob(depth_ims, 3)

        return blob, blob_rescale, blob_depth, blob_normal, np.array(
            scale_factors)
Esempio n. 60
0
	def _get_image_blob(self, im, im_dim):
		"""Converts an image into a quantized network input.

		The image is normalized with cfg.PIXEL_MEANS / cfg.PIXEL_STDS, resized
		once per entry of cfg.TEST.SCALES, stacked into a blob, truncated to
		integer values and clipped to [-128, 127].

		Arguments:
			im (ndarray): a color image in BGR order
			im_dim (sequence): im_dim[1] is used as the upper bound for the
				longer image side after scaling (instead of cfg.TEST.MAX_SIZE)

		Returns:
			blob (ndarray): float32 data blob holding the image pyramid, with
				integer values in [-128, 127]
			im_scale_factors (ndarray): image scales (relative to im) used
				in the image pyramid
		"""
		im = im.astype(np.float32, copy=True)

		print('Image im.shape = {}'.format(im.shape))

		im = im - cfg.PIXEL_MEANS
		im /= cfg.PIXEL_STDS

		print('\nAfter substract mean')

		# Keep the normalized full-resolution image untouched so that every
		# scale is resized from it rather than from the previous scale's output.
		im_orig = im
		im_shape = im_orig.shape
		im_size_min = np.min(im_shape[0:2])
		im_size_max = np.max(im_shape[0:2])
		print("im_size_min: %d, im_size_max: %d\n" %(im_size_min,im_size_max))
		processed_ims = []
		im_scale_factors = []

		print('cfg.TEST.SCALES = {},cfg.TEST.MAX_SIZE = {}, im_dim[1] = {}'.format(cfg.TEST.SCALES,cfg.TEST.MAX_SIZE,im_dim[1]))
		max_size = im_dim[1]

		for target_size in cfg.TEST.SCALES:
			im_scale = float(target_size) / float(im_size_min)
			# Prevent the biggest axis from being more than max_size
			if np.round(im_scale * im_size_max) > max_size:
				im_scale = float(max_size) / float(im_size_max)
			multiple = cfg.TEST.SCALE_MULTIPLE_OF
			if multiple > 1:
				# Snap each output side down to a multiple of `multiple`,
				# which makes the x and y scales slightly different.
				im_scale_x = np.floor(im_orig.shape[1] * im_scale / multiple) * multiple / im_orig.shape[1]
				im_scale_y = np.floor(im_orig.shape[0] * im_scale / multiple) * multiple / im_orig.shape[0]
				print('im_scale = {}, multiple = {}'.format(im_scale,multiple))
				print('im.shape[0] = {}, im.shape[1] = {}'.format(im_orig.shape[0],im_orig.shape[1]))
				print('im_scale_x = {}, im_scale_y = {}\n'.format(im_scale_x,im_scale_y))
				im = cv2.resize(im_orig, None, None, fx=im_scale_x, fy=im_scale_y, interpolation=cv2.INTER_LINEAR)
			else:
				im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale, interpolation=cv2.INTER_LINEAR)
				print('im_scale = {}\n'.format(im_scale))
			# The isotropic im_scale is reported even when the per-axis
			# scales above were snapped; callers receive one value per scale.
			im_scale_factors.append(im_scale)
			processed_ims.append(im)

		# Create a blob to hold the input images
		blob = im_list_to_blob(processed_ims)

		# The int32 round trip truncates every value toward zero.
		blob = blob.astype(np.int32, copy=True)
		blob = blob.astype(np.float32, copy=True)

		self.build_json("blob_pre_clip",blob)

		# Clamp to the signed 8-bit range.
		blob = np.clip(blob,-128,127)

		self.build_json("blob_post_clip",blob)

		print('pvanet_8bit_ob_roip8 blob.shape = {}'.format(blob.shape))
		print('pvanet_8bit_ob_roip8 im.shape = {}'.format(im.shape))

		return blob, np.array(im_scale_factors)