Esempio n. 1
0
    def get_image_blob(self, im, im_depth, meta_data):
        """Converts a color/depth image pair into network input blobs.

        All settings are read from ``self.cfg`` so the method does not
        depend on a module-level ``cfg`` being defined as well.

        Arguments:
            im (ndarray): a color image in BGR order
            im_depth (ndarray): depth image indexed with the same row/column
                coordinates as ``im``; presumably single-channel (H x W) --
                TODO confirm against callers
            meta_data (dict): only read when ``self.cfg.INPUT == 'NORMAL'``;
                it must then provide 'intrinsic_matrix' and 'factor_depth'

        Returns:
            blob (ndarray): mean-subtracted, rescaled color image packed by
                ``im_list_to_blob``
            blob_rescale (ndarray): color image mapped through
                ``x / 127.5 - 1`` (no mean subtraction), rescaled and packed
            blob_depth (ndarray): depth mapped to 0..255 (raw values divided
                by 2000 and clipped to [0, 1]), replicated to three channels,
                mean-subtracted, rescaled and packed
            blob_normal (ndarray or list): packed surface-normal image when
                ``INPUT == 'NORMAL'``, otherwise an empty list
            im_scale_factors (ndarray): the single image scale (relative to
                im) that was applied
        """
        cfg = self.cfg

        # Exactly one test scale is supported; check before indexing it.
        assert len(cfg.TEST.SCALES_BASE) == 1
        im_scale = cfg.TEST.SCALES_BASE[0]

        def _resize(image):
            # Rescale both axes by the test scale (bilinear).
            return cv2.resize(image,
                              None,
                              None,
                              fx=im_scale,
                              fy=im_scale,
                              interpolation=cv2.INTER_LINEAR)

        # RGB
        im_color = im.astype(np.float32, copy=True)
        # mask the color image according to depth
        if cfg.EXP_DIR == 'rgbd_scene':
            missing = np.where(im_depth == 0)
            im_color[missing[0], missing[1], :] = 0

        # The rescaled variant must be derived before the mean is subtracted
        # in place below.
        processed_ims_rescale = [_resize(im_color / 127.5 - 1)]

        im_color -= cfg.PIXEL_MEANS
        processed_ims = [_resize(im_color)]
        im_scale_factors = [im_scale]

        # depth
        im_depth_vis = im_depth.astype(np.float32, copy=True)
        im_depth_vis = np.clip(im_depth_vis / 2000.0, 0, 1) * 255
        im_depth_vis = np.tile(im_depth_vis[:, :, np.newaxis], (1, 1, 3))
        im_depth_vis -= cfg.PIXEL_MEANS
        processed_ims_depth = [_resize(im_depth_vis)]

        if cfg.INPUT == 'NORMAL':
            # meta data
            K = meta_data['intrinsic_matrix'].astype(np.float32, copy=True)
            fx = K[0, 0]
            fy = K[1, 1]
            cx = K[0, 2]
            cy = K[1, 2]

            # normals
            depth = im_depth.astype(np.float32, copy=True) / float(
                meta_data['factor_depth'])
            nmap = gpu_normals.gpu_normals(depth, fx, fy, cx, cy, 20.0,
                                           cfg.GPU_ID)
            # Encode the normal map as an 8-bit image and reverse the
            # channel order before filtering.
            im_normal = 127.5 * nmap + 127.5
            im_normal = im_normal.astype(np.uint8)
            im_normal = im_normal[:, :, (2, 1, 0)]
            im_normal = cv2.bilateralFilter(im_normal, 9, 75, 75)

            im_normal = im_normal.astype(np.float32, copy=True)
            im_normal -= cfg.PIXEL_MEANS
            blob_normal = im_list_to_blob([_resize(im_normal)], 3)
        else:
            blob_normal = []

        # Create a blob to hold the input images
        blob = im_list_to_blob(processed_ims, 3)
        blob_rescale = im_list_to_blob(processed_ims_rescale, 3)
        blob_depth = im_list_to_blob(processed_ims_depth, 3)

        return blob, blob_rescale, blob_depth, blob_normal, np.array(
            im_scale_factors)
Esempio n. 2
0
def _get_image_blob(roidb, scale_ind, num_classes, backgrounds,
                    intrinsic_matrix, db_inds_syn, is_syn):
    """Builds input blobs from the images in the roidb at the specified
    scale.

    Arguments:
        roidb (list): one dict per image; 'flipped' is always read, and
            'depth' / 'image' (file paths) are read when ``is_syn`` is false
        scale_ind (int): index into ``cfg.TRAIN.SCALES_BASE``
        num_classes: unused here; kept for interface compatibility
        backgrounds (list): file paths of candidate background images
            (only used when ``is_syn`` is true)
        intrinsic_matrix (ndarray): camera intrinsics, read only when
            ``cfg.INPUT == 'NORMAL'``
        db_inds_syn (sequence): per-image index of the synthetic frame under
            ``cfg.TRAIN.SYNROOT`` (only used when ``is_syn`` is true)
        is_syn (bool): load synthetic frames and composite them onto a
            randomly chosen background instead of reading roidb images

    Returns:
        blob (ndarray): packed, mean-subtracted, rescaled color images
        blob_depth (ndarray): packed depth images (normalised to 0..255 by
            each image's maximum, replicated to three channels)
        blob_normal (ndarray or list): packed normal images when
            ``cfg.INPUT == 'NORMAL'``, otherwise an empty list
        im_scales (list): the scale applied to each image
    """
    depth_like_input = cfg.INPUT in ('DEPTH', 'NORMAL')
    im_scale = cfg.TRAIN.SCALES_BASE[scale_ind]

    def _center_and_resize(image):
        # Subtract the pixel means from a float32 copy, then rescale.
        centered = image.astype(np.float32, copy=True)
        centered -= cfg.PIXEL_MEANS
        return cv2.resize(centered,
                          None,
                          None,
                          fx=im_scale,
                          fy=im_scale,
                          interpolation=cv2.INTER_LINEAR)

    processed_ims = []
    processed_ims_depth = []
    processed_ims_normal = []
    im_scales = []

    for i, entry in enumerate(roidb):

        if is_syn:
            # depth raw
            filename = cfg.TRAIN.SYNROOT + '{:06d}-depth.png'.format(
                db_inds_syn[i])
            im_depth_raw = pad_im(cv2.imread(filename, cv2.IMREAD_UNCHANGED),
                                  16)

            # rgba
            filename = cfg.TRAIN.SYNROOT + '{:06d}-color.png'.format(
                db_inds_syn[i])
            rgba = pad_im(cv2.imread(filename, cv2.IMREAD_UNCHANGED), 16)
            height, width = rgba.shape[0], rgba.shape[1]

            # sample a background image
            ind = np.random.randint(len(backgrounds), size=1)[0]
            background = cv2.imread(backgrounds[ind], cv2.IMREAD_UNCHANGED)
            try:
                background = cv2.resize(background, (width, height),
                                        interpolation=cv2.INTER_LINEAR)
            except Exception:
                # Best effort: an unreadable background is replaced by an
                # all-zero one. Catching Exception (not a bare except) keeps
                # KeyboardInterrupt/SystemExit propagating.
                if depth_like_input:
                    background = np.zeros((height, width), dtype=np.uint16)
                else:
                    background = np.zeros((height, width, 3), dtype=np.uint8)
                print('bad background image')

            if not depth_like_input and len(background.shape) != 3:
                # A single-channel background cannot be pasted into a color
                # image.
                background = np.zeros((height, width, 3), dtype=np.uint8)
                print('bad background image')

            # add background wherever the rendering is fully transparent
            im = np.copy(rgba[:, :, :3])
            alpha = rgba[:, :, 3]
            I = np.where(alpha == 0)
            if depth_like_input:
                im_depth_raw[I[0], I[1]] = background[I[0], I[1]] / 10
            else:
                im[I[0], I[1], :] = background[I[0], I[1], :3]
        else:
            # depth raw
            im_depth_raw = pad_im(
                cv2.imread(entry['depth'], cv2.IMREAD_UNCHANGED), 16)

            # rgba
            rgba = pad_im(cv2.imread(entry['image'], cv2.IMREAD_UNCHANGED),
                          16)
            if rgba.shape[2] == 4:
                im = np.copy(rgba[:, :, :3])
                alpha = rgba[:, :, 3]
                I = np.where(alpha == 0)
                im[I[0], I[1], :] = 0
            else:
                im = rgba

        # chromatic transform
        if cfg.TRAIN.CHROMATIC:
            im = chromatic_transform(im)

        if cfg.TRAIN.ADD_NOISE:
            im = add_noise(im)

        flipped = entry['flipped']
        if flipped:
            im = im[:, ::-1, :]

        im_scales.append(im_scale)
        processed_ims.append(_center_and_resize(im))

        # depth
        depth_max = float(im_depth_raw.max())
        if depth_max == 0:
            # An all-zero depth image would otherwise turn into NaNs.
            depth_max = 1.0
        im_depth = im_depth_raw.astype(np.float32,
                                       copy=True) / depth_max * 255
        im_depth = np.tile(im_depth[:, :, np.newaxis], (1, 1, 3))

        if cfg.TRAIN.ADD_NOISE:
            im_depth = add_noise(im_depth)

        if flipped:
            im_depth = im_depth[:, ::-1]

        processed_ims_depth.append(_center_and_resize(im_depth))

        # normals
        if cfg.INPUT == 'NORMAL':
            depth = im_depth_raw.astype(np.float32, copy=True) / 1000.0
            # NOTE(review): the intrinsics are multiplied by im_scale while
            # the depth passed to gpu_normals is not resized -- confirm this
            # is intended.
            fx = intrinsic_matrix[0, 0] * im_scale
            fy = intrinsic_matrix[1, 1] * im_scale
            cx = intrinsic_matrix[0, 2] * im_scale
            cy = intrinsic_matrix[1, 2] * im_scale
            nmap = gpu_normals.gpu_normals(depth, fx, fy, cx, cy, 20.0,
                                           cfg.GPU_ID)
            im_normal = 127.5 * nmap + 127.5
            im_normal = im_normal.astype(np.uint8)
            im_normal = im_normal[:, :, (2, 1, 0)]
            im_normal = cv2.bilateralFilter(im_normal, 9, 75, 75)
            if flipped:
                im_normal = im_normal[:, ::-1, :]

            processed_ims_normal.append(_center_and_resize(im_normal))

    # Create the blobs once, after every image has been processed.
    blob = im_list_to_blob(processed_ims, 3)
    blob_depth = im_list_to_blob(processed_ims_depth, 3)
    if cfg.INPUT == 'NORMAL':
        blob_normal = im_list_to_blob(processed_ims_normal, 3)
    else:
        blob_normal = []

    return blob, blob_depth, blob_normal, im_scales
Esempio n. 3
0
def get_image_blob(im, im_depth, meta_data, cfg):
    """Converts an image (and optional depth) into network input blobs.

    Arguments:
        im (ndarray): a color image in BGR order
        im_depth (ndarray or None): depth image; when None no depth blob is
            built. It must not be None when ``cfg.INPUT == 'NORMAL'``,
            because the normals are computed from it.
        meta_data (dict): only read when ``cfg.INPUT == 'NORMAL'``; it must
            then provide 'intrinsic_matrix' and 'factor_depth'
        cfg: configuration object; reads PIXEL_MEANS, TEST.SCALES_BASE,
            INPUT and GPU_ID

    Returns:
        blob (ndarray): mean-subtracted, rescaled color image packed by
            ``im_list_to_blob``
        blob_depth (ndarray or None): depth normalised to 0..255 by its
            maximum, replicated to three channels, mean-subtracted, rescaled
            and packed; None when ``im_depth`` is None
        blob_normal (ndarray or list): packed surface-normal image when
            ``cfg.INPUT == 'NORMAL'``, otherwise an empty list
        im_scale_factors (ndarray): the single image scale (relative to im)
            that was applied
        height (int): row count of the rescaled color image
        width (int): column count of the rescaled color image
    """
    assert len(cfg.TEST.SCALES_BASE) == 1
    im_scale = cfg.TEST.SCALES_BASE[0]

    def _center_and_resize(image):
        # `image` must be a private float array: the mean is subtracted in
        # place before rescaling.
        image -= cfg.PIXEL_MEANS
        return cv2.resize(image,
                          None,
                          None,
                          fx=im_scale,
                          fy=im_scale,
                          interpolation=cv2.INTER_LINEAR)

    # RGB
    im_color = _center_and_resize(im.astype(np.float32, copy=True))
    im_scale_factors = [im_scale]
    # Create a blob to hold the input images
    blob = im_list_to_blob([im_color], 3)
    height = im_color.shape[0]
    width = im_color.shape[1]

    # depth
    if im_depth is not None:
        depth_vis = im_depth.astype(np.float32, copy=True)
        depth_max = depth_vis.max()
        if depth_max == 0:
            # An all-zero depth image would otherwise turn into NaNs.
            depth_max = 1.0
        depth_vis = depth_vis / depth_max * 255
        depth_vis = np.tile(depth_vis[:, :, np.newaxis], (1, 1, 3))
        blob_depth = im_list_to_blob([_center_and_resize(depth_vis)], 3)
    else:
        blob_depth = None

    if cfg.INPUT == 'NORMAL':
        # meta data
        K = meta_data['intrinsic_matrix'].astype(np.float32, copy=True)
        fx = K[0, 0]
        fy = K[1, 1]
        cx = K[0, 2]
        cy = K[1, 2]

        # normals
        depth = im_depth.astype(np.float32, copy=True) / float(
            meta_data['factor_depth'])
        nmap = gpu_normals.gpu_normals(depth, fx, fy, cx, cy, 20.0, cfg.GPU_ID)
        # Encode the normal map as an 8-bit image and reverse the channel
        # order before filtering.
        im_normal = 127.5 * nmap + 127.5
        im_normal = im_normal.astype(np.uint8)
        im_normal = im_normal[:, :, (2, 1, 0)]
        im_normal = cv2.bilateralFilter(im_normal, 9, 75, 75)

        im_normal = _center_and_resize(
            im_normal.astype(np.float32, copy=True))
        # Create a blob to hold the input images
        blob_normal = im_list_to_blob([im_normal], 3)
    else:
        blob_normal = []

    return blob, blob_depth, blob_normal, np.array(
        im_scale_factors), height, width
Esempio n. 4
0
def _get_image_blob(roidb, scale_ind):
    """Builds input blobs from the images in the roidb at the specified
    scale.

    Arguments:
        roidb (list): one dict per image with the keys 'meta_data' (path of
            a .mat file providing 'intrinsic_matrix' and 'factor_depth'),
            'depth' and 'image' (file paths), 'flipped', and 'label' (file
            path, read only when ``cfg.TRAIN.CHROMATIC`` is set)
        scale_ind (int): index into ``cfg.TRAIN.SCALES_BASE``

    Returns:
        blob (ndarray): packed, mean-subtracted, rescaled color images
        blob_rescale (ndarray or list): packed color images mapped through
            ``x / 127.5 - 1`` when ``cfg.TRAIN.GAN`` is set, otherwise an
            empty list
        blob_depth (ndarray): packed depth images (normalised to 0..255 by
            each image's maximum, replicated to three channels)
        blob_normal (ndarray): packed surface-normal images
        im_scales (list): the scale applied to each image
    """
    use_gan = cfg.TRAIN.GAN
    im_scale = cfg.TRAIN.SCALES_BASE[scale_ind]

    def _resize(image):
        # Rescale both axes by the training scale (bilinear).
        return cv2.resize(image,
                          None,
                          None,
                          fx=im_scale,
                          fy=im_scale,
                          interpolation=cv2.INTER_LINEAR)

    def _center_and_resize(image):
        # Subtract the pixel means from a float32 copy, then rescale.
        centered = image.astype(np.float32, copy=True)
        centered -= cfg.PIXEL_MEANS
        return _resize(centered)

    processed_ims = []
    processed_ims_rescale = []
    processed_ims_depth = []
    processed_ims_normal = []
    im_scales = []

    for entry in roidb:
        # meta data
        meta_data = scipy.io.loadmat(entry['meta_data'])
        K = meta_data['intrinsic_matrix'].astype(np.float32, copy=True)
        fx = K[0, 0]
        fy = K[1, 1]
        cx = K[0, 2]
        cy = K[1, 2]

        # depth raw
        im_depth_raw = pad_im(
            cv2.imread(entry['depth'], cv2.IMREAD_UNCHANGED), 16)

        # rgba
        rgba = pad_im(cv2.imread(entry['image'], cv2.IMREAD_UNCHANGED), 16)
        if rgba.shape[2] == 4:
            # Zero the color wherever the alpha channel is zero.
            im = np.copy(rgba[:, :, :3])
            alpha = rgba[:, :, 3]
            I = np.where(alpha == 0)
            im[I[0], I[1], :] = 0
        else:
            im = rgba

        # chromatic transform
        if cfg.TRAIN.CHROMATIC:
            label = pad_im(cv2.imread(entry['label'], cv2.IMREAD_UNCHANGED),
                           16)
            im = chromatic_transform(im, label)

        # mask the color image according to depth
        if cfg.EXP_DIR == 'rgbd_scene':
            I = np.where(im_depth_raw == 0)
            im[I[0], I[1], :] = 0

        flipped = entry['flipped']
        if flipped:
            im = im[:, ::-1, :]

        if use_gan:
            processed_ims_rescale.append(
                _resize(im.astype(np.float32, copy=True) / 127.5 - 1))

        im_scales.append(im_scale)
        processed_ims.append(_center_and_resize(im))

        # depth
        depth_max = float(im_depth_raw.max())
        if depth_max == 0:
            # An all-zero depth image would otherwise turn into NaNs.
            depth_max = 1.0
        im_depth = im_depth_raw.astype(np.float32,
                                       copy=True) / depth_max * 255
        im_depth = np.tile(im_depth[:, :, np.newaxis], (1, 1, 3))

        if flipped:
            im_depth = im_depth[:, ::-1]

        processed_ims_depth.append(_center_and_resize(im_depth))

        # normals
        depth = im_depth_raw.astype(np.float32, copy=True) / float(
            meta_data['factor_depth'])
        nmap = gpu_normals.gpu_normals(depth, fx, fy, cx, cy, 20.0, cfg.GPU_ID)
        # Encode the normal map as an 8-bit image and reverse the channel
        # order.
        im_normal = 127.5 * nmap + 127.5
        im_normal = im_normal.astype(np.uint8)
        im_normal = im_normal[:, :, (2, 1, 0)]
        if flipped:
            im_normal = im_normal[:, ::-1, :]

        processed_ims_normal.append(_center_and_resize(im_normal))

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims, 3)
    blob_depth = im_list_to_blob(processed_ims_depth, 3)
    blob_normal = im_list_to_blob(processed_ims_normal, 3)
    if use_gan:
        blob_rescale = im_list_to_blob(processed_ims_rescale, 3)
    else:
        blob_rescale = []

    return blob, blob_rescale, blob_depth, blob_normal, im_scales
Esempio n. 5
0
def _get_image_blob(roidb, scale_ind):
    """Builds input blobs from the images in the roidb at the specified
    scale.

    Arguments:
        roidb (list): one dict per image with the keys 'meta_data' (path of
            a .mat file providing 'intrinsic_matrix' and 'factor_depth'),
            'depth' and 'image' (file paths), and 'flipped'
        scale_ind (int): index into ``cfg.TRAIN.SCALES_BASE``

    Returns:
        blob (ndarray): packed, mean-subtracted, rescaled color images
        blob_depth (ndarray): packed depth images (normalised to 0..255 by
            each image's maximum, replicated to three channels)
        blob_normal (ndarray): packed surface-normal images
        im_scales (list): the scale applied to each image
    """
    im_scale = cfg.TRAIN.SCALES_BASE[scale_ind]

    def _center_and_resize(image):
        # Subtract the pixel means from a float32 copy, then rescale.
        centered = image.astype(np.float32, copy=True)
        centered -= cfg.PIXEL_MEANS
        return cv2.resize(centered, None, None, fx=im_scale, fy=im_scale,
                          interpolation=cv2.INTER_LINEAR)

    processed_ims = []
    processed_ims_depth = []
    processed_ims_normal = []
    im_scales = []

    for entry in roidb:
        # meta data
        meta_data = scipy.io.loadmat(entry['meta_data'])
        K = meta_data['intrinsic_matrix'].astype(np.float32, copy=True)
        fx = K[0, 0]
        fy = K[1, 1]
        cx = K[0, 2]
        cy = K[1, 2]

        # depth raw
        im_depth_raw = pad_im(
            cv2.imread(entry['depth'], cv2.IMREAD_UNCHANGED), 16)

        # rgba
        rgba = pad_im(cv2.imread(entry['image'], cv2.IMREAD_UNCHANGED), 16)
        if rgba.shape[2] == 4:
            # Paint pixels whose alpha is zero white (255).
            im = np.copy(rgba[:, :, :3])
            alpha = rgba[:, :, 3]
            I = np.where(alpha == 0)
            im[I[0], I[1], :] = 255
        else:
            im = rgba

        # chromatic transform
        if cfg.TRAIN.CHROMATIC:
            im = chromatic_transform(im)

        # mask the color image according to depth
        if cfg.EXP_DIR == 'rgbd_scene':
            I = np.where(im_depth_raw == 0)
            im[I[0], I[1], :] = 0

        flipped = entry['flipped']
        if flipped:
            im = im[:, ::-1, :]

        im_scales.append(im_scale)
        processed_ims.append(_center_and_resize(im))

        # depth
        depth_max = float(im_depth_raw.max())
        if depth_max == 0:
            # An all-zero depth image would otherwise turn into NaNs.
            depth_max = 1.0
        im_depth = im_depth_raw.astype(np.float32, copy=True) / depth_max * 255
        im_depth = np.tile(im_depth[:, :, np.newaxis], (1, 1, 3))

        if flipped:
            im_depth = im_depth[:, ::-1]

        processed_ims_depth.append(_center_and_resize(im_depth))

        # normals
        depth = im_depth_raw.astype(np.float32, copy=True) / float(
            meta_data['factor_depth'])
        nmap = gpu_normals.gpu_normals(depth, fx, fy, cx, cy, 20.0, cfg.GPU_ID)
        # Encode the normal map as an 8-bit image and reverse the channel
        # order.
        im_normal = 127.5 * nmap + 127.5
        im_normal = im_normal.astype(np.uint8)
        im_normal = im_normal[:, :, (2, 1, 0)]
        if flipped:
            im_normal = im_normal[:, ::-1, :]

        processed_ims_normal.append(_center_and_resize(im_normal))

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims, 3)
    blob_depth = im_list_to_blob(processed_ims_depth, 3)
    blob_normal = im_list_to_blob(processed_ims_normal, 3)

    return blob, blob_depth, blob_normal, im_scales