Example #1
    def alpha_compose(self, src_ctx, dst_ctx):
        # Assume src_ctx has fields: bg_image, box
        # Assume dst_ctx is from val_imdb
        src_img   = cv2.imread(src_ctx['bg_image'], cv2.IMREAD_COLOR)
        dst_img   = cv2.imread(dst_ctx['image'],    cv2.IMREAD_COLOR)
        dst_alpha = cv2.imread(dst_ctx['alpha'],    cv2.IMREAD_GRAYSCALE)

        src_xyxy  = src_ctx['box'];   dst_xyxy = dst_ctx['box']
        src_width = src_img.shape[1]; src_height = src_img.shape[0]
        dst_width = dst_img.shape[1]; dst_height = dst_img.shape[0]

        # resize the target image to align the heights of the bboxes
        factor = float(src_xyxy[3] - src_xyxy[1] + 1)/float(dst_xyxy[3] - dst_xyxy[1] + 1)
        dst_width = int(dst_width  * factor); dst_height = int(dst_height * factor)
        dst_img   = cv2.resize(dst_img, (dst_width, dst_height))
        dst_alpha = cv2.resize(dst_alpha, (dst_width, dst_height))
        dst_alpha = dst_alpha.astype(np.float64)/255.0
        dst_xyxy  = factor * dst_xyxy
        src_xywh = ds_utils.xyxy_to_xywh(src_xyxy.reshape((1,4))).squeeze()
        dst_xywh = ds_utils.xyxy_to_xywh(dst_xyxy.reshape((1,4))).squeeze()

        # anchors that should match (the standing points)
        src_anchor = src_xywh[:2]; dst_anchor = dst_xywh[:2]
        offset = (src_anchor - dst_anchor).astype(np.int64)

        # dilate the target patch a bit to include the blending region
        dst_bb = ds_utils.expand_xyxy(dst_xyxy.reshape((1,4)), dst_width, dst_height, ratio=0.2).squeeze().astype(np.int64)
        
        src_bb = dst_bb.copy()
        src_bb[:2] = dst_bb[:2] + offset
        src_bb[2:] = dst_bb[2:] + offset

        # in case the bbox of the target object is beyond the boundaries of the source image
        if src_bb[0] < 0:
            dst_bb[0] -= src_bb[0]; src_bb[0] = 0
        if src_bb[1] < 0:
            dst_bb[1] -= src_bb[1]; src_bb[1] = 0
        if src_bb[2] > src_width - 1:
            dst_bb[2] -= src_bb[2] - src_width + 1; src_bb[2] = src_width - 1
        if src_bb[3] > src_height - 1:
            dst_bb[3] -= src_bb[3] - src_height + 1; src_bb[3] = src_height - 1


        output_mask  = np.zeros((src_height, src_width), dtype=np.float64)
        output_image = src_img.copy()

        alpha_patch = dst_alpha[dst_bb[1]:(dst_bb[3]+1), dst_bb[0]:(dst_bb[2]+1)]
        src_patch   = src_img[src_bb[1]:(src_bb[3]+1), src_bb[0]:(src_bb[2]+1),:]
        dst_patch   = dst_img[dst_bb[1]:(dst_bb[3]+1), dst_bb[0]:(dst_bb[2]+1),:]

        output_mask[src_bb[1]:(src_bb[3]+1), src_bb[0]:(src_bb[2]+1)] = alpha_patch
        output_image[src_bb[1]:(src_bb[3]+1), src_bb[0]:(src_bb[2]+1),:] = \
            np.expand_dims(1.0 - alpha_patch, axis=-1) * src_patch + \
            np.expand_dims(alpha_patch, axis=-1) * dst_patch

        # cv2.rectangle(output_image, (src_xyxy[0], src_xyxy[1]), (src_xyxy[2], src_xyxy[3]), \
        #                         (255, 0, 0), 1)

        return output_image.astype(np.uint8), output_mask
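Note: the ds_utils box helpers used throughout these examples are not shown. Below is a minimal sketch of what they are assumed to do; the real module defines the authoritative conventions, e.g. whether (x, y) is the top-left corner or the box center.

import numpy as np

def xyxy_to_xywh(boxes):
    # assumed: [x1, y1, x2, y2] -> [x, y, w, h] with inclusive corners;
    # the exact (x, y) convention is a ds_utils detail, so treat this as
    # a sketch only
    return np.hstack((boxes[:, 0:2], boxes[:, 2:4] - boxes[:, 0:2] + 1))

def expand_xyxy(boxes, width, height, ratio=0.2):
    # assumed: grow each box by `ratio` of its size on every side,
    # clamped to the image bounds
    w = boxes[:, 2] - boxes[:, 0] + 1
    h = boxes[:, 3] - boxes[:, 1] + 1
    out = boxes.astype(np.float64).copy()
    out[:, 0] = np.maximum(0, out[:, 0] - ratio * w)
    out[:, 1] = np.maximum(0, out[:, 1] - ratio * h)
    out[:, 2] = np.minimum(width - 1, out[:, 2] + ratio * w)
    out[:, 3] = np.minimum(height - 1, out[:, 3] + ratio * h)
    return out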
Example #2
    def build_search_tree(self, ctxdb, mode=0):
        num_samples = len(ctxdb)

        if mode == 0:
            # hybrid mode
            X = np.zeros((num_samples, 4 + 2 * cfg.FEAT_DIMS[-1]))
        else:
            X = np.zeros((num_samples, 4 + cfg.FEAT_DIMS[-1]))

        for i in range(num_samples):
            ctx = ctxdb[i]
            box = ctx['box'].copy()
            xywh = ds_utils.xyxy_to_xywh(box.reshape(
                (1, 4))).squeeze().astype(np.float64)

            with open(ctx['crop_feat'], 'rb') as fid:
                crop_feat = cPickle.load(fid).flatten()

            with open(ctx['full_feat'], 'rb') as fid:
                full_feat = cPickle.load(fid).flatten()

            if mode == 0:
                X[i, :] = np.concatenate((xywh, crop_feat, full_feat))
            elif mode == 1:
                X[i, :] = np.concatenate((xywh, crop_feat))
            else:
                X[i, :] = np.concatenate((xywh, full_feat))

        # if mode == 0:
        #     return BallTree(X, leaf_size=30, metric=cus_distance_hybrid)
        # else:
        #     return BallTree(X, leaf_size=30, metric=cus_distance)
        return BallTree(X, leaf_size=30, metric=cus_distance)
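The custom metric cus_distance is defined elsewhere; sklearn's BallTree accepts any Python callable as metric, so one plausible form, treating the first four dimensions as the box geometry and the rest as CNN features, is sketched below (the split and weights are hypothetical):

import numpy as np
from sklearn.neighbors import BallTree

def cus_distance(x, y, w_geo=1.0, w_feat=1.0):
    # hypothetical metric: weight the 4-d xywh part and the deep-feature
    # part of the concatenated vectors separately
    d_geo  = np.linalg.norm(x[:4] - y[:4])
    d_feat = np.linalg.norm(x[4:] - y[4:])
    return w_geo * d_geo + w_feat * d_feat

# usage sketch: retrieve the K nearest contexts for a query vector `feat`
# tree = self.build_search_tree(ctxdb, mode=1)
# dists, inds = tree.query(feat.reshape((1, -1)), k=K)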
Example #3
    def build_search_tree(self, ctxdb, mode = 0):
        num_samples = len(ctxdb)

        if mode == 0:
            # hybrid mode
            X = np.zeros((num_samples, 4 + 2 * cfg.FEAT_DIMS[-1]))
        else:
            X = np.zeros((num_samples, 4 + cfg.FEAT_DIMS[-1]))

        for i in range(num_samples):
            ctx = ctxdb[i]
            box = ctx['box'].copy()
            xywh = ds_utils.xyxy_to_xywh(box.reshape((1,4))).squeeze().astype(np.float64)

            with open(ctx['crop_feat'], 'rb') as fid:
                crop_feat = cPickle.load(fid).flatten()

            with open(ctx['full_feat'], 'rb') as fid:
                full_feat = cPickle.load(fid).flatten()

            if mode == 0:
                X[i, :] = np.concatenate((xywh, crop_feat, full_feat))
            elif mode == 1:
                X[i, :] = np.concatenate((xywh, crop_feat))
            else:
                X[i, :] = np.concatenate((xywh, full_feat))

        # if mode == 0:
        #     return BallTree(X, leaf_size=30, metric=cus_distance_hybrid)
        # else:
        #     return BallTree(X, leaf_size=30, metric=cus_distance)
        return BallTree(X, leaf_size=30, metric=cus_distance)
Example #4
    def inference_ctx(self, ctx, mode, ctx_tree, K):
        # Assume ctx has fields: bg_image, box
        full_resolution = cfg.RETRIEVAL_RESOLUTION
        crop_resolution = [
            full_resolution[0] // 2, full_resolution[1] // 2, full_resolution[2]
        ]

        box = ctx['box'].copy().astype(np.int64)
        img = cv2.imread(ctx['bg_image'], cv2.IMREAD_COLOR)
        # if ctx.get('flipped', False):
        #     img = cv2.flip(img, 1)

        full_img = cv2.resize(img, (full_resolution[1], full_resolution[0]))
        full_img = np.expand_dims(full_img, axis=0) - cfg.PIXEL_MEANS.reshape(
            (1, 1, 1, 3))
        full_feat = self.model.predict(full_img).flatten()

        # img[box[1]:(box[3] + 1), box[0]:(box[2] + 1), :] = cfg.PIXEL_MEANS.reshape((1,1,3))
        crop_img = ds_utils.crop_and_resize(img, box.astype(np.float64),
                                            full_resolution, crop_resolution)
        crop_img = np.expand_dims(crop_img, axis=0) - cfg.PIXEL_MEANS.reshape(
            (1, 1, 1, 3))
        crop_feat = self.model.predict(crop_img).flatten()

        xywh = ds_utils.xyxy_to_xywh(box.reshape(
            (1, 4))).squeeze().astype(np.float64)

        if mode == 0:
            feat = np.concatenate((xywh, crop_feat, full_feat))
        elif mode == 1:
            feat = np.concatenate((xywh, crop_feat))
        else:
            feat = np.concatenate((xywh, full_feat))

        return self.inference_feature(feat, ctx_tree, K)
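inference_feature is not shown; given how the search tree is built, it is presumably a plain K-nearest-neighbour lookup. A minimal sketch, assuming it returns the distances and indices of the K closest database entries:

def inference_feature(self, feat, ctx_tree, K):
    # hypothetical: query the BallTree built by build_search_tree and
    # return the K nearest contexts
    dists, inds = ctx_tree.query(feat.reshape((1, -1)), k=K)
    return dists.flatten(), inds.flatten()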
Example #5
    def inference_ctx(self, ctx, mode, ctx_tree, K):
        # Assume ctx has fields: bg_image, box
        full_resolution = cfg.RETRIEVAL_RESOLUTION
        crop_resolution = [full_resolution[0]//2, full_resolution[1]//2, full_resolution[2]]

        box = ctx['box'].copy().astype(np.int64)
        img = cv2.imread(ctx['bg_image'], cv2.IMREAD_COLOR)
        # if ctx.get('flipped', False):
        #     img = cv2.flip(img, 1)

        full_img  = cv2.resize(img, (full_resolution[1], full_resolution[0]))
        full_img  = np.expand_dims(full_img, axis=0) - cfg.PIXEL_MEANS.reshape((1,1,1,3))
        full_feat = self.model.predict(full_img).flatten()
            
        # img[box[1]:(box[3] + 1), box[0]:(box[2] + 1), :] = cfg.PIXEL_MEANS.reshape((1,1,3))
        crop_img = ds_utils.crop_and_resize(img, box.astype(np.float64), full_resolution, crop_resolution)
        crop_img = np.expand_dims(crop_img, axis=0) - cfg.PIXEL_MEANS.reshape((1,1,1,3))
        crop_feat = self.model.predict(crop_img).flatten()

        xywh = ds_utils.xyxy_to_xywh(box.reshape((1,4))).squeeze().astype(np.float64)

        if mode == 0:
            feat = np.concatenate((xywh, crop_feat, full_feat))
        elif mode == 1:
            feat = np.concatenate((xywh, crop_feat))
        else:
            feat = np.concatenate((xywh, full_feat))

        return self.inference_feature(feat, ctx_tree, K)
Example #6
    def draw_position_histogram(self, ctxdb=None):

        resolution = [15, 15]

        bins = np.zeros((resolution[0], resolution[1]))

        X = []
        Y = []

        if ctxdb is None:
            ctxdb = self.objdb

        num_samples = len(ctxdb)

        for i in range(num_samples):
            entry = ctxdb[i]
            xyxy = np.array(entry['box']).copy()
            width = entry['width']
            height = entry['height']

            max_dim = np.maximum(width, height)
            ox = int(0.5 * (max_dim - width))
            oy = int(0.5 * (max_dim - height))

            xyxy[0] += ox
            xyxy[1] += oy
            xyxy[2] += ox
            xyxy[3] += oy

            xywh = ds_utils.xyxy_to_xywh(xyxy.reshape((1, 4))).flatten()
            xywh /= float(max_dim)
            scaled_xy = np.ceil(xywh[:2] * resolution[0])
            scaled_xy = np.maximum(0, scaled_xy - 1).astype(np.int64)

            bins[scaled_xy[1], scaled_xy[0]] += 1.0
            X = X + [xywh[0]]
            Y = Y + [1.0 - xywh[1]]

            if i % 1000 == 0:
                print i

        plt.switch_backend('agg')
        fig = plt.figure()

        plt.hist2d(X, Y, 15, range=[[0.0, 1.0], [0.0, 1.0]])
        plt.colorbar()
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.0])
        # plt.title('offset: %s vs %s'%(self.classes[i], self.classes[j]))
        plt.grid(True)

        fig.savefig('gt_pos_hist.jpg', bbox_inches='tight')
        plt.close(fig)

        print 'Done'
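For reference, a worked instance of the binning arithmetic above, assuming a normalized position of (0.43, 0.73) on the 15x15 grid:

import numpy as np

xy  = np.array([0.43, 0.73])                            # normalized (x, y)
idx = np.maximum(0, np.ceil(xy * 15) - 1).astype(int)   # -> array([ 6, 10])
# bins[10, 6] is incremented: note the (row, col) = (y, x) index order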
Example #7
    def draw_position_histogram(self, ctxdb=None):

        resolution = [15, 15]

        bins = np.zeros((resolution[0], resolution[1]))

        X = []
        Y = []

        if ctxdb is None:
            ctxdb = self.objdb

        num_samples = len(ctxdb)

        for i in range(num_samples):
            entry  = ctxdb[i]
            xyxy   = np.array(entry['box']).copy()
            width  = entry['width']
            height = entry['height']

            max_dim = np.maximum(width, height)
            ox = int(0.5 * (max_dim - width))
            oy = int(0.5 * (max_dim - height))

            xyxy[0] += ox; xyxy[1] += oy
            xyxy[2] += ox; xyxy[3] += oy

            xywh = ds_utils.xyxy_to_xywh(xyxy.reshape((1, 4))).flatten()
            xywh /= float(max_dim)
            scaled_xy = np.ceil(xywh[:2] * resolution[0])
            scaled_xy = np.maximum(0, scaled_xy-1).astype(np.int64)

            bins[scaled_xy[1], scaled_xy[0]] += 1.0
            X = X + [xywh[0]]
            Y = Y + [1.0 - xywh[1]]

            if i%1000 == 0:
                print i


        plt.switch_backend('agg')
        fig = plt.figure()

        plt.hist2d(X, Y, 15, range=[[0.0, 1.0], [0.0, 1.0]])
        plt.colorbar()
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.0])
        # plt.title('offset: %s vs %s'%(self.classes[i], self.classes[j]))
        plt.grid(True)

        fig.savefig('gt_pos_hist.jpg', bbox_inches='tight')
        plt.close(fig)

        print 'Done'
Example #8
def _filter_crowd_proposals(roidb, crowd_thresh):
    """
    Finds proposals that are inside crowd regions and marks them with
    overlap = -1 (for all gt rois), which means they will be excluded from
    training.
    """
    for ix, entry in enumerate(roidb):
        overlaps = entry['gt_overlaps'].toarray()
        crowd_inds = np.where(overlaps.max(axis=1) == -1)[0]
        non_gt_inds = np.where(entry['gt_classes'] == 0)[0]
        if len(crowd_inds) == 0 or len(non_gt_inds) == 0:
            continue
        iscrowd = [int(True) for _ in xrange(len(crowd_inds))]
        crowd_boxes = ds_utils.xyxy_to_xywh(entry['boxes'][crowd_inds, :])
        non_gt_boxes = ds_utils.xyxy_to_xywh(entry['boxes'][non_gt_inds, :])
        ious = COCOmask.iou(non_gt_boxes, crowd_boxes, iscrowd)
        bad_inds = np.where(ious.max(axis=1) > crowd_thresh)[0]
        overlaps[non_gt_inds[bad_inds], :] = -1
        roidb[ix]['gt_overlaps'] = scipy.sparse.csr_matrix(overlaps)
    return roidb
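COCOmask.iou here is pycocotools.mask.iou, which takes xywh boxes and, for ground-truth entries flagged iscrowd, computes intersection over the proposal's own area instead of over the union. A sketch of that semantics for a single pair of boxes:

def crowd_iou(dt_xywh, gt_xywh):
    # pycocotools semantics when iscrowd=1: intersection / area(detection),
    # so a proposal fully inside a crowd region scores 1.0
    x1 = max(dt_xywh[0], gt_xywh[0])
    y1 = max(dt_xywh[1], gt_xywh[1])
    x2 = min(dt_xywh[0] + dt_xywh[2], gt_xywh[0] + gt_xywh[2])
    y2 = min(dt_xywh[1] + dt_xywh[3], gt_xywh[1] + gt_xywh[3])
    inter = max(0.0, x2 - x1) * max(0.0, y2 - y1)
    dt_area = float(dt_xywh[2] * dt_xywh[3])
    return inter / dt_area if dt_area > 0 else 0.0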
Example #9
    def get_minibatch(self, square=True):
        # outputs: resized images, normalized xywhs, grids

        batch_size = cfg.TRAIN.BATCH_SIZE
        grid_shape = cfg.GRID_SHAPE
        resolution = cfg.RESOLUTION

        #######################################################################
        # indices of the minibatch
        if self.objdb_cur + batch_size >= len(self.objdb):
            self.permute_objdb_indices()
        db_inds = self.objdb_perm[self.objdb_cur:self.objdb_cur + batch_size]
        self.objdb_cur += batch_size
        #######################################################################

        images  = np.zeros((batch_size, resolution[0], \
                        resolution[1], resolution[2]), dtype=np.float32)
        grids = np.zeros((batch_size, 2))
        boxes = np.zeros((batch_size, 4))

        for i in range(batch_size):
            obj = self.objdb[db_inds[i]]
            im_path = obj['background']
            width = obj['width']
            height = obj['height']
            box = obj['box'].copy()

            # image data, flip if necessary
            img = cv2.imread(im_path, cv2.IMREAD_COLOR)
            if obj['flipped']:
                # print('flipped %d'%i)
                img = cv2.flip(img, 1)
            xywh = ds_utils.xyxy_to_xywh(box.reshape((1, 4))).squeeze()

            # if we need square images
            if square:
                img, offset_x, offset_y = \
                    ds_utils.create_squared_image(img, cfg.PIXEL_MEANS)
                xywh[0] += offset_x
                xywh[1] += offset_y
                width = height = img.shape[0]

            nxywh = ds_utils.normalize_xywh(xywh.reshape((1, 4)), width,
                                            height).squeeze()
            # discretized output positions
            grid = ds_utils.boxes_to_indices(nxywh.reshape((1, 4)),
                                             grid_shape).squeeze()
            # images of the same shape
            images[i, :, :, :] = cv2.resize(img,
                                            (resolution[1], resolution[0]))
            grids[i, :] = grid
            boxes[i, :] = nxywh

        return images, boxes, grids
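create_squared_image is assumed to pad the image to a square filled with the dataset pixel means, centering the original content and returning the (x, y) offsets it was shifted by, which is consistent with how the offsets are applied to the boxes above. A plausible reconstruction:

import numpy as np

def create_squared_image(img, pixel_means):
    # hypothetical sketch: pad to max(height, width) with the pixel means,
    # keeping the original image centered
    h, w = img.shape[:2]
    dim = max(h, w)
    out = np.empty((dim, dim, 3), dtype=img.dtype)
    out[:, :, :] = pixel_means.reshape((1, 1, 3)).astype(img.dtype)
    offset_x = (dim - w) // 2
    offset_y = (dim - h) // 2
    out[offset_y:offset_y + h, offset_x:offset_x + w, :] = img
    return out, offset_x, offset_y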
Example #10
def _filter_crowd_proposals(roidb, crowd_thresh):
    """
    Finds proposals that are inside crowd regions and marks them with
    overlap = -1 (for all gt rois), which means they will be excluded from
    training.
    """
    for ix, entry in enumerate(roidb):
        overlaps = entry['gt_overlaps'].toarray()
        crowd_inds = np.where(overlaps.max(axis=1) == -1)[0]
        non_gt_inds = np.where(entry['gt_classes'] == 0)[0]
        if len(crowd_inds) == 0 or len(non_gt_inds) == 0:
            continue
        iscrowd = [int(True) for _ in xrange(len(crowd_inds))]
        crowd_boxes = ds_utils.xyxy_to_xywh(entry['boxes'][crowd_inds, :])
        non_gt_boxes = ds_utils.xyxy_to_xywh(entry['boxes'][non_gt_inds, :])
        ious = COCOmask.iou(non_gt_boxes, crowd_boxes, iscrowd)
        bad_inds = np.where(ious.max(axis=1) > crowd_thresh)[0]
        overlaps[non_gt_inds[bad_inds], :] = -1
        roidb[ix]['gt_overlaps'] = scipy.sparse.csr_matrix(overlaps)
    return roidb
Example #11
    def get_minibatch(self, square=True):
        # outputs: resized images, normalized xywhs, grids

        batch_size = cfg.TRAIN.BATCH_SIZE
        grid_shape = cfg.GRID_SHAPE
        resolution = cfg.RESOLUTION

        #######################################################################
        # indices of the minibatch
        if self.objdb_cur + batch_size >= len(self.objdb):
            self.permute_objdb_indices()
        db_inds = self.objdb_perm[self.objdb_cur : self.objdb_cur + batch_size]
        self.objdb_cur += batch_size
        #######################################################################

        images  = np.zeros((batch_size, resolution[0], \
                        resolution[1], resolution[2]), dtype=np.float32)
        grids = np.zeros((batch_size, 2))
        boxes = np.zeros((batch_size, 4))

        for i in range(batch_size):
            obj = self.objdb[db_inds[i]]
            im_path = obj['background']
            width   = obj['width']
            height  = obj['height']
            box     = obj['box'].copy()

            # image data, flip if necessary
            img = cv2.imread(im_path, cv2.IMREAD_COLOR)
            if obj['flipped']:
                # print('flipped %d'%i)
                img = cv2.flip(img, 1)
            xywh = ds_utils.xyxy_to_xywh(box.reshape((1,4))).squeeze()

            # if we need square images
            if square:
                img, offset_x, offset_y = \
                    ds_utils.create_squared_image(img, cfg.PIXEL_MEANS)
                xywh[0] += offset_x
                xywh[1] += offset_y
                width = height = img.shape[0]

            nxywh = ds_utils.normalize_xywh(xywh.reshape((1,4)), width, height).squeeze()
            # discretized output positions
            grid  = ds_utils.boxes_to_indices(nxywh.reshape((1,4)), grid_shape).squeeze()
            # images of the same shape
            images[i, :, :, :] = cv2.resize(img, (resolution[1], resolution[0]))
            grids[i, :] = grid
            boxes[i, :] = nxywh

        return images, boxes, grids
Example #12
    def draw_heatmap(self, ctxdb=None):

        if ctxdb is None:
            ctxdb = self.objdb

        scale = 15

        areas = np.zeros((scale, ))
        count = cfg.EPS * np.ones((scale, ))

        num_samples = len(ctxdb)

        for i in range(num_samples):
            entry = ctxdb[i]
            xyxy = np.array(entry['box']).copy()
            width = entry['width']
            height = entry['height']

            max_dim = np.maximum(width, height)
            ox = int(0.5 * (max_dim - width))
            oy = int(0.5 * (max_dim - height))

            xyxy[0] += ox
            xyxy[1] += oy
            xyxy[2] += ox
            xyxy[3] += oy

            xywh = ds_utils.xyxy_to_xywh(xyxy.reshape((1, 4))).flatten()
            xywh /= float(max_dim)

            area = xywh[2] * xywh[3]

            scaled_xy = np.ceil(xywh[:2] * scale)
            scaled_xy = np.maximum(0, scaled_xy - 1).astype(np.int64)

            areas[scaled_xy[1]] += area
            count[scaled_xy[1]] += 1.0

            if i % 1000 == 0:
                print i

        areas = np.divide(areas, count)

        heatmap = self.areas_to_heatmap(areas)

        heatmap = cv2.resize(heatmap, (512, 512))
        cv2.imwrite('heatmap.png', heatmap)
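areas_to_heatmap is not shown; since areas ends up holding one mean-area value per vertical bin, a hypothetical rendering normalizes those values to 8 bits, tiles them into a small image, and colorizes it with an OpenCV colormap:

import numpy as np
import cv2

def areas_to_heatmap(self, areas):
    # hypothetical sketch: one mean-area value per vertical (y) bin,
    # rendered as a column image and colorized
    scale = len(areas)
    col = (255.0 * areas / (areas.max() + 1e-8)).astype(np.uint8)
    img = np.tile(col.reshape((scale, 1)), (1, scale))
    return cv2.applyColorMap(img, cv2.COLORMAP_JET)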
Example #13
    def draw_heatmap(self, ctxdb=None):

        if ctxdb is None:
            ctxdb = self.objdb

        scale = 15

        areas = np.zeros((scale,))
        count = cfg.EPS * np.ones((scale,))

        num_samples = len(ctxdb)

        for i in range(num_samples):
            entry = ctxdb[i]
            xyxy   = np.array(entry['box']).copy()
            width  = entry['width']
            height = entry['height']

            max_dim = np.maximum(width, height)
            ox = int(0.5 * (max_dim - width))
            oy = int(0.5 * (max_dim - height))

            xyxy[0] += ox; xyxy[1] += oy
            xyxy[2] += ox; xyxy[3] += oy

            xywh = ds_utils.xyxy_to_xywh(xyxy.reshape((1, 4))).flatten()
            xywh /= float(max_dim)

            area = xywh[2] * xywh[3]

            scaled_xy = np.ceil(xywh[:2] * scale)
            scaled_xy = np.maximum(0, scaled_xy-1).astype(np.int64)

            areas[scaled_xy[1]] += area
            count[scaled_xy[1]] += 1.0

            if i%1000 == 0:
                print i

        areas = np.divide(areas, count)

        heatmap = self.areas_to_heatmap(areas)

        heatmap = cv2.resize(heatmap, (512, 512))
        cv2.imwrite('heatmap.png', heatmap)
Example #14
    def draw_ratio_histogram(self, ctxdb=None):

        if ctxdb is None:
            ctxdb = self.objdb

        R = []
        num_samples = len(ctxdb)

        for i in range(num_samples):
            entry = ctxdb[i]
            xyxy = np.array(entry['box']).copy()
            width = entry['width']
            height = entry['height']

            max_dim = np.maximum(width, height)
            ox = int(0.5 * (max_dim - width))
            oy = int(0.5 * (max_dim - height))

            xyxy[0] += ox
            xyxy[1] += oy
            xyxy[2] += ox
            xyxy[3] += oy

            xywh = ds_utils.xyxy_to_xywh(xyxy.reshape((1, 4))).flatten()
            xywh /= float(max_dim)

            ratio = np.log(xywh[3] / xywh[2])
            R = R + [ratio]

            if i % 1000 == 0:
                print i

        plt.switch_backend('agg')
        fig = plt.figure()

        # note: log(h / w) is negative for wide boxes, so the [0, 2] range
        # keeps only the taller-than-wide boxes
        plt.hist(R, 100, range=[0.0, 2.0])
        plt.xlim([0.0, 2.0])

        fig.savefig('gt_logratio_hist.jpg', bbox_inches='tight')
        plt.close(fig)

        print 'Done'
Example #15
    def draw_ratio_histogram(self, ctxdb=None):

        if ctxdb is None:
            ctxdb = self.objdb

        R = []
        num_samples = len(ctxdb)

        for i in range(num_samples):
            entry = ctxdb[i]
            xyxy   = np.array(entry['box']).copy()
            width  = entry['width']
            height = entry['height']

            max_dim = np.maximum(width, height)
            ox = int(0.5 * (max_dim - width))
            oy = int(0.5 * (max_dim - height))

            xyxy[0] += ox; xyxy[1] += oy
            xyxy[2] += ox; xyxy[3] += oy

            xywh = ds_utils.xyxy_to_xywh(xyxy.reshape((1, 4))).flatten()
            xywh /= float(max_dim)

            ratio = np.log(xywh[3] / xywh[2])
            R = R + [ratio]

            if i%1000 == 0:
                print i

        plt.switch_backend('agg')
        fig = plt.figure()

        plt.hist(R, 100, range=[0.0, 2.0])
        plt.xlim([0.0, 2.0])

        fig.savefig('gt_logratio_hist.jpg', bbox_inches='tight')
        plt.close(fig)

        print 'Done'
Example #16
    def get_rnn_minibatch(self, max_seq_len, square=True, vis=False):

        #######################################################################
        # rename the config parameters to make the code clearer
        batch_size = cfg.TRAIN.BATCH_SIZE
        resolution = cfg.RESOLUTION
        grid_shape = cfg.GRID_SHAPE

        #######################################################################
        # indices of the minibatch
        if self.roidb_cur + batch_size >= len(self.roidb):
            self.permute_roidb_indices()
        db_inds = self.roidb_perm[self.roidb_cur : self.roidb_cur + batch_size]
        self.roidb_cur += batch_size
        #######################################################################

        #######################################################################
        # to be returned
        objects = []; centers = []; ratios  = []; masks   = []
        # normalized xywh representation
        bboxes  = np.zeros((batch_size, max_seq_len, 4), dtype=np.float32)
        # grid box offset
        deltas  = np.zeros((batch_size, max_seq_len, 4), dtype=np.float32)
        images  = np.zeros((batch_size, resolution[0], \
                            resolution[1], resolution[2]), dtype=np.float32)
        #######################################################################

        for i in xrange(batch_size):
            rois     = self.roidb[db_inds[i]]
            im_path  = rois['image']
            width    = rois['width']
            height   = rois['height']
            gt_boxes = rois['boxes'].copy()
            gt_cats  = rois['clses'].copy()
            areas    = rois['seg_areas']

            # number of instances should not exceed max_seq_len
            num_instances = min(gt_boxes.shape[0], max_seq_len)

            # image data, flip if necessary
            img = cv2.imread(im_path, cv2.IMREAD_COLOR)
            if rois['flipped']:
                # print('flipped %d'%i)
                img = cv2.flip(img, 1)

            # sort the objects in the sequence based on their areas
            order    = np.argsort(areas)[::-1]
            gt_boxes = gt_boxes[order, :]
            gt_cats  = gt_cats[order]
            areas    = areas[order]
            # print areas

            # [x1, y1, x2, y2] to [x, y, w, h]
            gt_boxes = ds_utils.xyxy_to_xywh(gt_boxes)

            # if we need square images
            if square:
                img, offset_x, offset_y = \
                    ds_utils.create_squared_image(img, cfg.PIXEL_MEANS)
                gt_boxes[:,0] += offset_x
                gt_boxes[:,1] += offset_y
                width = height = img.shape[0]

            # normalize
            gt_boxes = ds_utils.normalize_xywh(gt_boxes, width, height)

            # truncate the sequences
            gt_boxes = gt_boxes[:num_instances, :]

            # discretized output positions
            grid_indices = ds_utils.xywh_to_index(gt_boxes, \
                grid_shape[1], grid_shape[0])

            # deltas between grid boxes and ground truth boxes
            grid_boxes  = ds_utils.index_to_xywh(grid_indices, \
                grid_shape[1], grid_shape[0])
            grid_deltas = ds_utils.bbox_transform(grid_boxes, gt_boxes)

            # images of the same shape
            images[i, :, :, :] = cv2.resize(img, (resolution[1], resolution[0]))
            # keep the first 'num_instances' (largest) objects
            bboxes[i, :num_instances, :] = np.expand_dims(gt_boxes, axis=0)
            # grid offsets
            deltas[i, :num_instances, :] = np.expand_dims(grid_deltas, axis=0)
            # object indicators
            objects.append(gt_cats[:num_instances].tolist())
            # masks for loss function
            masks.append(np.ones((num_instances, )).tolist())
            # grid centers and sizes
            centers.append(grid_indices[:, 0].tolist())
            ratios.append(grid_indices[:, 1].tolist())

        # padding
        objects = pad_sequences(objects, maxlen=max_seq_len,
                      padding='post', truncating='post', value=0.)
        centers = pad_sequences(centers, maxlen=max_seq_len,
                      padding='post', truncating='post', value=0.)
        ratios  = pad_sequences(ratios, maxlen=max_seq_len,
                      padding='post', truncating='post', value=0.)
        masks   = pad_sequences(masks, maxlen=max_seq_len,
                      padding='post', truncating='post', value=0.)

        if vis:
            output_dir = osp.abspath(osp.join(cfg.ROOT_DIR, 'output', \
                                              cfg.EXP_DIR, self.name, \
                                              'rnn_minibatch'))
            if not osp.exists(output_dir):
                os.makedirs(output_dir)

            for i in xrange(batch_size):
                rois = self.roidb[db_inds[i]]
                im_name, im_ext = osp.splitext(osp.basename(rois['image']))
                msk = masks[i, :]

                # ground truth boxes
                ibb = bboxes[i, :, :].copy()
                iid = objects[i, :].copy()
                iim = images[i, :, :, :].copy()

                # grid bboxes
                grid_indices = np.vstack((centers[i,:], ratios[i,:])).transpose()
                gbb = ds_utils.index_to_xywh(grid_indices, grid_shape[1], grid_shape[0])

                # regressed bboxes
                rbb = ds_utils.bbox_transform_inv(gbb, deltas[i,:,:])

                # Denormalize
                ibb = ds_utils.denormalize_xywh(ibb, resolution[1], resolution[0])
                gbb = ds_utils.denormalize_xywh(gbb, resolution[1], resolution[0])
                rbb = ds_utils.denormalize_xywh(rbb, resolution[1], resolution[0])

                ibb = ds_utils.xywh_to_xyxy(ibb, resolution[1], resolution[0])
                gbb = ds_utils.xywh_to_xyxy(gbb, resolution[1], resolution[0])
                rbb = ds_utils.xywh_to_xyxy(rbb, resolution[1], resolution[0])

                # fontScale = 0.0007 * math.sqrt(float(\
                #     resolution[0]*resolution[0]+resolution[1]*resolution[1]))

                for j in xrange(ibb.shape[0]):
                    if msk[j] == 0:
                        break

                    id = iid[j]
                    cls = self.classes[id]

                    # ground truth boxes
                    bb = ibb[j, :].astype(np.int16)
                    cv2.rectangle(iim, (bb[0], bb[1]), (bb[2], bb[3]), \
                                (0, 255, 0), 2)
                    # grid boxes
                    bb = gbb[j, :].astype(np.int16)
                    cv2.rectangle(iim, (bb[0], bb[1]), (bb[2], bb[3]), \
                                (255, 0, 0), 1)
                    # regressed boxes
                    bb = rbb[j, :].astype(np.int16)
                    cv2.rectangle(iim, (bb[0], bb[1]), (bb[2], bb[3]), \
                                (0, 0, 255), 1)
                    # cv2.putText(iim, '{:}_{:}'.format(j, cls), \
                    #             (bb[0], bb[1] - 2), \
                    #             cv2.FONT_HERSHEY_SIMPLEX, \
                    #             fontScale, (0, 0, 255), 1)

                output_path = osp.join(output_dir, '%06d_'%i+im_name+'.jpg')
                cv2.imwrite(output_path, iim)

        return images, objects, bboxes, deltas, centers, ratios, masks
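bbox_transform and bbox_transform_inv are assumed to follow the usual Faster R-CNN box-regression parameterization, applied here to normalized [x, y, w, h] boxes. A sketch under that assumption:

import numpy as np

def bbox_transform(boxes, gt_boxes):
    # assumed Faster R-CNN style deltas between grid (anchor) boxes and
    # ground-truth boxes, both in [x, y, w, h] form
    dxy = (gt_boxes[:, :2] - boxes[:, :2]) / boxes[:, 2:]
    dwh = np.log(gt_boxes[:, 2:] / boxes[:, 2:])
    return np.hstack((dxy, dwh))

def bbox_transform_inv(boxes, deltas):
    # invert the parameterization to recover the regressed boxes
    xy = boxes[:, :2] + deltas[:, :2] * boxes[:, 2:]
    wh = boxes[:, 2:] * np.exp(deltas[:, 2:])
    return np.hstack((xy, wh))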
Example #17
    def get_scene_minibatch(self, square=True):
        # outputs: resized images, layouts, segmentations, normalized xywhs, grids

        batch_size = cfg.TRAIN.BATCH_SIZE
        resolution = cfg.PREDICT_RESOLUTION
        grid_shape = cfg.GRID_SHAPE
        num_clses  = self.num_classes-1

        #######################################################################
        # indices of the minibatch
        if self.objdb_cur + batch_size >= len(self.objdb):
            self.permute_objdb_indices()
        db_inds = self.objdb_perm[self.objdb_cur : self.objdb_cur + batch_size]
        self.objdb_cur += batch_size
        #######################################################################

        images = np.zeros((batch_size, resolution[0], \
                        resolution[1], resolution[2]), dtype=np.float32)
        scenes = np.zeros((batch_size, resolution[0], \
                        resolution[1], num_clses), dtype=np.float32)
        segs   = np.zeros((batch_size, resolution[0], \
                        resolution[1], resolution[2]), dtype=np.float32)

        grids = np.zeros((batch_size, 2))
        boxes = np.zeros((batch_size, 4))

        for i in range(batch_size):
            obj = self.objdb[db_inds[i]]

            im_path  = obj['background']
            seg_path = obj['out_seg']

            width   = obj['width']
            height  = obj['height']
            box     = obj['box'].copy()

            all_boxes = obj['all_boxes'].copy().reshape((-1,4)).astype(np.int64)
            all_clses = obj['all_clses'].copy().flatten()

            # image data, flip if necessary
            img = cv2.imread(im_path, cv2.IMREAD_COLOR)
            seg = cv2.imread(seg_path, cv2.IMREAD_COLOR)

            if obj['flipped']:
                # print('flipped %d'%i)
                img = cv2.flip(img, 1)
                seg = cv2.flip(seg, 1)

            xywh = ds_utils.xyxy_to_xywh(box.reshape((1,4))).squeeze()
            ex_box = box.copy().flatten().astype(np.int64)

            # if we need square images
            if square:
                img, offset_x, offset_y = \
                    ds_utils.create_squared_image(img, cfg.PIXEL_MEANS)
                xywh[0] += offset_x
                xywh[1] += offset_y


                ex_box[0] += offset_x
                ex_box[1] += offset_y
                ex_box[2] += offset_x
                ex_box[3] += offset_y

                all_boxes[:, 0] += offset_x
                all_boxes[:, 1] += offset_y
                all_boxes[:, 2] += offset_x
                all_boxes[:, 3] += offset_y
                width = height = img.shape[0]
                seg, offset_x, offset_y = \
                    ds_utils.create_squared_image(seg, cfg.PIXEL_MEANS)

            nxywh = ds_utils.normalize_xywh(xywh.reshape((1,4)), width, height).squeeze()
            # discretized output positions
            grid  = ds_utils.boxes_to_indices(nxywh.reshape((1,4)), grid_shape).squeeze()

            # images of the same shape
            images[i] = cv2.resize(img, (resolution[1], resolution[0]))
            segs[i]   = cv2.resize(seg, (resolution[1], resolution[0]))

            factor    = float(resolution[0])/width
            all_boxes = (factor * all_boxes).astype(np.int64)
            ex_box    = (factor * ex_box).astype(np.int64)
            scenes[i] = ds_utils.create_scenes(resolution[1], resolution[0], all_boxes, all_clses, ex_box=ex_box, n_cls=num_clses)

            grids[i, :] = grid
            boxes[i, :] = nxywh

        return images, scenes, segs, boxes, grids
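create_scenes builds the class-layout tensor fed to the model; it is not shown, but a hypothetical reconstruction rasterizes every box into a binary mask in its class channel, with one plausible reading of ex_box being to leave the query object out:

import numpy as np

def create_scenes(width, height, boxes, clses, ex_box=None, n_cls=80):
    # hypothetical sketch: one binary mask per non-background class;
    # `cls - 1` assumes class 0 is background (hence n_cls = num_classes - 1)
    scene = np.zeros((height, width, n_cls), dtype=np.float32)
    for box, cls in zip(boxes, clses):
        if ex_box is not None and np.array_equal(box, ex_box):
            continue  # assumed: exclude the query object itself
        x1, y1, x2, y2 = box
        scene[y1:(y2 + 1), x1:(x2 + 1), cls - 1] = 1.0
    return scene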
Example #18
    def draw_binary_correlation_stat_graph(self, output_dir, roidb=None):
        # Create the output directory if necessary
        if not osp.exists(output_dir):
            os.makedirs(output_dir)
        if not osp.exists(osp.join(output_dir, 'images')):
            os.makedirs(osp.join(output_dir, 'images'))

        # Cache files
        present_cache_file = osp.join(self.cache_path,
                                      self.name + '_present_stats.pkl')
        correlation_cache_file = osp.join(self.cache_path,
                                          self.name + '_correlation_stats.pkl')

        # Load cache files if they exist
        if osp.exists(present_cache_file) and osp.exists(
                correlation_cache_file):
            with open(present_cache_file, 'rb') as fid:
                present_stats = cPickle.load(fid)
            print '{} present stats loaded from {}'.format(
                self.name, present_cache_file)

            with open(correlation_cache_file, 'rb') as fid:
                correlation_stats = cPickle.load(fid)
            print '{} correlation stats loaded from {}'.format(
                self.name, correlation_cache_file)
        # Otherwise, create them
        else:
            if roidb is None:
                roidb = self.roidb
            num_rois = len(roidb)

            # present_stats: the number of pairs
            present_stats = np.zeros((self.num_classes, self.num_classes))
            correlation_stats = [[ np.zeros((6, 0)) for j in xrange(self.num_classes) ] \
                                                    for i in xrange(self.num_classes) ]

            for i in xrange(num_rois):
                rois = roidb[i]
                im_width = float(rois['width'])
                im_height = float(rois['height'])
                bboxes = rois['boxes'].copy()
                classes = rois['clses']

                # At least 2 objects
                if bboxes.shape[0] < 2:
                    continue

                # Assume squared images
                max_dim = np.maximum(im_width, im_height)
                nfactor = np.array([max_dim, max_dim, \
                                    max_dim, max_dim]).reshape((1,4))

                # Change representations from xyxy to xywh
                bboxes = ds_utils.xyxy_to_xywh(bboxes)
                # Normalize
                bboxes = np.divide(bboxes, nfactor)
                # Area
                areas = np.multiply(bboxes[:, 2], bboxes[:, 3]).squeeze()
                # Aspect ratio
                ratios = np.divide(bboxes[:, 2], bboxes[:, 3]).squeeze()

                for j in xrange(bboxes.shape[0] - 1):
                    cls1 = classes[j]
                    bbox1 = bboxes[j, :].squeeze()

                    for k in xrange(j + 1, bboxes.shape[0]):
                        cls2 = classes[k]
                        bbox2 = bboxes[k, :].squeeze()

                        offset = bbox2[:2] - bbox1[:2]

                        correlation21 = np.array([
                            offset[0], offset[1], areas[j], areas[k],
                            ratios[j], ratios[k]
                        ]).reshape((6, 1))

                        correlation12 = np.array([
                            -offset[0], -offset[1], areas[k], areas[j],
                            ratios[k], ratios[j]
                        ]).reshape((6, 1))

                        correlation_stats[cls1][cls2] = \
                                np.hstack((correlation_stats[cls1][cls2], correlation21))
                        correlation_stats[cls2][cls1] = \
                                np.hstack((correlation_stats[cls2][cls1], correlation12))

                        present_stats[cls1, cls2] += 1
                        present_stats[cls2, cls1] += 1

                print i

            with open(present_cache_file, 'wb') as fid:
                cPickle.dump(present_stats, fid, cPickle.HIGHEST_PROTOCOL)
            print 'wrote present stats to {}'.format(present_cache_file)

            with open(correlation_cache_file, 'wb') as fid:
                cPickle.dump(correlation_stats, fid, cPickle.HIGHEST_PROTOCOL)
            print 'wrote correlation stats to {}'.format(
                correlation_cache_file)

        plt.switch_backend('agg')
        for i in xrange(1, self.num_classes):
            for j in xrange(1, self.num_classes):
                correlation = correlation_stats[i][j]

                fig = plt.figure()

                plt.hist2d(correlation[0, :],
                           correlation[1, :],
                           20,
                           range=[[-1.0, 1.0], [-1.0, 1.0]])
                plt.colorbar()
                plt.xlim([-1.0, 1.0])
                plt.ylim([-1.0, 1.0])
                plt.title('offset: %s vs %s' %
                          (self.classes[i], self.classes[j]))
                plt.grid(True)

                fig.savefig(os.path.join(
                    output_dir, 'images/offset_%02d_%02d.jpg' % (i, j)),
                            bbox_inches='tight')
                plt.close(fig)

                fig = plt.figure()

                plt.hist2d(correlation[2, :],
                           correlation[3, :],
                           20,
                           range=[[0, 0.05], [0, 0.05]])
                plt.colorbar()
                plt.xlim([0, 0.05])
                plt.ylim([0, 0.05])
                plt.title('area: %s vs %s' %
                          (self.classes[i], self.classes[j]))
                plt.grid(True)

                fig.savefig(osp.join(output_dir,
                                     'images/area_%02d_%02d.jpg' % (i, j)),
                            bbox_inches='tight')
                plt.close(fig)

                fig = plt.figure()

                plt.hist2d(correlation[4, :],
                           correlation[5, :],
                           20,
                           range=[[0, 4.0], [0, 4.0]])
                plt.colorbar()
                plt.xlim([0, 4.0])
                plt.ylim([0, 4.0])
                plt.title('aspect ratio: %s vs %s' %
                          (self.classes[i], self.classes[j]))
                plt.grid(True)

                fig.savefig(osp.join(output_dir,
                                     'images/ratio_%02d_%02d.jpg' % (i, j)),
                            bbox_inches='tight')
                plt.close(fig)

                im1 = cv2.resize(
                    cv2.imread(
                        osp.join(output_dir,
                                 'images/offset_%02d_%02d.jpg' % (i, j))),
                    (648, 545))
                im2 = cv2.resize(
                    cv2.imread(
                        osp.join(output_dir,
                                 'images/area_%02d_%02d.jpg' % (i, j))),
                    (648, 545))
                im3 = cv2.resize(
                    cv2.imread(
                        osp.join(output_dir,
                                 'images/ratio_%02d_%02d.jpg' % (i, j))),
                    (648, 545))

                im = np.zeros((545, 648 * 3, 3), dtype=np.uint8)
                im[:, :648, :] = im1
                im[:, 648:2 * 648, :] = im2
                im[:, 2 * 648:3 * 648, :] = im3

                cv2.imwrite(
                    osp.join(output_dir, 'images/%02d_%02d.jpg' % (i, j)), im)

                print i, j
Example #19
    def get_scene_minibatch(self, square=True):
        # outputs: resized images, layouts, segmentations, normalized xywhs, grids

        batch_size = cfg.TRAIN.BATCH_SIZE
        resolution = cfg.PREDICT_RESOLUTION
        grid_shape = cfg.GRID_SHAPE
        num_clses = self.num_classes - 1

        #######################################################################
        # indices of the minibatch
        if self.objdb_cur + batch_size >= len(self.objdb):
            self.permute_objdb_indices()
        db_inds = self.objdb_perm[self.objdb_cur:self.objdb_cur + batch_size]
        self.objdb_cur += batch_size
        #######################################################################

        images = np.zeros((batch_size, resolution[0], \
                        resolution[1], resolution[2]), dtype=np.float32)
        scenes = np.zeros((batch_size, resolution[0], \
                        resolution[1], num_clses), dtype=np.float32)
        segs   = np.zeros((batch_size, resolution[0], \
                        resolution[1], resolution[2]), dtype=np.float32)

        grids = np.zeros((batch_size, 2))
        boxes = np.zeros((batch_size, 4))

        for i in range(batch_size):
            obj = self.objdb[db_inds[i]]

            im_path = obj['background']
            seg_path = obj['out_seg']

            width = obj['width']
            height = obj['height']
            box = obj['box'].copy()

            all_boxes = obj['all_boxes'].copy().reshape((-1, 4)).astype(np.int64)
            all_clses = obj['all_clses'].copy().flatten()

            # image data, flip if necessary
            img = cv2.imread(im_path, cv2.IMREAD_COLOR)
            seg = cv2.imread(seg_path, cv2.IMREAD_COLOR)

            if obj['flipped']:
                # print('flipped %d'%i)
                img = cv2.flip(img, 1)
                seg = cv2.flip(seg, 1)

            xywh = ds_utils.xyxy_to_xywh(box.reshape((1, 4))).squeeze()
            ex_box = box.copy().flatten().astype(np.int64)

            # if we need square images
            if square:
                img, offset_x, offset_y = \
                    ds_utils.create_squared_image(img, cfg.PIXEL_MEANS)
                xywh[0] += offset_x
                xywh[1] += offset_y

                ex_box[0] += offset_x
                ex_box[1] += offset_y
                ex_box[2] += offset_x
                ex_box[3] += offset_y

                all_boxes[:, 0] += offset_x
                all_boxes[:, 1] += offset_y
                all_boxes[:, 2] += offset_x
                all_boxes[:, 3] += offset_y
                width = height = img.shape[0]
                seg, offset_x, offset_y = \
                    ds_utils.create_squared_image(seg, cfg.PIXEL_MEANS)

            nxywh = ds_utils.normalize_xywh(xywh.reshape((1, 4)), width,
                                            height).squeeze()
            # discretized output positions
            grid = ds_utils.boxes_to_indices(nxywh.reshape((1, 4)),
                                             grid_shape).squeeze()

            # images of the same shape
            images[i] = cv2.resize(img, (resolution[1], resolution[0]))
            segs[i] = cv2.resize(seg, (resolution[1], resolution[0]))

            factor = float(resolution[0]) / width
            all_boxes = (factor * all_boxes).astype(np.int64)
            ex_box = (factor * ex_box).astype(np.int64)
            scenes[i] = ds_utils.create_scenes(resolution[1],
                                               resolution[0],
                                               all_boxes,
                                               all_clses,
                                               ex_box=ex_box,
                                               n_cls=num_clses)

            grids[i, :] = grid
            boxes[i, :] = nxywh

        return images, scenes, segs, boxes, grids
Example #20
    def get_rnn_minibatch(self, max_seq_len, square=True, vis=False):

        #######################################################################
        # rename the config parameters to make the code clearer
        batch_size = cfg.TRAIN.BATCH_SIZE
        resolution = cfg.RESOLUTION
        grid_shape = cfg.GRID_SHAPE

        #######################################################################
        # indices of the minibatch
        if self.roidb_cur + batch_size >= len(self.roidb):
            self.permute_roidb_indices()
        db_inds = self.roidb_perm[self.roidb_cur:self.roidb_cur + batch_size]
        self.roidb_cur += batch_size
        #######################################################################

        #######################################################################
        # to be returned
        objects = []
        centers = []
        ratios = []
        masks = []
        # normalized xywh representation
        bboxes = np.zeros((batch_size, max_seq_len, 4), dtype=np.float32)
        # grid box offset
        deltas = np.zeros((batch_size, max_seq_len, 4), dtype=np.float32)
        images  = np.zeros((batch_size, resolution[0], \
                            resolution[1], resolution[2]), dtype=np.float32)
        #######################################################################

        for i in xrange(batch_size):
            rois = self.roidb[db_inds[i]]
            im_path = rois['image']
            width = rois['width']
            height = rois['height']
            gt_boxes = rois['boxes'].copy()
            gt_cats = rois['clses'].copy()
            areas = rois['seg_areas']

            # number of instances should not exceed max_seq_len
            num_instances = min(gt_boxes.shape[0], max_seq_len)

            # image data, flip if necessary
            img = cv2.imread(im_path, cv2.IMREAD_COLOR)
            if rois['flipped']:
                # print('flipped %d'%i)
                img = cv2.flip(img, 1)

            # sort the objects in the sequence based on their areas
            order = np.argsort(areas)[::-1]
            gt_boxes = gt_boxes[order, :]
            gt_cats = gt_cats[order]
            areas = areas[order]
            # print areas

            # [x1, y1, x2, y2] to [x, y, w, h]
            gt_boxes = ds_utils.xyxy_to_xywh(gt_boxes)

            # if we need square images
            if square:
                img, offset_x, offset_y = \
                    ds_utils.create_squared_image(img, cfg.PIXEL_MEANS)
                gt_boxes[:, 0] += offset_x
                gt_boxes[:, 1] += offset_y
                width = height = img.shape[0]

            # normalize
            gt_boxes = ds_utils.normalize_xywh(gt_boxes, width, height)

            # truncate the sequences
            gt_boxes = gt_boxes[:num_instances, :]

            # discretized output positions
            grid_indices = ds_utils.xywh_to_index(gt_boxes, \
                grid_shape[1], grid_shape[0])

            # deltas between grid boxes and ground truth boxes
            grid_boxes  = ds_utils.index_to_xywh(grid_indices, \
                grid_shape[1], grid_shape[0])
            grid_deltas = ds_utils.bbox_transform(grid_boxes, gt_boxes)

            # images of the same shape
            images[i, :, :, :] = cv2.resize(img,
                                            (resolution[1], resolution[0]))
            # keep the first 'num_instances' (largest) objects
            bboxes[i, :num_instances, :] = np.expand_dims(gt_boxes, axis=0)
            # grid offsets
            deltas[i, :num_instances, :] = np.expand_dims(grid_deltas, axis=0)
            # object indicators
            objects.append(gt_cats[:num_instances].tolist())
            # masks for loss function
            masks.append(np.ones((num_instances, )).tolist())
            # grid centers and sizes
            centers.append(grid_indices[:, 0].tolist())
            ratios.append(grid_indices[:, 1].tolist())

        # padding
        objects = pad_sequences(objects,
                                maxlen=max_seq_len,
                                padding='post',
                                truncating='post',
                                value=0.)
        centers = pad_sequences(centers,
                                maxlen=max_seq_len,
                                padding='post',
                                truncating='post',
                                value=0.)
        ratios = pad_sequences(ratios,
                               maxlen=max_seq_len,
                               padding='post',
                               truncating='post',
                               value=0.)
        masks = pad_sequences(masks,
                              maxlen=max_seq_len,
                              padding='post',
                              truncating='post',
                              value=0.)

        if vis:
            output_dir = osp.abspath(osp.join(cfg.ROOT_DIR, 'output', \
                                              cfg.EXP_DIR, self.name, \
                                              'rnn_minibatch'))
            if not osp.exists(output_dir):
                os.makedirs(output_dir)

            for i in xrange(batch_size):
                rois = self.roidb[db_inds[i]]
                im_name, im_ext = osp.splitext(osp.basename(rois['image']))
                msk = masks[i, :]

                # ground truth boxes
                ibb = bboxes[i, :, :].copy()
                iid = objects[i, :].copy()
                iim = images[i, :, :, :].copy()

                # grid bboxes
                grid_indices = np.vstack(
                    (centers[i, :], ratios[i, :])).transpose()
                gbb = ds_utils.index_to_xywh(grid_indices, grid_shape[1],
                                             grid_shape[0])

                # regressed bboxes
                rbb = ds_utils.bbox_transform_inv(gbb, deltas[i, :, :])

                # Denormalize
                ibb = ds_utils.denormalize_xywh(ibb, resolution[1],
                                                resolution[0])
                gbb = ds_utils.denormalize_xywh(gbb, resolution[1],
                                                resolution[0])
                rbb = ds_utils.denormalize_xywh(rbb, resolution[1],
                                                resolution[0])

                ibb = ds_utils.xywh_to_xyxy(ibb, resolution[1], resolution[0])
                gbb = ds_utils.xywh_to_xyxy(gbb, resolution[1], resolution[0])
                rbb = ds_utils.xywh_to_xyxy(rbb, resolution[1], resolution[0])

                # fontScale = 0.0007 * math.sqrt(float(\
                #     resolution[0]*resolution[0]+resolution[1]*resolution[1]))

                for j in xrange(ibb.shape[0]):
                    if msk[j] == 0:
                        break

                    id = iid[j]
                    cls = self.classes[id]

                    # ground truth boxes
                    bb = ibb[j, :].astype(np.int16)
                    cv2.rectangle(iim, (bb[0], bb[1]), (bb[2], bb[3]), \
                                (0, 255, 0), 2)
                    # grid boxes
                    bb = gbb[j, :].astype(np.int16)
                    cv2.rectangle(iim, (bb[0], bb[1]), (bb[2], bb[3]), \
                                (255, 0, 0), 1)
                    # regressed boxes
                    bb = rbb[j, :].astype(np.int16)
                    cv2.rectangle(iim, (bb[0], bb[1]), (bb[2], bb[3]), \
                                (0, 0, 255), 1)
                    # cv2.putText(iim, '{:}_{:}'.format(j, cls), \
                    #             (bb[0], bb[1] - 2), \
                    #             cv2.FONT_HERSHEY_SIMPLEX, \
                    #             fontScale, (0, 0, 255), 1)

                output_path = osp.join(output_dir,
                                       '%06d_' % i + im_name + '.jpg')
                cv2.imwrite(output_path, iim)

        return images, objects, bboxes, deltas, centers, ratios, masks
Example #21
    def draw_binary_correlation_stat_graph(self, output_dir, roidb=None):
        # Create the output directory if necessary
        if not osp.exists(output_dir):
            os.makedirs(output_dir)
        if not osp.exists(osp.join(output_dir, 'images')):
            os.makedirs(osp.join(output_dir, 'images'))

        # Cache files
        present_cache_file      = osp.join(self.cache_path, self.name + '_present_stats.pkl')
        correlation_cache_file  = osp.join(self.cache_path, self.name + '_correlation_stats.pkl')

        # Load cache files if they exist
        if osp.exists(present_cache_file) and osp.exists(correlation_cache_file):
            with open(present_cache_file, 'rb') as fid:
                present_stats = cPickle.load(fid)
            print '{} present stats loaded from {}'.format(self.name, present_cache_file)

            with open(correlation_cache_file, 'rb') as fid:
                correlation_stats = cPickle.load(fid)
            print '{} correlation stats loaded from {}'.format(self.name, correlation_cache_file)
        # Otherwise, create them
        else:
            if roidb is None:
                roidb = self.roidb
            num_rois = len(roidb)

            # present_stats: the number of pairs
            present_stats     = np.zeros((self.num_classes, self.num_classes))
            correlation_stats = [[ np.zeros((6, 0)) for j in xrange(self.num_classes) ] \
                                                    for i in xrange(self.num_classes) ]

            for i in xrange(num_rois):
                rois      = roidb[i]
                im_width  = float(rois['width'])
                im_height = float(rois['height'])
                bboxes    = rois['boxes'].copy()
                classes   = rois['clses']

                # At least 2 objects
                if bboxes.shape[0] < 2:
                    continue

                # Assume squared images
                max_dim = np.maximum(im_width, im_height)
                nfactor = np.array([max_dim, max_dim, \
                                    max_dim, max_dim]).reshape((1,4))

                # Change representations from xyxy to xywh
                bboxes  = ds_utils.xyxy_to_xywh(bboxes)
                # Normalize
                bboxes  = np.divide(bboxes, nfactor)
                # Area
                areas   = np.multiply(bboxes[:, 2], bboxes[:, 3]).squeeze()
                # Aspect ratio
                ratios  = np.divide(bboxes[:, 2], bboxes[:, 3]).squeeze()

                for j in xrange(bboxes.shape[0] - 1):
                    cls1   = classes[j]
                    bbox1  = bboxes[j, :].squeeze()

                    for k in xrange(j + 1, bboxes.shape[0]):
                        cls2   = classes[k]
                        bbox2  = bboxes[k, :].squeeze()

                        offset = bbox2[:2] - bbox1[:2]

                        correlation21 = np.array([offset[0], offset[1],
                                                  areas[j], areas[k],
                                                  ratios[j], ratios[k]]).reshape((6,1))

                        correlation12 = np.array([-offset[0], -offset[1],
                                                  areas[k],  areas[j],
                                                  ratios[k], ratios[j]]).reshape((6,1))

                        correlation_stats[cls1][cls2] = \
                                np.hstack((correlation_stats[cls1][cls2], correlation21))
                        correlation_stats[cls2][cls1] = \
                                np.hstack((correlation_stats[cls2][cls1], correlation12))

                        present_stats[cls1, cls2] += 1
                        present_stats[cls2, cls1] += 1

                print i


            with open(present_cache_file, 'wb') as fid:
                cPickle.dump(present_stats, fid, cPickle.HIGHEST_PROTOCOL)
            print 'wrote present stats to {}'.format(present_cache_file)

            with open(correlation_cache_file, 'wb') as fid:
                cPickle.dump(correlation_stats , fid, cPickle.HIGHEST_PROTOCOL)
            print 'wrote correlation stats to {}'.format(correlation_cache_file)

        plt.switch_backend('agg')
        for i in xrange(1, self.num_classes):
            for j in xrange(1, self.num_classes):
                correlation = correlation_stats[i][j]

                fig = plt.figure()

                plt.hist2d(correlation[0, :], correlation[1, :], 20, range=[[-1.0, 1.0], [-1.0, 1.0]])
                plt.colorbar()
                plt.xlim([-1.0, 1.0])
                plt.ylim([-1.0, 1.0])
                plt.title('offset: %s vs %s'%(self.classes[i], self.classes[j]))
                plt.grid(True)

                fig.savefig(os.path.join(output_dir, 'images/offset_%02d_%02d.jpg' % (i, j)), bbox_inches='tight')
                plt.close(fig)

                fig = plt.figure()

                plt.hist2d(correlation[2, :], correlation[3, :], 20, range=[[0, 0.05], [0, 0.05]])
                plt.colorbar()
                plt.xlim([0, 0.05])
                plt.ylim([0, 0.05])
                plt.title('area: %s vs %s'%(self.classes[i], self.classes[j]))
                plt.grid(True)

                fig.savefig(osp.join(output_dir, 'images/area_%02d_%02d.jpg' % (i, j)), bbox_inches='tight')
                plt.close(fig)


                fig = plt.figure()

                plt.hist2d(correlation[4, :], correlation[5, :], 20, range=[[0, 4.0], [0, 4.0]])
                plt.colorbar()
                plt.xlim([0, 4.0])
                plt.ylim([0, 4.0])
                plt.title('aspect ratio: %s vs %s'%(self.classes[i], self.classes[j]))
                plt.grid(True)

                fig.savefig(osp.join(output_dir, 'images/ratio_%02d_%02d.jpg' % (i, j)), bbox_inches='tight')
                plt.close(fig)

                im1 = cv2.resize(cv2.imread(osp.join(output_dir, 'images/offset_%02d_%02d.jpg' % (i, j))), (648, 545))
                im2 = cv2.resize(cv2.imread(osp.join(output_dir, 'images/area_%02d_%02d.jpg' % (i, j))), (648, 545))
                im3 = cv2.resize(cv2.imread(osp.join(output_dir, 'images/ratio_%02d_%02d.jpg' % (i, j))), (648, 545))

                im = np.zeros((545, 648 * 3, 3), dtype=np.uint8)
                im[:,      :  648, :] = im1
                im[:,   648:2*648, :] = im2
                im[:, 2*648:3*648, :] = im3

                cv2.imwrite(osp.join(output_dir, 'images/%02d_%02d.jpg' % (i, j)), im)

                print i,j
Example #22
    def alpha_compose(self, src_ctx, dst_ctx):
        # Assume src_ctx has fields: bg_image, box
        # Assume dst_ctx is from val_imdb
        src_img = cv2.imread(src_ctx['bg_image'], cv2.IMREAD_COLOR)
        dst_img = cv2.imread(dst_ctx['image'], cv2.IMREAD_COLOR)
        dst_alpha = cv2.imread(dst_ctx['alpha'], cv2.IMREAD_GRAYSCALE)

        src_xyxy = src_ctx['box']
        dst_xyxy = dst_ctx['box']
        src_width = src_img.shape[1]
        src_height = src_img.shape[0]
        dst_width = dst_img.shape[1]
        dst_height = dst_img.shape[0]

        # resize the target image to align the heights of the bboxes
        factor = float(src_xyxy[3] - src_xyxy[1] + 1) / float(dst_xyxy[3] -
                                                              dst_xyxy[1] + 1)
        dst_width = int(dst_width * factor)
        dst_height = int(dst_height * factor)
        dst_img = cv2.resize(dst_img, (dst_width, dst_height))
        dst_alpha = cv2.resize(dst_alpha, (dst_width, dst_height))
        dst_alpha = dst_alpha.astype(np.float64) / 255.0
        dst_xyxy = factor * dst_xyxy
        src_xywh = ds_utils.xyxy_to_xywh(src_xyxy.reshape((1, 4))).squeeze()
        dst_xywh = ds_utils.xyxy_to_xywh(dst_xyxy.reshape((1, 4))).squeeze()

        # anchors that should match (the standing points)
        src_anchor = src_xywh[:2]
        dst_anchor = dst_xywh[:2]
        offset = (src_anchor - dst_anchor).astype(np.int64)

        # dilate the target patch a bit to include the blending region
        dst_bb = ds_utils.expand_xyxy(dst_xyxy.reshape((1, 4)),
                                      dst_width,
                                      dst_height,
                                      ratio=0.2).squeeze().astype(np.int64)

        src_bb = dst_bb.copy()
        src_bb[:2] = dst_bb[:2] + offset
        src_bb[2:] = dst_bb[2:] + offset

        # in case the bbox of the target object is beyond the boundaries of the source image
        if src_bb[0] < 0:
            dst_bb[0] -= src_bb[0]
            src_bb[0] = 0
        if src_bb[1] < 0:
            dst_bb[1] -= src_bb[1]
            src_bb[1] = 0
        if src_bb[2] > src_width - 1:
            dst_bb[2] -= src_bb[2] - src_width + 1
            src_bb[2] = src_width - 1
        if src_bb[3] > src_height - 1:
            dst_bb[3] -= src_bb[3] - src_height + 1
            src_bb[3] = src_height - 1

        output_mask = np.zeros((src_height, src_width), dtype=np.float64)
        output_image = src_img.copy()

        alpha_patch = dst_alpha[dst_bb[1]:(dst_bb[3] + 1),
                                dst_bb[0]:(dst_bb[2] + 1)]
        src_patch = src_img[src_bb[1]:(src_bb[3] + 1),
                            src_bb[0]:(src_bb[2] + 1), :]
        dst_patch = dst_img[dst_bb[1]:(dst_bb[3] + 1),
                            dst_bb[0]:(dst_bb[2] + 1), :]

        output_mask[src_bb[1]:(src_bb[3] + 1),
                    src_bb[0]:(src_bb[2] + 1)] = alpha_patch
        output_image[src_bb[1]:(src_bb[3]+1), src_bb[0]:(src_bb[2]+1),:] = \
            np.expand_dims(1.0 - alpha_patch, axis=-1) * src_patch + \
            np.expand_dims(alpha_patch, axis=-1) * dst_patch

        # cv2.rectangle(output_image, (src_xyxy[0], src_xyxy[1]), (src_xyxy[2], src_xyxy[3]), \
        #                         (255, 0, 0), 1)

        return output_image.astype(np.uint8), output_mask