Code example #1
    def create_roidb_from_box_list(self, box_list, gt_roidb):
        assert len(box_list) == self.nrof_images, \
            'Number of boxes must match number of ground-truth images'
        roidb = []
        for i in range(self.nrof_images):
            boxes = box_list[i]
            num_boxes = boxes.shape[0]
            overlaps = np.zeros((num_boxes, self.nrof_classes),
                                dtype=np.float32)

            if gt_roidb is not None and gt_roidb[i]['boxes'].size > 0:
                gt_boxes = gt_roidb[i]['boxes']
                gt_classes = gt_roidb[i]['gt_classes']
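                # IoU of every proposal against every gt box of this image; each
                # proposal's best match decides which class column gets its max IoU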
                gt_overlaps = bbox_overlaps(boxes.astype(np.float),
                                            gt_boxes.astype(np.float))
                argmaxes = gt_overlaps.argmax(axis=1)
                maxes = gt_overlaps.max(axis=1)
                I = np.where(maxes > 0)[0]
                overlaps[I, gt_classes[argmaxes[I]]] = maxes[I]

            overlaps = scipy.sparse.csr_matrix(overlaps)
            roidb.append({
                'boxes': boxes,
                'gt_classes': np.zeros((num_boxes, ), dtype=np.int32),
                'gt_overlaps': overlaps,
                'flipped': False,
                'seg_areas': np.zeros((num_boxes, ), dtype=np.float32)
            })
        return roidb
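
All of the examples on this page call bbox_overlaps without showing it; in the original projects it is usually a compiled Cython helper. The pure-NumPy sketch below is an approximation for reference (not any project's actual implementation): it computes the same (N, K) IoU matrix between two arrays of [x1, y1, x2, y2] boxes and is enough to run the snippets on small inputs.

import numpy as np

def bbox_overlaps(boxes, query_boxes):
    """Return the (N, K) IoU matrix between boxes (N, 4) and query_boxes (K, 4)."""
    boxes = np.asarray(boxes, dtype=np.float64)
    query_boxes = np.asarray(query_boxes, dtype=np.float64)
    # box areas, using the "+ 1" pixel convention of the original Cython helper
    areas = (boxes[:, 2] - boxes[:, 0] + 1) * (boxes[:, 3] - boxes[:, 1] + 1)
    query_areas = (query_boxes[:, 2] - query_boxes[:, 0] + 1) * \
                  (query_boxes[:, 3] - query_boxes[:, 1] + 1)
    # pairwise intersection widths and heights, clipped at zero
    iw = (np.minimum(boxes[:, None, 2], query_boxes[None, :, 2]) -
          np.maximum(boxes[:, None, 0], query_boxes[None, :, 0]) + 1).clip(min=0)
    ih = (np.minimum(boxes[:, None, 3], query_boxes[None, :, 3]) -
          np.maximum(boxes[:, None, 1], query_boxes[None, :, 1]) + 1).clip(min=0)
    inter = iw * ih
    return inter / (areas[:, None] + query_areas[None, :] - inter)
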
Code example #2
    def _compute_targets(self, rois, overlaps, labels):
        """
        Compute bounding-box regression targets for an image.
        for each roi find the corresponding gt_box, then compute the distance.
        """
        # Indices of ground-truth ROIs
        gt_inds = np.where(overlaps == 1)[0]
        if len(gt_inds) == 0:
            # Bail if the image has no ground-truth ROIs
            return np.zeros((rois.shape[0], 5), dtype=np.float32)
        # Indices of examples for which we try to make predictions
        ex_inds = np.where(overlaps >= self.config.TRAIN.BBOX_THRESH)[0]

        # Get IoU overlap between each ex ROI and gt ROI
        ex_gt_overlaps = bbox_overlaps(
            np.ascontiguousarray(rois[ex_inds, :], dtype=np.float),
            np.ascontiguousarray(rois[gt_inds, :], dtype=np.float))

        # Find which gt ROI each ex ROI has max overlap with:
        # this will be the ex ROI's gt target
        gt_assignment = ex_gt_overlaps.argmax(axis=1)
        gt_rois = rois[gt_inds[gt_assignment], :]
        ex_rois = rois[ex_inds, :]

        targets = np.zeros((rois.shape[0], 5), dtype=np.float32)
        targets[ex_inds, 0] = labels[ex_inds]
        targets[ex_inds, 1:] = bbox_transform(ex_rois, gt_rois)
        return targets
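
bbox_transform is also assumed above; in Fast/Faster R-CNN style code it converts each example box and its matched gt box into the usual (dx, dy, dw, dh) regression targets. A minimal NumPy sketch of that parameterization, assuming [x1, y1, x2, y2] boxes:

import numpy as np

def bbox_transform(ex_rois, gt_rois):
    # centers and sizes of the example boxes ("+ 1" pixel convention)
    ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0
    ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0
    ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths
    ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights
    # centers and sizes of the matched ground-truth boxes
    gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0
    gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0
    gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths
    gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights
    # normalized center offsets and log-scale size ratios
    targets = np.vstack(((gt_ctr_x - ex_ctr_x) / ex_widths,
                         (gt_ctr_y - ex_ctr_y) / ex_heights,
                         np.log(gt_widths / ex_widths),
                         np.log(gt_heights / ex_heights))).transpose()
    return targets
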
Code example #3
    def _sample_rois(self, all_rois, gt_boxes, fg_rois_per_image,
                     rois_per_image, num_classes):
        """Generate a random sample of RoIs comprising foreground and background
        examples.
        """
        # overlaps: (rois x gt_boxes)
        overlaps = bbox_overlaps(
            np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float),
            np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float))
        gt_assignment = overlaps.argmax(axis=1)
        max_overlaps = overlaps.max(axis=1)
        labels = gt_boxes[gt_assignment, 4]

        # Select foreground RoIs as those with >= FG_THRESH overlap
        fg_inds = np.where(max_overlaps >= self.FG_THRESH)[0]
        # Guard against the case when an image has fewer than fg_rois_per_image
        # foreground RoIs
        fg_rois_per_this_image = min(fg_rois_per_image, fg_inds.size)
        # Sample foreground regions without replacement
        if fg_inds.size > 0:
            fg_inds = npr.choice(fg_inds,
                                 size=fg_rois_per_this_image,
                                 replace=False)

        # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
        bg_inds = np.where((max_overlaps < self.BG_THRESH_HI)
                           & (max_overlaps >= self.BG_THRESH_LO))[0]
        # Compute number of background RoIs to take from this image (guarding
        # against there being fewer than desired)
        bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
        bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size)
        # Sample background regions without replacement
        if bg_inds.size > 0:
            bg_inds = npr.choice(bg_inds,
                                 size=bg_rois_per_this_image,
                                 replace=False)

        # The indices that we're selecting (both fg and bg)
        keep_inds = np.append(fg_inds, bg_inds)
        # Select sampled values from various arrays:
        labels = labels[keep_inds]
        # Clamp labels for the background RoIs to 0
        labels[fg_rois_per_this_image:] = 0
        rois = all_rois[keep_inds]

        bbox_target_data = self._compute_targets(
            rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)

        bbox_targets, bbox_inside_weights = \
            self._get_bbox_regression_labels(bbox_target_data, num_classes)

        return labels, rois, bbox_targets, bbox_inside_weights
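
_get_bbox_regression_labels is assumed here; in Fast R-CNN style code it expands the compact (N, 5) target rows [class, dx, dy, dw, dh] into the per-class layout the network regresses. A hedged sketch of that expansion (the all-ones inside weights are an assumption; some configs use other values):

import numpy as np

def _get_bbox_regression_labels(bbox_target_data, num_classes):
    # bbox_target_data: (N, 5) rows of [class, dx, dy, dw, dh]
    clss = bbox_target_data[:, 0].astype(np.int64)
    bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32)
    bbox_inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32)
    # only foreground rows (class > 0) get targets, written into the
    # 4-column slot that belongs to their class
    for ind in np.where(clss > 0)[0]:
        start = 4 * clss[ind]
        bbox_targets[ind, start:start + 4] = bbox_target_data[ind, 1:]
        bbox_inside_weights[ind, start:start + 4] = (1.0, 1.0, 1.0, 1.0)
    return bbox_targets, bbox_inside_weights
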
Code example #4
def page_eval(page_image,
              pred_boxes,
              gt_boxes,
              use_pixel_level=True,
              output_all=False):
    page_stats = {}
    overlaps = bbox_overlaps(gt_boxes.astype(np.float32),
                             pred_boxes.astype(np.float32))
    gt_to_pred_map = get_coverage_mapping(overlaps.T)
    inv_page_binary = _inverse_binary(page_image, thresh=0.99)

    output_titles = []
    output_boxes = []
    # Check each gt_box
    for ind in range(gt_boxes.shape[0]):
        word_stats = {}
        gt_box = gt_boxes[ind, :]
        pred_ind = gt_to_pred_map.get(ind, None)
        if pred_ind is None:
            continue
        pred_box = pred_boxes[pred_ind, :]
        if use_pixel_level:
            o2o = pixel_iou(gt_box=gt_box,
                            box=pred_box,
                            binary_image=inv_page_binary)
        else:
            o2o = overlaps[ind, pred_ind]
        output_boxes.append(pred_box)
        output_titles.append('%4.3f' % o2o)
        word_stats['gt'] = gt_box.tolist()
        word_stats['pred'] = pred_box.tolist()
        word_stats['cover'] = o2o
        page_stats['word_%d' % ind] = word_stats

    if output_all:
        for ind in range(pred_boxes.shape[0]):
            pred_box = pred_boxes[ind, :]
            output_boxes.append(pred_box)
            output_titles.append('-')
            # use a fresh dict per prediction; reusing word_stats from the gt loop
            # above would alias one dict object across all 'box_%d' entries
            box_stats = {'pred': pred_box.tolist()}
            page_stats['box_%d' % ind] = box_stats

    page_stats['predictions'] = pred_boxes.shape[0]
    page_stats['gt_boxes'] = gt_boxes.shape[0]
    preds_image = debugShowBoxes(page_image.copy(),
                                 boxes=output_boxes,
                                 gt_boxes=gt_boxes,
                                 titles=output_titles,
                                 dont_show=True)

    return page_stats, preds_image
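
get_coverage_mapping is used here (and in the last example) but not defined on this page. The callers only rely on it returning a dict from gt index to the prediction assigned to that gt box, with each prediction used at most once. A hypothetical greedy sketch with that behaviour (the name, tie-breaking and zero-overlap cutoff are assumptions, not the project's code):

import numpy as np

def get_coverage_mapping(pred_to_gt_overlaps):
    # pred_to_gt_overlaps: (num_pred, num_gt) IoU matrix (the callers pass overlaps.T)
    overlaps = np.array(pred_to_gt_overlaps, dtype=np.float64, copy=True)
    mapping = {}
    # greedily take the globally best (pred, gt) pair, then retire both of them
    while overlaps.size and overlaps.max() > 0:
        pred_ind, gt_ind = np.unravel_index(overlaps.argmax(), overlaps.shape)
        mapping[int(gt_ind)] = int(pred_ind)
        overlaps[pred_ind, :] = 0
        overlaps[:, gt_ind] = 0
    return mapping
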
Code example #5
    def calc_overlaps(self, anchors, gt_boxes, inds_inside):
        # overlaps between the anchors and the gt boxes
        # overlaps (ex, gt)
        overlaps = bbox_overlaps(
            np.ascontiguousarray(anchors, dtype=np.float),
            np.ascontiguousarray(gt_boxes, dtype=np.float))
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps,
                                   np.arange(overlaps.shape[1])]
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

        return argmax_overlaps, max_overlaps, gt_max_overlaps, \
               gt_argmax_overlaps
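
The gt_argmax_overlaps recomputation above is a common Faster R-CNN idiom: comparing the whole matrix against the per-gt maxima broadcasts column-wise, so it recovers every anchor that ties for the best overlap with some gt box rather than only the first argmax. A tiny worked example with made-up numbers:

import numpy as np

overlaps = np.array([[0.1, 0.7],
                     [0.3, 0.7],
                     [0.6, 0.2]])  # 3 anchors x 2 gt boxes (hypothetical values)
gt_argmax_overlaps = overlaps.argmax(axis=0)                  # [2, 0]
gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(2)]  # [0.6, 0.7]
# anchors 0 and 1 both tie for gt 1, anchor 2 is the best match for gt 0
print(np.where(overlaps == gt_max_overlaps)[0])               # [0 1 2]
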
Code example #6
def _compute_labels(A, anchors, gt_boxes, dontcare_areas, gt_ishard):
    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((A, ), dtype=np.float32)
    labels.fill(-1)  # initialize all labels to -1 (don't care)
    # compute overlaps between the anchors and the gt boxes; shape is A x G
    # Note: anchors (A,4), gt_boxes (G,5)
    overlaps = bbox_overlaps(np.ascontiguousarray(anchors, dtype=np.float),
                             np.ascontiguousarray(gt_boxes, dtype=np.float))
    # (A,) for each anchor, the index of the gt box with the largest overlap
    argmax_overlaps = overlaps.argmax(axis=1)  # row-wise argmax --> index of a gt box
    # (A,) for each anchor, the value of that largest overlap
    max_overlaps = overlaps[np.arange(A), argmax_overlaps]
    # (G,) for each gt box, the index of the anchor with the largest overlap
    gt_argmax_overlaps = overlaps.argmax(axis=0)  # column-wise argmax --> index of an anchor
    # (G,) for each gt box, the value of that largest overlap
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    # indices of every anchor that ties the maximum overlap with some gt box
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
    # assign bg labels first so that positive labels can clobber them
    if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # max_overlaps and labels have the same shape (A,)
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
    # fg label: for each gt box, the anchor with the highest overlap
    labels[gt_argmax_overlaps] = 1
    # fg label: IoU above the positive threshold (e.g. overlap >= 0.7)
    labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

    if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels last so that negative labels can clobber positives:
        # anchors whose best overlap with any gt box is still below the negative
        # threshold (e.g. 0.3) are forced back to background
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # preclude dontcare areas
    if dontcare_areas is not None and dontcare_areas.shape[0] > 0:
        # intersec shape is D x A
        intersecs = bbox_intersections(
            np.ascontiguousarray(dontcare_areas, dtype=np.float),
            np.ascontiguousarray(anchors, dtype=np.float))
        intersecs_ = intersecs.sum(axis=0)  # (A,) total dontcare intersection per anchor
        labels[intersecs_ > cfg.TRAIN.DONTCARE_AREA_INTERSECTION_HI] = -1

    # preclude hard samples that are heavily occluded, truncated or difficult to see
    if (cfg.TRAIN.PRECLUDE_HARD_SAMPLES and gt_ishard is not None
            and gt_ishard.shape[0] > 0):
        assert gt_ishard.shape[0] == gt_boxes.shape[0]
        gt_ishard = gt_ishard.astype(int)
        gt_hardboxes = gt_boxes[gt_ishard == 1, :]
        if gt_hardboxes.shape[0] > 0:
            # H x A
            hard_overlaps = bbox_overlaps(
                np.ascontiguousarray(gt_hardboxes, dtype=np.float),  # H x 4
                np.ascontiguousarray(anchors, dtype=np.float))  # A x 4
            # (A,) for each anchor, its largest overlap with any hard gt box
            hard_max_overlaps = hard_overlaps.max(axis=0)
            # hard_max_overlaps and labels have the same shape
            labels[hard_max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = -1
            # (H,) for each hard gt box, the index of its best-matching anchor
            max_intersec_label_inds = hard_overlaps.argmax(axis=1)
            labels[max_intersec_label_inds] = -1

    # subsample positive labels if we have too many (keep at most num_fg, e.g. 128)
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = np.random.choice(
            fg_inds, size=(len(fg_inds) - num_fg),
            replace=False)  # randomly clip some samples
        labels[disable_inds] = -1

    # subsample negative labels if we have too many (keep at most num_bg)
    # if there are fewer positives than num_fg, extra negatives fill the gap so the
    # total number of sampled anchors stays at RPN_BATCHSIZE (e.g. 256)
    num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = np.random.choice(bg_inds,
                                        size=(len(bg_inds) - num_bg),
                                        replace=False)
        labels[disable_inds] = -1
        # print "was %s inds, disabling %s, now %s inds" % (
        # len(bg_inds), len(disable_inds), np.sum(labels == 0))
    return labels, max_overlaps, argmax_overlaps
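
cfg here is the global configuration object of a py-faster-rcnn style project. For reference, a minimal stand-in with the fields this function touches, filled with commonly used defaults (treat the exact numbers as assumptions for this sketch, not the original project's settings):

from types import SimpleNamespace

cfg = SimpleNamespace(TRAIN=SimpleNamespace(
    RPN_CLOBBER_POSITIVES=False,        # if True, negatives may overwrite positives
    RPN_NEGATIVE_OVERLAP=0.3,           # IoU below this => background anchor
    RPN_POSITIVE_OVERLAP=0.7,           # IoU at or above this => foreground anchor
    RPN_FG_FRACTION=0.5,                # target fraction of foreground anchors
    RPN_BATCHSIZE=256,                  # total anchors sampled per image
    DONTCARE_AREA_INTERSECTION_HI=0.5,  # dontcare intersection that voids an anchor
    PRECLUDE_HARD_SAMPLES=True,         # ignore anchors matched to "hard" gt boxes
))
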
Code example #7
def random_rois(batch_gt_boxes,
                batch_embeddings=None,
                image_size=(900, 1200),
                num_classes=2,
                num_boxes_per_class=20,
                random_boxes_per_word=10,
                std_range=30,
                lower_bound=None,
                tf_format_in=False,
                tf_format_out=False):
    """

    :param batch_gt_boxes: (num_boxes, 5) => [batch_idx, x1, y1, x2, y2]
    :param batch_embeddings: (num_boxes, embedding_size+1) => e.g. [batch_id, v^T \in R^540]
    :param image_size:
    :param num_classes:
    :param num_boxes_per_class:
    :param lower_bound: (float) => lower bound of first IoU class
    :param tf_format_in:
    :param tf_format_out:

    :return:
    """
    assert batch_gt_boxes.ndim == 2 and batch_gt_boxes.shape[1] == 5, \
        'Pass gt_boxes in [batch_idx, x1, y1, x2, y2] format'
    assert isinstance(num_classes, int) and num_classes > 1, \
        'Must have at least 2 classes'

    if tf_format_in:
        # Switch to abs boxes coordinates
        batch_gt_boxes = tf_format_to_abs(batch_gt_boxes, image_size)

    RANDS_PER_WORD = random_boxes_per_word
    RANDOM_STD_RANGE = std_range
    # gt_randoms = _generate_random_boxes_around_gt(gt_boxes, RANDS_PER_WORD, pixel_std=5)
    batch_size = batch_gt_boxes.astype(np.int32)[:, 0].max() + 1
    batch_rois = []
    assigned_embeddings = []
    for n in range(batch_size):
        bidx = np.where(batch_gt_boxes[:, 0] == n)[0]
        gt_boxes = batch_gt_boxes[bidx, 1:]
        gt_randoms = np.vstack([
            _generate_random_boxes_around_gt(gt_boxes,
                                             RANDS_PER_WORD,
                                             pixel_std=i + 0.5)
            for i in range(0, RANDOM_STD_RANGE, 2)
        ])
        rand_randoms = _generate_random_relative_boxes(
            RANDS_PER_WORD * gt_boxes.shape[0]) * np.array(image_size * 2)
        rois = np.vstack((gt_randoms, rand_randoms)).astype(np.float32)
        # Clamp to image
        rois[:, ::2] = np.minimum(np.maximum(rois[:, ::2], 0), image_size[0])
        rois[:, 1::2] = np.minimum(np.maximum(rois[:, 1::2], 0), image_size[1])
        ovlps = bbox_overlaps(rois.astype(np.float32),
                              gt_boxes.astype(np.float32))
        scores = ovlps.max(1).flatten()
        # NOTICE: the defaults below assume num_classes is 5 or 2; for any other
        # value you should pass a lower_bound that makes sense.
        if lower_bound is None:
            lower_bound = 0.35 if num_classes == 5 else 0.2
        class_bins = np.linspace(lower_bound, 1., num_classes + 1)[1:]
        func = partial(_box_scoring_helper, bins=class_bins)
        # wrap map() in list() so this also works under Python 3, where map is lazy
        labels = np.array(list(map(func, scores)))
        keep = _label_filter_picker_helper(
            num_classes=num_classes,
            labels=labels,
            num_boxes_per_class=num_boxes_per_class)
        rois = rois[keep, :]
        labels = labels[keep]
        rois = np.hstack(
            (np.ones(rois.shape[0], np.float32)[:, np.newaxis] * n, rois,
             labels[:, np.newaxis])).astype(np.float32)
        batch_rois.append(rois)

        if batch_embeddings is not None:
            assigned_words = ovlps.argmax(1).flatten()
            embeddings = batch_embeddings[bidx, 1:]
            embeddings = embeddings[assigned_words, :]
            # Aligned embedding with randomly selected idx
            embeddings = embeddings[keep, :]
            assigned_embeddings.append(embeddings)

    batch_rois = np.vstack(batch_rois)

    if tf_format_out:
        new_rois = batch_rois[:, 1:-1][:, [1, 0, 3, 2]] / np.array(
            list(image_size) * 2)[::-1]
        batch_rois[:, 1:-1] = new_rois

    if batch_embeddings is not None:
        assigned_embeddings = np.vstack(assigned_embeddings)
        return batch_rois, assigned_embeddings
    return batch_rois
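
The helpers _generate_random_boxes_around_gt, _generate_random_relative_boxes, _box_scoring_helper and _label_filter_picker_helper are assumed to live in the same module. Purely as an illustration of how the first one is called above, a hypothetical version that jitters the gt boxes with Gaussian pixel noise of a given std:

import numpy as np

def _generate_random_boxes_around_gt(gt_boxes, rands_per_word, pixel_std=5.0):
    # repeat every gt box `rands_per_word` times and jitter its corners with
    # zero-mean Gaussian noise of `pixel_std` pixels (hypothetical sketch)
    jittered = np.repeat(gt_boxes.astype(np.float32), rands_per_word, axis=0)
    noise = np.random.normal(scale=pixel_std, size=jittered.shape)
    return jittered + noise.astype(np.float32)
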
Code example #8
def phoc_eval_page(page_image, pred_boxes, pred_phocs, gt_boxes, gt_words):
    """

    :param page_image:
    :param pred_boxes:
    :param pred_phocs:
    :param gt_boxes:
    :param gt_words: (list) of str contating gt-words
    :param image_transform:
    :param o2o_score_func:
    :return:
    """
    page_stats = {}
    overlaps = bbox_overlaps(gt_boxes.astype(np.float32), pred_boxes.astype(np.float32))
    gt_to_pred_map = get_coverage_mapping(overlaps.T)
    pred_phocs = np.atleast_2d(pred_phocs)
    pred_boxes = np.atleast_2d(pred_boxes)

    output_titles = []
    output_boxes = []
    # Check each gt_box
    for ind in range(gt_boxes.shape[0]):
        word_stats = {}
        gt_box = gt_boxes[ind, :]
        gt_word = gt_words[ind]
        pred_ind = gt_to_pred_map.get(ind, None)

        word_stats['gt'] = gt_box.tolist()
        phocs, dim = phoc_letters_and_digits([gt_word])
        word_stats['gt_phoc'] = phocs[0, :].tolist()
        word_stats['text'] = gt_word

        if pred_ind is not None:
            pred_box = pred_boxes[pred_ind, :]
            pred_phoc = pred_phocs[pred_ind, :]
            o2o = overlaps[ind, pred_ind]
            output_boxes.append(pred_box)
            output_titles.append('%s[%d]' % (gt_word, o2o * 100))
            word_stats['pred'] = pred_box.tolist()
            word_stats['pre_phoc'] = pred_phoc.tolist()
            word_stats['cover'] = o2o
        page_stats['word_%d' % ind] = word_stats

    # Collect stats for every prediction that was not assigned to a gt box
    for idx in set(range(pred_boxes.shape[0])) - set(gt_to_pred_map.values()):
        word_stats = {}
        best_gt_id = np.argmax(overlaps[:, idx])
        gt_box = gt_boxes[best_gt_id, :]
        gt_word = gt_words[best_gt_id]
        pred_ind = idx

        word_stats['gt'] = gt_box.tolist()
        phocs, dim = phoc_letters_and_digits([gt_word])
        word_stats['gt_phoc'] = phocs[0, :].tolist()
        word_stats['text'] = gt_word

        if pred_ind is not None:
            pred_box = pred_boxes[pred_ind, :]
            pred_phoc = pred_phocs[pred_ind, :]
            o2o = overlaps[best_gt_id, pred_ind]
            output_boxes.append(pred_box)
            output_titles.append('%s[%d]' % (gt_word, o2o*100))
            word_stats['pred'] = pred_box.tolist()
            word_stats['pre_phoc'] = pred_phoc.tolist()
            word_stats['cover'] = o2o
        page_stats['word_red_%d' % idx] = word_stats

    page_stats['predictions'] = pred_boxes.shape[0]
    page_stats['gt_boxes'] = gt_boxes.shape[0]

    preds_image = debugShowBoxes(page_image.copy(), boxes=output_boxes, gt_boxes=gt_boxes, titles=output_titles, dont_show=True)

    return page_stats, preds_image
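
phoc_letters_and_digits builds the PHOC descriptor of each word over a letters-and-digits alphabet; its pyramid levels are not visible on this page. The simplified stand-in below (an assumption: a level-1 occurrence histogram only) just illustrates the interface used above, returning an (N, dim) float array plus the descriptor dimensionality.

import numpy as np

ALPHABET = 'abcdefghijklmnopqrstuvwxyz0123456789'

def phoc_letters_and_digits(words):
    # level-1 stand-in: one binary slot per character of the alphabet
    dim = len(ALPHABET)
    phocs = np.zeros((len(words), dim), dtype=np.float32)
    for i, word in enumerate(words):
        for ch in word.lower():
            pos = ALPHABET.find(ch)
            if pos >= 0:
                phocs[i, pos] = 1.0
    return phocs, dim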