def _process_feature_extraction_v2(self,
                                       output,
                                       im_scales,
                                       im_infos,
                                       feature_name="fc6",
                                       conf_thresh=0):
        """Keep the most confident boxes of every image and gather their features.

        For each image, NMS (threshold 0.5) is run once per non-background
        class. Every box remembers the highest class probability for which it
        survived NMS, and the ``self.num_features`` boxes with the highest
        such confidence are selected.

        Args:
            output: Detector output. ``output[0]`` must provide "proposals"
                (one box-list per image exposing ``.bbox``), plus "scores"
                and ``feature_name`` tensors concatenated over all images.
            im_scales: Per-image scale factors; box coordinates are divided
                by them (presumably mapping back to the original image size).
            im_infos: Per-image dicts with "width" and "height" entries.
            feature_name: Key of the feature tensor inside ``output[0]``.
            conf_thresh: Accepted for interface compatibility only; this
                variant does not apply a confidence threshold.

        Returns:
            Tuple ``(feat_list, info_list)`` with one entry per image:
            the selected feature rows, and a dict holding "bbox",
            "image_width", "image_height", "cls_prob" (softmax scores of the
            selected boxes, background column included) and "max_features"
            (how many selected boxes have a non-zero confidence).
        """
        from maskrcnn_benchmark.layers import nms

        batch_size = len(output[0]["proposals"])
        n_boxes_per_image = [len(boxes) for boxes in output[0]["proposals"]]
        score_list = output[0]["scores"].split(n_boxes_per_image)
        score_list = [torch.nn.functional.softmax(x, -1) for x in score_list]
        feats = output[0][feature_name].split(n_boxes_per_image)
        cur_device = score_list[0].device

        feat_list = []
        info_list = []

        for i in range(batch_size):
            dets = output[0]["proposals"][i].bbox / im_scales[i]
            scores = score_list[i]
            max_conf = torch.zeros((scores.shape[0])).to(cur_device)
            # Column 0 of the scores matrix is for the background class
            start_index = 1
            for cls_ind in range(start_index, scores.shape[1]):
                cls_scores = scores[:, cls_ind]
                keep = nms(dets, cls_scores, 0.5)
                # Among the NMS survivors, remember the best score seen so far.
                max_conf[keep] = torch.where(
                    cls_scores[keep] > max_conf[keep],
                    cls_scores[keep],
                    max_conf[keep],
                )

            sorted_scores, sorted_indices = torch.sort(max_conf,
                                                       descending=True)
            # Boxes that never survived NMS keep a confidence of exactly 0.
            num_boxes = (sorted_scores[:self.num_features] != 0).sum()
            keep_boxes = sorted_indices[:self.num_features]
            feat_list.append(feats[i][keep_boxes])
            bbox = output[0]["proposals"][i][keep_boxes].bbox / im_scales[i]

            info_list.append({
                "bbox": bbox.cpu().numpy(),
                "image_width": im_infos[i]["width"],
                "image_height": im_infos[i]["height"],
                "cls_prob": scores[keep_boxes].cpu().numpy(),
                "max_features": num_boxes.item(),
            })

        return feat_list, info_list
Beispiel #2
0
    def _process_feature_extraction(
        self, output, im_scales, feat_name="fc6", conf_thresh=0.2
    ):
        """Return, per image, the features of the 100 most confident boxes.

        A box's confidence is the highest non-background class probability
        for which it survives per-class NMS (threshold 0.5). ``conf_thresh``
        is accepted but not used by this implementation.
        """
        detections = output[0]
        proposals = detections["proposals"]
        boxes_per_image = [len(boxes) for boxes in proposals]
        probs_per_image = [
            torch.nn.functional.softmax(chunk, -1)
            for chunk in detections["scores"].split(boxes_per_image)
        ]
        feats_per_image = detections[feat_name].split(boxes_per_image)
        device = probs_per_image[0].device

        selected = []
        for idx, proposal in enumerate(proposals):
            boxes = proposal.bbox / im_scales[idx]
            probs = probs_per_image[idx]
            best = torch.zeros((probs.shape[0])).to(device)

            # Class 0 is skipped; only the remaining columns are considered.
            for cls_ind in range(1, probs.shape[1]):
                class_probs = probs[:, cls_ind]
                survivors = nms(boxes, class_probs, 0.5)
                candidate = class_probs[survivors]
                current = best[survivors]
                best[survivors] = torch.where(
                    candidate > current, candidate, current
                )

            top = torch.argsort(best, descending=True)[:100]
            selected.append(feats_per_image[idx][top])
        return selected
def process_feature_extraction(output,
                               im_scales,
                               max_boxes=100,
                               get_boxes=False,
                               feat_name='fc6',
                               conf_thresh=0.2):
    """Select the most confident boxes of every image and gather their features.

    For each image, NMS (threshold 0.5) is run once per class, skipping
    column 0 of the score matrix. Every box remembers the highest class
    probability for which it survived NMS, and the ``max_boxes`` boxes with
    the highest such confidence are kept.

    Args:
        output: Detector output. ``output[0]`` must provide "proposals"
            (one box-list per image exposing ``.bbox``), plus "scores" and
            ``feat_name`` tensors concatenated over all images.
        im_scales: Per-image scale factors; box coordinates are divided by
            them.
        max_boxes: Maximum number of boxes kept per image.
        get_boxes: When false, only the list of per-image feature tensors is
            returned. When true, the rescaled box, the winning class index
            and the confidence of every kept box are returned as well.
        feat_name: Key of the feature tensor inside ``output[0]``.
        conf_thresh: Accepted for interface compatibility; not applied here.

    Returns:
        ``feat_list`` if ``get_boxes`` is false, otherwise
        ``[boxes_list, feat_list, classes_list, conf_list]``. Every list has
        one entry per image.
    """
    batch_size = len(output[0]["proposals"])
    n_boxes_per_image = [len(_) for _ in output[0]["proposals"]]
    score_list = output[0]["scores"].split(n_boxes_per_image)
    score_list = [torch.nn.functional.softmax(x, -1) for x in score_list]
    feats = output[0][feat_name].split(n_boxes_per_image)
    cur_device = score_list[0].device

    feat_list = []
    boxes_list = []
    classes_list = []
    conf_list = []

    for i in range(batch_size):
        # Move the boxes next to the scores before rescaling them.
        dets = output[0]["proposals"][i].bbox.to(cur_device) / im_scales[i]
        scores = score_list[i]

        max_conf = torch.zeros((scores.shape[0])).to(cur_device)

        if get_boxes:
            max_cls = torch.zeros((scores.shape[0]),
                                  dtype=torch.long).to(cur_device)
            max_box = torch.zeros((scores.shape[0], 4)).to(cur_device)

        for cls_ind in range(1, scores.shape[1]):
            cls_scores = scores[:, cls_ind]
            keep = nms(dets, cls_scores, 0.5)
            # Survivors whose score for this class beats their best so far.
            # Computed once, before max_conf is overwritten below.
            improved = cls_scores[keep] > max_conf[keep]

            if get_boxes:
                max_cls[keep] = torch.where(
                    improved,
                    torch.tensor(cls_ind).to(cur_device), max_cls[keep])

                max_box[keep] = torch.where(
                    improved.view(-1, 1), dets[keep], max_box[keep])

            max_conf[keep] = torch.where(improved, cls_scores[keep],
                                         max_conf[keep])

        keep_boxes = torch.argsort(max_conf, descending=True)[:max_boxes]
        feat_list.append(feats[i][keep_boxes])

        if get_boxes:
            conf_list.append(max_conf[keep_boxes])
            boxes_list.append(max_box[keep_boxes])
            classes_list.append(max_cls[keep_boxes])

    # Return only after every image has been processed; returning from inside
    # the loop would drop all images but the first.
    if not get_boxes:
        return feat_list

    return [boxes_list, feat_list, classes_list, conf_list]
Beispiel #4
0
    def _process_feature_extraction(self,
                                    output,
                                    im_scales,
                                    im_infos,
                                    feature_name="fc6",
                                    conf_thresh=0):
        """Return the features and metadata of all proposals of every image.

        No box selection takes place: every proposal of an image is kept.
        (An earlier per-class NMS pass computed confidences that were never
        read; it has been dropped without changing the returned values.)

        Args:
            output: Detector output. ``output[0]`` must provide "proposals"
                (one box-list per image supporting ``.resize`` and ``.bbox``),
                plus "scores" and ``feature_name`` tensors concatenated over
                all images.
            im_scales: Unused; kept for interface compatibility.
            im_infos: Per-image dicts with "width" and "height" entries; the
                proposals are resized to that size.
            feature_name: Key of the feature tensor inside ``output[0]``.
            conf_thresh: Unused; kept for interface compatibility.

        Returns:
            Tuple ``(feat_list, info_list)`` with one entry per image: the
            feature tensor of all its proposals, and a dict holding "bbox",
            "num_boxes", "objects", "image_width", "image_height" and
            "cls_prob".
        """
        proposals = output[0]["proposals"]
        n_boxes_per_image = [len(boxes) for boxes in proposals]
        score_list = output[0]["scores"].split(n_boxes_per_image)
        score_list = [torch.nn.functional.softmax(x, -1) for x in score_list]
        feats = output[0][feature_name].split(n_boxes_per_image)

        # Column 0 of the scores matrix is for the background class; it only
        # takes part in the label prediction when explicitly requested.
        start_index = 0 if self.args.background else 1

        feat_list = []
        info_list = []

        for i, proposal in enumerate(proposals):
            scores = score_list[i]
            feat_list.append(feats[i])

            width = im_infos[i]["width"]
            height = im_infos[i]["height"]
            bbox = proposal.resize((width, height)).bbox
            # Predict the class label using the scores. The index is relative
            # to start_index.
            objects = torch.argmax(scores[:, start_index:], dim=1)

            info_list.append({
                "bbox": bbox.cpu().numpy(),
                "num_boxes": len(feats[i]),
                "objects": objects.cpu().numpy(),
                "image_width": width,
                "image_height": height,
                "cls_prob": scores.cpu().numpy(),
            })

        return feat_list, info_list
Beispiel #5
0
    def _process_feature_extraction(self,
                                    output,
                                    im_scales,
                                    im_infos,
                                    feature_name="fc6",
                                    conf_thresh=0):
        """Select the top boxes per image; return their features and normalized boxes.

        For each image, NMS (threshold 0.5) is run once per class. Every box
        remembers the highest class probability above ``conf_thresh`` for
        which it survived NMS, and the ``self.args.num_features`` boxes with
        the highest such confidence are kept.

        Args:
            output: Detector output. ``output[0]`` must provide "proposals"
                (one box-list per image exposing ``.bbox``), plus "scores"
                and ``feature_name`` tensors concatenated over all images.
            im_scales: Per-image scale factors; box coordinates are divided
                by them.
            im_infos: Per-image dicts with "width" and "height" entries,
                used to normalize the box coordinates.
            feature_name: Key of the feature tensor inside ``output[0]``.
            conf_thresh: Minimum class probability a box must exceed for
                that class to count towards its confidence.

        Returns:
            Tuple ``(feat_list, info_list)`` with one entry per image: the
            selected feature rows, and the matching box tensor with x
            coordinates divided by the image width and y coordinates by the
            image height.
        """
        batch_size = len(output[0]["proposals"])
        n_boxes_per_image = [len(boxes) for boxes in output[0]["proposals"]]
        score_list = output[0]["scores"].split(n_boxes_per_image)
        score_list = [torch.nn.functional.softmax(x, -1) for x in score_list]
        feats = output[0][feature_name].split(n_boxes_per_image)
        cur_device = score_list[0].device

        feat_list = []
        info_list = []

        for i in range(batch_size):
            dets = output[0]["proposals"][i].bbox / im_scales[i]
            scores = score_list[i]
            max_conf = torch.zeros((scores.shape[0])).to(cur_device)
            conf_thresh_tensor = torch.full_like(max_conf, conf_thresh)
            # Column 0 of the scores matrix is for the background class
            start_index = 0 if self.args.background else 1
            for cls_ind in range(start_index, scores.shape[1]):
                cls_scores = scores[:, cls_ind]
                keep = nms(dets, cls_scores, 0.5)
                max_conf[keep] = torch.where(
                    # Better than max one till now and minimally greater than conf_thresh
                    (cls_scores[keep] > max_conf[keep])
                    & (cls_scores[keep] > conf_thresh_tensor[keep]),
                    cls_scores[keep],
                    max_conf[keep],
                )

            _, sorted_indices = torch.sort(max_conf, descending=True)
            keep_boxes = sorted_indices[:self.args.num_features]
            feat_list.append(feats[i][keep_boxes])
            bbox = output[0]["proposals"][i][keep_boxes].bbox / im_scales[i]
            # Normalize the boxes (to 0 ~ 1)
            img_h, img_w = im_infos[i]['height'], im_infos[i]['width']
            bbox[:, (0, 2)] /= img_w
            bbox[:, (1, 3)] /= img_h
            info_list.append(bbox)

        return feat_list, info_list
Beispiel #6
0
    def _features_extraction(self, output,
                                 im_scales,
                                 feature_name='fc6',
                                 conf_thresh=0.5):
        """Return, per image, the features of the 100 most confident boxes.

        A box's confidence is the highest non-background class probability
        for which it survives per-class NMS (threshold 0.5).

        Args:
            output: Detector output. ``output[0]`` must provide "proposals"
                (one box-list per image exposing ``.bbox``), plus "scores"
                and ``feature_name`` tensors concatenated over all images.
            im_scales: Per-image scale factors; box coordinates are divided
                by them.
            feature_name: Key of the feature tensor inside ``output[0]``.
            conf_thresh: Accepted for interface compatibility; the current
                implementation does not filter boxes by this threshold.

        Returns:
            List with one feature tensor per image, holding the rows of the
            selected boxes ordered by decreasing confidence.
        """
        batch_size = len(output[0]["proposals"])
        # list[num_of_boxes_per_image]
        n_boxes_per_image = [len(_) for _ in output[0]["proposals"]]
        # list[Tensor: (n_boxes_per_image, num_classes)]
        score_list = output[0]["scores"].split(n_boxes_per_image)
        score_list = [torch.nn.functional.softmax(x, -1) for x in score_list]
        # list[Tensor]: one chunk of feature rows per image
        features = output[0][feature_name].split(n_boxes_per_image)
        # selected features, one tensor per image
        features_list = []

        for i in range(batch_size):
            # divide the boxes by the image scale (presumably mapping them
            # back to the original image coordinates)
            dets = output[0]["proposals"][i].bbox / im_scales[i]
            # Tensor: (n_boxes_per_image, num_classes)
            scores = score_list[i]
            # Tensor: (n_boxes_per_image, )
            # max_conf records, for every box, the highest class probability
            # for which the box survived NMS; boxes that never survive keep 0.
            # It lives on the same device as the scores so the masked updates
            # below cannot hit a device mismatch.
            max_conf = torch.zeros((scores.shape[0])).to(scores.device)

            for cls_ind in range(1, scores.shape[1]):
                # Tensor: (n_boxes_per_image, )
                # scores of all boxes for the current class
                cls_scores = scores[:, cls_ind]
                # indices of the boxes that survive NMS for this class
                keep = nms(dets, cls_scores, 0.5)
                max_conf[keep] = torch.where(cls_scores[keep] > max_conf[keep],
                                             cls_scores[keep],
                                             max_conf[keep])

            # select the (at most) 100 boxes with the highest confidence
            keep_boxes = torch.argsort(max_conf, descending=True)[:100]
            features_per_image = features[i][keep_boxes]
            features_list.append(features_per_image)

        return features_list
Beispiel #7
0
    def _process_feature_extraction(self,
                                    output,
                                    im_scales,
                                    feature_name="fc6",
                                    conf_thresh=0.2):
        """Pick the ``self.NUM_FEATURES`` most confident boxes of every image.

        A box's confidence is the highest non-background class probability
        for which it survives per-class NMS (threshold 0.5). ``conf_thresh``
        is accepted but not used.

        Returns a tuple ``(feat_list, info_list)`` with one entry per image:
        the selected feature rows, and a dict with the rescaled "bbox", the
        arg-max class index "objects" (taken over all score columns) and the
        rescaled "image_width" / "image_height".
        """
        result = output[0]
        proposals = result["proposals"]
        box_counts = [len(boxes) for boxes in proposals]
        probs_per_image = [
            torch.nn.functional.softmax(chunk, -1)
            for chunk in result["scores"].split(box_counts)
        ]
        feats_per_image = result[feature_name].split(box_counts)
        device = probs_per_image[0].device

        feat_list, info_list = [], []
        for idx, proposal in enumerate(proposals):
            scale = im_scales[idx]
            probs = probs_per_image[idx]
            boxes = proposal.bbox / scale
            best = torch.zeros((probs.shape[0])).to(device)

            # Column 0 is skipped; every other class gets its own NMS pass.
            for cls_ind in range(1, probs.shape[1]):
                class_probs = probs[:, cls_ind]
                survivors = nms(boxes, class_probs, 0.5)
                candidate = class_probs[survivors]
                current = best[survivors]
                best[survivors] = torch.where(candidate > current,
                                              candidate, current)

            top = torch.argsort(best, descending=True)[:self.NUM_FEATURES]
            feat_list.append(feats_per_image[idx][top])

            top_boxes = proposal[top].bbox / scale
            labels = torch.argmax(probs[top], dim=1)
            info_list.append({
                "bbox": top_boxes.cpu().numpy(),
                "objects": labels.cpu().numpy(),
                "image_width": proposal.size[0] / scale,
                "image_height": proposal.size[1] / scale,
            })

        return feat_list, info_list
Beispiel #8
0
def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4, method=-1):
    """
    Removes detections with lower object confidence score than 'conf_thres'
    and runs Non-Maximum Suppression to further filter detections.

    The first four columns of every prediction are read as
    (center x, center y, width, height) and converted to (x1, y1, x2, y2);
    column 4 is the confidence used for both filtering and suppression.
    ``method == -1`` selects ``nms``; any other value is forwarded to
    ``soft_nms`` as its ``method``.

    Returns a list with one entry per image: the surviving rows, or ``None``
    when no detection survives. Rows are documented upstream as
    (x1, y1, x2, y2, object_conf, class_score, class_pred); this function
    itself only rewrites the first four columns and passes the rest through.
    """
    output = [None for _ in range(len(prediction))]
    for image_i, pred in enumerate(prediction):
        # Filter out confidence scores below threshold. Mask indexing returns
        # a copy, so the in-place box conversion below leaves the caller's
        # tensor untouched.
        pred = pred[pred[:, 4] > conf_thres]

        # If none are remaining => process next image
        if not pred.shape[0]:
            continue

        # From (center x, center y, width, height) to (x1, y1, x2, y2)
        pred[:, :4] = xywh2xyxy(pred[:, :4])

        # Non-maximum suppression
        if method == -1:
            nms_indices = nms(pred[:, :4], pred[:, 4], nms_thres)
        else:
            dets = pred[:, :5].clone().contiguous().data.cpu().numpy()
            nms_indices = soft_nms(dets, Nt=nms_thres, method=method)
        det_max = pred[nms_indices]

        # Each image is visited exactly once, so its slot is still None here
        # and can simply be assigned.
        if len(det_max) > 0:
            output[image_i] = det_max

    return output
    def _process_feature_extraction(self, output, im_scales, feat_name='fc6'):
        """Return the features and boxes of the 100 most confident proposals.

        Only the first image is handled: the boxes come from
        ``output[0]['proposals'][0]`` and are rescaled with ``im_scales[0]``,
        while the scores and features are used as a whole (so a batch of one
        image is assumed).

        Args:
            output: Detector output; ``output[0]`` must provide "proposals",
                "scores" and ``feat_name``.
            im_scales: Scale factors; only the first entry is used.
            feat_name: Key of the feature tensor inside ``output[0]``.

        Returns:
            Tuple ``(features, boxes)`` for the selected proposals, ordered
            by decreasing confidence. The boxes are returned in the
            detector's coordinates, i.e. not divided by the image scale.
        """
        bboxes = output[0]['proposals'][0].bbox
        scores = F.softmax(output[0]['scores'], dim=1)
        # Honour the requested feature instead of always reading 'fc6'.
        feats = output[0][feat_name]

        dets = bboxes / im_scales[0]

        # Allocate next to the scores so the code also runs without CUDA.
        max_conf = torch.zeros(scores.shape[0], device=scores.device)

        # Column 0 is skipped; every other class gets its own NMS pass and
        # each box keeps the best score for which it survived.
        for cls_ind in range(1, scores.shape[1]):
            cls_scores = scores[:, cls_ind]
            keep = nms(dets, cls_scores, 0.5)
            max_conf[keep] = torch.where(cls_scores[keep] > max_conf[keep],
                                         cls_scores[keep], max_conf[keep])

        keep_boxes = torch.argsort(max_conf, descending=True)[:100]
        return feats[keep_boxes], bboxes[keep_boxes]
Beispiel #10
0
    def _process_nms_bbox(self, output, point, im_scale, feat_name):
        """Return features and boxes of the proposals that contain ``point``.

        Boxes of the first image are divided by ``im_scale`` and those not
        containing the point are dropped. Of the remainder, only boxes whose
        best non-background score survived per-class NMS (threshold 0.5) for
        that same class are kept, sorted by decreasing score.

        Args:
            output: Detector output; ``output[0]`` must provide "proposals",
                "scores" and ``feat_name``.
            point: Mapping with "x" and "y" coordinates, compared against
                the rescaled boxes.
            im_scale: Scale factor the boxes are divided by.
            feat_name: Key of the feature tensor inside ``output[0]``.

        Returns:
            Tuple ``(features, boxes)``. If no box contains the point, zero
            tensors of shape (100, 2048) and (100, 4) are returned instead.
        """
        bboxes = output[0]['proposals'][0].bbox
        scores = output[0]['scores']
        feat = output[0][feat_name]

        bboxes = bboxes / im_scale

        scores = F.softmax(scores, dim=1)

        xmin, ymin, xmax, ymax = bboxes.split(1, dim=1)
        filt_pt = ((point['x'] >= xmin) & (point['x'] <= xmax) &
                   (point['y'] >= ymin) & (point['y'] <= ymax)).flatten()

        bboxes, scores, feat = bboxes[filt_pt], scores[filt_pt], feat[filt_pt]

        # check if no bounding boxes contain that point
        if bboxes.nelement() == 0:
            # NOTE(review): this fallback is created on the CPU with a
            # hard-coded shape, unlike the regular return value; confirm that
            # callers expect this.
            return torch.zeros(100, 2048), torch.zeros(100, 4)

        # perform nms; allocate next to the scores so CUDA is not required
        max_conf = torch.zeros(scores.shape[0], device=scores.device)

        for cls_ind in range(1, scores.shape[1]):
            cls_scores = scores[:, cls_ind]
            keep = nms(bboxes, cls_scores, 0.5)
            max_conf[keep] = torch.where(cls_scores[keep] > max_conf[keep],
                                         cls_scores[keep], max_conf[keep])

        # A box is kept only if its overall best non-background score is the
        # one that survived NMS (exact equality is intended: both values are
        # read from the same score tensor).
        max_scores, _ = scores[:, 1:].max(dim=-1)
        filt_nms = (max_scores == max_conf)

        bboxes, scores, feat = bboxes[filt_nms], scores[filt_nms], feat[
            filt_nms]
        max_scores, _ = scores[:, 1:].max(dim=-1)
        sorted_boxes = torch.argsort(max_scores, descending=True)

        return feat[sorted_boxes], bboxes[sorted_boxes]
Beispiel #11
0
    def _process_feature_extraction(
        self,
        output: torch.Tensor,
        im_scales: List[float],
        im_infos: List[Dict[str, int]],
        feature_name: str = "fc6",
        conf_thresh: int = 0,
    ):
        """
        Select the most confident boxes per image and collect their features.

        Every box is assigned the highest non-background class probability
        that exceeds ``conf_thresh`` and for which the box survives per-class
        NMS (threshold 0.5). The ``self.num_features`` boxes with the highest
        such confidence are kept.

        :param output:
            output from the detection model; ``output[0]`` is indexed with
            "proposals", "scores" and ``feature_name``
        :param im_scales:
            list of scales for the processed images
        :param im_infos:
            list of dicts containing width/height for images
        :param feature_name:
            which feature to extract for the image
        :param conf_thresh:
            minimum class probability a box has to exceed to be scored

        :return (feature_list, info_list):
            list of selected image features, and list of dicts with "bbox",
            "num_boxes", "objects", "cls_prob", "image_width" and
            "image_height" for each image
        """
        from maskrcnn_benchmark.layers import nms

        detections = output[0]
        proposals = detections["proposals"]
        box_counts = [len(boxes) for boxes in proposals]
        probs_per_image = [
            torch.nn.functional.softmax(chunk, -1)
            for chunk in detections["scores"].split(box_counts)
        ]
        feats_per_image = detections[feature_name].split(box_counts)
        device = probs_per_image[0].device

        # Column 0 of the scores matrix is for the background class
        first_class = 1

        feat_list = []
        info_list = []
        for idx, proposal in enumerate(proposals):
            scale = im_scales[idx]
            probs = probs_per_image[idx]
            boxes = proposal.bbox / scale
            best = torch.zeros(probs.shape[0]).to(device)
            threshold = torch.full_like(best, conf_thresh)

            for cls_ind in range(first_class, probs.shape[1]):
                class_probs = probs[:, cls_ind]
                survivors = nms(boxes, class_probs, 0.5)
                candidate = class_probs[survivors]
                current = best[survivors]
                # Take the candidate only if it beats both the running best
                # and the confidence threshold.
                accept = (candidate > current) & (candidate > threshold[survivors])
                best[survivors] = torch.where(accept, candidate, current)

            ranked_conf, ranked_idx = torch.sort(best, descending=True)
            top = ranked_idx[: self.num_features]
            n_confident = (ranked_conf[: self.num_features] != 0).sum()
            feat_list.append(feats_per_image[idx][top])

            top_boxes = proposal[top].bbox / scale
            # Class probabilities of the kept boxes without the background
            # column; the predicted label is their arg-max.
            top_probs = probs[top][:, first_class:]
            labels = torch.argmax(top_probs, dim=1)

            info_list.append(
                {
                    "bbox": top_boxes.cpu().numpy(),
                    "num_boxes": n_confident.item(),
                    "objects": labels.cpu().numpy(),
                    "cls_prob": top_probs.cpu().numpy(),
                    "image_width": im_infos[idx]["width"],
                    "image_height": im_infos[idx]["height"],
                }
            )

        return feat_list, info_list