Example No. 1
    def threshold_bbox(self, proposal_bbox_inst, thres=0.7, proposal_type="roih"):
        if proposal_type == "rpn":
            valid_map = proposal_bbox_inst.objectness_logits > thres

            # create instances containing boxes and objectness logits
            image_shape = proposal_bbox_inst.image_size
            new_proposal_inst = Instances(image_shape)

            # create box
            new_bbox_loc = proposal_bbox_inst.proposal_boxes.tensor[valid_map, :]
            new_boxes = Boxes(new_bbox_loc)

            # add boxes to instances
            new_proposal_inst.gt_boxes = new_boxes
            new_proposal_inst.objectness_logits = proposal_bbox_inst.objectness_logits[
                valid_map
            ]
        elif proposal_type == "roih":
            valid_map = proposal_bbox_inst.scores > thres

            # create instances containing boxes and gt_classes
            image_shape = proposal_bbox_inst.image_size
            new_proposal_inst = Instances(image_shape)

            # create box
            new_bbox_loc = proposal_bbox_inst.pred_boxes.tensor[valid_map, :]
            new_boxes = Boxes(new_bbox_loc)

            # add boxes to instances
            new_proposal_inst.gt_boxes = new_boxes
            new_proposal_inst.gt_classes = proposal_bbox_inst.pred_classes[valid_map]
            new_proposal_inst.scores = proposal_bbox_inst.scores[valid_map]

        return new_proposal_inst
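For reference, a minimal standalone sketch of the same "roih"-style filtering, assuming only detectron2's Instances/Boxes API; the dummy boxes, scores and threshold below are illustrative, not from the original source:

import torch
from detectron2.structures import Boxes, Instances

# Dummy predictions: 3 boxes with scores; only those above 0.7 are kept.
pred = Instances((480, 640))
pred.pred_boxes = Boxes(torch.tensor([[10., 10., 50., 50.],
                                      [20., 20., 60., 60.],
                                      [30., 30., 70., 70.]]))
pred.scores = torch.tensor([0.9, 0.5, 0.8])
pred.pred_classes = torch.tensor([1, 2, 3])

keep = pred.scores > 0.7
pseudo = Instances(pred.image_size)
pseudo.gt_boxes = Boxes(pred.pred_boxes.tensor[keep])
pseudo.gt_classes = pred.pred_classes[keep]
pseudo.scores = pred.scores[keep]
print(len(pseudo))  # 2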
Example No. 2
def doit(raw_image, raw_boxes, predictor):
    # Process boxes
    raw_boxes = Boxes(torch.from_numpy(raw_boxes).cuda())

    with torch.no_grad():
        raw_height, raw_width = raw_image.shape[:2]
        # print("Original image size: ", (raw_height, raw_width))

        # Preprocessing
        image = predictor.transform_gen.get_transform(raw_image).apply_image(raw_image)
        # print("Transformed image size: ", image.shape[:2])

        # Scale the box
        new_height, new_width = image.shape[:2]
        scale_x = 1. * new_width / raw_width
        scale_y = 1. * new_height / raw_height
        #print(scale_x, scale_y)
        boxes = raw_boxes.clone()
        boxes.scale(scale_x=scale_x, scale_y=scale_y)

        # ----
        image = torch.as_tensor(image.astype("float32").transpose(2, 0, 1))
        inputs = [{"image": image, "height": raw_height, "width": raw_width}]
        images = predictor.model.preprocess_image(inputs)

        # Run Backbone Res1-Res4
        features = predictor.model.backbone(images.tensor)

        # Run RoI head for each proposal (RoI Pooling + Res5)
        proposal_boxes = [boxes]
        features = [features[f] for f in predictor.model.roi_heads.in_features]
        box_features = predictor.model.roi_heads._shared_roi_transform(
            features, proposal_boxes
        )
        feature_pooled = box_features.mean(dim=[2, 3])  # pooled to 1x1
        # print('Pooled features size:', feature_pooled.shape)

        # Predict classes and boxes for each proposal
        pred_class_logits, pred_attr_logits, pred_proposal_deltas = predictor.model.roi_heads.box_predictor(
            feature_pooled)
        pred_class_prob = nn.functional.softmax(pred_class_logits, -1)
        pred_scores, pred_classes = pred_class_prob[..., :-1].max(-1)

        attr_prob = pred_attr_logits[..., :-1].softmax(-1)
        max_attr_prob, max_attr_label = attr_prob.max(-1)

        # Detectron2 Formatting (for visualization only)
        roi_features = feature_pooled
        instances = Instances(
            image_size=(raw_height, raw_width),
            pred_boxes=raw_boxes,
            scores=pred_scores,
            pred_classes=pred_classes,
            attr_scores=max_attr_prob,
            attr_classes=max_attr_label
        )

        return instances, roi_features
Example No. 3
def generate_poposals(images, model, score_threshold=0):
    inputs = [{
        "image": torch.as_tensor(image.astype("float32").transpose(2, 0, 1)),
        "height": image.shape[0],
        "width": image.shape[1],
    } for image in images]

    with torch.no_grad():
        images = model.preprocess_image(inputs)
        features = model.backbone(images.tensor)
        proposals, _ = model.proposal_generator(images, features, None)

        features_ = [features[f] for f in model.roi_heads.box_in_features]
        box_features = model.roi_heads.box_pooler(
            features_, [x.proposal_boxes for x in proposals])
        box_features = model.roi_heads.box_head(box_features)
        proposals_scores, proposals_deltas = model.roi_heads.box_predictor(
            box_features)

        boxes_tensors = model.roi_heads.box_predictor.predict_boxes(
            (proposals_scores, proposals_deltas), proposals)
        scores = model.roi_heads.box_predictor.predict_probs(
            (proposals_scores, proposals_deltas), proposals)

        result = []
        for i in range(len(inputs)):
            image_size = proposals[i].image_size
            num_bbox_reg_classes = boxes_tensors[i].shape[1] // 4
            boxes = Boxes(boxes_tensors[i].reshape(-1, 4))
            boxes.clip(image_size)
            boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)

            img_scores = scores[i][:, :-1]
            max_scores, pred_classes = torch.max(img_scores, dim=1)

            keep_mask = max_scores > score_threshold
            filtered_scores = img_scores[keep_mask, :]
            filtered_max_scores = max_scores[keep_mask]
            filtered_pred_classes = pred_classes[keep_mask]
            boxes = boxes[keep_mask, filtered_pred_classes, :]

            result_instance = Instances(image_size)
            result_instance.pred_boxes = Boxes(boxes)
            result_instance.scores = filtered_max_scores
            result_instance.pred_classes = filtered_pred_classes
            result_instance.class_distributions = filtered_scores
            result.append(result_instance)

    return result
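A possible way to drive generate_poposals end to end (a sketch, not from the original source): it assumes a standard detectron2 model-zoo Faster R-CNN config and an image file named input.jpg; the config name and threshold are chosen for illustration only.

import cv2
from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"))
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml")
# cfg.MODEL.DEVICE = "cpu"  # uncomment to run without a GPU
predictor = DefaultPredictor(cfg)

image = cv2.imread("input.jpg")  # HWC, BGR, matching the default cfg.INPUT.FORMAT
instances = generate_poposals([image], predictor.model, score_threshold=0.5)[0]
print(len(instances), instances.pred_boxes, instances.class_distributions.shape)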
Example No. 4
    def get_instance(self, frame_id):
        if frame_id not in self.frame_ids:
            return None
        else:
            i = self.frame_ids.index(frame_id)
            if self.proposal_instances[i] is not None:
                return self.proposal_instances[i]
            else:
                # This has been a skipped frame ... interpolate box from the neighboring instances
                index_before = index_after = i
                while self.proposal_instances[index_before] is None and index_before > 0:
                    index_before -= 1
                while (index_after < len(self.proposal_instances) - 1
                       and self.proposal_instances[index_after] is None):
                    index_after += 1
                instance_before = self.proposal_instances[index_before]
                instance_after = self.proposal_instances[index_after]

                interpolation_factor = (i - index_before) / (index_after -
                                                             index_before)

                interpolated_instance = Instances(instance_before.image_size)
                interpolated_instance.pred_boxes = Boxes(
                    instance_before.pred_boxes.tensor + interpolation_factor *
                    (instance_after.pred_boxes.tensor -
                     instance_before.pred_boxes.tensor))
                interpolated_instance.scores = torch.tensor([0])
                interpolated_instance.pred_classes = instance_before.pred_classes
                interpolated_instance.class_distributions = instance_before.class_distributions
                interpolated_instance.generation_process = ["I"]
                return interpolated_instance
Example No. 5
def get_box_union(boxes: Boxes):
    """Merge all boxes into a single box"""
    if len(boxes) == 0:
        return boxes
    bt = boxes.tensor
    union_bt = torch.cat(
        (torch.min(bt[:, :2], 0).values, torch.max(bt[:, 2:], 0).values)
    ).reshape(1, -1)
    return Boxes(union_bt)
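A quick sanity check of get_box_union, assuming detectron2's Boxes; the input values below are illustrative:

import torch
from detectron2.structures import Boxes

boxes = Boxes(torch.tensor([[10., 20., 30., 40.],
                            [ 5., 25., 35., 38.]]))
union = get_box_union(boxes)
print(union.tensor)  # tensor([[ 5., 20., 35., 40.]])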
Example No. 6
def get2d2box(box):

    xmin = min(box[0])
    xmax = max(box[0])
    ymin = min(box[1])
    ymax = max(box[1])

    return Boxes(
        torch.as_tensor([[xmin, ymin, xmax, ymax]],
                        dtype=torch.float32,
                        device='cuda'))
Example No. 7
def get2d2boxes(boxes):
    list_boxes = []

    for box in boxes:
        xmin = min(box[0])
        xmax = max(box[0])
        ymin = min(box[1])
        ymax = max(box[1])
        list_boxes.append([xmin, ymin, xmax, ymax])

    return Boxes(
        torch.as_tensor(list_boxes, dtype=torch.float32, device='cuda'))
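For example, given polygon-style boxes stored as (x-coordinates, y-coordinates) pairs, the helper above collapses each one to its axis-aligned bounding box. A small sketch; the input layout is assumed from how box[0] and box[1] are used, and a CUDA device is required because device='cuda' is hard-coded:

# Each entry is (xs, ys) of an arbitrary quadrilateral.
quads = [
    ([12.0, 48.0, 50.0, 10.0], [8.0, 6.0, 40.0, 42.0]),
    ([100.0, 140.0, 138.0, 102.0], [60.0, 62.0, 98.0, 96.0]),
]
boxes = get2d2boxes(quads)
print(boxes.tensor)
# tensor([[ 10.,   6.,  50.,  42.],
#         [100.,  60., 140.,  98.]], device='cuda:0')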
Example No. 8
    def test_clip_area_0_degree(self):
        for _ in range(50):
            num_boxes = 100
            boxes_5d = torch.zeros(num_boxes, 5)
            boxes_5d[:, 0] = torch.FloatTensor(num_boxes).uniform_(-100, 500)
            boxes_5d[:, 1] = torch.FloatTensor(num_boxes).uniform_(-100, 500)
            boxes_5d[:, 2] = torch.FloatTensor(num_boxes).uniform_(0, 500)
            boxes_5d[:, 3] = torch.FloatTensor(num_boxes).uniform_(0, 500)
            # Convert from (x_ctr, y_ctr, w, h, 0) to  (x1, y1, x2, y2)
            boxes_4d = torch.zeros(num_boxes, 4)
            boxes_4d[:, 0] = boxes_5d[:, 0] - boxes_5d[:, 2] / 2.0
            boxes_4d[:, 1] = boxes_5d[:, 1] - boxes_5d[:, 3] / 2.0
            boxes_4d[:, 2] = boxes_5d[:, 0] + boxes_5d[:, 2] / 2.0
            boxes_4d[:, 3] = boxes_5d[:, 1] + boxes_5d[:, 3] / 2.0

            image_size = (500, 600)
            test_boxes_4d = Boxes(boxes_4d)
            test_boxes_5d = RotatedBoxes(boxes_5d)
            # Before clip
            areas_4d = test_boxes_4d.area()
            areas_5d = test_boxes_5d.area()
            self.assertTrue(torch.allclose(areas_4d, areas_5d, atol=1e-1, rtol=1e-5))
            # After clip
            test_boxes_4d.clip(image_size)
            test_boxes_5d.clip(image_size)
            areas_4d = test_boxes_4d.area()
            areas_5d = test_boxes_5d.area()
            self.assertTrue(torch.allclose(areas_4d, areas_5d, atol=1e-1, rtol=1e-5))
Example No. 9
    def highest_only(self, predict):
        instance = predict["instances"].to(self.cpu_device)
        image_size = instance.image_size
        get_scores = instance.get("scores")
        pred_classes_index = []
        if len(get_scores.tolist()) != 0:
            _, highest_index = torch.max(get_scores, 0)
            pred_classes_index.append(highest_index)
        pred_classes = self.tensor_transform(instance.get("pred_classes"),
                                             pred_classes_index)
        scores = self.tensor_transform(instance.get("scores"),
                                       pred_classes_index)
        pred_boxes = Boxes(
            self.tensor_transform(
                instance.get("pred_boxes").tensor, pred_classes_index))
        return Instances(image_size=image_size,
                         pred_boxes=pred_boxes,
                         scores=scores,
                         pred_classes=pred_classes)
Example No. 10
    def generate_instance(self, instance: Instances, class_names: List[str], total_classes: List[str]):
        instance = instance.to('cpu')
        boxes = instance.pred_boxes.tensor.numpy()
        masks = None
        scores = instance.scores.numpy()

        if instance.has("pred_masks"):
            masks = instance.pred_masks.numpy()
        for index, name in enumerate(class_names):
            if name not in total_classes:
                boxes[index:index+1] = 0
                scores[index] = 0
                if masks is not None:
                    masks[index:index+1] = False

        instance.pred_boxes = Boxes(torch.from_numpy(boxes))
        if masks is not None:
            instance.pred_masks = torch.from_numpy(masks)
        instance.scores = torch.from_numpy(scores)
        return instance
Example No. 11
def flaw_only(predict):
    '''
    The predictions contain normal components, watermarks, and flaws; keep only the
    flaw detections here and drop everything else.
    :param predict: the model's raw prediction output: many rectangular boxes, each with
                    its position and size (top-left and bottom-right corners), its
                    predicted class and score, a pixel-level boolean mask inside the box,
                    and so on
    :return: the filtered predictions
    '''
    cpu_device = torch.device("cpu")
    instance = predict["instances"].to(cpu_device)
    image_size = instance.image_size
    get_pred_classes = instance.get("pred_classes").numpy()
    pred_classes_index = []
    pred_classes = []
    for c in range(len(get_pred_classes)):
        if get_pred_classes[c] != 0 and get_pred_classes[c] != 1:
            pred_classes_index.append(c)
            pred_classes.append(get_pred_classes[c])
    pred_classes = torch.from_numpy(np.asarray(pred_classes))
    scores = tensor_transform(instance.get("scores"), pred_classes_index)
    pred_masks = tensor_transform(instance.get("pred_masks"), pred_classes_index)
    pred_boxes = Boxes(tensor_transform(instance.get("pred_boxes").tensor, pred_classes_index))
    return Instances(image_size=image_size, pred_boxes=pred_boxes, scores=scores, pred_classes=pred_classes,
                     pred_masks=pred_masks)
Example No. 12
    def flaw_only(self, predict):
        instance = predict["instances"].to(self.cpu_device)
        image_size = instance.image_size
        get_pred_classes = instance.get("pred_classes").numpy()
        pred_classes_index = []
        pred_classes = []
        for c in range(len(get_pred_classes)):
            if get_pred_classes[c] != 0 and get_pred_classes[c] != 1:
                pred_classes_index.append(c)
                pred_classes.append(get_pred_classes[c])
        pred_classes = torch.from_numpy(np.asarray(pred_classes))
        scores = self.tensor_transform(instance.get("scores"),
                                       pred_classes_index)
        pred_masks = self.tensor_transform(instance.get("pred_masks"),
                                           pred_classes_index)
        pred_boxes = Boxes(
            self.tensor_transform(
                instance.get("pred_boxes").tensor, pred_classes_index))
        return Instances(image_size=image_size,
                         pred_boxes=pred_boxes,
                         scores=scores,
                         pred_classes=pred_classes,
                         pred_masks=pred_masks)
Example No. 13
    def project_proposal_instance(self, frame_index):
        if len(self.proposal_instances) == 1:
            # We consider the first frame, there is nothing to project here
            return self.proposal_instances[0]

        instance_index_current = self.last_key_instance_index[-1]
        instance_index_before = self.last_key_instance_index[-2]
        frame_index_current = self.frame_ids[instance_index_current]

        assert frame_index >= frame_index_current
        if frame_index == frame_index_current:
            # We are replacing the most recent proposal instance
            if len(self.proposal_instances) < 3:
                instance_index_current = self.last_key_instance_index[-2]
                return self.proposal_instances[instance_index_current]
            else:
                instance_index_current = self.last_key_instance_index[-2]
                instance_index_before = self.last_key_instance_index[-3]
                frame_index_current = self.frame_ids[instance_index_current]

        instance_current = self.proposal_instances[instance_index_current]
        instance_before = self.proposal_instances[instance_index_before]

        centers_current = instance_current.pred_boxes.get_centers()
        centers_before = instance_before.pred_boxes.get_centers()
        centers_delta = (centers_current - centers_before) / (
            instance_index_current -
            instance_index_before) * (frame_index - frame_index_current)

        projected_instance = Instances(instance_current.image_size)
        projected_instance.pred_boxes = Boxes(
            instance_current.pred_boxes.tensor + centers_delta.repeat(1, 2))
        projected_instance.scores = instance_current.scores
        projected_instance.pred_classes = instance_current.pred_classes
        projected_instance.class_distributions = instance_current.class_distributions
        projected_instance.generation_process = ["P"]
        return projected_instance
Example No. 14
    def draw_instance_predictions(self, predictions, category=None):
        """
        Draw instance-level prediction results on an image.

        Args:
            predictions (Instances): the output of an instance detection/segmentation
                model. Following fields will be used to draw:
                "pred_boxes", "pred_classes", "scores", "pred_masks" (or "pred_masks_rle").
            category (list[int] or None): integer category ids to draw;
                None draws all detections.

        Returns:
            output (VisImage): image object with visualizations.
        """

        # start additional code
        if category is None:
            boxes = predictions.pred_boxes if predictions.has(
                "pred_boxes") else None
            scores = predictions.scores if predictions.has("scores") else None
            classes = predictions.pred_classes if predictions.has(
                "pred_classes") else None
            labels = self._create_text_labels(
                classes, scores, self.metadata.get("thing_classes", None))
            keypoints = predictions.pred_keypoints if predictions.has(
                "pred_keypoints") else None
        else:
            all_boxes = predictions.pred_boxes if predictions.has(
                "pred_boxes") else None
            all_scores = predictions.scores if predictions.has(
                "scores") else None
            all_classes = predictions.pred_classes if predictions.has(
                "pred_classes") else None
            all_labels = self._create_text_labels(
                all_classes, all_scores,
                self.metadata.get("thing_classes", None))
            all_keypoints = predictions.pred_keypoints if predictions.has(
                "pred_keypoints") else None

            boxes = [] if all_boxes is not None else None
            scores = [] if all_scores is not None else None
            classes = [] if all_classes is not None else None
            labels = [] if all_labels is not None else None
            keypoints = [] if all_keypoints is not None else None

            for c in category:
                for i in range(0, len(all_classes)):
                    if all_classes[i] == c:
                        classes.append(all_classes[i])

                        if all_boxes is not None:
                            boxes.append(all_boxes[i])
                        if all_scores is not None:
                            scores.append(all_scores[i])
                        if all_labels is not None:
                            labels.append(all_labels[i])
                        if all_keypoints is not None:
                            keypoints.append(all_keypoints[i])

            if boxes is not None and len(boxes) > 0:
                boxes = Boxes(torch.cat([b.tensor for b in boxes], dim=0))
            if scores is not None and len(scores) > 0:
                scores = torch.stack(scores)
            if classes is not None and len(classes) > 0:
                classes = torch.stack(classes)
        # end additional code

        # removed alpha from here and put it as fixed value
        if predictions.has("pred_masks"):
            masks = np.asarray(predictions.pred_masks)
            masks = [
                GenericMask(x, self.output.height, self.output.width)
                for x in masks
            ]
        else:
            masks = None
        if self._instance_mode == ColorMode.SEGMENTATION and self.metadata.get(
                "thing_colors"):
            colors = [
                self._jitter([x / 255 for x in self.metadata.thing_colors[c]])
                for c in classes
            ]
        else:
            colors = None
        if self._instance_mode == ColorMode.IMAGE_BW:
            self.output.img = self._create_grayscale_image(
                (predictions.pred_masks.any(dim=0) > 0).numpy())

        self.overlay_instances(labels=labels,
                               boxes=boxes,
                               masks=masks,
                               keypoints=keypoints,
                               assigned_colors=colors,
                               alpha=1)
        return self.output
Example No. 15
def to_boxes_from_xywh(bbox_xywh: torch.Tensor) -> Boxes:
    return Boxes(get_bbox_xyxy_from_xywh(bbox_xywh).unsqueeze(0))
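get_bbox_xyxy_from_xywh itself is not shown here; a minimal stand-in, assuming XYWH means (x0, y0, width, height), might look like this (the helper name below is hypothetical):

import torch
from detectron2.structures import Boxes

def get_bbox_xyxy_from_xywh_sketch(bbox_xywh: torch.Tensor) -> torch.Tensor:
    # Hypothetical equivalent of the helper used above: (x0, y0, w, h) -> (x0, y0, x1, y1)
    x0, y0, w, h = bbox_xywh.unbind(-1)
    return torch.stack([x0, y0, x0 + w, y0 + h], dim=-1)

box = Boxes(get_bbox_xyxy_from_xywh_sketch(torch.tensor([10., 20., 30., 40.])).unsqueeze(0))
print(box.tensor)  # tensor([[10., 20., 40., 60.]])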
Example No. 16
        for i in range(dims):
            shape.append(struct.unpack("=i", f.read(4))[0])
        count = np.prod(shape)
        data = []
        for i in range(count):
            data.append(struct.unpack("=f", f.read(4))[0])

        return np.asarray(data, dtype=np.float32).reshape(shape)


if __name__ == '__main__':
    priorbox0 = OriginPriorBox().forward().numpy()
    print(priorbox0, priorbox0.shape)

    # priorbox1 = load_priors('/media/ps/A1/XPC/data/CCPD/ccpd_rotate_coco/output/model_0335999.anc')
    # print(priorbox1, priorbox1.shape)
    fmap = [
        torch.randn(1, 3, 100, 100),
        torch.randn(1, 3, 50, 50),
        torch.randn(1, 3, 25, 25),
    ]
    dag = DefaultAnchorGenerator(
        sizes=[[16, 32], [64, 128], [256, 512]],
        aspect_ratios=[[1.0]],
        strides=[8, 16, 32],
        offset=0.5,
    )
    anc = dag(fmap)
    anc = Boxes.cat(anc).tensor.detach().cpu().numpy()
    print(anc, anc.shape)
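For reference, with these settings each feature level should yield 2 anchors per location (2 sizes × 1 aspect ratio), so the concatenated anc array is expected to have 2 × (100·100 + 50·50 + 25·25) = 26,250 rows of 4 coordinates.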
Example No. 17
def _geometric_aug_func(x,
                        target,
                        angle=0,
                        translate=(0, 0),
                        scale=1,
                        shear=(0, 0),
                        hflip=False,
                        boxes_sample_prob=[],
                        scale_ratio=1.0):
    use_mask = ('gt_masks' in target)

    boxes_and_labels = [(target['gt_boxes'].tensor[i], target['gt_classes'][i],
                         target['gt_masks'].polygons[i] if use_mask else None)
                        for i in range(len(target['gt_boxes']))
                        if random.random() < boxes_sample_prob[i]]
    boxes = [b_and_l[0] for b_and_l in boxes_and_labels]
    labels = [b_and_l[1] for b_and_l in boxes_and_labels]
    masks = [b_and_l[2] for b_and_l in boxes_and_labels]

    if random.random() < 0.5:
        angle *= -1
        translate = (-translate[0], -translate[1])
        shear = (-shear[0], -shear[1])

    translate = (0, 0)
    height, width = x.shape[1], x.shape[2]

    x_crops = []
    boxes_crops = []
    boxes_new = []
    labels_new = []
    masks_new = []
    for i, box in enumerate(boxes):
        box_crop = scale_area(box, height, width, scale_ratio)
        y1, x1, y2, x2 = box_crop.long()

        x_crop = x[:, x1:x2, y1:y2]
        boxes_crops.append(box_crop)

        if x1 >= x2 or y1 >= y2:
            x_crops.append(x_crop)
            continue

        if hflip:
            x_crop = x_crop.flip(-1)
        elif translate[0] + translate[1] != 0:
            offset_y = (y2 + translate[0]).clamp(0, width).long().tolist() - y2
            offset_x = (x2 + translate[1]).clamp(0,
                                                 height).long().tolist() - x2
            if offset_x != 0 or offset_y != 0:
                offset = [offset_y, offset_x]
                boxes_new.append(box + torch.Tensor(offset * 2))
                labels_new.append(labels[i])
                if use_mask:
                    polys = masks[i]
                    polys_out = []
                    for poly in polys:
                        poly_new = copy.deepcopy(poly)
                        poly_new[0::2] = poly_new[0::2] + offset_y
                        poly_new[1::2] = poly_new[1::2] + offset_x
                        polys_out.append(poly_new)
                    masks_new.append(polys_out)
        else:
            x_crop = transforms.functional.to_pil_image(x_crop.cpu())
            x_crop = transforms.functional.affine(
                x_crop,
                angle,
                translate,
                scale,
                shear,
                resample=2,
                fillcolor=tuple([int(i) for i in pixel_mean]))
            x_crop = transforms.functional.to_tensor(x_crop).to(x.device)
        x_crops.append(x_crop)
    y = _transform(x, x_crops, boxes_crops, translate)

    if translate[0] + translate[1] != 0 and len(boxes_new) > 0:
        target['gt_boxes'] = Boxes(
            torch.cat((target['gt_boxes'].tensor, torch.stack(boxes_new))))
        target['gt_classes'] = torch.cat(
            (target['gt_classes'], torch.Tensor(labels_new).long()))
        if use_mask:
            target['gt_masks'] = PolygonMasks(target['gt_masks'].polygons +
                                              masks_new)

    return y, target
Example No. 18
def vis_training_targets(cfg, fcose_outputs, image_list, idx=0):
    import matplotlib.pyplot as plt
    import matplotlib.patches as patches
    import numpy as np

    colors = np.array([[1, 1, 198],
                       [51, 1, 148],
                       [101, 1, 98],
                       [151, 1, 48],
                       [201, 1, 8]]) / 255.

    num_loc_list = [len(loc) for loc in fcose_outputs.locations]
    fcose_outputs.num_loc_list = num_loc_list

    # compute locations to size ranges
    loc_to_size_range = []
    for l, loc_per_level in enumerate(fcose_outputs.locations):
        loc_to_size_range_per_level = loc_per_level.new_tensor(fcose_outputs.sizes_of_interest[l])
        loc_to_size_range.append(
            loc_to_size_range_per_level[None].expand(num_loc_list[l], -1)
        )

    # (Sigma_{levels_points}, 2)
    loc_to_size_range = torch.cat(loc_to_size_range, dim=0)
    locations = torch.cat(fcose_outputs.locations, dim=0)

    training_targets = fcose_outputs.compute_targets_for_locations(
        locations, fcose_outputs.gt_instances, loc_to_size_range
    )

    training_target = {k: v[idx] for k, v in training_targets.items()}

    fig, ax = plt.subplots(1, figsize=(20, 10))
    fig.tight_layout()

    labels = training_target['labels']
    reg_targets = training_target['reg_targets']
    ext_targets = training_target['ext_targets']

    idxOfloc_of_interest = torch.where(labels != 20)[0]

    global locxys, reg_targets_oi, ext_targets_oi, detections

    locxys = locations[idxOfloc_of_interest]

    reg_targets_oi = reg_targets[idxOfloc_of_interest]
    ext_targets_oi = ext_targets[idxOfloc_of_interest]

    detections = torch.stack([
        locxys[:, 0] - reg_targets_oi[:, 0],
        locxys[:, 1] - reg_targets_oi[:, 1],
        locxys[:, 0] + reg_targets_oi[:, 2],
        locxys[:, 1] + reg_targets_oi[:, 3],
    ], dim=1)

    global tmp, ext_points

    ext_points = ExtremePoints.from_boxes(Boxes(detections),
                                          ext_targets_oi,
                                          locxys).tensor.cpu().numpy()

    tmp = ext_points

    im = image_list.tensor[idx]
    pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(im.device).view(-1, 1, 1)
    pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(im.device).view(-1, 1, 1)
    im_norm = ((im * pixel_std) + pixel_mean).cpu().numpy().transpose(1, 2, 0).astype(np.uint8)

    ax.imshow(im_norm)
    locxys_np = locxys.cpu().numpy()
    reg_targets_oi_np = reg_targets_oi.cpu().numpy()
    ext_targets_oi_np = ext_targets_oi.cpu().numpy()
    detections_np = detections.cpu().numpy()

    for i in range(len(locxys_np)):
        ax.scatter(locxys_np[i, 0], locxys_np[i, 1], color=colors[i % len(colors)].tolist(), marker='*')
        x1, y1, x2, y2 = detections_np[i, :]

        rect = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=1, edgecolor=colors[i % len(colors)].tolist(),
                                 facecolor='none', fill=False)
        ax.add_patch(rect)

        ax.scatter(ext_points[i][:, 0], ext_points[i][:, 1], color=colors[i % len(colors)].tolist(), marker='+')

    plt.show()
Example No. 19
def to_boxes_from_xywh(bbox_xywh):
    return Boxes(get_bbox_xyxy_from_xywh(bbox_xywh).unsqueeze(0))
Example No. 20
    def forward(self, batched_inputs):
        """
        Args:
            batched_inputs: a list, batched outputs of :class:`DatasetMapper` .
                Each item in the list contains the inputs for one image.
                For now, each item in the list is a dict that contains:

                * image: Tensor, image in (C, H, W) format.
                * instances (optional): groundtruth :class:`Instances`
                * proposals (optional): :class:`Instances`, precomputed proposals.

                Other information that's included in the original dicts, such as:

                * "height", "width" (int): the output resolution of the model, used in inference.
                  See :meth:`postprocess` for details.

        Returns:
            list[dict]:
                Each dict is the output for one input image.
                The dict contains one key "instances" whose value is a :class:`Instances`.
                The :class:`Instances` object has the following keys:
                "pred_boxes", "pred_classes", "scores", "pred_masks", "pred_keypoints"
        """
        if not self.training:
            return self.inference(batched_inputs)

        images = self.preprocess_image(batched_inputs)
        if "instances" in batched_inputs[0]:
            gt_instances = [
                x["instances"].to(self.device) for x in batched_inputs
            ]
        elif "targets" in batched_inputs[0]:
            log_first_n(
                logging.WARN,
                "'targets' in the model inputs is now renamed to 'instances'!",
                n=10)
            gt_instances = [
                x["targets"].to(self.device) for x in batched_inputs
            ]
        else:
            gt_instances = None

        features = self.backbone(images.tensor)

        proposals, proposal_losses = self.proposal_generator(
            images, features, gt_instances)
        _, outputs_classic, outputs = self.tsd(images, features, proposals,
                                               gt_instances)

        detector_classic_losses = outputs_classic.losses()
        detector_losses = outputs.losses()
        detector_classic_losses[
            'loss_cls_classic'] = detector_classic_losses.pop('loss_cls')
        detector_classic_losses[
            'loss_box_reg_classic'] = detector_classic_losses.pop(
                'loss_box_reg')

        if self.vis_period > 0:
            storage = get_event_storage()
            if storage.iter % self.vis_period == 0:
                self.visualize_training(batched_inputs, proposals)

        # Progressive constraints
        margin_regression_losses = 0
        predict_boxes_classic = outputs_classic.predict_boxes_for_gt_classes()
        predict_boxes = outputs.predict_boxes_for_gt_classes()

        idx = -1
        endIdx = 0
        ind = outputs.gt_classes != (outputs.pred_proposal_deltas.size(1) / 4)
        for pbc, pb in zip(predict_boxes_classic, predict_boxes):
            idx += 1
            startIdx = endIdx
            endIdx += outputs.num_preds_per_image[idx]
            iind = ind[startIdx:endIdx]
            margin_regression_losses += F.relu(self.MR - abs(
                matched_boxlist_iou(Boxes(pbc[iind]),
                                    outputs.gt_boxes[startIdx:endIdx][iind]) -
                matched_boxlist_iou(Boxes(pb[iind]), outputs.
                                    gt_boxes[startIdx:endIdx][iind]))).mean()
        margin_regression_losses = margin_regression_losses / len(
            predict_boxes)

        margin_classification_losses = 0
        for ppc, pc in zip(outputs_classic.predict_probs(),
                           outputs.predict_probs()):
            margin_classification_losses += F.relu(self.MC -
                                                   (abs(ppc -
                                                        pc)).sum(1)).mean()
        margin_classification_losses = margin_classification_losses / len(
            outputs.predict_probs())

        losses = {}
        losses.update(detector_classic_losses)
        losses.update(detector_losses)
        losses.update(proposal_losses)
        losses.update({
            'loss_margin_classification': margin_classification_losses,
            'loss_margin_regression': margin_regression_losses
        })
        return losses
Example No. 21
    def forward(self, features, all_phrase_ids, targets, precomp_boxes, precomp_score,
                precomp_det_label, image_scale, all_sent_sgs, all_sentences, image_unique_id, det_label_embedding):

        """
        :param obj_proposals: proposals from each image
        :param features: feature maps from the backbone
        :param target: gt relation labels
        :param object_vocab, object_vocab_len: [[xxx,xxx],[xxx],[xxx]], [2,1,1]
        :param sent_sg: sentence scene graph
        :return: prediction, loss

        Note that the first dimension indexes images.
        """
        img_num_per_gpu = len(features)

        batch_decode_logits = []
        batch_topk_decoder_logits = []
        batch_pred_similarity = []
        batch_precomp_boxes = []
        batch_topk_precomp_boxes=[]
        batch_pred_boxes = []
        batch_topk_pred_boxes = []
        batch_topk_fusion_pred_boxes = []
        batch_topk_pred_similarity = []
        batch_topk_fusion_similarity = []
        batch_boxes_targets = []
        batch_ctx_embed = []
        batch_ctx_s1_embed = []

        batch_pred_targets = []
        batch_topk_pred_targets = []


        """ Language Embedding"""
        batch_phrase_ids, batch_phrase_types, batch_phrase_embed, batch_phrase_len, \
        batch_phrase_dec_ids, batch_phrase_mask, batch_decoder_word_embed, batch_phrase_glove_embed, batch_rel_phrase_embed, batch_relation_conn, batch_sent_embed,\
        batch_decoder_rel_word_embed, batch_rel_mask, batch_rel_dec_idx = self.phrase_embed(all_sentences, all_phrase_ids, all_sent_sgs)

        h, w = features.shape[-2:]

        # self.storage = get_event_storage()


        for bid in range(img_num_per_gpu):

            """ Visual Embedding """
            precomp_boxes_bid = precomp_boxes[bid].to(self.device)  ## 100*4

            order = []
            for phr_ids in batch_phrase_ids[bid]:
                order.append(all_phrase_ids[bid].index(phr_ids))
            target_filter = targets[bid][np.array(order)]
            batch_boxes_targets.append(target_filter.to(self.device))
            batch_precomp_boxes.append(precomp_boxes_bid)

            img_feat_bid = features[[bid]]
            visual_features_bid = self.rcnn_top(self.det_roi_pooler([img_feat_bid], [precomp_boxes_bid])).mean(dim=[2, 3]).contiguous()
            if cfg.MODEL.VG.SPATIAL_FEAT:
                spa_feat = meshgrid_generation(h, w)
                spa_feat = self.det_roi_pooler([spa_feat], [precomp_boxes_bid]).view(visual_features_bid.shape[0], -1)
                spa_feat = self.spatial_trans(spa_feat)
                visual_features_bid = torch.cat((visual_features_bid, spa_feat), dim=1)

            visual_features_bid = self.visual_embedding(visual_features_bid)
            visual_features_bid = self.vis_batchnorm(visual_features_bid)

            """ Noun Phrase embedding """
            phrase_embed_bid = batch_phrase_embed[bid]
            if phrase_embed_bid.shape[0] == 1 and self.training:
                phrase_embed_bid = self.phr_batchnorm(phrase_embed_bid.repeat(2,1))[[0]]
            else:
                phrase_embed_bid = self.phr_batchnorm(phrase_embed_bid)


            """ Similarity and attention prediction """
            num_box = precomp_boxes_bid.tensor.size(0)
            num_phrase = phrase_embed_bid.size(0)
            phr_inds, obj_inds = self.make_pair(num_phrase, num_box)
            pred_similarity_bid, pred_targets_bid = self.similarity(visual_features_bid, phrase_embed_bid, obj_inds, phr_inds)
            pred_similarity_bid = pred_similarity_bid.reshape(num_phrase, num_box)
            pred_targets_bid = pred_targets_bid.reshape(num_phrase, num_box, 4)
            batch_pred_targets.append(pred_targets_bid)


            if cfg.MODEL.VG.USING_DET_KNOWLEDGE :
                det_label_embedding_bid = det_label_embedding[bid].to(self.device)
                sim = self.cal_det_label_sim_max(det_label_embedding_bid, batch_phrase_glove_embed[bid])
                pred_similarity_bid = pred_similarity_bid * sim
                sim_mask = (sim > 0).float()
                atten_bid = numerical_stability_masked_softmax(pred_similarity_bid, sim_mask, dim=1)
            else:
                atten_bid = F.softmax(pred_similarity_bid, dim=1)

            ## reconstruction visual features
            visual_reconst_bid = torch.mm(atten_bid, visual_features_bid)
            decode_phr_logits = self.phrase_decoder(visual_reconst_bid, batch_decoder_word_embed[bid])
            batch_decode_logits.append(decode_phr_logits)

            atten_score_topk, atten_ranking_topk = torch.topk(atten_bid, dim=1, k=self.s2_topk) ## (N, 10)
            ind_phr_topk = np.arange(num_phrase).repeat(self.s2_topk)


            ## -----------------------------------------------------##
            ## crop 2st features
            ## -----------------------------------------------------##

            if self.storage.iter <= cfg.SOLVER.REG_START_ITER:
                visual_features_topk_bid = visual_features_bid[atten_ranking_topk.reshape(-1)]
                precomp_boxes_topk_bid = precomp_boxes_bid[atten_ranking_topk.reshape(-1)]
                batch_topk_precomp_boxes.append(precomp_boxes_topk_bid)
            else:
                topk_box_ids = atten_ranking_topk.reshape(-1) + torch.as_tensor(ind_phr_topk, dtype=torch.long).to(self.device)*num_box
                precomp_boxes_tensor, box_size = precomp_boxes_bid.tensor, precomp_boxes_bid.size
                precomp_boxes_topk_tensor = precomp_boxes_tensor[atten_ranking_topk.reshape(-1)]  ## (N*10, 4)
                pred_targets_s0 = pred_targets_bid.view(-1, 4)[topk_box_ids]
                precomp_boxes_topk_bid = self.box2box_translation.apply_deltas(pred_targets_s0, precomp_boxes_topk_tensor)
                precomp_boxes_topk_bid = Boxes(precomp_boxes_topk_bid, box_size)
                precomp_boxes_topk_bid.clip()
                batch_topk_precomp_boxes.append(precomp_boxes_topk_bid)
                visual_features_topk_bid = self.rcnn_top(self.det_roi_pooler([img_feat_bid], [precomp_boxes_topk_bid])).mean(dim=[2, 3]).contiguous()

                if cfg.MODEL.VG.SPATIAL_FEAT:
                    spa_feat = meshgrid_generation(h, w)
                    spa_feat = self.det_roi_pooler([spa_feat], [precomp_boxes_topk_bid]).view(visual_features_topk_bid.shape[0], -1)
                    spa_feat = self.spatial_trans(spa_feat)
                    visual_features_topk_bid = torch.cat((visual_features_topk_bid, spa_feat), dim=1)

                visual_features_topk_bid = self.visual_embedding(visual_features_topk_bid)## (N*10, 1024)
                visual_features_topk_bid = self.vis_batchnorm(visual_features_topk_bid)


            pred_similarity_topk_bid, pred_targets_topk_bid = self.similarity_topk(visual_features_topk_bid, phrase_embed_bid, ind_phr_topk)
            pred_similarity_topk_bid = pred_similarity_topk_bid.reshape(num_phrase, self.s2_topk)
            pred_targets_topk_bid = pred_targets_topk_bid.reshape(num_phrase, self.s2_topk, 4)
            batch_topk_pred_targets.append(pred_targets_topk_bid)


            if cfg.MODEL.VG.USING_DET_KNOWLEDGE:
                sim_topk = torch.gather(sim, dim=1, index=atten_ranking_topk.long())
                sim_mask = (sim_topk>0).float()
                pred_similarity_topk_bid = pred_similarity_topk_bid * sim_topk
                atten_topk_bid = numerical_stability_masked_softmax(pred_similarity_topk_bid, sim_mask, dim=1)
            else:
                atten_topk_bid = F.softmax(pred_similarity_topk_bid, dim=1)

            atten_fusion = atten_topk_bid * atten_score_topk  ## N*10
            visual_features_topk_bid = visual_features_topk_bid.view(num_phrase, self.s2_topk, -1)
            visual_reconst_topk_bid = (atten_fusion.unsqueeze(2)*visual_features_topk_bid).sum(1) ## N*1024
            decoder_phr_topk_logits = self.phrase_decoder(visual_reconst_topk_bid, batch_decoder_word_embed[bid])
            batch_topk_decoder_logits.append(decoder_phr_topk_logits)


            ## construct the discriminative loss
            batch_ctx_s1_embed.append(self.visual_mlp(visual_reconst_bid.mean(0, keepdim=True)))
            batch_ctx_embed.append(self.visual_mlp(visual_reconst_topk_bid.mean(0, keepdim=True)))


            batch_pred_similarity.append(atten_bid)
            batch_topk_pred_similarity.append(atten_topk_bid)
            batch_topk_fusion_similarity.append(atten_fusion)

            ### transform boxes for stage-1
            num_phrase_indices = torch.arange(num_phrase).long().to(self.device)
            max_box_ind = atten_bid.detach().cpu().numpy().argmax(1)
            precomp_boxes_delta_max = pred_targets_bid[num_phrase_indices, max_box_ind] ## numPhrase*4

            max_topk_id = torch.topk(atten_topk_bid, dim=1, k=1)[1].long().squeeze(1)
            precomp_boxes_delta_max_topk = pred_targets_topk_bid[num_phrase_indices, max_topk_id]  ## num_phrase*4
            precomp_boxes_topk_bid_tensor = precomp_boxes_topk_bid.tensor.reshape(-1, self.s2_topk, 4)

            max_fusion_topk_id = torch.topk(atten_fusion, dim=1, k=1)[1].long().squeeze()
            precomp_boxes_delta_max_topk_fusion = pred_targets_topk_bid[num_phrase_indices, max_fusion_topk_id]  ## num_phrase*4

            phr_index = torch.arange(num_phrase).to(self.device) * self.s2_topk

            if self.storage.iter <= cfg.SOLVER.REG_START_ITER:
                max_select_boxes = precomp_boxes_bid[max_box_ind]
                max_precomp_boxes = precomp_boxes_topk_bid[max_topk_id + phr_index]
                max_fusion_precomp_boxes = precomp_boxes_topk_bid[max_fusion_topk_id + phr_index]
            else:
                max_select_boxes = Boxes(self.box2box_translation.apply_deltas(precomp_boxes_delta_max, precomp_boxes_bid[max_box_ind].tensor), precomp_boxes_bid.size)
                max_precomp_boxes = Boxes(self.box2box_translation.apply_deltas(precomp_boxes_delta_max_topk, precomp_boxes_topk_bid_tensor[num_phrase_indices, max_topk_id]), precomp_boxes_bid.size)
                max_fusion_precomp_boxes = Boxes(self.box2box_translation.apply_deltas(precomp_boxes_delta_max_topk_fusion, precomp_boxes_topk_bid_tensor[num_phrase_indices, max_fusion_topk_id]), precomp_boxes_bid.size)

            batch_pred_boxes.append(max_select_boxes)
            batch_topk_pred_boxes.append(max_precomp_boxes)
            batch_topk_fusion_pred_boxes.append(max_fusion_precomp_boxes)


        batch_ctx_sim, batch_ctx_sim_s1 = self.generate_image_sent_discriminative(batch_sent_embed, batch_ctx_embed, batch_ctx_s1_embed)

        noun_reconst_loss, noun_topk_reconst_loss, disc_img_sent_loss_s1, disc_img_sent_loss_s2,  reg_loss, \
        reg_loss_s1 = self.VGLoss(batch_phrase_mask, batch_decode_logits, batch_topk_decoder_logits, batch_phrase_dec_ids,
                                  batch_ctx_sim, batch_ctx_sim_s1, batch_pred_similarity, batch_topk_pred_similarity, batch_boxes_targets, batch_precomp_boxes,
                                  batch_pred_targets, batch_topk_pred_targets,
                                  batch_topk_precomp_boxes)

        all_loss = dict(noun_reconst_loss=noun_reconst_loss, noun_topk_reconst_loss=noun_topk_reconst_loss, disc_img_sent_loss_s1=disc_img_sent_loss_s1,
                        disc_img_sent_loss_s2=disc_img_sent_loss_s2, reg_loss_s1=reg_loss, reg_loss_s2=reg_loss_s1)


        if self.training:
            return all_loss, None
        else:
            return all_loss, (batch_phrase_ids, batch_phrase_types, move2cpu(batch_pred_boxes), move2cpu(batch_pred_similarity),
                              move2cpu(batch_boxes_targets), move2cpu(batch_precomp_boxes), image_unique_id, move2cpu(batch_topk_pred_similarity),
                              move2cpu(batch_topk_fusion_similarity), move2cpu(batch_topk_pred_boxes), move2cpu(batch_topk_fusion_pred_boxes),
                              move2cpu(batch_topk_precomp_boxes), move2cpu(batch_topk_pred_targets), move2cpu(batch_pred_targets))
Example No. 22
    def __call__(self, tensor, target):
        if self.ratio >= 1.0:
            return tensor, target

        self.img_pool.append({'tensor': tensor, 'target': target})

        if len(self.img_pool) > self.img_pool_size:
            self.img_pool.pop(0)

        if len(self.img_pool) < 4:
            return tensor, target

        use_mask = ('gt_masks' in target)

        bbox = target['gt_boxes']
        classes = target['gt_classes']
        masks = target['gt_masks'] if use_mask else None

        c, h, w = tensor.shape
        h = int(math.ceil(h / self.size_divisible) * self.size_divisible)
        w = int(math.ceil(w / self.size_divisible) * self.size_divisible)

        new_h, new_w = int(self.ratio * h), int(self.ratio * w)
        in_tensor, in_bbox, in_mask = scale_jitter(tensor, bbox, self.ratio,
                                                   (new_h, new_w), masks)

        pad_imgs = random.sample(self.img_pool, 3)
        pad_tensors, pad_bboxes, pad_masks = [], [], []
        for img in pad_imgs:
            pad_tensor, pad_bbox, pad_mask = scale_jitter(
                img['tensor'], img['target']['gt_boxes'], self.ratio,
                (new_h, new_w),
                img['target']['gt_masks'] if use_mask else None)
            pad_tensors.append(pad_tensor)
            pad_bboxes.append(pad_bbox)
            pad_masks.append(pad_mask)

        crop_boxes = [(new_h, w - new_w), (h - new_h, new_w),
                      (h - new_h, w - new_w)]

        tensor_out = in_tensor.new(*(c, h, w)).zero_()
        tensor_out[:c, :new_h, :new_w].copy_(in_tensor)
        tensor_out[:c, :new_h, new_w:].copy_(
            pad_tensors[0][:c, :crop_boxes[0][0], :crop_boxes[0][1]])
        tensor_out[:c, new_h:, :new_w].copy_(
            pad_tensors[1][:c, :crop_boxes[1][0], :crop_boxes[1][1]])
        tensor_out[:c, new_h:, new_w:].copy_(
            pad_tensors[2][:c, :crop_boxes[2][0], :crop_boxes[2][1]])

        crop_bboxes, crop_classes, crop_masks = [], [], []
        for i, pad_bbox in enumerate(pad_bboxes):
            crop_bbox = copy.deepcopy(pad_bbox)
            crop_bbox.clip(crop_boxes[i])
            ious = crop_bbox.area() / pad_bbox.area()
            inds = ious >= self.iou_threshold
            crop_bbox = crop_bbox[inds]
            crop_bboxes.append(crop_bbox)
            crop_classes.append(pad_imgs[i]['target']['gt_classes'][inds])
            if use_mask:
                crop_masks.append(
                    [mask for j, mask in enumerate(pad_masks[i]) if inds[j]])

        offsets_box = [
            torch.Tensor([0.0, 0.0, 0.0, 0.0]),
            torch.Tensor([new_w, 0.0, new_w, 0.0]),
            torch.Tensor([0.0, new_h, 0.0, new_h]),
            torch.Tensor([new_w, new_h, new_w, new_h])
        ]
        offsets_mask = [[0.0, 0.0], [0.0, new_w], [new_h, 0], [new_h, new_w]]
        bbox_out = Boxes(
            torch.cat([
                b.tensor + offsets_box[i]
                for i, b in enumerate([in_bbox] + crop_bboxes)
            ], dim=0))
        classes_out = torch.cat([classes] + crop_classes, dim=0)
        target_out = {'gt_boxes': bbox_out, 'gt_classes': classes_out}

        if use_mask:
            masks_out = []
            for i, crop_mask in enumerate([in_mask] + crop_masks):
                mask_out = []
                for polys in crop_mask:
                    poly_out = []
                    for poly in polys:
                        poly_new = copy.deepcopy(poly)
                        poly_new[0::2] = poly_new[0::2] + offsets_mask[i][1]
                        poly_new[1::2] = poly_new[1::2] + offsets_mask[i][0]
                        poly_out.append(poly_new)
                    mask_out.append(poly_out)

                masks_out += mask_out
            masks_out = PolygonMasks(masks_out)
            target_out['gt_masks'] = masks_out

        return tensor_out, target_out
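A note on the layout above: in_tensor occupies the top-left quadrant of the mosaic, while the three images sampled from img_pool are pasted into the top-right, bottom-left and bottom-right quadrants; offsets_box and offsets_mask then shift each image's boxes and polygon masks by the corresponding quadrant origin before everything is merged into a single target.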