Code example #1
    def test_targets_to_anchors(self):
        boxes = torch.zeros((0, 4), dtype=torch.float32)
        negative_target = {"boxes": boxes,
                           "labels": torch.zeros((1, 1), dtype=torch.int64),
                           "image_id": 4,
                           "area": (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0]),
                           "iscrowd": torch.zeros((0,), dtype=torch.int64)}

        anchors = [torch.randint(-50, 50, (3, 4), dtype=torch.float32)]
        targets = [negative_target]

        anchor_sizes = ((32,), (64,), (128,), (256,), (512,))
        aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)
        rpn_anchor_generator = AnchorGenerator(
            anchor_sizes, aspect_ratios
        )
        rpn_head = RPNHead(4, rpn_anchor_generator.num_anchors_per_location()[0])

        head = RegionProposalNetwork(
            rpn_anchor_generator, rpn_head,
            0.5, 0.3,
            256, 0.5,
            2000, 2000, 0.7)

        labels, matched_gt_boxes = head.assign_targets_to_anchors(anchors, targets)

        self.assertEqual(labels[0].sum(), 0)
        self.assertEqual(labels[0].shape, torch.Size([anchors[0].shape[0]]))
        self.assertEqual(labels[0].dtype, torch.float32)

        self.assertEqual(matched_gt_boxes[0].sum(), 0)
        self.assertEqual(matched_gt_boxes[0].shape, anchors[0].shape)
        self.assertEqual(matched_gt_boxes[0].dtype, torch.float32)
Code example #2
    def test_targets_to_anchors(self):
        _, targets = self._make_empty_sample()
        anchors = [torch.randint(-50, 50, (3, 4), dtype=torch.float32)]

        anchor_sizes = ((32,), (64,), (128,), (256,), (512,))
        aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)
        rpn_anchor_generator = AnchorGenerator(
            anchor_sizes, aspect_ratios
        )
        rpn_head = RPNHead(4, rpn_anchor_generator.num_anchors_per_location()[0])

        head = RegionProposalNetwork(
            rpn_anchor_generator, rpn_head,
            0.5, 0.3,
            256, 0.5,
            2000, 2000, 0.7, 0.05)

        labels, matched_gt_boxes = head.assign_targets_to_anchors(anchors, targets)

        self.assertEqual(labels[0].sum(), 0)
        self.assertEqual(labels[0].shape, torch.Size([anchors[0].shape[0]]))
        self.assertEqual(labels[0].dtype, torch.float32)

        self.assertEqual(matched_gt_boxes[0].sum(), 0)
        self.assertEqual(matched_gt_boxes[0].shape, anchors[0].shape)
        self.assertEqual(matched_gt_boxes[0].dtype, torch.float32)
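Code example #2 calls a `_make_empty_sample` helper that this page does not show. Judging from code example #1, it presumably builds the same zero-box negative target; a minimal sketch under that assumption (the `images` part is a guess):

import torch

    def _make_empty_sample(self):
        # Assumed to mirror the negative target built inline in code example #1.
        images = [torch.rand((3, 100, 100), dtype=torch.float32)]
        boxes = torch.zeros((0, 4), dtype=torch.float32)
        negative_target = {"boxes": boxes,
                           "labels": torch.zeros((1, 1), dtype=torch.int64),
                           "image_id": 4,
                           "area": (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0]),
                           "iscrowd": torch.zeros((0,), dtype=torch.int64)}
        return images, [negative_target]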
Code example #3
	def __init__(self):
		super(RPN, self).__init__()
		# Anchor settings
		anchor_sizes = ((32,), (64,), (128,), (256,), (512,))
		aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)
		# Generate anchor boxes
		anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)
		# Define RPN Head
		# rpn_head = RPNHead(256, 9)
		rpn_head = RPNHead(256, anchor_generator.num_anchors_per_location()[0])
		# RPN parameters
		rpn_pre_nms_top_n_train = 2000
		rpn_pre_nms_top_n_test = 1000
		rpn_post_nms_top_n_train = 2000
		rpn_post_nms_top_n_test = 1000
		rpn_nms_thresh = 0.7
		rpn_fg_iou_thresh = 0.7
		rpn_bg_iou_thresh = 0.3
		rpn_batch_size_per_image = 256
		rpn_positive_fraction = 0.5

		rpn_pre_nms_top_n = dict(training=rpn_pre_nms_top_n_train,
								 testing=rpn_pre_nms_top_n_test)
		rpn_post_nms_top_n = dict(
			training=rpn_post_nms_top_n_train, testing=rpn_post_nms_top_n_test)

		# Create RPN
		self.rpn = RegionProposalNetwork(
			anchor_generator, rpn_head,
			rpn_fg_iou_thresh, rpn_bg_iou_thresh,
			rpn_batch_size_per_image, rpn_positive_fraction,
			rpn_pre_nms_top_n, rpn_post_nms_top_n, rpn_nms_thresh)
Code example #4
    def __init__(self,
                 backbone_out_channels,
                 rpn_fg_iou_thresh=0.7,
                 rpn_bg_iou_thresh=0.3,
                 rpn_batch_size_per_image=256,
                 rpn_positive_fraction=0.5,
                 rpn_nms_thresh=0.7,
                 rpn_pre_nms_top_n_train=2000,
                 rpn_pre_nms_top_n_test=1000,
                 rpn_post_nms_top_n_train=2000,
                 rpn_post_nms_top_n_test=1000):
        super(RPN, self).__init__()

        anchor_sizes = ((32, ), (64, ), (128, ), (256, ), (512, ))
        aspect_ratios = ((0.5, 1.0, 2.0), ) * len(anchor_sizes)
        rpn_anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)
        rpn_head = RPNHead(backbone_out_channels,
                           rpn_anchor_generator.num_anchors_per_location()[0])

        rpn_pre_nms_top_n = dict(training=rpn_pre_nms_top_n_train,
                                 testing=rpn_pre_nms_top_n_test)
        rpn_post_nms_top_n = dict(training=rpn_post_nms_top_n_train,
                                  testing=rpn_post_nms_top_n_test)

        self.rpn = RegionProposalNetwork(rpn_anchor_generator, rpn_head,
                                         rpn_fg_iou_thresh, rpn_bg_iou_thresh,
                                         rpn_batch_size_per_image,
                                         rpn_positive_fraction,
                                         rpn_pre_nms_top_n, rpn_post_nms_top_n,
                                         rpn_nms_thresh)
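Each level in the examples above pairs one size tuple with three aspect ratios, so `num_anchors_per_location()` yields 3 for every one of the five feature levels. A quick check (the import path below is the pre-0.10 torchvision layout; newer releases moved `AnchorGenerator` to `anchor_utils`):

from torchvision.models.detection.rpn import AnchorGenerator

anchor_sizes = ((32,), (64,), (128,), (256,), (512,))
aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)
gen = AnchorGenerator(anchor_sizes, aspect_ratios)
assert gen.num_anchors_per_location() == [3, 3, 3, 3, 3]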
Code example #5
def get_faster_rcnn(n_classes: int):
    faster_rcnn = torchvision.models.detection.fasterrcnn_resnet50_fpn(
        pretrained=True)

    anchor_generator = AnchorGenerator(sizes=tuple([(16, 32, 64, 128, 256)
                                                    for _ in range(5)]),
                                       aspect_ratios=tuple([
                                           (0.75, 0.5, 1.25) for _ in range(5)
                                       ]))

    rpn_head = RPNHead(256, anchor_generator.num_anchors_per_location()[0])

    faster_rcnn.rpn = RegionProposalNetwork(anchor_generator=anchor_generator,
                                            head=rpn_head,
                                            fg_iou_thresh=0.7,
                                            bg_iou_thresh=0.3,
                                            batch_size_per_image=48,
                                            positive_fraction=0.5,
                                            pre_nms_top_n=dict(training=200,
                                                               testing=100),
                                            post_nms_top_n=dict(training=160,
                                                                testing=80),
                                            nms_thresh=0.7)

    in_features = faster_rcnn.roi_heads.box_predictor.cls_score.in_features
    faster_rcnn.roi_heads.box_predictor = FastRCNNPredictor(
        in_features, n_classes)
    faster_rcnn.roi_heads.fg_bg_sampler.batch_size_per_image = 24
    faster_rcnn.roi_heads.fg_bg_sampler.positive_fraction = 0.5

    return faster_rcnn
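A hedged usage sketch for the factory above, assuming the example's own imports are in scope (the input resolution and class count are arbitrary; `pretrained=True` downloads COCO weights on first use):

import torch

model = get_faster_rcnn(n_classes=3)
model.eval()
with torch.no_grad():
    outputs = model([torch.rand(3, 512, 512)])
print(outputs[0]['boxes'].shape, outputs[0]['scores'].shape)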
Code example #6
    def _init_test_rpn(self):
        anchor_sizes = ((32, ), (64, ), (128, ), (256, ), (512, ))
        aspect_ratios = ((0.5, 1.0, 2.0), ) * len(anchor_sizes)
        rpn_anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)
        out_channels = 256
        rpn_head = RPNHead(out_channels,
                           rpn_anchor_generator.num_anchors_per_location()[0])
        rpn_fg_iou_thresh = 0.7
        rpn_bg_iou_thresh = 0.3
        rpn_batch_size_per_image = 256
        rpn_positive_fraction = 0.5
        rpn_pre_nms_top_n = dict(training=2000, testing=1000)
        rpn_post_nms_top_n = dict(training=2000, testing=1000)
        rpn_nms_thresh = 0.7
        rpn_score_thresh = 0.0

        rpn = RegionProposalNetwork(rpn_anchor_generator,
                                    rpn_head,
                                    rpn_fg_iou_thresh,
                                    rpn_bg_iou_thresh,
                                    rpn_batch_size_per_image,
                                    rpn_positive_fraction,
                                    rpn_pre_nms_top_n,
                                    rpn_post_nms_top_n,
                                    rpn_nms_thresh,
                                    score_thresh=rpn_score_thresh)
        return rpn
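To exercise the module this returns, torchvision's RPN wants an `ImageList` plus an ordered dict of feature maps, one 256-channel map per anchor-size level. A self-contained sketch under those assumptions (the RPN construction below inlines the helper above; import paths follow the pre-0.10 layout):

from collections import OrderedDict
import torch
from torchvision.models.detection.image_list import ImageList
from torchvision.models.detection.rpn import (AnchorGenerator, RPNHead,
                                              RegionProposalNetwork)

anchor_gen = AnchorGenerator(((32,), (64,), (128,), (256,), (512,)),
                             ((0.5, 1.0, 2.0),) * 5)
rpn = RegionProposalNetwork(anchor_gen,
                            RPNHead(256, anchor_gen.num_anchors_per_location()[0]),
                            0.7, 0.3, 256, 0.5,
                            dict(training=2000, testing=1000),
                            dict(training=2000, testing=1000),
                            0.7)
rpn.eval()  # eval mode: no targets needed, the loss dict stays empty

images = torch.rand(2, 3, 256, 256)
image_list = ImageList(images, [(256, 256)] * 2)
features = OrderedDict(
    (str(i), torch.rand(2, 256, 256 // 2 ** (i + 2), 256 // 2 ** (i + 2)))
    for i in range(5))

proposals, losses = rpn(image_list, features)  # one proposal tensor per image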
Code example #7
    def __init__(self, backbone,
                 dope_roi_pool, dope_head, dope_predictor,
                 # transform parameters
                 min_size=800, max_size=1333,
                 image_mean=None, image_std=None,
                 # RPN parameters
                 rpn_anchor_generator=None, rpn_head=None,
                 rpn_pre_nms_top_n_train=2000, rpn_pre_nms_top_n_test=1000,
                 rpn_post_nms_top_n_train=2000, rpn_post_nms_top_n_test=1000,
                 rpn_nms_thresh=0.7,
                 rpn_fg_iou_thresh=0.7, rpn_bg_iou_thresh=0.3,
                 rpn_batch_size_per_image=256, rpn_positive_fraction=0.5,
                 # others ('parts' is a module-level list in the source project)
                 num_anchor_poses={'body': 20, 'hand': 10, 'face': 10},
                 pose2d_reg_weights={part: 5.0 for part in parts},
                 pose3d_reg_weights={part: 5.0 for part in parts},
                 ):
                
        if not hasattr(backbone, "out_channels"):
            raise ValueError(
                "backbone should contain an attribute out_channels "
                "specifying the number of output channels (assumed to be the "
                "same for all the levels)")

        assert isinstance(rpn_anchor_generator, (AnchorGenerator, type(None)))
        assert isinstance(dope_roi_pool, (MultiScaleRoIAlign, type(None)))

        out_channels = backbone.out_channels

        if rpn_anchor_generator is None:
            anchor_sizes = ((32,), (64,), (128,), (256,), (512,))
            aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)
            rpn_anchor_generator = AnchorGenerator(
                anchor_sizes, aspect_ratios
            )
        if rpn_head is None:
            rpn_head = RPNHead(
                out_channels, rpn_anchor_generator.num_anchors_per_location()[0]
            )

        rpn_pre_nms_top_n = dict(training=rpn_pre_nms_top_n_train, testing=rpn_pre_nms_top_n_test)
        rpn_post_nms_top_n = dict(training=rpn_post_nms_top_n_train, testing=rpn_post_nms_top_n_test)
        
        rpn = RegionProposalNetwork(
            rpn_anchor_generator, rpn_head,
            rpn_fg_iou_thresh, rpn_bg_iou_thresh,
            rpn_batch_size_per_image, rpn_positive_fraction,
            rpn_pre_nms_top_n, rpn_post_nms_top_n, rpn_nms_thresh)

        dope_heads = Dope_RoIHeads(dope_roi_pool, dope_head, dope_predictor, num_anchor_poses, pose2d_reg_weights=pose2d_reg_weights, pose3d_reg_weights=pose3d_reg_weights)
            
        if image_mean is None:
            image_mean = [0.485, 0.456, 0.406]
        if image_std is None:
            image_std = [0.229, 0.224, 0.225]
        transform = Dope_Transform(min_size, max_size, image_mean, image_std)

        super(Dope_RCNN, self).__init__(backbone, rpn, dope_heads, transform)
Code example #8
File: rpn_train1_org.py (project: pranoyr/RPN)
    def __init__(self):
        super(RPN, self).__init__()
        # Define FPN
        self.fpn = resnet_fpn_backbone(backbone_name='resnet101',
                                       pretrained=True)
        anchor_sizes = ((32, ), (64, ), (128, ), (256, ), (512, ))
        aspect_ratios = ((0.5, 1.0, 2.0), ) * len(anchor_sizes)
        # Generate anchor boxes
        anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)
        # Define RPN Head
        # rpn_head = RPNHead(256, 9)
        rpn_head = RPNHead(256, anchor_generator.num_anchors_per_location()[0])
        # RPN parameters
        rpn_pre_nms_top_n_train = 2000
        rpn_pre_nms_top_n_test = 1000
        rpn_post_nms_top_n_train = 2000
        rpn_post_nms_top_n_test = 1000
        rpn_nms_thresh = 0.7
        rpn_fg_iou_thresh = 0.7
        rpn_bg_iou_thresh = 0.3
        rpn_batch_size_per_image = 256
        rpn_positive_fraction = 0.5

        # transform parameters
        min_size = 800
        max_size = 1333
        image_mean = [0.485, 0.456, 0.406]
        image_std = [0.229, 0.224, 0.225]
        self.transform = GeneralizedRCNNTransform(min_size, max_size,
                                                  image_mean, image_std)

        rpn_pre_nms_top_n = dict(training=rpn_pre_nms_top_n_train,
                                 testing=rpn_pre_nms_top_n_test)
        rpn_post_nms_top_n = dict(training=rpn_post_nms_top_n_train,
                                  testing=rpn_post_nms_top_n_test)

        # Create RPN
        self.rpn = RegionProposalNetwork(anchor_generator, rpn_head,
                                         rpn_fg_iou_thresh, rpn_bg_iou_thresh,
                                         rpn_batch_size_per_image,
                                         rpn_positive_fraction,
                                         rpn_pre_nms_top_n, rpn_post_nms_top_n,
                                         rpn_nms_thresh)
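The wrapper above only wires its submodules together; no forward pass is shown. A minimal sketch of what one might look like, following the usual GeneralizedRCNN flow (assumed, not from the original file):

    def forward(self, images, targets=None):
        # images: list of 3xHxW tensors; targets: optional torchvision-style dicts
        images, targets = self.transform(images, targets)
        features = self.fpn(images.tensors)   # OrderedDict of FPN levels
        proposals, proposal_losses = self.rpn(images, features, targets)
        return proposals, proposal_losses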
Code example #9
    def __init__(self):
        super(RPN, self).__init__()
        # Anchor settings
        anchor_sizes = ((32, ), (64, ), (128, ), (256, ), (512, ))
        aspect_ratios = ((0.5, 1.0, 2.0), ) * len(anchor_sizes)
        # Generate anchor boxes
        anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)
        # Define RPN Head
        rpn_head = RPNHead(256, anchor_generator.num_anchors_per_location()[0])
        RPN_PRE_NMS_TOP_N = dict(training=cfg.RPN.PRE_NMS_TOP_N_TRAIN,
                                 testing=cfg.RPN.PRE_NMS_TOP_N_TEST)
        RPN_POST_NMS_TOP_N = dict(training=cfg.RPN.POST_NMS_TOP_N_TRAIN,
                                  testing=cfg.RPN.POST_NMS_TOP_N_TEST)

        # Create RPN
        self.rpn = RegionProposalNetwork(
            anchor_generator, rpn_head, cfg.RPN.FG_IOU_THRESH,
            cfg.RPN.BG_IOU_THRESH, cfg.RPN.BATCH_SIZE_PER_IMAGE,
            cfg.RPN.POSITIVE_FRACTION, RPN_PRE_NMS_TOP_N, RPN_POST_NMS_TOP_N,
            cfg.RPN.NMS_THRESH)
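This variant reads all hyper-parameters from a config object the page does not include. A hypothetical stand-in, filled with the conventional values used by the other examples on this page:

from types import SimpleNamespace

# Hypothetical minimal replacement for the yacs-style cfg this example assumes
cfg = SimpleNamespace(RPN=SimpleNamespace(
    PRE_NMS_TOP_N_TRAIN=2000, PRE_NMS_TOP_N_TEST=1000,
    POST_NMS_TOP_N_TRAIN=2000, POST_NMS_TOP_N_TEST=1000,
    FG_IOU_THRESH=0.7, BG_IOU_THRESH=0.3,
    BATCH_SIZE_PER_IMAGE=256, POSITIVE_FRACTION=0.5,
    NMS_THRESH=0.7))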
Code example #10
    def __init__(
            self,
            num_classes=2,
            # transform parameters
            backbone_name='resnet50',
            min_size=256,
            max_size=512,
            image_mean=None,
            image_std=None,
            # RPN parameters
            rpn_anchor_generator=None,
            rpn_head=None,
            rpn_pre_nms_top_n_train=2000,
            rpn_pre_nms_top_n_test=1000,
            rpn_post_nms_top_n_train=2000,
            rpn_post_nms_top_n_test=1000,
            rpn_nms_thresh=0.7,
            rpn_fg_iou_thresh=0.7,
            rpn_bg_iou_thresh=0.3,
            rpn_batch_size_per_image=256,
            rpn_positive_fraction=0.5,
            rpn_score_thresh=0.0,
            # Box parameters
            box_roi_pool=None,
            box_head=None,
            box_predictor=None,
            box_score_thresh=0.05,
            box_nms_thresh=0.5,
            box_detections_per_img=100,
            box_fg_iou_thresh=0.5,
            box_bg_iou_thresh=0.5,
            box_batch_size_per_image=512,
            box_positive_fraction=0.25,
            bbox_reg_weights=None,
            # Ellipse regressor
            ellipse_roi_pool=None,
            ellipse_head=None,
            ellipse_predictor=None,
            ellipse_loss_metric="gaussian-angle"):

        backbone = resnet_fpn_backbone(backbone_name,
                                       pretrained=True,
                                       trainable_layers=5)

        # Input image is grayscale -> in_channels = 1 instead of 3 (COCO)
        backbone.body.conv1 = Conv2d(1,
                                     64,
                                     kernel_size=(7, 7),
                                     stride=(2, 2),
                                     padding=(3, 3),
                                     bias=False)

        if not hasattr(backbone, "out_channels"):
            raise ValueError(
                "backbone should contain an attribute out_channels "
                "specifying the number of output channels (assumed to be the "
                "same for all the levels)")

        assert isinstance(rpn_anchor_generator, (AnchorGenerator, type(None)))
        assert isinstance(box_roi_pool, (MultiScaleRoIAlign, type(None)))

        if num_classes is not None:
            if box_predictor is not None:
                raise ValueError(
                    "num_classes should be None when box_predictor is specified"
                )
        else:
            if box_predictor is None:
                raise ValueError(
                    "num_classes should not be None when box_predictor "
                    "is not specified")

        out_channels = backbone.out_channels

        if rpn_anchor_generator is None:
            anchor_sizes = ((32, ), (64, ), (128, ), (256, ), (512, ))
            aspect_ratios = ((0.5, 1.0, 2.0), ) * len(anchor_sizes)
            rpn_anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)
        if rpn_head is None:
            rpn_head = RPNHead(
                out_channels,
                rpn_anchor_generator.num_anchors_per_location()[0])

        rpn_pre_nms_top_n = dict(training=rpn_pre_nms_top_n_train,
                                 testing=rpn_pre_nms_top_n_test)
        rpn_post_nms_top_n = dict(training=rpn_post_nms_top_n_train,
                                  testing=rpn_post_nms_top_n_test)

        rpn = RegionProposalNetwork(rpn_anchor_generator,
                                    rpn_head,
                                    rpn_fg_iou_thresh,
                                    rpn_bg_iou_thresh,
                                    rpn_batch_size_per_image,
                                    rpn_positive_fraction,
                                    rpn_pre_nms_top_n,
                                    rpn_post_nms_top_n,
                                    rpn_nms_thresh,
                                    score_thresh=rpn_score_thresh)

        if box_roi_pool is None:
            box_roi_pool = MultiScaleRoIAlign(
                featmap_names=['0', '1', '2', '3'],
                output_size=7,
                sampling_ratio=2)

        if box_head is None:
            resolution = box_roi_pool.output_size[0]
            representation_size = 1024
            box_head = TwoMLPHead(out_channels * resolution**2,
                                  representation_size)

        if box_predictor is None:
            representation_size = 1024
            box_predictor = FastRCNNPredictor(representation_size, num_classes)

        if ellipse_roi_pool is None:
            ellipse_roi_pool = MultiScaleRoIAlign(
                featmap_names=['0', '1', '2', '3'],
                output_size=7,
                sampling_ratio=2)

        if ellipse_head is None:
            # use the ellipse pool's resolution (the original read box_roi_pool here)
            resolution = ellipse_roi_pool.output_size[0]
            representation_size = 1024
            ellipse_head = TwoMLPHead(out_channels * resolution**2,
                                      representation_size)

        if ellipse_predictor is None:
            representation_size = 1024
            ellipse_predictor = EllipseRegressor(representation_size,
                                                 num_classes)

        roi_heads = EllipseRoIHeads(
            # Box
            box_roi_pool,
            box_head,
            box_predictor,
            box_fg_iou_thresh,
            box_bg_iou_thresh,
            box_batch_size_per_image,
            box_positive_fraction,
            bbox_reg_weights,
            box_score_thresh,
            box_nms_thresh,
            box_detections_per_img,
            # Ellipse
            ellipse_roi_pool=ellipse_roi_pool,
            ellipse_head=ellipse_head,
            ellipse_predictor=ellipse_predictor,
            ellipse_loss_metric=ellipse_loss_metric)

        if image_mean is None:
            image_mean = [0.156]
        if image_std is None:
            image_std = [0.272]
        transform = GeneralizedRCNNTransform(min_size, max_size, image_mean,
                                             image_std)

        super().__init__(backbone, rpn, roi_heads, transform)
Code example #11
File: seqnet.py (project: Bye-lemon/SeqNet)
    def __init__(self, cfg):
        super(SeqNet, self).__init__()

        backbone, box_head = build_resnet(name="resnet50", pretrained=True)

        anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512), ),
                                           aspect_ratios=((0.5, 1.0, 2.0), ))
        head = RPNHead(
            in_channels=backbone.out_channels,
            num_anchors=anchor_generator.num_anchors_per_location()[0],
        )
        pre_nms_top_n = dict(training=cfg.MODEL.RPN.PRE_NMS_TOPN_TRAIN,
                             testing=cfg.MODEL.RPN.PRE_NMS_TOPN_TEST)
        post_nms_top_n = dict(training=cfg.MODEL.RPN.POST_NMS_TOPN_TRAIN,
                              testing=cfg.MODEL.RPN.POST_NMS_TOPN_TEST)
        rpn = RegionProposalNetwork(
            anchor_generator=anchor_generator,
            head=head,
            fg_iou_thresh=cfg.MODEL.RPN.POS_THRESH_TRAIN,
            bg_iou_thresh=cfg.MODEL.RPN.NEG_THRESH_TRAIN,
            batch_size_per_image=cfg.MODEL.RPN.BATCH_SIZE_TRAIN,
            positive_fraction=cfg.MODEL.RPN.POS_FRAC_TRAIN,
            pre_nms_top_n=pre_nms_top_n,
            post_nms_top_n=post_nms_top_n,
            nms_thresh=cfg.MODEL.RPN.NMS_THRESH,
        )

        faster_rcnn_predictor = FastRCNNPredictor(2048, 2)
        reid_head = deepcopy(box_head)
        box_roi_pool = MultiScaleRoIAlign(featmap_names=["feat_res4"],
                                          output_size=14,
                                          sampling_ratio=2)
        box_predictor = BBoxRegressor(2048,
                                      num_classes=2,
                                      bn_neck=cfg.MODEL.ROI_HEAD.BN_NECK)
        roi_heads = SeqRoIHeads(
            # OIM
            num_pids=cfg.MODEL.LOSS.LUT_SIZE,
            num_cq_size=cfg.MODEL.LOSS.CQ_SIZE,
            oim_momentum=cfg.MODEL.LOSS.OIM_MOMENTUM,
            oim_scalar=cfg.MODEL.LOSS.OIM_SCALAR,
            # SeqNet
            faster_rcnn_predictor=faster_rcnn_predictor,
            reid_head=reid_head,
            # parent class
            box_roi_pool=box_roi_pool,
            box_head=box_head,
            box_predictor=box_predictor,
            fg_iou_thresh=cfg.MODEL.ROI_HEAD.POS_THRESH_TRAIN,
            bg_iou_thresh=cfg.MODEL.ROI_HEAD.NEG_THRESH_TRAIN,
            batch_size_per_image=cfg.MODEL.ROI_HEAD.BATCH_SIZE_TRAIN,
            positive_fraction=cfg.MODEL.ROI_HEAD.POS_FRAC_TRAIN,
            bbox_reg_weights=None,
            score_thresh=cfg.MODEL.ROI_HEAD.SCORE_THRESH_TEST,
            nms_thresh=cfg.MODEL.ROI_HEAD.NMS_THRESH_TEST,
            detections_per_img=cfg.MODEL.ROI_HEAD.DETECTIONS_PER_IMAGE_TEST,
        )

        transform = GeneralizedRCNNTransform(
            min_size=cfg.INPUT.MIN_SIZE,
            max_size=cfg.INPUT.MAX_SIZE,
            image_mean=[0.485, 0.456, 0.406],
            image_std=[0.229, 0.224, 0.225],
        )

        self.backbone = backbone
        self.rpn = rpn
        self.roi_heads = roi_heads
        self.transform = transform

        # loss weights
        self.lw_rpn_reg = cfg.SOLVER.LW_RPN_REG
        self.lw_rpn_cls = cfg.SOLVER.LW_RPN_CLS
        self.lw_proposal_reg = cfg.SOLVER.LW_PROPOSAL_REG
        self.lw_proposal_cls = cfg.SOLVER.LW_PROPOSAL_CLS
        self.lw_box_reg = cfg.SOLVER.LW_BOX_REG
        self.lw_box_cls = cfg.SOLVER.LW_BOX_CLS
        self.lw_box_reid = cfg.SOLVER.LW_BOX_REID
Code example #12
File: aff_cf_model.py (project: cxt98/ACF_perception)
class ACFNetwork(nn.Module):
    """Wrapper for pre-built PyTorch models.

    Based off:
        https://pytorch.org/docs/stable/_modules/torchvision/models/detection/mask_rcnn.html
        https://pytorch.org/docs/stable/_modules/torchvision/models/detection/faster_rcnn.html
        https://github.com/pytorch/vision/blob/master/torchvision/models/detection/generalized_rcnn.py
    """
    def __init__(
            self,
            arch,
            pretrained,
            num_classes,
            input_mode,
            acf_head='endpoints',
            # transform parameters
            min_size=800,
            max_size=1333,
            image_mean=None,
            image_std=None,
            # RPN parameters
            rpn_anchor_generator=None,
            rpn_head=None,
            rpn_pre_nms_top_n_train=2000,
            rpn_pre_nms_top_n_test=1000,
            rpn_post_nms_top_n_train=2000,
            rpn_post_nms_top_n_test=1000,
            rpn_nms_thresh=0.5,
            rpn_fg_iou_thresh=0.7,
            rpn_bg_iou_thresh=0.3,
            rpn_batch_size_per_image=256,
            rpn_positive_fraction=0.5,
            # Box parameters
            box_roi_pool=None,
            box_head=None,
            box_predictor=None,
            box_score_thresh=0.05,
            box_nms_thresh=0.5,
            box_detections_per_img=100,
            box_fg_iou_thresh=0.5,
            box_bg_iou_thresh=0.5,
            box_batch_size_per_image=512,
            box_positive_fraction=0.25,
            bbox_reg_weights=None):
        super(ACFNetwork, self).__init__()

        self.input_mode = input_mode

        self.backbone = resnet_fpn_backbone(arch, pretrained)
        # change first layer to 4 channel for early fusion with 1 channel depth, load pretrained weights on RGB channels

        conv1_weight_old = nn.Parameter(
            self.backbone.body.conv1.weight.data)  # self.backbone.body.conv1.weight
        conv1_weight = torch.zeros((64, 4, 7, 7))
        conv1_weight[:, 0:3, :, :] = conv1_weight_old
        avg_weight = conv1_weight_old.mean(dim=1, keepdim=False)
        conv1_weight[:, 3, :, :] = avg_weight
        self.backbone.body.conv1.weight = torch.nn.Parameter(conv1_weight)

        # self.backbone.body.conv1.weight.detach()
        # self.backbone.body.conv1 = nn.Conv2d(4, 64, kernel_size=7, stride=2, padding=3, bias=False)

        out_channels = self.backbone.out_channels
        if rpn_anchor_generator is None:
            anchor_sizes = ((16, ), (32, ), (64, ), (128, ), (256, ), (512, ))
            aspect_ratios = ((0.5, 1.0, 2.0), ) * len(anchor_sizes)
            rpn_anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)
        if rpn_head is None:
            rpn_head = RPNHead(
                out_channels,
                rpn_anchor_generator.num_anchors_per_location()[0])

        rpn_pre_nms_top_n = dict(training=rpn_pre_nms_top_n_train,
                                 testing=rpn_pre_nms_top_n_test)
        rpn_post_nms_top_n = dict(training=rpn_post_nms_top_n_train,
                                  testing=rpn_post_nms_top_n_test)

        self.rpn = RegionProposalNetwork(rpn_anchor_generator, rpn_head,
                                         rpn_fg_iou_thresh, rpn_bg_iou_thresh,
                                         rpn_batch_size_per_image,
                                         rpn_positive_fraction,
                                         rpn_pre_nms_top_n, rpn_post_nms_top_n,
                                         rpn_nms_thresh)

        self.roi_heads = RoIHeadsExtend(out_channels, num_classes,
                                        self.input_mode, acf_head)

        # freeze RGB backbone and RPN when training on poses
        if self.input_mode == config.INPUT_RGBD:
            for param in self.rpn.parameters():
                param.requires_grad = False
            for param in self.backbone.parameters():
                param.requires_grad = False
            # self.backbone_depth = resnet_fpn_backbone(arch, pretrained)

    def forward(self, images, targets=None):
        """
        Arguments:
            images: Image batch, normalized [NxCxHxW]
            targets (list[Dict[Tensor]]): ground-truth boxes present in the image (optional)
        Returns:
            result (list[BoxList] or dict[Tensor]): the output from the model.
                During training, it returns a dict[Tensor] which contains the losses.
                During testing, it returns list[BoxList] contains additional fields
                like `scores`, `labels` and `mask` (for Mask R-CNN models).
        """
        image_sizes = [tuple(images.shape[-2:])] * images.shape[0]

        features = self.backbone(images)

        # Might need to torch.chunk the features because it wants it to be a list for some reason.
        image_list = ImageList(images, image_sizes)
        try:
            proposals, proposal_losses = self.rpn(image_list, features,
                                                  targets)
        except Exception as e:
            print(e)  # dirty data not cleaned
            raise  # 'proposals' would be undefined below, so re-raise after logging
        detections, detector_losses = self.roi_heads(features, proposals,
                                                     image_sizes, targets)

        losses = {}
        losses.update(detector_losses)
        losses.update(proposal_losses)

        if targets is not None:
            return detections, features, losses
        else:
            return detections, features
Code example #13
from torchvision.models.detection import FasterRCNN, MaskRCNN
from torchvision.models.detection.backbone_utils import resnet_fpn_backbone
from torchvision.models.detection.rpn import (AnchorGenerator, RPNHead,
                                              RegionProposalNetwork)


def _validate_resnet_trainable_layers(is_trained, trainable_backbone_layers):
    # Minimal stand-in for the torchvision-internal helper the snippet relied
    # on (assumed behavior: 3 trainable layers by default when pretrained).
    if not is_trained:
        trainable_backbone_layers = 5
    if trainable_backbone_layers is None:
        trainable_backbone_layers = 3
    assert 0 <= trainable_backbone_layers <= 5
    return trainable_backbone_layers


# RPN hyper-parameters; the snippet used these names without defining them, so
# the conventional values from the other examples on this page are assumed.
rpn_fg_iou_thresh, rpn_bg_iou_thresh = 0.7, 0.3
rpn_batch_size_per_image, rpn_positive_fraction = 256, 0.5
rpn_nms_thresh = 0.7
rpn_pre_nms_top_n = dict(training=2000, testing=1000)
rpn_post_nms_top_n = dict(training=2000, testing=1000)

anchor_sizes = ((32,), (64,), (128,), (256,), (512,))
aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)

rpn_anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)
rpn_head = RPNHead(512, rpn_anchor_generator.num_anchors_per_location()[0])

rpn = RegionProposalNetwork(
            rpn_anchor_generator, rpn_head,
            rpn_fg_iou_thresh, rpn_bg_iou_thresh,
            rpn_batch_size_per_image, rpn_positive_fraction,
            rpn_pre_nms_top_n, rpn_post_nms_top_n, rpn_nms_thresh)

pretrained = True
pretrained_backbone = True  # the snippet used this name without defining it
trainable_backbone_layers = _validate_resnet_trainable_layers(
    pretrained or pretrained_backbone, trainable_backbone_layers=None)
if pretrained:
    pretrained_backbone = False

backbone = resnet_fpn_backbone('resnet50', pretrained_backbone, trainable_layers=trainable_backbone_layers)
fasterrcnn = FasterRCNN(backbone, 8)


maskrcnn = MaskRCNN(backbone, 8)
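As a quick sanity check (a hedged sketch, not part of the original snippet), the detector built above can be run in eval mode on a dummy image:

import torch

fasterrcnn.eval()
with torch.no_grad():
    predictions = fasterrcnn([torch.rand(3, 480, 640)])
print(predictions[0]['boxes'].shape)   # each dict has 'boxes', 'labels', 'scores'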
Code example #14
File: jde_rcnn.py (project: Royzon/tracktor_with_jde)
    def __init__(
            self,
            backbone,
            num_ID,
            num_classes=2,
            len_embeddings=128,
            # transform parameters
            min_size=720,
            max_size=960,
            image_mean=None,
            image_std=None,
            # RPN parameters
            rpn_anchor_generator=None,
            rpn_head=None,
            rpn_pre_nms_top_n_train=2000,
            rpn_pre_nms_top_n_test=1000,
            rpn_post_nms_top_n_train=2000,
            rpn_post_nms_top_n_test=1000,
            rpn_nms_thresh=0.7,
            rpn_fg_iou_thresh=0.5,
            rpn_bg_iou_thresh=0.4,
            rpn_batch_size_per_image=256,
            rpn_positive_fraction=0.5,
            # Box parameters
            box_roi_pool=None,
            box_head=None,
            box_predictor=None,
            box_score_thresh=0.05,
            box_nms_thresh=0.5,
            box_detections_per_img=100,
            box_fg_iou_thresh=0.5,
            box_bg_iou_thresh=0.5,
            box_batch_size_per_image=512,
            box_positive_fraction=0.25,
            bbox_reg_weights=None):

        if not hasattr(backbone, "out_channels"):
            raise ValueError(
                "backbone should contain an attribute out_channels "
                "specifying the number of output channels (assumed to be the "
                "same for all the levels)")

        assert isinstance(rpn_anchor_generator, (AnchorGenerator, type(None)))
        assert isinstance(box_roi_pool, (MultiScaleRoIAlign, type(None)))

        out_channels = backbone.out_channels

        if rpn_anchor_generator is None:
            anchor_sizes = ((16, 22), (32, 45), (64, 90),
                            (128, 181), (256, 362))
            aspect_ratios = ((1 / 3, ), ) * len(anchor_sizes)
            rpn_anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)
        if rpn_head is None:
            rpn_head = RPNHead(
                out_channels,
                rpn_anchor_generator.num_anchors_per_location()[0])

        rpn_pre_nms_top_n = dict(training=rpn_pre_nms_top_n_train,
                                 testing=rpn_pre_nms_top_n_test)
        rpn_post_nms_top_n = dict(training=rpn_post_nms_top_n_train,
                                  testing=rpn_post_nms_top_n_test)

        rpn = RegionProposalNetwork(rpn_anchor_generator, rpn_head,
                                    rpn_fg_iou_thresh, rpn_bg_iou_thresh,
                                    rpn_batch_size_per_image,
                                    rpn_positive_fraction, rpn_pre_nms_top_n,
                                    rpn_post_nms_top_n, rpn_nms_thresh)

        if box_roi_pool is None:
            box_roi_pool = MultiScaleRoIAlign(featmap_names=[0, 1, 2, 3],
                                              output_size=7,
                                              sampling_ratio=2)

        if box_head is None:
            resolution = box_roi_pool.output_size[0]
            representation_size = 1024
            box_head = TwoMLPHead(out_channels * resolution**2,
                                  representation_size)

        emb_scale = math.sqrt(2) * math.log(num_ID - 1) if num_ID > 1 else 1

        if box_predictor is None:
            representation_size = 1024
            box_predictor = JDEPredictor(representation_size, num_classes,
                                         len_embeddings, emb_scale)

        roi_heads = JDE_RoIHeads(
            # Box
            box_roi_pool,
            box_head,
            box_predictor,
            box_fg_iou_thresh,
            box_bg_iou_thresh,
            box_batch_size_per_image,
            box_positive_fraction,
            bbox_reg_weights,
            box_score_thresh,
            box_nms_thresh,
            box_detections_per_img,
            len_embeddings,
            num_ID)

        if image_mean is None:
            image_mean = [0.485, 0.456, 0.406]
        if image_std is None:
            image_std = [0.229, 0.224, 0.225]
        transform = GeneralizedRCNNTransform(min_size, max_size, image_mean,
                                             image_std)

        super(Jde_RCNN, self).__init__(backbone, rpn, roi_heads, transform)
        self.eval_embed = False
Code example #16
    def __init__(
            self,
            backbone,
            num_classes=None,
            # transform parameters
            scale_factor=2.5,
            scale_factor_jitter=0.25,
            image_mean=None,
            image_std=None,
            # RPN parameters
            rpn_anchor_generator=None,
            rpn_head=None,
            rpn_pre_nms_top_n_train=2000,
            rpn_pre_nms_top_n_test=1000,
            rpn_post_nms_top_n_train=2000,
            rpn_post_nms_top_n_test=1000,
            rpn_nms_thresh=0.7,
            rpn_fg_iou_thresh=0.7,
            rpn_bg_iou_thresh=0.3,
            rpn_batch_size_per_image=256,
            rpn_positive_fraction=0.5,
            # Box parameters
            box_roi_pool=None,
            box_head=None,
            box_predictor=None,
            box_score_thresh=0.05,
            box_nms_thresh=0.5,
            box_detections_per_img=100,
            box_fg_iou_thresh=0.5,
            box_bg_iou_thresh=0.5,
            box_batch_size_per_image=512,
            box_positive_fraction=0.25,
            bbox_reg_weights=None,
            # added by Mohamed
            batched_nms=True,
            indep_classif_boxes=False,
            classification_bbox_size=None,
            n_fc_classif_layers=1,
            fc_classif_dropout=0.1,
            cconvhead=None,
            sattention_head=None,
            ignore_label: int = None,
            proposal_augmenter=None,
            n_testtime_augmentations=0):

        if not hasattr(backbone, "out_channels"):
            raise ValueError(
                "backbone should contain an attribute out_channels "
                "specifying the number of output channels (assumed to be the "
                "same for all the levels)")

        assert isinstance(rpn_anchor_generator, (AnchorGenerator, type(None)))
        assert isinstance(box_roi_pool, (MultiScaleRoIAlign, type(None)))

        if num_classes is not None:
            if box_predictor is not None:
                raise ValueError(
                    "num_classes should be None when box_predictor is specified"
                )
        else:
            if box_predictor is None:
                raise ValueError(
                    "num_classes should not be None when box_predictor "
                    "is not specified")

        out_channels = backbone.out_channels

        if rpn_anchor_generator is None:
            anchor_sizes = ((32, ), (64, ), (128, ), (256, ), (512, ))
            aspect_ratios = ((0.5, 1.0, 2.0), ) * len(anchor_sizes)
            rpn_anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)
        if rpn_head is None:
            rpn_head = RPNHead(
                out_channels,
                rpn_anchor_generator.num_anchors_per_location()[0])

        rpn_pre_nms_top_n = dict(training=rpn_pre_nms_top_n_train,
                                 testing=rpn_pre_nms_top_n_test)
        rpn_post_nms_top_n = dict(training=rpn_post_nms_top_n_train,
                                  testing=rpn_post_nms_top_n_test)

        rpn = RegionProposalNetwork(rpn_anchor_generator, rpn_head,
                                    rpn_fg_iou_thresh, rpn_bg_iou_thresh,
                                    rpn_batch_size_per_image,
                                    rpn_positive_fraction, rpn_pre_nms_top_n,
                                    rpn_post_nms_top_n, rpn_nms_thresh)

        if box_roi_pool is None:
            box_roi_pool = MultiScaleRoIAlign(featmap_names=[0, 1, 2, 3],
                                              output_size=7,
                                              sampling_ratio=2)

        if box_head is None:
            resolution = box_roi_pool.output_size[0]
            representation_size = 1024
            box_head = TwoMLPHead(out_channels * resolution**2,
                                  representation_size)

        if box_predictor is None:
            representation_size = 1024
            box_predictor = FastRCNNPredictor(
                in_channels=representation_size,
                num_classes=num_classes,
                n_fc_classif_layers=n_fc_classif_layers,
                dropout=fc_classif_dropout,
                batched_nms=batched_nms,
            )

        roi_heads = RoIHeads(
            # Box
            box_roi_pool=box_roi_pool,
            box_head=box_head,
            box_predictor=box_predictor,
            fg_iou_thresh=box_fg_iou_thresh,
            bg_iou_thresh=box_bg_iou_thresh,
            batch_size_per_image=box_batch_size_per_image,
            positive_fraction=box_positive_fraction,
            bbox_reg_weights=bbox_reg_weights,
            score_thresh=box_score_thresh,
            nms_thresh=box_nms_thresh,
            detections_per_img=box_detections_per_img,
            # added by Mohamed
            batched_nms=batched_nms,
            indep_classif_boxes=indep_classif_boxes,
            classification_bbox_size=classification_bbox_size,
            cconvhead=cconvhead,
            sattention_head=sattention_head,
            ignore_label=ignore_label,
        )

        if image_mean is None:
            image_mean = [0.485, 0.456, 0.406]
        if image_std is None:
            image_std = [0.229, 0.224, 0.225]
        # Mohamed: I changed GeneralizedRCNNTransform to take a scale factor
        #  as opposed to a fixed size to allow free size images in inference
        transform = GeneralizedRCNNTransform(
            scale_factor=scale_factor,
            scale_factor_jitter=scale_factor_jitter,
            image_mean=image_mean,
            image_std=image_std)

        super(FasterRCNN, self).__init__(
            backbone=backbone,
            rpn=rpn,
            roi_heads=roi_heads,
            transform=transform,
            # Mohamed: added this
            proposal_augmenter=proposal_augmenter,
            n_testtime_augmentations=n_testtime_augmentations,
        )
Code example #17
    def __init__(
            self,
            backbone,
            num_classes=2,
            num_pids=5532,
            num_cq_size=5000,
            # transform parameters
            min_size=900,
            max_size=1500,
            image_mean=None,
            image_std=None,
            # Anchor settings:
            anchor_scales=None,
            anchor_ratios=None,
            # RPN parameters
            rpn_anchor_generator=None,
            rpn_head=None,
            rpn_pre_nms_top_n_train=12000,
            rpn_pre_nms_top_n_test=6000,
            rpn_post_nms_top_n_train=2000,
            rpn_post_nms_top_n_test=300,
            rpn_nms_thresh=0.7,
            rpn_fg_iou_thresh=0.7,
            rpn_bg_iou_thresh=0.3,
            rpn_batch_size_per_image=256,
            rpn_positive_fraction=0.5,
            # Box parameters
            rcnn_bbox_bn=True,
            box_roi_pool=None,
            box_head=None,
            box_predictor=None,
            box_score_thresh=0.05,
            box_nms_thresh=0.4,
            box_detections_per_img=300,
            box_fg_iou_thresh=0.5,
            box_bg_iou_thresh=0.1,
            box_batch_size_per_image=128,
            box_positive_fraction=0.5,
            bbox_reg_weights=None,
            # ReID parameters
            feat_head=None,
            reid_head=None,
            reid_loss=None):
        if rpn_anchor_generator is None:
            anchor_sizes = ((32, 64, 128, 256, 512), )
            aspect_ratios = ((0.5, 1.0, 2.0), )
            rpn_anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)
        rpn_head = RPNHead(backbone.out_channels,
                           rpn_anchor_generator.num_anchors_per_location()[0])

        rpn_pre_nms_top_n = dict(training=rpn_pre_nms_top_n_train,
                                 testing=rpn_pre_nms_top_n_test)
        rpn_post_nms_top_n = dict(training=rpn_post_nms_top_n_train,
                                  testing=rpn_post_nms_top_n_test)
        rpn = RegionProposalNetwork(rpn_anchor_generator, rpn_head,
                                    rpn_fg_iou_thresh, rpn_bg_iou_thresh,
                                    rpn_batch_size_per_image,
                                    rpn_positive_fraction, rpn_pre_nms_top_n,
                                    rpn_post_nms_top_n, rpn_nms_thresh)

        if box_roi_pool is None:
            box_roi_pool = MultiScaleRoIAlign(featmap_names=['feat2rpn'],
                                              output_size=[14, 14],
                                              sampling_ratio=2)
        if box_head is None:
            resolution = box_roi_pool.output_size[0]
            representation_size = 2048
            box_head = GAP_BOX_HEAD(resolution, feat_head, representation_size)
        if box_predictor is None:
            representation_size = 2048
            box_predictor = FastRCNNPredictor(representation_size,
                                              num_classes,
                                              RCNN_bbox_bn=False)
        if reid_head is None:
            reid_head = REID_HEAD(box_head.out_dims, 256)
        if reid_loss is None:
            reid_loss = OIMLoss(256, num_pids, num_cq_size, 0.5, 30)
        roi_heads = OIM_ROI_HEAD(
            reid_head,
            reid_loss,
            # box
            box_roi_pool,
            box_head,
            box_predictor,
            box_fg_iou_thresh,
            box_bg_iou_thresh,
            box_batch_size_per_image,
            box_positive_fraction,
            bbox_reg_weights,
            box_score_thresh,
            box_nms_thresh,
            box_detections_per_img)
        if image_mean is None:
            image_mean = [0.485, 0.456, 0.406]
        if image_std is None:
            image_std = [0.229, 0.224, 0.225]
        transform = GeneralizedRCNNTransform(min_size, max_size, image_mean,
                                             image_std)
        super(FasterRCNN_OIM, self).__init__(backbone, rpn, roi_heads,
                                             transform)
Code example #18
File: fasterrcnn_train.py (project: pranoyr/RPN)
    def __init__(self):
        super(FasterRCNN, self).__init__()
        # Define FPN
        self.fpn = resnet_fpn_backbone(backbone_name='resnet101',
                                       pretrained=True)
        anchor_sizes = ((32, ), (64, ), (128, ), (256, ), (512, ))
        aspect_ratios = ((0.5, 1.0, 2.0), ) * len(anchor_sizes)
        # Generate anchor boxes
        anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)
        # Define RPN Head
        # rpn_head = RPNHead(256, 9)
        rpn_head = RPNHead(256, anchor_generator.num_anchors_per_location()[0])
        # RPN parameters
        rpn_pre_nms_top_n_train = 2000
        rpn_pre_nms_top_n_test = 1000
        rpn_post_nms_top_n_train = 2000
        rpn_post_nms_top_n_test = 1000
        rpn_nms_thresh = 0.7
        rpn_fg_iou_thresh = 0.7
        rpn_bg_iou_thresh = 0.3
        # rpn_nms_thresh = 0.45
        # rpn_fg_iou_thresh = 0.5
        # rpn_bg_iou_thresh = 0.5
        rpn_batch_size_per_image = 256
        rpn_positive_fraction = 0.5

        # transform parameters
        min_size = 800
        max_size = 1333
        image_mean = [0.485, 0.456, 0.406]
        image_std = [0.229, 0.224, 0.225]
        self.transform = GeneralizedRCNNTransform(min_size, max_size,
                                                  image_mean, image_std)

        rpn_pre_nms_top_n = dict(training=rpn_pre_nms_top_n_train,
                                 testing=rpn_pre_nms_top_n_test)
        rpn_post_nms_top_n = dict(training=rpn_post_nms_top_n_train,
                                  testing=rpn_post_nms_top_n_test)

        # Create RPN
        self.rpn = RegionProposalNetwork(anchor_generator, rpn_head,
                                         rpn_fg_iou_thresh, rpn_bg_iou_thresh,
                                         rpn_batch_size_per_image,
                                         rpn_positive_fraction,
                                         rpn_pre_nms_top_n, rpn_post_nms_top_n,
                                         rpn_nms_thresh)

        # Box parameters
        box_roi_pool = None
        box_head = None
        box_predictor = None
        box_score_thresh = 0.05
        box_nms_thresh = 0.5
        box_detections_per_img = 100
        box_fg_iou_thresh = 0.5
        box_bg_iou_thresh = 0.5
        box_batch_size_per_image = 512
        box_positive_fraction = 0.25
        bbox_reg_weights = None
        num_classes = 101

        if box_roi_pool is None:
            box_roi_pool = MultiScaleRoIAlign(
                featmap_names=['0', '1', '2', '3'],
                output_size=7,
                sampling_ratio=2)

        if box_head is None:
            resolution = box_roi_pool.output_size[0]
            representation_size = 1024
            box_head = TwoMLPHead(256 * resolution**2, representation_size)

        if box_predictor is None:
            representation_size = 1024
            box_predictor = FastRCNNPredictor(representation_size, num_classes)

        self.roi_heads = RoIHeads(
            # Box
            box_roi_pool,
            box_head,
            box_predictor,
            box_fg_iou_thresh,
            box_bg_iou_thresh,
            box_batch_size_per_image,
            box_positive_fraction,
            bbox_reg_weights,
            box_score_thresh,
            box_nms_thresh,
            box_detections_per_img)
Code example #19
    def _set_rpn(self, *args):
        return RegionProposalNetwork(*args)
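For reference, a hedged illustration of the positional order those `*args` must follow (torchvision's RegionProposalNetwork signature; `anchor_generator`, `head`, and `model` are hypothetical names standing in for objects built as in the other examples):

args = (anchor_generator, head,             # AnchorGenerator, RPNHead
        0.7, 0.3,                           # fg / bg IoU thresholds
        256, 0.5,                           # batch size per image, positive fraction
        dict(training=2000, testing=1000),  # pre-NMS top-N
        dict(training=2000, testing=1000),  # post-NMS top-N
        0.7)                                # NMS threshold
rpn = model._set_rpn(*args)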
Code example #20
    def __init__(
            self,
            backbone,
            num_classes=None,
            # transform parameters
            min_size=800,
            max_size=1333,
            image_mean=None,
            image_std=None,
            # RPN parameters
            rpn_anchor_generator=None,
            rpn_head=None,
            rpn_pre_nms_top_n_train=2000,
            rpn_pre_nms_top_n_test=1000,
            rpn_post_nms_top_n_train=2000,
            rpn_post_nms_top_n_test=1000,
            rpn_nms_thresh=0.7,
            rpn_fg_iou_thresh=0.7,
            rpn_bg_iou_thresh=0.3,
            rpn_batch_size_per_image=256,
            rpn_positive_fraction=0.5,
            # Box parameters
            box_roi_pool=None,
            box_head=None,
            box_predictor=None,
            box_score_thresh=0.05,
            box_nms_thresh=0.5,
            box_detections_per_img=100,
            box_fg_iou_thresh=0.5,
            box_bg_iou_thresh=0.5,
            box_batch_size_per_image=512,
            box_positive_fraction=0.25,
            bbox_reg_weights=None):

        if not hasattr(backbone, "out_channels"):
            raise ValueError(
                "backbone should contain an attribute out_channels "
                "specifying the number of output channels (assumed to be the "
                "same for all the levels)")

        assert isinstance(rpn_anchor_generator, (AnchorGenerator, type(None)))
        assert isinstance(box_roi_pool, (MultiScaleRoIAlign, type(None)))

        if num_classes is not None:
            if box_predictor is not None:
                raise ValueError(
                    "num_classes should be None when box_predictor is specified"
                )
        else:
            if box_predictor is None:
                raise ValueError(
                    "num_classes should not be None when box_predictor "
                    "is not specified")

        out_channels = backbone.out_channels

        if rpn_anchor_generator is None:
            anchor_sizes = ((32, ), (64, ), (128, ), (256, ), (512, ))
            aspect_ratios = ((0.5, 1.0, 2.0), ) * len(anchor_sizes)
            rpn_anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)
        if rpn_head is None:
            rpn_head = RPNHead(
                out_channels,
                rpn_anchor_generator.num_anchors_per_location()[0])

        rpn_pre_nms_top_n = dict(training=rpn_pre_nms_top_n_train,
                                 testing=rpn_pre_nms_top_n_test)
        rpn_post_nms_top_n = dict(training=rpn_post_nms_top_n_train,
                                  testing=rpn_post_nms_top_n_test)

        rpn = RegionProposalNetwork(rpn_anchor_generator, rpn_head,
                                    rpn_fg_iou_thresh, rpn_bg_iou_thresh,
                                    rpn_batch_size_per_image,
                                    rpn_positive_fraction, rpn_pre_nms_top_n,
                                    rpn_post_nms_top_n, rpn_nms_thresh)

        if box_roi_pool is None:
            box_roi_pool = MultiScaleRoIAlign(
                featmap_names=['0', '1', '2', '3'],
                output_size=7,
                sampling_ratio=2)

        if box_head is None:
            resolution = box_roi_pool.output_size[0]
            representation_size = 1024
            box_head = TwoMLPHead(out_channels * resolution**2,
                                  representation_size)

        if box_predictor is None:
            representation_size = 1024
            box_predictor = FastRCNNPredictor(representation_size, num_classes)

        roi_heads = RoIHeads(
            # Box
            box_roi_pool,
            box_head,
            box_predictor,
            box_fg_iou_thresh,
            box_bg_iou_thresh,
            box_batch_size_per_image,
            box_positive_fraction,
            bbox_reg_weights,
            box_score_thresh,
            box_nms_thresh,
            box_detections_per_img)

        if image_mean is None:
            image_mean = [0.485, 0.456, 0.406]
        if image_std is None:
            image_std = [0.229, 0.224, 0.225]
        transform = GeneralizedRCNNTransform(min_size, max_size, image_mean,
                                             image_std)

        super(FasterRCNN, self).__init__(backbone, rpn, roi_heads, transform)
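A hedged sketch of one training step with the class above in scope; the backbone choice, image size, and target values are illustrative only:

import torch
from torchvision.models.detection.backbone_utils import resnet_fpn_backbone

backbone = resnet_fpn_backbone('resnet50', pretrained=False)
model = FasterRCNN(backbone, num_classes=3)
model.train()

images = [torch.rand(3, 300, 400)]
targets = [{"boxes": torch.tensor([[10., 20., 100., 150.]]),
            "labels": torch.tensor([1])}]

loss_dict = model(images, targets)      # RPN + ROI-head losses
sum(loss_dict.values()).backward()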
Code example #21
File: model.py (project: Royzon/tracktor_with_jde)
    def __init__(
            self,
            backbone,
            num_ID,
            num_classes=2,
            version='v1',
            # transform parameters
            min_size=800,
            max_size=1333,
            image_mean=None,
            image_std=None,
            # RPN parameters
            rpn_anchor_generator=None,
            rpn_head=None,
            rpn_pre_nms_top_n_train=2000,
            rpn_pre_nms_top_n_test=1000,
            rpn_post_nms_top_n_train=2000,
            rpn_post_nms_top_n_test=1000,
            rpn_nms_thresh=0.7,
            rpn_fg_iou_thresh=0.5,
            rpn_bg_iou_thresh=0.4,  # FIXME: these two thresholds follow the paper "Towards Real-Time Multi-Object Tracking"
            rpn_batch_size_per_image=256,
            rpn_positive_fraction=0.5,
            # Box parameters
            box_roi_pool=None,
            box_head=None,
            box_predictor=None,
            box_score_thresh=0.05,
            box_nms_thresh=0.5,
            box_detections_per_img=100,
            box_fg_iou_thresh=0.5,
            box_bg_iou_thresh=0.5,
            box_batch_size_per_image=256,
            box_positive_fraction=0.25,
            bbox_reg_weights=None,
            # Embedding parameters  # FIXME: parameters added for the embedding branch
            len_embeddings=128,
            embed_head=None,
            embed_extractor=None):

        if not hasattr(backbone, "out_channels"):
            raise ValueError(
                "backbone should contain an attribute out_channels "
                "specifying the number of output channels (assumed to be the "
                "same for all the levels)")

        assert isinstance(rpn_anchor_generator, (AnchorGenerator, type(None)))
        assert isinstance(box_roi_pool, (MultiScaleRoIAlign, type(None)))

        out_channels = backbone.out_channels

        # FIXME: anchor sizes changed, and only aspect-ratio-1/3 anchors are used,
        # following "Towards Real-Time Multi-Object Tracking"
        if rpn_anchor_generator is None:
            anchor_sizes = ((16, 22), (32, 45), (64, 90),
                            (128, 181), (256, 362))
            aspect_ratios = ((1 / 3, ), ) * len(anchor_sizes)
            rpn_anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)
        if rpn_head is None:
            rpn_head = RPNHead(
                out_channels,
                rpn_anchor_generator.num_anchors_per_location()[0])

        rpn_pre_nms_top_n = dict(training=rpn_pre_nms_top_n_train,
                                 testing=rpn_pre_nms_top_n_test)
        rpn_post_nms_top_n = dict(training=rpn_post_nms_top_n_train,
                                  testing=rpn_post_nms_top_n_test)

        rpn = RegionProposalNetwork(rpn_anchor_generator, rpn_head,
                                    rpn_fg_iou_thresh, rpn_bg_iou_thresh,
                                    rpn_batch_size_per_image,
                                    rpn_positive_fraction, rpn_pre_nms_top_n,
                                    rpn_post_nms_top_n, rpn_nms_thresh)

        if box_roi_pool is None:
            box_roi_pool = MultiScaleRoIAlign(featmap_names=[0, 1, 2, 3],
                                              output_size=11,
                                              sampling_ratio=2)

        if box_head is None:
            resolution = box_roi_pool.output_size[0]
            representation_size = 1024
            box_head = TwoMLPHead(out_channels * resolution**2,
                                  representation_size)

        emb_scale = math.sqrt(2) * math.log(num_ID - 1) if num_ID > 1 else 1

        ## FIXME 现在用的是v1
        if embed_head is None:
            if version == 'v1':
                resolution = box_roi_pool.output_size[0]
                representation_size = 1024
                embed_head = featureHead(out_channels * resolution**2,
                                         representation_size)
            if version == 'v2':
                embed_head = None

        if box_predictor is None:
            representation_size = 1024
            box_predictor = FastRCNNPredictor(representation_size, num_classes)

        if embed_extractor is None:
            representation_size = 1024
            embed_extractor = featureExtractor(representation_size,
                                               len_embeddings, emb_scale)

        roi_heads = JDE_RoIHeads(
            # Box
            box_roi_pool,
            box_head,
            box_predictor,
            box_fg_iou_thresh,
            box_bg_iou_thresh,
            box_batch_size_per_image,
            box_positive_fraction,
            bbox_reg_weights,
            box_score_thresh,
            box_nms_thresh,
            box_detections_per_img,
            len_embeddings,
            num_ID,
            embed_head,
            embed_extractor)
        roi_heads.version = version

        # FIXME: this block is copied verbatim from the Faster R-CNN code #########
        if image_mean is None:
            image_mean = [0.485, 0.456, 0.406]
        if image_std is None:
            image_std = [0.229, 0.224, 0.225]
        transform = GeneralizedRCNNTransform(min_size, max_size, image_mean,
                                             image_std)
        ###########################################################

        super(Jde_RCNN, self).__init__(backbone, rpn, roi_heads, transform)
        # FIXME: attributes used at tracking time, unrelated to training
        self.version = version
        self.original_image_sizes = None
        self.preprocessed_images = None
        self.features = None
        self.box_features = None