Code example #1
    def _init_test_rpn(self):
        anchor_sizes = ((32, ), (64, ), (128, ), (256, ), (512, ))
        aspect_ratios = ((0.5, 1.0, 2.0), ) * len(anchor_sizes)
        rpn_anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)
        out_channels = 256
        rpn_head = RPNHead(out_channels,
                           rpn_anchor_generator.num_anchors_per_location()[0])
        rpn_fg_iou_thresh = 0.7
        rpn_bg_iou_thresh = 0.3
        rpn_batch_size_per_image = 256
        rpn_positive_fraction = 0.5
        rpn_pre_nms_top_n = dict(training=2000, testing=1000)
        rpn_post_nms_top_n = dict(training=2000, testing=1000)
        rpn_nms_thresh = 0.7
        rpn_score_thresh = 0.0

        rpn = RegionProposalNetwork(rpn_anchor_generator,
                                    rpn_head,
                                    rpn_fg_iou_thresh,
                                    rpn_bg_iou_thresh,
                                    rpn_batch_size_per_image,
                                    rpn_positive_fraction,
                                    rpn_pre_nms_top_n,
                                    rpn_post_nms_top_n,
                                    rpn_nms_thresh,
                                    score_thresh=rpn_score_thresh)
        return rpn
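Why `num_anchors_per_location()[0]` appears in every example: each inner tuple of `anchor_sizes` maps to one FPN level, so the per-location anchor count is `len(sizes[i]) * len(aspect_ratios[i])`. A minimal check (the import path varies by torchvision version: `anchor_utils` in recent releases, `rpn` in older ones):

# Minimal sketch verifying the anchors-per-location arithmetic.
from torchvision.models.detection.anchor_utils import AnchorGenerator
# (older torchvision: from torchvision.models.detection.rpn import AnchorGenerator)

anchor_sizes = ((32,), (64,), (128,), (256,), (512,))
aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)
gen = AnchorGenerator(anchor_sizes, aspect_ratios)

# One entry per FPN level: 1 size x 3 ratios = 3 anchors per location.
print(gen.num_anchors_per_location())  # [3, 3, 3, 3, 3]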
Code example #2
def get_model_masck_fpn_new_anchor(num_classes, pretrained, new_AS):
    model = torchvision.models.detection.maskrcnn_resnet50_fpn(
        pretrained=pretrained)

    if new_AS:
        # CHANGE ANCHOR SIZES: 6 sizes x 4 aspect ratios per FPN level
        anchor_generator = AnchorGenerator(
            sizes=tuple([(16, 32, 64, 128, 256, 512) for _ in range(5)]),
            aspect_ratios=tuple([(0.25, 0.5, 1.0, 2.0) for _ in range(5)]))
        model.rpn.anchor_generator = anchor_generator

        # Rebuild the RPN head so it predicts the new per-location anchor count
        model.rpn.head = RPNHead(
            256,
            anchor_generator.num_anchors_per_location()[0])

    # Masks are unused, so drop the mask branch in either case
    model.roi_heads.mask_roi_pool = None
    # SET CLASSES NUMBER
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    return model
Code example #3
def get_faster_rcnn(n_classes: int):
    faster_rcnn = torchvision.models.detection.fasterrcnn_resnet50_fpn(
        pretrained=True)

    anchor_generator = AnchorGenerator(sizes=tuple([(16, 32, 64, 128, 256)
                                                    for _ in range(5)]),
                                       aspect_ratios=tuple([
                                           (0.75, 0.5, 1.25) for _ in range(5)
                                       ]))

    rpn_head = RPNHead(256, anchor_generator.num_anchors_per_location()[0])

    faster_rcnn.rpn = RegionProposalNetwork(anchor_generator=anchor_generator,
                                            head=rpn_head,
                                            fg_iou_thresh=0.7,
                                            bg_iou_thresh=0.3,
                                            batch_size_per_image=48,
                                            positive_fraction=0.5,
                                            pre_nms_top_n=dict(training=200,
                                                               testing=100),
                                            post_nms_top_n=dict(training=160,
                                                                testing=80),
                                            nms_thresh=0.7)

    in_features = faster_rcnn.roi_heads.box_predictor.cls_score.in_features
    faster_rcnn.roi_heads.box_predictor = FastRCNNPredictor(
        in_features, n_classes)
    faster_rcnn.roi_heads.fg_bg_sampler.batch_size_per_image = 24
    faster_rcnn.roi_heads.fg_bg_sampler.positive_fraction = 0.5

    return faster_rcnn
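A quick smoke test of the helper above (a hedged sketch; it assumes the usual torchvision eval-mode contract, where each image yields a dict of 'boxes', 'labels' and 'scores'):

import torch

model = get_faster_rcnn(n_classes=3)
model.eval()
with torch.no_grad():
    detections = model([torch.rand(3, 512, 512)])
print(detections[0]['boxes'].shape)  # [N, 4] proposal boxes kept after NMS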
Code example #4
    def test_targets_to_anchors(self):
        _, targets = self._make_empty_sample()
        anchors = [torch.randint(-50, 50, (3, 4), dtype=torch.float32)]

        anchor_sizes = ((32,), (64,), (128,), (256,), (512,))
        aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)
        rpn_anchor_generator = AnchorGenerator(
            anchor_sizes, aspect_ratios
        )
        rpn_head = RPNHead(4, rpn_anchor_generator.num_anchors_per_location()[0])

        # pre/post-NMS top-n are normally dicts(training=..., testing=...);
        # plain ints suffice here because only assign_targets_to_anchors()
        # is exercised, which never reads them.
        head = RegionProposalNetwork(
            rpn_anchor_generator, rpn_head,
            0.5, 0.3,
            256, 0.5,
            2000, 2000, 0.7)

        labels, matched_gt_boxes = head.assign_targets_to_anchors(anchors, targets)

        self.assertEqual(labels[0].sum(), 0)
        self.assertEqual(labels[0].shape, torch.Size([anchors[0].shape[0]]))
        self.assertEqual(labels[0].dtype, torch.float32)

        self.assertEqual(matched_gt_boxes[0].sum(), 0)
        self.assertEqual(matched_gt_boxes[0].shape, anchors[0].shape)
        self.assertEqual(matched_gt_boxes[0].dtype, torch.float32)
Code example #5
    def __init__(self, backbone,
                 dope_roi_pool, dope_head, dope_predictor,
                 # transform parameters
                 min_size=800, max_size=1333,
                 image_mean=None, image_std=None,
                 # RPN parameters
                 rpn_anchor_generator=None, rpn_head=None,
                 rpn_pre_nms_top_n_train=2000, rpn_pre_nms_top_n_test=1000,
                 rpn_post_nms_top_n_train=2000, rpn_post_nms_top_n_test=1000,
                 rpn_nms_thresh=0.7,
                 rpn_fg_iou_thresh=0.7, rpn_bg_iou_thresh=0.3,
                 rpn_batch_size_per_image=256, rpn_positive_fraction=0.5,
                 # others
                 num_anchor_poses={'body': 20, 'hand': 10, 'face': 10},
                 pose2d_reg_weights={part: 5.0 for part in parts},
                 pose3d_reg_weights={part: 5.0 for part in parts},
                 ):
                
        if not hasattr(backbone, "out_channels"):
            raise ValueError(
                "backbone should contain an attribute out_channels "
                "specifying the number of output channels (assumed to be the "
                "same for all the levels)")

        assert isinstance(rpn_anchor_generator, (AnchorGenerator, type(None)))
        assert isinstance(dope_roi_pool, (MultiScaleRoIAlign, type(None)))

        out_channels = backbone.out_channels

        if rpn_anchor_generator is None:
            anchor_sizes = ((32,), (64,), (128,), (256,), (512,))
            aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)
            rpn_anchor_generator = AnchorGenerator(
                anchor_sizes, aspect_ratios
            )
        if rpn_head is None:
            rpn_head = RPNHead(
                out_channels, rpn_anchor_generator.num_anchors_per_location()[0]
            )

        rpn_pre_nms_top_n = dict(training=rpn_pre_nms_top_n_train, testing=rpn_pre_nms_top_n_test)
        rpn_post_nms_top_n = dict(training=rpn_post_nms_top_n_train, testing=rpn_post_nms_top_n_test)
        
        rpn = RegionProposalNetwork(
            rpn_anchor_generator, rpn_head,
            rpn_fg_iou_thresh, rpn_bg_iou_thresh,
            rpn_batch_size_per_image, rpn_positive_fraction,
            rpn_pre_nms_top_n, rpn_post_nms_top_n, rpn_nms_thresh)

        dope_heads = Dope_RoIHeads(dope_roi_pool, dope_head, dope_predictor,
                                   num_anchor_poses,
                                   pose2d_reg_weights=pose2d_reg_weights,
                                   pose3d_reg_weights=pose3d_reg_weights)
            
        if image_mean is None:
            image_mean = [0.485, 0.456, 0.406]
        if image_std is None:
            image_std = [0.229, 0.224, 0.225]
        transform = Dope_Transform(min_size, max_size, image_mean, image_std)

        super(Dope_RCNN, self).__init__(backbone, rpn, dope_heads, transform)
Code example #6
File: test.py  Project: cuijianzhu/6DVNET
def get_model_detection(num_classes):
    model = posercnn_resnet50_fpn(pretrained=False, num_classes=num_classes)

    # in_features = model.roi_heads.box_predictor.cls_score.in_features
    # model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    anchor_generator = AnchorGenerator(
        sizes=tuple([(32, 64, 128, 256, 512, 768) for _ in range(5)]),
        aspect_ratios=tuple([(0.5, 1.0, 2.0) for _ in range(5)]))
    model.rpn.anchor_generator = anchor_generator

    # 256 because that's the number of features that resnet_fpn_backbone returns
    model.rpn.head = RPNHead(256, anchor_generator.num_anchors_per_location()[0])
    return model
Code example #7
def get_model(
    num_classes,
    anchor_sizes,
    anchor_aspect_ratios,
    rpn_nms_threshold,
    box_nms_threshold,
    box_score_threshold,
    num_box_detections,
):

    # load pre-trained mask R-CNN model
    model = torchvision.models.detection.maskrcnn_resnet50_fpn(
        pretrained=True,
        rpn_nms_thresh=rpn_nms_threshold,
        box_nms_thresh=box_nms_threshold,
        box_score_thresh=box_score_threshold,
        box_detections_per_img=num_box_detections,
    )
    # get number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features

    # replace the pre-trained head with a new one
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    anchor_sizes = tuple([float(i) for i in anchor_sizes.split(",")])
    anchor_aspect_ratios = tuple([float(i) for i in anchor_aspect_ratios.split(",")])

    # create an anchor_generator for the FPN which by default has 5 outputs
    anchor_generator = AnchorGenerator(
        sizes=tuple([anchor_sizes for _ in range(5)]),
        aspect_ratios=tuple([anchor_aspect_ratios for _ in range(5)]),
    )
    model.rpn.anchor_generator = anchor_generator

    # get number of input features for the RPN returned by FPN (256)
    in_channels = model.backbone.out_channels

    # replace the RPN head
    model.rpn.head = RPNHead(
        in_channels, anchor_generator.num_anchors_per_location()[0]
    )

    # turn off masks since dataset only has bounding boxes
    model.roi_heads.mask_roi_pool = None

    return model
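Since anchor_sizes and anchor_aspect_ratios arrive as comma-separated strings (e.g. from argparse), a call might look like this (illustrative values only, not from the original project):

model = get_model(
    num_classes=2,
    anchor_sizes="16,32,64,128,256",
    anchor_aspect_ratios="0.5,1.0,2.0",
    rpn_nms_threshold=0.7,
    box_nms_threshold=0.5,
    box_score_threshold=0.05,
    num_box_detections=100,
)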
Code example #8
def fr50_Model(pretrained=False):
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(
        pretrained=pretrained)  # pretrained=True also works
    # create an anchor_generator for the FPN
    # which by default has 5 outputs

    anchor_generator = AnchorGenerator(
        #sizes=tuple([(16, 32, 64, 128, 256, 512) for _ in range(5)]),
        sizes=tuple([(10, 15, 20, 30, 40) for _ in range(5)]),
        aspect_ratios=tuple([(0.25, 0.5, 1.0, 2.0) for _ in range(5)]))

    model.rpn.anchor_generator = anchor_generator

    # 256 because that's the number of features that FPN returns
    model.rpn.head = RPNHead(256,
                             anchor_generator.num_anchors_per_location()[0])
    return model
Code example #9
File: models.py  Project: jaeeolma/drone_detector
def mask_rcnn_custom_anchors(num_classes: int,
                             backbone=None,
                             pretrained: bool = True,
                             sizes: tuple = ((32, ), (64, ), (128, ), (256, ),
                                             (512, )),
                             aspect_ratios: tuple = (0.5, 1.0, 2.0),
                             min_size: int = 800,
                             max_size: int = 1333) -> nn.Module:
    "Make icevision Mask RCNN with custom anchors. Default values are torchvision defaults"

    if backbone is None:
        backbone = mask_rcnn.backbones.resnet50_fpn(pretrained=pretrained)

    # AnchorGenerator broadcasts a flat aspect_ratios tuple to every
    # feature-map level, so the default here covers all 5 FPN levels.
    rpn_anchor_generator = AnchorGenerator(sizes=sizes,
                                           aspect_ratios=aspect_ratios)

    rpn_head = RPNHead(256, rpn_anchor_generator.num_anchors_per_location()[0])

    in_features = 1024

    box_predictor = FastRCNNPredictor(in_features, num_classes)

    in_features_mask = 256

    hidden_layer = 256
    mask_predictor = MaskRCNNPredictor(in_features_mask, hidden_layer,
                                       num_classes)

    mask_rcnn_kwargs = {
        'rpn_anchor_generator': rpn_anchor_generator,
        'rpn_head': rpn_head,
        'box_predictor': box_predictor,
        'mask_predictor': mask_predictor,
        'num_classes': None,
        'image_mean': [1., 1., 1.],
        'image_std': [1., 1.,
                      1.],  # This way no need to remove model normalization
        'min_size': min_size,
        'max_size': max_size
    }

    custom_model = mask_rcnn.model(backbone=backbone,
                                   remove_internal_transforms=False,
                                   **mask_rcnn_kwargs)

    return custom_model
Code example #10
File: rpn_train1_org.py  Project: pranoyr/RPN
    def __init__(self):
        super(RPN, self).__init__()
        # Define FPN
        self.fpn = resnet_fpn_backbone(backbone_name='resnet101',
                                       pretrained=True)
        anchor_sizes = ((32, ), (64, ), (128, ), (256, ), (512, ))
        aspect_ratios = ((0.5, 1.0, 2.0), ) * len(anchor_sizes)
        # Generate anchor boxes
        anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)
        # Define RPN Head
        # rpn_head = RPNHead(256, 9)
        rpn_head = RPNHead(256, anchor_generator.num_anchors_per_location()[0])
        # RPN parameters
        rpn_pre_nms_top_n_train = 2000
        rpn_pre_nms_top_n_test = 1000
        rpn_post_nms_top_n_train = 2000
        rpn_post_nms_top_n_test = 1000
        rpn_nms_thresh = 0.7
        rpn_fg_iou_thresh = 0.7
        rpn_bg_iou_thresh = 0.3
        rpn_batch_size_per_image = 256
        rpn_positive_fraction = 0.5

        # transform parameters
        min_size = 800
        max_size = 1333
        image_mean = [0.485, 0.456, 0.406]
        image_std = [0.229, 0.224, 0.225]
        self.transform = GeneralizedRCNNTransform(min_size, max_size,
                                                  image_mean, image_std)

        rpn_pre_nms_top_n = dict(training=rpn_pre_nms_top_n_train,
                                 testing=rpn_pre_nms_top_n_test)
        rpn_post_nms_top_n = dict(training=rpn_post_nms_top_n_train,
                                  testing=rpn_post_nms_top_n_test)

        # Create RPN
        self.rpn = RegionProposalNetwork(anchor_generator, rpn_head,
                                         rpn_fg_iou_thresh, rpn_bg_iou_thresh,
                                         rpn_batch_size_per_image,
                                         rpn_positive_fraction,
                                         rpn_pre_nms_top_n, rpn_post_nms_top_n,
                                         rpn_nms_thresh)
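The class wires up self.fpn, self.transform, and self.rpn, but its forward() is not shown in this excerpt; a hedged sketch of how the pieces would compose (an assumption, not the project's actual code):

    def forward(self, images, targets=None):
        # normalize/resize, extract FPN features, then score anchors
        images, targets = self.transform(images, targets)
        features = self.fpn(images.tensors)  # OrderedDict of FPN levels
        proposals, proposal_losses = self.rpn(images, features, targets)
        return proposals, proposal_losses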
Code example #11
def get_model_frcnn_test(num_classes, new_as):
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    if new_as:
        # CHANGE ANCHOR SIZES
        anchor_generator = AnchorGenerator(
            sizes=tuple([(16, 32, 64, 128, 256, 512) for _ in range(5)]),
            aspect_ratios=tuple([(0.25, 0.5, 1.0, 2.0) for _ in range(5)]))
        model.rpn.anchor_generator = anchor_generator

        # Rebuild the RPN head for the new per-location anchor count
        model.rpn.head = RPNHead(256, anchor_generator.num_anchors_per_location()[0])

    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    return model
Code example #12
    def __init__(self):
        super(RPN, self).__init__()
        # Anchor settings (no FPN is built in this variant)
        anchor_sizes = ((32, ), (64, ), (128, ), (256, ), (512, ))
        aspect_ratios = ((0.5, 1.0, 2.0), ) * len(anchor_sizes)
        # Generate anchor boxes
        anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)
        # Define RPN Head
        rpn_head = RPNHead(256, anchor_generator.num_anchors_per_location()[0])
        RPN_PRE_NMS_TOP_N = dict(training=cfg.RPN.PRE_NMS_TOP_N_TRAIN,
                                 testing=cfg.RPN.PRE_NMS_TOP_N_TEST)
        RPN_POST_NMS_TOP_N = dict(training=cfg.RPN.POST_NMS_TOP_N_TRAIN,
                                  testing=cfg.RPN.POST_NMS_TOP_N_TEST)

        # Create RPN
        self.rpn = RegionProposalNetwork(
            anchor_generator, rpn_head, cfg.RPN.FG_IOU_THRESH,
            cfg.RPN.BG_IOU_THRESH, cfg.RPN.BATCH_SIZE_PER_IMAGE,
            cfg.RPN.POSITIVE_FRACTION, RPN_PRE_NMS_TOP_N, RPN_POST_NMS_TOP_N,
            cfg.RPN.NMS_THRESH)
Code example #13
    def _init_pretrained_model(self, num_classes):
        box_roi_pool = MultiScaleRoIAlign(
                # string keys, as recent torchvision expects; + "pool" -> 5 feature maps
                featmap_names=['0', '1', '2', '3'],
                output_size=7,
                sampling_ratio=2)
        model = fasterrcnn_resnet50_fpn(pretrained=True, max_size=config.IMAGE_SIZE, box_nms_thresh=.5, 
                                        # rpn_anchor_generator=rpn_anchor_generator, 
                                        box_roi_pool=box_roi_pool)
   
        torch.manual_seed(0)  # Init the same params in all processes
        model.roi_heads.box_predictor = FastRCNNPredictor(
            in_channels=model.roi_heads.box_head.fc7.out_features,
            num_classes=num_classes)
        # One size per FPN level; the flat ratio list is broadcast to every
        # level, so each location gets 1 size x 5 ratios = 5 anchors.
        model.rpn.anchor_generator = AnchorGenerator(
            sizes=[[16], [32], [64], [128], [256]],
            aspect_ratios=[.25, .5, 1., 2., 4.])
        model.rpn.head = RPNHead(in_channels=256, num_anchors=5)

        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        model.to(device)

        model = DDP(model, find_unused_parameters=True)

        return model
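DDP as used above presupposes an initialized process group; a hedged launch stub (torchrun-style, not part of the original code):

import torch.distributed as dist

dist.init_process_group(backend="nccl" if torch.cuda.is_available() else "gloo")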
Code example #14
def __init_pretrain_faster_rcnn(cfg):
    """
    cfg -- dict / edict, configuration object
    """
    # load in key config from cfg
    class_n = cfg.CLASS_N
    anchor_scales = tuple(cfg.ANCHOR_SCALES)
    anchor_ratios = tuple(cfg.ANCHOR_RATIOS)
    feature_n = cfg.FEATURE_N
    # setup backbone
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(
        pretrained=True)
    # setup RPN
    anchor_generator = AnchorGenerator(
        # size refer to length of one side
        sizes=tuple([anchor_scales for _ in range(feature_n)]),
        aspect_ratios=tuple([anchor_ratios for _ in range(feature_n)]))
    model.rpn.anchor_generator = anchor_generator
    model.rpn.head = RPNHead(256,
                             anchor_generator.num_anchors_per_location()[0])
    # setup RCNN
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, class_n)
    return model
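A hypothetical cfg for the helper above (the real project presumably builds it elsewhere; easydict provides the attribute access the docstring mentions, and all values here are illustrative assumptions):

from easydict import EasyDict as edict

cfg = edict(CLASS_N=2,
            ANCHOR_SCALES=(32, 64, 128, 256, 512),
            ANCHOR_RATIOS=(0.5, 1.0, 2.0),
            FEATURE_N=5)  # resnet50-FPN exposes 5 feature maps
model = __init_pretrain_faster_rcnn(cfg)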
Code example #15
# This fragment references several names it never defines; the values below
# are the standard torchvision defaults, added so it runs standalone.
rpn_fg_iou_thresh = 0.7
rpn_bg_iou_thresh = 0.3
rpn_batch_size_per_image = 256
rpn_positive_fraction = 0.5
rpn_nms_thresh = 0.7
rpn_pre_nms_top_n_train, rpn_pre_nms_top_n_test = 2000, 1000
rpn_post_nms_top_n_train, rpn_post_nms_top_n_test = 2000, 1000

rpn_pre_nms_top_n = dict(training=rpn_pre_nms_top_n_train, testing=rpn_pre_nms_top_n_test)
rpn_post_nms_top_n = dict(training=rpn_post_nms_top_n_train, testing=rpn_post_nms_top_n_test)

anchor_sizes = ((32,), (64,), (128,), (256,), (512,))
aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)

rpn_anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)
# 256 matches resnet_fpn_backbone's out_channels; the original 512 would not
# line up with the FPN features built below.
rpn_head = RPNHead(256, rpn_anchor_generator.num_anchors_per_location()[0])

rpn = RegionProposalNetwork(
            rpn_anchor_generator, rpn_head,
            rpn_fg_iou_thresh, rpn_bg_iou_thresh,
            rpn_batch_size_per_image, rpn_positive_fraction,
            rpn_pre_nms_top_n, rpn_post_nms_top_n, rpn_nms_thresh)


pretrained = True
pretrained_backbone = True  # defined before use (the original read it first)
trainable_backbone_layers = None
trainable_backbone_layers = _validate_resnet_trainable_layers(
    pretrained or pretrained_backbone, trainable_backbone_layers)
if pretrained:
    # no point in downloading backbone weights if the full model is pretrained
    pretrained_backbone = False

backbone = resnet_fpn_backbone('resnet50', pretrained_backbone,
                               trainable_layers=trainable_backbone_layers)
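The fragment builds the RPN before the backbone; a hedged sketch of how the two compose into proposals (names backbone and rpn from above; ImageList is torchvision's batching container):

import torch
from torchvision.models.detection.image_list import ImageList

images = torch.rand(2, 3, 800, 800)
image_list = ImageList(images, [(800, 800)] * 2)

features = backbone(images)      # OrderedDict of FPN levels
rpn.eval()                       # no targets needed in eval mode
proposals, _ = rpn(image_list, features)
print(len(proposals), proposals[0].shape)  # 2 per-image proposal sets, [N, 4]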
Code example #16
    def __init__(
            self,
            backbone,
            num_classes=None,
            # transform parameters
            scale_factor=2.5,
            scale_factor_jitter=0.25,
            image_mean=None,
            image_std=None,
            # RPN parameters
            rpn_anchor_generator=None,
            rpn_head=None,
            rpn_pre_nms_top_n_train=2000,
            rpn_pre_nms_top_n_test=1000,
            rpn_post_nms_top_n_train=2000,
            rpn_post_nms_top_n_test=1000,
            rpn_nms_thresh=0.7,
            rpn_fg_iou_thresh=0.7,
            rpn_bg_iou_thresh=0.3,
            rpn_batch_size_per_image=256,
            rpn_positive_fraction=0.5,
            # Box parameters
            box_roi_pool=None,
            box_head=None,
            box_predictor=None,
            box_score_thresh=0.05,
            box_nms_thresh=0.5,
            box_detections_per_img=100,
            box_fg_iou_thresh=0.5,
            box_bg_iou_thresh=0.5,
            box_batch_size_per_image=512,
            box_positive_fraction=0.25,
            bbox_reg_weights=None,
            # added by Mohamed
            batched_nms=True,
            indep_classif_boxes=False,
            classification_bbox_size=None,
            n_fc_classif_layers=1,
            fc_classif_dropout=0.1,
            cconvhead=None,
            sattention_head=None,
            ignore_label: int = None,
            proposal_augmenter=None,
            n_testtime_augmentations=0):

        if not hasattr(backbone, "out_channels"):
            raise ValueError(
                "backbone should contain an attribute out_channels "
                "specifying the number of output channels (assumed to be the "
                "same for all the levels)")

        assert isinstance(rpn_anchor_generator, (AnchorGenerator, type(None)))
        assert isinstance(box_roi_pool, (MultiScaleRoIAlign, type(None)))

        if num_classes is not None:
            if box_predictor is not None:
                raise ValueError(
                    "num_classes should be None when box_predictor is specified"
                )
        else:
            if box_predictor is None:
                raise ValueError(
                    "num_classes should not be None when box_predictor "
                    "is not specified")

        out_channels = backbone.out_channels

        if rpn_anchor_generator is None:
            anchor_sizes = ((32, ), (64, ), (128, ), (256, ), (512, ))
            aspect_ratios = ((0.5, 1.0, 2.0), ) * len(anchor_sizes)
            rpn_anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)
        if rpn_head is None:
            rpn_head = RPNHead(
                out_channels,
                rpn_anchor_generator.num_anchors_per_location()[0])

        rpn_pre_nms_top_n = dict(training=rpn_pre_nms_top_n_train,
                                 testing=rpn_pre_nms_top_n_test)
        rpn_post_nms_top_n = dict(training=rpn_post_nms_top_n_train,
                                  testing=rpn_post_nms_top_n_test)

        rpn = RegionProposalNetwork(rpn_anchor_generator, rpn_head,
                                    rpn_fg_iou_thresh, rpn_bg_iou_thresh,
                                    rpn_batch_size_per_image,
                                    rpn_positive_fraction, rpn_pre_nms_top_n,
                                    rpn_post_nms_top_n, rpn_nms_thresh)

        if box_roi_pool is None:
            box_roi_pool = MultiScaleRoIAlign(featmap_names=[0, 1, 2, 3],
                                              output_size=7,
                                              sampling_ratio=2)

        if box_head is None:
            resolution = box_roi_pool.output_size[0]
            representation_size = 1024
            box_head = TwoMLPHead(out_channels * resolution**2,
                                  representation_size)

        if box_predictor is None:
            representation_size = 1024
            box_predictor = FastRCNNPredictor(
                in_channels=representation_size,
                num_classes=num_classes,
                n_fc_classif_layers=n_fc_classif_layers,
                dropout=fc_classif_dropout,
                batched_nms=batched_nms,
            )

        roi_heads = RoIHeads(
            # Box
            box_roi_pool=box_roi_pool,
            box_head=box_head,
            box_predictor=box_predictor,
            fg_iou_thresh=box_fg_iou_thresh,
            bg_iou_thresh=box_bg_iou_thresh,
            batch_size_per_image=box_batch_size_per_image,
            positive_fraction=box_positive_fraction,
            bbox_reg_weights=bbox_reg_weights,
            score_thresh=box_score_thresh,
            nms_thresh=box_nms_thresh,
            detections_per_img=box_detections_per_img,
            # added by Mohamed
            batched_nms=batched_nms,
            indep_classif_boxes=indep_classif_boxes,
            classification_bbox_size=classification_bbox_size,
            cconvhead=cconvhead,
            sattention_head=sattention_head,
            ignore_label=ignore_label,
        )

        if image_mean is None:
            image_mean = [0.485, 0.456, 0.406]
        if image_std is None:
            image_std = [0.229, 0.224, 0.225]
        # Mohamed: I changed GeneralizedRCNNTransform to take a scale factor
        #  as opposed to a fixed size to allow free size images in inference
        transform = GeneralizedRCNNTransform(
            scale_factor=scale_factor,
            scale_factor_jitter=scale_factor_jitter,
            image_mean=image_mean,
            image_std=image_std)

        super(FasterRCNN, self).__init__(
            backbone=backbone,
            rpn=rpn,
            roi_heads=roi_heads,
            transform=transform,
            # Mohamed: added this
            proposal_augmenter=proposal_augmenter,
            n_testtime_augmentations=n_testtime_augmentations,
        )
Code example #17
    def __init__(
            self,
            num_classes=2,
            # transform parameters
            backbone_name='resnet50',
            min_size=256,
            max_size=512,
            image_mean=None,
            image_std=None,
            # RPN parameters
            rpn_anchor_generator=None,
            rpn_head=None,
            rpn_pre_nms_top_n_train=2000,
            rpn_pre_nms_top_n_test=1000,
            rpn_post_nms_top_n_train=2000,
            rpn_post_nms_top_n_test=1000,
            rpn_nms_thresh=0.7,
            rpn_fg_iou_thresh=0.7,
            rpn_bg_iou_thresh=0.3,
            rpn_batch_size_per_image=256,
            rpn_positive_fraction=0.5,
            rpn_score_thresh=0.0,
            # Box parameters
            box_roi_pool=None,
            box_head=None,
            box_predictor=None,
            box_score_thresh=0.05,
            box_nms_thresh=0.5,
            box_detections_per_img=100,
            box_fg_iou_thresh=0.5,
            box_bg_iou_thresh=0.5,
            box_batch_size_per_image=512,
            box_positive_fraction=0.25,
            bbox_reg_weights=None,
            # Ellipse regressor
            ellipse_roi_pool=None,
            ellipse_head=None,
            ellipse_predictor=None,
            ellipse_loss_metric="gaussian-angle"):

        backbone = resnet_fpn_backbone(backbone_name,
                                       pretrained=True,
                                       trainable_layers=5)

        # Input image is grayscale -> in_channels = 1 instead of 3 (COCO)
        backbone.body.conv1 = Conv2d(1,
                                     64,
                                     kernel_size=(7, 7),
                                     stride=(2, 2),
                                     padding=(3, 3),
                                     bias=False)

        if not hasattr(backbone, "out_channels"):
            raise ValueError(
                "backbone should contain an attribute out_channels "
                "specifying the number of output channels (assumed to be the "
                "same for all the levels)")

        assert isinstance(rpn_anchor_generator, (AnchorGenerator, type(None)))
        assert isinstance(box_roi_pool, (MultiScaleRoIAlign, type(None)))

        if num_classes is not None:
            if box_predictor is not None:
                raise ValueError(
                    "num_classes should be None when box_predictor is specified"
                )
        else:
            if box_predictor is None:
                raise ValueError(
                    "num_classes should not be None when box_predictor "
                    "is not specified")

        out_channels = backbone.out_channels

        if rpn_anchor_generator is None:
            anchor_sizes = ((32, ), (64, ), (128, ), (256, ), (512, ))
            aspect_ratios = ((0.5, 1.0, 2.0), ) * len(anchor_sizes)
            rpn_anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)
        if rpn_head is None:
            rpn_head = RPNHead(
                out_channels,
                rpn_anchor_generator.num_anchors_per_location()[0])

        rpn_pre_nms_top_n = dict(training=rpn_pre_nms_top_n_train,
                                 testing=rpn_pre_nms_top_n_test)
        rpn_post_nms_top_n = dict(training=rpn_post_nms_top_n_train,
                                  testing=rpn_post_nms_top_n_test)

        rpn = RegionProposalNetwork(rpn_anchor_generator,
                                    rpn_head,
                                    rpn_fg_iou_thresh,
                                    rpn_bg_iou_thresh,
                                    rpn_batch_size_per_image,
                                    rpn_positive_fraction,
                                    rpn_pre_nms_top_n,
                                    rpn_post_nms_top_n,
                                    rpn_nms_thresh,
                                    score_thresh=rpn_score_thresh)

        if box_roi_pool is None:
            box_roi_pool = MultiScaleRoIAlign(
                featmap_names=['0', '1', '2', '3'],
                output_size=7,
                sampling_ratio=2)

        if box_head is None:
            resolution = box_roi_pool.output_size[0]
            representation_size = 1024
            box_head = TwoMLPHead(out_channels * resolution**2,
                                  representation_size)

        if box_predictor is None:
            representation_size = 1024
            box_predictor = FastRCNNPredictor(representation_size, num_classes)

        if ellipse_roi_pool is None:
            ellipse_roi_pool = MultiScaleRoIAlign(
                featmap_names=['0', '1', '2', '3'],
                output_size=7,
                sampling_ratio=2)

        if ellipse_head is None:
            # use the ellipse pool's resolution (the original read
            # box_roi_pool here; both are 7, but the intent differs)
            resolution = ellipse_roi_pool.output_size[0]
            representation_size = 1024
            ellipse_head = TwoMLPHead(out_channels * resolution**2,
                                      representation_size)

        if ellipse_predictor is None:
            representation_size = 1024
            ellipse_predictor = EllipseRegressor(representation_size,
                                                 num_classes)

        roi_heads = EllipseRoIHeads(
            # Box
            box_roi_pool,
            box_head,
            box_predictor,
            box_fg_iou_thresh,
            box_bg_iou_thresh,
            box_batch_size_per_image,
            box_positive_fraction,
            bbox_reg_weights,
            box_score_thresh,
            box_nms_thresh,
            box_detections_per_img,
            # Ellipse
            ellipse_roi_pool=ellipse_roi_pool,
            ellipse_head=ellipse_head,
            ellipse_predictor=ellipse_predictor,
            ellipse_loss_metric=ellipse_loss_metric)

        if image_mean is None:
            image_mean = [0.156]
        if image_std is None:
            image_std = [0.272]
        transform = GeneralizedRCNNTransform(min_size, max_size, image_mean,
                                             image_std)

        super().__init__(backbone, rpn, roi_heads, transform)
Code example #18
File: seqnet.py  Project: Bye-lemon/SeqNet
    def __init__(self, cfg):
        super(SeqNet, self).__init__()

        backbone, box_head = build_resnet(name="resnet50", pretrained=True)

        anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512), ),
                                           aspect_ratios=((0.5, 1.0, 2.0), ))
        head = RPNHead(
            in_channels=backbone.out_channels,
            num_anchors=anchor_generator.num_anchors_per_location()[0],
        )
        pre_nms_top_n = dict(training=cfg.MODEL.RPN.PRE_NMS_TOPN_TRAIN,
                             testing=cfg.MODEL.RPN.PRE_NMS_TOPN_TEST)
        post_nms_top_n = dict(training=cfg.MODEL.RPN.POST_NMS_TOPN_TRAIN,
                              testing=cfg.MODEL.RPN.POST_NMS_TOPN_TEST)
        rpn = RegionProposalNetwork(
            anchor_generator=anchor_generator,
            head=head,
            fg_iou_thresh=cfg.MODEL.RPN.POS_THRESH_TRAIN,
            bg_iou_thresh=cfg.MODEL.RPN.NEG_THRESH_TRAIN,
            batch_size_per_image=cfg.MODEL.RPN.BATCH_SIZE_TRAIN,
            positive_fraction=cfg.MODEL.RPN.POS_FRAC_TRAIN,
            pre_nms_top_n=pre_nms_top_n,
            post_nms_top_n=post_nms_top_n,
            nms_thresh=cfg.MODEL.RPN.NMS_THRESH,
        )

        faster_rcnn_predictor = FastRCNNPredictor(2048, 2)
        reid_head = deepcopy(box_head)
        box_roi_pool = MultiScaleRoIAlign(featmap_names=["feat_res4"],
                                          output_size=14,
                                          sampling_ratio=2)
        box_predictor = BBoxRegressor(2048,
                                      num_classes=2,
                                      bn_neck=cfg.MODEL.ROI_HEAD.BN_NECK)
        roi_heads = SeqRoIHeads(
            # OIM
            num_pids=cfg.MODEL.LOSS.LUT_SIZE,
            num_cq_size=cfg.MODEL.LOSS.CQ_SIZE,
            oim_momentum=cfg.MODEL.LOSS.OIM_MOMENTUM,
            oim_scalar=cfg.MODEL.LOSS.OIM_SCALAR,
            # SeqNet
            faster_rcnn_predictor=faster_rcnn_predictor,
            reid_head=reid_head,
            # parent class
            box_roi_pool=box_roi_pool,
            box_head=box_head,
            box_predictor=box_predictor,
            fg_iou_thresh=cfg.MODEL.ROI_HEAD.POS_THRESH_TRAIN,
            bg_iou_thresh=cfg.MODEL.ROI_HEAD.NEG_THRESH_TRAIN,
            batch_size_per_image=cfg.MODEL.ROI_HEAD.BATCH_SIZE_TRAIN,
            positive_fraction=cfg.MODEL.ROI_HEAD.POS_FRAC_TRAIN,
            bbox_reg_weights=None,
            score_thresh=cfg.MODEL.ROI_HEAD.SCORE_THRESH_TEST,
            nms_thresh=cfg.MODEL.ROI_HEAD.NMS_THRESH_TEST,
            detections_per_img=cfg.MODEL.ROI_HEAD.DETECTIONS_PER_IMAGE_TEST,
        )

        transform = GeneralizedRCNNTransform(
            min_size=cfg.INPUT.MIN_SIZE,
            max_size=cfg.INPUT.MAX_SIZE,
            image_mean=[0.485, 0.456, 0.406],
            image_std=[0.229, 0.224, 0.225],
        )

        self.backbone = backbone
        self.rpn = rpn
        self.roi_heads = roi_heads
        self.transform = transform

        # loss weights
        self.lw_rpn_reg = cfg.SOLVER.LW_RPN_REG
        self.lw_rpn_cls = cfg.SOLVER.LW_RPN_CLS
        self.lw_proposal_reg = cfg.SOLVER.LW_PROPOSAL_REG
        self.lw_proposal_cls = cfg.SOLVER.LW_PROPOSAL_CLS
        self.lw_box_reg = cfg.SOLVER.LW_BOX_REG
        self.lw_box_cls = cfg.SOLVER.LW_BOX_CLS
        self.lw_box_reid = cfg.SOLVER.LW_BOX_REID
Code example #19
                                                   collate_fn=collate_fn)

    device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(
        pretrained=True)

    anchor_generator = AnchorGenerator(
        sizes=((32,), (24,), (24,), (16,), (8,)),
        aspect_ratios=((1.0, 1.0, 1.0, 1.0),
                       (0.8, 1.0, 1.0, 1.0),
                       (1.0, 0.8, 1.0, 1.0),
                       (1.0, 1.0, 1.0, 1.0),
                       (1.0, 1.0, 1.0, 1.0)))
    model.rpn.anchor_generator = anchor_generator
    model.rpn.head = RPNHead(256,
                             anchor_generator.num_anchors_per_location()[0])
    # get the number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    # replace the pre-trained head with a new one
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    model.to(device)
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params,
                                lr=0.005,
                                momentum=0.9,
                                weight_decay=0.0005)
    # and a learning rate scheduler
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                   step_size=3,
                                                   gamma=0.1)
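A minimal training-loop sketch to go with the optimizer and scheduler above (assumes num_epochs and a data_loader yielding (images, targets) pairs, as in the torchvision detection tutorial):

    for epoch in range(num_epochs):
        model.train()
        for images, targets in data_loader:
            images = [img.to(device) for img in images]
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
            loss_dict = model(images, targets)  # dict of RPN + RoI losses
            losses = sum(loss_dict.values())
            optimizer.zero_grad()
            losses.backward()
            optimizer.step()
        lr_scheduler.step()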
Code example #20
File: model.py  Project: zhiqwang/sightseq
    def build_model(cls, args, task):
        """Build a new model instance."""
        # make sure that all args are properly defaulted (in case there are any new ones)
        base_architecture(args)

        rpn_anchor_generator = task.rpn_anchor_generator
        rpn_head = task.rpn_head
        box_roi_pool = task.box_roi_pool
        box_predictor = task.box_predictor
        box_head = task.box_head

        # setup backbone
        backbone = resnet_fpn_backbone(args.backbone, args.backbone_pretrained)

        if not hasattr(backbone, "out_channels"):
            raise ValueError(
                "backbone should contain an attribute out_channels "
                "specifying the number of output channels (assumed to be the "
                "same for all the levels)"
            )

        assert isinstance(rpn_anchor_generator, (AnchorGenerator, type(None)))
        assert isinstance(box_roi_pool, (MultiScaleRoIAlign, type(None)))

        if task.num_classes > 0:
            if box_predictor is not None:
                raise ValueError("num_classes should be -1 when box_predictor is specified")
        else:
            if box_predictor is None:
                raise ValueError("num_classes should be > 0 when box_predictor is not specified")

        out_channels = backbone.out_channels

        if rpn_anchor_generator is None:
            anchor_sizes = ((32,), (64,), (128,), (256,), (512,))
            aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)
            rpn_anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)
        if rpn_head is None:
            rpn_head = RPNHead(
                out_channels,
                rpn_anchor_generator.num_anchors_per_location()[0],
            )

        rpn_pre_nms_top_n = dict(training=args.rpn_pre_nms_top_n_train, testing=args.rpn_pre_nms_top_n_test)
        rpn_post_nms_top_n = dict(training=args.rpn_post_nms_top_n_train, testing=args.rpn_post_nms_top_n_test)

        rpn = RPN(
            rpn_anchor_generator, rpn_head,
            args.rpn_fg_iou_thresh, args.rpn_bg_iou_thresh,
            args.rpn_batch_size_per_image, args.rpn_positive_fraction,
            rpn_pre_nms_top_n, rpn_post_nms_top_n, args.rpn_nms_thresh,
        )

        if box_roi_pool is None:
            box_roi_pool = MultiScaleRoIAlign(
                featmap_names=[0, 1, 2, 3],
                output_size=7,
                sampling_ratio=2,
            )

        if box_head is None:
            resolution = box_roi_pool.output_size[0]
            representation_size = 1024
            box_head = TwoMLPHead(
                out_channels * resolution ** 2,
                representation_size,
            )

        if box_predictor is None:
            representation_size = 1024
            box_predictor = FastRCNNPredictor(
                representation_size,
                task.num_classes,
            )

        roi_heads = RegionOfInterestHeads(
            # Box
            box_roi_pool, box_head, box_predictor,
            args.box_fg_iou_thresh, args.box_bg_iou_thresh,
            args.box_batch_size_per_image, args.box_positive_fraction,
            args.bbox_reg_weights, args.box_score_thresh,
            args.box_nms_thresh, args.box_detections_per_img,
        )

        if args.image_mean is None:
            args.image_mean = [0.485, 0.456, 0.406]
        if args.image_std is None:
            args.image_std = [0.229, 0.224, 0.225]
        transform = GeneralizedRCNNTransform(
            args.min_size, args.max_size,
            args.image_mean, args.image_std,
        )

        return cls(backbone, rpn, roi_heads, transform)
Code example #21
File: jde_rcnn.py  Project: Royzon/tracktor_with_jde
    def __init__(
            self,
            backbone,
            num_ID,
            num_classes=2,
            len_embeddings=128,
            # transform parameters
            min_size=720,
            max_size=960,
            image_mean=None,
            image_std=None,
            # RPN parameters
            rpn_anchor_generator=None,
            rpn_head=None,
            rpn_pre_nms_top_n_train=2000,
            rpn_pre_nms_top_n_test=1000,
            rpn_post_nms_top_n_train=2000,
            rpn_post_nms_top_n_test=1000,
            rpn_nms_thresh=0.7,
            rpn_fg_iou_thresh=0.5,
            rpn_bg_iou_thresh=0.4,
            rpn_batch_size_per_image=256,
            rpn_positive_fraction=0.5,
            # Box parameters
            box_roi_pool=None,
            box_head=None,
            box_predictor=None,
            box_score_thresh=0.05,
            box_nms_thresh=0.5,
            box_detections_per_img=100,
            box_fg_iou_thresh=0.5,
            box_bg_iou_thresh=0.5,
            box_batch_size_per_image=512,
            box_positive_fraction=0.25,
            bbox_reg_weights=None):

        if not hasattr(backbone, "out_channels"):
            raise ValueError(
                "backbone should contain an attribute out_channels "
                "specifying the number of output channels (assumed to be the "
                "same for all the levels)")

        assert isinstance(rpn_anchor_generator, (AnchorGenerator, type(None)))
        assert isinstance(box_roi_pool, (MultiScaleRoIAlign, type(None)))

        out_channels = backbone.out_channels

        if rpn_anchor_generator is None:
            anchor_sizes = ((16, 22), (32, 45), (64, 90), (128, 181),
                            (256, 362))
            aspect_ratios = ((1 / 3, ), ) * len(anchor_sizes)
            rpn_anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)
        if rpn_head is None:
            rpn_head = RPNHead(
                out_channels,
                rpn_anchor_generator.num_anchors_per_location()[0])

        rpn_pre_nms_top_n = dict(training=rpn_pre_nms_top_n_train,
                                 testing=rpn_pre_nms_top_n_test)
        rpn_post_nms_top_n = dict(training=rpn_post_nms_top_n_train,
                                  testing=rpn_post_nms_top_n_test)

        rpn = RegionProposalNetwork(rpn_anchor_generator, rpn_head,
                                    rpn_fg_iou_thresh, rpn_bg_iou_thresh,
                                    rpn_batch_size_per_image,
                                    rpn_positive_fraction, rpn_pre_nms_top_n,
                                    rpn_post_nms_top_n, rpn_nms_thresh)

        if box_roi_pool is None:
            box_roi_pool = MultiScaleRoIAlign(featmap_names=[0, 1, 2, 3],
                                              output_size=7,
                                              sampling_ratio=2)

        if box_head is None:
            resolution = box_roi_pool.output_size[0]
            representation_size = 1024
            box_head = TwoMLPHead(out_channels * resolution**2,
                                  representation_size)

        emb_scale = math.sqrt(2) * math.log(num_ID - 1) if num_ID > 1 else 1

        if box_predictor is None:
            representation_size = 1024
            box_predictor = JDEPredictor(representation_size, num_classes,
                                         len_embeddings, emb_scale)

        roi_heads = JDE_RoIHeads(
            # Box
            box_roi_pool,
            box_head,
            box_predictor,
            box_fg_iou_thresh,
            box_bg_iou_thresh,
            box_batch_size_per_image,
            box_positive_fraction,
            bbox_reg_weights,
            box_score_thresh,
            box_nms_thresh,
            box_detections_per_img,
            len_embeddings,
            num_ID)

        if image_mean is None:
            image_mean = [0.485, 0.456, 0.406]
        if image_std is None:
            image_std = [0.229, 0.224, 0.225]
        transform = GeneralizedRCNNTransform(min_size, max_size, image_mean,
                                             image_std)

        super(Jde_RCNN, self).__init__(backbone, rpn, roi_heads, transform)
        self.eval_embed = False
Code example #22
def get_model(pre_trained, pretrained_backbone, numclasses):
    anchor_generator = AnchorGenerator(sizes=tuple([(16, 24, 32, 48, 96)
                                                    for _ in range(5)]),
                                       aspect_ratios=tuple([
                                           (0.5, 1.0, 2.0) for _ in range(5)
                                       ]))
    rpnhead = RPNHead(256, anchor_generator.num_anchors_per_location()[0])
    if pre_trained:
        # dl_model = maskrcnn_resnet50_fpn(pretrained=pre_trained, max_size=MAX_SIZE, rpn_head=rpnhead
        #                                  , rpn_anchor_generator=anchor_generator, rpn_pre_nms_top_n_train=12000
        #                                  , rpn_pre_nms_top_n_test=6000, rpn_post_nms_top_n_train=2000
        #                                  , rpn_post_nms_top_n_test=300, rpn_fg_iou_thresh=0.5, rpn_bg_iou_thresh=0.3
        #                                  , rpn_positive_fraction=0.7, bbox_reg_weights=(1.0, 1.0, 1.0, 1.0)
        #                                  , box_batch_size_per_image=32)
        dl_model = maskrcnn_resnet50_fpn(pretrained=pre_trained,
                                         max_size=MAX_SIZE,
                                         rpn_pre_nms_top_n_train=12000,
                                         rpn_pre_nms_top_n_test=6000,
                                         rpn_post_nms_top_n_train=2000,
                                         rpn_post_nms_top_n_test=300,
                                         rpn_fg_iou_thresh=0.5,
                                         rpn_bg_iou_thresh=0.3,
                                         rpn_positive_fraction=0.7,
                                         bbox_reg_weights=(1.0, 1.0, 1.0, 1.0),
                                         box_batch_size_per_image=32)
        # dl_model = maskrcnn_resnet50_fpn(pretrained=pre_trained, max_size=MAX_SIZE)

        for param in dl_model.parameters():
            param.requires_grad = False

        # replace the classifier with a new one, that has
        # num_classes which is user-defined
        num_classes = numclasses  # 1 class (lesion) + background

        # get number of input features for the classifier
        in_features = dl_model.roi_heads.box_predictor.cls_score.in_features
        # replace the pre-trained head with a new one
        dl_model.roi_heads.box_predictor = FastRCNNPredictor(
            in_features, num_classes)

        # now get the number of input features for the mask classifier
        in_features_mask = dl_model.roi_heads.mask_predictor.conv5_mask.in_channels
        hidden_layer = 256
        # and replace the mask predictor with a new one
        dl_model.roi_heads.mask_predictor = MaskRCNNPredictor(
            in_features_mask, hidden_layer, num_classes)
    else:
        dl_model = maskrcnn_resnet50_fpn(
            num_classes=numclasses,
            pretrained_backbone=pretrained_backbone,
            max_size=MAX_SIZE,
            rpn_head=rpnhead,
            rpn_anchor_generator=anchor_generator,
            rpn_pre_nms_top_n_train=12000,
            rpn_pre_nms_top_n_test=6000,
            rpn_post_nms_top_n_train=2000,
            rpn_post_nms_top_n_test=300,
            rpn_fg_iou_thresh=0.5,
            rpn_bg_iou_thresh=0.3,
            rpn_positive_fraction=0.7,
            bbox_reg_weights=(1.0, 1.0, 1.0, 1.0),
            box_batch_size_per_image=32)
    return dl_model
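Because the pretrained branch freezes every existing parameter before swapping in new predictors, only the replacement heads remain trainable; a quick check (hedged; assumes MAX_SIZE is defined in the surrounding module):

dl_model = get_model(pre_trained=True, pretrained_backbone=True, numclasses=2)
trainable = [name for name, p in dl_model.named_parameters() if p.requires_grad]
# Only the freshly built box_predictor and mask_predictor parameters remain.
print(trainable)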
Code example #23
    def __init__(
            self,
            backbone,
            num_classes=2,
            num_pids=5532,
            num_cq_size=5000,
            # transform parameters
            min_size=900,
            max_size=1500,
            image_mean=None,
            image_std=None,
            # Anchor settings:
            anchor_scales=None,
            anchor_ratios=None,
            # RPN parameters
            rpn_anchor_generator=None,
            rpn_head=None,
            rpn_pre_nms_top_n_train=12000,
            rpn_pre_nms_top_n_test=6000,
            rpn_post_nms_top_n_train=2000,
            rpn_post_nms_top_n_test=300,
            rpn_nms_thresh=0.7,
            rpn_fg_iou_thresh=0.7,
            rpn_bg_iou_thresh=0.3,
            rpn_batch_size_per_image=256,
            rpn_positive_fraction=0.5,
            # Box parameters
            rcnn_bbox_bn=True,
            box_roi_pool=None,
            box_head=None,
            box_predictor=None,
            box_score_thresh=0.05,
            box_nms_thresh=0.4,
            box_detections_per_img=300,
            box_fg_iou_thresh=0.5,
            box_bg_iou_thresh=0.1,
            box_batch_size_per_image=128,
            box_positive_fraction=0.5,
            bbox_reg_weights=None,
            # ReID parameters
            feat_head=None,
            reid_head=None,
            reid_loss=None):
        if rpn_anchor_generator is None:
            anchor_sizes = ((32, 64, 128, 256, 512), )
            aspect_ratios = ((0.5, 1.0, 2.0), )
            rpn_anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)
        if rpn_head is None:  # don't clobber a caller-supplied head
            rpn_head = RPNHead(
                backbone.out_channels,
                rpn_anchor_generator.num_anchors_per_location()[0])

        rpn_pre_nms_top_n = dict(training=rpn_pre_nms_top_n_train,
                                 testing=rpn_pre_nms_top_n_test)
        rpn_post_nms_top_n = dict(training=rpn_post_nms_top_n_train,
                                  testing=rpn_post_nms_top_n_test)
        rpn = RegionProposalNetwork(rpn_anchor_generator, rpn_head,
                                    rpn_fg_iou_thresh, rpn_bg_iou_thresh,
                                    rpn_batch_size_per_image,
                                    rpn_positive_fraction, rpn_pre_nms_top_n,
                                    rpn_post_nms_top_n, rpn_nms_thresh)

        if box_roi_pool is None:
            box_roi_pool = MultiScaleRoIAlign(featmap_names=['feat2rpn'],
                                              output_size=[14, 14],
                                              sampling_ratio=2)
        if box_head is None:
            resolution = box_roi_pool.output_size[0]
            representation_size = 2048
            box_head = GAP_BOX_HEAD(resolution, feat_head, representation_size)
        if box_predictor is None:
            representation_size = 2048
            box_predictor = FastRCNNPredictor(representation_size,
                                              num_classes,
                                              RCNN_bbox_bn=False)
        if reid_head is None:
            reid_head = REID_HEAD(box_head.out_dims, 256)
        if reid_loss is None:
            reid_loss = OIMLoss(256, num_pids, num_cq_size, 0.5, 30)
        roi_heads = OIM_ROI_HEAD(
            reid_head,
            reid_loss,
            # box
            box_roi_pool,
            box_head,
            box_predictor,
            box_fg_iou_thresh,
            box_bg_iou_thresh,
            box_batch_size_per_image,
            box_positive_fraction,
            bbox_reg_weights,
            box_score_thresh,
            box_nms_thresh,
            box_detections_per_img)
        if image_mean is None:
            image_mean = [0.485, 0.456, 0.406]
        if image_std is None:
            image_std = [0.229, 0.224, 0.225]
        transform = GeneralizedRCNNTransform(min_size, max_size, image_mean,
                                             image_std)
        super(FasterRCNN_OIM, self).__init__(backbone, rpn, roi_heads,
                                             transform)
Code example #24
File: fasterrcnn_train.py  Project: pranoyr/RPN
    def __init__(self):
        super(FasterRCNN, self).__init__()
        # Define FPN
        self.fpn = resnet_fpn_backbone(backbone_name='resnet101',
                                       pretrained=True)
        anchor_sizes = ((32, ), (64, ), (128, ), (256, ), (512, ))
        aspect_ratios = ((0.5, 1.0, 2.0), ) * len(anchor_sizes)
        # Generate anchor boxes
        anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)
        # Define RPN Head
        # rpn_head = RPNHead(256, 9)
        rpn_head = RPNHead(256, anchor_generator.num_anchors_per_location()[0])
        # RPN parameters
        rpn_pre_nms_top_n_train = 2000
        rpn_pre_nms_top_n_test = 1000
        rpn_post_nms_top_n_train = 2000
        rpn_post_nms_top_n_test = 1000
        rpn_nms_thresh = 0.7
        rpn_fg_iou_thresh = 0.7
        rpn_bg_iou_thresh = 0.3
        # rpn_nms_thresh = 0.45
        # rpn_fg_iou_thresh = 0.5
        # rpn_bg_iou_thresh = 0.5
        rpn_batch_size_per_image = 256
        rpn_positive_fraction = 0.5

        # transform parameters
        min_size = 800
        max_size = 1333
        image_mean = [0.485, 0.456, 0.406]
        image_std = [0.229, 0.224, 0.225]
        self.transform = GeneralizedRCNNTransform(min_size, max_size,
                                                  image_mean, image_std)

        rpn_pre_nms_top_n = dict(training=rpn_pre_nms_top_n_train,
                                 testing=rpn_pre_nms_top_n_test)
        rpn_post_nms_top_n = dict(training=rpn_post_nms_top_n_train,
                                  testing=rpn_post_nms_top_n_test)

        # Create RPN
        self.rpn = RegionProposalNetwork(anchor_generator, rpn_head,
                                         rpn_fg_iou_thresh, rpn_bg_iou_thresh,
                                         rpn_batch_size_per_image,
                                         rpn_positive_fraction,
                                         rpn_pre_nms_top_n, rpn_post_nms_top_n,
                                         rpn_nms_thresh)

        # Box parameters
        box_roi_pool = None
        box_head = None
        box_predictor = None
        box_score_thresh = 0.05
        box_nms_thresh = 0.5
        box_detections_per_img = 100
        box_fg_iou_thresh = 0.5
        box_bg_iou_thresh = 0.5
        box_batch_size_per_image = 512
        box_positive_fraction = 0.25
        bbox_reg_weights = None
        num_classes = 101

        if box_roi_pool is None:
            box_roi_pool = MultiScaleRoIAlign(
                featmap_names=['0', '1', '2', '3'],
                output_size=7,
                sampling_ratio=2)

        if box_head is None:
            resolution = box_roi_pool.output_size[0]
            representation_size = 1024
            box_head = TwoMLPHead(256 * resolution**2, representation_size)

        if box_predictor is None:
            representation_size = 1024
            box_predictor = FastRCNNPredictor(representation_size, num_classes)

        self.roi_heads = RoIHeads(
            # Box
            box_roi_pool,
            box_head,
            box_predictor,
            box_fg_iou_thresh,
            box_bg_iou_thresh,
            box_batch_size_per_image,
            box_positive_fraction,
            bbox_reg_weights,
            box_score_thresh,
            box_nms_thresh,
            box_detections_per_img)
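
The class above only wires the components together; a hypothetical forward pass (mirroring torchvision's GeneralizedRCNN, not part of the source file) would compose them like this:

    # Hypothetical forward pass -- an assumption, since the original snippet
    # omits it; it mirrors GeneralizedRCNN's control flow.
    def forward(self, images, targets=None):
        images, targets = self.transform(images, targets)  # resize + normalize
        features = self.fpn(images.tensors)  # OrderedDict[str, Tensor]
        proposals, rpn_losses = self.rpn(images, features, targets)
        detections, roi_losses = self.roi_heads(features, proposals,
                                                images.image_sizes, targets)
        # GeneralizedRCNN would additionally call transform.postprocess here
        # to map boxes back to the original image sizes.
        return detections, {**rpn_losses, **roi_losses}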
Code example #25
def define_model(num_classes,
                 net,
                 anchors,
                 up_thres=0.5,
                 low_thres=0.2,
                 box_score=0.3,
                 data='binary'):
    if net == 'mobilenet':
        backbone = torchvision.models.mobilenet_v2(pretrained=True).features
        # FasterRCNN needs to know the number of
        # output channels in a backbone. For mobilenet_v2, it's 1280
        # so we need to add it here
        backbone.out_channels = 1280

        # let's make the RPN generate 5 x 3 anchors per spatial
        # location, with 5 different sizes and 3 different aspect
        # ratios. We have a Tuple[Tuple[int]] because each feature
        # map could potentially have different sizes and
        # aspect ratios.
        if data == 'tick_bite':
            anchor_generator = AnchorGenerator(sizes=((8, 16, 32, 64, 128), ),
                                               aspect_ratios=((0.5, 1.0,
                                                               2.0), ))
        else:
            anchor_generator = AnchorGenerator(sizes=((16, 32, 64, 128,
                                                       256), ),
                                               aspect_ratios=((0.5, 1.0,
                                                               2.0), ))

        # let's define which feature maps we will use to perform the
        # region of interest cropping, as well as the size of the crop
        # after rescaling.
        # if your backbone returns a Tensor, featmap_names is expected to
        # be ['0']. More generally, the backbone should return an
        # OrderedDict[str, Tensor], and in featmap_names you can choose which
        # feature maps to use.
        roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                        output_size=7,
                                                        sampling_ratio=2)

        # put the pieces together inside a FasterRCNN model
        model = FasterRCNN(backbone,
                           num_classes=num_classes,
                           rpn_anchor_generator=anchor_generator,
                           rpn_fg_iou_thresh=up_thres,
                           rpn_bg_iou_thresh=low_thres,
                           box_roi_pool=roi_pooler,
                           box_score_thresh=box_score)

    elif net == 'resnet50':
        resnet50 = init_model(num_classes=num_classes)
        anchor_sizes = anchors
        aspect_ratios = ((0.5, 1.0, 2.0), ) * len(anchor_sizes)
        rpn_anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)

        rpn_head = RPNHead(resnet50.backbone.out_channels,
                           rpn_anchor_generator.num_anchors_per_location()[0])

        roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                        output_size=7,
                                                        sampling_ratio=2)
        model = FasterRCNN(resnet50.backbone,
                           num_classes=num_classes,
                           rpn_anchor_generator=rpn_anchor_generator,
                           rpn_head=rpn_head,
                           rpn_fg_iou_thresh=up_thres,
                           rpn_bg_iou_thresh=low_thres,
                           box_roi_pool=roi_pooler,
                           box_score_thresh=box_score)
    else:
        # Guard against returning an unbound name when an unknown net is passed.
        raise ValueError(f"unsupported net: {net}")
    return model
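
For reference, a hypothetical call of define_model (the argument values are illustrative, not taken from the original project):

model = define_model(num_classes=2,
                     net='mobilenet',
                     anchors=None,  # only consumed by the 'resnet50' branch
                     up_thres=0.5,
                     low_thres=0.2,
                     box_score=0.3,
                     data='tick_bite')
model.eval()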
Code example #26
def main():
    anchor_generator = AnchorGenerator(sizes=tuple([(16, 24, 32, 48, 96)
                                                    for _ in range(5)]),
                                       aspect_ratios=tuple([
                                           (0.5, 1.0, 2.0) for _ in range(5)
                                       ]))
    rpnhead = RPNHead(256, anchor_generator.num_anchors_per_location()[0])
    model = maskrcnn_resnet50_fpn(num_classes=2,
                                  pretrained_backbone=True,
                                  max_size=MAX_SIZE,
                                  rpn_head=rpnhead,
                                  rpn_anchor_generator=anchor_generator,
                                  rpn_pre_nms_top_n_train=12000,
                                  rpn_pre_nms_top_n_test=6000,
                                  rpn_post_nms_top_n_train=2000,
                                  rpn_post_nms_top_n_test=300,
                                  rpn_fg_iou_thresh=0.5,
                                  rpn_bg_iou_thresh=0.3,
                                  rpn_positive_fraction=0.7,
                                  bbox_reg_weights=(1.0, 1.0, 1.0, 1.0),
                                  box_batch_size_per_image=32)
    model.load_state_dict(
        torch.load('saved_models' + os.sep + '0_deeplesion.pth',
                   map_location='cpu'))
    data_transforms = {
        'train':
        T.Compose([
            T.ToOriginalHU(INTENSITY_OFFSET),
            T.IntensityWindowing(WINDOWING),
            T.SpacingResize(NORM_SPACING, MAX_SIZE),
            T.ToTensor()
        ]),
        'val':
        T.Compose([
            T.ToOriginalHU(INTENSITY_OFFSET),
            T.IntensityWindowing(WINDOWING),
            T.SpacingResize(NORM_SPACING, MAX_SIZE),
            T.ToTensor()
        ]),
        'test':
        T.Compose([
            T.ToOriginalHU(INTENSITY_OFFSET),
            T.IntensityWindowing(WINDOWING),
            T.SpacingResize(NORM_SPACING, MAX_SIZE),
            T.ToTensor()
        ])
    }
    image_datasets = {
        x: DeepLesion(DIR_IN + os.sep + x, GT_FN_DICT[x], data_transforms[x])
        for x in ['train', 'val', 'test']
    }

    dataloaders = {
        x: DataLoader(image_datasets[x],
                      batch_size=3,
                      shuffle=True,
                      num_workers=0,
                      collate_fn=BatchCollator)
        for x in ['train', 'val', 'test']
    }
    for batch_id, (inputs, targets) in enumerate(dataloaders['test']):
        outputs = test_model(model, inputs)
        outputs = remove_overlapping(outputs, 0.655)
        for image, target, output in zip(inputs, targets, outputs):
            img_copy = image.squeeze().numpy()
            images = [img_copy] * 3
            images = [im.astype(float) for im in images]
            img_copy = cv2.merge(images)
            for bbox, pseudo_mask in zip(target["boxes"], target["masks"]):
                bbox = bbox.squeeze().numpy()
                bbox = np.int16(bbox)
                mask = pseudo_mask.squeeze().numpy()
                cv2.rectangle(img_copy, (bbox[0], bbox[1]), (bbox[2], bbox[3]),
                              (0, 255, 0), 1)
                msk_idx = np.where(mask == 1)
                img_copy[msk_idx[0], msk_idx[1], 0] = 255
            for predbox, predmask, score in zip(output['boxes'],
                                                output['masks'],
                                                output['scores']):
                if score < 0.655:
                    break
                predbox = predbox.numpy()
                predmask = predmask.squeeze().numpy()
                score = score.numpy()
                predmask = np.where(predmask > 0.5, 1, 0)
                cv2.rectangle(img_copy, (predbox[0], predbox[1]),
                              (predbox[2], predbox[3]), (0, 0, 255), 1)
                pmsk_idx = np.where(predmask == 1)
                img_copy[pmsk_idx[0], pmsk_idx[1], 2] = 255
                cv2.putText(img_copy, str(score),
                            (int(predbox[0]), int(predbox[1] - 5)),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1,
                            cv2.LINE_AA)
            # cv2.imshow(str(target['image_id']), img_copy)
            cv2.imwrite(
                'simple_test' + os.sep +
                str(target['image_id']).replace(os.sep, '_') + '_pred.jpg',
                img_copy * 255)
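
BatchCollator is not defined in this snippet; a minimal stand-in, assuming it only needs to keep variable-size detection samples as lists rather than stacking them into tensors:

def BatchCollator(batch):
    # Each sample is (image, target); targets hold a variable number of boxes,
    # so images and targets are returned as lists, not stacked tensors.
    images, targets = zip(*batch)
    return list(images), list(targets)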
Code example #27
    def __init__(
            self,
            backbone,
            num_classes=None,
            num_pids=5532,
            num_cq_size=5000,
            # transform parameters
            min_size=900,
            max_size=1500,
            image_mean=None,
            image_std=None,
            # Anchor settings:
            anchor_scales=None,
            anchor_ratios=None,
            # RPN parameters
            rpn_anchor_generator=None,
            rpn_head=None,
            rpn_pre_nms_top_n_train=12000,
            rpn_pre_nms_top_n_test=6000,
            rpn_post_nms_top_n_train=2000,
            rpn_post_nms_top_n_test=300,
            rpn_nms_thresh=0.7,
            rpn_fg_iou_thresh=0.7,
            rpn_bg_iou_thresh=0.3,
            rpn_batch_size_per_image=256,
            rpn_positive_fraction=0.5,
            # Box parameters
            box_roi_pool=None,
            feat_head=None,
            box_predictor=None,
            box_score_thresh=0.0,
            box_nms_thresh=0.4,
            box_detections_per_img=300,
            box_fg_iou_thresh=0.5,
            box_bg_iou_thresh=0.1,
            box_batch_size_per_image=128,
            box_positive_fraction=0.5,
            bbox_reg_weights=None,
            # ReID parameters
            embedding_head=None,
            reid_loss=None):

        if not hasattr(backbone, "out_channels"):
            raise ValueError(
                'backbone should contain an attribute out_channels '
                'specifying the number of output channels (assumed to be the '
                'same for all the levels)')

        assert isinstance(rpn_anchor_generator, (AnchorGenerator, type(None)))
        assert isinstance(box_roi_pool, (MultiScaleRoIAlign, type(None)))

        if num_classes is not None:
            if box_predictor is not None:
                raise ValueError(
                    'num_classes should be None when box_predictor is specified'
                )
        else:
            if box_predictor is None:
                raise ValueError(
                    'num_classes should not be None when box_predictor'
                    'is not specified')

        out_channels = backbone.out_channels

        if rpn_anchor_generator is None:
            if anchor_scales is None:
                anchor_scales = ((32, 64, 128, 256, 512), )
            if anchor_ratios is None:
                anchor_ratios = ((0.5, 1.0, 2.0), )
            rpn_anchor_generator = AnchorGenerator(anchor_scales,
                                                   anchor_ratios)

        if rpn_head is None:
            rpn_head = RPNHead(
                out_channels,
                rpn_anchor_generator.num_anchors_per_location()[0])

        rpn_pre_nms_top_n = dict(training=rpn_pre_nms_top_n_train,
                                 testing=rpn_pre_nms_top_n_test)
        rpn_post_nms_top_n = dict(training=rpn_post_nms_top_n_train,
                                  testing=rpn_post_nms_top_n_test)

        rpn = self._set_rpn(rpn_anchor_generator, rpn_head, rpn_fg_iou_thresh,
                            rpn_bg_iou_thresh, rpn_batch_size_per_image,
                            rpn_positive_fraction, rpn_pre_nms_top_n,
                            rpn_post_nms_top_n, rpn_nms_thresh)

        if box_roi_pool is None:
            box_roi_pool = MultiScaleRoIAlign(featmap_names=['feat_res4'],
                                              output_size=14,
                                              sampling_ratio=2)

        if feat_head is None:
            raise ValueError('feat_head should be specified manually.')
            # resolution = box_roi_pool.output_size[0]
            # representation_size = 2048
            # # ConvHead should be part of the backbone
            # # feat_head = TwoMLPHead(
            # #     out_channels * resolution ** 2,
            # #     representation_size)

        if box_predictor is None:
            box_predictor = CoordRegressor(2048, num_classes)

        if embedding_head is None:
            embedding_head = ReIDEmbeddingProj(
                featmap_names=['feat_res4', 'feat_res5'],
                in_channels=[1024, 2048],
                dim=256)

        if reid_loss is None:
            reid_loss = HOIMLoss(256, num_pids, num_cq_size, 0.5, 30.0)

        roi_heads = self._set_roi_heads(
            embedding_head, reid_loss, box_roi_pool, feat_head, box_predictor,
            box_fg_iou_thresh, box_bg_iou_thresh, box_batch_size_per_image,
            box_positive_fraction, bbox_reg_weights, box_score_thresh,
            box_nms_thresh, box_detections_per_img)

        if image_mean is None:
            image_mean = [0.485, 0.456, 0.406]
        if image_std is None:
            image_std = [0.229, 0.224, 0.225]
        transform = GeneralizedRCNNTransform(min_size, max_size, image_mean,
                                             image_std)

        super(FasterRCNN_HOIM, self).__init__(backbone, rpn, roi_heads,
                                              transform)
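
Note that feat_head must be supplied explicitly (see the ValueError above). A hypothetical construction, where res5_head stands in for the user-supplied res5 stage of the backbone:

# Hypothetical usage -- backbone and res5_head must come from the project.
model = FasterRCNN_HOIM(backbone,
                        num_classes=2,
                        feat_head=res5_head)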
Code example #28
File: aff_cf_model.py  Project: cxt98/ACF_perception
    def __init__(
            self,
            arch,
            pretrained,
            num_classes,
            input_mode,
            acf_head='endpoints',
            # transform parameters
            min_size=800,
            max_size=1333,
            image_mean=None,
            image_std=None,
            # RPN parameters
            rpn_anchor_generator=None,
            rpn_head=None,
            rpn_pre_nms_top_n_train=2000,
            rpn_pre_nms_top_n_test=1000,
            rpn_post_nms_top_n_train=2000,
            rpn_post_nms_top_n_test=1000,
            rpn_nms_thresh=0.5,
            rpn_fg_iou_thresh=0.7,
            rpn_bg_iou_thresh=0.3,
            rpn_batch_size_per_image=256,
            rpn_positive_fraction=0.5,
            # Box parameters
            box_roi_pool=None,
            box_head=None,
            box_predictor=None,
            box_score_thresh=0.05,
            box_nms_thresh=0.5,
            box_detections_per_img=100,
            box_fg_iou_thresh=0.5,
            box_bg_iou_thresh=0.5,
            box_batch_size_per_image=512,
            box_positive_fraction=0.25,
            bbox_reg_weights=None):
        super(ACFNetwork, self).__init__()

        self.input_mode = input_mode

        self.backbone = resnet_fpn_backbone(arch, pretrained)
        # Change the first conv layer to take 4 channels for early fusion with
        # a 1-channel depth map; keep the pretrained RGB weights and initialize
        # the depth channel with their mean across the RGB channels.
        conv1_weight_old = nn.Parameter(self.backbone.body.conv1.weight.data)
        conv1_weight = torch.zeros((64, 4, 7, 7))
        conv1_weight[:, 0:3, :, :] = conv1_weight_old
        avg_weight = conv1_weight_old.mean(dim=1, keepdim=False)
        conv1_weight[:, 3, :, :] = avg_weight
        self.backbone.body.conv1.weight = torch.nn.Parameter(conv1_weight)

        # self.backbone.body.conv1.weight.detach()
        # self.backbone.body.conv1 = nn.Conv2d(4, 64, kernel_size=7, stride=2, padding=3, bias=False)

        out_channels = self.backbone.out_channels
        if rpn_anchor_generator is None:
            anchor_sizes = ((16, ), (32, ), (64, ), (128, ), (256, ), (512, ))
            aspect_ratios = ((0.5, 1.0, 2.0), ) * len(anchor_sizes)
            rpn_anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)
        if rpn_head is None:
            rpn_head = RPNHead(
                out_channels,
                rpn_anchor_generator.num_anchors_per_location()[0])

        rpn_pre_nms_top_n = dict(training=rpn_pre_nms_top_n_train,
                                 testing=rpn_pre_nms_top_n_test)
        rpn_post_nms_top_n = dict(training=rpn_post_nms_top_n_train,
                                  testing=rpn_post_nms_top_n_test)

        self.rpn = RegionProposalNetwork(rpn_anchor_generator, rpn_head,
                                         rpn_fg_iou_thresh, rpn_bg_iou_thresh,
                                         rpn_batch_size_per_image,
                                         rpn_positive_fraction,
                                         rpn_pre_nms_top_n, rpn_post_nms_top_n,
                                         rpn_nms_thresh)

        self.roi_heads = RoIHeadsExtend(out_channels, num_classes,
                                        self.input_mode, acf_head)

        # freeze RGB backbone and RPN when training on poses
        if self.input_mode == config.INPUT_RGBD:
            for param in self.rpn.parameters():
                param.requires_grad = False
            for param in self.backbone.parameters():
                param.requires_grad = False
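
With the backbone and RPN frozen in RGBD mode, only the remaining trainable parameters should reach the optimizer; a common companion pattern (illustrative, not from the source file):

# Hand only the still-trainable parameters (the RoI heads) to the optimizer.
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(trainable_params, lr=0.005, momentum=0.9)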
Code example #29
    def __init__(
            self,
            backbone,
            num_classes=None,
            # transform parameters
            min_size=800,
            max_size=1333,
            image_mean=None,
            image_std=None,
            # RPN parameters
            rpn_anchor_generator=None,
            rpn_head=None,
            rpn_pre_nms_top_n_train=2000,
            rpn_pre_nms_top_n_test=1000,
            rpn_post_nms_top_n_train=2000,
            rpn_post_nms_top_n_test=1000,
            rpn_nms_thresh=0.7,
            rpn_fg_iou_thresh=0.7,
            rpn_bg_iou_thresh=0.3,
            rpn_batch_size_per_image=256,
            rpn_positive_fraction=0.5,
            # Box parameters
            box_roi_pool=None,
            box_head=None,
            box_predictor=None,
            box_score_thresh=0.05,
            box_nms_thresh=0.5,
            box_detections_per_img=100,
            box_fg_iou_thresh=0.5,
            box_bg_iou_thresh=0.5,
            box_batch_size_per_image=512,
            box_positive_fraction=0.25,
            bbox_reg_weights=None):

        if not hasattr(backbone, "out_channels"):
            raise ValueError(
                "backbone should contain an attribute out_channels "
                "specifying the number of output channels (assumed to be the "
                "same for all the levels)")

        assert isinstance(rpn_anchor_generator, (AnchorGenerator, type(None)))
        assert isinstance(box_roi_pool, (MultiScaleRoIAlign, type(None)))

        if num_classes is not None:
            if box_predictor is not None:
                raise ValueError(
                    "num_classes should be None when box_predictor is specified"
                )
        else:
            if box_predictor is None:
                raise ValueError(
                    "num_classes should not be None when box_predictor "
                    "is not specified")

        out_channels = backbone.out_channels

        if rpn_anchor_generator is None:
            anchor_sizes = ((32, ), (64, ), (128, ), (256, ), (512, ))
            aspect_ratios = ((0.5, 1.0, 2.0), ) * len(anchor_sizes)
            rpn_anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)
        if rpn_head is None:
            rpn_head = RPNHead(
                out_channels,
                rpn_anchor_generator.num_anchors_per_location()[0])

        rpn_pre_nms_top_n = dict(training=rpn_pre_nms_top_n_train,
                                 testing=rpn_pre_nms_top_n_test)
        rpn_post_nms_top_n = dict(training=rpn_post_nms_top_n_train,
                                  testing=rpn_post_nms_top_n_test)

        rpn = RegionProposalNetwork(rpn_anchor_generator, rpn_head,
                                    rpn_fg_iou_thresh, rpn_bg_iou_thresh,
                                    rpn_batch_size_per_image,
                                    rpn_positive_fraction, rpn_pre_nms_top_n,
                                    rpn_post_nms_top_n, rpn_nms_thresh)

        if box_roi_pool is None:
            box_roi_pool = MultiScaleRoIAlign(
                featmap_names=['0', '1', '2', '3'],
                output_size=7,
                sampling_ratio=2)

        if box_head is None:
            resolution = box_roi_pool.output_size[0]
            representation_size = 1024
            box_head = TwoMLPHead(out_channels * resolution**2,
                                  representation_size)

        if box_predictor is None:
            representation_size = 1024
            box_predictor = FastRCNNPredictor(representation_size, num_classes)

        roi_heads = RoIHeads(
            # Box
            box_roi_pool,
            box_head,
            box_predictor,
            box_fg_iou_thresh,
            box_bg_iou_thresh,
            box_batch_size_per_image,
            box_positive_fraction,
            bbox_reg_weights,
            box_score_thresh,
            box_nms_thresh,
            box_detections_per_img)

        if image_mean is None:
            image_mean = [0.485, 0.456, 0.406]
        if image_std is None:
            image_std = [0.229, 0.224, 0.225]
        transform = GeneralizedRCNNTransform(min_size, max_size, image_mean,
                                             image_std)

        super(FasterRCNN, self).__init__(backbone, rpn, roi_heads, transform)
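
A minimal usage sketch for this class, assuming resnet_fpn_backbone is imported as in the earlier examples and that the GeneralizedRCNN parent (suggested by the super() call) supplies the forward pass:

backbone = resnet_fpn_backbone('resnet50', pretrained=True)  # out_channels == 256
model = FasterRCNN(backbone, num_classes=91)
model.eval()
with torch.no_grad():
    # In eval mode the model returns one dict per image with
    # 'boxes', 'labels' and 'scores'.
    predictions = model([torch.rand(3, 600, 800)])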
Code example #30
File: model.py  Project: Royzon/tracktor_with_jde
    def __init__(
            self,
            backbone,
            num_ID,
            num_classes=2,
            version='v1',
            # transform parameters
            min_size=800,
            max_size=1333,
            image_mean=None,
            image_std=None,
            # RPN parameters
            rpn_anchor_generator=None,
            rpn_head=None,
            rpn_pre_nms_top_n_train=2000,
            rpn_pre_nms_top_n_test=1000,
            rpn_post_nms_top_n_train=2000,
            rpn_post_nms_top_n_test=1000,
            rpn_nms_thresh=0.7,
            rpn_fg_iou_thresh=0.5,
            rpn_bg_iou_thresh=0.4,  # FIXME: these two thresholds follow the paper "Towards Real-Time Multi-Object Tracking"
            rpn_batch_size_per_image=256,
            rpn_positive_fraction=0.5,
            # Box parameters
            box_roi_pool=None,
            box_head=None,
            box_predictor=None,
            box_score_thresh=0.05,
            box_nms_thresh=0.5,
            box_detections_per_img=100,
            box_fg_iou_thresh=0.5,
            box_bg_iou_thresh=0.5,
            box_batch_size_per_image=256,
            box_positive_fraction=0.25,
            bbox_reg_weights=None,
            # Embedding parameters  # FIXME: newly added parameters
            len_embeddings=128,
            embed_head=None,
            embed_extractor=None):

        if not hasattr(backbone, "out_channels"):
            raise ValueError(
                "backbone should contain an attribute out_channels "
                "specifying the number of output channels (assumed to be the "
                "same for all the levels)")

        assert isinstance(rpn_anchor_generator, (AnchorGenerator, type(None)))
        assert isinstance(box_roi_pool, (MultiScaleRoIAlign, type(None)))

        out_channels = backbone.out_channels

        # FIXME: anchor sizes changed; only aspect-ratio-1/3 anchors are used,
        # following "Towards Real-Time Multi-Object Tracking"
        if rpn_anchor_generator is None:
            anchor_sizes = ((16, 22), (32, 45), (64, 90), (128, 181),
                            (256, 362))
            aspect_ratios = ((1 / 3, ), ) * len(anchor_sizes)
            rpn_anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)
        if rpn_head is None:
            rpn_head = RPNHead(
                out_channels,
                rpn_anchor_generator.num_anchors_per_location()[0])

        rpn_pre_nms_top_n = dict(training=rpn_pre_nms_top_n_train,
                                 testing=rpn_pre_nms_top_n_test)
        rpn_post_nms_top_n = dict(training=rpn_post_nms_top_n_train,
                                  testing=rpn_post_nms_top_n_test)

        rpn = RegionProposalNetwork(rpn_anchor_generator, rpn_head,
                                    rpn_fg_iou_thresh, rpn_bg_iou_thresh,
                                    rpn_batch_size_per_image,
                                    rpn_positive_fraction, rpn_pre_nms_top_n,
                                    rpn_post_nms_top_n, rpn_nms_thresh)

        if box_roi_pool is None:
            box_roi_pool = MultiScaleRoIAlign(featmap_names=['0', '1', '2', '3'],
                                              output_size=11,
                                              sampling_ratio=2)

        if box_head is None:
            resolution = box_roi_pool.output_size[0]
            representation_size = 1024
            box_head = TwoMLPHead(out_channels * resolution**2,
                                  representation_size)

        emb_scale = math.sqrt(2) * math.log(num_ID - 1) if num_ID > 1 else 1
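        # With num_ID = 5532 identities this evaluates to
        # sqrt(2) * ln(5531) ≈ 12.19; the scale grows only logarithmically
        # with the number of identities (JDE-style embedding scaling).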

        # FIXME: v1 is the version currently in use
        if embed_head is None:
            if version == 'v1':
                resolution = box_roi_pool.output_size[0]
                representation_size = 1024
                embed_head = featureHead(out_channels * resolution**2,
                                         representation_size)
            if version == 'v2':
                embed_head = None

        if box_predictor is None:
            representation_size = 1024
            box_predictor = FastRCNNPredictor(representation_size, num_classes)

        if embed_extractor is None:
            representation_size = 1024
            embed_extractor = featureExtractor(representation_size,
                                               len_embeddings, emb_scale)

        roi_heads = JDE_RoIHeads(
            # Box
            box_roi_pool,
            box_head,
            box_predictor,
            box_fg_iou_thresh,
            box_bg_iou_thresh,
            box_batch_size_per_image,
            box_positive_fraction,
            bbox_reg_weights,
            box_score_thresh,
            box_nms_thresh,
            box_detections_per_img,
            len_embeddings,
            num_ID,
            embed_head,
            embed_extractor)
        roi_heads.version = version

        # FIXME: this block is copied verbatim from the Faster R-CNN code
        if image_mean is None:
            image_mean = [0.485, 0.456, 0.406]
        if image_std is None:
            image_std = [0.229, 0.224, 0.225]
        transform = GeneralizedRCNNTransform(min_size, max_size, image_mean,
                                             image_std)
        # (end of the block copied from Faster R-CNN)

        super(Jde_RCNN, self).__init__(backbone, rpn, roi_heads, transform)
        # FIXME: attributes used at tracking time; not relevant to training
        self.version = version
        self.original_image_sizes = None
        self.preprocessed_images = None
        self.features = None
        self.box_features = None
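
Finally, a hypothetical construction of this tracker-detector (the backbone choice and num_ID are illustrative only):

# Hypothetical usage -- any backbone exposing out_channels works, e.g. an FPN.
backbone = resnet_fpn_backbone('resnet50', pretrained=True)
model = Jde_RCNN(backbone, num_ID=1000, num_classes=2, version='v1')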