def _init_test_roi_heads_faster_rcnn(self):
    out_channels = 256
    num_classes = 91

    box_fg_iou_thresh = 0.5
    box_bg_iou_thresh = 0.5
    box_batch_size_per_image = 512
    box_positive_fraction = 0.25
    bbox_reg_weights = None
    box_score_thresh = 0.05
    box_nms_thresh = 0.5
    box_detections_per_img = 100

    box_roi_pool = ops.MultiScaleRoIAlign(
        featmap_names=['0', '1', '2', '3'],
        output_size=7,
        sampling_ratio=2)

    resolution = box_roi_pool.output_size[0]
    representation_size = 1024
    box_head = TwoMLPHead(out_channels * resolution ** 2, representation_size)
    box_predictor = FastRCNNPredictor(representation_size, num_classes)

    roi_heads = RoIHeads(
        box_roi_pool, box_head, box_predictor,
        box_fg_iou_thresh, box_bg_iou_thresh,
        box_batch_size_per_image, box_positive_fraction,
        bbox_reg_weights,
        box_score_thresh, box_nms_thresh, box_detections_per_img)
    return roi_heads
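# Hypothetical companion test, not from the original source: exercises the
# helper above by running the constructed RoIHeads in eval mode on dummy FPN
# features. Feature-map names, the 256-channel width, and the forward
# signature (features, proposals, image_shapes) -> (detections, losses)
# follow the torchvision RoIHeads used by the helper.
def test_roi_heads_inference_smoke(self):
    roi_heads = self._init_test_roi_heads_faster_rcnn()
    roi_heads.eval()
    features = {name: torch.rand(1, 256, size, size)
                for name, size in zip(['0', '1', '2', '3'], [64, 32, 16, 8])}
    proposals = [torch.tensor([[10., 10., 100., 100.],
                               [50., 50., 200., 200.]])]
    image_shapes = [(512, 512)]
    detections, losses = roi_heads(features, proposals, image_shapes)
    self.assertEqual(losses, {})  # no losses in eval mode
    self.assertIn('boxes', detections[0])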
def __init__(self, num_thing_classes, backbone_out_channels=256, roi_out_res=14,
             feat_maps_names=['P4', 'P8', 'P16', 'P32'], representation_size=1024,
             box_score_thresh=0.05, box_nms_thresh=0.5, box_detections_per_img=100,
             box_fg_iou_thresh=0.5, box_bg_iou_thresh=0.5,
             box_batch_size_per_image=512, box_positive_fraction=0.25,
             bbox_reg_weights=None):
    super().__init__()

    # Boxes
    box_roi_pool = RoiAlign(feat_maps_names, roi_out_res, 2)
    bbox_head = box_head(backbone_out_channels * roi_out_res ** 2, representation_size)
    bbox_predictor = box_predictor(representation_size, num_thing_classes)

    # Masks
    mask_roi_pool = RoiAlign(feat_maps_names, roi_out_res, 2)
    m_head = mask_head(backbone_out_channels)
    m_predictor = mask_predictor(backbone_out_channels, num_thing_classes)

    self.heads = RoIHeads(
        box_roi_pool, bbox_head, bbox_predictor,
        box_fg_iou_thresh, box_bg_iou_thresh,
        box_batch_size_per_image, box_positive_fraction,
        bbox_reg_weights,
        box_score_thresh, box_nms_thresh, box_detections_per_img,
        mask_roi_pool, m_head, m_predictor)
def test_assign_targets_to_proposals(self):
    proposals = [torch.randint(-50, 50, (20, 4), dtype=torch.float32)]
    gt_boxes = [torch.zeros((0, 4), dtype=torch.float32)]
    gt_labels = [torch.tensor([[0]], dtype=torch.int64)]

    box_roi_pool = MultiScaleRoIAlign(
        featmap_names=['0', '1', '2', '3'],
        output_size=7,
        sampling_ratio=2)

    resolution = box_roi_pool.output_size[0]
    representation_size = 1024
    box_head = TwoMLPHead(4 * resolution ** 2, representation_size)
    box_predictor = FastRCNNPredictor(representation_size, 2)

    roi_heads = RoIHeads(
        # Box
        box_roi_pool, box_head, box_predictor,
        0.5, 0.5,        # box_fg_iou_thresh, box_bg_iou_thresh
        512, 0.25,       # box_batch_size_per_image, box_positive_fraction
        None,            # bbox_reg_weights
        0.05, 0.5, 100)  # box_score_thresh, box_nms_thresh, box_detections_per_img

    matched_idxs, labels = roi_heads.assign_targets_to_proposals(
        proposals, gt_boxes, gt_labels)

    # With no ground-truth boxes, every proposal must be matched to background.
    self.assertEqual(matched_idxs[0].sum(), 0)
    self.assertEqual(matched_idxs[0].shape, torch.Size([proposals[0].shape[0]]))
    self.assertEqual(matched_idxs[0].dtype, torch.int64)

    self.assertEqual(labels[0].sum(), 0)
    self.assertEqual(labels[0].shape, torch.Size([proposals[0].shape[0]]))
    self.assertEqual(labels[0].dtype, torch.int64)
def __init__(
        self,
        box_roi_pool,
        box_head,
        box_predictor,
        # Faster R-CNN training
        fg_iou_thresh, bg_iou_thresh,
        batch_size_per_image, positive_fraction,
        bbox_reg_weights,
        # Faster R-CNN inference
        score_thresh, nms_thresh, detections_per_img,
        # Mask
        mask_roi_pool=None, mask_head=None, mask_predictor=None,
        keypoint_roi_pool=None, keypoint_head=None, keypoint_predictor=None,
):
    # The mask and keypoint arguments are accepted here but are not forwarded
    # to the base class, so only the box branch is initialized.
    RoIHeads.__init__(
        self,
        box_roi_pool, box_head, box_predictor,
        # Faster R-CNN training
        fg_iou_thresh, bg_iou_thresh,
        batch_size_per_image, positive_fraction,
        bbox_reg_weights,
        # Faster R-CNN inference
        score_thresh, nms_thresh, detections_per_img)
def __init__(self, backbone, num_classes=None,
             # transform parameters
             min_size=800, max_size=1333,
             image_mean=None, image_std=None,
             # RPN parameters
             rpn_anchor_generator=None, rpn_head=None,
             rpn_pre_nms_top_n_train=2000, rpn_pre_nms_top_n_test=1000,
             rpn_post_nms_top_n_train=2000, rpn_post_nms_top_n_test=1000,
             rpn_nms_thresh=0.7,
             rpn_fg_iou_thresh=0.7, rpn_bg_iou_thresh=0.3,
             rpn_batch_size_per_image=256, rpn_positive_fraction=0.5,
             # Box parameters
             box_roi_pool=None, box_head=None, box_predictor=None,
             box_score_thresh=0.05, box_nms_thresh=0.5, box_detections_per_img=100,
             box_fg_iou_thresh=0.5, box_bg_iou_thresh=0.5,
             box_batch_size_per_image=512, box_positive_fraction=0.25,
             bbox_reg_weights=None):
    if not hasattr(backbone, "out_channels"):
        raise ValueError("backbone should contain an attribute out_channels")

    assert isinstance(rpn_anchor_generator, (AnchorGenerator, type(None)))
    assert isinstance(box_roi_pool, (MultiScaleRoIAlign, type(None)))

    if num_classes is not None:
        if box_predictor is not None:
            raise ValueError("num_classes should be None when box_predictor is specified")
    else:
        if box_predictor is None:
            raise ValueError("num_classes should not be None when box_predictor is not specified")

    out_channels = backbone.out_channels

    if rpn_anchor_generator is None:
        anchor_sizes = ((32,), (64,), (128,), (256,), (512,))
        aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)
        rpn_anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)
    if rpn_head is None:
        rpn_head = RPNHead(out_channels,
                           rpn_anchor_generator.num_anchors_per_location()[0])

    rpn_pre_nms_top_n = dict(training=rpn_pre_nms_top_n_train,
                             testing=rpn_pre_nms_top_n_test)
    rpn_post_nms_top_n = dict(training=rpn_post_nms_top_n_train,
                              testing=rpn_post_nms_top_n_test)
    rpn = RegionProposalNetwork(
        rpn_anchor_generator, rpn_head,
        rpn_fg_iou_thresh, rpn_bg_iou_thresh,
        rpn_batch_size_per_image, rpn_positive_fraction,
        rpn_pre_nms_top_n, rpn_post_nms_top_n, rpn_nms_thresh)

    if box_roi_pool is None:
        box_roi_pool = MultiScaleRoIAlign(
            featmap_names=['0', '1', '2', '3'],
            output_size=7,
            sampling_ratio=2)

    if box_head is None:
        resolution = box_roi_pool.output_size[0]
        representation_size = 1024
        box_head = TwoMLPHead(out_channels * resolution ** 2, representation_size)

    if box_predictor is None:
        representation_size = 1024
        box_predictor = FastRCNNPredictor(representation_size, num_classes)

    roi_heads = RoIHeads(
        box_roi_pool, box_head, box_predictor,
        box_fg_iou_thresh, box_bg_iou_thresh,
        box_batch_size_per_image, box_positive_fraction,
        bbox_reg_weights,
        box_score_thresh, box_nms_thresh, box_detections_per_img)

    if image_mean is None:
        image_mean = [0.485, 0.456, 0.406]
    if image_std is None:
        image_std = [0.229, 0.224, 0.225]
    transform = GeneralizedRCNNTransform(min_size, max_size, image_mean, image_std)

    super().__init__(backbone, rpn, roi_heads, transform)
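# A minimal usage sketch, not from the source: assumes a torchvision-style
# resnet_fpn_backbone (older API with a `pretrained` flag), which exposes the
# .out_channels attribute this constructor requires.
import torch
from torchvision.models.detection.backbone_utils import resnet_fpn_backbone

backbone = resnet_fpn_backbone('resnet50', pretrained=False)
model = FasterRCNN(backbone, num_classes=91)
model.eval()
with torch.no_grad():
    predictions = model([torch.rand(3, 600, 800)])
print(predictions[0].keys())  # dict_keys(['boxes', 'labels', 'scores'])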
def __init__(self,
             backbone,
             num_classes=2,
             num_pids=5532,
             num_cq_size=5000,
             # transform parameters
             min_size=900, max_size=1500,
             image_mean=None, image_std=None,
             # Anchor settings
             anchor_scales=None, anchor_ratios=None,
             # RPN parameters
             rpn_anchor_generator=None, rpn_head=None,
             rpn_pre_nms_top_n_train=12000, rpn_pre_nms_top_n_test=6000,
             rpn_post_nms_top_n_train=2000, rpn_post_nms_top_n_test=300,
             rpn_nms_thresh=0.7,
             rpn_fg_iou_thresh=0.7, rpn_bg_iou_thresh=0.3,
             rpn_batch_size_per_image=256, rpn_positive_fraction=0.5,
             # Box parameters
             rcnn_bbox_bn=True,
             box_roi_pool=None, box_head=None, box_predictor=None,
             box_score_thresh=0.0, box_nms_thresh=0.4, box_detections_per_img=300,
             box_fg_iou_thresh=0.5, box_bg_iou_thresh=0.1,
             box_batch_size_per_image=128, box_positive_fraction=0.5,
             bbox_reg_weights=None,
             # ReID parameters
             feat_head=None, reid_head=None, reid_loss=None):
    if rpn_anchor_generator is None:
        anchor_sizes = ((32,), (64,), (128,), (256,), (512,))
        aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)
        rpn_anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)

    # Note: rpn_head is rebuilt unconditionally, overwriting any value passed in.
    rpn_head = RPNHead(backbone.out_channels,
                       rpn_anchor_generator.num_anchors_per_location()[0])

    rpn_pre_nms_top_n = dict(training=rpn_pre_nms_top_n_train,
                             testing=rpn_pre_nms_top_n_test)
    rpn_post_nms_top_n = dict(training=rpn_post_nms_top_n_train,
                              testing=rpn_post_nms_top_n_test)
    rpn = RegionProposalNetwork(
        rpn_anchor_generator, rpn_head,
        rpn_fg_iou_thresh, rpn_bg_iou_thresh,
        rpn_batch_size_per_image, rpn_positive_fraction,
        rpn_pre_nms_top_n, rpn_post_nms_top_n, rpn_nms_thresh)

    if box_roi_pool is None:
        box_roi_pool = MultiScaleRoIAlign(featmap_names=['feat2rpn'],
                                          output_size=[7, 7],
                                          sampling_ratio=2)
    if box_head is None:
        resolution = box_roi_pool.output_size[0]
        representation_size = 2048
        box_head = TwoMLPHead(backbone.out_channels * resolution ** 2,
                              representation_size)
    if box_predictor is None:
        representation_size = 2048
        box_predictor = FastRCNNPredictor(representation_size, num_classes)

    roi_heads = RoIHeads(
        # Box
        box_roi_pool, box_head, box_predictor,
        box_fg_iou_thresh, box_bg_iou_thresh,
        box_batch_size_per_image, box_positive_fraction,
        bbox_reg_weights,
        box_score_thresh, box_nms_thresh, box_detections_per_img)

    if image_mean is None:
        image_mean = [0.485, 0.456, 0.406]
    if image_std is None:
        image_std = [0.229, 0.224, 0.225]
    transform = GeneralizedRCNNTransform(min_size, max_size, image_mean, image_std)

    super().__init__(backbone, rpn, roi_heads, transform)
def __init__(self,
             backbone,
             num_classes=None,
             # transform parameters
             min_size=800, max_size=1333,
             image_mean=None, image_std=None,
             # RPN parameters (kept for interface compatibility; no RPN is built)
             rpn_anchor_generator=None, rpn_head=None,
             rpn_pre_nms_top_n_train=2000, rpn_pre_nms_top_n_test=1000,
             rpn_post_nms_top_n_train=2000, rpn_post_nms_top_n_test=1000,
             rpn_nms_thresh=0.7,
             rpn_fg_iou_thresh=0.7, rpn_bg_iou_thresh=0.3,
             rpn_batch_size_per_image=256, rpn_positive_fraction=0.5,
             # Box parameters
             box_roi_pool=None, box_head=None, box_predictor=None,
             box_score_thresh=0.05, box_nms_thresh=0.3, box_detections_per_img=128,
             box_fg_iou_thresh=0.5, box_bg_iou_thresh=0.5,
             box_batch_size_per_image=64, box_positive_fraction=0.25,
             bbox_reg_weights=None):
    print("Using modified Faster RCNN....")

    if not hasattr(backbone, "out_channels"):
        raise ValueError(
            "backbone should contain an attribute out_channels "
            "specifying the number of output channels (assumed to be the "
            "same for all the levels)")

    if num_classes is not None:
        if box_predictor is not None:
            raise ValueError("num_classes should be None when box_predictor is specified")
    else:
        if box_predictor is None:
            raise ValueError("num_classes should not be None when box_predictor is not specified")

    out_channels = backbone.out_channels

    # No default box_roi_pool is created here, so the caller must supply one
    # whenever box_head is left as None.
    if box_head is None:
        resolution = box_roi_pool.output_size[0]
        representation_size = 1024
        box_head = TwoMLPHead(out_channels * resolution ** 2, representation_size)

    if box_predictor is None:
        representation_size = 1024
        box_predictor = FastRCNNPredictor(representation_size, num_classes)

    # This variant builds no region proposal network; proposals are expected
    # to come from elsewhere.
    rpn = None

    roi_heads = RoIHeads(
        # Box
        box_roi_pool, box_head, box_predictor,
        box_fg_iou_thresh, box_bg_iou_thresh,
        box_batch_size_per_image, box_positive_fraction,
        bbox_reg_weights,
        box_score_thresh, box_nms_thresh, box_detections_per_img)

    if image_mean is None:
        image_mean = [0.485, 0.456, 0.406]
    if image_std is None:
        image_std = [0.229, 0.224, 0.225]
    transform = GeneralizedRCNNTransform(min_size, max_size, image_mean, image_std)

    super().__init__(backbone, rpn, roi_heads, transform)
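# Hypothetical construction sketch, not from the source; 'NoRPNFasterRCNN' and
# 'backbone' are placeholder names. Because this __init__ creates no default
# box_roi_pool and sets rpn = None, the caller must pass a pooler, and region
# proposals have to be supplied from outside the model.
pool = MultiScaleRoIAlign(featmap_names=['0'], output_size=7, sampling_ratio=2)
model = NoRPNFasterRCNN(backbone, num_classes=2, box_roi_pool=pool)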
def __init__(self,
             backbone,
             num_classes=None,
             # transform parameters
             min_size=800, max_size=1333,
             image_mean=None, image_std=None,
             # RPN parameters
             rpn_anchor_generator=None, rpn_head=None,
             rpn_pre_nms_top_n_train=2000, rpn_pre_nms_top_n_test=1000,
             rpn_post_nms_top_n_train=2000, rpn_post_nms_top_n_test=1000,
             rpn_nms_thresh=0.7,
             rpn_fg_iou_thresh=0.7, rpn_bg_iou_thresh=0.3,
             rpn_batch_size_per_image=256, rpn_positive_fraction=0.5,
             # Box parameters
             box_roi_pool=None, box_head=None, box_predictor=None,
             box_score_thresh=0.05, box_nms_thresh=0.1, box_detections_per_img=100,
             box_fg_iou_thresh=0.5, box_bg_iou_thresh=0.5,
             box_batch_size_per_image=512, box_positive_fraction=0.25,
             bbox_reg_weights=None,
             depth_estimator_path='_depth_net.pth'):
    assert isinstance(rpn_anchor_generator, (AnchorGenerator, type(None)))
    assert isinstance(box_roi_pool, (MultiScaleRoIAlign, type(None)))

    if num_classes is not None:
        if box_predictor is not None:
            raise ValueError("num_classes should be None when box_predictor is specified")
    else:
        if box_predictor is None:
            raise ValueError("num_classes should not be None when box_predictor is not specified")

    out_channels = 6  # hard-coded instead of backbone.out_channels

    if rpn_anchor_generator is None:
        # Smaller anchors than the usual ((32,), (64,), (128,), (256,), (512,)).
        anchor_sizes = ((16,), (32,), (64,))
        aspect_ratios = ((0.5, 0.7, 1.0),) * len(anchor_sizes)
        rpn_anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)
    if rpn_head is None:
        rpn_head = RPNHead(out_channels,
                           rpn_anchor_generator.num_anchors_per_location()[0])

    rpn_pre_nms_top_n = dict(training=rpn_pre_nms_top_n_train,
                             testing=rpn_pre_nms_top_n_test)
    rpn_post_nms_top_n = dict(training=rpn_post_nms_top_n_train,
                              testing=rpn_post_nms_top_n_test)
    rpn = RegionProposalNetwork(
        rpn_anchor_generator, rpn_head,
        rpn_fg_iou_thresh, rpn_bg_iou_thresh,
        rpn_batch_size_per_image, rpn_positive_fraction,
        rpn_pre_nms_top_n, rpn_post_nms_top_n, rpn_nms_thresh)

    if box_roi_pool is None:
        # Only the first feature map is pooled (not ['0', '1', '2', '3']).
        box_roi_pool = MultiScaleRoIAlign(featmap_names=['0'],
                                          output_size=7,
                                          sampling_ratio=2)
    if box_head is None:
        resolution = box_roi_pool.output_size[0]
        representation_size = 1024
        # The box head consumes 64 channels rather than out_channels.
        box_head = TwoMLPHead(64 * resolution ** 2, representation_size)
    if box_predictor is None:
        representation_size = 1024
        box_predictor = FastRCNNPredictor(representation_size, num_classes)

    roi_heads = RoIHeads(
        # Box
        box_roi_pool, box_head, box_predictor,
        box_fg_iou_thresh, box_bg_iou_thresh,
        box_batch_size_per_image, box_positive_fraction,
        bbox_reg_weights,
        box_score_thresh, box_nms_thresh, box_detections_per_img)

    mask_net = MaskNet(out_channels)

    if image_mean is None:
        image_mean = [0.485, 0.456, 0.406]
    if image_std is None:
        image_std = [0.229, 0.224, 0.225]
    transform = GeneralizedRCNNTransform(min_size, max_size, image_mean, image_std)

    super().__init__(backbone, rpn, roi_heads, mask_net, transform,
                     depth_estimator_path=depth_estimator_path)