def __init__(self, backbone, num_classes,
             # transform parameters
             min_size=800, max_size=1333,
             image_mean=None, image_std=None,
             # anchor / head parameters
             anchor_generator=None, head=None,
             proposal_matcher=None,
             score_thresh=0.05,
             nms_thresh=0.5,
             detections_per_img=300,
             fg_iou_thresh=0.5, bg_iou_thresh=0.4,
             topk_candidates=1000):
    """Assemble the RetinaNet sub-modules and store the post-processing settings.

    Args:
        backbone: feature extractor; must expose an ``out_channels`` attribute.
        num_classes: passed to the default ``RetinaNetHead``.
        min_size, max_size, image_mean, image_std: forwarded to
            ``GeneralizedRCNNTransform``. ``image_mean`` / ``image_std``
            default to ``[0.485, 0.456, 0.406]`` / ``[0.229, 0.224, 0.225]``.
        anchor_generator: an ``AnchorGenerator`` or ``None``. When ``None``,
            a generator with three sizes per level (base sizes 32..512) and
            aspect ratios ``(0.5, 1.0, 2.0)`` is created.
        head: detection head; defaults to a ``RetinaNetHead`` built from
            ``backbone.out_channels`` and the anchor count per location.
        proposal_matcher: defaults to a ``det_utils.Matcher`` built from
            ``fg_iou_thresh`` / ``bg_iou_thresh`` with low-quality matches
            allowed.
        score_thresh, nms_thresh, detections_per_img, topk_candidates:
            stored on the instance unchanged.
        fg_iou_thresh, bg_iou_thresh: only used for the default matcher.

    Raises:
        ValueError: if ``backbone`` has no ``out_channels`` attribute.
        AssertionError: if ``anchor_generator`` is neither ``None`` nor an
            ``AnchorGenerator``.
    """
    super(RetinaNet, self).__init__()

    if not hasattr(backbone, "out_channels"):
        # Message fixed: the original read "assumed be the samefor all the levels".
        raise ValueError(
            "backbone should contain an attribute out_channels "
            "specifying the number of output channels (assumed to be the "
            "same for all the levels)")
    self.backbone = backbone

    assert isinstance(anchor_generator, (AnchorGenerator, type(None)))

    if anchor_generator is None:
        # Three sizes per level: x, x * 2^(1/3), x * 2^(2/3).
        anchor_sizes = tuple(
            (x, int(x * 2 ** (1.0 / 3)), int(x * 2 ** (2.0 / 3)))
            for x in [32, 64, 128, 256, 512])
        aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)
        anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)
    self.anchor_generator = anchor_generator

    if head is None:
        head = RetinaNetHead(
            backbone.out_channels,
            anchor_generator.num_anchors_per_location()[0],
            num_classes)
    self.head = head

    if proposal_matcher is None:
        proposal_matcher = det_utils.Matcher(
            fg_iou_thresh,
            bg_iou_thresh,
            allow_low_quality_matches=True,
        )
    self.proposal_matcher = proposal_matcher

    self.box_coder = det_utils.BoxCoder(weights=(1.0, 1.0, 1.0, 1.0))

    if image_mean is None:
        image_mean = [0.485, 0.456, 0.406]
    if image_std is None:
        image_std = [0.229, 0.224, 0.225]
    self.transform = GeneralizedRCNNTransform(min_size, max_size, image_mean, image_std)

    self.score_thresh = score_thresh
    self.nms_thresh = nms_thresh
    self.detections_per_img = detections_per_img
    self.topk_candidates = topk_candidates

    self.has_warned = False
def get_model():
    """Build a Faster R-CNN (ResNet-50 FPN) and swap in a new box predictor.

    The detector is created with ``pretrained=True`` and custom
    resize / normalisation / anchor settings. Its box predictor is then
    replaced by a ``FastRCNNPredictor`` with ``len(OBJ_TYPES) + 1`` outputs,
    and the model is moved to the module-level ``device``.

    Returns:
        The modified detection model.
    """
    # One anchor size per level, three aspect ratios for each of them.
    sizes = ((32, ), (64, ), (128, ), (256, ), (512, ))
    ratios = ((0.5, 1.0, 2.0), ) * len(sizes)
    rpn_anchors = AnchorGenerator(sizes, ratios)

    detector = torchvision.models.detection.fasterrcnn_resnet50_fpn(
        min_size=500,
        max_size=833,
        pretrained=True,
        progress=True,
        box_detections_per_img=58,
        image_mean=[0.17045, 0.1338, 0.2242],
        image_std=[0.17390, 0.1502, 0.13195],
        rpn_anchor_generator=rpn_anchors,
    )

    # User-defined object types plus one extra class for the background.
    n_outputs = len(OBJ_TYPES) + 1

    # The new predictor must accept the same feature width as the old one.
    feature_width = detector.roi_heads.box_predictor.cls_score.in_features
    detector.roi_heads.box_predictor = FastRCNNPredictor(feature_width, n_outputs)

    detector.to(device)
    return detector
def test_incorrect_anchors(self):
    """Expect ``ValueError`` when the generator is called with malformed sizes / ratios."""
    bad_sizes = ((2, 4, 8), (32, 8), )
    bad_ratios = (0.5, 1.0)
    generator = AnchorGenerator(bad_sizes, bad_ratios)

    images = ImageList(torch.randn(3, 800, 800), [(800, 800)])
    features = [torch.randn(1, 50)]

    # The failure is expected at call time, not at construction time.
    with pytest.raises(ValueError):
        generator(images, features)
def retinanet_mobilenet(pretrained=False, progress=True, num_classes=91,
                        pretrained_backbone=False,
                        trainable_backbone_layers=None,
                        min_size=320, max_size=640, **kwargs):
    """
    Constructs a RetinaNet model with a MobileNetV3-Large backbone.

    Example::

        >>> model = retinanet_mobilenet(pretrained=True)
        >>> model.eval()
        >>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
        >>> predictions = model(x)

    Args:
        pretrained (bool): If True, the state dict stored under
            ``model_urls['retinanet_mobilenet_v3_large_coco']`` is downloaded
            and loaded into the model. In that case the backbone is built
            without its own pretrained weights.
        progress (bool): forwarded to ``load_state_dict_from_url``
        num_classes (int): forwarded to ``RetinaNet``
        pretrained_backbone (bool): forwarded to ``mobilenet_backbone``
            (forced to False when ``pretrained`` is True)
        trainable_backbone_layers (int): number of trainable (not frozen)
            backbone layers; validated by ``_validate_trainable_layers``
            with a maximum and a default of 6
        min_size (int): forwarded to ``RetinaNet``
        max_size (int): forwarded to ``RetinaNet``
        **kwargs: extra keyword arguments forwarded to ``RetinaNet``
    """
    # Resolve the number of trainable layers (falls back to 6 where allowed).
    trainable_backbone_layers = _validate_trainable_layers(
        pretrained or pretrained_backbone, trainable_backbone_layers, 6, 6)

    # Full-model weights are loaded below, so backbone weights are not needed.
    backbone_weights = False if pretrained else pretrained_backbone
    backbone = mobilenet_backbone(
        "retinanet_mobilenet_v3_large",
        backbone_weights,
        trainable_layers=trainable_backbone_layers,
    )

    # A single group of five sizes, each with three aspect ratios.
    sizes = ((16, 32, 64, 128, 256, ), )
    ratios = ((0.5, 1.0, 2.0), )
    anchors = AnchorGenerator(sizes, ratios)

    model = RetinaNet(
        backbone,
        num_classes,
        anchor_generator=anchors,
        min_size=min_size,
        max_size=max_size,
        **kwargs)

    if not pretrained:
        return model

    weights = load_state_dict_from_url(
        model_urls['retinanet_mobilenet_v3_large_coco'], progress=progress)
    model.load_state_dict(weights)
    return model
def __init__(self, args):
    """Set up the anchor generator, backbone, student model and teacher.

    Args:
        args: configuration object; ``args.data_dir`` and ``args.loss`` are
            read here, and the object is also passed to ``self.get_teacher``.
    """
    super().__init__()
    self.args = args

    # Three anchor sizes per level: base, base * 2^(1/3), base * 2^(2/3).
    level_sizes = []
    for base in (32, 64, 128, 256, 512):
        level_sizes.append(
            (base, int(base * 2**(1.0 / 3)), int(base * 2**(2.0 / 3))))
    level_sizes = tuple(level_sizes)
    level_ratios = ((0.5, 1.0, 2.0), ) * len(level_sizes)
    self.anchor_generator = AnchorGenerator(level_sizes, level_ratios)

    self.backbone = self.get_backbone(True)
    self.model = RetinaNetEmbedding(self.backbone, num_classes=2378)
    self.save_hyperparameters()

    self.teacher_model = self.get_teacher(args)
    self.tm_full = self.teacher_model.get_model()
    self.data_dir = args.data_dir

    # Only the cross-entropy option is configured in this constructor.
    if self.args.loss == 'CrossEntropy':
        self.loss = torch.nn.CrossEntropyLoss()
        self.loss_requires_classifier = True
def __init__(self, args):
    """Set up the anchor generator, backbone, JDE head, student model and teacher.

    Args:
        args: configuration object; stored on the instance and passed to
            ``HeadJDE`` and ``self.teacher``.
    """
    super().__init__()
    self.args = args

    # Three anchor sizes per level: base, base * 2^(1/3), base * 2^(2/3).
    level_sizes = []
    for base in (32, 64, 128, 256, 512):
        level_sizes.append(
            (base, int(base * 2**(1.0 / 3)), int(base * 2**(2.0 / 3))))
    level_sizes = tuple(level_sizes)
    level_ratios = ((0.5, 1.0, 2.0), ) * len(level_sizes)
    self.anchor_generator = AnchorGenerator(level_sizes, level_ratios)

    self.backbone = self.backbone1(False)

    # The head and the model are built with the same class count (195).
    anchors_per_location = self.anchor_generator.num_anchors_per_location()[0]
    self.head = HeadJDE(
        self.backbone.out_channels, anchors_per_location, 195, self.args)
    self.model = RetinaNetEmbedding(
        self.backbone, num_classes=195, head=self.head)
    self.save_hyperparameters()

    self.teacher_model = self.teacher(args)
    self.tm_full = self.teacher_model.get_model()
    self.tm_extractor = self.teacher_model.get_extractor()
def _fasterrcnn_mobilenet_v3_large_fpn(weights_name, pretrained=False,
                                       progress=True, num_classes=91,
                                       pretrained_backbone=True,
                                       trainable_backbone_layers=None,
                                       **kwargs):
    """Build a Faster R-CNN on a MobileNetV3-Large backbone.

    Args:
        weights_name: key into ``model_urls`` used when ``pretrained`` is set.
        pretrained: if truthy, download and load the checkpoint registered
            under ``weights_name``; the backbone is then built without its
            own pretrained weights.
        progress: forwarded to ``load_state_dict_from_url``.
        num_classes: forwarded to ``FasterRCNN``.
        pretrained_backbone: forwarded to ``mobilenet_backbone`` (forced to
            ``False`` when ``pretrained`` is truthy).
        trainable_backbone_layers: validated by ``_validate_trainable_layers``
            with a maximum of 6 and a default of 3.
        **kwargs: extra keyword arguments forwarded to ``FasterRCNN``.

    Returns:
        The constructed ``FasterRCNN`` model.

    Raises:
        ValueError: if ``pretrained`` is set and ``model_urls`` has no entry
            (or a ``None`` entry) for ``weights_name``.
    """
    trainable_backbone_layers = _validate_trainable_layers(
        pretrained or pretrained_backbone, trainable_backbone_layers, 6, 3)

    # Full-model weights replace the backbone weights, so skip loading them.
    backbone_weights = False if pretrained else pretrained_backbone
    backbone = mobilenet_backbone(
        "mobilenet_v3_large",
        backbone_weights,
        True,
        trainable_layers=trainable_backbone_layers,
    )

    # The same five sizes are repeated for three groups.
    sizes_per_group = (32, 64, 128, 256, 512, )
    sizes = (sizes_per_group, ) * 3
    ratios = ((0.5, 1.0, 2.0), ) * len(sizes)

    model = FasterRCNN(
        backbone,
        num_classes,
        rpn_anchor_generator=AnchorGenerator(sizes, ratios),
        **kwargs)

    if not pretrained:
        return model

    if model_urls.get(weights_name, None) is None:
        raise ValueError(
            "No checkpoint is available for model {}".format(weights_name))
    weights = load_state_dict_from_url(model_urls[weights_name],
                                       progress=progress)
    model.load_state_dict(weights)
    return model
def keypointrcnn_mobilenet(backbone_name, path, device):
    """Build a Keypoint R-CNN on a MobileNet feature extractor and load a checkpoint.

    Args:
        backbone_name: one of ``"mobilenet_v3_large"``,
            ``"mobilenet_v3_small"`` or ``"mobilenet_v2"``.
        path: checkpoint location handed to ``torch.load``.
        device: device the model is moved to and the checkpoint is mapped to.

    Returns:
        The ``KeypointRCNN`` model (6 classes, 20 keypoints) in eval mode
        with the checkpoint loaded.

    Raises:
        ValueError: if ``backbone_name`` is not a supported name. This is a
            subclass of the ``Exception`` raised previously, so existing
            handlers keep working.
    """
    # Value assigned to ``backbone.out_channels`` for each supported backbone.
    out_channels_by_name = {
        "mobilenet_v3_large": 960,
        "mobilenet_v3_small": 576,
        "mobilenet_v2": 1280,
    }
    # Validate before any weights are downloaded or modules are built.
    if backbone_name not in out_channels_by_name:
        raise ValueError(
            "Bad backbone name: {!r} (expected one of {})".format(
                backbone_name, ", ".join(sorted(out_channels_by_name))))

    # The constructor in torchvision.models has the same name as the key.
    build_backbone = getattr(torchvision.models, backbone_name)
    backbone = build_backbone(pretrained=True).features
    backbone.out_channels = out_channels_by_name[backbone_name]

    anchor_generator = AnchorGenerator(sizes=((16, 32, 64, 128, 256), ),
                                       aspect_ratios=((0.5, 1.0, 2.0), ))
    roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                    output_size=7,
                                                    sampling_ratio=2)
    keypoint_roi_pooler = torchvision.ops.MultiScaleRoIAlign(
        featmap_names=['0'], output_size=14, sampling_ratio=2)

    model_keypoints = KeypointRCNN(backbone,
                                   num_classes=6,
                                   num_keypoints=20,
                                   rpn_anchor_generator=anchor_generator,
                                   box_roi_pool=roi_pooler,
                                   keypoint_roi_pool=keypoint_roi_pooler)
    model_keypoints = model_keypoints.to(device)

    # NOTE(security): torch.load unpickles the file; only pass checkpoints
    # that come from a trusted source.
    model_keypoints.load_state_dict(torch.load(path, map_location=device))
    model_keypoints.eval()
    return model_keypoints
def _init_test_anchor_generator(self):
    """Return an ``AnchorGenerator`` with a single size (10) and a single aspect ratio (1)."""
    return AnchorGenerator(((10, ), ), ((1, ), ))
def __init__(
        self,
        backbone,
        num_classes=None,
        # transform parameters
        min_size=800,
        max_size=1333,
        image_mean=None,
        image_std=None,
        # RPN parameters
        rpn_anchor_generator=None,
        rpn_head=None,
        rpn_pre_nms_top_n_train=2000,
        rpn_pre_nms_top_n_test=1000,
        rpn_post_nms_top_n_train=2000,
        rpn_post_nms_top_n_test=1000,
        rpn_nms_thresh=0.7,
        rpn_fg_iou_thresh=0.7,
        rpn_bg_iou_thresh=0.3,
        rpn_batch_size_per_image=256,
        rpn_positive_fraction=0.5,
        # Box parameters
        box_roi_pool=None,
        box_head=None,
        box_predictor=None,
        box_score_thresh=0.05,
        box_nms_thresh=0.5,
        box_detections_per_img=100,
        box_fg_iou_thresh=0.5,
        box_bg_iou_thresh=0.5,
        box_batch_size_per_image=512,
        box_positive_fraction=0.25,
        bbox_reg_weights=None):
    """Wire a backbone, an RPN, RoI heads and an input transform together.

    Every component passed as ``None`` is replaced by a default built here:
    a five-level ``AnchorGenerator``, an ``RPNHead``, a
    ``MultiScaleRoIAlign`` over feature maps ``'0'``..``'3'``, a
    ``TwoMLPHead`` and a ``FastRCNNPredictor`` (representation size 1024).

    Exactly one of ``num_classes`` and ``box_predictor`` must be given.

    Raises:
        ValueError: if ``backbone`` has no ``out_channels`` attribute, or if
            ``num_classes`` / ``box_predictor`` are both set or both ``None``.
        AssertionError: if ``rpn_anchor_generator`` or ``box_roi_pool`` has
            an unexpected type.
    """
    if not hasattr(backbone, "out_channels"):
        raise ValueError(
            "backbone should contain an attribute out_channels "
            "specifying the number of output channels (assumed to be the "
            "same for all the levels)")

    assert isinstance(rpn_anchor_generator, (AnchorGenerator, type(None)))
    assert isinstance(box_roi_pool, (MultiScaleRoIAlign, type(None)))

    # num_classes and box_predictor are mutually exclusive, and one is required.
    classes_given = num_classes is not None
    predictor_given = box_predictor is not None
    if classes_given and predictor_given:
        raise ValueError(
            "num_classes should be None when box_predictor is specified"
        )
    if not classes_given and not predictor_given:
        raise ValueError(
            "num_classes should not be None when box_predictor "
            "is not specified")

    out_channels = backbone.out_channels
    hidden_size = 1024  # width shared by the default box head and predictor

    # ---- region proposal network ----
    if rpn_anchor_generator is None:
        default_sizes = ((32, ), (64, ), (128, ), (256, ), (512, ))
        default_ratios = ((0.5, 1.0, 2.0), ) * len(default_sizes)
        rpn_anchor_generator = AnchorGenerator(default_sizes, default_ratios)
    if rpn_head is None:
        anchors_per_location = rpn_anchor_generator.num_anchors_per_location()[0]
        rpn_head = RPNHead(out_channels, anchors_per_location)

    pre_nms_top_n = {
        "training": rpn_pre_nms_top_n_train,
        "testing": rpn_pre_nms_top_n_test,
    }
    post_nms_top_n = {
        "training": rpn_post_nms_top_n_train,
        "testing": rpn_post_nms_top_n_test,
    }
    rpn = RegionProposalNetwork(
        rpn_anchor_generator, rpn_head,
        rpn_fg_iou_thresh, rpn_bg_iou_thresh,
        rpn_batch_size_per_image, rpn_positive_fraction,
        pre_nms_top_n, post_nms_top_n, rpn_nms_thresh)

    # ---- box branch ----
    if box_roi_pool is None:
        box_roi_pool = MultiScaleRoIAlign(
            featmap_names=['0', '1', '2', '3'],
            output_size=7,
            sampling_ratio=2)
    if box_head is None:
        pooled_side = box_roi_pool.output_size[0]
        box_head = TwoMLPHead(out_channels * pooled_side**2, hidden_size)
    if box_predictor is None:
        box_predictor = FastRCNNPredictor(hidden_size, num_classes)

    roi_heads = RoIHeads(
        # Box
        box_roi_pool, box_head, box_predictor,
        box_fg_iou_thresh, box_bg_iou_thresh,
        box_batch_size_per_image, box_positive_fraction,
        bbox_reg_weights,
        box_score_thresh, box_nms_thresh, box_detections_per_img)

    # ---- input transform ----
    if image_mean is None:
        image_mean = [0.485, 0.456, 0.406]
    if image_std is None:
        image_std = [0.229, 0.224, 0.225]
    transform = GeneralizedRCNNTransform(min_size, max_size, image_mean,
                                         image_std)

    super(FasterRCNN, self).__init__(backbone, rpn, roi_heads, transform)
# RPN sampling thresholds and batch composition.
rpn_fg_iou_thresh = 0.7
rpn_bg_iou_thresh = 0.3
rpn_batch_size_per_image = 256
rpn_positive_fraction = 0.5

# Proposal counts kept before / after NMS, keyed by mode. The *_train /
# *_test values and rpn_nms_thresh are defined outside this fragment.
rpn_pre_nms_top_n = {
    "training": rpn_pre_nms_top_n_train,
    "testing": rpn_pre_nms_top_n_test,
}
rpn_post_nms_top_n = {
    "training": rpn_post_nms_top_n_train,
    "testing": rpn_post_nms_top_n_test,
}

# One anchor size per level, three aspect ratios each.
anchor_sizes = ((32,), (64,), (128,), (256,), (512,))
aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)
rpn_anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)

# The RPN head is built for 512 input channels.
rpn_head = RPNHead(
    512,
    rpn_anchor_generator.num_anchors_per_location()[0],
)

rpn = RegionProposalNetwork(
    rpn_anchor_generator,
    rpn_head,
    rpn_fg_iou_thresh,
    rpn_bg_iou_thresh,
    rpn_batch_size_per_image,
    rpn_positive_fraction,
    rpn_pre_nms_top_n,
    rpn_post_nms_top_n,
    rpn_nms_thresh,
)

trainable_backbone_layers = None
pretrained = True
# ``pretrained`` is True, so the ``or`` short-circuits before
# ``pretrained_backbone`` is evaluated.
trainable_backbone_layers = _validate_resnet_trainable_layers(
    pretrained or pretrained_backbone,
    trainable_backbone_layers,
)
if pretrained:
    pretrained_backbone = False
def __init__(
        self,
        num_classes=2,
        # transform parameters
        backbone_name='resnet50',
        min_size=256,
        max_size=512,
        image_mean=None,
        image_std=None,
        # RPN parameters
        rpn_anchor_generator=None,
        rpn_head=None,
        rpn_pre_nms_top_n_train=2000,
        rpn_pre_nms_top_n_test=1000,
        rpn_post_nms_top_n_train=2000,
        rpn_post_nms_top_n_test=1000,
        rpn_nms_thresh=0.7,
        rpn_fg_iou_thresh=0.7,
        rpn_bg_iou_thresh=0.3,
        rpn_batch_size_per_image=256,
        rpn_positive_fraction=0.5,
        rpn_score_thresh=0.0,
        # Box parameters
        box_roi_pool=None,
        box_head=None,
        box_predictor=None,
        box_score_thresh=0.05,
        box_nms_thresh=0.5,
        box_detections_per_img=100,
        box_fg_iou_thresh=0.5,
        box_bg_iou_thresh=0.5,
        box_batch_size_per_image=512,
        box_positive_fraction=0.25,
        bbox_reg_weights=None,
        # Ellipse regressor
        ellipse_roi_pool=None,
        ellipse_head=None,
        ellipse_predictor=None,
        ellipse_loss_metric="gaussian-angle"):
    """Build a Faster R-CNN style detector with an extra ellipse branch.

    A ResNet-FPN backbone named ``backbone_name`` is created with
    ``pretrained=True`` and ``trainable_layers=5``. Its first convolution is
    replaced by a single-input-channel one for grayscale images. Every
    component passed as ``None`` (anchor generator, RPN head, RoI poolers,
    heads, predictors) is replaced by a default built here.

    ``num_classes`` defaults to 2, so supplying a custom ``box_predictor``
    requires passing ``num_classes=None`` explicitly.

    ``image_mean`` / ``image_std`` default to the single-channel values
    ``[0.156]`` / ``[0.272]``.

    Raises:
        ValueError: if the backbone has no ``out_channels`` attribute, or if
            ``num_classes`` / ``box_predictor`` are both set or both ``None``.
        AssertionError: if ``rpn_anchor_generator`` or ``box_roi_pool`` has
            an unexpected type.
    """
    backbone = resnet_fpn_backbone(backbone_name,
                                   pretrained=True,
                                   trainable_layers=5)
    # Input image is grayscale -> in_channels = 1 instead of 3 (COCO)
    backbone.body.conv1 = Conv2d(1,
                                 64,
                                 kernel_size=(7, 7),
                                 stride=(2, 2),
                                 padding=(3, 3),
                                 bias=False)

    if not hasattr(backbone, "out_channels"):
        raise ValueError(
            "backbone should contain an attribute out_channels "
            "specifying the number of output channels (assumed to be the "
            "same for all the levels)")

    assert isinstance(rpn_anchor_generator, (AnchorGenerator, type(None)))
    assert isinstance(box_roi_pool, (MultiScaleRoIAlign, type(None)))

    if num_classes is not None:
        if box_predictor is not None:
            raise ValueError(
                "num_classes should be None when box_predictor is specified"
            )
    else:
        if box_predictor is None:
            raise ValueError(
                "num_classes should not be None when box_predictor "
                "is not specified")

    out_channels = backbone.out_channels

    # ---- region proposal network ----
    if rpn_anchor_generator is None:
        anchor_sizes = ((32, ), (64, ), (128, ), (256, ), (512, ))
        aspect_ratios = ((0.5, 1.0, 2.0), ) * len(anchor_sizes)
        rpn_anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)
    if rpn_head is None:
        rpn_head = RPNHead(
            out_channels,
            rpn_anchor_generator.num_anchors_per_location()[0])

    rpn_pre_nms_top_n = dict(training=rpn_pre_nms_top_n_train,
                             testing=rpn_pre_nms_top_n_test)
    rpn_post_nms_top_n = dict(training=rpn_post_nms_top_n_train,
                              testing=rpn_post_nms_top_n_test)

    rpn = RegionProposalNetwork(rpn_anchor_generator,
                                rpn_head,
                                rpn_fg_iou_thresh,
                                rpn_bg_iou_thresh,
                                rpn_batch_size_per_image,
                                rpn_positive_fraction,
                                rpn_pre_nms_top_n,
                                rpn_post_nms_top_n,
                                rpn_nms_thresh,
                                score_thresh=rpn_score_thresh)

    # ---- box branch ----
    if box_roi_pool is None:
        box_roi_pool = MultiScaleRoIAlign(
            featmap_names=['0', '1', '2', '3'],
            output_size=7,
            sampling_ratio=2)

    if box_head is None:
        resolution = box_roi_pool.output_size[0]
        representation_size = 1024
        box_head = TwoMLPHead(out_channels * resolution**2,
                              representation_size)

    if box_predictor is None:
        representation_size = 1024
        box_predictor = FastRCNNPredictor(representation_size, num_classes)

    # ---- ellipse branch ----
    if ellipse_roi_pool is None:
        ellipse_roi_pool = MultiScaleRoIAlign(
            featmap_names=['0', '1', '2', '3'],
            output_size=7,
            sampling_ratio=2)

    if ellipse_head is None:
        # Fix: size the head from the ellipse pooler, not the box pooler.
        # The two agree for the defaults (both 7) but may differ when a
        # custom pooler is supplied for either branch.
        resolution = ellipse_roi_pool.output_size[0]
        representation_size = 1024
        ellipse_head = TwoMLPHead(out_channels * resolution**2,
                                  representation_size)

    if ellipse_predictor is None:
        representation_size = 1024
        ellipse_predictor = EllipseRegressor(representation_size,
                                             num_classes)

    roi_heads = EllipseRoIHeads(
        # Box
        box_roi_pool,
        box_head,
        box_predictor,
        box_fg_iou_thresh,
        box_bg_iou_thresh,
        box_batch_size_per_image,
        box_positive_fraction,
        bbox_reg_weights,
        box_score_thresh,
        box_nms_thresh,
        box_detections_per_img,
        # Ellipse
        ellipse_roi_pool=ellipse_roi_pool,
        ellipse_head=ellipse_head,
        ellipse_predictor=ellipse_predictor,
        ellipse_loss_metric=ellipse_loss_metric)

    # Single-channel normalisation statistics for the grayscale input.
    if image_mean is None:
        image_mean = [0.156]
    if image_std is None:
        image_std = [0.272]
    transform = GeneralizedRCNNTransform(min_size, max_size, image_mean,
                                         image_std)

    super().__init__(backbone, rpn, roi_heads, transform)
def get_model(config):
    """Build the network selected by ``config.model_name``.

    Args:
        config: object with a ``model_name`` attribute. Depending on the
            branch taken, ``use_pretrained``, ``freeze`` and ``n_classes``
            are also read.

    Returns:
        The constructed model. For the classification backbones the final
        linear layer is replaced by one with ``config.n_classes`` outputs.

    Raises:
        NotImplementedError: if ``config.model_name`` is not a known name.
    """
    if config.model_name == "resnet":
        # ResNet-18 (previously mislabelled "Resnet34"; resnet18 is what is built)
        model = models.resnet18(pretrained=config.use_pretrained)
        set_parameter_requires_grad(model, config.freeze)
        n_features = model.fc.in_features
        model.fc = nn.Linear(n_features, config.n_classes)

    elif config.model_name == "alexnet":
        # AlexNet
        model = models.alexnet(pretrained=config.use_pretrained)
        set_parameter_requires_grad(model, config.freeze)
        n_features = model.classifier[-1].in_features
        model.classifier[-1] = nn.Linear(n_features, config.n_classes)

    elif config.model_name == "vgg":
        # VGG16 with batch norm
        model = models.vgg16_bn(pretrained=config.use_pretrained)
        set_parameter_requires_grad(model, config.freeze)
        n_features = model.classifier[-1].in_features
        model.classifier[-1] = nn.Linear(n_features, config.n_classes)

    elif config.model_name == "densenet":
        # DenseNet-121
        model = models.densenet121(pretrained=config.use_pretrained)
        set_parameter_requires_grad(model, config.freeze)
        n_features = model.classifier.in_features
        model.classifier = nn.Linear(n_features, config.n_classes)

    elif config.model_name == 'mobilenet':
        # MobileNetV2 (a stray no-op ``n_features`` statement was removed here)
        model = models.mobilenet_v2(pretrained=config.use_pretrained)
        set_parameter_requires_grad(model, config.freeze)
        n_features = model.classifier[-1].in_features
        model.classifier[-1] = nn.Linear(n_features, config.n_classes)

    elif config.model_name == "KeypointRCNN":
        # Keypoint R-CNN on MobileNetV2 features (single feature map '0').
        backbone = models.mobilenet_v2(pretrained=True).features
        backbone.out_channels = 1280
        roi_pooler = MultiScaleRoIAlign(
            featmap_names=['0'],
            output_size=7,
            sampling_ratio=2
        )
        anchor_generator = AnchorGenerator(
            sizes=((32, 64, 128, 256, 512),),
            aspect_ratios=((0.5, 1.0, 2.0),))
        keypoint_roi_pooler = MultiScaleRoIAlign(
            featmap_names=['0'],
            output_size=14,
            sampling_ratio=2
        )
        model = KeypointRCNN(
            backbone,
            num_classes=2,
            num_keypoints=24,
            box_roi_pool=roi_pooler,
            keypoint_roi_pool=keypoint_roi_pooler,
            rpn_anchor_generator=anchor_generator
        )

    elif config.model_name == "keypointrcnn_resnet50":
        model = models.detection.keypointrcnn_resnet50_fpn(
            pretrained=config.use_pretrained, progress=False)
        # Replace the final keypoint layer so that it emits 24 channels.
        model.roi_heads.keypoint_predictor.kps_score_lowres = nn.ConvTranspose2d(
            512, 24, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))

    elif config.model_name == "keypointrcnn_resnet101":
        # Pretrained backbone only; the detector itself is not pretrained.
        pretrained_backbone = True
        pretrained = False
        trainable_backbone_layers = None
        trainable_backbone_layers = _validate_trainable_layers(
            pretrained or pretrained_backbone, trainable_backbone_layers, 5, 3)
        backbone = resnet_fpn_backbone(
            'resnet101', pretrained_backbone,
            trainable_layers=trainable_backbone_layers)
        model = KeypointRCNN(
            backbone,
            num_classes=2,
            num_keypoints=24)

    else:
        raise NotImplementedError('You need to specify model name.')

    return model
def get_fasterrcnn_model(arch_str,
                         num_classes,
                         pretrained=True,
                         pretrained_backbone=True,
                         trainable_layers=5,
                         **kwargs):
    """Creates FasterRCNN model with resnet backbone

    Args:
        arch_str: backbone name passed to ``resnet_fpn_backbone``; also used
            to build the ``_model_urls`` key
            ``'fasterrcnn_' + arch_str + '_fpn_coco'``.
        num_classes: forwarded to ``FasterRCNN``.
        pretrained: if truthy, try to load the matching COCO checkpoint.
            Only entries whose name and shape match the freshly built model
            are copied. Failure is reported on stdout and is not fatal.
        pretrained_backbone: forwarded to ``resnet_fpn_backbone``.
        trainable_layers: forwarded to ``resnet_fpn_backbone``.
        **kwargs: extra keyword arguments forwarded to ``FasterRCNN``.

    Returns:
        The model in training mode (the result of ``model.train()``).
    """
    backbone = resnet_fpn_backbone(arch_str,
                                   pretrained=pretrained_backbone,
                                   trainable_layers=trainable_layers)

    # One anchor size per level, three aspect ratios each.
    anchor_sizes = (
        (16, ),
        (32, ),
        (64, ),
        (128, ),
        (256, ),
    )
    aspect_ratios = ((0.5, 1.0, 2.0), ) * len(anchor_sizes)
    anchor_generator = AnchorGenerator(sizes=anchor_sizes,
                                       aspect_ratios=aspect_ratios)

    model = FasterRCNN(
        backbone,
        num_classes=num_classes,
        rpn_anchor_generator=anchor_generator,
        box_fg_iou_thresh=0.5,
        box_bg_iou_thresh=0.5,
        image_mean=[0.0, 0.0, 0.0],  # already normalized by fastai
        image_std=[1.0, 1.0, 1.0],
        **kwargs)

    if pretrained:
        try:
            pretrained_dict = load_state_dict_from_url(
                _model_urls['fasterrcnn_' + arch_str + '_fpn_coco'],
                progress=True)
            model_dict = model.state_dict()
            # Keep only tensors that exist in this model with the same shape,
            # so heads sized for a different class count are skipped.
            pretrained_dict = {
                k: v
                for k, v in pretrained_dict.items()
                if (k in model_dict) and (model_dict[k].shape == v.shape)
            }
            model_dict.update(pretrained_dict)
            model.load_state_dict(model_dict)
            # Zero the epsilon of every frozen batch-norm layer.
            for module in model.modules():
                if isinstance(module, FrozenBatchNorm2d):
                    module.eps = 0.0
        except Exception as e:
            # Deliberately best-effort: the model is still usable without the
            # COCO weights. The cause is reported, because this handler also
            # fires for download and loading errors, not only missing URLs.
            print("No pretrained coco model found for fasterrcnn_" + arch_str)
            print("Reason: " + repr(e))
            print("This does not affect the backbone.")

    return model.train()
def get_fasterrcnn_model_swin(arch_str,
                              num_classes,
                              pretrained=False,
                              pretrained_backbone=True,
                              **kwargs):
    """Creates FasterRCNN model with swin transformer backbone

    Args:
        arch_str: one of ``"swin_tiny"``, ``"swin_small"``, ``"swin_base"``,
            ``"swin_large"`` (the keys of the scale-factor table below).
        num_classes: forwarded to ``FasterRCNN``.
        pretrained: accepted but not read by this function.
        pretrained_backbone: if truthy, download the checkpoint registered
            under ``f'{arch_str}_{img_size}'`` in ``_model_urls`` and copy
            the entries whose names exist in the backbone.
        **kwargs: extra keyword arguments forwarded to ``FasterRCNN``.

    Returns:
        The model in training mode (the result of ``model.train()``).
    """
    # One anchor size per level, three aspect ratios each.
    sizes = (
        (32, ),
        (64, ),
        (128, ),
        (256, ),
    )
    ratios = ((0.5, 1.0, 2.0), ) * len(sizes)
    anchor_generator = AnchorGenerator(sizes=sizes, aspect_ratios=ratios)

    # tiny / small use 224px inputs with window 7, the others 384px / 12.
    is_small_variant = arch_str in ["swin_tiny", "swin_small"]
    if is_small_variant:
        img_size, window_size = 224, 7
    else:
        img_size, window_size = 384, 12

    if arch_str == "swin_tiny":
        depths = [2, 2, 6, 2]
    else:
        depths = [2, 2, 18, 2]

    # Width multiplier relative to the 96-channel base embedding.
    width_scale = {
        "swin_tiny": 1.0,
        "swin_small": 1.5,
        "swin_base": 2.0,
        "swin_large": 2.0
    }[arch_str]
    embed_dim = int(96 * width_scale)
    # Channel count of each of the four stages fed to the FPN.
    fpn_cin = [int(96 * width_scale * 2**stage) for stage in range(4)]

    backbone = SwinTransformerFPN(img_size=img_size,
                                  window_size=window_size,
                                  embed_dim=embed_dim,
                                  depths=depths,
                                  fpn_cin=fpn_cin,
                                  fpn_cout=256)

    if pretrained_backbone:
        checkpoint = load_state_dict_from_url(
            _model_urls[f'{arch_str}_{img_size}'],
            progress=True,
            map_location=default_device())
        downloaded = checkpoint['model']
        backbone_state = backbone.state_dict()
        # Copy only the entries whose names the backbone knows.
        for key, value in downloaded.items():
            if key in backbone_state:
                backbone_state[key] = value
        backbone.load_state_dict(backbone_state)

    model = FasterRCNN(
        backbone,
        num_classes=num_classes,
        rpn_anchor_generator=anchor_generator,
        box_fg_iou_thresh=0.5,
        box_bg_iou_thresh=0.5,
        image_mean=[0.0, 0.0, 0.0],  # already normalized by fastai
        image_std=[1.0, 1.0, 1.0],
        **kwargs)
    return model.train()
def _init_test_anchor_generator(self):
    """Return an ``AnchorGenerator`` with sizes 32 / 64 / 128 and three aspect ratios per size."""
    sizes = ((32, ), (64, ), (128, ))
    ratios = ((0.5, 1.0, 2.0), ) * len(sizes)
    return AnchorGenerator(sizes, ratios)
import torch
import torchvision
from torchvision.models.detection import MaskRCNN
from torchvision.models.detection.anchor_utils import AnchorGenerator

# MobileNetV2 feature extractor; MaskRCNN reads ``out_channels`` from it.
backbone = torchvision.models.mobilenet_v2(pretrained=True).features
backbone.out_channels = 1280

# A single group of five anchor sizes, each with three aspect ratios.
anchor_generator = AnchorGenerator(
    sizes=((32, 64, 128, 256, 512),),
    aspect_ratios=((0.5, 1.0, 2.0),),
)

# Both poolers read from feature map '0'; they differ in output size.
roi_pooler = torchvision.ops.MultiScaleRoIAlign(
    featmap_names=['0'],
    output_size=7,
    sampling_ratio=2,
)
mask_roi_pooler = torchvision.ops.MultiScaleRoIAlign(
    featmap_names=['0'],
    output_size=14,
    sampling_ratio=2,
)

MaskRCNN_mobile_model = MaskRCNN(
    backbone,
    num_classes=2,
    rpn_anchor_generator=anchor_generator,
    box_roi_pool=roi_pooler,
    mask_roi_pool=mask_roi_pooler,
)

if __name__ == '__main__':
    # Smoke run: two random images of different sizes through the model.
    model = MaskRCNN_mobile_model
    model.eval()
    x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
    predictions = model(x)