def _load_mobile_net_model(self):
    model = _MobileNetV1()
    state_dict = load_state_dict_from_url(
        'https://www.dropbox.com/s/bd1keyo085pscfu/mobilenetv1_pretrain.pt?dl=1',
        map_location=self._device_control)
    # load params
    model.load_state_dict(state_dict)
    return model
def resnet50_se():
    model = ResNet(SEBottleneck, [3, 4, 6, 3])
    state_dict = load_state_dict_from_url(model_urls["resnet50"], progress=True)
    model.load_state_dict(state_dict, strict=False)
    return model
def _resnet(arch: str, block: nn.Module, layers: List[int], pretrained: bool,
            progress: bool, **kwargs) -> nn.Module:
    model = ResNet(block, layers, **kwargs)
    if pretrained:
        state_dict = load_state_dict_from_url(model_urls[arch], progress=progress)
        model.load_state_dict(state_dict, strict=False)
    return model
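# For context, a public constructor typically wraps the helper above. A minimal,
# hypothetical sketch (assumes `model_urls` contains a 'resnet18' entry and that
# `BasicBlock` is defined as in torchvision):
def resnet18(pretrained: bool = False, progress: bool = True, **kwargs) -> nn.Module:
    # standard ResNet-18 layer configuration
    return _resnet('resnet18', BasicBlock, [2, 2, 2, 2], pretrained, progress, **kwargs)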
def resnet34_se():
    model = ResNet(SEBasicBlock, [3, 4, 6, 3])
    state_dict = load_state_dict_from_url(model_urls["resnet34"], progress=True)
    model.load_state_dict(state_dict, strict=False)
    return model
def __init__(self, input_channels=3, num_classes=1000, pretrained=True, block=Bottleneck,
             layers=[3, 4, 23, 3], zero_init_residual=False, groups=1, width_per_group=64,
             replace_stride_with_dilation=None, norm_layer=None):
    super(ResNet101, self).__init__()
    if norm_layer is None:
        norm_layer = nn.BatchNorm2d
    self._norm_layer = norm_layer
    self.inplanes = 64
    self.dilation = 1
    if replace_stride_with_dilation is None:
        # each element in the tuple indicates if we should replace
        # the 2x2 stride with a dilated convolution instead
        replace_stride_with_dilation = [False, False, False]
    if len(replace_stride_with_dilation) != 3:
        raise ValueError("replace_stride_with_dilation should be None "
                         "or a 3-element tuple, got {}".format(replace_stride_with_dilation))
    self.groups = groups
    self.base_width = width_per_group
    # use input_channels (not a hard-coded 3) so non-RGB inputs are supported
    self.conv1 = nn.Conv2d(input_channels, self.inplanes, kernel_size=7, stride=2,
                           padding=3, bias=False)
    self.bn1 = norm_layer(self.inplanes)
    self.relu = nn.ReLU(inplace=True)
    self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
    self.layer1 = self._make_layer(block, 64, layers[0])
    self.layer2 = self._make_layer(block, 128, layers[1], stride=2,
                                   dilate=replace_stride_with_dilation[0])
    self.layer3 = self._make_layer(block, 256, layers[2], stride=2,
                                   dilate=replace_stride_with_dilation[1])
    self.layer4 = self._make_layer(block, 512, layers[3], stride=2,
                                   dilate=replace_stride_with_dilation[2])
    self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
    self.fc = nn.Linear(512 * block.expansion, num_classes)

    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
        elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
            nn.init.constant_(m.weight, 1)
            nn.init.constant_(m.bias, 0)

    # Zero-initialize the last BN in each residual branch, so that the residual branch
    # starts with zeros, and each residual block behaves like an identity.
    # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
    if zero_init_residual:
        for m in self.modules():
            if isinstance(m, Bottleneck):
                nn.init.constant_(m.bn3.weight, 0)
            elif isinstance(m, BasicBlock):
                nn.init.constant_(m.bn2.weight, 0)

    if pretrained:
        state_dict = load_state_dict_from_url(model_urls['resnet101'], progress=True)
        model_dict = self.state_dict()
        # keep only the pretrained tensors whose name and shape match this model,
        # so a custom head (e.g. num_classes != 1000) keeps its own initialization
        pretrained_dict = {k: v for k, v in state_dict.items()
                           if k in model_dict and v.shape == model_dict[k].shape}
        model_dict.update(pretrained_dict)
        self.load_state_dict(model_dict)
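# A minimal usage sketch for the class above (assumes it is named ResNet101, as the
# super() call suggests, and that it defines the usual ResNet forward pass): only the
# tensors shared with the ImageNet checkpoint are loaded, so a custom classification
# head stays randomly initialized and must be trained.
model = ResNet101(num_classes=10, pretrained=True)
model.eval()
with torch.no_grad():
    logits = model(torch.randn(1, 3, 224, 224))  # expected shape: (1, 10)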
def keypointrcnn_resnet50_fpn(pretrained=False, progress=True, num_classes=2, num_keypoints=17,
                              pretrained_backbone=True, **kwargs):
    """
    Constructs a Keypoint R-CNN model with a ResNet-50-FPN backbone.

    The input to the model is expected to be a list of tensors, each of shape ``[C, H, W]``, one for
    each image, and should be in ``0-1`` range. Different images can have different sizes.

    The behavior of the model changes depending if it is in training or evaluation mode.

    During training, the model expects both the input tensors, as well as a targets dictionary,
    containing:
        - boxes (``Tensor[N, 4]``): the ground-truth boxes in ``[x0, y0, x1, y1]`` format, with values
          of ``x`` between ``0`` and ``W`` and values of ``y`` between ``0`` and ``H``
        - labels (``Tensor[N]``): the class label for each ground-truth box
        - keypoints (``Tensor[N, K, 3]``): the ``K`` keypoints location for each of the ``N`` instances,
          in the format ``[x, y, visibility]``, where ``visibility=0`` means that the keypoint is not
          visible.

    The model returns a ``Dict[Tensor]`` during training, containing the classification and regression
    losses for both the RPN and the R-CNN, and the keypoint loss.

    During inference, the model requires only the input tensors, and returns the post-processed
    predictions as a ``List[Dict[Tensor]]``, one for each input image. The fields of the ``Dict`` are
    as follows:
        - boxes (``Tensor[N, 4]``): the predicted boxes in ``[x0, y0, x1, y1]`` format, with values
          of ``x`` between ``0`` and ``W`` and values of ``y`` between ``0`` and ``H``
        - labels (``Tensor[N]``): the predicted labels for each image
        - scores (``Tensor[N]``): the scores for each prediction
        - keypoints (``Tensor[N, K, 3]``): the locations of the predicted keypoints, in ``[x, y, v]``
          format.

    Example::

        >>> model = torchvision.models.detection.keypointrcnn_resnet50_fpn(pretrained=True)
        >>> model.eval()
        >>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
        >>> predictions = model(x)

    Arguments:
        pretrained (bool): If True, returns a model pre-trained on COCO train2017
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    if pretrained:
        # no need to download the backbone if pretrained is set
        pretrained_backbone = False
    backbone = resnet_fpn_backbone('resnet50', pretrained_backbone)
    model = KeypointRCNN(backbone, num_classes, num_keypoints=num_keypoints, **kwargs)
    if pretrained:
        state_dict = load_state_dict_from_url(model_urls['keypointrcnn_resnet50_fpn_coco'],
                                              progress=progress)
        model.load_state_dict(state_dict)
    return model
def fasterrcnn_resnet50_fpn(pretrained=False, progress=True, num_classes=91, pretrained_backbone=True,
                            **kwargs):
    """
    Constructs a Faster R-CNN model with a ResNet-50-FPN backbone.

    The input to the model is expected to be a list of tensors, each of shape ``[C, H, W]``, one for
    each image, and should be in ``0-1`` range. Different images can have different sizes.

    The behavior of the model changes depending if it is in training or evaluation mode.

    During training, the model expects both the input tensors, as well as a targets (list of
    dictionary), containing:
        - boxes (``FloatTensor[N, 4]``): the ground-truth boxes in ``[x1, y1, x2, y2]`` format, with
          values of ``x`` between ``0`` and ``W`` and values of ``y`` between ``0`` and ``H``
        - labels (``Int64Tensor[N]``): the class label for each ground-truth box

    The model returns a ``Dict[Tensor]`` during training, containing the classification and regression
    losses for both the RPN and the R-CNN.

    During inference, the model requires only the input tensors, and returns the post-processed
    predictions as a ``List[Dict[Tensor]]``, one for each input image. The fields of the ``Dict`` are
    as follows:
        - boxes (``FloatTensor[N, 4]``): the predicted boxes in ``[x1, y1, x2, y2]`` format, with
          values of ``x`` between ``0`` and ``W`` and values of ``y`` between ``0`` and ``H``
        - labels (``Int64Tensor[N]``): the predicted labels for each image
        - scores (``Tensor[N]``): the scores for each prediction

    Example::

        >>> model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)  # noqa
        >>> model.eval()
        >>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
        >>> predictions = model(x)  # noqa

    Arguments:
        pretrained (bool): If True, returns a model pre-trained on COCO train2017
        progress (bool): If True, displays a progress bar of the download to stderr
        num_classes (int): number of output classes of the model (including the background)
        pretrained_backbone (bool): If True, returns a model with backbone pre-trained on Imagenet
    """
    if pretrained:
        # no need to download the backbone if pretrained is set
        pretrained_backbone = False
    backbone = resnet_fpn_backbone('resnet50', pretrained_backbone)
    model = FasterRCNN(backbone, num_classes, **kwargs)
    if pretrained:
        state_dict = load_state_dict_from_url(model_urls['fasterrcnn_resnet50_fpn_coco'],
                                              progress=progress)
        model.load_state_dict(state_dict)
    return model
def inception_v3(pretrained=False, progress=True, quantize=False, **kwargs):
    r"""Inception v3 model architecture from
    `"Rethinking the Inception Architecture for Computer Vision" <http://arxiv.org/abs/1512.00567>`_.

    .. note::
        **Important**: In contrast to the other models the inception_v3 expects tensors with a size of
        N x 3 x 299 x 299, so ensure your images are sized accordingly.

    Note that quantize = True returns a quantized model with 8 bit weights. Quantized models only
    support inference and run on CPUs. GPU inference is not yet supported.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
        aux_logits (bool): If True, add an auxiliary branch that can improve training.
            Default: *True*
        transform_input (bool): If True, preprocesses the input according to the method with which it
            was trained on ImageNet. Default: *False*
    """
    if pretrained:
        if "transform_input" not in kwargs:
            kwargs["transform_input"] = True
        if "aux_logits" in kwargs:
            original_aux_logits = kwargs["aux_logits"]
            kwargs["aux_logits"] = True
        else:
            original_aux_logits = False

    model = QuantizableInception3(**kwargs)
    _replace_relu(model)

    if quantize:
        # TODO use pretrained as a string to specify the backend
        backend = 'fbgemm'
        quantize_model(model, backend)
    else:
        assert pretrained in [True, False]

    if pretrained:
        if quantize:
            if not original_aux_logits:
                model.aux_logits = False
                model.AuxLogits = None
            model_url = quant_model_urls['inception_v3_google' + '_' + backend]
        else:
            model_url = inception_module.model_urls['inception_v3_google']

        state_dict = load_state_dict_from_url(model_url, progress=progress)
        model.load_state_dict(state_dict)

        if not quantize:
            if not original_aux_logits:
                model.aux_logits = False
                model.AuxLogits = None
    return model
def unet_resnext50(num_classes=1, pretrained=False, progress=True):
    model = UResNet(resnext50, num_classes=num_classes)
    if pretrained:
        if num_classes != 1:
            raise ValueError('Pretrained weights are for num_classes=1 only')
        state_dict = load_state_dict_from_url(model_urls['unet_resnext50'], progress=progress)
        model.load_state_dict(state_dict)
    return model
def _resnet(arch, block, layers, pretrained, progress, **kwargs):
    model = ResNet(block, layers, **kwargs)
    if pretrained:
        pretrained_state_dict = load_state_dict_from_url(model_urls[arch], progress=progress)
        now_state_dict = model.state_dict()
        now_state_dict.update(pretrained_state_dict)
        model.load_state_dict(now_state_dict)
    return model
def __init__(self, alpha, pretrained=False, progress=True, block=Bottleneck,
             conv_makers=[Conv2Plus1DKeepTimeScaleDownsample] * 4, layers=[3, 4, 6, 3],
             stem=SpatialStem, input_channels=3, zero_init_residual=False):
    """Generic resnet video generator.

    Args:
        alpha (int): channel reduction factor and temporal stride for the fusion convolutions
        pretrained (bool, optional): initialize from the r2plus1d_18 checkpoint. Defaults to False.
        progress (bool, optional): display a download progress bar. Defaults to True.
        block (nn.Module): resnet building block
        conv_makers (list(functions)): generator function for each layer
        layers (List[int]): number of blocks per layer
        stem (nn.Module, optional): Resnet stem. Defaults to SpatialStem.
        input_channels (int, optional): number of input channels. Defaults to 3.
        zero_init_residual (bool, optional): Zero init bottleneck residual BN. Defaults to False.
    """
    super(MSLT, self).__init__()
    self.alpha = alpha
    self.inplanes = 64 + 64 // self.alpha

    self.stem = stem()
    self.layer1 = self._make_layer(block, conv_makers[0], 64, layers[0], stride=1)
    self.layer2 = self._make_layer(block, conv_makers[1], 128, layers[1], stride=2)
    self.layer3 = self._make_layer(block, conv_makers[2], 256, layers[2], stride=2)
    self.layer4 = self._make_layer(block, conv_makers[3], 512, layers[3], stride=2)
    # self.pooling = nn.MaxPool3d(kernel_size=(3, 1, 1), stride=(2, 1, 1), padding=(1, 0, 0))
    self.fusion0 = nn.Conv3d(64 // self.alpha, 64 // self.alpha, kernel_size=(3, 1, 1),
                             stride=(self.alpha, 1, 1), padding=(1, 0, 0), bias=False)
    self.fusion1 = nn.Conv3d(256 // self.alpha, 256 // self.alpha, kernel_size=(3, 1, 1),
                             stride=(self.alpha, 1, 1), padding=(1, 0, 0), bias=False)
    self.fusion2 = nn.Conv3d(512 // self.alpha, 512 // self.alpha, kernel_size=(3, 1, 1),
                             stride=(self.alpha, 1, 1), padding=(1, 0, 0), bias=False)
    self.fusion3 = nn.Conv3d(1024 // self.alpha, 1024 // self.alpha, kernel_size=(3, 1, 1),
                             stride=(self.alpha, 1, 1), padding=(1, 0, 0), bias=False)
    self.avgpool = nn.AdaptiveAvgPool3d((1, 1, 1))

    # init weights
    self._initialize_weights()

    if zero_init_residual:
        for m in self.modules():
            if isinstance(m, Bottleneck):
                nn.init.constant_(m.bn3.weight, 0)

    if pretrained:
        state_dict = load_state_dict_from_url(model_urls['r2plus1d_18'], progress=progress)
        model_dict = self.state_dict()
        # keep only the checkpoint tensors whose name and shape match this model
        pretrained_dict = {k: v for k, v in state_dict.items()
                           if k in model_dict and v.shape == model_dict[k].shape}
        model_dict.update(pretrained_dict)
        self.load_state_dict(model_dict)

    if input_channels != 3:
        self.stem[0] = nn.Conv3d(input_channels, 45, kernel_size=(1, 7, 7), stride=(1, 2, 2),
                                 padding=(0, 3, 3), bias=True)
def segnet_vgg19_bn(pretrained=False, progress=True, **kwargs):
    """Constructs a SegNet model with a VGG19-BN backbone."""
    model = SegNet(arch='segnet_vgg19_bn', **kwargs)
    if pretrained:
        state_dict = load_state_dict_from_url(model_urls['segnet_vgg19_bn'], progress=progress)
        model.load_state_dict(state_dict)
    return model
def _vgg(arch, cfg, batch_norm, pretrained, progress, **kwargs):
    if pretrained:
        kwargs['init_weights'] = False
    model = VGG(make_layers(cfgs[cfg], batch_norm=batch_norm), **kwargs)
    if pretrained:
        state_dict = load_state_dict_from_url(model_urls[arch], progress=progress)
        model.load_state_dict(state_dict)
    return model
def retinanet_mobilenet(pretrained=False, progress=True, num_classes=91, pretrained_backbone=False,
                        trainable_backbone_layers=None, min_size=320, max_size=640, **kwargs):
    """
    Constructs a RetinaNet model with a MobileNetV3-Large backbone. It works similarly to RetinaNet
    with a ResNet-50-FPN backbone. See `retinanet_resnet50_fpn` for more details.

    Example::

        >>> model = retinanet_mobilenet(pretrained=True)
        >>> model.eval()
        >>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
        >>> predictions = model(x)

    Args:
        pretrained (bool): If True, returns a model pre-trained on COCO train2017
        progress (bool): If True, displays a progress bar of the download to stderr
        num_classes (int): number of output classes of the model (including the background)
        pretrained_backbone (bool): If True, returns a model with backbone pre-trained on Imagenet
        trainable_backbone_layers (int): number of trainable (not frozen) backbone layers starting
            from the final block. Valid values are between 0 and 6, with 6 meaning all backbone
            layers are trainable.
        min_size (int): minimum size of the image to be rescaled before feeding it to the backbone
        max_size (int): maximum size of the image to be rescaled before feeding it to the backbone
    """
    # check default parameters and by default set it to 6 if possible
    trainable_backbone_layers = _validate_trainable_layers(
        pretrained or pretrained_backbone, trainable_backbone_layers, 6, 6)

    if pretrained:
        pretrained_backbone = False
    backbone = mobilenet_backbone("retinanet_mobilenet_v3_large", pretrained_backbone,
                                  trainable_layers=trainable_backbone_layers)

    anchor_sizes = ((16, 32, 64, 128, 256,),)
    aspect_ratios = ((0.5, 1.0, 2.0),)

    model = RetinaNet(backbone, num_classes,
                      anchor_generator=AnchorGenerator(anchor_sizes, aspect_ratios),
                      min_size=min_size, max_size=max_size, **kwargs)
    if pretrained:
        state_dict = load_state_dict_from_url(model_urls['retinanet_mobilenet_v3_large_coco'],
                                              progress=progress)
        model.load_state_dict(state_dict)
    return model
def _resnet(arch, block, layers, pretrained, progress, **kwargs):
    model = ResNet(block, layers, **kwargs)
    if pretrained:
        state_dict = load_state_dict_from_url(model_urls[arch], progress=progress)
        model.load_state_dict(state_dict)
    # TODO: wait to be fixed
    model.fc = nn.Linear(512 * block.expansion, 2)
    return model
def VGG16(pretrained, in_channels, **kwargs):
    model = VGG(make_layers(cfgs["D"], batch_norm=False, in_channels=in_channels), **kwargs)
    if pretrained:
        state_dict = load_state_dict_from_url(
            "https://download.pytorch.org/models/vgg16-397923af.pth", model_dir="./model_data")
        model.load_state_dict(state_dict)
    del model.avgpool
    del model.classifier
    return model
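# Usage sketch for the truncated VGG16 above (assumes the convolutional layers live in
# `model.features`, as in torchvision's VGG): with avgpool and classifier removed, the
# network serves as a fully convolutional feature extractor, e.g. a detection backbone.
backbone = VGG16(pretrained=True, in_channels=3)
features = backbone.features(torch.randn(1, 3, 300, 300))  # spatial feature map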
def _resnet(arch, block, layers, pretrained, progress, resnet_local_path=None, **kwargs):
    model = ResNet(block, layers, **kwargs)
    if pretrained:
        if resnet_local_path is not None:
            state_dict = torch.load(resnet_local_path)
        else:
            state_dict = load_state_dict_from_url(model_urls[arch], progress=progress)
        model.load_state_dict(state_dict)
    return model
def _resnet(arch, block, layers, pretrained, progress, n_channels, **kwargs):
    model = ResNet(block, layers, n_channels=n_channels, **kwargs)
    if pretrained:
        pretrained_dict = load_state_dict_from_url(model_urls[arch], progress=progress)
        model_dict = model.state_dict()
        model_dict = weight_transform(model_dict, pretrained_dict, n_channels)
        model.load_state_dict(model_dict)
    return model
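# `weight_transform` is not shown here. A common way to adapt ImageNet weights to a
# different number of input channels is to tile (or truncate) the first convolution's
# RGB filters; a hypothetical sketch, not necessarily this repository's implementation:
def weight_transform(model_dict, pretrained_dict, n_channels):
    for key, value in pretrained_dict.items():
        if key not in model_dict:
            continue
        if key == 'conv1.weight' and n_channels != 3:
            # tile the RGB filters along the input-channel axis, then truncate
            repeats = -(-n_channels // 3)  # ceil(n_channels / 3)
            value = value.repeat(1, repeats, 1, 1)[:, :n_channels]
        if value.shape == model_dict[key].shape:
            model_dict[key] = value
    return model_dict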
def maskrcnn_resnet50_fpn(pretrained=False, progress=True, num_classes=91, pretrained_backbone=True,
                          **kwargs):
    """
    Constructs a Mask R-CNN model with a ResNet-50-FPN backbone.

    The input to the model is expected to be a list of tensors, each of shape ``[C, H, W]``, one for
    each image, and should be in ``0-1`` range. Different images can have different sizes.

    The behavior of the model changes depending if it is in training or evaluation mode.

    During training, the model expects both the input tensors, as well as a targets (list of
    dictionary), containing:
        - boxes (``FloatTensor[N, 4]``): the ground-truth boxes in ``[x1, y1, x2, y2]`` format, with
          values of ``x`` between ``0`` and ``W`` and values of ``y`` between ``0`` and ``H``
        - labels (``Int64Tensor[N]``): the class label for each ground-truth box
        - masks (``UInt8Tensor[N, H, W]``): the segmentation binary masks for each instance

    The model returns a ``Dict[Tensor]`` during training, containing the classification and regression
    losses for both the RPN and the R-CNN, and the mask loss.

    During inference, the model requires only the input tensors, and returns the post-processed
    predictions as a ``List[Dict[Tensor]]``, one for each input image. The fields of the ``Dict`` are
    as follows:
        - boxes (``FloatTensor[N, 4]``): the predicted boxes in ``[x1, y1, x2, y2]`` format, with
          values of ``x`` between ``0`` and ``W`` and values of ``y`` between ``0`` and ``H``
        - labels (``Int64Tensor[N]``): the predicted labels for each image
        - scores (``Tensor[N]``): the scores for each prediction
        - masks (``UInt8Tensor[N, 1, H, W]``): the predicted masks for each instance, in ``0-1`` range.
          In order to obtain the final segmentation masks, the soft masks can be thresholded,
          generally with a value of 0.5 (``mask >= 0.5``)

    Mask R-CNN is exportable to ONNX for a fixed batch size with input images of fixed size.

    Example::

        >>> model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)
        >>> model.eval()
        >>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
        >>> predictions = model(x)
        >>>
        >>> # optionally, if you want to export the model to ONNX:
        >>> torch.onnx.export(model, x, "mask_rcnn.onnx", opset_version=11)

    Arguments:
        pretrained (bool): If True, returns a model pre-trained on COCO train2017
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    if pretrained:
        # no need to download the backbone if pretrained is set
        pretrained_backbone = False
    backbone = resnet_fpn_backbone('resnet50', pretrained_backbone)
    model = MaskRCNN(backbone, num_classes, **kwargs)
    if pretrained:
        state_dict = load_state_dict_from_url(model_urls['maskrcnn_resnet50_fpn_coco'],
                                              progress=progress)
        # drop the box/mask predictor heads from the checkpoint so the model can be
        # fine-tuned with a custom num_classes; those layers keep their fresh initialization
        del state_dict["roi_heads.box_predictor.cls_score.weight"]
        del state_dict["roi_heads.box_predictor.cls_score.bias"]
        del state_dict["roi_heads.box_predictor.bbox_pred.weight"]
        del state_dict["roi_heads.box_predictor.bbox_pred.bias"]
        del state_dict["roi_heads.mask_predictor.mask_fcn_logits.weight"]
        del state_dict["roi_heads.mask_predictor.mask_fcn_logits.bias"]
        model.load_state_dict(state_dict, strict=False)
    return model
def __load_model(self, net_class: type, url: str) -> torch.nn.Module:
    """Download and construct the models"""
    try:
        state_dict = load_state_dict_from_url(url, map_location=self._device_control)
    except urllib.error.HTTPError:  # type: ignore
        raise DetectorModelError('Invalid model weights url: ' + url)
    model = net_class()
    model.load_state_dict(state_dict, strict=False)
    return model
def _resnet(arch, block, layers, pretrained, progress, **kwargs):
    # pop map_location so it is not forwarded to the ResNet constructor
    map_location = kwargs.pop('map_location', None)
    model = ResNet(block, layers, **kwargs)
    if pretrained:
        state_dict = load_state_dict_from_url(model_urls[arch], progress=progress,
                                              map_location=map_location)
        model.load_state_dict(state_dict, strict=False)
    return model
def _resnet(arch, block, layers, pretrained=None, progress=True, args=None, logger=None, **kwargs):
    model = ResNet(block, layers, args, logger, **kwargs)
    if pretrained is True:
        state_dict = load_state_dict_from_url(model_urls[arch], progress=progress)
        model.load_state_dict(state_dict, strict=False)
    elif pretrained is not None:
        # `pretrained` may also be a path to a locally saved state dict
        model.load_state_dict(torch.load(pretrained))
    return model
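# Usage sketch for the helper above: `pretrained` may be True (download the checkpoint
# from model_urls) or a path to a local state dict. The path below is hypothetical.
model = _resnet('resnet50', Bottleneck, [3, 4, 6, 3],
                pretrained='checkpoints/resnet50_finetuned.pth')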
def mobilenet_v2(pretrained=False, progress=True):
    model = MobileNetV2()
    if pretrained:
        state_dict = load_state_dict_from_url(model_urls['mobilenet_v2'], model_dir="model_data",
                                              progress=progress)
        model.load_state_dict(state_dict)
    return model
def _load_state_dict(model, model_url, load_classifier=True):
    state_dict = load_state_dict_from_url(model_url, progress=False)
    model_dict = model.state_dict()
    for key in list(state_dict.keys()):
        if not load_classifier:
            if 'fc' in key:
                # keep classifier weights unchanged
                state_dict[key] = model_dict[key]
    model.load_state_dict(state_dict)
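# Usage sketch with hypothetical names: keep the pretrained backbone weights but leave
# the classifier ('fc' keys) at its freshly initialized values, e.g. before fine-tuning
# on a dataset with a different number of classes.
model = resnet50(num_classes=10)  # hypothetical constructor
_load_state_dict(model, model_urls['resnet50'], load_classifier=False)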
def init_weights(self, pretrained, pretrained_num_classes, num_classes):
    if pretrained != "":
        state_dict = load_state_dict_from_url(pretrained, progress=True)
        self.load_state_dict(state_dict=state_dict, strict=False)
    if num_classes != pretrained_num_classes:
        fc = self.head.fc
        fc_features = fc.in_features
        self.head.fc = nn.Linear(fc_features, num_classes)
        self.head.init_weights()
def _resnet(arch: str, block: Type[Union[BasicBlock, Bottleneck]], layers: List[int],
            pretrained: bool, progress: bool, **kwargs: Any) -> ResNet:
    model = ResNet(block, layers, **kwargs)
    if pretrained:
        state_dict = load_state_dict_from_url(model_urls[arch], progress=progress)
        model.load_state_dict(state_dict, strict=False)
    return model
def _xresnet(pretrained, expansion, layers, **kwargs):
    # TODO pretrain all sizes. Currently will fail with non-xrn50
    url = 'https://s3.amazonaws.com/fast-ai-modelzoo/xrn50_940.pth'
    res = XResNet(ResBlock, expansion, layers, **kwargs)
    if pretrained:
        res.load_state_dict(load_state_dict_from_url(url, map_location='cpu')['model'],
                            strict=False)
    return res
def resnext101_32x4d(pretrained=False, progress=True, **kwargs):
    kwargs['groups'] = 32
    kwargs['width_per_group'] = 4
    model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs)
    if pretrained:
        # NOTE: torchvision only publishes resnext101_32x8d weights; loading them into a
        # 32x4d model will raise shape-mismatch errors in load_state_dict.
        state_dict = load_state_dict_from_url(model_urls['resnext101_32x8d'], progress=progress)
        model.load_state_dict(state_dict)
    return model
def _load_pretrained_model(model, url):
    pretrain_dict = load_state_dict_from_url(url)
    model_dict = {}
    state_dict = model.state_dict()
    for k, v in pretrain_dict.items():
        if k in state_dict:
            model_dict[k] = v
    state_dict.update(model_dict)
    model.load_state_dict(state_dict)
def _resnet(arch, block, layers, pretrained, progress, **kwargs):
    model = MyResNet(block, layers, **kwargs)
    if pretrained:
        print("Downloading pretrained model...")
        state_dict = load_state_dict_from_url(model_urls[arch], progress=progress)
        model.load_state_dict(state_dict)
        print("Pretrained weights loaded.")
    return model