Ejemplo n.º 1
0
def get_bisenet(dataset='pascal_paper',
                backbone='resnet18',
                pretrained_base=True,
                pretrained=False,
                root=os.path.expanduser('~/.torch/models'),
                **kwargs):
    """Build a BiseNet model for the given dataset and backbone.

    Parameters
    ----------
    dataset : str, default 'pascal_paper'
        Key looked up in ``data.datasets``; the ``NUM_CLASS`` attribute of
        that entry is passed to ``BiseNet`` as the class count. When
        ``pretrained`` is truthy it must also be one of the keys of the
        internal acronym table ('pascal_voc', 'pascal_paper', 'pascal_aug',
        'ade20k', 'coco', 'citys').
    backbone : str, default 'resnet18'
        Key looked up in ``bisenet_spec``.
    pretrained_base : bool, default True
        Forwarded to ``_parse_network`` as ``pretrained`` when the selected
        spec lists feature outputs (third spec entry is not ``None``).
    pretrained : bool, default False
        When truthy, load the weights file resolved by ``get_model_file``
        for ``'bisenet_<backbone>_<acronym>'`` into the model.
    root : str, default '~/.torch/models' (user-expanded)
        Forwarded to ``get_model_file`` as ``root``.
    **kwargs
        Forwarded unchanged to the ``BiseNet`` constructor.

    Returns
    -------
    The constructed ``BiseNet`` instance.
    """
    from data import datasets

    config = bisenet_spec[backbone]
    if config[2] is not None:
        # The spec names feature outputs: build the backbone stages and
        # register them as a ModuleList.
        feat = nn.ModuleList(
            _parse_network(config[1],
                           outputs=config[2],
                           pretrained=pretrained_base))
    else:
        # No outputs listed: hand the second spec entry to BiseNet as-is.
        feat = config[1]

    model = BiseNet(config[0],
                    datasets[dataset].NUM_CLASS,
                    backbone=feat,
                    **kwargs)

    if pretrained:
        from model.model_store import get_model_file

        # Only needed to compose the weights-file name, so it is built on
        # the pretrained path only.
        acronyms = {
            'pascal_voc': 'voc',
            'pascal_paper': 'voc',
            'pascal_aug': 'voc',
            'ade20k': 'ade',
            'coco': 'coco',
            'citys': 'citys',
        }
        weights_file = get_model_file(
            'bisenet_%s_%s' % (backbone, acronyms[dataset]), root=root)
        # Deserialize onto CPU so a checkpoint saved from GPU tensors also
        # loads on hosts without CUDA; load_state_dict copies the values
        # into the model's own parameters afterwards.
        state_dict = torch.load(weights_file, map_location='cpu')
        model.load_state_dict(state_dict)
    return model
Ejemplo n.º 2
0
def centernet_dla34_dcn_coco(pretrained=False,
                             pretrained_base=False,
                             **kwargs):
    """Build the 'centernet_dla34_dcn_coco' model via ``get_centernet``.

    Parameters
    ----------
    pretrained : default False
        Forwarded to ``get_centernet``. When truthy, ``pretrained_base`` is
        forced to ``False`` before the backbone is built.
    pretrained_base : default False
        Passed to ``_parse_network`` as its third positional argument.
    **kwargs
        Forwarded unchanged to ``get_centernet``. ``norm_layer`` (default
        ``nn.BatchNorm2d``) and ``norm_kwargs`` (default ``None``) are also
        read from here and handed to the backbone and deconvolution layers.

    Returns
    -------
    Whatever ``get_centernet`` returns.
    """
    from data.mscoco.detection_cv import COCODetection

    if pretrained:
        # Full-model weights are requested, so skip backbone-only weights.
        pretrained_base = False

    # Read the normalization settings without removing them from kwargs:
    # they are still forwarded to get_centernet below.
    norm_layer = kwargs.get('norm_layer', nn.BatchNorm2d)
    norm_kwargs = kwargs.get('norm_kwargs')

    stage_outputs = [[stage] for stage in range(1, 7)]
    backbone_stages = _parse_network('dla34',
                                     stage_outputs,
                                     pretrained_base,
                                     norm_layer=norm_layer,
                                     norm_kwargs=norm_kwargs)
    features = nn.ModuleList(backbone_stages)

    deconv_layers = DLADeConvLayer(
        [16, 32, 64, 128, 256, 512],
        down_ratio=4,
        last_level=5,
        norm_layer=norm_layer,
        norm_kwargs=norm_kwargs,
    )

    heads = {'hm': 80, 'wh': 2, 'reg': 2}
    return get_centernet(
        'centernet_dla34_dcn_coco',
        features,
        deconv_layers,
        heads=heads,
        head_conv=256,
        classes=COCODetection.CLASSES,
        pretrained=pretrained,
        **kwargs
    )
Ejemplo n.º 3
0
    def __init__(self,
                 nclass,
                 aux,
                 backbone='resnet50',
                 jpu=False,
                 dilated=True,
                 height=None,
                 width=None,
                 base_size=520,
                 crop_size=480,
                 keep_shape=False,
                 pretrained_base=True,
                 **kwargs):
        """Store the segmentation settings and attach the three backbone stages.

        Parameters
        ----------
        nclass, aux, jpu, keep_shape, base_size, crop_size
            Stored on the instance under the same names.
        backbone : str or torch.nn.ModuleList, default 'resnet50'
            Either a ``ModuleList`` of exactly three modules, used directly
            as ``base1``/``base2``/``base3``, or one of 'resnet50',
            'resnet101', 'resnet152', in which case the stages come from
            ``_parse_network(backbone + '_v1s', ...)``.
        dilated : default True
            Forwarded to ``_parse_network`` (name-based backbones only).
        height, width : optional
            Each falls back to ``crop_size`` when ``None``; the pair is
            stored as ``self._up_kwargs``.
        pretrained_base : default True
            Forwarded to ``_parse_network`` as ``pretrained``
            (name-based backbones only).
        **kwargs
            Accepted but not used by this method.

        Raises
        ------
        RuntimeError
            If ``backbone`` is neither a three-element ``ModuleList`` nor
            one of the supported names.
        """
        super(SegBaseModel, self).__init__()
        self.aux = aux
        self.nclass = nclass
        self.jpu = jpu
        self.keep_shape = keep_shape

        prebuilt = (isinstance(backbone, torch.nn.ModuleList)
                    and len(backbone) == 3)
        if prebuilt:
            self.base1 = backbone[0]
            self.base2 = backbone[1]
            self.base3 = backbone[2]
        else:
            # Output specification handed to _parse_network per backbone.
            known_backbones = (
                ('resnet50', [[11, 3], [12, 5], [13, 2]]),
                ('resnet101', [[11, 3], [12, 22], [13, 2]]),
                ('resnet152', [[11, 7], [12, 35], [13, 2]]),
            )
            for arch, outputs in known_backbones:
                if backbone == arch:
                    break
            else:
                raise RuntimeError('unknown backbone: {}'.format(backbone))

            # TODO: change
            stages = _parse_network(backbone + '_v1s',
                                    outputs,
                                    pretrained=pretrained_base,
                                    dilated=dilated)
            self.base1, self.base2, self.base3 = stages

        # Unspecified spatial dimensions default to the crop size.
        if height is None:
            height = crop_size
        if width is None:
            width = crop_size
        self._up_kwargs = (height, width)
        self.base_size = base_size
        self.crop_size = crop_size

        if jpu:
            self.JPU = JPU([512, 1024, 2048], width=512)
Ejemplo n.º 4
0
def mask_rcnn_resnet50_v1b_coco(pretrained=False, pretrained_base=True, **kwargs):
    r"""Build a Mask R-CNN model on a 'resnet50_v1b' backbone for COCO.

    Reference: He, K., Gkioxari, G., Dollár, P., & Girshick, R. (2017).
    "Mask R-CNN".

    Parameters
    ----------
    pretrained : bool or str, default False
        Forwarded to ``get_mask_rcnn``. When truthy, ``pretrained_base`` is
        forced to ``False`` before the backbone is built.
    pretrained_base : bool or str, optional, default True
        Passed to ``_parse_network`` for the backbone. Has no effect when
        ``pretrained`` is truthy.
    **kwargs
        Forwarded unchanged to ``get_mask_rcnn``.
        NOTE(review): earlier documentation listed ``ctx`` (default CPU) and
        ``root`` (default '~/.mxnet/models') here; these look inherited from
        the MXNet version -- confirm ``get_mask_rcnn`` accepts them.

    Returns
    -------
    Whatever ``get_mask_rcnn`` returns.

    Examples
    --------
    >>> model = mask_rcnn_resnet50_v1b_coco(pretrained=True)
    >>> print(model)
    """
    from data.mscoco.detection_cv import COCODetection

    if pretrained:
        # Full-model weights are requested, so skip backbone-only weights.
        pretrained_base = False

    backbone_parts = _parse_network('resnet50_v1b', [[6, 5], [7, 2]],
                                    pretrained_base)
    features, top_features = backbone_parts

    pattern_parts = (
        '.*dense',
        '.*rpn',
        '.*mask',
        '.*down(2|3|4)_conv',
        '.*layers(2|3|4)_conv',
    )
    train_patterns = '|'.join(pattern_parts)

    return get_mask_rcnn(
        name='resnet50_v1b',
        dataset='coco',
        pretrained=pretrained,
        features=features,
        top_features=top_features,
        classes=COCODetection.CLASSES,
        mask_channels=256,
        rcnn_max_dets=1000,
        short=800,
        max_size=1333,
        train_patterns=train_patterns,
        nms_thresh=0.5,
        nms_topk=-1,
        post_nms=-1,
        in_channels=2048,
        roi_mode='align',
        roi_size=(14, 14),
        strides=16,
        clip=4.42,
        rpn_in_channel=1024,
        rpn_channel=1024,
        base_size=16,
        scales=(2, 4, 8, 16, 32),
        ratios=(0.5, 1, 2),
        alloc_size=(128, 128),
        rpn_nms_thresh=0.7,
        rpn_train_pre_nms=12000,
        rpn_train_post_nms=2000,
        rpn_test_pre_nms=6000,
        rpn_test_post_nms=1000,
        rpn_min_size=0,
        num_sample=128,
        pos_iou_thresh=0.5,
        pos_ratio=0.25,
        **kwargs
    )
Ejemplo n.º 5
0
def faster_rcnn_resnet101_v1d_coco(pretrained=False,
                                   pretrained_base=True,
                                   **kwargs):
    r"""Build a Faster R-CNN model on a 'resnet101_v1d' backbone for COCO.

    Reference: Ren, S., He, K., Girshick, R., & Sun, J. (2015). "Faster
    R-CNN: Towards real-time object detection with region proposal
    networks".

    Parameters
    ----------
    pretrained : bool, optional, default False
        Forwarded to ``get_faster_rcnn``. When truthy, ``pretrained_base``
        is forced to ``False`` before the backbone is built.
    pretrained_base : bool or str, optional, default True
        Passed to ``_parse_network`` for the backbone. Has no effect when
        ``pretrained`` is truthy.
    **kwargs
        Forwarded unchanged to ``get_faster_rcnn``.
        NOTE(review): earlier documentation listed ``ctx`` (default CPU) and
        ``root`` (default '~/.mxnet/models') here; these look inherited from
        the MXNet version -- confirm ``get_faster_rcnn`` accepts them.

    Returns
    -------
    Whatever ``get_faster_rcnn`` returns.

    Examples
    --------
    >>> model = faster_rcnn_resnet101_v1d_coco(pretrained=True)
    >>> print(model)
    """
    from data.mscoco.detection_cv import COCODetection

    if pretrained:
        # Full-model weights are requested, so skip backbone-only weights.
        pretrained_base = False

    backbone_parts = _parse_network('resnet101_v1d', [[12, 21], [13, 2]],
                                    pretrained_base)
    features, top_features = backbone_parts

    pattern_parts = (
        '.*dense',
        '.*rpn',
        '.*down(2|3|4)_conv',
        '.*layers(2|3|4)_conv',
    )
    train_patterns = '|'.join(pattern_parts)

    return get_faster_rcnn(
        # Model identity and backbone pieces.
        name='resnet101_v1d', dataset='coco', pretrained=pretrained,
        features=features, top_features=top_features,
        classes=COCODetection.CLASSES,
        # Input sizing and trainable-parameter selection.
        short=800, max_size=1333, train_patterns=train_patterns,
        # nms_* / roi_* options.
        nms_thresh=0.5, nms_topk=-1, post_nms=-1,
        roi_mode='align', roi_size=(14, 14), strides=16, clip=4.42,
        # rpn_* and anchor options.
        rpn_in_channel=1024, rpn_channel=1024, base_size=16,
        scales=(2, 4, 8, 16, 32), ratios=(0.5, 1, 2), alloc_size=(128, 128),
        rpn_nms_thresh=0.7,
        rpn_train_pre_nms=12000, rpn_train_post_nms=2000,
        rpn_test_pre_nms=6000, rpn_test_post_nms=1000, rpn_min_size=0,
        # Sampling options.
        num_sample=128, pos_iou_thresh=0.5, pos_ratio=0.25, max_num_gt=100,
        **kwargs
    )