Example 1
    def __init__(self,
                 pretrained=True,
                 average_pool=True,
                 semantic=True,
                 final_dim=1024):
        """
        :param average_pool: whether or not to average pool the representations
        :param pretrained: Whether we need to load from scratch
        :param semantic: Whether or not we want to introduce the mask and the class label early on (default Yes)
        """
        super(SimpleDetector, self).__init__()
        USE_IMAGENET_PRETRAINED = True
        # huge thx to https://github.com/ruotianluo/pytorch-faster-rcnn/blob/master/lib/nets/resnet_v1.py
        backbone = _load_resnet_imagenet(
            pretrained=pretrained
        ) if USE_IMAGENET_PRETRAINED else _load_resnet(pretrained=pretrained)

        self.backbone = nn.Sequential(
            backbone.conv1,
            backbone.bn1,
            backbone.relu,
            backbone.maxpool,
            backbone.layer1,
            backbone.layer2,
            backbone.layer3,
            # backbone.layer4
        )
        # note: `sample_num` is the argument name in older mmcv/mmdet releases;
        # newer ones call it `sampling_ratio` (compare Example 5)
        self.roi_align = RoIAlign((7, 7) if USE_IMAGENET_PRETRAINED else
                                  (14, 14),
                                  spatial_scale=1 / 16,
                                  sample_num=0)

        if semantic:
            self.mask_dims = 32
            self.object_embed = torch.nn.Embedding(num_embeddings=81,
                                                   embedding_dim=128)
            self.mask_upsample = torch.nn.Conv2d(
                1,
                self.mask_dims,
                kernel_size=3,
                stride=2 if USE_IMAGENET_PRETRAINED else 1,
                padding=1,
                bias=True)
        else:
            self.object_embed = None
            self.mask_upsample = None

        after_roi_align = [backbone.layer4]
        self.final_dim = final_dim
        if average_pool:
            after_roi_align += [nn.AvgPool2d(7, stride=1), Flattener()]

        self.after_roi_align = torch.nn.Sequential(*after_roi_align)

        self.obj_downsample = torch.nn.Sequential(
            torch.nn.Dropout(p=0.1),
            torch.nn.Linear(2048 + (128 if semantic else 0), final_dim),
            torch.nn.ReLU(inplace=True),
        )
        self.regularizing_predictor = torch.nn.Linear(2048, 81)
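For context, here is a hypothetical forward pass (not part of the original snippet) showing how the modules above compose; the argument names and shape comments are assumptions inferred from the definitions:

    def forward_sketch(self, images, rois, obj_labels=None):
        # Hypothetical method, not in the original class. images: [B, 3, H, W];
        # rois: [K, 5] as (batch_idx, x1, y1, x2, y2) in input coordinates;
        # obj_labels: [K] class ids for the 81-way embedding.
        img_feats = self.backbone(images)            # stride-16 feature map
        roi_feats = self.roi_align(img_feats, rois)  # per-box pooled features
        post_roi = self.after_roi_align(roi_feats)   # layer4 + avg pool -> [K, 2048]
        feats = post_roi
        if self.object_embed is not None and obj_labels is not None:
            # concatenate the 128-d class embedding when semantic=True
            feats = torch.cat([feats, self.object_embed(obj_labels)], dim=1)
        obj_logits = self.regularizing_predictor(post_roi)  # auxiliary 81-way head
        return self.obj_downsample(feats), obj_logits       # [K, final_dim]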
Example 2
    def __init__(self,
                 roi_layer_type='RoIAlign',
                 featmap_stride=16,
                 output_size=16,
                 sampling_ratio=0,
                 pool_mode='avg',
                 aligned=True,
                 with_temporal_pool=True,
                 temporal_pool_mode='avg',
                 with_global=False):
        super().__init__()
        self.roi_layer_type = roi_layer_type
        assert self.roi_layer_type in ['RoIPool', 'RoIAlign']
        self.featmap_stride = featmap_stride
        self.spatial_scale = 1. / self.featmap_stride

        self.output_size = output_size
        self.sampling_ratio = sampling_ratio
        self.pool_mode = pool_mode
        self.aligned = aligned

        self.with_temporal_pool = with_temporal_pool
        self.temporal_pool_mode = temporal_pool_mode

        self.with_global = with_global

        try:
            from mmcv.ops import RoIAlign, RoIPool
        except (ImportError, ModuleNotFoundError):
            raise ImportError('Failed to import `RoIAlign` and `RoIPool` from '
                              '`mmcv.ops`; these operators are required by '
                              '`SingleRoIExtractor3D`.')

        if self.roi_layer_type == 'RoIPool':
            self.roi_layer = RoIPool(self.output_size, self.spatial_scale)
        else:
            self.roi_layer = RoIAlign(
                self.output_size,
                self.spatial_scale,
                sampling_ratio=self.sampling_ratio,
                pool_mode=self.pool_mode,
                aligned=self.aligned)
        self.global_pool = nn.AdaptiveAvgPool2d(self.output_size)
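A minimal usage sketch, assuming the class above is named `SingleRoIExtractor3D` (as the import-error message suggests) and that mmcv is installed. mmcv's 2-D RoI layers take an [N, C, H, W] feature map and rois of shape [K, 5] holding (batch_idx, x1, y1, x2, y2) in input-image coordinates, which `spatial_scale` projects onto the feature grid:

import torch

extractor = SingleRoIExtractor3D(roi_layer_type='RoIAlign',
                                 featmap_stride=16,
                                 output_size=8)
feat = torch.rand(2, 256, 32, 32)                 # frame-level feature map
rois = torch.tensor([[0., 32., 32., 160., 160.],  # box on sample 0
                     [1., 0., 0., 64., 64.]])     # box on sample 1
roi_feat = extractor.roi_layer(feat, rois)        # -> [2, 256, 8, 8]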
Example 3
def _test_roialign_gradcheck(device, dtype):
    if not torch.cuda.is_available() and device == 'cuda':
        pytest.skip('test requires GPU')
    try:
        from mmcv.ops import RoIAlign
    except (ImportError, ModuleNotFoundError):
        pytest.skip('RoIAlign op is not successfully compiled')
    if dtype is torch.half:
        pytest.skip('grad check does not support fp16')
    for case in inputs:
        np_input = np.array(case[0])
        np_rois = np.array(case[1])

        x = torch.tensor(np_input,
                         dtype=dtype,
                         device=device,
                         requires_grad=True)
        rois = torch.tensor(np_rois, dtype=dtype, device=device)

        froipool = RoIAlign((pool_h, pool_w), spatial_scale, sampling_ratio)

        gradcheck(froipool, (x, rois), eps=1e-5, atol=1e-5)
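This helper reads `inputs`, `pool_h`, `pool_w`, `spatial_scale`, and `sampling_ratio` from module scope and relies on imports not shown in the excerpt. A sketch of those fixtures, with values assumed to mirror Example 4:

import numpy as np
import pytest
import torch
from torch.autograd import gradcheck

pool_h, pool_w = 2, 2
spatial_scale = 1.0
sampling_ratio = 2
inputs = [
    # (feature map, rois): rois are (batch_idx, x1, y1, x2, y2)
    ([[[[1., 2.], [3., 4.]]]], [[0., 0., 0., 1., 1.]]),
]

# e.g. _test_roialign_gradcheck('cpu', torch.double)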
Example 4
def test_roialign():
    try:
        from mmcv.ops import RoIAlign
    except (ImportError, ModuleNotFoundError):
        pytest.skip('test requires compilation')

    # trt config
    fp16_mode = False
    max_workspace_size = 1 << 30

    # roi align config
    pool_h = 2
    pool_w = 2
    spatial_scale = 1.0
    sampling_ratio = 2

    inputs = [
        ([[[[1., 2.], [3., 4.]]]], [[0., 0., 0., 1., 1.]]),
        ([[[[1., 2.], [3., 4.]], [[4., 3.], [2., 1.]]]], [[0., 0., 0., 1., 1.]]),
        ([[[[1., 2., 5., 6.], [3., 4., 7., 8.], [9., 10., 13., 14.],
            [11., 12., 15., 16.]]]], [[0., 0., 0., 3., 3.]]),
    ]

    wrapped_model = RoIAlign((pool_w, pool_h), spatial_scale, sampling_ratio,
                             'avg', True).cuda()
    for case in inputs:
        np_input = np.array(case[0], dtype=np.float32)
        np_rois = np.array(case[1], dtype=np.float32)
        input = torch.from_numpy(np_input).cuda()
        rois = torch.from_numpy(np_rois).cuda()

        with torch.no_grad():
            torch.onnx.export(wrapped_model, (input, rois),
                              onnx_file,
                              export_params=True,
                              keep_initializers_as_inputs=True,
                              input_names=['input', 'rois'],
                              output_names=['roi_feat'],
                              opset_version=11)
        onnx_model = onnx.load(onnx_file)

        # create trt engine and wrapper
        opt_shape_dict = {
            'input': [list(input.shape),
                      list(input.shape),
                      list(input.shape)],
            'rois': [list(rois.shape),
                     list(rois.shape),
                     list(rois.shape)]
        }
        trt_engine = onnx2trt(onnx_model,
                              opt_shape_dict,
                              fp16_mode=fp16_mode,
                              max_workspace_size=max_workspace_size)
        save_trt_engine(trt_engine, trt_file)
        trt_model = TRTWrapper(trt_file, ['input', 'rois'], ['roi_feat'])

        with torch.no_grad():
            trt_outputs = trt_model({'input': input, 'rois': rois})
            trt_roi_feat = trt_outputs['roi_feat']

        # compute pytorch_output
        with torch.no_grad():
            pytorch_roi_feat = wrapped_model(input, rois)

        # allclose
        if os.path.exists(onnx_file):
            os.remove(onnx_file)
        if os.path.exists(trt_file):
            os.remove(trt_file)
        assert torch.allclose(pytorch_roi_feat, trt_roi_feat)
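The test also depends on module-level state not shown in the excerpt: the imports and the temporary artifact paths. In mmcv 1.x the TensorRT helpers are exposed under `mmcv.tensorrt`; the exact file names below are assumptions:

import os

import numpy as np
import onnx
import pytest
import torch
from mmcv.tensorrt import TRTWrapper, onnx2trt, save_trt_engine

onnx_file = 'tmp.onnx'    # temporary artifacts, removed at the end of the test
trt_file = 'tmp.engine'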
Example 5
    def __init__(self,
                 pretrained=True,
                 average_pool=True,
                 semantic=True,
                 final_dim=1024,
                 layer_fix=True):
        """
        :param average_pool: whether or not to average pool the representations
        :param pretrained: Whether we need to load from scratch
        :param semantic: Whether or not we want to introduce the mask and the class label early on (default Yes)
        """
        super(SimpleDetector, self).__init__()
        USE_IMAGENET_PRETRAINED = True
        # huge thx to https://github.com/ruotianluo/pytorch-faster-rcnn/blob/master/lib/nets/resnet_v1.py
        backbone = _load_resnet_imagenet(
            pretrained=pretrained
        ) if USE_IMAGENET_PRETRAINED else _load_resnet(pretrained=pretrained)
        self.pre_backbone = nn.Sequential(
            backbone.conv1,
            backbone.bn1,
            backbone.relu,
            backbone.maxpool,
            backbone.layer1,
        )
        self.layer2 = backbone.layer2
        self.cvm_2 = RegionCVM(in_channels=128 * 4, grid=[6, 6])
        self.layer3 = backbone.layer3
        self.cvm_3 = RegionCVM(in_channels=256 * 4, grid=[4, 4])
        self.roi_align = RoIAlign((7, 7) if USE_IMAGENET_PRETRAINED else
                                  (14, 14),
                                  spatial_scale=1 / 16,
                                  sampling_ratio=0)
        if semantic:
            self.mask_dims = 32
            self.object_embed = torch.nn.Embedding(num_embeddings=81,
                                                   embedding_dim=128)
            self.mask_upsample = torch.nn.Conv2d(
                1,
                self.mask_dims,
                kernel_size=3,
                stride=2 if USE_IMAGENET_PRETRAINED else 1,
                padding=1,
                bias=True)
        else:
            self.object_embed = None
            self.mask_upsample = None

        self.layer4 = backbone.layer4
        self.cvm_4 = RegionCVM(in_channels=512 * 4, grid=[1, 1])
        after_roi_align = []

        self.final_dim = final_dim
        if average_pool:
            after_roi_align += [nn.AvgPool2d(7, stride=1), Flattener()]

        self.after_roi_align = torch.nn.Sequential(*after_roi_align)

        self.obj_downsample = torch.nn.Sequential(
            torch.nn.Dropout(p=0.1),
            torch.nn.Linear(2048 + (128 if semantic else 0), final_dim),
            torch.nn.ReLU(inplace=True),
        )
        self.regularizing_predictor = torch.nn.Linear(2048, 81)

        # Freeze the stem and layer1 entirely (parameters() is recursive, so a
        # single pass over the Sequential suffices)
        for p in self.pre_backbone.parameters():
            p.requires_grad = False

        def set_bn_fix(m):
            classname = m.__class__.__name__
            if classname.find('BatchNorm') != -1:
                for p in m.parameters():
                    p.requires_grad = False

        self.layer2.apply(set_bn_fix)
        self.layer3.apply(set_bn_fix)
        self.layer4.apply(set_bn_fix)
        if layer_fix:
            # Freeze layer2-4 as well, leaving only the heads trainable
            for layer in (self.layer2, self.layer3, self.layer4):
                for p in layer.parameters():
                    p.requires_grad = False
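Freezing `requires_grad` stops weight updates, but BatchNorm running statistics still drift whenever the model is in `train()` mode. A common companion to the `set_bn_fix` pattern above (an assumption, not present in the original snippet) is a `train()` override that keeps the frozen BN layers in eval mode:

    def train(self, mode=True):
        # Hypothetical override, not in the original class: after switching to
        # train mode, force frozen BatchNorm layers back to eval so their
        # running statistics stay fixed.
        super(SimpleDetector, self).train(mode)
        if mode:
            def set_bn_eval(m):
                if 'BatchNorm' in m.__class__.__name__:
                    m.eval()
            self.pre_backbone.apply(set_bn_eval)
            self.layer2.apply(set_bn_eval)
            self.layer3.apply(set_bn_eval)
            self.layer4.apply(set_bn_eval)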