def __init__(self, pretrained=True, average_pool=True, semantic=True, final_dim=1024):
    """
    Assemble the detector from a ResNet backbone, an RoI-align layer and the
    per-object projection heads.

    :param pretrained: forwarded unchanged to the ResNet loader
    :param average_pool: if true, ``AvgPool2d(7, stride=1)`` and ``Flattener()``
                         are appended after ``layer4`` in ``after_roi_align``
    :param semantic: if true, build the class-label embedding and the mask
                     convolution; otherwise both attributes are set to ``None``
    :param final_dim: output width of the ``obj_downsample`` linear layer
    """
    super(SimpleDetector, self).__init__()

    USE_IMAGENET_PRETRAINED = True

    # huge thx to https://github.com/ruotianluo/pytorch-faster-rcnn/blob/master/lib/nets/resnet_v1.py
    if USE_IMAGENET_PRETRAINED:
        backbone = _load_resnet_imagenet(pretrained=pretrained)
    else:
        backbone = _load_resnet(pretrained=pretrained)

    # Everything up to layer3 runs on the whole image; layer4 is deliberately
    # left out here and applied after RoI align instead (see after_roi_align).
    stem_and_stages = [
        backbone.conv1,
        backbone.bn1,
        backbone.relu,
        backbone.maxpool,
        backbone.layer1,
        backbone.layer2,
        backbone.layer3,
    ]
    self.backbone = nn.Sequential(*stem_and_stages)

    roi_output_size = (7, 7) if USE_IMAGENET_PRETRAINED else (14, 14)
    self.roi_align = RoIAlign(roi_output_size, spatial_scale=1 / 16, sample_num=0)

    if not semantic:
        self.object_embed = None
        self.mask_upsample = None
    else:
        self.mask_dims = 32
        self.object_embed = torch.nn.Embedding(num_embeddings=81, embedding_dim=128)
        mask_stride = 2 if USE_IMAGENET_PRETRAINED else 1
        self.mask_upsample = torch.nn.Conv2d(
            1,
            self.mask_dims,
            kernel_size=3,
            stride=mask_stride,
            padding=1,
            bias=True,
        )

    post_roi_layers = [backbone.layer4]
    self.final_dim = final_dim
    if average_pool:
        post_roi_layers.extend([nn.AvgPool2d(7, stride=1), Flattener()])
    self.after_roi_align = torch.nn.Sequential(*post_roi_layers)

    # The projection input grows by the 128-d label embedding when semantic.
    projection_in = 2048
    if semantic:
        projection_in += 128
    self.obj_downsample = torch.nn.Sequential(
        torch.nn.Dropout(p=0.1),
        torch.nn.Linear(projection_in, final_dim),
        torch.nn.ReLU(inplace=True),
    )
    self.regularizing_predictor = torch.nn.Linear(2048, 81)
def __init__(self,
             roi_layer_type='RoIAlign',
             featmap_stride=16,
             output_size=16,
             sampling_ratio=0,
             pool_mode='avg',
             aligned=True,
             with_temporal_pool=True,
             temporal_pool_mode='avg',
             with_global=False):
    """Store the extractor configuration and build the RoI layer.

    Args:
        roi_layer_type: Either ``'RoIPool'`` or ``'RoIAlign'``.
        featmap_stride: Its reciprocal is used as the RoI layer's
            spatial scale.
        output_size: Output size handed to the RoI layer and to the
            ``AdaptiveAvgPool2d`` stored as ``global_pool``.
        sampling_ratio: Forwarded to ``RoIAlign`` only.
        pool_mode: Forwarded to ``RoIAlign`` only.
        aligned: Forwarded to ``RoIAlign`` only.
        with_temporal_pool: Stored on the instance; not used here.
        temporal_pool_mode: Stored on the instance; not used here.
        with_global: Stored on the instance; not used here.

    Raises:
        ImportError: If ``RoIAlign`` / ``RoIPool`` cannot be imported
            from ``mmcv.ops``.
    """
    super().__init__()

    self.roi_layer_type = roi_layer_type
    assert self.roi_layer_type in ['RoIPool', 'RoIAlign']

    self.featmap_stride = featmap_stride
    self.spatial_scale = 1. / self.featmap_stride
    self.output_size = output_size
    self.sampling_ratio = sampling_ratio
    self.pool_mode = pool_mode
    self.aligned = aligned

    self.with_temporal_pool = with_temporal_pool
    self.temporal_pool_mode = temporal_pool_mode
    self.with_global = with_global

    # mmcv.ops is imported lazily so a missing build only fails here.
    try:
        from mmcv.ops import RoIAlign, RoIPool
    except (ImportError, ModuleNotFoundError):
        message = ('Failed to import `RoIAlign` and `RoIPool` from '
                   '`mmcv.ops`. The two modules will be used in '
                   '`SingleRoIExtractor3D`! ')
        raise ImportError(message)

    if self.roi_layer_type != 'RoIPool':
        align_kwargs = dict(
            sampling_ratio=self.sampling_ratio,
            pool_mode=self.pool_mode,
            aligned=self.aligned)
        self.roi_layer = RoIAlign(self.output_size, self.spatial_scale,
                                  **align_kwargs)
    else:
        self.roi_layer = RoIPool(self.output_size, self.spatial_scale)

    self.global_pool = nn.AdaptiveAvgPool2d(self.output_size)
def _test_roialign_gradcheck(device, dtype):
    """Run ``gradcheck`` on ``RoIAlign`` for every case in ``inputs``.

    The test is skipped when ``device`` is ``'cuda'`` but CUDA is not
    available, when ``RoIAlign`` cannot be imported from ``mmcv.ops``, or
    when ``dtype`` is ``torch.half``.

    Args:
        device: Device string passed to ``torch.tensor``.
        dtype: Torch dtype used for both the feature map and the RoIs.
    """
    if not torch.cuda.is_available() and device == 'cuda':
        pytest.skip('test requires GPU')
    try:
        from mmcv.ops import RoIAlign
    except (ImportError, ModuleNotFoundError):
        # Catch ImportError as well: an extension that exists but fails to
        # load raises plain ImportError, and that is exactly the "not
        # successfully compiled" situation this skip message describes.
        pytest.skip('RoIAlign op is not successfully compiled')
    if dtype is torch.half:
        pytest.skip('grad check does not support fp16')

    # The constructor arguments do not depend on the case, so build once.
    froipool = RoIAlign((pool_h, pool_w), spatial_scale, sampling_ratio)

    for case in inputs:
        np_input = np.array(case[0])
        np_rois = np.array(case[1])

        # Only the feature map needs gradients; the RoIs are plain inputs.
        x = torch.tensor(
            np_input, dtype=dtype, device=device, requires_grad=True)
        rois = torch.tensor(np_rois, dtype=dtype, device=device)

        gradcheck(froipool, (x, rois), eps=1e-5, atol=1e-5)
def test_roialign():
    """Check that the TensorRT ``RoIAlign`` output matches PyTorch.

    For each case the model is exported to ONNX, converted to a TensorRT
    engine, run through ``TRTWrapper`` and compared with the PyTorch
    output using ``torch.allclose``. The temporary ONNX and engine files
    are removed even when an intermediate step raises.
    """
    try:
        from mmcv.ops import RoIAlign
    except (ImportError, ModuleNotFoundError):
        pytest.skip('test requires compilation')

    # trt config
    fp16_mode = False
    max_workspace_size = 1 << 30

    # roi align config
    pool_h = 2
    pool_w = 2
    spatial_scale = 1.0
    sampling_ratio = 2

    inputs = [([[[[1., 2.], [3., 4.]]]], [[0., 0., 0., 1., 1.]]),
              ([[[[1., 2.], [3., 4.]], [[4., 3.], [2., 1.]]]],
               [[0., 0., 0., 1., 1.]]),
              ([[[[1., 2., 5., 6.], [3., 4., 7., 8.],
                  [9., 10., 13., 14.], [11., 12., 15., 16.]]]],
               [[0., 0., 0., 3., 3.]])]

    # Output size is given as (pool_h, pool_w); both are 2 here, so the
    # order does not change the result, but it stays correct if they differ.
    wrapped_model = RoIAlign((pool_h, pool_w), spatial_scale, sampling_ratio,
                             'avg', True).cuda()

    for case in inputs:
        np_input = np.array(case[0], dtype=np.float32)
        np_rois = np.array(case[1], dtype=np.float32)
        input_tensor = torch.from_numpy(np_input).cuda()
        rois = torch.from_numpy(np_rois).cuda()

        try:
            with torch.no_grad():
                torch.onnx.export(
                    wrapped_model, (input_tensor, rois),
                    onnx_file,
                    export_params=True,
                    keep_initializers_as_inputs=True,
                    input_names=['input', 'rois'],
                    output_names=['roi_feat'],
                    opset_version=11)
            onnx_model = onnx.load(onnx_file)

            # create trt engine and wrapper
            # The same shape is supplied three times per input
            # (presumably min/opt/max; confirm against onnx2trt).
            input_shape = list(input_tensor.shape)
            rois_shape = list(rois.shape)
            opt_shape_dict = {
                'input': [input_shape, list(input_shape), list(input_shape)],
                'rois': [rois_shape, list(rois_shape), list(rois_shape)]
            }
            trt_engine = onnx2trt(
                onnx_model,
                opt_shape_dict,
                fp16_mode=fp16_mode,
                max_workspace_size=max_workspace_size)
            save_trt_engine(trt_engine, trt_file)
            trt_model = TRTWrapper(trt_file, ['input', 'rois'], ['roi_feat'])

            with torch.no_grad():
                trt_outputs = trt_model({'input': input_tensor, 'rois': rois})
                trt_roi_feat = trt_outputs['roi_feat']

            # compute pytorch_output
            with torch.no_grad():
                pytorch_roi_feat = wrapped_model(input_tensor, rois)
        finally:
            # Always remove the temporary artefacts, including when export,
            # engine building or inference fails part-way through.
            for tmp_path in (onnx_file, trt_file):
                if os.path.exists(tmp_path):
                    os.remove(tmp_path)

        # allclose
        assert torch.allclose(pytorch_roi_feat, trt_roi_feat)
def __init__(self, pretrained=True, average_pool=True, semantic=True, final_dim=1024, layer_fix=True):
    """
    Assemble the detector: a ResNet split into separately addressable stages,
    a ``RegionCVM`` module after each of layer2/layer3/layer4, an RoI-align
    layer and the per-object projection heads.

    :param pretrained: forwarded unchanged to the ResNet loader
    :param average_pool: if true, ``after_roi_align`` is
                         ``AvgPool2d(7, stride=1)`` followed by ``Flattener()``;
                         otherwise it is an empty ``Sequential``
    :param semantic: if true, build the class-label embedding and the mask
                     convolution; otherwise both attributes are set to ``None``
    :param final_dim: output width of the ``obj_downsample`` linear layer
    :param layer_fix: if true, every parameter of layer2, layer3 and layer4 is
                      frozen; if false, only modules whose class name contains
                      ``'BatchNorm'`` are frozen in those layers
    """
    super(SimpleDetector, self).__init__()

    USE_IMAGENET_PRETRAINED = True

    # huge thx to https://github.com/ruotianluo/pytorch-faster-rcnn/blob/master/lib/nets/resnet_v1.py
    if USE_IMAGENET_PRETRAINED:
        backbone = _load_resnet_imagenet(pretrained=pretrained)
    else:
        backbone = _load_resnet(pretrained=pretrained)

    stem = [
        backbone.conv1,
        backbone.bn1,
        backbone.relu,
        backbone.maxpool,
        backbone.layer1,
    ]
    self.pre_backbone = nn.Sequential(*stem)

    self.layer2 = backbone.layer2
    self.cvm_2 = RegionCVM(in_channels=128 * 4, grid=[6, 6])
    self.layer3 = backbone.layer3
    self.cvm_3 = RegionCVM(in_channels=256 * 4, grid=[4, 4])

    roi_output_size = (7, 7) if USE_IMAGENET_PRETRAINED else (14, 14)
    self.roi_align = RoIAlign(roi_output_size, spatial_scale=1 / 16, sampling_ratio=0)

    if not semantic:
        self.object_embed = None
        self.mask_upsample = None
    else:
        self.mask_dims = 32
        self.object_embed = torch.nn.Embedding(num_embeddings=81, embedding_dim=128)
        mask_stride = 2 if USE_IMAGENET_PRETRAINED else 1
        self.mask_upsample = torch.nn.Conv2d(
            1,
            self.mask_dims,
            kernel_size=3,
            stride=mask_stride,
            padding=1,
            bias=True,
        )

    self.layer4 = backbone.layer4
    self.cvm_4 = RegionCVM(in_channels=512 * 4, grid=[1, 1])

    # layer4 is held as its own attribute above, so this container only
    # carries the optional pooling + flatten step.
    post_roi_layers = []
    self.final_dim = final_dim
    if average_pool:
        post_roi_layers.extend([nn.AvgPool2d(7, stride=1), Flattener()])
    self.after_roi_align = torch.nn.Sequential(*post_roi_layers)

    # The projection input grows by the 128-d label embedding when semantic.
    projection_in = 2048
    if semantic:
        projection_in += 128
    self.obj_downsample = torch.nn.Sequential(
        torch.nn.Dropout(p=0.1),
        torch.nn.Linear(projection_in, final_dim),
        torch.nn.ReLU(inplace=True),
    )
    self.regularizing_predictor = torch.nn.Linear(2048, 81)

    # The stem and layer1 are always frozen.
    for weight in self.pre_backbone.parameters():
        weight.requires_grad = False

    def set_bn_fix(m):
        # Freeze any module whose class name mentions BatchNorm.
        if 'BatchNorm' in m.__class__.__name__:
            for weight in m.parameters():
                weight.requires_grad = False

    later_stages = (self.layer2, self.layer3, self.layer4)
    for stage in later_stages:
        stage.apply(set_bn_fix)

    if layer_fix:
        for stage in later_stages:
            for weight in stage.parameters():
                weight.requires_grad = False