def test_roi_pool_gradient_cuda(self):
    """Check RoIPool's input gradient on CUDA against a fixed reference map.

    Three RoIs are pooled from an all-ones 1x1x10x10 input into 5x5 outputs,
    the summed output is back-propagated, and ``x.grad`` is compared with the
    hard-coded expected gradient.
    """
    cuda = torch.device('cuda')
    tensor_opts = dict(dtype=self.dtype, device=cuda)

    pool = ops.RoIPool((5, 5), 1).to(**tensor_opts)
    x = torch.ones(1, 1, 10, 10, requires_grad=True, **tensor_opts)
    boxes = torch.tensor(
        [[0, 0, 0, 9, 9],
         [0, 0, 5, 4, 9],
         [0, 0, 0, 4, 4]],
        **tensor_opts)

    pool(x, boxes).sum().backward()

    # The expected 10x10 gradient is two row patterns alternating five times.
    even_row = [2., 1., 2., 1., 2., 0., 1., 0., 1., 0.]
    odd_row = [1., 1., 1., 1., 1., 0., 0., 0., 0., 0.]
    expected_grad = torch.tensor([[[even_row, odd_row] * 5]], **tensor_opts)

    assert torch.allclose(x.grad, expected_grad), 'gradient incorrect for roi_pool'
def test_roi_pool(self):
    """Pass a 5x5 RoIPool (spatial scale 2) with one sample input to ``self.run_model``.

    The sample is a random 1x1x10x10 float32 tensor paired with a single
    five-column RoI row.
    """
    output_size = (5, 5)
    features = torch.rand(1, 1, 10, 10, dtype=torch.float32)
    boxes = torch.tensor([[0, 0, 0, 4, 4]], dtype=torch.float32)
    self.run_model(ops.RoIPool(output_size, 2), [(features, boxes)])
def test_roi_pool_cpu_empty_rois(self):
    """Compare RoIPool with ``self.slow_roi_pooling`` on CPU for the fixed RoIs below.

    The comparison runs twice: once on the input as constructed and once on
    a view with the last two axes permuted (non-contiguous).
    """
    cpu = torch.device('cpu')
    features = torch.tensor(
        [[[[0.1767, 1.2851, 4.2325, 4.8645, 7.1496]],
          [[2.5916, 4.3361, 3.8143, 6.1329, 2.0230]],
          [[1.4492, 3.3384, 4.0816, 6.3116, 5.1068]]]],
        dtype=self.dtype, device=cpu)
    boxes = torch.tensor(
        [[0., 1., 0., 4., 0.],
         [0., 2., 0., 3., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 2., 0., 2., 0.]],
        dtype=self.dtype, device=cpu)

    out_h, out_w = 1, 2
    pool = ops.RoIPool((out_h, out_w), 1)

    cases = (
        (features,
         'RoIPool layer incorrect on CPU empty rois'),
        # non-contiguous
        (features.permute(0, 1, 3, 2),
         'RoIPool layer incorrect on CPU for empty rois non-contiguous'),
    )
    for inp, failure_msg in cases:
        actual = pool(inp, boxes)
        expected = self.slow_roi_pooling(inp, boxes, out_h, out_w,
                                         device=cpu, dtype=self.dtype)
        assert torch.allclose(expected, actual), failure_msg
def test_roi_pool_basic_cpu(self):
    """Compare RoIPool with ``self.slow_roi_pooling`` on CPU for a single RoI.

    A random 1x1x10x10 input is pooled to 5x5; the check is repeated on a
    view with the last two axes permuted (non-contiguous).
    """
    cpu = torch.device('cpu')
    features = torch.rand(1, 1, 10, 10, dtype=self.dtype, device=cpu)
    boxes = torch.tensor(
        [[0, 0, 0, 4, 4]],  # format is (xyxy)
        dtype=self.dtype, device=cpu)

    out_h, out_w = 5, 5
    pool = ops.RoIPool((out_h, out_w), 1)

    # Second entry is the non-contiguous variant.
    for inp in (features, features.permute(0, 1, 3, 2)):
        actual = pool(inp, boxes)
        expected = self.slow_roi_pooling(inp, boxes, out_h, out_w,
                                         device=cpu, dtype=self.dtype)
        assert torch.allclose(expected, actual), 'RoIPool layer incorrect on CPU'
def test_roi_pool_cuda(self):
    """Compare RoIPool with ``self.slow_roi_pooling`` on CUDA, or CPU as a fallback.

    The device is CUDA when available and CPU otherwise. Four RoIs over a
    random 2x1x10x10 input are pooled to 5x5, and the check is repeated on
    a view with the last two axes permuted.
    """
    device = (torch.device('cuda') if torch.cuda.is_available()
              else torch.device('cpu'))
    x = torch.rand(2, 1, 10, 10, dtype=self.dtype, device=device)
    rois = torch.tensor(
        [[0, 0, 0, 9, 9],  # format is (xyxy)
         [0, 0, 5, 4, 9],
         [0, 5, 5, 9, 9],
         [1, 0, 0, 9, 9]],
        dtype=self.dtype, device=device)

    pool_h, pool_w = (5, 5)
    roi_pool = ops.RoIPool((pool_h, pool_w), 1)

    y = roi_pool(x, rois)
    gt_y = self.slow_roi_pooling(x, rois, pool_h, pool_w,
                                 device=device, dtype=self.dtype)
    # Move the reference to wherever the output lives. An unconditional
    # .cuda() raises on a CPU-only host, which would defeat the device
    # fallback chosen above.
    assert torch.allclose(gt_y.to(y.device), y), 'RoIPool layer incorrect'

    y = roi_pool(x.permute(0, 1, 3, 2), rois)
    gt_y = self.slow_roi_pooling(x.permute(0, 1, 3, 2), rois, pool_h, pool_w,
                                 device=device, dtype=self.dtype)
    assert torch.allclose(gt_y.to(y.device), y), 'RoIPool layer incorrect'
def test_roi_pool_gradcheck_cuda(self):
    """Run ``gradcheck`` on RoIPool on CUDA, for the module and a scripted function.

    The module form is checked on a random 1x1x10x10 input and on a view of
    it with the last two axes permuted. The TorchScript form wraps
    ``ops.roi_pool`` and is checked on the original input only.
    """
    cuda = torch.device('cuda')
    x = torch.rand(1, 1, 10, 10,
                   dtype=self.dtype, device=cuda, requires_grad=True)
    boxes = torch.tensor(
        [[0, 0, 0, 9, 9],
         [0, 0, 5, 5, 9],
         [0, 5, 5, 9, 9]],
        dtype=self.dtype, device=cuda)
    pool = ops.RoIPool((5, 5), 1).to(dtype=self.dtype, device=cuda)

    def pooled(inp):
        return pool(inp, boxes)

    assert gradcheck(pooled, (x,)), 'gradcheck failed for roi_pool CUDA'

    transposed = x.permute(0, 1, 3, 2)
    assert gradcheck(pooled, (transposed,)), 'gradcheck failed for roi_pool CUDA'

    # Left exactly as written: TorchScript compiles this from its source.
    @torch.jit.script
    def script_func(input, rois):
        return ops.roi_pool(input, rois, 5, 1.0)[0]

    assert gradcheck(
        lambda inp: script_func(inp, boxes),
        (x,)), 'gradcheck failed for scripted roi_pool on CUDA'
def __init__(self, classes_num):
    """Build the convolutional trunk, RoI pooling layer, and the two output heads.

    Args:
        classes_num: number of classes; the ``cls`` head has
            ``classes_num + 1`` outputs.
    """
    super(AlexNet, self).__init__()
    self._name = 'AlexNet'
    self._classes_num = classes_num

    # Layers are created in the same order as they are applied.
    conv_layers = [
        nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=3, stride=2),
        nn.Conv2d(64, 192, kernel_size=5, padding=2),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=3, stride=2),
        nn.Conv2d(192, 384, kernel_size=3, padding=1),
        nn.ReLU(inplace=True),
        nn.Conv2d(384, 256, kernel_size=3, padding=1),
        nn.ReLU(inplace=True),
        nn.Conv2d(256, 256, kernel_size=3, padding=1),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=3, stride=2),
    ]
    self.features = nn.Sequential(*conv_layers)

    # Each RoI is pooled to 6x6, matching the 256 * 6 * 6 input of the
    # first fully connected layer.
    self.roi_pool = ops.RoIPool(output_size=(6, 6), spatial_scale=1)

    hidden = 4096
    fc_layers = [
        nn.Linear(256 * 6 * 6, hidden),
        nn.ReLU(inplace=True),
        nn.Dropout(),
        nn.Linear(hidden, hidden),
        nn.ReLU(inplace=True),
        nn.Dropout(),
    ]
    self.fcs = nn.Sequential(*fc_layers)

    self.cls = nn.Linear(hidden, self._classes_num + 1)
    self.bbox = nn.Linear(hidden, 5)
def fn(self, x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, **kwargs):
    """Apply a freshly built ``ops.RoIPool`` to ``x`` and ``rois``.

    ``sampling_ratio`` and any extra keyword arguments are accepted but not
    used by this implementation.

    Returns:
        The output of ``ops.RoIPool((pool_h, pool_w), spatial_scale)(x, rois)``.
    """
    pooler = ops.RoIPool(output_size=(pool_h, pool_w),
                         spatial_scale=spatial_scale)
    return pooler(x, rois)
def __init__(self):
    """Build the model from the pretrained ``'vgg16'`` backbone factory.

    The backbone's ``features`` and ``classifier`` are each used without
    their last layer, and RoIs are pooled to 7x7 with a 1/16 spatial scale.
    """
    super().__init__()
    # Inside a class body, an identifier with two leading underscores is
    # name-mangled to `_<ClassName>__backbones`, so a bare `__backbones`
    # would not find the module-level mapping. Looking it up by string
    # avoids the mangling.
    # NOTE(review): assumes the mapping is defined at module level under the
    # name `__backbones` — confirm.
    backbones = globals()['__backbones']
    base = backbones['vgg16'](pretrained=True)

    self.convs = base.features[:-1]
    self.pooler = ops.RoIPool((7, 7), 1. / 16.)
    self.fc = base.classifier[:-1]
    self.loc = LocalisationModule()
    self.dis = DiscoveryModule(4096, 20)
def __init__(self):
    """Build the model from a pretrained ``models.vgg16``.

    The backbone's ``features`` and ``classifier`` are each used without
    their last layer, and RoIs are pooled to 7x7 with a 1/16 spatial scale.
    """
    super().__init__()
    # Bind the backbone locally as well as on the instance, so the slicing
    # below does not depend on an unbound bare `base`.
    base = models.vgg16(pretrained=True)
    self.base = base
    self.convs = base.features[:-1]
    self.pooler = ops.RoIPool((7, 7), 1. / 16.)
    self.fc = base.classifier[:-1]

    # NOTE(review): `in_features`, `epochs` and `schedule` are not defined in
    # this method; they must resolve from an enclosing scope — confirm, or
    # turn them into constructor arguments.
    self.instance_selector = InstanceSelector(in_features, 20)
    self.detector_estimator = DetectorEstimator(in_features, 21)

    # `lambda` is a reserved word, so `self.lambda = ...` cannot compile.
    # The trailing underscore follows the PEP 8 convention for keyword
    # clashes.
    self.lambda_ = 0.0
    self.schedule = self.build_schedule(epochs, schedule)
def __init__(self, dim_in, spatial_scale):
    """Create the RoI pooling layer and the two fully connected layers.

    Args:
        dim_in: channel count of the incoming feature map; it sizes the
            input of ``fc1``.
        spatial_scale: scale passed through to ``ops.RoIPool``.
    """
    super().__init__()
    resolution = cfg.FAST_RCNN.ROI_XFORM_RESOLUTION
    hidden_dim = 4096

    self.dim_in = dim_in
    self.roi_pool = ops.RoIPool(output_size=(resolution, resolution),
                                spatial_scale=spatial_scale)
    self.spatial_scale = spatial_scale
    self.dim_out = hidden_dim

    # fc1 consumes the flattened pooled features: channels * res * res.
    self.fc1 = nn.Linear(dim_in * resolution ** 2, hidden_dim)
    self.fc2 = nn.Linear(hidden_dim, hidden_dim)
def test_roi_pool_gradcheck_cpu(self):
    """Run ``gradcheck`` on the RoIPool module on CPU.

    The check runs on a random 1x1x10x10 input and on a view of it with the
    last two axes permuted.
    """
    cpu = torch.device('cpu')
    x = torch.rand(1, 1, 10, 10,
                   dtype=self.dtype, device=cpu, requires_grad=True)
    boxes = torch.tensor(
        [[0, 0, 0, 9, 9],
         [0, 0, 5, 5, 9],
         [0, 5, 5, 9, 9]],
        dtype=self.dtype, device=cpu)
    pool = ops.RoIPool((5, 5), 1).to(dtype=self.dtype, device=cpu)

    def pooled(inp):
        return pool(inp, boxes)

    assert gradcheck(pooled, (x,)), 'gradcheck failed for roi_pool CPU'

    transposed = x.permute(0, 1, 3, 2)
    assert gradcheck(pooled, (transposed,)), 'gradcheck failed for roi_pool CPU'
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.ops as ops

# Demo: print a 5x5 adaptive max pool of a random map next to RoIPool applied
# to a full-image box, with the RoIs given in two different forms.

x = torch.randn(1, 1, 10, 10) * 10
print(x)

# Plain adaptive max pooling over the whole 10x10 map.
y1 = F.adaptive_max_pool2d(x, output_size=(5, 5))
print(y1)

roi_pool = ops.RoIPool(output_size=(5, 5), spatial_scale=1)

# RoIs as one tensor with five columns per row.
rois = torch.tensor([[0, 0, 0, 9, 9]], dtype=torch.float32)
y2 = roi_pool(x, rois)
print(y2)

# RoIs as a list holding one four-column tensor.
rois = [torch.tensor([[0, 0, 9, 9]], dtype=torch.float32)]
y3 = roi_pool(x, rois)
print(y3)
def __init__(self, cfg):
    """Assemble the model from a configuration node.

    Reads the test-time thresholds and normalisation constants, builds the
    backbone, RoI pooler, optional MIDN and refinement heads, and optional
    test-time augmentation. It then selects the loss builder and calls
    ``self.init_layers()``.

    Args:
        cfg: configuration object exposing the ``MODEL.*`` and ``TEST.*``
            fields read below.

    Raises:
        NotImplementedError: if ``cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE`` is
            anything other than ``'roipool'`` (case-insensitive).
    """
    super().__init__()
    self.device = cfg.MODEL.DEVICE
    self.num_classes = cfg.MODEL.ROI_HEADS.NUM_CLASSES

    # Test mode details
    self.test_nms_threshold = cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST
    self.test_score_threshold = cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST
    self.test_max_detections_per_image = cfg.TEST.DETECTIONS_PER_IMAGE
    self.test_out_layers = cfg.MODEL.PREDICTION_LAYERS

    # Normalization details, reshaped to (1, 3, 1, 1)
    self.pixel_mean = torch.tensor(cfg.MODEL.PIXEL_MEAN).view(1, 3, 1, 1).to(self.device)
    self.pixel_std = torch.tensor(cfg.MODEL.PIXEL_STD).view(1, 3, 1, 1).to(self.device)

    # Set up the model base. A '_dilated' suffix on the backbone name is
    # stripped before the factory lookup and applied afterwards through
    # dilate_convs.
    backbone_name = cfg.MODEL.BACKBONE.NAME
    dilated = backbone_name.endswith('_dilated')
    backbone_name = backbone_name[:-len('_dilated')] if dilated else backbone_name
    pretrained = cfg.MODEL.BACKBONE.WEIGHTS
    convs, fc = _backbones[backbone_name](pretrained == 'imagenet')
    # Any value other than 'imagenet' or '' is handed to utils.load_weights.
    if pretrained not in ['imagenet', '']:
        utils.load_weights(convs, fc, pretrained)
    if dilated:
        convs = dilate_convs(convs)
    utils.freeze_convs(convs, cfg.MODEL.BACKBONE.FREEZE_CONVS)
    self.convs = convs
    self.fc = fc

    # Set up the pooling layer
    scale = utils.get_conv_scale(convs)
    res = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION
    pool_type = cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE
    if pool_type.lower() == 'roipool':
        self.pooler = ops.RoIPool((res, res), scale)
    else:
        raise NotImplementedError(f'Pooler type {pool_type} not implemented')

    # Set up the heads
    fc_features = utils.get_out_features(fc)
    nc, nd = cfg.MODEL.MIDN_HEAD.NUM_CLASSIFIER, cfg.MODEL.MIDN_HEAD.NUM_DETECTOR
    if nc > 0 and nd > 0:
        self.midn = heads.MultipleMidnHead(
            in_features=fc_features,
            out_features=self.num_classes,
            t_cls=cfg.MODEL.MIDN_HEAD.CLASSIFIER_TEMP,
            t_det=cfg.MODEL.MIDN_HEAD.DETECTOR_TEMP,
            k_cls=nc,
            k_det=nd
        )

    nr = cfg.MODEL.REFINEMENT_HEAD.K
    if nr > 0:
        # Pass the configured K through instead of a hard-coded 3, so the
        # setting that enables the head also sizes it.
        # NOTE(review): assumes RefinementHeads' `k` is the count that
        # REFINEMENT_HEAD.K configures — confirm.
        self.refinement = heads.RefinementHeads(
            in_features=fc_features,
            out_features=self.num_classes + 1,  # BG Class
            k=nr
        )

    if cfg.TEST.AUG.ENABLED:
        self.tta = self._init_tta_fn(cfg)
    else:
        # Identity when test-time augmentation is disabled.
        self.tta = lambda x: x

    self.build_loss = LOSS_FUNCTIONS[cfg.MODEL.LOSS_FN]

    self.init_layers()