def __init__(self, dim_in, spatial_scale, norm='bn'):
    """Assemble the head: one ResNet stage, a 1x1 reduction conv, a Pooler and two FC layers.

    Args:
        dim_in: Indexable collection of channel counts; only the last
            entry is used as the input width of ``layer4``.
        spatial_scale: Passed unchanged to ``Pooler`` as ``scales``.
        norm: Stored on ``self.norm`` (presumably read by
            ``_make_layer`` -- TODO confirm against the parent class).
    """
    super().__init__()
    self.dim_in = dim_in[-1]

    resnet_cfg = cfg.BACKBONE.RESNET
    roi_cfg = cfg.FAST_RCNN

    # Residual block type is chosen purely from the backbone config.
    if resnet_cfg.USE_ALIGN:
        block = res.AlignedBottleneck
    elif resnet_cfg.BOTTLENECK:
        block = res.Bottleneck  # not use the original Bottleneck module
    else:
        block = res.BasicBlock

    # Plain attributes that must be in place before _make_layer runs.
    self.expansion = block.expansion
    self.stride_3x3 = resnet_cfg.STRIDE_3X3
    self.avg_down = resnet_cfg.AVG_DOWN
    self.norm = norm
    self.base_width = resnet_cfg.WIDTH
    self.ctx_ratio = resnet_cfg.CTX_RATIO

    layers = resnet_cfg.LAYERS
    stage_with_context = resnet_cfg.STAGE_WITH_CONTEXT
    stage_with_conv = resnet_cfg.STAGE_WITH_CONV
    c5_dilation = resnet_cfg.C5_DILATION

    self.inplanes = self.dim_in
    # Stride 2 only when the stage is not dilated.
    if c5_dilation == 1:
        c5_stride = 2
    else:
        c5_stride = 1
    self.layer4 = self._make_layer(
        block, 512, layers[3], c5_stride,
        dilation=c5_dilation,
        conv=stage_with_conv[3],
        context=stage_with_context[3],
    )

    # 1x1 conv + ReLU that maps the stage output to 256 channels.
    reduce_conv = nn.Conv2d(512 * self.expansion, 256,
                            kernel_size=1, stride=1, padding=0, bias=True)
    self.conv_new = nn.Sequential(reduce_conv, nn.ReLU(inplace=True))
    self.dim_in = 256

    resolution = roi_cfg.ROI_XFORM_RESOLUTION
    self.pooler = Pooler(
        method=roi_cfg.ROI_XFORM_METHOD,
        output_size=resolution,
        scales=spatial_scale,
        sampling_ratio=roi_cfg.ROI_XFORM_SAMPLING_RATIO,
    )

    # FC input size: channels times the two configured resolution entries.
    mlp_dim = roi_cfg.MLP_HEAD.MLP_DIM
    flat_features = self.dim_in * resolution[0] * resolution[1]
    self.fc1 = nn.Linear(flat_features, mlp_dim)
    self.fc2 = nn.Linear(mlp_dim, mlp_dim)
    self.dim_out = mlp_dim

    # Drop the attributes this head does not keep (they are expected to
    # have been created by the parent constructor).
    for unused in ('conv1', 'bn1', 'relu', 'maxpool',
                   'layer1', 'layer2', 'layer3', 'avgpool', 'fc'):
        delattr(self, unused)

    self._init_weights()
def __init__(self, dim_in, spatial_scale, norm='bn'):
    """Assemble the head: a Pooler followed by a single ResNet stage.

    Args:
        dim_in: Indexable collection of channel counts; only the last
            entry is used as the input width of ``layer4``.
        spatial_scale: Passed unchanged to ``Pooler`` as ``scales``.
        norm: Stored on ``self.norm`` (presumably read by
            ``_make_layer`` -- TODO confirm against the parent class).
    """
    super().__init__()
    self.dim_in = dim_in[-1]

    resnet_cfg = cfg.BACKBONE.RESNET
    roi_cfg = cfg.FAST_RCNN

    # Residual block type is chosen purely from the backbone config.
    if resnet_cfg.USE_ALIGN:
        block = res.AlignedBottleneck
    elif resnet_cfg.BOTTLENECK:
        block = res.Bottleneck  # not use the original Bottleneck module
    else:
        block = res.BasicBlock

    # Plain attributes that must be in place before _make_layer runs.
    self.expansion = block.expansion
    self.stride_3x3 = resnet_cfg.STRIDE_3X3
    self.avg_down = resnet_cfg.AVG_DOWN
    self.norm = norm
    self.base_width = resnet_cfg.WIDTH
    self.ctx_ratio = resnet_cfg.CTX_RATIO

    layers = resnet_cfg.LAYERS
    stage_with_context = resnet_cfg.STAGE_WITH_CONTEXT
    stage_with_conv = resnet_cfg.STAGE_WITH_CONV
    c5_dilation = resnet_cfg.C5_DILATION

    # The pooler is created before layer4, as in the original ordering.
    resolution = roi_cfg.ROI_XFORM_RESOLUTION
    self.pooler = Pooler(
        method=roi_cfg.ROI_XFORM_METHOD,
        output_size=resolution,
        scales=spatial_scale,
        sampling_ratio=roi_cfg.ROI_XFORM_SAMPLING_RATIO,
    )

    self.inplanes = self.dim_in
    # Stage stride is the smaller resolution entry floor-divided by 7.
    c5_stride = min(resolution) // 7
    self.layer4 = self._make_layer(
        block, 512, layers[3], c5_stride,
        dilation=c5_dilation,
        conv=stage_with_conv[3],
        context=stage_with_context[3],
    )
    # NOTE(review): stage_out_dim is not set in this method; presumably
    # provided by the parent class -- confirm.
    self.dim_out = self.stage_out_dim[-1]

    # Drop the attributes this head does not keep (they are expected to
    # have been created by the parent constructor).
    for unused in ('conv1', 'bn1', 'relu', 'maxpool',
                   'layer1', 'layer2', 'layer3', 'avgpool', 'fc'):
        delattr(self, unused)

    self._init_weights()
def __init__(self, dim_in, spatial_scale, stage=1):
    """Build a RoI head made of a Pooler, a stack of convs and one FC layer.

    Args:
        dim_in: Indexable collection of channel counts; only the last
            entry is used as the input width of the first conv.
        spatial_scale: Passed unchanged to ``Pooler`` as ``scales``.
        stage: Stored on ``self.stage``; not otherwise used here.
    """
    super().__init__()
    self.dim_in = dim_in[-1]

    method = cfg.FAST_RCNN.ROI_XFORM_METHOD
    resolution = cfg.FAST_RCNN.ROI_XFORM_RESOLUTION
    sampling_ratio = cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO
    self.pooler = Pooler(
        method=method,
        output_size=resolution,
        scales=spatial_scale,
        sampling_ratio=sampling_ratio,
    )

    head_cfg = cfg.FAST_RCNN.CONVFC_HEAD
    use_lite = head_cfg.USE_LITE
    use_bn = head_cfg.USE_BN
    use_gn = head_cfg.USE_GN
    conv_dim = head_cfg.CONV_DIM
    num_stacked_convs = head_cfg.NUM_STACKED_CONVS
    dilation = head_cfg.DILATION

    # Only the first conv sees the incoming width; every later one maps
    # conv_dim -> conv_dim. With zero convs the width stays unchanged.
    xconvs = []
    in_channels = self.dim_in
    for _ in range(num_stacked_convs):
        xconvs.append(
            make_conv(in_channels, conv_dim, kernel=3, stride=1,
                      dilation=dilation, use_dwconv=use_lite,
                      use_bn=use_bn, use_gn=use_gn,
                      suffix_1x1=use_lite, use_relu=True))
        in_channels = conv_dim
    self.dim_in = in_channels
    self.add_module("xconvs", nn.Sequential(*xconvs))

    # FC input size: channels times the two configured resolution entries.
    input_size = self.dim_in * resolution[0] * resolution[1]
    mlp_dim = head_cfg.MLP_DIM
    self.fc6 = make_fc(input_size, mlp_dim, use_bn=False, use_gn=False)
    self.dim_out = mlp_dim
    self.stage = stage

    if head_cfg.USE_WS:
        # The helper is called for its effect on this instance. The
        # original rebound the local name ``self`` to the return value,
        # which cannot replace the object under construction, so the
        # result is deliberately discarded.
        # NOTE(review): this relies on convert_conv2convws_model
        # converting submodules in place -- confirm.
        convert_conv2convws_model(self)
def __init__(self, dim_in, spatial_scale):
    """Build a RoI head made of a Pooler followed by two FC layers.

    Args:
        dim_in: Indexable collection of channel counts; only the last
            entry is used when sizing the first FC layer.
        spatial_scale: Passed unchanged to ``Pooler`` as ``scales``.
    """
    super().__init__()
    self.dim_in = dim_in[-1]

    method = cfg.GRID_RCNN.ROI_XFORM_METHOD
    resolution = cfg.GRID_RCNN.ROI_XFORM_RESOLUTION_CLS
    sampling_ratio = cfg.GRID_RCNN.ROI_XFORM_SAMPLING_RATIO
    self.pooler = Pooler(
        method=method,
        output_size=resolution,
        scales=spatial_scale,
        sampling_ratio=sampling_ratio,
    )

    head_cfg = cfg.GRID_RCNN.MLP_HEAD
    mlp_dim = head_cfg.MLP_DIM
    use_bn = head_cfg.USE_BN
    use_gn = head_cfg.USE_GN

    # FC input size: channels times the two configured resolution entries.
    input_size = self.dim_in * resolution[0] * resolution[1]
    self.fc6 = make_fc(input_size, mlp_dim, use_bn, use_gn)
    self.fc7 = make_fc(mlp_dim, mlp_dim, use_bn, use_gn)
    self.dim_out = mlp_dim

    if head_cfg.USE_WS:
        # The helper is called for its effect on this instance. The
        # original rebound the local name ``self`` to the return value,
        # which cannot replace the object under construction, so the
        # result is deliberately discarded.
        # NOTE(review): this relies on convert_conv2convws_model
        # converting submodules in place -- confirm.
        convert_conv2convws_model(self)
def __init__(self, dim_in, spatial_scale):
    """Build a RoI head made of a Pooler and a chain of named conv layers.

    Each conv is registered as ``hier_fcn<k>`` (k starting at 1) and its
    name is recorded, in order, in ``self.blocks``.

    Args:
        dim_in: Indexable collection of channel counts; only the last
            entry is used as the input width of the first conv.
        spatial_scale: Passed unchanged to ``Pooler`` as ``scales``.
    """
    super(roi_convx_head, self).__init__()
    self.dim_in = dim_in[-1]

    # The configured resolution is a single value, used for both entries
    # of the pooler's output size.
    side = cfg.HRCNN.ROI_XFORM_RESOLUTION
    self.pooler = Pooler(
        output_size=(side, side),
        scales=spatial_scale,
        sampling_ratio=cfg.HRCNN.ROI_XFORM_SAMPLING_RATIO,
    )

    head_cfg = cfg.HRCNN.CONVX_HEAD
    use_lite = head_cfg.USE_LITE
    use_bn = head_cfg.USE_BN
    use_gn = head_cfg.USE_GN
    conv_dim = head_cfg.CONV_DIM
    dilation = head_cfg.DILATION

    # Only the first conv sees the incoming width; every later one maps
    # conv_dim -> conv_dim. With zero convs the width stays unchanged.
    layer_names = []
    in_channels = self.dim_in
    for position in range(1, head_cfg.NUM_STACKED_CONVS + 1):
        layer_name = "hier_fcn{}".format(position)
        conv = make_conv(in_channels, conv_dim, kernel=3, stride=1,
                         dilation=dilation, use_dwconv=use_lite,
                         use_bn=use_bn, use_gn=use_gn,
                         suffix_1x1=use_lite)
        self.add_module(layer_name, conv)
        layer_names.append(layer_name)
        in_channels = conv_dim

    self.blocks = layer_names
    self.dim_in = in_channels
    self.dim_out = self.dim_in
def __init__(self, dim_in, spatial_scale, stage):
    """Build the grid head: conv stack, optional fusion modules, Pooler, optional offset branch.

    Args:
        dim_in: Indexable collection of channel counts; the last entry is
            the input width of the first conv. The whole object is also
            stored as ``self.dim_out``.
        spatial_scale: Scales for ``Pooler``; reduced to its first entry
            when ``cfg.GRID_RCNN.FINEST_LEVEL_ROI`` is set.
        stage: Index into ``CASCADE_MAPPING_OPTION.GRID_NUM``; only read
            when ``cfg.GRID_RCNN.CASCADE_MAPPING_ON`` is set.

    Raises:
        ValueError: If the number of grid points is not a perfect square,
            or if the configured RoI feature size is not an ``int``.
    """
    super(roi_grid_head, self).__init__()
    grid_cfg = cfg.GRID_RCNN

    if grid_cfg.CASCADE_MAPPING_ON:
        self.grid_points = grid_cfg.CASCADE_MAPPING_OPTION.GRID_NUM[stage]
    else:
        self.grid_points = grid_cfg.GRID_POINTS
    self.roi_feat_size = grid_cfg.ROI_FEAT_SIZE
    self.num_convs = grid_cfg.GRID_HEAD.NUM_CONVS
    self.point_feat_channels = grid_cfg.GRID_HEAD.POINT_FEAT_CHANNELS
    self.conv_out_channels = self.point_feat_channels * self.grid_points
    self.class_agnostic = False
    self.dim_in = dim_in[-1]

    assert self.grid_points >= 4
    self.grid_size = int(np.sqrt(self.grid_points))
    if self.grid_size ** 2 != self.grid_points:
        raise ValueError('grid_points must be a square number')

    if not isinstance(self.roi_feat_size, int):
        raise ValueError('Only square RoIs are supporeted in Grid R-CNN')
    # the predicted heatmap is half of whole_map_size
    self.whole_map_size = self.roi_feat_size * 4

    # Conv stack: the first layer takes dim_in channels with stride 2,
    # the rest take conv_out_channels with stride 1.
    kernel_size = 3
    same_padding = (kernel_size - 1) // 2
    conv_stack = []
    for layer_idx in range(self.num_convs):
        is_first = layer_idx == 0
        conv_stack.append(
            nn.Sequential(
                nn.Conv2d(
                    self.dim_in if is_first else self.conv_out_channels,
                    self.conv_out_channels,
                    kernel_size=kernel_size,
                    stride=2 if is_first else 1,
                    padding=same_padding),
                nn.GroupNorm(4 * self.grid_points, self.conv_out_channels,
                             eps=1e-5),
                nn.ReLU(inplace=True)))
    self.convs = nn.Sequential(*conv_stack)

    # find the 4-neighbor of each grid point
    self.neighbor_points = self._get_neighbors()
    # total edges in the grid
    self.num_edges = sum(len(points) for points in self.neighbor_points)

    if grid_cfg.FUSED_ON:
        # first-order feature transition
        self.forder_trans = self._build_trans(nn.ModuleList())
        # second-order feature transition
        self.sorder_trans = self._build_trans(nn.ModuleList())

    # With FINEST_LEVEL_ROI only the first scale is handed to the pooler.
    if grid_cfg.FINEST_LEVEL_ROI:
        pooler_scales = [spatial_scale[0]]
    else:
        pooler_scales = spatial_scale
    self.pooler = Pooler(
        method=grid_cfg.ROI_XFORM_METHOD,
        output_size=grid_cfg.ROI_XFORM_RESOLUTION_GRID,
        scales=pooler_scales,
        sampling_ratio=grid_cfg.ROI_XFORM_SAMPLING_RATIO,
    )
    # NOTE(review): dim_out receives the full dim_in argument, not
    # dim_in[-1] -- confirm this is what downstream consumers expect.
    self.dim_out = dim_in

    if grid_cfg.OFFSET_ON:
        self.offset_conv = make_conv(self.dim_in, 64, kernel=3, stride=2)
        # NOTE(review): 64 * 7 * 7 hard-codes the flattened size fed to
        # the FC layer -- presumably a 7x7 map after offset_conv; verify.
        self.offset_fc = make_fc(64 * 7 * 7, 4 * self.grid_points)