def __init__(self, dim_in, spatial_scale):
    """Build the RoI pooler and the stacked ``mask_fcn*`` conv blocks.

    Arguments:
        dim_in (sequence): only ``dim_in[-1]`` is read; it is handed to
            ``make_conv`` as the first positional argument of the first
            block.
        spatial_scale: forwarded untouched to ``Pooler`` as ``scales``.

    All remaining settings are read from ``cfg.MRCNN`` and
    ``cfg.MRCNN.CONVX_HEAD``. On exit ``self.blocks`` lists the registered
    layer names in creation order and ``self.dim_out`` mirrors
    ``self.dim_in``.
    """
    super(roi_convx_head, self).__init__()
    self.dim_in = dim_in[-1]

    self.pooler = Pooler(
        method=cfg.MRCNN.ROI_XFORM_METHOD,
        output_size=cfg.MRCNN.ROI_XFORM_RESOLUTION,
        scales=spatial_scale,
        sampling_ratio=cfg.MRCNN.ROI_XFORM_SAMPLING_RATIO,
    )

    head_cfg = cfg.MRCNN.CONVX_HEAD
    lite = head_cfg.USE_LITE
    with_bn = head_cfg.USE_BN
    with_gn = head_cfg.USE_GN
    width = head_cfg.CONV_DIM
    depth = head_cfg.NUM_STACKED_CONVS
    rate = head_cfg.DILATION

    # Layers are registered one by one under 1-based names; the names are
    # kept in ``self.blocks`` so they can be looked up again later.
    self.blocks = []
    for block_no in range(1, depth + 1):
        name = "mask_fcn{}".format(block_no)
        self.add_module(
            name,
            make_conv(
                self.dim_in, width, kernel=3, stride=1, dilation=rate,
                use_dwconv=lite, use_bn=with_bn, use_gn=with_gn,
                suffix_1x1=lite,
            ),
        )
        # Every block after the first consumes the previous block's width.
        self.dim_in = width
        self.blocks.append(name)
    self.dim_out = self.dim_in

    if head_cfg.USE_WS:
        # NOTE(review): rebinding ``self`` only changes this local name; the
        # caller keeps the original object. This presumably relies on
        # convert_conv2convws_model converting children in place - confirm.
        self = convert_conv2convws_model(self)

    conv_types = (nn.Conv2d, nn.ConvTranspose2d)
    norm_types = (nn.BatchNorm2d, nn.GroupNorm)
    for layer in self.modules():
        if isinstance(layer, conv_types):
            nn.init.kaiming_normal_(layer.weight, mode='fan_out', nonlinearity="relu")
            if layer.bias is not None:
                nn.init.zeros_(layer.bias)
            continue
        if isinstance(layer, norm_types):
            nn.init.constant_(layer.weight, 1)
            nn.init.constant_(layer.bias, 0)
def __init__(self, dim_in, spatial_scale, norm='bn'):
    """Build a RoI head consisting of a pooler followed by one ``layer4`` stage.

    Arguments:
        dim_in (sequence): only ``dim_in[-1]`` is read; it seeds
            ``self.inplanes`` before ``layer4`` is built.
        spatial_scale: forwarded untouched to ``Pooler`` as ``scales``.
        norm: stored on ``self.norm``.

    Residual-stage options come from ``cfg.BACKBONE.RESNET``; pooling
    options come from ``cfg.FAST_RCNN``.

    NOTE(review): the attribute deletions at the end imply the parent
    constructor creates ``conv1`` ... ``fc``, and ``self.stage_out_dim`` is
    expected to exist after ``_make_layer`` - confirm against the base class.
    """
    super().__init__()
    self.dim_in = dim_in[-1]

    resnet_cfg = cfg.BACKBONE.RESNET
    if resnet_cfg.USE_ALIGN:
        block = res.AlignedBottleneck
    elif resnet_cfg.BOTTLENECK:
        block = res.Bottleneck  # not use the original Bottleneck module
    else:
        block = res.BasicBlock

    self.expansion = block.expansion
    self.stride_3x3 = resnet_cfg.STRIDE_3X3
    self.avg_down = resnet_cfg.AVG_DOWN
    self.norm = norm
    self.radix = resnet_cfg.RADIX
    self.base_width = resnet_cfg.WIDTH
    self.ctx_ratio = resnet_cfg.CTX_RATIO

    depth_per_stage = resnet_cfg.LAYERS
    context_flags = resnet_cfg.STAGE_WITH_CONTEXT
    conv_flags = resnet_cfg.STAGE_WITH_CONV
    c5_dilation = resnet_cfg.C5_DILATION

    resolution = cfg.FAST_RCNN.ROI_XFORM_RESOLUTION
    self.pooler = Pooler(
        method=cfg.FAST_RCNN.ROI_XFORM_METHOD,
        output_size=resolution,
        scales=spatial_scale,
        sampling_ratio=cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO,
    )

    self.inplanes = self.dim_in
    # The stage stride is derived from the smaller pooled side divided by 7.
    c5_stride = min(resolution) // 7
    self.layer4 = self._make_layer(
        block, 512, depth_per_stage[3], c5_stride,
        dilation=c5_dilation,
        conv=conv_flags[3],
        context=context_flags[3],
    )
    self.dim_out = self.stage_out_dim[-1]

    # Drop every inherited attribute this head does not use.
    for unused in ('conv1', 'bn1', 'relu', 'maxpool',
                   'layer1', 'layer2', 'layer3', 'avgpool', 'fc'):
        delattr(self, unused)
    self._init_weights()
def __init__(self, dim_in, spatial_scale):
    """Build the conv stack, the two feature-transition module lists and the pooler.

    Arguments:
        dim_in (sequence): only ``dim_in[-1]`` is read; it is the input
            channel count of the first convolution.
        spatial_scale: forwarded untouched to ``Pooler`` as ``scales``.

    Settings are read from ``cfg.OPLD`` and ``cfg.OPLD.ROI_HEAD``.
    ``self.dim_out`` is a one-element list holding ``self.conv_out_channels``.
    """
    super(RoIOPLDHead, self).__init__()

    head_cfg = cfg.OPLD.ROI_HEAD
    self.num_points = cfg.OPLD.NUM_POINTS
    self.roi_feat_size = cfg.OPLD.ROI_FEAT_SIZE
    self.num_convs = head_cfg.NUM_CONVS
    self.point_feat_channels = head_cfg.POINT_FEAT_CHANNELS
    self.neighbor_points = head_cfg.NEIGHBOR_POINTS
    self.conv_out_channels = self.point_feat_channels * self.num_points
    self.class_agnostic = False
    self.dim_in = dim_in[-1]
    self.whole_map_size = self.roi_feat_size * 4

    # Conv -> GroupNorm -> ReLU stages; only the first stage sees dim_in.
    conv_kernel_size = 3
    stride = 1
    padding = (conv_kernel_size - 1) // 2
    stages = []
    in_channels = self.dim_in
    for _ in range(self.num_convs):
        stage = nn.Sequential(
            nn.Conv2d(
                in_channels,
                self.conv_out_channels,
                kernel_size=conv_kernel_size,
                stride=stride,
                padding=padding,
            ),
            nn.GroupNorm(32, self.conv_out_channels, eps=1e-5),
            nn.ReLU(inplace=True),
        )
        stages.append(stage)
        in_channels = self.conv_out_channels
    self.convs = nn.Sequential(*stages)

    # first-order feature transition
    self.forder_trans = self._build_trans(nn.ModuleList())
    # second-order feature transition
    self.sorder_trans = self._build_trans(nn.ModuleList())

    self.pooler = Pooler(
        method=cfg.OPLD.ROI_XFORM_METHOD,
        output_size=cfg.OPLD.ROI_XFORM_RESOLUTION,
        scales=spatial_scale,
        sampling_ratio=cfg.OPLD.ROI_XFORM_SAMPLING_RATIO,
    )
    self.dim_out = [self.conv_out_channels]
def __init__(self, dim_in, spatial_scale):
    """Build the pooler, a stack of 3x3 conv blocks (``xconvs``) and ``fc6``.

    Arguments:
        dim_in (sequence): only ``dim_in[-1]`` is read; it is handed to
            ``make_conv`` as the first positional argument of the first
            block.
        spatial_scale: forwarded untouched to ``Pooler`` as ``scales``.

    Settings are read from ``cfg.FAST_RCNN`` and
    ``cfg.FAST_RCNN.CONVFC_HEAD``. ``self.dim_out`` is set to ``MLP_DIM``.
    """
    super().__init__()
    self.dim_in = dim_in[-1]

    resolution = cfg.FAST_RCNN.ROI_XFORM_RESOLUTION
    self.pooler = Pooler(
        method=cfg.FAST_RCNN.ROI_XFORM_METHOD,
        output_size=resolution,
        scales=spatial_scale,
        sampling_ratio=cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO,
    )

    head_cfg = cfg.FAST_RCNN.CONVFC_HEAD
    lite = head_cfg.USE_LITE
    with_bn = head_cfg.USE_BN
    with_gn = head_cfg.USE_GN
    width = head_cfg.CONV_DIM
    depth = head_cfg.NUM_STACKED_CONVS
    rate = head_cfg.DILATION

    # Only the first block sees the incoming dimension; later ones take
    # the conv width.
    channels = self.dim_in
    stack = []
    for _ in range(depth):
        stack.append(
            make_conv(
                channels, width, kernel=3, stride=1, dilation=rate,
                use_dwconv=lite, use_bn=with_bn, use_gn=with_gn,
                suffix_1x1=lite, use_relu=True,
            )
        )
        channels = width
    self.dim_in = channels
    self.add_module("xconvs", nn.Sequential(*stack))

    # fc6 input size: channel count times both entries of the pooled resolution.
    input_size = self.dim_in * resolution[0] * resolution[1]
    mlp_dim = head_cfg.MLP_DIM
    self.fc6 = make_fc(input_size, mlp_dim, use_bn=False, use_gn=False)
    self.dim_out = mlp_dim

    if head_cfg.USE_WS:
        # NOTE(review): rebinding ``self`` only changes this local name; this
        # presumably relies on the converter working in place - confirm.
        self = convert_conv2convws_model(self)
def __init__(self, dim_in, spatial_scale):
    """Build the RoI pooler and the stacked ``UV_fcn*`` conv blocks.

    Arguments:
        dim_in (sequence): only ``dim_in[-1]`` is read; it is handed to
            ``make_conv`` as the first positional argument of the first
            block.
        spatial_scale: forwarded untouched to ``Pooler`` as ``scales``.

    Settings are read from ``cfg.UVRCNN`` and ``cfg.UVRCNN.CONVX_HEAD``.
    On exit ``self.blocks`` lists the registered layer names in creation
    order and ``self.dim_out`` mirrors ``self.dim_in``.
    """
    super(roi_convx_head, self).__init__()
    self.dim_in = dim_in[-1]

    self.pooler = Pooler(
        method=cfg.UVRCNN.ROI_XFORM_METHOD,
        output_size=cfg.UVRCNN.ROI_XFORM_RESOLUTION,
        scales=spatial_scale,
        sampling_ratio=cfg.UVRCNN.ROI_XFORM_SAMPLING_RATIO,
    )

    head_cfg = cfg.UVRCNN.CONVX_HEAD
    lite = head_cfg.USE_LITE
    with_bn = head_cfg.USE_BN
    with_gn = head_cfg.USE_GN
    width = head_cfg.CONV_DIM
    depth = head_cfg.NUM_STACKED_CONVS
    rate = head_cfg.DILATION

    # Layers get 1-based names; the names are recorded in ``self.blocks``.
    self.blocks = []
    for block_no in range(1, depth + 1):
        name = "UV_fcn{}".format(block_no)
        self.add_module(
            name,
            make_conv(
                self.dim_in, width, kernel=3, stride=1, dilation=rate,
                use_dwconv=lite, use_bn=with_bn, use_gn=with_gn,
                suffix_1x1=lite,
            ),
        )
        # Every block after the first consumes the previous block's width.
        self.dim_in = width
        self.blocks.append(name)
    self.dim_out = self.dim_in
def __init__(self, dim_in, spatial_scale):
    """Build the RoI pooler followed by two fully connected layers.

    Arguments:
        dim_in (sequence): only ``dim_in[-1]`` is read; it is multiplied
            by both entries of the pooled resolution to size ``fc6``.
        spatial_scale: forwarded untouched to ``Pooler`` as ``scales``.

    Settings are read from ``cfg.FAST_RCNN`` and
    ``cfg.FAST_RCNN.MLP_HEAD``. ``self.dim_out`` is set to ``MLP_DIM``.
    """
    super().__init__()
    self.dim_in = dim_in[-1]

    resolution = cfg.FAST_RCNN.ROI_XFORM_RESOLUTION
    self.pooler = Pooler(
        method=cfg.FAST_RCNN.ROI_XFORM_METHOD,
        output_size=resolution,
        scales=spatial_scale,
        sampling_ratio=cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO,
    )

    head_cfg = cfg.FAST_RCNN.MLP_HEAD
    hidden = head_cfg.MLP_DIM
    with_bn = head_cfg.USE_BN
    with_gn = head_cfg.USE_GN

    flat_features = self.dim_in * resolution[0] * resolution[1]
    self.fc6 = make_fc(flat_features, hidden, with_bn, with_gn)
    self.fc7 = make_fc(hidden, hidden, with_bn, with_gn)
    self.dim_out = hidden

    if head_cfg.USE_WS:
        # NOTE(review): rebinding ``self`` only changes this local name; this
        # presumably relies on the converter working in place - confirm.
        self = convert_conv2convws_model(self)
def __init__(self, dim_in, spatial_scale):
    """Build the GCE head.

    The head is assembled from, in order: a RoI pooler, optional 3x3 convs
    before the ASPPv3 module, the ASPPv3 branches plus an image-pooling
    branch, a 1x1 fusion conv (optionally followed by a non-local block),
    and the 3x3 convs after the ASPPv3 module.

    Arguments:
        dim_in (sequence): only ``dim_in[-1]`` is read; it is handed to
            ``make_conv`` as the first positional argument of the first
            layer that is built.
        spatial_scale: forwarded untouched to ``Pooler`` as ``scales``.

    Raises:
        AssertionError: if ``cfg.HRCNN.GCE_HEAD.NUM_CONVS_AFTER_ASPPV3`` is
            smaller than 1.

    All settings are read from ``cfg.HRCNN`` and ``cfg.HRCNN.GCE_HEAD``.
    ``self.dim_out`` ends up equal to ``CONV_DIM``.
    """
    super(roi_gce_head, self).__init__()
    self.dim_in = dim_in[-1]

    # The pooling method is read from cfg.HRCNN like every other option of
    # this head; it used to come from cfg.PRCNN, which let an unrelated
    # config section decide how this head pools its RoIs.
    method = cfg.HRCNN.ROI_XFORM_METHOD
    resolution = cfg.HRCNN.ROI_XFORM_RESOLUTION
    sampling_ratio = cfg.HRCNN.ROI_XFORM_SAMPLING_RATIO
    self.pooler = Pooler(
        method=method,
        output_size=resolution,
        scales=spatial_scale,
        sampling_ratio=sampling_ratio,
    )

    head_cfg = cfg.HRCNN.GCE_HEAD
    use_nl = head_cfg.USE_NL
    use_bn = head_cfg.USE_BN
    use_gn = head_cfg.USE_GN
    conv_dim = head_cfg.CONV_DIM
    asppv3_dim = head_cfg.ASPPV3_DIM
    num_convs_before_asppv3 = head_cfg.NUM_CONVS_BEFORE_ASPPV3
    asppv3_dilation = head_cfg.ASPPV3_DILATION
    num_convs_after_asppv3 = head_cfg.NUM_CONVS_AFTER_ASPPV3

    # convx before asppv3 module (left as None when no conv is requested)
    before_asppv3_list = []
    for _ in range(num_convs_before_asppv3):
        before_asppv3_list.append(
            make_conv(self.dim_in, conv_dim, kernel=3, stride=1,
                      use_bn=use_bn, use_gn=use_gn, use_relu=True)
        )
        self.dim_in = conv_dim
    self.conv_before_asppv3 = (
        nn.Sequential(*before_asppv3_list) if before_asppv3_list else None
    )

    # asppv3 module: one 1x1 branch plus one 3x3 branch per dilation rate
    asppv3_branches = [
        make_conv(self.dim_in, asppv3_dim, kernel=1,
                  use_bn=use_bn, use_gn=use_gn, use_relu=True)
    ]
    for dilation in asppv3_dilation:
        asppv3_branches.append(
            make_conv(self.dim_in, asppv3_dim, kernel=3, dilation=dilation,
                      use_bn=use_bn, use_gn=use_gn, use_relu=True)
        )
    self.asppv3 = nn.ModuleList(asppv3_branches)

    # image-level branch: global average pooling followed by a 1x1 conv
    self.im_pool = nn.Sequential(
        nn.AdaptiveAvgPool2d(1),
        make_conv(self.dim_in, asppv3_dim, kernel=1,
                  use_bn=use_bn, use_gn=use_gn, use_relu=True),
    )
    # Width fed to the fusion conv: the 1x1 branch, one branch per dilation
    # and the image-pooling branch, each asppv3_dim wide (presumably
    # concatenated in the forward pass, which is not visible here).
    self.dim_in = (len(asppv3_dilation) + 2) * asppv3_dim

    feat_list = [
        make_conv(self.dim_in, conv_dim, kernel=1,
                  use_bn=use_bn, use_gn=use_gn, use_relu=True)
    ]
    if use_nl:
        feat_list.append(
            NonLocal2d(conv_dim, int(conv_dim * head_cfg.NL_RATIO),
                       conv_dim, use_gn=True)
        )
    self.feat = nn.Sequential(*feat_list)
    self.dim_in = conv_dim

    # convx after asppv3 module
    assert num_convs_after_asppv3 >= 1
    after_asppv3_list = []
    for _ in range(num_convs_after_asppv3):
        after_asppv3_list.append(
            make_conv(self.dim_in, conv_dim, kernel=3,
                      use_bn=use_bn, use_gn=use_gn, use_relu=True)
        )
        self.dim_in = conv_dim
    # The None fallback is only reachable when asserts are disabled (-O).
    self.conv_after_asppv3 = (
        nn.Sequential(*after_asppv3_list) if after_asppv3_list else None
    )
    self.dim_out = self.dim_in
def __init__(self, dim_in, spatial_scale, norm='bn'):
    """Build a head made of ``layer4``, a 1x1 reduction conv, a pooler and two fc layers.

    Arguments:
        dim_in (sequence): only ``dim_in[-1]`` is read; it seeds
            ``self.inplanes`` before ``layer4`` is built.
        spatial_scale: forwarded untouched to ``Pooler`` as ``scales``.
        norm: stored on ``self.norm``.

    Residual-stage options come from ``cfg.BACKBONE.RESNET``; pooling and
    fc options come from ``cfg.FAST_RCNN``. ``self.dim_out`` is set to
    ``cfg.FAST_RCNN.MLP_HEAD.MLP_DIM``.

    NOTE(review): the attribute deletions at the end imply the parent
    constructor creates ``conv1`` ... ``fc`` - confirm against the base class.
    """
    super().__init__()
    self.dim_in = dim_in[-1]

    resnet_cfg = cfg.BACKBONE.RESNET
    if resnet_cfg.USE_ALIGN:
        block = res.AlignedBottleneck
    elif resnet_cfg.BOTTLENECK:
        block = res.Bottleneck  # not use the original Bottleneck module
    else:
        block = res.BasicBlock

    self.expansion = block.expansion
    self.stride_3x3 = resnet_cfg.STRIDE_3X3
    self.avg_down = resnet_cfg.AVG_DOWN
    self.norm = norm
    self.base_width = resnet_cfg.WIDTH
    self.ctx_ratio = resnet_cfg.CTX_RATIO

    depth_per_stage = resnet_cfg.LAYERS
    context_flags = resnet_cfg.STAGE_WITH_CONTEXT
    conv_flags = resnet_cfg.STAGE_WITH_CONV
    c5_dilation = resnet_cfg.C5_DILATION

    self.inplanes = self.dim_in
    # The stage only strides when it is not dilated.
    if c5_dilation == 1:
        c5_stride = 2
    else:
        c5_stride = 1
    self.layer4 = self._make_layer(
        block, 512, depth_per_stage[3], c5_stride,
        dilation=c5_dilation,
        conv=conv_flags[3],
        context=context_flags[3],
    )

    # 1x1 conv that reduces the stage output to 256 channels.
    reduced_dim = 256
    self.conv_new = nn.Sequential(
        nn.Conv2d(512 * self.expansion, reduced_dim,
                  kernel_size=1, stride=1, padding=0, bias=True),
        nn.ReLU(inplace=True),
    )
    self.dim_in = reduced_dim

    resolution = cfg.FAST_RCNN.ROI_XFORM_RESOLUTION
    self.pooler = Pooler(
        method=cfg.FAST_RCNN.ROI_XFORM_METHOD,
        output_size=resolution,
        scales=spatial_scale,
        sampling_ratio=cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO,
    )

    hidden = cfg.FAST_RCNN.MLP_HEAD.MLP_DIM
    flat_features = self.dim_in * resolution[0] * resolution[1]
    self.fc1 = nn.Linear(flat_features, hidden)
    self.fc2 = nn.Linear(hidden, hidden)
    self.dim_out = hidden

    # Drop every inherited attribute this head does not use.
    for unused in ('conv1', 'bn1', 'relu', 'maxpool',
                   'layer1', 'layer2', 'layer3', 'avgpool', 'fc'):
        delattr(self, unused)
    self._init_weights()