def __init__(self, dim_in, spatial_scale):
    """
    Arguments:
        dim_in (list[int]): number of channels of each input feature level;
            only the last level is used by this head
        spatial_scale (list[float]): spatial scale of each input feature level
    """
    super(roi_convx_head, self).__init__()
    self.dim_in = dim_in[-1]

    method = cfg.MRCNN.ROI_XFORM_METHOD
    resolution = cfg.MRCNN.ROI_XFORM_RESOLUTION
    sampling_ratio = cfg.MRCNN.ROI_XFORM_SAMPLING_RATIO
    pooler = Pooler(
        method=method,
        output_size=resolution,
        scales=spatial_scale,
        sampling_ratio=sampling_ratio,
    )
    self.pooler = pooler

    use_lite = cfg.MRCNN.CONVX_HEAD.USE_LITE
    use_bn = cfg.MRCNN.CONVX_HEAD.USE_BN
    use_gn = cfg.MRCNN.CONVX_HEAD.USE_GN
    conv_dim = cfg.MRCNN.CONVX_HEAD.CONV_DIM
    num_stacked_convs = cfg.MRCNN.CONVX_HEAD.NUM_STACKED_CONVS
    dilation = cfg.MRCNN.CONVX_HEAD.DILATION

    self.blocks = []
    for layer_idx in range(num_stacked_convs):
        layer_name = "mask_fcn{}".format(layer_idx + 1)
        module = make_conv(self.dim_in, conv_dim, kernel=3, stride=1, dilation=dilation,
                           use_dwconv=use_lite, use_bn=use_bn, use_gn=use_gn, suffix_1x1=use_lite)
        self.add_module(layer_name, module)
        self.dim_in = conv_dim
        self.blocks.append(layer_name)
    self.dim_out = self.dim_in

    if cfg.MRCNN.CONVX_HEAD.USE_WS:
        self = convert_conv2convws_model(self)

    for m in self.modules():
        if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
            nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity="relu")
            if m.bias is not None:
                nn.init.zeros_(m.bias)
        elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
            nn.init.constant_(m.weight, 1)
            nn.init.constant_(m.bias, 0)
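# A minimal forward sketch for this head (an assumption, not part of the original
# snippet): the stacked convs above are built without a fused activation, so,
# following the common maskrcnn-benchmark pattern, a ReLU is applied between them
# here. `proposals` is assumed to be whatever box structure `Pooler` expects, and
# `F` is assumed to be torch.nn.functional.
def forward(self, x, proposals):
    x = self.pooler(x, proposals)  # RoI features: (num_rois, dim_in, res, res)
    for layer_name in self.blocks:
        x = F.relu(getattr(self, layer_name)(x))
    return x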
def __init__(self, dim_in, spatial_scale):
    super().__init__()
    self.dim_in = sum(dim_in)
    self.spatial_scale = spatial_scale

    hrfpn_dim = cfg.FPN.HRFPN.DIM  # 256
    use_lite = cfg.FPN.HRFPN.USE_LITE
    use_bn = cfg.FPN.HRFPN.USE_BN
    use_gn = cfg.FPN.HRFPN.USE_GN
    if cfg.FPN.HRFPN.POOLING_TYPE == 'AVG':
        self.pooling = F.avg_pool2d
    else:
        self.pooling = F.max_pool2d
    self.num_extra_pooling = cfg.FPN.HRFPN.NUM_EXTRA_POOLING  # 1
    self.num_output = len(dim_in) + self.num_extra_pooling  # 5

    self.reduction_conv = make_conv(self.dim_in, hrfpn_dim, kernel=1, use_bn=use_bn, use_gn=use_gn)
    self.dim_in = hrfpn_dim

    self.fpn_conv = nn.ModuleList()
    for i in range(self.num_output):
        self.fpn_conv.append(
            make_conv(self.dim_in, hrfpn_dim, kernel=3, use_dwconv=use_lite, use_bn=use_bn, use_gn=use_gn,
                      suffix_1x1=use_lite)
        )
        self.dim_in = hrfpn_dim

    if self.num_extra_pooling:
        self.spatial_scale.append(self.spatial_scale[-1] * 0.5)
    self.dim_out = [self.dim_in for _ in range(self.num_output)]
    self._init_weights()
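# A hedged sketch of the matching forward pass, modeled on the published HRFPN
# design rather than taken from this snippet: every branch is upsampled to the
# finest resolution, concatenated, reduced to hrfpn_dim, then pooled with stride
# 2**i to rebuild a pyramid, and each level gets its own 3x3 conv.
def forward(self, x):
    assert len(x) == self.num_output - self.num_extra_pooling
    outs = [x[0]]
    for i in range(1, len(x)):
        outs.append(F.interpolate(x[i], scale_factor=2 ** i, mode='bilinear', align_corners=False))
    out = self.reduction_conv(torch.cat(outs, dim=1))
    outs = [out]
    for i in range(1, self.num_output):
        outs.append(self.pooling(out, kernel_size=2 ** i, stride=2 ** i))
    return [self.fpn_conv[i](outs[i]) for i in range(self.num_output)]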
def __init__(self, dim_in, spatial_scale):
    super().__init__()
    self.dim_in = dim_in[-1]

    method = cfg.FAST_RCNN.ROI_XFORM_METHOD
    resolution = cfg.FAST_RCNN.ROI_XFORM_RESOLUTION
    sampling_ratio = cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO
    pooler = Pooler(
        method=method,
        output_size=resolution,
        scales=spatial_scale,
        sampling_ratio=sampling_ratio,
    )
    self.pooler = pooler

    use_lite = cfg.FAST_RCNN.CONVFC_HEAD.USE_LITE
    use_bn = cfg.FAST_RCNN.CONVFC_HEAD.USE_BN
    use_gn = cfg.FAST_RCNN.CONVFC_HEAD.USE_GN
    conv_dim = cfg.FAST_RCNN.CONVFC_HEAD.CONV_DIM
    num_stacked_convs = cfg.FAST_RCNN.CONVFC_HEAD.NUM_STACKED_CONVS
    dilation = cfg.FAST_RCNN.CONVFC_HEAD.DILATION

    xconvs = []
    for ix in range(num_stacked_convs):
        xconvs.append(
            make_conv(self.dim_in, conv_dim, kernel=3, stride=1, dilation=dilation, use_dwconv=use_lite,
                      use_bn=use_bn, use_gn=use_gn, suffix_1x1=use_lite, use_relu=True)
        )
        self.dim_in = conv_dim
    self.add_module("xconvs", nn.Sequential(*xconvs))

    input_size = self.dim_in * resolution[0] * resolution[1]
    mlp_dim = cfg.FAST_RCNN.CONVFC_HEAD.MLP_DIM
    self.fc6 = make_fc(input_size, mlp_dim, use_bn=False, use_gn=False)
    self.dim_out = mlp_dim

    if cfg.FAST_RCNN.CONVFC_HEAD.USE_WS:
        self = convert_conv2convws_model(self)
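# A minimal forward sketch (assumed, not from the original snippet): RoI features
# pass through the conv stack (which already fuses ReLUs via use_relu=True), are
# flattened, and go through fc6 with a ReLU on top.
def forward(self, x, proposals):
    x = self.pooler(x, proposals)
    x = self.xconvs(x)
    x = x.view(x.size(0), -1)  # flatten to (num_rois, input_size)
    x = F.relu(self.fc6(x), inplace=True)
    return x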
def __init__(self, dim_in):
    super(convx_head, self).__init__()
    self.dim_in = dim_in + cfg.PRCNN.NUM_PARSING

    num_stacked_convs = cfg.PRCNN.PARSINGIOU.NUM_STACKED_CONVS  # default = 2
    conv_dim = cfg.PRCNN.PARSINGIOU.CONV_DIM
    mlp_dim = cfg.PRCNN.PARSINGIOU.MLP_DIM
    use_bn = cfg.PRCNN.PARSINGIOU.USE_BN
    use_gn = cfg.PRCNN.PARSINGIOU.USE_GN

    convx = []
    for i in range(num_stacked_convs):
        # downsample on the last conv only
        layer_stride = 1 if i < num_stacked_convs - 1 else 2
        convx.append(
            make_conv(self.dim_in, conv_dim, kernel=3, stride=layer_stride, use_bn=use_bn, use_gn=use_gn,
                      use_relu=True)
        )
        self.dim_in = conv_dim
    self.convx = nn.Sequential(*convx)
    self.avgpool = nn.AdaptiveAvgPool2d(1)

    self.parsingiou_fc1 = make_fc(self.dim_in, mlp_dim, use_bn=False, use_gn=False)
    self.parsingiou_fc2 = make_fc(mlp_dim, mlp_dim, use_bn=False, use_gn=False)
    self.dim_out = mlp_dim

    # Initialization
    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.Linear):
            nn.init.kaiming_uniform_(m.weight, a=1)
            nn.init.constant_(m.bias, 0)
        elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
            nn.init.constant_(m.weight, 1)
            nn.init.constant_(m.bias, 0)
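# A hedged forward sketch (assumed): the input is taken to be the RoI feature map
# concatenated channel-wise with the predicted parsing logits, matching
# dim_in + cfg.PRCNN.NUM_PARSING above; ReLUs between the fc layers are an assumption.
def forward(self, x):
    x = self.convx(x)
    x = self.avgpool(x)
    x = x.view(x.size(0), -1)
    x = F.relu(self.parsingiou_fc1(x), inplace=True)
    x = F.relu(self.parsingiou_fc2(x), inplace=True)
    return x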
def __init__(self, dim_in, spatial_scale):
    super(roi_convx_head, self).__init__()
    self.dim_in = dim_in[-1]

    method = cfg.UVRCNN.ROI_XFORM_METHOD
    resolution = cfg.UVRCNN.ROI_XFORM_RESOLUTION
    sampling_ratio = cfg.UVRCNN.ROI_XFORM_SAMPLING_RATIO
    pooler = Pooler(
        method=method,
        output_size=resolution,
        scales=spatial_scale,
        sampling_ratio=sampling_ratio,
    )
    self.pooler = pooler

    use_lite = cfg.UVRCNN.CONVX_HEAD.USE_LITE
    use_bn = cfg.UVRCNN.CONVX_HEAD.USE_BN
    use_gn = cfg.UVRCNN.CONVX_HEAD.USE_GN
    conv_dim = cfg.UVRCNN.CONVX_HEAD.CONV_DIM
    num_stacked_convs = cfg.UVRCNN.CONVX_HEAD.NUM_STACKED_CONVS
    dilation = cfg.UVRCNN.CONVX_HEAD.DILATION

    self.blocks = []
    for layer_idx in range(num_stacked_convs):
        layer_name = "UV_fcn{}".format(layer_idx + 1)
        module = make_conv(self.dim_in, conv_dim, kernel=3, stride=1, dilation=dilation,
                           use_dwconv=use_lite, use_bn=use_bn, use_gn=use_gn, suffix_1x1=use_lite)
        self.add_module(layer_name, module)
        self.dim_in = conv_dim
        self.blocks.append(layer_name)
    self.dim_out = self.dim_in
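# Forward is assumed to mirror the mask-head sketch above; only the registered
# layer names ("UV_fcn{i}") differ.
def forward(self, x, proposals):
    x = self.pooler(x, proposals)
    for layer_name in self.blocks:
        x = F.relu(getattr(self, layer_name)(x))
    return x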
def __init__(self, dim_in):
    super(Hier_output, self).__init__()
    num_classes = cfg.HRCNN.NUM_CLASSES
    num_convs = cfg.HRCNN.OUTPUT_NUM_CONVS
    conv_dim = cfg.HRCNN.OUTPUT_CONV_DIM
    use_lite = cfg.HRCNN.OUTPUT_USE_LITE
    use_bn = cfg.HRCNN.OUTPUT_USE_BN
    use_gn = cfg.HRCNN.OUTPUT_USE_GN
    use_dcn = cfg.HRCNN.OUTPUT_USE_DCN
    prior_prob = cfg.HRCNN.PRIOR_PROB
    self.norm_reg_targets = cfg.HRCNN.NORM_REG_TARGETS
    self.centerness_on_reg = cfg.HRCNN.CENTERNESS_ON_REG

    cls_tower = []
    bbox_tower = []
    for i in range(num_convs):
        # use deformable conv only on the last tower layer
        conv_type = 'deform' if use_dcn and i == num_convs - 1 else 'normal'
        cls_tower.append(
            make_conv(dim_in, conv_dim, kernel=3, stride=1, dilation=1, use_dwconv=use_lite,
                      conv_type=conv_type, use_bn=use_bn, use_gn=use_gn, use_relu=True,
                      kaiming_init=False, suffix_1x1=use_lite)
        )
        bbox_tower.append(
            make_conv(dim_in, conv_dim, kernel=3, stride=1, dilation=1, use_dwconv=use_lite,
                      conv_type=conv_type, use_bn=use_bn, use_gn=use_gn, use_relu=True,
                      kaiming_init=False, suffix_1x1=use_lite)
        )
        dim_in = conv_dim
    self.add_module('cls_tower', nn.Sequential(*cls_tower))
    self.add_module('bbox_tower', nn.Sequential(*bbox_tower))

    self.cls_deconv = ConvTranspose2d(conv_dim, conv_dim, 2, 2, 0)
    self.bbox_deconv = ConvTranspose2d(conv_dim, conv_dim, 2, 2, 0)
    self.cls_logits = Conv2d(conv_dim, num_classes, kernel_size=3, stride=1, padding=1)
    self.bbox_pred = Conv2d(conv_dim, 4, kernel_size=3, stride=1, padding=1)
    self.centerness = Conv2d(conv_dim, 1, kernel_size=3, stride=1, padding=1)

    # Initialization
    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            torch.nn.init.normal_(m.weight, std=0.01)
            if m.bias is not None:
                torch.nn.init.constant_(m.bias, 0)
        elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
            nn.init.constant_(m.weight, 1)
            nn.init.constant_(m.bias, 0)

    # initialize the bias for focal loss
    bias_value = -math.log((1 - prior_prob) / prior_prob)
    torch.nn.init.constant_(self.cls_logits.bias, bias_value)

    self.scales = nn.ModuleList([Scale(init_value=1.0) for _ in range(1)])
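# A hedged, FCOS-style forward sketch inferred from the structure above (a single
# Scale module, deconvs that double the resolution, and the norm_reg_targets /
# centerness_on_reg switches); the actual head may differ in detail.
def forward(self, x):
    cls_tower = self.cls_deconv(self.cls_tower(x))
    box_tower = self.bbox_deconv(self.bbox_tower(x))
    logits = self.cls_logits(cls_tower)
    centerness = self.centerness(box_tower if self.centerness_on_reg else cls_tower)
    bbox_reg = self.scales[0](self.bbox_pred(box_tower))
    # normalized targets stay non-negative via ReLU; otherwise exp, as in FCOS
    bbox_reg = F.relu(bbox_reg) if self.norm_reg_targets else torch.exp(bbox_reg)
    return logits, bbox_reg, centerness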
def __init__(self, dim_in, spatial_scale):
    super(roi_gce_head, self).__init__()
    self.dim_in = dim_in[-1]

    method = cfg.HRCNN.ROI_XFORM_METHOD
    resolution = cfg.HRCNN.ROI_XFORM_RESOLUTION
    sampling_ratio = cfg.HRCNN.ROI_XFORM_SAMPLING_RATIO
    pooler = Pooler(
        method=method,
        output_size=resolution,
        scales=spatial_scale,
        sampling_ratio=sampling_ratio,
    )
    self.pooler = pooler

    use_nl = cfg.HRCNN.GCE_HEAD.USE_NL
    use_bn = cfg.HRCNN.GCE_HEAD.USE_BN
    use_gn = cfg.HRCNN.GCE_HEAD.USE_GN
    conv_dim = cfg.HRCNN.GCE_HEAD.CONV_DIM
    asppv3_dim = cfg.HRCNN.GCE_HEAD.ASPPV3_DIM
    num_convs_before_asppv3 = cfg.HRCNN.GCE_HEAD.NUM_CONVS_BEFORE_ASPPV3
    asppv3_dilation = cfg.HRCNN.GCE_HEAD.ASPPV3_DILATION
    num_convs_after_asppv3 = cfg.HRCNN.GCE_HEAD.NUM_CONVS_AFTER_ASPPV3

    # convx before asppv3 module
    before_asppv3_list = []
    for _ in range(num_convs_before_asppv3):
        before_asppv3_list.append(
            make_conv(self.dim_in, conv_dim, kernel=3, stride=1, use_bn=use_bn, use_gn=use_gn, use_relu=True)
        )
        self.dim_in = conv_dim
    self.conv_before_asppv3 = nn.Sequential(*before_asppv3_list) if len(before_asppv3_list) else None

    # asppv3 module
    self.asppv3 = []
    self.asppv3.append(
        make_conv(self.dim_in, asppv3_dim, kernel=1, use_bn=use_bn, use_gn=use_gn, use_relu=True)
    )
    for dilation in asppv3_dilation:
        self.asppv3.append(
            make_conv(self.dim_in, asppv3_dim, kernel=3, dilation=dilation, use_bn=use_bn, use_gn=use_gn,
                      use_relu=True)
        )
    self.asppv3 = nn.ModuleList(self.asppv3)
    self.im_pool = nn.Sequential(
        nn.AdaptiveAvgPool2d(1),
        make_conv(self.dim_in, asppv3_dim, kernel=1, use_bn=use_bn, use_gn=use_gn, use_relu=True)
    )
    self.dim_in = (len(asppv3_dilation) + 2) * asppv3_dim

    feat_list = []
    feat_list.append(
        make_conv(self.dim_in, conv_dim, kernel=1, use_bn=use_bn, use_gn=use_gn, use_relu=True)
    )
    if use_nl:
        feat_list.append(
            NonLocal2d(conv_dim, int(conv_dim * cfg.HRCNN.GCE_HEAD.NL_RATIO), conv_dim, use_gn=True)
        )
    self.feat = nn.Sequential(*feat_list)
    self.dim_in = conv_dim

    # convx after asppv3 module
    assert num_convs_after_asppv3 >= 1
    after_asppv3_list = []
    for _ in range(num_convs_after_asppv3):
        after_asppv3_list.append(
            make_conv(self.dim_in, conv_dim, kernel=3, use_bn=use_bn, use_gn=use_gn, use_relu=True)
        )
        self.dim_in = conv_dim
    self.conv_after_asppv3 = nn.Sequential(*after_asppv3_list) if len(after_asppv3_list) else None
    self.dim_out = self.dim_in
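# A hedged forward sketch of the GCE head (assumed from the module list above, in
# the usual ASPP pattern): the image-level pooled branch is upsampled back to the
# RoI resolution, concatenated with the parallel ASPP branches, fused by self.feat,
# then refined by the trailing convs.
def forward(self, x, proposals):
    x = self.pooler(x, proposals)
    if self.conv_before_asppv3 is not None:
        x = self.conv_before_asppv3(x)
    out = [F.interpolate(self.im_pool(x), size=x.shape[-2:], mode='bilinear', align_corners=False)]
    out.extend(branch(x) for branch in self.asppv3)
    x = self.feat(torch.cat(out, dim=1))  # (len(asppv3_dilation) + 2) * asppv3_dim channels in
    if self.conv_after_asppv3 is not None:
        x = self.conv_after_asppv3(x)
    return x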
def __init__(self, dim_in, spatial_scale):
    super().__init__()
    self.dim_in = dim_in[-1]  # 2048
    self.spatial_scale = spatial_scale

    fpn_dim = cfg.FPN.DIM  # 256
    use_lite = cfg.FPN.USE_LITE
    use_bn = cfg.FPN.USE_BN
    use_gn = cfg.FPN.USE_GN
    min_level, max_level = get_min_max_levels()  # 2, 6
    # 4 backbone stages are used (cfg.FPN.LOWEST_BACKBONE_LVL = 2)
    self.num_backbone_stages = len(dim_in) - (min_level - cfg.FPN.LOWEST_BACKBONE_LVL)

    # P5 in
    self.p5_in = make_conv(self.dim_in, fpn_dim, kernel=1, use_bn=use_bn, use_gn=use_gn)

    # P5 out
    self.p5_out = make_conv(fpn_dim, fpn_dim, kernel=3, use_dwconv=use_lite, use_bn=use_bn, use_gn=use_gn,
                            suffix_1x1=use_lite)

    # fpn module
    self.fpn_in = []
    self.fpn_out = []
    for i in range(self.num_backbone_stages - 1):  # skip the top layer
        px_in = make_conv(dim_in[-i - 2], fpn_dim, kernel=1, use_bn=use_bn, use_gn=use_gn)  # from P4 to P2
        px_out = make_conv(fpn_dim, fpn_dim, kernel=3, use_dwconv=use_lite, use_bn=use_bn, use_gn=use_gn,
                           suffix_1x1=use_lite)
        self.fpn_in.append(px_in)
        self.fpn_out.append(px_out)
    self.fpn_in = nn.ModuleList(self.fpn_in)  # [P4, P3, P2]
    self.fpn_out = nn.ModuleList(self.fpn_out)
    self.dim_in = fpn_dim

    # P6. Original FPN P6 level implementation from the CVPR'17 FPN paper.
    if not cfg.FPN.EXTRA_CONV_LEVELS and max_level == cfg.FPN.HIGHEST_BACKBONE_LVL + 1:
        self.maxpool_p6 = nn.MaxPool2d(kernel_size=1, stride=2, padding=0)
        self.spatial_scale.append(self.spatial_scale[-1] * 0.5)

    # Coarser FPN levels introduced for RetinaNet
    if cfg.FPN.EXTRA_CONV_LEVELS and max_level > cfg.FPN.HIGHEST_BACKBONE_LVL:
        self.extra_pyramid_modules = nn.ModuleList()
        if cfg.FPN.USE_C5:
            self.dim_in = dim_in[-1]
        for i in range(cfg.FPN.HIGHEST_BACKBONE_LVL + 1, max_level + 1):
            self.extra_pyramid_modules.append(
                make_conv(self.dim_in, fpn_dim, kernel=3, stride=2, use_dwconv=use_lite, use_bn=use_bn,
                          use_gn=use_gn, suffix_1x1=use_lite)
            )
            self.dim_in = fpn_dim
            self.spatial_scale.append(self.spatial_scale[-1] * 0.5)

    # Retain only the spatial scales that will be used for RoI heads. `self.spatial_scale`
    # may include extra scales that are used for RPN proposals, but not for RoI heads.
    num_roi_levels = cfg.FPN.ROI_MAX_LEVEL - cfg.FPN.ROI_MIN_LEVEL + 1
    self.spatial_scale = self.spatial_scale[:num_roi_levels]
    self.dim_out = [self.dim_in for _ in range(num_roi_levels)]

    if cfg.FPN.USE_WS:
        self = convert_conv2convws_model(self)

    self._init_weights()
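# A hedged top-down forward sketch (assumed; standard FPN wiring for the modules
# built above). `x` is the backbone feature list, ordered fine to coarse.
def forward(self, x):
    p = self.p5_in(x[-1])
    outputs = [self.p5_out(p)]
    for i in range(self.num_backbone_stages - 1):
        lateral = self.fpn_in[i](x[-i - 2])
        p = lateral + F.interpolate(p, scale_factor=2, mode='nearest')  # top-down pathway
        outputs.insert(0, self.fpn_out[i](p))
    if hasattr(self, 'maxpool_p6'):
        outputs.append(self.maxpool_p6(outputs[-1]))
    if hasattr(self, 'extra_pyramid_modules'):
        p = x[-1] if cfg.FPN.USE_C5 else outputs[-1]
        for i, module in enumerate(self.extra_pyramid_modules):
            p = module(p if i == 0 else F.relu(p))
            outputs.append(p)
    return outputs  # fine to coarse, e.g. [P2, P3, P4, P5, P6]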
def __init__(self, dim_in, spatial_scale):
    super(fused_head, self).__init__()
    self.dim_in = dim_in[-1]
    self.fusion_level = cfg.SEMSEG.SEMSEG_HEAD.FUSION_LEVEL
    self.num_convs = cfg.SEMSEG.SEMSEG_HEAD.NUM_CONVS

    num_ins = cfg.SEMSEG.SEMSEG_HEAD.NUM_IN_STAGE
    conv_dim = cfg.SEMSEG.SEMSEG_HEAD.CONV_DIM
    use_bn = cfg.SEMSEG.SEMSEG_HEAD.USE_BN
    use_gn = cfg.SEMSEG.SEMSEG_HEAD.USE_GN

    lateral_convs = []
    for layer_idx in range(num_ins):
        lateral_convs.append(
            make_conv(self.dim_in, conv_dim, kernel=1, stride=1, use_bn=use_bn, use_gn=use_gn, use_relu=True,
                      inplace=False)
        )
    self.add_module('lateral_convs', nn.Sequential(*lateral_convs))
    self.dim_in = conv_dim

    convs = []
    for layer_idx in range(self.num_convs):
        convs.append(
            make_conv(self.dim_in, conv_dim, kernel=3, stride=1, use_bn=use_bn, use_gn=use_gn, use_relu=True)
        )
        self.dim_in = conv_dim
    self.add_module('convs', nn.Sequential(*convs))

    self.conv_embedding = make_conv(self.dim_in, dim_in[-1], kernel=3, stride=1, use_bn=use_bn, use_gn=use_gn,
                                    use_relu=True)
    self.dim_out = self.dim_in

    # Initialization
    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.Linear):
            nn.init.kaiming_uniform_(m.weight, a=1)
            nn.init.constant_(m.bias, 0)
        elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
            nn.init.constant_(m.weight, 1)
            nn.init.constant_(m.bias, 0)
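# A hedged forward sketch (assumed; follows the usual fused semantic-head pattern,
# as in HTC): each input level passes its lateral conv, is resized to the fusion
# level, and summed; the result is refined by the conv stack. `conv_embedding`
# maps back to the backbone dim so the fused feature can be added onto FPN or RoI
# features. Indexing nn.Sequential as a list is valid PyTorch.
def forward(self, x):
    fused = self.lateral_convs[self.fusion_level](x[self.fusion_level])
    for i, feat in enumerate(x):
        if i != self.fusion_level:
            lateral = self.lateral_convs[i](feat)
            fused = fused + F.interpolate(lateral, size=fused.shape[-2:], mode='bilinear', align_corners=False)
    fused = self.convs(fused)
    return fused, self.conv_embedding(fused)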