def __init__(self, cfg, input_shape: List[ShapeSpec]): """ Arguments: in_channels (int): number of channels of the input feature """ super().__init__() # TODO: Implement the sigmoid version first. self.num_classes = cfg.MODEL.FCOS.NUM_CLASSES self.fpn_strides = cfg.MODEL.FCOS.FPN_STRIDES head_configs = { "cls": (cfg.MODEL.FCOS.NUM_CLS_CONVS, cfg.MODEL.FCOS.USE_DEFORMABLE), "bbox": (cfg.MODEL.FCOS.NUM_BOX_CONVS, cfg.MODEL.FCOS.USE_DEFORMABLE), "share": (cfg.MODEL.FCOS.NUM_SHARE_CONVS, False) } norm = None if cfg.MODEL.FCOS.NORM == "none" else cfg.MODEL.FCOS.NORM self.num_levels = len(input_shape) in_channels = [s.channels for s in input_shape] assert len( set(in_channels)) == 1, "Each level must have the same channel!" in_channels = in_channels[0] self.in_channels_to_top_module = in_channels for head in head_configs: tower = [] num_convs, use_deformable = head_configs[head] for i in range(num_convs): if use_deformable and i == num_convs - 1: conv_func = DFConv2d else: conv_func = nn.Conv2d tower.append( conv_func(in_channels, in_channels, kernel_size=3, stride=1, padding=1, bias=True)) if norm == "GN": tower.append(nn.GroupNorm(32, in_channels)) elif norm == "NaiveGN": tower.append(NaiveGroupNorm(32, in_channels)) elif norm == "BN": tower.append( ModuleListDial([ nn.BatchNorm2d(in_channels) for _ in range(self.num_levels) ])) elif norm == "SyncBN": tower.append( ModuleListDial([ NaiveSyncBatchNorm(in_channels) for _ in range(self.num_levels) ])) tower.append(nn.ReLU()) self.add_module('{}_tower'.format(head), nn.Sequential(*tower)) self.cls_logits = nn.Conv2d(in_channels, self.num_classes, kernel_size=3, stride=1, padding=1) self.bbox_pred = nn.Conv2d(in_channels, 4, kernel_size=3, stride=1, padding=1) self.ctrness = nn.Conv2d(in_channels, 1, kernel_size=3, stride=1, padding=1) if cfg.MODEL.FCOS.USE_SCALE: self.scales = nn.ModuleList( [Scale(init_value=1.0) for _ in range(self.num_levels)]) else: self.scales = None for modules in [ self.cls_tower, self.bbox_tower, self.share_tower, self.cls_logits, self.bbox_pred, self.ctrness ]: for l in modules.modules(): if isinstance(l, nn.Conv2d): torch.nn.init.normal_(l.weight, std=0.01) torch.nn.init.constant_(l.bias, 0) # initialize the bias for focal loss prior_prob = cfg.MODEL.FCOS.PRIOR_PROB bias_value = -math.log((1 - prior_prob) / prior_prob) torch.nn.init.constant_(self.cls_logits.bias, bias_value)
def __init__(self, cfg, input_shape: List[ShapeSpec]): """ Arguments: in_channels (int): number of channels of the input feature """ super().__init__() self.num_classes = cfg.MODEL.DTInst.NUM_CLASSES self.fpn_strides = cfg.MODEL.DTInst.FPN_STRIDES self.num_codes = cfg.MODEL.DTInst.NUM_CODE self.use_gcn_in_mask = cfg.MODEL.DTInst.USE_GCN_IN_MASK self.gcn_kernel_size = cfg.MODEL.DTInst.GCN_KERNEL_SIZE self.mask_size = cfg.MODEL.DTInst.MASK_SIZE self.if_whiten = cfg.MODEL.DTInst.WHITEN head_configs = { "cls": (cfg.MODEL.DTInst.NUM_CLS_CONVS, cfg.MODEL.DTInst.USE_DEFORMABLE), "bbox": (cfg.MODEL.DTInst.NUM_BOX_CONVS, cfg.MODEL.DTInst.USE_DEFORMABLE), "share": (cfg.MODEL.DTInst.NUM_SHARE_CONVS, cfg.MODEL.DTInst.USE_DEFORMABLE), "mask": (cfg.MODEL.DTInst.NUM_MASK_CONVS, cfg.MODEL.DTInst.USE_DEFORMABLE) } self.type_deformable = cfg.MODEL.DTInst.TYPE_DEFORMABLE self.last_deformable = cfg.MODEL.DTInst.LAST_DEFORMABLE norm = None if cfg.MODEL.DTInst.NORM == "none" else cfg.MODEL.DTInst.NORM in_channels = [s.channels for s in input_shape] assert len( set(in_channels)) == 1, "Each level must have the same channel!" in_channels = in_channels[0] for head in head_configs: tower = [] num_convs, use_deformable = head_configs[head] for i in range(num_convs): # conv type. if use_deformable: if self.last_deformable: if i == num_convs - 1: conv_func = DFConv2d type_func = self.type_deformable else: conv_func = nn.Conv2d type_func = "Conv2d" else: conv_func = DFConv2d type_func = self.type_deformable else: conv_func = nn.Conv2d type_func = "Conv2d" # conv operation. if type_func == "DCNv1": tower.append( conv_func(in_channels, in_channels, kernel_size=3, stride=1, padding=1, bias=False, with_modulated_dcn=False)) elif type_func == "DCNv2": tower.append( conv_func(in_channels, in_channels, kernel_size=3, stride=1, padding=1, bias=False, with_modulated_dcn=True)) elif type_func == "Conv2d": tower.append( conv_func(in_channels, in_channels, kernel_size=3, stride=1, padding=1, bias=True)) else: raise NotImplementedError # norm. if norm == "GN": tower.append(nn.GroupNorm(32, in_channels)) elif norm == "NaiveGN": tower.append(NaiveGroupNorm(32, in_channels)) # activation. 
tower.append(nn.ReLU()) self.add_module('{}_tower'.format(head), nn.Sequential(*tower)) self.cls_logits = nn.Conv2d(in_channels, self.num_classes, kernel_size=3, stride=1, padding=1) self.bbox_pred = nn.Conv2d(in_channels, 4, kernel_size=3, stride=1, padding=1) self.ctrness = nn.Conv2d(in_channels, 1, kernel_size=3, stride=1, padding=1) # self.residual = nn.Sequential( # nn.Conv2d(in_channels * 2 + self.mask_size ** 2, in_channels, kernel_size=3, stride=1, padding=1), # nn.GroupNorm(32, in_channels), # nn.ReLU(), # nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1), # nn.GroupNorm(32, in_channels), # nn.ReLU(), # nn.Conv2d(in_channels, self.mask_size ** 2, kernel_size=1, stride=1, padding=0), # ) self.residual = nn.Sequential( nn.Conv2d(in_channels * 3, in_channels, kernel_size=3, stride=1, padding=1), nn.ReLU(), nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1), nn.ReLU(), ) # self.residual = nn.Sequential( # nn.Conv2d(in_channels, in_channels * 2, kernel_size=3, stride=1, padding=1), # nn.ReLU(), # nn.Conv2d(in_channels * 2, in_channels, kernel_size=3, stride=1, padding=1), # nn.ReLU(), # ) self.code_transform = nn.Conv2d(in_channels, 3, kernel_size=3, stride=1, padding=1) torch.nn.init.constant_(self.code_transform.bias[0], 1) torch.nn.init.constant_(self.code_transform.bias[1], 0) torch.nn.init.constant_(self.code_transform.bias[2], 0.75) if self.use_gcn_in_mask: self.mask_pred = GCN(in_channels, self.num_codes, k=self.gcn_kernel_size) else: self.mask_pred = nn.Conv2d(in_channels, self.num_codes, kernel_size=3, stride=1, padding=1) if cfg.MODEL.DTInst.USE_SCALE: self.scales = nn.ModuleList( [Scale(init_value=1.0) for _ in self.fpn_strides]) else: self.scales = None for modules in [ self.cls_tower, self.bbox_tower, self.share_tower, self.cls_logits, self.bbox_pred, self.ctrness, self.mask_tower, self.mask_pred, self.residual ]: for l in modules.modules(): if isinstance(l, nn.Conv2d): torch.nn.init.normal_(l.weight, std=0.01) torch.nn.init.constant_(l.bias, 0) # initialize the bias for focal loss prior_prob = cfg.MODEL.DTInst.PRIOR_PROB bias_value = -math.log((1 - prior_prob) / prior_prob) torch.nn.init.constant_(self.cls_logits.bias, bias_value)
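
# --- A minimal, hypothetical sketch (not the repository's actual forward
# method) of how a per-level head like the ones defined above is typically
# applied to a list of FPN feature maps. `head` is assumed to be an instance
# of the head built above; `features` is assumed to be a list of tensors with
# `in_channels` channels each. The residual / code_transform branches of the
# mask path are deliberately omitted to keep the pattern readable.
import torch.nn.functional as F


def run_head(head, features):
    logits, bbox_reg, ctrness, mask_codes = [], [], [], []
    for level, feature in enumerate(features):
        # Shared tower first, then the task-specific towers.
        feature = head.share_tower(feature)
        cls_feat = head.cls_tower(feature)
        bbox_feat = head.bbox_tower(feature)
        mask_feat = head.mask_tower(feature)

        logits.append(head.cls_logits(cls_feat))
        ctrness.append(head.ctrness(bbox_feat))

        reg = head.bbox_pred(bbox_feat)
        if head.scales is not None:
            reg = head.scales[level](reg)
        # Distances to the four box sides must be non-negative.
        bbox_reg.append(F.relu(reg))

        mask_codes.append(head.mask_pred(mask_feat))
    return logits, bbox_reg, ctrness, mask_codes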