def group_norm(out_channels, affine=True, divisor=1):
    out_channels = out_channels // divisor
    dim_per_gp = cfg.MODEL.GROUP_NORM.DIM_PER_GP // divisor
    num_groups = cfg.MODEL.GROUP_NORM.NUM_GROUPS // divisor
    eps = cfg.MODEL.GROUP_NORM.EPSILON  # default: 1e-5
    return nn.GroupNorm(
        get_group_gn(out_channels, dim_per_gp, num_groups),
        out_channels,
        eps,
        affine,
    )
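# For context, a sketch of the get_group_gn helper this relies on, following
# maskrcnn-benchmark's make_layers convention: exactly one of dim_per_gp and
# num_groups is expected to be set, the other being -1.
def get_group_gn(dim, dim_per_gp, num_groups):
    assert dim_per_gp == -1 or num_groups == -1, \
        "GroupNorm: can only specify G or C-per-G."
    if dim_per_gp > 0:
        assert dim % dim_per_gp == 0, \
            "dim: {}, dim_per_gp: {}".format(dim, dim_per_gp)
        group_gn = dim // dim_per_gp
    else:
        assert dim % num_groups == 0, \
            "dim: {}, num_groups: {}".format(dim, num_groups)
        group_gn = num_groups
    return group_gn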
def __init__(self, cfg, in_channels):
    """
    Arguments:
        in_channels (int): number of channels of the input feature
    """
    super(FCOSSharedHead, self).__init__()
    # TODO: Implement the sigmoid version first.
    num_classes = cfg.MODEL.FCOS.NUM_CLASSES - 1
    self.identity = cfg.MODEL.FCOS.RESIDUAL_CONNECTION

    # Single tower shared by the classification and regression branches.
    shared_tower = []
    for i in range(cfg.MODEL.FCOS.NUM_CONVS):
        shared_tower.append(
            nn.Conv2d(in_channels, in_channels,
                      kernel_size=3, stride=1, padding=1))
        shared_tower.append(nn.GroupNorm(32, in_channels))
        shared_tower.append(nn.ReLU())
    self.shared_tower = nn.Sequential(*shared_tower)

    self.dense_points = cfg.MODEL.FCOS.DENSE_POINTS
    self.cls_logits = nn.Conv(in_channels, num_classes * self.dense_points,
                              kernel_size=3, stride=1, padding=1)
    self.bbox_pred = nn.Conv(in_channels, 4 * self.dense_points,
                             kernel_size=3, stride=1, padding=1)
    self.centerness = nn.Conv(in_channels, 1 * self.dense_points,
                              kernel_size=3, stride=1, padding=1)

    # initialization
    for modules in [self.shared_tower, self.cls_logits,
                    self.bbox_pred, self.centerness]:
        for l in modules.modules():
            if isinstance(l, nn.Conv):
                nn.init.gauss_(l.weight, std=0.01)
                nn.init.constant_(l.bias, 0)

    # initialize the bias for focal loss
    prior_prob = cfg.MODEL.FCOS.PRIOR_PROB
    bias_value = -math.log((1 - prior_prob) / prior_prob)
    nn.init.constant_(self.cls_logits.bias, bias_value)

    # One learnable scale per FPN level; ModuleList takes a list of modules.
    self.scales = nn.ModuleList([Scale(init_value=1.0) for _ in range(5)])
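# Worked example of the focal-loss bias init above. With the common
# PRIOR_PROB = 0.01 (an illustrative value, not read from this snippet),
# sigmoid(bias_value) ~= 0.01, so every location starts out predicting
# background and training is not swamped by easy negatives early on.
import math

prior_prob = 0.01
bias_value = -math.log((1 - prior_prob) / prior_prob)
print(bias_value)  # ~= -4.595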
def __init__(
    self,
    input_depth,
    output_depth,
    kernel,
    stride,
    pad,
    no_bias,
    use_relu,
    bn_type,
    group=1,
    *args,
    **kwargs
):
    super(ConvBNRelu, self).__init__()

    assert use_relu in ["relu", None]
    # GroupNorm must be requested as the tuple ("gn", num_groups); a bare
    # "gn" string would leave gn_group undefined below.
    if isinstance(bn_type, (list, tuple)):
        assert len(bn_type) == 2
        assert bn_type[0] == "gn"
        gn_group = bn_type[1]
        bn_type = bn_type[0]
    assert bn_type in ["bn", "af", "gn", None]
    assert stride in [1, 2, 4]

    op = Conv2d(
        input_depth,
        output_depth,
        kernel_size=kernel,
        stride=stride,
        padding=pad,
        bias=not no_bias,
        groups=group,
        *args,
        **kwargs
    )
    nn.init.kaiming_normal_(op.weight, mode="fan_out", nonlinearity="relu")
    if op.bias is not None:
        nn.init.constant_(op.bias, 0.0)
    self.add_module("conv", op)

    if bn_type == "bn":
        bn_op = BatchNorm2d(output_depth)
    elif bn_type == "gn":
        bn_op = nn.GroupNorm(num_groups=gn_group, num_channels=output_depth)
    elif bn_type == "af":
        bn_op = FrozenBatchNorm2d(output_depth)
    if bn_type is not None:
        self.add_module("bn", bn_op)

    if use_relu == "relu":
        self.add_module("relu", nn.ReLU())
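# Minimal usage sketch for ConvBNRelu (channel sizes are illustrative, not
# from the source). GroupNorm is requested via the ("gn", num_groups) tuple
# form, the only form that sets gn_group above; BatchNorm would be bn_type="bn".
block = ConvBNRelu(
    input_depth=64,
    output_depth=128,
    kernel=3,
    stride=2,
    pad=1,
    no_bias=True,
    use_relu="relu",
    bn_type=("gn", 32),
)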
def __init__(self, cin, cout, zdim=128, nf=64):
    super(ConfNet, self).__init__()
    # Encoder: four stride-2 convs, then a 4x4 conv down to the zdim code.
    network = [
        nn.Conv(cin, nf, 4, stride=2, padding=1, bias=False),
        nn.GroupNorm(16, nf),
        nn.LeakyReLU(scale=0.2),
        nn.Conv(nf, nf * 2, 4, stride=2, padding=1, bias=False),
        nn.GroupNorm(16 * 2, nf * 2),
        nn.LeakyReLU(scale=0.2),
        nn.Conv(nf * 2, nf * 4, 4, stride=2, padding=1, bias=False),
        nn.GroupNorm(16 * 4, nf * 4),
        nn.LeakyReLU(scale=0.2),
        nn.Conv(nf * 4, nf * 8, 4, stride=2, padding=1, bias=False),
        nn.LeakyReLU(scale=0.2),
        nn.Conv(nf * 8, zdim, 4, stride=1, padding=0, bias=False),
        nn.ReLU(),
    ]
    # Decoder trunk shared by both output branches.
    network += [
        nn.ConvTranspose(zdim, nf * 8, 4, padding=0, bias=False),
        nn.ReLU(),
        nn.ConvTranspose(nf * 8, nf * 4, 4, stride=2, padding=1, bias=False),
        nn.GroupNorm(16 * 4, nf * 4),
        nn.ReLU(),
        nn.ConvTranspose(nf * 4, nf * 2, 4, stride=2, padding=1, bias=False),
        nn.GroupNorm(16 * 2, nf * 2),
        nn.ReLU(),
    ]
    self.network = nn.Sequential(*network)

    # Full-resolution branch (2 channels; note `cout` is not used here).
    out_net1 = [
        nn.ConvTranspose(nf * 2, nf, 4, stride=2, padding=1, bias=False),
        nn.GroupNorm(16, nf),
        nn.ReLU(),
        nn.ConvTranspose(nf, nf, 4, stride=2, padding=1, bias=False),
        nn.GroupNorm(16, nf),
        nn.ReLU(),
        nn.Conv(nf, 2, 5, stride=1, padding=2, bias=False),
        nn.Softplus(),
    ]
    self.out_net1 = nn.Sequential(*out_net1)

    # Low-resolution branch.
    out_net2 = [
        nn.Conv(nf * 2, 2, 3, stride=1, padding=1, bias=False),
        nn.Softplus(),
    ]
    self.out_net2 = nn.Sequential(*out_net2)
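# Shape sketch, assuming a 64x64 input (the input size is an assumption, not
# from this snippet): the four stride-2 convs give 64 -> 4, the 4x4 stride-1
# pad-0 conv collapses that to a 1x1 zdim code, and the decoder trunk recovers
# a (nf*2)-channel 16x16 feature that feeds both branches.
import jittor as jt

net = ConfNet(cin=3, cout=2)
feat = net.network(jt.randn(1, 3, 64, 64))  # (1, nf*2, 16, 16)
conf_full = net.out_net1(feat)              # (1, 2, 64, 64)
conf_low = net.out_net2(feat)               # (1, 2, 16, 16)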
def __init__(self, cfg, in_channels):
    """
    Arguments:
        in_channels (int): number of channels of the input feature
    """
    super(EmbedMaskHead, self).__init__()
    # TODO: Implement the sigmoid version first.
    self.fpn_strides = cfg.MODEL.EMBED_MASK.FPN_STRIDES
    self.norm_reg_targets = cfg.MODEL.EMBED_MASK.NORM_REG_TARGETS
    self.centerness_on_reg = cfg.MODEL.EMBED_MASK.CENTERNESS_ON_REG
    self.use_dcn_in_tower = cfg.MODEL.EMBED_MASK.USE_DCN_IN_TOWER

    num_classes = cfg.MODEL.EMBED_MASK.NUM_CLASSES - 1
    embed_dim = cfg.MODEL.EMBED_MASK.EMBED_DIM
    prior_margin = cfg.MODEL.EMBED_MASK.PRIOR_MARGIN
    self.init_sigma_bias = math.log(-math.log(0.5) / (prior_margin ** 2))

    cls_tower = []
    bbox_tower = []
    mask_tower = []
    for i in range(cfg.MODEL.FCOS.NUM_CONVS):
        if self.use_dcn_in_tower and i == cfg.MODEL.FCOS.NUM_CONVS - 1:
            # DFConv2d is not ported; fall back to a plain conv so that
            # conv_func is always defined.
            # conv_func = DFConv2d
            conv_func = nn.Conv
        else:
            conv_func = nn.Conv
        cls_tower.append(
            conv_func(in_channels, in_channels,
                      kernel_size=3, stride=1, padding=1, bias=True))
        cls_tower.append(nn.GroupNorm(32, in_channels))
        cls_tower.append(nn.ReLU())
        bbox_tower.append(
            conv_func(in_channels, in_channels,
                      kernel_size=3, stride=1, padding=1, bias=True))
        bbox_tower.append(nn.GroupNorm(32, in_channels))
        bbox_tower.append(nn.ReLU())
        mask_tower.append(
            conv_func(in_channels, in_channels,
                      kernel_size=3, stride=1, padding=1, bias=True))
        mask_tower.append(nn.GroupNorm(32, in_channels))
        mask_tower.append(nn.ReLU())

    self.cls_tower = nn.Sequential(*cls_tower)
    self.bbox_tower = nn.Sequential(*bbox_tower)
    self.cls_logits = nn.Conv(in_channels, num_classes,
                              kernel_size=3, stride=1, padding=1)
    self.bbox_pred = nn.Conv(in_channels, 4,
                             kernel_size=3, stride=1, padding=1)
    self.centerness = nn.Conv(in_channels, 1,
                              kernel_size=3, stride=1, padding=1)

    # initialization
    for modules in [self.cls_tower, self.bbox_tower, self.cls_logits,
                    self.bbox_pred, self.centerness]:
        for l in modules.modules():
            if isinstance(l, nn.Conv):
                nn.init.gauss_(l.weight, std=0.01)
                nn.init.constant_(l.bias, 0)

    # initialize the bias for focal loss
    prior_prob = cfg.MODEL.EMBED_MASK.PRIOR_PROB
    bias_value = -math.log((1 - prior_prob) / prior_prob)
    nn.init.constant_(self.cls_logits.bias, bias_value)

    # One learnable scale per FPN level; ModuleList takes a list of modules.
    self.scales = nn.ModuleList([Scale(init_value=1.0) for _ in range(5)])

    ########### Mask Predictions ############
    # proposal embedding
    self.proposal_spatial_embed_pred = nn.Conv(
        in_channels, 2, kernel_size=3, stride=1, padding=1, bias=True)
    self.proposal_other_embed_pred = nn.Conv(
        in_channels, embed_dim - 2,
        kernel_size=3, stride=1, padding=1, bias=True)
    for modules in [self.proposal_spatial_embed_pred,
                    self.proposal_other_embed_pred]:
        for l in modules.modules():
            if isinstance(l, nn.Conv):
                nn.init.gauss_(l.weight, std=0.01)
                nn.init.constant_(l.bias, 0)

    # proposal margin
    self.proposal_margin_pred = nn.Conv(
        in_channels, 1, kernel_size=3, stride=1, padding=1, bias=True)
    nn.init.gauss_(self.proposal_margin_pred.weight, std=0.01)
    nn.init.constant_(self.proposal_margin_pred.bias, self.init_sigma_bias)

    # pixel embedding
    self.mask_tower = nn.Sequential(*mask_tower)
    self.pixel_spatial_embed_pred = nn.Conv(
        in_channels, 2, kernel_size=3, stride=1, padding=1, bias=True)
    self.pixel_other_embed_pred = nn.Conv(
        in_channels, embed_dim - 2,
        kernel_size=3, stride=1, padding=1, bias=True)
    for modules in [self.mask_tower, self.pixel_spatial_embed_pred,
                    self.pixel_other_embed_pred]:
        for l in modules.modules():
            if isinstance(l, nn.Conv):
                nn.init.gauss_(l.weight, std=0.01)
                nn.init.constant_(l.bias, 0)

    self.position_scale = Scale(init_value=1.0)
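# Worked example of the init_sigma_bias formula above (the margin value is
# illustrative). The margin head predicts log(sigma), and under EmbedMask's
# exp(-sigma * d^2)-style mask probability the bias is chosen so that a pixel
# at distance d = prior_margin from a proposal embedding starts at p = 0.5:
#   exp(-sigma * margin^2) = 0.5  =>  sigma = -log(0.5) / margin^2.
import math

prior_margin = 2.0
init_sigma_bias = math.log(-math.log(0.5) / prior_margin ** 2)
print(init_sigma_bias)  # ~= -1.75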
def __init__(self, cfg, in_channels):
    """
    Arguments:
        in_channels (int): number of channels of the input feature
    """
    super(FCOSHead, self).__init__()
    # TODO: Implement the sigmoid version first.
    num_classes = cfg.MODEL.FCOS.NUM_CLASSES - 1
    self.fpn_strides = cfg.MODEL.FCOS.FPN_STRIDES
    self.norm_reg_targets = cfg.MODEL.FCOS.NORM_REG_TARGETS
    self.centerness_on_reg = cfg.MODEL.FCOS.CENTERNESS_ON_REG
    self.use_dcn_in_tower = cfg.MODEL.FCOS.USE_DCN_IN_TOWER

    cls_tower = []
    bbox_tower = []
    for i in range(cfg.MODEL.FCOS.NUM_CONVS):
        cls_tower.append(
            nn.Conv(in_channels, in_channels,
                    kernel_size=3, stride=1, padding=1))
        cls_tower.append(nn.GroupNorm(32, in_channels))
        cls_tower.append(nn.ReLU())
        bbox_tower.append(
            nn.Conv(in_channels, in_channels,
                    kernel_size=3, stride=1, padding=1))
        bbox_tower.append(nn.GroupNorm(32, in_channels))
        bbox_tower.append(nn.ReLU())

    self.cls_tower = nn.Sequential(*cls_tower)
    self.bbox_tower = nn.Sequential(*bbox_tower)

    self.dense_points = cfg.MODEL.FCOS.DENSE_POINTS
    self.cls_logits = nn.Conv(in_channels, num_classes * self.dense_points,
                              kernel_size=3, stride=1, padding=1)
    self.bbox_pred = nn.Conv(in_channels, 4 * self.dense_points,
                             kernel_size=3, stride=1, padding=1)
    self.centerness = nn.Conv(in_channels, 1 * self.dense_points,
                              kernel_size=3, stride=1, padding=1)

    # initialization
    for modules in [self.cls_tower, self.bbox_tower, self.cls_logits,
                    self.bbox_pred, self.centerness]:
        for l in modules.modules():
            if isinstance(l, nn.Conv):
                nn.init.gauss_(l.weight, std=0.01)
                nn.init.constant_(l.bias, 0)

    # initialize the bias for focal loss
    prior_prob = cfg.MODEL.FCOS.PRIOR_PROB
    bias_value = -math.log((1 - prior_prob) / prior_prob)
    nn.init.constant_(self.cls_logits.bias, bias_value)

    self.cfg = cfg
    # One learnable scale per FPN level; ModuleList takes a list of modules.
    self.scales = nn.ModuleList([Scale(init_value=1.0) for _ in range(5)])
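# The forward pass is not part of this snippet; below is a minimal sketch of
# the standard FCOS-style execute() such a head pairs with (Jittor modules use
# execute() rather than forward()). It assumes `import jittor as jt` and
# omits the norm_reg_targets and dense_points variants for brevity.
def execute(self, x):
    logits, bbox_reg, centerness = [], [], []
    for l, feature in enumerate(x):
        cls_tower = self.cls_tower(feature)
        box_tower = self.bbox_tower(feature)
        logits.append(self.cls_logits(cls_tower))
        if self.centerness_on_reg:
            centerness.append(self.centerness(box_tower))
        else:
            centerness.append(self.centerness(cls_tower))
        # per-level learnable scale, then exp to keep (l, t, r, b) positive
        bbox_reg.append(jt.exp(self.scales[l](self.bbox_pred(box_tower))))
    return logits, bbox_reg, centerness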
def __init__(self, cin, cout, zdim=128, nf=64, activation=nn.Tanh):
    super(EDDeconv, self).__init__()
    # Encoder: four stride-2 convs, then a 4x4 conv down to the zdim code.
    network = [
        nn.Conv(cin, nf, 4, stride=2, padding=1, bias=False),
        nn.GroupNorm(16, nf),
        nn.LeakyReLU(scale=0.2),
        nn.Conv(nf, nf * 2, 4, stride=2, padding=1, bias=False),
        nn.GroupNorm(16 * 2, nf * 2),
        nn.LeakyReLU(scale=0.2),
        nn.Conv(nf * 2, nf * 4, 4, stride=2, padding=1, bias=False),
        nn.GroupNorm(16 * 4, nf * 4),
        nn.LeakyReLU(scale=0.2),
        nn.Conv(nf * 4, nf * 8, 4, stride=2, padding=1, bias=False),
        nn.LeakyReLU(scale=0.2),
        nn.Conv(nf * 8, zdim, 4, stride=1, padding=0, bias=False),
        nn.ReLU(),
    ]
    # Decoder: deconvs interleaved with 3x3 convs, plus a final nearest upsample.
    network += [
        nn.ConvTranspose(zdim, nf * 8, 4, stride=1, padding=0, bias=False),
        nn.ReLU(),
        nn.Conv(nf * 8, nf * 8, 3, stride=1, padding=1, bias=False),
        nn.ReLU(),
        nn.ConvTranspose(nf * 8, nf * 4, 4, stride=2, padding=1, bias=False),
        nn.GroupNorm(16 * 4, nf * 4),
        nn.ReLU(),
        nn.Conv(nf * 4, nf * 4, 3, stride=1, padding=1, bias=False),
        nn.GroupNorm(16 * 4, nf * 4),
        nn.ReLU(),
        nn.ConvTranspose(nf * 4, nf * 2, 4, stride=2, padding=1, bias=False),
        nn.GroupNorm(16 * 2, nf * 2),
        nn.ReLU(),
        nn.Conv(nf * 2, nf * 2, 3, stride=1, padding=1, bias=False),
        nn.GroupNorm(16 * 2, nf * 2),
        nn.ReLU(),
        nn.ConvTranspose(nf * 2, nf, 4, stride=2, padding=1, bias=False),
        nn.GroupNorm(16, nf),
        nn.ReLU(),
        nn.Conv(nf, nf, 3, stride=1, padding=1, bias=False),
        nn.GroupNorm(16, nf),
        nn.ReLU(),
        nn.Upsample(scale_factor=2, mode='nearest'),
        nn.Conv(nf, nf, 3, stride=1, padding=1, bias=False),
        nn.GroupNorm(16, nf),
        nn.ReLU(),
        nn.Conv(nf, nf, 5, stride=1, padding=2, bias=False),
        nn.GroupNorm(16, nf),
        nn.ReLU(),
        nn.Conv(nf, cout, 5, stride=1, padding=2, bias=False),
    ]
    if activation is not None:
        network += [activation()]
    self.network = nn.Sequential(*network)
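# Shape sketch, assuming a 64x64 input (the input size is an assumption): the
# encoder maps 64 -> 4 via four stride-2 convs, the final 4x4 conv yields a
# 1x1 zdim code, and three stride-2 deconvs plus the nearest upsample restore
# 64x64 with cout channels; nn.Tanh bounds the output to (-1, 1).
import jittor as jt

net = EDDeconv(cin=3, cout=1, activation=nn.Tanh)
depth = net.network(jt.randn(1, 3, 64, 64))  # (1, 1, 64, 64)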
def __init__(self, layers, num_groups=32):
    super().__init__(layers, norm_layer=lambda x: nn.GroupNorm(num_groups, x))
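# Usage sketch of the norm_layer convention above: the base class is expected
# to call norm_layer(num_channels) wherever it would otherwise build a
# BatchNorm, so the lambda binds the group count ahead of time. For example
# (channel count illustrative):
norm_layer = lambda x: nn.GroupNorm(32, x)
gn = norm_layer(256)  # GroupNorm over 256 channels split into 32 groups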