def __init__(self, config):
    """Build the RoI pooler and the single-stage ResNet head.

    Arguments:
        config: project configuration node. The ``MODEL.ROI_BOX_HEAD``
            pooler settings, the ``MODEL.RESNETS`` options and the
            ``MODEL.DECONV`` normalisation options are read.

    Attributes set:
        pooler: ``Pooler`` producing ``resolution x resolution`` outputs.
        head: ``resnet.ResNetHead`` built from one ``StageSpec`` with
            ``index=4`` and ``block_count=3``.
        box_norm: ``LayerNorm``; only created when layer-wise
            normalisation is disabled and
            ``MODEL.DECONV.BOX_NORM_TYPE == 'layernorm'``.
    """
    super(ResNet50Conv5ROIFeatureExtractor, self).__init__()

    resolution = config.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION
    scales = config.MODEL.ROI_BOX_HEAD.POOLER_SCALES
    sampling_ratio = config.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO
    pooler = Pooler(
        output_size=(resolution, resolution),
        scales=scales,
        sampling_ratio=sampling_ratio,
    )

    stage = resnet.StageSpec(index=4, block_count=3, return_features=False)
    head = resnet.ResNetHead(
        block_module=config.MODEL.RESNETS.TRANS_FUNC,
        stages=(stage, ),
        num_groups=config.MODEL.RESNETS.NUM_GROUPS,
        width_per_group=config.MODEL.RESNETS.WIDTH_PER_GROUP,
        stride_in_1x1=config.MODEL.RESNETS.STRIDE_IN_1X1,
        stride_init=None,
        res2_out_channels=config.MODEL.RESNETS.RES2_OUT_CHANNELS,
        dilation=config.MODEL.RESNETS.RES5_DILATION,
        cfg=config)

    self.pooler = pooler
    self.head = head

    # NOTE(review): no per-layer norm type is forwarded to ResNetHead from
    # this constructor (the head only receives ``cfg``); confirm that
    # MODEL.DECONV.LAYERWISE_NORM is honoured inside the head itself.
    if (not config.MODEL.DECONV.LAYERWISE_NORM
            and config.MODEL.DECONV.BOX_NORM_TYPE == 'layernorm'):
        self.box_norm = LayerNorm(eps=config.MODEL.DECONV.EPS)
def __init__(self, cfg):
    """Create the class-score and box-regression output layers.

    Arguments:
        cfg: project configuration node. ``MODEL.ROI_BOX_HEAD`` supplies
            the class count, the representation size and the
            ``USE_DECONV`` switch; ``MODEL.DECONV`` supplies the
            normalisation and ``Delinear`` options.

    Attributes set:
        cls_score: maps ``MLP_HEAD_DIM`` features to ``NUM_CLASSES``.
        bbox_pred: maps ``MLP_HEAD_DIM`` features to ``NUM_CLASSES * 4``.
        box_norm: ``LayerNorm``; only created when layer-wise
            normalisation is disabled and ``BOX_NORM_TYPE`` is
            ``'rfnorm'`` or ``'layernorm'``.
    """
    super(FPNPredictor, self).__init__()

    box_head_cfg = cfg.MODEL.ROI_BOX_HEAD
    deconv_cfg = cfg.MODEL.DECONV

    num_classes = box_head_cfg.NUM_CLASSES
    representation_size = box_head_cfg.MLP_HEAD_DIM

    # Either every layer normalises itself (norm_type is handed to the
    # layers) or a single norm module is kept on this predictor.
    norm_type = 'none'
    if deconv_cfg.LAYERWISE_NORM:
        norm_type = deconv_cfg.BOX_NORM_TYPE
    elif deconv_cfg.BOX_NORM_TYPE in ('rfnorm', 'layernorm'):
        self.box_norm = LayerNorm(eps=deconv_cfg.EPS)

    if box_head_cfg.USE_DECONV:
        delinear_kwargs = dict(
            block=deconv_cfg.BLOCK_FC,
            sync=deconv_cfg.SYNC,
            norm_type=norm_type,
        )
        self.cls_score = Delinear(
            representation_size, num_classes, **delinear_kwargs)
        self.bbox_pred = Delinear(
            representation_size, num_classes * 4, **delinear_kwargs)
    else:
        self.cls_score = nn.Linear(representation_size, num_classes)
        self.bbox_pred = nn.Linear(representation_size, num_classes * 4)

    # Weights: zero-mean normal with a per-layer std; biases: zero.
    for layer, weight_std in ((self.cls_score, 0.01), (self.bbox_pred, 0.001)):
        nn.init.normal_(layer.weight, std=weight_std)
        nn.init.constant_(layer.bias, 0)
def __init__(self, cfg, in_channels, num_anchors):
    """
    Build the 3x3 conv and the two 1x1 prediction layers of the head.

    Arguments:
        cfg              : config (MODEL.RPN.USE_DECONV and MODEL.DECONV
                           are read)
        in_channels (int): number of channels of the input feature
        num_anchors (int): number of anchors to be predicted

    Attributes set:
        conv       : 3x3, stride 1, padding 1, in_channels -> in_channels
        cls_logits : 1x1, in_channels -> num_anchors
        bbox_pred  : 1x1, in_channels -> num_anchors * 4
        rpn_norm   : LayerNorm, only when layer-wise normalisation is off
                     and MODEL.DECONV.RPN_NORM_TYPE == 'layernorm'
    """
    super(RPNHead, self).__init__()

    deconv_cfg = cfg.MODEL.DECONV

    norm_type = 'none'
    if deconv_cfg.LAYERWISE_NORM:
        norm_type = deconv_cfg.RPN_NORM_TYPE
    elif deconv_cfg.RPN_NORM_TYPE == 'layernorm':
        self.rpn_norm = LayerNorm(eps=deconv_cfg.EPS)

    if cfg.MODEL.RPN.USE_DECONV:
        # Options shared by all three Deconv layers.
        shared = dict(
            stride=1,
            block=deconv_cfg.BLOCK,
            sampling_stride=deconv_cfg.STRIDE,
            sync=deconv_cfg.SYNC,
            norm_type=norm_type,
        )
        self.conv = Deconv(
            in_channels, in_channels, kernel_size=3, padding=1, **shared)
        self.cls_logits = Deconv(
            in_channels, num_anchors, kernel_size=1, **shared)
        self.bbox_pred = Deconv(
            in_channels, num_anchors * 4, kernel_size=1, **shared)
    else:
        self.conv = nn.Conv2d(
            in_channels, in_channels, kernel_size=3, stride=1, padding=1)
        self.cls_logits = nn.Conv2d(
            in_channels, num_anchors, kernel_size=1, stride=1)
        self.bbox_pred = nn.Conv2d(
            in_channels, num_anchors * 4, kernel_size=1, stride=1)

    # Same initialisation for every layer: N(0, 0.01) weights, zero bias.
    for layer in (self.conv, self.cls_logits, self.bbox_pred):
        torch.nn.init.normal_(layer.weight, std=0.01)
        torch.nn.init.constant_(layer.bias, 0)
def __init__(self, cfg):
    """
    Build the RoI pooler and the stack of 3x3 mask-head layers.

    Arguments:
        cfg: config. MODEL.ROI_MASK_HEAD supplies the pooler settings,
            the per-layer widths (CONV_LAYERS), DILATION and the
            USE_GN / USE_GW / USE_DECONV switches;
            MODEL.BACKBONE.OUT_CHANNELS is the width of the first input;
            MODEL.DECONV supplies the deconv and normalisation options.

    Attributes set:
        pooler: Pooler with a square output of POOLER_RESOLUTION.
        blocks: list of registered layer names, "mask_fcn1", "mask_fcn2", ...
        mask_norm: LayerNorm, only when layer-wise normalisation is off
            and MODEL.DECONV.MASK_NORM_TYPE == 'layernorm'.
    """
    super(MaskRCNNFPNFeatureExtractor, self).__init__()

    mask_cfg = cfg.MODEL.ROI_MASK_HEAD
    deconv_cfg = cfg.MODEL.DECONV

    side = mask_cfg.POOLER_RESOLUTION
    self.pooler = Pooler(
        output_size=(side, side),
        scales=mask_cfg.POOLER_SCALES,
        sampling_ratio=mask_cfg.POOLER_SAMPLING_RATIO,
    )

    use_deconv = mask_cfg.USE_DECONV
    # The use_gn / use_gw flags are forced off whenever deconv is used.
    use_gn = False if use_deconv else mask_cfg.USE_GN
    use_gw = False if use_deconv else mask_cfg.USE_GW

    self.blocks = []

    norm_type = 'none'
    if deconv_cfg.LAYERWISE_NORM:
        norm_type = deconv_cfg.MASK_NORM_TYPE
    elif deconv_cfg.MASK_NORM_TYPE == 'layernorm':
        self.mask_norm = LayerNorm(eps=deconv_cfg.EPS)

    # Everything except the channel counts is identical for each layer.
    conv_kwargs = dict(
        dilation=mask_cfg.DILATION,
        stride=1,
        use_gn=use_gn,
        use_gw=use_gw,
        use_deconv=use_deconv,
        block=deconv_cfg.BLOCK,
        sampling_stride=deconv_cfg.STRIDE,
        sync=deconv_cfg.SYNC,
        norm_type=norm_type,
    )

    in_features = cfg.MODEL.BACKBONE.OUT_CHANNELS
    for layer_idx, out_features in enumerate(mask_cfg.CONV_LAYERS, 1):
        layer_name = "mask_fcn{}".format(layer_idx)
        self.add_module(
            layer_name,
            make_conv3x3(in_features, out_features, **conv_kwargs))
        self.blocks.append(layer_name)
        # The next layer consumes what this one produces.
        in_features = out_features
def __init__(self, cfg):
    """Create the 2x2 transposed conv and the 1x1 mask-logit layer.

    Arguments:
        cfg: project configuration node. Reads
            ``MODEL.ROI_BOX_HEAD.NUM_CLASSES``,
            ``MODEL.ROI_MASK_HEAD.CONV_LAYERS`` (last entry is the reduced
            width), ``MODEL.ROI_HEADS.USE_FPN``,
            ``MODEL.RESNETS.RES2_OUT_CHANNELS``,
            ``MODEL.ROI_MASK_HEAD.USE_DECONV`` and ``MODEL.DECONV``.

    Attributes set:
        conv5_mask: kernel 2, stride 2, padding 0 transposed layer,
            ``num_inputs -> dim_reduced``.
        mask_fcn_logits: kernel 1, stride 1, padding 0 layer,
            ``dim_reduced -> num_classes``.
        mask_norm: ``LayerNorm``; only on the deconv path, when layer-wise
            normalisation is off and ``MASK_NORM_TYPE == 'layernorm'``.
    """
    super(MaskRCNNC4Predictor, self).__init__()

    num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES
    dim_reduced = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS[-1]

    # With FPN the input already has the reduced width; otherwise the
    # width is derived from RES2_OUT_CHANNELS scaled for stage index 4.
    num_inputs = dim_reduced
    if not cfg.MODEL.ROI_HEADS.USE_FPN:
        stage_index = 4
        num_inputs = (cfg.MODEL.RESNETS.RES2_OUT_CHANNELS
                      * 2 ** (stage_index - 1))

    if cfg.MODEL.ROI_MASK_HEAD.USE_DECONV:
        deconv_cfg = cfg.MODEL.DECONV

        norm_type = 'none'
        if deconv_cfg.LAYERWISE_NORM:
            norm_type = deconv_cfg.MASK_NORM_TYPE
        elif deconv_cfg.MASK_NORM_TYPE == 'layernorm':
            self.mask_norm = LayerNorm(eps=deconv_cfg.EPS)

        shared = dict(
            block=deconv_cfg.BLOCK,
            sampling_stride=deconv_cfg.STRIDE,
            sync=deconv_cfg.SYNC,
            norm_type=norm_type,
        )
        self.conv5_mask = DeconvTransposed(
            num_inputs, dim_reduced, 2, 2, 0, **shared)
        self.mask_fcn_logits = Deconv(
            dim_reduced, num_classes, 1, 1, 0, **shared)
    else:
        self.conv5_mask = ConvTranspose2d(num_inputs, dim_reduced, 2, 2, 0)
        self.mask_fcn_logits = Conv2d(dim_reduced, num_classes, 1, 1, 0)

    for param_name, tensor in self.named_parameters():
        if "bias" in param_name:
            nn.init.constant_(tensor, 0)
        elif "weight" in param_name:
            # Caffe2's MSRAFill is what PyTorch calls kaiming_normal_.
            nn.init.kaiming_normal_(
                tensor, mode="fan_out", nonlinearity="relu")
def __init__(self, cfg):
    """Build the RoI pooler and the two fully connected layers fc6/fc7.

    Arguments:
        cfg: project configuration node. ``MODEL.ROI_BOX_HEAD`` supplies
            the pooler settings, ``MLP_HEAD_DIM`` and the
            ``USE_GN`` / ``USE_GW`` / ``USE_DECONV`` switches;
            ``MODEL.BACKBONE.OUT_CHANNELS`` and ``MODEL.DECONV`` are
            read as well.

    Attributes set:
        pooler: ``Pooler`` with a square ``POOLER_RESOLUTION`` output.
        fc6: ``OUT_CHANNELS * resolution**2 -> MLP_HEAD_DIM``.
        fc7: ``MLP_HEAD_DIM -> MLP_HEAD_DIM``.
        box_norm: ``LayerNorm``; only when layer-wise normalisation is
            off and ``BOX_NORM_TYPE`` is ``'rfnorm'`` or ``'layernorm'``.
    """
    super(FPN2MLPFeatureExtractor, self).__init__()

    box_cfg = cfg.MODEL.ROI_BOX_HEAD
    deconv_cfg = cfg.MODEL.DECONV

    resolution = box_cfg.POOLER_RESOLUTION
    pooler = Pooler(
        output_size=(resolution, resolution),
        scales=box_cfg.POOLER_SCALES,
        sampling_ratio=box_cfg.POOLER_SAMPLING_RATIO,
    )
    input_size = cfg.MODEL.BACKBONE.OUT_CHANNELS * resolution ** 2
    representation_size = box_cfg.MLP_HEAD_DIM

    use_delinear = box_cfg.USE_DECONV
    if use_delinear:
        # The delinear path overrides the use_gn / use_gw flags.
        use_gn = False
        use_gw = False
        block = deconv_cfg.BLOCK_FC  # check here
    else:
        use_gn = box_cfg.USE_GN
        use_gw = box_cfg.USE_GW
        block = 0

    norm_type = 'none'
    if deconv_cfg.LAYERWISE_NORM:
        norm_type = deconv_cfg.BOX_NORM_TYPE
    elif deconv_cfg.BOX_NORM_TYPE in ('rfnorm', 'layernorm'):
        self.box_norm = LayerNorm(eps=deconv_cfg.EPS)

    self.pooler = pooler

    fc_kwargs = dict(block=block, sync=deconv_cfg.SYNC, norm_type=norm_type)
    self.fc6 = make_fc(
        input_size, representation_size,
        use_gn, use_gw, use_delinear, **fc_kwargs)
    self.fc7 = make_fc(
        representation_size, representation_size,
        use_gn, use_gw, use_delinear, **fc_kwargs)
def __init__(self, config, pretrained=None):
    """Create the average pool and the class / box output layers.

    Arguments:
        config: project configuration node. Reads
            ``MODEL.RESNETS.RES2_OUT_CHANNELS``,
            ``MODEL.ROI_BOX_HEAD.NUM_CLASSES``,
            ``MODEL.ROI_BOX_HEAD.USE_DECONV`` and ``MODEL.DECONV``.
        pretrained: accepted for interface compatibility; this
            constructor does not read it.

    Attributes set:
        avgpool: ``nn.AvgPool2d(kernel_size=7, stride=7)``.
        cls_score: ``num_inputs -> NUM_CLASSES``.
        bbox_pred: ``num_inputs -> NUM_CLASSES * 4``.
        box_norm: ``LayerNorm``; only when layer-wise normalisation is
            off and ``BOX_NORM_TYPE`` is ``'rfnorm'`` or ``'layernorm'``.
    """
    super(FastRCNNPredictor, self).__init__()

    deconv_cfg = config.MODEL.DECONV

    # Input width: RES2_OUT_CHANNELS scaled for stage index 4.
    stage_index = 4
    num_inputs = (config.MODEL.RESNETS.RES2_OUT_CHANNELS
                  * 2 ** (stage_index - 1))
    num_classes = config.MODEL.ROI_BOX_HEAD.NUM_CLASSES

    self.avgpool = nn.AvgPool2d(kernel_size=7, stride=7)

    norm_type = 'none'
    if deconv_cfg.LAYERWISE_NORM:
        norm_type = deconv_cfg.BOX_NORM_TYPE
    elif deconv_cfg.BOX_NORM_TYPE in ('rfnorm', 'layernorm'):
        self.box_norm = LayerNorm(eps=deconv_cfg.EPS)

    if config.MODEL.ROI_BOX_HEAD.USE_DECONV:
        delinear_kwargs = dict(
            block=deconv_cfg.BLOCK_FC,
            sync=deconv_cfg.SYNC,
            norm_type=norm_type,
        )
        self.cls_score = Delinear(num_inputs, num_classes, **delinear_kwargs)
        self.bbox_pred = Delinear(
            num_inputs, num_classes * 4, **delinear_kwargs)
    else:
        self.cls_score = nn.Linear(num_inputs, num_classes)
        self.bbox_pred = nn.Linear(num_inputs, num_classes * 4)

    # Weight then bias for each layer, classifier first.
    for layer, weight_std in ((self.cls_score, 0.01), (self.bbox_pred, 0.001)):
        nn.init.normal_(layer.weight, mean=0, std=weight_std)
        nn.init.constant_(layer.bias, 0)
# Constructor of FPNXconv1fcFeatureExtractor.
#
# NOTE(review): this definition is stored flattened (statements are not on
# separate, indented lines), so the summary below states only what the
# tokens show; where nesting cannot be read off, it is marked as such.
#
# Steps, in textual order:
#   1. Build a Pooler from MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION /
#      POOLER_SCALES / POOLER_SAMPLING_RATIO and store it as self.pooler.
#   2. Choose norm_type: MODEL.DECONV.BOX_NORM_TYPE when
#      MODEL.DECONV.LAYERWISE_NORM is set, else 'none'. A
#      LayerNorm(eps=MODEL.DECONV.EPS) is stored as self.box_norm when
#      BOX_NORM_TYPE == 'layernorm' (presumably only on the non-layerwise
#      path -- TODO confirm).
#   3. For each of NUM_STACKED_CONVS iterations append a 3x3, stride-1
#      layer with padding=dilation to xconvs: Deconv(bias=True, ...) when
#      MODEL.ROI_BOX_HEAD.USE_DECONV is set, otherwise nn.Conv2d whose bias
#      is disabled when use_gn or use_gw is set. group_norm(in_channels)
#      (when use_gn or use_gw) and nn.ReLU(inplace=True) are appended too.
#      NOTE(review): it is not visible here whether the group_norm append
#      applies to both branches or only to the nn.Conv2d branch -- confirm
#      before reformatting this method.
#   4. Register nn.Sequential(*xconvs) as "xconvs"; every nn.Conv2d / Deconv
#      inside gets weights drawn with std=0.01, and a zero bias when neither
#      use_gn nor use_gw is set.
#   5. self.fc6 = make_fc(conv_head_dim * resolution**2, MLP_HEAD_DIM, ...)
#      with use_gn=False and use_gw=False; block is MODEL.DECONV.BLOCK_FC
#      when USE_DECONV is set, else 0.
def __init__(self, cfg): super(FPNXconv1fcFeatureExtractor, self).__init__() resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION scales = cfg.MODEL.ROI_BOX_HEAD.POOLER_SCALES sampling_ratio = cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO pooler = Pooler( output_size=(resolution, resolution), scales=scales, sampling_ratio=sampling_ratio, ) self.pooler = pooler use_gn = cfg.MODEL.ROI_BOX_HEAD.USE_GN use_gw = cfg.MODEL.ROI_BOX_HEAD.USE_GW in_channels = cfg.MODEL.BACKBONE.OUT_CHANNELS conv_head_dim = cfg.MODEL.ROI_BOX_HEAD.CONV_HEAD_DIM num_stacked_convs = cfg.MODEL.ROI_BOX_HEAD.NUM_STACKED_CONVS dilation = cfg.MODEL.ROI_BOX_HEAD.DILATION if cfg.MODEL.DECONV.LAYERWISE_NORM: norm_type = cfg.MODEL.DECONV.BOX_NORM_TYPE else: norm_type = 'none' if cfg.MODEL.DECONV.BOX_NORM_TYPE == 'layernorm': self.box_norm = LayerNorm(eps=cfg.MODEL.DECONV.EPS) xconvs = [] for ix in range(num_stacked_convs): if cfg.MODEL.ROI_BOX_HEAD.USE_DECONV: xconvs.append( Deconv(in_channels, conv_head_dim, kernel_size=3, stride=1, padding=dilation, dilation=dilation, bias=True, block=cfg.MODEL.DECONV.BLOCK, sampling_stride=cfg.MODEL.DECONV.STRIDE, sync=cfg.MODEL.DECONV.SYNC, norm_type=norm_type)) in_channels = conv_head_dim else: xconvs.append( nn.Conv2d(in_channels, conv_head_dim, kernel_size=3, stride=1, padding=dilation, dilation=dilation, bias=False if (use_gn or use_gw) else True)) in_channels = conv_head_dim if use_gn or use_gw: xconvs.append(group_norm(in_channels)) xconvs.append(nn.ReLU(inplace=True)) self.add_module("xconvs", nn.Sequential(*xconvs)) for modules in [ self.xconvs, ]: for l in modules.modules(): if isinstance(l, nn.Conv2d) or isinstance(l, Deconv): torch.nn.init.normal_(l.weight, std=0.01) if not (use_gn or use_gw): torch.nn.init.constant_(l.bias, 0) input_size = conv_head_dim * resolution**2 representation_size = cfg.MODEL.ROI_BOX_HEAD.MLP_HEAD_DIM block = 0 use_delinear = cfg.MODEL.ROI_BOX_HEAD.USE_DECONV if use_delinear: block = cfg.MODEL.DECONV.BLOCK_FC #check here self.fc6 
= make_fc(input_size, representation_size, use_gn=False, use_gw=False, use_delinear=use_delinear, block=block, sync=cfg.MODEL.DECONV.SYNC, norm_type=norm_type)
def __init__(self, cfg):
    """Assemble the stem and the configured ResNet stages.

    Arguments:
        cfg: project configuration node. ``MODEL.RESNETS`` selects the
            stem / transformation implementations and channel widths,
            ``MODEL.BACKBONE`` selects the stage specs and the freeze
            depth, ``MODEL.DECONV`` supplies deconv and normalisation
            options.

    Attributes set:
        stem: module built by the configured stem function.
        layer<index>: one registered module per stage spec.
        stages: list of the registered stage names, in order.
        return_features: maps stage name -> ``stage_spec.return_features``.
        stem_norm / bottleneck_norm / fpn_norm: ``LayerNorm`` modules,
            each created only when layer-wise normalisation is off and
            the matching ``*_NORM_TYPE`` equals ``'layernorm'``.
    """
    super(ResNet, self).__init__()

    # cfg is not stored. If forward() ever needs it, keep a copy instead:
    # self.cfg = cfg.clone()

    resnets_cfg = cfg.MODEL.RESNETS
    deconv_cfg = cfg.MODEL.DECONV
    trans_func = resnets_cfg.TRANS_FUNC

    # Resolve the string names from the config to implementations.
    stem_module = _STEM_MODULES[resnets_cfg.STEM_FUNC]
    stage_specs = _STAGE_SPECS[cfg.MODEL.BACKBONE.CONV_BODY]
    transformation_module = _TRANSFORMATION_MODULES[trans_func]

    norm_type = (deconv_cfg.BOTTLENECK_NORM_TYPE
                 if deconv_cfg.LAYERWISE_NORM else 'none')

    if 'Deconv' in trans_func:
        # Deconv transformation blocks take extra keyword options.
        transformation_module = functools.partial(
            transformation_module,
            block=deconv_cfg.BLOCK,
            sampling_stride=deconv_cfg.STRIDE,
            sync=deconv_cfg.SYNC,
            norm_type=norm_type,
        )

    # Stem first, so it is registered ahead of the stages.
    self.stem = stem_module(cfg)

    num_groups = resnets_cfg.NUM_GROUPS
    width_per_group = resnets_cfg.WIDTH_PER_GROUP
    stage2_bottleneck_channels = num_groups * width_per_group
    stage2_out_channels = resnets_cfg.RES2_OUT_CHANNELS

    self.stages = []
    self.return_features = {}

    in_channels = resnets_cfg.STEM_OUT_CHANNELS
    for spec in stage_specs:
        name = "layer" + str(spec.index)
        # Channel widths double with every stage after index 1.
        scale = 2 ** (spec.index - 1)
        out_channels = stage2_out_channels * scale
        module = _make_stage(
            transformation_module,
            in_channels,
            stage2_bottleneck_channels * scale,
            out_channels,
            spec.block_count,
            num_groups,
            resnets_cfg.STRIDE_IN_1X1,
            first_stride=2 if spec.index > 1 else 1,
        )
        self.add_module(name, module)
        self.stages.append(name)
        self.return_features[name] = spec.return_features
        in_channels = out_channels

    # Optionally freeze (requires_grad=False) parts of the backbone.
    self._freeze_backbone(cfg.MODEL.BACKBONE.FREEZE_CONV_BODY_AT)

    # Stand-alone norm modules exist only without layer-wise norm.
    if not deconv_cfg.LAYERWISE_NORM:
        if deconv_cfg.STEM_NORM_TYPE == 'layernorm':
            self.stem_norm = LayerNorm(eps=deconv_cfg.EPS)
        if deconv_cfg.BOTTLENECK_NORM_TYPE == 'layernorm':
            self.bottleneck_norm = LayerNorm(eps=deconv_cfg.EPS)
        if deconv_cfg.FPN_NORM_TYPE == 'layernorm':
            self.fpn_norm = LayerNorm(eps=deconv_cfg.EPS)