def __init__(self, cfg):
    """Create the convolutional trunk, the per-scale poolers and fusion convs.

    Arguments:
        cfg: config node; only MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO
            is read by this constructor.
    """
    super(PRCNNFeatureExtractor, self).__init__()
    sampling_ratio = cfg.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO

    # Eight 3x3 convolutions; the channel width doubles every second layer.
    self.conv1 = Conv2d(3, 32, 3, stride=1, padding=1)
    self.conv2 = Conv2d(32, 32, 3, stride=1, padding=1)
    self.conv3 = Conv2d(32, 64, 3, stride=1, padding=1)
    self.conv4 = Conv2d(64, 64, 3, stride=1, padding=1)
    self.conv5 = Conv2d(64, 128, 3, stride=1, padding=1)
    self.conv6 = Conv2d(128, 128, 3, stride=1, padding=1)
    self.conv7 = Conv2d(128, 256, 3, stride=1, padding=1)
    self.conv8 = Conv2d(256, 256, 3, stride=1, padding=1)

    # Every pooler has a fixed 25x25 output and a single scale; the scale
    # halves from one pooler to the next, with a stride-2 max pool between.
    self.pooler1 = Pooler(
        output_size=(25, 25),
        scales=(1.,),
        sampling_ratio=sampling_ratio,
    )
    self.p1 = nn.MaxPool2d(3, 2, 1)
    self.pooler2 = Pooler(
        output_size=(25, 25),
        scales=(0.5,),
        sampling_ratio=sampling_ratio,
    )
    self.p2 = nn.MaxPool2d(3, 2, 1)
    self.pooler3 = Pooler(
        output_size=(25, 25),
        scales=(0.25,),
        sampling_ratio=sampling_ratio,
    )
    self.p3 = nn.MaxPool2d(3, 2, 1)
    self.pooler4 = Pooler(
        output_size=(25, 25),
        scales=(0.125,),
        sampling_ratio=sampling_ratio,
    )

    # 480 == 32 + 64 + 128 + 256; presumably the four pooled maps are
    # concatenated before posconv1 -- TODO confirm against forward().
    self.posconv1 = Conv2d(480, 256, 3, stride=1, padding=1)
    self.posconv2 = Conv2d(256, 32, 3, stride=1, padding=1)

    conv_layers = (
        self.conv1, self.conv2, self.conv3, self.conv4,
        self.conv5, self.conv6, self.conv7, self.conv8,
        self.posconv1, self.posconv2,
    )
    for layer in conv_layers:
        nn.init.kaiming_normal_(layer.weight, mode="fan_out", nonlinearity="relu")
        nn.init.constant_(layer.bias, 0)
def __init__(self, cfg, in_channels):
    """Build the ROI pooler and the two strided oriented-conv layers.

    Arguments:
        cfg: config node; reads the MODEL.ROI_BOX_HEAD pooler settings,
            MODEL.ROI_BOX_HEAD.MLP_HEAD_DIM and
            MODEL.OR_RESNETS.ORIENTATION.
        in_channels (int): number of channels of the input feature
    """
    super(FPN2ORNFeatureExtractor, self).__init__()

    resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION
    scales = cfg.MODEL.ROI_BOX_HEAD.POOLER_SCALES
    sampling_ratio = cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO
    pooler = Pooler(
        output_size=(resolution, resolution),
        scales=scales,
        sampling_ratio=sampling_ratio,
    )

    input_size = in_channels
    representation_size = cfg.MODEL.ROI_BOX_HEAD.MLP_HEAD_DIM
    nOrientation = cfg.MODEL.OR_RESNETS.ORIENTATION

    self.pooler = pooler

    # Channel counts handed to ORConv2d are divided by the orientation count.
    orn6_in = int(input_size / nOrientation)
    orn6_out = int(input_size * 4 / nOrientation)
    orn7_out = int(representation_size * 2 / nOrientation)
    self.orn6 = ORConv2d(orn6_in, orn6_out, arf_config=nOrientation,
                         kernel_size=3, stride=2)
    self.orn7 = ORConv2d(orn6_out, orn7_out, arf_config=nOrientation,
                         kernel_size=3, stride=2)

    nn.init.kaiming_uniform_(self.orn6.weight, a=1)
    nn.init.constant_(self.orn6.bias, 0)
    nn.init.kaiming_uniform_(self.orn7.weight, a=1)
    nn.init.constant_(self.orn7.bias, 0)

    self.out_channels = representation_size
def __init__(self, cfg):
    """Build the ROI pooler and the two fully connected layers.

    Arguments:
        cfg: config node; reads the MODEL.ROI_BOX_HEAD pooler settings,
            MODEL.ROI_BOX_HEAD.MLP_HEAD_DIM and MODEL.BACKBONE.OUT_CHANNELS.
    """
    super(FPN2MLPFeatureExtractor, self).__init__()

    box_head_cfg = cfg.MODEL.ROI_BOX_HEAD
    resolution = box_head_cfg.POOLER_RESOLUTION
    self.pooler = Pooler(
        output_size=(resolution, resolution),
        scales=box_head_cfg.POOLER_SCALES,
        sampling_ratio=box_head_cfg.POOLER_SAMPLING_RATIO,
        drop_last=True,
    )

    # fc6 takes backbone channels times the pooled spatial area as input.
    flattened_size = cfg.MODEL.BACKBONE.OUT_CHANNELS * resolution ** 2
    hidden_size = box_head_cfg.MLP_HEAD_DIM
    self.fc6 = nn.Linear(flattened_size, hidden_size)
    self.fc7 = nn.Linear(hidden_size, hidden_size)

    for fc_layer in (self.fc6, self.fc7):
        nn.init.kaiming_uniform_(fc_layer.weight, a=1)
        nn.init.constant_(fc_layer.bias, 0)
def __init__(self, config, pretrained=None):
    """Build the ROI pooler and a ResNet stage-5 head, optionally pretrained.

    Arguments:
        config: config node; reads the MODEL.ROI_BOX_HEAD pooler settings
            and the MODEL.RESNETS block settings.
        pretrained: optional path (or file-like object) of a checkpoint
            whose state dict is loaded into the head non-strictly.
    """
    super(ResNet50Conv5ROIFeatureExtractor, self).__init__()

    resolution = config.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION
    scales = config.MODEL.ROI_BOX_HEAD.POOLER_SCALES
    sampling_ratio = config.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO
    pooler = Pooler(
        output_size=(resolution, resolution),
        scales=scales,
        sampling_ratio=sampling_ratio,
        drop_last=False,
    )

    stage = resnet.StageSpec(index=5, block_count=3, return_features=False)
    head = resnet.ResNetHead(
        block_module=config.MODEL.RESNETS.TRANS_FUNC,
        stages=(stage,),
        num_groups=config.MODEL.RESNETS.NUM_GROUPS,
        width_per_group=config.MODEL.RESNETS.WIDTH_PER_GROUP,
        stride_in_1x1=config.MODEL.RESNETS.STRIDE_IN_1X1,
        stride_init=None,
    )

    if pretrained:
        # NOTE: torch.load unpickles the file, so `pretrained` must point to
        # a trusted checkpoint. Mapping to the CPU lets checkpoints saved
        # from a GPU be restored on hosts that lack that device.
        state_dict = torch.load(pretrained, map_location=torch.device("cpu"))
        load_state_dict(head, state_dict, strict=False)

    self.pooler = pooler
    self.head = head
def __init__(self, cfg):
    """Build the ROI pooler and a stack of 3x3 convolutions.

    Arguments:
        cfg: YACS config node containing configuration settings
    """
    super(VLineFPNFeatureExtractor, self).__init__()

    head_cfg = cfg.MODEL.VLINE_HEAD
    resolution = head_cfg.POOLER_RESOLUTION
    scales = head_cfg.POOLER_SCALES
    sampling_ratio = head_cfg.POOLER_SAMPLING_RATIO
    in_features = cfg.MODEL.RESNETS.BACKBONE_OUT_CHANNELS
    conv_widths = head_cfg.CONV_LAYERS

    self.pooler = Pooler(
        output_size=(resolution, resolution),
        scales=scales,
        sampling_ratio=sampling_ratio,
    )

    # self.blocks stores the registered module names in creation order.
    self.blocks = []
    for index, out_features in enumerate(conv_widths, 1):
        name = "vp_mask_fcn{}".format(index)
        conv = Conv2d(in_features, out_features, 3, stride=1, padding=1)
        nn.init.kaiming_normal_(conv.weight, mode="fan_out", nonlinearity="relu")
        nn.init.constant_(conv.bias, 0)
        self.add_module(name, conv)
        self.blocks.append(name)
        in_features = out_features
def __init__(self, cfg, in_channels, half_out=False, cat_all_levels=False):
    """Build the ROI pooler and the fc6/fc7 layers.

    Arguments:
        cfg: config node; reads the MODEL.ROI_BOX_HEAD pooler settings,
            MLP_HEAD_DIM and USE_GN.
        in_channels (int): number of channels of the input feature
        half_out (bool): when true, fc7 outputs half of MLP_HEAD_DIM
        cat_all_levels (bool): forwarded unchanged to the Pooler
    """
    super(FPN2MLPFeatureExtractor, self).__init__()

    box_head_cfg = cfg.MODEL.ROI_BOX_HEAD
    resolution = box_head_cfg.POOLER_RESOLUTION
    self.pooler = Pooler(
        output_size=(resolution, resolution),
        scales=box_head_cfg.POOLER_SCALES,
        sampling_ratio=box_head_cfg.POOLER_SAMPLING_RATIO,
        in_channels=in_channels,
        cat_all_levels=cat_all_levels,
    )

    flattened_size = in_channels * resolution**2
    hidden_size = box_head_cfg.MLP_HEAD_DIM
    use_gn = box_head_cfg.USE_GN

    self.fc6 = make_fc(flattened_size, hidden_size, use_gn)
    out_dim = int(hidden_size / 2) if half_out else hidden_size
    self.fc7 = make_fc(hidden_size, out_dim, use_gn)
    self.out_channels = out_dim
def __init__(self, cfg):
    """Build the re-id projection layer and the optional extra layers.

    Arguments:
        cfg: config node; reads REID.OUT_CHANNELS, REID.USE_DIFF_FEAT,
            MODEL.RETINANET_ON and, when enabled, the MODEL.ROI_BOX_HEAD
            and MODEL.RESNETS settings.
    """
    super(FPN2MLPFeatureExtractor, self).__init__()
    self.cfg = cfg
    self.in_channels = 1024
    self.out_channels = cfg.REID.OUT_CHANNELS
    self.fc = make_fc(self.in_channels, self.out_channels)

    if not self.cfg.REID.USE_DIFF_FEAT:
        return

    box_head_cfg = cfg.MODEL.ROI_BOX_HEAD
    resolution = box_head_cfg.POOLER_RESOLUTION
    backbone_channels = cfg.MODEL.RESNETS.BACKBONE_OUT_CHANNELS
    flattened_size = backbone_channels * resolution ** 2
    hidden_size = box_head_cfg.MLP_HEAD_DIM
    use_gn = box_head_cfg.USE_GN
    self.fc6 = make_fc(flattened_size, hidden_size, use_gn)
    self.fc7 = make_fc(hidden_size, hidden_size, use_gn)

    # NOTE(review): the pooler branch is kept inside the USE_DIFF_FEAT
    # branch because it depends on `resolution`, which is only defined
    # there -- confirm this nesting against the original layout.
    if self.cfg.MODEL.RETINANET_ON:
        self.pooler = Pooler(
            output_size=(resolution, resolution),
            scales=box_head_cfg.POOLER_SCALES,
            sampling_ratio=box_head_cfg.POOLER_SAMPLING_RATIO,
        )
def __init__(self, config):
    """Build the ROI pooler and a single-stage ResNet head.

    Arguments:
        config: config node; reads the MODEL.ROI_BOX_HEAD pooler settings
            and the MODEL.RESNETS block settings.
    """
    super(ResNet50Conv5ROIFeatureExtractor, self).__init__()

    box_head_cfg = config.MODEL.ROI_BOX_HEAD
    resnet_cfg = config.MODEL.RESNETS

    resolution = box_head_cfg.POOLER_RESOLUTION
    self.pooler = Pooler(
        output_size=(resolution, resolution),
        scales=box_head_cfg.POOLER_SCALES,
        sampling_ratio=box_head_cfg.POOLER_SAMPLING_RATIO,
    )

    stage_spec = resnet.StageSpec(index=4, block_count=3, return_features=False)
    self.head = resnet.ResNetHead(
        block_module=resnet_cfg.TRANS_FUNC,
        stages=(stage_spec,),
        num_groups=resnet_cfg.NUM_GROUPS,
        width_per_group=resnet_cfg.WIDTH_PER_GROUP,
        stride_in_1x1=resnet_cfg.STRIDE_IN_1X1,
        stride_init=None,
        res2_out_channels=resnet_cfg.RES2_OUT_CHANNELS,
        dilation=resnet_cfg.RES5_DILATION,
    )
def __init__(self, cfg, in_channels):
    """Build the bezier pooler, the recognition tower and the predictor.

    Arguments:
        cfg: config node; reads the MODEL.ALIGN settings.
        in_channels (int): number of channels of the input feature

    Raises:
        ValueError: if MODEL.ALIGN.PREDICTOR is neither "ctc" nor
            "attention".
    """
    super(AlignHead, self).__init__()
    # TODO: Implement the sigmoid version first.
    resolution = cfg.MODEL.ALIGN.POOLER_RESOLUTION
    canonical_scale = cfg.MODEL.ALIGN.POOLER_CANONICAL_SCALE
    self.scales = cfg.MODEL.ALIGN.POOLER_SCALES
    self.pooler = Pooler(
        output_size=resolution,
        scales=self.scales,
        sampling_ratio=1,
        canonical_scale=canonical_scale,
        mode='bezier')

    for head in ['rec']:
        conv_block = conv_with_kaiming_uniform(True, True, False, False)
        # Every conv in the tower keeps the channel count unchanged.
        tower = [
            conv_block(in_channels, in_channels, 3, 1)
            for _ in range(cfg.MODEL.ALIGN.NUM_CONVS)
        ]
        self.add_module('{}_tower'.format(head), nn.Sequential(*tower))

    self.predict_type = cfg.MODEL.ALIGN.PREDICTOR
    if self.predict_type == "ctc":
        self.predictor = CTCPredictor(cfg, in_channels)
    elif self.predict_type == "attention":
        self.predictor = ATTPredictor(cfg, in_channels)
    else:
        # Raising a bare string is itself a TypeError in Python 3, which
        # hid the intended message; raise a real exception instead.
        raise ValueError("Unknown recognition predictor.")
def __init__(self, cfg, in_channels):
    """Build the ROI pooler and a stack of 3x3 convolutions.

    Arguments:
        cfg: config node; reads the MODEL.ROI_BB8KEYPOINT_HEAD pooler
            settings and CONV_LAYERS.
        in_channels (int): number of channels of the input feature
    """
    super(BB8KeypointRCNNFeatureExtractor, self).__init__()

    resolution = cfg.MODEL.ROI_BB8KEYPOINT_HEAD.POOLER_RESOLUTION
    scales = cfg.MODEL.ROI_BB8KEYPOINT_HEAD.POOLER_SCALES
    sampling_ratio = cfg.MODEL.ROI_BB8KEYPOINT_HEAD.POOLER_SAMPLING_RATIO
    pooler = Pooler(
        output_size=(resolution, resolution),
        scales=scales,
        sampling_ratio=sampling_ratio,
    )
    self.pooler = pooler

    input_features = in_channels
    layers = cfg.MODEL.ROI_BB8KEYPOINT_HEAD.CONV_LAYERS
    next_feature = input_features
    # self.blocks stores the registered module names in creation order.
    self.blocks = []
    for layer_idx, layer_features in enumerate(layers, 1):
        layer_name = "conv_fcn{}".format(layer_idx)
        module = Conv2d(next_feature, layer_features, 3, stride=1, padding=1)
        nn.init.kaiming_normal_(module.weight, mode="fan_out", nonlinearity="relu")
        nn.init.constant_(module.bias, 0)
        self.add_module(layer_name, module)
        next_feature = layer_features
        self.blocks.append(layer_name)
    # next_feature equals the last layer width after the loop and falls
    # back to in_channels when CONV_LAYERS is empty (the loop variable
    # would be unbound in that case).
    self.out_channels = next_feature
def __init__(self, cfg):
    """Build the ROI pooler and the mask-head convolution stack.

    Arguments:
        cfg: config node; reads the MODEL.ROI_MASK_HEAD pooler settings,
            MODEL.ROI_MASK_HEAD.CONV_LAYERS and MODEL.BACKBONE.OUT_CHANNELS.
    """
    super(MaskRCNNFPNFeatureExtractor, self).__init__()

    mask_cfg = cfg.MODEL.ROI_MASK_HEAD
    resolution = mask_cfg.POOLER_RESOLUTION
    self.pooler = Pooler(
        output_size=(resolution, resolution),
        scales=mask_cfg.POOLER_SCALES,
        sampling_ratio=mask_cfg.POOLER_SAMPLING_RATIO,
    )

    in_features = cfg.MODEL.BACKBONE.OUT_CHANNELS
    conv_widths = mask_cfg.CONV_LAYERS

    # self.blocks stores the registered module names in creation order.
    self.blocks = []
    for index, out_features in enumerate(conv_widths, 1):
        name = "mask_fcn{}".format(index)
        conv = Conv2d(in_features, out_features, 3, stride=1, padding=1)
        # Caffe2 implementation uses MSRAFill, which in fact
        # corresponds to kaiming_normal_ in PyTorch
        nn.init.kaiming_normal_(conv.weight, mode="fan_out", nonlinearity="relu")
        nn.init.constant_(conv.bias, 0)
        self.add_module(name, conv)
        self.blocks.append(name)
        in_features = out_features
def __init__(self, cfg, in_channels):
    """Build the ROI pooler and the mask-head convolution stack.

    Arguments:
        cfg: config node; MODEL.ROI_MASK_HEAD is handed to the Pooler and
            its USE_GN, CONV_LAYERS and DILATION entries are read here.
        in_channels (int): number of channels of the input feature
    """
    super(MaskRCNNFPNFeatureExtractor, self).__init__()

    pooler = Pooler(cfg.MODEL.ROI_MASK_HEAD)
    input_size = in_channels
    self.pooler = pooler

    use_gn = cfg.MODEL.ROI_MASK_HEAD.USE_GN
    layers = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS
    dilation = cfg.MODEL.ROI_MASK_HEAD.DILATION

    next_feature = input_size
    # self.blocks stores the registered module names in creation order.
    self.blocks = []
    for layer_idx, layer_features in enumerate(layers, 1):
        layer_name = "mask_fcn{}".format(layer_idx)
        module = make_conv3x3(next_feature, layer_features,
                              dilation=dilation, stride=1, use_gn=use_gn)
        self.add_module(layer_name, module)
        next_feature = layer_features
        self.blocks.append(layer_name)
    # next_feature equals the last layer width after the loop and falls
    # back to in_channels when CONV_LAYERS is empty (the loop variable
    # would be unbound in that case).
    self.out_channels = next_feature
def __init__(self, config, in_channels, RCNN_top=None):
    """Build the ROI pooler, the ResNet head and the flattening fc layer.

    Arguments:
        config: config node; reads the MODEL.ROI_BOX_HEAD pooler settings,
            MLP_HEAD_DIM, USE_GN and the MODEL.RESNETS block settings.
        in_channels: not used by this constructor
        RCNN_top: not used by this constructor
    """
    super(ResNet50Conv5ROIFeatureExtractorFlatten, self).__init__()

    box_head_cfg = config.MODEL.ROI_BOX_HEAD
    resnet_cfg = config.MODEL.RESNETS

    resolution = box_head_cfg.POOLER_RESOLUTION
    roi_pooler = Pooler(
        output_size=(resolution, resolution),
        scales=box_head_cfg.POOLER_SCALES,
        sampling_ratio=box_head_cfg.POOLER_SAMPLING_RATIO,
    )

    stage_spec = resnet.StageSpec(index=4, block_count=3, return_features=False)
    resnet_head = resnet.ResNetHead(
        block_module=resnet_cfg.TRANS_FUNC,
        stages=(stage_spec,),
        num_groups=resnet_cfg.NUM_GROUPS,
        width_per_group=resnet_cfg.WIDTH_PER_GROUP,
        stride_in_1x1=resnet_cfg.STRIDE_IN_1X1,
        stride_init=None,
        res2_out_channels=resnet_cfg.RES2_OUT_CHANNELS,
        dilation=resnet_cfg.RES5_DILATION,
    )

    self.pooler = roi_pooler
    self.head = resnet_head

    # The fc layer maps the head's channel count to MLP_HEAD_DIM.
    head_channels = resnet_head.out_channels
    use_gn = box_head_cfg.USE_GN
    self.out_channels = box_head_cfg.MLP_HEAD_DIM
    self.avgpool = nn.AdaptiveAvgPool2d(1)
    self.fc = nn.Sequential(
        make_fc(head_channels, self.out_channels, use_gn),
        nn.ReLU(),
    )
def __init__(self, cfg):
    """Build the ROI pooler and the mask-head convolution stack.

    Arguments:
        cfg: config node; reads the MODEL.ROI_MASK_HEAD pooler settings,
            USE_GN, CONV_LAYERS, DILATION and MODEL.BACKBONE.OUT_CHANNELS.
    """
    super(MaskRCNNFPNFeatureExtractor, self).__init__()

    mask_cfg = cfg.MODEL.ROI_MASK_HEAD
    resolution = mask_cfg.POOLER_RESOLUTION
    self.pooler = Pooler(
        output_size=(resolution, resolution),
        scales=mask_cfg.POOLER_SCALES,
        sampling_ratio=mask_cfg.POOLER_SAMPLING_RATIO,
    )

    in_features = cfg.MODEL.BACKBONE.OUT_CHANNELS
    use_gn = mask_cfg.USE_GN
    conv_widths = mask_cfg.CONV_LAYERS
    dilation = mask_cfg.DILATION

    # self.blocks stores the registered module names in creation order.
    self.blocks = []
    for index, out_features in enumerate(conv_widths, 1):
        name = "mask_fcn{}".format(index)
        conv = make_conv3x3(
            in_features,
            out_features,
            dilation=dilation,
            stride=1,
            use_gn=use_gn,
        )
        self.add_module(name, conv)
        self.blocks.append(name)
        in_features = out_features
def __init__(self, cfg, in_channels):
    """Build the (optionally cascaded) ROI pooler and the fc6/fc7 layers.

    Arguments:
        cfg: config node; reads the MODEL.ROI_BOX_HEAD pooler settings,
            MLP_HEAD_DIM, USE_GN and MODEL.ROI_HEADS.USE_CASCADE_POOLING.
        in_channels (int): number of channels of the input feature
    """
    super(FPN2MLPFeatureExtractor, self).__init__()

    box_head_cfg = cfg.MODEL.ROI_BOX_HEAD
    resolution = box_head_cfg.POOLER_RESOLUTION
    scales = box_head_cfg.POOLER_SCALES
    sampling_ratio = box_head_cfg.POOLER_SAMPLING_RATIO

    # Both pooler classes are constructed with the same keyword arguments.
    pooler_cls = CascadePooler if cfg.MODEL.ROI_HEADS.USE_CASCADE_POOLING else Pooler
    self.pooler = pooler_cls(
        output_size=(resolution, resolution),
        scales=scales,
        sampling_ratio=sampling_ratio,
    )

    flattened_size = in_channels * resolution**2
    hidden_size = box_head_cfg.MLP_HEAD_DIM
    use_gn = box_head_cfg.USE_GN

    self.fc6 = make_fc(flattened_size, hidden_size, use_gn)
    self.fc7 = make_fc(hidden_size, hidden_size, use_gn)
    self.out_channels = hidden_size
def __init__(self, cfg):
    """Build the ROI pooler and two Linear + BatchNorm1d layers.

    Arguments:
        cfg: config node; a clone is stored on the instance. Reads the
            MODEL.ROI_CAR_CLS_ROT_HEAD pooler settings, MLP_HEAD_DIM and
            MODEL.BACKBONE.OUT_CHANNELS.
    """
    super().__init__()
    self.cfg = cfg.clone()

    head_cfg = cfg.MODEL.ROI_CAR_CLS_ROT_HEAD
    resolution = head_cfg.POOLER_RESOLUTION
    roi_pooler = Pooler(
        output_size=(resolution, resolution),
        scales=head_cfg.POOLER_SCALES,
        sampling_ratio=head_cfg.POOLER_SAMPLING_RATIO,
    )

    flattened_size = cfg.MODEL.BACKBONE.OUT_CHANNELS * resolution**2
    hidden_size = head_cfg.MLP_HEAD_DIM

    self.pooler = roi_pooler
    self.fc6 = nn.Linear(flattened_size, hidden_size)
    self.fc6_bn = nn.BatchNorm1d(hidden_size)
    self.fc7 = nn.Linear(hidden_size, hidden_size)
    self.fc7_bn = nn.BatchNorm1d(hidden_size)

    for bn_layer in (self.fc6_bn, self.fc7_bn):
        nn.init.constant_(bn_layer.weight, 1.0)

    for fc_layer in (self.fc6, self.fc7):
        # Caffe2 implementation uses XavierFill, which in fact
        # corresponds to kaiming_uniform_ in PyTorch
        XavierFill(fc_layer.weight)
        nn.init.constant_(fc_layer.bias, 0)
def __init__(self, cfg):
    """Build the ROI pooler and the fc6/fc7 layers via make_fc.

    Arguments:
        cfg: config node; reads the MODEL.ROI_BOX_HEAD pooler settings,
            MLP_HEAD_DIM, USE_GN and MODEL.BACKBONE.OUT_CHANNELS.
    """
    super(FPN2MLPFeatureExtractor, self).__init__()

    box_head_cfg = cfg.MODEL.ROI_BOX_HEAD
    resolution = box_head_cfg.POOLER_RESOLUTION
    roi_pooler = Pooler(
        output_size=(resolution, resolution),
        scales=box_head_cfg.POOLER_SCALES,
        sampling_ratio=box_head_cfg.POOLER_SAMPLING_RATIO,
    )

    flattened_size = cfg.MODEL.BACKBONE.OUT_CHANNELS * resolution**2
    hidden_size = box_head_cfg.MLP_HEAD_DIM
    use_gn = box_head_cfg.USE_GN

    self.pooler = roi_pooler
    # The former hand-initialised nn.Linear layers are deprecated; make_fc
    # builds both layers now.
    self.fc6 = make_fc(flattened_size, hidden_size, use_gn)
    self.fc7 = make_fc(hidden_size, hidden_size, use_gn)
def __init__(self, cfg, in_channels):
    """Build the ROI pooler and two convolutional "fc" layers.

    Arguments:
        cfg: config node; MODEL.ROI_BOX_HEAD is handed to the Pooler and
            its POOLER_RESOLUTION and USE_GN entries are read here.
        in_channels (int): number of channels of the input feature
    """
    super(FPNDetNetFeatureExtractor, self).__init__()

    resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION
    pooler = Pooler(cfg.MODEL.ROI_BOX_HEAD)
    self.use_gn = cfg.MODEL.ROI_BOX_HEAD.USE_GN
    self.pooler = pooler

    hidden_channels = in_channels * 4
    # The convolutions carry no bias when group norm follows them.
    use_bias = not self.use_gn

    # fc6 uses a kernel and stride equal to the pooler resolution.
    self.fc6 = nn.Conv2d(
        in_channels,
        hidden_channels,
        kernel_size=resolution,
        stride=resolution,
        padding=0,
        bias=use_bias,
    )
    if self.use_gn:
        self.gn6 = group_norm(hidden_channels)

    self.fc7 = nn.Conv2d(
        hidden_channels,
        hidden_channels,
        kernel_size=1,
        stride=1,
        padding=0,
        bias=use_bias,
    )
    if self.use_gn:
        self.gn7 = group_norm(hidden_channels)

    self.out_channels = hidden_channels
def __init__(self, cfg):
    """Build the ROI pooler and the mask-head stack with deconv options.

    Arguments:
        cfg: config node; reads the MODEL.ROI_MASK_HEAD settings, the
            MODEL.DECONV settings and MODEL.BACKBONE.OUT_CHANNELS.
    """
    super(MaskRCNNFPNFeatureExtractor, self).__init__()

    mask_cfg = cfg.MODEL.ROI_MASK_HEAD
    deconv_cfg = cfg.MODEL.DECONV

    resolution = mask_cfg.POOLER_RESOLUTION
    self.pooler = Pooler(
        output_size=(resolution, resolution),
        scales=mask_cfg.POOLER_SCALES,
        sampling_ratio=mask_cfg.POOLER_SAMPLING_RATIO,
    )

    in_features = cfg.MODEL.BACKBONE.OUT_CHANNELS
    use_gn = mask_cfg.USE_GN
    use_gw = mask_cfg.USE_GW
    conv_widths = mask_cfg.CONV_LAYERS
    dilation = mask_cfg.DILATION
    use_deconv = mask_cfg.USE_DECONV
    block = deconv_cfg.BLOCK

    # Enabling deconv switches both GN and GW off.
    if use_deconv:
        use_gn = False
        use_gw = False

    # self.blocks stores the registered module names in creation order.
    self.blocks = []

    norm_type = deconv_cfg.MASK_NORM_TYPE if deconv_cfg.LAYERWISE_NORM else 'none'

    # NOTE(review): this check is treated as independent of LAYERWISE_NORM;
    # confirm against the original layout.
    if deconv_cfg.MASK_NORM_TYPE == 'layernorm':
        self.mask_norm = LayerNorm(eps=deconv_cfg.EPS)

    for index, out_features in enumerate(conv_widths, 1):
        name = "mask_fcn{}".format(index)
        conv = make_conv3x3(
            in_features,
            out_features,
            dilation=dilation,
            stride=1,
            use_gn=use_gn,
            use_gw=use_gw,
            use_deconv=use_deconv,
            block=block,
            sampling_stride=deconv_cfg.STRIDE,
            sync=deconv_cfg.SYNC,
            norm_type=norm_type,
        )
        self.add_module(name, conv)
        self.blocks.append(name)
        in_features = out_features
def __init__(self, cfg, in_channels):
    """Build the ROI pooler, the stacked conv tower and the final fc layer.

    Arguments:
        cfg: config node; reads the MODEL.ROI_BOX_HEAD settings and
            MODEL.USE_WS.
        in_channels (int): number of channels of the input feature
    """
    super(FPNXconv1fcFeatureExtractor, self).__init__()

    box_head_cfg = cfg.MODEL.ROI_BOX_HEAD
    resolution = box_head_cfg.POOLER_RESOLUTION
    self.pooler = Pooler(
        output_size=(resolution, resolution),
        scales=box_head_cfg.POOLER_SCALES,
        sampling_ratio=box_head_cfg.POOLER_SAMPLING_RATIO,
        cfg=cfg,
    )

    use_gn = box_head_cfg.USE_GN
    conv_head_dim = box_head_cfg.CONV_HEAD_DIM
    num_stacked_convs = box_head_cfg.NUM_STACKED_CONVS
    dilation = box_head_cfg.DILATION
    use_ws = cfg.MODEL.USE_WS

    # Each stage is conv -> (group norm) -> ReLU; the conv carries a bias
    # only when no group norm follows it.
    conv_cls = Conv2dWS if use_ws else nn.Conv2d
    tower = []
    for _ in range(num_stacked_convs):
        tower.append(
            conv_cls(
                in_channels,
                conv_head_dim,
                kernel_size=3,
                stride=1,
                padding=dilation,
                dilation=dilation,
                bias=not use_gn,
            )
        )
        in_channels = conv_head_dim
        if use_gn:
            tower.append(group_norm(in_channels))
        tower.append(nn.ReLU(inplace=True))

    self.add_module("xconvs", nn.Sequential(*tower))

    for layer in self.xconvs.modules():
        if not isinstance(layer, nn.Conv2d):
            continue
        torch.nn.init.normal_(layer.weight, std=0.01)
        if not use_gn:
            torch.nn.init.constant_(layer.bias, 0)

    flattened_size = conv_head_dim * resolution**2
    hidden_size = box_head_cfg.MLP_HEAD_DIM
    self.fc6 = make_fc(flattened_size, hidden_size, use_gn=False)
    self.out_channels = hidden_size
def __init__(self, cfg, in_channels, architecture=None):
    """Build the ROI pooler and the searchable or fixed mask-head layers.

    Arguments:
        cfg: config node; reads the MODEL.ROI_MASK_HEAD settings.
        in_channels (int): number of channels of the input feature
        architecture: optional sequence of indices into ``head_ss_keys``,
            one per entry of CONV_LAYERS. When None, every layer holds one
            candidate op per key.
    """
    super(AutoPanoptic_MaskRCNNFPNFeatureExtractor, self).__init__()

    resolution = cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION
    scales = cfg.MODEL.ROI_MASK_HEAD.POOLER_SCALES
    sampling_ratio = cfg.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO
    pooler = Pooler(
        output_size=(resolution, resolution),
        scales=scales,
        sampling_ratio=sampling_ratio,
    )
    input_size = in_channels
    self.pooler = pooler

    use_gn = cfg.MODEL.ROI_MASK_HEAD.USE_GN
    layers = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS
    dilation = cfg.MODEL.ROI_MASK_HEAD.DILATION

    next_feature = input_size
    # Unlike the name-based variants, self.blocks holds the modules.
    self.blocks = []
    self.architecture = None

    if 'AutoPanoptic' in cfg.MODEL.ROI_MASK_HEAD.FEATURE_EXTRACTOR and \
            'search' not in cfg.MODEL.ROI_MASK_HEAD.FEATURE_EXTRACTOR:
        assert architecture is not None, 'architecture not specified in AutoPanoptic mask head'
        assert len(architecture) == len(
            cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS)
        self.architecture = architecture

    for layer_idx, layer_features in enumerate(layers, 1):
        if architecture is None:
            # Search mode: one candidate op per key, all sharing the same
            # input and output widths.
            _ops = nn.ModuleList()
            for i in range(len(head_ss_keys)):
                _ops.append(
                    make_layer(head_ss_keys[i], next_feature,
                               layer_features, relu=False, gn=True))
        else:
            # Fixed mode: the op selected for this layer.
            _ops = make_layer(head_ss_keys[architecture[layer_idx - 1]],
                              next_feature, layer_features,
                              relu=False, gn=True)
        next_feature = layer_features
        self.blocks.append(_ops)
        # inconsistent module name between search and single model can
        # incur problem in model reloading
        self.add_module('AutoPanoptic_mask_fcn_{}'.format(layer_idx), _ops)

    # next_feature equals the last layer width after the loop and falls
    # back to in_channels when CONV_LAYERS is empty (the loop variable
    # would be unbound in that case).
    self.out_channels = next_feature
def __init__(self, cfg):
    """Build the single-scale ROI pooler.

    Arguments:
        cfg: config node; reads MODEL.ROI_BOX3D_HEAD.POOLER_RESOLUTION and
            POOLER_SAMPLING_RATIO. The pooler scale is fixed to (1.,).
    """
    super(Box3dPCFeatureExtractor, self).__init__()

    head_cfg = cfg.MODEL.ROI_BOX3D_HEAD
    resolution = head_cfg.POOLER_RESOLUTION
    self.pooler = Pooler(
        output_size=(resolution, resolution),
        scales=(1.,),
        sampling_ratio=head_cfg.POOLER_SAMPLING_RATIO,
    )
def __init__(self, cfg):
    """Build the ROI pooler from the box-head settings.

    Arguments:
        cfg: config node; reads the MODEL.ROI_BOX_HEAD pooler settings.
    """
    super(make_roi_box_feature_extractor, self).__init__()

    box_head_cfg = cfg.MODEL.ROI_BOX_HEAD
    resolution = box_head_cfg.POOLER_RESOLUTION
    self.pooler = Pooler(
        output_size=(resolution, resolution),
        scales=box_head_cfg.POOLER_SCALES,
        sampling_ratio=box_head_cfg.POOLER_SAMPLING_RATIO,
    )
def init_pooler(self):
    """Build the ROI pooler from hard-coded settings.

    Sets ``self.collector``, ``self.num_levels`` (the number of entries in
    ``self.collector.poolers``) and ``self.output_size``.
    """
    # Hard-coded stand-ins for the benchmark_cfg.MODEL.ROI_MASK_HEAD
    # entries POOLER_SCALES, POOLER_SAMPLING_RATIO and POOLER_RESOLUTION.
    pooler_scales = (0.25, 0.125, 0.0625, 0.03125)
    pooler_sampling_ratio = 2
    pooler_resolution = 14

    out_size = (pooler_resolution, pooler_resolution)
    self.collector = Pooler(
        output_size=out_size,
        scales=pooler_scales,
        sampling_ratio=pooler_sampling_ratio,
    )
    self.num_levels = len(self.collector.poolers)
    self.output_size = (pooler_resolution, pooler_resolution)
def __init__(self, cfg, in_channels, RCNN_top=None):
    """Build the ROI pooler and record the output channel count.

    Arguments:
        cfg: config node; reads the MODEL.ROI_BOX_HEAD pooler settings and
            MLP_HEAD_DIM.
        in_channels: not used by this constructor
        RCNN_top: not used by this constructor
    """
    super(BottomUpFeatureExtractor, self).__init__()

    box_head_cfg = cfg.MODEL.ROI_BOX_HEAD
    resolution = box_head_cfg.POOLER_RESOLUTION
    roi_pooler = Pooler(
        output_size=(resolution, resolution),
        scales=box_head_cfg.POOLER_SCALES,
        sampling_ratio=box_head_cfg.POOLER_SAMPLING_RATIO,
    )
    hidden_size = box_head_cfg.MLP_HEAD_DIM

    self.pooler = roi_pooler
    self.out_channels = hidden_size
def __init__(self, cfg, in_channels):
    """Build the ROI pooler; the output channels equal the input channels.

    Arguments:
        cfg: config node; reads the MODEL.ROI_BOX_HEAD pooler settings.
        in_channels (int): number of channels of the input feature
    """
    super(FPN2ROIFeatureExtractor, self).__init__()

    box_head_cfg = cfg.MODEL.ROI_BOX_HEAD
    # Example values noted by the original author: resolution 7,
    # scales (0.25, 0.125, 0.0625, 0.03125), sampling ratio 2.
    resolution = box_head_cfg.POOLER_RESOLUTION
    self.pooler = Pooler(
        output_size=(resolution, resolution),
        scales=box_head_cfg.POOLER_SCALES,
        sampling_ratio=box_head_cfg.POOLER_SAMPLING_RATIO,
    )
    self.out_channels = in_channels
def __init__(self, cfg):
    """Store a clone of the config and build the pooler and average pool.

    Arguments:
        cfg: config node; reads the MODEL.ROI_BOX_HEAD pooler settings.
    """
    # NOTE(review): no base-class __init__ is called here; if the enclosing
    # class derives from nn.Module the attribute assignments below would
    # fail -- confirm the class is a plain object.
    self.cfg = cfg.clone()

    box_head_cfg = cfg.MODEL.ROI_BOX_HEAD
    resolution = box_head_cfg.POOLER_RESOLUTION
    self.pooler = Pooler(
        output_size=(resolution, resolution),
        scales=box_head_cfg.POOLER_SCALES,
        sampling_ratio=box_head_cfg.POOLER_SAMPLING_RATIO,
    )
    # Kernel and stride both equal the pooler resolution.
    self.avgpool = nn.AvgPool2d(kernel_size=resolution, stride=resolution)
def __init__(self, cfg):
    """Build the ROI pooler and the mask-head stack with optional extras.

    The third layer becomes a 2x2 stride-2 transposed convolution when
    MODEL.ROI_MASK_HEAD.RESOLUTION is four times the pooler resolution,
    and an attention module is registered next to the second layer when
    MODEL.ROI_MASK_HEAD.ATTN is not the empty string.

    Arguments:
        cfg: config node; reads the MODEL.ROI_MASK_HEAD settings and
            MODEL.BACKBONE.OUT_CHANNELS.
    """
    super(MaskRCNNFPNFeatureExtractor, self).__init__()

    mask_cfg = cfg.MODEL.ROI_MASK_HEAD
    resolution = mask_cfg.POOLER_RESOLUTION
    self.pooler = Pooler(
        output_size=(resolution, resolution),
        scales=mask_cfg.POOLER_SCALES,
        sampling_ratio=mask_cfg.POOLER_SAMPLING_RATIO,
    )

    conv_widths = mask_cfg.CONV_LAYERS
    in_features = cfg.MODEL.BACKBONE.OUT_CHANNELS
    # self.blocks stores the registered conv module names in creation order.
    self.blocks = []

    self.use_attn = mask_cfg.ATTN != ""

    # Determine whether upsampling is necessary from the resolution
    use_upsample = (mask_cfg.RESOLUTION / resolution) == 4.0

    for index, out_features in enumerate(conv_widths, 1):
        name = "mask_fcn{}".format(index)

        if use_upsample and index == 3:
            layer = ConvTranspose2d(in_features, out_features, 2, 2, 0)
        else:
            layer = Conv2d(in_features, out_features, 3, 1, 1)

        # Caffe2 implementation uses MSRAFill, which in fact
        # corresponds to kaiming_normal_ in PyTorch
        nn.init.kaiming_normal_(layer.weight, mode="fan_out", nonlinearity="relu")
        nn.init.constant_(layer.bias, 0)

        # The attention module is registered before the conv of its layer.
        if self.use_attn and index == 2:
            attn_size = (out_features, resolution, resolution)
            self.add_module("mask_attn{}".format(index), RoIAttnModule(cfg, attn_size))

        self.add_module(name, layer)
        self.blocks.append(name)
        in_features = out_features
def __init__(self, cfg, in_channels):
    """Build the ROI pooler and the fc6/fc7 layers.

    Arguments:
        cfg: config node; MODEL.ROI_BOX_HEAD is handed to the Pooler and
            its POOLER_RESOLUTION, MLP_HEAD_DIM and USE_GN entries are
            read here.
        in_channels (int): number of channels of the input feature
    """
    super(FPN2MLPFeatureExtractor, self).__init__()

    box_head_cfg = cfg.MODEL.ROI_BOX_HEAD
    resolution = box_head_cfg.POOLER_RESOLUTION
    roi_pooler = Pooler(cfg.MODEL.ROI_BOX_HEAD)

    flattened_size = in_channels * resolution**2
    hidden_size = box_head_cfg.MLP_HEAD_DIM
    use_gn = box_head_cfg.USE_GN

    self.pooler = roi_pooler
    self.fc6 = make_fc(flattened_size, hidden_size, use_gn)
    self.fc7 = make_fc(hidden_size, hidden_size, use_gn)
    self.out_channels = hidden_size
def __init__(self, cfg, in_channels):
    """Build the ROI pooler from the box-head settings.

    Arguments:
        cfg: config node; reads the MODEL.ROI_BOX_HEAD pooler settings,
            MLP_HEAD_DIM and USE_GN.
        in_channels (int): number of channels of the input feature
    """
    super(SupportFPN2ROIFeatureExtractor, self).__init__()

    box_head_cfg = cfg.MODEL.ROI_BOX_HEAD
    resolution = box_head_cfg.POOLER_RESOLUTION
    roi_pooler = Pooler(
        output_size=(resolution, resolution),
        scales=box_head_cfg.POOLER_SCALES,
        sampling_ratio=box_head_cfg.POOLER_SAMPLING_RATIO,
    )

    # The three values below are computed but not used in the visible
    # part of this constructor.
    input_size = in_channels * resolution**2
    representation_size = box_head_cfg.MLP_HEAD_DIM  # default to 1024
    use_gn = box_head_cfg.USE_GN

    self.pooler = roi_pooler