def __init__(self, cfg, input_shape: ShapeSpec): """ The following attributes are parsed from config: conv_dim: the output dimension of the conv layers fc_dim: the feature dimenstion of the FC layers num_fc: the number of FC layers output_side_resolution: side resolution of the output square mask prediction """ super(CoarseMaskHead, self).__init__() # fmt: off self.num_classes = cfg.MODEL.ROI_HEADS.NUM_CLASSES conv_dim = cfg.MODEL.ROI_MASK_HEAD.CONV_DIM self.fc_dim = cfg.MODEL.ROI_MASK_HEAD.FC_DIM num_fc = cfg.MODEL.ROI_MASK_HEAD.NUM_FC self.output_side_resolution = cfg.MODEL.ROI_MASK_HEAD.OUTPUT_SIDE_RESOLUTION self.input_channels = input_shape.channels self.input_h = input_shape.height self.input_w = input_shape.width # fmt: on self.conv_layers = [] if self.input_channels > conv_dim: self.reduce_channel_dim_conv = Conv2d( self.input_channels, conv_dim, kernel_size=1, stride=1, padding=0, bias=True, activation=F.relu, ) self.conv_layers.append(self.reduce_channel_dim_conv) self.reduce_spatial_dim_conv = Conv2d( conv_dim, conv_dim, kernel_size=2, stride=2, padding=0, bias=True, activation=F.relu ) self.conv_layers.append(self.reduce_spatial_dim_conv) input_dim = conv_dim * self.input_h * self.input_w input_dim //= 4 self.fcs = [] for k in range(num_fc): fc = nn.Linear(input_dim, self.fc_dim) self.add_module("coarse_mask_fc{}".format(k + 1), fc) self.fcs.append(fc) input_dim = self.fc_dim output_dim = self.num_classes * self.output_side_resolution * self.output_side_resolution self.prediction = nn.Linear(self.fc_dim, output_dim) # use normal distribution initialization for mask prediction layer nn.init.normal_(self.prediction.weight, std=0.001) nn.init.constant_(self.prediction.bias, 0) for layer in self.conv_layers: weight_init.c2_msra_fill(layer) for layer in self.fcs: weight_init.c2_xavier_fill(layer)
def __init__(self, in_channels=3, out_channels=64, norm="BN", activation=None): """ Args: norm (str or callable): a callable that takes the number of channels and return a `nn.Module`, or a pre-defined string (one of {"FrozenBN", "BN", "GN"}). """ super().__init__() self.conv1 = Conv2d( in_channels, out_channels, kernel_size=7, stride=2, padding=3, bias=False, norm=get_norm(norm, out_channels), ) weight_init.c2_msra_fill(self.conv1) self.activation = get_activation(activation) self.max_pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
def __init__(self, in_channels, out_channels, *, stride=1, norm="BN", activation=None, **kwargs):
    """
    The standard block type for ResNet18 and ResNet34.

    Args:
        in_channels (int): Number of input channels.
        out_channels (int): Number of output channels.
        stride (int): Stride for the first conv.
        norm (str or callable): A callable that takes the number of channels
            and returns a `nn.Module`, or a pre-defined string
            (one of {"FrozenBN", "BN", "GN"}).
    """
    super().__init__(in_channels, out_channels, stride)

    if in_channels != out_channels:
        self.shortcut = Conv2d(
            in_channels,
            out_channels,
            kernel_size=1,
            stride=stride,
            bias=False,
            norm=get_norm(norm, out_channels),
        )
    else:
        self.shortcut = None

    self.activation = get_activation(activation)

    self.conv1 = Conv2d(
        in_channels,
        out_channels,
        kernel_size=3,
        stride=stride,
        padding=1,
        bias=False,
        norm=get_norm(norm, out_channels),
    )
    self.conv2 = Conv2d(
        out_channels,
        out_channels,
        kernel_size=3,
        stride=1,
        padding=1,
        bias=False,
        norm=get_norm(norm, out_channels),
    )

    for layer in [self.conv1, self.conv2, self.shortcut]:
        if layer is not None:  # shortcut can be None
            weight_init.c2_msra_fill(layer)
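# Sketch of how the pieces registered above compose in a forward pass
# (a paraphrase with plain torch, not the library's forward()): the two 3x3
# convs form the residual branch and the optional 1x1 shortcut matches
# channels/stride before the additive skip. Hypothetical 64 -> 128, stride 2:
import torch
import torch.nn as nn
import torch.nn.functional as F

conv1 = nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1, bias=False)
conv2 = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1, bias=False)
shortcut = nn.Conv2d(64, 128, kernel_size=1, stride=2, bias=False)
x = torch.randn(1, 64, 56, 56)
out = F.relu(conv2(F.relu(conv1(x))) + shortcut(x))
print(out.shape)  # torch.Size([1, 128, 28, 28])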
def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]):
    super().__init__()

    # fmt: off
    self.in_features   = cfg.MODEL.SEM_SEG_HEAD.IN_FEATURES
    feature_strides    = {k: v.stride for k, v in input_shape.items()}
    feature_channels   = {k: v.channels for k, v in input_shape.items()}
    self.ignore_value  = cfg.MODEL.SEM_SEG_HEAD.IGNORE_VALUE
    num_classes        = cfg.MODEL.SEM_SEG_HEAD.NUM_CLASSES
    conv_dims          = cfg.MODEL.SEM_SEG_HEAD.CONVS_DIM
    self.common_stride = cfg.MODEL.SEM_SEG_HEAD.COMMON_STRIDE
    norm               = cfg.MODEL.SEM_SEG_HEAD.NORM
    self.loss_weight   = cfg.MODEL.SEM_SEG_HEAD.LOSS_WEIGHT
    # fmt: on

    self.scale_heads = []
    for in_feature in self.in_features:
        head_ops = []
        head_length = max(
            1, int(np.log2(feature_strides[in_feature]) - np.log2(self.common_stride))
        )
        for k in range(head_length):
            norm_module = nn.GroupNorm(32, conv_dims) if norm == "GN" else None
            conv = Conv2d(
                feature_channels[in_feature] if k == 0 else conv_dims,
                conv_dims,
                kernel_size=3,
                stride=1,
                padding=1,
                bias=not norm,
                norm=norm_module,
                activation=F.relu,
            )
            weight_init.c2_msra_fill(conv)
            head_ops.append(conv)
            if feature_strides[in_feature] != self.common_stride:
                head_ops.append(
                    nn.Upsample(scale_factor=2, mode="bilinear", align_corners=False)
                )
        self.scale_heads.append(nn.Sequential(*head_ops))
        self.add_module(in_feature, self.scale_heads[-1])
    self.predictor = Conv2d(conv_dims, num_classes, kernel_size=1, stride=1, padding=0)
    weight_init.c2_msra_fill(self.predictor)
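# Worked example of the head_length arithmetic above: each stage is one 3x3
# conv, followed by a 2x upsample whenever the feature's stride differs from
# common_stride. With common_stride=4 (a typical value; hypothetical here):
import numpy as np

common_stride = 4
for stride in (4, 8, 16, 32):
    head_length = max(1, int(np.log2(stride) - np.log2(common_stride)))
    print(stride, head_length)  # 4 -> 1, 8 -> 1, 16 -> 2, 32 -> 3 stages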
def __init__(self, cfg, input_shape: ShapeSpec): """ The following attributes are parsed from config: num_conv: the number of conv layers conv_dim: the dimension of the conv layers norm: normalization for the conv layers """ super(MaskRCNNConvUpsampleHead, self).__init__() # fmt: off num_classes = cfg.MODEL.ROI_HEADS.NUM_CLASSES conv_dims = cfg.MODEL.ROI_MASK_HEAD.CONV_DIM self.norm = cfg.MODEL.ROI_MASK_HEAD.NORM num_conv = cfg.MODEL.ROI_MASK_HEAD.NUM_CONV input_channels = input_shape.channels cls_agnostic_mask = cfg.MODEL.ROI_MASK_HEAD.CLS_AGNOSTIC_MASK # fmt: on self.conv_norm_relus = [] for k in range(num_conv): conv = Conv2d( input_channels if k == 0 else conv_dims, conv_dims, kernel_size=3, stride=1, padding=1, bias=not self.norm, norm=get_norm(self.norm, conv_dims), activation=F.relu, ) self.add_module("mask_fcn{}".format(k + 1), conv) self.conv_norm_relus.append(conv) self.deconv = ConvTranspose2d( conv_dims if num_conv > 0 else input_channels, conv_dims, kernel_size=2, stride=2, padding=0, ) num_mask_classes = 1 if cls_agnostic_mask else num_classes self.predictor = Conv2d(conv_dims, num_mask_classes, kernel_size=1, stride=1, padding=0) for layer in self.conv_norm_relus + [self.deconv]: weight_init.c2_msra_fill(layer) # use normal distribution initialization for mask prediction layer nn.init.normal_(self.predictor.weight, std=0.001) if self.predictor.bias is not None: nn.init.constant_(self.predictor.bias, 0)
def __init__(self, cfg, input_shape: ShapeSpec): """ The following attributes are parsed from config: num_conv, num_fc: the number of conv/fc layers conv_dim/fc_dim: the dimension of the conv/fc layers norm: normalization for the conv layers """ super().__init__() # fmt: off num_conv = cfg.MODEL.ROI_BOX_HEAD.NUM_CONV conv_dim = cfg.MODEL.ROI_BOX_HEAD.CONV_DIM num_fc = cfg.MODEL.ROI_BOX_HEAD.NUM_FC fc_dim = cfg.MODEL.ROI_BOX_HEAD.FC_DIM norm = cfg.MODEL.ROI_BOX_HEAD.NORM # fmt: on assert num_conv + num_fc > 0 self._output_size = (input_shape.channels, input_shape.height, input_shape.width) self.conv_norm_relus = [] for k in range(num_conv): conv = Conv2d( self._output_size[0], conv_dim, kernel_size=3, padding=1, bias=not norm, norm=get_norm(norm, conv_dim), activation=F.relu, ) self.add_module("conv{}".format(k + 1), conv) self.conv_norm_relus.append(conv) self._output_size = (conv_dim, self._output_size[1], self._output_size[2]) self.fcs = [] for k in range(num_fc): fc = nn.Linear(np.prod(self._output_size), fc_dim) self.add_module("fc{}".format(k + 1), fc) self.fcs.append(fc) self._output_size = fc_dim for layer in self.conv_norm_relus: weight_init.c2_msra_fill(layer) for layer in self.fcs: weight_init.c2_xavier_fill(layer)
def __init__(self, cfg, input_shape: ShapeSpec): """ The following attributes are parsed from config: fc_dim: the output dimension of each FC layers num_fc: the number of FC layers coarse_pred_each_layer: if True, coarse prediction features are concatenated to each layer's input """ super(StandardPointHead, self).__init__() # fmt: off num_classes = cfg.MODEL.POINT_HEAD.NUM_CLASSES fc_dim = cfg.MODEL.POINT_HEAD.FC_DIM num_fc = cfg.MODEL.POINT_HEAD.NUM_FC cls_agnostic_mask = cfg.MODEL.POINT_HEAD.CLS_AGNOSTIC_MASK self.coarse_pred_each_layer = cfg.MODEL.POINT_HEAD.COARSE_PRED_EACH_LAYER input_channels = input_shape.channels # fmt: on fc_dim_in = input_channels + num_classes self.fc_layers = [] for k in range(num_fc): fc = nn.Conv1d(fc_dim_in, fc_dim, kernel_size=1, stride=1, padding=0, bias=True) self.add_module("fc{}".format(k + 1), fc) self.fc_layers.append(fc) fc_dim_in = fc_dim fc_dim_in += num_classes if self.coarse_pred_each_layer else 0 num_mask_classes = 1 if cls_agnostic_mask else num_classes self.predictor = nn.Conv1d(fc_dim_in, num_mask_classes, kernel_size=1, stride=1, padding=0) for layer in self.fc_layers: weight_init.c2_msra_fill(layer) # use normal distribution initialization for mask prediction layer nn.init.normal_(self.predictor.weight, std=0.001) if self.predictor.bias is not None: nn.init.constant_(self.predictor.bias, 0)
def __init__(
    self,
    in_channels=3,
    out_channels=64,
    norm="BN",
    activation=None,
    deep_stem=False,
    stem_width=32,
):
    super().__init__()
    self.conv1_1 = Conv2d(
        in_channels,
        stem_width,
        kernel_size=3,
        stride=2,
        padding=1,
        bias=False,
        norm=get_norm(norm, stem_width),
    )
    self.conv1_2 = Conv2d(
        stem_width,
        stem_width,
        kernel_size=3,
        stride=1,
        padding=1,
        bias=False,
        norm=get_norm(norm, stem_width),
    )
    self.conv1_3 = Conv2d(
        stem_width,
        stem_width * 2,
        kernel_size=3,
        stride=1,
        padding=1,
        bias=False,
        norm=get_norm(norm, stem_width * 2),
    )
    for layer in [self.conv1_1, self.conv1_2, self.conv1_3]:
        if layer is not None:
            weight_init.c2_msra_fill(layer)
    self.activation = get_activation(activation)
    self.max_pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
def __init__(
    self, in_channels=3, out_channels=64, norm="BN", activation=None, deep_stem=False, stem_width=32
):
    """
    Args:
        norm (str or callable): a callable that takes the number of channels
            and returns a `nn.Module`, or a pre-defined string
            (one of {"FrozenBN", "BN", "GN"}).
    """
    super().__init__()
    self.deep_stem = deep_stem

    if self.deep_stem:
        self.conv1_1 = Conv2d(
            in_channels,
            stem_width,
            kernel_size=3,
            stride=2,
            padding=1,
            bias=False,
            norm=get_norm(norm, stem_width),
        )
        self.conv1_2 = Conv2d(
            stem_width,
            stem_width,
            kernel_size=3,
            stride=1,
            padding=1,
            bias=False,
            norm=get_norm(norm, stem_width),
        )
        self.conv1_3 = Conv2d(
            stem_width,
            stem_width * 2,
            kernel_size=3,
            stride=1,
            padding=1,
            bias=False,
            norm=get_norm(norm, stem_width * 2),
        )
        for layer in [self.conv1_1, self.conv1_2, self.conv1_3]:
            if layer is not None:
                weight_init.c2_msra_fill(layer)
    else:
        self.conv1 = Conv2d(
            in_channels,
            out_channels,
            kernel_size=7,
            stride=2,
            padding=3,
            bias=False,
            norm=get_norm(norm, out_channels),
        )
        weight_init.c2_msra_fill(self.conv1)
    self.activation = get_activation(activation)
    self.max_pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
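# Downsampling sketch for the classic (non-deep) stem above: a stride-2 7x7
# conv followed by a stride-2 max pool gives an overall stem stride of 4.
# Plain torch, hypothetical 224x224 input:
import torch
import torch.nn as nn

conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
max_pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
x = torch.randn(1, 3, 224, 224)
print(max_pool(conv1(x)).shape)  # torch.Size([1, 64, 56, 56])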
def __init__(
    self,
    in_channels,
    out_channels,
    *,
    bottleneck_channels,
    stride=1,
    num_groups=1,
    norm="BN",
    activation=None,
    stride_in_1x1=False,
    dilation=1,
    deform_modulated=False,
    deform_num_groups=1,
):
    """
    Similar to :class:`BottleneckBlock`, but with deformable conv in the 3x3 convolution.
    """
    super().__init__(in_channels, out_channels, stride)
    self.deform_modulated = deform_modulated

    if in_channels != out_channels:
        self.shortcut = Conv2d(
            in_channels,
            out_channels,
            kernel_size=1,
            stride=stride,
            bias=False,
            norm=get_norm(norm, out_channels),
        )
    else:
        self.shortcut = None

    stride_1x1, stride_3x3 = (stride, 1) if stride_in_1x1 else (1, stride)

    self.activation = get_activation(activation)

    self.conv1 = Conv2d(
        in_channels,
        bottleneck_channels,
        kernel_size=1,
        stride=stride_1x1,
        bias=False,
        norm=get_norm(norm, bottleneck_channels),
    )

    if deform_modulated:
        deform_conv_op = ModulatedDeformConv
        # offset channels are 2 or 3 (if with modulated) * kernel_size * kernel_size
        offset_channels = 27
    else:
        deform_conv_op = DeformConv
        offset_channels = 18

    self.conv2_offset = Conv2d(
        bottleneck_channels,
        offset_channels * deform_num_groups,
        kernel_size=3,
        stride=stride_3x3,
        padding=1 * dilation,
        dilation=dilation,
    )
    self.conv2 = deform_conv_op(
        bottleneck_channels,
        bottleneck_channels,
        kernel_size=3,
        stride=stride_3x3,
        padding=1 * dilation,
        bias=False,
        groups=num_groups,
        dilation=dilation,
        deformable_groups=deform_num_groups,
        norm=get_norm(norm, bottleneck_channels),
    )
    self.conv3 = Conv2d(
        bottleneck_channels,
        out_channels,
        kernel_size=1,
        bias=False,
        norm=get_norm(norm, out_channels),
    )

    for layer in [self.conv1, self.conv2, self.conv3, self.shortcut]:
        if layer is not None:  # shortcut can be None
            weight_init.c2_msra_fill(layer)

    nn.init.constant_(self.conv2_offset.weight, 0)
    nn.init.constant_(self.conv2_offset.bias, 0)
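# Offset-channel arithmetic for the 3x3 deformable conv above: DeformConv
# needs an (x, y) offset per kernel tap, while ModulatedDeformConv additionally
# predicts one modulation scalar per tap; conv2_offset is zero-initialized so
# the block starts out behaving like an ordinary 3x3 convolution.
kernel_size = 3
assert 2 * kernel_size * kernel_size == 18  # DeformConv: offsets only
assert 3 * kernel_size * kernel_size == 27  # ModulatedDeformConv: offsets + mask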
def __init__(
    self,
    in_channels,
    out_channels,
    *,
    bottleneck_channels,
    stride=1,
    num_groups=1,
    norm="BN",
    activation=None,
    stride_in_1x1=False,
    dilation=1,
    avd=False,
    avg_down=False,
    radix=1,
    bottleneck_width=64,
):
    super().__init__(
        in_channels=in_channels,
        out_channels=out_channels,
        bottleneck_channels=bottleneck_channels,
        stride=stride,
        num_groups=num_groups,
        norm=norm,
        activation=activation,
        stride_in_1x1=stride_in_1x1,
        dilation=dilation,
    )

    self.avd = avd and (stride > 1)
    self.avg_down = avg_down
    self.radix = radix

    cardinality = num_groups
    group_width = int(bottleneck_channels * (bottleneck_width / 64.0)) * cardinality

    # The original MSRA ResNet models have stride in the first 1x1 conv
    # The subsequent fb.torch.resnet and Caffe2 ResNe[X]t implementations have
    # stride in the 3x3 conv
    stride_1x1, stride_3x3 = (stride, 1) if stride_in_1x1 else (1, stride)

    if in_channels != out_channels and self.avg_down:
        assert self.shortcut is not None
        self.shortcut_avgpool = nn.AvgPool2d(
            kernel_size=stride, stride=stride, ceil_mode=True, count_include_pad=False
        )
        self.shortcut = Conv2d(
            in_channels,
            out_channels,
            kernel_size=1,
            stride=1,
            bias=False,
            norm=get_norm(norm, out_channels),
        )

    if self.radix > 1:
        from .splat import SplAtConv2d

        self.conv2 = SplAtConv2d(
            group_width,
            group_width,
            kernel_size=3,
            stride=1 if self.avd else stride_3x3,
            padding=dilation,
            dilation=dilation,
            groups=cardinality,
            bias=False,
            radix=self.radix,
            norm=norm,
        )
    else:
        assert hasattr(self, "conv2")

    if self.avd:
        self.avd_layer = nn.AvgPool2d(3, stride, padding=1)

    if self.radix > 1:
        # conv2 (SplAtConv2d) is a composite module, so it is skipped here
        for layer in [self.conv1, self.conv3, self.shortcut]:
            if layer is not None:  # shortcut can be None
                weight_init.c2_msra_fill(layer)
    else:
        for layer in [self.conv1, self.conv2, self.conv3, self.shortcut]:
            if layer is not None:  # shortcut can be None
                weight_init.c2_msra_fill(layer)
def __init__(
    self,
    in_channels,
    out_channels,
    *,
    bottleneck_channels,
    stride=1,
    num_groups=1,
    norm="BN",
    activation=None,
    stride_in_1x1=False,
    dilation=1,
):
    """
    Args:
        norm (str or callable): a callable that takes the number of channels
            and returns a `nn.Module`, or a pre-defined string
            (one of {"FrozenBN", "BN", "GN"}).
        stride_in_1x1 (bool): when stride==2, whether to put stride in the
            first 1x1 convolution or the bottleneck 3x3 convolution.
    """
    super().__init__(in_channels, out_channels, stride)

    if in_channels != out_channels:
        self.shortcut = Conv2d(
            in_channels,
            out_channels,
            kernel_size=1,
            stride=stride,
            bias=False,
            norm=get_norm(norm, out_channels),
        )
    else:
        self.shortcut = None

    # The original MSRA ResNet models have stride in the first 1x1 conv
    # The subsequent fb.torch.resnet and Caffe2 ResNe[X]t implementations have
    # stride in the 3x3 conv
    stride_1x1, stride_3x3 = (stride, 1) if stride_in_1x1 else (1, stride)

    self.activation = get_activation(activation)

    self.conv1 = Conv2d(
        in_channels,
        bottleneck_channels,
        kernel_size=1,
        stride=stride_1x1,
        bias=False,
        norm=get_norm(norm, bottleneck_channels),
    )
    self.conv2 = Conv2d(
        bottleneck_channels,
        bottleneck_channels,
        kernel_size=3,
        stride=stride_3x3,
        padding=1 * dilation,
        bias=False,
        groups=num_groups,
        dilation=dilation,
        norm=get_norm(norm, bottleneck_channels),
    )
    self.conv3 = Conv2d(
        bottleneck_channels,
        out_channels,
        kernel_size=1,
        bias=False,
        norm=get_norm(norm, out_channels),
    )

    for layer in [self.conv1, self.conv2, self.conv3, self.shortcut]:
        if layer is not None:  # shortcut can be None
            weight_init.c2_msra_fill(layer)
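# Channel-flow sketch for the bottleneck above (plain torch; hypothetical
# 256 -> 64 -> 64 -> 256 configuration, i.e. bottleneck_channels=64):
import torch
import torch.nn as nn

x = torch.randn(1, 256, 56, 56)
conv1 = nn.Conv2d(256, 64, kernel_size=1, bias=False)            # reduce
conv2 = nn.Conv2d(64, 64, kernel_size=3, padding=1, bias=False)  # transform
conv3 = nn.Conv2d(64, 256, kernel_size=1, bias=False)            # expand
print(conv3(conv2(conv1(x))).shape)  # torch.Size([1, 256, 56, 56])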
def __init__(
    self,
    in_channels,
    out_channels,
    *,
    bottleneck_channels,
    stride=1,
    num_groups=1,
    norm="BN",
    activation=None,
    stride_in_1x1=False,
    dilation=1,
    avd=False,
    avg_down=False,
    radix=1,
    bottleneck_width=64,
):
    """
    Args:
        norm (str or callable): a callable that takes the number of channels
            and returns a `nn.Module`, or a pre-defined string
            (one of {"FrozenBN", "BN", "GN"}).
        stride_in_1x1 (bool): when stride==2, whether to put stride in the
            first 1x1 convolution or the bottleneck 3x3 convolution.
    """
    super().__init__(in_channels, out_channels, stride)

    self.avd = avd and (stride > 1)
    self.avg_down = avg_down
    self.radix = radix

    cardinality = num_groups
    group_width = int(bottleneck_channels * (bottleneck_width / 64.0)) * cardinality

    if in_channels != out_channels:
        if self.avg_down:
            self.shortcut_avgpool = nn.AvgPool2d(
                kernel_size=stride, stride=stride, ceil_mode=True, count_include_pad=False
            )
            self.shortcut = Conv2d(
                in_channels,
                out_channels,
                kernel_size=1,
                stride=1,
                bias=False,
                norm=get_norm(norm, out_channels),
            )
        else:
            self.shortcut = Conv2d(
                in_channels,
                out_channels,
                kernel_size=1,
                stride=stride,
                bias=False,
                norm=get_norm(norm, out_channels),
            )
    else:
        self.shortcut = None

    # The original MSRA ResNet models have stride in the first 1x1 conv
    # The subsequent fb.torch.resnet and Caffe2 ResNe[X]t implementations have
    # stride in the 3x3 conv
    stride_1x1, stride_3x3 = (stride, 1) if stride_in_1x1 else (1, stride)

    self.activation = get_activation(activation)

    # conv1 outputs group_width channels so that its norm and conv2's input
    # width stay consistent when group_width != bottleneck_channels
    self.conv1 = Conv2d(
        in_channels,
        group_width,
        kernel_size=1,
        stride=stride_1x1,
        bias=False,
        norm=get_norm(norm, group_width),
    )

    if self.radix > 1:
        from .splat import SplAtConv2d

        self.conv2 = SplAtConv2d(
            group_width,
            group_width,
            kernel_size=3,
            stride=1 if self.avd else stride_3x3,
            padding=dilation,
            dilation=dilation,
            groups=cardinality,
            bias=False,
            radix=self.radix,
            norm=norm,
        )
    else:
        self.conv2 = Conv2d(
            group_width,
            group_width,
            kernel_size=3,
            stride=stride_3x3,
            padding=1 * dilation,
            bias=False,
            groups=num_groups,
            dilation=dilation,
            norm=get_norm(norm, group_width),
        )

    if self.avd:
        self.avd_layer = nn.AvgPool2d(3, stride, padding=1)

    self.conv3 = Conv2d(
        group_width,
        out_channels,
        kernel_size=1,
        bias=False,
        norm=get_norm(norm, out_channels),
    )

    if self.radix > 1:
        for layer in [self.conv1, self.conv3, self.shortcut]:
            if layer is not None:  # shortcut can be None
                weight_init.c2_msra_fill(layer)
    else:
        for layer in [self.conv1, self.conv2, self.conv3, self.shortcut]:
            if layer is not None:  # shortcut can be None
                weight_init.c2_msra_fill(layer)
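# Sketch of the avg_down ("ResNet-D" style) shortcut built above: pooling
# first and then applying a stride-1 1x1 conv keeps all activations in play,
# unlike a stride-2 1x1 conv that discards three quarters of them.
# Plain torch, hypothetical 64 -> 256 channels at stride 2:
import torch
import torch.nn as nn

shortcut = nn.Sequential(
    nn.AvgPool2d(kernel_size=2, stride=2, ceil_mode=True, count_include_pad=False),
    nn.Conv2d(64, 256, kernel_size=1, stride=1, bias=False),
)
x = torch.randn(1, 64, 56, 56)
print(shortcut(x).shape)  # torch.Size([1, 256, 28, 28])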
def __init__(
    self,
    in_channels,
    out_channels,
    *,
    bottleneck_channels,
    stride=1,
    num_groups=1,
    norm="BN",
    activation=None,
    stride_in_1x1=False,
    num_branch=3,
    dilations=(1, 2, 3),
    concat_output=False,
    test_branch_idx=-1,
):
    """
    Args:
        num_branch (int): the number of branches in TridentNet.
        dilations (tuple): the dilations of the multiple branches in TridentNet.
        concat_output (bool): whether to concatenate the outputs of the multiple
            branches in TridentNet. Use 'True' for the last trident block.
    """
    super().__init__(in_channels, out_channels, stride)

    assert num_branch == len(dilations)

    self.num_branch = num_branch
    self.concat_output = concat_output
    self.test_branch_idx = test_branch_idx

    if in_channels != out_channels:
        self.shortcut = Conv2d(
            in_channels,
            out_channels,
            kernel_size=1,
            stride=stride,
            bias=False,
            norm=get_norm(norm, out_channels),
        )
    else:
        self.shortcut = None

    stride_1x1, stride_3x3 = (stride, 1) if stride_in_1x1 else (1, stride)

    self.activation = get_activation(activation)

    self.conv1 = Conv2d(
        in_channels,
        bottleneck_channels,
        kernel_size=1,
        stride=stride_1x1,
        bias=False,
        norm=get_norm(norm, bottleneck_channels),
    )
    self.conv2 = TridentConv(
        bottleneck_channels,
        bottleneck_channels,
        kernel_size=3,
        stride=stride_3x3,
        paddings=dilations,
        bias=False,
        groups=num_groups,
        dilations=dilations,
        num_branch=num_branch,
        test_branch_idx=test_branch_idx,
        norm=get_norm(norm, bottleneck_channels),
    )
    self.conv3 = Conv2d(
        bottleneck_channels,
        out_channels,
        kernel_size=1,
        bias=False,
        norm=get_norm(norm, out_channels),
    )

    for layer in [self.conv1, self.conv2, self.conv3, self.shortcut]:
        if layer is not None:  # shortcut can be None
            weight_init.c2_msra_fill(layer)
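# Weight-sharing sketch of the trident idea behind TridentConv (plain torch
# functional conv, not the library op): the same 3x3 weight is applied with
# several dilations, and padding=dilation keeps the branch outputs aligned.
import torch
import torch.nn.functional as F

weight = torch.randn(64, 64, 3, 3)
x = torch.randn(1, 64, 32, 32)
branches = [F.conv2d(x, weight, padding=d, dilation=d) for d in (1, 2, 3)]
print([b.shape for b in branches])  # all torch.Size([1, 64, 32, 32])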