def __init__(self, inp, oup, stride, expand_ratio):
    """Assemble the convolution stack of an inverted-residual block.

    Args:
        inp: number of input channels.
        oup: number of output channels.
        stride: stride of the depthwise 3x3 convolution; must be 1 or 2.
        expand_ratio: factor applied to ``inp`` to obtain the width of the
            hidden (expanded) representation.
    """
    super(InvertedResidual, self).__init__()
    self.stride = stride
    assert stride in (1, 2)

    hidden_dim = int(round(inp * expand_ratio))
    # The shortcut flag is set only when the block keeps both the stride
    # and the channel count unchanged.
    self.use_res_connect = self.stride == 1 and inp == oup

    layers = []
    if expand_ratio != 1:
        # pw: expand to the hidden width first
        layers += [
            Conv2d(inp, hidden_dim, 1, 1, 0, bias=False),
            FrozenBatchNorm2d(hidden_dim),
            nn.ReLU6(),
        ]
    layers += [
        # dw: one filter per channel (groups == channels)
        Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False),
        FrozenBatchNorm2d(hidden_dim),
        nn.ReLU6(),
        # pw-linear: project to the output width, no activation afterwards
        Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
        FrozenBatchNorm2d(oup),
    ]
    self.conv = nn.Sequential(*layers)
def create_texture_extractor(x, num_channels, iterations=3):
    """Apply a stack of newly constructed 1x1 convolutions to ``x``.

    Two channel-preserving 1x1 convolutions (each followed by an in-place
    ReLU) are applied ``iterations`` times, reusing the same two layers on
    every pass. A final 1x1 convolution with ReLU then reduces the channel
    count to ``int(num_channels / 2)``.

    Args:
        x: input tensor; assumed to have ``num_channels`` channels —
            TODO confirm against callers.
        num_channels: channel count expected by the convolutions.
        iterations: how many times the conv/ReLU pair is applied.

    Returns:
        The tensor produced by the final convolution and ReLU.
    """
    def _pointwise(out_channels):
        # All layers here are bias-free 1x1 convolutions.
        return Conv2d(num_channels, out_channels, kernel_size=1, bias=False)

    refine_a = _pointwise(num_channels)
    refine_b = _pointwise(num_channels)
    squeeze = _pointwise(int(num_channels / 2))

    feat = x
    for _ in range(iterations):
        feat = F.relu_(refine_a(feat))
        feat = F.relu_(refine_b(feat))
    return F.relu_(squeeze(feat))
def __init__(self, C):
    """Build the pool -> 1x1 conv -> ReLU -> 1x1 conv -> sigmoid pipeline.

    Args:
        C: number of channels entering and leaving the module.
    """
    super(SEModule, self).__init__()

    # ``self.reduction`` is not assigned in this method, so it has to be
    # supplied by the class. The bottleneck width never drops below 8.
    squeezed = max(C // self.reduction, 8)
    reduce_conv = Conv2d(C, squeezed, 1, 1, 0)
    expand_conv = Conv2d(squeezed, C, 1, 1, 0)

    stages = [
        nn.AdaptiveAvgPool2d(1),
        reduce_conv,
        nn.ReLU(),
        expand_conv,
        nn.Sigmoid(),
    ]
    self.op = nn.Sequential(*stages)
def __init__(self, C_in, C_out, stride):
    """Chain two 3x3 convolutions, each followed by batch norm.

    Args:
        C_in: input channels (also the width of the first convolution).
        C_out: output channels of the second convolution.
        stride: stride of the first convolution; must be 1 or 2.
    """
    assert stride in (1, 2)

    first_stage = [
        Conv2d(C_in, C_in, 3, stride, 1, bias=False),
        BatchNorm2d(C_in),
        nn.ReLU(),
    ]
    second_stage = [
        Conv2d(C_in, C_out, 3, 1, 1, bias=False),
        BatchNorm2d(C_out),
    ]
    # The parent initializer receives the layers positionally.
    super(CascadeConv3x3, self).__init__(*first_stage, *second_stage)

    # True only when the block changes neither the stride nor the width.
    self.res_connect = (stride == 1) and (C_in == C_out)
def make_conv3x3(
    in_channels,
    out_channels,
    dilation=1,
    stride=1,
    use_gn=False,
    use_relu=False,
    kaiming_init=True,
):
    """Create a 3x3 convolution, optionally followed by GroupNorm and ReLU.

    Args:
        in_channels: input channel count.
        out_channels: output channel count.
        dilation: dilation of the convolution; also used as its padding.
        stride: stride of the convolution.
        use_gn: append ``group_norm(out_channels)`` and drop the conv bias.
        use_relu: append an ``nn.ReLU``.
        kaiming_init: use Kaiming-normal weight init when true, otherwise a
            zero-mean Gaussian with std 0.01.

    Returns:
        The bare convolution when no extra layer was requested, otherwise
        an ``nn.Sequential`` of the convolution and the extra layers.
    """
    conv = Conv2d(
        in_channels,
        out_channels,
        kernel_size=3,
        stride=stride,
        padding=dilation,
        dilation=dilation,
        # The bias is only created when no normalization layer follows.
        bias=not use_gn,
    )
    if kaiming_init:
        init.kaiming_normal_(conv.weight, mode="fan_out", nonlinearity="relu")
    else:
        # torch.nn.init provides no ``gauss_``; ``normal_`` is the Gaussian
        # initializer (assumes ``init`` is torch.nn.init, as the other
        # calls in this function suggest).
        init.normal_(conv.weight, std=0.01)
    if not use_gn:
        init.constant_(conv.bias, 0)

    layers = [conv]
    if use_gn:
        layers.append(group_norm(out_channels))
    if use_relu:
        layers.append(nn.ReLU())
    if len(layers) > 1:
        return nn.Sequential(*layers)
    return conv
def __init__(self, cfg, in_channels):
    """Set up the ROI pooler and the stack of 3x3 convolutions.

    Args:
        cfg: config node. The ``MODEL.ROI_KEYPOINT_HEAD`` fields
            ``POOLER_RESOLUTION``, ``POOLER_SCALES``,
            ``POOLER_SAMPLING_RATIO`` and ``CONV_LAYERS`` are read.
        in_channels: channel count of the features fed to the first
            convolution.
    """
    super(KeypointRCNNFeatureExtractor, self).__init__()

    head_cfg = cfg.MODEL.ROI_KEYPOINT_HEAD
    resolution = head_cfg.POOLER_RESOLUTION
    scales = head_cfg.POOLER_SCALES
    sampling_ratio = head_cfg.POOLER_SAMPLING_RATIO
    self.pooler = Pooler(
        output_size=(resolution, resolution),
        scales=scales,
        sampling_ratio=sampling_ratio,
    )

    next_feature = in_channels
    self.blocks = []
    for layer_idx, layer_features in enumerate(head_cfg.CONV_LAYERS, 1):
        layer_name = "conv_fcn{}".format(layer_idx)
        module = Conv2d(next_feature, layer_features, 3, stride=1, padding=1)
        init.kaiming_normal_(module.weight, mode="fan_out", nonlinearity="relu")
        init.constant_(module.bias, 0)
        # Each conv is stored under its own attribute name; ``self.blocks``
        # keeps only the names, in order.
        setattr(self, layer_name, module)
        next_feature = layer_features
        self.blocks.append(layer_name)

    # ``next_feature`` is the width of the last conv, or ``in_channels``
    # when CONV_LAYERS is empty. Reading the loop variable here instead
    # would raise NameError for an empty list.
    self.out_channels = next_feature
def make_conv(in_channels, out_channels, kernel_size, stride=1, dilation=1):
    """Create a convolution, optionally followed by GroupNorm and ReLU.

    ``use_gn`` and ``use_relu`` are not parameters of this function; they
    are read from the enclosing scope.

    Args:
        in_channels: input channel count.
        out_channels: output channel count.
        kernel_size: integer kernel size.
        stride: stride of the convolution.
        dilation: dilation of the convolution.

    Returns:
        The bare convolution when neither extra layer is enabled, otherwise
        an ``nn.Sequential`` of the convolution and the extra layers.
    """
    # The bias is only created when no normalization layer follows.
    with_bias = not use_gn
    padding = dilation * (kernel_size - 1) // 2
    conv = Conv2d(
        in_channels,
        out_channels,
        kernel_size=kernel_size,
        stride=stride,
        padding=padding,
        dilation=dilation,
        bias=with_bias,
    )
    # Caffe2 implementation uses XavierFill, which in fact
    # corresponds to kaiming_uniform_ in PyTorch
    nn.init.kaiming_uniform_(conv.weight, a=1)
    if with_bias:
        nn.init.constant_(conv.bias, 0)

    stack = [conv]
    if use_gn:
        stack.append(group_norm(out_channels))
    if use_relu:
        stack.append(nn.ReLU(inplace=True))
    if len(stack) == 1:
        return conv
    return nn.Sequential(*stack)
def create_convs(num_channels, iter=3):
    """Build two identical 1x1 convolutions and pair them with a count.

    ``norm`` is not a parameter of this function; it is read from the
    enclosing scope and passed to ``get_norm`` for each convolution.

    Args:
        num_channels: input and output channel count of both convolutions.
        iter: value returned unchanged as the third tuple element.

    Returns:
        Tuple ``(conv1, conv2, iter)``.
    """
    def _unit():
        # Bias-free 1x1 convolution with its own normalization layer.
        return Conv2d(
            num_channels,
            num_channels,
            kernel_size=1,
            bias=False,
            norm=get_norm(norm, num_channels),
        )

    first = _unit()
    second = _unit()
    return (first, second, iter)
def __init__(self, C_in, C_out, expansion, stride):
    """Build the pointwise -> shift -> pointwise-linear layer sequence.

    Args:
        C_in: input channel count.
        C_out: output channel count.
        expansion: multiplier applied to ``C_in`` before the result is
            passed to ``_get_divisible_by`` to obtain the middle width.
        stride: stride forwarded to the ``Shift`` layer; must be 1 or 2.
    """
    assert stride in [1, 2]

    # presumably rounds the expanded width to a multiple of 8 — verify
    # against _get_divisible_by.
    C_mid = _get_divisible_by(C_in * expansion, 8, 8)

    ops = [
        # pw
        Conv2d(C_in, C_mid, 1, 1, 0, bias=False),
        BatchNorm2d(C_mid),
        nn.ReLU(),
        # shift
        Shift(C_mid, 5, stride, 2),
        # pw-linear
        Conv2d(C_mid, C_out, 1, 1, 0, bias=False),
        BatchNorm2d(C_out),
    ]
    super(ShiftBlock5x5, self).__init__(*ops)

    # Assigned only after the parent initializer has run, so the attribute
    # is never set on a half-constructed module.
    self.res_connect = (stride == 1) and (C_in == C_out)
def __init__(
    self,
    input_depth,
    output_depth,
    kernel,
    stride,
    pad,
    no_bias,
    use_relu,
    bn_type,
    group=1,
    *args,
    **kwargs
):
    """Register a convolution plus optional normalization and ReLU.

    Sub-modules are added under the names ``"conv"``, ``"bn"`` and
    ``"relu"``, in that order.

    Args:
        input_depth: input channel count.
        output_depth: output channel count.
        kernel: kernel size of the convolution.
        stride: stride of the convolution; must be 1, 2 or 4.
        pad: padding of the convolution.
        no_bias: when truthy the convolution is created without a bias.
        use_relu: ``"relu"`` to append an ``nn.ReLU``, or ``None``.
        bn_type: ``"bn"`` (BatchNorm2d), ``"af"`` (FrozenBatchNorm2d),
            ``("gn", num_groups)`` (GroupNorm) or ``None`` for no
            normalization layer.
        group: ``groups`` argument of the convolution.
        *args, **kwargs: forwarded to ``Conv2d``.

    Raises:
        ValueError: if ``bn_type`` selects GroupNorm without a group count.
    """
    super(ConvBNRelu, self).__init__()

    assert use_relu in ["relu", None]
    gn_group = None
    if isinstance(bn_type, (list, tuple)):
        assert len(bn_type) == 2
        assert bn_type[0] == "gn"
        gn_group = bn_type[1]
        bn_type = bn_type[0]
    assert bn_type in ["bn", "af", "gn", None]
    assert stride in [1, 2, 4]
    if bn_type == "gn" and gn_group is None:
        # A bare "gn" string passes the membership assert above but carries
        # no group count; fail with a clear message instead of an unbound
        # local further down.
        raise ValueError(
            'bn_type "gn" requires a group count: pass ("gn", num_groups)'
        )

    op = Conv2d(
        input_depth,
        output_depth,
        *args,
        kernel_size=kernel,
        stride=stride,
        padding=pad,
        bias=not no_bias,
        groups=group,
        **kwargs
    )
    nn.init.kaiming_normal_(op.weight, mode="fan_out", nonlinearity="relu")
    if op.bias is not None:
        nn.init.constant_(op.bias, 0.0)
    self.add_module("conv", op)

    if bn_type == "bn":
        bn_op = BatchNorm2d(output_depth)
    elif bn_type == "gn":
        bn_op = nn.GroupNorm(num_groups=gn_group, num_channels=output_depth)
    elif bn_type == "af":
        bn_op = FrozenBatchNorm2d(output_depth)
    if bn_type is not None:
        self.add_module("bn", bn_op)

    if use_relu == "relu":
        self.add_module("relu", nn.ReLU())
def __init__(self, cfg, in_channels):
    """Create the 1x1 mask-logit convolution and initialize its parameters.

    Args:
        cfg: config node; ``MODEL.ROI_BOX_HEAD.NUM_CLASSES`` sets the
            number of output channels.
        in_channels: channel count of the incoming features.
    """
    super(MaskRCNNConv1x1Predictor, self).__init__()
    num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES
    num_inputs = in_channels

    self.mask_fcn_logits = Conv2d(num_inputs, num_classes, 1, 1, 0)

    # named_parameters() yields (name, tensor) pairs; parameters() yields
    # only the tensors, which do not carry the attribute name the
    # bias/weight dispatch below depends on.
    for name, param in self.named_parameters():
        if "bias" in name:
            init.constant_(param, 0)
        elif "weight" in name:
            # Caffe2 implementation uses MSRAFill, which in fact
            # corresponds to kaiming_normal_ in PyTorch
            init.kaiming_normal_(param, mode="fan_out", nonlinearity="relu")
def __init__(self, cfg, norm_func):
    """Create the stem's 7x7 stride-2 convolution and its norm layer.

    Args:
        cfg: config node; ``MODEL.RESNETS.STEM_OUT_CHANNELS`` sets the
            output width.
        norm_func: callable that builds a normalization layer from a
            channel count.
    """
    super(BaseStem, self).__init__()

    stem_width = cfg.MODEL.RESNETS.STEM_OUT_CHANNELS

    # The input width is fixed at 3 channels.
    self.conv1 = Conv2d(
        3, stem_width, kernel_size=7, stride=2, padding=3, bias=False
    )
    self.bn1 = norm_func(stem_width)

    nn.init.kaiming_uniform_(self.conv1.weight, a=1)
def __init__(self, cfg, in_channels):
    """Create the transposed-conv and 1x1-logit layers and initialize them.

    Args:
        cfg: config node; ``MODEL.ROI_BOX_HEAD.NUM_CLASSES`` and the last
            entry of ``MODEL.ROI_MASK_HEAD.CONV_LAYERS`` are read.
        in_channels: channel count of the incoming features.
    """
    super(MaskRCNNC4Predictor, self).__init__()

    num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES
    dim_reduced = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS[-1]

    # Kernel 2, stride 2, padding 0.
    self.conv5_mask = ConvTranspose2d(in_channels, dim_reduced, 2, 2, 0)
    self.mask_fcn_logits = Conv2d(dim_reduced, num_classes, 1, 1, 0)

    for param_name, tensor in self.named_parameters():
        if "bias" in param_name:
            nn.init.constant_(tensor, 0)
            continue
        if "weight" in param_name:
            # Caffe2 implementation uses MSRAFill, which in fact
            # corresponds to kaiming_normal_ in PyTorch
            nn.init.kaiming_normal_(tensor, mode="fan_out", nonlinearity="relu")
def make_conv(in_channels, out_channels, kernel_size, stride=1, dilation=1):
    """Create a convolution, optionally followed by GroupNorm and ReLU.

    ``use_gn`` and ``use_relu`` are not parameters of this function; they
    are read from the enclosing scope.

    Args:
        in_channels: input channel count.
        out_channels: output channel count.
        kernel_size: integer kernel size.
        stride: stride of the convolution.
        dilation: dilation of the convolution.

    Returns:
        The bare convolution when neither extra layer is enabled, otherwise
        an ``nn.Sequential`` of the convolution and the extra layers.
    """
    conv = Conv2d(
        in_channels,
        out_channels,
        kernel_size=kernel_size,
        stride=stride,
        padding=dilation * (kernel_size - 1) // 2,
        dilation=dilation,
        # The bias is only created when no normalization layer follows.
        bias=not use_gn,
    )
    init.kaiming_uniform_(conv.weight, a=1)
    if not use_gn:
        nn.init.constant_(conv.bias, 0)

    module = [conv]
    if use_gn:
        module.append(group_norm(out_channels))
    if use_relu:
        # The class is spelled ``ReLU``; ``nn.Relu`` does not exist.
        module.append(nn.ReLU())
    if len(module) > 1:
        return nn.Sequential(*module)
    return conv
def __init__(self, bottom_up, in_features, out_channels, norm="", top_block=None, fuse_type="sum"):
    """
    Build the lateral (1x1) and output (3x3) convolutions of the pyramid.

    Args:
        bottom_up (Backbone): module representing the bottom-up subnetwork.
            Must be a subclass of :class:`Backbone`. The multi-scale feature
            maps it produces, as listed in `in_features`, are used to
            generate the FPN levels.
        in_features (list[str]): names of the backbone feature maps FPN is
            attached to. For example, if the backbone produces
            ["res2", "res3", "res4"], any *contiguous* sublist of these may
            be used; order must be from high to low resolution.
        out_channels (int): number of channels in the output feature maps.
        norm (str): the normalization to use. An empty string means the
            convolutions are created with a bias.
        top_block (nn.Module or None): if provided, an extra operation is
            performed on the output of the last (smallest resolution) FPN
            output, and the result extends the result list. The top_block
            further downsamples the feature map. It must have an attribute
            "num_levels", the number of extra FPN levels added by this
            block, and "in_feature", a string naming its input feature
            (e.g., p5).
        fuse_type (str): how the top-down and lateral features are fused.
            Either "sum" (default), an element-wise sum, or "avg", the
            element-wise mean of the two.
    """
    super(FPN, self).__init__()
    assert isinstance(bottom_up, Backbone)
    assert in_features, in_features

    # Strides and channel counts of the selected bottom-up features
    # (e.g. the stages of a ResNet).
    input_shapes = bottom_up.output_shape()
    strides = [input_shapes[name].stride for name in in_features]
    in_channels_per_feature = [
        input_shapes[name].channels for name in in_features
    ]

    _assert_strides_are_log2_contiguous(strides)

    lateral_convs = []
    output_convs = []
    use_bias = norm == ""
    for feature_stride, in_channels in zip(strides, in_channels_per_feature):
        lateral_norm = get_norm(norm, out_channels)
        output_norm = get_norm(norm, out_channels)

        lateral_conv = Conv2d(
            in_channels,
            out_channels,
            kernel_size=1,
            bias=use_bias,
            norm=lateral_norm,
        )
        output_conv = Conv2d(
            out_channels,
            out_channels,
            kernel_size=3,
            stride=1,
            padding=1,
            bias=use_bias,
            norm=output_norm,
        )
        weight_init.c2_xavier_fill(lateral_conv)
        weight_init.c2_xavier_fill(output_conv)

        # Sub-modules are registered under their pyramid stage number,
        # i.e. log2 of the feature stride.
        stage = int(math.log2(feature_stride))
        self.add_module("fpn_lateral{}".format(stage), lateral_conv)
        self.add_module("fpn_output{}".format(stage), output_conv)

        lateral_convs.append(lateral_conv)
        output_convs.append(output_conv)

    self.out_channels = out_channels
    self.norm = norm
    # Place convs into top-down order (from low to high resolution)
    # to make the top-down computation in forward clearer.
    self.lateral_convs = lateral_convs[::-1]
    self.output_convs = output_convs[::-1]
    self.top_block = top_block
    self.in_features = in_features
    self.bottom_up = bottom_up

    # Return feature names are "p<stage>", like ["p2", "p3", ..., "p6"]
    self._out_feature_strides = {
        "p{}".format(int(math.log2(s))): s for s in strides
    }
    # Extra levels contributed by the top block. ``stage`` still holds the
    # stage of the last feature processed by the loop above.
    if self.top_block is not None:
        first_extra = stage + 1
        for level in range(first_extra, first_extra + self.top_block.num_levels):
            self._out_feature_strides["p{}".format(level)] = 2 ** level

    self._out_features = list(self._out_feature_strides.keys())
    self._out_feature_channels = {
        name: out_channels for name in self._out_features
    }
    self._size_divisibility = strides[-1]
    assert fuse_type in {"avg", "sum"}
    self._fuse_type = fuse_type

    # tuple of (conv2d, conv2d, iter)
    # NOTE(review): this helper is defined here but not called anywhere
    # in the visible constructor body.
    def create_convs(num_channels, iter=3):
        conv1 = Conv2d(
            num_channels,
            num_channels,
            kernel_size=1,
            bias=False,
            norm=get_norm(norm, num_channels),
        )
        conv2 = Conv2d(
            num_channels,
            num_channels,
            kernel_size=1,
            bias=False,
            norm=get_norm(norm, num_channels),
        )
        return (conv1, conv2, iter)
def FTT_get_p3pr(p2, p3, out_channels, norm):
    """Combine ``p3`` and ``p2`` into one feature map.

    ``p3`` is widened to ``4 * out_channels`` channels by a 1x1
    convolution, refined by the content extractor and rearranged with
    ``nn.PixelShuffle(2)``. The result is concatenated with ``p2`` along
    dimension 1, run through the texture extractor and added back to the
    shuffled ``p3`` branch.

    NOTE(review): every convolution is constructed inside this call, so
    the layers are local to a single invocation — confirm this is
    intended. ``norm`` is accepted but not used.

    Args:
        p2: higher-resolution feature map; assumed to have
            ``out_channels`` channels and the spatial size of the shuffled
            ``p3`` — TODO confirm against callers.
        p3: lower-resolution feature map; assumed to have ``out_channels``
            channels — TODO confirm against callers.
        out_channels: base channel count used to size all convolutions.
        norm: unused.

    Returns:
        The sum of the shuffled ``p3`` branch and the texture-extractor
        output.
    """
    channel_scaler = Conv2d(
        out_channels, out_channels * 4, kernel_size=1, bias=False
    )

    def create_content_extractor(x, num_channels, iterations=3):
        # Two channel-preserving 1x1 convs, reused on every iteration.
        conv_a = Conv2d(num_channels, num_channels, kernel_size=1, bias=False)
        conv_b = Conv2d(num_channels, num_channels, kernel_size=1, bias=False)
        feat = x
        for _ in range(iterations):
            feat = F.relu_(conv_a(feat))
            feat = F.relu_(conv_b(feat))
        return feat

    def create_texture_extractor(x, num_channels, iterations=3):
        # Same iteration scheme as the content extractor, followed by one
        # 1x1 conv that reduces the channels to int(num_channels / 2).
        conv_a = Conv2d(num_channels, num_channels, kernel_size=1, bias=False)
        conv_b = Conv2d(num_channels, num_channels, kernel_size=1, bias=False)
        conv_reduce = Conv2d(
            num_channels, int(num_channels / 2), kernel_size=1, bias=False
        )
        feat = x
        for _ in range(iterations):
            feat = F.relu_(conv_a(feat))
            feat = F.relu_(conv_b(feat))
        return F.relu_(conv_reduce(feat))

    widened = channel_scaler(p3)
    bottom = create_content_extractor(widened, out_channels * 4)
    sub_pixel_conv = nn.PixelShuffle(2)
    bottom = sub_pixel_conv(bottom)

    # We interpreted "wrap" as concatenating bottom and top, so the total
    # channel count is doubled afterwards (one placed on top of the other).
    stacked = torch.cat((bottom, p2), dim=1)
    top = create_texture_extractor(stacked, out_channels * 2)

    return bottom + top
def __init__(
    self,
    in_channels,
    bottleneck_channels,
    out_channels,
    num_groups,
    stride_in_1x1,
    stride,
    dilation,
    norm_func,
):
    """Create the 1x1 -> 3x3 -> 1x1 convolutions of a bottleneck block.

    Args:
        in_channels: input channel count.
        bottleneck_channels: width of the two inner convolutions.
        out_channels: output channel count.
        num_groups: ``groups`` of the 3x3 convolution.
        stride_in_1x1: when truthy the stride is applied in the first 1x1
            convolution, otherwise in the 3x3 convolution.
        stride: block stride; forced to 1 when ``dilation > 1``.
        dilation: dilation (and padding) of the 3x3 convolution.
        norm_func: callable that builds a normalization layer from a
            channel count.
    """
    super(Bottleneck, self).__init__()

    self.downsample = None
    if in_channels != out_channels:
        # A 1x1 conv + norm is built whenever the channel counts differ.
        if dilation == 1:
            down_stride = stride
        else:
            down_stride = 1
        self.downsample = nn.Sequential(
            Conv2d(
                in_channels,
                out_channels,
                kernel_size=1,
                stride=down_stride,
                bias=False,
            ),
            norm_func(out_channels),
        )
        for layer in self.downsample.modules():
            if isinstance(layer, Conv2d):
                nn.init.kaiming_uniform_(layer.weight, a=1)

    if dilation > 1:
        stride = 1  # reset to be 1

    # The original MSRA ResNet models have stride in the first 1x1 conv
    # The subsequent fb.torch.resnet and Caffe2 ResNe[X]t implementations have
    # stride in the 3x3 conv
    if stride_in_1x1:
        stride_1x1, stride_3x3 = stride, 1
    else:
        stride_1x1, stride_3x3 = 1, stride

    self.conv1 = Conv2d(
        in_channels,
        bottleneck_channels,
        kernel_size=1,
        stride=stride_1x1,
        bias=False,
    )
    self.bn1 = norm_func(bottleneck_channels)
    # TODO: specify init for the above

    self.conv2 = Conv2d(
        bottleneck_channels,
        bottleneck_channels,
        kernel_size=3,
        stride=stride_3x3,
        padding=dilation,
        bias=False,
        groups=num_groups,
        dilation=dilation,
    )
    nn.init.kaiming_uniform_(self.conv2.weight, a=1)
    self.bn2 = norm_func(bottleneck_channels)

    self.conv3 = Conv2d(
        bottleneck_channels, out_channels, kernel_size=1, bias=False
    )
    self.bn3 = norm_func(out_channels)

    for pointwise in (self.conv1, self.conv3):
        nn.init.kaiming_uniform_(pointwise.weight, a=1)
def conv_bn(inp, oup, stride):
    """Return a 3x3 conv -> FrozenBatchNorm2d -> ReLU6 sequence.

    Args:
        inp: input channel count.
        oup: output channel count.
        stride: stride of the convolution.
    """
    layers = [
        # 3x3 kernel, padding 1, no bias
        Conv2d(inp, oup, 3, stride, 1, bias=False),
        FrozenBatchNorm2d(oup),
        nn.ReLU6(),
    ]
    return nn.Sequential(*layers)
def conv_1x1_bn(inp, oup):
    """Return a 1x1 conv -> FrozenBatchNorm2d -> ReLU6 sequence.

    Args:
        inp: input channel count.
        oup: output channel count.
    """
    layers = [
        # 1x1 kernel, stride 1, padding 0, no bias
        Conv2d(inp, oup, 1, 1, 0, bias=False),
        FrozenBatchNorm2d(oup),
        nn.ReLU6(),
    ]
    return nn.Sequential(*layers)
def __init__(self, in_channels, bottleneck_channels, out_channels, num_groups, stride_in_1x1, stride, dilation, norm_func, dcn_config):
    """Create the 1x1 -> 3x3 -> 1x1 convolutions of a bottleneck block.

    Args:
        in_channels: input channel count.
        bottleneck_channels: width of the two inner convolutions.
        out_channels: output channel count.
        num_groups: ``groups`` of the 3x3 convolution.
        stride_in_1x1: when truthy the stride is applied in the first 1x1
            convolution, otherwise in the 3x3 convolution.
        stride: block stride; forced to 1 when ``dilation > 1``.
        dilation: dilation (and padding) of the 3x3 convolution.
        norm_func: callable that builds a normalization layer from a
            channel count.
        dcn_config: accepted but currently not read (see note below).
    """
    super(Bottleneck, self).__init__()

    self.downsample = None
    if in_channels != out_channels:
        # A 1x1 conv + norm is built whenever the channel counts differ.
        if dilation == 1:
            down_stride = stride
        else:
            down_stride = 1
        self.downsample = nn.Sequential(
            Conv2d(
                in_channels,
                out_channels,
                kernel_size=1,
                stride=down_stride,
                bias=False,
            ),
            norm_func(out_channels),
        )
        for layer in self.downsample.modules():
            if isinstance(layer, Conv2d):
                init.kaiming_uniform_(layer.weight, a=1)

    if dilation > 1:
        stride = 1  # reset to be 1

    # The original MSRA ResNet models have stride in the first 1x1 conv;
    # otherwise the stride goes in the 3x3 conv.
    if stride_in_1x1:
        stride_1x1, stride_3x3 = stride, 1
    else:
        stride_1x1, stride_3x3 = 1, stride

    self.conv1 = Conv2d(
        in_channels,
        bottleneck_channels,
        kernel_size=1,
        stride=stride_1x1,
        bias=False,
    )
    self.bn1 = norm_func(bottleneck_channels)
    # TODO: specify init for the above

    # NOTE: the deformable-convolution path is disabled. It used to build
    # conv2 as a DFConv2d when dcn_config["stage_with_dcn"] was set, using
    # the "deformable_groups" and "with_modulated_dcn" entries. A plain
    # Conv2d is always used now, so dcn_config has no effect.
    self.conv2 = Conv2d(
        bottleneck_channels,
        bottleneck_channels,
        kernel_size=3,
        stride=stride_3x3,
        padding=dilation,
        bias=False,
        groups=num_groups,
        dilation=dilation,
    )
    init.kaiming_uniform_(self.conv2.weight, a=1)
    self.bn2 = norm_func(bottleneck_channels)

    self.conv3 = Conv2d(
        bottleneck_channels, out_channels, kernel_size=1, bias=False
    )
    self.bn3 = norm_func(out_channels)

    for pointwise in (self.conv1, self.conv3):
        init.kaiming_uniform_(pointwise.weight, a=1)