Ejemplo n.º 1
0
    def __init__(self, inp, oup, stride, expand_ratio):
        """Assemble the convolution stack of an inverted residual block.

        Args:
            inp: number of input channels.
            oup: number of output channels.
            stride: stride of the 3x3 depthwise conv; must be 1 or 2.
            expand_ratio: factor applied to ``inp`` to obtain the hidden width.
        """
        super(InvertedResidual, self).__init__()
        self.stride = stride
        assert stride in [1, 2]

        hidden_dim = int(round(inp * expand_ratio))
        # A skip connection is flagged only when the block keeps both the
        # spatial size (stride 1) and the channel count.
        self.use_res_connect = self.stride == 1 and inp == oup

        stages = []
        if expand_ratio != 1:
            # pw: expand to the hidden width (skipped when there is no expansion)
            stages.extend([
                Conv2d(inp, hidden_dim, 1, 1, 0, bias=False),
                FrozenBatchNorm2d(hidden_dim),
                nn.ReLU6(),
            ])
        # dw: one 3x3 filter per channel (groups == channels)
        stages.extend([
            Conv2d(hidden_dim, hidden_dim, 3, stride, 1,
                   groups=hidden_dim, bias=False),
            FrozenBatchNorm2d(hidden_dim),
            nn.ReLU6(),
        ])
        # pw-linear: project to the output width, no activation afterwards
        stages.extend([
            Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
            FrozenBatchNorm2d(oup),
        ])
        self.conv = nn.Sequential(*stages)
Ejemplo n.º 2
0
    def create_texture_extractor(x, num_channels, iterations=3):
        """Run ``x`` through freshly built 1x1 convs and return the result.

        Two bias-free 1x1 convs (each followed by an in-place ReLU) are
        applied ``iterations`` times with the same weights, then a third
        1x1 conv reduces the channel count to ``int(num_channels / 2)``.

        NOTE(review): the convs are created on every call, so their weights
        are not shared between calls -- confirm this is intended.
        """

        def pointwise(out_channels):
            # Bias-free 1x1 convolution fed with ``num_channels`` channels.
            return Conv2d(
                num_channels,
                out_channels,
                kernel_size=1,
                bias=False,
            )

        refine_a = pointwise(num_channels)
        refine_b = pointwise(num_channels)
        squeeze = pointwise(int(num_channels / 2))

        feat = x
        for _ in range(iterations):
            feat = F.relu_(refine_a(feat))
            feat = F.relu_(refine_b(feat))
        return F.relu_(squeeze(feat))
Ejemplo n.º 3
0
    def __init__(self, C):
        """Build the gating branch: pool -> 1x1 conv -> ReLU -> 1x1 conv -> sigmoid.

        Args:
            C: number of channels entering and leaving the gate.
        """
        super(SEModule, self).__init__()
        # Bottleneck width is C // reduction, but never below 8 channels.
        mid = max(C // self.reduction, 8)
        squeeze = Conv2d(C, mid, 1, 1, 0)
        excite = Conv2d(mid, C, 1, 1, 0)

        gate = [
            nn.AdaptiveAvgPool2d(1),
            squeeze,
            nn.ReLU(),
            excite,
            nn.Sigmoid(),
        ]
        self.op = nn.Sequential(*gate)
Ejemplo n.º 4
0
 def __init__(self, C_in, C_out, stride):
     """Chain two 3x3 conv + batch-norm stages with a ReLU in between.

     Args:
         C_in: number of input channels (also the width of the first stage).
         C_out: number of output channels.
         stride: stride of the first 3x3 conv; must be 1 or 2.
     """
     assert stride in [1, 2]
     first_stage = [
         Conv2d(C_in, C_in, 3, stride, 1, bias=False),
         BatchNorm2d(C_in),
         nn.ReLU(),
     ]
     second_stage = [
         Conv2d(C_in, C_out, 3, 1, 1, bias=False),
         BatchNorm2d(C_out),
     ]
     super(CascadeConv3x3, self).__init__(*(first_stage + second_stage))
     # True only when the block preserves both resolution and channel count.
     self.res_connect = (stride == 1) and (C_in == C_out)
Ejemplo n.º 5
0
def make_conv3x3(
    in_channels,
    out_channels,
    dilation=1,
    stride=1,
    use_gn=False,
    use_relu=False,
    kaiming_init=True
):
    """Create a 3x3 convolution, optionally followed by GroupNorm and ReLU.

    Args:
        in_channels: number of input channels.
        out_channels: number of output channels.
        dilation: dilation of the kernel; also used as the padding.
        stride: stride of the convolution.
        use_gn: append ``group_norm(out_channels)`` and drop the conv bias.
        use_relu: append an ``nn.ReLU``.
        kaiming_init: use Kaiming-normal weight init; otherwise a Gaussian
            with ``std=0.01``.

    Returns:
        The bare conv when nothing is appended, else an ``nn.Sequential``.
    """
    with_bias = not use_gn
    conv = Conv2d(
        in_channels,
        out_channels,
        kernel_size=3,
        stride=stride,
        padding=dilation,
        dilation=dilation,
        bias=with_bias,
    )

    if kaiming_init:
        init.kaiming_normal_(conv.weight, mode="fan_out", nonlinearity="relu")
    else:
        init.gauss_(conv.weight, std=0.01)
    if with_bias:
        init.constant_(conv.bias, 0)

    extras = []
    if use_gn:
        extras.append(group_norm(out_channels))
    if use_relu:
        extras.append(nn.ReLU())

    if not extras:
        return conv
    return nn.Sequential(conv, *extras)
Ejemplo n.º 6
0
    def __init__(self, cfg, in_channels):
        """Build the ROI pooler and the 3x3 conv stack of the keypoint head.

        Args:
            cfg: config node; reads ``POOLER_RESOLUTION``, ``POOLER_SCALES``,
                ``POOLER_SAMPLING_RATIO`` and ``CONV_LAYERS`` under
                ``MODEL.ROI_KEYPOINT_HEAD``.
            in_channels: number of channels fed into the first conv.

        Each conv is registered as attribute ``conv_fcn<i>`` (1-based) and its
        name is appended to ``self.blocks``. ``self.out_channels`` is the
        width of the last conv, or ``in_channels`` if ``CONV_LAYERS`` is empty.
        """
        super(KeypointRCNNFeatureExtractor, self).__init__()

        resolution = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_RESOLUTION
        scales = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_SCALES
        sampling_ratio = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_SAMPLING_RATIO
        self.pooler = Pooler(
            output_size=(resolution, resolution),
            scales=scales,
            sampling_ratio=sampling_ratio,
        )

        layers = cfg.MODEL.ROI_KEYPOINT_HEAD.CONV_LAYERS
        next_feature = in_channels
        self.blocks = []
        for layer_idx, layer_features in enumerate(layers, 1):
            layer_name = "conv_fcn{}".format(layer_idx)
            module = Conv2d(next_feature,
                            layer_features,
                            3,
                            stride=1,
                            padding=1)
            init.kaiming_normal_(module.weight,
                                 mode="fan_out",
                                 nonlinearity="relu")
            init.constant_(module.bias, 0)
            setattr(self, layer_name, module)
            next_feature = layer_features
            self.blocks.append(layer_name)
        # ``next_feature`` tracks the width after the last registered conv.
        # Reading the loop variable instead would raise UnboundLocalError
        # when CONV_LAYERS is empty.
        self.out_channels = next_feature
Ejemplo n.º 7
0
 def make_conv(
     in_channels, out_channels, kernel_size, stride=1, dilation=1
 ):
     """Create a conv layer, optionally followed by GroupNorm and ReLU.

     ``use_gn`` and ``use_relu`` are taken from the enclosing scope. With
     GroupNorm enabled the conv has no bias; otherwise the bias is zeroed.

     Returns:
         The bare conv when nothing is appended, else an ``nn.Sequential``.
     """
     # Padding derived from the dilated kernel extent.
     pad = dilation * (kernel_size - 1) // 2
     conv = Conv2d(
         in_channels,
         out_channels,
         kernel_size=kernel_size,
         stride=stride,
         padding=pad,
         dilation=dilation,
         bias=not use_gn,
     )
     # Caffe2 implementation uses XavierFill, which in fact
     # corresponds to kaiming_uniform_ in PyTorch
     nn.init.kaiming_uniform_(conv.weight, a=1)
     if not use_gn:
         nn.init.constant_(conv.bias, 0)

     layers = [conv]
     if use_gn:
         layers.append(group_norm(out_channels))
     if use_relu:
         layers.append(nn.ReLU(inplace=True))
     return nn.Sequential(*layers) if len(layers) > 1 else conv
Ejemplo n.º 8
0
        def create_convs(num_channels, iter=3):
            """Return ``(conv1, conv2, iter)``: two 1x1 convs plus a repeat count.

            Both convs keep ``num_channels`` channels, carry no bias, and get
            their own norm layer from ``get_norm(norm, num_channels)``
            (``norm`` comes from the enclosing scope).
            """

            def pointwise():
                # A new norm layer is built for every conv.
                return Conv2d(
                    num_channels,
                    num_channels,
                    kernel_size=1,
                    bias=False,
                    norm=get_norm(norm, num_channels),
                )

            # Tuple items are evaluated left to right: conv1 first, then conv2.
            return (pointwise(), pointwise(), iter)
Ejemplo n.º 9
0
    def __init__(self, C_in, C_out, expansion, stride):
        """Build the sequence: 1x1 expand -> Shift -> 1x1 linear projection.

        Args:
            C_in: number of input channels.
            C_out: number of output channels.
            expansion: multiplier for the intermediate width, which is then
                passed through ``_get_divisible_by(..., 8, 8)``.
            stride: stride handed to the ``Shift`` op; must be 1 or 2.
        """
        assert stride in [1, 2]
        # True only when the block preserves both resolution and channel count.
        self.res_connect = (stride == 1) and (C_in == C_out)

        C_mid = _get_divisible_by(C_in * expansion, 8, 8)

        # pw
        expand = [
            Conv2d(C_in, C_mid, 1, 1, 0, bias=False),
            BatchNorm2d(C_mid),
            nn.ReLU(),
        ]
        # shift
        shift = [Shift(C_mid, 5, stride, 2)]
        # pw-linear
        project = [
            Conv2d(C_mid, C_out, 1, 1, 0, bias=False),
            BatchNorm2d(C_out),
        ]
        super(ShiftBlock5x5, self).__init__(*(expand + shift + project))
Ejemplo n.º 10
0
    def __init__(
        self,
        input_depth,
        output_depth,
        kernel,
        stride,
        pad,
        no_bias,
        use_relu,
        bn_type,
        group=1,
        *args,
        **kwargs
    ):
        """Register a conv, an optional norm layer and an optional ReLU.

        Args:
            input_depth: number of input channels.
            output_depth: number of output channels.
            kernel: kernel size of the conv.
            stride: stride of the conv; must be 1, 2 or 4.
            pad: padding of the conv.
            no_bias: when truthy, the conv is built without a bias.
            use_relu: ``"relu"`` to append a ReLU, ``None`` for none.
            bn_type: ``"bn"``, ``"af"``, ``None``, or ``("gn", num_groups)``.
            group: ``groups`` argument of the conv.
            *args, **kwargs: forwarded to ``Conv2d``.

        Sub-modules are registered as ``"conv"``, ``"bn"`` and ``"relu"``.
        """
        super(ConvBNRelu, self).__init__()

        assert use_relu in ["relu", None]
        if isinstance(bn_type, (list, tuple)):
            # GroupNorm is requested as a pair: ("gn", number_of_groups).
            assert len(bn_type) == 2
            assert bn_type[0] == "gn"
            bn_type, gn_group = bn_type[0], bn_type[1]
        assert bn_type in ["bn", "af", "gn", None]
        assert stride in [1, 2, 4]

        conv = Conv2d(
            input_depth,
            output_depth,
            kernel_size=kernel,
            stride=stride,
            padding=pad,
            bias=not no_bias,
            groups=group,
            *args,
            **kwargs
        )
        nn.init.kaiming_normal_(conv.weight, mode="fan_out", nonlinearity="relu")
        if conv.bias is not None:
            nn.init.constant_(conv.bias, 0.0)
        self.add_module("conv", conv)

        # NOTE(review): a bare "gn" string passes the assertion above but
        # leaves gn_group unbound -- confirm callers always pass the pair.
        if bn_type == "bn":
            norm_layer = BatchNorm2d(output_depth)
        elif bn_type == "gn":
            norm_layer = nn.GroupNorm(num_groups=gn_group, num_channels=output_depth)
        elif bn_type == "af":
            norm_layer = FrozenBatchNorm2d(output_depth)
        if bn_type is not None:
            self.add_module("bn", norm_layer)

        if use_relu == "relu":
            self.add_module("relu", nn.ReLU())
Ejemplo n.º 11
0
    def __init__(self, cfg, in_channels):
        """Create the 1x1 conv producing per-class mask logits and initialise it.

        Args:
            cfg: config node; reads ``MODEL.ROI_BOX_HEAD.NUM_CLASSES``.
            in_channels: number of channels of the incoming features.
        """
        super(MaskRCNNConv1x1Predictor, self).__init__()
        num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES

        self.mask_fcn_logits = Conv2d(in_channels, num_classes, 1, 1, 0)

        for param in self.parameters():
            pname = param.name()
            if "bias" in pname:
                init.constant_(param, 0)
                continue
            if "weight" in pname:
                # Caffe2 implementation uses MSRAFill, which in fact
                # corresponds to kaiming_normal_
                init.kaiming_normal_(param, mode="fan_out", nonlinearity="relu")
Ejemplo n.º 12
0
    def __init__(self, cfg, norm_func):
        """Create the stem: a 7x7 stride-2 conv on 3 input channels plus its norm.

        Args:
            cfg: config node; reads ``MODEL.RESNETS.STEM_OUT_CHANNELS``.
            norm_func: callable mapping a channel count to a norm layer.
        """
        super(BaseStem, self).__init__()

        stem_width = cfg.MODEL.RESNETS.STEM_OUT_CHANNELS

        self.conv1 = Conv2d(
            3,
            stem_width,
            kernel_size=7,
            stride=2,
            padding=3,
            bias=False,
        )
        self.bn1 = norm_func(stem_width)

        # Only the conv weight is re-initialised here.
        nn.init.kaiming_uniform_(self.conv1.weight, a=1)
Ejemplo n.º 13
0
    def __init__(self, cfg, in_channels):
        """Create the transposed-conv + 1x1 conv mask predictor and initialise it.

        Args:
            cfg: config node; reads ``MODEL.ROI_BOX_HEAD.NUM_CLASSES`` and the
                last entry of ``MODEL.ROI_MASK_HEAD.CONV_LAYERS``.
            in_channels: number of channels of the incoming features.
        """
        super(MaskRCNNC4Predictor, self).__init__()
        num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES
        dim_reduced = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS[-1]

        # 2x2 transposed conv with stride 2, then 1x1 conv to class logits.
        self.conv5_mask = ConvTranspose2d(in_channels, dim_reduced, 2, 2, 0)
        self.mask_fcn_logits = Conv2d(dim_reduced, num_classes, 1, 1, 0)

        for name, param in self.named_parameters():
            if "bias" in name:
                nn.init.constant_(param, 0)
                continue
            if "weight" in name:
                # Caffe2 implementation uses MSRAFill, which in fact
                # corresponds to kaiming_normal_ in PyTorch
                nn.init.kaiming_normal_(
                    param, mode="fan_out", nonlinearity="relu"
                )
Ejemplo n.º 14
0
    def make_conv(
        in_channels, out_channels, kernel_size, stride=1, dilation=1
    ):
        """Create a conv layer, optionally followed by GroupNorm and ReLU.

        ``use_gn`` and ``use_relu`` are taken from the enclosing scope. With
        GroupNorm enabled the conv has no bias; otherwise the bias is zeroed.

        Returns:
            The bare conv when nothing is appended, else an ``nn.Sequential``.
        """
        # Padding derived from the dilated kernel extent.
        pad = dilation * (kernel_size - 1) // 2
        conv = Conv2d(
            in_channels,
            out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=pad,
            dilation=dilation,
            bias=not use_gn,
        )

        init.kaiming_uniform_(conv.weight, a=1)
        if not use_gn:
            nn.init.constant_(conv.bias, 0)

        layers = [conv]
        if use_gn:
            layers.append(group_norm(out_channels))
        if use_relu:
            # NOTE(review): `nn.Relu` is presumably the Jittor spelling; under
            # PyTorch the class is `nn.ReLU` -- confirm which `nn` is imported.
            layers.append(nn.Relu())
        return nn.Sequential(*layers) if len(layers) > 1 else conv
Ejemplo n.º 15
0
    def __init__(self,
                 bottom_up,
                 in_features,
                 out_channels,
                 norm="",
                 top_block=None,
                 fuse_type="sum"):
        """
        Build the lateral and output convolutions of the FPN and record the
        names, strides and channel counts of the features it produces.

        Args:
            bottom_up (Backbone): module representing the bottom up subnetwork.
                Must be a subclass of :class:`Backbone`. The multi-scale feature
                maps generated by the bottom up network, and listed in `in_features`,
                are used to generate FPN levels.
            in_features (list[str]): names of the input feature maps coming
                from the backbone to which FPN is attached. For example, if the
                backbone produces ["res2", "res3", "res4"], any *contiguous* sublist
                of these may be used; order must be from high to low resolution.
            out_channels (int): number of channels in the output feature maps.
            norm (str): the normalization to use.
            top_block (nn.Module or None): if provided, an extra operation will
                be performed on the output of the last (smallest resolution)
                FPN output, and the result will extend the result list. The top_block
                further downsamples the feature map. It must have an attribute
                "num_levels", meaning the number of extra FPN levels added by
                this block, and "in_feature", which is a string representing
                its input feature (e.g., p5).
            fuse_type (str): types for fusing the top down features and the lateral
                ones. It can be "sum" (default), which sums up element-wise; or "avg",
                which takes the element-wise mean of the two.
        """
        super(FPN, self).__init__()
        assert isinstance(bottom_up, Backbone)
        # in_features must be non-empty; the value itself is the assert message.
        assert in_features, in_features

        # Debug example: in_features=['res2', 'res3', 'res4', 'res5', 'res6'],
        # out_channels=256, top_block=LastLevelMaxPool(), fuse_type='sum'.

        # Feature map strides and channels from the bottom up network (e.g. ResNet)
        input_shapes = bottom_up.output_shape()
        # Debug example: {'res2': ShapeSpec(channels=256, stride=4),
        # 'res3': ShapeSpec(channels=512, stride=8), ...,
        # 'res6': ShapeSpec(channels=2048, stride=64)}
        strides = [input_shapes[f].stride for f in in_features]
        in_channels_per_feature = [
            input_shapes[f].channels for f in in_features
        ]
        # Debug example: in_channels_per_feature == [256, 512, 512, 1024, 2048]

        _assert_strides_are_log2_contiguous(strides)
        lateral_convs = []
        output_convs = []

        # The convs carry a bias only when no normalization is requested.
        use_bias = norm == ""
        for idx, in_channels in enumerate(in_channels_per_feature):
            # Each conv gets its own norm layer instance.
            lateral_norm = get_norm(norm, out_channels)
            output_norm = get_norm(norm, out_channels)

            # 1x1 conv mapping the backbone feature to out_channels.
            lateral_conv = Conv2d(in_channels,
                                  out_channels,
                                  kernel_size=1,
                                  bias=use_bias,
                                  norm=lateral_norm)
            # 3x3 conv that keeps out_channels and the spatial size.
            output_conv = Conv2d(
                out_channels,
                out_channels,
                kernel_size=3,
                stride=1,
                padding=1,
                bias=use_bias,
                norm=output_norm,
            )
            weight_init.c2_xavier_fill(lateral_conv)
            weight_init.c2_xavier_fill(output_conv)
            # Modules are named after log2 of the feature stride.
            stage = int(math.log2(strides[idx]))
            self.add_module("fpn_lateral{}".format(stage), lateral_conv)
            self.add_module("fpn_output{}".format(stage), output_conv)
            lateral_convs.append(lateral_conv)
            output_convs.append(output_conv)
        # Debug example: lateral_convs are 1x1 convs from
        # (256, 512, 512, 1024, 2048) to 256 channels; output_convs are
        # five 3x3 convs from 256 to 256 channels with padding 1.

        # Place convs into top-down order (from low to high resolution)
        # to make the top-down computation in forward clearer.
        self.out_channels = out_channels
        self.norm = norm
        self.lateral_convs = lateral_convs[::-1]
        self.output_convs = output_convs[::-1]
        self.top_block = top_block
        self.in_features = in_features
        self.bottom_up = bottom_up
        # Return feature names are "p<stage>", like ["p2", "p3", ..., "p6"]
        self._out_feature_strides = {
            "p{}".format(int(math.log2(s))): s
            for s in strides
        }
        # Debug example: {'p2': 4, 'p3': 8, 'p4': 16, 'p5': 32, 'p6': 64}

        # top block output feature maps.
        # `stage` still holds the value from the last loop iteration, i.e. the
        # stage of the final entry of in_features.
        if self.top_block is not None:
            for s in range(stage, stage + self.top_block.num_levels):
                self._out_feature_strides["p{}".format(s + 1)] = 2**(s + 1)

        # Debug example: {'p2': 4, 'p3': 8, 'p4': 16, 'p5': 32, 'p6': 64, 'p7': 128}

        self._out_features = list(self._out_feature_strides.keys())
        # Every output level has out_channels channels.
        self._out_feature_channels = {
            k: out_channels
            for k in self._out_features
        }
        # Disabled experiment: self.ftt = FTT(self, ['p2', 'p3'], out_channels)
        # Debug example: {'p2': 256, 'p3': 256, 'p4': 256, 'p5': 256, 'p6': 256, 'p7': 256}
        # Size divisibility is the stride of the last listed input feature.
        self._size_divisibility = strides[-1]
        assert fuse_type in {"avg", "sum"}
        self._fuse_type = fuse_type

        # Returns a tuple of (conv2d, conv2d, iter).
        # NOTE(review): this helper is not called in the visible part of the
        # method -- confirm whether it is still needed.
        def create_convs(num_channels, iter=3):
            # Two bias-free 1x1 convs that keep num_channels, each with its
            # own norm layer built from the enclosing `norm` argument.
            conv1 = Conv2d(
                num_channels,
                num_channels,
                kernel_size=1,
                bias=False,
                norm=get_norm(norm, num_channels),
            )

            conv2 = Conv2d(
                num_channels,
                num_channels,
                kernel_size=1,
                bias=False,
                norm=get_norm(norm, num_channels),
            )
            return (conv1, conv2, iter)
Ejemplo n.º 16
0
def FTT_get_p3pr(p2, p3, out_channels, norm):
    """Combine ``p3`` (upsampled via pixel shuffle) with ``p2`` into one map.

    Steps, as implemented below:
      1. A 1x1 conv expands ``p3`` from ``out_channels`` to ``4 * out_channels``.
      2. The "content extractor" applies two 1x1 conv + ReLU stages three times.
      3. ``nn.PixelShuffle(2)`` rearranges the result.
      4. The shuffled map is concatenated with ``p2`` along dim 1 and fed to
         the "texture extractor": three rounds of two 1x1 conv + ReLU stages
         on ``2 * out_channels`` channels, then a 1x1 conv + ReLU halving them.
      5. The shuffled map and the texture output are summed.

    Args:
        p2: feature map concatenated with the shuffled ``p3``.
        p3: feature map that is expanded and pixel-shuffled.
        out_channels: channel count expected on ``p3`` by the first conv.
        norm: unused; the norm layers are disabled in this implementation.

    NOTE(review): every conv is created inside this call, so the weights are
    freshly initialised on each invocation -- confirm this is intended.
    """

    def pointwise(c_in, c_out):
        # Bias-free 1x1 convolution.
        return Conv2d(c_in, c_out, kernel_size=1, bias=False)

    def refine(feat, first, second, rounds=3):
        # Apply the same (conv, ReLU, conv, ReLU) pair `rounds` times.
        for _ in range(rounds):
            feat = F.relu_(first(feat))
            feat = F.relu_(second(feat))
        return feat

    # Convs are built in the same order as the forward flow uses them.
    channel_scaler = pointwise(out_channels, out_channels * 4)
    expanded = channel_scaler(p3)

    wide = out_channels * 4
    content_a = pointwise(wide, wide)
    content_b = pointwise(wide, wide)
    content = refine(expanded, content_a, content_b)

    bottom = nn.PixelShuffle(2)(content)

    # We interpreted "wrap" as concatenating bottom and top
    # so the total channels is doubled after (basically place one on top
    # of the other)
    stacked = torch.cat((bottom, p2), axis=1)

    doubled = out_channels * 2
    texture_a = pointwise(doubled, doubled)
    texture_b = pointwise(doubled, doubled)
    texture_out = pointwise(doubled, int(doubled / 2))
    top = F.relu_(texture_out(refine(stacked, texture_a, texture_b)))

    return bottom + top
Ejemplo n.º 17
0
    def __init__(
        self,
        in_channels,
        bottleneck_channels,
        out_channels,
        num_groups,
        stride_in_1x1,
        stride,
        dilation,
        norm_func,
    ):
        """Build a bottleneck: 1x1 -> 3x3 -> 1x1 convs with an optional shortcut conv.

        Args:
            in_channels: number of input channels.
            bottleneck_channels: width of the 1x1/3x3 middle section.
            out_channels: number of output channels.
            num_groups: ``groups`` of the 3x3 conv.
            stride_in_1x1: put the stride on the first 1x1 conv instead of
                the 3x3 conv.
            stride: stride of the block (forced to 1 when ``dilation > 1``).
            dilation: dilation (and padding) of the 3x3 conv.
            norm_func: callable mapping a channel count to a norm layer.
        """
        super(Bottleneck, self).__init__()

        # A 1x1 conv shortcut is only needed when the channel count changes.
        self.downsample = None
        if in_channels != out_channels:
            shortcut_stride = stride if dilation == 1 else 1
            shortcut_conv = Conv2d(
                in_channels,
                out_channels,
                kernel_size=1,
                stride=shortcut_stride,
                bias=False,
            )
            self.downsample = nn.Sequential(shortcut_conv, norm_func(out_channels))
            for layer in self.downsample.modules():
                if isinstance(layer, Conv2d):
                    nn.init.kaiming_uniform_(layer.weight, a=1)

        if dilation > 1:
            stride = 1  # reset to be 1

        # The original MSRA ResNet models have stride in the first 1x1 conv
        # The subsequent fb.torch.resnet and Caffe2 ResNe[X]t implementations have
        # stride in the 3x3 conv
        if stride_in_1x1:
            stride_1x1, stride_3x3 = stride, 1
        else:
            stride_1x1, stride_3x3 = 1, stride

        self.conv1 = Conv2d(
            in_channels,
            bottleneck_channels,
            kernel_size=1,
            stride=stride_1x1,
            bias=False,
        )
        self.bn1 = norm_func(bottleneck_channels)
        # TODO: specify init for the above
        self.conv2 = Conv2d(
            bottleneck_channels,
            bottleneck_channels,
            kernel_size=3,
            stride=stride_3x3,
            padding=dilation,
            bias=False,
            groups=num_groups,
            dilation=dilation,
        )
        nn.init.kaiming_uniform_(self.conv2.weight, a=1)

        self.bn2 = norm_func(bottleneck_channels)

        self.conv3 = Conv2d(
            bottleneck_channels,
            out_channels,
            kernel_size=1,
            bias=False,
        )
        self.bn3 = norm_func(out_channels)

        # The two 1x1 convs are initialised last, in this order.
        for conv in (self.conv1, self.conv3):
            nn.init.kaiming_uniform_(conv.weight, a=1)
def conv_bn(inp, oup, stride):
    """Return a 3x3 conv (padding 1, no bias) -> FrozenBatchNorm2d -> ReLU6 stack."""
    layers = [
        Conv2d(inp, oup, 3, stride, 1, bias=False),
        FrozenBatchNorm2d(oup),
        nn.ReLU6(),
    ]
    return nn.Sequential(*layers)
def conv_1x1_bn(inp, oup):
    """Return a 1x1 conv (stride 1, no bias) -> FrozenBatchNorm2d -> ReLU6 stack."""
    layers = [
        Conv2d(inp, oup, 1, 1, 0, bias=False),
        FrozenBatchNorm2d(oup),
        nn.ReLU6(),
    ]
    return nn.Sequential(*layers)
Ejemplo n.º 20
0
    def __init__(self, in_channels, bottleneck_channels, out_channels,
                 num_groups, stride_in_1x1, stride, dilation, norm_func,
                 dcn_config):
        """Build a bottleneck: 1x1 -> 3x3 -> 1x1 convs with an optional shortcut conv.

        Args:
            in_channels: number of input channels.
            bottleneck_channels: width of the 1x1/3x3 middle section.
            out_channels: number of output channels.
            num_groups: ``groups`` of the 3x3 conv.
            stride_in_1x1: put the stride on the first 1x1 conv instead of
                the 3x3 conv.
            stride: stride of the block (forced to 1 when ``dilation > 1``).
            dilation: dilation (and padding) of the 3x3 conv.
            norm_func: callable mapping a channel count to a norm layer.
            dcn_config: currently unused; the deformable-conv branch that
                read it is disabled.
        """
        super(Bottleneck, self).__init__()

        # A 1x1 conv shortcut is only needed when the channel count changes.
        self.downsample = None
        if in_channels != out_channels:
            shortcut_stride = stride if dilation == 1 else 1
            shortcut_conv = Conv2d(
                in_channels,
                out_channels,
                kernel_size=1,
                stride=shortcut_stride,
                bias=False,
            )
            self.downsample = nn.Sequential(shortcut_conv, norm_func(out_channels))
            for layer in self.downsample.modules():
                if isinstance(layer, Conv2d):
                    init.kaiming_uniform_(layer.weight, a=1)

        if dilation > 1:
            stride = 1  # reset to be 1

        # The original MSRA ResNet models have stride in the first 1x1 conv;
        # other implementations have stride in the 3x3 conv
        if stride_in_1x1:
            stride_1x1, stride_3x3 = stride, 1
        else:
            stride_1x1, stride_3x3 = 1, stride

        self.conv1 = Conv2d(
            in_channels,
            bottleneck_channels,
            kernel_size=1,
            stride=stride_1x1,
            bias=False,
        )
        self.bn1 = norm_func(bottleneck_channels)
        # TODO: specify init for the above

        # The deformable-conv (DFConv2d) variant of conv2, selected through
        # dcn_config["stage_with_dcn"], is disabled; a regular conv is
        # always used.
        self.conv2 = Conv2d(
            bottleneck_channels,
            bottleneck_channels,
            kernel_size=3,
            stride=stride_3x3,
            padding=dilation,
            bias=False,
            groups=num_groups,
            dilation=dilation,
        )
        init.kaiming_uniform_(self.conv2.weight, a=1)

        self.bn2 = norm_func(bottleneck_channels)

        self.conv3 = Conv2d(
            bottleneck_channels,
            out_channels,
            kernel_size=1,
            bias=False,
        )
        self.bn3 = norm_func(out_channels)

        # The two 1x1 convs are initialised last, in this order.
        for conv in (self.conv1, self.conv3):
            init.kaiming_uniform_(conv.weight, a=1)