Code Example #1
File: efficientnet.py  Project: tkhe/tkdetection
    def __init__(self,
                 stem_channels,
                 depths,
                 widths,
                 exp_ratios,
                 se_ratio,
                 strides,
                 kernels,
                 last_channels,
                 norm="BN",
                 activation="Swish",
                 num_classes=1000,
                 out_features=None):
        super().__init__()

        stage_params = list(zip(depths, widths, exp_ratios, strides, kernels))

        self.stem = Conv2d(3,
                           stem_channels,
                           3,
                           2,
                           1,
                           bias=False,
                           norm=norm,
                           activation="Swish")
        self._out_feature_channels = {"stem": stem_channels}
        stride = 2
        self._out_feature_strides = {"stem": stride}
        prev_channels = stem_channels
        self.stages = ["stem"]
        for i, (depth, width, expand_ratio, s, k) in enumerate(stage_params):
            name = f"stage{i + 1}"
            stage = EfficientStage(prev_channels, expand_ratio, k, s, se_ratio,
                                   width, depth, norm, activation)
            self.add_module(name, stage)
            self.stages.append(name)
            prev_channels = width
            stride *= s
            self._out_feature_strides[name] = stride
            self._out_feature_channels[name] = width

        if not out_features:
            out_features = ["linear"]
        if "linear" in out_features and num_classes is not None:
            self.last_conv = Conv2d(prev_channels,
                                    last_channels,
                                    1,
                                    bias=False,
                                    norm="BN",
                                    activation=activation)
            self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
            self.fc = nn.Linear(last_channels, num_classes)
        self._out_features = out_features
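For reference, a minimal instantiation sketch. The class name EfficientNet and the argument values below are assumptions on my part; the values follow the published EfficientNet-B0 configuration, not this project's own configs.

# Hypothetical usage with standard EfficientNet-B0 hyperparameters.
model = EfficientNet(
    stem_channels=32,
    depths=[1, 2, 2, 3, 3, 4, 1],
    widths=[16, 24, 40, 80, 112, 192, 320],
    exp_ratios=[1, 6, 6, 6, 6, 6, 6],
    se_ratio=0.25,
    strides=[1, 2, 2, 2, 1, 2, 1],
    kernels=[3, 3, 5, 3, 5, 5, 3],
    last_channels=1280,
)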
Code Example #2
    def __init__(self, cfg, input_shape: ShapeSpec):
        super(CoarseMaskHead, self).__init__()

        self.num_classes = cfg.MODEL.NUM_CLASSES
        conv_dim = cfg.MODEL.ROI_MASK_HEAD.CONV_DIM
        self.fc_dim = cfg.MODEL.ROI_MASK_HEAD.FC_DIM
        num_fc = cfg.MODEL.ROI_MASK_HEAD.NUM_FC
        self.output_side_resolution = cfg.MODEL.ROI_MASK_HEAD.OUTPUT_SIDE_RESOLUTION
        self.input_channels = input_shape.channels
        self.input_h = input_shape.height
        self.input_w = input_shape.width

        self.conv_layers = []
        if self.input_channels > conv_dim:
            self.reduce_channel_dim_conv = Conv2d(self.input_channels,
                                                  conv_dim,
                                                  kernel_size=1,
                                                  activation="ReLU")
            self.conv_layers.append(self.reduce_channel_dim_conv)

        self.reduce_spatial_dim_conv = Conv2d(conv_dim,
                                              conv_dim,
                                              kernel_size=2,
                                              stride=2,
                                              padding=0,
                                              bias=True,
                                              activation="ReLU")
        self.conv_layers.append(self.reduce_spatial_dim_conv)

        # the stride-2, 2x2 reduce_spatial_dim_conv halves H and W,
        # so the flattened feature size is a quarter of conv_dim * H * W
        input_dim = conv_dim * self.input_h * self.input_w
        input_dim //= 4

        self.fcs = []
        for k in range(num_fc):
            fc = nn.Linear(input_dim, self.fc_dim)
            self.add_module("coarse_mask_fc{}".format(k + 1), fc)
            self.fcs.append(fc)
            input_dim = self.fc_dim

        output_dim = self.num_classes * self.output_side_resolution * self.output_side_resolution

        self.prediction = nn.Linear(self.fc_dim, output_dim)
        nn.init.normal_(self.prediction.weight, std=0.001)
        nn.init.constant_(self.prediction.bias, 0)

        for layer in self.conv_layers:
            weight_init.c2_msra_fill(layer)
        for layer in self.fcs:
            weight_init.c2_xavier_fill(layer)
Code Example #3
    def __init__(self, input_shape, *, num_keypoints, conv_dims, **kwargs):
        """
        NOTE: this interface is experimental.
        """
        super().__init__(num_keypoints=num_keypoints, **kwargs)

        up_scale = 2
        in_channels = input_shape.channels

        self.blocks = []
        for idx, layer_channels in enumerate(conv_dims, 1):
            module = Conv2d(in_channels,
                            layer_channels,
                            3,
                            stride=1,
                            padding=1)
            self.add_module("conv_fcn{}".format(idx), module)
            self.blocks.append(module)
            in_channels = layer_channels

        deconv_kernel = 4
        self.score_lowres = nn.ConvTranspose2d(in_channels,
                                               num_keypoints,
                                               deconv_kernel,
                                               stride=2,
                                               padding=deconv_kernel // 2 - 1)
        self.up_scale = up_scale

        for name, param in self.named_parameters():
            if "bias" in name:
                nn.init.constant_(param, 0)
            elif "weight" in name:
                nn.init.kaiming_normal_(param,
                                        mode="fan_out",
                                        nonlinearity="relu")
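A plausible forward pass implied by this constructor, as a sketch only: it assumes torch.nn.functional is imported as F and follows the usual conv-deconv-upsample keypoint head; the project's actual method may differ.

    def layers(self, x):
        # Hypothetical sketch: 3x3 conv stack with ReLU, deconv to
        # per-keypoint heatmaps, then bilinear upsampling by up_scale.
        for layer in self.blocks:
            x = F.relu(layer(x))
        x = self.score_lowres(x)
        x = F.interpolate(x, scale_factor=self.up_scale,
                          mode="bilinear", align_corners=False)
        return x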
Code Example #4
    def __init__(self, cfg, input_shape: Dict[str, ShapeSpec], norm=""):
        super().__init__()

        input_size = cfg.SSD.SIZE
        extra_cfg = self.extra_setting[input_size]
        in_channels = 1024

        self._out_features = [f for f in input_shape]
        previous_channels = [input_shape[f].channels for f in input_shape]
        extra_channels = []
        for i, config in enumerate(extra_cfg):
            name = f"extra_{i + 1}"
            module = []
            for c, k, s, p in config:
                module.append(
                    Conv2d(in_channels,
                           c,
                           k,
                           s,
                           p,
                           norm=norm,
                           activation="ReLU"))
                in_channels = c
            self.add_module(name, nn.Sequential(*module))
            self._out_features.append(name)
            extra_channels.append(in_channels)
        self._out_feature_strides = dict(
            zip(self._out_features, cfg.SSD.STRIDES))
        self._out_feature_channels = dict(
            zip(self._out_features, previous_channels + extra_channels))
        self.l2_norm = L2Norm(previous_channels[0])
Code Example #5
File: mask_head.py  Project: tkhe/tkdetection
    def __init__(self,
                 input_shape: ShapeSpec,
                 *,
                 num_classes,
                 conv_dims,
                 conv_norm="",
                 **kwargs):
        """
        NOTE: this interface is experimental.
        """
        super().__init__(**kwargs)

        assert len(conv_dims) >= 1, "conv_dims must be non-empty!"

        self.conv_norm_relus = []

        cur_channels = input_shape.channels
        for k, conv_dim in enumerate(conv_dims[:-1]):
            conv = Conv2d(
                cur_channels,
                conv_dim,
                3,
                stride=1,
                padding=1,
                bias=not conv_norm,
                norm=conv_norm,
                activation="relu",
            )
            self.add_module("mask_fcn{}".format(k + 1), conv)
            self.conv_norm_relus.append(conv)
            cur_channels = conv_dim

        self.deconv = nn.ConvTranspose2d(cur_channels,
                                         conv_dims[-1],
                                         kernel_size=2,
                                         stride=2,
                                         padding=0)
        cur_channels = conv_dims[-1]

        self.predictor = Conv2d(cur_channels, num_classes, kernel_size=1)

        weight_init.c2_msra_fill(self.deconv)

        nn.init.normal_(self.predictor.conv.weight, std=0.001)
        if self.predictor.conv.bias is not None:
            nn.init.constant_(self.predictor.conv.bias, 0)
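For orientation, a sketch of the forward computation these layers suggest, assuming torch.nn.functional as F; not necessarily the project's exact method.

    def layers(self, x):
        # Hypothetical sketch: conv tower, 2x deconv upsample, then a
        # 1x1 conv producing one mask logit map per class.
        for layer in self.conv_norm_relus:
            x = layer(x)
        x = F.relu(self.deconv(x))
        return self.predictor(x)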
Code Example #6
File: efficientnet.py  Project: tkhe/tkdetection
    def __init__(self,
                 in_channels,
                 expand_ratio,
                 kernel_size,
                 stride,
                 se_ratio,
                 out_channels,
                 norm="BN",
                 activation="Swish"):
        super().__init__()

        expand_channels = int(in_channels * expand_ratio)
        # skip the 1x1 expansion conv when expand_ratio == 1
        self.expand = expand_channels != in_channels
        if self.expand:
            self.expand_conv = Conv2d(in_channels,
                                      expand_channels,
                                      1,
                                      bias=False,
                                      norm=norm,
                                      activation=activation)
        padding = (kernel_size - 1) // 2
        self.dw = Conv2d(expand_channels,
                         expand_channels,
                         kernel_size,
                         stride,
                         padding,
                         groups=expand_channels,
                         bias=False,
                         norm=norm,
                         activation=activation)
        self.use_se = se_ratio > 0
        if self.use_se:
            self.se = SEModule(expand_channels,
                               se_channels=int(in_channels * se_ratio))
        self.pw = Conv2d(expand_channels,
                         out_channels,
                         1,
                         bias=False,
                         norm=norm)
        self.use_res = (stride == 1) and (in_channels == out_channels)
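A sketch of the data flow this block sets up, i.e. the standard MBConv / inverted-residual pattern; the actual forward in efficientnet.py may differ in details such as drop connect.

    def forward(self, x):
        # Hypothetical sketch of the inverted-residual computation.
        out = self.expand_conv(x) if self.expand else x
        out = self.dw(out)      # depthwise conv
        if self.use_se:
            out = self.se(out)  # squeeze-and-excitation reweighting
        out = self.pw(out)      # linear pointwise projection
        if self.use_res:
            out = out + x       # identity shortcut
        return out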
Code Example #7
File: ssd.py  Project: tkhe/tkdetection
    def __init__(self, cfg, input_shape: List[ShapeSpec]):
        super().__init__()

        in_channels = [x.channels for x in input_shape]
        num_anchors = build_anchor_generator(cfg, input_shape).num_cell_anchors
        num_classes = cfg.MODEL.NUM_CLASSES
        norm = cfg.SSD.HEAD.NORM

        cls_score = []
        bbox_pred = []
        for i, c in enumerate(in_channels):
            if i == len(in_channels) - 1:
                cls_score.append(
                    nn.Conv2d(c, num_anchors[i] * (num_classes + 1), 1))
                bbox_pred.append(nn.Conv2d(c, num_anchors[i] * 4, 1))
            else:
                cls_score.append(
                    nn.Sequential(
                        Conv2d(c,
                               c,
                               3,
                               1,
                               groups=c,
                               norm=norm,
                               activation="ReLU6"),
                        Conv2d(c, num_anchors[i] * (num_classes + 1), 1)))
                bbox_pred.append(
                    nn.Sequential(
                        Conv2d(c,
                               c,
                               3,
                               1,
                               groups=c,
                               norm=norm,
                               activation="ReLU6"),
                        Conv2d(c, num_anchors[i] * 4, 1)))

        self.cls_score = nn.ModuleList(cls_score)
        self.bbox_pred = nn.ModuleList(bbox_pred)
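The head presumably emits one classification map and one box-regression map per feature level; a minimal forward sketch under that assumption:

    def forward(self, features):
        # Hypothetical sketch: apply the per-level predictors pairwise.
        logits = [score(x) for score, x in zip(self.cls_score, features)]
        deltas = [pred(x) for pred, x in zip(self.bbox_pred, features)]
        return logits, deltas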
Code Example #8
    def __init__(self,
                 in_channels,
                 out_channels,
                 norm="BN",
                 activation="LeakyReLU",
                 **kwargs):
        super().__init__()

        self.conv1 = Conv2d(in_channels,
                            out_channels // 2,
                            kernel_size=1,
                            bias=False,
                            norm=norm,
                            activation=activation,
                            **kwargs)
        self.conv2 = Conv2d(out_channels // 2,
                            out_channels,
                            kernel_size=3,
                            bias=False,
                            norm=norm,
                            activation=activation,
                            **kwargs)
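This looks like a Darknet-style residual block (1x1 reduce, 3x3 restore). A forward sketch under two assumptions: the block is residual, and the project's Conv2d wrapper pads the 3x3 conv so the spatial size is preserved.

    def forward(self, x):
        # Hypothetical sketch of a Darknet residual block.
        return x + self.conv2(self.conv1(x))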
Code Example #9
File: ghostnet.py  Project: tkhe/tkdetection
    def __init__(
        self,
        in_channels,
        out_channels,
        kernel_size=1,
        ratio=2,
        dw_size=3,
        stride=1,
        norm="BN",
        relu=True
    ):
        super().__init__()

        self.out_channels = out_channels
        init_channels = math.ceil(out_channels / ratio)  # primary conv outputs
        new_channels = init_channels * (ratio - 1)  # extra "ghost" features from the cheap depthwise conv

        self.primary_conv = Conv2d(
            in_channels,
            init_channels,
            kernel_size,
            stride,
            (kernel_size - 1) // 2,
            bias=False,
            norm=norm,
            activation="ReLU" if relu else ""
        )
        self.cheap_operation = Conv2d(
            init_channels,
            new_channels,
            dw_size,
            1,
            (dw_size - 1) // 2,
            groups=init_channels,
            bias=False,
            norm=norm,
            activation="ReLU" if relu else ""
        )
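A forward sketch following the Ghost module from the GhostNet paper, which this constructor matches; assumes torch is imported.

    def forward(self, x):
        # Hypothetical sketch: cheap depthwise features are concatenated
        # with the primary conv output, then trimmed to out_channels
        # (init_channels * ratio can overshoot because of ceil rounding).
        x1 = self.primary_conv(x)
        x2 = self.cheap_operation(x1)
        out = torch.cat([x1, x2], dim=1)
        return out[:, :self.out_channels, :, :]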
Code Example #10
    def __init__(self, vgg_cfg, norm="", out_features=None):
        super().__init__()

        self._out_feature_channels = {}
        self._out_feature_strides = {}

        layers = []
        in_channels = 3
        idx = 0
        stride = 1
        for v in vgg_cfg:
            if v == "M":
                layers.append(nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True))
                stride *= 2
            else:
                layers.append(Conv2d(in_channels, v, 3, 1, 1, norm=norm, activation="ReLU"))
                in_channels = v
            self._out_feature_channels[str(idx)] = in_channels  # not v: v is "M" for pool layers
            self._out_feature_strides[str(idx)] = stride
            idx += 1

        layers.append(nn.MaxPool2d(kernel_size=3, stride=1, padding=1))
        self._out_feature_channels[str(idx)] = in_channels
        self._out_feature_strides[str(idx)] = stride
        idx += 1
        layers.append(
            Conv2d(in_channels, 1024, 3, padding=6, dilation=6, norm=norm, activation="ReLU")
        )
        self._out_feature_channels[str(idx)] = 1024
        self._out_feature_strides[str(idx)] = stride
        idx += 1
        layers.append(Conv2d(1024, 1024, 1, norm=norm, activation="ReLU"))
        self._out_feature_channels[str(idx)] = 1024
        self._out_feature_strides[str(idx)] = stride

        self.features = nn.Sequential(*layers)

        self._out_features = out_features
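For reference, a usage sketch. The class name VGG is assumed, and the list below is the standard VGG-16 configuration ("M" marks a max-pool); note the constructor itself appends the final stride-1 pool and the dilated fc6/fc7-style convs.

# Hypothetical usage with the standard VGG-16 layer configuration.
vgg16_cfg = [64, 64, "M", 128, 128, "M", 256, 256, 256, "M",
             512, 512, 512, "M", 512, 512, 512]
backbone = VGG(vgg16_cfg, norm="")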
Code Example #11
def _make_stage(layers,
                in_channels,
                out_channels,
                norm="BN",
                activation="LeakyReLU"):
    module = [
        Conv2d(in_channels,
               out_channels,
               3,
               2,
               bias=False,
               norm=norm,
               activation=activation)
    ]
    for _ in range(layers):
        module.append(BasicBlock(out_channels, out_channels, norm, activation))
    return nn.Sequential(*module)
Code Example #12
    def __init__(self,
                 layers,
                 channels,
                 stem_channels=32,
                 norm="BN",
                 activation="LeakyReLU",
                 out_features=None,
                 num_classes=1000):
        super().__init__()

        assert len(layers) == len(channels), \
            f"len(layers) must equal len(channels), got {len(layers)} vs {len(channels)}"

        self.stem = Conv2d(3,
                           stem_channels,
                           3,
                           1,
                           bias=False,
                           norm=norm,
                           activation=activation)
        self.stage1 = _make_stage(layers[0], stem_channels, channels[0], norm,
                                  activation)
        self.stage2 = _make_stage(layers[1], channels[0], channels[1], norm,
                                  activation)
        self.stage3 = _make_stage(layers[2], channels[1], channels[2], norm,
                                  activation)
        self.stage4 = _make_stage(layers[3], channels[2], channels[3], norm,
                                  activation)
        self.stage5 = _make_stage(layers[4], channels[3], channels[4], norm,
                                  activation)
        self._out_feature_channels = {
            f"stage{i}": c
            for i, c in zip(range(1, 6), channels)
        }
        self._out_feature_strides = {f"stage{i}": 2**i for i in range(1, 6)}

        if not out_features:
            out_features = ["linear"]
        if "linear" in out_features and num_classes is not None:
            self.fc = nn.Linear(channels[4], num_classes)
        self._out_features = out_features
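A usage sketch: with the settings below this would build the Darknet-53 layout (stage depths 1, 2, 8, 8, 4). The class name Darknet is an assumption.

# Hypothetical usage with Darknet-53 stage depths and widths.
model = Darknet(layers=[1, 2, 8, 8, 4],
                channels=[64, 128, 256, 512, 1024])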
Code Example #13
    def __init__(self,
                 input_shape: ShapeSpec,
                 *,
                 conv_dims: List[int],
                 fc_dims: List[int],
                 conv_norm=""):
        """
        NOTE: this interface is experimental.
        """
        super().__init__()

        assert len(conv_dims) + len(fc_dims) > 0

        self._output_size = (input_shape.channels, input_shape.height,
                             input_shape.width)

        self.conv_norm_relus = []
        for k, conv_dim in enumerate(conv_dims):
            conv = Conv2d(
                self._output_size[0],
                conv_dim,
                3,
                padding=1,  # preserve H and W, matching the _output_size update below
                bias=(conv_norm == ""),
                norm=conv_norm,
                activation="ReLU",
            )
            self.add_module(f"conv{k + 1}", conv)
            self.conv_norm_relus.append(conv)
            self._output_size = (conv_dim, self._output_size[1],
                                 self._output_size[2])

        self.fcs = []
        for k, fc_dim in enumerate(fc_dims):
            fc = nn.Linear(np.prod(self._output_size), fc_dim)
            self.add_module("fc{}".format(k + 1), fc)
            self.fcs.append(fc)
            self._output_size = fc_dim

        for layer in self.fcs:
            weight_init.c2_xavier_fill(layer)
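A usage sketch for the common all-fc variant of this head; the class name BoxHead is a placeholder, and ShapeSpec is assumed importable from the project.

# Hypothetical usage: a 2-fc box head on 7x7 RoI features.
head = BoxHead(ShapeSpec(channels=256, height=7, width=7),
               conv_dims=[], fc_dims=[1024, 1024])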
Code Example #14
File: ghostnet.py  Project: tkhe/tkdetection
    def __init__(
        self,
        ghostnet_cfg=None,
        multiplier=1.0,
        dropout=0.2,
        norm="BN",
        num_classes=1000,
        out_features=None
    ):
        super().__init__()

        if ghostnet_cfg is None:
            ghostnet_cfg = [
                [3,  16,  16, 0, 1],
                [3,  48,  24, 0, 2],
                [3,  72,  24, 0, 1],
                [5,  72,  40, 0.25, 2],
                [5, 120,  40, 0.25, 1],
                [3, 240,  80, 0, 2],
                [3, 200,  80, 0, 1],
                [3, 184,  80, 0, 1],
                [3, 184,  80, 0, 1],
                [3, 480, 112, 0.25, 1],
                [3, 672, 112, 0.25, 1],
                [5, 672, 160, 0.25, 2],
                [5, 960, 160, 0, 1],
                [5, 960, 160, 0.25, 1],
                [5, 960, 160, 0, 1],
                [5, 960, 160, 0.25, 1]
            ]

        output_channel = make_divisible(16 * multiplier, 4)
        layers = []
        layers.append(Conv2d(3, output_channel, 3, 2, 1, bias=False, norm=norm, activation="ReLU"))
        self._out_feature_channels = {"0": output_channel}
        stride = 2
        self._out_feature_strides = {"0": stride}

        input_channel = output_channel
        block = GhostBottleneck
        index = 1
        for k, exp_size, c, se_ratio, s in ghostnet_cfg:
            output_channel = make_divisible(c * multiplier, 4)
            hidden_channel = make_divisible(exp_size * multiplier, 4)
            layers.append(
                block(input_channel, hidden_channel, output_channel, k, s, se_ratio)
            )
            input_channel = output_channel
            stride *= s
            self._out_feature_channels[str(index)] = output_channel
            self._out_feature_strides[str(index)] = stride
            index += 1

        # NOTE: exp_size is left over from the last ghostnet_cfg row above
        output_channel = make_divisible(exp_size * multiplier, 4)
        layers.append(Conv2d(input_channel, output_channel, 1, norm=norm, activation="ReLU"))
        self._out_feature_channels[str(index)] = output_channel
        self._out_feature_strides[str(index)] = stride

        self.features = nn.Sequential(*layers)

        if not out_features:
            out_features = ["linear"]
        if "linear" in out_features and num_classes is not None:
            self.conv_head = Conv2d(input_channel, 1280, 1, activation="ReLU")
            self.classifier = nn.Sequential(
                nn.Dropout(dropout),  # honor the dropout argument
                nn.Linear(1280, num_classes)  # 1280 = conv_head output channels
            )
        self._out_features = out_features
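Finally, a forward sketch for the classification path wired up above, assuming torch.nn.functional as F; it only applies when "linear" is among the output features.

    def forward(self, x):
        # Hypothetical sketch of GhostNet's classification forward.
        x = self.features(x)
        x = self.conv_head(x)
        x = F.adaptive_avg_pool2d(x, 1).flatten(1)  # global average pool
        return self.classifier(x)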