Esempio n. 1
0
    def _make_predict_layer(self, input_channels: int,
                            inner_channels_list: List[int],
                            output_channels: int) -> nn.ModuleList:
        """
        Build the seven-convolution stack that forms one YOLO prediction head.

        The first six entries are ``base_model.Conv2d`` layers that alternate
        between the two widths in ``inner_channels_list``; the seventh is a
        plain ``nn.Conv2d`` (1x1, stride 1, no padding, with bias) that maps
        to ``output_channels``. Per the original author's note, the first
        five layers serve as feature extraction and the last two produce the
        prediction.

        :param input_channels: number of channels fed into the first layer
        :param inner_channels_list: ``[reduce_width, extract_width]`` - index
            0 is the output width of the layers built with a trailing ``1``
            argument (dimension reduction / feature integration), index 1 is
            the output width of the layers built with a trailing ``3``
            argument (feature extraction)
        :param output_channels: number of channels produced by the last layer
        :return: ``nn.ModuleList`` holding the seven layers in order
        """
        reduce_width = inner_channels_list[0]
        extract_width = inner_channels_list[1]

        # NOTE(review): the third positional argument of base_model.Conv2d is
        # presumably the kernel size (1 or 3) - confirm against base_model.

        # Reduce the incoming feature map to the narrow width.
        stack = [base_model.Conv2d(input_channels, reduce_width, 1)]

        # Two rounds of "extract, then integrate back to the narrow width".
        for _ in range(2):
            stack.append(base_model.Conv2d(reduce_width, extract_width, 3))
            stack.append(base_model.Conv2d(extract_width, reduce_width, 1))

        # Final feature extraction before the output projection.
        stack.append(base_model.Conv2d(reduce_width, extract_width, 3))

        # Project down to the requested number of output channels.
        stack.append(
            nn.Conv2d(extract_width,
                      output_channels,
                      kernel_size=1,
                      stride=1,
                      padding=0,
                      bias=True))

        return nn.ModuleList(stack)
Esempio n. 2
0
    def __init__(self, config: dict) -> None:
        """
        Assemble the backbone and the three prediction heads.

        The attribute suffixes ``_13`` / ``_26`` / ``_52`` follow the original
        author's annotations, which refer to 13x13, 26x26 and 52x52 feature
        maps.

        :param config: model settings; this constructor reads
            ``config["anchors"]`` (entries 0, 1 and 2, one anchor group per
            head) and ``config["classes"]``
        """
        super().__init__()

        self.config = config
        # NOTE(review): the meaning of the ``False`` flag is defined by
        # darknet.darknet53 (presumably "do not load pretrained weights") -
        # confirm against that module.
        self.backbone = darknet.darknet53(False)

        def head_width(scale_index):
            # Output channels of one head: anchors at that scale times
            # (5 + number of classes). The 5 is presumably 4 box values plus
            # 1 objectness score - TODO confirm.
            anchors_at_scale = config["anchors"][scale_index]
            return len(anchors_at_scale) * (5 + config["classes"])

        # Channel counts of the final prediction layers.
        self.predict_output_channels_13 = head_width(0)
        self.predict_output_channels_26 = head_width(1)
        self.predict_output_channels_52 = head_width(2)

        backbone_widths = self.backbone.layers_output_channels

        # Head on the last backbone feature map, inner widths 512 / 1024.
        self.last_layer_13 = self._make_predict_layer(
            backbone_widths[-1],
            [512, 1024],
            self.predict_output_channels_13,
        )

        # Integrate the previous head's intermediate result (512 -> 256
        # channels) and upsample it by 2 (13x13 -> 26x26 per the original
        # annotation).
        self.last_layer_26_conv = base_model.Conv2d(512, 256, 1)
        self.last_layer_26_upsample = nn.Upsample(scale_factor=2,
                                                  mode='nearest')

        # Head on the second-to-last backbone feature map; its input width is
        # that feature map's width plus the 256 upsampled channels.
        self.last_layer_26 = self._make_predict_layer(
            backbone_widths[-2] + 256,
            [256, 512],
            self.predict_output_channels_26,
        )

        # Integrate the previous head's intermediate result (256 -> 128
        # channels) and upsample it by 2 (26x26 -> 52x52 per the original
        # annotation).
        self.last_layer_52_conv = base_model.Conv2d(256, 128, 1)
        self.last_layer_52_upsample = nn.Upsample(scale_factor=2,
                                                  mode='nearest')

        # Head on the third-to-last backbone feature map; its input width is
        # that feature map's width plus the 128 upsampled channels.
        self.last_layer_52 = self._make_predict_layer(
            backbone_widths[-3] + 128,
            [128, 256],
            self.predict_output_channels_52,
        )
Esempio n. 3
0
 def _make_layer(self, input_channels: int, block_channels: int,
                 block_inner_channels: int, block_count: int) -> nn.Module:
     """
     Build one backbone stage: a down-sampling convolution followed by a
     stack of residual blocks, returned as a single ``nn.Sequential``.

     :param input_channels: channel count entering the stage
     :param block_channels: input and output channel count of every residual
         block; the two are equal because the blocks are stacked repeatedly
     :param block_inner_channels: intermediate channel count inside each
         residual block
     :param block_count: how many residual blocks to stack
     :return: ``nn.Sequential`` whose children are named
         ``down_sample_conv`` and ``residual_0`` .. ``residual_{n-1}``
     """
     stage = OrderedDict()

     # 1. Down-sampling convolution.
     # NOTE(review): the trailing (3, 2) arguments are presumably kernel
     # size 3 and stride 2 - confirm against base_model.Conv2d.
     stage["down_sample_conv"] = base_model.Conv2d(
         input_channels, block_channels, 3, 2)

     # 2. Residual blocks, each keeping the channel count at block_channels.
     for idx in range(block_count):
         stage["residual_{}".format(idx)] = base_model.BasicBlock(
             block_channels, block_inner_channels, block_channels)

     # 3. Chain everything in insertion order.
     return nn.Sequential(stage)
Esempio n. 4
0
    def __init__(self, block_counts: list) -> None:
        """
        Build the backbone: an initial convolution followed by five
        down-sampling residual stages, then initialise the weights.

        The shape remarks in the comments below are the original author's
        annotations for a 416x416x3 input.

        :param block_counts: number of residual blocks per stage; entries
            0 through 4 are read
        """
        super().__init__()

        self.input_channels = 3  # channels of the input image
        self.init_channels = 32  # width of the initial convolution

        # Initial convolution: 416,416,3 -> 416,416,32
        self.init_conv = base_model.Conv2d(
            self.input_channels, self.init_channels, 3)

        # Five stages, each a down-sampling conv plus residual blocks.
        # 416,416,32 -> 208,208,64
        self.layer1 = self._make_layer(32, 64, 32, block_counts[0])
        # 208,208,64 -> 104,104,128
        self.layer2 = self._make_layer(64, 128, 64, block_counts[1])
        # 104,104,128 -> 52,52,256
        self.layer3 = self._make_layer(128, 256, 128, block_counts[2])
        # 52,52,256 -> 26,26,512
        self.layer4 = self._make_layer(256, 512, 256, block_counts[3])
        # 26,26,512 -> 13,13,1024
        self.layer5 = self._make_layer(512, 1024, 512, block_counts[4])

        # Output channel counts of the five stages, in order.
        self.layers_output_channels = [64, 128, 256, 512, 1024]

        # Weight initialisation. self.modules() walks modules at every
        # nesting level, but only the basic conv / batch-norm layers are
        # touched here.
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                # Simplified Kaiming normal init: std = sqrt(2 / n), where
                # n = kernel_height * kernel_width * out_channels.
                kernel_h = module.kernel_size[0]
                kernel_w = module.kernel_size[1]
                fan_out = kernel_h * kernel_w * module.out_channels
                std = math.sqrt(2. / fan_out)
                module.weight.data.normal_(0, std)
                continue

            if isinstance(module, nn.BatchNorm2d):
                module.weight.data.fill_(1)  # scale starts at 1
                module.bias.data.zero_()  # shift starts at 0