Example #1
 def sub_inception_module(self, BN, bn_eps=1e-5):
     return omth_blocks.InceptionBlock(384,
                                       filters=[384, 384],
                                       kernel_sizes=[[[1, 3]], [[3, 1]]],
                                       stride=[1, 1],
                                       padding=[[[0, 1]], [[1, 0]]],
                                       batch_norm=BN)
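If I read the argument lists right, InceptionBlock builds one branch per entry of filters/kernel_sizes/padding and concatenates the branch outputs along the channel axis. A minimal plain-PyTorch sketch of what this call is then assumed to construct (shapes and branch names are mine, batch norm omitted):

    import torch
    import torch.nn as nn

    # Two parallel asymmetric-kernel branches whose outputs are concatenated
    # on the channel axis: 384 + 384 = 768 output channels, spatial size
    # preserved by the matching paddings.
    branch_1x3 = nn.Conv2d(384, 384, kernel_size=(1, 3), stride=1, padding=(0, 1))
    branch_3x1 = nn.Conv2d(384, 384, kernel_size=(3, 1), stride=1, padding=(1, 0))

    x = torch.randn(1, 384, 17, 17)
    y = torch.cat([branch_1x3(x), branch_3x1(x)], dim=1)
    print(y.shape)  # torch.Size([1, 768, 17, 17])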
Example #2
    def __init__(self, backbone_require_grad=False):
        super().__init__()
        # Alternative backbone (kept commented out): reuse pre-trained VGG16 features
        # backbone = vgg16_bn(pretrained=True)
        # for param in backbone.parameters():
        #     param.requires_grad = backbone_require_grad
        # net = list(backbone.children())[0][:24]
        # maxout = nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1)
        # net = [ceil_maxout if type(n) is nn.MaxPool2d else n for n in net]
        # 3 x 48 x 960
        self.cnn = nn.Sequential(
            nn.Conv2d(3, 64, 3, 1, 1), nn.ReLU(True), nn.MaxPool2d(2, 2),
            nn.Conv2d(64, 128, 3, 1, 1), nn.ReLU(True), nn.MaxPool2d(2, 2),
            nn.Conv2d(128, 256, 3, (2, 1), 1), nn.BatchNorm2d(256), nn.ReLU(True),
            nn.Conv2d(256, 256, 1, 1, 0), nn.BatchNorm2d(256), nn.ReLU(True),
            nn.Conv2d(256, 512, 3, 1, 1), nn.BatchNorm2d(512), nn.ReLU(True),
            nn.Conv2d(512, 512, 1, 1, 0), nn.BatchNorm2d(512), nn.ReLU(True),
            nn.Conv2d(512, 256, 3, 1, 1), nn.BatchNorm2d(256), nn.ReLU(True), nn.MaxPool2d(2, 2))  # 512x1x25
        """
        backbone = vgg11_bn(pretrained=True)
        net = list(backbone.children())[0][:21]
        for param in backbone.features.parameters():
            param.requires_grad = backbone_require_grad
            """
        #self.cnn = nn.Sequential(*net)
        self.final_conv = omth_blocks.InceptionBlock(256, filters=[[256, 128], [256, 128]],
                                                     kernel_sizes=[[[3, 1], 1], [3, 1]], stride=[[1, 1], [1, 1]],
                                                     padding=[[[0, 0], 0], [[0, 1], 0]])

        self.cnn.apply(init.init_others)
        self.final_conv.apply(init.init_others)
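A quick spatial-size walk-through of the hand-written backbone (my own arithmetic, assuming the 3 x 48 x 960 input from the comment; the inline "512x1x25" note looks like a leftover from the commented-out VGG variant):

    def conv_out(size, k, s=1, p=0):
        # standard conv/pool formula: floor((size + 2p - k) / s) + 1
        return (size + 2 * p - k) // s + 1

    h, w = 48, 960
    h, w = conv_out(h, 2, s=2), conv_out(w, 2, s=2)            # 1st MaxPool2d -> 24 x 480
    h, w = conv_out(h, 2, s=2), conv_out(w, 2, s=2)            # 2nd MaxPool2d -> 12 x 240
    h, w = conv_out(h, 3, s=2, p=1), conv_out(w, 3, s=1, p=1)  # (2, 1)-strided conv -> 6 x 240
    h, w = conv_out(h, 2, s=2), conv_out(w, 2, s=2)            # last MaxPool2d -> 3 x 120
    print(h, w)  # 3 120; the k3/s1/p1 and 1x1 convs preserve spatial size

Under these assumptions self.cnn emits a 256 x 3 x 120 feature map.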
Example #3
 def inceptionA(self, in_channels, pool_features, pool):
     return omth_blocks.InceptionBlock(
         in_channels,
         filters=[[64], [48, 64], [64, 96, 96], [pool_features]],
         kernel_sizes=[[1], [1, 5], [1, 3, 3], [1]],
         stride=[[1], [1, 1], [1, 1, 1], [1]],
         padding=[[0], [0, 2], [0, 1, 1], [0]],
         batch_norm=self.BN,
         inner_maxout=[None, None, None, pool])
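Assuming the branches are concatenated on the channel axis, the block's output width is the sum of each branch's final filter count, which lines up with how inceptionA is chained in Example #7:

    # 1x1 branch + 5x5 branch + double-3x3 branch + pooled branch
    def inception_a_out_channels(pool_features):
        return 64 + 64 + 96 + pool_features

    assert inception_a_out_channels(32) == 256  # Mixed5b -> Mixed5c(in_channels=256)
    assert inception_a_out_channels(64) == 288  # Mixed5c/5d -> Mixed6a(288)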
Example #4
 def inceptionC(self, in_channels, c7, pool):
     return omth_blocks.InceptionBlock(
         in_channels,
         filters=[[192], [c7, c7, 192], [c7, c7, c7, c7, 192], [192]],
         kernel_sizes=[[1], [1, [1, 7], [7, 1]],
                       [1, [7, 1], [1, 7], [7, 1], [1, 7]], [1]],
         stride=[[1], [1, 1, 1], [1, 1, 1, 1, 1], [1]],
         padding=[[0], [0, [0, 3], [3, 0]],
                  [0, [0, 3], [3, 0], [0, 3], [3, 0]], [0]],
         batch_norm=self.BN,
         inner_maxout=[None, None, None, pool])
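The second branch factorizes a 7x7 convolution into a 1x7 followed by a 7x1, and every branch ends in 192 filters, so the concatenated output keeps 4 * 192 = 768 channels. A standalone sketch of that factorized branch under my reading of the argument lists (batch norm omitted):

    import torch
    import torch.nn as nn

    in_channels, c7 = 768, 160  # the values used for Mixed6c/Mixed6d in Example #7
    branch_7x7 = nn.Sequential(
        nn.Conv2d(in_channels, c7, kernel_size=1),               # 1x1 bottleneck
        nn.Conv2d(c7, c7, kernel_size=(1, 7), padding=(0, 3)),   # 1x7
        nn.Conv2d(c7, 192, kernel_size=(7, 1), padding=(3, 0)),  # 7x1
    )
    x = torch.randn(1, in_channels, 17, 17)
    print(branch_7x7(x).shape)  # torch.Size([1, 192, 17, 17])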
Example #5
 def create_loc_layer(self,
                      in_channel,
                      anchor,
                      stride,
                      incep_loc=False,
                      in_wid=128):
     loc_layer = nn.ModuleList([])
     loc_layer.append(
         omth_blocks.conv_block(in_channel, [in_channel, in_channel],
                                kernel_sizes=[3, 1],
                                stride=[1, 1],
                                padding=[3, 0],
                                dilation=[3, 1],
                                batch_norm=self.batch_norm))
     if incep_loc:
         loc_layer.append(
             omth_blocks.InceptionBlock(in_channel,
                                        filters=[[128, 128, 128, in_wid],
                                                 [128, 128, 128, in_wid],
                                                 [128, 128, in_wid],
                                                 [192, 192, 128, in_wid]],
                                         kernel_sizes=[[[1, 9], [1, 5], 3, 1],
                                                       [[1, 7], [1, 3], 3, 1],
                                                       [[1, 5], 3, 1],
                                                       [[1, 3], [3, 1], 3, 1]],
                                        stride=[[1, 1, 1, 1], [1, 1, 1, 1],
                                                [1, 1, 1], [1, 1, 1, 1]],
                                        padding=[[[0, 4], [0, 2], 1, 0],
                                                 [[0, 3], [0, 1], 1, 0],
                                                 [[0, 2], 1, 0],
                                                 [[0, 1], [1, 0], 1, 0]],
                                        batch_norm=None,
                                        inner_maxout=None))
     input_channel = in_wid * 4 if incep_loc else in_channel
     loc_layer.append(
         omth_blocks.conv_block(
             input_channel,
             filters=[input_channel,
                      int(input_channel / 2), anchor * 4],
             kernel_sizes=[3, 1, 3],
             stride=[1, 1, stride],
             padding=[0, 1, 1],
             activation=None))
     loc_layer.apply(init_cnn)
     return loc_layer
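The last conv_block maps to anchor * 4 channels, i.e. four box-regression offsets per anchor at every spatial position. A hedged sketch (not from the source) of the usual SSD-style reshape such an output would feed into:

    import torch

    anchor = 6                                  # hypothetical anchor count
    loc = torch.randn(1, anchor * 4, 24, 24)    # raw output of the final loc conv
    loc = loc.permute(0, 2, 3, 1).contiguous()  # N x H x W x (anchor*4)
    loc = loc.view(loc.size(0), -1, 4)          # N x (H*W*anchor) x 4
    print(loc.shape)  # torch.Size([1, 3456, 4])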
Example #6
 def create_conf_layer(self, in_channel, anchor, stride, incep_conf=False):
     conf_layer = nn.ModuleList([])
     conf_layer.append(omth_blocks.conv_block(
         in_channel, [in_channel, in_channel], kernel_sizes=[3, 1], stride=[1, 1],
         padding=[3, 0], dilation=[3, 1], batch_norm=self.batch_norm)
     )
     if self.connect_loc_to_conf:
         if incep_conf:
             out_chnl = int(in_channel / 8)
             out_chnl_2 = int(in_channel / 2) - (3 * out_chnl)
             conf_layer.append(omth_blocks.InceptionBlock(
                 in_channel, stride=[[1, 1, 1], [1, 1, 1], [1, 1, 1], [1, 1]],
                 kernel_sizes=[[[1, 7], 3, 1], [[1, 5], 3, 1], [[1, 3], 3, 1], [3, 1]],
                 filters=[[64, 64, out_chnl], [64, 64, out_chnl], [64, 64, out_chnl], [128, out_chnl_2]],
                 padding=[[[0, 3], 1, 0], [[0, 2], 1, 0], [[0, 1], 1, 0], [1, 0]],
                 batch_norm=None, inner_maxout=None))
         else:
             conf_layer.append(omth_blocks.conv_block(
                 in_channel, filters=[in_channel, int(in_channel / 2)],
                 kernel_sizes=[1, 3], stride=[1, 1], padding=[0, 1], activation=None))
          # The output from loc_layer is concatenated onto the conf features here;
          # feeding the conf branch the regressed locations helps it make
          # better confidence predictions.
         conf_concate = omth_blocks.conv_block(
             int(in_channel / 2) + anchor * 4, kernel_sizes=[3, 1, 3],
             filters=[int(in_channel / 2), int(in_channel / 4), anchor * 2],
             stride=[1, 1, stride], padding=[1, 0, 1], activation=None)
         conf_concate.apply(init.init_cnn)
     else:
         print("incep_conf is turned off due to connect_loc_to_conf is False")
         conf_layer.append(omth_blocks.conv_block(
             in_channel, filters=[in_channel, int(in_channel / 2), anchor * 2],
             kernel_sizes=[1, 3, 3], stride=[1, 1, stride], padding=[0, 1, 1], activation=None))
         conf_concate = None
     conf_layer.apply(init.init_cnn)
     return conf_layer, conf_concate
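Per the comment above, conf_concate consumes the confidence features with the regressed locations concatenated on. A small sketch with hypothetical sizes, checking the channel bookkeeping:

    import torch

    in_channel, anchor = 512, 6                           # hypothetical values
    conf_feat = torch.randn(1, in_channel // 2, 24, 24)   # output of conf_layer
    loc_out = torch.randn(1, anchor * 4, 24, 24)          # regressed boxes from loc_layer
    fused = torch.cat([conf_feat, loc_out], dim=1)
    assert fused.size(1) == in_channel // 2 + anchor * 4  # conf_concate's in_channels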
Example #7
    def __init__(self, BN, num_classes=1000, aux_logits=True):
        """
        :param BN:
        :param num_classes:
        :param aux_logits:
        """
        super().__init__()
        self.avg_pool_3_1_1 = nn.AvgPool2d(kernel_size=3, stride=1, padding=1)
        self.pool_3_2_0 = nn.MaxPool2d(kernel_size=3, stride=2, padding=0)
        self.pool_8_1_0 = nn.MaxPool2d(kernel_size=8, stride=1, padding=0)
        self.BN = BN
        self.aux_logits = aux_logits

        self.conv_layers_1 = omth_blocks.conv_block(3,
                                                    filters=[32, 32, 64],
                                                    kernel_sizes=[3, 3, 3],
                                                    stride=[2, 1, 1],
                                                    padding=[0, 0, 1],
                                                    name="conv_block_1",
                                                    batch_norm=BN)
        self.conv_layers_2 = omth_blocks.conv_block(64,
                                                    filters=[80, 192],
                                                    kernel_sizes=[1, 3],
                                                    stride=[1, 1],
                                                    padding=[0, 0],
                                                    name="conv_block_2",
                                                    batch_norm=BN)
        self.Mixed5b = self.inceptionA(in_channels=192,
                                       pool_features=32,
                                       pool=self.avg_pool_3_1_1)
        self.Mixed5c = self.inceptionA(in_channels=256,
                                       pool_features=64,
                                       pool=self.avg_pool_3_1_1)
        self.Mixed5d = self.inceptionA(in_channels=288,
                                       pool_features=64,
                                       pool=self.avg_pool_3_1_1)
        self.Mixed6a = omth_blocks.InceptionBlock(288,
                                                  filters=[[384], [64, 96, 96]],
                                                  kernel_sizes=[[3], [1, 3, 3]],
                                                  stride=[[2], [1, 1, 2]],
                                                  padding=[[0], [0, 1, 0]],
                                                  batch_norm=BN,
                                                  maxout=self.pool_3_2_0)
        self.Mixed6b = self.inceptionC(in_channels=768,
                                       c7=120,
                                       pool=self.avg_pool_3_1_1)
        self.Mixed6c = self.inceptionC(in_channels=768,
                                       c7=160,
                                       pool=self.avg_pool_3_1_1)
        self.Mixed6d = self.inceptionC(in_channels=768,
                                       c7=160,
                                       pool=self.avg_pool_3_1_1)
        self.Mixed6e = self.inceptionC(in_channels=768,
                                       c7=192,
                                       pool=self.avg_pool_3_1_1)

        self.Mixed7a = omth_blocks.InceptionBlock(
            768,
            filters=[[192, 320], [192, 192, 192, 192]],
            kernel_sizes=[[1, 3], [1, [7, 1], [1, 7], 3]],
            stride=[[1, 2], [1, 1, 1, 2]],
            padding=[[0, 0], [0, [0, 3], [3, 0], 0]],
            maxout=self.pool_3_2_0,
            batch_norm=BN)
        self.Mixed7b = InceptionE(1280, BN)
        self.Mixed7c = InceptionE(2048, BN)
        self.fc = nn.Linear(2048, num_classes)

        if aux_logits:
            self.aux_conv = omth_blocks.conv_block(768,
                                                   filters=[128, 768],
                                                   kernel_sizes=[1, 5],
                                                   stride=[1, 1],
                                                   padding=[0, 0],
                                                   batch_norm=BN)
            self.aux_conv.stddev = 0.01  # attribute name matches aux_fc.stddev below
            self.aux_fc = nn.Linear(768, num_classes)
            self.aux_fc.stddev = 0.001
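Only __init__ is shown, so the forward pass is not part of this example. As a hedged sketch, the module names and channel counts suggest the standard Inception v3 ordering (shape comments assume a 3 x 299 x 299 input; when aux_logits is enabled, the auxiliary head would branch off Mixed6e during training):

    def forward(self, x):                    # hypothetical; not from the source
        x = self.conv_layers_1(x)            # 3 x 299 x 299 -> 64 x 147 x 147
        x = self.pool_3_2_0(x)               # -> 64 x 73 x 73
        x = self.conv_layers_2(x)            # -> 192 x 71 x 71
        x = self.pool_3_2_0(x)               # -> 192 x 35 x 35
        x = self.Mixed5d(self.Mixed5c(self.Mixed5b(x)))  # -> 288 x 35 x 35
        x = self.Mixed6a(x)                  # -> 768 x 17 x 17
        x = self.Mixed6e(self.Mixed6d(self.Mixed6c(self.Mixed6b(x))))
        x = self.Mixed7a(x)                  # -> 1280 x 8 x 8
        x = self.Mixed7c(self.Mixed7b(x))    # -> 2048 x 8 x 8
        x = self.pool_8_1_0(x)               # -> 2048 x 1 x 1
        return self.fc(x.flatten(1))         # num_classes logits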