def sub_inception_module(self, BN, bn_eps=1e-5):
    # Two parallel asymmetric branches (1x3 and 3x1), padded so spatial size is
    # preserved; note that bn_eps is currently accepted but unused.
    return omth_blocks.InceptionBlock(
        384, filters=[384, 384], kernel_sizes=[[[1, 3]], [[3, 1]]],
        stride=[1, 1], padding=[[[0, 1]], [[1, 0]]], batch_norm=BN)
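# Hedged usage sketch: assuming omth_blocks.InceptionBlock concatenates its branch
# outputs along the channel axis, the two 384-filter branches above map 384 input
# channels to 768 at unchanged spatial size. "net" is a hypothetical instance of
# this class, and the type expected for BN is an assumption:
#
#   sub = net.sub_inception_module(BN=True)
#   y = sub(torch.randn(1, 384, 17, 17))  # expected: torch.Size([1, 768, 17, 17])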
def __init__(self, backbone_require_grad=False):
    super().__init__()
    # Use a pre-trained model as the backbone
    """
    backbone = vgg16_bn(pretrained=True)
    for param in backbone.parameters():
        param.requires_grad = backbone_require_grad
    net = list(backbone.children())[0][:24]
    maxout = nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1)
    """
    #net = [ceil_maxout if type(n) is nn.MaxPool2d else n for n in net]
    # Input: 3 x 48 x 960
    self.cnn = nn.Sequential(
        nn.Conv2d(3, 64, 3, 1, 1), nn.ReLU(True),
        nn.MaxPool2d(2, 2),
        nn.Conv2d(64, 128, 3, 1, 1), nn.ReLU(True),
        nn.MaxPool2d(2, 2),
        nn.Conv2d(128, 256, 3, (2, 1), 1), nn.BatchNorm2d(256), nn.ReLU(True),
        nn.Conv2d(256, 256, 1, 1, 0), nn.BatchNorm2d(256), nn.ReLU(True),
        nn.Conv2d(256, 512, 3, 1, 1), nn.BatchNorm2d(512), nn.ReLU(True),
        nn.Conv2d(512, 512, 1, 1, 0), nn.BatchNorm2d(512), nn.ReLU(True),
        nn.Conv2d(512, 256, 3, 1, 1), nn.BatchNorm2d(256), nn.ReLU(True),
        nn.MaxPool2d(2, 2))  # output: 256 x 3 x 120 for a 3 x 48 x 960 input
    """
    backbone = vgg11_bn(pretrained=True)
    net = list(backbone.children())[0][:21]
    for param in backbone.features.parameters():
        param.requires_grad = backbone_require_grad
    """
    #self.cnn = nn.Sequential(*net)
    self.final_conv = omth_blocks.InceptionBlock(
        256, filters=[[256, 128], [256, 128]], kernel_sizes=[[[3, 1], 1], [3, 1]],
        stride=[[1, 1], [1, 1]], padding=[[[0, 0], 0], [[0, 1], 0]])
    self.cnn.apply(init.init_others)
    self.final_conv.apply(init.init_others)
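# Shape walk-through (a hedged sketch; assumes the 3 x 48 x 960 input noted above
# and that forward() runs self.cnn followed by self.final_conv): each
# nn.MaxPool2d(2, 2) halves H and W, the stride-(2, 1) conv halves H only, and
# final_conv's height-3 kernels with zero height padding collapse H to 1:
#
#   x = torch.randn(1, 3, 48, 960)
#   feat = self.final_conv(self.cnn(x))  # expected: torch.Size([1, 256, 1, 120])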
def inceptionA(self, in_channels, pool_features, pool):
    return omth_blocks.InceptionBlock(
        in_channels, filters=[[64], [48, 64], [64, 96, 96], [pool_features]],
        kernel_sizes=[[1], [1, 5], [1, 3, 3], [1]],
        stride=[[1], [1, 1], [1, 1, 1], [1]],
        padding=[[0], [0, 2], [0, 1, 1], [0]],
        batch_norm=self.BN, inner_maxout=[None, None, None, pool])
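# Hedged note: assuming InceptionBlock concatenates branches on the channel axis,
# this mirrors torchvision's InceptionA layout (1x1, 1x1->5x5, 1x1->3x3->3x3, and
# a pooled 1x1 branch), mapping in_channels -> 64 + 64 + 96 + pool_features:
#
#   m5b = self.inceptionA(in_channels=192, pool_features=32,
#                         pool=self.avg_pool_3_1_1)  # 192 -> 256 channels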
def inceptionC(self, in_channels, c7, pool):
    return omth_blocks.InceptionBlock(
        in_channels, filters=[[192], [c7, c7, 192], [c7, c7, c7, c7, 192], [192]],
        kernel_sizes=[[1], [1, [1, 7], [7, 1]], [1, [7, 1], [1, 7], [7, 1], [1, 7]], [1]],
        stride=[[1], [1, 1, 1], [1, 1, 1, 1, 1], [1]],
        padding=[[0], [0, [0, 3], [3, 0]], [0, [0, 3], [3, 0], [0, 3], [3, 0]], [0]],
        batch_norm=self.BN, inner_maxout=[None, None, None, pool])
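# Hedged note: as in torchvision's InceptionC, c7 sets the width of the factorized
# 1x7/7x1 convolutions, every branch ends in 192 channels, and the (0, 3)/(3, 0)
# paddings keep spatial size, so the block maps in_channels -> 4 * 192 = 768:
#
#   m6b = self.inceptionC(in_channels=768, c7=120,
#                         pool=self.avg_pool_3_1_1)  # 768 -> 768 channels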
def create_loc_layer(self, in_channel, anchor, stride, incep_loc=False, in_wid=128):
    loc_layer = nn.ModuleList([])
    loc_layer.append(omth_blocks.conv_block(
        in_channel, [in_channel, in_channel], kernel_sizes=[3, 1],
        stride=[1, 1], padding=[3, 0], dilation=[3, 1],
        batch_norm=self.batch_norm))
    if incep_loc:
        loc_layer.append(omth_blocks.InceptionBlock(
            in_channel,
            filters=[[128, 128, 128, in_wid], [128, 128, 128, in_wid],
                     [128, 128, in_wid], [192, 192, 128, in_wid]],
            kernel_sizes=[[[1, 9], [1, 5], 3, 1], [[1, 7], [1, 3], 3, 1],
                          [[1, 5], 3, 1], [[1, 3], [3, 1], 3, 1]],
            stride=[[1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1], [1, 1, 1, 1]],
            padding=[[[0, 4], [0, 2], 1, 0], [[0, 3], [0, 1], 1, 0],
                     [[0, 2], 1, 0], [[0, 1], [1, 0], 1, 0]],
            batch_norm=None, inner_maxout=None))
    input_channel = in_wid * 4 if incep_loc else in_channel
    loc_layer.append(omth_blocks.conv_block(
        input_channel,
        filters=[input_channel, int(input_channel / 2), anchor * 4],
        kernel_sizes=[3, 1, 3], stride=[1, 1, stride], padding=[0, 1, 1],
        activation=None))
    loc_layer.apply(init.init_cnn)
    return loc_layer
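# Hedged usage sketch: an nn.ModuleList is not callable, so the forward pass
# presumably chains its members in order; the channel count and "x" below are
# illustrative only. The final conv_block emits anchor * 4 box-regression maps:
#
#   loc = self.create_loc_layer(in_channel=512, anchor=6, stride=1)
#   for block in loc:
#       x = block(x)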
def create_conf_layer(self, in_channel, anchor, stride, incep_conf=False):
    conf_layer = nn.ModuleList([])
    conf_layer.append(omth_blocks.conv_block(
        in_channel, [in_channel, in_channel], kernel_sizes=[3, 1],
        stride=[1, 1], padding=[3, 0], dilation=[3, 1],
        batch_norm=self.batch_norm))
    if self.connect_loc_to_conf:
        if incep_conf:
            out_chnl = int(in_channel / 8)
            out_chnl_2 = int(in_channel / 2) - (3 * out_chnl)
            conf_layer.append(omth_blocks.InceptionBlock(
                in_channel,
                stride=[[1, 1, 1], [1, 1, 1], [1, 1, 1], [1, 1]],
                kernel_sizes=[[[1, 7], 3, 1], [[1, 5], 3, 1], [[1, 3], 3, 1], [3, 1]],
                filters=[[64, 64, out_chnl], [64, 64, out_chnl],
                         [64, 64, out_chnl], [128, out_chnl_2]],
                padding=[[[0, 3], 1, 0], [[0, 2], 1, 0], [[0, 1], 1, 0], [1, 0]],
                batch_norm=None, inner_maxout=None))
        else:
            conf_layer.append(omth_blocks.conv_block(
                in_channel, filters=[in_channel, int(in_channel / 2)],
                kernel_sizes=[1, 3], stride=[1, 1], padding=[0, 1],
                activation=None))
        # In this layer, the output from loc_layer is concatenated onto the conf
        # features. Feeding the conf layer the regressed locations helps it make
        # better confidence predictions.
        conf_concate = omth_blocks.conv_block(
            int(in_channel / 2) + anchor * 4,
            kernel_sizes=[3, 1, 3],
            filters=[int(in_channel / 2), int(in_channel / 4), anchor * 2],
            stride=[1, 1, stride], padding=[1, 0, 1], activation=None)
        conf_concate.apply(init.init_cnn)
    else:
        print("incep_conf is turned off because connect_loc_to_conf is False")
        conf_layer.append(omth_blocks.conv_block(
            in_channel, filters=[in_channel, int(in_channel / 2), anchor * 2],
            kernel_sizes=[1, 3, 3], stride=[1, 1, stride], padding=[0, 1, 1],
            activation=None))
        conf_concate = None
    conf_layer.apply(init.init_cnn)
    return conf_layer, conf_concate
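# Hedged sketch of the connect_loc_to_conf path: conf_layer reduces to
# in_channel / 2 channels, the loc branch's anchor * 4 regression maps are
# concatenated on, and conf_concate emits the anchor * 2 confidence channels.
# Variable names below are illustrative:
#
#   conf, conf_cat = self.create_conf_layer(512, anchor=6, stride=1)
#   y = conf[1](conf[0](x))
#   scores = conf_cat(torch.cat([y, loc_pred], dim=1))  # loc_pred: anchor * 4 maps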
def __init__(self, BN, num_classes=1000, aux_logits=True):
    """
    :param BN: batch normalization setting passed through to the omth_blocks layers
    :param num_classes: number of output classes for the final classifier
    :param aux_logits: if True, build the auxiliary classifier head
    """
    super().__init__()
    self.avg_pool_3_1_1 = nn.AvgPool2d(kernel_size=3, stride=1, padding=1)
    self.pool_3_2_0 = nn.MaxPool2d(kernel_size=3, stride=2, padding=0)
    self.pool_8_1_0 = nn.MaxPool2d(kernel_size=8, stride=1, padding=0)
    self.BN = BN
    self.aux_logits = aux_logits
    self.conv_layers_1 = omth_blocks.conv_block(
        3, filters=[32, 32, 64], kernel_sizes=[3, 3, 3], stride=[2, 1, 1],
        padding=[0, 0, 1], name="conv_block_1", batch_norm=BN)
    self.conv_layers_2 = omth_blocks.conv_block(
        64, filters=[80, 192], kernel_sizes=[1, 3], stride=[1, 1],
        padding=[0, 0], name="conv_block_2", batch_norm=BN)
    self.Mixed5b = self.inceptionA(in_channels=192, pool_features=32, pool=self.avg_pool_3_1_1)
    self.Mixed5c = self.inceptionA(in_channels=256, pool_features=64, pool=self.avg_pool_3_1_1)
    self.Mixed5d = self.inceptionA(in_channels=288, pool_features=64, pool=self.avg_pool_3_1_1)
    self.Mixed6a = omth_blocks.InceptionBlock(
        288, filters=[[384], [64, 96, 96]], kernel_sizes=[[3], [1, 3, 3]],
        stride=[[2], [1, 1, 2]], padding=[[0], [0, 1, 0]],
        batch_norm=BN, maxout=self.pool_3_2_0)
    self.Mixed6b = self.inceptionC(in_channels=768, c7=120, pool=self.avg_pool_3_1_1)
    self.Mixed6c = self.inceptionC(in_channels=768, c7=160, pool=self.avg_pool_3_1_1)
    self.Mixed6d = self.inceptionC(in_channels=768, c7=160, pool=self.avg_pool_3_1_1)
    self.Mixed6e = self.inceptionC(in_channels=768, c7=192, pool=self.avg_pool_3_1_1)
    self.Mixed7a = omth_blocks.InceptionBlock(
        768, filters=[[192, 320], [192, 192, 192, 192]],
        kernel_sizes=[[1, 3], [1, [7, 1], [1, 7], 3]],
        stride=[[1, 2], [1, 1, 1, 2]],
        padding=[[0, 0], [0, [0, 3], [3, 0], 0]],
        maxout=self.pool_3_2_0, batch_norm=BN)
    self.Mixed7b = InceptionE(1280, BN)
    self.Mixed7c = InceptionE(2048, BN)
    self.fc = nn.Linear(2048, num_classes)
    if aux_logits:
        self.aux_conv = omth_blocks.conv_block(
            768, filters=[128, 768], kernel_sizes=[1, 5], stride=[1, 1],
            padding=[0, 0], batch_norm=BN)
        self.aux_conv.stddev = 0.01
        self.aux_fc = nn.Linear(768, num_classes)
        self.aux_fc.stddev = 0.001
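# Hedged end-to-end sketch (forward() is outside this excerpt; the shapes below
# follow the attribute definitions and torchvision's Inception v3, which this
# constructor mirrors, with pool_3_2_0 presumably applied after each conv block):
#
#   (N, 3, 299, 299) -> conv_layers_1 / conv_layers_2 + max pools -> (N, 192, 35, 35)
#   -> Mixed5b..5d -> (N, 288, 35, 35) -> Mixed6a -> (N, 768, 17, 17)
#   -> Mixed6b..6e -> Mixed7a -> (N, 1280, 8, 8) -> Mixed7b/7c -> (N, 2048, 8, 8)
#   -> pool_8_1_0 + flatten -> fc -> (N, num_classes)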