def __init__(self, num_anchors=9, num_classes=20, compound_coef=0):
    """Assemble lateral convs, the BiFPN stack, both heads and the backbone.

    Args:
        num_anchors: Forwarded to the regression and classification heads
            as their ``num_anchors`` argument.
        num_classes: Forwarded to the classification head and stored as
            ``self.num_classes``.
        compound_coef: Index into the channel-width table below; it also
            determines the number of BiFPN layers and the ``num_layers``
            value given to both heads.

    Raises:
        IndexError: If ``compound_coef`` cannot index the eight-entry
            width table.
    """
    super(EfficientDet, self).__init__()
    self.compound_coef = compound_coef

    # Feature width shared by the lateral convs, the BiFPN and the heads.
    width_table = [64, 88, 112, 160, 224, 288, 384, 384]
    width = width_table[compound_coef]
    self.num_channels = width

    pointwise = dict(kernel_size=1, stride=1, padding=0)
    strided = dict(kernel_size=3, stride=2, padding=1)

    # Input channel counts are hard-coded here.
    # NOTE(review): presumably they must match the backbone's feature maps;
    # the trailing numbers are carried over from the original annotations
    # and may refer to a different backbone variant -- confirm.
    self.conv3 = nn.Conv2d(48, width, **pointwise)  # 40
    self.conv4 = nn.Conv2d(96, width, **pointwise)  # 80
    self.conv5 = nn.Conv2d(232, width, **pointwise)  # 192
    self.conv6 = nn.Conv2d(232, width, **strided)  # 192
    # conv7 consumes a `width`-channel input rather than a raw 232-channel one.
    self.conv7 = nn.Sequential(
        nn.ReLU(),
        nn.Conv2d(width, width, **strided),
    )

    # BiFPN depth grows with the coefficient and is capped at 8 layers.
    bifpn_depth = min(2 + compound_coef, 8)
    self.bifpn = nn.Sequential(*(BiFPN(width) for _ in range(bifpn_depth)))

    self.num_classes = num_classes
    head_depth = 3 + compound_coef // 3
    self.regressor = Regressor(
        in_channels=width,
        num_anchors=num_anchors,
        num_layers=head_depth,
    )
    self.classifier = Classifier(
        in_channels=width,
        num_anchors=num_anchors,
        num_classes=num_classes,
        num_layers=head_depth,
    )

    self.anchors = Anchors()
    self.regressBoxes = BBoxTransform()
    self.clipBoxes = ClipBoxes()
    self.focalLoss = FocalLoss()

    # Re-initialise every conv / batch-norm layer registered so far.
    for layer in self.modules():
        if isinstance(layer, nn.Conv2d):
            kernel_h, kernel_w = layer.kernel_size
            n = kernel_h * kernel_w * layer.out_channels
            layer.weight.data.normal_(0, math.sqrt(2. / n))
            continue
        if isinstance(layer, nn.BatchNorm2d):
            layer.weight.data.fill_(1)
            layer.bias.data.zero_()

    # Head output layers: zero weights; the classifier bias is derived from
    # `prior`, the regressor bias is zero.
    prior = 0.01
    classifier_bias = -math.log((1.0 - prior) / prior)
    cls_header = self.classifier.header
    cls_header.weight.data.fill_(0)
    cls_header.bias.data.fill_(classifier_bias)
    reg_header = self.regressor.header
    reg_header.weight.data.fill_(0)
    reg_header.bias.data.fill_(0)

    # Attached last, so the initialisation loop above never visits the
    # backbone's layers.
    self.backbone_net = EfficientNet()
def __init__(self, config):
    """Build the detector from a config object and the ``EFFICIENTDET`` table.

    Args:
        config: Object exposing ``is_training``, ``nms_threshold``,
            ``network``, ``num_classes`` and ``resume``. ``config.network``
            selects an entry of ``EFFICIENTDET``; that entry is read for
            the keys ``'W_bifpn'``, ``'EfficientNet_output'``,
            ``'D_bifpn'``, ``'D_class'`` and ``'EfficientNet'``.

    Raises:
        KeyError: If ``config.network`` or one of the keys above is missing
            (assuming ``EFFICIENTDET`` and its entries are plain dicts --
            TODO confirm).
    """
    super(EfficientDet, self).__init__()
    self.is_training = config.is_training
    self.nms_threshold = config.nms_threshold

    model_conf = EFFICIENTDET[config.network]
    width = model_conf['W_bifpn']
    self.num_channels = width
    input_channels = model_conf['EfficientNet_output']

    # One 1x1 conv per input feature level, each producing `width` channels.
    self.convs = []
    pointwise = dict(kernel_size=1, stride=1, padding=0)
    self.conv3 = nn.Conv2d(input_channels[0], width, **pointwise)
    self.conv4 = nn.Conv2d(input_channels[1], width, **pointwise)
    self.conv5 = nn.Conv2d(input_channels[2], width, **pointwise)
    self.conv6 = nn.Conv2d(input_channels[3], width, **pointwise)
    self.conv7 = nn.Conv2d(input_channels[4], width, **pointwise)
    # Plain-list alias of the five convs above; the modules themselves are
    # registered through the attribute assignments.
    self.convs.extend(
        [self.conv3, self.conv4, self.conv5, self.conv6, self.conv7]
    )

    bifpn_depth = model_conf['D_bifpn']
    self.bifpn = nn.Sequential(*(BiFPN(width) for _ in range(bifpn_depth)))

    self.num_classes = config.num_classes
    self.anchors = Anchors()
    anchors_per_location = self.anchors.num_anchors
    self.regressor = Regressor(
        in_channels=width,
        num_anchors=anchors_per_location,
        num_layers=model_conf['D_class'],
    )
    self.classifier = Classifier(
        in_channels=width,
        num_anchors=anchors_per_location,
        num_classes=self.num_classes,
        num_layers=model_conf['D_class'],
    )

    self.regressBoxes = BBoxTransform()
    self.clipBoxes = ClipBoxes()
    self.focalLoss = FocalLoss()

    # Re-initialise every conv / batch-norm layer registered so far.
    for layer in self.modules():
        if isinstance(layer, nn.Conv2d):
            kernel_h, kernel_w = layer.kernel_size
            n = kernel_h * kernel_w * layer.out_channels
            layer.weight.data.normal_(0, math.sqrt(2. / n))
            continue
        if isinstance(layer, nn.BatchNorm2d):
            layer.weight.data.fill_(1)
            layer.bias.data.zero_()

    # Head output layers: zero weights; the classifier bias is derived from
    # `prior`, the regressor bias is zero.
    prior = 0.01
    classifier_bias = -math.log((1.0 - prior) / prior)
    cls_header = self.classifier.header
    cls_header.weight.data.fill_(0)
    cls_header.bias.data.fill_(classifier_bias)
    reg_header = self.regressor.header
    reg_header.weight.data.fill_(0)
    reg_header.bias.data.fill_(0)

    # The backbone is attached after the loop above, so that loop does not
    # touch its layers. `config.resume` picks `from_name`; otherwise
    # `from_pretrained` is used.
    # NOTE(review): presumably resume skips pretrained weights because a
    # checkpoint is loaded afterwards -- confirm against the caller.
    make_backbone = (
        EfficientNet.from_name if config.resume
        else EfficientNet.from_pretrained
    )
    self.backbone_net = make_backbone(model_conf['EfficientNet'])