Ejemplo n.º 1
0
    def __init__(self, num_anchors=9, num_classes=20, compound_coef=0):
        """Assemble lateral convs, the BiFPN stack, both heads and the backbone.

        Args:
            num_anchors: Anchor count forwarded to the regression and
                classification heads.
            num_classes: Class count forwarded to the classification head.
            compound_coef: Index into the built-in width table; it also sets
                the number of BiFPN blocks (capped at 8) and the head depth
                (``3 + compound_coef // 3``).
        """
        super(EfficientDet, self).__init__()
        self.compound_coef = compound_coef

        # Shared feature width, looked up by compound coefficient.
        width_table = [64, 88, 112, 160, 224, 288, 384, 384]
        self.num_channels = width_table[compound_coef]
        width = self.num_channels

        pointwise = dict(kernel_size=1, stride=1, padding=0)
        downsample = dict(kernel_size=3, stride=2, padding=1)

        # 1x1 convs mapping hard-coded input widths onto the shared width.
        # NOTE(review): the original trailing comments on conv3/4/5/6 read
        # 40 / 80 / 192 / 192 - presumably the input widths of a different
        # backbone variant; confirm before changing the numbers below.
        self.conv3 = nn.Conv2d(48, width, **pointwise)
        self.conv4 = nn.Conv2d(96, width, **pointwise)
        self.conv5 = nn.Conv2d(232, width, **pointwise)
        # Stride-2 3x3 convs; conv7 expects an input that already has `width`
        # channels and applies a ReLU before convolving.
        self.conv6 = nn.Conv2d(232, width, **downsample)
        self.conv7 = nn.Sequential(nn.ReLU(),
                                   nn.Conv2d(width, width, **downsample))

        bifpn_depth = min(2 + compound_coef, 8)
        self.bifpn = nn.Sequential(*(BiFPN(width) for _ in range(bifpn_depth)))

        self.num_classes = num_classes
        head_depth = 3 + compound_coef // 3
        self.regressor = Regressor(in_channels=width,
                                   num_anchors=num_anchors,
                                   num_layers=head_depth)
        self.classifier = Classifier(in_channels=width,
                                     num_anchors=num_anchors,
                                     num_classes=num_classes,
                                     num_layers=head_depth)

        self.anchors = Anchors()
        self.regressBoxes = BBoxTransform()
        self.clipBoxes = ClipBoxes()
        self.focalLoss = FocalLoss()

        # Re-initialise every conv / batch-norm registered so far.  The
        # backbone is attached only after this loop, so it is not visited.
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                fan = (module.kernel_size[0] * module.kernel_size[1]
                       * module.out_channels)
                module.weight.data.normal_(0, math.sqrt(2. / fan))
                continue
            if isinstance(module, nn.BatchNorm2d):
                module.weight.data.fill_(1)
                module.bias.data.zero_()

        prior = 0.01

        # Classification header: zero weights, bias derived from `prior`.
        # NOTE(review): presumably chosen so the initial score is ~prior
        # under a sigmoid - confirm against Classifier.
        cls_header = self.classifier.header
        cls_header.weight.data.fill_(0)
        cls_header.bias.data.fill_(-math.log((1.0 - prior) / prior))

        # Regression header: zero weights and zero bias.
        reg_header = self.regressor.header
        reg_header.weight.data.fill_(0)
        reg_header.bias.data.fill_(0)

        self.backbone_net = EfficientNet()
Ejemplo n.º 2
0
    def __init__(self, config):
        """Build the detector from a config object and the EFFICIENTDET table.

        Args:
            config: Object exposing ``is_training``, ``nms_threshold``,
                ``network``, ``num_classes`` and ``resume``.
                ``config.network`` selects the ``EFFICIENTDET`` entry, which
                is read for the keys ``'W_bifpn'``, ``'EfficientNet_output'``,
                ``'D_bifpn'``, ``'D_class'`` and ``'EfficientNet'``.
        """
        super(EfficientDet, self).__init__()
        self.is_training = config.is_training
        self.nms_threshold = config.nms_threshold

        model_conf = EFFICIENTDET[config.network]
        self.num_channels = model_conf['W_bifpn']
        input_channels = model_conf['EfficientNet_output']

        # Five 1x1 convs, registered as conv3 .. conv7 and also collected in
        # the plain Python list `self.convs`, in that order.
        self.convs = []
        for offset in range(5):
            lateral = nn.Conv2d(input_channels[offset],
                                self.num_channels,
                                kernel_size=1,
                                stride=1,
                                padding=0)
            setattr(self, 'conv%d' % (offset + 3), lateral)
            self.convs.append(lateral)

        bifpn_blocks = (BiFPN(self.num_channels)
                        for _ in range(model_conf['D_bifpn']))
        self.bifpn = nn.Sequential(*bifpn_blocks)

        self.num_classes = config.num_classes
        # Anchors are created before the heads because the heads take their
        # anchor count from `self.anchors.num_anchors`.
        self.anchors = Anchors()
        anchor_count = self.anchors.num_anchors
        head_layers = model_conf['D_class']
        self.regressor = Regressor(in_channels=self.num_channels,
                                   num_anchors=anchor_count,
                                   num_layers=head_layers)
        self.classifier = Classifier(in_channels=self.num_channels,
                                     num_anchors=anchor_count,
                                     num_classes=self.num_classes,
                                     num_layers=head_layers)

        self.regressBoxes = BBoxTransform()
        self.clipBoxes = ClipBoxes()
        self.focalLoss = FocalLoss()

        # Re-initialise every conv / batch-norm registered so far.  The
        # backbone is attached only after this loop, so it is not visited.
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                fan = (module.kernel_size[0] * module.kernel_size[1]
                       * module.out_channels)
                module.weight.data.normal_(0, math.sqrt(2. / fan))
                continue
            if isinstance(module, nn.BatchNorm2d):
                module.weight.data.fill_(1)
                module.bias.data.zero_()

        prior = 0.01

        # Classification header: zero weights, bias derived from `prior`.
        # NOTE(review): presumably chosen so the initial score is ~prior
        # under a sigmoid - confirm against Classifier.
        cls_header = self.classifier.header
        cls_header.weight.data.fill_(0)
        cls_header.bias.data.fill_(-math.log((1.0 - prior) / prior))

        # Regression header: zero weights and zero bias.
        reg_header = self.regressor.header
        reg_header.weight.data.fill_(0)
        reg_header.bias.data.fill_(0)

        # `config.resume` picks the backbone constructor.
        # NOTE(review): presumably from_name skips downloading pretrained
        # weights because a checkpoint will be loaded later - confirm.
        make_backbone = (EfficientNet.from_name if config.resume
                         else EfficientNet.from_pretrained)
        self.backbone_net = make_backbone(model_conf['EfficientNet'])