Ejemplo n.º 1
0
    def __init__(self, layers, prn_node_count=1024, prn_coeff=2):
        super(poseNet, self).__init__()
        if layers == 101:
            self.fpn = FPN101()
        if layers == 50:
            self.fpn = FPN50()

        ##################################################################################
        # keypoints subnet
        # 2 conv(kernel=3x3),change channels from 256 to 128
        self.convt1 = nn.Conv2d(256, 128, kernel_size=3, stride=1, padding=1)
        self.convt2 = nn.Conv2d(256, 128, kernel_size=3, stride=1, padding=1)
        self.convt3 = nn.Conv2d(256, 128, kernel_size=3, stride=1, padding=1)
        self.convt4 = nn.Conv2d(256, 128, kernel_size=3, stride=1, padding=1)
        self.convs1 = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1)
        self.convs2 = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1)
        self.convs3 = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1)
        self.convs4 = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1)

        self.upsample1 = nn.Upsample(scale_factor=8,
                                     mode='nearest',
                                     align_corners=None)
        self.upsample2 = nn.Upsample(scale_factor=4,
                                     mode='nearest',
                                     align_corners=None)
        self.upsample3 = nn.Upsample(scale_factor=2,
                                     mode='nearest',
                                     align_corners=None)
        # self.upsample4 = nn.Upsample(size=(120,120),mode='bilinear',align_corners=True)

        self.concat = Concat()
        self.conv2 = nn.Conv2d(512, 256, kernel_size=3, stride=1, padding=1)
        self.convfin = nn.Conv2d(256, 17, kernel_size=1, stride=1, padding=0)

        ##################################################################################
        # detection subnet
        self.regressionModel = RegressionModel(256)
        self.classificationModel = ClassificationModel(256, num_classes=80)
        self.anchors = Anchors()
        self.regressBoxes = BBoxTransform()
        self.clipBoxes = ClipBoxes()
        self.focalLoss = losses.FocalLoss()

        ##################################################################################
        # prn subnet
        self.prn = PRN(prn_node_count, prn_coeff)

        ##################################################################################
        # initialize weights
        self._initialize_weights_norm()
        prior = 0.01
        self.classificationModel.output.weight.data.fill_(0)
        self.classificationModel.output.bias.data.fill_(-math.log(
            (1.0 - prior) / prior))
        self.regressionModel.output.weight.data.fill_(0)
        self.regressionModel.output.bias.data.fill_(0)

        self.freeze_bn()  # from retinanet
Ejemplo n.º 2
0
def build_detection_loss(saved_for_loss, anno):
    '''
    :param saved_for_loss: [classifications, regressions, anchors]
    :param anno: annotations
    :return: classification_loss, regression_loss
    '''
    saved_for_log = OrderedDict()

    # Compute losses
    focalLoss = losses.FocalLoss()
    classification_loss, regression_loss = focalLoss(*saved_for_loss, anno)
    classification_loss = classification_loss.mean()
    regression_loss = regression_loss.mean()
    total_loss = classification_loss + regression_loss

    # Get value from Tensor and save for log
    saved_for_log['total_loss'] = total_loss.item()
    saved_for_log['classification_loss'] = classification_loss.item()
    saved_for_log['regression_loss'] = regression_loss.item()

    return total_loss, saved_for_log
Ejemplo n.º 3
0
    def __init__(self,
                 num_classes,
                 block,
                 layers,
                 act=nn.ReLU(),
                 conv=nn.Conv2d):
        self.inplanes = 4
        super(ResNetReduced, self).__init__()
        self.conv1 = MetaConv2d(3,
                                4,
                                kernel_size=7,
                                stride=2,
                                padding=3,
                                bias=False)
        self.act = act
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 4, layers[0])
        self.layer2 = self._make_layer(block, 8, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 16, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 32, layers[3], stride=2)
        self.bn1 = MetaBatchNorm2d(4)
        if block == BasicBlock:
            fpn_sizes = [
                self.layer2[layers[1] - 1].conv2.out_channels,
                self.layer3[layers[2] - 1].conv2.out_channels,
                self.layer4[layers[3] - 1].conv2.out_channels
            ]
        elif block == Bottleneck:
            fpn_sizes = [
                self.layer2[layers[1] - 1].conv3.out_channels,
                self.layer3[layers[2] - 1].conv3.out_channels,
                self.layer4[layers[3] - 1].conv3.out_channels
            ]
        else:
            raise ValueError(f"Block type {block} not understood")

        self.fpn = PyramidFeatures(fpn_sizes[0],
                                   fpn_sizes[1],
                                   fpn_sizes[2],
                                   feature_size=32)

        self.regressionModel = RegressionModel(32,
                                               num_anchors=15,
                                               feature_size=32)
        self.classificationModel = ClassificationModel(32,
                                                       num_classes=num_classes,
                                                       num_anchors=15,
                                                       feature_size=32)

        self.anchors = Anchors()

        self.regressBoxes = BBoxTransform()

        self.clipBoxes = ClipBoxes()

        self.focalLoss = losses.FocalLoss()

        for m in self.modules():
            if isinstance(m, conv):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, MetaBatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

        prior = 0.01

        self.classificationModel.output.weight.data.fill_(0)
        self.classificationModel.output.bias.data.fill_(-math.log(
            (1.0 - prior) / prior))

        self.regressionModel.output.weight.data.fill_(0)
        self.regressionModel.output.bias.data.fill_(0)

        self.freeze_bn()