# Assumed imports for these excerpts: math, collections.OrderedDict, torch.nn as nn,
# plus repo-local modules (FPN50/FPN101, Concat, RegressionModel, ClassificationModel,
# Anchors, BBoxTransform, ClipBoxes, PRN, losses, MetaConv2d, MetaBatchNorm2d,
# BasicBlock, Bottleneck, PyramidFeatures).

def __init__(self, layers, prn_node_count=1024, prn_coeff=2):
    super(poseNet, self).__init__()
    if layers == 101:
        self.fpn = FPN101()
    elif layers == 50:
        self.fpn = FPN50()
    else:
        raise ValueError(f"Unsupported backbone depth: {layers}")

    ##################################################################################
    # keypoints subnet
    # two 3x3 convs per FPN level; the first reduces channels from 256 to 128
    self.convt1 = nn.Conv2d(256, 128, kernel_size=3, stride=1, padding=1)
    self.convt2 = nn.Conv2d(256, 128, kernel_size=3, stride=1, padding=1)
    self.convt3 = nn.Conv2d(256, 128, kernel_size=3, stride=1, padding=1)
    self.convt4 = nn.Conv2d(256, 128, kernel_size=3, stride=1, padding=1)
    self.convs1 = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1)
    self.convs2 = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1)
    self.convs3 = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1)
    self.convs4 = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1)

    # upsample the three deeper pyramid levels so all four maps share one resolution
    self.upsample1 = nn.Upsample(scale_factor=8, mode='nearest', align_corners=None)
    self.upsample2 = nn.Upsample(scale_factor=4, mode='nearest', align_corners=None)
    self.upsample3 = nn.Upsample(scale_factor=2, mode='nearest', align_corners=None)
    # self.upsample4 = nn.Upsample(size=(120, 120), mode='bilinear', align_corners=True)

    self.concat = Concat()
    # 4 levels x 128 channels = 512 after concat; reduce back to 256
    self.conv2 = nn.Conv2d(512, 256, kernel_size=3, stride=1, padding=1)
    self.convfin = nn.Conv2d(256, 17, kernel_size=1, stride=1, padding=0)  # 17 COCO keypoints

    ##################################################################################
    # detection subnet
    self.regressionModel = RegressionModel(256)
    self.classificationModel = ClassificationModel(256, num_classes=80)
    self.anchors = Anchors()
    self.regressBoxes = BBoxTransform()
    self.clipBoxes = ClipBoxes()
    self.focalLoss = losses.FocalLoss()

    ##################################################################################
    # prn subnet
    self.prn = PRN(prn_node_count, prn_coeff)

    ##################################################################################
    # initialize weights
    self._initialize_weights_norm()

    # bias the classification output so every anchor starts with ~1% foreground
    # probability: sigmoid(-log((1 - prior) / prior)) == prior (RetinaNet trick)
    prior = 0.01
    self.classificationModel.output.weight.data.fill_(0)
    self.classificationModel.output.bias.data.fill_(-math.log((1.0 - prior) / prior))

    self.regressionModel.output.weight.data.fill_(0)
    self.regressionModel.output.bias.data.fill_(0)

    self.freeze_bn()  # from retinanet
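# Usage sketch (not from the repo): constructing the network and running a dry
# forward pass. The input size (480, 480) and the assumption that forward()
# accepts a bare image tensor are illustrative only; the actual signature
# depends on the rest of this repo. Kept as a comment so the file stays valid.
#
#     import torch
#     net = poseNet(layers=50)               # ResNet-50 FPN backbone
#     net.eval()
#     dummy = torch.randn(1, 3, 480, 480)    # batch of one RGB image
#     with torch.no_grad():
#         out = net(dummy)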
def build_detection_loss(saved_for_loss, anno):
    '''
    :param saved_for_loss: [classifications, regressions, anchors]
    :param anno: annotations
    :return: total_loss, saved_for_log
    '''
    saved_for_log = OrderedDict()

    # compute focal (classification) and regression losses, averaged over the batch
    focalLoss = losses.FocalLoss()
    classification_loss, regression_loss = focalLoss(*saved_for_loss, anno)
    classification_loss = classification_loss.mean()
    regression_loss = regression_loss.mean()
    total_loss = classification_loss + regression_loss

    # get scalar values from the tensors and save them for logging
    saved_for_log['total_loss'] = total_loss.item()
    saved_for_log['classification_loss'] = classification_loss.item()
    saved_for_log['regression_loss'] = regression_loss.item()

    return total_loss, saved_for_log
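# Usage sketch (assumed calling convention): saved_for_loss holds the three
# detection-head outputs from the forward pass and anno is the ground-truth
# tensor expected by losses.FocalLoss. All variable names here are hypothetical.
#
#     total_loss, log = build_detection_loss(
#         [classifications, regressions, anchors], annotations)
#     total_loss.backward()
#     print(log['classification_loss'], log['regression_loss'])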
def __init__(self, num_classes, block, layers, act=nn.ReLU(), conv=nn.Conv2d):
    self.inplanes = 4
    super(ResNetReduced, self).__init__()

    # stem: reduced width (4 channels instead of the usual 64)
    self.conv1 = MetaConv2d(3, 4, kernel_size=7, stride=2, padding=3, bias=False)
    self.act = act
    self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
    self.layer1 = self._make_layer(block, 4, layers[0])
    self.layer2 = self._make_layer(block, 8, layers[1], stride=2)
    self.layer3 = self._make_layer(block, 16, layers[2], stride=2)
    self.layer4 = self._make_layer(block, 32, layers[3], stride=2)
    self.bn1 = MetaBatchNorm2d(4)

    # the output channel count of the last block in each stage feeds the FPN
    if block == BasicBlock:
        fpn_sizes = [
            self.layer2[layers[1] - 1].conv2.out_channels,
            self.layer3[layers[2] - 1].conv2.out_channels,
            self.layer4[layers[3] - 1].conv2.out_channels
        ]
    elif block == Bottleneck:
        fpn_sizes = [
            self.layer2[layers[1] - 1].conv3.out_channels,
            self.layer3[layers[2] - 1].conv3.out_channels,
            self.layer4[layers[3] - 1].conv3.out_channels
        ]
    else:
        raise ValueError(f"Block type {block} not understood")

    self.fpn = PyramidFeatures(fpn_sizes[0], fpn_sizes[1], fpn_sizes[2],
                               feature_size=32)
    self.regressionModel = RegressionModel(32, num_anchors=15, feature_size=32)
    self.classificationModel = ClassificationModel(32, num_classes=num_classes,
                                                   num_anchors=15, feature_size=32)
    self.anchors = Anchors()
    self.regressBoxes = BBoxTransform()
    self.clipBoxes = ClipBoxes()
    self.focalLoss = losses.FocalLoss()

    # He initialization for convolutions; unit scale / zero shift for batch norm
    for m in self.modules():
        if isinstance(m, conv):
            n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
            m.weight.data.normal_(0, math.sqrt(2. / n))
        elif isinstance(m, MetaBatchNorm2d):
            m.weight.data.fill_(1)
            m.bias.data.zero_()

    # same focal-loss prior trick as in poseNet above
    prior = 0.01
    self.classificationModel.output.weight.data.fill_(0)
    self.classificationModel.output.bias.data.fill_(-math.log((1.0 - prior) / prior))

    self.regressionModel.output.weight.data.fill_(0)
    self.regressionModel.output.bias.data.fill_(0)

    self.freeze_bn()
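# Construction sketch (assumed): a reduced ResNet-50-style detector. The layer
# counts [3, 4, 6, 3] mirror standard ResNet-50; whether this repo provides a
# factory helper is unknown, so the class is instantiated directly.
#
#     model = ResNetReduced(num_classes=80, block=Bottleneck, layers=[3, 4, 6, 3])
#     model.freeze_bn()   # keep BN statistics frozen, as in RetinaNet training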