Example #1
0
    def __init__(self, num_classes, block, layers):
        """Build a RetinaNet-style detector on a ResNet backbone.

        Args:
            num_classes: number of object classes for the classification head.
            block: residual block class (BasicBlock or Bottleneck).
            layers: per-stage residual block counts, e.g. [3, 4, 6, 3].
        """
        self.inplanes = 64
        super(ResNet, self).__init__()

        # Stem: 7x7 stride-2 conv -> BN -> ReLU -> 3x3 stride-2 max-pool.
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Residual stages; _make_layer advances self.inplanes, so the order
        # of these four calls matters.
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

        # The FPN consumes the channel count of the last conv in stages 2-4;
        # that conv is named conv2 on BasicBlock and conv3 on Bottleneck.
        if block == BasicBlock:
            last_conv = 'conv2'
        elif block == Bottleneck:
            last_conv = 'conv3'
        fpn_sizes = [
            getattr(stage[count - 1], last_conv).out_channels
            for stage, count in ((self.layer2, layers[1]),
                                 (self.layer3, layers[2]),
                                 (self.layer4, layers[3]))
        ]

        self.fpn = PyramidFeatures(fpn_sizes[0], fpn_sizes[1], fpn_sizes[2])
        self.regressionModel = RegressionModel(256)
        self.classificationModel = ClassificationModel(256, num_classes=num_classes)
        self.anchors = Anchors()
        self.regressBoxes = BBoxTransform()
        self.clipBoxes = ClipBoxes()

        # He-style init for convolutions; batch norm starts as identity.
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                fan_out = module.kernel_size[0] * module.kernel_size[1] * module.out_channels
                module.weight.data.normal_(0, math.sqrt(2. / fan_out))
            elif isinstance(module, nn.BatchNorm2d):
                module.weight.data.fill_(1)
                module.bias.data.zero_()

        # Focal-loss prior: bias the classifier so early training is stable.
        prior = 0.01
        self.classificationModel.output.weight.data.fill_(0)
        self.classificationModel.output.bias.data.fill_(-math.log((1.0 - prior) / prior))
        self.regressionModel.output.weight.data.fill_(0)
        self.regressionModel.output.bias.data.fill_(0)

        self.freeze_bn()
Example #2
0
 def __init__(self):
     """Configure detector post-processing from the global `cfgs` object.

     Copies the top-k / score / NMS thresholds and class count, and builds
     the box-decoding (BBoxTransform) and box-clipping (ClipBoxes) helpers.
     """
     # NOTE(review): the base-class initializer is commented out; if this
     # class derives from nn.Module that is a bug -- confirm the base class.
     #super(RetinanetDetector,self).__init__()
     self.top_k = cfgs.top_k  # keep at most this many detections
     self.score_threshold = cfgs.score_threshold  # drop boxes below this confidence
     self.nms_threshold = cfgs.nms_threshold  # IoU threshold used by NMS
     self.num_classes = cfgs.ClsNum
     self.regressBoxes = BBoxTransform()
     # Clip decoded boxes to a fixed square input of cfgs.ImgSize pixels.
     self.clipBoxes = ClipBoxes(cfgs.ImgSize, cfgs.ImgSize)
Example #3
0
    def __init__(self, num_classes, block, layers, groups=1, width_per_group=64, replace_stride_with_dilation=None,
                 dropout1=0.25, dropout2=0.25, alpha=0.25, gamma=2.0,
                 loss_with_no_bboxes=False, no_bboxes_alpha=0.5, no_bboxes_gamma=2.0):
        """ResNeXt-style RetinaNet backbone plus detection heads.

        Args:
            num_classes: number of object classes for the classification head.
            block: residual block class (BasicBlock or Bottleneck).
            layers: per-stage residual block counts, e.g. [3, 4, 6, 3].
            groups / width_per_group: ResNeXt cardinality and bottleneck width.
            replace_stride_with_dilation: optional 3 bools; replace the
                stride-2 of stages 2-4 with dilation (default: no dilation).
            dropout1, dropout2: dropout rates inside the classification head.
            alpha, gamma: focal-loss parameters.
            loss_with_no_bboxes, no_bboxes_alpha, no_bboxes_gamma: focal-loss
                options for images without ground-truth boxes.
        """
        #Has been changed to ResNext(customized by Yu Han Huang)
        self.inplanes = 64
        super(ResNet, self).__init__()
        #add self.dilation, width_per_group, replace_stride_with_dilation (customized by Yu Han Huang)
        self.dilation = 1
        if replace_stride_with_dilation is None:
            # each element in the tuple indicates if we should replace
            # the 2x2 stride with a dilated convolution instead
            replace_stride_with_dilation = [False, False, False]
        if len(replace_stride_with_dilation) != 3:
            raise ValueError("replace_stride_with_dilation should be None "
                             "or a 3-element tuple, got {}".format(replace_stride_with_dilation))
        self.groups = groups
        self.base_width = width_per_group

        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        # _make_layer advances self.inplanes, so stage order matters.
        self.layer1 = self._make_layer(block, 64, layers[0])
        #add dilate=replace_stride_with_dilation (customized by Yu Han Huang)
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2, dilate=replace_stride_with_dilation[0])
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2, dilate=replace_stride_with_dilation[1])
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2, dilate=replace_stride_with_dilation[2])
        #add C2 layer_size to fpn_sizes (customized by Yu Han Huang)
        if block == BasicBlock:
            fpn_sizes = [self.layer1[layers[0]-1].conv2.out_channels, self.layer2[layers[1]-1].conv2.out_channels,
                         self.layer3[layers[2]-1].conv2.out_channels, self.layer4[layers[3]-1].conv2.out_channels]
        # BUGFIX: this branch previously re-tested `block == BasicBlock`, so it
        # was unreachable and Bottleneck backbones hit a NameError on fpn_sizes.
        elif block == Bottleneck:
            fpn_sizes = [self.layer1[layers[0]-1].conv3.out_channels, self.layer2[layers[1]-1].conv3.out_channels,
                         self.layer3[layers[2]-1].conv3.out_channels, self.layer4[layers[3]-1].conv3.out_channels]
        #add fpn_sizes[0] into PyramidFeatures (customized by Yu Han Huang)
        self.fpn = PyramidFeatures(fpn_sizes[0], fpn_sizes[1], fpn_sizes[2], fpn_sizes[3])
        self.regressionModel = RegressionModel(256)
        self.classificationModel = ClassificationModel(256, num_classes=num_classes, dropout1=dropout1, dropout2=dropout2)
        self.anchors = Anchors()
        self.regressBoxes = BBoxTransform()
        self.clipBoxes = ClipBoxes()
        #add arguments alpha, gamma loss_with_no_bboxes, no_bboxes_alpha, no_bboxes_gamma(customized by Yu Han Huang)
        self.focalLoss = losses.FocalLoss(alpha=alpha, gamma=gamma, loss_with_no_bboxes=loss_with_no_bboxes, no_bboxes_alpha=no_bboxes_alpha, no_bboxes_gamma=no_bboxes_gamma)

        # He init for convolutions; batch norm starts as identity.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

        # Focal-loss prior: bias the classifier output so that the initial
        # predicted probability is ~prior for every class.
        prior = 0.01

        self.classificationModel.output.weight.data.fill_(0)
        self.classificationModel.output.bias.data.fill_(-math.log((1.0-prior)/prior))
        self.regressionModel.output.weight.data.fill_(0)
        self.regressionModel.output.bias.data.fill_(0)
        self.freeze_bn()
    def __init__(self, num_classes, block, pretrained=False, phi=0):
        """EfficientDet: EfficientNet backbone + BiFPN + detection heads.

        Args:
            num_classes: number of object classes.
            block: unused here; kept for signature compatibility with the
                ResNet-style constructors.
            pretrained: unused here; the backbone is always created through
                EfficientNet.from_pretrained.
            phi: compound scaling coefficient (selects efficientnet-b{phi},
                the BiFPN width w_bifpn[phi] and the head depth).
        """
        self.inplanes = w_bifpn[phi]
        super(EfficientDet, self).__init__()
        efficientnet = EfficientNet.from_pretrained(f'efficientnet-b{phi}')
        blocks = []
        count = 0
        fpn_sizes = []
        # BUGFIX: the loop variable used to be named `block`, silently
        # shadowing the constructor parameter of the same name.
        for mb_block in efficientnet._blocks:
            blocks.append(mb_block)
            # A stride-2 depthwise conv starts a new resolution level; record
            # the projected channel count that will feed the BiFPN.
            if mb_block._depthwise_conv.stride == [2, 2]:
                count += 1
                fpn_sizes.append(mb_block._project_conv.out_channels)
                if len(fpn_sizes) >= 4:
                    break

        self.efficientnet = nn.Sequential(efficientnet._conv_stem,
                                          efficientnet._bn0, *blocks)
        # BiFPN repeat count scales with phi, capped at 8.
        num_layers = min(phi + 2, 8)
        self.fpn = BiFPN(fpn_sizes[1:],
                         feature_size=w_bifpn[phi],
                         num_layers=num_layers)

        # Head depth also scales with phi.
        d_class = 3 + (phi // 3)
        self.regressionModel = RegressionModel(w_bifpn[phi],
                                               feature_size=w_bifpn[phi],
                                               d_class=d_class)
        self.classificationModel = ClassificationModel(
            w_bifpn[phi],
            feature_size=w_bifpn[phi],
            d_class=d_class,
            num_classes=num_classes)

        self.anchors = Anchors()

        self.regressBoxes = BBoxTransform()

        self.clipBoxes = ClipBoxes()

        self.focalLoss = losses.FocalLoss().cuda()

        # He init for convolutions; batch norm starts as identity.
        # NOTE(review): this loop iterates ALL submodules, so it also
        # re-initializes the pretrained backbone convs -- confirm intended.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

        # Focal-loss prior biasing of the classification output.
        prior = 0.01

        self.classificationModel.output.weight.data.fill_(0)
        self.classificationModel.output.bias.data.fill_(-math.log(
            (1.0 - prior) / prior))

        self.regressionModel.output.weight.data.fill_(0)
        self.regressionModel.output.bias.data.fill_(0)

        self.freeze_bn()
    def __init__(self, num_classes, block, layers, normalization='batch_norm'):
        """ResNet backbone + RetinaNet heads with selectable stem normalization.

        Args:
            num_classes: number of object classes for the classification head.
            block: residual block class (BasicBlock or Bottleneck).
            layers: per-stage residual block counts, e.g. [3, 4, 6, 3].
            normalization: 'batch_norm' selects nn.BatchNorm2d in the stem;
                any other value selects an 8-group nn.GroupNorm.
        """
        super(ResNet, self).__init__()
        self.inplanes = 64
        self.normalization = normalization


        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        if normalization == 'batch_norm':
            self.bn1 = nn.BatchNorm2d(64)
        else:
            self.bn1 = nn.GroupNorm(num_groups=8, num_channels=64)  # Note: Does not use preloaded imagenet weights, as BatchNorm does
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        # _make_layer advances self.inplanes, so stage order matters.
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

        # Channel counts of the last conv in stages 2-4 feed the FPN;
        # that conv is conv2 on BasicBlock and conv3 on Bottleneck.
        if block == BasicBlock:
            fpn_sizes = [self.layer2[layers[1]-1].conv2.out_channels, self.layer3[layers[2]-1].conv2.out_channels, self.layer4[layers[3]-1].conv2.out_channels]
        elif block == Bottleneck:
            fpn_sizes = [self.layer2[layers[1]-1].conv3.out_channels, self.layer3[layers[2]-1].conv3.out_channels, self.layer4[layers[3]-1].conv3.out_channels]

        self.fpn = PyramidFeatures(fpn_sizes[0], fpn_sizes[1], fpn_sizes[2])

        self.regressionModel = RegressionModel(256)
        self.classificationModel = ClassificationModel(256, num_classes=num_classes)

        self.anchors = Anchors()

        self.regressBoxes = BBoxTransform()

        self.clipBoxes = ClipBoxes()

        self.focalLoss = losses.FocalLoss()

        # He init for convolutions; both norm flavors start as identity.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.GroupNorm):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            # elif :

                # raise NotImplementedError('Not Implemented: Contact @Vishnu')

        # Focal-loss prior: bias the classifier so initial predictions ~prior.
        prior = 0.01

        self.classificationModel.output.weight.data.fill_(0)
        self.classificationModel.output.bias.data.fill_(-math.log((1.0-prior)/prior))

        self.regressionModel.output.weight.data.fill_(0)
        self.regressionModel.output.bias.data.fill_(0)

        self.freeze_bn()
Example #6
0
    def __init__(self, num_classes, backbone_network, fpn_sizes):
        """RetinaNet head stack on top of an externally-built backbone.

        Args:
            num_classes (int): number of object classes.
            backbone_network: feature extractor producing the three feature
                maps consumed by the FPN; stored as ``self.efficientnet``.
            fpn_sizes (list): channel counts of those backbone feature maps.
        """
        self.inplanes = 64
        super(RetinaNet, self).__init__()

        self.fpn = PyramidFeatures(fpn_sizes[0], fpn_sizes[1], fpn_sizes[2])

        self.regressionModel = RegressionModel(256)
        self.classificationModel = ClassificationModel(256, num_classes=num_classes)
        self.anchors = Anchors()
        self.regressBoxes = BBoxTransform()
        self.clipBoxes = ClipBoxes()
        self.focalLoss = losses.FocalLoss()

        # He init for convolutions, identity init for batch norm. The backbone
        # is attached only AFTER this loop, so its weights are left untouched.
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                fan_out = module.kernel_size[0] * module.kernel_size[1] * module.out_channels
                module.weight.data.normal_(0, math.sqrt(2. / fan_out))
            elif isinstance(module, nn.BatchNorm2d):
                module.weight.data.fill_(1)
                module.bias.data.zero_()

        # Focal-loss prior biasing of the head outputs.
        prior = 0.01
        self.classificationModel.output.weight.data.fill_(0)
        self.classificationModel.output.bias.data.fill_(-math.log((1.0 - prior) / prior))
        self.regressionModel.output.weight.data.fill_(0)
        self.regressionModel.output.bias.data.fill_(0)

        self.freeze_bn()

        self.efficientnet = backbone_network
Example #7
0
    def __init__(self, num_classes, phi):
        """EfficientDet-style detector on a `geffnet` backbone.

        Args:
            num_classes: number of object classes.
            phi: compound scaling coefficient; selects the backbone, the
                feature size and the number of pyramid repeats.
        """
        feature_size = feature_sizes[phi]
        super(EfficientDet, self).__init__()

        self.backbone = geffnets[phi](pretrained=True,
                                      drop_rate=0.25,
                                      drop_connect_rate=0.2)

        # Channel counts of the three backbone feature maps fed to the FPN.
        fpn_sizes = [40, 80, 192]

        # BUGFIX: this used to be a plain Python list, so the pyramid modules
        # were invisible to nn.Module bookkeeping (missing from parameters(),
        # state_dict(), .to()/.train() propagation). nn.ModuleList keeps
        # list-style indexing and iteration while registering each level.
        self.fpn = nn.ModuleList([
            PyramidFeatures(fpn_sizes, feature_size=feature_size,
                            index=index).cuda()
            for index in range(min(2 + phi, 8))
        ])

        self.regressionModel = RegressionModel(phi, feature_size=feature_size)
        self.classificationModel = ClassificationModel(
            phi, feature_size=feature_size, num_classes=num_classes)

        self.anchors = Anchors()

        self.regressBoxes = BBoxTransform()

        self.clipBoxes = ClipBoxes()

        self.focalLoss = losses.FocalLoss()

        # Focal-loss prior: bias the classification output so that initial
        # predicted probabilities are ~prior.
        prior = 0.01

        self.classificationModel.output.weight.data.fill_(0)
        self.classificationModel.output.bias.data.fill_(-math.log(
            (1.0 - prior) / prior))

        self.regressionModel.output.weight.data.fill_(0)
        self.regressionModel.output.bias.data.fill_(0)
Example #8
0
    def __init__(self,
                 num_classes,
                 block,
                 layers,
                 max_boxes,
                 score_threshold,
                 seg_level,
                 alphabet,
                 train_htr,
                 htr_gt_box,
                 ner_branch=False,
                 binary_classifier=True):
        """Joint detection + handwritten-text-recognition (HTR) network.

        Args:
            num_classes: number of object (and NER, when enabled) classes.
            block: residual block class (BasicBlock or Bottleneck).
            layers: per-stage residual block counts, e.g. [3, 4, 6, 3].
            max_boxes: maximum number of boxes kept per image.
            score_threshold: minimum score used by the box sampler.
            seg_level: segmentation level forwarded to Anchors.
            alphabet: character set for the recognition head.
            train_htr: whether the HTR branch is trained.
            htr_gt_box: whether recognition runs on ground-truth boxes.
            ner_branch: build the optional NER head when True.
            binary_classifier: flag stored for downstream use.
        """
        self.inplanes = 64
        # Fixed RoI pooling height/width used by the recognition/NER heads.
        self.pool_h = 2
        self.pool_w = 400
        self.forward_transcription = False
        self.max_boxes = max_boxes
        super(ResNet, self).__init__()
        self.conv1 = nn.Conv2d(3,
                               64,
                               kernel_size=7,
                               stride=2,
                               padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        # _make_layer advances self.inplanes, so stage order matters.
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        self.downsampling_factors = [8, 16, 32, 64, 128]
        self.epochs_only_det = 1
        self.score_threshold = score_threshold
        self.alphabet = alphabet
        self.train_htr = train_htr
        self.binary_classifier = binary_classifier
        self.htr_gt_box = htr_gt_box
        self.num_classes = num_classes
        self.ner_branch = ner_branch

        # Channel counts of the last conv in stages 2-4 feed the FPN;
        # that conv is conv2 on BasicBlock and conv3 on Bottleneck.
        if block == BasicBlock:
            fpn_sizes = [
                self.layer2[layers[1] - 1].conv2.out_channels,
                self.layer3[layers[2] - 1].conv2.out_channels,
                self.layer4[layers[3] - 1].conv2.out_channels
            ]
        elif block == Bottleneck:
            fpn_sizes = [
                self.layer2[layers[1] - 1].conv3.out_channels,
                self.layer3[layers[2] - 1].conv3.out_channels,
                self.layer4[layers[3] - 1].conv3.out_channels
            ]

        self.fpn = PyramidFeatures(fpn_sizes[0], fpn_sizes[1], fpn_sizes[2])

        self.anchors = Anchors(seg_level=seg_level)
        self.regressionModel = RegressionModel(
            num_features_in=256, num_anchors=self.anchors.num_anchors)
        self.recognitionModel = RecognitionModel(feature_size=256,
                                                 pool_h=self.pool_h,
                                                 alphabet_len=len(alphabet))
        if ner_branch:
            self.nerModel = NERModel(feature_size=256,
                                     pool_h=self.pool_h,
                                     n_classes=num_classes,
                                     pool_w=self.pool_w)
        self.classificationModel = ClassificationModel(
            num_features_in=256,
            num_anchors=self.anchors.num_anchors,
            num_classes=num_classes)
        self.boxSampler = BoxSampler('train', self.score_threshold)
        self.sorter = RoISorter()
        self.regressBoxes = BBoxTransform()

        self.clipBoxes = ClipBoxes()

        self.focalLoss = losses.FocalLoss()
        if ner_branch:
            self.nerLoss = losses.NERLoss()
        self.transcriptionLoss = losses.TranscriptionLoss()

        # He init for convolutions; batch norm starts as identity.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

        # Focal-loss prior biasing for all classification-style outputs.
        prior = 0.01

        self.classificationModel.output.weight.data.fill_(0)
        self.classificationModel.output.bias.data.fill_(-math.log(
            (1.0 - prior) / prior))

        self.regressionModel.output.weight.data.fill_(0)
        self.regressionModel.output.bias.data.fill_(0)

        self.recognitionModel.output.weight.data.fill_(0)

        self.recognitionModel.output.bias.data.fill_(-math.log((1.0 - prior) /
                                                               prior))
        if ner_branch:
            self.nerModel.output.weight.data.fill_(0)

            self.nerModel.output.bias.data.fill_(-math.log((1.0 - prior) /
                                                           prior))
        self.freeze_bn()
    def __init__(
        self,
        num_classes,
        block=Bottleneck,
        layers=None,
        prior=0.01,
        no_rpn=False,
        no_semantic=False,
        bypass_semantic=False,
        squeeze=True,
        decoder_dropout=None,
        decoder_activation=nn.ReLU(),
        encoder_activation=nn.ReLU(inplace=True),
        batch_norm=False,
        regr_feature_sizes=None,
        class_feature_sizes=None,
    ):
        """RetinaNet with an additional semantic-segmentation decoder branch.

        Args:
            num_classes: number of object classes; heads use num_classes + 1
                channels (one extra for background).
            block: encoder residual block class.
            layers: per-stage block counts; defaults to [3, 4, 6, 3].
            prior: focal-loss prior used to bias the classification output.
            no_rpn / no_semantic / bypass_semantic: branch-selection flags.
            squeeze: forwarded to Anchors.
            decoder_dropout: optional dropout rate for the U-Net decoder.
            decoder_activation / encoder_activation: activation modules. Note
                these defaults are created once at definition time and shared
                across instances; safe only because ReLU holds no state.
            batch_norm: enable batch norm in decoder and heads.
            regr_feature_sizes / class_feature_sizes: per-stage head widths;
                default to [256, 256, 256].
        """
        super(RetinaNet, self).__init__()
        # BUGFIX: `layers` and the two feature-size lists were shared mutable
        # default arguments; materialize fresh lists per call instead.
        if layers is None:
            layers = [3, 4, 6, 3]
        if regr_feature_sizes is None:
            regr_feature_sizes = [256] * 3
        if class_feature_sizes is None:
            class_feature_sizes = [256] * 3

        self.bypass_semantic = bypass_semantic
        self.squeeze = squeeze
        self.pyramid_levels = [3, 4, 5]
        self.no_rpn = no_rpn
        self.no_semantic = no_semantic
        self.encoder = ResNet(block=block,
                              layers=layers,
                              activation=encoder_activation)
        # BUGFIX: the comprehension variable used to be named `nn`, shadowing
        # the torch.nn module alias inside the comprehension scope.
        self.fpn_sizes = [
            self.get_out_channels(getattr(self.encoder, "layer%d" % idx))
            for idx in [2, 3, 4]
        ]
        print("fpn_sizes")
        print(*self.fpn_sizes, sep='\t')

        # Decoder: U-Net style upsampling to per-pixel class logits.
        self.decoder = nn.Sequential(
            UNetDecode(256,
                       hid_channels=self.fpn_sizes,
                       dropout=decoder_dropout,
                       batch_norm=batch_norm,
                       activation=decoder_activation),
            UpsampleBlock(in_channels=256,
                          out_channels=1 + num_classes,
                          steps=3,
                          activation=decoder_activation,
                          batch_norm=batch_norm))

        # Per-stage projections from encoder features to class logits.
        self.enc_to_logits = nn.ModuleList(
            [EncToLogits(n, num_classes + 1) for n in self.fpn_sizes])
        self.fpn = PyramidFeatures(*([num_classes + 1] * 3))

        self.regressionModel = RegressionModel(
            num_classes + 1,
            batch_norm=batch_norm,
            activation=decoder_activation,
            feature_sizes=regr_feature_sizes)
        self.classificationModel = ClassificationModel(
            num_classes + 1,
            num_classes=num_classes,
            batch_norm=batch_norm,
            activation=decoder_activation,
            w_init=0.0,
            feature_sizes=class_feature_sizes)

        self.anchors = Anchors(pyramid_levels=self.pyramid_levels,
                               squeeze=squeeze)

        self.regressBoxes = BBoxTransform()

        self.clipBoxes = ClipBoxes()

        # He init for convolutions; batch norm starts as identity.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

        # Bias the final classification layer toward the focal-loss prior.
        self.classificationModel.final.weight.data.fill_(0)
        self.classificationModel.final.bias.data.fill_(-math.log(
            (1.0 - prior) / prior))

        self.regressionModel.seq.convblock_final.conv.weight.data.fill_(0)
        self.regressionModel.seq.convblock_final.conv.bias.data.fill_(0)

        self.freeze_bn()
Example #10
0
    def __init__(self, num_anchors=9, num_classes=20, compound_coef=0):
        """EfficientDet: lateral convs + BiFPN stack + box/class heads.

        Args:
            num_anchors: number of anchors per spatial location.
            num_classes: number of object classes.
            compound_coef: compound scaling coefficient (0-7); selects the
                BiFPN channel width, BiFPN repeat count and head depth.
        """
        super(EfficientDet, self).__init__()
        self.compound_coef = compound_coef

        # BiFPN channel width indexed by the compound coefficient.
        self.num_channels = [64, 88, 112, 160, 224, 288, 384,
                             384][self.compound_coef]

        # 1x1 lateral projections of three backbone feature maps
        # (40/80/192 channels) to the common BiFPN width.
        self.conv3 = nn.Conv2d(40,
                               self.num_channels,
                               kernel_size=1,
                               stride=1,
                               padding=0)
        self.conv4 = nn.Conv2d(80,
                               self.num_channels,
                               kernel_size=1,
                               stride=1,
                               padding=0)
        self.conv5 = nn.Conv2d(192,
                               self.num_channels,
                               kernel_size=1,
                               stride=1,
                               padding=0)
        # Extra pyramid levels produced by stride-2 convolutions.
        self.conv6 = nn.Conv2d(192,
                               self.num_channels,
                               kernel_size=3,
                               stride=2,
                               padding=1)
        self.conv7 = nn.Sequential(
            nn.ReLU(),
            nn.Conv2d(self.num_channels,
                      self.num_channels,
                      kernel_size=3,
                      stride=2,
                      padding=1))

        # BiFPN repeated min(2 + compound_coef, 8) times.
        self.bifpn = nn.Sequential(*[
            BiFPN(self.num_channels)
            for _ in range(min(2 + self.compound_coef, 8))
        ])

        self.num_classes = num_classes
        self.regressor = Regressor(in_channels=self.num_channels,
                                   num_anchors=num_anchors,
                                   num_layers=3 + self.compound_coef // 3)
        self.classifier = Classifier(in_channels=self.num_channels,
                                     num_anchors=num_anchors,
                                     num_classes=num_classes,
                                     num_layers=3 + self.compound_coef // 3)

        self.anchors = Anchors()
        self.regressBoxes = BBoxTransform()
        self.clipBoxes = ClipBoxes()
        self.focalLoss = FocalLoss()

        # He init for convolutions; batch norm starts as identity.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

        # Focal-loss prior biasing of the classifier output.
        prior = 0.01

        self.classifier.header.weight.data.fill_(0)
        self.classifier.header.bias.data.fill_(-math.log((1.0 - prior) /
                                                         prior))

        self.regressor.header.weight.data.fill_(0)
        self.regressor.header.bias.data.fill_(0)

        # Backbone is attached AFTER the init loop above, so its weights are
        # not re-initialized here.
        self.backbone_net = EfficientNet()
Example #11
0
 def __init__(self, training, score_threshold):
     """Box sampler: decodes, clips and score-filters candidate boxes.

     Args:
         training: sampling-mode flag/tag stored on the module.
         score_threshold: minimum score for a candidate box to be kept.
     """
     super(BoxSampler, self).__init__()
     # NOTE(review): this overwrites nn.Module's boolean `training` flag;
     # elsewhere in this file BoxSampler is built with the string 'train' --
     # confirm that clobbering the flag is intended.
     self.training = training
     self.regressBoxes = BBoxTransform()
     self.clipBoxes = ClipBoxes()
     self.score_threshold = score_threshold
Example #12
0
    def __init__(self, num_classes, block, layers):
        """ResNet backbone + RetinaNet heads + Siamese comparison branch.

        Args:
            num_classes: number of object classes.
            block: residual block class (BasicBlock or Bottleneck).
            layers: per-stage residual block counts, e.g. [3, 4, 6, 3].
        """
        super(ResNet, self).__init__()
        self.inplanes = 64

        self.conv1 = nn.Conv2d(3,
                               64,
                               kernel_size=7,
                               stride=2,
                               padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        # self.relu = nn.ReLU(inplace=True)
        # self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        # _make_layer advances self.inplanes, so stage order matters.
        self.layer1 = self._make_layer(block,
                                       planes=64,
                                       blocks=layers[0],
                                       stride=1)
        self.layer2 = self._make_layer(block,
                                       planes=128,
                                       blocks=layers[1],
                                       stride=2)
        self.layer3 = self._make_layer(block,
                                       planes=256,
                                       blocks=layers[2],
                                       stride=2)
        self.layer4 = self._make_layer(block,
                                       planes=512,
                                       blocks=layers[3],
                                       stride=2)

        # Channel counts of the last conv in stages 2-4 feed the FPN;
        # that conv is conv2 on BasicBlock and conv3 on Bottleneck.
        if block == BasicBlock:
            fpn_sizes = [
                self.layer2[layers[1] - 1].conv2.out_channels,
                self.layer3[layers[2] - 1].conv2.out_channels,
                self.layer4[layers[3] - 1].conv2.out_channels
            ]
        elif block == Bottleneck:
            fpn_sizes = [
                self.layer2[layers[1] - 1].conv3.out_channels,
                self.layer3[layers[2] - 1].conv3.out_channels,
                self.layer4[layers[3] - 1].conv3.out_channels
            ]

        # if block == BasicBlock:
        #     fpn_sizes = [self.layer1[layers[1]-1].conv2.out_channels, self.layer2[layers[1]-1].conv2.out_channels, self.layer3[layers[2]-1].conv2.out_channels, self.layer4[layers[3]-1].conv2.out_channels]
        # elif block == Bottleneck:
        #     fpn_sizes = [self.layer1[layers[1]-1].conv2.out_channels, self.layer2[layers[1]-1].conv3.out_channels, self.layer3[layers[2]-1].conv3.out_channels, self.layer4[layers[3]-1].conv3.out_channels]

        self.fpn = PyramidFeatures(fpn_sizes[0], fpn_sizes[1], fpn_sizes[2])
        self.regressionModel = RegressionModel(256)
        self.classificationModel = ClassificationModel(256,
                                                       num_classes=num_classes)
        self.siameseNetwork = SiameseNetwork()

        self.anchors = Anchors()

        self.regressBoxes = BBoxTransform()

        self.clipBoxes = ClipBoxes()

        self.focalLoss = losses.FocalLoss()

        self.cropBoxes = utils.CropBoxes()

        # pooler = Pooler(
        #     output_size=(6, 6),
        #     scales=(1.0/8, 1.0/16, 1.0/32,), #1.0/64, 1.0/128),
        #     sampling_ratio=0,
        #     canonical_level=4,
        # )
        # self.pooler = pooler

        # He init for convolutions; batch norm starts as identity.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

        # Focal-loss prior biasing; here the heads' last layer is named
        # `conv5` rather than `output`.
        prior = 0.01

        self.classificationModel.conv5.weight.data.fill_(0)
        self.classificationModel.conv5.bias.data.fill_(-math.log(
            (1.0 - prior) / prior))

        self.regressionModel.conv5.weight.data.fill_(0)
        self.regressionModel.conv5.bias.data.fill_(0)

        self.freeze_bn()
Example #13
0
    def __init__(self, num_class, block, layers):
        """ResNet backbone + RetinaNet-style regression/classification heads.

        Args:
            num_class: number of object classes.
            block: residual block class (BasicBlock or Bottleneck); both
                expose a `channels` attribute on their instances.
            layers: per-stage residual block counts, e.g. [3, 4, 6, 3].
        """
        super(ResNet, self).__init__()
        self.in_channels = 64

        # Stem packed into one Sequential: conv -> BN -> ReLU -> max-pool.
        self.conv1 = nn.Sequential(
            OrderedDict([('Conv1',
                          nn.Conv2d(3,
                                    64,
                                    kernel_size=7,
                                    stride=2,
                                    padding=3,
                                    bias=False)), ('BN', nn.BatchNorm2d(64)),
                         ('Relu', nn.ReLU(inplace=True)),
                         ('Maxpooling',
                          nn.MaxPool2d(kernel_size=3, stride=2, padding=1))]))

        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

        # CLEANUP: the BasicBlock and Bottleneck branches were byte-identical
        # (both read `.channels`), so the duplicated if/elif collapses into a
        # single expression; this also removes the NameError that an unknown
        # block type would previously have triggered further down.
        fpn_sizes = [
            self.layer2[layers[1] - 1].channels,
            self.layer3[layers[2] - 1].channels,
            self.layer4[layers[3] - 1].channels
        ]

        self.fpn = PyramidFeatures(fpn_sizes[0], fpn_sizes[1], fpn_sizes[2])

        self.regression = Regression(256)
        self.classification = Classification(256, num_classes=num_class)

        self.anchors = Anchors()

        self.regressBoxes = BBoxTransform()
        self.clipBoxes = ClipBoxes()

        self.focalLoss = losses.FocalLoss()

        # He init for convolutions; batch norm starts as identity.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

        # Focal-loss prior biasing of the classification output.
        prior = 0.01

        # self.classification.output.weight.data.fill_(0)
        self.classification.output.bias.data.fill_(-torch.log(
            (torch.tensor(1.0 - prior).float()) / prior))

        # self.regression.output.weight.data.fill_(0)
        self.regression.output.bias.data.fill_(0)

        self.freeze_bn()
Example #14
0
    def __init__(self, args, image_network, decoder_network=None):
        """Detection head wired onto pre-built image / decoder networks, with
        FPN inputs chosen by ``args.blobs_strategy``.

        Args:
            args: experiment config; this reads ``blobs_strategy``,
                ``finetune_obj`` and ``network_base``.
            image_network: sequential backbone; slices [0:4] form the stem
                and indices 4-7 the four residual stages.
            decoder_network: optional decoder whose conv channels feed the
                FPN when ``blobs_strategy`` contains "decoder".
        """
        super().__init__()

        self.args = args
        self.blobs_strategy = self.args.blobs_strategy
        self.model_type = self.args.finetune_obj.split("_")[0]

        self.num_classes = 9
        self.n_blobs = 3

        # print(image_network)
        self.image_network = image_network
        # print(self.image_network)
        self.init_layers = self.image_network[0:4]
        self.block1 = self.image_network[4]
        self.block2 = self.image_network[5]
        self.block3 = self.image_network[6]
        self.block4 = self.image_network[7]

        self.decoder_network = decoder_network

        # FPN input sizes: last-conv channels of encoder stages 2-4 (conv2 for
        # basic blocks in resnet18/34, conv3 for bottlenecks otherwise).
        # NOTE(review): if `blobs_strategy` contains neither "encoder" nor
        # "decoder", `fpn_sizes` stays unbound and the FPN build below raises
        # NameError -- confirm all callers pass a valid strategy.
        if "encoder" in self.blobs_strategy:
            if "resnet18" in self.args.network_base or "resnet34" in self.args.network_base:
                fpn_sizes = [
                    self.block2[-1].conv2.out_channels,
                    self.block3[-1].conv2.out_channels,
                    self.block4[-1].conv2.out_channels
                ]
            else:
                fpn_sizes = [
                    self.block2[-1].conv3.out_channels,
                    self.block3[-1].conv3.out_channels,
                    self.block4[-1].conv3.out_channels
                ]

        elif "decoder" in self.blobs_strategy:
            if "var" in self.model_type:
                fpn_sizes = [
                    self.decoder_network[3].conv.out_channels,
                    self.decoder_network[2].conv.out_channels,
                    self.decoder_network[1].conv.out_channels
                ]
            else:
                # NOTE(review): `self.synthesizer` is never assigned in this
                # constructor, so this branch raises AttributeError as written
                # -- confirm where the attribute is meant to come from.
                fpn_sizes = [
                    self.decoder_network[1].conv.out_channels,
                    self.decoder_network[0].conv.out_channels,
                    self.synthesizer[-1].conv.out_channels
                ]

        if "encoder" in self.blobs_strategy and "fused" in self.blobs_strategy:
            self.fpn = PyramidFeatures(args,
                                       fpn_sizes[0],
                                       fpn_sizes[1],
                                       fpn_sizes[2],
                                       fusion_strategy="concat_fuse")
        else:
            self.fpn = PyramidFeatures(args, fpn_sizes[0], fpn_sizes[1],
                                       fpn_sizes[2])

        self.dynamic_strategy = ("fused" not in self.blobs_strategy
                                 and "encoder" in self.blobs_strategy)
        # print("dynamic strat", self.dynamic_strategy)
        self.regressionModel = RegressionModel(256, self.dynamic_strategy)
        self.classificationModel = ClassificationModel(256,
                                                       self.dynamic_strategy)

        self.anchors = Anchors()

        self.regressBoxes = BBoxTransform()

        self.clipBoxes = ClipBoxes()

        # Deferred (function-scope) import of the project's loss module.
        import losses

        self.focalLoss = losses.FocalLoss(self.dynamic_strategy)

        prior = 0.01

        # He init for convolutions; batch norm starts as identity.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

        # Focal-loss prior biasing of the head outputs.
        self.classificationModel.output.weight.data.fill_(0)
        self.classificationModel.output.bias.data.fill_(-math.log(
            (1.0 - prior) / prior))

        self.regressionModel.output.weight.data.fill_(0)
        self.regressionModel.output.bias.data.fill_(0)

        self.params = nn.Sequential(
            self.fpn,
            self.regressionModel,
            self.classificationModel,
        )
Example #15
0
    def __init__(self, training):
        """Box sampler initializer.

        Args:
            training: sampling-mode flag/tag stored on the module.
        """
        # BUGFIX: the original body mixed tab- and space-indentation, which
        # raises TabError under Python 3; indentation is now uniform spaces.
        super(BoxSampler, self).__init__()
        # NOTE(review): this overwrites nn.Module's boolean `training` flag --
        # confirm that is intended (a sibling BoxSampler is passed 'train').
        self.training = training
        self.regressBoxes = BBoxTransform()
        self.clipBoxes = ClipBoxes()