def __init__(self, num_classes, block, layers):
    """Build a RetinaNet-style detector on a ResNet backbone.

    Args:
        num_classes: number of object classes for the classification head.
        block: residual block class (BasicBlock or Bottleneck).
        layers: blocks per ResNet stage, e.g. [3, 4, 6, 3].
    """
    self.inplanes = 64
    super(ResNet, self).__init__()

    # Stem: 7x7 stride-2 conv -> BN -> ReLU -> 3x3 stride-2 max-pool.
    self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
    self.bn1 = nn.BatchNorm2d(64)
    self.relu = nn.ReLU(inplace=True)
    self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

    # Four residual stages; stages 2-4 halve spatial resolution.
    self.layer1 = self._make_layer(block, 64, layers[0])
    self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
    self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
    self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

    # FPN input widths: last conv of the final block in stages C3-C5
    # (conv2 for BasicBlock, conv3 for Bottleneck).
    if block == BasicBlock:
        fpn_sizes = [
            self.layer2[layers[1] - 1].conv2.out_channels,
            self.layer3[layers[2] - 1].conv2.out_channels,
            self.layer4[layers[3] - 1].conv2.out_channels,
        ]
    elif block == Bottleneck:
        fpn_sizes = [
            self.layer2[layers[1] - 1].conv3.out_channels,
            self.layer3[layers[2] - 1].conv3.out_channels,
            self.layer4[layers[3] - 1].conv3.out_channels,
        ]

    self.fpn = PyramidFeatures(fpn_sizes[0], fpn_sizes[1], fpn_sizes[2])

    # Shared detection heads and box post-processing helpers.
    self.regressionModel = RegressionModel(256)
    self.classificationModel = ClassificationModel(256, num_classes=num_classes)
    self.anchors = Anchors()
    self.regressBoxes = BBoxTransform()
    self.clipBoxes = ClipBoxes()

    # He (fan-out) init for convs; identity init for batch-norm.
    for module in self.modules():
        if isinstance(module, nn.Conv2d):
            fan_out = module.kernel_size[0] * module.kernel_size[1] * module.out_channels
            module.weight.data.normal_(0, math.sqrt(2. / fan_out))
        elif isinstance(module, nn.BatchNorm2d):
            module.weight.data.fill_(1)
            module.bias.data.zero_()

    # Focal-loss prior: bias set so the initial foreground score is ~0.01.
    prior = 0.01
    self.classificationModel.output.weight.data.fill_(0)
    self.classificationModel.output.bias.data.fill_(-math.log((1.0 - prior) / prior))
    self.regressionModel.output.weight.data.fill_(0)
    self.regressionModel.output.bias.data.fill_(0)

    self.freeze_bn()
def __init__(self):
    """Cache detection post-processing settings from the global `cfgs`."""
    # NOTE(review): the base-class __init__ call was deliberately left
    # commented out in the original:
    # super(RetinanetDetector, self).__init__()
    self.num_classes = cfgs.ClsNum
    self.top_k = cfgs.top_k
    self.score_threshold = cfgs.score_threshold
    self.nms_threshold = cfgs.nms_threshold
    # Box decoding and clipping to the (square) input image size.
    self.regressBoxes = BBoxTransform()
    self.clipBoxes = ClipBoxes(cfgs.ImgSize, cfgs.ImgSize)
def __init__(self, num_classes, block, layers, groups=1, width_per_group=64,
             replace_stride_with_dilation=None, dropout1=0.25, dropout2=0.25,
             alpha=0.25, gamma=2.0, loss_with_no_bboxes=False,
             no_bboxes_alpha=0.5, no_bboxes_gamma=2.0):
    """ResNet/ResNeXt backbone with a 4-level FPN and focal-loss heads.

    Customized by Yu Han Huang: ResNeXt groups/width, optional dilated
    strides, dropout in the classification head, and configurable focal
    loss (incl. a separate no-bboxes term).

    Bug fix: the second ``fpn_sizes`` branch tested ``block == BasicBlock``
    twice, making the Bottleneck branch unreachable (``fpn_sizes`` was then
    undefined, raising NameError later). It now tests ``Bottleneck``.

    Args:
        num_classes: number of object classes.
        block: residual block class (BasicBlock or Bottleneck).
        layers: blocks per stage, e.g. [3, 4, 6, 3].
        groups / width_per_group: ResNeXt cardinality / per-group width.
        replace_stride_with_dilation: 3 bools; replace a stage's stride-2
            with dilation. Defaults to [False, False, False].
        dropout1, dropout2: dropout rates in the classification head.
        alpha, gamma: focal-loss parameters.
        loss_with_no_bboxes, no_bboxes_alpha, no_bboxes_gamma: extra focal
            term for images without boxes.
    """
    self.inplanes = 64
    super(ResNet, self).__init__()
    self.dilation = 1
    if replace_stride_with_dilation is None:
        # each element in the tuple indicates if we should replace
        # the 2x2 stride with a dilated convolution instead
        replace_stride_with_dilation = [False, False, False]
    if len(replace_stride_with_dilation) != 3:
        raise ValueError("replace_stride_with_dilation should be None "
                         "or a 3-element tuple, got {}".format(replace_stride_with_dilation))
    self.groups = groups
    self.base_width = width_per_group

    # Stem.
    self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
    self.bn1 = nn.BatchNorm2d(64)
    self.relu = nn.ReLU(inplace=True)
    self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

    # Residual stages (stages 2-4 may dilate instead of stride).
    self.layer1 = self._make_layer(block, 64, layers[0])
    self.layer2 = self._make_layer(block, 128, layers[1], stride=2, dilate=replace_stride_with_dilation[0])
    self.layer3 = self._make_layer(block, 256, layers[2], stride=2, dilate=replace_stride_with_dilation[1])
    self.layer4 = self._make_layer(block, 512, layers[3], stride=2, dilate=replace_stride_with_dilation[2])

    # C2-C5 output widths feed the (4-input) FPN.
    if block == BasicBlock:
        fpn_sizes = [self.layer1[layers[0] - 1].conv2.out_channels,
                     self.layer2[layers[1] - 1].conv2.out_channels,
                     self.layer3[layers[2] - 1].conv2.out_channels,
                     self.layer4[layers[3] - 1].conv2.out_channels]
    elif block == Bottleneck:  # FIX: was `block == BasicBlock` (unreachable branch)
        fpn_sizes = [self.layer1[layers[0] - 1].conv3.out_channels,
                     self.layer2[layers[1] - 1].conv3.out_channels,
                     self.layer3[layers[2] - 1].conv3.out_channels,
                     self.layer4[layers[3] - 1].conv3.out_channels]

    self.fpn = PyramidFeatures(fpn_sizes[0], fpn_sizes[1], fpn_sizes[2], fpn_sizes[3])
    self.regressionModel = RegressionModel(256)
    self.classificationModel = ClassificationModel(256, num_classes=num_classes,
                                                   dropout1=dropout1, dropout2=dropout2)
    self.anchors = Anchors()
    self.regressBoxes = BBoxTransform()
    self.clipBoxes = ClipBoxes()
    self.focalLoss = losses.FocalLoss(alpha=alpha, gamma=gamma,
                                      loss_with_no_bboxes=loss_with_no_bboxes,
                                      no_bboxes_alpha=no_bboxes_alpha,
                                      no_bboxes_gamma=no_bboxes_gamma)

    # He (fan-out) init for convs; identity init for batch-norm.
    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
            m.weight.data.normal_(0, math.sqrt(2. / n))
        elif isinstance(m, nn.BatchNorm2d):
            m.weight.data.fill_(1)
            m.bias.data.zero_()

    # Focal-loss prior: initial foreground probability ~= 0.01.
    prior = 0.01
    self.classificationModel.output.weight.data.fill_(0)
    self.classificationModel.output.bias.data.fill_(-math.log((1.0 - prior) / prior))
    self.regressionModel.output.weight.data.fill_(0)
    self.regressionModel.output.bias.data.fill_(0)

    self.freeze_bn()
def __init__(self, num_classes, block, pretrained=False, phi=0):
    """EfficientDet: EfficientNet-b{phi} backbone + BiFPN + shared heads.

    Fixes: the feature-collection loop reused the name ``block``,
    silently shadowing the ``block`` parameter; renamed to ``mb_block``.
    The unused ``count`` accumulator was removed.

    Args:
        num_classes: number of object classes.
        block: accepted for signature compatibility; not used here.
        pretrained: accepted for signature compatibility; the backbone is
            always loaded via ``EfficientNet.from_pretrained``.
        phi: compound scaling coefficient (selects b0..b7 and BiFPN width).
    """
    self.inplanes = w_bifpn[phi]
    super(EfficientDet, self).__init__()
    efficientnet = EfficientNet.from_pretrained(f'efficientnet-b{phi}')

    # Keep backbone blocks up to (and including) the 4th stride-2 block;
    # their projection widths are the candidate FPN input sizes.
    blocks = []
    fpn_sizes = []
    for mb_block in efficientnet._blocks:
        blocks.append(mb_block)
        if mb_block._depthwise_conv.stride == [2, 2]:
            fpn_sizes.append(mb_block._project_conv.out_channels)
            if len(fpn_sizes) >= 4:
                break

    self.efficientnet = nn.Sequential(efficientnet._conv_stem, efficientnet._bn0, *blocks)

    # BiFPN depth scales with phi (capped at 8 layers).
    num_layers = min(phi + 2, 8)
    self.fpn = BiFPN(fpn_sizes[1:], feature_size=w_bifpn[phi], num_layers=num_layers)

    # Head depth also scales with phi.
    d_class = 3 + (phi // 3)
    self.regressionModel = RegressionModel(w_bifpn[phi], feature_size=w_bifpn[phi], d_class=d_class)
    self.classificationModel = ClassificationModel(
        w_bifpn[phi], feature_size=w_bifpn[phi], d_class=d_class, num_classes=num_classes)

    self.anchors = Anchors()
    self.regressBoxes = BBoxTransform()
    self.clipBoxes = ClipBoxes()
    self.focalLoss = losses.FocalLoss().cuda()

    # He (fan-out) init for convs; identity init for batch-norm.
    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
            m.weight.data.normal_(0, math.sqrt(2. / n))
        elif isinstance(m, nn.BatchNorm2d):
            m.weight.data.fill_(1)
            m.bias.data.zero_()

    # Focal-loss prior: initial foreground probability ~= 0.01.
    prior = 0.01
    self.classificationModel.output.weight.data.fill_(0)
    self.classificationModel.output.bias.data.fill_(-math.log((1.0 - prior) / prior))
    self.regressionModel.output.weight.data.fill_(0)
    self.regressionModel.output.bias.data.fill_(0)

    self.freeze_bn()
def __init__(self, num_classes, block, layers, normalization='batch_norm'):
    """ResNet + FPN detector with selectable normalization.

    Args:
        num_classes: number of object classes.
        block: residual block class (BasicBlock or Bottleneck).
        layers: blocks per stage, e.g. [3, 4, 6, 3].
        normalization: 'batch_norm' for BatchNorm2d, anything else for
            GroupNorm(8 groups) in the stem.
    """
    super(ResNet, self).__init__()
    self.inplanes = 64
    self.normalization = normalization

    # Stem; norm layer is chosen by `normalization`.
    self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
    if normalization == 'batch_norm':
        self.bn1 = nn.BatchNorm2d(64)
    else:
        self.bn1 = nn.GroupNorm(num_groups=8, num_channels=64)
        # Note: Does not use preloaded imagenet weights, as BatchNorm does
    self.relu = nn.ReLU(inplace=True)
    self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

    # Residual stages.
    self.layer1 = self._make_layer(block, 64, layers[0])
    self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
    self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
    self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

    # FPN widths from the last conv of each stage's final block.
    if block == BasicBlock:
        fpn_sizes = [
            self.layer2[layers[1] - 1].conv2.out_channels,
            self.layer3[layers[2] - 1].conv2.out_channels,
            self.layer4[layers[3] - 1].conv2.out_channels,
        ]
    elif block == Bottleneck:
        fpn_sizes = [
            self.layer2[layers[1] - 1].conv3.out_channels,
            self.layer3[layers[2] - 1].conv3.out_channels,
            self.layer4[layers[3] - 1].conv3.out_channels,
        ]

    self.fpn = PyramidFeatures(fpn_sizes[0], fpn_sizes[1], fpn_sizes[2])

    # Heads, anchors, box utilities, loss.
    self.regressionModel = RegressionModel(256)
    self.classificationModel = ClassificationModel(256, num_classes=num_classes)
    self.anchors = Anchors()
    self.regressBoxes = BBoxTransform()
    self.clipBoxes = ClipBoxes()
    self.focalLoss = losses.FocalLoss()

    # He (fan-out) init for convs; identity init for both norm flavours.
    for mod in self.modules():
        if isinstance(mod, nn.Conv2d):
            fan_out = mod.kernel_size[0] * mod.kernel_size[1] * mod.out_channels
            mod.weight.data.normal_(0, math.sqrt(2. / fan_out))
        elif isinstance(mod, nn.BatchNorm2d) or isinstance(mod, nn.GroupNorm):
            mod.weight.data.fill_(1)
            mod.bias.data.zero_()
        # elif :
        #     raise NotImplementedError('Not Implemented: Contact @Vishnu')

    # Focal-loss prior: initial foreground probability ~= 0.01.
    prior = 0.01
    self.classificationModel.output.weight.data.fill_(0)
    self.classificationModel.output.bias.data.fill_(-math.log((1.0 - prior) / prior))
    self.regressionModel.output.weight.data.fill_(0)
    self.regressionModel.output.bias.data.fill_(0)

    self.freeze_bn()
def __init__(self, num_classes, backbone_network, fpn_sizes):
    """RetinaNet head stack over an externally constructed backbone.

    Args:
        num_classes: number of object classes.
        backbone_network: feature-extractor module (stored as
            ``self.efficientnet``).
        fpn_sizes: number of channels in each backbone feature map.
    """
    self.inplanes = 64
    super(RetinaNet, self).__init__()

    # Historical fpn_sizes values for various EfficientNet variants:
    # fpn_sizes = [160, 272, 448]
    # fpn_sizes = [56, 160, 448]   # for b4
    # fpn_sizes = [160, 272, 448]  # for b0
    # fpn_sizes = [112, 192, 1280]
    self.fpn = PyramidFeatures(fpn_sizes[0], fpn_sizes[1], fpn_sizes[2])

    # Heads, anchors, box utilities, loss.
    self.regressionModel = RegressionModel(256)
    self.classificationModel = ClassificationModel(256, num_classes=num_classes)
    self.anchors = Anchors()
    self.regressBoxes = BBoxTransform()
    self.clipBoxes = ClipBoxes()
    self.focalLoss = losses.FocalLoss()

    # He (fan-out) init for convs; identity init for batch-norm.
    for module in self.modules():
        if isinstance(module, nn.Conv2d):
            fan_out = module.kernel_size[0] * module.kernel_size[1] * module.out_channels
            module.weight.data.normal_(0, math.sqrt(2. / fan_out))
        elif isinstance(module, nn.BatchNorm2d):
            module.weight.data.fill_(1)
            module.bias.data.zero_()

    # Focal-loss prior: initial foreground probability ~= 0.01.
    prior = 0.01
    self.classificationModel.output.weight.data.fill_(0)
    self.classificationModel.output.bias.data.fill_(-math.log((1.0 - prior) / prior))
    self.regressionModel.output.weight.data.fill_(0)
    self.regressionModel.output.bias.data.fill_(0)

    self.freeze_bn()
    self.efficientnet = backbone_network
def __init__(self, num_classes, phi):
    """EfficientDet: geffnet backbone + stacked PyramidFeatures + heads.

    Bug fix: ``self.fpn`` was a plain Python list, so the PyramidFeatures
    stages were never registered as submodules — their parameters were
    missing from ``.parameters()``/``.state_dict()`` and were invisible to
    the optimizer and to ``.to()``/``.train()``/``.eval()``. Wrapping in
    ``nn.ModuleList`` registers them; indexing/iteration is unchanged.

    Args:
        num_classes: number of object classes.
        phi: compound scaling coefficient (selects backbone, feature size,
            and number of BiFPN-style stages, capped at 8).
    """
    feature_size = feature_sizes[phi]
    super(EfficientDet, self).__init__()
    self.backbone = geffnets[phi](pretrained=True,
                                  drop_rate=0.25,
                                  drop_connect_rate=0.2)
    # Backbone feature sizes (hard-coded for the geffnet variants used).
    fpn_sizes = [40, 80, 192]

    # FIX: nn.ModuleList instead of a bare list (see docstring).
    self.fpn = nn.ModuleList([
        PyramidFeatures(fpn_sizes, feature_size=feature_size, index=index).cuda()
        for index in range(min(2 + phi, 8))
    ])

    self.regressionModel = RegressionModel(phi, feature_size=feature_size)
    self.classificationModel = ClassificationModel(
        phi, feature_size=feature_size, num_classes=num_classes)
    self.anchors = Anchors()
    self.regressBoxes = BBoxTransform()
    self.clipBoxes = ClipBoxes()
    self.focalLoss = losses.FocalLoss()

    # Focal-loss prior: initial foreground probability ~= 0.01.
    # NOTE(review): unlike the sibling detectors in this file, this
    # __init__ performs no He/BN re-initialization loop — presumably the
    # pretrained backbone weights are kept as-is; confirm intent.
    prior = 0.01
    self.classificationModel.output.weight.data.fill_(0)
    self.classificationModel.output.bias.data.fill_(-math.log((1.0 - prior) / prior))
    self.regressionModel.output.weight.data.fill_(0)
    self.regressionModel.output.bias.data.fill_(0)
def __init__(self, num_classes, block, layers, max_boxes, score_threshold,
             seg_level, alphabet, train_htr, htr_gt_box, ner_branch=False,
             binary_classifier=True):
    """ResNet + FPN detector with text-recognition (HTR) and optional NER branches.

    Args:
        num_classes: number of object/entity classes.
        block: residual block class (BasicBlock or Bottleneck).
        layers: blocks per ResNet stage, e.g. [3, 4, 6, 3].
        max_boxes: cap on the number of boxes kept (stored on self).
        score_threshold: detection score cutoff used by the box sampler.
        seg_level: passed to Anchors — presumably selects the segmentation
            granularity the anchors are built for; confirm against Anchors.
        alphabet: character set for the recognition head (its length sizes
            the output layer).
        train_htr: flag stored for the training loop (handwritten-text branch).
        htr_gt_box: flag stored for the training loop (use GT boxes for HTR).
        ner_branch: if True, build the NER head and its loss.
        binary_classifier: flag stored on self; not used in this method.
    """
    self.inplanes = 64
    # RoI pooling output geometry used by the recognition/NER heads.
    self.pool_h = 2
    self.pool_w = 400
    self.forward_transcription = False
    self.max_boxes = max_boxes
    super(ResNet, self).__init__()

    # Stem.
    self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
    self.bn1 = nn.BatchNorm2d(64)
    self.relu = nn.ReLU(inplace=True)
    self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

    # Residual stages.
    self.layer1 = self._make_layer(block, 64, layers[0])
    self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
    self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
    self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

    # Bookkeeping for the training loop / pyramid geometry.
    self.downsampling_factors = [8, 16, 32, 64, 128]
    self.epochs_only_det = 1
    self.score_threshold = score_threshold
    self.alphabet = alphabet
    self.train_htr = train_htr
    self.binary_classifier = binary_classifier
    self.htr_gt_box = htr_gt_box
    self.num_classes = num_classes
    self.ner_branch = ner_branch

    # FPN widths: last conv of each stage's final block
    # (conv2 for BasicBlock, conv3 for Bottleneck).
    if block == BasicBlock:
        fpn_sizes = [
            self.layer2[layers[1] - 1].conv2.out_channels,
            self.layer3[layers[2] - 1].conv2.out_channels,
            self.layer4[layers[3] - 1].conv2.out_channels
        ]
    elif block == Bottleneck:
        fpn_sizes = [
            self.layer2[layers[1] - 1].conv3.out_channels,
            self.layer3[layers[2] - 1].conv3.out_channels,
            self.layer4[layers[3] - 1].conv3.out_channels
        ]

    self.fpn = PyramidFeatures(fpn_sizes[0], fpn_sizes[1], fpn_sizes[2])
    self.anchors = Anchors(seg_level=seg_level)

    # Heads: regression, character recognition, optional NER, classification.
    self.regressionModel = RegressionModel(
        num_features_in=256, num_anchors=self.anchors.num_anchors)
    self.recognitionModel = RecognitionModel(feature_size=256,
                                             pool_h=self.pool_h,
                                             alphabet_len=len(alphabet))
    if ner_branch:
        self.nerModel = NERModel(feature_size=256,
                                 pool_h=self.pool_h,
                                 n_classes=num_classes,
                                 pool_w=self.pool_w)
    self.classificationModel = ClassificationModel(
        num_features_in=256,
        num_anchors=self.anchors.num_anchors,
        num_classes=num_classes)

    # Box sampling/sorting utilities and losses.
    self.boxSampler = BoxSampler('train', self.score_threshold)
    self.sorter = RoISorter()
    self.regressBoxes = BBoxTransform()
    self.clipBoxes = ClipBoxes()
    self.focalLoss = losses.FocalLoss()
    if ner_branch:
        self.nerLoss = losses.NERLoss()
    self.transcriptionLoss = losses.TranscriptionLoss()

    # He (fan-out) init for convs; identity init for batch-norm.
    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
            m.weight.data.normal_(0, math.sqrt(2. / n))
        elif isinstance(m, nn.BatchNorm2d):
            m.weight.data.fill_(1)
            m.bias.data.zero_()

    # Focal-loss prior applied to every head's output layer: biases set so
    # initial positive probability ~= 0.01; weights zeroed.
    prior = 0.01
    self.classificationModel.output.weight.data.fill_(0)
    self.classificationModel.output.bias.data.fill_(-math.log(
        (1.0 - prior) / prior))
    self.regressionModel.output.weight.data.fill_(0)
    self.regressionModel.output.bias.data.fill_(0)
    self.recognitionModel.output.weight.data.fill_(0)
    self.recognitionModel.output.bias.data.fill_(-math.log((1.0 - prior) / prior))
    if ner_branch:
        self.nerModel.output.weight.data.fill_(0)
        self.nerModel.output.bias.data.fill_(-math.log((1.0 - prior) / prior))

    self.freeze_bn()
def __init__(
        self,
        num_classes,
        block=Bottleneck,
        layers=[3, 4, 6, 3],        # NOTE(review): mutable default — shared across calls if mutated
        prior=0.01,
        no_rpn=False,
        no_semantic=False,
        bypass_semantic=False,
        squeeze=True,
        decoder_dropout=None,
        decoder_activation=nn.ReLU(),
        encoder_activation=nn.ReLU(inplace=True),
        batch_norm=False,
        regr_feature_sizes=[256] * 3,   # NOTE(review): mutable default
        class_feature_sizes=[256] * 3,  # NOTE(review): mutable default
):
    """RetinaNet variant: ResNet encoder, UNet-style semantic decoder, and
    detection heads that consume the decoder's (num_classes+1)-channel logits.

    Args:
        num_classes: number of object classes (decoder emits num_classes+1
            channels, presumably +1 for background — confirm).
        block, layers: ResNet configuration for the encoder.
        prior: focal-loss prior for the classification bias init.
        no_rpn / no_semantic / bypass_semantic: flags consumed elsewhere
            (stored on self).
        squeeze: forwarded to Anchors and stored.
        decoder_dropout, decoder_activation, encoder_activation, batch_norm:
            decoder/encoder construction options.
        regr_feature_sizes, class_feature_sizes: per-layer widths of the
            regression / classification heads.
    """
    super(RetinaNet, self).__init__()
    self.bypass_semantic = bypass_semantic
    self.squeeze = squeeze
    self.pyramid_levels = [3, 4, 5]
    self.no_rpn = no_rpn
    self.no_semantic = no_semantic

    self.encoder = ResNet(block=block, layers=layers, activation=encoder_activation)
    # Output channel counts of encoder stages 2-4.
    # NOTE(review): the comprehension variable `nn` shadows the torch `nn`
    # module inside the comprehension scope only (harmless but confusing).
    self.fpn_sizes = [
        self.get_out_channels(getattr(self.encoder, "layer%d" % nn))
        for nn in [2, 3, 4]
    ]
    #self.fpn_sizes.append([sz[-1]//2 for sz in self.fpn_sizes[-1]])
    print("fpn_sizes")
    print(*self.fpn_sizes, sep='\t')
    # if block == BasicBlock:
    #     fpn_sizes = [self.layer2[-1].conv2.out_channels,
    #                  self.layer3[-1].conv2.out_channels,
    #                  self.layer4[-1].conv2.out_channels]
    #     print
    # elif block == Bottleneck:
    #     fpn_sizes = [self.layer2[-1].conv3.out_channels,
    #                  self.layer3[-1].conv3.out_channels,
    #                  self.layer4[-1].conv3.out_channels]
    # self.decoder = UNetDecode(num_classes, hid_channels=fpn_sizes)

    # Semantic decoder: UNet decode to 256 channels, then upsample 3 steps
    # to (1 + num_classes) output channels.
    self.decoder = nn.Sequential(
        UNetDecode(256,
                   hid_channels=self.fpn_sizes,
                   dropout=decoder_dropout,
                   batch_norm=batch_norm,
                   activation=decoder_activation),
        UpsampleBlock(in_channels=256,
                      out_channels=1 + num_classes,
                      steps=3,
                      activation=decoder_activation,
                      batch_norm=batch_norm))
    # Per-stage projections from encoder features to class logits.
    self.enc_to_logits = nn.ModuleList(
        [EncToLogits(n, num_classes + 1) for n in self.fpn_sizes])

    #self.fpn = PyramidFeatures(self.fpn_sizes[0], self.fpn_sizes[1], self.fpn_sizes[2])
    #self.regressionModel = RegressionModel(256)
    #self.classificationModel = ClassificationModel(256, num_classes=num_classes)

    # Detection path runs on the (num_classes + 1)-channel logit maps.
    self.fpn = PyramidFeatures(*([num_classes + 1] * 3))
    self.regressionModel = RegressionModel(
        num_classes + 1,
        batch_norm=batch_norm,
        activation=decoder_activation,
        feature_sizes=regr_feature_sizes)
    self.classificationModel = ClassificationModel(
        num_classes + 1,
        num_classes=num_classes,
        batch_norm=batch_norm,
        activation=decoder_activation,
        w_init=0.0,
        feature_sizes=class_feature_sizes)

    self.anchors = Anchors(pyramid_levels=self.pyramid_levels, squeeze=squeeze)
    self.regressBoxes = BBoxTransform()
    self.clipBoxes = ClipBoxes()

    # He (fan-out) init for convs; identity init for batch-norm.
    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
            m.weight.data.normal_(0, math.sqrt(2. / n))
        elif isinstance(m, nn.BatchNorm2d):
            m.weight.data.fill_(1)
            m.bias.data.zero_()

    # Focal-loss prior on the classification final layer; zero the
    # regression head's final conv.
    self.classificationModel.final.weight.data.fill_(0)
    self.classificationModel.final.bias.data.fill_(-math.log(
        (1.0 - prior) / prior))
    self.regressionModel.seq.convblock_final.conv.weight.data.fill_(0)
    self.regressionModel.seq.convblock_final.conv.bias.data.fill_(0)

    self.freeze_bn()
def __init__(self, num_anchors=9, num_classes=20, compound_coef=0):
    """EfficientDet with lateral convs feeding a stacked BiFPN.

    Args:
        num_anchors: anchors per spatial location.
        num_classes: number of object classes.
        compound_coef: compound scaling coefficient (selects channel width,
            BiFPN depth, and head depth).
    """
    super(EfficientDet, self).__init__()
    self.compound_coef = compound_coef
    # Channel width per compound coefficient (d0..d7).
    self.num_channels = [64, 88, 112, 160, 224, 288, 384, 384][self.compound_coef]

    width = self.num_channels
    # Lateral 1x1 projections from backbone levels C3-C5, plus extra
    # stride-2 convs producing P6 and P7.
    self.conv3 = nn.Conv2d(40, width, kernel_size=1, stride=1, padding=0)
    self.conv4 = nn.Conv2d(80, width, kernel_size=1, stride=1, padding=0)
    self.conv5 = nn.Conv2d(192, width, kernel_size=1, stride=1, padding=0)
    self.conv6 = nn.Conv2d(192, width, kernel_size=3, stride=2, padding=1)
    self.conv7 = nn.Sequential(
        nn.ReLU(),
        nn.Conv2d(width, width, kernel_size=3, stride=2, padding=1))

    # BiFPN depth scales with the compound coefficient (capped at 8).
    bifpn_depth = min(2 + self.compound_coef, 8)
    self.bifpn = nn.Sequential(*[BiFPN(width) for _ in range(bifpn_depth)])

    self.num_classes = num_classes
    head_depth = 3 + self.compound_coef // 3
    self.regressor = Regressor(in_channels=width,
                               num_anchors=num_anchors,
                               num_layers=head_depth)
    self.classifier = Classifier(in_channels=width,
                                 num_anchors=num_anchors,
                                 num_classes=num_classes,
                                 num_layers=head_depth)

    self.anchors = Anchors()
    self.regressBoxes = BBoxTransform()
    self.clipBoxes = ClipBoxes()
    self.focalLoss = FocalLoss()

    # He (fan-out) init for convs; identity init for batch-norm.
    for module in self.modules():
        if isinstance(module, nn.Conv2d):
            fan_out = module.kernel_size[0] * module.kernel_size[1] * module.out_channels
            module.weight.data.normal_(0, math.sqrt(2. / fan_out))
        elif isinstance(module, nn.BatchNorm2d):
            module.weight.data.fill_(1)
            module.bias.data.zero_()

    # Focal-loss prior: initial foreground probability ~= 0.01.
    prior = 0.01
    self.classifier.header.weight.data.fill_(0)
    self.classifier.header.bias.data.fill_(-math.log((1.0 - prior) / prior))
    self.regressor.header.weight.data.fill_(0)
    self.regressor.header.bias.data.fill_(0)

    self.backbone_net = EfficientNet()
def __init__(self, training, score_threshold):
    """Box sampler with box decoding/clipping helpers.

    Args:
        training: mode flag stored on self (note: overwrites nn.Module's
            built-in ``training`` attribute).
        score_threshold: minimum detection score to keep a box.
    """
    super(BoxSampler, self).__init__()
    self.training = training
    self.score_threshold = score_threshold
    # Box decoding and image-boundary clipping.
    self.regressBoxes = BBoxTransform()
    self.clipBoxes = ClipBoxes()
def __init__(self, num_classes, block, layers):
    """ResNet + FPN detector with an additional Siamese comparison branch.

    Args:
        num_classes: number of object classes.
        block: residual block class (BasicBlock or Bottleneck).
        layers: blocks per ResNet stage, e.g. [3, 4, 6, 3].
    """
    super(ResNet, self).__init__()
    self.inplanes = 64
    # Stem.
    self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
    self.bn1 = nn.BatchNorm2d(64)
    # NOTE(review): relu/maxpool are commented out here — presumably
    # applied functionally in forward(); confirm before relying on them.
    # self.relu = nn.ReLU(inplace=True)
    # self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

    # Residual stages.
    self.layer1 = self._make_layer(block, planes=64, blocks=layers[0], stride=1)
    self.layer2 = self._make_layer(block, planes=128, blocks=layers[1], stride=2)
    self.layer3 = self._make_layer(block, planes=256, blocks=layers[2], stride=2)
    self.layer4 = self._make_layer(block, planes=512, blocks=layers[3], stride=2)

    # FPN widths: last conv of each stage's final block
    # (conv2 for BasicBlock, conv3 for Bottleneck).
    if block == BasicBlock:
        fpn_sizes = [
            self.layer2[layers[1] - 1].conv2.out_channels,
            self.layer3[layers[2] - 1].conv2.out_channels,
            self.layer4[layers[3] - 1].conv2.out_channels
        ]
    elif block == Bottleneck:
        fpn_sizes = [
            self.layer2[layers[1] - 1].conv3.out_channels,
            self.layer3[layers[2] - 1].conv3.out_channels,
            self.layer4[layers[3] - 1].conv3.out_channels
        ]
    # if block == BasicBlock:
    #     fpn_sizes = [self.layer1[layers[1]-1].conv2.out_channels, self.layer2[layers[1]-1].conv2.out_channels, self.layer3[layers[2]-1].conv2.out_channels, self.layer4[layers[3]-1].conv2.out_channels]
    # elif block == Bottleneck:
    #     fpn_sizes = [self.layer1[layers[1]-1].conv2.out_channels, self.layer2[layers[1]-1].conv3.out_channels, self.layer3[layers[2]-1].conv3.out_channels, self.layer4[layers[3]-1].conv3.out_channels]

    self.fpn = PyramidFeatures(fpn_sizes[0], fpn_sizes[1], fpn_sizes[2])

    # Heads plus the Siamese comparison network and box cropping helper.
    self.regressionModel = RegressionModel(256)
    self.classificationModel = ClassificationModel(256, num_classes=num_classes)
    self.siameseNetwork = SiameseNetwork()
    self.anchors = Anchors()
    self.regressBoxes = BBoxTransform()
    self.clipBoxes = ClipBoxes()
    self.focalLoss = losses.FocalLoss()
    self.cropBoxes = utils.CropBoxes()
    # pooler = Pooler(
    #     output_size=(6, 6),
    #     scales=(1.0/8, 1.0/16, 1.0/32,), #1.0/64, 1.0/128),
    #     sampling_ratio=0,
    #     canonical_level=4,
    # )
    # self.pooler = pooler

    # He (fan-out) init for convs; identity init for batch-norm.
    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
            m.weight.data.normal_(0, math.sqrt(2. / n))
        elif isinstance(m, nn.BatchNorm2d):
            m.weight.data.fill_(1)
            m.bias.data.zero_()

    # Focal-loss prior: initial foreground probability ~= 0.01. In this
    # variant the heads' final layer is named `conv5` rather than `output`.
    prior = 0.01
    self.classificationModel.conv5.weight.data.fill_(0)
    self.classificationModel.conv5.bias.data.fill_(-math.log(
        (1.0 - prior) / prior))
    self.regressionModel.conv5.weight.data.fill_(0)
    self.regressionModel.conv5.bias.data.fill_(0)

    self.freeze_bn()
def __init__(self, num_class, block, layers):
    """ResNet + FPN detector with an OrderedDict stem.

    Cleanup: the original had identical ``if block == BasicBlock`` /
    ``elif block == Bottleneck`` branches (both read ``.channels``); they
    are merged into one membership test. Any other block type still leaves
    ``fpn_sizes`` unbound (NameError on use), exactly as before.

    Args:
        num_class: number of object classes.
        block: residual block class (BasicBlock or Bottleneck).
        layers: blocks per ResNet stage, e.g. [3, 4, 6, 3].
    """
    super(ResNet, self).__init__()
    self.in_channels = 64

    # Stem packaged as one named Sequential.
    self.conv1 = nn.Sequential(
        OrderedDict([('Conv1', nn.Conv2d(3, 64, kernel_size=7, stride=2,
                                         padding=3, bias=False)),
                     ('BN', nn.BatchNorm2d(64)),
                     ('Relu', nn.ReLU(inplace=True)),
                     ('Maxpooling', nn.MaxPool2d(kernel_size=3, stride=2,
                                                 padding=1))]))

    # Residual stages.
    self.layer1 = self._make_layer(block, 64, layers[0])
    self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
    self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
    self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

    # Both block types expose `.channels`, so one branch suffices
    # (the original duplicated this list in identical if/elif branches).
    if block in (BasicBlock, Bottleneck):
        fpn_sizes = [
            self.layer2[layers[1] - 1].channels,
            self.layer3[layers[2] - 1].channels,
            self.layer4[layers[3] - 1].channels
        ]

    self.fpn = PyramidFeatures(fpn_sizes[0], fpn_sizes[1], fpn_sizes[2])

    # Heads, anchors, box utilities, loss.
    self.regression = Regression(256)
    self.classification = Classification(256, num_classes=num_class)
    self.anchors = Anchors()
    self.regressBoxes = BBoxTransform()
    self.clipBoxes = ClipBoxes()
    self.focalLoss = losses.FocalLoss()

    # He (fan-out) init for convs; identity init for batch-norm.
    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
            m.weight.data.normal_(0, math.sqrt(2. / n))
        elif isinstance(m, nn.BatchNorm2d):
            m.weight.data.fill_(1)
            m.bias.data.zero_()

    # Focal-loss prior: classification bias set so the initial foreground
    # probability ~= 0.01 (torch.log variant of the usual math.log init).
    prior = 0.01
    # self.classification.output.weight.data.fill_(0)
    self.classification.output.bias.data.fill_(-torch.log(
        (torch.tensor(1.0 - prior).float()) / prior))
    # self.regression.output.weight.data.fill_(0)
    self.regression.output.bias.data.fill_(0)

    self.freeze_bn()
def __init__(self, args, image_network, decoder_network=None):
    """Detector built on a pre-constructed image network (and optionally a
    decoder network), with FPN inputs chosen by `args.blobs_strategy`.

    Args:
        args: config namespace; reads `blobs_strategy`, `finetune_obj`,
            and `network_base`.
        image_network: indexable backbone; slots [0:4] are stem layers and
            [4..7] are the four residual stages.
        decoder_network: optional decoder whose conv widths are used when
            the strategy draws features from the decoder.
    """
    super().__init__()
    self.args = args
    self.blobs_strategy = self.args.blobs_strategy
    # e.g. "var_xxx" -> "var"; selects decoder indexing below.
    self.model_type = self.args.finetune_obj.split("_")[0]
    self.num_classes = 9
    self.n_blobs = 3
    # print(image_network)
    self.image_network = image_network
    # print(self.image_network)
    # Split backbone: stem (first 4 modules) + four residual stages.
    self.init_layers = self.image_network[0:4]
    self.block1 = self.image_network[4]
    self.block2 = self.image_network[5]
    self.block3 = self.image_network[6]
    self.block4 = self.image_network[7]
    self.decoder_network = decoder_network

    # Choose FPN input widths depending on where features are tapped.
    if "encoder" in self.blobs_strategy:
        # BasicBlock backbones (resnet18/34) end in conv2; others in conv3.
        if "resnet18" in self.args.network_base or "resnet34" in self.args.network_base:
            fpn_sizes = [
                self.block2[-1].conv2.out_channels,
                self.block3[-1].conv2.out_channels,
                self.block4[-1].conv2.out_channels
            ]
        else:
            fpn_sizes = [
                self.block2[-1].conv3.out_channels,
                self.block3[-1].conv3.out_channels,
                self.block4[-1].conv3.out_channels
            ]
    elif "decoder" in self.blobs_strategy:
        if "var" in self.model_type:
            fpn_sizes = [
                self.decoder_network[3].conv.out_channels,
                self.decoder_network[2].conv.out_channels,
                self.decoder_network[1].conv.out_channels
            ]
        else:
            # NOTE(review): `self.synthesizer` is never assigned in this
            # __init__ — this branch raises AttributeError unless a
            # subclass/caller sets it first; confirm intent.
            fpn_sizes = [
                self.decoder_network[1].conv.out_channels,
                self.decoder_network[0].conv.out_channels,
                self.synthesizer[-1].conv.out_channels
            ]
    # NOTE(review): if neither "encoder" nor "decoder" is in the strategy,
    # fpn_sizes is unbound and the PyramidFeatures call below fails.

    if "encoder" in self.blobs_strategy and "fused" in self.blobs_strategy:
        self.fpn = PyramidFeatures(args, fpn_sizes[0], fpn_sizes[1], fpn_sizes[2],
                                   fusion_strategy="concat_fuse")
    else:
        self.fpn = PyramidFeatures(args, fpn_sizes[0], fpn_sizes[1], fpn_sizes[2])

    # Heads behave differently when encoder features are used un-fused.
    self.dynamic_strategy = ("fused" not in self.blobs_strategy
                             and "encoder" in self.blobs_strategy)
    # print("dynamic strat", self.dynamic_strategy)
    self.regressionModel = RegressionModel(256, self.dynamic_strategy)
    self.classificationModel = ClassificationModel(256, self.dynamic_strategy)
    self.anchors = Anchors()
    self.regressBoxes = BBoxTransform()
    self.clipBoxes = ClipBoxes()
    import losses
    self.focalLoss = losses.FocalLoss(self.dynamic_strategy)

    # Focal-loss prior used for the head bias init below.
    prior = 0.01
    # He (fan-out) init for convs; identity init for batch-norm.
    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
            m.weight.data.normal_(0, math.sqrt(2. / n))
        elif isinstance(m, nn.BatchNorm2d):
            m.weight.data.fill_(1)
            m.bias.data.zero_()
    self.classificationModel.output.weight.data.fill_(0)
    self.classificationModel.output.bias.data.fill_(-math.log(
        (1.0 - prior) / prior))
    self.regressionModel.output.weight.data.fill_(0)
    self.regressionModel.output.bias.data.fill_(0)

    # Grouped so the trainable detector parts can be addressed together.
    self.params = nn.Sequential(
        self.fpn,
        self.regressionModel,
        self.classificationModel,
    )
def __init__(self, training):
    """Box sampler holding box decoding/clipping helpers.

    Args:
        training: mode flag stored on self (note: overwrites nn.Module's
            built-in ``training`` attribute).
    """
    super(BoxSampler, self).__init__()
    self.training = training
    # Box decoding and image-boundary clipping.
    self.regressBoxes = BBoxTransform()
    self.clipBoxes = ClipBoxes()