def __init__(self, anchors=None, o_sz=63, g_sz=127):
    """Create a SiamMask shell whose sub-networks are attached afterwards.

    Args:
        anchors: Anchor configuration. It is subscripted with ``"ratios"``
            and ``"scales"`` and handed to ``Anchors``, so despite the
            ``None`` default a subscriptable config must be supplied.
        o_sz: Stored unchanged as ``self.o_sz``.
        g_sz: Stored as ``self.g_sz``; also the output size of the
            bilinear up-sampler.
    """
    super(SiamMask, self).__init__()

    # Anchor configuration and the generator derived from it.
    self.anchors = anchors  # anchor_cfg
    ratio_count = len(self.anchors["ratios"])
    scale_count = len(self.anchors["scales"])
    self.anchor_num = ratio_count * scale_count
    self.anchor = Anchors(anchors)
    self.all_anchors = None

    # Sub-networks start out unset.
    self.features = None
    self.rpn_model = None
    self.mask_model = None

    # Sizes and the up-sampling layer.
    self.o_sz = o_sz
    self.g_sz = g_sz
    self.upSample = nn.UpsamplingBilinear2d(size=[g_sz, g_sz])
def __init__(self, num_classes, block, layers):
    """Build the ResNet trunk, feature pyramid and detection heads.

    Args:
        num_classes: Number of classes given to ``ClassificationModel``.
        block: Residual block class; compared against ``BasicBlock`` and
            ``Bottleneck`` to decide which conv holds the stage width.
        layers: Indexable with positions 0..3; block count per stage.
    """
    self.inplanes = 64
    super(ResNet, self).__init__()

    # Stem.
    self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                           bias=False)
    self.bn1 = nn.BatchNorm2d(64)
    self.relu = nn.ReLU(inplace=True)
    self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

    # Residual stages.
    self.layer1 = self._make_layer(block, 64, layers[0])
    self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
    self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
    self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

    # Channel counts of the last block of stages 2-4 feed the pyramid.
    last_blocks = [
        self.layer2[layers[1] - 1],
        self.layer3[layers[2] - 1],
        self.layer4[layers[3] - 1],
    ]
    if block == BasicBlock:
        fpn_sizes = [blk.conv2.out_channels for blk in last_blocks]
    elif block == Bottleneck:
        fpn_sizes = [blk.conv3.out_channels for blk in last_blocks]

    c3_size, c4_size, c5_size = fpn_sizes[0], fpn_sizes[1], fpn_sizes[2]
    self.fpn = PyramidFeatures(c3_size, c4_size, c5_size)

    self.regressionModel = RegressionModel(256)
    self.classificationModel = ClassificationModel(
        256, num_classes=num_classes)
    self.anchors = Anchors()
    self.regressBoxes = BBoxTransform()
    self.clipBoxes = ClipBoxes()

    # Weight initialisation for every conv / batch-norm module.
    for module in self.modules():
        if isinstance(module, nn.Conv2d):
            n = (module.kernel_size[0] * module.kernel_size[1]
                 * module.out_channels)
            module.weight.data.normal_(0, math.sqrt(2. / n))
        elif isinstance(module, nn.BatchNorm2d):
            module.weight.data.fill_(1)
            module.bias.data.zero_()

    # Head output layers: zero weights; classification bias from `prior`.
    prior = 0.01
    cls_out = self.classificationModel.output
    cls_out.weight.data.fill_(0)
    cls_out.bias.data.fill_(-math.log((1.0 - prior) / prior))

    reg_out = self.regressionModel.output
    reg_out.weight.data.fill_(0)
    reg_out.bias.data.fill_(0)

    self.freeze_bn()
def __init__(self, backbone, return_layers, anchor_nums=3):
    """Assemble RetinaFace from a backbone, an FPN and three heads.

    Args:
        backbone: Backbone network; must be truthy.
        return_layers: Non-empty mapping/sequence passed to
            ``IntermediateLayerGetter`` together with ``backbone``.
        anchor_nums: Accepted but not read inside this constructor.
    """
    super(RetinaFace, self).__init__()

    assert backbone, 'Backbone can not be none!'
    assert len(return_layers) > 0, 'There must be at least one return layers'
    self.body = _utils.IntermediateLayerGetter(backbone, return_layers)

    # Pyramid inputs are 2x, 4x and 8x the stage-2 width.
    in_channels_stage2 = 256
    in_channels_list = [in_channels_stage2 * mult for mult in (2, 4, 8)]
    out_channels = 256
    self.fpn = FeaturePyramidNetwork(in_channels_list, out_channels)

    # Prediction heads.
    self.ClassHead = self._make_class_head()
    self.BboxHead = self._make_bbox_head()
    self.LandmarkHead = self._make_landmark_head()

    # Anchor generation, box decoding and loss.
    self.anchors = Anchors()
    self.regressBoxes = RegressionTransform()
    self.losslayer = losses.LossLayer()
def __init__(self, num_classes, block, layers, groups=1, width_per_group=64,
             replace_stride_with_dilation=None, dropout1=0.25, dropout2=0.25,
             alpha=0.25, gamma=2.0, loss_with_no_bboxes=False,
             no_bboxes_alpha=0.5, no_bboxes_gamma=2.0):
    """Build the ResNet/ResNeXt trunk, a 4-input pyramid and the heads.

    Args:
        num_classes: Number of classes given to ``ClassificationModel``.
        block: Residual block class, ``BasicBlock`` or ``Bottleneck``.
        layers: Indexable with positions 0..3; block count per stage.
        groups: Stored as ``self.groups``.
        width_per_group: Stored as ``self.base_width``.
        replace_stride_with_dilation: ``None`` or three flags forwarded as
            ``dilate=`` to stages 2-4.
        dropout1, dropout2: Forwarded to ``ClassificationModel``.
        alpha, gamma, loss_with_no_bboxes, no_bboxes_alpha,
        no_bboxes_gamma: Forwarded to ``losses.FocalLoss``.

    Raises:
        ValueError: If ``replace_stride_with_dilation`` does not have three
            elements, or ``block`` is neither ``BasicBlock`` nor
            ``Bottleneck``.
    """
    # Has been changed to ResNext (customized by Yu Han Huang)
    self.inplanes = 64
    super(ResNet, self).__init__()

    # dilation / width_per_group / replace_stride_with_dilation were added
    # (customized by Yu Han Huang)
    self.dilation = 1
    if replace_stride_with_dilation is None:
        # each element in the tuple indicates if we should replace
        # the 2x2 stride with a dilated convolution instead
        replace_stride_with_dilation = [False, False, False]
    if len(replace_stride_with_dilation) != 3:
        raise ValueError("replace_stride_with_dilation should be None "
                         "or a 3-element tuple, got {}".format(
                             replace_stride_with_dilation))
    self.groups = groups
    self.base_width = width_per_group

    self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                           bias=False)
    self.bn1 = nn.BatchNorm2d(64)
    self.relu = nn.ReLU(inplace=True)
    self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

    self.layer1 = self._make_layer(block, 64, layers[0])
    # dilate=replace_stride_with_dilation added (customized by Yu Han Huang)
    self.layer2 = self._make_layer(block, 128, layers[1], stride=2,
                                   dilate=replace_stride_with_dilation[0])
    self.layer3 = self._make_layer(block, 256, layers[2], stride=2,
                                   dilate=replace_stride_with_dilation[1])
    self.layer4 = self._make_layer(block, 512, layers[3], stride=2,
                                   dilate=replace_stride_with_dilation[2])

    # C2 layer size is part of fpn_sizes (customized by Yu Han Huang).
    # The last conv of a block is conv2 for BasicBlock and conv3 for
    # Bottleneck.  The original second branch re-tested BasicBlock, so it
    # could never run and Bottleneck models failed on an unbound
    # `fpn_sizes`.
    if block == BasicBlock:
        last_conv_name = 'conv2'
    elif block == Bottleneck:
        last_conv_name = 'conv3'
    else:
        raise ValueError(
            "block must be BasicBlock or Bottleneck, got {}".format(block))

    last_blocks = [
        self.layer1[layers[0] - 1],
        self.layer2[layers[1] - 1],
        self.layer3[layers[2] - 1],
        self.layer4[layers[3] - 1],
    ]
    fpn_sizes = [getattr(blk, last_conv_name).out_channels
                 for blk in last_blocks]

    # fpn_sizes[0] is passed to PyramidFeatures (customized by Yu Han Huang)
    self.fpn = PyramidFeatures(fpn_sizes[0], fpn_sizes[1], fpn_sizes[2],
                               fpn_sizes[3])

    self.regressionModel = RegressionModel(256)
    self.classificationModel = ClassificationModel(
        256, num_classes=num_classes, dropout1=dropout1, dropout2=dropout2)
    self.anchors = Anchors()
    self.regressBoxes = BBoxTransform()
    self.clipBoxes = ClipBoxes()

    # alpha, gamma, loss_with_no_bboxes, no_bboxes_alpha, no_bboxes_gamma
    # are configurable (customized by Yu Han Huang)
    self.focalLoss = losses.FocalLoss(
        alpha=alpha, gamma=gamma, loss_with_no_bboxes=loss_with_no_bboxes,
        no_bboxes_alpha=no_bboxes_alpha, no_bboxes_gamma=no_bboxes_gamma)

    # Weight initialisation for every conv / batch-norm module.
    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
            m.weight.data.normal_(0, math.sqrt(2. / n))
        elif isinstance(m, nn.BatchNorm2d):
            m.weight.data.fill_(1)
            m.bias.data.zero_()

    # Head output layers: zero weights; classification bias from `prior`.
    prior = 0.01
    self.classificationModel.output.weight.data.fill_(0)
    self.classificationModel.output.bias.data.fill_(
        -math.log((1.0 - prior) / prior))
    self.regressionModel.output.weight.data.fill_(0)
    self.regressionModel.output.bias.data.fill_(0)

    self.freeze_bn()
def __init__(self, num_classes, block, pretrained=False, phi=0):
    """Build an EfficientDet: EfficientNet trunk, BiFPN and two heads.

    Args:
        num_classes: Number of classes given to ``ClassificationModel``.
        block: Accepted but not read inside this constructor.
        pretrained: Accepted but not read; ``EfficientNet.from_pretrained``
            is always called.  NOTE(review): confirm whether this flag was
            meant to be honoured.
        phi: Compound-scaling index; selects the backbone name, the
            ``w_bifpn`` width, the BiFPN depth and the head depth.
    """
    self.inplanes = w_bifpn[phi]
    super(EfficientDet, self).__init__()

    efficientnet = EfficientNet.from_pretrained(f'efficientnet-b{phi}')

    # Keep backbone blocks up to and including the fourth stride-2 block,
    # recording the projected channel count of each stride-2 block.
    kept_blocks = []
    fpn_sizes = []
    for backbone_block in efficientnet._blocks:
        kept_blocks.append(backbone_block)
        if backbone_block._depthwise_conv.stride == [2, 2]:
            fpn_sizes.append(backbone_block._project_conv.out_channels)
        if len(fpn_sizes) >= 4:
            break

    self.efficientnet = nn.Sequential(efficientnet._conv_stem,
                                      efficientnet._bn0, *kept_blocks)

    width = w_bifpn[phi]
    num_layers = min(phi + 2, 8)
    # The first recorded size is not passed to the BiFPN.
    self.fpn = BiFPN(fpn_sizes[1:], feature_size=width,
                     num_layers=num_layers)

    d_class = 3 + (phi // 3)
    self.regressionModel = RegressionModel(width, feature_size=width,
                                           d_class=d_class)
    self.classificationModel = ClassificationModel(
        width, feature_size=width, d_class=d_class,
        num_classes=num_classes)

    self.anchors = Anchors()
    self.regressBoxes = BBoxTransform()
    self.clipBoxes = ClipBoxes()
    self.focalLoss = losses.FocalLoss().cuda()

    # Weight initialisation for every conv / batch-norm module (this loop
    # also visits the backbone modules registered above).
    for module in self.modules():
        if isinstance(module, nn.Conv2d):
            n = (module.kernel_size[0] * module.kernel_size[1]
                 * module.out_channels)
            module.weight.data.normal_(0, math.sqrt(2. / n))
        elif isinstance(module, nn.BatchNorm2d):
            module.weight.data.fill_(1)
            module.bias.data.zero_()

    # Head output layers: zero weights; classification bias from `prior`.
    prior = 0.01
    cls_out = self.classificationModel.output
    cls_out.weight.data.fill_(0)
    cls_out.bias.data.fill_(-math.log((1.0 - prior) / prior))

    reg_out = self.regressionModel.output
    reg_out.weight.data.fill_(0)
    reg_out.bias.data.fill_(0)

    self.freeze_bn()
def __init__(self, num_classes, block, layers, normalization='batch_norm'):
    """Build the ResNet trunk, pyramid and heads with selectable stem norm.

    Args:
        num_classes: Number of classes given to ``ClassificationModel``.
        block: Residual block class; compared against ``BasicBlock`` and
            ``Bottleneck``.
        layers: Indexable with positions 0..3; block count per stage.
        normalization: ``'batch_norm'`` selects ``BatchNorm2d`` for
            ``bn1``; any other value selects ``GroupNorm`` with 8 groups.
    """
    super(ResNet, self).__init__()
    self.inplanes = 64
    self.normalization = normalization

    self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                           bias=False)
    if normalization != 'batch_norm':
        # Note: Does not use preloaded imagenet weights, as BatchNorm does
        self.bn1 = nn.GroupNorm(num_groups=8, num_channels=64)
    else:
        self.bn1 = nn.BatchNorm2d(64)
    self.relu = nn.ReLU(inplace=True)
    self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

    self.layer1 = self._make_layer(block, 64, layers[0])
    self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
    self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
    self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

    # Channel counts of the last block of stages 2-4 feed the pyramid.
    tails = (
        self.layer2[layers[1] - 1],
        self.layer3[layers[2] - 1],
        self.layer4[layers[3] - 1],
    )
    if block == BasicBlock:
        fpn_sizes = [tail.conv2.out_channels for tail in tails]
    elif block == Bottleneck:
        fpn_sizes = [tail.conv3.out_channels for tail in tails]

    self.fpn = PyramidFeatures(fpn_sizes[0], fpn_sizes[1], fpn_sizes[2])

    self.regressionModel = RegressionModel(256)
    self.classificationModel = ClassificationModel(
        256, num_classes=num_classes)
    self.anchors = Anchors()
    self.regressBoxes = BBoxTransform()
    self.clipBoxes = ClipBoxes()
    self.focalLoss = losses.FocalLoss()

    # Weight initialisation; both norm types get weight 1 and bias 0.
    for module in self.modules():
        if isinstance(module, nn.Conv2d):
            n = (module.kernel_size[0] * module.kernel_size[1]
                 * module.out_channels)
            module.weight.data.normal_(0, math.sqrt(2. / n))
        elif isinstance(module, (nn.BatchNorm2d, nn.GroupNorm)):
            module.weight.data.fill_(1)
            module.bias.data.zero_()

    # Head output layers: zero weights; classification bias from `prior`.
    prior = 0.01
    cls_out = self.classificationModel.output
    cls_out.weight.data.fill_(0)
    cls_out.bias.data.fill_(-math.log((1.0 - prior) / prior))

    reg_out = self.regressionModel.output
    reg_out.weight.data.fill_(0)
    reg_out.bias.data.fill_(0)

    self.freeze_bn()
def main():
    """Scan beacons forever, solve a position per scan and report it.

    Each iteration scans, wraps the scan in a ``DataPoint``, builds both a
    ``Solver`` and an ``EDMSolver`` (only the latter has ``run()`` called
    here), logs the data point and reports the mean of the two results.
    The loop ends on Ctrl-C.
    """
    uuidList = ["e2c56db5dffb48d2b060d0f5a71096e0"]
    dev_id = 0
    # Please give a number below 40: packets of one scan batch are assumed
    # to arrive at roughly the same time.  With a high number, motion
    # becomes significant and RSSI values get less accurate.
    # NOTE(review): the configured value is far above that advice.
    numberOfBeaconsToWait = 2000
    commissioningFileName = "/home/pi/rssi_positioning/commissionning.dat"

    host, port = '192.168.1.6', 5000
    reporter = Reporter(host, port)
    logger = Logger()

    try:
        anchors = Anchors(commissioningFileName)  # in anchors.py
        anchors.show_debug()
        scanner = Scanner(uuidList, dev_id, numberOfBeaconsToWait)

        loop_count = 0
        while True:
            timestamp, returnedList = scanner.scan()
            dataPoint = DataPoint(uuidList, anchors, timestamp, returnedList)

            solver = Solver(dataPoint)
            edmSolver = EDMSolver(dataPoint)
            edmSolver.run()

            logger.logDataPoint(dataPoint)

            # Report the average of the two solvers' results.
            averaged = (solver.result + edmSolver.result) / 2
            reporter.report(averaged)

            loop_count += 1
            print("Running loop number: ", loop_count)
    except KeyboardInterrupt:
        print("\nWarning: keyboard interrupt detected, quitting...")
    finally:
        # clean up
        print("Program done.")
def update(self, newparam=None, anchors=None):
    """Overwrite attributes from ``newparam`` / ``anchors``, then renew.

    Args:
        newparam: Optional mapping; every ``key -> value`` pair is set as
            an attribute on ``self``.  Falsy values are ignored.
        anchors: Optional ``Anchors`` instance, or a ``dict`` from which
            one is built.  Its stride, ratios, scales and round_dight are
            copied onto ``self``.  Other types are ignored.

    ``self.renew()`` is always called at the end.
    """
    for attr_name, attr_value in (newparam or {}).items():
        setattr(self, attr_name, attr_value)

    if anchors is not None:
        anchor_obj = Anchors(anchors) if isinstance(anchors, dict) else anchors
        if isinstance(anchor_obj, Anchors):
            # (attribute on the anchors object, attribute on self)
            copied = (
                ("stride", "total_stride"),
                ("ratios", "ratios"),
                ("scales", "scales"),
                ("round_dight", "round_dight"),
            )
            for source_attr, target_attr in copied:
                setattr(self, target_attr, getattr(anchor_obj, source_attr))

    self.renew()
def __init__(self, num_classes, backbone_network, fpn_sizes):
    """Build RetinaNet heads around an externally supplied backbone.

    Args:
        num_classes (int): Number of classes given to
            ``ClassificationModel``.
        backbone_network: Backbone module, stored as ``self.efficientnet``.
        fpn_sizes (list): Channel counts of the three backbone feature
            maps fed to the pyramid (indices 0..2 are read).
    """
    self.inplanes = 64
    super(RetinaNet, self).__init__()

    # Example values previously used for fpn_sizes:
    #   [160, 272, 448]
    #   [56, 160, 448]    # for b4
    #   [160, 272, 448]   # for b0
    #   [112, 192, 1280]
    c3_size, c4_size, c5_size = fpn_sizes[0], fpn_sizes[1], fpn_sizes[2]
    self.fpn = PyramidFeatures(c3_size, c4_size, c5_size)

    self.regressionModel = RegressionModel(256)
    self.classificationModel = ClassificationModel(
        256, num_classes=num_classes)
    self.anchors = Anchors()
    self.regressBoxes = BBoxTransform()
    self.clipBoxes = ClipBoxes()
    self.focalLoss = losses.FocalLoss()

    # Weight initialisation for every conv / batch-norm module registered
    # so far.
    for module in self.modules():
        if isinstance(module, nn.Conv2d):
            n = (module.kernel_size[0] * module.kernel_size[1]
                 * module.out_channels)
            module.weight.data.normal_(0, math.sqrt(2. / n))
        elif isinstance(module, nn.BatchNorm2d):
            module.weight.data.fill_(1)
            module.bias.data.zero_()

    # Head output layers: zero weights; classification bias from `prior`.
    prior = 0.01
    cls_out = self.classificationModel.output
    cls_out.weight.data.fill_(0)
    cls_out.bias.data.fill_(-math.log((1.0 - prior) / prior))

    reg_out = self.regressionModel.output
    reg_out.weight.data.fill_(0)
    reg_out.bias.data.fill_(0)

    self.freeze_bn()

    # The backbone is attached last, so neither the init loop nor the
    # freeze_bn() call above sees it.  Keep this order.
    self.efficientnet = backbone_network
def generate_anchor(cfg, score_size):
    """Tile the base anchors over a ``score_size`` x ``score_size`` grid.

    Args:
        cfg: Configuration handed to ``Anchors``.
        score_size: Number of grid positions along each axis.

    Returns:
        Array with four columns: centre x, centre y, width, height.  The
        centres are grid offsets spaced by the anchor stride.
    """
    generator = Anchors(cfg)
    corners = generator.anchors
    left, top = corners[:, 0], corners[:, 1]
    right, bottom = corners[:, 2], corners[:, 3]

    # Corner form (x1, y1, x2, y2) -> centre form (cx, cy, w, h).
    centre_x = (left + right) * 0.5
    centre_y = (top + bottom) * 0.5
    anchor = np.stack([centre_x, centre_y, right - left, bottom - top], 1)

    total_stride = generator.stride
    anchor_num = anchor.shape[0]
    anchor = np.tile(anchor, score_size * score_size).reshape((-1, 4))

    # Grid offsets; `ori` is the first coordinate along each axis.
    ori = - (score_size // 2) * total_stride
    x_offsets = [ori + total_stride * dx for dx in range(score_size)]
    y_offsets = [ori + total_stride * dy for dy in range(score_size)]
    grid_x, grid_y = np.meshgrid(x_offsets, y_offsets)

    flat_x = np.tile(grid_x.flatten(), (anchor_num, 1)).flatten()
    flat_y = np.tile(grid_y.flatten(), (anchor_num, 1)).flatten()

    anchor[:, 0] = flat_x.astype(np.float32)
    anchor[:, 1] = flat_y.astype(np.float32)
    return anchor
def __init__(self, config):
    """Build a RetinaNet-style detector on a pretrained ResNet-101.

    Args:
        config: Object providing ``anchor_ratios``, ``anchor_scales`` and
            ``classes``; also passed to ``Anchors`` and kept as
            ``self.config``.
    """
    super(Retina, self).__init__()

    ratio_count = len(config.anchor_ratios)
    scale_count = len(config.anchor_scales)
    self.anchors_per_grid_cell = ratio_count * scale_count
    self.classes = config.classes
    self.num_classes = len(self.classes) + 1

    self._backbone = resnet101(pretrained=True)
    # leave off avgpool and fc
    children = list(self._backbone.named_children())[:-2]
    names, layers = zip(*children)

    # Split the children into consecutive groups: a new group begins at
    # each child whose name starts with 'layer' (the first group begins at
    # index 0 regardless of its name).
    stages = []
    begin = 0
    total = len(names)
    while begin < total:
        end = begin + 1
        while end < total and not names[end].startswith('layer'):
            end += 1
        stages.append(torch.nn.Sequential(*layers[begin:end]))
        begin = end
    self.backbone = stages

    # Extra stride-2 levels and per-level projection convs.
    self.conv6 = torch.nn.Conv2d(2048, 256, 3, stride=2, padding=1)
    self.conv7 = torch.nn.Conv2d(256, 256, 3, stride=2, padding=1)
    self.conv5 = torch.nn.Conv2d(2048, 256, 3, padding=1)
    self.conv4 = torch.nn.Conv2d(1024, 256, 1)
    self.conv3 = torch.nn.Conv2d(512, 256, 1)
    self.conv2 = torch.nn.Conv2d(256, 256, 1)

    # Box and class sub-networks.
    self.loc = self.mk_subnet(4, include_sigmoid=False)
    self.conf = self.mk_subnet(self.num_classes, include_sigmoid=False)

    self.anchors = Anchors(config)
    self.detect = Detect(self.num_classes, 200, 0.01, 0.45, self.anchors)
    self.config = config
def __init__(self, num_classes, phi):
    """Build an EfficientDet: backbone, stacked pyramid layers and heads.

    Args:
        num_classes: Number of classes given to ``ClassificationModel``.
        phi: Compound-scaling index; selects the backbone from
            ``geffnets``, the width from ``feature_sizes`` and the number
            of stacked pyramid layers (``min(2 + phi, 8)``).

    Note:
        ``self.fpn`` used to be a plain Python list.  Modules held in a
        list are not registered as sub-modules, so their parameters were
        missing from ``parameters()`` and ``state_dict()`` and were not
        moved by ``.to()`` / ``.cuda()``; each layer was pushed to the GPU
        by hand instead.  It is now an ``nn.ModuleList``, which is still
        iterable and indexable.  Checkpoints saved before this change do
        not contain the ``fpn.*`` keys.
    """
    # Imported locally so this block does not depend on a module-level
    # `nn` alias being present.
    from torch import nn

    feature_size = feature_sizes[phi]
    super(EfficientDet, self).__init__()

    self.backbone = geffnets[phi](pretrained=True,
                                  drop_rate=0.25,
                                  drop_connect_rate=0.2)

    # Get backbone feature sizes.
    fpn_sizes = [40, 80, 192]

    # Registered container: the pyramid layers now follow the model's
    # device and are visible to optimisers and checkpoints.
    self.fpn = nn.ModuleList(
        PyramidFeatures(fpn_sizes, feature_size=feature_size, index=index)
        for index in range(min(2 + phi, 8))
    )

    self.regressionModel = RegressionModel(phi, feature_size=feature_size)
    self.classificationModel = ClassificationModel(
        phi, feature_size=feature_size, num_classes=num_classes)

    self.anchors = Anchors()
    self.regressBoxes = BBoxTransform()
    self.clipBoxes = ClipBoxes()
    self.focalLoss = losses.FocalLoss()

    # Head output layers: zero weights; classification bias from `prior`.
    prior = 0.01
    self.classificationModel.output.weight.data.fill_(0)
    self.classificationModel.output.bias.data.fill_(
        -math.log((1.0 - prior) / prior))
    self.regressionModel.output.weight.data.fill_(0)
    self.regressionModel.output.bias.data.fill_(0)
def __init__(self):
    """Create the stage-1 feature/correlation networks and detection heads."""
    super(Network, self).__init__()

    # stage 1 networks
    self.template_feature_extractor_global = TemplateFeatExtractGlobal()
    self.image_feature_extractor = ImageFeatExtract()
    self.template_feature_extractor = TemplateFeatExtract()
    self.correlation_model = CorrelationModel(640)

    # detection networks
    # num_anchors=24 equals 3 ratios * 8 scales configured below.
    self.anchors = Anchors(
        pyramid_levels=[4],
        ratios=[0.5, 1, 2],
        sizes=[30],
        scales=[1, 2, 3, 4, 5, 6, 7, 8],
    )
    self.classification = ClassificationModel(512, num_anchors=24)
    self.regression = RegressionModel(512, num_anchors=24)

    # weight init
    prior = 0.01
    prior_bias = -math.log((1.0 - prior) / prior)

    # Layers that get zero weights and the prior-derived bias.
    prior_biased_layers = (
        self.classification.output,
        self.correlation_model.corr_conv_heatmap,
        self.correlation_model.seg_final,
    )
    for layer in prior_biased_layers:
        layer.weight.data.fill_(0)
        layer.bias.data.fill_(prior_bias)

    # The regression output starts at exactly zero.
    self.regression.output.weight.data.fill_(0)
    self.regression.output.bias.data.fill_(0)

    # utils
    self.regressBoxes = BBoxTransform()
    self.clipBoxes = ClipBoxes()
def __init__(self, cfg, anchor_cfg, num_epoch=1):
    """Configure the combined training dataset from ``cfg``.

    Args:
        cfg: Mapping with a required ``'datasets'`` and ``'augmentation'``
            entry and optional size / ``'num'`` / ``'anchor_target'``
            entries.  An empty ``'anchor_target'`` entry is inserted into
            ``cfg`` when missing.
        anchor_cfg: Configuration handed to ``Anchors``.
        num_epoch: Multiplier applied to the number of samples.

    Raises:
        Exception: If ``cfg`` has no ``'datasets'`` entry.
    """
    super(DataSets, self).__init__()

    global logger
    logger = logging.getLogger('global')

    # anchors
    self.anchors = Anchors(anchor_cfg)

    # size: defaults first, then whatever cfg overrides.
    self.template_size = 127
    self.search_size = 255
    self.size = 17
    self.base_size = 0
    self.crop_size = 0
    # TODO: calculate size online
    # (a disabled check compared (search_size - template_size) / stride
    #  + 1 + base_size against size)
    for size_key in ('template_size', 'search_size', 'base_size', 'size',
                     'crop_size'):
        if size_key in cfg:
            setattr(self, size_key, cfg[size_key])

    self.template_small = bool('template_small' in cfg
                               and cfg['template_small'])

    self.anchors.generate_all_anchors(im_c=self.search_size // 2,
                                      size=self.size)

    if 'anchor_target' not in cfg:
        cfg['anchor_target'] = {}
    self.anchor_target = AnchorTargetLayer(cfg['anchor_target'])

    # data sets
    if 'datasets' not in cfg:
        raise(Exception('DataSet need "{}"'.format('datasets')))

    self.all_data = []
    self.num = 0
    offset = 0
    for name, sub_cfg in cfg['datasets'].items():
        sub_cfg['mark'] = name
        sub_cfg['start'] = offset

        sub_dataset = SubDataSet(sub_cfg)
        sub_dataset.log()
        self.all_data.append(sub_dataset)

        offset += sub_dataset.num  # real video number
        self.num += sub_dataset.num_use  # the number used for subset shuffle

    # data augmentation
    aug_cfg = cfg['augmentation']
    self.template_aug = Augmentation(aug_cfg['template'])
    self.search_aug = Augmentation(aug_cfg['search'])
    self.gray = aug_cfg['gray']
    self.neg = aug_cfg['neg']
    self.inner_neg = aug_cfg['inner_neg'] if 'inner_neg' in aug_cfg else 0

    self.pick = None  # list to save id for each img
    if 'num' in cfg:  # number used in training for all dataset
        self.num = int(cfg['num'])
    self.num *= num_epoch
    self.shuffle()

    self.infos = {
        'template': self.template_size,
        'search': self.search_size,
        'template_small': self.template_small,
        'gray': self.gray,
        'neg': self.neg,
        'inner_neg': self.inner_neg,
        'crop_size': self.crop_size,
        'anchor_target': self.anchor_target.__dict__,
        'num': self.num // num_epoch,
    }
    summary = json.dumps(self.infos, indent=4)
    logger.info('dataset informations: \n{}'.format(summary))
class SiamMask(nn.Module):
    """Siamese tracker shell combining a feature net, an RPN and a mask net.

    ``features``, ``rpn_model`` and ``mask_model`` are ``None`` after
    construction and must be set before the network is run.
    """

    def __init__(self, anchors=None, o_sz=63, g_sz=127):
        """Store the anchor config and sizes; sub-networks stay unset.

        Args:
            anchors: Anchor configuration, subscripted with ``"ratios"``
                and ``"scales"`` and handed to ``Anchors``.
            o_sz: Stored unchanged as ``self.o_sz``.
            g_sz: Stored as ``self.g_sz``; also the output size of the
                bilinear up-sampler.
        """
        super(SiamMask, self).__init__()
        self.anchors = anchors  # anchor_cfg
        ratio_count = len(self.anchors["ratios"])
        scale_count = len(self.anchors["scales"])
        self.anchor_num = ratio_count * scale_count
        self.anchor = Anchors(anchors)
        self.all_anchors = None

        self.features = None
        self.rpn_model = None
        self.mask_model = None

        self.o_sz = o_sz
        self.g_sz = g_sz
        self.upSample = nn.UpsamplingBilinear2d(size=[g_sz, g_sz])

    def set_all_anchors(self, image_center, size):
        """Regenerate the anchors and cache them as four CUDA tensors.

        Nothing is changed when ``generate_all_anchors`` returns a falsy
        value.
        """
        # cx,cy,w,h
        regenerated = self.anchor.generate_all_anchors(image_center, size)
        if not regenerated:
            return
        centre_form = self.anchor.all_anchors[1]  # cx, cy, w, h
        stacked = torch.from_numpy(centre_form).float().cuda()
        self.all_anchors = [stacked[component] for component in range(4)]

    def feature_extractor(self, x):
        """Apply the feature network to ``x``."""
        return self.features(x)

    def rpn(self, template, search):
        """Return ``(pred_cls, pred_loc)`` from the RPN model."""
        pred_cls, pred_loc = self.rpn_model(template, search)
        return pred_cls, pred_loc

    def mask(self, template, search):
        """Return the mask model's prediction."""
        return self.mask_model(template, search)

    def _add_rpn_loss(self, label_cls, label_loc, lable_loc_weight,
                      label_mask, label_mask_weight, rpn_pred_cls,
                      rpn_pred_loc, rpn_pred_mask):
        """Compute the three losses and the three mask IoU figures."""
        cls_loss = select_cross_entropy_loss(rpn_pred_cls, label_cls)
        loc_loss = weight_l1_loss(rpn_pred_loc, label_loc, lable_loc_weight)
        mask_loss, iou_m, iou_5, iou_7 = select_mask_logistic_loss(
            rpn_pred_mask, label_mask, label_mask_weight)
        return cls_loss, loc_loss, mask_loss, iou_m, iou_5, iou_7

    def run(self, template, search, softmax=False):
        """
        run network

        Returns ``(cls, loc, mask, template_feature, search_feature)``;
        ``cls`` goes through ``self.softmax`` when ``softmax`` is true.
        """
        template_feature = self.feature_extractor(template)
        search_feature = self.feature_extractor(search)

        rpn_pred_cls, rpn_pred_loc = self.rpn(template_feature,
                                              search_feature)
        # (b, 63*63, w, h)
        rpn_pred_mask = self.mask(template_feature, search_feature)

        if softmax:
            rpn_pred_cls = self.softmax(rpn_pred_cls)
        return (rpn_pred_cls, rpn_pred_loc, rpn_pred_mask,
                template_feature, search_feature)

    def softmax(self, cls):
        """Split channels into two classes and apply log-softmax over them.

        The channel axis of size ``a2`` is viewed as ``(2, a2 // 2)``, the
        size-2 axis is moved last, and ``log_softmax`` runs over it.
        """
        batch, channels, height, width = cls.size()
        split = cls.view(batch, 2, channels // 2, height, width)
        class_last = split.permute(0, 2, 3, 4, 1).contiguous()
        return F.log_softmax(class_last, dim=4)

    def forward(self, input):
        """
        :param input: dict of input with keys of:
                'template': [b, 3, h1, w1], input template image.
                'search': [b, 3, h2, w2], input search image.
                'label_cls':[b, max_num_gts, 5] or None(self.training==False),
                                     each gt contains x1,y1,x2,y2,class.
                In training mode 'label_loc', 'label_loc_weight',
                'label_mask' and 'label_mask_weight' are read as well.
        :return: dict of loss, predict, accuracy
        """
        template = input['template']
        search = input['search']

        # Labels are fetched before the network runs, training only.
        if self.training:
            label_cls = input['label_cls']
            label_loc = input['label_loc']
            lable_loc_weight = input['label_loc_weight']
            label_mask = input['label_mask']
            label_mask_weight = input['label_mask_weight']

        (rpn_pred_cls, rpn_pred_loc, rpn_pred_mask,
         template_feature, search_feature) = self.run(
             template, search, softmax=self.training)

        outputs = {
            'predict': [rpn_pred_loc, rpn_pred_cls, rpn_pred_mask,
                        template_feature, search_feature],
        }

        if self.training:
            (rpn_loss_cls, rpn_loss_loc, rpn_loss_mask,
             iou_acc_mean, iou_acc_5, iou_acc_7) = self._add_rpn_loss(
                 label_cls, label_loc, lable_loc_weight, label_mask,
                 label_mask_weight, rpn_pred_cls, rpn_pred_loc,
                 rpn_pred_mask)
            outputs['losses'] = [rpn_loss_cls, rpn_loss_loc, rpn_loss_mask]
            outputs['accuracy'] = [iou_acc_mean, iou_acc_5, iou_acc_7]

        return outputs

    def template(self, z):
        """Cache the template features in ``self.zf``; return RPN kernels."""
        self.zf = self.feature_extractor(z)
        cls_kernel, loc_kernel = self.rpn_model.template(self.zf)
        return cls_kernel, loc_kernel

    def track(self, x, cls_kernel=None, loc_kernel=None, softmax=False):
        """Run the RPN tracking step on the features of ``x``."""
        search_features = self.feature_extractor(x)
        rpn_pred_cls, rpn_pred_loc = self.rpn_model.track(
            search_features, cls_kernel, loc_kernel)
        if softmax:
            rpn_pred_cls = self.softmax(rpn_pred_cls)
        return rpn_pred_cls, rpn_pred_loc
def __init__(self, num_class, block, layers):
    """Build the ResNet trunk (named stem), pyramid and detection heads.

    Args:
        num_class: Number of classes given to ``Classification``.
        block: Residual block class; must equal ``BasicBlock`` or
            ``Bottleneck`` for the pyramid sizes to be defined.
        layers: Indexable with positions 0..3; block count per stage.
    """
    super(ResNet, self).__init__()
    self.in_channels = 64

    # Stem packed into one Sequential with named children.
    stem = OrderedDict([
        ('Conv1', nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                            bias=False)),
        ('BN', nn.BatchNorm2d(64)),
        ('Relu', nn.ReLU(inplace=True)),
        ('Maxpooling', nn.MaxPool2d(kernel_size=3, stride=2, padding=1)),
    ])
    self.conv1 = nn.Sequential(stem)

    self.layer1 = self._make_layer(block, 64, layers[0])
    self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
    self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
    self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

    # Both block types expose the stage width as `.channels`.
    if block == BasicBlock or block == Bottleneck:
        fpn_sizes = [
            self.layer2[layers[1] - 1].channels,
            self.layer3[layers[2] - 1].channels,
            self.layer4[layers[3] - 1].channels,
        ]

    self.fpn = PyramidFeatures(fpn_sizes[0], fpn_sizes[1], fpn_sizes[2])

    self.regression = Regression(256)
    self.classification = Classification(256, num_classes=num_class)
    self.anchors = Anchors()
    self.regressBoxes = BBoxTransform()
    self.clipBoxes = ClipBoxes()
    self.focalLoss = losses.FocalLoss()

    # Weight initialisation for every conv / batch-norm module.
    for module in self.modules():
        if isinstance(module, nn.Conv2d):
            n = (module.kernel_size[0] * module.kernel_size[1]
                 * module.out_channels)
            module.weight.data.normal_(0, math.sqrt(2. / n))
        elif isinstance(module, nn.BatchNorm2d):
            module.weight.data.fill_(1)
            module.bias.data.zero_()

    # Only the output biases are set here; zeroing of the output weights
    # is disabled in this variant.
    prior = 0.01
    prior_bias = -torch.log((torch.tensor(1.0 - prior).float()) / prior)
    self.classification.output.bias.data.fill_(prior_bias)
    self.regression.output.bias.data.fill_(0)

    self.freeze_bn()
def main(args):
    """Train RetinaMask with SGD and a polynomial learning-rate schedule.

    Reads from ``args``: ``use_cuda``, ``log_dir``, ``batch_size``,
    ``gpu_list``, ``model_path``, ``lr``, ``weight_decay``, ``start_iter``,
    ``epochs`` and ``dataset``.  Progress is logged every 500 steps and the
    weights are written to ``cfgs.model_dir`` every 3000 steps.

    Changes from the previous version:
      * CUDA is used only when it was requested AND is available.  Before,
        the model fell back to CPU while anchors and batches were still
        sent to ``.cuda()``, which crashed.
      * ``len(args.gpu_list.split(',')) > 0`` is always true
        (``str.split`` never returns an empty list), so the model is now
        wrapped in ``DataParallel`` and saved through ``.module``
        unconditionally.  This is what already happened in practice.
      * The duplicated ``sfile = sfile = ...`` assignment, the unused
        scheduler / normalisation constants and the disabled debug
        visualisation string were removed.
    """
    os.makedirs(cfgs.model_dir, exist_ok=True)

    # ---------------------------------------------------------- logging
    log_dir = args.log_dir
    os.makedirs(log_dir, exist_ok=True)
    logger = logging.getLogger()
    log_name = time.strftime('%F-%T',
                             time.localtime()).replace(':', '-') + '.log'
    log_path = os.path.join(log_dir, log_name)
    logger.addHandler(logging.FileHandler(log_path))
    logger.addHandler(logging.StreamHandler())
    logger.setLevel(logging.DEBUG)

    # ----------------------------------------------------- data loaders
    dataset_train, dataloader_val = dataset_factory()
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=4,
                                  collate_fn=detection_collate,
                                  batch_size=args.batch_size,
                                  shuffle=True)

    # ------------------------------------------------------------ model
    # Effective flag: every later CUDA decision follows the device the
    # model actually lives on.
    use_cuda = bool(args.use_cuda) and torch.cuda.is_available()
    if use_cuda:
        device = torch.device('cuda')
        # NOTE(review): this is set after the availability query, as
        # before; confirm it still restricts the visible devices.
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_list
    else:
        device = torch.device('cpu')

    retinanet = RetinaMask(9, 'train').to(device)
    if args.model_path:
        model_weights = torch.load(args.model_path, map_location=device)
        retinanet.load_state_dict(model_weights, strict=True)
        logger.info("load weightes success: {}".format(args.model_path))

    # Only needed by the disabled validation call; kept because the
    # constructor's side effects are not visible here.
    BoxDetector = RetinanetDetector()

    # ---------------------------------------------------------- anchors
    get_anchors = Anchors()
    anchors = get_anchors(cfgs.IMGHeight, cfgs.IMGWidth)
    if use_cuda:
        anchors = anchors.cuda().float()
    print("anchors:", anchors.size())

    # ------------------------------------------------- loss / optimiser
    criterion = MultiBoxLoss(use_gpu=use_cuda)
    retinanet = torch.nn.DataParallel(retinanet)
    optimizer = optim.SGD(retinanet.parameters(),
                          lr=args.lr,
                          momentum=0.9,
                          weight_decay=args.weight_decay)

    # Sliding windows for the running means printed in the log.
    loss_hist = collections.deque(maxlen=500)
    loss_reg = collections.deque(maxlen=500)
    loss_cls = collections.deque(maxlen=500)

    logger.info('Num training images: {}'.format(len(dataset_train)))

    step = 0
    for epoch_num in range(args.start_iter, args.epochs):
        retinanet.train()
        lr = poly_lr_scheduler(optimizer,
                               args.lr,
                               epoch_num,
                               max_iter=args.epochs,
                               power=0.8)

        for img_batch, gt_batch in dataloader_train:
            step += 1
            if use_cuda:
                img_batch = img_batch.cuda()

            classification, regression, _ = retinanet(img_batch)
            classification_loss, regression_loss = criterion(
                [classification, regression, anchors], gt_batch)
            loss = classification_loss + regression_loss

            # A zero loss skips the update and also the logging and
            # checkpoint checks for this step, as before.
            if bool(loss == 0):
                continue

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            loss_hist.append(float(loss.item()))
            loss_cls.append(float(classification_loss.item()))
            loss_reg.append(float(regression_loss.item()))

            if step % 500 == 0:
                logger.info(
                    'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f} | cls_mean:{:.6f} | reg_mean:{:.6f} | lr: {:.6f}'
                    .format(epoch_num, step, classification_loss.item(),
                            regression_loss.item(), np.mean(loss_hist),
                            np.mean(loss_cls), np.mean(loss_reg), lr))

            if step % 3000 == 0:
                sfile = 'retina_' + args.dataset + '_best.pth'
                spath = os.path.join(cfgs.model_dir, sfile)
                # Save the wrapped module's weights so the checkpoint
                # loads into a bare RetinaMask.
                torch.save(retinanet.module.state_dict(), spath)
                logger.info("*****************save weightes******,%d" % step)
def __init__(self, num_classes, block, layers):
    """Build the ResNet trunk, pyramid, heads and the siamese sub-network.

    Args:
        num_classes: Number of classes given to ``ClassificationModel``.
        block: Residual block class; compared against ``BasicBlock`` and
            ``Bottleneck``.
        layers: Indexable with positions 0..3; block count per stage.
    """
    super(ResNet, self).__init__()
    self.inplanes = 64

    # Stem: this variant creates no ReLU / max-pool modules.
    self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                           bias=False)
    self.bn1 = nn.BatchNorm2d(64)

    self.layer1 = self._make_layer(block, planes=64, blocks=layers[0],
                                   stride=1)
    self.layer2 = self._make_layer(block, planes=128, blocks=layers[1],
                                   stride=2)
    self.layer3 = self._make_layer(block, planes=256, blocks=layers[2],
                                   stride=2)
    self.layer4 = self._make_layer(block, planes=512, blocks=layers[3],
                                   stride=2)

    # Channel counts of the last block of stages 2-4 feed the pyramid.
    tails = (
        self.layer2[layers[1] - 1],
        self.layer3[layers[2] - 1],
        self.layer4[layers[3] - 1],
    )
    if block == BasicBlock:
        fpn_sizes = [tail.conv2.out_channels for tail in tails]
    elif block == Bottleneck:
        fpn_sizes = [tail.conv3.out_channels for tail in tails]

    self.fpn = PyramidFeatures(fpn_sizes[0], fpn_sizes[1], fpn_sizes[2])

    self.regressionModel = RegressionModel(256)
    self.classificationModel = ClassificationModel(
        256, num_classes=num_classes)
    self.siameseNetwork = SiameseNetwork()

    self.anchors = Anchors()
    self.regressBoxes = BBoxTransform()
    self.clipBoxes = ClipBoxes()
    self.focalLoss = losses.FocalLoss()
    self.cropBoxes = utils.CropBoxes()

    # Weight initialisation for every conv / batch-norm module.
    for module in self.modules():
        if isinstance(module, nn.Conv2d):
            n = (module.kernel_size[0] * module.kernel_size[1]
                 * module.out_channels)
            module.weight.data.normal_(0, math.sqrt(2. / n))
        elif isinstance(module, nn.BatchNorm2d):
            module.weight.data.fill_(1)
            module.bias.data.zero_()

    # In this variant the heads' `conv5` layers are the ones initialised.
    prior = 0.01
    cls_last = self.classificationModel.conv5
    cls_last.weight.data.fill_(0)
    cls_last.bias.data.fill_(-math.log((1.0 - prior) / prior))

    reg_last = self.regressionModel.conv5
    reg_last.weight.data.fill_(0)
    reg_last.bias.data.fill_(0)

    self.freeze_bn()
def __init__(self, num_classes, block, layers, num_anchors=3):
    """Build the ResNet backbone, four-level FPN, context modules and heads.

    Args:
        num_classes: Accepted for interface compatibility; not read by this
            constructor.
        block: Residual block class, either ``BasicBlock`` or ``Bottleneck``.
        layers: Indexable of four ints; ``layers[i]`` is the number of
            blocks stacked in ``layer{i + 1}``.
        num_anchors: Accepted for interface compatibility; not read by this
            constructor.

    Raises:
        ValueError: If ``block`` is neither ``BasicBlock`` nor ``Bottleneck``
            (previously this surfaced as an unbound ``fpn_sizes`` error).
    """
    # Initialise nn.Module before assigning any attribute on the instance.
    super(ResNet, self).__init__()
    self.inplanes = 64

    self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
    self.bn1 = nn.BatchNorm2d(64)
    self.relu = nn.ReLU(inplace=True)
    self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

    self.layer1 = self._make_layer(block, 64, layers[0])
    self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
    self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
    self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

    # All four stages feed the FPN. Their output width is read from the
    # last convolution of the last block in each stage, whose attribute
    # name depends on the block type.
    if block == BasicBlock:
        last_conv = "conv2"
    elif block == Bottleneck:
        last_conv = "conv3"
    else:
        raise ValueError(
            "Unsupported block type {!r}; expected BasicBlock or Bottleneck".format(block))
    fpn_sizes = [
        getattr(self.layer1[layers[0] - 1], last_conv).out_channels,
        getattr(self.layer2[layers[1] - 1], last_conv).out_channels,
        getattr(self.layer3[layers[2] - 1], last_conv).out_channels,
        getattr(self.layer4[layers[3] - 1], last_conv).out_channels,
    ]

    self.fpn = PyramidFeatures(fpn_sizes[0], fpn_sizes[1], fpn_sizes[2], fpn_sizes[3])
    self.context = self._make_contextlayer()

    self.clsHead = ClassHead_()
    self.bboxHead = BboxHead_()
    self.ldmHead = LandmarkHead_()

    self.anchors = Anchors()
    self.regressBoxes = RegressionTransform()
    self.losslayer = losses.LossLayer()

    self.freeze_bn()

    # Only the context modules get an explicit initialisation here; the
    # heads keep whatever initialisation their own constructors applied.
    for layer in self.context:
        for m in layer.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.normal_(m.weight, std=0.01)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            if isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
host = '192.168.1.6' port = 5000 serverAddress = (host, port) print("Starting data socket server on %s:%s" % serverAddress) dataSocket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) dataSocket.bind(serverAddress) dataSocket.listen( 1) #listen to a maximum number of queued connections of 1 print("Data socket server on %s:%s is listening now." % serverAddress) connection, clientAddress = dataSocket.accept() print("Connection established with client with IP: %s:%s" % clientAddress) except: pass commissioningFileName = "commissionning.dat" anchors = Anchors(commissioningFileName) p = figure(plot_width=800, plot_height=800) r_anchors = p.scatter([x * 0.6096 for x in anchors.listOfX], [y * 0.6096 for y in anchors.listOfY], size=10, color="black", alpha=0.6) p.xaxis.axis_label = "X(meter)" p.yaxis.axis_label = "Y(meter)" r = p.scatter([], [], size=6, color="firebrick", alpha=0.6) ds = r.data_source curdoc().add_root(p) curdoc().title = "Visualization of positioning results" # Add a periodic callback to be run every 500 milliseconds
import os
import json
import cv2
from dataloader import *
from PIL import Image

# Expose only physical GPU 7 to this process. NOTE(review): `torch`, `np`,
# `transforms`, `CocoDataset`, `Normalizer`, `Resizer` and `Anchors` are not
# imported explicitly here; presumably they arrive through the wildcard
# import from `dataloader` -- confirm.
os.environ["CUDA_VISIBLE_DEVICES"] = "7"

# COCO val2017 split with the Normalizer -> Resizer transform pipeline.
dataset = CocoDataset('../dataset',
                      set_name='val2017',
                      transform=transforms.Compose([Normalizer(), Resizer()]))

# With a single visible GPU, 'cuda:0' presumably maps to physical GPU 7.
device = torch.device('cuda:0')
anchors = Anchors()

# Both checkpoints are loaded as whole pickled objects (not state dicts),
# moved to the selected device and switched to evaluation mode.
fpn = torch.load("fpn.pt")
net = torch.load("fsaf.pt")
fpn = fpn.to(device)
net = net.to(device)
fpn.eval()
net.eval()


def normalizer(image):
    """Return ``image`` standardised with fixed per-channel mean and std.

    The statistics are stored with shape ``(1, 1, 3)``, so they broadcast
    over an array whose last axis holds three channels.
    NOTE(review): the values match the commonly used ImageNet RGB statistics,
    which presumably means ``image`` is RGB scaled to [0, 1] -- confirm.
    """
    mean = np.array([[[0.485, 0.456, 0.406]]])
    std = np.array([[[0.229, 0.224, 0.225]]])
    return (image - mean) / std
j = np.where(fppi_tmp <= ref_i)[-1][-1] ref[i] = mr_tmp[j] # log(0) is undefined, so we use the np.maximum(1e-10, ref) lamr = math.exp(np.mean(np.log(np.maximum(1e-10, ref)))) return lamr, mr, fppi if __name__ == '__main__': # load net args = params() if not os.path.exists(args.save_folder): os.mkdir(args.save_folder) use_cuda = torch.cuda.is_available() if use_cuda: torch.set_default_tensor_type('torch.cuda.FloatTensor') device = torch.device('cuda') else: torch.set_default_tensor_type('torch.FloatTensor') device = torch.device('cpu') net = RetinaMask(cfgs.CLSNUM, 'test').to(device) net.load_state_dict(torch.load(args.trained_model, map_location=device)) net.eval() detector = RetinanetDetector() print('finish loading model') dataset = ReadDataset(args.voc_root, args.val_file) get_anchors = Anchors() priors = get_anchors(640, 640) test_net(net, detector, priors, dataset, use_cuda, 'test', args)
class DataSets(Dataset):
    """Template/search training pairs drawn from several sub-datasets.

    Each item is a pair of augmented crops (template and search image)
    together with the anchor classification / regression targets and the
    mask targets computed for the search crop.
    """

    def __init__(self, cfg, anchor_cfg, num_epoch=1):
        """Configure sizes, anchors, sub-datasets and augmentation.

        Args:
            cfg: Mapping with the dataset configuration. Requires the keys
                ``'datasets'`` and ``'augmentation'``; the optional keys
                ``'template_size'``, ``'search_size'``, ``'base_size'``,
                ``'size'``, ``'crop_size'``, ``'template_small'``,
                ``'anchor_target'`` and ``'num'`` override defaults. An
                empty ``'anchor_target'`` entry is inserted into ``cfg``
                when missing.
            anchor_cfg: Configuration forwarded to ``Anchors``.
            num_epoch: Multiplier applied to the number of samples so that
                one pass over this dataset covers ``num_epoch`` epochs.

        Raises:
            Exception: If ``cfg`` has no ``'datasets'`` entry.
        """
        super(DataSets, self).__init__()
        # The module-level logger is (re)bound here; shuffle() relies on it.
        global logger
        logger = logging.getLogger('global')

        self.anchors = Anchors(anchor_cfg)

        # Defaults, each overridable by the key of the same name in ``cfg``.
        self.template_size = 127
        self.search_size = 255
        self.size = 17
        self.base_size = 0
        self.crop_size = 0
        for key in ('template_size', 'search_size', 'base_size', 'size', 'crop_size'):
            if key in cfg:
                setattr(self, key, cfg[key])
        # TODO: calculate size online (there is currently no consistency
        # check between search/template size, anchor stride and ``size``).

        self.template_small = bool('template_small' in cfg and cfg['template_small'])

        self.anchors.generate_all_anchors(im_c=self.search_size // 2, size=self.size)

        if 'anchor_target' not in cfg:
            cfg['anchor_target'] = {}
        self.anchor_target = AnchorTargetLayer(cfg['anchor_target'])

        if 'datasets' not in cfg:
            raise Exception('DataSet need "{}"'.format('datasets'))

        # Sub-datasets are laid out back to back in one global index space:
        # each one owns the range [start, start + num).
        self.all_data = []
        start = 0
        self.num = 0
        for name in cfg['datasets']:
            dataset = cfg['datasets'][name]
            dataset['mark'] = name
            dataset['start'] = start

            dataset = SubDataSet(dataset)
            dataset.log()
            self.all_data.append(dataset)

            start += dataset.num  # real video number
            self.num += dataset.num_use  # the number used for subset shuffle

        aug_cfg = cfg['augmentation']
        self.template_aug = Augmentation(aug_cfg['template'])
        self.search_aug = Augmentation(aug_cfg['search'])
        self.gray = aug_cfg['gray']
        self.neg = aug_cfg['neg']
        self.inner_neg = 0 if 'inner_neg' not in aug_cfg else aug_cfg['inner_neg']

        self.pick = None  # list to save id for each img
        if 'num' in cfg:  # number used in training for all dataset
            self.num = int(cfg['num'])
        self.num *= num_epoch
        self.shuffle()

        self.infos = {
            'template': self.template_size,
            'search': self.search_size,
            'template_small': self.template_small,
            'gray': self.gray,
            'neg': self.neg,
            'inner_neg': self.inner_neg,
            'crop_size': self.crop_size,
            'anchor_target': self.anchor_target.__dict__,
            'num': self.num // num_epoch
        }
        logger.info('dataset informations: \n{}'.format(json.dumps(self.infos, indent=4)))

    def imread(self, path):
        """Read the image at ``path`` with OpenCV; return ``(image, 1.0)``."""
        img = cv2.imread(path)
        return img, 1.0

    def shuffle(self):
        """Rebuild ``self.pick`` with at least ``self.num`` shuffled indices.

        Every round concatenates one ``shuffle()`` result per sub-dataset,
        shuffles that round and appends it, until enough indices exist.

        Raises:
            ValueError: If a round yields no indices while more are still
                needed (this used to loop forever).
        """
        pick = []
        m = 0
        while m < self.num:
            p = []
            for subset in self.all_data:
                p += subset.shuffle()
            if not p:
                raise ValueError(
                    "sub-datasets produced no samples but {} are required".format(self.num))
            sample_random.shuffle(p)

            pick += p
            m = len(pick)
        self.pick = pick
        logger.info("shuffle done!")
        logger.info("dataset length {}".format(self.num))

    def __len__(self):
        """Return the number of samples in one pass over this dataset."""
        return self.num

    def find_dataset(self, index):
        """Map a global index to ``(sub_dataset, index_within_sub_dataset)``.

        Raises:
            IndexError: If ``index`` lies beyond the last sub-dataset
                (previously ``None`` was returned implicitly).
        """
        for dataset in self.all_data:
            if dataset.start + dataset.num > index:
                return dataset, index - dataset.start
        raise IndexError("index {} is outside every sub-dataset".format(index))

    @staticmethod
    def _center_crop(img, size):
        """Return the central square crop of ``img``, unchanged if it fits.

        The crop spans ``2 * (size // 2) + 1`` pixels per side and the same
        bounds (derived from ``img.shape[1]``) are used for both axes.
        """
        shape = img.shape[1]
        if shape == size:
            return img
        c = shape // 2
        lo = c - size // 2
        hi = c + size // 2 + 1
        return img[lo:hi, lo:hi]

    def _to_bbox(self, image, shape):
        """Return a corner-format box, centred in ``image``, for a target.

        ``shape`` is either a 4-element corner box or a ``(w, h)`` pair. The
        size is rescaled so that the target plus 50% context fills
        ``self.template_size`` pixels.
        """
        imh, imw = image.shape[:2]
        if len(shape) == 4:
            w, h = shape[2] - shape[0], shape[3] - shape[1]
        else:
            w, h = shape
        context_amount = 0.5
        exemplar_size = self.template_size  # 127 by default
        wc_z = w + context_amount * (w + h)
        hc_z = h + context_amount * (w + h)
        s_z = np.sqrt(wc_z * hc_z)
        scale_z = exemplar_size / s_z
        w = w * scale_z
        h = h * scale_z
        cx, cy = imw // 2, imh // 2
        return center2corner(Center(cx, cy, w, h))

    @staticmethod
    def _draw(image, box, name):
        """Write a copy of ``image`` with ``box`` drawn in green to ``name``."""
        image = image.copy()
        x1, y1, x2, y2 = (int(round(v)) for v in box)
        cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0))
        cv2.imwrite(name, image)

    def __getitem__(self, index, debug=False):
        """Build one training sample.

        Args:
            index: Position in ``self.pick``.
            debug: When true, dump the raw and augmented crops with their
                boxes as JPEGs under ``debug/``.

        Returns:
            Tuple ``(template, search, cls, delta, delta_weight, bbox, mask,
            mask_weight)``. ``template`` and ``search`` are float32 arrays
            transposed with ``(2, 0, 1)``; ``bbox``, ``mask`` and
            ``mask_weight`` are float32 arrays, ``mask`` holding -1 / 1
            with a leading axis of length 1.
        """
        index = self.pick[index]
        dataset, index = self.find_dataset(index)

        # Each flag is a probability; random.random() is only consumed when
        # the probability is non-zero.
        gray = self.gray and self.gray > random.random()
        neg = self.neg and self.neg > random.random()

        if neg:
            template = dataset.get_random_target(index)
            if self.inner_neg and self.inner_neg > random.random():
                search = dataset.get_random_target()
            else:
                search = random.choice(self.all_data).get_random_target()
        else:
            template, search = dataset.get_positive_pair(index)

        template_image, _ = self.imread(template[0])
        if self.template_small:
            template_image = self._center_crop(template_image, self.template_size)

        search_image, _ = self.imread(search[0])

        if dataset.has_mask and not neg:
            search_mask = (cv2.imread(search[2], 0) > 0).astype(np.float32)
        else:
            search_mask = np.zeros(search_image.shape[:2], dtype=np.float32)

        if self.crop_size > 0:
            search_image = self._center_crop(search_image, self.crop_size)
            search_mask = self._center_crop(search_mask, self.crop_size)

        template_box = self._to_bbox(template_image, template[1])
        search_box = self._to_bbox(search_image, search[1])

        # Keep the augmented template box: the debug dump below needs it.
        template, template_bbox, _ = self.template_aug(
            template_image, template_box, self.template_size, gray=gray)
        search, bbox, mask = self.search_aug(
            search_image, search_box, self.search_size, gray=gray, mask=search_mask)

        if debug:
            self._draw(template_image, template_box, "debug/{:06d}_ot.jpg".format(index))
            self._draw(search_image, search_box, "debug/{:06d}_os.jpg".format(index))
            self._draw(template, template_bbox, "debug/{:06d}_t.jpg".format(index))
            self._draw(search, bbox, "debug/{:06d}_s.jpg".format(index))

        cls, delta, delta_weight = self.anchor_target(self.anchors, bbox, self.size, neg)
        if dataset.has_mask and not neg:
            mask_weight = cls.max(axis=0, keepdims=True)
        else:
            mask_weight = np.zeros([1, cls.shape[1], cls.shape[2]], dtype=np.float32)

        template, search = map(lambda x: np.transpose(x, (2, 0, 1)).astype(np.float32),
                               [template, search])

        mask = (np.expand_dims(mask, axis=0) > 0.5) * 2 - 1  # 1*H*W

        return template, search, cls, delta, delta_weight, np.array(bbox, np.float32), \
            np.array(mask, np.float32), np.array(mask_weight, np.float32)
def __init__(
    self,
    num_classes,
    block=Bottleneck,
    layers=[3, 4, 6, 3],
    prior=0.01,
    no_rpn=False,
    no_semantic=False,
    bypass_semantic=False,
    squeeze=True,
    decoder_dropout=None,
    decoder_activation=nn.ReLU(),
    encoder_activation=nn.ReLU(inplace=True),
    batch_norm=False,
    regr_feature_sizes=[256] * 3,
    class_feature_sizes=[256] * 3,
):
    """Build the ResNet encoder, U-Net style decoder and detection heads.

    Args:
        num_classes: Number of object classes; the decoder and the
            encoder-to-logits projections emit ``num_classes + 1`` channels.
        block: Residual block class forwarded to ``ResNet``.
        layers: Per-stage block counts forwarded to ``ResNet``.
        prior: Prior used to set the bias of the final classification layer
            to ``-log((1 - prior) / prior)``.
        no_rpn: Stored on the instance as ``self.no_rpn``.
        no_semantic: Stored on the instance as ``self.no_semantic``.
        bypass_semantic: Stored on the instance as ``self.bypass_semantic``.
        squeeze: Stored on the instance and forwarded to ``Anchors``.
        decoder_dropout: Dropout setting forwarded to ``UNetDecode``.
        decoder_activation: Activation module shared by the decoder, the
            upsampling block and both heads.
        encoder_activation: Activation module forwarded to ``ResNet``.
        batch_norm: Forwarded to the decoder, upsampling block and heads.
        regr_feature_sizes: Feature sizes forwarded to ``RegressionModel``.
        class_feature_sizes: Feature sizes forwarded to
            ``ClassificationModel``.
    """
    super(RetinaNet, self).__init__()

    # The list defaults in the signature are shared between calls; work on
    # private copies so that nothing downstream can mutate them.
    layers = list(layers)
    regr_feature_sizes = list(regr_feature_sizes)
    class_feature_sizes = list(class_feature_sizes)

    self.bypass_semantic = bypass_semantic
    self.squeeze = squeeze
    self.pyramid_levels = [3, 4, 5]
    self.no_rpn = no_rpn
    self.no_semantic = no_semantic

    self.encoder = ResNet(block=block, layers=layers, activation=encoder_activation)

    # Output channel description of encoder stages layer2..layer4. The loop
    # variable is deliberately not called ``nn`` so that it cannot be
    # confused with the ``torch.nn`` alias used throughout this method.
    self.fpn_sizes = [
        self.get_out_channels(getattr(self.encoder, "layer%d" % stage))
        for stage in (2, 3, 4)
    ]
    print("fpn_sizes")
    print(*self.fpn_sizes, sep='\t')

    self.decoder = nn.Sequential(
        UNetDecode(256,
                   hid_channels=self.fpn_sizes,
                   dropout=decoder_dropout,
                   batch_norm=batch_norm,
                   activation=decoder_activation),
        UpsampleBlock(in_channels=256,
                      out_channels=1 + num_classes,
                      steps=3,
                      activation=decoder_activation,
                      batch_norm=batch_norm))

    self.enc_to_logits = nn.ModuleList(
        [EncToLogits(n, num_classes + 1) for n in self.fpn_sizes])

    # The FPN and both heads operate on ``num_classes + 1`` input channels
    # rather than on the raw encoder widths.
    self.fpn = PyramidFeatures(*([num_classes + 1] * 3))
    self.regressionModel = RegressionModel(
        num_classes + 1,
        batch_norm=batch_norm,
        activation=decoder_activation,
        feature_sizes=regr_feature_sizes)
    self.classificationModel = ClassificationModel(
        num_classes + 1,
        num_classes=num_classes,
        batch_norm=batch_norm,
        activation=decoder_activation,
        w_init=0.0,
        feature_sizes=class_feature_sizes)

    self.anchors = Anchors(pyramid_levels=self.pyramid_levels, squeeze=squeeze)
    self.regressBoxes = BBoxTransform()
    self.clipBoxes = ClipBoxes()

    # Generic initialisation of every conv / batch-norm registered so far:
    # conv weights ~ N(0, sqrt(2 / (k_h * k_w * out_channels))),
    # batch-norm scale = 1 and shift = 0.
    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
            m.weight.data.normal_(0, math.sqrt(2. / n))
        elif isinstance(m, nn.BatchNorm2d):
            m.weight.data.fill_(1)
            m.bias.data.zero_()

    # Final head layers are overridden after the generic pass.
    self.classificationModel.final.weight.data.fill_(0)
    self.classificationModel.final.bias.data.fill_(-math.log((1.0 - prior) / prior))
    self.regressionModel.seq.convblock_final.conv.weight.data.fill_(0)
    self.regressionModel.seq.convblock_final.conv.bias.data.fill_(0)

    self.freeze_bn()
def __init__(self,
             num_classes,
             block,
             layers,
             max_boxes,
             score_threshold,
             seg_level,
             alphabet,
             train_htr,
             htr_gt_box,
             ner_branch=False,
             binary_classifier=True):
    """Build the ResNet backbone, FPN and the detection/recognition heads.

    Args:
        num_classes: Number of classes for the classification head (and the
            NER head when enabled).
        block: Residual block class, either ``BasicBlock`` or ``Bottleneck``.
        layers: Indexable of four ints; ``layers[i]`` is the number of
            blocks stacked in ``layer{i + 1}``.
        max_boxes: Stored on the instance as ``self.max_boxes``.
        score_threshold: Stored on the instance and forwarded to
            ``BoxSampler``.
        seg_level: Forwarded to ``Anchors``.
        alphabet: Stored on the instance; its length sizes the recognition
            head output.
        train_htr: Stored on the instance as ``self.train_htr``.
        htr_gt_box: Stored on the instance as ``self.htr_gt_box``.
        ner_branch: When true, an ``NERModel`` head and its loss are created.
        binary_classifier: Stored on the instance as
            ``self.binary_classifier``.

    Raises:
        ValueError: If ``block`` is neither ``BasicBlock`` nor ``Bottleneck``
            (previously this surfaced as an unbound ``fpn_sizes`` error).
    """
    # Initialise nn.Module before assigning any attribute on the instance.
    super(ResNet, self).__init__()
    self.inplanes = 64
    self.pool_h = 2
    self.pool_w = 400
    self.forward_transcription = False
    self.max_boxes = max_boxes

    self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
    self.bn1 = nn.BatchNorm2d(64)
    self.relu = nn.ReLU(inplace=True)
    self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
    self.layer1 = self._make_layer(block, 64, layers[0])
    self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
    self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
    self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

    self.downsampling_factors = [8, 16, 32, 64, 128]
    self.epochs_only_det = 1
    self.score_threshold = score_threshold
    self.alphabet = alphabet
    self.train_htr = train_htr
    self.binary_classifier = binary_classifier
    self.htr_gt_box = htr_gt_box
    self.num_classes = num_classes
    self.ner_branch = ner_branch

    # The FPN is fed by layer2..layer4. Their output width is read from the
    # last convolution of the last block in each stage, whose attribute
    # name depends on the block type.
    if block == BasicBlock:
        last_conv = "conv2"
    elif block == Bottleneck:
        last_conv = "conv3"
    else:
        raise ValueError(
            "Unsupported block type {!r}; expected BasicBlock or Bottleneck".format(block))
    fpn_sizes = [
        getattr(self.layer2[layers[1] - 1], last_conv).out_channels,
        getattr(self.layer3[layers[2] - 1], last_conv).out_channels,
        getattr(self.layer4[layers[3] - 1], last_conv).out_channels,
    ]

    self.fpn = PyramidFeatures(fpn_sizes[0], fpn_sizes[1], fpn_sizes[2])

    # Anchors are created before the heads because both heads are sized by
    # ``self.anchors.num_anchors``.
    self.anchors = Anchors(seg_level=seg_level)
    self.regressionModel = RegressionModel(
        num_features_in=256, num_anchors=self.anchors.num_anchors)
    self.recognitionModel = RecognitionModel(feature_size=256,
                                             pool_h=self.pool_h,
                                             alphabet_len=len(alphabet))
    if ner_branch:
        self.nerModel = NERModel(feature_size=256,
                                 pool_h=self.pool_h,
                                 n_classes=num_classes,
                                 pool_w=self.pool_w)
    self.classificationModel = ClassificationModel(
        num_features_in=256,
        num_anchors=self.anchors.num_anchors,
        num_classes=num_classes)

    self.boxSampler = BoxSampler('train', self.score_threshold)
    self.sorter = RoISorter()
    self.regressBoxes = BBoxTransform()
    self.clipBoxes = ClipBoxes()
    self.focalLoss = losses.FocalLoss()
    if ner_branch:
        self.nerLoss = losses.NERLoss()
    self.transcriptionLoss = losses.TranscriptionLoss()

    # Generic initialisation of every conv / batch-norm registered so far:
    # conv weights ~ N(0, sqrt(2 / (k_h * k_w * out_channels))),
    # batch-norm scale = 1 and shift = 0.
    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
            m.weight.data.normal_(0, math.sqrt(2. / n))
        elif isinstance(m, nn.BatchNorm2d):
            m.weight.data.fill_(1)
            m.bias.data.zero_()

    # Head output layers are overridden after the generic pass. With zero
    # weights and bias = -log((1 - prior) / prior), a sigmoid applied to the
    # initial output would equal ``prior``.
    prior = 0.01
    prior_bias = -math.log((1.0 - prior) / prior)

    self.classificationModel.output.weight.data.fill_(0)
    self.classificationModel.output.bias.data.fill_(prior_bias)

    self.regressionModel.output.weight.data.fill_(0)
    self.regressionModel.output.bias.data.fill_(0)

    self.recognitionModel.output.weight.data.fill_(0)
    self.recognitionModel.output.bias.data.fill_(prior_bias)

    if ner_branch:
        self.nerModel.output.weight.data.fill_(0)
        self.nerModel.output.bias.data.fill_(prior_bias)

    self.freeze_bn()
def laod_anchor(self):
    """Generate the anchors for a square input and cache them on the instance.

    A fresh ``Anchors()`` generator is called on an all-ones tensor of shape
    ``(1, 3, img_size, img_size)``. The result is stored in ``self.anchors``
    and moved to CUDA when a CUDA device is available.
    """
    anchor_generator = Anchors()
    side = self.img_size
    self.anchors = anchor_generator(torch.ones((1, 3, side, side)))
    if not torch.cuda.is_available():
        return
    self.anchors = self.anchors.cuda()
# encoding:utf-8 ''' 测试评价 ''' import numpy as np import xml.etree.ElementTree as ET import os import json import cv2 testAnchor = False if testAnchor: from anchors import Anchors anchorsC = Anchors() anchors = anchorsC.get_anchors(fmSizes=[(16, 16), (8, 8)], fmBased=True, imgSize=1) def parsingR(fileName): tmpDict = {} # tmp = [] tmpTime = [] with open(fileName, 'r', encoding='utf-8') as f: lines = f.readlines() for line in lines: line = line.replace('\n', '') line = line.replace('.jpg', '') items = line.split('\t')
def laod_anchor(self):
    """Generate the anchors for the configured input size and cache them.

    A fresh ``Anchors()`` generator is called with ``self.imgh`` and
    ``self.imgw``. The result is stored in ``self.anchors`` and moved to
    CUDA when ``self.use_gpu`` is truthy.
    """
    anchor_generator = Anchors()
    height, width = self.imgh, self.imgw
    self.anchors = anchor_generator(height, width)
    if not self.use_gpu:
        return
    self.anchors = self.anchors.cuda()
net.load_state_dict(torch.load(args.trained_model)) else: state_dict = torch.load(args.trained_model, map_location='cpu') #state_dict_new = dict() #for key,value in list(state_dict.items()): # state_dict_new[key[7:]] = value net.load_state_dict(state_dict) net.eval() print('Finished loading model!') # load data #dataset = VOCDetection(args.voc_root, [('2007', set_type)], # BaseTransform(int(args.input_size), dataset_mean), # VOCAnnotationTransform()) dataset = TestDataset() if args.cuda: net = net.cuda() cudnn.benchmark = True get_anchor = Anchors() img_batch = torch.ones((1, 3, cfgs.ImgSize, cfgs.ImgSize)) anchors = get_anchor([640, 640]) print('anchor', anchors.data.size()) # evaluation test_net(args.save_folder, net, Detector, args.cuda, dataset, args.top_k, anchors, thresh=args.confidence_threshold)