def detect_image(self, image):
    """Run face detection on ``image`` and draw the results on a copy of it.

    The input is copied before processing; boxes, confidence text and the
    five landmark points of every detection are drawn on that copy.

    Returns:
        A tuple ``(annotated_image, face_count)`` where ``face_count`` is the
        number of detections left after non-maximum suppression.
    """
    # Canvas for drawing; the working copy below is converted to float32.
    annotated = image.copy()
    image = np.array(image, np.float32)
    im_height, im_width, _ = np.shape(image)

    # Multipliers that map the decoded (normalised) box and landmark
    # coordinates back to the size of the original image.
    scale = torch.Tensor([im_width, im_height] * 2)
    scale_for_landmarks = torch.Tensor([im_width, im_height] * 5)

    # Preprocess, go channel-first, then add the batch dimension.
    image = torch.from_numpy(preprocess_input(image).transpose(2, 0, 1)).unsqueeze(0)

    # Prior boxes are generated for the actual size of this image.
    anchors = Anchors(self.cfg, image_size=(im_height, im_width)).get_anchors()

    with torch.no_grad():
        if self.cuda:
            scale = scale.cuda()
            scale_for_landmarks = scale_for_landmarks.cuda()
            image = image.cuda()
            anchors = anchors.cuda()

        loc, conf, landms = self.net(image)  # forward pass

        boxes = decode(loc.data.squeeze(0), anchors, self.cfg['variance'])
        boxes = (boxes * scale).cpu().numpy()

        # Only column 1 of the class scores is kept as the confidence.
        conf = conf.data.squeeze(0)[:, 1:2].cpu().numpy()

        landms = decode_landm(landms.data.squeeze(0), anchors, self.cfg['variance'])
        landms = (landms * scale_for_landmarks).cpu().numpy()

        # Row layout: 4 box values, 1 confidence, 10 landmark values.
        boxes_conf_landms = np.concatenate([boxes, conf, landms], -1)
        boxes_conf_landms = non_max_suppression(boxes_conf_landms, self.confidence)

    # One colour per landmark point, in the order the points are stored.
    landmark_colors = (
        (0, 0, 255),
        (0, 255, 255),
        (255, 0, 255),
        (0, 255, 0),
        (255, 0, 0),
    )
    for det in boxes_conf_landms:
        # Format the score before the row is truncated to integers.
        text = "{:.4f}".format(det[4])
        det = [int(value) for value in det]

        cv2.rectangle(annotated, (det[0], det[1]), (det[2], det[3]), (0, 0, 255), 2)
        cv2.putText(annotated, text, (det[0], det[1] + 12),
                    cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))

        # Landmarks occupy columns 5..14 as consecutive (x, y) pairs.
        for point_idx, color in enumerate(landmark_colors):
            x_col = 5 + 2 * point_idx
            cv2.circle(annotated, (det[x_col], det[x_col + 1]), 1, color, 4)

    return annotated, len(boxes_conf_landms)
def get_FPS(self, image, test_interval):
    """Measure the average time of one detection pass on ``image``.

    One untimed pass is executed first, then ``test_interval`` timed passes.
    Each pass covers the network forward, decoding, non-maximum suppression
    and rescaling of the results.

    Returns:
        Mean wall-clock seconds per pass, ``(t2 - t1) / test_interval``.
    """
    image = np.array(image, np.float32)
    im_height, im_width, _ = np.shape(image)

    # Plain lists; they broadcast against the numpy result columns below.
    scale = [im_width, im_height] * 2
    scale_for_landmarks = [im_width, im_height] * 5

    if self.letterbox_image:
        image = np.array(
            letterbox_image(image, [self.input_shape[1], self.input_shape[0]]),
            np.float32,
        )
    else:
        # Without letterboxing the anchors depend on the actual image size.
        self.anchors = Anchors(self.cfg, image_size=(im_height, im_width)).get_anchors()

    with torch.no_grad():
        image = torch.from_numpy(preprocess_input(image).transpose(2, 0, 1)).unsqueeze(0)
        if self.cuda:
            self.anchors = self.anchors.cuda()
            image = image.cuda()

    def run_once():
        # One complete pass: forward, decode, NMS, map back to the input size.
        with torch.no_grad():
            loc, conf, landms = self.net(image)

            boxes = decode(loc.data.squeeze(0), self.anchors, self.cfg['variance'])
            boxes = boxes.cpu().numpy()

            conf = conf.data.squeeze(0)[:, 1:2].cpu().numpy()

            landms = decode_landm(landms.data.squeeze(0), self.anchors, self.cfg['variance'])
            landms = landms.cpu().numpy()

            detections = np.concatenate([boxes, conf, landms], -1)
            detections = non_max_suppression(detections, self.confidence)

        if len(detections) > 0:
            if self.letterbox_image:
                detections = retinaface_correct_boxes(
                    detections,
                    np.array([self.input_shape[0], self.input_shape[1]]),
                    np.array([im_height, im_width]),
                )
            detections[:, :4] = detections[:, :4] * scale
            detections[:, 5:] = detections[:, 5:] * scale_for_landmarks
        return detections

    # Untimed first pass, exactly as the timed ones.
    run_once()

    t1 = time.time()
    for _ in range(test_interval):
        run_once()
    t2 = time.time()

    return (t2 - t1) / test_interval
def __init__(self, **kwargs):
    """Initialise the detector from class defaults plus keyword overrides.

    Args:
        **kwargs: Attribute overrides applied on top of ``self._defaults``
            (for example ``backbone`` or ``letterbox_image``).
    """
    self.__dict__.update(self._defaults)
    # Previously the keyword arguments were accepted but never applied, so
    # callers could not override any default. Apply them before anything
    # below reads the configuration.
    for name, value in kwargs.items():
        setattr(self, name, value)

    # Pick the configuration that matches the selected backbone.
    self.cfg = cfg_mnet if self.backbone == "mobilenet" else cfg_re50

    self.generate()

    # With letterboxing the network input size is fixed, so the anchors can
    # be computed once here.
    if self.letterbox_image:
        self.anchors = Anchors(
            self.cfg,
            image_size=[self.input_shape[0], self.input_shape[1]],
        ).get_anchors()
def __init__(self, anchors=None):
    """Set up the anchor helper and empty placeholders for the sub-networks.

    Args:
        anchors: Anchor configuration; stored as-is and also passed to
            ``Anchors``.
    """
    super(SiamRPN, self).__init__()

    # Keep the raw configuration next to the helper built from it.
    self.anchors = anchors
    self.anchor = Anchors(anchors)
    self.anchor_num = self.anchor.anchor_num

    # Placeholders; nothing in this constructor fills them in.
    self.features = self.rpn_model = self.all_anchors = None
def __init__(self, anchors=None, o_sz=127, g_sz=127):
    """Store the anchor configuration and sizes; sub-networks start as ``None``.

    Args:
        anchors: Anchor configuration. It is indexed with ``"ratios"`` and
            ``"scales"``, so the ``None`` default cannot actually be used.
        o_sz: Stored as ``self.o_sz``.
        g_sz: Stored as ``self.g_sz``.
    """
    super(SiamMask, self).__init__()

    self.anchors = anchors  # anchor_cfg
    ratio_count = len(self.anchors["ratios"])
    scale_count = len(self.anchors["scales"])
    # One anchor per (ratio, scale) combination.
    self.anchor_num = ratio_count * scale_count
    self.anchor = Anchors(anchors)

    # Sub-network placeholders; this constructor leaves them unset.
    self.features = self.rpn_model = self.mask_model = None

    self.o_sz, self.g_sz = o_sz, g_sz
    self.all_anchors = None
def __init__(self, anchors=None, o_sz=63, g_sz=127):
    """Store the anchor configuration, sizes and the bilinear up-sampler.

    Args:
        anchors: Anchor configuration; must provide ``"ratios"`` and
            ``"scales"`` (the ``None`` default is not usable).
        o_sz: Input size (per the original author's comment).
        g_sz: Output size (per the original author's comment); also the
            target size of ``self.upSample``.
    """
    super(SiamMask, self).__init__()

    # Configuration information for the anchors.
    self.anchors = anchors
    # Number of anchors: one per (ratio, scale) combination.
    ratio_count = len(self.anchors["ratios"])
    scale_count = len(self.anchors["scales"])
    self.anchor_num = ratio_count * scale_count
    # Helper object built from the configuration.
    self.anchor = Anchors(anchors)

    # Placeholders, described by the original comments as the feature
    # extraction, RPN and segmentation network models respectively.
    self.features = None
    self.rpn_model = None
    self.mask_model = None

    self.o_sz = o_sz
    self.g_sz = g_sz

    # Bilinear interpolation of 2-D data to a fixed g_sz x g_sz size.
    target_size = [g_sz, g_sz]
    self.upSample = nn.UpsamplingBilinear2d(size=target_size)

    self.all_anchors = None
def __init__(self, anchors=None, o_sz=63, g_sz=127):
    """Initialise anchor bookkeeping, size fields and the up-sampling layer.

    Args:
        anchors: Anchor configuration indexed with ``"ratios"`` and
            ``"scales"``; passing ``None`` fails on that lookup.
        o_sz: Stored as ``self.o_sz``.
        g_sz: Stored as ``self.g_sz`` and used as both dimensions of the
            up-sampler's output size.
    """
    super(SiamMask, self).__init__()

    self.anchors = anchors  # anchor_cfg
    anchor_cfg = self.anchors
    # Every ratio is combined with every scale.
    self.anchor_num = len(anchor_cfg["ratios"]) * len(anchor_cfg["scales"])
    self.anchor = Anchors(anchors)

    # No sub-network is created by this constructor.
    self.features = self.rpn_model = self.mask_model = None

    self.o_sz, self.g_sz = o_sz, g_sz
    self.upSample = nn.UpsamplingBilinear2d(size=[g_sz, g_sz])
    self.all_anchors = None
def generate_anchor(cfg, score_size):
    """Build the full anchor table for a ``score_size`` x ``score_size`` grid.

    Args:
        cfg: Anchor configuration passed to ``Anchors``.
        score_size: Side length of the square grid the anchors are laid on.

    Returns:
        Array with one row ``(cx, cy, w, h)`` per anchor and grid cell. All
        cells of the first base anchor come first, then those of the second,
        and so on.
    """
    generator = Anchors(cfg)
    corners = generator.anchors

    # Corner form (x1, y1, x2, y2) -> centre form (cx, cy, w, h).
    left, top, right, bottom = (corners[:, col] for col in range(4))
    centered = np.stack(
        [(left + right) * 0.5, (top + bottom) * 0.5, right - left, bottom - top],
        1,
    )

    stride = generator.stride
    base_count = centered.shape[0]

    # Repeat each base anchor once per grid cell.
    cell_count = score_size * score_size
    table = np.tile(centered, cell_count).reshape((-1, 4))

    # Offsets are measured from the grid centre, so they start negative.
    origin = -(score_size // 2) * stride
    ticks = [origin + stride * step for step in range(score_size)]
    grid_x, grid_y = np.meshgrid(ticks, ticks)

    # The same grid positions are reused for every base anchor.
    centers_x = np.tile(grid_x.flatten(), (base_count, 1)).flatten()
    centers_y = np.tile(grid_y.flatten(), (base_count, 1)).flatten()

    table[:, 0] = centers_x.astype(np.float32)
    table[:, 1] = centers_y.astype(np.float32)
    return table
def generate_anchor(cfg, score_size):
    """Expand the base anchors over every cell of the score grid.

    Args:
        cfg: Anchor configuration dictionary (the original comment says it
            is ``model.anchors`` from ``test.py``).
        score_size: Side length of the score grid (the original comment
            notes a default of 25).

    Returns:
        Array of shape ``(anchor_num * score_size * score_size, 4)`` whose
        rows are ``(cx, cy, w, h)``.
    """
    # Per the original comment, constructing Anchors generates its
    # ``anchors`` table as (anchor_num, 4) rows of (x1, y1, x2, y2).
    anchor_source = Anchors(cfg)
    boxes = anchor_source.anchors
    x_min, y_min = boxes[:, 0], boxes[:, 1]
    x_max, y_max = boxes[:, 2], boxes[:, 3]

    # (anchor_num, 4) in centre form: (cx, cy, w, h).
    center_x = (x_min + x_max) * 0.5
    center_y = (y_min + y_max) * 0.5
    anchor = np.stack([center_x, center_y, x_max - x_min, y_max - y_min], 1)

    total_stride = anchor_source.stride  # 8 in the original author's run
    anchor_num = anchor.shape[0]

    # Tile each row score_size**2 times along its own axis, then fold back
    # to 4 columns: the copies of one base anchor end up contiguous.
    anchor = np.tile(anchor, score_size * score_size).reshape((-1, 4))

    # First grid coordinate (-96 in the original author's run).
    ori = -(score_size // 2) * total_stride
    axis_positions = [ori + total_stride * idx for idx in range(score_size)]

    # Both meshgrid outputs have shape (score_size, score_size).
    xx, yy = np.meshgrid(axis_positions, axis_positions)

    # Flatten the grid and repeat it once per base anchor, giving
    # anchor_num * score_size * score_size values for each coordinate.
    xx = np.tile(xx.flatten(), (anchor_num, 1)).flatten()
    yy = np.tile(yy.flatten(), (anchor_num, 1)).flatten()

    # Overwrite the centre columns with the grid positions.
    anchor[:, 0] = xx.astype(np.float32)
    anchor[:, 1] = yy.astype(np.float32)
    return anchor
def __init__(self, model, device):
    """Wrap ``model`` for detection and build its anchors from global ``cfg``.

    Args:
        model: Model to wrap; stored as ``self.model``.
        device: Stored as ``self.device``.
    """
    super(DetectionWrapper, self).__init__()
    self.model = model
    self.device = device

    # All anchor settings come from the module-level ``cfg`` object.
    anchor_args = (
        cfg.MIN_LEVEL,
        cfg.MAX_LEVEL,
        cfg.NUM_SCALES,
        cfg.ASPECT_RATIOS,
        cfg.ANCHOR_SCALE,
        cfg.MODEL.IMAGE_SIZE,
    )
    self.anchors = Anchors(*anchor_args)

    # Starts empty; nothing in this constructor populates it.
    self._anchor_cache = None
def detect_image(self, image):
    """Detect faces in ``image`` and return a copy with the results drawn.

    When nothing is detected the untouched copy is returned.
    """
    canvas = image.copy()
    image = np.array(image, np.float32)
    im_height, im_width, _ = np.shape(image)

    # Multipliers applied to the box and landmark columns of the results.
    scale = [im_width, im_height] * 2
    scale_for_landmarks = [im_width, im_height] * 5

    # Image preprocessing / normalisation, with a leading batch axis.
    photo = np.expand_dims(preprocess_input(image), 0)

    anchors = Anchors(self.cfg, image_size=(im_height, im_width)).get_anchors()
    preds = self.retinaface.predict(photo)

    # Decode the predictions and apply non-maximum suppression.
    results = self.bbox_util.detection_out(
        preds, anchors, confidence_threshold=self.confidence)
    if len(results) <= 0:
        return canvas

    results = np.array(results)
    results[:, :4] = results[:, :4] * scale
    results[:, 5:] = results[:, 5:] * scale_for_landmarks

    # One colour per landmark point, in storage order.
    point_colors = [
        (0, 0, 255),
        (0, 255, 255),
        (255, 0, 255),
        (0, 255, 0),
        (255, 0, 0),
    ]
    for row in results:
        # The score text is taken before the row is truncated to integers.
        label = "{:.4f}".format(row[4])
        row = [int(value) for value in row]

        top_left = (row[0], row[1])
        bottom_right = (row[2], row[3])
        cv2.rectangle(canvas, top_left, bottom_right, (0, 0, 255), 2)
        cv2.putText(canvas, label, (row[0], row[1] + 12),
                    cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))

        # Landmarks: columns 5..14 as consecutive (x, y) pairs.
        for offset, color in zip(range(5, 15, 2), point_colors):
            cv2.circle(canvas, (row[offset], row[offset + 1]), 1, color, 4)

    return canvas
def __init__(self, num_classes, block, layers):
    """Build the ResNet backbone, FPN, prediction heads and loss modules.

    Args:
        num_classes: Number of classes for the classification head.
        block: Residual block class, ``BasicBlock`` or ``Bottleneck``.
        layers: Number of blocks in each of the four residual stages.

    Raises:
        ValueError: If ``block`` is neither ``BasicBlock`` nor ``Bottleneck``.
    """
    # Set before super().__init__(), as in the original; _make_layer is
    # expected to read it.
    self.inplanes = 64
    super(ResNet, self).__init__()

    self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
    self.bn1 = nn.BatchNorm2d(64)
    self.relu = nn.ReLU(inplace=True)
    self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

    self.layer1 = self._make_layer(block, 64, layers[0])
    self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
    self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
    self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

    # The FPN input widths are the output channels of the last convolution
    # in the final block of each stage; its attribute name depends on the
    # block type.
    if block == BasicBlock:
        last_conv_name = "conv2"
    elif block == Bottleneck:
        last_conv_name = "conv3"
    else:
        raise ValueError(f"Block type {block} not understood")

    stages = (self.layer1, self.layer2, self.layer3, self.layer4)
    fpn_sizes = [
        getattr(stage[depth - 1], last_conv_name).out_channels
        for stage, depth in zip(stages, layers)
    ]

    self.fpn = PyramidFeatures(fpn_sizes[0], fpn_sizes[1], fpn_sizes[2], fpn_sizes[3])

    self.regressionModel = RegressionModel(256)
    self.classificationModel = ClassificationModel(256, num_classes=num_classes)
    self.contextModel = LevelAttentionModel(256)

    self.anchors = Anchors()
    self.regressBoxes = BBoxTransform()
    self.clipBoxes = ClipBoxes()

    self.contextLoss = losses.Con()
    self.focalLoss = losses.FocalLoss()

    # Convolutions: normal init with std sqrt(2 / (k_h * k_w * out_channels)).
    # Batch norm: weight 1, bias 0.
    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
            m.weight.data.normal_(0, math.sqrt(2. / n))
        elif isinstance(m, nn.BatchNorm2d):
            m.weight.data.fill_(1)
            m.bias.data.zero_()

    # Classification bias is -log((1 - prior) / prior), weights are zero.
    prior = 0.01
    self.classificationModel.output.weight.data.fill_(0)
    self.classificationModel.output.bias.data.fill_(-math.log((1.0 - prior) / prior))

    self.regressionModel.output.weight.data.fill_(0)
    self.regressionModel.output.bias.data.fill_(0)

    # Fix: the attention head is registered as ``contextModel`` above. The
    # original read ``self.levelattentionModel`` here, which this constructor
    # never assigns, so it raised AttributeError.
    self.contextModel.conv5.weight.data.fill_(0)
    self.contextModel.conv5.bias.data.fill_(0)

    self.freeze_bn()
def __init__(self, **kwargs):
    """Initialise the detector from class defaults plus keyword overrides.

    Args:
        **kwargs: Attribute overrides applied on top of ``self._defaults``
            (for example ``backbone``, ``nms_iou`` or ``input_shape``).
    """
    self.__dict__.update(self._defaults)
    # Previously the keyword arguments were accepted but never applied.
    # Apply them before the configuration is read below.
    for name, value in kwargs.items():
        setattr(self, name, value)

    # Pick the configuration that matches the selected backbone.
    self.cfg = cfg_mnet if self.backbone == "mobilenet" else cfg_re50

    self.bbox_util = BBoxUtility(nms_thresh=self.nms_iou)
    self.generate()

    # Anchors for the configured input shape.
    self.anchors = Anchors(
        self.cfg,
        image_size=(self.input_shape[0], self.input_shape[1]),
    ).get_anchors()
def __init__(self, model, device):
    """Wrap ``model`` for evaluation and precompute its anchor boxes.

    Args:
        model: Model to wrap; it is switched to eval mode here.
        device: Stored as ``self.device`` and passed on to ``Anchors``.
    """
    super(DetectionEvalWrapper, self).__init__()
    self.model = model
    self.device = device

    # Anchor settings come from the module-level ``cfg``; only the
    # ``boxes`` attribute of the helper is kept.
    anchor_helper = Anchors(
        cfg.MIN_LEVEL,
        cfg.MAX_LEVEL,
        cfg.NUM_SCALES,
        cfg.ASPECT_RATIOS,
        cfg.ANCHOR_SCALE,
        cfg.MODEL.IMAGE_SIZE,
        device,
    )
    self.anchor_boxes = anchor_helper.boxes

    self.model.eval()
def __init__(self, model, device, criterion):
    """Wrap ``model`` for training, with an anchor labeler and a loss.

    Args:
        model: Model to wrap; it is switched to train mode here.
        device: Stored as ``self.device`` and passed on to ``Anchors``.
        criterion: Stored as ``self.criterion``.
    """
    super(DetectionTrainWrapper, self).__init__()
    self.model = model
    self.device = device

    # The anchors are only needed to build the labeler, so they stay local.
    anchor_helper = Anchors(
        cfg.MIN_LEVEL,
        cfg.MAX_LEVEL,
        cfg.NUM_SCALES,
        cfg.ASPECT_RATIOS,
        cfg.ANCHOR_SCALE,
        cfg.MODEL.IMAGE_SIZE,
        device,
    )
    self.anchor_labeler = AnchorsLabeler(anchor_helper, cfg.NUM_CLASSES)

    self.criterion = criterion
    self.model.train()
def update(self, newparam=None, anchors=None):
    """Apply parameter overrides and anchor settings, then call ``renew()``.

    Args:
        newparam: Optional mapping of attribute name to value; each entry
            is set on ``self``.
        anchors: Optional anchor source, either a configuration ``dict``
            (converted with ``Anchors``) or an ``Anchors`` instance. Any
            other type is ignored.
    """
    # An empty or missing mapping contributes nothing.
    for key, value in (newparam or {}).items():
        setattr(self, key, value)

    if anchors is not None:
        anchor_obj = Anchors(anchors) if isinstance(anchors, dict) else anchors
        if isinstance(anchor_obj, Anchors):
            # Mirror the anchor settings onto this object.
            self.total_stride = anchor_obj.stride
            self.ratios = anchor_obj.ratios
            self.scales = anchor_obj.scales
            self.round_dight = anchor_obj.round_dight

    # renew() runs on every call, whether or not anything changed.
    self.renew()
def generate_anchor(cfg, score_size):
    """Lay the base anchors out over a ``score_size`` x ``score_size`` grid.

    Values recorded by the original author in a debugger session, for
    ``cfg = {'stride': 8, 'ratios': [0.33, 0.5, 1, 2, 3], 'scales': [8],
    'round_dight': 0}`` and ``score_size = 25``: ``anchors.anchors`` has
    shape ``(5, 4)`` with first row ``[-52., -16., 52., 16.]``, the grid
    origin is ``-96``, and the result has shape ``(3125, 4)`` with first row
    ``[-96., -96., 104., 32.]``.

    Args:
        cfg: Anchor configuration passed to ``Anchors``.
        score_size: Side length of the square grid.

    Returns:
        float32 array of rows ``(cx, cy, w, h)``.
    """
    base = Anchors(cfg)
    corner_boxes = base.anchors

    # Corner form -> centre form; the sizes survive the grid expansion,
    # the centres are overwritten below.
    x1, y1 = corner_boxes[:, 0], corner_boxes[:, 1]
    x2, y2 = corner_boxes[:, 2], corner_boxes[:, 3]
    widths = x2 - x1
    heights = y2 - y1
    anchor = np.stack([(x1 + x2) * 0.5, (y1 + y2) * 0.5, widths, heights], 1)

    total_stride = base.stride
    anchor_num = anchor.shape[0]
    positions = score_size * score_size

    # Each base anchor is repeated ``positions`` times in consecutive rows.
    anchor = np.tile(anchor, positions).reshape((-1, 4))

    # Grid coordinates are centred on zero.
    ori = -(score_size // 2) * total_stride
    coords = [ori + total_stride * k for k in range(score_size)]
    xx, yy = np.meshgrid(coords, coords)

    # Same grid for every base anchor: anchor_num copies back to back.
    flat_x = np.tile(xx.flatten(), (anchor_num, 1)).flatten()
    flat_y = np.tile(yy.flatten(), (anchor_num, 1)).flatten()

    anchor[:, 0] = flat_x.astype(np.float32)
    anchor[:, 1] = flat_y.astype(np.float32)
    return anchor
def get_FPS(self, image, test_interval):
    """Measure the average time of one predict-and-decode pass on ``image``.

    One untimed pass runs first, followed by ``test_interval`` timed passes.

    Returns:
        Mean wall-clock seconds per pass, ``(t2 - t1) / test_interval``.
    """
    # Convert the image to a numpy array and read its height and width.
    image = np.array(image, np.float32)
    im_height, im_width, _ = np.shape(image)

    # letterbox_image pads the image with grey bars so it can be resized
    # without distortion (per the original comment). Otherwise the anchors
    # are rebuilt for the actual image size.
    if self.letterbox_image:
        image = letterbox_image(image, [self.input_shape[1], self.input_shape[0]])
    else:
        self.anchors = Anchors(self.cfg, image_size=(im_height, im_width)).get_anchors()

    # Image preprocessing / normalisation, with a leading batch axis.
    photo = np.expand_dims(preprocess_input(image), 0)

    def predict_and_decode():
        # Run the network, convert each output with .numpy(), then decode.
        raw_outputs = self.get_pred(photo)
        outputs = [output.numpy() for output in raw_outputs]
        return self.bbox_util.detection_out(
            outputs, self.anchors, confidence_threshold=self.confidence)

    # Untimed first pass.
    predict_and_decode()

    t1 = time.time()
    for _ in range(test_interval):
        predict_and_decode()
    t2 = time.time()

    return (t2 - t1) / test_interval
def detect_image(self, image):
    """Run detection on ``image`` and return the raw result rows.

    Note that this overwrites ``self.confidence`` with ``0.02`` on every
    call.

    Returns:
        Array of detections with box columns ``[:4]`` and landmark columns
        ``[5:]`` scaled by the image width and height, or an empty array
        when nothing is detected.
    """
    self.confidence = 0.02

    image = np.array(image, np.float32)
    im_height, im_width, _ = np.shape(image)

    # Scale factors used to convert the predicted boxes to the height and
    # width of the original image.
    scale = [im_width, im_height] * 2
    scale_for_landmarks = [im_width, im_height] * 5

    # letterbox_image pads with grey bars for a distortion-free resize
    # (per the original comment). Otherwise anchors follow the image size.
    if self.letterbox_image:
        image = letterbox_image(image, [self.input_shape[1], self.input_shape[0]])
    else:
        self.anchors = Anchors(self.cfg, image_size=(im_height, im_width)).get_anchors()

    # Image preprocessing / normalisation, with a leading batch axis.
    photo = np.expand_dims(preprocess_input(image), 0)

    outputs = [output.numpy() for output in self.get_pred(photo)]

    # Decode the predictions.
    results = self.bbox_util.detection_out(
        outputs, self.anchors, confidence_threshold=self.confidence)

    # Nothing detected: return an empty array.
    if len(results) <= 0:
        return np.array([])

    results = np.array(results)

    # When letterboxing was used, the grey-bar padding has to be removed
    # from the coordinates.
    if self.letterbox_image:
        network_shape = np.array([self.input_shape[0], self.input_shape[1]])
        image_shape = np.array([im_height, im_width])
        results = retinaface_correct_boxes(results, network_shape, image_shape)

    results[:, :4] = results[:, :4] * scale
    results[:, 5:] = results[:, 5:] * scale_for_landmarks
    return results
def get_FPS(self, image, test_interval):
    """Measure the average time of one full detection pass on ``image``.

    A pass is ``predict``, decoding and, when there are detections,
    rescaling of the results. One untimed pass runs first, followed by
    ``test_interval`` timed passes.

    Returns:
        Mean wall-clock seconds per pass, ``(t2 - t1) / test_interval``.
    """
    image = np.array(image, np.float32)
    im_height, im_width, _ = np.shape(image)

    scale = [im_width, im_height] * 2
    scale_for_landmarks = [im_width, im_height] * 5

    # letterbox_image pads with grey bars for a distortion-free resize
    # (per the original comment). Otherwise anchors follow the image size.
    if self.letterbox_image:
        image = letterbox_image(image, [self.input_shape[1], self.input_shape[0]])
    else:
        self.anchors = Anchors(self.cfg, image_size=(im_height, im_width)).get_anchors()

    photo = np.expand_dims(preprocess_input(image), 0)

    def run_once():
        preds = self.retinaface.predict(photo)
        results = self.bbox_util.detection_out(
            preds, self.anchors, confidence_threshold=self.confidence)
        if len(results) > 0:
            results = np.array(results)
            # When letterboxing was used, remove the grey-bar padding from
            # the coordinates.
            if self.letterbox_image:
                results = retinaface_correct_boxes(
                    results,
                    np.array([self.input_shape[0], self.input_shape[1]]),
                    np.array([im_height, im_width]),
                )
            results[:, :4] = results[:, :4] * scale
            results[:, 5:] = results[:, 5:] * scale_for_landmarks
        return results

    # Untimed first pass.
    run_once()

    t1 = time.time()
    for _ in range(test_interval):
        run_once()
    t2 = time.time()

    return (t2 - t1) / test_interval
def generate_anchor(cfg, score_size):
    """Return every anchor of a ``score_size`` x ``score_size`` grid.

    Args:
        cfg: Anchor configuration passed to ``Anchors``.
        score_size: Side length of the square grid.

    Returns:
        Array with ``anchor_num * score_size ** 2`` rows of
        ``(cx, cy, w, h)``, grouped by base anchor.
    """
    anchors = Anchors(cfg)
    template = anchors.anchors

    # Base anchors arrive as (x1, y1, x2, y2); convert to (cx, cy, w, h).
    x1, y1, x2, y2 = (template[:, idx] for idx in (0, 1, 2, 3))
    columns = [(x1 + x2) * 0.5, (y1 + y2) * 0.5, x2 - x1, y2 - y1]
    anchor = np.stack(columns, 1)

    total_stride = anchors.stride
    anchor_num = anchor.shape[0]

    # One copy of each base anchor per grid cell.
    anchor = np.tile(anchor, score_size * score_size).reshape((-1, 4))

    # Cell positions along one axis, centred on zero.
    half_span = score_size // 2
    ori = -half_span * total_stride
    axis = [ori + total_stride * i for i in range(score_size)]
    xx, yy = np.meshgrid(axis, axis)

    # Repeat the flattened grid for each base anchor.
    xx = np.tile(xx.flatten(), (anchor_num, 1)).flatten()
    yy = np.tile(yy.flatten(), (anchor_num, 1)).flatten()

    # Replace the placeholder centres with the real grid positions.
    anchor[:, 0] = xx.astype(np.float32)
    anchor[:, 1] = yy.astype(np.float32)
    return anchor
def __init__(self, num_classes, phi, pretrain_weights=False):
    """Assemble backbone, FPN, prediction heads and anchor generator.

    Args:
        num_classes: Number of classes for the classification head.
        phi: Backbone variant index, 0..4; any other value raises
            ``KeyError`` on the channel lookup.
        pretrain_weights: Forwarded to ``Resnet``.
    """
    super(Retinanet, self).__init__()
    self.pretrain_weights = pretrain_weights
    self.backbone_net = Resnet(phi, pretrain_weights)

    # FPN input channel counts for each backbone variant.
    narrow = [128, 256, 512]
    wide = [512, 1024, 2048]
    channels_by_phi = {0: narrow, 1: narrow, 2: wide, 3: wide, 4: wide}
    fpn_sizes = channels_by_phi[phi]

    self.fpn = PyramidFeatures(fpn_sizes[0], fpn_sizes[1], fpn_sizes[2])

    # Both heads take 256 input channels.
    self.regressionModel = RegressionModel(256)
    self.classificationModel = ClassificationModel(256, num_classes=num_classes)

    self.anchors = Anchors()
    self._init_weights()
def __init__(self, num_classes=80, phi=0, load_weights=False):
    """Build BiFPN, box and class heads, anchors and the EfficientNet backbone.

    Args:
        num_classes: Number of classes predicted by the classifier head.
        phi: EfficientDet version index; selects one entry from each of
            the per-version tables below.
        load_weights: Forwarded to ``EfficientNet``.
    """
    super(EfficientDetBackbone, self).__init__()

    # phi is the EfficientDet version.
    self.phi = phi
    # EfficientNet variant used by each EfficientDet version.
    self.backbone_phi = [0, 1, 2, 3, 4, 5, 6, 6]
    # Number of channels used by the BiFPN.
    self.fpn_num_filters = [64, 88, 112, 160, 224, 288, 384, 384]
    # Number of times the BiFPN cell is repeated.
    self.fpn_cell_repeats = [3, 4, 5, 6, 7, 7, 8, 8]
    # Number of convolution repeats in the prediction heads.
    self.box_class_repeats = [3, 3, 3, 4, 4, 4, 5, 5]
    # Base anchor scale.
    self.anchor_scale = [4., 4., 4., 4., 4., 4., 4., 5.]

    num_anchors = 9
    conv_channel_coef = {
        0: [40, 112, 320],
        1: [40, 112, 320],
        2: [48, 120, 352],
        3: [48, 136, 384],
        4: [56, 160, 448],
        5: [64, 176, 512],
        6: [72, 200, 576],
        7: [72, 200, 576],
    }

    # Only the first cell gets True as its third argument; attention is
    # enabled for phi < 6.
    use_attention = bool(phi < 6)
    bifpn_cells = [
        BiFPN(self.fpn_num_filters[self.phi],
              conv_channel_coef[phi],
              cell_index == 0,
              attention=use_attention)
        for cell_index in range(self.fpn_cell_repeats[phi])
    ]
    self.bifpn = nn.Sequential(*bifpn_cells)

    self.num_classes = num_classes

    head_channels = self.fpn_num_filters[self.phi]
    head_layers = self.box_class_repeats[self.phi]
    self.regressor = BoxNet(in_channels=head_channels,
                            num_anchors=num_anchors,
                            num_layers=head_layers)
    self.classifier = ClassNet(in_channels=head_channels,
                               num_anchors=num_anchors,
                               num_classes=num_classes,
                               num_layers=head_layers)

    self.anchors = Anchors(anchor_scale=self.anchor_scale[phi])
    self.backbone_net = EfficientNet(self.backbone_phi[phi], load_weights)
def __init__(self, num_classes=80, phi=0, load_weight=False):
    """Build BiFPN, box and class heads, anchors and the EfficientNet backbone.

    Args:
        num_classes: Number of classes predicted by the classifier head.
        phi: Version index; selects one entry from each table below.
        load_weight: Forwarded to ``EfficientNet`` as ``load_weight``.
    """
    super(EfficientDet_BackBone, self).__init__()
    self.phi = phi

    # Per-version lookup tables, indexed by phi.
    self.backbone_phi = [0, 1, 2, 3, 4, 5, 6, 6]
    self.fpn_num_filters = [64, 88, 112, 160, 224, 288, 384, 384]
    self.fpn_cell_repeats = [3, 4, 5, 6, 7, 7, 8, 8]
    self.box_class_repeats = [3, 3, 3, 4, 4, 4, 5, 5]
    self.anchor_scale = [4., 4., 4., 4., 4., 4., 4., 5.]

    num_anchors = 9

    # Channel counts of p3-p5 at the backbone output (per the original
    # comment).
    conv_channel_coef = {
        0: [40, 112, 320],
        1: [40, 112, 320],
        2: [48, 120, 352],
        3: [48, 136, 384],
        4: [56, 160, 448],
        5: [64, 176, 512],
        6: [72, 200, 576],
        7: [72, 200, 576],
    }

    # The first cell is flagged with True; attention is on for phi < 6.
    with_attention = bool(phi < 6)
    repeat_count = self.fpn_cell_repeats[phi]
    cells = []
    for position in range(repeat_count):
        cells.append(
            BiFPN(self.fpn_num_filters[self.phi],
                  conv_channel_coef[phi],
                  position == 0,
                  attention=with_attention)
        )
    self.bifpn = nn.Sequential(*cells)

    self.num_classes = num_classes

    width = self.fpn_num_filters[self.phi]
    depth = self.box_class_repeats[self.phi]
    self.regressor = Box_Block(in_channels=width,
                               num_anchors=num_anchors,
                               num_layers=depth)
    self.classifier = Class_Block(in_channels=width,
                                  num_anchors=num_anchors,
                                  num_layers=depth,
                                  num_classes=num_classes)

    self.anchors = Anchors(anchor_scale=self.anchor_scale[phi])

    # NOTE(review): "bockbone_net" looks like a typo for "backbone_net".
    # The name is kept because code outside this block may read it.
    self.bockbone_net = EfficientNet(self.backbone_phi[phi],
                                     load_weight=load_weight)
def __init__(self, pretrain=False, anchors=None, o_sz=127, g_sz=127):
    """Create the feature, RPN, mask and refine sub-networks.

    Args:
        pretrain: Forwarded to ``ResDown``.
        anchors: Anchor configuration; must provide ``"ratios"`` and
            ``"scales"`` (the ``None`` default is not usable).
        o_sz: Stored as ``self.o_sz``.
        g_sz: Stored as ``self.g_sz``; also the output size of the
            up-sampler.
    """
    super(Custom, self).__init__()

    self.anchors = anchors  # anchor_cfg
    ratio_count = len(self.anchors["ratios"])
    scale_count = len(self.anchors["scales"])
    # One anchor per (ratio, scale) combination.
    self.anchor_num = ratio_count * scale_count
    self.anchor = Anchors(anchors)

    self.o_sz, self.g_sz = o_sz, g_sz

    # Bilinear up-sampling to a fixed g_sz x g_sz output.
    self.upSample = nn.Upsample(size=[g_sz, g_sz],
                                mode='bilinear',
                                align_corners=True)

    self.features = ResDown(pretrain=pretrain)
    self.rpn_model = UP(anchor_num=self.anchor_num,
                        feature_in=256,
                        feature_out=256)
    self.mask_model = MaskCorr()
    self.refine_model = Refine()

    self.all_anchors = None
class SiamMask(nn.Module):
    """Base module that wires a feature extractor, an RPN and a mask head.

    The sub-networks (``features``, ``rpn_model``, ``mask_model``) start as
    ``None``; this class only delegates to them.
    """

    def __init__(self, anchors=None, o_sz=127, g_sz=127):
        """Store the anchor configuration and sizes.

        Args:
            anchors: Anchor configuration; must provide ``"ratios"`` and
                ``"scales"`` (the ``None`` default is not usable).
            o_sz: Stored as ``self.o_sz``.
            g_sz: Stored as ``self.g_sz``.
        """
        super(SiamMask, self).__init__()
        self.anchors = anchors  # anchor_cfg
        ratio_count = len(self.anchors["ratios"])
        scale_count = len(self.anchors["scales"])
        self.anchor_num = ratio_count * scale_count
        self.anchor = Anchors(anchors)

        # Sub-network placeholders.
        self.features = self.rpn_model = self.mask_model = None

        self.o_sz, self.g_sz = o_sz, g_sz
        self.all_anchors = None

    def set_all_anchors(self, image_center, size):
        """Refresh ``self.all_anchors`` when the anchor helper regenerates.

        Nothing changes if ``generate_all_anchors`` returns a falsy value.
        Otherwise entry 1 of the helper's ``all_anchors`` (cx, cy, w, h) is
        moved to CUDA as float and stored as a list of its first four
        slices.
        """
        regenerated = self.anchor.generate_all_anchors(image_center, size)
        if regenerated:
            stacked = torch.from_numpy(self.anchor.all_anchors[1]).float().cuda()
            self.all_anchors = [stacked[component] for component in range(4)]

    def feature_extractor(self, x):
        """Return ``self.features(x)``."""
        return self.features(x)

    def rpn(self, template, search):
        """Return ``(pred_cls, pred_loc)`` from ``self.rpn_model``."""
        cls_scores, loc_offsets = self.rpn_model(template, search)
        return cls_scores, loc_offsets

    def mask(self, template, search):
        """Return the output of ``self.mask_model(template, search)``."""
        return self.mask_model(template, search)

    def template(self, z):
        """Extract features of ``z``, keep them in ``self.zf``, return kernels."""
        self.zf = self.feature_extractor(z)
        kernels = self.rpn_model.template(self.zf)
        cls_kernel, loc_kernel = kernels
        return cls_kernel, loc_kernel

    def track(self, x, cls_kernel=None, loc_kernel=None, softmax=False):
        """Run the RPN tracking branch on the features of ``x``.

        Returns:
            ``(rpn_pred_cls, rpn_pred_loc)``. With ``softmax=True`` the class
            scores are passed through ``self.softmax`` first.
        """
        search_features = self.feature_extractor(x)
        rpn_pred_cls, rpn_pred_loc = self.rpn_model.track(
            search_features, cls_kernel, loc_kernel)
        if softmax:
            # NOTE(review): ``self.softmax`` is not defined in the visible
            # part of this class; presumably it is provided elsewhere.
            rpn_pred_cls = self.softmax(rpn_pred_cls)
        return rpn_pred_cls, rpn_pred_loc
def __init__(self, **kwargs):
    """Initialise the detector from class defaults plus keyword overrides.

    Args:
        **kwargs: Attribute overrides, each set on ``self`` after the
            defaults have been copied in.
    """
    self.__dict__.update(self._defaults)
    for attr_name in kwargs:
        setattr(self, attr_name, kwargs[attr_name])

    # Configuration for the selected backbone network.
    self.cfg = cfg_mnet if self.backbone == "mobilenet" else cfg_re50

    # Box utilities and prior-box (anchor) generation.
    self.bbox_util = BBoxUtility(nms_thresh=self.nms_iou)
    input_hw = (self.input_shape[0], self.input_shape[1])
    self.anchors = Anchors(self.cfg, image_size=input_hw).get_anchors()

    self.generate()
def __init__(self, num_classes, phi, pretrained=False):
    """Assemble backbone, FPN, prediction heads and anchor generator.

    Args:
        num_classes: Number of classes for the classification head.
        phi: Backbone variant index, 0..4; any other value raises
            ``KeyError`` on the channel lookup.
        pretrained: Forwarded to ``Resnet``.
    """
    super(retinanet, self).__init__()
    self.pretrained = pretrained

    # Per the original author's notes: the backbone yields three feature
    # layers C3, C4 and C5. For a 600x600x3 input with resnet50 these are
    #   C3  75,75,512
    #   C4  38,38,1024
    #   C5  19,19,2048
    self.backbone_net = Resnet(phi, pretrained)

    # Channel counts of the three backbone outputs for each variant.
    shallow = [128, 256, 512]
    deep = [512, 1024, 2048]
    fpn_sizes = {0: shallow, 1: shallow, 2: deep, 3: deep, 4: deep}[phi]

    # Per the original author's notes: the FPN yields five feature layers
    #   P3  75,75,256
    #   P4  38,38,256
    #   P5  19,19,256
    #   P6  10,10,256
    #   P7  5,5,256
    c3_channels, c4_channels, c5_channels = fpn_sizes[0], fpn_sizes[1], fpn_sizes[2]
    self.fpn = PyramidFeatures(c3_channels, c4_channels, c5_channels)

    # Per the original author's notes: P3..P7 are passed to the heads for
    # regression and classification, and the per-layer results are stacked.
    self.regressionModel = RegressionModel(256)
    self.classificationModel = ClassificationModel(256, num_classes=num_classes)

    self.anchors = Anchors()
    self._init_weights()
def __init__(self, cfg, anchor_cfg, num_epoch=1):
    """Build the training dataset from ``cfg`` and the anchor configuration.

    Args:
        cfg: Dataset configuration mapping. ``'datasets'`` and
            ``'augmentation'`` are required; the size keys,
            ``'anchor_target'`` and ``'num'`` are optional. If
            ``'anchor_target'`` is missing an empty dict is inserted into
            ``cfg``.
        anchor_cfg: Configuration passed to ``Anchors``.
        num_epoch: Multiplier applied to the number of samples.

    Raises:
        Exception: If the configured sizes are inconsistent with the anchor
            stride, or if ``cfg`` has no ``'datasets'`` entry.
    """
    super(DataSets, self).__init__()
    global logger
    logger = logging.getLogger('global')

    # anchors
    self.anchors = Anchors(anchor_cfg)

    # Size settings: built-in defaults, overridden by cfg when present.
    size_defaults = (
        ('template_size', 127),
        ('origin_size', 127),
        ('search_size', 255),
        ('size', 17),
        ('base_size', 0),
    )
    self.crop_size = 0
    for option, fallback in size_defaults:
        setattr(self, option, cfg[option] if option in cfg else fallback)

    # The score-map size must agree with the search/template sizes and the
    # anchor stride.
    expected_size = ((self.search_size - self.template_size) / self.anchors.stride
                     + 1 + self.base_size)
    if expected_size != self.size:
        raise Exception("size not match!")  # TODO: calculate size online

    if 'crop_size' in cfg:
        self.crop_size = cfg['crop_size']

    self.template_small = bool('template_small' in cfg and cfg['template_small'])

    # Generate all anchors around the centre of the search image.
    self.anchors.generate_all_anchors(im_c=self.search_size // 2, size=self.size)

    # Anchor target layer (cls, reg, mask per the original comment).
    if 'anchor_target' not in cfg:
        cfg['anchor_target'] = {}
    self.anchor_target = AnchorTargetLayer(cfg['anchor_target'])

    # data sets
    if 'datasets' not in cfg:
        raise Exception('DataSet need "{}"'.format('datasets'))

    self.all_data = []
    self.num = 0
    start = 0
    for name in cfg['datasets']:
        sub_cfg = cfg['datasets'][name]
        sub_cfg['mark'] = name
        sub_cfg['start'] = start

        # Load the sub-dataset.
        sub_dataset = SubDataSet(sub_cfg)
        sub_dataset.log()
        self.all_data.append(sub_dataset)

        start += sub_dataset.num         # real video number
        self.num += sub_dataset.num_use  # the number used for subset shuffle

    # data augmentation
    aug_cfg = cfg['augmentation']
    self.template_aug = Augmentation(aug_cfg['template'])
    self.search_aug = Augmentation(aug_cfg['search'])
    self.gray = aug_cfg['gray']
    self.neg = aug_cfg['neg']
    self.inner_neg = aug_cfg['inner_neg'] if 'inner_neg' in aug_cfg else 0

    self.pick = None  # list to save id for each img

    # An explicit 'num' overrides the summed per-dataset count.
    if 'num' in cfg:  # number used in training for all dataset
        self.num = int(cfg['num'])
    self.num *= num_epoch
    self.shuffle()

    self.infos = {
        'template': self.template_size,
        'search': self.search_size,
        'template_small': self.template_small,
        'gray': self.gray,
        'neg': self.neg,
        'inner_neg': self.inner_neg,
        'crop_size': self.crop_size,
        'anchor_target': self.anchor_target.__dict__,
        'num': self.num // num_epoch,
    }
    logger.info('dataset informations: \n{}'.format(json.dumps(self.infos, indent=4)))
class DataSets(Dataset):
    """Dataset of template/search image pairs with per-anchor training targets.

    Each item is a tuple ``(template, search, cls, delta, delta_weight, bbox,
    mask, mask_weight)`` built from one of several sub-datasets.
    """

    def __init__(self, cfg, anchor_cfg, num_epoch=1):
        """Configure sizes, anchors, sub-datasets and augmentation from ``cfg``.

        Args:
            cfg: Mapping with the dataset configuration. ``'datasets'`` and
                ``'augmentation'`` are required; the size keys,
                ``'crop_size'``, ``'template_small'``, ``'anchor_target'`` and
                ``'num'`` are optional. ``cfg`` is modified in place (a
                default ``'anchor_target'`` entry is inserted and each
                dataset entry gets ``'mark'`` and ``'start'`` keys).
            anchor_cfg: Configuration forwarded to ``Anchors``.
            num_epoch: Multiplier applied to the number of samples.

        Raises:
            Exception: If the configured sizes are inconsistent or ``cfg``
                has no ``'datasets'`` entry.
        """
        super(DataSets, self).__init__()

        # The logger lives at module level because shuffle() logs through it.
        global logger
        logger = logging.getLogger('global')

        # Anchor generator.
        self.anchors = Anchors(anchor_cfg)

        # Built-in defaults for every size-related setting.
        self.template_size = 127
        self.origin_size = 127
        self.search_size = 255
        self.size = 17
        self.base_size = 0
        self.crop_size = 0

        # Override the defaults with whatever the configuration provides.
        for key in ('template_size', 'origin_size', 'search_size',
                    'base_size', 'size'):
            if key in cfg:
                setattr(self, key, cfg[key])

        # The score-map size must agree with the sizes and the anchor stride.
        expected_size = ((self.search_size - self.template_size)
                         / self.anchors.stride + 1 + self.base_size)
        if expected_size != self.size:
            raise Exception("size not match!")  # TODO: calculate size online

        if 'crop_size' in cfg:
            self.crop_size = cfg['crop_size']

        self.template_small = bool('template_small' in cfg
                                   and cfg['template_small'])

        # Generate all anchors around the centre of the search image.
        self.anchors.generate_all_anchors(im_c=self.search_size // 2,
                                          size=self.size)

        # Layer producing the per-anchor targets (cls, reg, mask weights).
        if 'anchor_target' not in cfg:
            cfg['anchor_target'] = {}
        self.anchor_target = AnchorTargetLayer(cfg['anchor_target'])

        # Sub-datasets.
        if 'datasets' not in cfg:
            raise Exception('DataSet need "{}"'.format('datasets'))

        self.all_data = []
        self.num = 0
        start = 0
        for name in cfg['datasets']:
            sub_cfg = cfg['datasets'][name]
            sub_cfg['mark'] = name
            sub_cfg['start'] = start

            # Load the sub-dataset and report it.
            subset = SubDataSet(sub_cfg)
            subset.log()
            self.all_data.append(subset)

            start += subset.num         # real video number
            self.num += subset.num_use  # the number used for subset shuffle

        # Data augmentation settings.
        aug_cfg = cfg['augmentation']
        self.template_aug = Augmentation(aug_cfg['template'])
        self.search_aug = Augmentation(aug_cfg['search'])
        self.gray = aug_cfg['gray']
        self.neg = aug_cfg['neg']
        self.inner_neg = aug_cfg['inner_neg'] if 'inner_neg' in aug_cfg else 0

        # List holding the sample id for every item; filled in by shuffle().
        self.pick = None

        # An explicit 'num' overrides the total accumulated from the subsets.
        if 'num' in cfg:
            self.num = int(cfg['num'])
        self.num *= num_epoch
        self.shuffle()

        self.infos = {
            'template': self.template_size,
            'search': self.search_size,
            'template_small': self.template_small,
            'gray': self.gray,
            'neg': self.neg,
            'inner_neg': self.inner_neg,
            'crop_size': self.crop_size,
            'anchor_target': self.anchor_target.__dict__,
            'num': self.num // num_epoch
        }
        logger.info('dataset informations: \n{}'.format(
            json.dumps(self.infos, indent=4)))

    def _map_size(self, exe, size):
        """Scale ``size`` by the ratio ``(exe + 1) / (origin_size + 1)``."""
        return int(round(((exe + 1) / (self.origin_size + 1) * (size + 1) - 1)))

    def imread(self, path):
        """Read an image and, if needed, resize it for the template size.

        Args:
            path: Path of the image file.

        Returns:
            ``(img, scale)``. When ``origin_size == template_size`` the image
            is returned untouched with ``scale == 1.0``. Otherwise the image
            is resized to a square of side ``nsize`` and ``scale`` is
            ``nsize`` divided by the width of the image *before* resizing.

        Raises:
            IOError: If OpenCV cannot read ``path``.
        """
        img = cv2.imread(path)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise IOError('cannot read image: {}'.format(path))

        if self.origin_size == self.template_size:
            return img, 1.0

        # Remember the width before resizing; reading it afterwards would
        # always give nsize and therefore a scale of exactly 1.
        orig_width = img.shape[1]
        nsize = self._map_size(self.template_size, orig_width)
        img = cv2.resize(img, (nsize, nsize))
        return img, nsize / float(orig_width)

    def shuffle(self):
        """Rebuild ``self.pick`` from freshly shuffled sub-dataset picks.

        Rounds of picks are appended until at least ``self.num`` ids have
        been collected.
        """
        pick = []
        # NOTE(review): if every subset.shuffle() keeps returning an empty
        # list while self.num > 0, this loop never terminates - confirm that
        # SubDataSet cannot be empty.
        while len(pick) < self.num:
            round_pick = []
            for subset in self.all_data:
                round_pick += subset.shuffle()
            # Mix the ids of the different sub-datasets within this round.
            sample_random.shuffle(round_pick)
            pick += round_pick

        self.pick = pick
        logger.info("shuffle done!")
        logger.info("dataset length {}".format(self.num))

    def __len__(self):
        """Return the number of samples (``self.num``)."""
        return self.num

    def find_dataset(self, index):
        """Locate the sub-dataset that owns a global ``index``.

        Returns:
            ``(dataset, local_index)`` for the first sub-dataset whose range
            ``start + num`` exceeds ``index``.

        Raises:
            IndexError: If no sub-dataset covers ``index``.
        """
        for dataset in self.all_data:
            if dataset.start + dataset.num > index:
                return dataset, index - dataset.start
        raise IndexError('index {} is outside every sub-dataset'.format(index))

    @staticmethod
    def _center_crop(img, size):
        """Crop a square window around the centre of ``img``.

        The image is returned as-is when its width already equals ``size``.
        The window spans ``2 * (size // 2) + 1`` pixels per side, so an even
        ``size`` yields ``size + 1`` pixels (behaviour kept from the original
        implementation).
        """
        width = img.shape[1]
        if width == size:
            return img
        center = width // 2
        lo = center - size // 2
        hi = center + size // 2 + 1
        return img[lo:hi, lo:hi]

    def _to_bbox(self, image, shape):
        """Build a box centred in ``image`` from a target ``shape``.

        Args:
            image: Image array; only its height and width are used.
            shape: Either 4 values, read as ``(x1, y1, x2, y2)``, or a
                ``(w, h)`` pair.

        Returns:
            The result of ``center2corner`` for a box at the image centre
            whose width/height are rescaled so that the context-padded
            target matches ``self.template_size``.
        """
        imh, imw = image.shape[:2]
        if len(shape) == 4:
            w, h = shape[2] - shape[0], shape[3] - shape[1]
        else:
            w, h = shape

        context_amount = 0.5
        exemplar_size = self.template_size
        # Pad both sides with context, then take the equivalent square side.
        wc_z = w + context_amount * (w + h)
        hc_z = h + context_amount * (w + h)
        s_z = np.sqrt(wc_z * hc_z)
        scale_z = exemplar_size / s_z
        w = w * scale_z
        h = h * scale_z

        cx, cy = imw // 2, imh // 2
        return center2corner(Center(cx, cy, w, h))

    def __getitem__(self, index, debug=False):
        """Build one training sample.

        Args:
            index: Position in the shuffled pick list.
            debug: Currently unused; kept for interface compatibility.

        Returns:
            Tuple ``(template, search, cls, delta, delta_weight, bbox, mask,
            mask_weight)``; ``template`` and ``search`` are float32 arrays
            transposed with axes ``(2, 0, 1)``, and ``mask`` holds +1/-1.

        Raises:
            IndexError: If the picked id belongs to no sub-dataset.
            IOError: If an image or mask file cannot be read.
        """
        # Translate the shuffled position into (sub-dataset, local index).
        index = self.pick[index]
        dataset, index = self.find_dataset(index)

        # Random draws happen only when the corresponding rate is truthy.
        gray = self.gray and self.gray > random.random()
        neg = self.neg and self.neg > random.random()

        if neg:
            # Negative pair: the search target is drawn independently of the
            # template, either from the same sub-dataset or from a random one.
            template = dataset.get_random_target(index)
            if self.inner_neg and self.inner_neg > random.random():
                search = dataset.get_random_target()
            else:
                search = random.choice(self.all_data).get_random_target()
        else:
            template, search = dataset.get_positive_pair(index)

        # Template image, optionally cropped to the template size.
        template_image, _ = self.imread(template[0])
        if self.template_small:
            template_image = self._center_crop(template_image,
                                               self.template_size)

        # Search image.
        search_image, _ = self.imread(search[0])

        # Mask: read from disk for positive samples of masked datasets,
        # otherwise all zeros.
        if dataset.has_mask and not neg:
            raw_mask = cv2.imread(search[2], 0)
            if raw_mask is None:
                raise IOError('cannot read mask: {}'.format(search[2]))
            search_mask = (raw_mask > 0).astype(np.float32)
        else:
            search_mask = np.zeros(search_image.shape[:2], dtype=np.float32)

        if self.crop_size > 0:
            search_image = self._center_crop(search_image, self.crop_size)
            search_mask = self._center_crop(search_mask, self.crop_size)

        # Boxes for the template and the search image.
        template_box = self._to_bbox(template_image, template[1])
        search_box = self._to_bbox(search_image, search[1])

        # Data augmentation.
        template_out, _, _ = self.template_aug(
            template_image, template_box, self.template_size, gray=gray)
        search_out, bbox, mask = self.search_aug(
            search_image, search_box, self.search_size,
            gray=gray, mask=search_mask)

        # Per-anchor targets.
        cls, delta, delta_weight = self.anchor_target(
            self.anchors, bbox, self.size, neg)
        if dataset.has_mask and not neg:
            mask_weight = cls.max(axis=0, keepdims=True)
        else:
            mask_weight = np.zeros([1, cls.shape[1], cls.shape[2]],
                                   dtype=np.float32)

        template_out = np.transpose(template_out, (2, 0, 1)).astype(np.float32)
        search_out = np.transpose(search_out, (2, 0, 1)).astype(np.float32)

        # Binarise at 0.5 and map {False, True} to {-1, +1}; adds a leading axis.
        mask = (np.expand_dims(mask, axis=0) > 0.5) * 2 - 1

        return template_out, search_out, cls, delta, delta_weight, \
            np.array(bbox, np.float32), \
            np.array(mask, np.float32), np.array(mask_weight, np.float32)