def __onet_detect(self, image, rnet_boxes):
    """Refine R-net candidate boxes with O-net.

    Squares each R-net box, crops it from `image`, resizes to 48x48,
    batches through O-net, keeps detections with confidence > 0.9999,
    applies the regressed offsets and returns min-area NMS survivors.
    """
    _img_dataset = []
    _rnet_boxes = utils.convert_to_square(rnet_boxes)
    for _box in _rnet_boxes:
        _x1 = int(_box[0])
        _y1 = int(_box[1])
        _x2 = int(_box[2])
        _y2 = int(_box[3])

        img = image.crop((_x1, _y1, _x2, _y2))
        img = img.resize((48, 48))
        # Center pixel values around 0 (transform presumably yields [0, 1]).
        img_data = self.__image_transform(img) - 0.5
        _img_dataset.append(img_data)

    img_dataset = torch.stack(_img_dataset)
    if self.isCuda:
        img_dataset = img_dataset.cuda()

    _cls, _offset = self.onet(img_dataset)
    cls = _cls.cpu().data.numpy()
    offset = _offset.cpu().data.numpy()

    # Row indices of detections above the confidence cut.
    idxs, _ = np.where(cls > 0.9999)

    # Vectorized offset regression over all kept boxes.
    _box = _rnet_boxes[idxs]
    # BUG FIX: np.int was deprecated in NumPy 1.20 and removed in 1.24;
    # the builtin int is the documented replacement.
    _x1 = np.array(_box[:, 0], dtype=int)
    _y1 = np.array(_box[:, 1], dtype=int)
    _x2 = np.array(_box[:, 2], dtype=int)
    _y2 = np.array(_box[:, 3], dtype=int)

    ow = _x2 - _x1
    oh = _y2 - _y1

    x1 = _x1 + ow * offset[idxs][:, 0]
    y1 = _y1 + oh * offset[idxs][:, 1]
    x2 = _x2 + ow * offset[idxs][:, 2]
    y2 = _y2 + oh * offset[idxs][:, 3]

    boxes = np.stack([x1, y1, x2, y2, cls[idxs][:, 0]], axis=1)

    return utils.nms(np.array(boxes), 0.7, isMin=True)
def __rnet_detect(self, image, pnet_boxes):
    """Refine P-net candidates with R-net.

    Squares each P-net box, crops/resizes to 24x24, runs R-net, keeps
    boxes with confidence > 0.8, applies the regressed offsets and
    returns NMS survivors at IoU 0.3.
    """
    _img_dataset = []
    _pnet_boxes = utils.convert_to_square(pnet_boxes)
    for _box in _pnet_boxes:
        _x1 = int(_box[0])
        _y1 = int(_box[1])
        _x2 = int(_box[2])
        _y2 = int(_box[3])

        img = image.crop((_x1, _y1, _x2, _y2))
        img = img.resize((24, 24))
        _img_dataset.append(self.__image_transform(img))

    img_dataset = torch.stack(_img_dataset)
    if self.isCuda:
        img_dataset = img_dataset.cuda()

    _cls, _offset = self.rnet(img_dataset)
    _cls = _cls.cpu().data.numpy()
    offset = _offset.cpu().data.numpy()

    # (N, 1) confidences -> (N,) boolean mask over candidates.
    idxs = np.squeeze(_cls > 0.8)

    _box = _pnet_boxes[idxs]
    _x1 = np.round(_box[:, 0])
    _y1 = np.round(_box[:, 1])
    _x2 = np.round(_box[:, 2])
    _y2 = np.round(_box[:, 3])

    ow = _x2 - _x1
    oh = _y2 - _y1

    x1 = _x1 + ow * offset[idxs][:, 0]
    y1 = _y1 + oh * offset[idxs][:, 1]
    x2 = _x2 + ow * offset[idxs][:, 2]
    y2 = _y2 + oh * offset[idxs][:, 3]

    cls = np.squeeze(_cls)[idxs]

    # BUG FIX: the previous list + np.squeeze() (no axis) + transpose((1, 0))
    # collapsed to shape (5,) when exactly one box survived and then crashed
    # on the transpose. np.stack keeps a stable (K, 5) layout for any K.
    boxes = np.stack([x1, y1, x2, y2, cls], axis=1)

    return utils.nms(boxes, 0.3)
def __rnet_detect(self, image, pnet_boxes): _img_dataset = [] # 创建空列表,存放抠图 _pnet_boxes = utils.convert_to_square(pnet_boxes) # ★给p网络输出的框,找出中心点,沿着最大边长的两边扩充成“正方形”,再抠图 for _box in _pnet_boxes: # ★遍历每个框,每个框返回框4个坐标点,抠图,放缩,数据类型转换,添加列表 _x1 = int(_box[0]) _y1 = int(_box[1]) _x2 = int(_box[2]) _y2 = int(_box[3]) img = image.crop((_x1, _y1, _x2, _y2)) # 根据4个坐标点抠图 img = img.resize((24, 24)) # 放缩在固尺寸 img_data = self.__image_transform(img) # 将图片数组转成张量 _img_dataset.append(img_data) img_dataset =torch.stack(_img_dataset) # stack堆叠(默认在0轴),此处相当数据类型转换,见例子2★ if self.isCuda: img_dataset = img_dataset.cuda() # 给图片数据采用cuda加速 _cls, _offset = self.rnet(img_dataset) # ★★将24*24的图片传入网络再进行一次筛选 cls = _cls.cpu().data.numpy() # 将gpu上的数据放到cpu上去,在转成numpy数组 offset = _offset.cpu().data.numpy() # print("r_cls:",cls.shape) # (11, 1):P网络生成了11个框 # print("r_offset:", offset.shape) # (11, 4) boxes = [] #R 网络要留下来的框,存到boxes里 idxs, _ = np.where(cls > r_cls) # 原置信度0.6是偏低的,时候很多框并没有用(可打印出来观察),可以适当调高些;idxs置信度框大于0.6的索引;★返回idxs:0轴上索引[0,1],_:1轴上索引[0,0],共同决定元素位置,见例子3 _box = _pnet_boxes[np.array(idxs)] for idx in idxs: # 根据索引,遍历符合条件的框;1轴上的索引,恰为符合条件的置信度索引(0轴上索引此处用不到) _box = _pnet_boxes[idx] _x1 = int(_box[0]) _y1 = int(_box[1]) _x2 = int(_box[2]) _y2 = int(_box[3]) ow = _x2 - _x1 # 基准框的宽 oh = _y2 - _y1 x1 = _x1 + ow * offset[idx][0] # 实际框的坐标点 y1 = _y1 + oh * offset[idx][1] x2 = _x2 + ow * offset[idx][2] y2 = _y2 + oh * offset[idx][3] boxes.append([x1, y1, x2, y2, cls[idx][0]]) # 返回4个坐标点和置信度 return utils.nms(np.array(boxes), r_nms) # 原r_nms为0.5(0.5要往小调),上面的0.6要往大调;小于0.5的框被保留下来
def __rnet_detect(self, image, pnet_boxes):
    """R-net refinement: square and crop the P-net boxes to 24x24, keep
    confidence > 0.6, regress coordinates with the predicted offsets,
    and NMS at IoU 0.3.

    The stray debug ``print("4")`` at entry was removed.
    """
    _img_dataset = []
    _pnet_boxes = utils.convert_to_square(pnet_boxes)
    for _box in _pnet_boxes:
        _x1 = int(_box[0])
        _y1 = int(_box[1])
        _x2 = int(_box[2])
        _y2 = int(_box[3])

        img = image.crop((_x1, _y1, _x2, _y2))
        img = img.resize((24, 24))
        _img_dataset.append(self.__image_transform(img))

    img_dataset = torch.stack(_img_dataset)
    if self.isCuda:
        img_dataset = img_dataset.cuda()

    _cls, _offset = self.rnet(img_dataset)
    _cls = _cls.cpu().data.numpy()       # (N, 1) confidences
    offset = _offset.cpu().data.numpy()  # (N, 4) offsets

    boxes = []
    # Rows whose R-net confidence passes the cut; used to filter the
    # P-net crops.
    idxs, _ = np.where(_cls > 0.6)
    for idx in idxs:
        _box = _pnet_boxes[idx]
        _x1 = int(_box[0])
        _y1 = int(_box[1])
        _x2 = int(_box[2])
        _y2 = int(_box[3])

        ow = _x2 - _x1
        oh = _y2 - _y1

        x1 = _x1 + ow * offset[idx][0]
        y1 = _y1 + oh * offset[idx][1]
        x2 = _x2 + ow * offset[idx][2]
        y2 = _y2 + oh * offset[idx][3]

        boxes.append([x1, y1, x2, y2, _cls[idx][0]])

    return utils.nms(np.array(boxes), 0.3)
def __onet_detect(self, image, rnet_boxes):
    """O-net stage, vectorized: 48x48 crops, confidence cut 0.9999,
    offset regression over all survivors, min-area NMS at 0.3."""
    square_boxes = utils.convert_to_square(rnet_boxes)

    patches = []
    for box in square_boxes:
        region = (int(box[0]), int(box[1]), int(box[2]), int(box[3]))
        patches.append(self.__image_transform(image.crop(region).resize((48, 48))))

    batch = torch.stack(patches)
    if self.isCuda:
        batch = batch.cuda()

    conf_t, off_t = self.onet(batch)
    conf = conf_t.cpu().data.numpy()
    offset = off_t.cpu().data.numpy()

    # (N, 1) confidence column -> boolean mask of shape (N,).
    mask = np.squeeze(conf > 0.9999)

    kept = square_boxes[mask]
    left = np.round(kept[:, 0])
    top = np.round(kept[:, 1])
    right = np.round(kept[:, 2])
    bottom = np.round(kept[:, 3])

    bw = right - left
    bh = bottom - top

    out = np.stack([
        left + bw * offset[mask][:, 0],
        top + bh * offset[mask][:, 1],
        right + bw * offset[mask][:, 2],
        bottom + bh * offset[mask][:, 3],
        np.squeeze(conf)[mask],
    ], axis=1)

    return utils.nms(out, 0.3, isMin=True)
def forward(self, input, thresh, anchors):
    """Run the detector on `input` and decode all three scale heads.

    Each head is thresholded by `thresh` via self._filter and decoded to
    image-space boxes via self._parse with its per-scale factor
    (30 / 15 / 7.5) and anchor set. NOTE(review): `anchors` is keyed by
    10/20/40 — confirm this matches the anchor-config convention.
    Returns the NMS survivors at IoU 0.5 (area-union mode).

    The leftover debug print of ``boxes.shape`` was removed.
    """
    output_13, output_26, output_52 = self.net(input.cuda())

    idxs_13, vecs_13 = self._filter(output_13, thresh)
    boxes_13 = self._parse(idxs_13, vecs_13, 30, anchors[10])

    idxs_26, vecs_26 = self._filter(output_26, thresh)
    boxes_26 = self._parse(idxs_26, vecs_26, 15, anchors[20])

    idxs_52, vecs_52 = self._filter(output_52, thresh)
    boxes_52 = self._parse(idxs_52, vecs_52, 7.5, anchors[40])

    boxes = torch.cat([boxes_13, boxes_26, boxes_52], dim=0).cpu().detach().numpy()
    return utils.nms(boxes, 0.5, isMin=False)
def __onet_detect(self, image, rnet_boxes): _img_dataset = [] # 创建列表,存放抠图r _rnet_boxes = utils.convert_to_square( rnet_boxes) # 给r网络输出的框,找出中心点,沿着最大边长的两边扩充成“正方形” for _box in _rnet_boxes: # 遍历R网络筛选出来的框,计算坐标,抠图,缩放,数据类型转换,添加列表,堆叠 _x1 = int(_box[0]) _y1 = int(_box[1]) _x2 = int(_box[2]) _y2 = int(_box[3]) img = image.crop((_x1, _y1, _x2, _y2)) # 根据坐标点“抠图” img = img.resize((48, 48)) img_data = self.__image_transform(img) # 将抠出的图转成张量 _img_dataset.append(img_data) img_dataset = torch.stack(_img_dataset) # 堆叠,此处相当数据格式转换,见例子2 if self.isCuda: img_dataset = img_dataset.cuda() _cls, _offset = self.onet(img_dataset) cls = _cls.cpu().data.numpy() # (1, 1) offset = _offset.cpu().data.numpy() # (1, 4) boxes = [] # 存放o网络的计算结果 # 原o_cls为0.97是偏低的,最后要达到标准置信度要达到0.99999,这里可以写成0.99998, # 这样的话出来就全是人脸;留下置信度大于0.97的框;★返回idxs:0轴上索引[0],_:1轴上索引[0],共同决定元素位置,见例子3 idxs, _ = np.where(cls > o_cls) for idx in idxs: # 根据索引,遍历符合条件的框;1轴上的索引,恰为符合条件的置信度索引(0轴上索引此处用不到) _box = _rnet_boxes[idx] # 以R网络做为基准框 _x1 = int(_box[0]) _y1 = int(_box[1]) _x2 = int(_box[2]) _y2 = int(_box[3]) ow = _x2 - _x1 # 框的基准宽,框是“方”的,ow=oh oh = _y2 - _y1 x1 = _x1 + ow * offset[idx][ 0] # O网络最终生成的框的坐标;生样,偏移量△δ=x1-_x1/w*side_len y1 = _y1 + oh * offset[idx][1] x2 = _x2 + ow * offset[idx][2] y2 = _y2 + oh * offset[idx][3] boxes.append([x1, y1, x2, y2, cls[idx][0]]) # 返回4个坐标点和1个置信度 return utils.nms(np.array(boxes), o_nms, isMin=True) # 用最小面积的IOU;原o_nms(IOU)为小于0.7的框被保留下来
def __pnet_detect(self, img):
    """Scan an image pyramid with the fully-convolutional P-net.

    At each scale, every feature-map cell with confidence > 0.6 is
    back-computed to an original-image box via self.__box (vectorized).
    BUG FIX: the previous version did ``boxes = self.__box(...)`` inside
    the loop, overwriting the accumulator each iteration so only the
    LAST pyramid scale's boxes survived; boxes from all scales are now
    accumulated. The stray ``print("2")`` was also removed.
    """
    scale_boxes = []  # one (K, 5) array per pyramid scale — TODO confirm __box's return shape
    w, h = img.size
    min_side_len = min(w, h)
    scale = 1

    while min_side_len >= 12:
        img_data = self.__image_transform(img)
        if self.isCuda:
            img_data = img_data.cuda()
        img_data.unsqueeze_(0)  # CHW -> NCHW batch of one

        # P-net is fully convolutional: cls/offset come back as NCHW maps,
        # one confidence and one 4-vector of offsets per feature cell.
        _cls, _offest = self.pnet(img_data)
        cls, offest = _cls[0][0].cpu().data, _offest[0].cpu().data

        # (row, col) indices of cells above the confidence threshold.
        idxs = torch.nonzero(torch.gt(cls, 0.6))
        scale_boxes.append(self.__box(idxs, offest, cls, scale))

        scale *= 0.709
        _w = int(w * scale)
        _h = int(h * scale)
        img = img.resize((_w, _h))
        min_side_len = np.minimum(_w, _h)

    boxes = np.vstack(scale_boxes) if scale_boxes else np.empty((0, 5))
    return utils.nms(boxes, 0.3)
def __pnet_detect(self, img):
    """Image-pyramid P-net pass: confidence cut 0.5, pyramid factor
    0.707, NMS at IoU 0.3. Boxes from every scale are concatenated."""
    per_scale = []
    w, h = img.size
    side = min(w, h)
    scale = 1

    while side >= 12:
        tensor = self.__image_transform(img)
        if self.isCuda:
            tensor = tensor.cuda()
        tensor.unsqueeze_(0)  # [C,H,W] -> [N,C,H,W]

        _cls, _offest = self.pnet(tensor)
        cls = _cls[0][0].cpu().data      # per-cell confidence map
        offest = _offest[0].cpu().data   # per-cell 4-vector offsets

        idxs = torch.nonzero(torch.gt(cls, 0.5))
        per_scale.append(
            self.__box(idxs, offest, cls[idxs[:, 0], idxs[:, 1]], scale))

        scale *= 0.707
        _w, _h = int(w * scale), int(h * scale)
        img = img.resize((_w, _h))
        side = np.minimum(_w, _h)

    # Equivalent to the original scale==1 / concatenate branching:
    # one concatenate over all per-scale arrays (empty list -> []).
    boxes = np.concatenate(per_scale, axis=0) if per_scale else []
    return utils.nms(boxes, 0.3)
def __onet_detect(self, image, rnet_boxes):
    """O-net stage: 48x48 crops of the squared R-net boxes, confidence
    cut 0.90, offset regression, min-area NMS at IoU 0.3."""
    square_boxes = utils.convert_to_square(rnet_boxes)

    patches = []
    for box in square_boxes:
        region = (int(box[0]), int(box[1]), int(box[2]), int(box[3]))
        patches.append(self.__image_transform(image.crop(region).resize((48, 48))))

    batch = torch.stack(patches)
    if self.isCuda:
        batch = batch.cuda()

    conf_t, off_t = self.onet(batch)
    conf = conf_t.cpu().data.numpy()
    offset = off_t.cpu().data.numpy()

    keep, _ = np.where(conf > 0.90)

    results = []
    for i in keep:
        bx = square_boxes[i]
        left, top = int(bx[0]), int(bx[1])
        right, bottom = int(bx[2]), int(bx[3])
        bw = right - left
        bh = bottom - top
        results.append([
            left + bw * offset[i][0],
            top + bh * offset[i][1],
            right + bw * offset[i][2],
            bottom + bh * offset[i][3],
            conf[i][0],
        ])

    return utils.nms(np.array(results), 0.3, isMin=True)
def __rnet_detect(self, image, pnet_boxes):
    """R-net (TensorFlow) stage: 24x24 crops normalized to [-0.5, 0.5],
    confidence cut 0.7, offset regression, min-area NMS at IoU 0.5."""
    square_boxes = utils.convert_to_square(pnet_boxes)

    patches = []
    for box in square_boxes:
        region = (int(box[0]), int(box[1]), int(box[2]), int(box[3]))
        patch = image.crop(region).resize((24, 24))
        patches.append(np.array(patch) / 255. - 0.5)

    batch = np.stack(patches)

    cls, offset = self.rnet.sess.run(
        [self.rnet.cls_pre, self.rnet.off_pre],
        feed_dict={self.rnet.input: batch})

    keep, _ = np.where(cls > 0.7)

    results = []
    for i in keep:
        bx = square_boxes[i]
        left, top = int(bx[0]), int(bx[1])
        right, bottom = int(bx[2]), int(bx[3])
        bw, bh = right - left, bottom - top
        results.append([
            left + bw * offset[i][0],
            top + bh * offset[i][1],
            right + bw * offset[i][2],
            bottom + bh * offset[i][3],
            cls[i][0],
        ])

    return utils.nms(np.array(results), 0.5, isMin=True)
def detect(self, image, thresh, net=None):
    """Full-image detection with per-image, per-class NMS.

    Optionally swaps in `net`, decodes the three scale heads (strides
    32/16/8), concatenates non-empty heads, then NMS at IoU 0.3 per
    image index (column 6) and class id (column 5).

    NOTE(review): returns None when nothing passes the threshold —
    confirm callers expect None rather than an empty array.
    """
    if net is not None:  # idiom fix: identity test instead of != None
        self.net = net.to(self.device)
    input = self.transform(image)
    input.unsqueeze_(dim=0)
    output_13, output_26, output_52 = self.net(input.to(self.device))
    idx_13, vecs_13 = self._filter(output_13, thresh)
    idx_26, vecs_26 = self._filter(output_26, thresh)
    idx_52, vecs_52 = self._filter(output_52, thresh)
    box_13 = self._parse(idx_13, vecs_13, 32, self.anchors[13])
    box_26 = self._parse(idx_26, vecs_26, 16, self.anchors[26])
    box_52 = self._parse(idx_52, vecs_52, 8, self.anchors[52])

    # Keep only heads that actually produced boxes before concatenating.
    box_list = [b for b in (box_13, box_26, box_52) if b.shape[0] != 0]
    if len(box_list) > 0:
        boxes_all = np.concatenate(box_list, axis=0)
        last_boxes = []
        for n in range(input.size(0)):
            n_boxes = []
            boxes_n = boxes_all[boxes_all[:, 6] == n]    # column 6: image index
            for cls in range(cfg.CLASS_NUM):
                boxes_c = boxes_n[boxes_n[:, 5] == cls]  # column 5: class id
                if boxes_c.shape[0] > 0:
                    n_boxes.extend(utils.nms(boxes_c, 0.3))
            last_boxes.extend(np.stack(n_boxes))
        last_boxes = np.stack(last_boxes)
        return last_boxes
    return
def __rnet_detect(self, image, pnet_boxes):
    """R-net stage: square the P-net boxes, crop/resize to 24x24, keep
    confidence > 0.6, regress offsets, NMS at IoU 0.5."""
    square_boxes = utils.convert_to_square(pnet_boxes)

    patches = []
    for box in square_boxes:
        region = (int(box[0]), int(box[1]), int(box[2]), int(box[3]))
        patches.append(self.__image_transform(image.crop(region).resize((24, 24))))

    batch = torch.stack(patches)  # assemble one batch tensor
    if self.isCuda:
        batch = batch.cuda()

    conf_t, off_t = self.rnet(batch)
    conf = conf_t.cpu().data.numpy()
    offset = off_t.cpu().data.numpy()

    keep, _ = np.where(conf > 0.6)

    results = []
    for i in keep:
        bx = square_boxes[i]  # anchor box from the P-net stage
        left, top = int(bx[0]), int(bx[1])
        right, bottom = int(bx[2]), int(bx[3])
        bw, bh = right - left, bottom - top
        results.append([
            left + bw * offset[i][0],
            top + bh * offset[i][1],
            right + bw * offset[i][2],
            bottom + bh * offset[i][3],
            conf[i][0],
        ])

    return utils.nms(np.array(results), 0.5)
def __pnet_detect(self, image):
    """Pyramid P-net scan (fully convolutional — any input size).

    Thresholds per-cell confidences at the module-level `p_cls`,
    back-computes each scale's boxes with self.__boox (vectorized), and
    filters the accumulated set with NMS at `p_nms`.

    Fixes: the old np.random.randn(1, 5) placeholder row (sliced off at
    return with box1[1:]) is replaced by a proper empty (0, 5)
    accumulator; the unused time.time() call and dead loop code removed.
    """
    collected = np.empty((0, 5))
    img = image
    w, h = img.size
    min_side_len = min(w, h)  # shortest image side
    scale = 1  # initial pyramid scale (1 = no resize)

    while min_side_len > 12:
        img_data = self.__image_transform(img)
        img_data = img_data.cuda()
        img_data.unsqueeze_(0)  # add batch dim: CHW -> NCHW

        _cls, _offest = self.pnet(img_data)
        cls = _cls[0][0].cpu().data     # per-cell confidence map
        offest = _offest[0].cpu().data  # per-cell offsets [4, H, W]

        # Cells whose confidence beats the p_cls threshold.
        idxs = torch.nonzero(torch.gt(cls, p_cls))
        # Back-compute this scale's hits to original-image boxes.
        scale_boxes = self.__boox(idxs, offest, cls[idxs[:, 0], idxs[:, 1]], scale)
        collected = np.vstack((collected, scale_boxes))

        scale *= 0.7
        _w = int(w * scale)
        _h = int(h * scale)
        img = img.resize((_w, _h))
        min_side_len = min(_w, _h)

    return utils.nms(collected, p_nms)
def boxDetect(self,
              inputBoxes,
              cons,
              offsets,
              landMark,
              conMax,
              nmsMax=0.3,
              iouMode='inter'):
    """Keep boxes with confidence > conMax, regress coordinates and the
    five facial landmarks against the kept anchor boxes, then NMS.

    Returns [] when nothing passes; otherwise the NMS output over rows
    [x1, y1, x2, y2, conf, x3..y7] (box + confidence + 5 landmarks).
    """
    mask = cons > conMax
    mask_index = mask.nonzero()[:, 0]  # row indices of the survivors
    cons = cons[mask]
    offsets = torch.index_select(offsets, dim=0, index=mask_index)
    boxes = torch.index_select(inputBoxes, dim=0, index=mask_index)
    landMark = torch.index_select(landMark, dim=0, index=mask_index)
    if cons.size(0) == 0:
        return []
    # Regress the kept (R-net) boxes by their predicted offsets.
    w = boxes[:, 2] - boxes[:, 0]
    h = boxes[:, 3] - boxes[:, 1]
    x1 = boxes[:, 0] + w * offsets[:, 0]
    y1 = boxes[:, 1] + h * offsets[:, 1]
    x2 = boxes[:, 2] + w * offsets[:, 2]
    y2 = boxes[:, 3] + h * offsets[:, 3]
    # NOTE(review): the landmark anchors below mix bases inconsistently —
    # x4/x7 anchor on boxes[:, 2] while x3/x5/x6 use boxes[:, 0], and
    # y4 uses boxes[:, 1] but y7 uses boxes[:, 3]. This looks like
    # copy-paste drift; confirm against the training-time landmark
    # encoding before changing anything.
    x3 = boxes[:, 0] + w * landMark[:, 0]
    y3 = boxes[:, 1] + h * landMark[:, 1]
    x4 = boxes[:, 2] + w * landMark[:, 2]
    y4 = boxes[:, 1] + h * landMark[:, 3]
    x5 = boxes[:, 0] + w * landMark[:, 4]
    y5 = boxes[:, 1] + h * landMark[:, 5]
    x6 = boxes[:, 0] + w * landMark[:, 6]
    y6 = boxes[:, 1] + h * landMark[:, 7]
    x7 = boxes[:, 2] + w * landMark[:, 8]
    y7 = boxes[:, 3] + h * landMark[:, 9]
    return utils.nms(torch.stack(
        [x1, y1, x2, y2, cons, x3, y3, x4, y4, x5, y5, x6, y6, x7, y7],
        dim=1),
                     thresh=nmsMax,
                     mode=iouMode)
def __rnet_detect(self, image, pnet_boxes):
    """Vectorized R-net stage: 24x24 crops, confidence cut 0.6, offset
    regression over all survivors, NMS at IoU 0.3."""
    square_boxes = utils.convert_to_square(pnet_boxes)

    patches = []
    for box in square_boxes:
        region = (int(box[0]), int(box[1]), int(box[2]), int(box[3]))
        patches.append(self.__image_transform(image.crop(region).resize((24, 24))))

    batch = torch.stack(patches)
    if self.isCuda:
        batch = batch.cuda()

    conf_t, off_t = self.rnet(batch)
    conf = conf_t.cpu().data.numpy()
    offset = off_t.cpu().data.numpy()

    keep, _ = np.where(conf > 0.6)
    kept = square_boxes[keep]

    bx1, by1 = kept[:, 0], kept[:, 1]
    bx2, by2 = kept[:, 2], kept[:, 3]
    bw = bx2 - bx1
    bh = by2 - by1

    out = np.stack((
        bx1 + bw * offset[keep][:, 0],
        by1 + bh * offset[keep][:, 1],
        bx2 + bw * offset[keep][:, 2],
        by2 + bh * offset[keep][:, 3],
        conf[keep][:, 0],
    ), axis=1)

    return utils.nms(out, 0.3)
def __pnet_detect(self, image):
    """TensorFlow P-net pyramid scan: inputs normalized to [-0.5, 0.5],
    confidence cut 0.6, pyramid factor 0.7, NMS at IoU 0.5."""
    results = []
    img = image
    w, h = img.size
    side = min(w, h)
    scale = 1

    while side > 12:
        data = np.array(img) / 255. - 0.5
        data = data[np.newaxis, :]  # add the batch dimension

        _cls, _offest = self.pnet.sess.run(
            [self.pnet.cls_pre, self.pnet.off_pre],
            feed_dict={self.pnet.input: data})

        cls = _cls[0, :, :, 0]  # per-cell confidence map
        offest = _offest[0]     # per-cell offsets

        rows, cols = np.where(cls > 0.6)
        for r, c in zip(rows, cols):
            results.append(self.__box((r, c), offest, cls[r, c], scale))

        scale *= 0.7
        _w, _h = int(w * scale), int(h * scale)
        img = img.resize((_w, _h))
        side = min(_w, _h)

    return utils.nms(np.array(results), 0.5)
def __pnet_detect(self, image):
    """P-net pyramid scan: confidence > 0.6, pyramid factor 0.7, NMS at
    IoU 0.5; boxes are back-computed per feature-map cell."""
    candidates = []
    img = image
    w, h = img.size
    side = min(w, h)
    scale = 1

    while side > 12:
        data = self.__image_transform(img)  # CHW tensor
        if self.isCuda:
            data = data.cuda()
        data.unsqueeze_(0)  # prepend batch dim -> NCHW

        conf_t, off_t = self.pnet(data)
        conf = conf_t[0][0].cpu().data
        offset = off_t[0].cpu().data

        # Every cell above the threshold becomes a candidate box.
        for idx in torch.nonzero(torch.gt(conf, 0.6)):
            candidates.append(
                self.__box(idx, offset, conf[idx[0], idx[1]], scale))

        scale *= 0.7
        _w, _h = int(w * scale), int(h * scale)
        img = img.resize((_w, _h))
        side = min(_w, _h)

    return utils.nms(np.array(candidates), 0.5)
def __pnet_detect(self, image):
    """Pyramid P-net pass accumulating boxes in a single tensor.

    Inputs are shifted by -0.5 after the transform; cells with
    confidence > 0.5 are back-computed in one vectorized __box call per
    scale, NMS at 0.6 at the end.

    Fix: the trailing ``del img_data, _cls, ...`` was removed — it was a
    correctness no-op and raised NameError whenever the image was
    already smaller than 12px (the loop body never ran, so none of the
    deleted names existed). Dead commented-out code also removed.
    """
    boxes = torch.tensor([])
    img = image
    scale = 1
    w, h = img.size
    min_side_len = min(w, h)

    while min_side_len > 12:
        img_data = self.__image_transform(img) - 0.5
        if self.isCuda:
            img_data = img_data.cuda()
        img_data.unsqueeze_(0)  # CHW -> NCHW

        _cls, _offest = self.pnet(img_data)
        cls, offest = _cls[0][0].cpu().data, _offest[0].cpu().data
        idxs = torch.nonzero(torch.gt(cls, 0.5))

        # Append this scale's boxes to the running tensor.
        boxes = torch.cat(
            (boxes, self.__box(idxs, offest, cls[idxs[:, 0], idxs[:, 1]], scale)))

        scale *= 0.7
        _w = int(w * scale)
        _h = int(h * scale)
        img = img.resize((_w, _h))
        min_side_len = min(_w, _h)

    return utils.nms(np.array(boxes), 0.6)
def __pnet_detect(self, image):
    """P-net pyramid scan: confidence > 0.65, pyramid factor 0.7, NMS at
    IoU 0.3."""
    found = []
    img = image
    w, h = img.size
    shortest = min(w, h)
    scale = 1

    while shortest > 12:
        data = self.__image_transform(img)
        if self.isCuda:
            data = data.cuda()
        data.unsqueeze_(0)  # batch of one

        conf_t, off_t = self.pnet(data)
        conf = conf_t[0][0].cpu().data
        offset = off_t[0].cpu().data

        for idx in torch.nonzero(torch.gt(conf, 0.65)):
            found.append(self.__box(idx, offset, conf[idx[0], idx[1]], scale))

        scale *= 0.7
        _w, _h = int(w * scale), int(h * scale)
        img = img.resize((_w, _h))
        shortest = np.minimum(_w, _h)

    return utils.nms(np.array(found), 0.3)
def __pnet_detect(self, image):
    """Pyramid P-net scan (thresholds from module-level p_cls / p_nms).

    Fixes two defects:
    * the loop condition tested ``min_silde_len`` (typo) while the loop
      body updated ``min_side_len`` — the condition never changed, so
      the loop never terminated normally; one consistently-named
      variable is used now;
    * ``img_data.unsqueeze(0)`` is NOT in-place, so the batch dimension
      was never actually added; the in-place ``unsqueeze_(0)`` is used.

    NOTE(review): p_cls / p_nms are assumed to be module-level config
    constants — confirm they are defined in this file.
    """
    boxes = []
    img = image
    w, h = img.size
    min_side_len = min(w, h)
    scale = 1  # initial pyramid scale (1 = no resize)

    while min_side_len > 12:
        img_data = self.__image_transform(img)
        if self.isCuda:
            img_data = img_data.cuda()
        img_data.unsqueeze_(0)  # add the batch dimension in place

        _cls, _offest = self.pnet(img_data)  # per-cell confidences and offsets
        cls = _cls[0][0].cpu().data     # confidence map
        offest = _offest[0].cpu().data  # offsets [4, H, W]

        # Cells whose confidence beats the p_cls threshold; if faces are
        # missed here, the net is undertrained or the threshold too high.
        idxs = torch.nonzero(torch.gt(cls, p_cls))
        for idx in idxs:
            # Back-compute each feature-map hit to an original-image box.
            boxes.append(self.__box(idx, offest, cls[idx[0], idx[1]], scale))

        scale *= 0.7
        _w = int(w * scale)
        _h = int(h * scale)
        img = img.resize((_w, _h))
        min_side_len = min(_w, _h)

    return utils.nms(np.array(boxes), p_nms)
def __onet_detect(self, image, rnet_boxes):
    """O-net stage with landmark output.

    Squares the R-net boxes, crops 48x48 patches (values shifted by
    -0.5), keeps detections above self.cls[2], regresses the box and the
    five facial landmarks, then min-area NMS at self.nms[2].
    """
    if len(rnet_boxes) == 0:
        return []
    _img_dataset = []
    _rnet_boxes = utils.convert_to_square(rnet_boxes)  # re-square the R-net boxes before cropping
    for _box in _rnet_boxes:  # read out the coordinates
        _x1 = int(_box[0])
        _y1 = int(_box[1])
        _x2 = int(_box[2])
        _y2 = int(_box[3])

        img = image.crop((_x1, _y1, _x2, _y2))
        img = img.resize((48, 48))
        img_data = self.image_transform(img) - 0.5  # center values around 0
        _img_dataset.append(img_data)

    img_dataset = torch.stack(_img_dataset)
    img_dataset = img_dataset.to(self.device)
    _cls, _offset, _landmak = self.onet(img_dataset)
    cls = _cls.cpu().data.numpy()
    offset = _offset.cpu().data.numpy()
    landmak = _landmak.cpu().data.numpy()
    boxes = []
    idxs, _ = np.where(cls > self.cls[2])  # rows above the O-net confidence threshold
    for idx in idxs:
        _box = _rnet_boxes[idx]
        _x1 = int(_box[0])
        _y1 = int(_box[1])
        _x2 = int(_box[2])
        _y2 = int(_box[3])

        ow = _x2 - _x1
        oh = _y2 - _y1

        x1 = _x1 + ow * offset[idx][0]  # map the regressed box back onto the original image
        y1 = _y1 + oh * offset[idx][1]
        x2 = _x2 + ow * offset[idx][2]
        y2 = _y2 + oh * offset[idx][3]

        # NOTE(review): the landmark anchors mix _x1/_x2 and _y1/_y2
        # bases inconsistently (e.g. x4 uses _x2 but x5/x6 use _x1,
        # y6/y7 use _y2 while y3..y5 use _y1) — looks like copy-paste
        # drift; confirm against the training-time landmark encoding.
        x3 = _x1 + ow * landmak[idx][0]
        y3 = _y1 + oh * landmak[idx][1]
        x4 = _x2 + ow * landmak[idx][2]
        y4 = _y1 + oh * landmak[idx][3]
        x5 = _x1 + ow * landmak[idx][4]
        y5 = _y1 + oh * landmak[idx][5]
        x6 = _x1 + ow * landmak[idx][6]
        y6 = _y2 + oh * landmak[idx][7]
        x7 = _x2 + ow * landmak[idx][8]
        y7 = _y2 + oh * landmak[idx][9]

        boxes.append([
            x1, y1, x2, y2, x3, y3, x4, y4, x5, y5, x6, y6, x7, y7,
            cls[idx][0]
        ])
    # Min-area IoU NMS; suppresses the large-box-covers-small-box case.
    return utils.nms(np.array(boxes), self.nms[2], isMin=True)