class Handler:
    """Predict 68 facial landmarks for every face found by an ESSH detector.

    The landmark network is the ``heatmap_output`` internal layer of an MXNet
    checkpoint, bound for inference on a single 128x128 three-channel image.
    """

    def __init__(self, prefix, epoch, ctx_id=0):
        """Load the landmark checkpoint and create the face detector.

        Args:
            prefix: checkpoint prefix passed to ``mx.model.load_checkpoint``.
            epoch: checkpoint epoch passed to ``mx.model.load_checkpoint``.
            ctx_id: GPU id to run on; any negative value selects the CPU.
        """
        print('loading', prefix, epoch)
        ctx = mx.gpu(ctx_id) if ctx_id >= 0 else mx.cpu()
        sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
        # Only the heatmap head is needed for inference.
        sym = sym.get_internals()['heatmap_output']
        image_size = (128, 128)
        self.image_size = image_size
        model = mx.mod.Module(symbol=sym, context=ctx, label_names=None)
        model.bind(for_training=False,
                   data_shapes=[('data', (1, 3, image_size[0], image_size[1]))])
        model.set_params(arg_params, aux_params)
        self.model = model
        self.detector = ESSHDetector('./essh-model/essh-r50', 0)

    def trans_dot(self, trans1, trans2):
        """Compose two 2x3 affine transforms.

        Args:
            trans1: 2x3 affine matrix applied first.
            trans2: 2x3 affine matrix applied second.

        Returns:
            The 2x3 matrix equivalent to applying ``trans1`` then ``trans2``.
        """
        # Promote both to 3x3 homogeneous form so they can be multiplied.
        trans1 = np.vstack((trans1, [0, 0, 1]))
        trans2 = np.vstack((trans2, [0, 0, 1]))
        # NOTE(review): the left operand was missing in the reviewed source;
        # restored as trans2 (result is named trans21) - confirm.
        trans21 = np.dot(trans2, trans1)[0:2]
        return trans21

    def get_landmark(self, img):
        """Detect faces in ``img`` and locate 68 landmarks on each of them.

        Args:
            img: image handed to the detector and to ``img_helper.preprocess``;
                it is converted with ``cv2.COLOR_BGR2RGB`` before inference,
                so BGR channel order is presumably expected - confirm.

        Returns:
            ``None`` when the detector returns ``None`` or no rows; otherwise
            a float32 array of shape ``(num_faces, 68, 2)`` holding ``(x, y)``
            positions mapped back through the inverse crop transform. A face
            whose re-detection on the normalised crop fails keeps an all-zero
            row.
        """
        import datetime  # local import: the file's import block is not visible here

        ret = self.detector.detect(img, threshold=0.4)
        if ret is None or ret.shape[0] == 0:
            return None
        bbox = ret[:, 0:4]
        # Columns 5..14 are read as five (x, y) detector landmarks per face.
        points = ret[:, 5:15].reshape(-1, 5, 2)
        landmark_list = np.zeros((bbox.shape[0], 68, 2), dtype=np.float32)
        for i in range(bbox.shape[0]):
            rimg, _, trans1 = img_helper.preprocess(img, points[i], 384)
            ret2 = self.detector.detect(rimg, threshold=0.4)
            if ret2 is None or ret2.shape[0] == 0:
                # The original indexed ret2[0] unconditionally and crashed here.
                continue
            rimg, trans2 = img_helper.transform2(rimg, None, self.image_size[0],
                                                 ret2[0, 0:4])
            trans = self.trans_dot(trans1, trans2)

            img2 = cv2.cvtColor(rimg, cv2.COLOR_BGR2RGB)
            img2 = np.transpose(img2, (2, 0, 1))  # 3*128*128, RGB
            input_blob = np.zeros(
                (1, 3, self.image_size[1], self.image_size[0]), dtype=np.uint8)
            input_blob[0] = img2

            # NOTE(review): the timestamp and DataBatch expressions were
            # missing in the reviewed source; restored from how their results
            # are used ((tb - ta).total_seconds(), model.forward(db)).
            ta = datetime.datetime.now()
            data = mx.nd.array(input_blob)
            db = mx.io.DataBatch(data=(data,))
            self.model.forward(db, is_train=False)
            alabel = self.model.get_outputs()[-1].asnumpy()[0]
            tb = datetime.datetime.now()
            print('module time cost', (tb - ta).total_seconds())

            # Inverse transform maps crop coordinates back to the input image.
            IM = cv2.invertAffineTransform(trans)
            for j in range(alabel.shape[0]):
                a = cv2.resize(alabel[j],
                               (self.image_size[1], self.image_size[0]))
                # Peak of the j-th heatmap; unravel_index yields (row, col).
                ind = np.unravel_index(np.argmax(a, axis=None), a.shape)
                point = (ind[1], ind[0], 1.0)  # w, h, homogeneous 1
                # NOTE(review): left operand restored as IM - confirm.
                point = np.dot(IM, point)
                landmark_list[i, j] = point[0:2]
        return landmark_list
class ESSHModel:
    """Run an ESSH face detector on frames and accumulate detection time."""

    def __init__(self):
        """Create the detector (``ctx_id=-1``) and reset the time counter."""
        self.total_time = 0
        # [target size for the shorter side, cap for the longer side]
        self.scales = [1200, 1600]
        self.t = 10
        self.esshDetector = ESSHDetector('./model/essh-r50', 0, ctx_id=-1)

    def detect_faces(self, video, output):
        """Process ``video`` through the module-level ``detect_faces`` driver.

        Args:
            video: forwarded unchanged to the module-level ``detect_faces``.
            output: forwarded unchanged to the module-level ``detect_faces``.

        Returns:
            Seconds spent inside the detector during this call, as summed by
            ``detect_faces_on_img``.
        """
        self.total_time = 0
        detect_faces(video, output, 640, 360, self.detect_faces_on_img)
        return self.total_time

    def detect_faces_on_img(self, img):
        """Detect faces on one image and draw boxes and landmarks onto it.

        The image is down-scaled first when its shorter side exceeds
        ``self.scales[0]`` or its longer side exceeds ``self.scales[1]``.

        Args:
            img: image array; only ``img.shape[0:2]`` is inspected here.

        Returns:
            The (possibly resized) image with green rectangles and red
            landmark dots drawn in place. When the detector reports nothing
            the image is returned without drawings.
        """
        import datetime  # local import: the file's import block is not visible here

        im_shape = img.shape
        target_size = self.scales[0]
        max_size = self.scales[1]
        im_size_min = np.min(im_shape[0:2])
        im_size_max = np.max(im_shape[0:2])
        if im_size_min > target_size or im_size_max > max_size:
            im_scale = float(target_size) / float(im_size_min)
            # prevent bigger axis from being more than max_size:
            if np.round(im_scale * im_size_max) > max_size:
                im_scale = float(max_size) / float(im_size_max)
            img = cv2.resize(img, None, None, fx=im_scale, fy=im_scale)
            print('resize to', img.shape)

        # NOTE(review): both timestamp expressions were missing in the
        # reviewed source; restored as datetime.datetime.now() because the
        # difference is consumed through .total_seconds().
        timea = datetime.datetime.now()
        faces = self.esshDetector.detect(img, threshold=0.5)
        timeb = datetime.datetime.now()
        self.total_time += (timeb - timea).total_seconds()

        # Guard before slicing: other users of this detector in the file
        # check for a None / zero-row result.
        if faces is None or faces.shape[0] == 0:
            return img

        bbox = np.round(faces[:, 0:5])
        # Columns 5..14 are read as five (x, y) landmarks per face.
        landmark = faces[:, 5:15].reshape(-1, 5, 2)

        for b in bbox:
            cv2.rectangle(img, (int(b[0]), int(b[1])), (int(b[2]), int(b[3])),
                          (0, 255, 0), 2)
        for p in landmark:
            for i in range(5):
                # NOTE(review): the call head was missing in the reviewed
                # source; restored as cv2.circle(img, ...). Coordinates are
                # cast to int like the rectangle corners above, because the
                # drawing functions expect integer pixel positions.
                cv2.circle(img, (int(p[i][0]), int(p[i][1])), 1,
                           (0, 0, 255), 2)
        return img
class Handler:
    """Predict 68 landmarks per detected face from bounding-box crops.

    The landmark network is the ``heatmap_output`` internal layer of an MXNet
    checkpoint, bound for inference on a single 128x128 three-channel image.
    """

    def __init__(self, prefix, epoch, ctx_id=0):
        """Load the landmark checkpoint and create the face detector.

        Args:
            prefix: checkpoint prefix passed to ``mx.model.load_checkpoint``.
            epoch: checkpoint epoch passed to ``mx.model.load_checkpoint``.
            ctx_id: GPU id to run on; any negative value selects the CPU.
        """
        print('loading', prefix, epoch)
        ctx = mx.gpu(ctx_id) if ctx_id >= 0 else mx.cpu()
        sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
        # Only the heatmap head is needed for inference.
        sym = sym.get_internals()['heatmap_output']
        image_size = (128, 128)
        self.image_size = image_size
        model = mx.mod.Module(symbol=sym, context=ctx, label_names=None)
        model.bind(for_training=False,
                   data_shapes=[('data', (1, 3, image_size[0], image_size[1]))])
        model.set_params(arg_params, aux_params)
        self.model = model
        self.detector = ESSHDetector('./essh-model/essh', 0)

    def get(self, img):
        """Detect faces and return their landmarks in crop coordinates.

        Args:
            img: image handed to the detector and to ``cv2.warpAffine``; it
                is converted with ``cv2.COLOR_BGR2RGB`` before inference, so
                BGR channel order is presumably expected - confirm.

        Returns:
            ``None`` when the detector returns ``None``. Otherwise a tuple
            ``(landmarks, M)``: ``landmarks`` is float32 with shape
            ``(num_faces, 68, 2)`` holding ``(x, y)`` heatmap peaks inside
            each 128x128 crop, and ``M`` is float32 with shape
            ``(num_faces, 2, 3)`` holding the affine matrix used to cut each
            crop out of ``img``.
        """
        import datetime  # local import: the file's import block is not visible here

        detections = self.detector.detect(img, threshold=0.5)
        if detections is None:
            return None
        bbox = detections[:, 0:4]
        num_faces = bbox.shape[0]

        M = np.zeros((num_faces, 2, 3), dtype=np.float32)
        landmarks = np.zeros((num_faces, 68, 2), dtype=np.float32)
        for i in range(num_faces):
            M[i] = img_helper.estimate_trans_bbox(bbox[i, :],
                                                  self.image_size[0], s=1.2)
            rimg = cv2.warpAffine(img, M[i], self.image_size, borderValue=0.0)
            img2 = cv2.cvtColor(rimg, cv2.COLOR_BGR2RGB)
            img2 = np.transpose(img2, (2, 0, 1))  # 3*128*128, RGB
            input_blob = np.zeros(
                (1, 3, self.image_size[1], self.image_size[0]), dtype=np.uint8)
            input_blob[0] = img2

            # NOTE(review): the timestamp and DataBatch expressions were
            # missing in the reviewed source; restored from how their results
            # are used ((tb - ta).total_seconds(), model.forward(db)).
            ta = datetime.datetime.now()
            data = mx.nd.array(input_blob)
            db = mx.io.DataBatch(data=(data,))
            self.model.forward(db, is_train=False)
            alabel = self.model.get_outputs()[-1].asnumpy()[0]
            tb = datetime.datetime.now()
            print('module time cost', (tb - ta).total_seconds())

            for j in range(alabel.shape[0]):
                a = cv2.resize(alabel[j],
                               (self.image_size[1], self.image_size[0]))
                # Peak of the j-th heatmap; unravel_index yields (row, col).
                ind = np.unravel_index(np.argmax(a, axis=None), a.shape)
                landmarks[i, j] = (ind[1], ind[0])  # w, h
        return landmarks, M
# Script fragment: shrink the image if needed, warm the detector up, then time
# one detection pass and print the results. `img`, `scales`, `t` and
# `detector` are defined outside this fragment.
# Aliased so an existing `datetime` name elsewhere in the file is not rebound.
import datetime as _datetime

im_shape = img.shape
print(im_shape)
target_size = scales[0]
max_size = scales[1]
im_size_min = np.min(im_shape[0:2])
im_size_max = np.max(im_shape[0:2])
if im_size_min > target_size or im_size_max > max_size:
    im_scale = float(target_size) / float(im_size_min)
    # prevent bigger axis from being more than max_size:
    if np.round(im_scale * im_size_max) > max_size:
        im_scale = float(max_size) / float(im_size_max)
    img = cv2.resize(img, None, None, fx=im_scale, fy=im_scale)
    print('resize to', img.shape)

# Warm-up runs so the timed pass below is not the detector's first call.
for _ in range(t - 1):
    faces = detector.detect(img)

# NOTE(review): both timestamp expressions were missing in the reviewed
# source; restored as datetime.now() because the difference is consumed
# through .total_seconds().
timea = _datetime.datetime.now()
faces = detector.detect(img, threshold=0.5)
bbox = np.round(faces[:, 0:5])
# Columns 5..14 are read as five (x, y) landmarks per face.
landmark = faces[:, 5:15].reshape(-1, 5, 2)
timeb = _datetime.datetime.now()
diff = timeb - timea
print('detection uses', diff.total_seconds(), 'seconds')
print('find', faces.shape[0], 'faces')
print(bbox)
print(landmark)
class Handler:
    """Predict 68 landmarks for the most central face and re-project labels.

    The landmark network is the ``heatmap_output`` internal layer of an MXNet
    checkpoint, bound for inference on a single 128x128 three-channel image.
    """

    def __init__(self, prefix, epoch, ctx_id=0):
        """Load the landmark checkpoint and create the face detector.

        Args:
            prefix: checkpoint prefix passed to ``mx.model.load_checkpoint``.
            epoch: checkpoint epoch passed to ``mx.model.load_checkpoint``.
            ctx_id: GPU id to run on; any negative value selects the CPU.
        """
        print('loading', prefix, epoch)
        ctx = mx.gpu(ctx_id) if ctx_id >= 0 else mx.cpu()
        sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
        # Only the heatmap head is needed for inference.
        sym = sym.get_internals()['heatmap_output']
        image_size = (128, 128)
        self.image_size = image_size
        model = mx.mod.Module(symbol=sym, context=ctx, label_names=None)
        model.bind(for_training=False,
                   data_shapes=[('data', (1, 3, image_size[0], image_size[1]))])
        model.set_params(arg_params, aux_params)
        self.model = model
        self.detector = ESSHDetector('./essh-model/essh', 0)

    def trans_dot(self, trans1, trans2):
        """Compose two 2x3 affine transforms.

        Args:
            trans1: 2x3 affine matrix applied first.
            trans2: 2x3 affine matrix applied second.

        Returns:
            The 2x3 matrix equivalent to applying ``trans1`` then ``trans2``.
        """
        # Promote both to 3x3 homogeneous form so they can be multiplied.
        trans1 = np.vstack((trans1, [0, 0, 1]))
        trans2 = np.vstack((trans2, [0, 0, 1]))
        # NOTE(review): the left operand was missing in the reviewed source;
        # restored as trans2 (result is named trans21) - confirm.
        trans21 = np.dot(trans2, trans1)[0:2]
        return trans21

    def get_maxpos(self, img, det):
        """Return the index of the detection closest to the image centre.

        Args:
            img: array whose ``shape[0:2]`` is read as (height, width).
            det: 2-D array, one detection per row, with columns 0..3 read as
                ``x1, y1, x2, y2``.

        Returns:
            Index (``np.argmin`` result) of the row whose box centre has the
            smallest squared distance to the image centre.
        """
        img_size = np.asarray(img.shape)[0:2]
        # Float division so the centre is the same under Python 2 and 3.
        img_center = img_size / 2.0
        offsets = np.vstack([
            (det[:, 0] + det[:, 2]) / 2 - img_center[1],
            (det[:, 1] + det[:, 3]) / 2 - img_center[0],
        ])
        offset_dist_squared = np.sum(np.power(offsets, 2.0), 0)
        bindex = np.argmin(offset_dist_squared)
        return bindex

    def get_landmark(self, img, label, dataset, use_essh):
        """Predict landmarks on ``img`` and snap ``label`` to the crop grid.

        Args:
            img: input image; ``img.shape[0]`` is passed to
                ``img_helper.preprocess`` as the output size.
            label: indexable ground-truth points, one per heatmap channel. It
                is OVERWRITTEN IN PLACE: each entry is mapped into the crop,
                floored or rounded there, and mapped back.
            dataset: forwarded unchanged to ``img_helper.transform2``.
            use_essh: when true the crop comes from the ESSH detector (two
                passes); otherwise it is derived from ``label``.

        Returns:
            ``(None, None)`` when ``use_essh`` is set and either detection
            pass finds nothing; otherwise ``(landmark, label)`` where
            ``landmark`` is a float32 ``(68, 2)`` array of predicted
            ``(x, y)`` positions in ``img`` coordinates and ``label`` is the
            same object that was passed in.
        """
        if use_essh:
            ret = self.detector.detect(img, threshold=0.4)
            if ret is None or ret.shape[0] == 0:
                return None, None
            face = ret[self.get_maxpos(img, ret)]
            # Columns 5..14 are read as five (x, y) detector landmarks.
            points = face[5:15].reshape(5, 2)
            rimg, label2, trans1 = img_helper.preprocess(
                img, points, img.shape[0])
            # Second pass on the normalised image supplies the crop box.
            ret2 = self.detector.detect(rimg, threshold=0.4)
            if ret2 is None or ret2.shape[0] == 0:
                return None, None
            bindex2 = self.get_maxpos(rimg, ret2)
            rimg, trans2 = img_helper.transform2(
                rimg, None, self.image_size[0], ret2[bindex2, 0:4], dataset)
        else:
            rimg, label2, trans1 = img_helper.preprocess(
                img, label, img.shape[0])
            rimg, trans2 = img_helper.transform2(
                rimg, label2, self.image_size[0], None, dataset)
        trans = self.trans_dot(trans1, trans2)

        # NOTE(review): no BGR->RGB conversion happens here (it is disabled
        # in the source), so channel order is whatever the caller supplies.
        img2 = np.transpose(rimg, (2, 0, 1))  # 3*128*128
        input_blob = np.zeros(
            (1, 3, self.image_size[1], self.image_size[0]), dtype=np.uint8)
        input_blob[0] = img2
        data = mx.nd.array(input_blob)
        # NOTE(review): the DataBatch expression was missing in the reviewed
        # source; restored from its use in model.forward(db).
        db = mx.io.DataBatch(data=(data,))
        self.model.forward(db, is_train=False)
        alabel = self.model.get_outputs()[-1].asnumpy()[0]

        # Inverse transform maps crop coordinates back to the input image.
        IM = cv2.invertAffineTransform(trans)
        landmark = np.zeros((68, 2), dtype=np.float32)
        for i in range(alabel.shape[0]):
            a = cv2.resize(alabel[i], (self.image_size[1], self.image_size[0]))
            # Peak of the i-th heatmap; unravel_index yields (row, col).
            ind = np.unravel_index(np.argmax(a, axis=None), a.shape)
            point = (ind[1], ind[0], 1.0)  # w, h, homogeneous 1
            # NOTE(review): left operand restored as IM - confirm.
            point = np.dot(IM, point)
            landmark[i] = point[0:2]

            # Quantise the label in crop space, then bring it back.
            npt = img_helper.transform_pt(label[i], trans)
            if config.landmark_type == '2d':
                npt = np.floor(npt)
            else:
                npt = np.round(npt)
            point = (npt[0], npt[1], 1.0)
            point = np.dot(IM, point)
            label[i] = point[0:2]
        return landmark, label