class Handler:
    """Detect faces with ESSH and locate 68 landmarks per face from heatmaps.

    The landmark network is the ``heatmap_output`` layer of an MXNet
    checkpoint, bound for inference on a single 3x128x128 image.
    """

    def __init__(self, prefix, epoch, ctx_id=0):
        """Load the landmark checkpoint and create the face detector.

        Args:
            prefix: Checkpoint prefix given to ``mx.model.load_checkpoint``.
            epoch: Checkpoint epoch given to ``mx.model.load_checkpoint``.
            ctx_id: GPU index for the landmark model; a negative value
                selects ``mx.cpu()``.
        """
        print('loading', prefix, epoch)
        if ctx_id >= 0:
            ctx = mx.gpu(ctx_id)
        else:
            ctx = mx.cpu()
        sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
        # Truncate the graph at the heatmap layer; nothing after it is needed.
        all_layers = sym.get_internals()
        sym = all_layers['heatmap_output']
        image_size = (128, 128)
        self.image_size = image_size
        model = mx.mod.Module(symbol=sym, context=ctx, label_names=None)
        model.bind(for_training=False,
                   data_shapes=[('data', (1, 3, image_size[0], image_size[1]))])
        model.set_params(arg_params, aux_params)
        self.model = model
        # NOTE(review): the detector is created without ctx_id, so it uses
        # whatever device ESSHDetector defaults to -- confirm this is intended.
        self.detector = ESSHDetector('./essh-model/essh-r50', 0)

    def trans_dot(self, trans1, trans2):
        """Compose two 2x3 affine transforms.

        Args:
            trans1: 2x3 matrix applied first.
            trans2: 2x3 matrix applied second.

        Returns:
            The 2x3 matrix equivalent to applying ``trans1`` then ``trans2``.
        """
        # Promote both to 3x3 homogeneous form so they can be multiplied.
        trans1 = np.vstack((trans1, [0, 0, 1]))
        trans2 = np.vstack((trans2, [0, 0, 1]))
        trans21 = np.dot(trans2, trans1)[0:2]
        return trans21

    def get_landmark(self, img):
        """Return 68 landmarks for every face detected in ``img``.

        Args:
            img: Image array; it is converted with ``cv2.COLOR_BGR2RGB``
                before inference, so BGR channel order is expected.

        Returns:
            ``None`` when the detector finds nothing, otherwise a float32
            array of shape ``(num_faces, 68, 2)`` holding (x, y) points in
            the coordinates of ``img``. A face whose re-crop yields no second
            detection keeps an all-zero row instead of raising.
        """
        ret = self.detector.detect(img, threshold=0.4)
        if ret is None or ret.shape[0] == 0:
            return None
        # Columns 0-3 are used as the box, columns 5-14 as five (x, y) points.
        bbox = ret[:, 0:4]
        points = ret[:, 5:15].reshape(-1, 5, 2)
        landmark_list = np.zeros((bbox.shape[0], 68, 2), dtype=np.float32)
        for i in range(bbox.shape[0]):
            # presumably aligns the face from its five points into a
            # 384-pixel working image -- confirm against img_helper.
            rimg, _, trans1 = img_helper.preprocess(img, points[i], 384)
            ret2 = self.detector.detect(rimg, threshold=0.4)
            if ret2 is None or ret2.shape[0] == 0:
                # The original indexed ret2 unconditionally and crashed here.
                continue
            rimg, trans2 = img_helper.transform2(rimg, None,
                                                 self.image_size[0],
                                                 ret2[0, 0:4])
            trans = self.trans_dot(trans1, trans2)
            img2 = cv2.cvtColor(rimg, cv2.COLOR_BGR2RGB)
            img2 = np.transpose(img2, (2, 0, 1))  # 3*128*128, RGB
            input_blob = np.zeros(
                (1, 3, self.image_size[1], self.image_size[0]),
                dtype=np.uint8)
            input_blob[0] = img2
            ta = datetime.datetime.now()
            data = mx.nd.array(input_blob)
            db = mx.io.DataBatch(data=(data,))
            self.model.forward(db, is_train=False)
            alabel = self.model.get_outputs()[-1].asnumpy()[0]
            tb = datetime.datetime.now()
            print('module time cost', (tb - ta).total_seconds())
            # Inverse of the combined crop transform: crop -> original image.
            IM = cv2.invertAffineTransform(trans)
            # range() instead of xrange(): works on Python 2 and 3 alike.
            for j in range(alabel.shape[0]):
                a = cv2.resize(alabel[j],
                               (self.image_size[1], self.image_size[0]))
                ind = np.unravel_index(np.argmax(a, axis=None), a.shape)
                # unravel_index yields (row, col); store as (x, y, 1).
                point = (ind[1], ind[0], 1.0)
                point = np.dot(IM, point)
                landmark_list[i, j] = point[0:2]
        return landmark_list
def __init__(self, args):
    """Build the optional embedding model and the face detector from ``args``.

    Reads ``args.gpu`` (negative selects the CPU context), ``args.image_size``
    (a ``"A,B"`` string of two integers), ``args.model`` (an empty string
    disables the model) and ``args.det`` (0 selects MTCNN, anything else
    selects ESSH).
    """
    self.args = args
    device = mx.gpu(args.gpu) if args.gpu >= 0 else mx.cpu()

    size_fields = args.image_size.split(',')
    assert len(size_fields) == 2
    self.image_size = (int(size_fields[0]), int(size_fields[1]))

    # The model is only loaded when a model string was supplied.
    self.model = (get_model(device, self.image_size, args.model, 'fc1')
                  if len(args.model) > 0 else None)

    self.det_minsize = 50
    self.det_threshold = [0.6, 0.7, 0.8]

    # The MTCNN weights directory sits next to this source file.
    mtcnn_dir = os.path.join(os.path.dirname(__file__), 'mtcnn-model')
    if args.det != 0:
        self.detector = ESSHDetector(prefix='./ssh-model/essh',
                                     epoch=0,
                                     ctx_id=args.gpu,
                                     test_mode=False)
    else:
        self.detector = MtcnnDetector(model_folder=mtcnn_dir,
                                      ctx=device,
                                      num_worker=1,
                                      accurate_landmark=True,
                                      threshold=self.det_threshold)
def __init__(self, prefix, epoch, ctx_id=0):
    """Load the heatmap network from a checkpoint and create the detector.

    ``prefix`` and ``epoch`` are forwarded to ``mx.model.load_checkpoint``.
    ``ctx_id`` is the GPU index for the network; a negative value selects
    the CPU context.
    """
    print('loading', prefix, epoch)
    device = mx.gpu(ctx_id) if ctx_id >= 0 else mx.cpu()

    full_symbol, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
    # Only the graph up to the heatmap layer is bound.
    heatmap_symbol = full_symbol.get_internals()['heatmap_output']

    self.image_size = (128, 128)
    input_shape = (1, 3, self.image_size[0], self.image_size[1])

    self.model = mx.mod.Module(symbol=heatmap_symbol,
                               context=device,
                               label_names=None)
    self.model.bind(for_training=False, data_shapes=[('data', input_shape)])
    self.model.set_params(arg_params, aux_params)

    self.detector = ESSHDetector('./essh-model/essh-r50', 0)
def __init__(self, prefix, epoch, ctx_id=0):
    """Prepare the landmark heatmap module and the ESSH face detector.

    Args:
        prefix: Checkpoint prefix for ``mx.model.load_checkpoint``.
        epoch: Checkpoint epoch for ``mx.model.load_checkpoint``.
        ctx_id: GPU index; any negative value runs the module on the CPU.
    """
    print('loading', prefix, epoch)
    context = mx.gpu(ctx_id) if ctx_id >= 0 else mx.cpu()

    loaded_sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
    internals = loaded_sym.get_internals()
    heatmap_sym = internals['heatmap_output']

    side_a, side_b = 128, 128
    self.image_size = (side_a, side_b)

    # Inference only: no labels, a fixed batch of one 3-channel image.
    module = mx.mod.Module(symbol=heatmap_sym, context=context, label_names=None)
    module.bind(for_training=False,
                data_shapes=[('data', (1, 3, side_a, side_b))])
    module.set_params(arg_params, aux_params)
    self.model = module

    # An MTCNN-based detector was tried here earlier (left disabled in the
    # original source); ESSH is the detector actually used.
    self.detector = ESSHDetector('./essh-model/essh', 0)
class ESSHModel:
    """Wrap an ESSHDetector to annotate images and total up detection time."""

    def __init__(self):
        """Create the detector and initialise timing and resize settings."""
        self.total_time = 0
        # [short-side threshold/target, long-side cap] used when downscaling.
        self.scales = [1200, 1600]
        self.t = 10
        self.esshDetector = ESSHDetector('./model/essh-r50', 0, ctx_id=-1)

    def detect_faces(self, video, output):
        """Process ``video`` and return the accumulated detection time.

        Delegates to the module-level ``detect_faces`` function (defined
        outside this class), handing it ``self.detect_faces_on_img`` as a
        callback -- presumably invoked once per frame; confirm against that
        function. The meaning of the 640 and 360 arguments is defined there.

        Returns:
            Total seconds spent in detection during this call.
        """
        self.total_time = 0
        detect_faces(video, output, 640, 360, self.detect_faces_on_img)
        return self.total_time

    def detect_faces_on_img(self, img):
        """Detect faces in ``img`` and draw boxes and five landmarks on it.

        The image is first downscaled (aspect ratio kept) when its short
        side exceeds ``scales[0]`` or its long side exceeds ``scales[1]``.
        Detection time is added to ``self.total_time``.

        Args:
            img: Image array whose first two dimensions are its spatial size.

        Returns:
            The (possibly resized) image with annotations drawn in place.
            When the detector returns ``None`` or no rows, the image is
            returned without annotations.
        """
        im_shape = img.shape
        target_size = self.scales[0]
        max_size = self.scales[1]
        im_size_min = np.min(im_shape[0:2])
        im_size_max = np.max(im_shape[0:2])
        if im_size_min > target_size or im_size_max > max_size:
            im_scale = float(target_size) / float(im_size_min)
            # prevent bigger axis from being more than max_size:
            if np.round(im_scale * im_size_max) > max_size:
                im_scale = float(max_size) / float(im_size_max)
            img = cv2.resize(img, None, None, fx=im_scale, fy=im_scale)
            print('resize to', img.shape)

        timea = datetime.datetime.now()
        faces = self.esshDetector.detect(img, threshold=0.5)
        # Other callers treat detect() as able to return None; guard before
        # slicing so a frame without faces no longer raises.
        has_faces = faces is not None and faces.shape[0] > 0
        if has_faces:
            bbox = np.round(faces[:, 0:5])
            landmark = faces[:, 5:15].reshape(-1, 5, 2)
        timeb = datetime.datetime.now()
        self.total_time += (timeb - timea).total_seconds()

        if not has_faces:
            return img

        for b in bbox:
            cv2.rectangle(img, (int(b[0]), int(b[1])),
                          (int(b[2]), int(b[3])), (0, 255, 0), 2)
        for p in landmark:
            for i in range(5):
                # OpenCV drawing functions need integer pixel coordinates.
                cv2.circle(img, (int(p[i][0]), int(p[i][1])), 1,
                           (0, 0, 255), 2)
        return img
def test(args):
    """Run the detector over a dataset, log recall and write WIDER results.

    For each roidb entry the detections from ``get_boxes`` are compared with
    the ground-truth boxes, per-image and running recall are printed to
    stderr, and one ``<image>.txt`` result file is written under
    ``args.output/<event dir>/``. Finally the MATLAB ``wider_eval`` script in
    ``wider_eval_tools`` (next to this file) is launched.

    Args:
        args: Namespace providing ``output``, ``prefix``, ``epoch``, ``gpu``,
            ``dataset``, ``image_set``, ``root_path``, ``dataset_path``,
            ``pyramid`` and ``method_name``.

    Side effects:
        Rebinds the module-level ``detector`` global.
    """
    print('test with', args)
    global detector
    output_folder = args.output
    if not os.path.exists(output_folder):
        # makedirs so a nested output path does not fail on a missing parent.
        os.makedirs(output_folder)
    detector = ESSHDetector(args.prefix, args.epoch, args.gpu, test_mode=True)
    # NOTE(review): eval() on a command-line string executes arbitrary code;
    # acceptable only while args.dataset comes from a trusted operator.
    imdb = eval(args.dataset)(args.image_set, args.root_path,
                              args.dataset_path)
    roidb = imdb.gt_roidb()
    # overall = [ground-truth boxes matched so far, ground-truth boxes seen].
    overall = [0.0, 0.0]
    for i, roi in enumerate(roidb):
        if i % 10 == 0:
            print('processing', i, file=sys.stderr)
        boxes = get_boxes(roi, args.pyramid)
        gt_boxes = roi['boxes'].copy()
        num_gt = gt_boxes.shape[0]
        gt_areas = (gt_boxes[:, 2] - gt_boxes[:, 0] + 1) * \
                   (gt_boxes[:, 3] - gt_boxes[:, 1] + 1)

        # Builtin float is what the removed np.float alias pointed to.
        overlaps = bbox_overlaps(boxes.astype(float), gt_boxes.astype(float))
        _gt_overlaps = np.zeros((num_gt))
        if boxes.shape[0] > 0:
            # Best IoU achieved by any detection for each ground-truth box.
            _gt_overlaps = overlaps.max(axis=0)
        for j in range(len(_gt_overlaps)):
            if _gt_overlaps[j] > config.TEST.IOU_THRESH:
                continue
            print(j, 'failed', gt_boxes[j], 'max_overlap:', _gt_overlaps[j],
                  file=sys.stderr)

        # append recorded IoU coverage level
        found = (_gt_overlaps > config.TEST.IOU_THRESH).sum()
        _recall = found / float(num_gt) if num_gt > 0 else float('nan')
        print('recall', _recall, num_gt, boxes.shape[0], gt_areas,
              file=sys.stderr)
        overall[0] += found
        overall[1] += num_gt
        # Guard the running ratio: images without ground truth used to raise
        # ZeroDivisionError while nothing had been counted yet.
        if overall[1] > 0:
            _recall = float(overall[0]) / overall[1]
        else:
            _recall = float('nan')
        print('recall_all', _recall, file=sys.stderr)

        _vec = roi['image'].split('/')
        out_dir = os.path.join(output_folder, _vec[-2])
        if not os.path.exists(out_dir):
            os.mkdir(out_dir)
        # Swap only the extension; str.replace('jpg', 'txt') also rewrote
        # any "jpg" occurring inside the base name.
        out_file = os.path.join(out_dir,
                                os.path.splitext(_vec[-1])[0] + '.txt')
        with open(out_file, 'w') as f:
            name = '/'.join(_vec[-2:])
            f.write("%s\n" % (name))
            f.write("%d\n" % (boxes.shape[0]))
            for b in range(boxes.shape[0]):
                box = boxes[b]
                # Result line: x, y, width, height, score.
                f.write("%d %d %d %d %g \n" %
                        (box[0], box[1], box[2] - box[0], box[3] - box[1],
                         box[4]))

    print('Evaluating detections using official WIDER toolbox...')
    path = os.path.join(os.path.dirname(__file__), 'wider_eval_tools')
    eval_output_path = os.path.join(path, 'wider_plots')
    if not os.path.isdir(eval_output_path):
        os.mkdir(eval_output_path)
    # NOTE(review): the command is a shell string built from CLI arguments;
    # paths containing quotes or shell metacharacters will break it.
    cmd = 'cd {} && '.format(path)
    cmd += 'matlab -nodisplay -nodesktop '
    cmd += '-r "dbstop if error; '
    cmd += 'wider_eval(\'{:s}\',\'{:s}\',\'{:s}\'); quit;"' \
        .format(args.output, args.method_name, eval_output_path)
    print('Running:\n{}'.format(cmd))
    subprocess.call(cmd, shell=True)
class Handler:
    """Detect faces with ESSH and read 68 landmarks per face from heatmaps.

    Landmarks are reported in the coordinates of each 128x128 face crop,
    together with the affine matrix that produced the crop.
    """

    def __init__(self, prefix, epoch, ctx_id=0):
        """Load the landmark checkpoint and create the face detector.

        Args:
            prefix: Checkpoint prefix given to ``mx.model.load_checkpoint``.
            epoch: Checkpoint epoch given to ``mx.model.load_checkpoint``.
            ctx_id: GPU index for the landmark model; a negative value
                selects ``mx.cpu()``.
        """
        print('loading', prefix, epoch)
        if ctx_id >= 0:
            ctx = mx.gpu(ctx_id)
        else:
            ctx = mx.cpu()
        sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
        # Keep only the graph up to the heatmap layer.
        all_layers = sym.get_internals()
        sym = all_layers['heatmap_output']
        image_size = (128, 128)
        self.image_size = image_size
        model = mx.mod.Module(symbol=sym, context=ctx, label_names=None)
        model.bind(for_training=False,
                   data_shapes=[('data', (1, 3, image_size[0],
                                          image_size[1]))])
        model.set_params(arg_params, aux_params)
        self.model = model
        self.detector = ESSHDetector('./essh-model/essh', 0)

    def get(self, img):
        """Return crop-space landmarks and crop transforms for every face.

        Args:
            img: Image array; it is converted with ``cv2.COLOR_BGR2RGB``
                before inference, so BGR channel order is expected.

        Returns:
            ``None`` when the detector returns ``None``. Otherwise a tuple
            ``(landmarks, M)`` where ``landmarks`` is float32 of shape
            ``(num_faces, 68, 2)`` with (x, y) points inside the crop, and
            ``M`` is float32 of shape ``(num_faces, 2, 3)`` holding the
            affine matrix used to warp ``img`` into each crop. Note the
            no-detection result is a bare ``None``, not a tuple.
        """
        detections = self.detector.detect(img, threshold=0.5)
        if detections is None:
            return None
        bbox = detections[:, 0:4]
        M = np.zeros((bbox.shape[0], 2, 3), dtype=np.float32)
        ret = np.zeros((bbox.shape[0], 68, 2), dtype=np.float32)
        for i in range(bbox.shape[0]):
            # s=1.2 is forwarded to the helper; presumably a margin factor
            # around the box -- confirm against img_helper.
            M[i] = img_helper.estimate_trans_bbox(bbox[i, :],
                                                  self.image_size[0],
                                                  s=1.2)
            rimg = cv2.warpAffine(img, M[i], self.image_size,
                                  borderValue=0.0)
            img2 = cv2.cvtColor(rimg, cv2.COLOR_BGR2RGB)
            img2 = np.transpose(img2, (2, 0, 1))  # 3*128*128, RGB
            input_blob = np.zeros(
                (1, 3, self.image_size[1], self.image_size[0]),
                dtype=np.uint8)
            input_blob[0] = img2
            ta = datetime.datetime.now()
            data = mx.nd.array(input_blob)
            db = mx.io.DataBatch(data=(data, ))
            self.model.forward(db, is_train=False)
            alabel = self.model.get_outputs()[-1].asnumpy()[0]
            tb = datetime.datetime.now()
            print('module time cost', (tb - ta).total_seconds())
            # range() instead of xrange(): works on Python 2 and 3 alike.
            for j in range(alabel.shape[0]):
                a = cv2.resize(alabel[j],
                               (self.image_size[1], self.image_size[0]))
                ind = np.unravel_index(np.argmax(a, axis=None), a.shape)
                # unravel_index yields (row, col); store as (w, h).
                ret[i, j] = (ind[1], ind[0])
        return ret, M
PARTS = [1, 2, 3] MAX_LABEL = 99999 print(args, MAX_LABEL, PARTS) SPLIT = [0, 1] if len(args.split) > 0: _v = args.split.split(',') SPLIT[0] = int(_v[0]) SPLIT[1] = int(_v[1]) print('SPLIT:', SPLIT) detector = ESSHDetector('./model/ssh-model/essh', 0, ctx_id=args.gpu, test_mode=False) def get_faces(video, is_train=True): R = [] sampling = args.sampling while True: cap = cv2.VideoCapture(video) frame_num = 0 while cap.isOpened(): ret, frame = cap.read() if frame is None: break frame_num += 1 if frame_num % sampling != 0:
import cv2 import sys import numpy as np import datetime #sys.path.append('.') from essh_detector import ESSHDetector scales = [1200, 1600] #scales = [600, 1200] t = 10 detector = ESSHDetector('./model/essh', 0) f = './sample-images/t1.jpg' if len(sys.argv) > 1: f = sys.argv[1] img = cv2.imread(f) im_shape = img.shape print(im_shape) target_size = scales[0] max_size = scales[1] im_size_min = np.min(im_shape[0:2]) im_size_max = np.max(im_shape[0:2]) if im_size_min > target_size or im_size_max > max_size: im_scale = float(target_size) / float(im_size_min) # prevent bigger axis from being more than max_size: if np.round(im_scale * im_size_max) > max_size: im_scale = float(max_size) / float(im_size_max) img = cv2.resize(img, None, None, fx=im_scale, fy=im_scale) print('resize to', img.shape) for i in xrange(t - 1): #warmup
class Handler:
    """Predict 68 landmarks for the most central face and remap the label.

    The landmark network is the ``heatmap_output`` layer of an MXNet
    checkpoint, bound for inference on a single 3x128x128 image.
    """

    def __init__(self, prefix, epoch, ctx_id=0):
        """Load the landmark checkpoint and create the face detector.

        Args:
            prefix: Checkpoint prefix given to ``mx.model.load_checkpoint``.
            epoch: Checkpoint epoch given to ``mx.model.load_checkpoint``.
            ctx_id: GPU index for the landmark model; a negative value
                selects ``mx.cpu()``.
        """
        print('loading', prefix, epoch)
        if ctx_id >= 0:
            ctx = mx.gpu(ctx_id)
        else:
            ctx = mx.cpu()
        sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
        # Keep only the graph up to the heatmap layer.
        all_layers = sym.get_internals()
        sym = all_layers['heatmap_output']
        image_size = (128, 128)
        self.image_size = image_size
        model = mx.mod.Module(symbol=sym, context=ctx, label_names=None)
        model.bind(for_training=False,
                   data_shapes=[('data', (1, 3, image_size[0],
                                          image_size[1]))])
        model.set_params(arg_params, aux_params)
        self.model = model
        self.detector = ESSHDetector('./essh-model/essh', 0)

    def trans_dot(self, trans1, trans2):
        """Compose two 2x3 affine transforms.

        Returns the 2x3 matrix equivalent to applying ``trans1`` first and
        ``trans2`` second.
        """
        # Promote both to 3x3 homogeneous form so they can be multiplied.
        trans1 = np.vstack((trans1, [0, 0, 1]))
        trans2 = np.vstack((trans2, [0, 0, 1]))
        trans21 = np.dot(trans2, trans1)[0:2]
        return trans21

    def get_maxpos(self, img, det):
        """Return the row index of the detection closest to the image centre.

        Args:
            img: Image array; only ``img.shape[0:2]`` (height, width) is used.
            det: 2-D array whose columns 0-3 are x1, y1, x2, y2.

        Returns:
            Index into ``det`` of the box whose centre has the smallest
            squared distance to the image centre.
        """
        img_size = np.asarray(img.shape)[0:2]
        img_center = img_size / 2
        # img_center is (row, col), hence index 1 pairs with x and 0 with y.
        offsets = np.vstack([
            (det[:, 0] + det[:, 2]) / 2 - img_center[1],
            (det[:, 1] + det[:, 3]) / 2 - img_center[0]
        ])
        offset_dist_squared = np.sum(np.power(offsets, 2.0), 0)
        bindex = np.argmin(offset_dist_squared)
        return bindex

    def get_landmark(self, img, label, dataset, use_essh):
        """Predict landmarks for one face and round-trip the label.

        Args:
            img: Input image array. No colour conversion is applied here;
                the crop is fed to the network in its existing channel order.
            label: Indexable of (x, y) ground-truth points, one per heatmap
                channel. It is read even when ``use_essh`` is true and is
                **modified in place**.
            dataset: Forwarded unchanged to ``img_helper.transform2``.
            use_essh: When true the crop is derived from ESSH detections
                (the most central face); otherwise from ``label``.

        Returns:
            ``(None, None)`` when ``use_essh`` is true and either detection
            pass finds no face. Otherwise ``(landmark, label)`` where
            ``landmark`` is a float32 ``(68, 2)`` array of predicted (x, y)
            points in ``img`` coordinates and ``label`` is the input label
            after each point was mapped into the crop, quantised (floor when
            ``config.landmark_type == '2d'``, round otherwise) and mapped
            back.
        """
        if use_essh:
            ret = self.detector.detect(img, threshold=0.4)
            if ret is None or ret.shape[0] == 0:
                return None, None
            bindex = self.get_maxpos(img, ret)
            face = ret[bindex]
            # Columns 5-14 of a detection row are used as five (x, y) points.
            points = face[5:15].reshape(5, 2)
            rimg, label2, trans1 = img_helper.preprocess(
                img, points, img.shape[0])
            # Second pass on the preprocessed image to obtain the crop box.
            ret2 = self.detector.detect(rimg, threshold=0.4)
            if ret2 is None or ret2.shape[0] == 0:
                return None, None
            bindex2 = self.get_maxpos(rimg, ret2)
            rimg, trans2 = img_helper.transform2(rimg, None,
                                                 self.image_size[0],
                                                 ret2[bindex2, 0:4], dataset)
        else:
            rimg, label2, trans1 = img_helper.preprocess(
                img, label, img.shape[0])
            rimg, trans2 = img_helper.transform2(rimg, label2,
                                                 self.image_size[0], None,
                                                 dataset)
        trans = self.trans_dot(trans1, trans2)
        img2 = np.transpose(rimg, (2, 0, 1))  # 3*128*128
        input_blob = np.zeros((1, 3, self.image_size[1], self.image_size[0]),
                              dtype=np.uint8)
        input_blob[0] = img2
        data = mx.nd.array(input_blob)
        db = mx.io.DataBatch(data=(data, ))
        self.model.forward(db, is_train=False)
        alabel = self.model.get_outputs()[-1].asnumpy()[0]
        # Inverse of the combined crop transform: crop -> original image.
        IM = cv2.invertAffineTransform(trans)
        landmark = np.zeros((68, 2), dtype=np.float32)
        # range() instead of xrange(): works on Python 2 and 3 alike.
        for i in range(alabel.shape[0]):
            a = cv2.resize(alabel[i],
                           (self.image_size[1], self.image_size[0]))
            ind = np.unravel_index(np.argmax(a, axis=None), a.shape)
            point = (ind[1], ind[0], 1.0)  # w, h
            point = np.dot(IM, point)
            landmark[i] = point[0:2]
            # Push the label through the same crop quantisation as the
            # prediction so both are compared on the same pixel grid.
            npt = img_helper.transform_pt(label[i], trans)
            if config.landmark_type == '2d':
                npt = np.floor(npt)
            else:
                npt = np.round(npt)
            point = (npt[0], npt[1], 1.0)
            point = np.dot(IM, point)
            label[i] = point[0:2]
        return landmark, label
import cv2
import sys
import numpy as np
import datetime
#sys.path.append('.')
from essh_detector import ESSHDetector

# Resize limits read by main(): scales[0] is the short-side threshold/target,
# scales[1] is the cap on the long side.
scales = [1200, 1600]
#scales = [600, 1200]
# NOTE(review): t is not used in the part of the script visible here.
t = 10
# Detector is constructed at import time; it is not used inside the visible
# part of main().
detector = ESSHDetector('./model/essh-r50', 0, ctx_id=0)


def main():
    """Load an image and downscale it to fit the limits in ``scales``.

    The image path is ``sys.argv[1]`` when given, otherwise
    ``'./sample-images/t1.jpg'``. The image shape is printed; if the short
    side exceeds ``scales[0]`` or the long side exceeds ``scales[1]`` the
    image is resized with a single scale factor for both axes and the new
    shape is printed.

    NOTE(review): ``cv2.imread`` returns ``None`` for an unreadable path, in
    which case ``img.shape`` raises AttributeError.
    """
    f = './sample-images/t1.jpg'
    if len(sys.argv) > 1:
        f = sys.argv[1]
    img = cv2.imread(f)
    im_shape = img.shape
    print(im_shape)
    target_size = scales[0]
    max_size = scales[1]
    im_size_min = np.min(im_shape[0:2])
    im_size_max = np.max(im_shape[0:2])
    if im_size_min > target_size or im_size_max > max_size:
        # First try to bring the short side to target_size.
        im_scale = float(target_size) / float(im_size_min)
        # prevent bigger axis from being more than max_size:
        if np.round(im_scale * im_size_max) > max_size:
            im_scale = float(max_size) / float(im_size_max)
        img = cv2.resize(img, None, None, fx=im_scale, fy=im_scale)
        print('resize to', img.shape)
def __init__(self):
    """Initialise timing state, resize limits and the ESSH detector.

    The detector is created with ``ctx_id=-1`` (presumably the CPU context;
    confirm against ESSHDetector).
    """
    # Accumulated detection time, reset and read by the owning class.
    self.total_time = 0
    self.scales, self.t = [1200, 1600], 10
    detector_prefix = './model/essh-r50'
    self.esshDetector = ESSHDetector(detector_prefix, 0, ctx_id=-1)