def test_pnet(img, min_img_size, net_size, net):
    """Run the P-Net on the first pyramid scale of ``img`` and return boxes.

    Debug helper: the network input, its shape and the ``conv1`` activations
    are printed, and the scale loop is left after its first iteration.

    Args:
        img: image array with exactly three dimensions (unpacked as h, w, c).
        min_img_size: forwarded to ``gen_scales``.
        net_size: forwarded to ``gen_scales``.
        net: network object exposing ``blobs`` and ``forward()``.

    Returns:
        The result of ``tools.NMS`` applied to the collected candidates.
    """
    normalized = (img.copy() - 127.5) / 127.5
    height, width, _ = normalized.shape
    candidates = []
    for factor in gen_scales(width, height, min_img_size, net_size):
        scaled_h = int(height * factor)
        scaled_w = int(width * factor)
        resized = cv2.resize(normalized, (scaled_w, scaled_h))
        # transpose + swapaxes rearrange the array so that it fits the
        # (1, 3, scaled_h, scaled_w) blob configured just below.
        blob = np.swapaxes(cv2.transpose(resized), 0, 2)
        net.blobs['data'].reshape(1, 3, scaled_h, scaled_w)
        net.blobs['data'].data[...] = blob
        print(blob.shape)
        print(blob)
        result = net.forward()
        first_conv = net.blobs["conv1"]
        label_prob = result[config.NET_OUTPUTS['pnet']['label']][0]
        bbox = result[config.NET_OUTPUTS['pnet']['bbox']][0]
        print(first_conv.data.shape)
        print(first_conv.data)
        face_prob = label_prob[1]
        rows, cols = face_prob.shape
        candidates.extend(
            tools.detect_face_12net(face_prob, bbox, max(rows, cols),
                                    1 / factor, width, height, 0.65))
        # Only the first scale is evaluated.
        break
    return tools.NMS(candidates, 0.7, 'iou')
def detectFace(img_path, threshold):
    """Collect 12-net face candidates for an image file over all scales.

    Args:
        img_path: path handed to ``cv2.imread``.
        threshold: indexable; only ``threshold[0]`` is used, as the score
            threshold passed to ``tools.detect_face_12net``.

    Returns:
        List holding the rectangles produced for every scale.
    """
    image = cv2.imread(img_path)
    # NOTE(review): if imread yields a uint8 array this subtraction stays in
    # uint8 and wraps around - confirm that this matches the trained model.
    caffe_img = image.copy() - 128
    origin_h, origin_w, _ = caffe_img.shape
    scales = tools.calculateScales(image)

    # First pass: one forward run of the 12-net per scale.
    forward_results = []
    for scale in scales:
        scaled_h = int(origin_h * scale)
        scaled_w = int(origin_w * scale)
        blob = np.swapaxes(cv2.resize(caffe_img, (scaled_w, scaled_h)), 0, 2)
        net_12.blobs['data'].reshape(1, 3, scaled_w, scaled_h)
        net_12.blobs['data'].data[...] = blob
        caffe.set_device(0)
        caffe.set_mode_gpu()
        forward_results.append(net_12.forward())

    # Second pass: decode every result back to original-image coordinates.
    rectangles = []
    for scale, result in zip(scales, forward_results):
        cls_prob = result['cls_score'][0][1]
        roi = result['conv4-2'][0]
        rows, cols = cls_prob.shape
        rectangles.extend(
            tools.detect_face_12net(cls_prob, roi, max(rows, cols), 1 / scale,
                                    origin_w, origin_h, threshold[0]))
    return rectangles
def detectFace(img_path, threshold):
    """Collect 12-net face candidates for an image file, running in CPU mode.

    Args:
        img_path: path handed to ``cv2.imread``.
        threshold: indexable; only ``threshold[0]`` is used, as the score
            threshold passed to ``tools.detect_face_12net``.

    Returns:
        List holding the rectangles produced for every scale.
    """
    source = cv2.imread(img_path)
    caffe_img = source.copy() - 128
    origin_h, origin_w, _ = caffe_img.shape
    scales = tools.calculateScales(source)

    per_scale = []
    for factor in scales:
        new_h = int(origin_h * factor)
        new_w = int(origin_w * factor)
        resized = cv2.resize(caffe_img, (new_w, new_h))
        net_input = np.swapaxes(resized, 0, 2)
        data_blob = net_12.blobs['data']
        data_blob.reshape(1, 3, new_w, new_h)
        net_12.blobs['data'].data[...] = net_input
        # GPU selection is disabled in this variant; inference runs on CPU.
        caffe.set_mode_cpu()
        per_scale.append(net_12.forward())

    rectangles = []
    for index, result in enumerate(per_scale):
        cls_prob = result['prob1'][0][1]
        roi = result['conv4-2'][0]
        rows, cols = cls_prob.shape
        found = tools.detect_face_12net(cls_prob, roi, max(rows, cols),
                                        1 / scales[index], origin_w, origin_h,
                                        threshold[0])
        rectangles.extend(found)
    return rectangles
def detectFace(img_path, threshold):
    """Detect faces in an image file with the 12-net followed by the 24-net.

    Args:
        img_path: path handed to ``cv2.imread``.
        threshold: indexable; ``threshold[0]`` is passed to
            ``tools.detect_face_12net`` and ``threshold[1]`` to
            ``tools.filter_face_24net``.

    Returns:
        The (empty) 12-net candidate list when nothing was found, otherwise
        the result of ``tools.filter_face_24net``.
    """
    img = cv2.imread(img_path)
    caffe_img = img.copy() - 128
    origin_h, origin_w, ch = caffe_img.shape
    scales = tools.calculateScales(img)

    # Device/mode selection does not depend on the scale, so do it once
    # instead of on every loop iteration.
    caffe.set_device(0)
    caffe.set_mode_gpu()

    out = []
    for scale in scales:
        hs = int(origin_h * scale)
        ws = int(origin_w * scale)
        scale_img = cv2.resize(caffe_img, (ws, hs))
        scale_img = np.swapaxes(scale_img, 0, 2)
        net_12.blobs['data'].reshape(1, 3, ws, hs)
        net_12.blobs['data'].data[...] = scale_img
        out.append(net_12.forward())

    rectangles = []
    for scale, net_out in zip(scales, out):
        cls_prob = net_out['prob1'][0][1]
        roi = net_out['conv4-2'][0]
        out_h, out_w = cls_prob.shape
        out_side = max(out_h, out_w)
        rectangles.extend(
            tools.detect_face_12net(cls_prob, roi, out_side, 1 / scale,
                                    origin_w, origin_h, threshold[0]))
    # NOTE(review): sibling variants run tools.NMS on the 12-net candidates
    # before the next stage; this one does not - confirm that is intended.
    if len(rectangles) == 0:
        return rectangles

    net_24.blobs['data'].reshape(len(rectangles), 3, 24, 24)
    for crop_number, rectangle in enumerate(rectangles):
        # Slice bounds must be integers; int() is a no-op for integer
        # coordinates and avoids a TypeError for float ones.
        x1, y1, x2, y2 = (int(value) for value in rectangle[:4])
        crop_img = caffe_img[y1:y2, x1:x2]
        scale_img = cv2.resize(crop_img, (24, 24))
        net_24.blobs['data'].data[crop_number] = np.swapaxes(scale_img, 0, 2)

    out = net_24.forward()
    cls_prob = out['prob1']
    roi_prob = out['conv5-2']
    return tools.filter_face_24net(cls_prob, roi_prob, rectangles, origin_w,
                                   origin_h, threshold[1])
def detectFace(img_path, threshold):
    """Run the 12-net on a depth map stored in an OpenCV XML/YAML file.

    The matrix is read from the ``depth`` node of the file at ``img_path``.

    Args:
        img_path: path of the file opened with ``cv2.FileStorage``.
        threshold: score threshold passed unchanged to
            ``tools.detect_face_12net``.

    Returns:
        ``None`` when the depth matrix cannot be read; the empty candidate
        list when nothing survives NMS; otherwise a list with
        ``rectangular2square`` applied to every surviving rectangle.
    """
    fs = cv2.FileStorage(img_path, cv2.FileStorage_READ)
    img = fs.getNode('depth').mat()
    fs.release()
    if img is None:
        # The original message referenced an undefined name (depth_path),
        # which turned this error path into a NameError.
        print("Fail to read XML file: " + img_path)
        return None

    origin_h, origin_w = img.shape
    scales = tools.calculateScales(img)

    out = []
    for scale in scales:
        hs = int(origin_h * scale)
        ws = int(origin_w * scale)
        net_12.blobs['data'].reshape(1, 1, hs, ws)
        net_12.blobs['data'].data[...] = preprocess(img, (ws, hs))
        out.append(net_12.forward())

    rectangles = []
    for scale, net_out in zip(scales, out):
        cls_prob = net_out['prob1'][0][1]
        roi = net_out['conv4-2'][0]
        out_h, out_w = cls_prob.shape
        out_side = max(out_h, out_w)
        rectangles.extend(
            tools.detect_face_12net(cls_prob, roi, out_side, 1 / scale,
                                    origin_w, origin_h, threshold))

    rectangles = tools.NMS(rectangles, 0.7, 'iou')
    if len(rectangles) == 0:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("rect drop to 0 at 12net")
        return rectangles

    return [rectangular2square(rectangle, img) for rectangle in rectangles]
def detectFace(img_path, threshold):
    """Run the 12/24/48-net cascade on a depth map stored in an XML/YAML file.

    The matrix is read from the ``depth`` node of the file at ``img_path``.
    The rectangles left after the 12-net and after the 24-net are also dumped
    to two hard-coded text files, one ``x1 y1 x2 y2 score`` line each.

    Args:
        img_path: path of the file opened with ``cv2.FileStorage``.
        threshold: indexable with three entries, one per cascade stage.

    Returns:
        The rectangles of the last stage that was reached; the list is empty
        when a stage drops every candidate.
    """
    fs = cv2.FileStorage(img_path, cv2.FileStorage_READ)
    img = fs.getNode('depth').mat()
    fs.release()
    origin_h, origin_w = img.shape
    scales = tools.calculateScales(img)

    out = []
    for scale in scales:
        hs = int(origin_h * scale)
        ws = int(origin_w * scale)
        net_12.blobs['data'].reshape(1, 1, hs, ws)
        net_12.blobs['data'].data[...] = preprocess(img, (ws, hs))
        out.append(net_12.forward())

    rectangles = []
    for scale, net_out in zip(scales, out):
        cls_prob = net_out['prob1'][0][1]
        # The %-formatted form prints the same text on Python 2 and 3.
        print("cls_prob.shape:  %s" % (cls_prob.shape,))
        roi = net_out['conv4-2'][0]
        out_h, out_w = cls_prob.shape
        print("out_h:  %s" % (out_h,))
        print("out_w:  %s" % (out_w,))
        out_side = max(out_h, out_w)
        rectangles.extend(
            tools.detect_face_12net(cls_prob, roi, out_side, 1 / scale,
                                    origin_w, origin_h, threshold[0]))

    rectangles = tools.NMS(rectangles, 0.7, 'iou')
    if len(rectangles) == 0:
        print("rect drop to 0 at 12net")
        return rectangles

    # 'with' closes the dump file; it used to be left open.
    with open('/home/xingduan/YupengHan/inference/saving_docs/12/12doc.txt',
              'w') as doc12:
        for temp_rectangle in rectangles:
            doc12.write('%d %d %d %d %f\n' %
                        (temp_rectangle[0], temp_rectangle[1],
                         temp_rectangle[2], temp_rectangle[3],
                         temp_rectangle[4]))

    net_24.blobs['data'].reshape(len(rectangles), 1, 24, 24)
    for crop_number, rectangle in enumerate(rectangles):
        # Only the crop uses the squared box; 'rectangles' itself is not
        # modified.
        rectangle = rectangular2square(rectangle, img)
        crop_img = img[int(rectangle[1]):int(rectangle[3]),
                       int(rectangle[0]):int(rectangle[2])]
        net_24.blobs['data'].data[crop_number] = preprocess(crop_img, (24, 24))

    out = net_24.forward()
    cls_prob = out['prob1']
    roi_prob = out['ip_roi']
    rectangles = tools.filter_face_24net(cls_prob, roi_prob, rectangles,
                                         origin_w, origin_h, threshold[1])

    # Written before the emptiness check, so an empty result still truncates
    # the file.
    with open('/home/xingduan/YupengHan/inference/saving_docs/24/24doc.txt',
              'w') as doc24:
        for temp_rectangle in rectangles:
            doc24.write('%d %d %d %d %f\n' %
                        (temp_rectangle[0], temp_rectangle[1],
                         temp_rectangle[2], temp_rectangle[3],
                         temp_rectangle[4]))

    if len(rectangles) == 0:
        print("rect drop to 0 at 24net")
        return rectangles

    net_48.blobs['data'].reshape(len(rectangles), 1, 48, 48)
    for crop_number, rectangle in enumerate(rectangles):
        rectangle = rectangular2square(rectangle, img)
        crop_img = img[int(rectangle[1]):int(rectangle[3]),
                       int(rectangle[0]):int(rectangle[2])]
        net_48.blobs['data'].data[crop_number] = preprocess(crop_img, (48, 48))

    out = net_48.forward()
    cls_prob = out['prob1']
    roi_prob = out['ip_roi']
    return tools.filter_face_48net(cls_prob, roi_prob, rectangles, origin_w,
                                   origin_h, threshold[2])
def main(args):
    """Generate 24x24 training crops from P-Net detections.

    Every line of ``wider_face_train.txt`` is split on spaces into an image
    name followed by ground-truth box coordinates (four values per box). The
    P-Net restored from ``args.pnet_model`` is run on the image and each
    detection is cropped, resized to 24x24 and filed by its best IoU with the
    ground truth:

    * max IoU < 0.3   -> ``24/negative``, label ``0``
    * max IoU >= 0.65 -> ``24/positive``, label ``1`` plus box offsets
    * max IoU >= 0.4  -> ``24/part``, label ``-1`` plus box offsets

    Detections with a max IoU in [0.3, 0.4) are not saved.

    Args:
        args: namespace providing ``pnet_model`` (checkpoint to restore).
    """
    image_size = 24
    save_dir = str(image_size)
    anno_file = 'wider_face_train.txt'
    im_dir = 'WIDER_train/images/'
    neg_save_dir = save_dir + '/negative'
    pos_save_dir = save_dir + '/positive'
    part_save_dir = save_dir + '/part'
    for directory in (save_dir, pos_save_dir, part_save_dir, neg_save_dir):
        if not os.path.exists(directory):
            os.mkdir(directory)

    threshold = 0.6
    p_idx = 0  # positive
    n_idx = 0  # negative
    d_idx = 0  # dont care
    image_idx = 0
    minsize = 20
    factor = 0.709
    model_file = args.pnet_model

    # The label files are closed by the 'with' block even when processing
    # fails half-way (they used to be closed only on the success path).
    with open(save_dir + '/pos_24.txt', 'w') as f1, \
            open(save_dir + '/neg_24.txt', 'w') as f2, \
            open(save_dir + '/part_24.txt', 'w') as f3:
        with open(anno_file, 'r') as f:
            annotations = f.readlines()
        num = len(annotations)
        print('%d pics in total' % num)

        with tf.device('/gpu:0'), tf.Graph().as_default():
            config = tf.ConfigProto(allow_soft_placement=True)
            config.gpu_options.per_process_gpu_memory_fraction = 0.5
            with tf.Session(config=config) as sess:
                image = tf.placeholder(tf.float32, [None, None, None, 3])
                pnet = PNet({'data': image}, mode='test')
                out_tensor = pnet.get_all_output()
                sess.run(tf.global_variables_initializer())
                saver = tf.train.Saver()
                saver.restore(sess, model_file)

                def pnet_fun(img):
                    return sess.run(out_tensor, feed_dict={image: img})

                for annotation in annotations:
                    annotation = annotation.strip().split(' ')
                    bbox = list(map(float, annotation[1:]))
                    gts = np.array(bbox, dtype=np.float32).reshape(-1, 4)
                    img_path = im_dir + annotation[0] + '.jpg'
                    img = cv2.imread(img_path)
                    image_idx += 1
                    if img is None:
                        # cv2.imread signals failure by returning None.
                        print('Skip unreadable image: %s' % img_path)
                        view_bar(image_idx, num)
                        continue
                    rectangles = detect_face_12net(img, minsize, pnet_fun,
                                                   threshold, factor)
                    view_bar(image_idx, num)

                    for box in rectangles:
                        lis = box.astype(np.int32)
                        lis[lis < 0] = 0  # clamp negative values to 0
                        x_left, y_top, x_right, y_bottom, _ = lis
                        crop_w = x_right - x_left + 1
                        crop_h = y_bottom - y_top + 1
                        # ignore box that is too small
                        if crop_w < image_size or crop_h < image_size:
                            continue

                        Iou = IoU(box, gts)
                        max_iou = np.max(Iou)
                        cropped_im = img[y_top:y_bottom + 1,
                                         x_left:x_right + 1]
                        resized_im = cv2.resize(
                            cropped_im, (image_size, image_size),
                            interpolation=cv2.INTER_LINEAR)

                        # save negative images and write label;
                        # Iou with all gts must be below 0.3
                        if max_iou < 0.3:
                            save_file = os.path.join(neg_save_dir,
                                                     '%s.jpg' % n_idx)
                            f2.write('%s/negative/%s' % (save_dir, n_idx) +
                                     ' 0\n')
                            cv2.imwrite(save_file, resized_im)
                            n_idx += 1
                            continue

                        # find gt_box with the highest iou
                        x1, y1, x2, y2 = gts[np.argmax(Iou)]
                        # compute bbox reg label
                        offset_x1 = (x1 - x_left) / float(crop_w)
                        offset_y1 = (y1 - y_top) / float(crop_h)
                        offset_x2 = (x2 - x_right) / float(crop_w)
                        offset_y2 = (y2 - y_bottom) / float(crop_h)
                        offsets = (offset_x1, offset_y1, offset_x2, offset_y2)

                        if max_iou >= 0.65:
                            save_file = os.path.join(pos_save_dir,
                                                     '%s.jpg' % p_idx)
                            f1.write('%s/positive/%s' % (save_dir, p_idx) +
                                     ' 1 %.2f %.2f %.2f %.2f\n' % offsets)
                            cv2.imwrite(save_file, resized_im)
                            p_idx += 1
                        elif max_iou >= 0.4:
                            save_file = os.path.join(part_save_dir,
                                                     '%s.jpg' % d_idx)
                            f3.write('%s/part/%s' % (save_dir, d_idx) +
                                     ' -1 %.2f %.2f %.2f %.2f\n' % offsets)
                            cv2.imwrite(save_file, resized_im)
                            d_idx += 1
def main(args):
    """Generate 24x24 training crops from P-Net detections.

    Every line of ``wider_face_train.txt`` is split on spaces into an image
    name followed by ground-truth box coordinates (four values per box). The
    P-Net restored from ``args.pnet_model`` is run on the image and each
    detection is cropped, resized to 24x24 and filed by its best IoU with the
    ground truth:

    * max IoU < 0.3   -> ``24/negative``, label ``0``
    * max IoU >= 0.65 -> ``24/positive``, label ``1`` plus box offsets
    * max IoU >= 0.4  -> ``24/part``, label ``-1`` plus box offsets

    Detections with a max IoU in [0.3, 0.4) are not saved.

    Args:
        args: namespace providing ``pnet_model`` (checkpoint to restore).
    """
    image_size = 24
    save_dir = str(image_size)
    anno_file = 'wider_face_train.txt'
    im_dir = 'WIDER_train/images/'
    neg_save_dir = save_dir + '/negative'
    pos_save_dir = save_dir + '/positive'
    part_save_dir = save_dir + '/part'
    for directory in (save_dir, pos_save_dir, part_save_dir, neg_save_dir):
        if not os.path.exists(directory):
            os.mkdir(directory)

    threshold = 0.6
    p_idx = 0  # positive
    n_idx = 0  # negative
    d_idx = 0  # dont care
    image_idx = 0
    minsize = 20
    factor = 0.709
    model_file = args.pnet_model

    # Opening the label files in a 'with' block guarantees they are flushed
    # and closed even if an exception interrupts the run.
    with open(save_dir + '/pos_24.txt', 'w') as f1, \
            open(save_dir + '/neg_24.txt', 'w') as f2, \
            open(save_dir + '/part_24.txt', 'w') as f3:
        with open(anno_file, 'r') as f:
            annotations = f.readlines()
        num = len(annotations)
        print('%d pics in total' % num)

        with tf.device('/gpu:0'), tf.Graph().as_default():
            config = tf.ConfigProto(allow_soft_placement=True)
            config.gpu_options.per_process_gpu_memory_fraction = 0.5
            with tf.Session(config=config) as sess:
                image = tf.placeholder(tf.float32, [None, None, None, 3])
                pnet = PNet({'data': image}, mode='test')
                out_tensor = pnet.get_all_output()
                sess.run(tf.global_variables_initializer())
                saver = tf.train.Saver()
                saver.restore(sess, model_file)

                def pnet_fun(img):
                    return sess.run(out_tensor, feed_dict={image: img})

                for annotation in annotations:
                    annotation = annotation.strip().split(' ')
                    bbox = list(map(float, annotation[1:]))
                    gts = np.array(bbox, dtype=np.float32).reshape(-1, 4)
                    img_path = im_dir + annotation[0] + '.jpg'
                    img = cv2.imread(img_path)
                    image_idx += 1
                    if img is None:
                        # cv2.imread signals failure by returning None.
                        print('Skip unreadable image: %s' % img_path)
                        view_bar(image_idx, num)
                        continue
                    rectangles = detect_face_12net(img, minsize, pnet_fun,
                                                   threshold, factor)
                    view_bar(image_idx, num)

                    for box in rectangles:
                        lis = box.astype(np.int32)
                        lis[lis < 0] = 0  # clamp negative values to 0
                        x_left, y_top, x_right, y_bottom, _ = lis
                        crop_w = x_right - x_left + 1
                        crop_h = y_bottom - y_top + 1
                        # ignore box that is too small
                        if crop_w < image_size or crop_h < image_size:
                            continue

                        Iou = IoU(box, gts)
                        max_iou = np.max(Iou)
                        cropped_im = img[y_top:y_bottom + 1,
                                         x_left:x_right + 1]
                        resized_im = cv2.resize(
                            cropped_im, (image_size, image_size),
                            interpolation=cv2.INTER_LINEAR)

                        # save negative images and write label;
                        # Iou with all gts must be below 0.3
                        if max_iou < 0.3:
                            save_file = os.path.join(neg_save_dir,
                                                     '%s.jpg' % n_idx)
                            f2.write('%s/negative/%s' % (save_dir, n_idx) +
                                     ' 0\n')
                            cv2.imwrite(save_file, resized_im)
                            n_idx += 1
                            continue

                        # find gt_box with the highest iou
                        x1, y1, x2, y2 = gts[np.argmax(Iou)]
                        # compute bbox reg label
                        offset_x1 = (x1 - x_left) / float(crop_w)
                        offset_y1 = (y1 - y_top) / float(crop_h)
                        offset_x2 = (x2 - x_right) / float(crop_w)
                        offset_y2 = (y2 - y_bottom) / float(crop_h)
                        offsets = (offset_x1, offset_y1, offset_x2, offset_y2)

                        if max_iou >= 0.65:
                            save_file = os.path.join(pos_save_dir,
                                                     '%s.jpg' % p_idx)
                            f1.write('%s/positive/%s' % (save_dir, p_idx) +
                                     ' 1 %.2f %.2f %.2f %.2f\n' % offsets)
                            cv2.imwrite(save_file, resized_im)
                            p_idx += 1
                        elif max_iou >= 0.4:
                            save_file = os.path.join(part_save_dir,
                                                     '%s.jpg' % d_idx)
                            f3.write('%s/part/%s' % (save_dir, d_idx) +
                                     ' -1 %.2f %.2f %.2f %.2f\n' % offsets)
                            cv2.imwrite(save_file, resized_im)
                            d_idx += 1
def main(args):
    """Run the face detector on frames from camera 0 and display the result.

    Args:
        args: namespace providing ``save_image`` (output directory or falsy),
            ``save_bbox_coordinates`` (prefix of the detections file),
            ``save_camera_images`` (directory for raw frames or falsy),
            ``model_dir``, ``resize``, ``net`` ("ALL", "PR" or "P"),
            ``minsize``, ``threshold`` and ``factor``.

    Side effects:
        Existing output directories are deleted and recreated. Annotated
        frames, raw frames and a ``*_dets.txt`` file are written when the
        corresponding options are set. The loop ends when 'q' is pressed,
        when a frame cannot be read, or when ``args.net`` is invalid.
    """
    detect_totalTime = 0.0
    frameCount = 0
    fw = None

    # Start from an empty result directory: delete an existing one, then
    # recreate it.
    if args.save_image:
        output_directory = args.save_image
        print(args.save_image)
        if os.path.exists(output_directory):
            shutil.rmtree(output_directory)
        os.mkdir(output_directory)
        fw = open(os.path.join(output_directory,
                               args.save_bbox_coordinates + '_dets.txt'), 'w')

    try:
        # Same preparation for the directory receiving the raw frames. The
        # truthiness test matches the check used when the frames are saved.
        if args.save_camera_images:
            source_directory = args.save_camera_images
            if os.path.exists(source_directory):
                shutil.rmtree(source_directory)
            os.mkdir(source_directory)

        config = tf.ConfigProto(allow_soft_placement=True)
        with tf.device('/cpu:0'), tf.Graph().as_default(), \
                tf.Session(config=config) as sess:
            file_paths = get_model_filenames(args.model_dir)
            print(file_paths, len(file_paths))

            if len(file_paths) == 3:
                # Three separate checkpoints: P-Net, R-Net and O-Net.
                image_pnet = tf.placeholder(tf.float32, [None, None, None, 3])
                pnet = PNet({'data': image_pnet}, mode='test')
                out_tensor_pnet = pnet.get_all_output()

                image_rnet = tf.placeholder(tf.float32, [None, 24, 24, 3])
                rnet = RNet({'data': image_rnet}, mode='test')
                out_tensor_rnet = rnet.get_all_output()

                image_onet = tf.placeholder(tf.float32, [None, 48, 48, 3])
                onet = ONet({'data': image_onet}, mode='test')
                out_tensor_onet = onet.get_all_output()

                saver_pnet = tf.train.Saver(
                    [v for v in tf.global_variables()
                     if v.name[0:5] == "pnet/"])
                saver_rnet = tf.train.Saver(
                    [v for v in tf.global_variables()
                     if v.name[0:5] == "rnet/"])
                saver_onet = tf.train.Saver(
                    [v for v in tf.global_variables()
                     if v.name[0:5] == "onet/"])

                saver_pnet.restore(sess, file_paths[0])

                def pnet_fun(img):
                    return sess.run(out_tensor_pnet,
                                    feed_dict={image_pnet: img})

                saver_rnet.restore(sess, file_paths[1])

                def rnet_fun(img):
                    return sess.run(out_tensor_rnet,
                                    feed_dict={image_rnet: img})

                saver_onet.restore(sess, file_paths[2])

                def onet_fun(img):
                    return sess.run(out_tensor_onet,
                                    feed_dict={image_onet: img})
            else:
                # Single meta graph plus checkpoint holding all three nets.
                saver = tf.train.import_meta_graph(file_paths[0])
                saver.restore(sess, file_paths[1])

                def pnet_fun(img):
                    return sess.run(
                        ('softmax/Reshape_1:0', 'pnet/conv4-2/BiasAdd:0'),
                        feed_dict={'Placeholder:0': img})

                def rnet_fun(img):
                    return sess.run(
                        ('softmax_1/softmax:0',
                         'rnet/conv5-2/rnet/conv5-2:0'),
                        feed_dict={'Placeholder_1:0': img})

                def onet_fun(img):
                    return sess.run(
                        ('softmax_2/softmax:0',
                         'onet/conv6-2/onet/conv6-2:0',
                         'onet/conv6-3/onet/conv6-3:0'),
                        feed_dict={'Placeholder_2:0': img})

            video_capture = cv2.VideoCapture(0)
            print(video_capture.get(cv2.CAP_PROP_FRAME_WIDTH),
                  video_capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
            if not video_capture.isOpened():
                print("ERROR: NO VIDEO STREAM OR NO CAMERA DEVICE.")
            else:
                print(video_capture.get(cv2.CAP_PROP_FPS))
                while True:
                    ret, frame = video_capture.read()
                    if not ret:
                        # Check 'ret' before touching 'frame'; copying a
                        # failed read used to raise.
                        print("ERROR: FAILED TO READ FRAME FROM CAMERA.")
                        break
                    original_img = frame.copy()
                    width = int(video_capture.get(cv2.CAP_PROP_FRAME_WIDTH) *
                                args.resize)
                    height = int(video_capture.get(cv2.CAP_PROP_FRAME_HEIGHT) *
                                 args.resize)
                    resized_image = cv2.resize(frame, (width, height))

                    start_time = time.time() * 1000
                    if args.net == "ALL":
                        # P-Net + R-Net + O-Net
                        rectangles, points = detect_face(
                            resized_image, args.minsize, pnet_fun, rnet_fun,
                            onet_fun, args.threshold, args.factor)
                    elif args.net == "PR":
                        # P-Net + R-Net without faces' landmarks
                        rectangles = detect_face_24net(
                            resized_image, args.minsize, pnet_fun, rnet_fun,
                            args.threshold, args.factor)
                    elif args.net == "P":
                        # Only P-Net
                        rectangles = detect_face_12net(
                            resized_image, args.minsize, pnet_fun,
                            args.threshold, args.factor)
                    else:
                        # Without detections there is nothing to draw; going
                        # on would hit an undefined 'rectangles'.
                        print("ERROR: WRONG NET INPUT")
                        break
                    end_time = time.time() * 1000
                    elapsed = end_time - start_time
                    detect_totalTime = detect_totalTime + elapsed

                    if args.net == "ALL":
                        # The outputs of O-Net which are faces' landmarks
                        points = np.transpose(points)
                    else:
                        points = None
                    fps = 1000 / elapsed if elapsed > 0 else 0.0
                    add_overlays(frame, rectangles, points, fps,
                                 1 / args.resize, 1 / args.resize)
                    cv2.imshow("MTCNN-Tensorflow wangbm", frame)

                    print("ID: {:d}, cost time: {:.1f}ms".format(
                        frameCount, elapsed))

                    if points is not None:
                        for point in points:
                            for i in range(0, 10, 2):
                                # Scale landmarks back to the full frame.
                                point[i] = point[i] * (1 / args.resize)
                                point[i + 1] = point[i + 1] * (1 / args.resize)
                                print("\tID: {:d}, face landmarks x = {:.1f}, y = {:.1f}".format(
                                    int(i / 2 + 1), point[i], point[i + 1]))

                    if args.save_image:
                        outputFilePath = os.path.join(
                            output_directory, str(frameCount) + ".jpg")
                        cv2.imwrite(outputFilePath, frame)
                        # The file stays open for the following frames and is
                        # closed once in the 'finally' clause.
                        for rectangle in rectangles:
                            fw.write('{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'.format(
                                str(frameCount), rectangle[4], rectangle[0],
                                rectangle[1], rectangle[2], rectangle[3]))

                    if args.save_camera_images:
                        sourceFilePath = os.path.join(
                            source_directory, str(frameCount) + ".jpg")
                        cv2.imwrite(sourceFilePath, original_img)

                    frameCount = frameCount + 1
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        cv2.destroyAllWindows()
                        break

            video_capture.release()
    finally:
        if fw is not None:
            fw.close()

    # Skip the statistics when no frame was processed (avoids dividing by 0).
    if frameCount > 0 and detect_totalTime > 0:
        detect_average_time = detect_totalTime / frameCount
        print("*" * 50)
        print("detection average time: " + str(detect_average_time) + "ms")
        print("detection fps: " + str(1000 / detect_average_time))
def detectFace(img, threshold):
    """Run the Pnet/Rnet/Onet cascade on ``img`` and print per-stage timings.

    Args:
        img: image array with exactly three dimensions.
        threshold: indexable with one score threshold per stage.

    Returns:
        The rectangles of the last stage that was reached; an earlier stage's
        empty result is returned as soon as no candidate is left.
    """
    caffe_img = (img.copy() - 127.5) / 127.5
    origin_h, origin_w, _ = caffe_img.shape
    scales = tools.calculateScales(img)
    stage_start = time.time()

    # Stage 1: one Pnet prediction per scale.
    pnet_results = []
    for factor in scales:
        new_h = int(origin_h * factor)
        new_w = int(origin_w * factor)
        resized = cv2.resize(caffe_img, (new_w, new_h))
        batch = resized.reshape(1, *resized.shape)
        pnet_results.append(Pnet.predict(batch))

    rectangles = []
    for factor, prediction in zip(scales, pnet_results):
        # prediction[0] holds the class scores, prediction[1] the box
        # regression; index 0 picks the only batch entry.
        cls_prob = prediction[0][0][:, :, 1]
        roi = prediction[1][0]
        rows, cols = cls_prob.shape
        out_side = max(rows, cols)
        # Axes are swapped before the arrays go to tools.detect_face_12net.
        cls_prob = np.swapaxes(cls_prob, 0, 1)
        roi = np.swapaxes(roi, 0, 2)
        rectangles.extend(
            tools.detect_face_12net(cls_prob, roi, out_side, 1 / factor,
                                    origin_w, origin_h, threshold[0]))
    rectangles = tools.NMS(rectangles, 0.7, 'iou')
    after_12 = time.time()
    print('time for 12 net is: ', after_12 - stage_start)
    if len(rectangles) == 0:
        return rectangles

    # Stage 2: Rnet on 24x24 crops of every candidate.
    crops_24 = [
        cv2.resize(
            caffe_img[int(rect[1]):int(rect[3]), int(rect[0]):int(rect[2])],
            (24, 24))
        for rect in rectangles
    ]
    rnet_result = Rnet.predict(np.array(crops_24))
    cls_prob = np.array(rnet_result[0])
    roi_prob = np.array(rnet_result[1])
    rectangles = tools.filter_face_24net(cls_prob, roi_prob, rectangles,
                                         origin_w, origin_h, threshold[1])
    after_24 = time.time()
    print('time for 24 net is: ', after_24 - after_12)
    if len(rectangles) == 0:
        return rectangles

    # Stage 3: Onet on 48x48 crops; it also returns landmark predictions.
    crops_48 = [
        cv2.resize(
            caffe_img[int(rect[1]):int(rect[3]), int(rect[0]):int(rect[2])],
            (48, 48))
        for rect in rectangles
    ]
    onet_result = Onet.predict(np.array(crops_48))
    cls_prob = onet_result[0]
    roi_prob = onet_result[1]
    pts_prob = onet_result[2]
    rectangles = tools.filter_face_48net(cls_prob, roi_prob, pts_prob,
                                         rectangles, origin_w, origin_h,
                                         threshold[2])
    after_48 = time.time()
    print('time for 48 net is: ', after_48 - after_24)
    return rectangles
def detectFace(img, threshold):
    """Run the Pnet/Rnet/Onet cascade on ``img``.

    Args:
        img: image array with exactly three dimensions.
        threshold: indexable with one score threshold per stage.

    Returns:
        The rectangles of the last stage that was reached; an earlier stage's
        empty result is returned as soon as no candidate is left.
    """
    caffe_img = (img.copy() - 127.5) / 127.5
    origin_h, origin_w, _ = caffe_img.shape
    scales = calculateScales(img)

    # Stage 1: Pnet on every scale of the image pyramid.
    predictions = []
    for scale in scales:
        scaled_h = int(origin_h * scale)
        scaled_w = int(origin_w * scale)
        scaled = cv2.resize(caffe_img, (scaled_w, scaled_h))
        predictions.append(Pnet.predict(scaled.reshape(1, *scaled.shape)))

    rectangles = []
    for index, prediction in enumerate(predictions):
        face_prob = prediction[0][0][:, :, 1]
        box_reg = prediction[1][0]
        rows, cols = face_prob.shape
        side = max(rows, cols)
        face_prob = np.swapaxes(face_prob, 0, 1)
        box_reg = np.swapaxes(box_reg, 0, 2)
        found = detect_face_12net(face_prob, box_reg, side, 1 / scales[index],
                                  origin_w, origin_h, threshold[0])
        rectangles.extend(found)
    rectangles = NMS(rectangles, 0.7, 'iou')
    if len(rectangles) == 0:
        return rectangles

    # Stage 2: Rnet on 24x24 crops.
    batch_24 = []
    for rect in rectangles:
        top, bottom = int(rect[1]), int(rect[3])
        left, right = int(rect[0]), int(rect[2])
        batch_24.append(cv2.resize(caffe_img[top:bottom, left:right],
                                   (24, 24)))
    rnet_out = Rnet.predict(np.array(batch_24))
    cls_prob = np.array(rnet_out[0])
    roi_prob = np.array(rnet_out[1])
    rectangles = filter_face_24net(cls_prob, roi_prob, rectangles, origin_w,
                                   origin_h, threshold[1])
    if len(rectangles) == 0:
        return rectangles

    # Stage 3: Onet on 48x48 crops.
    batch_48 = []
    for rect in rectangles:
        top, bottom = int(rect[1]), int(rect[3])
        left, right = int(rect[0]), int(rect[2])
        batch_48.append(cv2.resize(caffe_img[top:bottom, left:right],
                                   (48, 48)))
    onet_out = Onet.predict(np.array(batch_48))
    cls_prob, roi_prob, pts_prob = onet_out[0], onet_out[1], onet_out[2]
    return filter_face_48net(cls_prob, roi_prob, pts_prob, rectangles,
                             origin_w, origin_h, threshold[2])
def main(args):
    """Run the face detector on every image found in ``args.image_path``.

    Args:
        args: namespace providing ``save_image`` (output directory or falsy),
            ``save_bbox_coordinates`` (prefix of the detections file),
            ``model_dir``, ``image_path``, ``resize``, ``net`` ("ALL", "PR"
            or "P"), ``minsize``, ``threshold`` and ``factor``.

    Side effects:
        An existing output directory is deleted and recreated. Annotated
        images and a ``*_dets.txt`` file are written when ``save_image`` is
        set. Files that cannot be decoded as images are skipped. Processing
        stops when 'q' is pressed or ``args.net`` is invalid.
    """
    detect_totalTime = 0.0
    frameCount = 0
    fw = None

    # Start from an empty result directory: delete an existing one, then
    # recreate it.
    if args.save_image:
        output_directory = args.save_image
        print(args.save_image)
        if os.path.exists(output_directory):
            shutil.rmtree(output_directory)
        os.mkdir(output_directory)
        fw = open(os.path.join(output_directory,
                               args.save_bbox_coordinates + '_dets.txt'), 'w')

    try:
        config = tf.ConfigProto(allow_soft_placement=True)
        with tf.device('/cpu:0'), tf.Graph().as_default(), \
                tf.Session(config=config) as sess:
            file_paths = get_model_filenames(args.model_dir)

            if len(file_paths) == 3:
                # Three separate checkpoints: P-Net, R-Net and O-Net.
                image_pnet = tf.placeholder(tf.float32, [None, None, None, 3])
                pnet = PNet({'data': image_pnet}, mode='test')
                out_tensor_pnet = pnet.get_all_output()

                image_rnet = tf.placeholder(tf.float32, [None, 24, 24, 3])
                rnet = RNet({'data': image_rnet}, mode='test')
                out_tensor_rnet = rnet.get_all_output()

                image_onet = tf.placeholder(tf.float32, [None, 48, 48, 3])
                onet = ONet({'data': image_onet}, mode='test')
                out_tensor_onet = onet.get_all_output()

                saver_pnet = tf.train.Saver(
                    [v for v in tf.global_variables()
                     if v.name[0:5] == "pnet/"])
                saver_rnet = tf.train.Saver(
                    [v for v in tf.global_variables()
                     if v.name[0:5] == "rnet/"])
                saver_onet = tf.train.Saver(
                    [v for v in tf.global_variables()
                     if v.name[0:5] == "onet/"])

                saver_pnet.restore(sess, file_paths[0])

                def pnet_fun(img):
                    return sess.run(out_tensor_pnet,
                                    feed_dict={image_pnet: img})

                saver_rnet.restore(sess, file_paths[1])

                def rnet_fun(img):
                    return sess.run(out_tensor_rnet,
                                    feed_dict={image_rnet: img})

                saver_onet.restore(sess, file_paths[2])

                def onet_fun(img):
                    return sess.run(out_tensor_onet,
                                    feed_dict={image_onet: img})
            else:
                # Single meta graph plus checkpoint holding all three nets.
                saver = tf.train.import_meta_graph(file_paths[0])
                saver.restore(sess, file_paths[1])

                def pnet_fun(img):
                    return sess.run(
                        ('softmax/Reshape_1:0', 'pnet/conv4-2/BiasAdd:0'),
                        feed_dict={'Placeholder:0': img})

                def rnet_fun(img):
                    return sess.run(
                        ('softmax_1/softmax:0',
                         'rnet/conv5-2/rnet/conv5-2:0'),
                        feed_dict={'Placeholder_1:0': img})

                def onet_fun(img):
                    return sess.run(
                        ('softmax_2/softmax:0',
                         'onet/conv6-2/onet/conv6-2:0',
                         'onet/conv6-3/onet/conv6-3:0'),
                        feed_dict={'Placeholder_2:0': img})

            for filename in os.listdir(args.image_path):
                img = cv2.imread(os.path.join(args.image_path, filename))
                if img is None:
                    # cv2.imread returns None for files it cannot decode.
                    print("WARNING: SKIP UNREADABLE FILE " + filename)
                    continue
                height, width, _ = img.shape
                width = int(width * args.resize)
                height = int(height * args.resize)
                resized_image = cv2.resize(img, (width, height))

                start_time = time.time() * 1000
                if args.net == "ALL":
                    # P-Net + R-Net + O-Net
                    rectangles, points = detect_face(
                        resized_image, args.minsize, pnet_fun, rnet_fun,
                        onet_fun, args.threshold, args.factor)
                elif args.net == "PR":
                    # P-Net + R-Net without faces' landmarks
                    rectangles = detect_face_24net(
                        resized_image, args.minsize, pnet_fun, rnet_fun,
                        args.threshold, args.factor)
                elif args.net == "P":
                    # Only P-Net
                    rectangles = detect_face_12net(
                        resized_image, args.minsize, pnet_fun,
                        args.threshold, args.factor)
                else:
                    # Going on would hit an undefined 'rectangles'.
                    print("ERROR: WRONG NET INPUT")
                    break
                end_time = time.time() * 1000
                elapsed = end_time - start_time
                detect_totalTime = detect_totalTime + elapsed

                print(str(frameCount) + " time : " + str(elapsed) + "ms")

                if args.net == "ALL":
                    # The outputs of O-Net which are faces' landmarks
                    points = np.transpose(points)
                else:
                    points = None
                fps = 1000 / elapsed if elapsed > 0 else 0.0
                add_overlays(img, rectangles, points, fps, 1 / args.resize,
                             1 / args.resize)
                cv2.imshow("MTCNN-Tenssorflow wangbm", img)
                frameCount = frameCount + 1

                if args.save_image:
                    outputFilePath = os.path.join(output_directory, filename)
                    cv2.imwrite(outputFilePath, img)
                    for rectangle in rectangles:
                        fw.write(
                            '{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'.format(
                                filename[:-4], rectangle[4], rectangle[0],
                                rectangle[1], rectangle[2], rectangle[3]))

                if cv2.waitKey(1) & 0xFF == ord('q'):
                    cv2.destroyAllWindows()
                    break
    finally:
        # Close the detections file even when processing raised.
        if fw is not None:
            fw.close()

    # Skip the statistics when no image was processed (avoids dividing by 0).
    if frameCount > 0 and detect_totalTime > 0:
        detect_average_time = detect_totalTime / frameCount
        print("*" * 50)
        print("detection average time: " + str(detect_average_time) + "ms")
        print("detection fps: " + str(1000 / detect_average_time))
def main(args):
    """Run the face detector on frames from camera 0 and display the result.

    Frames are requested at 1280x720 and detection runs on a 640x360 copy,
    on which the boxes, scores and (for "ALL") landmarks are drawn.

    Args:
        args: namespace providing ``save_image`` (truthy to store frames),
            ``save_path`` (directory for the stored frames), ``model_dir``,
            ``net`` ("ALL", "PR" or "P"), ``minsize``, ``threshold`` and
            ``factor``.

    Side effects:
        When ``save_image`` is set, ``save_path`` is emptied and annotated
        frames are written into it. The loop ends when 'q' is pressed or
        ``args.net`` is invalid.
    """
    detect_totalTime = 0.0
    frameCount = 0

    if args.save_image:
        output_directory = args.save_path
        print(args.save_image)
        # Always recreate the directory after removing it. mkdir used to sit
        # in the else branch, which left no directory to write into.
        if os.path.exists(output_directory):
            shutil.rmtree(output_directory)
        os.mkdir(output_directory)

    config = tf.ConfigProto(allow_soft_placement=True)
    with tf.device('/cpu:0'), tf.Graph().as_default(), \
            tf.Session(config=config) as sess:
        file_paths = get_model_filenames(args.model_dir)

        if len(file_paths) == 3:
            # Three separate checkpoints: P-Net, R-Net and O-Net.
            image_pnet = tf.placeholder(tf.float32, [None, None, None, 3])
            pnet = PNet({'data': image_pnet}, mode='test')
            out_tensor_pnet = pnet.get_all_output()

            image_rnet = tf.placeholder(tf.float32, [None, 24, 24, 3])
            rnet = RNet({'data': image_rnet}, mode='test')
            out_tensor_rnet = rnet.get_all_output()

            image_onet = tf.placeholder(tf.float32, [None, 48, 48, 3])
            onet = ONet({'data': image_onet}, mode='test')
            out_tensor_onet = onet.get_all_output()

            saver_pnet = tf.train.Saver(
                [v for v in tf.global_variables() if v.name[0:5] == "pnet/"])
            saver_rnet = tf.train.Saver(
                [v for v in tf.global_variables() if v.name[0:5] == "rnet/"])
            saver_onet = tf.train.Saver(
                [v for v in tf.global_variables() if v.name[0:5] == "onet/"])

            saver_pnet.restore(sess, file_paths[0])

            def pnet_fun(img):
                return sess.run(out_tensor_pnet, feed_dict={image_pnet: img})

            saver_rnet.restore(sess, file_paths[1])

            def rnet_fun(img):
                return sess.run(out_tensor_rnet, feed_dict={image_rnet: img})

            saver_onet.restore(sess, file_paths[2])

            def onet_fun(img):
                return sess.run(out_tensor_onet, feed_dict={image_onet: img})
        else:
            # Single meta graph plus checkpoint holding all three nets.
            saver = tf.train.import_meta_graph(file_paths[0])
            saver.restore(sess, file_paths[1])

            def pnet_fun(img):
                return sess.run(
                    ('softmax/Reshape_1:0', 'pnet/conv4-2/BiasAdd:0'),
                    feed_dict={'Placeholder:0': img})

            def rnet_fun(img):
                return sess.run(
                    ('softmax_1/softmax:0', 'rnet/conv5-2/rnet/conv5-2:0'),
                    feed_dict={'Placeholder_1:0': img})

            def onet_fun(img):
                return sess.run(
                    ('softmax_2/softmax:0', 'onet/conv6-2/onet/conv6-2:0',
                     'onet/conv6-3/onet/conv6-3:0'),
                    feed_dict={'Placeholder_2:0': img})

        video_capture = cv2.VideoCapture(0)
        if not video_capture.isOpened():
            print("ERROR: NO VIDEO STREAM OR NO CAMERA DEVICE.")
        else:
            # Named constants for the property ids previously given as 3, 4.
            video_capture.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
            video_capture.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)
            while True:
                ret, frame = video_capture.read()
                if not ret:
                    # A failed read is retried on the next iteration.
                    continue
                resized_image = cv2.resize(frame, (640, 360))

                start_time = time.time() * 1000
                if args.net == "ALL":
                    # P-Net + R-Net + O-Net
                    rectangles, points = detect_face(
                        resized_image, args.minsize, pnet_fun, rnet_fun,
                        onet_fun, args.threshold, args.factor)
                elif args.net == "PR":
                    # P-Net + R-Net without faces' landmarks
                    rectangles = detect_face_24net(
                        resized_image, args.minsize, pnet_fun, rnet_fun,
                        args.threshold, args.factor)
                elif args.net == "P":
                    # Only P-Net
                    rectangles = detect_face_12net(
                        resized_image, args.minsize, pnet_fun,
                        args.threshold, args.factor)
                else:
                    # Going on would hit an undefined 'rectangles'.
                    print("ERROR: WRONG NET INPUT")
                    break
                end_time = time.time() * 1000
                detect_totalTime = detect_totalTime + (end_time - start_time)
                print(str(frameCount) + " time : " +
                      str(end_time - start_time) + "ms")

                if args.net == "ALL":
                    # The outputs of O-Net which are faces' landmarks
                    points = np.transpose(points)

                for rectangle in rectangles:
                    top_left = (int(rectangle[0]), int(rectangle[1]))
                    bottom_right = (int(rectangle[2]), int(rectangle[3]))
                    cv2.putText(resized_image, str(rectangle[4]), top_left,
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0))
                    cv2.rectangle(resized_image, top_left, bottom_right,
                                  (255, 0, 0), 1)

                if args.net == "ALL":
                    for point in points:
                        for i in range(0, 10, 2):
                            cv2.circle(resized_image,
                                       (int(point[i]), int(point[i + 1])), 2,
                                       (0, 255, 0))
                cv2.imshow("MTCNN-Tensorflow wangbm", resized_image)

                if args.save_image:
                    outputFilePath = os.path.join(output_directory,
                                                  str(frameCount) + ".jpg")
                    cv2.imwrite(outputFilePath, resized_image)

                if cv2.waitKey(1) & 0xFF == ord('q'):
                    cv2.destroyAllWindows()
                    break
                frameCount = frameCount + 1

        video_capture.release()

    # Skip the statistics when no frame was counted (avoids dividing by 0).
    if frameCount > 0 and detect_totalTime > 0:
        detect_average_time = detect_totalTime / frameCount
        print("detection average time: " + str(detect_average_time) + "ms")
        print("detection fps: " + str(1 / (detect_average_time / 1000)))