def mtcnn_localize_faces(image, pnet, rnet, onet, minsize=20,
                         threshold=[0.7, 0.8, 0.85], factor=0.75):
    """
    Localize faces and their landmarks in an image using MTCNN.

    Params
        :image
        :minsize - minimum face size
        :threshold - a list/array with 3 values: the thresholds for
                     pnet, rnet & onet, respectively
        :factor - scaling factor for the image octave
    Return
        :bbs - list of bounding boxes as (top, right, bottom, left)
        :lds - list of face landmarks
    """
    image = image[:, :, 0:3]
    bounding_boxes, landmarks = detect_face.detect_face(
        image, minsize, pnet, rnet, onet, threshold, factor)
    nrof_faces = bounding_boxes.shape[0]

    bbs = list()
    lds = list()
    if nrof_faces > 0:
        det = bounding_boxes[:, 0:4]
        bb = np.zeros((nrof_faces, 4), dtype=np.int32)
        lands = np.zeros((nrof_faces, 10), dtype=np.int32)
        landmarks = np.reshape(landmarks, (nrof_faces, 10))
        for i in range(nrof_faces):
            # Convert to int32
            lands[i] = np.ravel(landmarks[i])
            bb[i] = np.ravel(det[i])
            # Skip boxes that touch or exceed the image border
            if (bb[i][0] <= 0 or bb[i][1] <= 0 or
                    bb[i][2] >= len(image[0]) or bb[i][3] >= len(image)):
                print('face is out of range!')
                continue
            # Reorder as (top, right, bottom, left)
            bbs.append((bb[i][1], bb[i][2], bb[i][3], bb[i][0]))
            lds.append(lands[i])

    return bbs, lds
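# A minimal usage sketch for mtcnn_localize_faces. It assumes the
# `detect_face` module from the facenet repo (align/detect_face.py) and an
# RGB image already loaded as a numpy array; the TF1-style session setup
# mirrors the other snippets in this file.
import numpy as np
import tensorflow as tf
from align import detect_face  # assumed import path

def localize_example(img):
    with tf.Graph().as_default():
        sess = tf.Session()
        with sess.as_default():
            pnet, rnet, onet = detect_face.create_mtcnn(sess, None)
    bbs, lds = mtcnn_localize_faces(img, pnet, rnet, onet, minsize=20)
    for (top, right, bottom, left) in bbs:
        crop = img[top:bottom, left:right]  # numpy indexing is (row, col)
    return bbs, lds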
def detect_faces_and_keypoints(self, image):
    """
    Detect faces in an image; return face bounding boxes, detection scores,
    and the five facial keypoints per face. The boxes may be further
    filtered using the scores and keypoints.
    """
    bounding_boxes, markers = detect_face.detect_face(
        image, self.minsize, self.nets[0], self.nets[1], self.nets[2],
        self.threshold, self.factor)
    num_faces = bounding_boxes.shape[0]
    bboxes = []
    scores = []
    keypoints = []
    for i in range(num_faces):
        bboxes.append(bounding_boxes[i, 0:4])
        scores.append(bounding_boxes[i, 4])
        # markers is laid out as 5 x-coordinates followed by 5 y-coordinates
        pts = []
        for k in range(5):
            pts.append((int(markers[k, i]), int(markers[5 + k, i])))
        keypoints.append(pts)
    return bboxes, scores, keypoints
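# A minimal sketch of the score-based filtering the docstring above alludes
# to; the 0.9 cutoff is an assumed value, not taken from the original code.
def filter_detections(bboxes, scores, keypoints, min_score=0.9):
    kept = [(b, s, k) for b, s, k in zip(bboxes, scores, keypoints)
            if s >= min_score]
    return ([b for b, _, _ in kept],
            [s for _, s, _ in kept],
            [k for _, _, k in kept])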
def main(args):
    sleep(random.random())
    output_dir = os.path.expanduser(args.output_dir)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    # Store some git revision info in a text file in the log directory
    src_path, _ = os.path.split(os.path.realpath(__file__))
    facenet.store_revision_info(src_path, output_dir, ' '.join(sys.argv))
    dataset = facenet.get_dataset(args.input_dir)

    print('Creating networks and loading parameters')

    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=args.gpu_memory_fraction)
        configs_ = tf.ConfigProto(gpu_options=gpu_options,
                                  log_device_placement=False)
        configs_.gpu_options.allow_growth = True
        sess = tf.Session(config=configs_)
        with sess.as_default():
            pnet, rnet, onet = detect_face.create_mtcnn(sess, None)

    minsize = 20  # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # three steps' threshold
    factor = 0.709  # scale factor

    # Add a random key to the filename to allow alignment using multiple processes
    random_key = np.random.randint(0, high=99999)
    bounding_boxes_filename = os.path.join(
        output_dir, 'bounding_boxes_%05d.txt' % random_key)

    with open(bounding_boxes_filename, "w") as text_file:
        nrof_images_total = 0
        nrof_successfully_aligned = 0
        if args.random_order:
            random.shuffle(dataset)
        for cls in dataset:
            output_class_dir = os.path.join(output_dir, cls.name)
            if not os.path.exists(output_class_dir):
                os.makedirs(output_class_dir)
                if args.random_order:
                    random.shuffle(cls.image_paths)
            for image_path in cls.image_paths:
                nrof_images_total += 1
                filename = os.path.splitext(os.path.split(image_path)[1])[0]
                output_filename = os.path.join(output_class_dir, filename + '.png')
                print(image_path)
                if not os.path.exists(output_filename):
                    try:
                        img = misc.imread(image_path)
                    except (IOError, ValueError, IndexError) as e:
                        errorMessage = '{}: {}'.format(image_path, e)
                        print(errorMessage)
                    else:
                        if img.ndim < 2:
                            print('Unable to align "%s"' % image_path)
                            text_file.write('%s\n' % (output_filename))
                            continue
                        if img.ndim == 2:
                            img = facenet.to_rgb(img)
                        img = img[:, :, 0:3]

                        bounding_boxes, _ = detect_face.detect_face(
                            img, minsize, pnet, rnet, onet, threshold, factor)
                        nrof_faces = bounding_boxes.shape[0]
                        if nrof_faces > 0:
                            det = bounding_boxes[:, 0:4]
                            det_arr = []
                            img_size = np.asarray(img.shape)[0:2]
                            if nrof_faces > 1:
                                if args.detect_multiple_faces:
                                    for i in range(nrof_faces):
                                        det_arr.append(np.squeeze(det[i]))
                                else:
                                    # Keep the largest, most central face:
                                    # score = box area minus (2x) squared
                                    # distance from the image center
                                    bounding_box_size = (det[:, 2] - det[:, 0]) * (det[:, 3] - det[:, 1])
                                    img_center = img_size / 2
                                    offsets = np.vstack([
                                        (det[:, 0] + det[:, 2]) / 2 - img_center[1],
                                        (det[:, 1] + det[:, 3]) / 2 - img_center[0]
                                    ])
                                    offset_dist_squared = np.sum(np.power(offsets, 2.0), 0)
                                    index = np.argmax(bounding_box_size - offset_dist_squared * 2.0)  # some extra weight on the centering
                                    det_arr.append(det[index, :])
                            else:
                                det_arr.append(np.squeeze(det))

                            for i, det in enumerate(det_arr):
                                det = np.squeeze(det)
                                bb = np.zeros(4, dtype=np.int32)
                                bb[0] = np.maximum(det[0] - args.margin / 2, 0)
                                bb[1] = np.maximum(det[1] - args.margin / 2, 0)
                                bb[2] = np.minimum(det[2] + args.margin / 2, img_size[1])
                                bb[3] = np.minimum(det[3] + args.margin / 2, img_size[0])
                                cropped = img[bb[1]:bb[3], bb[0]:bb[2], :]
                                scaled = misc.imresize(cropped, (args.image_size, args.image_size), interp='bilinear')
                                nrof_successfully_aligned += 1
                                filename_base, file_extension = os.path.splitext(output_filename)
                                if args.detect_multiple_faces:
                                    output_filename_n = "{}_{}{}".format(filename_base, i, file_extension)
                                else:
                                    output_filename_n = "{}{}".format(filename_base, file_extension)
                                misc.imsave(output_filename_n, scaled)
                                text_file.write('%s %d %d %d %d\n' % (output_filename_n, bb[0], bb[1], bb[2], bb[3]))
                        else:
                            print('Unable to align "%s"' % image_path)
                            text_file.write('%s\n' % (output_filename))

    print('Total number of images: %d' % nrof_images_total)
    print('Number of successfully aligned images: %d' % nrof_successfully_aligned)
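# Standalone sketch of the single-face selection heuristic used in main()
# above: each candidate box is scored by its area minus twice its squared
# distance from the image center, so large, centered faces win. The
# function name is illustrative.
import numpy as np

def pick_central_face(det, img_size):
    """det: (N, 4) array of [x1, y1, x2, y2]; img_size: (height, width)."""
    areas = (det[:, 2] - det[:, 0]) * (det[:, 3] - det[:, 1])
    center = np.asarray(img_size) / 2
    offsets = np.vstack([(det[:, 0] + det[:, 2]) / 2 - center[1],
                         (det[:, 1] + det[:, 3]) / 2 - center[0]])
    dist_sq = np.sum(offsets ** 2, axis=0)
    return det[np.argmax(areas - 2.0 * dist_sq)]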
def mtcnn(self, image, image_size=182, margin=44,
          gpu_memory_fraction=1.0, detect_multiple_faces=False):
    """Detect, crop, and resize a face from `image`.

    Returns (scaled_face, bounding_box) on success, or (None, None) if
    no face was found.
    """
    self.detection = True

    minsize = 20                 # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # three steps' threshold
    factor = 0.709               # scale factor

    nrof_successfully_aligned = 0

    img = image
    if img.ndim == 2:
        img = facenet.to_rgb(img)
    img = img[:, :, 0:3]

    bounding_boxes, _ = detect_face.detect_face(
        img, minsize, self.pnet, self.rnet, self.onet, threshold, factor)
    nrof_faces = bounding_boxes.shape[0]
    if nrof_faces > 0:
        det = bounding_boxes[:, 0:4]
        det_arr = []
        img_size = np.asarray(img.shape)[0:2]
        if nrof_faces > 1:
            if detect_multiple_faces:
                for i in range(nrof_faces):
                    det_arr.append(np.squeeze(det[i]))
            else:
                # Keep the largest, most central face
                bounding_box_size = (det[:, 2] - det[:, 0]) * (det[:, 3] - det[:, 1])
                img_center = img_size / 2
                offsets = np.vstack([
                    (det[:, 0] + det[:, 2]) / 2 - img_center[1],
                    (det[:, 1] + det[:, 3]) / 2 - img_center[0]
                ])
                offset_dist_squared = np.sum(np.power(offsets, 2.0), 0)
                index = np.argmax(bounding_box_size - offset_dist_squared * 2.0)  # some extra weight on the centering
                det_arr.append(det[index, :])
        else:
            det_arr.append(np.squeeze(det))

        for i, det in enumerate(det_arr):
            det = np.squeeze(det)
            bb = np.zeros(4, dtype=np.int32)
            bb[0] = np.maximum(det[0] - margin / 2, 0)
            bb[1] = np.maximum(det[1] - margin / 2, 0)
            bb[2] = np.minimum(det[2] + margin / 2, img_size[1])
            bb[3] = np.minimum(det[3] + margin / 2, img_size[0])
            cropped = img[bb[1]:bb[3], bb[0]:bb[2], :]
            # PIL resize instead of the deprecated scipy.misc.imresize
            scaled = np.array(
                Image.fromarray(cropped).resize(size=(image_size, image_size)))
            nrof_successfully_aligned += 1

    if nrof_successfully_aligned != 0:
        self.detection = True
        return scaled, bb
    return None, None
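# A minimal sketch of the state the mtcnn() method above expects:
# `FaceAligner` is a hypothetical holder class that supplies the
# pnet/rnet/onet attributes; only this wiring is assumed.
import tensorflow as tf
from align import detect_face  # assumed import path

class FaceAligner:
    def __init__(self):
        with tf.Graph().as_default():
            sess = tf.Session()
            with sess.as_default():
                self.pnet, self.rnet, self.onet = detect_face.create_mtcnn(sess, None)
        self.detection = False

    mtcnn = mtcnn  # reuse the function above as a bound method

# aligner = FaceAligner()
# face, box = aligner.mtcnn(frame, image_size=160, margin=32)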
def main():
    if ARGS.test == 'train':
        train(ARGS)
    elif ARGS.test == 'gen':
        gen(ARGS)
    elif ARGS.test == 'predict':
        predict(ARGS)
    elif ARGS.test == 'server':
        server_start(ARGS)
    elif ARGS.test == 'server_production':
        server_start(ARGS, ARGS.port)
    elif ARGS.test == 'hnm':
        hnm(ARGS)
    elif ARGS.test == 'val':
        val(ARGS)
    elif ARGS.test == 'fer':
        fer(ARGS)
    elif ARGS.test == 'profiling':
        profiling(ARGS)
    elif ARGS.test == 'facenet':
        print(facenet)
        mtcnn = tf.Session()
        pnet, rnet, onet = FaceDetector.create_mtcnn(mtcnn, None)

        # Load the model
        t_ = time.time()
        print('Loading model...')
        #fnet = facenet.load_model('../models/facenet/20180204-160909')  # squeezenet
        fnet = facenet.load_model('../models/facenet/20170512-110547.pb')  # InceptionResnet V1
        t_ = time.time() - t_
        print('done', t_ * 1000)

        stats = {
            'same': {'d_avg': 0., 'd_max': -9999., 'd_min': 9999., 'sqr_avg': 0., 'count': 0},
            'diff': {'d_avg': 0., 'd_max': -9999., 'd_min': 9999., 'sqr_avg': 0., 'count': 0},
            'precision': {},
            'timing': {'count': 0, 'forward': 0.},
        }

        if True:
            # Get input and output tensors
            emb = None
            names = []
            for iteration in range(16):
                # Load faces from LFW dataset and parse their names from path to group faces
                images = []
                batch_size = 128
                fid = 0
                for i in range(batch_size):
                    f = DirectoryWalker().get_a_file(directory='../data/face/lfw',
                                                     filters=['.jpg'])
                    if f is None or not f.path:
                        break
                    n = os.path.split(os.path.split(f.path)[0])[1]
                    n = abs(hash(n)) % (10**8)

                    img = cv2.imread(f.path, 1)
                    extents, landmarks = FaceDetector.detect_face(
                        img / 255., 120, pnet, rnet, onet,
                        threshold=[0.6, 0.7, 0.9], factor=0.6,
                        interpolation=cv2.INTER_LINEAR)

                    for j, e in enumerate(extents):
                        x1, y1, x2, y2, confidence = e.astype(dtype=np.int)
                        aligned = FaceApplications.align_face(
                            img, landmarks[j], intensity=1., sz=160,
                            ortho=True, expand=1.5)
                        images.append(aligned / 255.)
                        names.append(n)

                    fid += 1

                # Run forward pass to calculate embeddings
                if len(images):
                    t_ = time.time()
                    if emb is None:
                        emb = fnet(images)
                    else:
                        emb = np.concatenate((emb, fnet(images)))
                    t_ = time.time() - t_
                    stats['timing']['count'] += len(images)
                    stats['timing']['forward'] += t_ * 1000
                    print('forward', emb.shape, t_ * 1000)

            print()
            print()
            print('avg. forward time:',
                  stats['timing']['forward'] / stats['timing']['count'])

            # Test distance
            samples = sklearn.preprocessing.normalize(emb)
            for i1, s1 in enumerate(samples):
                for i2, s2 in enumerate(samples):
                    if i1 != i2:
                        d_ = scipy.spatial.distance.cosine(s1, s2)
                        if names[i1] == names[i2]:
                            # Same person as annotated by LFW
                            cate = 'same'
                        else:
                            # Different person
                            cate = 'diff'
                        c_ = stats[cate]['count']
                        stats[cate]['d_avg'] = stats[cate]['d_avg'] * c_ / (c_ + 1) + d_ / (c_ + 1)
                        d_sqr = d_ * d_
                        stats[cate]['sqr_avg'] = stats[cate]['sqr_avg'] * c_ / (c_ + 1) + d_sqr / (c_ + 1)
                        if d_ > stats[cate]['d_max']:
                            stats[cate]['d_max'] = d_
                        if d_ < stats[cate]['d_min']:
                            stats[cate]['d_min'] = d_
                        stats[cate]['count'] += 1

                        # Get statistics of precision on different thresholds
                        increments = 64
                        for t_ in range(increments):
                            threshold = 0.2 + t_ * (0.6 / increments)
                            if threshold not in stats['precision']:
                                stats['precision'][threshold] = {
                                    'correct': 0,
                                    'total': 0,
                                    'precision': 0.,
                                    'true_pos': 0,
                                    'total_pos': 0,
                                    'recall': 0.,
                                }
                            if (cate == 'same' and d_ <= threshold) or (cate == 'diff' and d_ > threshold):
                                stats['precision'][threshold]['correct'] += 1
                            if cate == 'same':
                                if d_ <= threshold:
                                    stats['precision'][threshold]['true_pos'] += 1
                                stats['precision'][threshold]['total_pos'] += 1
                                stats['precision'][threshold]['recall'] = (
                                    stats['precision'][threshold]['true_pos'] /
                                    stats['precision'][threshold]['total_pos'])
                            stats['precision'][threshold]['total'] += 1
                            stats['precision'][threshold]['precision'] = (
                                stats['precision'][threshold]['correct'] /
                                stats['precision'][threshold]['total'])

            for cate in ['same', 'diff']:
                stats[cate]['stddev'] = stats[cate]['sqr_avg'] - stats[cate]['d_avg'] * stats[cate]['d_avg']

            print()
            pp = pprint.PrettyPrinter(indent=4)
            pp.pprint(stats)

            # Print precision vs recall
            print()
            print('threshold,recall,precision')
            for t in stats['precision']:
                t_stat = stats['precision'][t]
                print(str(t) + ',' + str(t_stat['recall']) + ',' + str(t_stat['precision']))
    elif ARGS.test == 'align':
        face_app = FaceApplications()
        face_app.align_dataset()
    elif ARGS.test == 'fxpress':
        fxpress(ARGS)
    elif ARGS.test == 'fxpress_train':
        fxpress_train(ARGS)
    elif ARGS.test == 'emoc':
        classifier = EmotionClassifier()
        classifier.build_network(ARGS)
        classifier.val(ARGS)
    elif ARGS.test == 'face_app':
        face_app = FaceApplications()
        face_app.detect()
    elif ARGS.test == 'face_benchmark':
        # Test different parameters, resolutions, and interpolation methods
        # for MTCNN face detection: time vs. precision
        interpolations = ['NEAREST', 'LINEAR', 'AREA']
        resolutions = [256, 320, 384, 448, 512, 640, 1024, 1280]
        factors = [0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95]
        expectations = [
            ['0001.jpg', 4],
            ['0002.jpg', 1],
            ['0003.jpg', 1],
            ['0004.jpg', 0],
            ['0005.jpg', 1],
            ['0006.jpg', 1],
            ['0007.jpg', 59],
            ['0008.jpg', 6],
            ['0009.jpg', 1],
            ['0010.jpg', 5],
            ['0011.jpg', 4],
            ['0012.jpg', 17],
            ['0013.jpg', 20],
            ['0014.jpg', 48],
            ['0015.jpg', 22],
        ]
        log_file = open('../data/face_benchmark.csv', 'w')
        log_file.write('time,precision_index,cp,options\n')
        for interp in interpolations:
            for res_cap in resolutions:
                for factor in factors:
                    options = {
                        'res_cap': res_cap,
                        'factor': factor,
                        'interp': interp,
                    }
                    if interp == 'NEAREST':
                        interpolation = cv2.INTER_NEAREST
                    elif interp == 'LINEAR':
                        interpolation = cv2.INTER_LINEAR
                    elif interp == 'AREA':
                        interpolation = cv2.INTER_AREA

                    iterations = 20
                    file_count = len(expectations)
                    time_sampling = np.zeros((file_count, iterations), dtype=np.float)
                    pi_sampling = np.zeros((file_count, iterations), dtype=np.float)

                    image_dir = '../data/face_benchmark/'
                    for k, item in enumerate(expectations):
                        filename, expected_faces = item
                        inpath = image_dir + filename
                        img = cv2.imread(inpath, 1)
                        if img is None:
                            break
                        img, scaling = ImageUtilities.fit_resize(
                            img, maxsize=(res_cap, res_cap),
                            interpolation=interpolation)
                        retval, bindata = cv2.imencode('.jpg', img)
                        bindata_b64 = base64.b64encode(bindata).decode()
                        requests = {
                            'requests': [{
                                'requestId': str(uuid.uuid1()),
                                'media': {'content': bindata_b64},
                                'services': [{
                                    'type': 'face_',
                                    'model': 'a-emoc',
                                    'options': options
                                }]
                            }],
                            'timing': {'client_sent': time.time()}
                        }
                        for i in range(iterations):
                            requests['timing']['client_sent'] = time.time()
                            url = 'http://192.168.41.41:8080/predict'
                            postdata = json.dumps(requests)
                            request = Request(url, data=postdata.encode())
                            response = json.loads(urlopen(request).read().decode())
                            timing = response['timing']
                            server_time = timing['server_sent'] - timing['server_rcv']
                            total_time = (time.time() - timing['client_sent']) * 1000
                            client_time = total_time - server_time
                            print('response time:', total_time)

                            # Precision index: ratio of detected to expected faces
                            pi = 0.
                            for r_ in response['requests']:
                                for s_ in r_['services']:
                                    rects_ = s_['results']['rectangles']
                                    if expected_faces:
                                        pi = len(rects_) / expected_faces
                                    elif len(rects_):
                                        pi = expected_faces / len(rects_)
                                    else:
                                        pi = 1.0
                            time_sampling[k][i] = total_time
                            pi_sampling[k][i] = pi

                    time_mean = np.mean(time_sampling)
                    pi_mean = np.mean(pi_sampling) * 100
                    cp = pi_mean * pi_mean / time_mean
                    print(time_mean, pi_mean)
                    print()
                    log_file.write(','.join([
                        str(time_mean),
                        str(pi_mean),
                        str(cp),
                        json.dumps(options)
                    ]) + '\n')
                    log_file.flush()
        log_file.close()
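# Condensed sketch of the verification math in the 'facenet' branch above:
# embeddings are L2-normalized, compared with cosine distance, and a
# threshold decides same vs. different. The 0.5 cutoff is illustrative;
# the evaluation above sweeps thresholds from 0.2 to 0.8.
import numpy as np
import scipy.spatial.distance
import sklearn.preprocessing

def same_person(emb_a, emb_b, threshold=0.5):
    a, b = sklearn.preprocessing.normalize(np.vstack([emb_a, emb_b]))
    return scipy.spatial.distance.cosine(a, b) <= threshold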
def age_gender():
    sample_url = url_for('uploaded_file', filename="sample.jpeg")
    if request.method == 'POST':
        if 'file' not in request.files:
            return render_template("face.html",
                                   error_msg="No file has been selected!")
        file = request.files['file']
        if file and allowed_file(file.filename):
            with tf.Graph().as_default():
                sess = tf.Session()
                with sess.as_default():
                    pnet, rnet, onet = mtcnn.create_mtcnn(sess, MTCNN_MODEL_PATH)

            filename = file.filename
            file_name = os.path.join(app.config['UPLOAD_FOLDER'], filename)
            file.save(file_name)
            start = time.time()

            # Resize the original image to 680-width and save as JPEG
            cv_ori = cv2.imread(file_name)
            cv_resize = cv2.resize(
                cv_ori,
                dsize=(680, int(cv_ori.shape[0] / cv_ori.shape[1] * 680)),
                interpolation=cv2.INTER_CUBIC)
            cv2.imwrite(file_name, cv_resize)

            # Read the image for face detection
            img = misc.imread(file_name)
            cv_img = cv2.imread(file_name)

            # Detect faces and landmarks by MTCNN
            bounding_boxes, landmarks = mtcnn.detect_face(
                img, 20, pnet, rnet, onet, [0.6, 0.7, 0.7], 0.709)

            # Crop the aligned faces for age and gender classification
            aligned_images = gender_age_predict.load_image(cv_img, landmarks)

            # Estimate gender and age of each face using ResNet50
            genders, ages = gender_age_predict.inception(FROZEN_GRAPH_PATH,
                                                         aligned_images)

            # Draw boxes and labels for faces
            gender_age_predict.draw_label(cv_img, bounding_boxes, genders, ages)

            save_path = os.path.join(app.config['NEW_FOLDER'], filename)
            cv2.imwrite(save_path, cv_img)
            end = time.time()
            print('\n Evaluation time: {:.3f}s\n'.format(end - start))

            file_url = url_for('uploaded_file', filename=filename)
            return render_template("face.html", user_image=file_url, error_msg='')
        else:
            print('\n Incorrect upload image format.\n')
            return render_template("face.html", user_image=sample_url,
                                   error_msg='Incorrect image format!')
    return render_template("face.html", user_image=sample_url, error_msg='')
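# Hedged client-side sketch for exercising the upload handler above. The
# '/age_gender' route path is an assumption; the route decorator is not
# shown in this snippet.
import requests  # pip install requests

def upload_photo(path, host='http://localhost:5000'):
    with open(path, 'rb') as f:
        # The field name 'file' matches request.files['file'] in the handler
        return requests.post(host + '/age_gender', files={'file': f})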
def detect_faces(self, image):
    with self.graph.as_default():
        with self.sess.as_default():
            boxes, points = detect_face.detect_face(
                image, self.minsize, self.pnet, self.rnet, self.onet,
                self.threshold, self.factor)
    return self._assemble_detect_face_result(boxes, points)
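# A minimal sketch of the state detect_faces() assumes: a dedicated graph
# and session holding the MTCNN nets, created once up front. The class
# name and default thresholds here are illustrative assumptions.
import tensorflow as tf
from align import detect_face  # assumed import path

class MtcnnDetector:
    def __init__(self, minsize=20, threshold=(0.6, 0.7, 0.7), factor=0.709):
        self.minsize = minsize
        self.threshold = list(threshold)
        self.factor = factor
        self.graph = tf.Graph()
        with self.graph.as_default():
            self.sess = tf.Session()
            with self.sess.as_default():
                self.pnet, self.rnet, self.onet = detect_face.create_mtcnn(self.sess, None)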
def cam_mtcnn(draw):
    minsize = 40  # minimum size of face
    threshold = [0.6, 0.7, 0.9]  # three steps' threshold
    factor = 0.709  # scale factor

    original = draw.copy()
    bounding_boxes, points = detect_face.detect_face(draw, minsize, pnet,
                                                     rnet, onet, threshold,
                                                     factor)
    nrof_faces = bounding_boxes.shape[0]
    h, w, _ = draw.shape  # numpy shape is (rows, cols) = (height, width)
    face = []
    for b in bounding_boxes:
        cv2.rectangle(draw, (int(b[0]), int(b[1])), (int(b[2]), int(b[3])),
                      (0, 255, 0))
        # Build a binary mask that is 1 inside the face box [y1:y2, x1:x2]
        zeros = np.zeros((h, w))
        face_h, face_w = zeros[int(b[1]):int(b[3]), int(b[0]):int(b[2])].shape
        ones = np.ones((face_h, face_w))
        zeros[int(b[1]):int(b[3]), int(b[0]):int(b[2])] = ones
        face_mask = zeros
        face = original[int(b[1]):int(b[3]), int(b[0]):int(b[2])]

    if len(points) != 0 and len(face) != 0:
        for p in points.T:
            # points holds 5 x-coordinates followed by 5 y-coordinates
            for i in range(5):
                cv2.circle(draw, (int(p[i]), int(p[i + 5])), 1, (0, 0, 255), 2)

            size = 30  # half-width of the eye crops

            i = 0  # left eye
            cv2.rectangle(draw, (int(p[i] - size), int(p[i + 5] - size)),
                          (int(p[i] + size), int(p[i + 5] + size)), (0, 255, 0))
            left_eye = [p[i], p[i + 5]]
            eye_left = original[int(p[i + 5] - size):int(p[i + 5] + size),
                                int(p[i] - size):int(p[i] + size)]

            i = 1  # right eye
            cv2.rectangle(draw, (int(p[i] - size), int(p[i + 5] - size)),
                          (int(p[i] + size), int(p[i + 5] + size)), (0, 255, 0))
            right_eye = [p[i], p[i + 5]]
            eye_right = original[int(p[i + 5] - size):int(p[i + 5] + size),
                                 int(p[i] - size):int(p[i] + size)]

        return [original, draw, face, eye_left, eye_right, face_mask,
                left_eye, right_eye]
    else:
        return []
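# Hedged webcam-loop sketch for cam_mtcnn(); it assumes pnet/rnet/onet
# exist as module-level globals, as the function body implies.
import cv2

def run_camera(device=0):
    cap = cv2.VideoCapture(device)
    while True:
        ok, frame = cap.read()
        if not ok:
            break
        outputs = cam_mtcnn(frame)
        if outputs:
            cv2.imshow('Face Detection', outputs[1])  # the annotated frame
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()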
def convert_to_aligned_face(data_set, base_path, dataset_name):
    file_name = data_set.file_name
    genders = data_set.gender
    ages = data_set.age
    face_score = data_set.score
    num_images = data_set.shape[0]

    if dataset_name == "imdb":
        data_base_dir = os.path.join(base_path, "imdb_crop")
    elif dataset_name == "wiki":
        data_base_dir = os.path.join(base_path, "wiki_crop")
    else:
        raise NameError("unknown dataset name: %s" % dataset_name)

    # Load the MTCNN face detector
    with tf.Graph().as_default():
        sess = tf.Session()
        with sess.as_default():
            pnet, rnet, onet = mtcnn.create_mtcnn(sess, MTCNN_MODEL_PATH)

    error_count = 0
    write_count = 0
    for index in range(num_images):
        # Skip low-quality, out-of-range, or unlabeled records
        if face_score[index] < 0.75:
            continue
        if not (0 <= ages[index] <= 100):
            continue
        if np.isnan(genders[index]):
            continue
        try:
            # Read the image for face detection
            img = misc.imread(
                os.path.join(data_base_dir, str(file_name[index][0])))
            cv_img = cv2.imread(
                os.path.join(data_base_dir, str(file_name[index][0])),
                cv2.IMREAD_COLOR)

            # Detect faces for age and gender classification
            bounding_boxes, landmarks = mtcnn.detect_face(
                img, 20, pnet, rnet, onet, [0.6, 0.7, 0.7], 0.709)
            if bounding_boxes.shape[0] != 1:
                # Keep only images with exactly one detected face
                continue

            # Crop the aligned face from the image
            aligned_faces = load_image(cv_img, landmarks)
            face = aligned_faces[0]

            # Resize and write image to path
            output_dir = os.getcwd() + '/faces/' + dataset_name + '/'
            if not os.path.isdir(output_dir):
                os.mkdir(output_dir)
            image_name = 'image{}_{}_{}.jpg'.format(
                index + 70000, int(genders[index]), ages[index])
            output_path = output_dir + image_name
            cv2.imwrite(output_path, face)
            write_count = write_count + 1
        except Exception:
            # Some files seem not to exist in the face_data dir
            error_count = error_count + 1
            print("read {} error".format(index + 1))

    print("There are", error_count, "missing pictures")
    print("Found", write_count, "valid faces")
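# Hedged usage sketch for convert_to_aligned_face(); `load_metadata` is a
# hypothetical helper standing in for however the IMDB-WIKI .mat metadata
# is parsed, and the base path is an illustrative assumption.
def convert_imdb_faces(base_path='../data/imdb'):
    meta = load_metadata(base_path)  # hypothetical: exposes .file_name,
                                     # .gender, .age, .score and .shape
    convert_to_aligned_face(meta, base_path, "imdb")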
def detect(self, media):
    timing = dict()
    result = dict({
        'mtcnn': list(),
        'mtcnn_5p': list(),
        'emotions': list(),
    })
    # Initialized up front so the final return is safe even when no image
    # could be decoded
    rects_ = list()
    predictions_ = list()

    # Decode the inbound image from base64
    src_img = None
    if 'content' in media:
        bindata = base64.b64decode(media['content'].encode())
        src_img = cv2.imdecode(np.frombuffer(bindata, np.uint8), 1)

    if src_img is not None:
        src_shape = src_img.shape

        time_start = time.time()
        gray = ImageUtilities.preprocess(src_img,
                                         convert_gray=cv2.COLOR_RGB2YCrCb,
                                         equalize=False, denoise=False,
                                         maxsize=384)
        time_diff = time.time() - time_start
        timing['preprocess'] = time_diff * 1000

        processed_shape = gray.shape
        mrate = [processed_shape[0] / src_shape[0],
                 processed_shape[1] / src_shape[1]]

        # HOG + SVM face detection
        time_start = time.time()
        rects, landmarks = FaceDetector().detect(gray)
        time_diff = time.time() - time_start
        timing['detect'] = time_diff * 1000

        time_start = time.time()
        facelist = list()

        # Crop faces from source image
        for rect in rects:
            face = None
            (x, y, w, h) = ImageUtilities.rect_to_bb(rect, mrate=mrate)
            height, width, *rest = src_img.shape
            (x, y, w, h) = ImageUtilities.rect_fit_ar(
                [x, y, w, h], [0, 0, width, height], 1., mrate=1.)
            if w > 0 and h > 0:
                face = ImageUtilities.transform_crop(
                    (x, y, w, h), src_img, r_intensity=0., p_intensity=0.)
                face = imresize(face, shape_raw[0:2])
            if face is not None:
                facelist.append(face)
                rects_.append([x, y, w, h])
                predictions_.append([0., 0.])

        val_data = np.array(facelist, dtype=np.float32) / 255
        reshaped = val_data.reshape((-1, ) + shape_flat)
        time_diff = time.time() - time_start
        timing['crop'] = time_diff * 1000

        # MTCNN detection
        img = ImageUtilities.preprocess(src_img, convert_gray=None,
                                        equalize=False, denoise=False,
                                        maxsize=384)
        mrate_ = src_shape[0] / img.shape[0]

        time_start = time.time()
        minsize = 40  # minimum size of face
        threshold = [0.6, 0.7, 0.9]  # three steps' threshold
        factor = 0.709  # scale factor
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        bounding_boxes, points = mtcnn_detect.detect_face(
            img, minsize, self.pnet, self.rnet, self.onet, threshold, factor)
        if len(bounding_boxes):
            # Rescale the landmarks to source-image coordinates and reshape
            # to (num_faces, 5, 2)
            points = np.array(points) * mrate_
            points = points.reshape(2, -1)
            points = np.transpose(points)
            points = points.reshape((len(bounding_boxes), -1, 2))
        time_diff = time.time() - time_start
        timing['mtcnn'] = time_diff * 1000
        timing['emotion'] = 0

        nrof_faces = bounding_boxes.shape[0]
        for i, b in enumerate(bounding_boxes):
            r_ = (np.array([b[0], b[1], b[2] - b[0], b[3] - b[1]]) *
                  mrate_).astype(dtype=np.int).tolist()
            result['mtcnn'].append(r_ + [int(b[4] * 1000)])
            result['mtcnn_5p'].append(points[i].astype(dtype=np.int).tolist())

            # Facial expression classification on the MTCNN crop
            time_start = time.time()
            (x, y, w, h) = ImageUtilities.rect_fit_ar(
                r_, [0, 0, src_shape[1], src_shape[0]], 1., crop=False)
            if w > 0 and h > 0:
                face = ImageUtilities.transform_crop(
                    (x, y, w, h), src_img, r_intensity=0., p_intensity=0.)
                face = cv2.cvtColor(face, cv2.COLOR_BGR2GRAY)
                face = cv2.resize(face, (48, 48),
                                  interpolation=cv2.INTER_CUBIC) / 255.
                if face is not None:
                    emotions = self.emoc.predict(face)
                    result['emotions'].append(
                        (np.array(emotions) * 1000).astype(dtype=np.int).tolist())
            time_diff = time.time() - time_start
            timing['emotion'] += time_diff * 1000

        # Self-trained cascade face detection
        img = ImageUtilities.preprocess(src_img, convert_gray=None,
                                        equalize=False, denoise=False,
                                        maxsize=384)
        ms_rects, ms_predictions, ms_timing = self.multi_scale_detection(
            img, expanding_rate=1.2, stride=12)
        mrate_ = src_shape[0] / img.shape[0]
        timing['cnn'] = ms_timing['cnn']
        timing['window_count'] = ms_timing['window_count']

        use_nms = True
        if len(ms_predictions):
            if use_nms:
                # Apply non-maximum suppression
                scores = np.array(ms_predictions)[:, target_class:target_class + 1].reshape((-1, ))
                nms = tf.image.non_max_suppression(
                    np.array(ms_rects), scores,
                    iou_threshold=0.5, max_output_size=99999)
                for index, value in enumerate(nms.eval()):
                    r_ = (np.array(ms_rects[value]) * mrate_).astype(dtype=np.int).tolist()
                    p_ = ms_predictions[value]
                    rects_.append(r_)
                    predictions_.append(p_)
            else:
                for index, p_ in enumerate(ms_predictions):
                    r_ = (np.array(ms_rects[index]) * mrate_).astype(dtype=np.int).tolist()
                    rects_.append(r_)
                    predictions_.append(p_)

    return (rects_, predictions_, timing, result)
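# Hedged request-side sketch for detect(): the media payload carries a
# base64-encoded image under 'content', matching the decoding logic above.
# The `service` object is whatever class hosts detect(); its name is not
# shown in this snippet.
import base64

def detect_file(service, path):
    with open(path, 'rb') as f:
        content = base64.b64encode(f.read()).decode()
    rects, predictions, timing, result = service.detect({'content': content})
    return result['mtcnn'], result['emotions']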