def main():
    align = align_dlib.AlignDlib(os.path.expanduser(FLAGS.dlib_face_predictor))
    landmarkIndices = align_dlib.AlignDlib.OUTER_EYES_AND_NOSE
    dataset = facenet.get_dataset(FLAGS.input_dir)
    # Scale the image such that the face fills the frame when cropped to crop_size
    scale = float(FLAGS.face_size) / FLAGS.image_size
    for cls in dataset:
        output_class_dir = os.path.join(os.path.expanduser(FLAGS.output_dir), cls.name)
        if not os.path.exists(output_class_dir):
            os.makedirs(output_class_dir)
        for image_path in cls.image_paths:
            filename = os.path.splitext(os.path.split(image_path)[1])[0]
            output_filename = os.path.join(output_class_dir, filename + '.png')
            if not os.path.exists(output_filename):
                print(image_path)
                try:
                    img = misc.imread(image_path)
                except (IOError, ValueError, IndexError) as e:
                    errorMessage = '{}: {}'.format(image_path, e)
                    print(errorMessage)
                else:
                    if img.ndim == 2:
                        img = facenet.to_rgb(img)
                    if FLAGS.use_new_alignment:
                        aligned = align.align_new(FLAGS.image_size, img,
                                                  landmarkIndices=landmarkIndices,
                                                  skipMulti=True, scale=scale)
                    else:
                        aligned = align.align(FLAGS.image_size, img,
                                              landmarkIndices=landmarkIndices,
                                              skipMulti=True, scale=scale)
                    if aligned is not None:
                        misc.imsave(output_filename, aligned)
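# The dlib-based aligner above reads its options from a module-level FLAGS object
# rather than an argparse namespace. A minimal sketch of how those flags could be
# declared, assuming the TF 1.x tf.app.flags API; the flag names mirror the FLAGS.*
# attributes read above, but the default values here are illustrative assumptions only.
import tensorflow as tf

FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_string('input_dir', '~/datasets/raw', 'Directory with unaligned images.')
tf.app.flags.DEFINE_string('output_dir', '~/datasets/aligned', 'Directory for the aligned output.')
tf.app.flags.DEFINE_string('dlib_face_predictor', '~/models/shape_predictor_68_face_landmarks.dat',
                           'Path to the dlib landmark predictor file.')
tf.app.flags.DEFINE_integer('image_size', 110, 'Size (pixels) of the aligned output image.')
tf.app.flags.DEFINE_integer('face_size', 96, 'Size (pixels) of the face within the output image.')
tf.app.flags.DEFINE_boolean('use_new_alignment', False, 'Use AlignDlib.align_new instead of align.')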
def main(args): sleep(random.random()) output_dir = os.path.expanduser(args.output_dir) if not os.path.exists(output_dir): os.makedirs(output_dir) # Store some git revision info in a text file in the log directory src_path, _ = os.path.split(os.path.realpath(__file__)) facenet.store_revision_info(src_path, output_dir, ' '.join(sys.argv)) dataset = facenet.get_dataset(args.input_dir) print('Creating networks and loading parameters') with tf.Graph().as_default(): gpu_options = tf.GPUOptions( per_process_gpu_memory_fraction=args.gpu_memory_fraction) sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)) with sess.as_default(): pnet, rnet, onet = align.detect_face.create_mtcnn(sess, None) minsize = 20 # minimum size of face threshold = [0.6, 0.7, 0.7] # three steps's threshold factor = 0.709 # scale factor # Add a random key to the filename to allow alignment using multiple processes random_key = np.random.randint(0, high=99999) bounding_boxes_filename = os.path.join( output_dir, 'bounding_boxes_%05d.txt' % random_key) with open(bounding_boxes_filename, "w") as text_file: nrof_images_total = 0 nrof_successfully_aligned = 0 if args.random_order: random.shuffle(dataset) for cls in dataset: output_class_dir = os.path.join(output_dir, cls.name) if not os.path.exists(output_class_dir): os.makedirs(output_class_dir) if args.random_order: random.shuffle(cls.image_paths) for image_path in cls.image_paths: nrof_images_total += 1 filename = os.path.splitext(os.path.split(image_path)[1])[0] output_filename = os.path.join(output_class_dir, filename + '.png') print(image_path) if not os.path.exists(output_filename): try: img = misc.imread(image_path) except (IOError, ValueError, IndexError) as e: errorMessage = '{}: {}'.format(image_path, e) print(errorMessage) else: if img.ndim < 2: print('Unable to align "%s"' % image_path) text_file.write('%s\n' % (output_filename)) continue if img.ndim == 2: img = facenet.to_rgb(img) img = img[:, :, 0:3] bounding_boxes, _ = align.detect_face.detect_face( img, minsize, pnet, rnet, onet, threshold, factor) nrof_faces = bounding_boxes.shape[0] if nrof_faces > 0: det = bounding_boxes[:, 0:4] det_arr = [] img_size = np.asarray(img.shape)[0:2] if nrof_faces > 1: if args.detect_multiple_faces: for i in range(nrof_faces): det_arr.append(np.squeeze(det[i])) else: bounding_box_size = ( det[:, 2] - det[:, 0]) * (det[:, 3] - det[:, 1]) img_center = img_size / 2 offsets = np.vstack([ (det[:, 0] + det[:, 2]) / 2 - img_center[1], (det[:, 1] + det[:, 3]) / 2 - img_center[0] ]) offset_dist_squared = np.sum( np.power(offsets, 2.0), 0) index = np.argmax( bounding_box_size - offset_dist_squared * 2.0 ) # some extra weight on the centering det_arr.append(det[index, :]) else: det_arr.append(np.squeeze(det)) for i, det in enumerate(det_arr): det = np.squeeze(det) bb = np.zeros(4, dtype=np.int32) bb[0] = np.maximum( det[0] * (1 - args.margin / 2), 0) bb[1] = np.maximum( det[1] * (1 - args.margin / 2), 0) bb[2] = np.minimum( det[2] * (1 + args.margin / 2), img_size[1]) bb[3] = np.minimum( det[3] * (1 + args.margin / 2), img_size[0]) cropped = img[bb[1]:bb[3], bb[0]:bb[2], :] new_size = list(cropped.shape[0:2]) if cropped.shape[0] > cropped.shape[1]: new_size[0] = int(args.image_size * cropped.shape[0] / cropped.shape[1]) new_size[1] = args.image_size else: new_size[0] = args.image_size new_size[1] = int(args.image_size * cropped.shape[1] / cropped.shape[0]) scaled = misc.imresize(cropped, new_size, interp='bilinear') nrof_successfully_aligned += 1 
filename_base, file_extension = os.path.splitext( output_filename) if args.detect_multiple_faces: output_filename_n = "{}_{}{}".format( filename_base, i, file_extension) else: output_filename_n = "{}{}".format( filename_base, file_extension) misc.imsave(output_filename_n, scaled) text_file.write('%s %d %d %d %d\n' % (output_filename_n, bb[0], bb[1], bb[2], bb[3])) else: print('Unable to align "%s"' % image_path) text_file.write('%s\n' % (output_filename)) print('Total number of images: %d' % nrof_images_total) print('Number of successfully aligned images: %d' % nrof_successfully_aligned)
def one_by_one(rel_path, url=False):
    print('Start Recognition!')
    prevTime = 0
    # TODO: support multiple url
    if url:
        img_list = [None]
    else:
        img_list = glob.glob(os.path.join(rel_path, '*'))
    results = list()
    # cnt = 0
    # ok_list = list()
    for img_path in img_list:  # for each image in the list
        res = None
        # print('===', url)
        if url:
            try:
                rsp = urlget(rel_path)
                # print(rsp)
                if rsp.status_code == 200:
                    frame = np.array(Image.open(BytesIO(rsp.content)))
                else:
                    print('status code: ', rsp.status_code)
                    exit(-1)
            except Exception as e:
                print(repr(e))
                exit(-1)
        else:
            frame = cv2.imread(img_path)
        # ret, frame = video_capture.read()
        # frame = cv2.resize(frame, (0,0), fx=0.5, fy=0.5)  # resize frame (optional)
        if frame is None:
            print(f'failure in reading image {img_path}, do not use chinese characters in file name!')
            continue
        curTime = time.time()  # calc fps
        timeF = frame_interval
        if (c % timeF == 0):  # detect faces in the current image
            find_results = []
            if frame.ndim == 2:
                frame = facenet.to_rgb(frame)
            frame = frame[:, :, 0:3]
            bounding_boxes, _ = detect_face.detect_face(frame, minsize, pnet, rnet, onet,
                                                        threshold, factor)
            nrof_faces = bounding_boxes.shape[0]
            print('Detected_FaceNum: %d' % nrof_faces)
            if nrof_faces > 0:
                det = bounding_boxes[:, 0:4]
                img_size = np.asarray(frame.shape)[0:2]
                cropped = []
                scaled = []
                scaled_reshape = []
                bb = np.zeros((nrof_faces, 4), dtype=np.int32)
                for i in range(nrof_faces):  # crop all the faces
                    emb_array = np.zeros((1, embedding_size))
                    bb[i][0] = det[i][0]
                    bb[i][1] = det[i][1]
                    bb[i][2] = det[i][2]
                    bb[i][3] = det[i][3]
                    # inner exception
                    if bb[i][0] <= 0 or bb[i][1] <= 0 or bb[i][2] >= len(frame[0]) or bb[i][3] >= len(frame):
                        print('face is out of range!')
                        continue
                    cropped.append(frame[bb[i][1]:bb[i][3], bb[i][0]:bb[i][2], :])
                    # Operate on the face that was just appended; the original indexed [0],
                    # which always reprocessed the first face when several were detected.
                    cropped[-1] = facenet.flip(cropped[-1], False)
                    scaled.append(facenet.imresize(cropped[-1], (image_size, image_size),
                                                   interp='bilinear'))
                    scaled[-1] = cv2.resize(scaled[-1], (input_image_size, input_image_size),
                                            interpolation=cv2.INTER_CUBIC)
                    scaled[-1] = facenet.prewhiten(scaled[-1])
                    scaled_reshape.append(scaled[-1].reshape(-1, input_image_size, input_image_size, 3))
                    feed_dict = {images_placeholder: scaled_reshape[-1],
                                 phase_train_placeholder: False}
                    emb_array[0, :] = sess.run(embeddings, feed_dict=feed_dict)
                    predictions = model.predict_proba(emb_array)
                    best_class_indices = np.argmax(predictions, axis=1)
                    best_class_probabilities = predictions[np.arange(len(best_class_indices)),
                                                           best_class_indices]
                    if i == 0:
                        res = best_class_indices[0]
                    # ok_list.append(cnt)
                    # cnt += 1
                    cv2.rectangle(frame, (bb[i][0], bb[i][1]), (bb[i][2], bb[i][3]),
                                  (0, 255, 0), 2)  # boxing face
                    # plot result idx under box
                    text_x = bb[i][0]
                    text_y = bb[i][3] + 20
                    # print('result: ', best_class_indices[0])
                    if show_flag:
                        for H_i in class_names:
                            if class_names[best_class_indices[0]] == H_i:
                                result_names = class_names[best_class_indices[0]]
                                cv2.putText(frame, result_names, (text_x, text_y),
                                            cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255),
                                            thickness=1, lineType=2)
            else:
                print('No face detected.')
                exit(-1)
            sec = curTime - prevTime
            prevTime = curTime
            fps = 1 / (sec)
            fps_text = 'FPS: %2.3f' % fps  # renamed from 'str' to avoid shadowing the builtin
            text_fps_x = len(frame[0]) - 150
            text_fps_y = 20
            if show_flag:
                cv2.putText(frame, fps_text, (text_fps_x, text_fps_y),
                            cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 0),
                            thickness=1, lineType=2)
                # c+=1
                cv2.imshow('Video', frame)
                if cv2.waitKey(0) & 0xFF == ord('q'):
                    break
            a, b, m, n = bb[0]
            if res is not None:
                results.append([res] + list(predictions[0]) + [m - a, n - b])
            else:
                # results.append([res]*10)
                print(f'cannot detect any face for {img_path}, skip')
                continue
    # video_capture.release()
    # #video writer
    # out.release()
    try:
        cv2.destroyAllWindows()
    except:
        pass
    # pred = np.zeros_like(img_list)
    # print(len(ok_list), len(results))
    # pred[ok_list] = results
    # print(pred)
    if len(results) == 0:
        return None
    results = np.array(results)
    # print(results.shape)
    # print(results)
    # labels = [class_names[int(i)] if i is not None else None for i in results[:,0]]
    # comb = np.concatenate([np.array(img_list).reshape((-1,1)), np.array(labels).reshape((-1,1)), results[:,1:]], axis=1)  # list(zip(img_list, results))
    # pd.DataFrame(comb).to_csv(args.output_file + '.csv', index=False, header=header)
    comb = results[:, 1:]  # 1,9
    df = pd.DataFrame(comb)
    ret = df.apply(proc_line, axis=1)
    # return df.iloc[:,:-2].values, ret.values
    return ret.values
def main(args):
    output_dir = os.path.expanduser(args.output_dir)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    # Store some git revision info in a text file in the log directory
    src_path, _ = os.path.split(os.path.realpath(__file__))
    facenet.store_revision_info(src_path, output_dir, ' '.join(sys.argv))
    dataset = facenet.get_dataset(args.input_dir)

    print('Creating networks and loading parameters')

    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
        with sess.as_default():
            pnet, rnet, onet = align.detect_face.create_mtcnn(sess, '../../data/')

    minsize = 20  # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # three steps's threshold
    factor = 0.709  # scale factor

    # Add a random key to the filename to allow alignment using multiple processes
    random_key = np.random.randint(0, high=99999)
    bounding_boxes_filename = os.path.join(output_dir, 'bounding_boxes_%05d.txt' % random_key)

    with open(bounding_boxes_filename, "w") as text_file:
        nrof_images_total = 0
        nrof_successfully_aligned = 0
        if args.random_order:
            random.shuffle(dataset)
        for cls in dataset:
            output_class_dir = os.path.join(output_dir, cls.name)
            if not os.path.exists(output_class_dir):
                os.makedirs(output_class_dir)
                if args.random_order:
                    random.shuffle(cls.image_paths)
            for image_path in cls.image_paths:
                nrof_images_total += 1
                filename = os.path.splitext(os.path.split(image_path)[1])[0]
                output_filename = os.path.join(output_class_dir, filename + '.png')
                print(image_path)
                if not os.path.exists(output_filename):
                    try:
                        img = misc.imread(image_path)
                    except (IOError, ValueError, IndexError) as e:
                        errorMessage = '{}: {}'.format(image_path, e)
                        print(errorMessage)
                    else:
                        if img.ndim < 2:
                            print('Unable to align "%s"' % image_path)
                            text_file.write('%s\n' % (output_filename))
                            continue
                        if img.ndim == 2:
                            img = facenet.to_rgb(img)
                        img = img[:, :, 0:3]

                        bounding_boxes, _ = align.detect_face.detect_face(img, minsize, pnet, rnet, onet,
                                                                          threshold, factor)
                        nrof_faces = bounding_boxes.shape[0]
                        if nrof_faces > 0:
                            det = bounding_boxes[:, 0:4]
                            img_size = np.asarray(img.shape)[0:2]
                            if nrof_faces > 1:
                                bounding_box_size = (det[:, 2] - det[:, 0]) * (det[:, 3] - det[:, 1])
                                img_center = img_size / 2
                                offsets = np.vstack([(det[:, 0] + det[:, 2]) / 2 - img_center[1],
                                                     (det[:, 1] + det[:, 3]) / 2 - img_center[0]])
                                offset_dist_squared = np.sum(np.power(offsets, 2.0), 0)
                                index = np.argmax(bounding_box_size - offset_dist_squared * 2.0)  # some extra weight on the centering
                                det = det[index, :]
                            det = np.squeeze(det)
                            bb = np.zeros(4, dtype=np.int32)
                            bb[0] = np.maximum(det[0] - args.margin / 2, 0)
                            bb[1] = np.maximum(det[1] - args.margin / 2, 0)
                            bb[2] = np.minimum(det[2] + args.margin / 2, img_size[1])
                            bb[3] = np.minimum(det[3] + args.margin / 2, img_size[0])
                            cropped = img[bb[1]:bb[3], bb[0]:bb[2], :]
                            scaled = misc.imresize(cropped, (args.image_size, args.image_size), interp='bilinear')
                            nrof_successfully_aligned += 1
                            misc.imsave(output_filename, scaled)
                            text_file.write('%s %d %d %d %d\n' % (output_filename, bb[0], bb[1], bb[2], bb[3]))
                        else:
                            print('Unable to align "%s"' % image_path)
                            text_file.write('%s\n' % (output_filename))

    print('Total number of images: %d' % nrof_images_total)
    print('Number of successfully aligned images: %d' % nrof_successfully_aligned)
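# The MTCNN-based alignment variants in this collection all receive an argparse
# namespace with roughly the same fields. A minimal sketch of a parser that would
# satisfy the main(args) above: the argument names mirror the args.* attributes it
# reads, while the defaults and help strings are illustrative assumptions.
import argparse
import sys

def parse_arguments(argv):
    parser = argparse.ArgumentParser()
    parser.add_argument('input_dir', type=str, help='Directory with unaligned images.')
    parser.add_argument('output_dir', type=str, help='Directory with aligned face thumbnails.')
    parser.add_argument('--image_size', type=int, default=182,
                        help='Image size (height, width) in pixels.')
    parser.add_argument('--margin', type=int, default=44,
                        help='Margin for the crop around the bounding box, in pixels.')
    parser.add_argument('--random_order', action='store_true',
                        help='Shuffle the image order so multiple processes can align in parallel.')
    parser.add_argument('--gpu_memory_fraction', type=float, default=0.25,
                        help='Upper bound on the fraction of GPU memory used by the process.')
    return parser.parse_args(argv)

if __name__ == '__main__':
    main(parse_arguments(sys.argv[1:]))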
def _main():
    args = get_args()
    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
        with sess.as_default():
            # pnet, rnet, onet = detect_face.create_mtcnn(sess, './models/')
            minsize = 20  # minimum size of face
            threshold = [0.6, 0.7, 0.7]  # three steps' threshold
            factor = 0.709  # scale factor
            margin = 44
            frame_interval = 3
            batch_size = 1000
            image_size = 182
            input_image_size = 160

            print('Loading feature extraction model')
            modeldir = './models/facenet/20190310-055158'
            facenet.load_model(modeldir)

            images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
            embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
            phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
            embedding_size = embeddings.get_shape()[1]

            classifier_filename = './myclassifier/my_classifier.pkl'
            classifier_filename_exp = os.path.expanduser(classifier_filename)
            with open(classifier_filename_exp, 'rb') as infile:
                (model, class_names) = pickle.load(infile)
                print('load classifier file-> %s' % classifier_filename_exp)

            video_capture = cv2.VideoCapture(0)
            c = 0

            print('Start Recognition!')
            prevTime = 0
            myYolo = YOLO(args)
            while True:
                ret, frame = video_capture.read()
                # frame = cv2.resize(frame, (0,0), fx=0.5, fy=0.5)  # resize frame (optional)

                curTime = time.time()  # calc fps
                timeF = frame_interval

                if (c % timeF == 0):
                    find_results = []

                    if frame.ndim == 2:
                        frame = facenet.to_rgb(frame)
                    frame = frame[:, :, 0:3]
                    # print(frame.shape[0])
                    # print(frame.shape[1])

                    image = Image.fromarray(frame)
                    img, bounding_boxes = myYolo.detect_image(image)
                    # Remove the bounding boxes with low confidence
                    nrof_faces = len(bounding_boxes)

                    ## Use MTCNN to get the bounding boxes
                    # bounding_boxes, _ = detect_face.detect_face(frame, minsize, pnet, rnet, onet, threshold, factor)
                    # nrof_faces = bounding_boxes.shape[0]
                    # print('Detected_FaceNum: %d' % nrof_faces)

                    if nrof_faces > 0:
                        # det = bounding_boxes[:, 0:4]
                        img_size = np.asarray(frame.shape)[0:2]

                        # cropped = []
                        # scaled = []
                        # scaled_reshape = []
                        bb = np.zeros((nrof_faces, 4), dtype=np.int32)

                        for i in range(nrof_faces):
                            emb_array = np.zeros((1, embedding_size))

                            bb[i][0] = bounding_boxes[i][0]
                            bb[i][1] = bounding_boxes[i][1]
                            bb[i][2] = bounding_boxes[i][2]
                            bb[i][3] = bounding_boxes[i][3]

                            if bb[i][0] <= 0 or bb[i][1] <= 0 or bb[i][2] >= len(frame[0]) or bb[i][3] >= len(frame):
                                print('face is out of range!')
                                continue

                            # cropped.append(frame[bb[i][1]:bb[i][3], bb[i][0]:bb[i][2], :])
                            # cropped[0] = facenet.flip(cropped[0], False)
                            # scaled.append(misc.imresize(cropped[0], (image_size, image_size), interp='bilinear'))
                            # scaled[0] = cv2.resize(scaled[0], (input_image_size, input_image_size),
                            #                        interpolation=cv2.INTER_CUBIC)
                            # scaled[0] = facenet.prewhiten(scaled[0])
                            # scaled_reshape.append(scaled[0].reshape(-1, input_image_size, input_image_size, 3))
                            # feed_dict = {images_placeholder: scaled_reshape[0], phase_train_placeholder: False}

                            cropped = (frame[bb[i][1]:bb[i][3], bb[i][0]:bb[i][2], :])
                            print("{0} {1} {2} {3}".format(bb[i][0], bb[i][1], bb[i][2], bb[i][3]))
                            cropped = facenet.flip(cropped, False)
                            scaled = (misc.imresize(cropped, (image_size, image_size), interp='bilinear'))
                            scaled = cv2.resize(scaled, (input_image_size, input_image_size),
                                                interpolation=cv2.INTER_CUBIC)
                            scaled = facenet.prewhiten(scaled)
                            scaled_reshape = (scaled.reshape(-1, input_image_size, input_image_size, 3))
                            feed_dict = {images_placeholder: scaled_reshape,
                                         phase_train_placeholder: False}
                            emb_array[0, :] = sess.run(embeddings, feed_dict=feed_dict)

                            predictions = model.predict_proba(emb_array)
                            best_class_indices = np.argmax(predictions, axis=1)
                            best_class_probabilities = predictions[np.arange(len(best_class_indices)), best_class_indices]
                            print(best_class_probabilities)
                            cv2.rectangle(frame, (bb[i][0], bb[i][1]), (bb[i][2], bb[i][3]), (0, 255, 0), 2)
                            text_x = bb[i][0]
                            text_y = bb[i][3] + 20

                            # for H_i in HumanNames:
                            #     if HumanNames[best_class_indices[0]] == H_i:
                            result_names = class_names[best_class_indices[0]] if best_class_probabilities[0] > 0.45 else "Unknown"
                            # print(result_names)
                            cv2.putText(frame, result_names, (text_x, text_y),
                                        cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255),
                                        thickness=1, lineType=2)
                    else:
                        print('Unable to align')

                sec = curTime - prevTime
                prevTime = curTime
                fps = 1 / (sec)
                fps_text = 'FPS: %2.3f' % fps  # renamed from 'str' to avoid shadowing the builtin
                text_fps_x = len(frame[0]) - 150
                text_fps_y = 20
                cv2.putText(frame, fps_text, (text_fps_x, text_fps_y),
                            cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 0),
                            thickness=1, lineType=2)
                # c+=1
                cv2.imshow('Video', frame)

                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break

            video_capture.release()
            # #video writer
            # out.release()
            cv2.destroyAllWindows()
def main(argv=None): align = align_dlib.AlignDlib(os.path.expanduser(FLAGS.dlib_face_predictor)) landmarkIndices = align_dlib.AlignDlib.OUTER_EYES_AND_NOSE output_dir = os.path.expanduser(FLAGS.output_dir) if not os.path.exists(output_dir): os.makedirs(output_dir) # Store some git revision info in a text file in the log directory src_path,_ = os.path.split(os.path.realpath(__file__)) store_revision_info(src_path, output_dir, ' '.join(argv)) dataset = facenet.get_dataset(FLAGS.input_dir) # Scale the image such that the face fills the frame when cropped to crop_size scale = float(FLAGS.face_size) / FLAGS.image_size for cls in dataset: output_class_dir = os.path.join(output_dir, cls.name) if not os.path.exists(output_class_dir): os.makedirs(output_class_dir) for image_path in cls.image_paths: filename = os.path.splitext(os.path.split(image_path)[1])[0] output_filename = os.path.join(output_class_dir, filename+'.png') if not os.path.exists(output_filename): try: img = misc.imread(image_path) except (IOError, ValueError, IndexError) as e: errorMessage = '{}: {}'.format(image_path, e) print(errorMessage) else: if img.ndim == 2: img = facenet.to_rgb(img) if FLAGS.use_new_alignment: aligned = align.align_new(FLAGS.image_size, img, landmarkIndices=landmarkIndices, skipMulti=True, scale=scale) else: aligned = align.align(FLAGS.image_size, img, landmarkIndices=landmarkIndices, skipMulti=True, scale=scale) if aligned is not None: print(image_path) misc.imsave(output_filename, aligned) elif FLAGS.prealigned_path: # Face detection failed. Use center crop from pre-aligned dataset class_name = os.path.split(output_class_dir)[1] image_path_without_ext = os.path.join(os.path.expanduser(FLAGS.prealigned_path), class_name, filename) # Find the extension of the image exts = ('jpg', 'png', 'gif') for ext in exts: temp_path = image_path_without_ext + '.' + ext image_path = '' if os.path.exists(temp_path): image_path = temp_path break try: img = misc.imread(image_path) except (IOError, ValueError, IndexError) as e: errorMessage = '{}: {}'.format(image_path, e) print(errorMessage) else: scaled = misc.imresize(img, FLAGS.prealigned_scale, interp='bilinear') sz1 = scaled.shape[1]/2 sz2 = FLAGS.image_size/2 cropped = scaled[(sz1-sz2):(sz1+sz2),(sz1-sz2):(sz1+sz2),:] print(image_path) misc.imsave(output_filename, cropped)
def collect_data(self): output_dir = os.path.expanduser(self.output_datadir) if not os.path.exists(output_dir): os.makedirs(output_dir) dataset = facenet.get_dataset(self.input_datadir) with tf.Graph().as_default(): gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.5) sess = tf.Session(config=tf.ConfigProto( gpu_options=gpu_options, log_device_placement=False)) with sess.as_default(): pnet, rnet, onet = detect_face.create_mtcnn(sess, './npy') minsize = 20 # minimum size of face threshold = [0.6, 0.7, 0.7] # three steps's threshold factor = 0.709 # scale factor margin = 44 image_size = 182 # Add a random key to the filename to allow alignment using multiple processes random_key = np.random.randint(0, high=99999) bounding_boxes_filename = os.path.join( output_dir, 'bounding_boxes_%05d.txt' % random_key) with open(bounding_boxes_filename, "w") as text_file: nrof_images_total = 0 nrof_successfully_aligned = 0 for cls in dataset: output_class_dir = os.path.join(output_dir, cls.name) if not os.path.exists(output_class_dir): os.makedirs(output_class_dir) for image_path in cls.image_paths: nrof_images_total += 1 filename = os.path.splitext( os.path.split(image_path)[1])[0] output_filename = os.path.join(output_class_dir, filename + '.png') print("Image: %s" % image_path) if not os.path.exists(output_filename): try: img = misc.imread(image_path) except (IOError, ValueError, IndexError) as e: errorMessage = '{}: {}'.format(image_path, e) print(errorMessage) else: if img.ndim < 2: print('Unable to align "%s"' % image_path) text_file.write('%s\n' % (output_filename)) continue if img.ndim == 2: img = facenet.to_rgb(img) print('to_rgb data dimension: ', img.ndim) img = img[:, :, 0:3] bounding_boxes, _ = detect_face.detect_face( img, minsize, pnet, rnet, onet, threshold, factor) nrof_faces = bounding_boxes.shape[0] print('No of Detected Face: %d' % nrof_faces) if nrof_faces > 0: det = bounding_boxes[:, 0:4] img_size = np.asarray(img.shape)[0:2] if nrof_faces > 1: bounding_box_size = ( det[:, 2] - det[:, 0]) * (det[:, 3] - det[:, 1]) img_center = img_size / 2 offsets = np.vstack([ (det[:, 0] + det[:, 2]) / 2 - img_center[1], (det[:, 1] + det[:, 3]) / 2 - img_center[0] ]) offset_dist_squared = np.sum( np.power(offsets, 2.0), 0) index = np.argmax( bounding_box_size - offset_dist_squared * 2.0 ) # some extra weight on the centering det = det[index, :] det = np.squeeze(det) bb_temp = np.zeros(4, dtype=np.int32) bb_temp[0] = det[0] bb_temp[1] = det[1] bb_temp[2] = det[2] bb_temp[3] = det[3] cropped_temp = img[bb_temp[1]:bb_temp[3], bb_temp[0]:bb_temp[2], :] scaled_temp = misc.imresize( cropped_temp, (image_size, image_size), interp='bilinear') nrof_successfully_aligned += 1 misc.imsave(output_filename, scaled_temp) text_file.write( '%s %d %d %d %d\n' % (output_filename, bb_temp[0], bb_temp[1], bb_temp[2], bb_temp[3])) else: print('Unable to align "%s"' % image_path) text_file.write('%s\n' % (output_filename)) return (nrof_images_total, nrof_successfully_aligned)
def main(args): # count = 1 gpuid = 0 detector = RetinaFace( '/home/tmt/Documents/insightface/RetinaFace/model/retinaface-R50/', 0, gpuid, 'net3') sleep(random.random()) output_dir = os.path.expanduser(args.output_dir) if not os.path.exists(output_dir): os.makedirs(output_dir) # Store some git revision info in a text file in the log directory src_path, _ = os.path.split(os.path.realpath(__file__)) facenet.store_revision_info(src_path, output_dir, ' '.join(sys.argv)) dataset = facenet.get_dataset(args.input_dir) print('Creating networks and loading parameters') # with tf.Graph().as_default(): # gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_memory_fraction) # sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)) # with sess.as_default(): # pnet, rnet, onet = align.detect_face.create_mtcnn(sess, None) minsize = 20 # minimum size of face threshold = [0.6, 0.7, 0.7] # three steps's threshold factor = 0.709 # scale factor # Add a random key to the filename to allow alignment using multiple processes random_key = np.random.randint(0, high=99999) bounding_boxes_filename = os.path.join( output_dir, 'bounding_boxes_%05d.txt' % random_key) with open(bounding_boxes_filename, "w") as text_file: nrof_images_total = 0 nrof_successfully_aligned = 0 if args.random_order: random.shuffle(dataset) for cls in tqdm(dataset): output_class_dir = os.path.join(output_dir, cls.name) if not os.path.exists(output_class_dir): os.makedirs(output_class_dir) if args.random_order: random.shuffle(cls.image_paths) for image_path in cls.image_paths: thresh = 0.8 scales = [1024, 1980] nrof_images_total += 1 filename = os.path.splitext(os.path.split(image_path)[1])[0] output_filename = os.path.join(output_class_dir, filename + '.png') print(image_path) if not os.path.exists(output_filename): try: # img = misc.imread(image_path) img = cv2.imread(image_path) print('image shape', img.shape) im_shape = img.shape target_size = scales[0] max_size = scales[1] im_size_min = np.min(im_shape[0:2]) im_size_max = np.max(im_shape[0:2]) #im_scale = 1.0 # if im_size_min>target_size or im_size_max>max_size: im_scale = float(target_size) / float(im_size_min) # prevent bigger axis from being more than max_size: if np.round(im_scale * im_size_max) > max_size: im_scale = float(max_size) / float(im_size_max) print('im_scale', im_scale) scales = [im_scale] flip = False except (IOError, ValueError, IndexError) as e: errorMessage = '{}: {}'.format(image_path, e) print(errorMessage) else: if img.ndim < 2: print('Unable to align "%s"' % image_path) text_file.write('%s\n' % (output_filename)) continue if img.ndim == 2: img = facenet.to_rgb(img) img = img[:, :, 0:3] bounding_boxes, landmarks = detector.detect( img, thresh, scales=scales, do_flip=flip) # bounding_boxes, _ = align.detect_face.detect_face(img, minsize, pnet, rnet, onet, threshold, factor) print('-------------bounding----------------', bounding_boxes) nrof_faces = bounding_boxes.shape[0] if nrof_faces > 0: det = bounding_boxes[:, 0:4] det_arr = [] img_size = np.asarray(img.shape)[0:2] if nrof_faces > 1: if args.detect_multiple_faces: for i in range(nrof_faces): det_arr.append(np.squeeze(det[i])) else: bounding_box_size = ( det[:, 2] - det[:, 0]) * (det[:, 3] - det[:, 1]) img_center = img_size / 2 offsets = np.vstack([ (det[:, 0] + det[:, 2]) / 2 - img_center[1], (det[:, 1] + det[:, 3]) / 2 - img_center[0] ]) offset_dist_squared = np.sum( np.power(offsets, 2.0), 0) # some extra weight on the centering index = 
np.argmax(bounding_box_size - offset_dist_squared * 2.0) det_arr.append(det[index, :]) else: det_arr.append(np.squeeze(det)) for i, det in enumerate(det_arr): det = np.squeeze(det) bb = np.zeros(4, dtype=np.int32) bb[0] = np.maximum(det[0] - args.margin / 2, 0) bb[1] = np.maximum(det[1] - args.margin / 2, 0) bb[2] = np.minimum(det[2] + args.margin / 2, img_size[1]) bb[3] = np.minimum(det[3] + args.margin / 2, img_size[0]) cropped = img[bb[1]:bb[3], bb[0]:bb[2], :] scaled = misc.imresize( cropped, (args.image_size, args.image_size), interp='bilinear') nrof_successfully_aligned += 1 filename_base, file_extension = os.path.splitext( output_filename) if args.detect_multiple_faces: output_filename_n = "{}_{}{}".format( filename_base, i, file_extension) else: output_filename_n = "{}{}".format( filename_base, file_extension) misc.imsave(output_filename_n, scaled) text_file.write('%s %d %d %d %d\n' % (output_filename_n, bb[0], bb[1], bb[2], bb[3])) else: print('Unable to align "%s"' % image_path) text_file.write('%s\n' % (output_filename)) print('Total number of images: %d' % nrof_images_total) print('Number of successfully aligned images: %d' % nrof_successfully_aligned)
def align_frames(input_dir, output_dir, image_size=182, margin=44, gpu_memory_fraction=1.0): sleep(random.random()) output_dir = os.path.expanduser(output_dir) if not os.path.exists(output_dir): os.makedirs(output_dir) # Store some git revision info in a text file in the log directory src_path, _ = os.path.split(os.path.realpath(__file__)) facenet.store_revision_info(src_path, output_dir, ' '.join(sys.argv)) image_paths = facenet.get_image_paths(input_dir) tf.logging.set_verbosity(tf.logging.ERROR) with tf.Graph().as_default(): gpu_options = tf.GPUOptions( per_process_gpu_memory_fraction=gpu_memory_fraction) sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)) with sess.as_default(): pnet, rnet, onet = detect_face.create_mtcnn(sess, None) minsize = 20 # minimum size of face # first step shallow cnn to detect face windows # second step deep cnn to throw out non face windows # third step detect face landmarks # trying to raise second step threshold threshold = [0.6, 0.7, 0.7 ] # threshold = [ 0.6, 0.7, 0.7 ] # three steps's threshold factor = 0.709 # scale factor # Add a random key to the filename to allow alignment using multiple processes random_key = np.random.randint(0, high=99999) bounding_boxes_filename = os.path.join( output_dir, 'bounding_boxes_%05d.txt' % random_key) bar = progressbar.ProgressBar(maxval=len(image_paths), widgets=[ progressbar.Bar('=', '[', ']'), ' ', progressbar.Percentage() ]) bar.start() with open(bounding_boxes_filename, "w") as text_file: nrof_images_total = 0 nrof_successfully_aligned = 0 if not os.path.exists(output_dir): os.makedirs(output_dir) for image_path in image_paths: nrof_images_total += 1 bar.update(nrof_images_total) filename = os.path.splitext(os.path.split(image_path)[1])[0] output_filename_prefix = os.path.join(output_dir, filename) # print(image_path) if not os.path.exists(output_filename_prefix): try: img = misc.imread(image_path) except (IOError, ValueError, IndexError) as e: errorMessage = '{}: {}'.format(image_path, e) print(errorMessage) else: if img.ndim < 2: # print('Unable to align "%s"' % image_path) text_file.write('%s\n' % (output_filename_prefix)) continue if img.ndim == 2: img = facenet.to_rgb(img) img = img[:, :, 0:3] bounding_boxes, _ = detect_face.detect_face( img, minsize, pnet, rnet, onet, threshold, factor) nrof_faces = bounding_boxes.shape[0] if nrof_faces > 0: for i in range(nrof_faces): # NEW det = bounding_boxes[i, 0:4] img_size = np.asarray(img.shape)[0:2] '''if nrof_faces>1: bounding_box_size = (det[:,2]-det[:,0])*(det[:,3]-det[:,1]) img_center = img_size / 2 offsets = np.vstack([ (det[:,0]+det[:,2])/2-img_center[1], (det[:,1]+det[:,3])/2-img_center[0] ]) offset_dist_squared = np.sum(np.power(offsets,2.0),0) index = np.argmax(bounding_box_size-offset_dist_squared*2.0) # some extra weight on the centering det = det[index,:]''' det = np.squeeze(det) bb = np.zeros(4, dtype=np.int32) bb[0] = np.maximum(det[0] - margin / 2, 0) bb[1] = np.maximum(det[1] - margin / 2, 0) bb[2] = np.minimum(det[2] + margin / 2, img_size[1]) bb[3] = np.minimum(det[3] + margin / 2, img_size[0]) cropped = img[bb[1]:bb[3], bb[0]:bb[2], :] scaled = misc.imresize(cropped, (image_size, image_size), interp='bilinear') nrof_successfully_aligned += 1 output_filename = output_filename_prefix + '_' + str( i) + '.png' misc.imsave(output_filename, scaled) text_file.write( '%s %d %d %d %d\n' % (output_filename, bb[0], bb[1], bb[2], bb[3])) else: # print('Unable to align "%s"' % image_path) text_file.write('%s\n' % 
(output_filename_prefix)) bar.finish()
def main(args):
    if len(args) < 2:
        print("Usage: " + args[0] + " <image>")
        return
    img_path = args[1]

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Graph().as_default():
        with tf.Session(config=config) as sess:
            np.random.seed(666)

            # 0. Read image
            try:
                img = misc.imread(img_path)
            except (IOError, ValueError, IndexError) as e:
                errorMessage = '{}: {}'.format(img_path, e)
                print(errorMessage)
                return

            if img.ndim < 2:
                print('Unable to align "%s"' % img_path)  # was image_path, an undefined name
                return
            elif img.ndim == 2:
                img = facenet.to_rgb(img)
            elif len(img.shape) > 2 and img.shape[2] == 4:
                img = cv2.cvtColor(img, cv2.COLOR_BGRA2BGR)
            [h, w] = np.asarray(img.shape)[0:2]

            # 1. Detect Face
            pnet, rnet, onet = align.detect_face.create_mtcnn(sess, None)
            minsize = 20  # minimum size of face
            threshold = [0.8, 0.85, 0.85]  # three steps' threshold
            factor = 0.709  # scale factor
            bounding_boxes, _ = align.detect_face.detect_face(img, minsize, pnet, rnet, onet,
                                                              threshold, factor)
            nrof_faces = bounding_boxes.shape[0]
            faces = []
            boxes = []
            if nrof_faces > 0:
                for i in range(nrof_faces):
                    det = np.squeeze(bounding_boxes[i, 0:4])
                    # y0 = int(det[1] * h)
                    # y1 = int(det[3] * h)
                    # x0 = int(det[0] * w)
                    # x1 = int(det[2] * w)
                    x0 = max(int(det[0]) - 20, 0)
                    x1 = min(int(det[2]) + 20, w - 1)
                    y0 = max(int(det[1]) - 20, 0)
                    y1 = min(int(det[3]) + 20, h - 1)
                    [x0, y0, x1, y1] = get_square_box(x0, y0, x1, y1, w, h)
                    print(str(x0) + " " + str(y0) + " " + str(x1) + " " + str(y1))
                    cropped = img[y0:y1, x0:x1, :]
                    scaled = misc.imresize(cropped, (160, 160), interp='bilinear')
                    prew = facenet.prewhiten(scaled)
                    faces.append(prew)
                    boxes.append([x0, y0, x1, y1])
                    misc.imsave("roi" + str(i) + ".png", prew)

            # 2. Recognize Face
            # Load the model
            print('Loading feature extraction model')
            facenet.load_model('../models/facenet/20170512-110547/20170512-110547.pb')
            # facenet.load_model('../models/facenet/20170511-185253/20170511-185253.pb')

            # Get input and output tensors
            images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
            embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
            phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
            embedding_size = embeddings.get_shape()[1]
            emb_array = np.zeros((len(faces), embedding_size))

            # Run forward pass to calculate embeddings
            print('Calculating features for images')
            feed_dict = {images_placeholder: faces, phase_train_placeholder: False}
            emb_array[:, :] = sess.run(embeddings, feed_dict=feed_dict)

            # Load embeddings from file and concatenate with computed embeddings
            # with open('../models/emb_array.bin', 'rb') as infile:
            #     emb_array_cls = pickle.load(infile)
            #     print(emb_array_cls)
            # emb_arrys = np.concatenate((emb_array, emb_array_cls), axis=0)

            img = cv2.imread(img_path, cv2.IMREAD_UNCHANGED)
            with open('../models/facenet/lfw_classifier-20170512-110547.pkl', 'rb') as infile:
            # with open('../models/facenet/lfw_classifier-20170511-185253.pkl', 'rb') as infile:
                (model, class_names) = pickle.load(infile)
                predictions = model.predict_proba(emb_array)

                # Print all prediction in sorted order
                # sorted_class_indices = np.argsort(predictions, axis=1)
                # for i in range(len(predictions)):
                #     for j in range(len(class_names)):
                #         print('%.4f %s' % (predictions[i][sorted_class_indices[i][j]], class_names[sorted_class_indices[i][j]]))
                #     print("----------")
                # print(predictions)
                best_class_indices = np.argmax(predictions, axis=1)
                best_class_probabilities = predictions[np.arange(len(best_class_indices)), best_class_indices]
                # predictions = model.predict(emb_array)
                # best_class_indices = predictions
                # best_class_probabilities = predictions
                for i in range(len(best_class_indices)):
                    print('%4d %s: %.3f' % (i, class_names[best_class_indices[i]], best_class_probabilities[i]))
                    vis_util.draw_bounding_box_on_image_array(
                        img, boxes[i][1], boxes[i][0], boxes[i][3], boxes[i][2], "red", 3,
                        [class_names[best_class_indices[i]], "{:.3f}".format(best_class_probabilities[i])],
                        False)
                cv2.imwrite("img.png", img)
def main(args):
    sleep(random.random())
    # Create the output folder if it does not exist yet;
    # this is where the aligned face images will be stored
    output_dir = os.path.expanduser(args.output_dir)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    # Store some git revision info in a text file in the log directory
    src_path, _ = os.path.split(os.path.realpath(__file__))
    # Creates a revision_info.txt file under output_dir holding the arguments of this run,
    # the tensorflow version in use, the git hash and the git diff
    facenet.store_revision_info(src_path, output_dir, ' '.join(sys.argv))
    # Collect every person name in the dataset and all images under each person's directory,
    # wrap them in ImageClass objects and store those in the dataset list
    dataset = facenet.get_dataset(args.input_dir)

    print('Creating networks and loading parameters')

    '''2. Build the MTCNN network and load its pretrained weights (i.e. initialize with trained parameters)'''
    with tf.Graph().as_default():
        # Session GPU options: how much GPU memory each process may use
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
        with sess.as_default():
            # Create the P-Net, R-Net and O-Net networks
            pnet, rnet, onet = align.detect_face.create_mtcnn(sess, None)

    minsize = 20  # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # three steps' threshold
    factor = 0.709  # scale factor

    # Add a random key to the filename to allow alignment using multiple processes
    random_key = np.random.randint(0, high=99999)
    # Images and their bounding boxes are recorded in bounding_boxes_XXXXX.txt
    bounding_boxes_filename = os.path.join(output_dir, 'bounding_boxes_%05d.txt' % random_key)

    '''3. Write the bounding box of the face in every image to the record file'''
    with open(bounding_boxes_filename, "w") as text_file:
        # Total number of images processed
        nrof_images_total = 0
        nrof_successfully_aligned = 0
        # Optionally shuffle all images
        if args.random_order:
            random.shuffle(dataset)
        # For every person, with the absolute paths of all of that person's images
        for cls in dataset:
            # Output folder for this person
            output_class_dir = os.path.join(output_dir, cls.name)
            # Create the per-person folder in the destination if it does not exist yet
            if not os.path.exists(output_class_dir):
                os.makedirs(output_class_dir)
                if args.random_order:
                    random.shuffle(cls.image_paths)
            # Iterate over every image
            for image_path in cls.image_paths:
                nrof_images_total += 1
                # Filename of the aligned image
                filename = os.path.splitext(os.path.split(image_path)[1])[0]
                output_filename = os.path.join(output_class_dir, filename + '.png')
                print(image_path)
                if not os.path.exists(output_filename):
                    try:
                        # Read the image file
                        img = misc.imread(image_path)
                    except (IOError, ValueError, IndexError) as e:
                        errorMessage = '{}: {}'.format(image_path, e)
                        print(errorMessage)
                    else:
                        if img.ndim < 2:
                            print('Unable to align "%s"' % image_path)
                            text_file.write('%s\n' % (output_filename))
                            continue
                        if img.ndim == 2:
                            img = facenet.to_rgb(img)
                        img = img[:, :, 0:3]
                        # img = misc.imresize(img, 0.8)
                        # plt.imshow(img)
                        # plt.show()

                        # Face detection. bounding_boxes may contain several face boxes;
                        # each box has 5 values: the first two are the top-left corner,
                        # the next two the bottom-right corner, and the last one the confidence.
                        # bounding_boxes: boxes of shape [n,5], the 5 being x1,y1,x2,y2,score
                        # _: face landmark coordinates of shape [n,10]
                        bounding_boxes, _ = align.detect_face.detect_face(img, minsize, pnet, rnet, onet,
                                                                          threshold, factor)

                        # ################### affine transform ###########################
                        rows, cols, hn = img.shape
                        _new = np.transpose(_)  # (10,2)->(2,10)
                        for i in range(len(_new)):
                            # print("left eye position (%s,%s)" % (_new[i,0], _new[i,5]))
                            # print("right eye position (%s,%s)" % (_new[i,1], _new[i,6]))
                            eye_center_x = (_new[i, 0] + _new[i, 1]) * 0.5
                            eye_center_y = (_new[i, 5] + _new[i, 6]) * 0.5
                            dy = _new[i, 5] - _new[i, 6]
                            dx = _new[i, 0] - _new[i, 1]
                            angle = math.atan2(dy, dx) * 180.0 / math.pi + 180.0
                            # print("rotation angle is %s" % angle)
                            M = cv2.getRotationMatrix2D((eye_center_x, eye_center_y), angle, 1)
                            dst = cv2.warpAffine(img, M, (cols, rows))
                        # ################################################################

                        bounding_boxes, _ = align.detect_face.detect_face(dst, minsize, pnet, rnet, onet,
                                                                          threshold, factor)
                        # Number of detected faces (number of bounding boxes)
                        nrof_faces = bounding_boxes.shape[0]
                        if nrof_faces > 0:
                            # [n,4] face boxes
                            det = bounding_boxes[:, 0:4]
                            # Keep all face boxes
                            det_arr = []
                            # Size of the original image
                            img_size = np.asarray(dst.shape)[0:2]
                            # img_size = np.asarray(img.shape)[0:2]
                            if nrof_faces > 1:
                                # Several faces detected in one image
                                if args.detect_multiple_faces:
                                    # Keep every detected face
                                    for i in range(nrof_faces):
                                        det_arr.append(np.squeeze(det[i]))
                                else:
                                    # Even if there are several faces, keep only one.
                                    # Size of each face box
                                    bounding_box_size = (det[:, 2] - det[:, 0]) * (det[:, 3] - det[:, 1])
                                    # Image center
                                    img_center = img_size / 2
                                    # Offset of each box center from the image center;
                                    # (det[:,0]+det[:,2])/2 and (det[:,1]+det[:,3])/2 are the box center coordinates
                                    offsets = np.vstack([(det[:, 0] + det[:, 2]) / 2 - img_center[1],
                                                         (det[:, 1] + det[:, 3]) / 2 - img_center[0]])
                                    # Squared distance of each box center from the image center.
                                    # E.g. offsets=[[ 4.20016056 145.02849352 -134.53862838] [ -22.14250919 -26.74770141 -30.76835772]]
                                    # gives offset_dist_squared=[ 507.93206189 21748.70346425 19047.33436466]
                                    offset_dist_squared = np.sum(np.power(offsets, 2.0), 0)
                                    # Box size minus twice the squared offset: pick the box that maximizes it,
                                    # i.e. prefer boxes that are both large and close to the image center
                                    index = np.argmax(bounding_box_size - offset_dist_squared * 2.0)  # some extra weight on the centering
                                    det_arr.append(det[index, :])
                            else:
                                # Only one face box, nothing to choose
                                det_arr.append(np.squeeze(det))

                            # Iterate over every face box
                            for i, det in enumerate(det_arr):
                                # [4,] grow the bounding box by the margin, then crop
                                det = np.squeeze(det)
                                bb = np.zeros(4, dtype=np.int32)
                                # Crop margin around the box: the crop is a bit larger than the
                                # MTCNN box, by an amount controlled by the margin argument
                                bb[0] = np.maximum(det[0] - args.margin / 2, 0)
                                bb[1] = np.maximum(det[1] - args.margin / 2, 0)
                                bb[2] = np.minimum(det[2] + args.margin / 2, img_size[1])
                                bb[3] = np.minimum(det[3] + args.margin / 2, img_size[0])
                                # Crop the face box, then resize
                                cropped = dst[bb[1]:bb[3], bb[0]:bb[2], :]
                                # cropped = img[bb[1]:bb[3], bb[0]:bb[2], :]
                                # Resize to the requested size, save the image and the box position
                                scaled = misc.imresize(cropped, (args.image_size, args.image_size), interp='bilinear')
                                nrof_successfully_aligned += 1
                                filename_base, file_extension = os.path.splitext(output_filename)  # split name and extension
                                if args.detect_multiple_faces:
                                    output_filename_n = "{}_{}{}".format(filename_base, i, file_extension)
                                else:
                                    output_filename_n = "{}{}".format(filename_base, file_extension)
                                # Save the image
                                misc.imsave(output_filename_n, scaled)
                                # Record the info in bounding_boxes_XXXXX.txt
                                text_file.write('%s %d %d %d %d\n' % (output_filename_n, bb[0], bb[1], bb[2], bb[3]))
                        else:
                            print('Unable to align "%s"' % image_path)
                            text_file.write('%s\n' % (output_filename))

    print('Total number of images: %d' % nrof_images_total)
    print('Number of successfully aligned images: %d' % nrof_successfully_aligned)
def main(args):
    # Load MTCNN model for detecting and aligning Faces in the Captured Photos
    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=1)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
        with sess.as_default():
            pnet, rnet, onet = align.detect_face.create_mtcnn(sess, None)

    minsize = 20  # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # three steps' threshold
    factor = 0.709  # scale factor
    nrof_successfully_aligned = 0
    # Save face files locally just to verify. You may want to remove this once your system is set up.
    output_filename = r'd:\PhotoCaptured.png'  # raw string so the backslash is not treated as an escape

    with tf.Graph().as_default():
        with tf.Session() as sess:
            # args.seed defaulted to 666
            np.random.seed(seed=666)

            # Load the model once
            print('Loading feature extraction model')
            # Use your path where you have saved the pretrained facenet model
            facenet.load_model('./models/20170512-110547.pb')

            # Get input and output tensors
            images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
            embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
            phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
            embedding_size = embeddings.get_shape()[1]

            # Your custom classifier, trained on the last layer with your own image database.
            # Please refer to the Facenet repo for training a custom classifier.
            classifier_filename_exp = os.path.expanduser('./models/my_classifier.pkl')

            # Classify images
            print('Testing classifier')
            with open(classifier_filename_exp, 'rb') as infile:
                (model, class_names) = pickle.load(infile)
            print('Loaded classifier model from file "%s"' % classifier_filename_exp)

            # Start Video Capture
            video_capture = cv2.VideoCapture(0)

            # All the pre-loading is done. Now loop through capturing photos and recognizing faces in the frames
            while True:
                try:
                    ret, frame = video_capture.read()
                    img = frame
                except (IOError, ValueError, IndexError) as e:
                    print("Error")
                else:
                    if img.ndim < 2:
                        print('Unable to align frame')  # the original referenced an undefined image_path here
                    if img.ndim == 2:
                        img = facenet.to_rgb(img)
                    img = img[:, :, 0:3]
                    bounding_boxes, box_cord = align.detect_face.detect_face(img, minsize, pnet, rnet, onet,
                                                                             threshold, factor)
                    nrof_faces = bounding_boxes.shape[0]
                    # Define npArray of 3x2 and assign scaled to it.
                    # face_array = np.array(160,160,3)
                    face_list = []
                    print('Number of faces ******* %s', nrof_faces)
                    # for rectangle in range(0, nrof_faces):
                    #     cv2.rectangle(img, box_cord[rectangle], (0, 255, 0), 5)
                    print('Type of Box Cord ******* %s', type(box_cord))
                    print('shape of Box Cord ******* %s', box_cord.shape)

                    # Display the resulting frame
                    cv2.imshow('Video', img)
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break

                    if nrof_faces > 0:
                        det = bounding_boxes[:, 0:4]
                        det_arr = []
                        img_size = np.asarray(img.shape)[0:2]
                        if nrof_faces > 1:
                            # if args.detect_multiple_faces:
                            for i in range(nrof_faces):
                                det_arr.append(np.squeeze(det[i]))
                        else:
                            det_arr.append(np.squeeze(det))
                        for i, det in enumerate(det_arr):
                            det = np.squeeze(det)
                            bb = np.zeros(4, dtype=np.int32)
                            # Hardcoding: args.margin = 32, image_size 160
                            bb[0] = np.maximum(det[0] - 32 / 2, 0)
                            bb[1] = np.maximum(det[1] - 32 / 2, 0)
                            bb[2] = np.minimum(det[2] + 32 / 2, img_size[1])
                            bb[3] = np.minimum(det[3] + 32 / 2, img_size[0])
                            cropped = img[bb[1]:bb[3], bb[0]:bb[2], :]
                            scaled = misc.imresize(cropped, (160, 160), interp='bilinear')
                            nrof_successfully_aligned += 1
                            filename_base, file_extension = os.path.splitext(output_filename)
                            # if args.detect_multiple_faces:
                            # Try keeping it in an nparray instead of writing
                            output_filename_n = "{}_{}{}".format(filename_base, i, file_extension)
                            # else:
                            #     output_filename_n = "{}{}".format(filename_base, file_extension)
                            misc.imsave(output_filename_n, scaled)
                            print('type of scaled************', type(scaled))
                            # Appending each face to face_list
                            face_list.append(scaled)
                    else:
                        print('No Image or - Unable to align')
                        continue

                    # Invoke Classifier Code
                    # Run forward pass to calculate embeddings
                    print('Calculating features for images')
                    nrof_images = nrof_faces
                    nrof_batches_per_epoch = int(math.ceil(1.0 * nrof_images / 1000))
                    emb_array = np.zeros((nrof_images, embedding_size))
                    for i in range(nrof_batches_per_epoch):
                        # start_index = i * args.batch_size  - hardcoded batch size
                        start_index = i * 1000
                        # end_index = min((i + 1) * args.batch_size, nrof_images)
                        end_index = min((i + 1) * 1000, nrof_images)
                        images = Face_load_data(face_list, False, False, 160)
                        feed_dict = {images_placeholder: images, phase_train_placeholder: False}
                        emb_array[start_index:end_index, :] = sess.run(embeddings, feed_dict=feed_dict)

                    predictions = model.predict_proba(emb_array)
                    best_class_indices = np.argmax(predictions, axis=1)
                    best_class_probabilities = predictions[np.arange(len(best_class_indices)), best_class_indices]
                    # Print the face recognition result for each face in the frame
                    for i in range(len(best_class_indices)):
                        print('%4d %s: %.3f' % (i, class_names[best_class_indices[i]], best_class_probabilities[i]))

            video_capture.release()
print(class_names)
print('Loaded classifier model from file "%s"' % classifier_filename_exp)

video_capture = cv2.VideoCapture(0)
capture_interval = 5
capture_count = 0
frame_count = 0

while True:
    ret, frame = video_capture.read()
    # if(capture_count % capture_interval == 0):
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    if gray.ndim == 2:
        gray = facenet.to_rgb(gray)
    bounding_boxes, points = detect_face.detect_face(gray, minsize, pnet, rnet, onet,
                                                     threshold, factor)
    nrof_faces = bounding_boxes.shape[0]
    for face_position in bounding_boxes:
        face_position = face_position.astype(int)
        cropped = gray[face_position[1]:face_position[3], face_position[0]:face_position[2], :]
        if cropped.shape[0] == 0 or cropped.shape[1] == 0:
            continue
        scaled = cv2.resize(cropped, (image_size, image_size),
def main(args): sleep(random.random()) output_dir = os.path.expanduser(args.output_dir) if not os.path.exists(output_dir): os.makedirs(output_dir) dataset = facenet.get_dataset(args.input_dir) print('Creating networks and loading parameters') with tf.Graph().as_default(): gpu_options = tf.GPUOptions( per_process_gpu_memory_fraction=args.gpu_memory_fraction) sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)) with sess.as_default(): pnet, rnet, onet = detect_face.create_mtcnn(sess, None) minsize = 20 # minimum size of face threshold = [0.6, 0.7, 0.7] # three steps's threshold factor = 0.709 # scale factor nrof_images_total = 0 nrof_successfully_aligned = 0 if args.random_order: random.shuffle(dataset) for cls in dataset: output_class_dir = os.path.join(output_dir, cls.name) if not os.path.exists(output_class_dir): os.makedirs(output_class_dir) if args.random_order: random.shuffle(cls.image_paths) for image_path in cls.image_paths: nrof_images_total += 1 filename = os.path.splitext(os.path.split(image_path)[1])[0] output_filename = os.path.join(output_class_dir, filename + '.png') print(image_path) if not os.path.exists(output_filename): try: img = misc.imread(image_path) except (IOError, ValueError, IndexError) as e: errorMessage = '{}: {}'.format(image_path, e) print(errorMessage) else: if img.ndim < 2: print('Unable to align "%s"' % image_path) continue if img.ndim == 2: img = facenet.to_rgb(img) img = img[:, :, 0:3] bounding_boxes, landmarks = detect_face.detect_face( img, minsize, pnet, rnet, onet, threshold, factor) nrof_faces = bounding_boxes.shape[0] if nrof_faces > 0: det = bounding_boxes[:, 0:4] landmark = landmarks img_size = np.asarray(img.shape)[0:2] if nrof_faces > 1: bounding_box_size = (det[:, 2] - det[:, 0]) * ( det[:, 3] - det[:, 1]) img_center = img_size / 2 offsets = np.vstack([ (det[:, 0] + det[:, 2]) / 2 - img_center[1], (det[:, 1] + det[:, 3]) / 2 - img_center[0] ]) offset_dist_squared = np.sum( np.power(offsets, 2.0), 0) index = np.argmax( bounding_box_size - offset_dist_squared * 2.0) # some extra weight on the centering det = det[index, :] landmark = landmark[:, index] det = np.squeeze(det) landmark = np.squeeze(landmark) if args.align_face_image == 'off': bb = np.zeros(4, dtype=np.int32) bb[0] = np.maximum(det[0] - args.margin / 2, 0) bb[1] = np.maximum(det[1] - args.margin / 2, 0) bb[2] = np.minimum(det[2] + args.margin / 2, img_size[1]) bb[3] = np.minimum(det[3] + args.margin / 2, img_size[0]) cropped = img[bb[1]:bb[3], bb[0]:bb[2], :] scaled = misc.imresize( cropped, (args.image_size, args.image_size), interp='bilinear') nrof_successfully_aligned += 1 misc.imsave(output_filename, scaled) else: cv_img = cv2.imread(image_path) cv_img = face_alignment(cv_img, args.image_size, landmark) cv2.imwrite(output_filename, cv_img) nrof_successfully_aligned += 1 if args.landmark_image == 'on': cv_img_landmark = cv2.imread(image_path) # TODO : Write marked image with landmark and bounding box else: print('Unable to align "%s"' % image_path) print('Total number of images: %d' % nrof_images_total) print('Number of successfully aligned images: %d' % nrof_successfully_aligned)
output_filename = os.path.join(output_class_dir, filename + '.png')
print(image_path)
if not os.path.exists(output_filename):
    try:
        img = misc.imread(image_path)
        print('read data dimension: ', img.ndim)
    except (IOError, ValueError, IndexError) as e:
        errorMessage = '{}: {}'.format(image_path, e)
        print(errorMessage)
    else:
        if img.ndim < 2:
            print('Unable to align "%s"' % image_path)
            text_file.write('%s\n' % (output_filename))
            continue
        if img.ndim == 2:
            img = facenet.to_rgb(img)
            print('to_rgb data dimension: ', img.ndim)
        img = img[:, :, 0:3]
        print('after data dimension: ', img.ndim)

        bounding_boxes, _ = detect_face.detect_face(img, minsize, pnet, rnet, onet,
                                                    threshold, factor)
        nrof_faces = bounding_boxes.shape[0]
        print('detected_face: %d' % nrof_faces)
        if nrof_faces > 0:
            det = bounding_boxes[:, 0:4]
            img_size = np.asarray(img.shape)[0:2]
            if nrof_faces > 1:
                bounding_box_size = (det[:, 2] - det[:, 0]) * (det[:, 3] - det[:, 1])
                img_center = img_size / 2
                offsets = np.vstack([(det[:, 0] + det[:, 2]) / 2 - img_center[1],
                                     (det[:, 1] + det[:, 3]) / 2 - img_center[0]])
def detect_faces(args):
    """
    :param img_path: input image for face recognition
    :return: the bounding box and the cropped faces
    """
    minsize = 20  # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # three steps's threshold
    factor = 0.709  # scale factor
    nrof_successfully_aligned = 0

    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
        with sess.as_default():
            pnet, rnet, onet = align.detect_face.create_mtcnn(sess, None)

    try:
        img = misc.imread(args.img_dir[0])
    except (IOError, ValueError, IndexError) as e:
        errorMessage = '{}: {}'.format(args.img_dir, e)
        print(errorMessage)
    else:
        if img.ndim < 2:
            print('Unable to align "%s"' % args.img_dir)
        if img.ndim == 2:
            img = facenet.to_rgb(img)
        img = img[:, :, 0:3]

        bounding_boxes, _ = align.detect_face.detect_face(img, minsize, pnet, rnet, onet, threshold, factor)
        nrof_faces = bounding_boxes.shape[0]
        print('number of faces is {}'.format(nrof_faces))
        if nrof_faces > 0:
            det = bounding_boxes[:, 0:4]
            det_arr = []
            img_size = np.asarray(img.shape)[0:2]
            if nrof_faces > 1:
                if args.detect_multiple_faces:
                    for i in range(nrof_faces):
                        det_arr.append(np.squeeze(det[i]))
                    # print('type of det_arr {} and {}'.format(type(det_arr), det_arr))
                else:
                    bounding_box_size = (det[:, 2] - det[:, 0]) * (det[:, 3] - det[:, 1])
                    img_center = img_size / 2
                    offsets = np.vstack([(det[:, 0] + det[:, 2]) / 2 - img_center[1],
                                         (det[:, 1] + det[:, 3]) / 2 - img_center[0]])
                    offset_dist_squared = np.sum(np.power(offsets, 2.0), 0)
                    index = np.argmax(bounding_box_size - offset_dist_squared * 2.0)  # some extra weight on the centering
                    det_arr.append(det[index, :])
            else:
                det_arr.append(np.squeeze(det))
            # print('det shape is {}'.format(np.shape(det_arr)))

            ##### save cropped faces in images
            scaled = []
            for i, det in enumerate(det_arr):
                det = np.squeeze(det)
                bb = np.zeros(4, dtype=np.int32)
                bb[0] = np.maximum(det[0] - args.margin / 2, 0)
                bb[1] = np.maximum(det[1] - args.margin / 2, 0)
                bb[2] = np.minimum(det[2] + args.margin / 2, img_size[1])
                bb[3] = np.minimum(det[3] + args.margin / 2, img_size[0])
                cropped = img[bb[1]:bb[3], bb[0]:bb[2], :]
                crop_img = misc.imresize(cropped, (args.image_size, args.image_size), interp='bilinear')
                # print(type(crop_img))
                prewhitened = facenet.prewhiten(crop_img)
                # print(np.shape(crop_img))
                scaled.append(prewhitened)
                nrof_successfully_aligned += 1
            # print('size of scaled faces is {}'.format(np.shape(scaled)))
        else:
            print('Unable to align "%s"' % args.img_dir)

    return img, det_arr, scaled
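# detect_faces() above only reads a handful of attributes from args, so for a quick
# test it can be driven with a SimpleNamespace instead of a full argparse parser.
# This is a minimal sketch: the field names match the args.* attributes used in the
# function, while the file name and values below are placeholders, not defaults
# taken from the source.
from types import SimpleNamespace

args = SimpleNamespace(
    img_dir=['test.jpg'],        # detect_faces reads args.img_dir[0]
    detect_multiple_faces=True,  # keep every detected face instead of only the most central one
    margin=44,                   # extra pixels around each MTCNN box before cropping
    image_size=160,              # size of the resized, prewhitened face crops
    gpu_memory_fraction=0.25,
)
img, det_arr, faces = detect_faces(args)
print(len(faces), 'prewhitened face crops of shape', faces[0].shape if faces else None)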
def main(args): print('Creating networks and loading parameters') with tf.Graph().as_default(): gpu_options = tf.GPUOptions( per_process_gpu_memory_fraction=args.gpu_memory_fraction) sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)) with sess.as_default(): pnet, rnet, onet = align.detect_face.create_mtcnn(sess, None) facenet.load_model(args.model_trained) # Get input and output tensors images_placeholder = \ tf.get_default_graph().get_tensor_by_name("input:0") embeddings = \ tf.get_default_graph().get_tensor_by_name("embeddings:0") phase_train_placeholder = \ tf.get_default_graph().get_tensor_by_name("phase_train:0") # (?, 128) print(">>> Embedding size: ", embeddings.get_shape()) labels, class_names, embed_arrays = joblib.load( args.model_filename) # Classify images model = joblib.load(args.classifier_filename) # ================================================================= minsize = 20 # minimum size of face threshold = [0.6, 0.7, 0.7] # three steps's threshold factor = 0.709 # scale factor # Get a reference to webcam #0 (the default one) video_capture = cv2.VideoCapture(0) while True: ret, img = video_capture.read() nrof_successfully_aligned = 0 if img.ndim == 2: img = facenet.to_rgb(img) img = img[:, :, 0:3] # print("Ndim: %d" % img.ndim) # print("Shape: %d" % img.shape) bounding_boxes, _ = align.detect_face.detect_face( img, minsize, pnet, rnet, onet, threshold, factor) nrof_faces = bounding_boxes.shape[0] if nrof_faces > 0: det = bounding_boxes[:, 0:4] det_arr = [] img_size = np.asarray(img.shape)[0:2] if nrof_faces > 1: if args.detect_multiple_faces: for i in range(nrof_faces): det_arr.append(np.squeeze(det[i])) else: bounding_box_size = (det[:, 2] - det[:, 0]) * \ (det[:, 3] - det[:, 1]) img_center = img_size / 2 offsets = np.vstack([ (det[:, 0] + det[:, 2]) / 2 - img_center[1], (det[:, 1] + det[:, 3]) / 2 - img_center[0] ]) offset_dist_squared = np.sum( np.power(offsets, 2.0), 0) # some extra weight on the centering index = np.argmax(bounding_box_size - offset_dist_squared * 2.0) det_arr.append(det[index, :]) else: det_arr.append(np.squeeze(det)) for i, det in enumerate(det_arr): det = np.squeeze(det) bb = np.zeros(4, dtype=np.int32) bb[0] = np.maximum(det[0] - args.margin / 2, 0) bb[1] = np.maximum(det[1] - args.margin / 2, 0) bb[2] = np.minimum(det[2] + args.margin / 2, img_size[1]) bb[3] = np.minimum(det[3] + args.margin / 2, img_size[0]) # CROP IMAGE print(">>> Resize image") img_croped = misc.imresize( img[bb[1]:bb[3], bb[0]:bb[2], :], (args.image_size, args.image_size), interp='bilinear') img_croped = facenet.load_test_web_data( img_croped, False, False, args.image_size) # RUN print(">>> Feed dict") feed_dict = { images_placeholder: img_croped, # ndarray phase_train_placeholder: False } emb_array = sess.run(embeddings, feed_dict=feed_dict) # print(emb_array) nrof_successfully_aligned += 1 # left, top, right, bottom print(bb[0], bb[1], bb[2], bb[3]) cv2.rectangle(img, (bb[0], bb[1]), (bb[2], bb[3]), (0, 0, 255), 2) # Draw a label with a name below the face cv2.rectangle(img, (bb[0], bb[3] - 35), (bb[2], bb[1]), (0, 0, 255)) print("\t>>> Embed shape: ", emb_array.shape) distances, indexes = model.kneighbors( emb_array.reshape(1, -1), return_distance=True) print(emb_array.tolist()) # PREDICTION predictions = model.predict(emb_array) print("\t>>> Index (non threshold): ", class_names[predictions[0]]) print("\t>>> Predictions (non threshold): ", predictions[0]) checked = any(d < CONST_DIST for d in distances[0]) print("\t>>> 
Distance: ", distances[0]) font = cv2.FONT_HERSHEY_DUPLEX if checked: # max_dist = np.argmin(distances[0]) # class_name = \ # class_names[labels[indexes[0][max_dist]]] # cv2.putText( # img, "{} ".format(i) + class_name, # (bb[0] + 6, bb[3] - 6), font, # 1.0, (255, 255, 255), 1 # ) cv2.putText( img, "{} ".format(i) + class_names[predictions[0]], (bb[0] + 6, bb[3] - 6), font, 1.0, (255, 255, 255), 1) print("\t>>> Label: %s" % class_names[predictions[0]]) else: cv2.putText(img, "{} ".format(i) + "---", (bb[0] + 6, bb[3] - 6), font, 1.0, (255, 255, 255), 1) print("\t>>> Label: %s" % "Unknown") # emb_array = np.array(emb_array).reshape(1, -1) # print(">>> Prediction") # predictions = model.predict(emb_array) # print("Predictions: ", predictions[0]) # font = cv2.FONT_HERSHEY_DUPLEX # cv2.putText( # img, class_names[predictions[0]], # (bb[0] + 6, bb[3] - 6), font, # 1.0, (255, 255, 255), 1 # ) # print("Label: %s" % class_names[predictions[0]]) del img_croped, emb_array del det, det_arr else: print('Unable to align') cv2.imshow('Video', img) print("Show frame") # Hit 'q' on the keyboard to quit! if cv2.waitKey(1) & 0xFF == ord('q'): break video_capture.release() cv2.destroyAllWindows()
print('Start Recognition!') prevTime = 0 while True: ret, frame = video_capture.read() frame = cv2.resize(frame, (0, 0), fx=0.3, fy=0.3) #resize frame (optional) curTime = time.time() # calc fps timeF = frame_interval if (c % timeF == 0): find_results = [] if frame.ndim == 2: frame = facenet.to_rgb(frame) frame = frame[:, :, 0:3] #print(frame.shape[0]) #print(frame.shape[1]) # Use YOLO to get bounding boxes blob = cv2.dnn.blobFromImage(frame, 1 / 255, (IMG_WIDTH, IMG_HEIGHT), [0, 0, 0], 1, crop=False) # Sets the input to the network net.setInput(blob) # Runs the forward pass to get output of the output layers
def main(argv=None): align = align_dlib.AlignDlib(os.path.expanduser(FLAGS.dlib_face_predictor)) landmarkIndices = align_dlib.AlignDlib.OUTER_EYES_AND_NOSE output_dir = os.path.expanduser(FLAGS.output_dir) if not os.path.exists(output_dir): os.makedirs(output_dir) # Store some git revision info in a text file in the log directory src_path, _ = os.path.split(os.path.realpath(__file__)) facenet.store_revision_info(src_path, output_dir, ' '.join(argv)) dataset = facenet.get_dataset(FLAGS.input_dir) random.shuffle(dataset) # Scale the image such that the face fills the frame when cropped to crop_size scale = float(FLAGS.face_size) / FLAGS.image_size nrof_images_total = 0 nrof_prealigned_images = 0 nrof_successfully_aligned = 0 for cls in dataset: output_class_dir = os.path.join(output_dir, cls.name) if not os.path.exists(output_class_dir): os.makedirs(output_class_dir) random.shuffle(cls.image_paths) for image_path in cls.image_paths: nrof_images_total += 1 filename = os.path.splitext(os.path.split(image_path)[1])[0] output_filename = os.path.join(output_class_dir, filename + '.png') if not os.path.exists(output_filename): try: img = misc.imread(image_path) except (IOError, ValueError, IndexError) as e: errorMessage = '{}: {}'.format(image_path, e) print(errorMessage) else: if img.ndim == 2: img = facenet.to_rgb(img) if FLAGS.use_new_alignment: aligned = align.align_new( FLAGS.image_size, img, landmarkIndices=landmarkIndices, skipMulti=False, scale=scale) else: aligned = align.align(FLAGS.image_size, img, landmarkIndices=landmarkIndices, skipMulti=False, scale=scale) if aligned is not None: print(image_path) nrof_successfully_aligned += 1 misc.imsave(output_filename, aligned) elif FLAGS.prealigned_path: # Face detection failed. Use center crop from pre-aligned dataset class_name = os.path.split(output_class_dir)[1] image_path_without_ext = os.path.join( os.path.expanduser(FLAGS.prealigned_path), class_name, filename) # Find the extension of the image exts = ('jpg', 'png') for ext in exts: temp_path = image_path_without_ext + '.' + ext image_path = '' if os.path.exists(temp_path): image_path = temp_path break try: img = misc.imread(image_path) except (IOError, ValueError, IndexError) as e: errorMessage = '{}: {}'.format(image_path, e) print(errorMessage) else: scaled = misc.imresize(img, FLAGS.prealigned_scale, interp='bilinear') sz1 = scaled.shape[1] / 2 sz2 = FLAGS.image_size / 2 cropped = scaled[(sz1 - sz2):(sz1 + sz2), (sz1 - sz2):(sz1 + sz2), :] print(image_path) nrof_prealigned_images += 1 misc.imsave(output_filename, cropped) else: print('Unable to align "%s"' % image_path) print('Total number of images: %d' % nrof_images_total) print('Number of successfully aligned images: %d' % nrof_successfully_aligned) print('Number of pre-aligned images: %d' % nrof_prealigned_images)
def align_image(input_image, image_size=182, margin=44,
                gpu_memory_fraction=0.5):
    sleep(random.random())
    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        with sess.as_default():
            pnet, rnet, onet = align.detect_face.create_mtcnn(sess, None)

    minsize = 20  # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # thresholds for the three MTCNN stages
    factor = 0.709  # scale factor

    img = input_image
    if img.ndim < 2:
        print('Unable to align the input image')
        return img
    if img.ndim == 2:
        img = facenet.to_rgb(img)
    img = img[:, :, 0:3]

    bounding_boxes, points = align.detect_face.detect_face(
        img, minsize, pnet, rnet, onet, threshold, factor)
    nrof_faces = bounding_boxes.shape[0]
    if nrof_faces > 0:
        det = bounding_boxes[:, 0:4]
        img_size = np.asarray(img.shape)[0:2]
        if nrof_faces > 1:
            # Keep the largest face that is closest to the image centre
            bounding_box_size = (det[:, 2] - det[:, 0]) * (det[:, 3] - det[:, 1])
            img_center = img_size / 2
            offsets = np.vstack([(det[:, 0] + det[:, 2]) / 2 - img_center[1],
                                 (det[:, 1] + det[:, 3]) / 2 - img_center[0]])
            offset_dist_squared = np.sum(np.power(offsets, 2.0), 0)
            # some extra weight on the centering
            index = np.argmax(bounding_box_size - offset_dist_squared * 2.0)
            det = det[index, :]
        det = np.squeeze(det)
        bb = np.zeros(4, dtype=np.int32)
        bb[0] = np.maximum(det[0] - margin / 2, 0)
        bb[1] = np.maximum(det[1] - margin / 2, 0)
        bb[2] = np.minimum(det[2] + margin / 2, img_size[1])
        bb[3] = np.minimum(det[3] + margin / 2, img_size[0])
        # Bounding-box crop is computed, but the returned chip below is the
        # landmark-aligned one
        cropped = img[bb[1]:bb[3], bb[0]:bb[2], :]
        # Horizontal centre of the margin-padded bounding box
        temp_x = int(bb[0]) + int(bb[2]) + 1
        if temp_x < 0:
            temp_x -= 1
        bb2_center_x = temp_x / 2
        # Align using the five detected landmarks
        scaled = (extract_image_chips.extract_image_chips(
            img, np.transpose(points), image_size, 0.37))[0]
        return scaled, bb2_center_x
    else:
        print("no face detected..\n")
        return img
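A hypothetical caller for align_image; 'photo.jpg' is a placeholder path. When no face is detected the function returns only the input image rather than an (aligned_chip, center_x) tuple, so the result has to be checked before unpacking.

img = misc.imread('photo.jpg')  # placeholder input
result = align_image(img, image_size=182, margin=44)
if isinstance(result, tuple):
    chip, center_x = result
    misc.imsave('photo_aligned.png', chip)
else:
    print('no face found, keeping the original frame')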
def main(args): sleep(random.random()) # get the complete path of output directory output_dir = os.path.expanduser(args.output_dir) if not os.path.exists(output_dir): os.makedirs(output_dir) # get the path of this program file src_path, _ = os.path.split(os.path.realpath(__file__)) # Store some git revision info in a text file in the log directory facenet.store_revision_info(src_path, output_dir, ' '.join(sys.argv)) # get_dataset is a function which return a list of special objects # this kind of object has two element, one is the class name, # and the other is the path of all entry belonging to this class dataset = facenet.get_dataset(args.input_dir) # dataset = facenet.get_dataset_from_difference_sources(args.input_dir) print('Creating networks and loading parameters') with tf.Graph().as_default(): gpu_options = tf.GPUOptions( per_process_gpu_memory_fraction=args.gpu_memory_fraction) sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)) with sess.as_default(): pnet, rnet, onet = align.detect_face.create_mtcnn(sess, None) minsize = 20 # minimum size of face threshold = [0.6, 0.7, 0.7] # three steps's threshold factor = 0.709 # scale factor # Add a random key to the filename to allow alignment using multiple processes random_key = np.random.randint(0, high=99999) bounding_boxes_filename = os.path.join( output_dir, 'bounding_boxes_%05d.txt' % random_key) with open(bounding_boxes_filename, "w") as text_file: nrof_images_total = 0 nrof_successfully_aligned = 0 if args.random_order: random.shuffle(dataset) # classes shuffle for cls in dataset: output_class_dir = os.path.join(output_dir, cls.name) if not os.path.exists(output_class_dir): os.makedirs(output_class_dir) if args.random_order: random.shuffle(cls.image_paths) # images shuffle for image_path in cls.image_paths: nrof_images_total += 1 filename = os.path.splitext(os.path.split(image_path)[1])[0] output_filename = os.path.join(output_class_dir, filename + '.png') print(image_path) if not os.path.exists(output_filename): try: img = misc.imread(image_path) # img = cv2.imread(image_path) except (IOError, ValueError, IndexError) as e: errorMessage = '{}: {}'.format(image_path, e) print(errorMessage) else: # make sure that all images are normal # ---------------------------------------------- if img.ndim < 2: # an normal image should has at least two dimension(width and high) print('Unable to align "%s"' % image_path) text_file.write('%s\n' % (output_filename)) continue if img.ndim == 2: # an image which has only one channel img = facenet.to_rgb(img) img = img[:, :, 0:3] # ---------------------------------------------- # bounding_boxes: faces of all person in this image # _: five landmarks of each person in this image bounding_boxes, _ = align.detect_face.detect_face( img, minsize, pnet, rnet, onet, threshold, factor) nrof_faces = bounding_boxes.shape[0] # at least one face if nrof_faces > 0: det = bounding_boxes[:, 0:4] img_size = np.asarray(img.shape)[0:2] # more than one faces in this image # ------------------------------------------------------------------- # if nrof_faces > 1: # bounding_box_size = (det[:, 2]-det[:, 0])*(det[:, 3]-det[:, 1]) # width * high # img_center = img_size / 2 # # # how far is each person(center of face) from the center of this image # offsets = np.vstack([(det[:, 0]+det[:, 2])/2-img_center[1], (det[:, 1]+det[:, 3])/2-img_center[0]]) # offset_dist_squared = np.sum(np.power(offsets, 2.0), 0) # # # choose the most import person in this image # index = 
np.argmax(bounding_box_size-offset_dist_squared*2.0) # some extra weight on the centering # det = det[index, :] # ------------------------------------------------------------------- for det_no in range(nrof_faces): each_det = np.squeeze(det[det_no]) bb = np.zeros(4, dtype=np.int32) bb[0] = np.maximum( each_det[0] - args.margin / 2, 0) bb[1] = np.maximum( each_det[1] - args.margin / 2, 0) bb[2] = np.minimum( each_det[2] + args.margin / 2, img_size[1]) bb[3] = np.minimum( each_det[3] + args.margin / 2, img_size[0]) cropped = img[bb[1]:bb[3], bb[0]:bb[2], :] scaled = misc.imresize( cropped, (args.image_size, args.image_size), interp='bilinear') if nrof_faces > 1: output_filename = os.path.join( output_class_dir, filename + '_%d.png' % (det_no)) misc.imsave(output_filename, scaled) text_file.write('%s %d %d %d %d\n' % (output_filename, bb[0], bb[1], bb[2], bb[3])) nrof_successfully_aligned += 1 else: print('Unable to align "%s"' % image_path) text_file.write('%s\n' % (output_filename)) print('Total number of images: %d' % nrof_images_total) print('Number of successfully aligned images: %d' % nrof_successfully_aligned)
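The block commented out above is the single-face selection heuristic that the other alignment variants keep active: score every detection by its area minus twice its squared distance from the image centre and keep the best one. A stand-alone version of that scoring for reference; pick_central_face is a name introduced here, not part of the original scripts.

import numpy as np

def pick_central_face(det, img_size):
    """det: (n, 4) array of [x1, y1, x2, y2]; img_size: (height, width)."""
    bounding_box_size = (det[:, 2] - det[:, 0]) * (det[:, 3] - det[:, 1])
    img_center = np.asarray(img_size) / 2
    offsets = np.vstack([(det[:, 0] + det[:, 2]) / 2 - img_center[1],
                         (det[:, 1] + det[:, 3]) / 2 - img_center[0]])
    offset_dist_squared = np.sum(np.power(offsets, 2.0), 0)
    # some extra weight on the centering
    return np.argmax(bounding_box_size - offset_dist_squared * 2.0)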
def face_image(filename): img_path = filename modeldir = './models/20170511-185253.pb' classifier_filename = './class/classifier.pkl' npy = './npy' train_img = "./train_img" find_final_results = [] #final results with tf.Graph().as_default(): gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6) sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)) with sess.as_default(): pnet, rnet, onet = detect_face.create_mtcnn(sess, npy) minsize = 20 # minimum size of face threshold = [0.6, 0.7, 0.7] # three steps's threshold factor = 0.709 # scale factor margin = 44 frame_interval = 3 batch_size = 1000 image_size = 182 input_image_size = 160 HumanNames = os.listdir(train_img) HumanNames.sort() print('Loading feature extraction model') facenet.load_model(modeldir) images_placeholder = tf.get_default_graph().get_tensor_by_name( "input:0") embeddings = tf.get_default_graph().get_tensor_by_name( "embeddings:0") phase_train_placeholder = tf.get_default_graph( ).get_tensor_by_name("phase_train:0") embedding_size = embeddings.get_shape()[1] classifier_filename_exp = os.path.expanduser(classifier_filename) with open(classifier_filename_exp, 'rb') as infile: (model, class_names) = pickle.load(infile) # video_capture = cv2.VideoCapture("akshay_mov.mp4") c = 0 print('Start Recognition!') prevTime = 0 # ret, frame = video_capture.read() frame = cv2.imread(img_path, 0) frame = cv2.resize(frame, (0, 0), fx=0.5, fy=0.5) #resize frame (optional) curTime = time.time() + 1 # calc fps timeF = frame_interval if (c % timeF == 0): find_results = [] if frame.ndim == 2: frame = facenet.to_rgb(frame) frame = frame[:, :, 0:3] bounding_boxes, _ = detect_face.detect_face( frame, minsize, pnet, rnet, onet, threshold, factor) nrof_faces = bounding_boxes.shape[0] print('Face Detected: %d' % nrof_faces) if nrof_faces > 0: det = bounding_boxes[:, 0:4] img_size = np.asarray(frame.shape)[0:2] cropped = [] scaled = [] scaled_reshape = [] bb = np.zeros((nrof_faces, 4), dtype=np.int32) for i in range(nrof_faces): emb_array = np.zeros((1, embedding_size)) bb[i][0] = det[i][0] bb[i][1] = det[i][1] bb[i][2] = det[i][2] bb[i][3] = det[i][3] # inner exception if bb[i][0] <= 0 or bb[i][1] <= 0 or bb[i][2] >= len( frame[0]) or bb[i][3] >= len(frame): print('face is too close') continue if len(cropped) >= i: cropped.append(frame[bb[i][1]:bb[i][3], bb[i][0]:bb[i][2], :]) cropped[i] = facenet.flip(cropped[i], False) scaled.append( misc.imresize(cropped[i], (image_size, image_size), interp='bilinear')) scaled[i] = cv2.resize( scaled[i], (input_image_size, input_image_size), interpolation=cv2.INTER_CUBIC) scaled[i] = facenet.prewhiten(scaled[i]) scaled_reshape.append(scaled[i].reshape( -1, input_image_size, input_image_size, 3)) feed_dict = { images_placeholder: scaled_reshape[i], phase_train_placeholder: False } emb_array[0, :] = sess.run(embeddings, feed_dict=feed_dict) predictions = model.predict_proba(emb_array) print("predictions: ", predictions) best_class_indices = np.argmax(predictions, axis=1) #print(best_class_indices) best_class_probabilities = predictions[ np.arange(len(best_class_indices)), best_class_indices] print(best_class_probabilities) cv2.rectangle(frame, (bb[i][0], bb[i][1]), (bb[i][2], bb[i][3]), (0, 255, 0), 2) #boxing face #plot result idx under box text_x = bb[i][0] text_y = bb[i][3] + 20 #print(best_class_indices[0] > 0.80) if predictions[0][best_class_indices[0]] > 0.80: print('Result Indices: ', best_class_indices[0]) print(HumanNames) for H_i in HumanNames: # print(H_i) 
if HumanNames[best_class_indices[0]] == H_i: result_names = HumanNames[best_class_indices[0]] if result_names not in find_final_results: find_final_results.append(result_names) cv2.putText(frame, result_names, (text_x, text_y), cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (0, 255, 0), thickness=2, lineType=2) else: print('Result Indices: ', -1) print(HumanNames) cv2.putText(frame, "Unknown", (text_x, text_y), cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (0, 255, 0), thickness=2, lineType=2) else: print('Unable to align') frame = cv2.resize(frame, (0, 0), fx=0.5, fy=0.5) cv2.imshow('Image', frame) if cv2.waitKey(1000000) & 0xFF == ord('q'): sys.exit("Thanks") cv2.destroyAllWindows() return find_final_results
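A minimal call site for face_image; 'group_photo.jpg' is a placeholder, and the model, classifier and training-image paths are the ones hard-coded inside the function. The call blocks on the OpenCV window until a key is pressed and then returns the list of recognised names.

if __name__ == '__main__':
    names = face_image('group_photo.jpg')
    print('recognised: {}'.format(names))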
def main(args): align = align_dlib.AlignDlib(os.path.expanduser(args.dlib_face_predictor)) landmarkIndices = align_dlib.AlignDlib.OUTER_EYES_AND_NOSE output_dir = os.path.expanduser(args.output_dir) if not os.path.exists(output_dir): os.makedirs(output_dir) # Store some git revision info in a text file in the log directory src_path,_ = os.path.split(os.path.realpath(__file__)) facenet.store_revision_info(src_path, output_dir, ' '.join(sys.argv)) dataset = facenet.get_dataset(args.input_dir) random.shuffle(dataset) # Scale the image such that the face fills the frame when cropped to crop_size scale = float(args.face_size) / args.image_size nrof_images_total = 0 nrof_prealigned_images = 0 nrof_successfully_aligned = 0 for cls in dataset: output_class_dir = os.path.join(output_dir, cls.name) if not os.path.exists(output_class_dir): os.makedirs(output_class_dir) random.shuffle(cls.image_paths) for image_path in cls.image_paths: nrof_images_total += 1 filename = os.path.splitext(os.path.split(image_path)[1])[0] output_filename = os.path.join(output_class_dir, filename+'.png') if not os.path.exists(output_filename): try: img = misc.imread(image_path) except (IOError, ValueError, IndexError) as e: errorMessage = '{}: {}'.format(image_path, e) print(errorMessage) else: if img.ndim == 2: img = facenet.to_rgb(img) if args.use_center_crop: scaled = misc.imresize(img, args.prealigned_scale, interp='bilinear') sz1 = scaled.shape[1]/2 sz2 = args.image_size/2 aligned = scaled[(sz1-sz2):(sz1+sz2),(sz1-sz2):(sz1+sz2),:] else: aligned = align.align(args.image_size, img, landmarkIndices=landmarkIndices, skipMulti=False, scale=scale) if aligned is not None: print(image_path) nrof_successfully_aligned += 1 misc.imsave(output_filename, aligned) elif args.prealigned_dir: # Face detection failed. Use center crop from pre-aligned dataset class_name = os.path.split(output_class_dir)[1] image_path_without_ext = os.path.join(os.path.expanduser(args.prealigned_dir), class_name, filename) # Find the extension of the image exts = ('jpg', 'png') for ext in exts: temp_path = image_path_without_ext + '.' + ext image_path = '' if os.path.exists(temp_path): image_path = temp_path break try: img = misc.imread(image_path) except (IOError, ValueError, IndexError) as e: errorMessage = '{}: {}'.format(image_path, e) print(errorMessage) else: scaled = misc.imresize(img, args.prealigned_scale, interp='bilinear') sz1 = scaled.shape[1]/2 sz2 = args.image_size/2 cropped = scaled[(sz1-sz2):(sz1+sz2),(sz1-sz2):(sz1+sz2),:] print(image_path) nrof_prealigned_images += 1 misc.imsave(output_filename, cropped) else: print('Unable to align "%s"' % image_path) print('Total number of images: %d' % nrof_images_total) print('Number of successfully aligned images: %d' % nrof_successfully_aligned) print('Number of pre-aligned images: %d' % nrof_prealigned_images)
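The pre-aligned fallback above takes a centre crop after rescaling; note that under Python 3 the two half-sizes must be integers before they can be used as slice bounds. A small stand-alone version of that crop (center_crop is a name introduced here, and, as in the original, both offsets are derived from the image width):

def center_crop(img, crop_size):
    # Integer half-sizes so the values are valid slice indices under Python 3
    sz1 = img.shape[1] // 2
    sz2 = crop_size // 2
    return img[(sz1 - sz2):(sz1 + sz2), (sz1 - sz2):(sz1 + sz2), :]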
def crop_my_baby(vid_image): ## print(vid_image) sleep(random.random()) print('Creating networks and loading parameters') with tf.Graph().as_default(): gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=1.0) sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)) with sess.as_default(): pnet, rnet, onet = align.detect_face.create_mtcnn(sess, None) minsize = 20 threshold = [0.6, 0.7, 0.7] factor = 0.709 random_key = np.random.randint(0, high=99999) ## bounding_boxes_filename = os.path.join( scaled = preprocess_image(vid_image) try: ## img = misc.imread(vid_image) img = vid_image except (IOError, ValueError, IndexError) as e: errorMessage = '{}: {}'.format(vid_image, e) print(errorMessage) else: if img.ndim == 2: img = facenet.to_rgb(img) img = img[:, :, 0:3] ## print(img) bounding_boxes, _ = align.detect_face.detect_face( img, minsize, pnet, rnet, onet, threshold, factor) nrof_faces = bounding_boxes.shape[0] # print('nrof_faces') # print(nrof_faces) if nrof_faces > 0: print('nrof_faces' + str(nrof_faces)) det = bounding_boxes[:, 0:4] det_arr = [] img_size = np.asarray(img.shape)[0:2] if nrof_faces > 1: for i in range(nrof_faces): det_arr.append(np.squeeze(det[i])) else: det_arr.append(np.squeeze(det)) for i, det in enumerate(det_arr): det = np.squeeze(det) bb = np.zeros(4, dtype=np.int32) bb[0] = np.maximum(det[0] - 22, 0) bb[1] = np.maximum(det[1] - 22, 0) bb[2] = np.minimum(det[2] + 22, img_size[1]) bb[3] = np.minimum(det[3] + 22, img_size[0]) cropped = img[bb[1]:bb[3], bb[0]:bb[2], :] scaled = misc.imresize(cropped, (160, 160), interp='bilinear') print('Success aligning') print('scaled_type' + str(type(scaled)) + 'scaled_dim') print(scaled.shape) else: print('scaled_type' + str(type(scaled)) + 'scaled_dim') print(scaled.shape) return scaled
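crop_my_baby rebuilds the TensorFlow graph and reloads the MTCNN weights on every call, which is expensive when cropping video frames one by one. A minimal sketch of a variant that creates the networks once and returns a reusable cropping function; create_face_cropper and its defaults are hypothetical names introduced here, not part of the original script.

def create_face_cropper(gpu_memory_fraction=1.0):
    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        with sess.as_default():
            pnet, rnet, onet = align.detect_face.create_mtcnn(sess, None)

    def crop(frame, margin=22, size=160):
        # The MTCNN callables keep a reference to sess, so they can be reused
        # for every frame without rebuilding the graph
        boxes, _ = align.detect_face.detect_face(
            frame, 20, pnet, rnet, onet, [0.6, 0.7, 0.7], 0.709)
        if boxes.shape[0] == 0:
            return None
        x1, y1, x2, y2 = boxes[0, 0:4]
        h, w = frame.shape[0:2]
        bb = [max(int(x1 - margin), 0), max(int(y1 - margin), 0),
              min(int(x2 + margin), w), min(int(y2 + margin), h)]
        return misc.imresize(frame[bb[1]:bb[3], bb[0]:bb[2], :],
                             (size, size), interp='bilinear')

    return crop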
print('Start Recognition!') prevTime = 0 while True: ret, frame = video_capture.read() # frame = cv2.resize(frame, (0,0), fx=0.5, fy=0.5) #resize frame (optional) curTime = time.time() # calc fps timeF = frame_interval if (c % timeF == 0): find_results = [] if frame.ndim == 2: frame = facenet.to_rgb(frame) frame = frame[:, :, 0:3] #print(frame.shape[0]) #print(frame.shape[1]) # Use YOLO to get bounding boxes blob = cv2.dnn.blobFromImage(frame, 1 / 255, (IMG_WIDTH, IMG_HEIGHT), [0, 0, 0], 1, crop=False) # Sets the input to the network net.setInput(blob) # Runs the forward pass to get output of the output layers outs = net.forward(get_outputs_names(net)) # Remove the bounding boxes with low confidence bounding_boxes = post_process(frame, outs, CONF_THRESHOLD, NMS_THRESHOLD)
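The YOLO branch above depends on two helpers that are not shown here, get_outputs_names and post_process. The former is usually the standard OpenCV DNN sample helper; it is sketched below under that assumption, while post_process (confidence filtering plus non-maximum suppression) is left out.

import numpy as np

def get_outputs_names(net):
    # Names of all layers, then keep only the unconnected (output) layers
    layer_names = net.getLayerNames()
    # getUnconnectedOutLayers() returns 1-based indices; flatten() copes with
    # OpenCV versions that wrap each index in its own array
    return [layer_names[int(i) - 1]
            for i in np.asarray(net.getUnconnectedOutLayers()).flatten()]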
def batch_inp(rel_path):
    # Relies on the module-level session, placeholders, MTCNN networks,
    # classifier and size constants set up earlier in the script
    print('Start Recognition!')
    prevTime = 0
    img_list = glob.glob(os.path.join(rel_path, '*'))
    results = list()
    ok_ind = list()
    scaled_reshape = []
    for cnt, img_path in enumerate(img_list):  # for each image in the list
        frame = cv2.imread(img_path)
        curTime = time.time()  # calc fps
        timeF = frame_interval
        if (c % timeF == 0):  # detect faces in the current image
            find_results = []
            if frame.ndim == 2:
                frame = facenet.to_rgb(frame)
            frame = frame[:, :, 0:3]
            bounding_boxes, _ = detect_face.detect_face(
                frame, minsize, pnet, rnet, onet, threshold, factor)
            nrof_faces = bounding_boxes.shape[0]
            print('Detected_FaceNum: %d' % nrof_faces)
            if nrof_faces > 0:
                det = bounding_boxes[:, 0:4]
                img_size = np.asarray(frame.shape)[0:2]
                bb = [int(np.round(i)) for i in det[0]]
                # inner exception
                if bb[0] <= 0 or bb[1] <= 0 or bb[2] >= len(frame[0]) or bb[3] >= len(frame):
                    print('face is out of range!')
                    continue
                cropped = frame[bb[1]:bb[3], bb[0]:bb[2], :]
                cropped = facenet.flip(cropped, False)
                scaled = misc.imresize(cropped, (image_size, image_size),
                                       interp='bilinear')
                scaled = cv2.resize(scaled,
                                    (input_image_size, input_image_size),
                                    interpolation=cv2.INTER_CUBIC)
                scaled = facenet.prewhiten(scaled)
                scaled_reshape.append(
                    scaled.reshape(input_image_size, input_image_size, 3))
                ok_ind.append(cnt)

    # Run the embedding network once over all successfully cropped faces
    feed_dict = {
        images_placeholder: scaled_reshape,
        phase_train_placeholder: False
    }
    emb_array = sess.run(embeddings, feed_dict=feed_dict)  # n, n_emb
    predictions = model.predict_proba(emb_array)
    best_class_indices = np.argmax(predictions, axis=1)  # n, 1
    # best_class_probabilities = np.max(predictions, axis=1)
    results = np.array([''] * len(img_list), dtype=object)
    results[ok_ind] = [class_names[i] for i in best_class_indices]
    comb = list(zip(img_list, results))
    pd.DataFrame(comb).to_csv('test_results.csv')
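A hypothetical call site for batch_inp; './test_images' is a placeholder directory, and the session, placeholders, MTCNN networks and classifier are assumed to have been created at module level before the call.

if __name__ == '__main__':
    batch_inp('./test_images')
    # The output CSV contains one (image_path, predicted_name) row per image;
    # images where no face was found keep an empty name
    print(pd.read_csv('test_results.csv', index_col=0).head())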