def main_train(self):
    """Embed every dataset image with FaceNet, fit a linear SVM, and pickle it.

    Reads ``self.datadir`` (image dataset), ``self.modeldir`` (FaceNet model)
    and ``self.classifier_filename`` (output path).

    Returns:
        The user-expanded path that the ``(model, class_names)`` tuple was
        pickled to.
    """
    with tf.Graph().as_default(), tf.Session() as sess:
        dataset = facenet.get_dataset(self.datadir)
        image_paths, labels = facenet.get_image_paths_and_labels(dataset)
        print('Classes: %d' % len(dataset))
        print('Images: %d' % len(image_paths))

        facenet.load_model(self.modeldir)
        graph = tf.get_default_graph()
        images_placeholder = graph.get_tensor_by_name("input:0")
        embeddings = graph.get_tensor_by_name("embeddings:0")
        phase_train_placeholder = graph.get_tensor_by_name("phase_train:0")
        embedding_size = embeddings.get_shape()[1]

        print('Extracting features of images for model')
        batch_size = 1000
        image_size = 160
        total = len(image_paths)
        emb_array = np.zeros((total, embedding_size))

        # Walk the dataset in fixed-size slices; the last slice may be short.
        for lo in range(0, total, batch_size):
            hi = min(lo + batch_size, total)
            batch_images = facenet.load_data(image_paths[lo:hi], False, False,
                                             image_size)
            emb_array[lo:hi, :] = sess.run(
                embeddings,
                feed_dict={
                    images_placeholder: batch_images,
                    phase_train_placeholder: False
                })

        output_path = os.path.expanduser(self.classifier_filename)

        # Training Started
        print('Training Started')
        model = SVC(kernel='linear', probability=True)
        model.fit(emb_array, labels)

        class_names = [entry.name.replace('_', ' ') for entry in dataset]

        # Saving model
        with open(output_path, 'wb') as outfile:
            pickle.dump((model, class_names), outfile)
        return output_path
def race_det(img_path):
    """Detect faces in an image file and classify each with the pickled model.

    Relies on the module-level names ``npy``, ``modeldir`` and
    ``classifier_filename``.

    Args:
        img_path: Path of the image to analyse (read as grayscale, halved in
            size, then expanded to three channels).

    Returns:
        * a list with one class name per classified face, or
        * ``['No face detected']`` when the detector finds nothing, or
        * the string ``'Unable to align, face too close'`` as soon as one
          bounding box touches the image border (kept for backward
          compatibility with existing callers).
    """
    minsize = 20  # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # three steps's threshold
    factor = 0.709  # scale factor
    image_size = 182
    input_image_size = 160
    HumanNames = ['Asian', 'Black', 'Indian', 'White']

    result_names = []
    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
        session_config = tf.ConfigProto(gpu_options=gpu_options,
                                        log_device_placement=False)
        # Using the session itself as the context manager makes it the default
        # session AND closes it on exit; ``sess.as_default()`` alone leaked it.
        with tf.Session(config=session_config) as sess:
            pnet, rnet, onet = detect_face.create_mtcnn(sess, npy)

            print('Loading embedding extraction model')
            facenet.load_model(modeldir)
            graph = tf.get_default_graph()
            images_placeholder = graph.get_tensor_by_name("input:0")
            embeddings = graph.get_tensor_by_name("embeddings:0")
            phase_train_placeholder = graph.get_tensor_by_name("phase_train:0")
            embedding_size = embeddings.get_shape()[1]

            # NOTE(review): pickle executes arbitrary code on load; the
            # classifier file must come from a trusted source.
            with open(os.path.expanduser(classifier_filename), 'rb') as infile:
                model, _ = pickle.load(infile)

            print('Start Recognition!')
            frame = cv2.imread(img_path, 0)
            frame = cv2.resize(frame, (0, 0), fx=0.5, fy=0.5)  # resize frame (optional)
            if frame.ndim == 2:
                frame = facenet.to_rgb(frame)
            frame = frame[:, :, 0:3]

            bounding_boxes, _ = detect_face.detect_face(
                frame, minsize, pnet, rnet, onet, threshold, factor)
            nrof_faces = bounding_boxes.shape[0]
            if nrof_faces <= 0:
                return ['No face detected']

            # Integer pixel coordinates (x1, y1, x2, y2) for every face.
            bb = bounding_boxes[:, 0:4].astype(np.int32)
            for i in range(nrof_faces):
                x1, y1, x2, y2 = bb[i]
                if (x1 <= 0 or y1 <= 0 or x2 >= frame.shape[1]
                        or y2 >= frame.shape[0]):
                    print('face is too close')
                    return 'Unable to align, face too close'

                face = facenet.flip(frame[y1:y2, x1:x2, :], False)
                face = misc.imresize(face, (image_size, image_size),
                                     interp='bilinear')
                face = cv2.resize(face, (input_image_size, input_image_size),
                                  interpolation=cv2.INTER_CUBIC)
                face = facenet.prewhiten(face)
                face_batch = face.reshape(-1, input_image_size,
                                          input_image_size, 3)

                emb_array = np.zeros((1, embedding_size))
                emb_array[0, :] = sess.run(
                    embeddings,
                    feed_dict={
                        images_placeholder: face_batch,
                        phase_train_placeholder: False
                    })
                predictions = model.predict_proba(emb_array)
                best_class_indices = np.argmax(predictions, axis=1)
                result_names.append(HumanNames[best_class_indices[0]])

            return result_names
# Build the three MTCNN stage networks in the session supplied by the
# surrounding code (``sess`` and ``npy`` are defined outside this fragment).
pnet, rnet, onet = detect_face.create_mtcnn(sess, npy)

# Detection and preprocessing settings.
minsize = 20  # minimum size of face
threshold = [0.6, 0.7, 0.7]  # three steps's threshold
factor = 0.709  # scale factor
margin = 44
frame_interval = 3
batch_size = 1000
image_size = 182
input_image_size = 160

# Entries of the training image directory, in sorted order.
HumanNames = sorted(os.listdir(train_img))

print('Loading feature extraction model')
facenet.load_model(modeldir)

# Tensors of the loaded FaceNet graph that are fed / fetched later on.
images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
phase_train_placeholder = tf.get_default_graph().get_tensor_by_name(
    "phase_train:0")
embedding_size = embeddings.get_shape()[1]

# Restore the pickled (classifier, class names) pair.
classifier_filename_exp = os.path.expanduser(classifier_filename)
with open(classifier_filename_exp, 'rb') as infile:
    model, class_names = pickle.load(infile)

c = 0
def get_image_item(self, img_path):
    """Recognise the faces in one image file and summarise them as a dict.

    Relies on the module-level names ``npy``, ``train_img``, ``modeldir`` and
    ``classifier_filename``.

    Args:
        img_path: Path of the image to analyse (read as grayscale, halved in
            size, then expanded to three channels).

    Returns:
        ``dict(prob=..., kind=...)``. When at least one face is classified
        with probability above 0.5, ``prob`` and ``kind`` are comma-joined
        strings with one entry per such face. Otherwise ``prob`` is ``''`` and
        ``kind`` is ``'Unable to find face'`` (faces detected but none
        accepted) or ``'No face is avilable'`` (nothing detected).
    """
    minsize = 20  # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # three steps's threshold
    factor = 0.709  # scale factor
    image_size = 182
    input_image_size = 160

    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
        session_config = tf.ConfigProto(gpu_options=gpu_options,
                                        log_device_placement=False)
        # The session is its own context manager so it is closed on exit;
        # ``sess.as_default()`` alone never released it.
        with tf.Session(config=session_config) as sess:
            pnet, rnet, onet = detect_face.create_mtcnn(sess, npy)

            HumanNames = sorted(os.listdir(train_img))

            print('Loading feature extraction model')
            facenet.load_model(modeldir)
            graph = tf.get_default_graph()
            images_placeholder = graph.get_tensor_by_name("input:0")
            embeddings = graph.get_tensor_by_name("embeddings:0")
            phase_train_placeholder = graph.get_tensor_by_name("phase_train:0")
            embedding_size = embeddings.get_shape()[1]

            # NOTE(review): pickle executes arbitrary code on load; the
            # classifier file must come from a trusted source.
            with open(os.path.expanduser(classifier_filename), 'rb') as infile:
                model, _ = pickle.load(infile)

            print('Start Recognition!')
            frame = cv2.imread(img_path, 0)
            frame = cv2.resize(frame, (0, 0), fx=0.5, fy=0.5)  # resize frame (optional)
            if frame.ndim == 2:
                frame = facenet.to_rgb(frame)
            frame = frame[:, :, 0:3]

            bounding_boxes, _ = detect_face.detect_face(
                frame, minsize, pnet, rnet, onet, threshold, factor)
            nrof_faces = bounding_boxes.shape[0]
            print('Face Detected: %d' % nrof_faces)

            if nrof_faces <= 0:
                item = dict(prob='', kind='No face is avilable')
                print('Unable to align')
                return item

            probs = []
            kinds = []
            # Integer pixel coordinates (x1, y1, x2, y2) for every face.
            bb = bounding_boxes[:, 0:4].astype(np.int32)
            for i in range(nrof_faces):
                x1, y1, x2, y2 = bb[i]
                if (x1 <= 0 or y1 <= 0 or x2 >= frame.shape[1]
                        or y2 >= frame.shape[0]):
                    print('face is too close')
                    continue

                # Work on per-face locals: the old per-face lists were indexed
                # by ``i`` and raised IndexError once any face was skipped.
                face = facenet.flip(frame[y1:y2, x1:x2, :], False)
                face = misc.imresize(face, (image_size, image_size),
                                     interp='bilinear')
                face = cv2.resize(face, (input_image_size, input_image_size),
                                  interpolation=cv2.INTER_CUBIC)
                face = facenet.prewhiten(face)
                face_batch = face.reshape(-1, input_image_size,
                                          input_image_size, 3)

                emb_array = np.zeros((1, embedding_size))
                emb_array[0, :] = sess.run(
                    embeddings,
                    feed_dict={
                        images_placeholder: face_batch,
                        phase_train_placeholder: False
                    })
                predictions = model.predict_proba(emb_array)
                print(predictions)
                best_class_indices = np.argmax(predictions, axis=1)
                best_class_probabilities = predictions[
                    np.arange(len(best_class_indices)), best_class_indices]
                print(best_class_probabilities)

                print('Result Indices: ', best_class_indices[0])
                print(HumanNames)
                result_names = HumanNames[best_class_indices[0]]
                print(result_names)
                if best_class_probabilities[0] > 0.5:
                    probs.append(str(best_class_probabilities))
                    kinds.append(result_names)

            if probs:
                item = dict(prob=','.join(probs), kind=','.join(kinds))
            else:
                item = dict(prob='', kind='Unable to find face')
            return item
def deti(input_filepath):
    """Recognise faces in an image and record attendance in Firebase.

    Names after the first entry of ``./train_img`` are first marked
    ``"Absent"`` for the current year/month/day/hour; each recognised face is
    then marked ``"Present"``, its name is written to ``demofile.txt``, and
    the annotated image is shown in an OpenCV window.

    Args:
        input_filepath: Path of the image to analyse.

    Raises:
        SystemExit: if ``q`` is pressed while the result window is shown.
    """
    import pyrebase

    # NOTE(review): project settings and user credentials are hard-coded here;
    # they look like placeholders and should come from configuration.
    firebase_config = {
        "apiKey": "apiKey",
        "authDomain": "projectId.firebaseapp.com",
        "databaseURL": "https://databaseName.firebaseio.com",
        "storageBucket": "projectId.appspot.com"
    }
    firebase = pyrebase.initialize_app(firebase_config)
    auth = firebase.auth()
    # authenticate a user
    user = auth.sign_in_with_email_and_password("*****@*****.**", "123456")
    db = firebase.database()

    def _update_attendance(when, entry):
        """Merge *entry* into Attendance/<year>/<month>/<day>/<hour> of *when*."""
        db.child("Attendance").child(when.year).child(when.month).child(
            when.day).child(when.hour).update(entry, user['idToken'])

    img_path = input_filepath
    modeldir = ''
    classifier_filename = './class/classifier.pkl'
    npy = ''
    train_img = "./train_img"

    minsize = 20  # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # three steps's threshold
    factor = 0.709  # scale factor
    image_size = 182
    input_image_size = 160

    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
        session_config = tf.ConfigProto(gpu_options=gpu_options,
                                        log_device_placement=False)
        # The session context manager closes the session on every exit path,
        # including exceptions and the sys.exit() below.
        with tf.Session(config=session_config) as sess:
            pnet, rnet, onet = detect_face.create_mtcnn(sess, npy)

            HumanNames = sorted(os.listdir(train_img))

            now = datetime.datetime.now()
            print("printing to firebase")
            # NOTE(review): the first sorted name is deliberately left out, as
            # in the original loop (range started at 1) - confirm whether that
            # is intended or an off-by-one.
            for name in HumanNames[1:]:
                _update_attendance(now, {name: "Absent"})

            print('Loading feature extraction model')
            facenet.load_model(modeldir)
            graph = tf.get_default_graph()
            images_placeholder = graph.get_tensor_by_name("input:0")
            embeddings = graph.get_tensor_by_name("embeddings:0")
            phase_train_placeholder = graph.get_tensor_by_name("phase_train:0")
            embedding_size = embeddings.get_shape()[1]

            # NOTE(review): pickle executes arbitrary code on load; the
            # classifier file must come from a trusted source.
            with open(os.path.expanduser(classifier_filename), 'rb') as infile:
                model, _ = pickle.load(infile)

            print('Start Recognition!')
            frame = cv2.imread(img_path, 0)
            frame = cv2.resize(frame, (0, 0), fx=0.5, fy=0.5)  # resize frame (optional)
            if frame.ndim == 2:
                frame = facenet.to_rgb(frame)
            frame = frame[:, :, 0:3]

            bounding_boxes, _ = detect_face.detect_face(
                frame, minsize, pnet, rnet, onet, threshold, factor)
            nrof_faces = bounding_boxes.shape[0]
            print('Face Detected: %d' % nrof_faces)

            if nrof_faces > 0:
                # Integer pixel coordinates (x1, y1, x2, y2) for every face.
                bb = bounding_boxes[:, 0:4].astype(np.int32)
                for i in range(nrof_faces):
                    x1, y1, x2, y2 = bb[i]
                    if (x1 <= 0 or y1 <= 0 or x2 >= frame.shape[1]
                            or y2 >= frame.shape[0]):
                        print('face is too close')
                        continue

                    # Per-face locals: the old per-face lists were indexed by
                    # ``i`` and raised IndexError once any face was skipped.
                    face = facenet.flip(frame[y1:y2, x1:x2, :], False)
                    face = misc.imresize(face, (image_size, image_size),
                                         interp='bilinear')
                    face = cv2.resize(face,
                                      (input_image_size, input_image_size),
                                      interpolation=cv2.INTER_CUBIC)
                    face = facenet.prewhiten(face)
                    face_batch = face.reshape(-1, input_image_size,
                                              input_image_size, 3)

                    emb_array = np.zeros((1, embedding_size))
                    emb_array[0, :] = sess.run(
                        embeddings,
                        feed_dict={
                            images_placeholder: face_batch,
                            phase_train_placeholder: False
                        })
                    predictions = model.predict_proba(emb_array)
                    print(predictions)
                    best_class_indices = np.argmax(predictions, axis=1)
                    best_class_probabilities = predictions[
                        np.arange(len(best_class_indices)), best_class_indices]
                    print(best_class_probabilities)

                    cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0),
                                  2)  # boxing face

                    # plot result idx under box
                    text_x = x1
                    text_y = y2 + 20
                    print('Result Indices: ', best_class_indices[0])
                    print(HumanNames)
                    result_names = HumanNames[best_class_indices[0]]

                    print("printing to firebase")
                    student = {result_names: "Present"}
                    _update_attendance(datetime.datetime.now(), student)
                    print(student)

                    with open("demofile.txt", "w") as name_file:
                        name_file.write(result_names)

                    cv2.putText(frame,
                                result_names, (text_x, text_y),
                                cv2.FONT_HERSHEY_COMPLEX_SMALL,
                                1, (0, 0, 255),
                                thickness=1,
                                lineType=2)
            else:
                print('Unable to align')

            cv2.imshow('Image', frame)
            if cv2.waitKey(1000000) & 0xFF == ord('q'):
                sys.exit("Thanks")
            cv2.destroyAllWindows()
def recognize(filename="img.jpg"):
    """Recognise the first usable face in ``TEST_FOLDER + filename``.

    Relies on the module-level names ``TEST_FOLDER``, ``TRAIN_FOLDER``,
    ``MODEL_DIR``, ``CLASSIFIER`` and ``npy``.

    Args:
        filename: Name appended to ``TEST_FOLDER`` to form the image path.

    Returns:
        The recognised name as ``str`` when the first face that lies fully
        inside the image is classified with probability above 0.3; otherwise
        ``False`` (no face detected, every face touching the border, or the
        probability too low).
    """
    image_path = TEST_FOLDER + filename

    minsize = 20  # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # three steps's threshold
    factor = 0.709  # scale factor
    image_size = 182
    input_image_size = 160

    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
        session_config = tf.ConfigProto(gpu_options=gpu_options,
                                        log_device_placement=False)
        # The session context manager closes the session on every return path;
        # ``sess.as_default()`` alone leaked one session per call.
        with tf.Session(config=session_config) as sess:
            pnet, rnet, onet = detect_face.create_mtcnn(sess, npy)

            HumanNames = sorted(os.listdir(TRAIN_FOLDER))

            print('Loading feature extraction model')
            facenet.load_model(MODEL_DIR)
            graph = tf.get_default_graph()
            images_placeholder = graph.get_tensor_by_name("input:0")
            embeddings = graph.get_tensor_by_name("embeddings:0")
            phase_train_placeholder = graph.get_tensor_by_name("phase_train:0")
            embedding_size = embeddings.get_shape()[1]

            # NOTE(review): pickle executes arbitrary code on load; the
            # classifier file must come from a trusted source.
            with open(os.path.expanduser(CLASSIFIER), 'rb') as infile:
                model, _ = pickle.load(infile)

            print('Start Recognition!')
            frame = cv2.imread(image_path, 0)
            frame = cv2.resize(frame, (0, 0), fx=0.5, fy=0.5)  # resize frame (optional)
            if frame.ndim == 2:
                frame = facenet.to_rgb(frame)
            frame = frame[:, :, 0:3]

            bounding_boxes, _ = detect_face.detect_face(
                frame, minsize, pnet, rnet, onet, threshold, factor)
            nrof_faces = bounding_boxes.shape[0]
            print('Face Detected: %d' % nrof_faces)

            if nrof_faces <= 0:
                print('Unable to align')
                return False

            # Integer pixel coordinates (x1, y1, x2, y2) for every face.
            bb = bounding_boxes[:, 0:4].astype(np.int32)
            for i in range(nrof_faces):
                x1, y1, x2, y2 = bb[i]
                if (x1 <= 0 or y1 <= 0 or x2 >= frame.shape[1]
                        or y2 >= frame.shape[0]):
                    print('face is too close')
                    continue

                # Per-face locals: the old per-face lists were indexed by
                # ``i`` and raised IndexError once any face was skipped.
                face = facenet.flip(frame[y1:y2, x1:x2, :], False)
                face = misc.imresize(face, (image_size, image_size),
                                     interp='bilinear')
                face = cv2.resize(face, (input_image_size, input_image_size),
                                  interpolation=cv2.INTER_CUBIC)
                face = facenet.prewhiten(face)
                face_batch = face.reshape(-1, input_image_size,
                                          input_image_size, 3)

                emb_array = np.zeros((1, embedding_size))
                emb_array[0, :] = sess.run(
                    embeddings,
                    feed_dict={
                        images_placeholder: face_batch,
                        phase_train_placeholder: False
                    })
                predictions = model.predict_proba(emb_array)
                print("Predictions ", predictions)
                best_class_indices = np.argmax(predictions, axis=1)
                best_class_probabilities = predictions[
                    np.arange(len(best_class_indices)), best_class_indices]
                print("Best Predictions ", best_class_probabilities)

                # The first face that reaches this point decides the result.
                if best_class_probabilities[0] > 0.3:
                    print('Result Indices: ', best_class_indices[0])
                    print(HumanNames)
                    result_names = HumanNames[best_class_indices[0]]
                    print("Face Recognized: ", result_names)
                    return str(result_names)
                print('Not Recognized')
                return False

            # Every detected face touched the image border.
            return False
def main_train(self):
    """Embed dataset images, project them with t-SNE and save an image grid.

    Reads ``self.datadir``, ``self.modeldir`` and ``self.classifier_filename``.
    The normalised 2-D t-SNE coordinates are matched to the cells of a square
    grid with ``lapjv`` and the loaded images are pasted at the matched cells;
    the mosaic is written to ``obrazekV2.jpg``. Nothing is returned.

    NOTE(review): only the first ``batch_size`` (10000) images are embedded;
    rows of the embedding array beyond that stay zero.
    NOTE(review): the grid has ``out_dim ** 2`` points while the embedding has
    one row per image, so the cost matrix is square only when the image count
    is a perfect square - confirm that ``lapjv`` accepts other shapes.
    """
    with tf.Graph().as_default(), tf.Session() as sess:
        dataset = facenet.get_dataset(self.datadir)
        image_paths, labels = facenet.get_image_paths_and_labels(dataset)
        print("label")
        print(labels)
        print('Classes: %d' % len(dataset))
        print('Images: %d' % len(image_paths))

        facenet.load_model(self.modeldir)
        graph = tf.get_default_graph()
        images_placeholder = graph.get_tensor_by_name("input:0")
        embeddings = graph.get_tensor_by_name("embeddings:0")
        phase_train_placeholder = graph.get_tensor_by_name("phase_train:0")
        embedding_size = embeddings.get_shape()[1]

        print('Extracting features of images for model')
        batch_size = 10000
        image_size = 160
        nrof_images = len(image_paths)
        emb_array = np.zeros((nrof_images, embedding_size))

        # Single batch only: the slice covers at most ``batch_size`` images.
        first = 0
        last = min(batch_size, nrof_images)
        images = facenet.load_data(image_paths[first:last], False, False,
                                   image_size)
        emb_array[first:last, :] = sess.run(
            embeddings,
            feed_dict={
                images_placeholder: images,
                phase_train_placeholder: False
            })
        print("emb_array[0]")
        print(emb_array[0])

        # Computed as in the original flow although not used further below.
        class_names = [entry.name.replace('_', ' ') for entry in dataset]
        classifier_file_name = os.path.expanduser(self.classifier_filename)

        print('emb_array')
        print(emb_array)

        # 2-D projection, shifted and scaled per axis into [0, 1].
        X_embedded = TSNE(n_components=2).fit_transform(emb_array)
        X_embedded -= X_embedded.min(axis=0)
        X_embedded /= X_embedded.max(axis=0)
        print("X_embedded")
        print(X_embedded)

        plt.legend(bbox_to_anchor=(1, 1))
        plt.show()

        out_dim = round(math.sqrt(nrof_images))
        out_res = 160
        to_plot = np.square(out_dim)

        # Regular out_dim x out_dim lattice over the unit square, one row per
        # cell as (x, y).
        axis_ticks_a = np.linspace(0, 1, out_dim)
        axis_ticks_b = np.linspace(0, 1, out_dim)
        grid = np.dstack(np.meshgrid(axis_ticks_a, axis_ticks_b)).reshape(-1, 2)

        cost_matrix = cdist(grid, X_embedded, "sqeuclidean").astype(np.float32)
        cost_matrix = cost_matrix * (100000 / cost_matrix.max())
        print(cost_matrix)

        print("zaczalem to robic")
        row_asses, col_asses, _ = lapjv(cost_matrix)
        print("teraz to!")

        grid_jv = grid[col_asses]
        canvas_side = out_dim * out_res
        out = np.ones((canvas_side, canvas_side, 3))
        print(grid_jv)

        # Paste each image with its top-left corner at the matched grid cell.
        for cell, picture in zip(grid_jv, images[0:to_plot]):
            top = int(np.floor(cell[0] * (out_dim - 1) * out_res))
            left = int(np.floor(cell[1] * (out_dim - 1) * out_res))
            out[top:top + out_res,
                left:left + out_res] = image.img_to_array(picture)
        print(out)

        mosaic = image.array_to_img(out)
        mosaic.save("obrazekV2.jpg", quality=100)
def recognizer(video_path,
               pretrain_model='./models/20180408-102900',
               classifier='./class/classifier.pkl',
               npy_dir='./packages',
               train_img_dir='./datasets'):
    """Run face recognition on a video and display the annotated frames.

    Every frame is scaled to 70 %, faces are detected with MTCNN, embedded
    with FaceNet and classified with the pickled classifier. Faces classified
    with probability above 0.25 are labelled with the predicted name, the rest
    with ``'Unknown'``. Playback stops at the end of the stream or when ``q``
    is pressed.

    Args:
        video_path: Source handed to ``cv2.VideoCapture``.
        pretrain_model: FaceNet model passed to ``facenet.load_model``.
        classifier: Path of the pickled ``(model, class_names)`` tuple.
        npy_dir: Directory handed to ``detect_face.create_mtcnn``.
        train_img_dir: Directory whose sorted entries are used as class names.
    """
    minsize = 20
    threshold = [0.6, 0.7, 0.7]
    factor = 0.709
    image_size = 182
    input_image_size = 160

    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(allow_growth=True)
        session_config = tf.ConfigProto(gpu_options=gpu_options,
                                        log_device_placement=False)
        # The session context manager closes the session on exit;
        # ``sess.as_default()`` alone never released it.
        with tf.Session(config=session_config) as sess:
            pnet, rnet, onet = detect_face.create_mtcnn(sess, npy_dir)

            HumanNames = sorted(os.listdir(train_img_dir))

            print('Loading model...')
            facenet.load_model(pretrain_model)
            graph = tf.get_default_graph()
            images_placeholder = graph.get_tensor_by_name('input:0')
            embeddings = graph.get_tensor_by_name('embeddings:0')
            phase_train_placeholder = graph.get_tensor_by_name('phase_train:0')
            embedding_size = embeddings.get_shape()[1]

            # NOTE(review): pickle executes arbitrary code on load; the
            # classifier file must come from a trusted source.
            with open(os.path.expanduser(classifier), 'rb') as infile:
                model, _ = pickle.load(infile)

            print('Facial Recognition Starting...')
            cap = cv2.VideoCapture(video_path)
            try:
                while True:
                    ret, frame = cap.read()
                    if not ret:
                        # End of stream or read failure: the old code passed
                        # the missing frame to cv2.resize and crashed here.
                        break
                    frame = cv2.resize(frame, (0, 0), fx=0.7, fy=0.7)

                    # NOTE(review): the original frame counter was never
                    # incremented, so every frame is processed; that behaviour
                    # is kept.
                    if frame.ndim == 2:
                        frame = facenet.to_rgb(frame)
                    frame = frame[:, :, 0:3]

                    bounding_boxes, _ = detect_face.detect_face(
                        frame, minsize, pnet, rnet, onet, threshold, factor)
                    nrof_faces = bounding_boxes.shape[0]
                    print('Face Detected: %d' % nrof_faces)

                    if nrof_faces > 0:
                        # Integer pixel coordinates (x1, y1, x2, y2) per face.
                        bb = bounding_boxes[:, 0:4].astype(np.int32)
                        for i in range(nrof_faces):
                            x1, y1, x2, y2 = bb[i]
                            if (x1 <= 0 or y1 <= 0 or x2 >= frame.shape[1]
                                    or y2 >= frame.shape[0]):
                                print('face is too close')
                                continue

                            # Per-face locals: the old per-face lists were
                            # indexed by ``i`` and raised IndexError once any
                            # face was skipped.
                            face = facenet.flip(frame[y1:y2, x1:x2, :], False)
                            face = misc.imresize(face,
                                                 (image_size, image_size),
                                                 interp='bilinear')
                            face = cv2.resize(
                                face, (input_image_size, input_image_size),
                                interpolation=cv2.INTER_CUBIC)
                            face = facenet.prewhiten(face)
                            face_batch = face.reshape(-1, input_image_size,
                                                      input_image_size, 3)

                            emb_array = np.zeros((1, embedding_size))
                            emb_array[0, :] = sess.run(
                                embeddings,
                                feed_dict={
                                    images_placeholder: face_batch,
                                    phase_train_placeholder: False
                                })
                            predictions = model.predict_proba(emb_array)
                            print(predictions)
                            best_class_indices = np.argmax(predictions, axis=1)
                            best_class_probabilities = predictions[
                                np.arange(len(best_class_indices)),
                                best_class_indices]
                            print('Accuracy: ', best_class_probabilities)

                            cv2.rectangle(frame, (x1, y1), (x2, y2),
                                          (0, 255, 255), 1)

                            # Label goes just above the box.
                            text_x = x1
                            text_y = y1 - 10
                            print('Result Indices: ', best_class_indices[0])
                            if best_class_probabilities[0] > 0.25:
                                caption = HumanNames[best_class_indices[0]]
                            else:
                                caption = 'Unknown'
                            cv2.putText(frame,
                                        caption, (text_x, text_y),
                                        cv2.FONT_HERSHEY_COMPLEX_SMALL,
                                        1, (0, 0, 255),
                                        thickness=1,
                                        lineType=2)
                    else:
                        print('Unable to find face')

                    cv2.imshow('Facial Recognition', frame)
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        print('Ending...')
                        break
            finally:
                # Release the capture and windows even if a frame fails.
                cap.release()
                cv2.destroyAllWindows()
def main_train(self):
    """Embed the dataset, plot a per-class t-SNE scatter, then train an SVM.

    Reads ``self.datadir`` (image dataset), ``self.modeldir`` (FaceNet model)
    and ``self.classifier_filename`` (output path). The scatter plot is shown
    with ``plt.show()`` before training starts.

    Returns:
        The user-expanded path that the ``(model, class_names)`` tuple was
        pickled to.
    """
    with tf.Graph().as_default():
        with tf.Session() as sess:
            img_data = facenet.get_dataset(self.datadir)
            path, label = facenet.get_image_paths_and_labels(img_data)
            print("label")
            print(label)
            print('Classes: %d' % len(img_data))
            print('Images: %d' % len(path))

            facenet.load_model(self.modeldir)
            graph = tf.get_default_graph()
            images_placeholder = graph.get_tensor_by_name("input:0")
            embeddings = graph.get_tensor_by_name("embeddings:0")
            phase_train_placeholder = graph.get_tensor_by_name("phase_train:0")
            embedding_size = embeddings.get_shape()[1]

            print('Extracting features of images for model')
            batch_size = 1000
            image_size = 160
            nrof_images = len(path)
            nrof_batches_per_epoch = int(
                math.ceil(1.0 * nrof_images / batch_size))
            emb_array = np.zeros((nrof_images, embedding_size))
            print(nrof_batches_per_epoch)
            for i in range(nrof_batches_per_epoch):
                start_index = i * batch_size
                end_index = min((i + 1) * batch_size, nrof_images)
                paths_batch = path[start_index:end_index]
                images = facenet.load_data(paths_batch, False, False,
                                           image_size)
                feed_dict = {
                    images_placeholder: images,
                    phase_train_placeholder: False
                }
                emb_array[start_index:end_index, :] = sess.run(
                    embeddings, feed_dict=feed_dict)

            class_names = [cls.name.replace('_', ' ') for cls in img_data]
            classifier_file_name = os.path.expanduser(
                self.classifier_filename)

            print('emb_array')
            print(emb_array)
            X_embedded = TSNE(n_components=2).fit_transform(emb_array)
            print('X_embedded')
            print(X_embedded)

            # One scatter series per class. The previous grouping code called
            # ``.append`` on a NumPy row, built a ``set`` of arrays and indexed
            # with an ``enumerate`` tuple, so it always raised; it also skipped
            # the last sample.
            # Assumes each label is the index of its class in ``class_names``
            # (the original code indexed both with the same value) - confirm.
            labels_arr = np.asarray(label)
            for class_index, name in enumerate(class_names):
                members = labels_arr == class_index
                if not members.any():
                    continue
                plt.scatter(X_embedded[members, 0],
                            X_embedded[members, 1],
                            label=name)
            plt.legend(bbox_to_anchor=(1, 1))
            plt.show()

            # Training Started
            print('Training Started')
            model = SVC(kernel='linear', probability=True)
            model.fit(emb_array, label)

            print(class_names)

            # Saving model
            with open(classifier_file_name, 'wb') as outfile:
                pickle.dump((model, class_names), outfile)
            return classifier_file_name
def get_frame(self):
    """Grab one frame from ``self.cap``, annotate faces and return JPEG bytes.

    Each detected face that lies fully inside the frame is boxed, written to
    ``final2.png`` and posted to the remote recognition endpoint; the frame is
    labelled with ``"Unknown"`` or with the returned subject id when its
    confidence is at least 0.70.

    Returns:
        The annotated frame encoded as JPEG ``bytes``, or ``None`` when no
        frame could be read from ``self.cap``.

    NOTE(review): the graph, session and models are rebuilt on every call;
    caching them on the instance would be a large speed-up but is a design
    change left for a follow-up.
    """
    modeldir = '/Users/manohar/Downloads/RT-face-recognition/model/20180402-114759.pb'
    classifier_filename = './class/classifier.pkl'
    npy = ''
    api_url_base = 'https://hackathon-faceapp.herokuapp.com/recognize'
    headers = {'cache-control': 'no-cache'}

    minsize = 20  # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # three steps's threshold
    factor = 0.709  # scale factor
    image_size = 182
    input_image_size = 160

    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
        session_config = tf.ConfigProto(gpu_options=gpu_options,
                                        log_device_placement=False)
        # The session context manager closes the session on exit;
        # ``sess.as_default()`` alone leaked one session per frame.
        with tf.Session(config=session_config) as sess:
            pnet, rnet, onet = detect_face.create_mtcnn(sess, npy)

            # Load the model
            facenet.load_model(modeldir)
            graph = tf.get_default_graph()
            images_placeholder = graph.get_tensor_by_name("input:0")
            embeddings = graph.get_tensor_by_name("embeddings:0")
            phase_train_placeholder = graph.get_tensor_by_name("phase_train:0")
            embedding_size = embeddings.get_shape()[1]

            # NOTE(review): the unpickled classifier is not used below (the
            # remote API does the recognition); the load is kept so a missing
            # file still fails as before. pickle must only read trusted files.
            with open(os.path.expanduser(classifier_filename), 'rb') as infile:
                pickle.load(infile)

            ret, frame = self.cap.read()
            if not ret:
                # Previously the missing frame reached cv2.resize and crashed
                # before the intended ``return None`` could run.
                return None
            frame = cv2.resize(frame, (0, 0), fx=1, fy=1)  # resize frame (optional)

            if frame.ndim == 2:
                frame = facenet.to_rgb(frame)
            frame = frame[:, :, 0:3]
            bounding_boxes, _ = detect_face.detect_face(
                frame, minsize, pnet, rnet, onet, threshold, factor)
            nrof_faces = bounding_boxes.shape[0]
            print('Detected_FaceNum: %d' % nrof_faces)

            if nrof_faces > 0:
                # Integer pixel coordinates (x1, y1, x2, y2) for every face.
                bb = bounding_boxes[:, 0:4].astype(np.int32)
                for i in range(nrof_faces):
                    x1, y1, x2, y2 = bb[i]
                    if (x1 <= 0 or y1 <= 0 or x2 >= frame.shape[1]
                            or y2 >= frame.shape[0]):
                        print('Face is very close!')
                        continue

                    # Per-face locals: the old per-face lists were indexed by
                    # ``i`` and raised IndexError once any face was skipped.
                    face = facenet.flip(frame[y1:y2, x1:x2, :], False)
                    face = misc.imresize(face, (image_size, image_size),
                                         interp='bilinear')
                    face = cv2.resize(face,
                                      (input_image_size, input_image_size),
                                      interpolation=cv2.INTER_CUBIC)
                    face = facenet.prewhiten(face)
                    face_batch = face.reshape(-1, input_image_size,
                                              input_image_size, 3)

                    # NOTE(review): this embedding is computed but never used.
                    emb_array = np.zeros((1, embedding_size))
                    emb_array[0, :] = sess.run(
                        embeddings,
                        feed_dict={
                            images_placeholder: face_batch,
                            phase_train_placeholder: False
                        })

                    cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0),
                                  2)  # boxing face

                    # plot result idx under box
                    text_x = x1
                    text_y = y2 + 20

                    crop_img = frame[y1:y2, x1:x2]
                    cv2.imwrite("final2.png", crop_img)
                    # Close the upload handle once the request is done.
                    with open('final2.png', 'rb') as image_file:
                        response = requests.post(api_url_base,
                                                 files={'imageSrc': image_file},
                                                 headers=headers)

                    if response.status_code != 200:
                        print("error code: ", response.status_code)
                        continue

                    api_response = json.loads(response.content.decode('utf-8'))
                    print(api_response)
                    first_image = api_response['images'][0]
                    if first_image['transaction']['face_id'] == 1:
                        cv2.putText(frame,
                                    "Unknown", (text_x, text_y),
                                    cv2.FONT_HERSHEY_COMPLEX_SMALL,
                                    1, (0, 0, 255),
                                    thickness=1,
                                    lineType=2)
                    else:
                        print(first_image['transaction']['face_id'])
                        best_candidate = first_image['candidates'][0]
                        Identified = best_candidate['subject_id']
                        if best_candidate['confidence'] >= 0.70:
                            cv2.putText(frame,
                                        Identified, (text_x, text_y),
                                        cv2.FONT_HERSHEY_COMPLEX_SMALL,
                                        1, (0, 0, 255),
                                        thickness=1,
                                        lineType=2)

            ret, jpeg = cv2.imencode('.jpg', frame)
            return jpeg.tobytes()
def get_video_item(self, img_path):
    """Detect and classify faces in a video clip and build a result ``item`` dict.

    The function creates a TensorFlow graph and session, builds the MTCNN
    networks via ``detect_face.create_mtcnn``, loads the facenet model from
    ``modeldir`` and unpickles ``(model, class_names)`` from
    ``classifier_filename``.  It then opens the hard-coded clip
    ``./cache/short_hamilton_clip.mp4`` and runs detection plus
    ``model.predict_proba`` on the frame it read.

    ``npy``, ``train_img``, ``modeldir`` and ``classifier_filename`` are not
    defined in this function and must come from an enclosing scope.

    Args:
        img_path: Not used by the body; the clip path is hard-coded.

    Returns:
        A dict with ``prob`` and ``kind`` keys (see the review notes below on
        which assignment actually reaches the ``return``).

    NOTE(review): the indentation of this block was lost in the source.  The
    nesting below is reconstructed from statement order and must be confirmed
    against the original file, in particular the two ``else`` branches and
    everything after ``cv2.imshow``.
    """
    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
        # NOTE(review): the session is only made default below, never closed.
        sess = tf.Session(config=tf.ConfigProto(
            gpu_options=gpu_options, log_device_placement=False))
        with sess.as_default():
            pnet, rnet, onet = detect_face.create_mtcnn(sess, npy)

            minsize = 20  # minimum size of face
            threshold = [0.6, 0.7, 0.7]  # three steps's threshold
            factor = 0.709  # scale factor
            margin = 44
            frame_interval = 3
            batch_size = 1000
            image_size = 182
            input_image_size = 160

            # Sorted directory listing; indexed by the predicted class below.
            HumanNames = os.listdir(train_img)
            HumanNames.sort()

            print('Loading Modal')
            facenet.load_model(modeldir)
            images_placeholder = tf.get_default_graph().get_tensor_by_name(
                "input:0")
            embeddings = tf.get_default_graph().get_tensor_by_name(
                "embeddings:0")
            phase_train_placeholder = tf.get_default_graph(
            ).get_tensor_by_name("phase_train:0")
            embedding_size = embeddings.get_shape()[1]

            classifier_filename_exp = os.path.expanduser(
                classifier_filename)
            with open(classifier_filename_exp, 'rb') as infile:
                (model, class_names) = pickle.load(infile)

            video_capture = cv2.VideoCapture()
            video_capture.open('./cache/short_hamilton_clip.mp4')  # False
            #print(img_path)
            # NOTE(review): this debug print consumes one frame from the clip
            # and prints the read result.
            print(video_capture.read())
            c = 0

            print('Start Recognition')
            prevTime = 0
            # The frame is read once, before the loop; `ret` is not checked.
            ret, frame = video_capture.read()
            while (True):
                #gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                #frame = cv2.resize(frame, (0,0), fx=0.5, fy=0.5) #resize frame (optional)
                curTime = time.time() + 1  # calc fps
                timeF = frame_interval

                # `c` is never advanced (the increment below is commented
                # out), so this condition is true on every pass.
                if (c % timeF == 0):
                    find_results = []
                    #if frame.ndim == 2:
                    #frame = facenet.to_rgb(frame)
                    #frame = frame[:, :, 0:3]
                    bounding_boxes, _ = detect_face.detect_face(
                        frame, minsize, pnet, rnet, onet, threshold, factor)
                    nrof_faces = bounding_boxes.shape[0]
                    print('Detected_FaceNum: %d' % nrof_faces)

                    if nrof_faces > 0:
                        det = bounding_boxes[:, 0:4]
                        img_size = np.asarray(frame.shape)[0:2]

                        cropped = []
                        scaled = []
                        scaled_reshape = []
                        bb = np.zeros((nrof_faces, 4), dtype=np.int32)

                        for i in range(nrof_faces):
                            emb_array = np.zeros((1, embedding_size))

                            bb[i][0] = det[i][0]
                            bb[i][1] = det[i][1]
                            bb[i][2] = det[i][2]
                            bb[i][3] = det[i][3]

                            # inner exception
                            # Skip boxes that touch or cross the frame border.
                            if bb[i][0] <= 0 or bb[i][1] <= 0 or bb[i][
                                    2] >= len(frame[0]) or bb[i][3] >= len(
                                        frame):
                                print('Face is very close!')
                                continue

                            # NOTE(review): the lists are indexed by `i`, but
                            # a skipped face above appends nothing, so after
                            # any `continue` the index `i` is past the end of
                            # the lists.
                            cropped.append(frame[bb[i][1]:bb[i][3],
                                                 bb[i][0]:bb[i][2], :])
                            cropped[i] = facenet.flip(cropped[i], False)
                            scaled.append(
                                misc.imresize(cropped[i],
                                              (image_size, image_size),
                                              interp='bilinear'))
                            scaled[i] = cv2.resize(
                                scaled[i],
                                (input_image_size, input_image_size),
                                interpolation=cv2.INTER_CUBIC)
                            scaled[i] = facenet.prewhiten(scaled[i])
                            scaled_reshape.append(scaled[i].reshape(
                                -1, input_image_size, input_image_size, 3))
                            feed_dict = {
                                images_placeholder: scaled_reshape[i],
                                phase_train_placeholder: False
                            }
                            emb_array[0, :] = sess.run(embeddings,
                                                       feed_dict=feed_dict)
                            predictions = model.predict_proba(emb_array)
                            print(predictions)
                            # Most probable class and its probability.
                            best_class_indices = np.argmax(predictions, axis=1)
                            best_class_probabilities = predictions[
                                np.arange(len(best_class_indices)),
                                best_class_indices]
                            # print("predictions")
                            print(best_class_indices, ' with accuracy ',
                                  best_class_probabilities)

                            # print(best_class_probabilities)
                            if best_class_probabilities > 0.53:
                                cv2.rectangle(frame, (bb[i][0], bb[i][1]),
                                              (bb[i][2], bb[i][3]),
                                              (0, 255, 0), 2)  #boxing face

                                #plot result idx under box
                                text_x = bb[i][0]
                                text_y = bb[i][3] + 20
                                print('Result Indices: ', best_class_indices[0])
                                print(HumanNames)
                                for H_i in HumanNames:
                                    if HumanNames[
                                            best_class_indices[0]] == H_i:
                                        result_names = HumanNames[
                                            best_class_indices[0]]
                                        item = dict(prob='1',
                                                    kind=result_names)
                                    # NOTE(review): paired here with the name
                                    # comparison, this branch overwrites a
                                    # match with 'No face' on every later
                                    # non-matching name; it may have been
                                    # meant for the probability check.
                                    else:
                                        item = dict(prob='1', kind='No face')
                                        #cv2.putText(frame, result_names, (text_x, text_y), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), thickness=1, lineType=2)
                    else:
                        item = dict(prob='1', kind='Alignment Failure')
                        print('Alignment Failure')
                # c+=1
                cv2.imshow('Video', frame)

                #if cv2.waitKey(1) & 0xFF == ord('q'):
                #break

            # NOTE(review): as laid out here the loop above has no exit (the
            # break is commented out), so these lines are never reached.  If
            # they sat inside the loop instead, `item` would always be
            # overwritten with 'Failure' just before the return.
            item = dict(prob='4', kind='Failure')
            video_capture.release()
            #cv2.destroyAllWindows()
            return item
def main_train(self):
    """Train an SVM face classifier on facenet embeddings and pickle it.

    Images under ``self.datadir`` are loaded through ``facenet``, embedded in
    batches with the model at ``self.modeldir``, and used to grid-search SVC
    hyper-parameters.  A final ``SVC(probability=True)`` is fitted with the
    best parameters.  Its score on the same training embeddings is written
    to ``self.score_path`` (so the stored "Accuracy" is training accuracy,
    not a held-out estimate), and ``(model, class_names)`` is pickled to
    ``self.classifier_filename``.

    Returns:
        The user-expanded path of the pickled classifier file.
    """
    batch_size = 100  # batch size 100
    image_size = 160

    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(allow_growth=True)
        config = tf.ConfigProto(gpu_options=gpu_options,
                                log_device_placement=False)
        # Entering the session itself (rather than ``.as_default()``) closes
        # it on exit, releasing its resources before the SVM grid search.
        with tf.Session(config=config) as sess:
            img_data = facenet.get_dataset(self.datadir)
            path, label = facenet.get_image_paths_and_labels(img_data)
            print('Classes: %d' % len(img_data))
            print('Images: %d' % len(path))

            facenet.load_model(self.modeldir)
            graph = tf.get_default_graph()
            images_placeholder = graph.get_tensor_by_name("input:0")
            embeddings = graph.get_tensor_by_name("embeddings:0")
            phase_train_placeholder = graph.get_tensor_by_name("phase_train:0")
            embedding_size = embeddings.get_shape()[1]

            print('Extracting features of images for model')
            nrof_images = len(path)
            emb_array = np.zeros((nrof_images, embedding_size))
            # Embed the images batch by batch; the last batch may be short.
            for start_index in range(0, nrof_images, batch_size):
                end_index = min(start_index + batch_size, nrof_images)
                paths_batch = path[start_index:end_index]
                images = facenet.load_data(paths_batch, False, False,
                                           image_size)
                feed_dict = {
                    images_placeholder: images,
                    phase_train_placeholder: False
                }
                emb_array[start_index:end_index, :] = sess.run(
                    embeddings, feed_dict=feed_dict)

    classifier_file_name = os.path.expanduser(self.classifier_filename)
    score_path = os.path.expanduser(self.score_path)

    # Training Started
    print('Training Started')

    # parameters tuning
    param_grid = {
        'C': [1, 10, 100, 1000],
        'gamma': [1, 0.1, 0.001, 0.0001],
        'kernel': ['linear', 'rbf']
    }
    grid = GridSearchCV(SVC(), param_grid, refit=True, verbose=2)
    grid.fit(emb_array, label)

    best_param = grid.best_params_
    print('Best Parameters: ', best_param)
    print('Train Using Best Parameters...')

    # Refit by hand because the saved model needs probability estimates,
    # which the grid-search estimator was not configured with.
    model = SVC(C=best_param['C'],
                gamma=best_param['gamma'],
                kernel=best_param['kernel'],
                probability=True)
    model.fit(emb_array, label)
    score = model.score(emb_array, label)
    print('Model Accuracy:', score)

    # Saving acc
    with open(score_path, 'w') as out_score:
        out_score.write('Accuracy: {}\n '.format(score))

    class_names = [cls.name.replace('_', ' ') for cls in img_data]

    # Saving model
    with open(classifier_file_name, 'wb') as outfile:
        pickle.dump((model, class_names), outfile)
    return classifier_file_name
def det(runt):
    """Run live webcam face recognition and record attendance in Firebase.

    Frames from camera 0 are halved in size, faces are located with the
    MTCNN networks from ``detect_face``, embedded with the facenet model and
    classified with the pickled classifier.  Each face whose best class
    probability exceeds 0.43 is boxed, labelled on the frame and written as
    ``"Present"`` under ``Attendance/<year>/<month>/<day>/<hour>``.  The
    preview window closes when ``q`` is pressed or the camera stops
    delivering frames.

    ``db`` and ``user`` are not defined here and must come from the module.

    Args:
        runt: When equal to 1, every entry of the sorted ``./train_img``
            listing except the first is marked ``"Absent"`` before
            recognition starts.

    Returns:
        None.
    """
    modeldir = './model/20170511-185253.pb'
    classifier_filename = './class/classifier.pkl'
    npy = ''
    train_img = "./train_img"

    minsize = 20  # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # three steps's threshold
    factor = 0.709  # scale factor
    frame_interval = 3
    image_size = 182
    input_image_size = 160

    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
        config = tf.ConfigProto(gpu_options=gpu_options,
                                log_device_placement=False)
        # Using the session as the context manager closes it on exit.
        with tf.Session(config=config) as sess:
            pnet, rnet, onet = detect_face.create_mtcnn(sess, npy)

            HumanNames = os.listdir(train_img)
            HumanNames.sort()
            print(HumanNames)

            now = datetime.datetime.now()
            print("printing to firebase")
            if runt == 1:
                # NOTE(review): entry 0 of the listing is skipped here and
                # predictions are shifted by one below - presumably the
                # first sorted entry is not a person folder; confirm.
                for name in HumanNames[1:]:
                    student = {name: "Absent"}
                    db.child("Attendance").child(now.year).child(
                        now.month).child(now.day).child(now.hour).update(
                            student, user['idToken'])

            print('Loading Modal')
            facenet.load_model(modeldir)
            graph = tf.get_default_graph()
            images_placeholder = graph.get_tensor_by_name("input:0")
            embeddings = graph.get_tensor_by_name("embeddings:0")
            phase_train_placeholder = graph.get_tensor_by_name("phase_train:0")
            embedding_size = embeddings.get_shape()[1]

            classifier_filename_exp = os.path.expanduser(classifier_filename)
            with open(classifier_filename_exp, 'rb') as infile:
                model, _ = pickle.load(infile)

            video_capture = cv2.VideoCapture(0)
            # Frame-skipping counter; it is never advanced, so every frame
            # is processed.
            c = 0

            print('Start Recognition')
            try:
                while True:
                    ret, frame = video_capture.read()
                    # Stop cleanly when the camera yields no frame instead
                    # of crashing inside cv2.resize.
                    if not ret or frame is None:
                        print('Failed to read frame from camera')
                        break

                    frame = cv2.resize(frame, (0, 0), fx=0.5,
                                       fy=0.5)  # resize frame (optional)

                    if c % frame_interval == 0:
                        if frame.ndim == 2:
                            frame = facenet.to_rgb(frame)
                        frame = frame[:, :, 0:3]
                        bounding_boxes, _ = detect_face.detect_face(
                            frame, minsize, pnet, rnet, onet, threshold,
                            factor)
                        nrof_faces = bounding_boxes.shape[0]
                        print('Detected_FaceNum: %d' % nrof_faces)

                        if nrof_faces > 0:
                            frame_height, frame_width = frame.shape[:2]
                            boxes = bounding_boxes[:, 0:4].astype(np.int32)
                            for box in boxes:
                                x1, y1, x2, y2 = (int(v) for v in box)

                                # inner exception
                                if (x1 <= 0 or y1 <= 0 or x2 >= frame_width
                                        or y2 >= frame_height):
                                    print('Face is very close!')
                                    continue

                                # Per-face locals instead of lists indexed
                                # by face number: a skipped face would leave
                                # the lists shorter than the loop index.
                                face = frame[y1:y2, x1:x2, :]
                                face = facenet.flip(face, False)
                                face = misc.imresize(
                                    face, (image_size, image_size),
                                    interp='bilinear')
                                face = cv2.resize(
                                    face,
                                    (input_image_size, input_image_size),
                                    interpolation=cv2.INTER_CUBIC)
                                face = facenet.prewhiten(face)
                                batch = face.reshape(-1, input_image_size,
                                                     input_image_size, 3)
                                feed_dict = {
                                    images_placeholder: batch,
                                    phase_train_placeholder: False
                                }
                                emb_array = np.zeros((1, embedding_size))
                                emb_array[0, :] = sess.run(
                                    embeddings, feed_dict=feed_dict)

                                predictions = model.predict_proba(emb_array)
                                print(predictions)
                                best_class_indices = np.argmax(predictions,
                                                               axis=1)
                                best_class_probabilities = predictions[
                                    np.arange(len(best_class_indices)),
                                    best_class_indices]
                                print(best_class_indices, ' with accuracy ',
                                      best_class_probabilities)

                                if best_class_probabilities[0] > 0.43:
                                    cv2.rectangle(frame, (x1, y1), (x2, y2),
                                                  (0, 255, 0),
                                                  2)  # boxing face

                                    # plot result idx under box
                                    text_x = x1
                                    text_y = y2 + 20
                                    name_index = best_class_indices[0] + 1
                                    print('Result Indices: ', name_index)
                                    print(HumanNames)

                                    result_name = HumanNames[name_index]
                                    now = datetime.datetime.now()
                                    print("printing to firebase")
                                    student = {result_name: "Present"}
                                    db.child("Attendance").child(
                                        now.year).child(now.month).child(
                                            now.day).child(now.hour).update(
                                                student, user['idToken'])

                                    cv2.putText(
                                        frame,
                                        result_name, (text_x, text_y),
                                        cv2.FONT_HERSHEY_COMPLEX_SMALL,
                                        1, (0, 0, 255),
                                        thickness=1,
                                        lineType=2)
                        else:
                            print('Alignment Failure')

                    cv2.imshow('Video', frame)
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break
            finally:
                # Release the camera and windows even if recognition raises.
                video_capture.release()
                cv2.destroyAllWindows()