pnet, rnet, onet = detect_face.create_mtcnn(sess, npy)
minsize = constants.FACE_REG_MINSIZE  # minimum size of face
threshold = [0.6, 0.7, 0.7]  # thresholds for the three MTCNN stages
factor = 0.709  # scale factor
margin = constants.FACE_REG_MARGIN
frame_interval = 3
batch_size = 1000
image_size = 160
input_image_size = 160

HumanNames = os.listdir(train_img)
HumanNames.sort()

print('Loading model')
face_net.load_model(modeldir)
images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
embedding_size = embeddings.get_shape()[1]

classifier_filename_exp = os.path.expanduser(classifier_filename)
with open(classifier_filename_exp, 'rb') as infile:
    (model, class_names) = pickle.load(infile)

video_capture = cv2.VideoCapture(input_video)
width = int(video_capture.get(cv2.CAP_PROP_FRAME_WIDTH))   # get() returns float
height = int(video_capture.get(cv2.CAP_PROP_FRAME_HEIGHT))  # get() returns float
fourcc = cv2.VideoWriter_fourcc(*'MP4V')
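# --- Hedged sketch (not in the original source): the fragment above stops right
# after `fourcc` is created. Assuming the usual OpenCV pattern, a continuation
# might build a VideoWriter from the captured dimensions and run detection on
# every `frame_interval`-th frame; 'output/out.mp4' is a hypothetical path.
# out = cv2.VideoWriter('output/out.mp4', fourcc, video_capture.get(cv2.CAP_PROP_FPS), (width, height))
# c = 0
# while video_capture.isOpened():
#     ret, frame = video_capture.read()
#     if not ret:
#         break
#     if c % frame_interval == 0:
#         pass  # run MTCNN detection, embedding, and the classifier on `frame` here
#     out.write(frame)
#     c += 1
# video_capture.release()
# out.release()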
def train(epoch, batches, weight1, weight2, weight3, lr, layer_name, dir_name):
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        img1_raw_data = tf.gfile.FastGFile('data/wyz160.jpg', 'rb').read()
        img2_raw_data = tf.gfile.FastGFile('data/xtf160.jpg', 'rb').read()
        image_batch1 = tf.reshape(tf.cast(tf.image.decode_jpeg(img1_raw_data), tf.float32),
                                  [1, 160, 160, 3], name='input1')
        image_batch2 = tf.reshape(tf.cast(tf.image.decode_jpeg(img2_raw_data), tf.float32),
                                  [1, 160, 160, 3], name='input2')
        gen_batch1 = image_variable('data/xtf160.jpg')
        # gen_batch1 = get_noise_image(160, 160, 3)

        with tf.variable_scope("normalize1"):
            split_list = [1] * batches
            imgs1 = tf.split(gen_batch1, split_list, axis=0)
            imgs2 = tf.split(image_batch1, split_list, axis=0)
            imgs3 = tf.split(image_batch2, split_list, axis=0)
            norm_imgs1 = []
            norm_imgs2 = []
            norm_imgs3 = []
            for i in range(batches):
                norm_imgs1.append(tf.reshape(
                    tf.image.per_image_standardization(tf.reshape(imgs1[i], [160, 160, 3])),
                    [1, 160, 160, 3]))
                norm_imgs2.append(tf.reshape(
                    tf.image.per_image_standardization(tf.reshape(imgs2[i], [160, 160, 3])),
                    [1, 160, 160, 3]))
                norm_imgs3.append(tf.reshape(
                    tf.image.per_image_standardization(tf.reshape(imgs3[i], [160, 160, 3])),
                    [1, 160, 160, 3]))
            norm1 = tf.concat(norm_imgs1, axis=0)
            norm2 = tf.concat(norm_imgs2, axis=0)
            norm3 = tf.concat(norm_imgs3, axis=0)

        group_batch1 = tf.concat([norm1, norm2, norm3], axis=0)

        # Face recognition network
        face_net.load_model(group_batch1)
        # face_output = sess.graph.get_tensor_by_name('import/embeddings:0')
        # face_output = sess.graph.get_tensor_by_name('import/InceptionResnetV1/Repeat_2/block8_5/Relu:0')
        # face_output = sess.graph.get_tensor_by_name('import/InceptionResnetV1/Conv2d_2b_3x3/Relu:0')
        face_output = sess.graph.get_tensor_by_name(layer_name + ':0')
        face_output_1, face_output_2, face_output_3 = tf.split(face_output, [batches, batches, batches])

        # Group for emotion recognition (disabled)
        # group_batch2 = tf.concat([gen_batch1, image_batch2], axis=0)
        # Emotion recognition network: convert the images to single-channel grayscale
        # reduced_group_batch = tf.reshape(tf.div(tf.reduce_sum(group_batch2, axis=3), 3.0), shape=[-1, 160, 160, 1])
        # emotion_net.load_model(reduced_group_batch)
        # emotion_net.load_model(group_batch2)
        # emotion_output = sess.graph.get_tensor_by_name('import_1/emonet/block4/Block4/conv4/Conv4/Conv:0')
        # emotion_output = sess.graph.get_tensor_by_name('import/emonet/conv1/Block4/Conv4:0')
        # emotion_output_1, emotion_output_2 = tf.split(emotion_output, [batches, batches])
        # norm_emotion_output = tf.nn.l2_normalize(emotion_output, 1, 1e-10, name='emotion_embeddings')
        # emotion_output_1, emotion_output_2 = tf.split(norm_emotion_output, [batches, batches])
        # cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=emotion_output_1, labels=emotion_output_2, name='cross_entropy')

        # Pixel-space losses against the reference images (disabled)
        # with tf.variable_scope('Original_loss'):
        #     ct = tf.constant(128.0)
        #     norm1_1d = tf.reshape(tf.divide(gen_batch1, ct), [160 * 160 * 3])
        #     norm2_1d = tf.reshape(tf.divide(image_batch1, ct), [160 * 160 * 3])
        #     loss0 = tf.reduce_mean(tf.square(tf.subtract(norm1_1d, norm2_1d)))
        #     loss0_weight = tf.constant(weight1)
        #     loss0 = tf.multiply(loss0, loss0_weight, name='original_loss')
        # with tf.variable_scope('Original_loss2'):
        #     ct = tf.constant(128.0)
        #     norm1_1d = tf.reshape(tf.divide(gen_batch1, ct), [160 * 160 * 3])
        #     norm2_1d = tf.reshape(tf.divide(image_batch2, ct), [160 * 160 * 3])
        #     loss1 = tf.reduce_mean(tf.square(tf.subtract(norm1_1d, norm2_1d)))
        #     loss1_weight = tf.constant(weight2)
        #     loss1 = tf.multiply(loss1, loss1_weight, name='original_loss')

        # Face recognition (identity) loss
        with tf.variable_scope('Face_loss'):
            temp_loss = tf.reduce_mean(tf.square(tf.subtract(face_output_1, face_output_2)))
            loss1_weight = tf.constant(weight1)
            loss1 = tf.multiply(temp_loss, loss1_weight, name='face_loss')

        # Texture (Gram matrix) loss
        with tf.variable_scope('Texture_loss'):
            gram_loss1 = get_gram_loss(sess, 'import/InceptionResnetV1/Conv2d_1a_3x3/Relu:0', 79, 'gram_1a')
            gram_loss2 = get_gram_loss(sess, 'import/InceptionResnetV1/Conv2d_2b_3x3/Relu:0', 77, 'gram_2b')
            gram_loss3 = get_gram_loss(sess, 'import/InceptionResnetV1/Conv2d_3b_1x1/Relu:0', 38, 'gram_3b')
            gram_loss4 = get_gram_loss(sess, 'import/InceptionResnetV1/Conv2d_4b_3x3/Relu:0', 17, 'gram_4b')
            gram_loss5 = get_gram_loss(sess, 'import/InceptionResnetV1/Repeat/block35_5/Relu:0', 17, 'gram_block35')
            gram_loss = gram_loss1 + gram_loss2 + gram_loss3 + gram_loss4 + gram_loss5
            gram_weight = tf.constant(weight2)
            gram_loss = tf.multiply(gram_loss, gram_weight, name='gram_loss')

        # Shallow-layer (low-level) information loss (disabled)
        # lower_output = sess.graph.get_tensor_by_name('import/InceptionResnetV1/Conv2d_2b_3x3/Relu:0')
        # lower_output_1, lower_output_2, lower_output_3 = tf.split(lower_output, [batches, batches, batches])

        # Content loss
        with tf.variable_scope('content_loss'):
            # temp_loss = tf.reduce_mean(tf.square(tf.subtract(norm1, norm2)))
            temp_loss = tf.reduce_mean(tf.abs(tf.subtract(norm1, norm3)))
            loss3_weight = tf.constant(weight3)
            loss3 = tf.multiply(temp_loss, loss3_weight, name='content_loss')

        # Emotion recognition loss (disabled)
        # with tf.variable_scope('Emotion_loss'):
        #     temp_loss = tf.reduce_mean(tf.square(tf.subtract(emotion_output_1, emotion_output_2)))
        #     loss4_weight = tf.constant(1.0)
        #     loss4 = tf.multiply(temp_loss, loss4_weight, name='emotion_loss')

        # tf.summary.scalar('loss0', loss0)
        tf.summary.scalar('identification_loss2', loss1)
        tf.summary.scalar('gram_loss2', gram_loss)
        tf.summary.scalar('content_loss2', loss3)

        # Combine the three losses
        total_loss = tf.add(tf.add(loss1, gram_loss), loss3, name='total_loss')
        # total_loss = tf.add(total_loss, loss4, name='total_loss2')
        tf.summary.scalar('total_loss', total_loss)

        # Gradient-based optimization
        train_step = tf.train.AdamOptimizer(lr).minimize(total_loss)
        merge = tf.summary.merge_all()
        writer = tf.summary.FileWriter('./train_summary', sess.graph)
        phase_train = sess.graph.get_tensor_by_name('import/phase_train:0')
        # keep_prob = sess.graph.get_tensor_by_name('import_1/emonet/keep_prob:0')

        tf.local_variables_initializer().run()
        tf.global_variables_initializer().run()
        start_time = time.time()
        # a = sess.run(emotion_output, feed_dict={keep_prob: 1.0})
        # print(a)

        # NOTE: the `epoch` argument is currently unused; the loop is hardcoded to 10000 steps.
        for i in range(10000):
            _, total_summary, _loss, img_data = sess.run(
                [train_step, merge, total_loss, gen_batch1],
                feed_dict={phase_train: False})
            print('Current {0} loss: {1} gen_image: {2}'.format(i, _loss, img_data[0][0][0][0]))
            writer.add_summary(total_summary, i)
            if (i + 1) % 100 == 0:
                end_time = time.time()
                timedelta = end_time - start_time
                print(timedelta)
                image = Image.new('RGB', (160, 160), (255, 255, 255))
                draw = ImageDraw.Draw(image)
                # BGR variant saved to result/edit/ (disabled)
                # for x in range(160):
                #     for y in range(160):
                #         draw.point((y, x), fill=(img_data[0][x][y][2], img_data[0][x][y][1], img_data[0][x][y][0]))
                # image.save('result/edit/' + dir_name + '/gen_image_' + str(timedelta)[:5] + '.bmp', 'bmp')
                for x in range(160):
                    for y in range(160):
                        # Cast to int: Pillow expects integer channel values.
                        draw.point((y, x), fill=(int(img_data[0][x][y][0]),
                                                 int(img_data[0][x][y][1]),
                                                 int(img_data[0][x][y][2])))
                image.save('result/edit2/' + dir_name + '/gen_image_' + str(timedelta)[:5] + '.bmp', 'bmp')
        writer.close()
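# --- Hedged sketch: `get_gram_loss` is called above but not defined in this file.
# A plausible implementation, assuming the standard style-transfer Gram-matrix
# loss, that the named tensor stacks the [generated, identity, style] batches in
# the same order as `group_batch1`, and that `size` is the layer's spatial
# resolution (79, 77, 38, 17 match InceptionResnetV1 feature maps for 160x160 inputs).
def get_gram_loss(sess, tensor_name, size, scope_name):
    with tf.variable_scope(scope_name):
        feat = sess.graph.get_tensor_by_name(tensor_name)
        gen_feat, _, style_feat = tf.split(feat, 3, axis=0)  # generated / identity / style
        channels = int(feat.get_shape()[-1])

        def gram(f):
            m = tf.reshape(f, [-1, channels])         # (H*W, C)
            return tf.matmul(m, m, transpose_a=True)  # (C, C) Gram matrix

        n = float(size * size * channels)
        return tf.reduce_sum(tf.square(gram(gen_feat) - gram(style_feat))) / (4.0 * n * n)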
def main(data_dir, model_dir, output_dir):
    if constants.CLASSIFIER_MODE == 'TRAIN':
        classifier_filename_exp = os.path.expanduser(
            output_dir + '/classifier_{0}.pkl'.format(
                datetime.strftime(datetime.now(), '%Y%m%d%H%M%S')))
    elif constants.CLASSIFIER_MODE == 'CLASSIFY':
        classifier_filename_exp = os.path.expanduser(output_dir + '/classifier_test.pkl')

    with tf.Graph().as_default():
        with tf.Session() as sess:
            np.random.seed(seed=constants.CLASSIFIER_SEED)
            if constants.CLASSIFIER_USE_SPLIT_DATASET:
                dataset_tmp = face_net.get_dataset(data_dir)
                train_set, test_set = split_dataset(
                    dataset_tmp,
                    constants.CLASSIFIER_MIN_NROF_IMAGES_PER_CLASS,
                    constants.CLASSIFIER_NROF_TRAIN_IMAGES_PER_CLASS)
                if constants.CLASSIFIER_MODE == 'TRAIN':
                    dataset = train_set
                elif constants.CLASSIFIER_MODE == 'CLASSIFY':
                    dataset = test_set
            else:
                # NOTE: falls back to constants.CLASSIFIER_DATA_DIR rather than the data_dir argument.
                dataset = face_net.get_dataset(constants.CLASSIFIER_DATA_DIR)

            # Check that there is at least one training image per class
            for cls in dataset:
                assert len(cls.image_paths) > 0, 'There must be at least one image for each class in the dataset'

            paths, labels = face_net.get_image_paths_and_labels(dataset)
            print('Number of classes: %d' % len(dataset))
            print('Number of images: %d' % len(paths))

            # Load the model
            print('Loading feature extraction model')
            face_net.load_model(model_dir)

            # Get input and output tensors
            images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
            embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
            phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
            embedding_size = embeddings.get_shape()[1]

            # Run forward pass to calculate embeddings
            print('Calculating features for images')
            nrof_images = len(paths)
            nrof_batches_per_epoch = int(math.ceil(1.0 * nrof_images / constants.CLASSIFIER_BATCH_SIZE))
            emb_array = np.zeros((nrof_images, embedding_size))
            for i in range(nrof_batches_per_epoch):
                start_index = i * constants.CLASSIFIER_BATCH_SIZE
                end_index = min((i + 1) * constants.CLASSIFIER_BATCH_SIZE, nrof_images)
                paths_batch = paths[start_index:end_index]
                images = face_net.load_data(paths_batch, False, False, constants.IMAGE_SIZE)
                feed_dict = {images_placeholder: images, phase_train_placeholder: False}
                emb_array[start_index:end_index, :] = sess.run(embeddings, feed_dict=feed_dict)

            if constants.CLASSIFIER_MODE == 'TRAIN':
                # Train classifier
                print('Training classifier')
                model = SVC(kernel='linear', probability=True)
                model.fit(emb_array, labels)

                # Create a list of class names
                class_names = [cls.name.replace('_', ' ') for cls in dataset]

                # Save classifier model
                with open(classifier_filename_exp, 'wb') as outfile:
                    pickle.dump((model, class_names), outfile)
                print('Saved classifier model to file "%s"' % classifier_filename_exp)
            elif constants.CLASSIFIER_MODE == 'CLASSIFY':
                # Classify images
                print('Testing classifier')
                with open(classifier_filename_exp, 'rb') as infile:
                    (model, class_names) = pickle.load(infile)
                print('Loaded classifier model from file "%s"' % classifier_filename_exp)

                predictions = model.predict_proba(emb_array)
                best_class_indices = np.argmax(predictions, axis=1)
                best_class_probabilities = predictions[np.arange(len(best_class_indices)), best_class_indices]
                for i in range(len(best_class_indices)):
                    print('%4d %s: %.3f - %s' % (i, class_names[best_class_indices[i]],
                                                 best_class_probabilities[i], paths[i]))
                accuracy = np.mean(np.equal(best_class_indices, labels))
                print('Accuracy: %.3f' % accuracy)
    return classifier_filename_exp
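# --- Hedged sketch: `split_dataset` is called above but not defined in this file.
# A minimal version in the spirit of the FaceNet classifier example, assuming
# face_net exposes an ImageClass(name, image_paths) container like facenet does.
def split_dataset(dataset, min_nrof_images_per_class, nrof_train_images_per_class):
    train_set = []
    test_set = []
    for cls in dataset:
        paths = cls.image_paths
        if len(paths) < min_nrof_images_per_class:
            continue  # too few images to split this class
        np.random.shuffle(paths)
        train_set.append(face_net.ImageClass(cls.name, paths[:nrof_train_images_per_class]))
        test_set.append(face_net.ImageClass(cls.name, paths[nrof_train_images_per_class:]))
    return train_set, test_set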
def main(image_path, data_dir, model_dir, classifier_file):
    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=constants.GPU_MEMORY_FRACTION_DEFAULT)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        with sess.as_default():
            pnet, rnet, onet = detect_face.create_mtcnn(sess, None)
            minsize = constants.FACE_REG_MINSIZE  # minimum size of face
            threshold = constants.ALIGN_THRESHOLD  # thresholds for the three MTCNN stages
            factor = constants.ALIGN_FACTOR  # scale factor
            image_size = 160
            input_image_size = 160

            human_names = os.listdir(data_dir)
            human_names.sort()

            print('Loading feature extraction model')
            face_net.load_model(model_dir)
            images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
            embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
            phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
            embedding_size = embeddings.get_shape()[1]

            classifier_filename_exp = os.path.expanduser(classifier_file)
            with open(classifier_filename_exp, 'rb') as infile:
                (model, class_names) = pickle.load(infile)

            print('Start Recognition!')
            frame = cv2.imread(image_path, 0)  # read as grayscale
            if frame.ndim == 2:
                frame = face_net.to_rgb(frame)
            frame = frame[:, :, 0:3]

            bounding_boxes, _ = detect_face.detect_face(
                frame, minsize, pnet, rnet, onet, threshold, factor)
            nrof_faces = bounding_boxes.shape[0]
            print('Faces detected: %d' % nrof_faces)

            if nrof_faces > 0:
                det = bounding_boxes[:, 0:4]
                cropped = []
                scaled = []
                scaled_reshape = []
                bb = np.zeros((nrof_faces, 4), dtype=np.int32)
                for i in range(nrof_faces):
                    emb_array = np.zeros((1, embedding_size))
                    bb[i][0] = det[i][0]
                    bb[i][1] = det[i][1]
                    bb[i][2] = det[i][2]
                    bb[i][3] = det[i][3]

                    # Skip boxes that touch or cross the image border
                    if bb[i][0] <= 0 or bb[i][1] <= 0 or bb[i][2] >= len(frame[0]) or bb[i][3] >= len(frame):
                        print('Face is too close to the border')
                        break

                    cropped.append(frame[bb[i][1]:bb[i][3], bb[i][0]:bb[i][2], :])
                    cropped[i] = face_net.flip(cropped[i], False)
                    scaled.append(misc.imresize(cropped[i], (image_size, image_size), interp='bilinear'))
                    scaled[i] = cv2.resize(scaled[i], (input_image_size, input_image_size),
                                           interpolation=cv2.INTER_CUBIC)
                    scaled[i] = face_net.prewhiten(scaled[i])
                    scaled_reshape.append(scaled[i].reshape(-1, input_image_size, input_image_size, 3))
                    feed_dict = {images_placeholder: scaled_reshape[i],
                                 phase_train_placeholder: False}
                    emb_array[0, :] = sess.run(embeddings, feed_dict=feed_dict)

                    predictions = model.predict_proba(emb_array)
                    print(predictions)
                    best_class_indices = np.argmax(predictions, axis=1)
                    best_class_probabilities = predictions[
                        np.arange(len(best_class_indices)), best_class_indices]
                    print(best_class_probabilities)

                    cv2.rectangle(frame, (bb[i][0], bb[i][1]), (bb[i][2], bb[i][3]),
                                  (0, 255, 0), 2)  # box the face
                    # plot the result name under the box
                    text_x = bb[i][0]
                    text_y = bb[i][3] - 10
                    print(i, 'Result index:', best_class_indices[0], ':',
                          'Face detected of: {0}'.format(human_names[best_class_indices[0]]))
                    print(human_names)
                    for H_i in human_names:
                        if (human_names[best_class_indices[0]] == H_i
                                and best_class_probabilities >= constants.FACE_REG_POSSIBILITY):
                            result_names = human_names[best_class_indices[0]]
                            cv2.putText(frame, str(i) + ': ' + result_names,
                                        (text_x, text_y), cv2.FONT_HERSHEY_PLAIN,
                                        1, (0, 0, 255), thickness=1, lineType=1)
                print('------------------')
            else:
                print('Unable to align')

            frame = cv2.resize(frame, (0, 0), fx=0.5, fy=0.5)  # resize frame (optional)
            cv2.imshow('Image', frame)
            cv2.imwrite('output/' + image_path.split('/')[-1], frame)
            if cv2.waitKey(1000000) & 0xFF == ord('q'):
                sys.exit("Thanks")
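# --- Hedged usage sketch (all paths below are hypothetical placeholders):
# main(image_path='test_images/person1.jpg',
#      data_dir='train_img',
#      model_dir='models/20180402-114759',
#      classifier_file='classifier/classifier_test.pkl')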
def main(test_dir, data_dir, model_dir, classifier_file):
    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=constants.GPU_MEMORY_FRACTION_DEFAULT)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        with sess.as_default():
            pnet, rnet, onet = detect_face.create_mtcnn(sess, None)
            minsize = constants.FACE_REG_MINSIZE  # minimum size of face
            threshold = constants.ALIGN_THRESHOLD  # thresholds for the three MTCNN stages
            factor = constants.ALIGN_FACTOR  # scale factor
            image_size = 160
            input_image_size = 160

            human_names = os.listdir(data_dir)
            human_names.sort()

            print('Loading feature extraction model')
            face_net.load_model(model_dir)
            images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
            embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
            phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
            embedding_size = embeddings.get_shape()[1]

            classifier_filename_exp = os.path.expanduser(classifier_file)
            with open(classifier_filename_exp, 'rb') as infile:
                (model, class_names) = pickle.load(infile)

            print('Start Recognition!')
            dataset = face_net.get_dataset(test_dir)
            number_of_face_recognition = 0
            for cls in dataset:
                for image_path in cls.image_paths:
                    frame = cv2.imread(image_path, 0)  # read as grayscale
                    if frame.ndim == 2:
                        frame = face_net.to_rgb(frame)
                    frame = frame[:, :, 0:3]

                    bounding_boxes, _ = detect_face.detect_face(
                        frame, minsize, pnet, rnet, onet, threshold, factor)
                    nrof_faces = bounding_boxes.shape[0]
                    if nrof_faces > 0:
                        det = bounding_boxes[:, 0:4]
                        cropped = []
                        scaled = []
                        scaled_reshape = []
                        bb = np.zeros((nrof_faces, 4), dtype=np.int32)
                        for i in range(nrof_faces):
                            emb_array = np.zeros((1, embedding_size))
                            bb[i][0] = det[i][0]
                            bb[i][1] = det[i][1]
                            bb[i][2] = det[i][2]
                            bb[i][3] = det[i][3]

                            # Skip boxes that touch or cross the image border
                            if (bb[i][0] <= 0 or bb[i][1] <= 0
                                    or bb[i][2] >= len(frame[0]) or bb[i][3] >= len(frame)):
                                print('Face is too close {0}'.format(image_path))
                                break

                            cropped.append(frame[bb[i][1]:bb[i][3], bb[i][0]:bb[i][2], :])
                            cropped[i] = face_net.flip(cropped[i], False)
                            scaled.append(misc.imresize(cropped[i], (image_size, image_size), interp='bilinear'))
                            scaled[i] = cv2.resize(scaled[i], (input_image_size, input_image_size),
                                                   interpolation=cv2.INTER_CUBIC)
                            scaled[i] = face_net.prewhiten(scaled[i])
                            scaled_reshape.append(scaled[i].reshape(-1, input_image_size, input_image_size, 3))
                            feed_dict = {images_placeholder: scaled_reshape[i],
                                         phase_train_placeholder: False}
                            emb_array[0, :] = sess.run(embeddings, feed_dict=feed_dict)

                            predictions = model.predict_proba(emb_array)
                            best_class_indices = np.argmax(predictions, axis=1)
                            best_class_probabilities = predictions[
                                np.arange(len(best_class_indices)), best_class_indices]

                            # Count a hit when the predicted name appears in the image path
                            for H_i in human_names:
                                if (human_names[best_class_indices[0]] == H_i
                                        and H_i in image_path):
                                    print('{0} : {1}'.format(best_class_probabilities, image_path))
                                    number_of_face_recognition += 1
                    else:
                        print('Unable to recognize {0}'.format(image_path))
            print('Finished!')
            print('Number of faces recognized: {0}'.format(number_of_face_recognition))
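# --- Hedged sketch (not in the original): a recognition-rate figure could be
# appended at the end of main above, reusing the dataset already loaded there.
# total_images = sum(len(cls.image_paths) for cls in dataset)
# print('Recognition rate: {0:.3f}'.format(number_of_face_recognition / float(total_images)))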