def get_unsupervised_dataset(path):
    """Build one unlabeled ``facenet.ImageClass`` per domain directory.

    Every sub-directory of *path* except ``"id+camera"`` is treated as a
    domain. A domain's image list is made of:

    * the paths ``facenet.get_image_paths`` reports for the domain directory
      itself, restricted to the ``.jpg`` / ``.png`` extensions, followed by
    * everything it reports for each class sub-directory, visited in sorted
      order (these are not filtered by extension).

    Returns:
        dict mapping domain name to ``facenet.ImageClass(domain_name, paths)``.
    """
    root = os.path.expanduser(path)
    domain_names = sorted(
        entry for entry in os.listdir(root)
        if os.path.isdir(os.path.join(root, entry)))

    domain_unsupervised_dataset = {}
    for domain_name in domain_names:
        if domain_name == "id+camera":
            continue

        domain_dir = os.path.join(root, domain_name)
        # Images lying directly in the domain directory: keep .jpg/.png only.
        image_paths = [
            candidate for candidate in facenet.get_image_paths(domain_dir)
            if os.path.splitext(os.path.split(candidate)[1])[1]
            in ('.jpg', '.png')
        ]

        class_names = sorted(
            entry for entry in os.listdir(domain_dir)
            if os.path.isdir(os.path.join(domain_dir, entry)))
        for class_name in class_names:
            image_paths += facenet.get_image_paths(
                os.path.join(domain_dir, class_name))

        domain_unsupervised_dataset[domain_name] = facenet.ImageClass(
            domain_name, image_paths)
    return domain_unsupervised_dataset
def main(args):
    """Evaluate a trained attribute model on the test split.

    Loads ``<args.model_dir>/<args.model_name>.h5``, reads the label matrix
    from ``../data/label_all.mat`` into ``args.label_all``, computes
    predictions for every image under ``<args.data_dir>/test`` and passes
    predictions and ground truth to ``evaluate_multilab``.

    Args:
        args: namespace providing ``model_dir``, ``model_name``, ``loss`` and
            ``data_dir``; ``label_all`` is set on it as a side effect.

    Raises:
        AssertionError: if no test images are found.
    """
    # load trained model
    model_path = os.path.join(args.model_dir, args.model_name)
    if args.loss == 'binary_crossentropy':
        model = load_model(model_path + '.h5')
    else:
        # The model was saved with the custom loss, so it has to be supplied
        # again when loading.
        from train_utils import binary_crossentropy_custom
        import keras.activations
        keras.activations.custom_activation = binary_crossentropy_custom
        model = load_model(
            model_path + '.h5',
            custom_objects={
                'binary_crossentropy_custom': binary_crossentropy_custom
            })

    # load Nx40 labels from mat file
    # mat file is located under the data directory
    loaded = loadmat('../data/label_all.mat')
    args.label_all = np.array(loaded['label'])

    img_list = facenet.get_image_paths(os.path.join(args.data_dir, 'test'))
    # The images come from the 'test' split, so say so in the message.
    assert len(img_list) > 0, 'The test set should not be empty'

    prediction = compute_attr_embedding(model, path=img_list)

    # compute label-wise performance
    truth = get_label_from_filename(img_list, args.label_all)
    evaluate_multilab(prediction, truth, save_name=model_path)
def main(args):
    """Cluster the face images in ``args.input_dir`` and save them by cluster.

    Embeddings are extracted in batches of ``args.batch_size`` with
    ``FaceNet.extract_feature``, clustered with ``FaceClustering`` and every
    input image is written to ``<args.output_dir>/<label>/<index>.jpg``.

    Args:
        args: namespace providing ``input_dir``, ``output_dir``,
            ``image_size``, ``batch_size``, ``num_clusters`` and
            ``gpu_memory_fraction`` (plus whatever ``FaceNet`` reads).
    """
    print('Creating networks and loading parameters')
    input_images = facenet.load_data(
        facenet.get_image_paths(args.input_dir),
        do_random_crop=False,
        do_random_flip=False,
        image_size=args.image_size)

    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=args.gpu_memory_fraction)
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                          log_device_placement=False)) as sess:
        print('Loading feature extraction model')
        feature_net = FaceNet(sess, args)

        # Collect the per-batch results and join them once. The previous
        # ``all_embeddings == []`` test compared an ndarray with a list from
        # the second batch on, which newer NumPy rejects, and it also
        # re-copied the accumulated array on every batch.
        batches = [
            feature_net.extract_feature(
                input_images[start:start + args.batch_size])
            for start in range(0, len(input_images), args.batch_size)
        ]
        # With no input images the original passed an empty list on; keep that.
        all_embeddings = np.concatenate(batches) if batches else []

        if not os.path.exists(args.output_dir):
            os.mkdir(args.output_dir)

        face_cluster = FaceClustering(num_clusters=args.num_clusters)
        labels = face_cluster.clustering(all_embeddings)

        for i, label in enumerate(labels):
            cluster_dir = os.path.join(args.output_dir, str(label))
            if not os.path.exists(cluster_dir):
                os.mkdir(cluster_dir)
            misc.imsave(os.path.join(cluster_dir, str(i) + '.jpg'),
                        input_images[i])
def update_idtoname(self):
    """Rescan the face library and update the id -> [name, path] lookup.

    File names (without extension) are split on ``'_'``: the first field is
    taken as the name and the second as the id. ``self.image_paths`` is
    replaced with the fresh listing and ``self.idtoname`` gets one entry per
    image, keyed by id.
    """
    self.image_paths = facenet.get_image_paths(self.face_lib_dir)
    for image_path in self.image_paths:
        stem = os.path.splitext(os.path.basename(image_path))[0]
        fields = stem.split('_')
        image_id = fields[1]
        image_name = fields[0]
        self.idtoname[image_id] = [image_name, image_path]
def get_BatchGenerator(args):
    """Create the training and validation batch generators.

    When ``'celeba'`` occurs in ``args.data_dir`` the project's
    ``DataGenerator`` is used over the image paths of the ``train`` and
    ``validation`` sub-directories together with ``args.label_all``.
    Otherwise Keras ``ImageDataGenerator.flow_from_directory`` is used on the
    same two sub-directories (training data is additionally sheared, zoomed
    and horizontally flipped).

    Returns:
        tuple ``(train_generator, validation_generator)``.
    """
    train_dir = os.path.join(args.data_dir, 'train')
    validation_dir = os.path.join(args.data_dir, 'validation')
    target_dim = (args.data_dim, args.data_dim)

    if 'celeba' not in args.data_dir:
        from keras.preprocessing.image import ImageDataGenerator

        augmenting_datagen = ImageDataGenerator(rescale=1. / 255,
                                                shear_range=0.1,
                                                zoom_range=0.1,
                                                horizontal_flip=True)
        plain_datagen = ImageDataGenerator(rescale=1. / 255)
        train_generator = augmenting_datagen.flow_from_directory(
            train_dir,
            target_size=target_dim,
            batch_size=args.batch_size,
            class_mode='categorical')
        validation_generator = plain_datagen.flow_from_directory(
            validation_dir,
            target_size=target_dim,
            batch_size=args.batch_size,
            class_mode='categorical')
        return train_generator, validation_generator

    train_paths = facenet.get_image_paths(train_dir)
    validation_paths = facenet.get_image_paths(validation_dir)
    assert len(train_paths) > 0, 'The training set should not be empty'

    generator_kwargs = {
        'dim': target_dim,
        'batch_size': args.batch_size,
        'shuffle': True,
    }
    train_generator = DataGenerator(train_paths, args.label_all,
                                    **generator_kwargs)
    validation_generator = DataGenerator(validation_paths, args.label_all,
                                         **generator_kwargs)
    return train_generator, validation_generator
def get_dataset(path, has_class_directories=True):
    """Return a one-element dataset holding the images of ``<path>/nil``.

    ``has_class_directories`` is accepted but not used by this
    implementation.
    """
    # expanduser turns a leading "~" or "~user" into the user's home directory.
    nil_dir = os.path.join(os.path.expanduser(path), "nil")
    return [facenet.ImageClass("nil", facenet.get_image_paths(nil_dir))]
def main(args):
    """Print the pairwise embedding distances of the given images.

    The images are aligned with ``load_and_align_data``, run through the model
    loaded from ``args.model`` and the Euclidean distance between every pair
    of embeddings is printed as a matrix, preceded by the indexed list of
    image paths.
    """
    images = load_and_align_data(args.image_files, args.image_size,
                                 args.margin, args.gpu_memory_fraction)

    with tf.Graph().as_default():
        with tf.Session() as sess:
            # Load the model, then look up its input and output tensors.
            facenet.load_model(args.model)
            graph = tf.get_default_graph()
            images_placeholder = graph.get_tensor_by_name("input:0")
            embeddings = graph.get_tensor_by_name("embeddings:0")
            phase_train_placeholder = graph.get_tensor_by_name("phase_train:0")

            # Forward pass to calculate the embeddings.
            emb = sess.run(embeddings,
                           feed_dict={
                               images_placeholder: images,
                               phase_train_placeholder: False,
                           })

            image_paths = facenet.get_image_paths(
                os.path.expanduser(args.image_files))
            indices = range(len(image_paths))

            print('Images:')
            for idx, image_path in enumerate(image_paths):
                print('%1d: %s' % (idx, image_path))
            print('')

            # Distance matrix: a header row of column indices, then one row
            # per image.
            print('Distance matrix')
            print(' ' + ''.join(' %1d ' % col for col in indices))
            for row in indices:
                cells = []
                for col in indices:
                    delta = np.subtract(emb[row, :], emb[col, :])
                    dist = np.sqrt(np.sum(np.square(delta)))
                    cells.append(' %1.4f ' % dist)
                print('%1d ' % row + ''.join(cells))
def get_supervised_dataset_single(path, nrof_data_augmentation):
    """Build a labeled dataset from the class sub-directories of *path*.

    Each sub-directory is one class; its images are obtained from
    ``facenet.get_image_paths(class_dir, nrof_data_augmentation)``.

    Returns:
        list of ``facenet.ImageClass``, ordered by class name.
    """
    root = os.path.expanduser(path)
    class_names = sorted(
        entry for entry in os.listdir(root)
        if os.path.isdir(os.path.join(root, entry)))
    return [
        facenet.ImageClass(
            class_name,
            facenet.get_image_paths(os.path.join(root, class_name),
                                    nrof_data_augmentation))
        for class_name in class_names
    ]
def main(args):
    """For every sub-directory, keep the images closest to the mean embedding.

    For each sub-directory of ``args.input_dir`` the images are embedded in
    batches of ``args.batch_size``. The (at most) ``args.k`` images whose
    embedding has the smallest mean squared distance to the directory's mean
    embedding are written to ``<args.output_dir>/<dir_name>/<n>.jpg``.
    Sub-directories without images are skipped.

    Args:
        args: namespace providing ``input_dir``, ``output_dir``,
            ``image_size``, ``batch_size``, ``k`` and
            ``gpu_memory_fraction`` (plus whatever ``FaceNet`` reads).
    """
    if not os.path.exists(args.output_dir):
        os.mkdir(args.output_dir)

    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=args.gpu_memory_fraction)
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                          log_device_placement=False)) as sess:
        print('Loading feature extraction model')
        feature_net = FaceNet(sess, args)

        dir_list = [
            name for name in os.listdir(args.input_dir)
            if os.path.isdir(os.path.join(args.input_dir, name))
        ]
        for dir_name in dir_list:
            input_dir = os.path.join(args.input_dir, dir_name)
            input_images = facenet.load_data(
                facenet.get_image_paths(input_dir),
                do_random_crop=False,
                do_random_flip=False,
                image_size=args.image_size)
            if len(input_images) == 0:
                # Without embeddings there is no mean to rank against.
                print('Skipping "%s": no images found' % input_dir)
                continue

            # Join the batches once; comparing an ndarray with ``[]`` (as the
            # loop-and-test accumulation did) is rejected by newer NumPy.
            all_embeddings = np.concatenate([
                feature_net.extract_feature(
                    input_images[start:start + args.batch_size])
                for start in range(0, len(input_images), args.batch_size)
            ])

            output_subdir = os.path.join(args.output_dir, dir_name)
            if not os.path.exists(output_subdir):
                os.mkdir(output_subdir)

            mean_embedding = np.mean(all_embeddings, axis=0)
            distance = np.mean(np.square(all_embeddings - mean_embedding),
                               axis=1)

            selection = min(args.k, len(all_embeddings))
            if selection < len(distance):
                minimum_k = np.argpartition(distance, selection)[:selection]
            else:
                # argpartition needs kth < len(distance); when every image is
                # selected there is nothing to partition.
                minimum_k = np.arange(len(distance))

            for count, idx in enumerate(minimum_k):
                misc.imsave(os.path.join(output_subdir, '%d.jpg' % count),
                            input_images[idx])
def get_dataset(path, has_class_directories=True):
    """Build a person -> videos -> images dataset from a directory tree.

    Each sub-directory of *path* is one class (one person). Every entry
    inside a person directory is treated as a video whose frames are listed
    with ``facenet.get_image_paths``.

    ``has_class_directories`` is accepted but not used by this
    implementation.

    Returns:
        list of ``VideoClass(person_name, [ImageClass(video, paths), ...])``,
        ordered by person name; videos keep ``os.listdir`` order.
    """
    root = os.path.expanduser(path)
    person_names = sorted(
        entry for entry in os.listdir(root)
        if os.path.isdir(os.path.join(root, entry)))

    dataset = []
    for person_name in person_names:
        person_dir = os.path.join(root, person_name)
        videoset = [
            ImageClass(
                video,
                facenet.get_image_paths(os.path.join(person_dir, video)))
            for video in os.listdir(person_dir)
        ]
        dataset.append(VideoClass(person_name, videoset))
    return dataset
def get_dataset(path, has_class_directories=True):
    """Build one labeled dataset per top-level directory of *path*.

    The tree is expected to be ``<path>/<group>/<class>/<images>``. For every
    group directory a list of ``facenet.ImageClass`` (one per class
    directory, ordered by class name) is built.

    ``has_class_directories`` is accepted but not used by this
    implementation.

    Returns:
        dict mapping group directory name to its list of
        ``facenet.ImageClass``.
    """

    def sorted_subdirs(parent):
        # Names of the immediate sub-directories of *parent*, sorted.
        return sorted(
            entry for entry in os.listdir(parent)
            if os.path.isdir(os.path.join(parent, entry)))

    root = os.path.expanduser(path)
    datadict = {}
    for group_name in sorted_subdirs(root):
        group_dir = os.path.join(root, group_name)
        datadict[group_name] = [
            facenet.ImageClass(
                class_name,
                facenet.get_image_paths(os.path.join(group_dir, class_name)))
            for class_name in sorted_subdirs(group_dir)
        ]
    return datadict
def load_and_align_data(image_files, image_size, margin, gpu_memory_fraction):
    """Detect, align and prewhiten every image found under *image_files*.

    An MTCNN detector is created in its own graph/session, each image is read
    with OpenCV, passed through ``detect_face.detect_face`` and the returned
    landmarks are handed to ``face_alignment``. The aligned images are
    prewhitened and stacked.

    Args:
        image_files: directory (``~`` is expanded) passed to
            ``facenet.get_image_paths``.
        image_size: forwarded to ``face_alignment``.
        margin: accepted for interface compatibility; not used here.
        gpu_memory_fraction: per-process GPU memory fraction for the
            detector session.

    Returns:
        ``np.stack`` of the prewhitened aligned images, in listing order.

    Raises:
        ValueError: if an image cannot be read, or if no images are found
            (``np.stack`` of an empty list).
    """
    # MTCNN parameters
    minsize = 20  # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # three steps's threshold
    factor = 0.709  # scale factor

    print('Creating networks and loading parameters')
    # Create MT-CNN (P-net, R-net, O-net)
    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        with sess.as_default():
            pnet, rnet, onet = detect_face.create_mtcnn(sess, None)

    try:
        path_exp = os.path.expanduser(image_files)
        image_paths = facenet.get_image_paths(path_exp)

        img_list = []
        for image_path in image_paths:
            img = cv2.imread(image_path)
            if img is None:
                # cv2.imread signals failure by returning None.
                raise ValueError('Unable to read image "%s"' % image_path)
            _, landmarks = detect_face.detect_face(img, minsize, pnet, rnet,
                                                   onet, threshold, factor)
            aligned = face_alignment(img, image_size, landmarks)
            # Prewhitening reduces the influence of lighting differences.
            img_list.append(facenet.prewhiten(aligned))
        images = np.stack(img_list)
    finally:
        # The detector session is only needed while images are processed.
        sess.close()
    return images
def get_supervised_dataset_multiple(path, nrof_data_augmentation):
    """Build one labeled dataset per domain directory under *path*.

    The tree is expected to be ``<path>/<domain>/<class>/<images>``. Image
    paths come from
    ``facenet.get_image_paths(class_dir, nrof_data_augmentation)``. Domains
    without any class directory are left out of the result.

    Returns:
        dict mapping domain name to a list of ``facenet.ImageClass`` ordered
        by class name.
    """
    root = os.path.expanduser(path)
    domain_names = sorted(
        entry for entry in os.listdir(root)
        if os.path.isdir(os.path.join(root, entry)))

    domain_supervised_dataset = {}
    for domain_name in domain_names:
        domain_dir = os.path.join(root, domain_name)
        class_names = sorted(
            entry for entry in os.listdir(domain_dir)
            if os.path.isdir(os.path.join(domain_dir, entry)))
        dataset = [
            facenet.ImageClass(
                class_name,
                facenet.get_image_paths(os.path.join(domain_dir, class_name),
                                        nrof_data_augmentation))
            for class_name in class_names
        ]
        if dataset:
            domain_supervised_dataset[domain_name] = dataset
    return domain_supervised_dataset
def main(args):
    """Encode every image with a VAE and derive per-attribute latent vectors.

    Reads the annotations, feeds all images under ``args.data_dir`` through
    the encoder of the VAE defined by ``args.vae_def`` (restored from
    ``args.vae_checkpoint``), and for every attribute computes the difference
    between the mean latent variable of images labelled ``1`` and of images
    labelled ``-1``. Latent variables, attributes, field names and attribute
    vectors are written to the HDF5 file ``args.output_filename``.
    """
    # Per-channel normalization constants (mean and variance are hard-coded).
    img_mean = np.array([134.10714722, 102.52040863, 87.15436554])
    img_stddev = np.sqrt(np.array([3941.30175781, 2856.94287109, 2519.35791016]))
    vae_checkpoint = os.path.expanduser(args.vae_checkpoint)
    fields, attribs_dict = read_annotations(args.annotations_filename)
    vae_def = importlib.import_module(args.vae_def)
    vae = vae_def.Vae(args.latent_var_size)
    gen_image_size = vae.get_image_size()
    with tf.Graph().as_default(),tf.device('/device:GPU:0'):
        tf.set_random_seed(args.seed)
        image_list = facenet.get_image_paths(os.path.expanduser(args.data_dir))
        # Get attributes for images
        # The annotation key is the file name up to its first '.'.
        nrof_attributes = len(fields)
        attribs_list = []
        for img in image_list:
            key = os.path.split(img)[1].split('.')[0]
            attr = attribs_dict[key]
            assert len(attr)==nrof_attributes
            attribs_list.append(attr)
        # Create the input queue
        # The index travels with each sample so results can be written back to
        # the right row regardless of the order batches arrive in.
        index_list = range(len(image_list))
        input_queue = tf.train.slice_input_producer([image_list, attribs_list, index_list], num_epochs=1, shuffle=False)
        nrof_preprocess_threads = 4
        image_per_thread = []
        for _ in range(nrof_preprocess_threads):
            filename = input_queue[0]
            file_contents = tf.read_file(filename)
            image = tf.image.decode_image(file_contents, channels=3)
            # NOTE(review): the crop/pad size is hard-coded to 160 while the
            # static shape below uses args.image_size - confirm they are
            # always meant to be equal.
            image = tf.image.resize_image_with_crop_or_pad(image, 160, 160)
            #image = tf.image.resize_images(image, (64,64))
            image.set_shape((args.image_size, args.image_size, 3))
            attrib = input_queue[1]
            attrib.set_shape((nrof_attributes,))
            image = tf.cast(image, tf.float32)
            image_per_thread.append([image, attrib, input_queue[2]])
        images, attribs, indices = tf.train.batch_join(
            image_per_thread, batch_size=args.batch_size,
            shapes=[(args.image_size, args.image_size, 3), (nrof_attributes,), ()], enqueue_many=False,
            capacity=4 * nrof_preprocess_threads * args.batch_size,
            allow_smaller_final_batch=True)
        # Normalize
        images_norm = (images-img_mean) / img_stddev
        # Resize to appropriate size for the encoder
        images_norm_resize = tf.image.resize_images(images_norm, (gen_image_size,gen_image_size))
        # Create encoder network
        mean, log_variance = vae.encoder(images_norm_resize, True)
        # Sample the latent variable: mean + eps * std, with eps ~ N(0, 1).
        epsilon = tf.random_normal((tf.shape(mean)[0], args.latent_var_size))
        std = tf.exp(log_variance/2)
        latent_var = mean + epsilon * std
        # Create a saver
        saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=3)
        # Start running operations on the Graph
        gpu_memory_fraction = 1.0
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        coord = tf.train.Coordinator()
        tf.train.start_queue_runners(coord=coord, sess=sess)
        with sess.as_default():
            if vae_checkpoint:
                print('Restoring VAE checkpoint: %s' % vae_checkpoint)
                saver.restore(sess, vae_checkpoint)
            nrof_images = len(image_list)
            nrof_batches = int(math.ceil(len(image_list) / args.batch_size))
            latent_vars = np.zeros((nrof_images, args.latent_var_size))
            attributes = np.zeros((nrof_images, nrof_attributes))
            for i in range(nrof_batches):
                start_time = time.time()
                latent_var_, attribs_, indices_ = sess.run([latent_var, attribs, indices])
                # Scatter the batch into the rows given by the sample indices.
                latent_vars[indices_,:] = latent_var_
                attributes[indices_,:] = attribs_
                duration = time.time() - start_time
                print('Batch %d/%d: %.3f seconds' % (i+1, nrof_batches, duration))
            # NOTE: This will print the 'Out of range' warning if the last batch is not full,
            # as described by https://github.com/tensorflow/tensorflow/issues/8330
            # Calculate average change in the latent variable when each attribute changes
            attribute_vectors = np.zeros((nrof_attributes, args.latent_var_size), np.float32)
            for i in range(nrof_attributes):
                pos_idx = np.argwhere(attributes[:,i]==1)[:,0]
                neg_idx = np.argwhere(attributes[:,i]==-1)[:,0]
                pos_avg = np.mean(latent_vars[pos_idx,:], 0)
                neg_avg = np.mean(latent_vars[neg_idx,:], 0)
                attribute_vectors[i,:] = pos_avg - neg_avg
            filename = os.path.expanduser(args.output_filename)
            print('Writing attribute vectors, latent variables and attributes to %s' % filename)
            mdict = {'latent_vars':latent_vars, 'attributes':attributes, 'fields':fields, 'attribute_vectors':attribute_vectors }
            # One HDF5 dataset per entry of mdict.
            with h5py.File(filename, 'w') as f:
                for key, value in iteritems(mdict):
                    f.create_dataset(key, data=value)
def load_and_align_data(image_files, image_size, margin, gpu_memory_fraction):
    """Detect, crop, align and prewhiten one face per image.

    An MTCNN detector is created in its own graph/session. For every image
    under *image_files* the faces are detected; when several are found the
    one with the best size-versus-centering score is chosen. Its bounding box
    is grown by ``margin / 2`` on each side (clipped to the image), the crop
    and the face's landmarks are passed to ``face_alignment`` and the result
    is prewhitened.

    Args:
        image_files: directory (``~`` is expanded) passed to
            ``facenet.get_image_paths``.
        image_size: forwarded to ``face_alignment``.
        margin: total number of pixels added around the detected box.
        gpu_memory_fraction: per-process GPU memory fraction for the
            detector session.

    Returns:
        ``np.stack`` of the prewhitened aligned faces, one per image, in
        listing order.

    Raises:
        ValueError: if an image cannot be read, if no face is detected in an
            image, or if no images are found.
    """
    # MTCNN parameters
    minsize = 20  # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # three steps's threshold
    factor = 0.709  # scale factor

    print('Creating networks and loading parameters')
    # Create MT-CNN (P-net, R-net, O-net)
    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        with sess.as_default():
            pnet, rnet, onet = detect_face.create_mtcnn(sess, None)

    try:
        path_exp = os.path.expanduser(image_files)
        image_paths = facenet.get_image_paths(path_exp)

        img_list = []
        for image_path in image_paths:
            img = cv2.imread(image_path)
            if img is None:
                # cv2.imread signals failure by returning None.
                raise ValueError('Unable to read image "%s"' % image_path)

            bounding_boxes, landmarks = detect_face.detect_face(
                img, minsize, pnet, rnet, onet, threshold, factor)
            nrof_faces = bounding_boxes.shape[0]
            if nrof_faces == 0:
                # Fail here, naming the file, instead of later with an
                # unrelated-looking error when the results are stacked.
                raise ValueError('No face detected in "%s"' % image_path)

            det = bounding_boxes[:, 0:4]
            landmark = landmarks
            img_size = np.asarray(img.shape)[0:2]
            if nrof_faces > 1:
                # Prefer large boxes close to the image centre.
                bounding_box_size = (det[:, 2] - det[:, 0]) * (det[:, 3] -
                                                               det[:, 1])
                img_center = img_size / 2
                offsets = np.vstack([
                    (det[:, 0] + det[:, 2]) / 2 - img_center[1],
                    (det[:, 1] + det[:, 3]) / 2 - img_center[0]
                ])
                offset_dist_squared = np.sum(np.power(offsets, 2.0), 0)
                # some extra weight on the centering
                index = np.argmax(bounding_box_size -
                                  offset_dist_squared * 2.0)
                det = det[index, :]
                landmark = landmark[:, index]
            det = np.squeeze(det)
            landmark = np.squeeze(landmark)

            # Grow the box by the margin, clipped to the image bounds.
            bb = np.zeros(4, dtype=np.int32)
            bb[0] = np.maximum(det[0] - margin / 2, 0)
            bb[1] = np.maximum(det[1] - margin / 2, 0)
            bb[2] = np.minimum(det[2] + margin / 2, img_size[1])
            bb[3] = np.minimum(det[3] + margin / 2, img_size[0])
            cropped = img[bb[1]:bb[3], bb[0]:bb[2], :]

            aligned = face_alignment(cropped, image_size, landmark)
            img_list.append(facenet.prewhiten(aligned))
        images = np.stack(img_list)
    finally:
        # The detector session is only needed while images are processed.
        sess.close()
    return images
def main(args):
    """Encode every image with a VAE and derive per-attribute latent vectors.

    Reads the annotations, feeds all images under ``args.data_dir`` through
    the encoder of the VAE defined by ``args.vae_def`` (restored from
    ``args.vae_checkpoint``), and for every attribute computes the difference
    between the mean latent variable of images labelled ``1`` and of images
    labelled ``-1``. Latent variables, attributes, field names and attribute
    vectors are written to the HDF5 file ``args.output_filename``.

    Args:
        args: namespace providing ``vae_checkpoint``, ``annotations_filename``,
            ``vae_def``, ``latent_var_size``, ``seed``, ``data_dir``,
            ``image_size``, ``batch_size`` and ``output_filename``.
    """
    # Per-channel normalization constants (mean and variance are hard-coded).
    img_mean = np.array([134.10714722, 102.52040863, 87.15436554])
    img_stddev = np.sqrt(np.array([3941.30175781, 2856.94287109, 2519.35791016]))

    vae_checkpoint = os.path.expanduser(args.vae_checkpoint)

    fields, attribs_dict = read_annotations(args.annotations_filename)

    vae_def = importlib.import_module(args.vae_def)
    vae = vae_def.Vae(args.latent_var_size)
    gen_image_size = vae.get_image_size()

    with tf.Graph().as_default():
        tf.set_random_seed(args.seed)

        image_list = facenet.get_image_paths(os.path.expanduser(args.data_dir))

        # Get attributes for images
        # The annotation key is the file name up to its first '.'.
        nrof_attributes = len(fields)
        attribs_list = []
        for img in image_list:
            key = os.path.split(img)[1].split('.')[0]
            attr = attribs_dict[key]
            assert len(attr)==nrof_attributes
            attribs_list.append(attr)

        # Create the input queue
        # The index travels with each sample so results can be written back to
        # the right row regardless of the order batches arrive in.
        index_list = range(len(image_list))
        input_queue = tf.train.slice_input_producer([image_list, attribs_list, index_list], num_epochs=1, shuffle=False)

        nrof_preprocess_threads = 4
        image_per_thread = []
        for _ in range(nrof_preprocess_threads):
            filename = input_queue[0]
            file_contents = tf.read_file(filename)
            image = tf.image.decode_image(file_contents, channels=3)
            # NOTE(review): the crop/pad size is hard-coded to 160 while the
            # static shape below uses args.image_size - confirm they are
            # always meant to be equal.
            image = tf.image.resize_image_with_crop_or_pad(image, 160, 160)
            image.set_shape((args.image_size, args.image_size, 3))
            attrib = input_queue[1]
            attrib.set_shape((nrof_attributes,))
            image = tf.cast(image, tf.float32)
            image_per_thread.append([image, attrib, input_queue[2]])

        images, attribs, indices = tf.train.batch_join(
            image_per_thread, batch_size=args.batch_size,
            shapes=[(args.image_size, args.image_size, 3), (nrof_attributes,), ()], enqueue_many=False,
            capacity=4 * nrof_preprocess_threads * args.batch_size,
            allow_smaller_final_batch=True)

        # Normalize
        images_norm = (images-img_mean) / img_stddev

        # Resize to appropriate size for the encoder
        images_norm_resize = tf.image.resize_images(images_norm, (gen_image_size,gen_image_size))

        # Create encoder network
        mean, log_variance = vae.encoder(images_norm_resize, True)

        # Sample the latent variable: mean + eps * std, with eps ~ N(0, 1).
        epsilon = tf.random_normal((tf.shape(mean)[0], args.latent_var_size))
        std = tf.exp(log_variance/2)
        latent_var = mean + epsilon * std

        # Create a saver
        saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=3)

        # Start running operations on the Graph
        gpu_memory_fraction = 1.0
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        coord = tf.train.Coordinator()
        tf.train.start_queue_runners(coord=coord, sess=sess)

        with sess.as_default():

            if vae_checkpoint:
                print('Restoring VAE checkpoint: %s' % vae_checkpoint)
                saver.restore(sess, vae_checkpoint)

            nrof_images = len(image_list)
            # float() keeps this a true division even where '/' on two ints
            # floors (Python 2 without ``from __future__ import division``),
            # so a final partial batch is always counted.
            nrof_batches = int(math.ceil(len(image_list) / float(args.batch_size)))
            latent_vars = np.zeros((nrof_images, args.latent_var_size))
            attributes = np.zeros((nrof_images, nrof_attributes))
            for i in range(nrof_batches):
                start_time = time.time()
                latent_var_, attribs_, indices_ = sess.run([latent_var, attribs, indices])
                # Scatter the batch into the rows given by the sample indices.
                latent_vars[indices_,:] = latent_var_
                attributes[indices_,:] = attribs_
                duration = time.time() - start_time
                print('Batch %d/%d: %.3f seconds' % (i+1, nrof_batches, duration))
            # NOTE: This will print the 'Out of range' warning if the last batch is not full,
            # as described by https://github.com/tensorflow/tensorflow/issues/8330

            # Calculate average change in the latent variable when each attribute changes
            attribute_vectors = np.zeros((nrof_attributes, args.latent_var_size), np.float32)
            for i in range(nrof_attributes):
                pos_idx = np.argwhere(attributes[:,i]==1)[:,0]
                neg_idx = np.argwhere(attributes[:,i]==-1)[:,0]
                pos_avg = np.mean(latent_vars[pos_idx,:], 0)
                neg_avg = np.mean(latent_vars[neg_idx,:], 0)
                attribute_vectors[i,:] = pos_avg - neg_avg

            filename = os.path.expanduser(args.output_filename)
            print('Writing attribute vectors, latent variables and attributes to %s' % filename)
            mdict = {'latent_vars':latent_vars, 'attributes':attributes,
                     'fields':fields, 'attribute_vectors':attribute_vectors }
            # One HDF5 dataset per entry. dict.items() exists on both
            # Python 2 and 3, whereas dict.iteritems() is Python 2 only.
            with h5py.File(filename, 'w') as f:
                for key, value in mdict.items():
                    f.create_dataset(key, data=value)
def main():
    """Detect, crop and embed the faces listed in the dictionary file.

    All paths and parameters are hard-coded below. Two graphs/sessions are
    created: one for the MTCNN detector, one for the embedding model. For
    every image name listed in ``dict.txt`` the face is detected (the best
    size-versus-centering candidate when several are found), cropped with a
    margin, resized to 160x160, prewhitened and embedded. Embeddings are
    stored row by row, in order of successful processing, in a buffer with a
    fixed 100 rows which is saved with ``np.save``. Images in which no face
    is found are reported and do not consume a row.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = '5'
    model_dir = '/net/per920a/export/das14a/satoh-lab/wangz/ins2018/src/facenet-master/model/'
    image_list_file_dir = '/net/per920a/export/das14a/satoh-lab/wangz/ins2018/face_dict/dict.txt'
    input_dir = '/net/per920a/export/das14a/satoh-lab/wangz/ins2018/face_dict/'
    output_dir = '/net/per920a/export/das14a/satoh-lab/wangz/ins2018/aligned_face_dict/'
    output_feature_dir = '/net/per920a/export/das14a/satoh-lab/wangz/ins2018/data/face_dict/dict_single_feature.npy'
    detect_multiple_faces = False
    margin = 44
    image_size = 182

    minsize = 20  # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # three steps's threshold
    factor = 0.709  # scale factor

    print('Creating networks and loading parameters')
    g1 = tf.Graph()  # detect and align
    g2 = tf.Graph()  # feature

    # The allow_growth config used to be built but never handed to a session,
    # so it had no effect; pass it to the sessions it was meant for.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with g1.as_default():
        sess1 = tf.Session(graph=g1, config=config)
        with sess1.as_default():
            pnet, rnet, onet = align.detect_face.create_mtcnn(sess1, None)

    with g2.as_default():
        sess2 = tf.Session(graph=g2, config=config)
        with sess2.as_default():
            facenet.load_model(model_dir)
            images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
            embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
            phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
            embedding_size = embeddings.get_shape()[1]

    print('loading image path')
    output_dir = os.path.expanduser(output_dir)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # NOTE(review): the buffer is fixed at 100 rows; more than 100 embedded
    # faces would index past its end - confirm the dictionary size.
    emb_feature = np.zeros((100, embedding_size))
    img_id = 0

    # Read the list up front so the file handle is closed deterministically.
    with open(image_list_file_dir, 'r') as image_list_file:
        image_names = [line.strip() for line in image_list_file]

    for image_path in image_names:
        if not image_path:
            # A blank line would make imread try to open the directory itself.
            continue
        img = misc.imread(input_dir + image_path)

        # throw small image, and change gray image to color image
        if img.ndim < 2:
            print('Unable to align "%s"' % image_path)
            continue
        if img.ndim == 2:
            img = facenet.to_rgb(img)
        img = img[:, :, 0:3]

        # face detection
        bounding_boxes, _ = align.detect_face.detect_face(
            img, minsize, pnet, rnet, onet, threshold, factor)
        nrof_faces = bounding_boxes.shape[0]
        if nrof_faces > 0:
            det = bounding_boxes[:, 0:4]
            det_arr = []
            img_size = np.asarray(img.shape)[0:2]
            if nrof_faces > 1:
                if detect_multiple_faces:
                    for i in range(nrof_faces):
                        det_arr.append(np.squeeze(det[i]))
                else:
                    # Prefer large boxes close to the image centre.
                    bounding_box_size = (det[:, 2] - det[:, 0]) * (det[:, 3] - det[:, 1])
                    img_center = img_size / 2
                    offsets = np.vstack([(det[:, 0] + det[:, 2]) / 2 - img_center[1],
                                         (det[:, 1] + det[:, 3]) / 2 - img_center[0]])
                    offset_dist_squared = np.sum(np.power(offsets, 2.0), 0)
                    # some extra weight on the centering
                    index = np.argmax(bounding_box_size - offset_dist_squared * 2.0)
                    det_arr.append(det[index, :])
            else:
                det_arr.append(np.squeeze(det))

            # for each detected face
            for i, det in enumerate(det_arr):
                det = np.squeeze(det)
                # Grow the box by the margin, clipped to the image bounds.
                bb = np.zeros(4, dtype=np.int32)
                bb[0] = np.maximum(det[0] - margin / 2, 0)
                bb[1] = np.maximum(det[1] - margin / 2, 0)
                bb[2] = np.minimum(det[2] + margin / 2, img_size[1])
                bb[3] = np.minimum(det[3] + margin / 2, img_size[0])
                cropped = img[bb[1]:bb[3], bb[0]:bb[2], :]
                scaled = misc.imresize(cropped, (image_size, image_size), interp='bilinear')
                scaled = misc.imresize(scaled, (160, 160), interp='bilinear')

                batch = facenet.prewhiten(scaled).reshape(-1, 160, 160, 3)
                emb_feature[img_id, :] = sess2.run(
                    embeddings,
                    feed_dict={images_placeholder: batch,
                               phase_train_placeholder: False})[0]
                img_id = img_id + 1
        else:
            print('Unable to align "%s"' % image_path)

    np.save(output_feature_dir, emb_feature)
def align_frames(input_dir, output_dir, image_size=182, margin=44, gpu_memory_fraction=1.0):
    """Detect every face in the images of *input_dir* and save the crops.

    Each detected face is cropped with ``margin / 2`` extra pixels per side
    (clipped to the image), resized to ``image_size`` x ``image_size`` and
    written to ``<output_dir>/<image name>_<face index>.png``. A
    ``bounding_boxes_<random>.txt`` file in *output_dir* gets one line per
    saved crop (file name and box) and one line with just the output prefix
    for images that are not at least 2-D or in which no face was found. The
    function is written to be run by several processes at once, hence the
    random start delay and the random key in the log file name.

    Args:
        input_dir: directory passed to ``facenet.get_image_paths``.
        output_dir: destination directory (``~`` is expanded; created if
            missing).
        image_size: side length of the saved crops, in pixels.
        margin: total number of pixels added around each detected box.
        gpu_memory_fraction: per-process GPU memory fraction for the
            detector session.
    """
    sleep(random.random())
    output_dir = os.path.expanduser(output_dir)
    if not os.path.exists(output_dir):
        try:
            os.makedirs(output_dir)
        except OSError:
            # A parallel alignment process may have created the directory
            # between the check and the call; only a real failure is an error.
            if not os.path.isdir(output_dir):
                raise

    # Store some git revision info in a text file in the log directory
    src_path, _ = os.path.split(os.path.realpath(__file__))
    facenet.store_revision_info(src_path, output_dir, ' '.join(sys.argv))
    image_paths = facenet.get_image_paths(input_dir)

    tf.logging.set_verbosity(tf.logging.ERROR)
    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        with sess.as_default():
            pnet, rnet, onet = detect_face.create_mtcnn(sess, None)

    minsize = 20  # minimum size of face
    # first step shallow cnn to detect face windows
    # second step deep cnn to throw out non face windows
    # third step detect face landmarks
    threshold = [0.6, 0.7, 0.7]  # three steps's threshold
    factor = 0.709  # scale factor

    # Add a random key to the filename to allow alignment using multiple processes
    random_key = np.random.randint(0, high=99999)
    bounding_boxes_filename = os.path.join(
        output_dir, 'bounding_boxes_%05d.txt' % random_key)

    bar = progressbar.ProgressBar(maxval=len(image_paths),
                                  widgets=[
                                      progressbar.Bar('=', '[', ']'), ' ',
                                      progressbar.Percentage()
                                  ])
    bar.start()
    try:
        with open(bounding_boxes_filename, "w") as text_file:
            nrof_images_total = 0
            for image_path in image_paths:
                nrof_images_total += 1
                bar.update(nrof_images_total)
                filename = os.path.splitext(os.path.split(image_path)[1])[0]
                output_filename_prefix = os.path.join(output_dir, filename)
                if not os.path.exists(output_filename_prefix):
                    try:
                        img = misc.imread(image_path)
                    except (IOError, ValueError, IndexError) as e:
                        errorMessage = '{}: {}'.format(image_path, e)
                        print(errorMessage)
                    else:
                        if img.ndim < 2:
                            # Not an image that can be aligned; log the prefix only.
                            text_file.write('%s\n' % (output_filename_prefix))
                            continue
                        if img.ndim == 2:
                            img = facenet.to_rgb(img)
                        img = img[:, :, 0:3]

                        bounding_boxes, _ = detect_face.detect_face(
                            img, minsize, pnet, rnet, onet, threshold, factor)
                        nrof_faces = bounding_boxes.shape[0]
                        if nrof_faces > 0:
                            # Every detected face is saved, not just the best one.
                            for i in range(nrof_faces):
                                det = bounding_boxes[i, 0:4]
                                img_size = np.asarray(img.shape)[0:2]
                                det = np.squeeze(det)
                                # Grow the box by the margin, clipped to the image.
                                bb = np.zeros(4, dtype=np.int32)
                                bb[0] = np.maximum(det[0] - margin / 2, 0)
                                bb[1] = np.maximum(det[1] - margin / 2, 0)
                                bb[2] = np.minimum(det[2] + margin / 2, img_size[1])
                                bb[3] = np.minimum(det[3] + margin / 2, img_size[0])
                                cropped = img[bb[1]:bb[3], bb[0]:bb[2], :]
                                scaled = misc.imresize(cropped,
                                                       (image_size, image_size),
                                                       interp='bilinear')
                                output_filename = output_filename_prefix + '_' + str(
                                    i) + '.png'
                                misc.imsave(output_filename, scaled)
                                text_file.write(
                                    '%s %d %d %d %d\n' %
                                    (output_filename, bb[0], bb[1], bb[2], bb[3]))
                        else:
                            # No face found; log the prefix only.
                            text_file.write('%s\n' % (output_filename_prefix))
    finally:
        # The detector session is only needed while images are processed.
        sess.close()
    bar.finish()