def check_dataset(dataset):
    """Check that every class has images, then print the dataset's paths and labels.

    Args:
        dataset: Iterable of class entries, each exposing an ``image_paths``
            sequence.

    Raises:
        AssertionError: If any class entry has no image paths.
    """
    for cls in dataset:
        # The condition and the message must NOT share one pair of
        # parentheses: ``assert (cond, msg)`` asserts a non-empty tuple,
        # which is always true, so the check would never fire.
        assert len(cls.image_paths) > 0, (
            'There must be at least one image for each class in the dataset')
    paths, labels = facenet.get_image_paths_and_labels(dataset)
    print(paths)
    print(labels)
def process_data(self):
    """Build the training and validation DataFrames from ``self.data_dir``.

    The dataset is loaded and split with the facenet helpers, the image
    entries of both splits are passed through ``self.preprocess_image``, and
    each (image, label) pairing is parallelized with ``self.sc`` and
    converted to a DataFrame using the ``DATAINDEX`` schema.

    Returns:
        A ``(train_df, val_df)`` tuple.
    """
    full_dataset = facenet.get_dataset(self.data_dir)
    train_split, val_split = facenet.split_dataset(
        full_dataset, self.valid_ratio, self.min_images_per_class, self.mode)

    train_images, train_labels = facenet.get_image_paths_and_labels(
        train_split)
    val_images, val_labels = facenet.get_image_paths_and_labels(val_split)

    train_images = self.preprocess_image(train_images)
    val_images = self.preprocess_image(val_images)

    # Pair every preprocessed image with its label before distributing.
    train_rdd = self.sc.parallelize(zip(train_images, train_labels))
    val_rdd = self.sc.parallelize(zip(val_images, val_labels))

    return train_rdd.toDF(DATAINDEX), val_rdd.toDF(DATAINDEX)
def train(self, dataset_folder, model_name):
    """Compute embeddings for a dataset folder and pickle them with their labels.

    Args:
        dataset_folder: Directory handed to ``facenet.get_dataset``.
        model_name: Path of the output file; it receives a pickled
            ``(embeddings, class_names, labels)`` tuple.
    """
    classes = facenet.get_dataset(dataset_folder)
    image_paths, image_labels = facenet.get_image_paths_and_labels(classes)

    print('Number of classes: %d' % len(classes))
    print('Number of images: %d' % len(image_paths))

    print("Calculating embeddings for new data")
    embeddings = self.calculate_embeddings(image_paths, False)

    # Human-readable class names: underscores become spaces.
    readable_names = []
    for entry in classes:
        readable_names.append(entry.name.replace('_', ' '))

    # Persist the embeddings together with the names and labels.
    payload = (embeddings, readable_names, image_labels)
    with open(model_name, 'wb') as sink:
        pickle.dump(payload, sink)
    print('Saved classifier model to file "%s"' % model_name)
def fit(self):
    """Embed the dataset with the loaded FaceNet graph and fit a linear SVM.

    The data directory is taken from
    ``My_align_dataset_mtcnn(0, 160, 32, True, 0.25).output_dir``. The dataset
    is loaded (and optionally split through ``self.split_dataset``), one
    embedding per image is computed in batches of ``self.batch_size``, and an
    ``SVC(kernel='linear', probability=True)`` is fitted on the embeddings.

    Side effects:
        Sets ``self.model_status``, ``self.data_dir``, ``self.paths``,
        ``self.labels``, ``self.emb_array``, ``self.classifier_filename_exp``,
        ``self.classify_model`` and ``self.class_names``.

    Raises:
        AssertionError: If any class of the selected dataset has no images.
    """
    self.model_status = 'TRAIN'
    self.data_dir = My_align_dataset_mtcnn(0, 160, 32, True, 0.25).output_dir

    with tf.Graph().as_default():
        with tf.Session() as sess:
            np.random.seed(seed=self.seed)

            if self.use_split_dataset:
                dataset_tmp = facenet.get_dataset(self.data_dir)
                train_set, test_set = self.split_dataset(
                    dataset_tmp, self.min_nrof_images_per_class,
                    self.nrof_train_images_per_class)
                if self.model_status == 'TRAIN':
                    dataset = train_set
                elif self.model_status == 'CLASSIFY':
                    dataset = test_set
            else:
                dataset = facenet.get_dataset(self.data_dir)

            # Check that there is at least one training image per class.
            # The message must stay outside the parentheses of the condition:
            # ``assert (cond, msg)`` tests a non-empty tuple and never fails.
            for cls in dataset:
                assert len(cls.image_paths) > 0, (
                    'There must be at least one image for each class in the dataset')

            self.paths, self.labels = facenet.get_image_paths_and_labels(
                dataset)
            print("paths", self.paths)
            print("labels", self.labels)
            print('Number of classes: %d' % len(dataset))
            print('Number of images: %d' % len(self.paths))

            # Load the model
            print('Loading feature extraction model', self.model)
            facenet.load_model(self.model)

            # Get input and output tensors
            graph = tf.get_default_graph()
            images_placeholder = graph.get_tensor_by_name("input:0")
            embeddings = graph.get_tensor_by_name("embeddings:0")
            phase_train_placeholder = graph.get_tensor_by_name(
                "phase_train:0")
            embedding_size = embeddings.get_shape()[1]

            # Run forward pass to calculate embeddings
            print('Calculating features for images')
            nrof_images = len(self.paths)
            nrof_batches_per_epoch = int(
                math.ceil(1.0 * nrof_images / self.batch_size))
            print(nrof_images, self.batch_size, nrof_batches_per_epoch,
                  embedding_size)
            self.emb_array = np.zeros((nrof_images, embedding_size))
            for i in range(nrof_batches_per_epoch):
                start_index = i * self.batch_size
                # The last batch may be shorter than batch_size.
                end_index = min((i + 1) * self.batch_size, nrof_images)
                paths_batch = self.paths[start_index:end_index]
                images = facenet.load_data(paths_batch, False, False,
                                           self.image_size)
                feed_dict = {
                    images_placeholder: images,
                    phase_train_placeholder: False
                }
                self.emb_array[start_index:end_index, :] = sess.run(
                    embeddings, feed_dict=feed_dict)
            print("shape", self.emb_array.shape)

            self.classifier_filename_exp = os.path.expanduser(
                self.classifier_filename)

            # Train classifier
            print('Training classifier')
            self.classify_model = SVC(kernel='linear', probability=True)
            self.classify_model.fit(self.emb_array, self.labels)

            # Create a list of class names
            self.class_names = [
                cls.name.replace('_', ' ') for cls in dataset
            ]
def main(args):
    """Train a softmax face classifier, with optional validation and LFW evaluation.

    Creates timestamped log and model directories, loads (and optionally
    filters and splits) the dataset, builds the queue-based input pipeline and
    the training graph, then runs up to ``args.max_nrof_epochs`` epochs. After
    each epoch the variables are saved and the collected statistics are
    written to ``stat.h5`` in the log directory.

    Args:
        args: Parsed command-line namespace; the attributes read are the ones
            referenced in the body (``model_def``, ``data_dir``, ``seed``,
            ``batch_size``, ``epoch_size``, ...).

    Returns:
        The path of the directory the model checkpoints are written to.

    Raises:
        AssertionError: If the training set contains no images.
    """
    network = importlib.import_module(args.model_def)
    image_size = (args.image_size, args.image_size)

    subdir = datetime.strftime(datetime.now(), "%Y%m%d-%H%M%S")
    log_dir = os.path.join(os.path.expanduser(args.logs_base_dir), subdir)
    if not os.path.isdir(log_dir):  # Create the log directory if it doesn't exist
        os.makedirs(log_dir)
    model_dir = os.path.join(os.path.expanduser(args.models_base_dir), subdir)
    if not os.path.isdir(model_dir):  # Create the model directory if it doesn't exist
        os.makedirs(model_dir)

    stat_file_name = os.path.join(log_dir, "stat.h5")

    # Write arguments to a text file
    fc.write_arguments_to_file(args, os.path.join(log_dir, "arguments.txt"))

    # Store some git revision info in a text file in the log directory
    src_path, _ = os.path.split(os.path.realpath(__file__))
    fc.store_revision_info(src_path, log_dir, " ".join(sys.argv))

    np.random.seed(seed=args.seed)
    random.seed(args.seed)
    dataset = fc.get_dataset(args.data_dir)
    if args.filter_filename:
        dataset = filter_dataset(
            dataset,
            os.path.expanduser(args.filter_filename),
            args.filter_percentile,
            args.filter_min_nrof_images_per_class,
        )

    if args.validation_set_split_ratio > 0.0:
        train_set, val_set = fc.split_dataset(
            dataset, args.validation_set_split_ratio,
            args.min_nrof_val_images_per_class, "SPLIT_IMAGES")
    else:
        train_set, val_set = dataset, []

    nrof_classes = len(train_set)

    print("Model directory: %s" % model_dir)
    print("Log directory: %s" % log_dir)
    pretrained_model = None
    if args.pretrained_model:
        pretrained_model = os.path.expanduser(args.pretrained_model)
        print("Pre-trained model: %s" % pretrained_model)

    # lfw_paths / actual_issame are only defined (and only used below) when
    # an LFW directory was given.
    if args.lfw_dir:
        print("LFW directory: %s" % args.lfw_dir)
        # Read the file containing the pairs used for testing
        pairs = lfw.read_pairs(os.path.expanduser(args.lfw_pairs))
        # Get the paths for the corresponding images
        lfw_paths, actual_issame = lfw.get_paths(
            os.path.expanduser(args.lfw_dir), pairs)

    with tf.Graph().as_default():
        tf.set_random_seed(args.seed)
        global_step = tf.Variable(0, trainable=False)

        # Get a list of image paths and their labels
        image_list, label_list = fc.get_image_paths_and_labels(train_set)
        assert len(image_list) > 0, "The training set should not be empty"

        val_image_list, val_label_list = fc.get_image_paths_and_labels(val_set)

        # Create a queue that produces indices into the image_list and label_list
        labels = ops.convert_to_tensor(label_list, dtype=tf.int32)
        range_size = array_ops.shape(labels)[0]
        index_queue = tf.train.range_input_producer(range_size,
                                                    num_epochs=None,
                                                    shuffle=True,
                                                    seed=None,
                                                    capacity=32)

        index_dequeue_op = index_queue.dequeue_many(
            args.batch_size * args.epoch_size, "index_dequeue")

        learning_rate_placeholder = tf.placeholder(tf.float32,
                                                   name="learning_rate")
        batch_size_placeholder = tf.placeholder(tf.int32, name="batch_size")
        phase_train_placeholder = tf.placeholder(tf.bool, name="phase_train")
        image_paths_placeholder = tf.placeholder(tf.string,
                                                 shape=(None, 1),
                                                 name="image_paths")
        labels_placeholder = tf.placeholder(tf.int32,
                                            shape=(None, 1),
                                            name="labels")
        control_placeholder = tf.placeholder(tf.int32,
                                             shape=(None, 1),
                                             name="control")

        nrof_preprocess_threads = 4
        input_queue = data_flow_ops.FIFOQueue(
            capacity=2000000,
            dtypes=[tf.string, tf.int32, tf.int32],
            shapes=[(1, ), (1, ), (1, )],
            shared_name=None,
            name=None,
        )
        enqueue_op = input_queue.enqueue_many(
            [image_paths_placeholder, labels_placeholder, control_placeholder],
            name="enqueue_op")
        image_batch, label_batch = fc.create_input_pipeline(
            input_queue, image_size, nrof_preprocess_threads,
            batch_size_placeholder)

        image_batch = tf.identity(image_batch, "image_batch")
        image_batch = tf.identity(image_batch, "input")
        label_batch = tf.identity(label_batch, "label_batch")

        print("Number of classes in training set: %d" % nrof_classes)
        print("Number of examples in training set: %d" % len(image_list))

        print("Number of classes in validation set: %d" % len(val_set))
        print("Number of examples in validation set: %d" % len(val_image_list))

        print("Building training graph")

        # Build the inference graph
        prelogits, _ = network.inference(
            image_batch,
            args.keep_probability,
            phase_train=phase_train_placeholder,
            bottleneck_layer_size=args.embedding_size,
            weight_decay=args.weight_decay,
        )
        logits = slim.fully_connected(
            prelogits,
            len(train_set),
            activation_fn=None,
            weights_initializer=slim.initializers.xavier_initializer(),
            weights_regularizer=slim.l2_regularizer(args.weight_decay),
            scope="Logits",
            reuse=False,
        )

        embeddings = tf.nn.l2_normalize(prelogits, 1, 1e-10, name="embeddings")

        # Norm for the prelogits
        eps = 1e-4
        prelogits_norm = tf.reduce_mean(
            tf.norm(tf.abs(prelogits) + eps, ord=args.prelogits_norm_p, axis=1))
        tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES,
                             prelogits_norm * args.prelogits_norm_loss_factor)

        # Add center loss
        prelogits_center_loss, _ = fc.center_loss(prelogits, label_batch,
                                                  args.center_loss_alfa,
                                                  nrof_classes)
        tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES,
                             prelogits_center_loss * args.center_loss_factor)

        learning_rate = tf.train.exponential_decay(
            learning_rate_placeholder,
            global_step,
            args.learning_rate_decay_epochs * args.epoch_size,
            args.learning_rate_decay_factor,
            staircase=True,
        )
        tf.summary.scalar("learning_rate", learning_rate)

        # Calculate the average cross entropy loss across the batch
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=label_batch, logits=logits,
            name="cross_entropy_per_example")
        cross_entropy_mean = tf.reduce_mean(cross_entropy,
                                            name="cross_entropy")
        tf.add_to_collection("losses", cross_entropy_mean)

        correct_prediction = tf.cast(
            tf.equal(tf.argmax(logits, 1), tf.cast(label_batch, tf.int64)),
            tf.float32)
        accuracy = tf.reduce_mean(correct_prediction)

        # Calculate the total losses
        regularization_losses = tf.get_collection(
            tf.GraphKeys.REGULARIZATION_LOSSES)
        total_loss = tf.add_n([cross_entropy_mean] + regularization_losses,
                              name="total_loss")

        # Build a Graph that trains the model with one batch of examples and
        # updates the model parameters
        train_op = fc.train(
            total_loss,
            global_step,
            args.optimizer,
            learning_rate,
            args.moving_average_decay,
            tf.global_variables(),
            args.log_histograms,
        )

        # Create a saver
        saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=3)

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.summary.merge_all()

        # Start running operations on the Graph.
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=args.gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
        coord = tf.train.Coordinator()
        tf.train.start_queue_runners(coord=coord, sess=sess)

        with sess.as_default():

            if pretrained_model:
                print("Restoring pretrained model: %s" % pretrained_model)
                saver.restore(sess, pretrained_model)

            # Training and validation loop
            print("Running training")
            nrof_steps = args.max_nrof_epochs * args.epoch_size
            nrof_val_samples = int(
                math.ceil(args.max_nrof_epochs / args.validate_every_n_epochs)
            )  # Validate every validate_every_n_epochs as well as in the last epoch
            stat = {
                "loss": np.zeros((nrof_steps, ), np.float32),
                "center_loss": np.zeros((nrof_steps, ), np.float32),
                "reg_loss": np.zeros((nrof_steps, ), np.float32),
                "xent_loss": np.zeros((nrof_steps, ), np.float32),
                "prelogits_norm": np.zeros((nrof_steps, ), np.float32),
                "accuracy": np.zeros((nrof_steps, ), np.float32),
                "val_loss": np.zeros((nrof_val_samples, ), np.float32),
                "val_xent_loss": np.zeros((nrof_val_samples, ), np.float32),
                "val_accuracy": np.zeros((nrof_val_samples, ), np.float32),
                "lfw_accuracy": np.zeros((args.max_nrof_epochs, ), np.float32),
                "lfw_valrate": np.zeros((args.max_nrof_epochs, ), np.float32),
                "learning_rate": np.zeros((args.max_nrof_epochs, ), np.float32),
                "time_train": np.zeros((args.max_nrof_epochs, ), np.float32),
                "time_validate": np.zeros((args.max_nrof_epochs, ), np.float32),
                "time_evaluate": np.zeros((args.max_nrof_epochs, ), np.float32),
                "prelogits_hist": np.zeros((args.max_nrof_epochs, 1000),
                                           np.float32),
            }
            for epoch in range(1, args.max_nrof_epochs + 1):
                step = sess.run(global_step, feed_dict=None)
                # Train for one epoch
                t = time.time()
                cont = train(
                    args,
                    sess,
                    epoch,
                    image_list,
                    label_list,
                    index_dequeue_op,
                    enqueue_op,
                    image_paths_placeholder,
                    labels_placeholder,
                    learning_rate_placeholder,
                    phase_train_placeholder,
                    batch_size_placeholder,
                    control_placeholder,
                    global_step,
                    total_loss,
                    train_op,
                    summary_op,
                    summary_writer,
                    regularization_losses,
                    args.learning_rate_schedule_file,
                    stat,
                    cross_entropy_mean,
                    accuracy,
                    learning_rate,
                    prelogits,
                    prelogits_center_loss,
                    args.random_rotate,
                    args.random_crop,
                    args.random_flip,
                    prelogits_norm,
                    args.prelogits_hist_max,
                    args.use_fixed_image_standardization,
                )
                stat["time_train"][epoch - 1] = time.time() - t

                # train() returning a falsy value stops the run early.
                if not cont:
                    break

                t = time.time()
                if len(val_image_list) > 0 and (
                        (epoch - 1) % args.validate_every_n_epochs
                        == args.validate_every_n_epochs - 1
                        or epoch == args.max_nrof_epochs):
                    validate(
                        args,
                        sess,
                        epoch,
                        val_image_list,
                        val_label_list,
                        enqueue_op,
                        image_paths_placeholder,
                        labels_placeholder,
                        control_placeholder,
                        phase_train_placeholder,
                        batch_size_placeholder,
                        stat,
                        total_loss,
                        regularization_losses,
                        cross_entropy_mean,
                        accuracy,
                        args.validate_every_n_epochs,
                        args.use_fixed_image_standardization,
                    )
                stat["time_validate"][epoch - 1] = time.time() - t

                # Save variables and the metagraph if it doesn't exist already
                save_variables_and_metagraph(sess, saver, summary_writer,
                                             model_dir, subdir, epoch)

                # Evaluate on LFW
                t = time.time()
                if args.lfw_dir:
                    evaluate(
                        sess,
                        enqueue_op,
                        image_paths_placeholder,
                        labels_placeholder,
                        phase_train_placeholder,
                        batch_size_placeholder,
                        control_placeholder,
                        embeddings,
                        label_batch,
                        lfw_paths,
                        actual_issame,
                        args.lfw_batch_size,
                        args.lfw_nrof_folds,
                        log_dir,
                        step,
                        summary_writer,
                        stat,
                        epoch,
                        args.lfw_distance_metric,
                        args.lfw_subtract_mean,
                        args.lfw_use_flipped_images,
                        args.use_fixed_image_standardization,
                    )
                stat["time_evaluate"][epoch - 1] = time.time() - t

                print("Saving statistics")
                with h5py.File(stat_file_name, "w") as f:
                    # dict.iteritems() does not exist on Python 3; items()
                    # works on both Python 2 and Python 3.
                    for key, value in stat.items():
                        f.create_dataset(key, data=value)

    return model_dir
def main(args):
    """Train a classifier on FaceNet embeddings, or evaluate a saved one.

    Embeddings are computed for every image of the selected dataset in batches
    of ``args.batch_size``. In ``'TRAIN'`` mode a ``GaussianNB`` model is
    fitted and pickled, together with the class names, to
    ``args.classifier_filename``. In ``'CLASSIFY'`` mode that pickle is loaded,
    each image's best class and probability are printed, and the overall
    accuracy is reported.

    Args:
        args: Parsed command-line namespace (``seed``, ``use_split_dataset``,
            ``data_dir``, ``mode``, ``model``, ``batch_size``, ``image_size``,
            ``classifier_filename``, ...).

    Raises:
        AssertionError: If any class of the selected dataset has no images.
        ValueError: If ``args.use_split_dataset`` is set and ``args.mode`` is
            neither ``'TRAIN'`` nor ``'CLASSIFY'``.
    """
    with tf.Graph().as_default():
        with tf.Session() as sess:
            np.random.seed(seed=args.seed)

            if args.use_split_dataset:
                dataset_tmp = facenet.get_dataset(args.data_dir)
                train_set, test_set = split_dataset(
                    dataset_tmp, args.min_nrof_images_per_class,
                    args.nrof_train_images_per_class)
                if args.mode == 'TRAIN':
                    dataset = train_set
                elif args.mode == 'CLASSIFY':
                    dataset = test_set
                else:
                    # Previously this fell through and left ``dataset`` unbound.
                    raise ValueError('Unknown mode: %r' % (args.mode, ))
            else:
                dataset = facenet.get_dataset(args.data_dir)

            # Check that there is at least one training image per class.
            # ``assert (cond, msg)`` would test a non-empty tuple and never
            # fail, so the message goes after the comma.
            for cls in dataset:
                assert len(cls.image_paths) > 0, (
                    'There must be at least one image for each class in the dataset')

            paths, labels = facenet.get_image_paths_and_labels(dataset)

            print('Number of classes: %d' % len(dataset))
            print('Number of images: %d' % len(paths))

            # Load the model
            print('Loading feature extraction model')
            facenet.load_model(args.model)

            # Get input and output tensors
            graph = tf.get_default_graph()
            images_placeholder = graph.get_tensor_by_name("input:0")
            embeddings = graph.get_tensor_by_name("embeddings:0")
            phase_train_placeholder = graph.get_tensor_by_name(
                "phase_train:0")
            embedding_size = embeddings.get_shape()[1]

            # Run forward pass to calculate embeddings
            print('Calculating features for images')
            nrof_images = len(paths)
            nrof_batches_per_epoch = int(
                math.ceil(1.0 * nrof_images / args.batch_size))
            emb_array = np.zeros((nrof_images, embedding_size))
            for i in range(nrof_batches_per_epoch):
                start_index = i * args.batch_size
                # The last batch may be shorter than batch_size.
                end_index = min((i + 1) * args.batch_size, nrof_images)
                paths_batch = paths[start_index:end_index]
                images = facenet.load_data(paths_batch, False, False,
                                           args.image_size)
                feed_dict = {
                    images_placeholder: images,
                    phase_train_placeholder: False
                }
                emb_array[start_index:end_index, :] = sess.run(
                    embeddings, feed_dict=feed_dict)

            classifier_filename_exp = os.path.expanduser(
                args.classifier_filename)

            if args.mode == 'TRAIN':
                # Train classifier
                print('Training classifier')
                model = GaussianNB()
                #model = SVC(kernel='linear', probability=True)
                model.fit(emb_array, labels)

                # Create a list of class names
                class_names = [cls.name.replace('_', ' ') for cls in dataset]

                # Saving classifier model
                with open(classifier_filename_exp, 'wb') as outfile:
                    pickle.dump((model, class_names), outfile)
                print('Saved classifier model to file "%s"' %
                      classifier_filename_exp)

            elif args.mode == 'CLASSIFY':
                # Classify images
                print('Testing classifier')
                # NOTE(security): pickle.load executes arbitrary code from the
                # file; only load classifier files from a trusted source.
                with open(classifier_filename_exp, 'rb') as infile:
                    (model, class_names) = pickle.load(infile)

                print('Loaded classifier model from file "%s"' %
                      classifier_filename_exp)

                predictions = model.predict_proba(emb_array)
                best_class_indices = np.argmax(predictions, axis=1)
                best_class_probabilities = predictions[
                    np.arange(len(best_class_indices)), best_class_indices]

                for i, (class_index, probability) in enumerate(
                        zip(best_class_indices, best_class_probabilities)):
                    print('%4d %s: %.3f' %
                          (i, class_names[class_index], probability))

                accuracy = np.mean(np.equal(best_class_indices, labels))
                print('Accuracy: %.3f' % accuracy)
def classify(use_split_dataset, mode, data_dir, min_nrof_images_per_class,
             nrof_train_images_per_class, model, classifier_filename,
             batch_size, image_size):
    """Classify every image of a dataset with a previously pickled classifier.

    Args:
        use_split_dataset: When truthy, split the dataset with
            ``split_dataset`` and pick the part selected by ``mode``.
        mode: ``'TRAIN'`` selects the train part of the split, ``'CLASSIFY'``
            the test part. Only consulted when ``use_split_dataset`` is truthy.
        data_dir: Directory handed to ``facenet.get_dataset``.
        min_nrof_images_per_class: Forwarded to ``split_dataset``.
        nrof_train_images_per_class: Forwarded to ``split_dataset``.
        model: FaceNet model reference handed to ``facenet.load_model``.
        classifier_filename: Path of the pickled ``(model, class_names)``
            tuple; ``~`` is expanded.
        batch_size: Number of images embedded per forward pass.
        image_size: Forwarded to ``facenet.load_data``.

    Returns:
        A list of ``(index, class_name, probability)`` tuples, one per image.

    Raises:
        AssertionError: If any class of the selected dataset has no images.
        ValueError: If ``use_split_dataset`` is truthy and ``mode`` is neither
            ``'TRAIN'`` nor ``'CLASSIFY'``.
    """
    seed = 666
    with tf.Graph().as_default():
        with tf.Session() as sess:
            np.random.seed(seed=seed)

            if use_split_dataset:
                dataset_tmp = facenet.get_dataset(data_dir)
                train_set, test_set = split_dataset(
                    dataset_tmp, min_nrof_images_per_class,
                    nrof_train_images_per_class)
                # Use the ``mode`` parameter; the previous code read
                # ``args.mode``, a name this function never receives.
                if mode == 'TRAIN':
                    dataset = train_set
                elif mode == 'CLASSIFY':
                    dataset = test_set
                else:
                    raise ValueError('Unknown mode: %r' % (mode, ))
            else:
                dataset = facenet.get_dataset(data_dir)

            # Check that there is at least one image per class.
            # ``assert (cond, msg)`` would test a non-empty tuple and never
            # fail, so the message goes after the comma.
            for cls in dataset:
                assert len(cls.image_paths) > 0, (
                    'There must be at least one image for each class in the dataset')

            paths, labels = facenet.get_image_paths_and_labels(dataset)

            # Load the feature extraction model
            facenet.load_model(model)

            # Get input and output tensors
            graph = tf.get_default_graph()
            images_placeholder = graph.get_tensor_by_name("input:0")
            embeddings = graph.get_tensor_by_name("embeddings:0")
            phase_train_placeholder = graph.get_tensor_by_name(
                "phase_train:0")
            embedding_size = embeddings.get_shape()[1]

            # Run forward pass to calculate embeddings
            print('Calculating features for images')
            nrof_images = len(paths)
            nrof_batches_per_epoch = int(
                math.ceil(1.0 * nrof_images / batch_size))
            emb_array = np.zeros((nrof_images, embedding_size))
            for i in range(nrof_batches_per_epoch):
                start_index = i * batch_size
                # The last batch may be shorter than batch_size.
                end_index = min((i + 1) * batch_size, nrof_images)
                paths_batch = paths[start_index:end_index]
                images = facenet.load_data(paths_batch, False, False,
                                           image_size)
                feed_dict = {
                    images_placeholder: images,
                    phase_train_placeholder: False
                }
                emb_array[start_index:end_index, :] = sess.run(
                    embeddings, feed_dict=feed_dict)

            classifier_filename_exp = os.path.expanduser(classifier_filename)

            res = []
            # NOTE(security): pickle.load executes arbitrary code from the
            # file; only load classifier files from a trusted source.
            with open(classifier_filename_exp, 'rb') as infile:
                # Bound to a new name so the ``model`` argument is not shadowed.
                (classifier, class_names) = pickle.load(infile)

            predictions = classifier.predict_proba(emb_array)
            best_class_indices = np.argmax(predictions, axis=1)
            best_class_probabilities = predictions[
                np.arange(len(best_class_indices)), best_class_indices]

            for i, (class_index, probability) in enumerate(
                    zip(best_class_indices, best_class_probabilities)):
                class_name = class_names[class_index]
                print('%4d %s: %.3f' % (i, class_name, probability))
                res.append((i, class_name, probability))

            accuracy = np.mean(np.equal(best_class_indices, labels))
            print('Accuracy: %.3f' % accuracy)

    return res
def train(use_split_dataset, mode, data_dir, min_nrof_images_per_class,
          nrof_train_images_per_class, model, classifier_filename, batch_size,
          image_size):
    """Fit a linear SVM on FaceNet embeddings of a dataset and pickle it.

    Args:
        use_split_dataset: When truthy, split the dataset with
            ``split_dataset`` and pick the part selected by ``mode``.
        mode: ``'TRAIN'`` selects the train part of the split, ``'CLASSIFY'``
            the test part. Only consulted when ``use_split_dataset`` is truthy.
        data_dir: Directory handed to ``facenet.get_dataset``.
        min_nrof_images_per_class: Forwarded to ``split_dataset``.
        nrof_train_images_per_class: Forwarded to ``split_dataset``.
        model: FaceNet model reference handed to ``facenet.load_model``.
        classifier_filename: Output path for the pickled
            ``(classifier, class_names)`` tuple; ``~`` is expanded.
        batch_size: Number of images embedded per forward pass.
        image_size: Forwarded to ``facenet.load_data``.

    Raises:
        AssertionError: If any class of the selected dataset has no images.
        ValueError: If ``use_split_dataset`` is truthy and ``mode`` is neither
            ``'TRAIN'`` nor ``'CLASSIFY'``.
    """
    seed = 666
    with tf.Graph().as_default():
        with tf.Session() as sess:
            np.random.seed(seed=seed)

            if use_split_dataset:
                dataset_tmp = facenet.get_dataset(data_dir)
                train_set, test_set = split_dataset(
                    dataset_tmp, min_nrof_images_per_class,
                    nrof_train_images_per_class)
                # Use the ``mode`` parameter; the previous code read
                # ``args.mode``, a name this function never receives.
                if mode == 'TRAIN':
                    dataset = train_set
                elif mode == 'CLASSIFY':
                    dataset = test_set
                else:
                    raise ValueError('Unknown mode: %r' % (mode, ))
            else:
                dataset = facenet.get_dataset(data_dir)

            # Check that there is at least one training image per class.
            # ``assert (cond, msg)`` would test a non-empty tuple and never
            # fail, so the message goes after the comma.
            for cls in dataset:
                assert len(cls.image_paths) > 0, (
                    'There must be at least one image for each class in the dataset')

            paths, labels = facenet.get_image_paths_and_labels(dataset)

            # Load the feature extraction model
            facenet.load_model(model)

            # Get input and output tensors
            graph = tf.get_default_graph()
            images_placeholder = graph.get_tensor_by_name("input:0")
            embeddings = graph.get_tensor_by_name("embeddings:0")
            phase_train_placeholder = graph.get_tensor_by_name(
                "phase_train:0")
            embedding_size = embeddings.get_shape()[1]

            # Run forward pass to calculate embeddings
            print('Calculating features for images')
            nrof_images = len(paths)
            nrof_batches_per_epoch = int(
                math.ceil(1.0 * nrof_images / batch_size))
            emb_array = np.zeros((nrof_images, embedding_size))
            for i in range(nrof_batches_per_epoch):
                start_index = i * batch_size
                # The last batch may be shorter than batch_size.
                end_index = min((i + 1) * batch_size, nrof_images)
                paths_batch = paths[start_index:end_index]
                images = facenet.load_data(paths_batch, False, False,
                                           image_size)
                feed_dict = {
                    images_placeholder: images,
                    phase_train_placeholder: False
                }
                emb_array[start_index:end_index, :] = sess.run(
                    embeddings, feed_dict=feed_dict)

            classifier_filename_exp = os.path.expanduser(classifier_filename)

            print('Training classifier')
            # Bound to a new name so the ``model`` argument is not shadowed.
            classifier = SVC(kernel='linear', probability=True)
            classifier.fit(emb_array, labels)

            # Create a list of class names
            class_names = [cls.name.replace('_', ' ') for cls in dataset]

            # Saving classifier model
            with open(classifier_filename_exp, 'wb') as outfile:
                pickle.dump((classifier, class_names), outfile)
            print('Saved classifier model to file "%s"' %
                  classifier_filename_exp)
import sys import math import pickle from sklearn.svm import SVC from sklearn.cluster import KMeans import pandas as pd import cv2 import os with tf.Graph().as_default(): with tf.Session() as sess: np.random.seed(seed=666) os.system("python facenet/src/align/align_dataset_mtcnn.py " + sys.argv[1] + " aligned --image_size 160 --margin 32 --random_order") dataset = facenet.get_dataset("aligned") paths, labels = facenet.get_image_paths_and_labels(dataset) facenet.load_model('20180402-114759.pb') images_placeholder = tf.get_default_graph().get_tensor_by_name( "input:0") embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0") phase_train_placeholder = tf.get_default_graph().get_tensor_by_name( "phase_train:0") embedding_size = embeddings.get_shape()[1] images = facenet.load_data(paths, False, False, 150) emb_array_1 = np.zeros((len(labels), embedding_size)) j = min(len(images) - 1, 500) i = 0 while j < len(images): feed_dict = { images_placeholder: images[i:j], phase_train_placeholder: False
def main(args):
    """Train a softmax face classifier on a custom dataset (no LFW evaluation).

    Creates timestamped log and model directories, loads (and optionally
    filters) the dataset, builds a queue-based preprocessing pipeline and the
    training graph, then calls ``train`` repeatedly until the epoch derived
    from the global step reaches ``args.max_nrof_epochs``, saving the
    variables after each call.

    Args:
        args: Parsed command-line namespace; the attributes read are the ones
            referenced in the body (``model_def``, ``data_dir``, ``seed``,
            ``batch_size``, ``epoch_size``, ``image_size``, ...).

    Returns:
        The path of the directory the model checkpoints are written to.

    Raises:
        AssertionError: If the dataset contains no images.
    """
    network = importlib.import_module(args.model_def)

    # Log and model directories share one timestamped sub-directory name.
    subdir = datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S')
    log_dir = os.path.join(os.path.expanduser(args.logs_base_dir), subdir)
    if not os.path.isdir(log_dir):  # Create the log directory if it doesn't exist
        os.makedirs(log_dir)
    model_dir = os.path.join(os.path.expanduser(args.models_base_dir), subdir)
    if not os.path.isdir(model_dir):  # Create the model directory if it doesn't exist
        os.makedirs(model_dir)

    # Write arguments to a text file
    facenet.write_arguments_to_file(args,
                                    os.path.join(log_dir, 'arguments.txt'))

    # Store some git revision info in a text file in the log directory
    src_path, _ = os.path.split(os.path.realpath(__file__))
    facenet.store_revision_info(src_path, log_dir, ' '.join(sys.argv))

    # Seed both NumPy's and the stdlib's random generators.
    np.random.seed(seed=args.seed)
    random.seed(args.seed)
    train_set = facenet.get_dataset(args.data_dir)
    if args.filter_filename:
        train_set = filter_dataset(train_set,
                                   os.path.expanduser(args.filter_filename),
                                   args.filter_percentile,
                                   args.filter_min_nrof_images_per_class)
    nrof_classes = len(train_set)

    print('Model directory: %s' % model_dir)
    print('Log directory: %s' % log_dir)
    pretrained_model = None
    if args.pretrained_model:
        pretrained_model = os.path.expanduser(args.pretrained_model)
        print('Pre-trained model: %s' % pretrained_model)

    # Removed lfw dir. Using custom dataset
    # if args.lfw_dir:
    #     print('LFW directory: %s' % args.lfw_dir)
    #     # Read the file containing the pairs used for testing
    #     pairs = lfw.read_pairs(os.path.expanduser(args.lfw_pairs))
    #     # Get the paths for the corresponding images
    #     lfw_paths, actual_issame = lfw.get_paths(os.path.expanduser(args.lfw_dir), pairs, args.lfw_file_ext)

    with tf.Graph().as_default():
        tf.set_random_seed(args.seed)
        global_step = tf.Variable(0, trainable=False)

        # Get a list of image paths and their labels
        image_list, label_list = facenet.get_image_paths_and_labels(train_set)
        assert len(image_list) > 0, 'The dataset should not be empty'

        # Create a queue that produces indices into the image_list and label_list
        labels = ops.convert_to_tensor(label_list, dtype=tf.int32)
        range_size = array_ops.shape(labels)[0]
        index_queue = tf.train.range_input_producer(range_size,
                                                    num_epochs=None,
                                                    shuffle=True,
                                                    seed=None,
                                                    capacity=32)

        # One dequeue yields batch_size * epoch_size indices.
        index_dequeue_op = index_queue.dequeue_many(
            args.batch_size * args.epoch_size, 'index_dequeue')

        learning_rate_placeholder = tf.placeholder(tf.float32,
                                                   name='learning_rate')

        batch_size_placeholder = tf.placeholder(tf.int32, name='batch_size')

        phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train')

        image_paths_placeholder = tf.placeholder(tf.string,
                                                 shape=(None, 1),
                                                 name='image_paths')

        labels_placeholder = tf.placeholder(tf.int64,
                                            shape=(None, 1),
                                            name='labels')

        # FIFO queue of (image path, label) pairs, fed through enqueue_op.
        input_queue = data_flow_ops.FIFOQueue(capacity=100000,
                                              dtypes=[tf.string, tf.int64],
                                              shapes=[(1, ), (1, )],
                                              shared_name=None,
                                              name=None)
        enqueue_op = input_queue.enqueue_many(
            [image_paths_placeholder, labels_placeholder], name='enqueue_op')

        # Build one decode/augment branch per preprocessing thread; the
        # branches are joined into batches by tf.train.batch_join below.
        nrof_preprocess_threads = 4
        images_and_labels = []
        for _ in range(nrof_preprocess_threads):
            filenames, label = input_queue.dequeue()
            images = []
            for filename in tf.unstack(filenames):
                file_contents = tf.read_file(filename)
                image = tf.image.decode_image(file_contents, channels=3)
                if args.random_rotate:
                    image = tf.py_func(facenet.random_rotate_image, [image],
                                       tf.uint8)
                if args.random_crop:
                    image = tf.random_crop(
                        image, [args.image_size, args.image_size, 3])
                else:
                    image = tf.image.resize_image_with_crop_or_pad(
                        image, args.image_size, args.image_size)
                if args.random_flip:
                    image = tf.image.random_flip_left_right(image)

                #pylint: disable=no-member
                image.set_shape((args.image_size, args.image_size, 3))
                images.append(tf.image.per_image_standardization(image))
            images_and_labels.append([images, label])

        image_batch, label_batch = tf.train.batch_join(
            images_and_labels,
            batch_size=batch_size_placeholder,
            shapes=[(args.image_size, args.image_size, 3), ()],
            enqueue_many=True,
            capacity=4 * nrof_preprocess_threads * args.batch_size,
            allow_smaller_final_batch=True)
        # Give the batch tensors fixed names ('image_batch', 'input',
        # 'label_batch') in the graph.
        image_batch = tf.identity(image_batch, 'image_batch')
        image_batch = tf.identity(image_batch, 'input')
        label_batch = tf.identity(label_batch, 'label_batch')

        print('Total number of classes: %d' % nrof_classes)
        print('Total number of examples: %d' % len(image_list))

        print('Building training graph')

        # Build the inference graph
        prelogits, _ = network.inference(
            image_batch,
            args.keep_probability,
            phase_train=phase_train_placeholder,
            bottleneck_layer_size=args.embedding_size,
            weight_decay=args.weight_decay)
        # Classification layer with one output per class of the dataset.
        logits = slim.fully_connected(
            prelogits,
            len(train_set),
            activation_fn=None,
            weights_initializer=tf.truncated_normal_initializer(stddev=0.1),
            weights_regularizer=slim.l2_regularizer(args.weight_decay),
            scope='Logits',
            reuse=False)

        embeddings = tf.nn.l2_normalize(prelogits, 1, 1e-10, name='embeddings')

        # Add center loss
        if args.center_loss_factor > 0.0:
            prelogits_center_loss, _ = facenet.center_loss(
                prelogits, label_batch, args.center_loss_alfa, nrof_classes)
            tf.add_to_collection(
                tf.GraphKeys.REGULARIZATION_LOSSES,
                prelogits_center_loss * args.center_loss_factor)

        learning_rate = tf.train.exponential_decay(
            learning_rate_placeholder,
            global_step,
            args.learning_rate_decay_epochs * args.epoch_size,
            args.learning_rate_decay_factor,
            staircase=True)
        tf.summary.scalar('learning_rate', learning_rate)

        # Calculate the average cross entropy loss across the batch
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=label_batch, logits=logits,
            name='cross_entropy_per_example')
        cross_entropy_mean = tf.reduce_mean(cross_entropy,
                                            name='cross_entropy')
        tf.add_to_collection('losses', cross_entropy_mean)

        # Calculate the total losses
        regularization_losses = tf.get_collection(
            tf.GraphKeys.REGULARIZATION_LOSSES)
        total_loss = tf.add_n([cross_entropy_mean] + regularization_losses,
                              name='total_loss')

        # Build a Graph that trains the model with one batch of examples and updates the model parameters
        train_op = facenet.train(total_loss, global_step, args.optimizer,
                                 learning_rate, args.moving_average_decay,
                                 tf.global_variables(), args.log_histograms)

        # Create a saver
        saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=3)

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.summary.merge_all()

        # Start running operations on the Graph.
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=args.gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
        coord = tf.train.Coordinator()
        tf.train.start_queue_runners(coord=coord, sess=sess)

        with sess.as_default():

            if pretrained_model:
                print('Restoring pretrained model: %s' % pretrained_model)
                # The saver-based restore is disabled; the pretrained model is
                # loaded through facenet.load_model instead.
                # saver.restore(sess, pretrained_model)
                facenet.load_model(pretrained_model)

            # Training and validation loop
            print('Running training')
            epoch = 0
            while epoch < args.max_nrof_epochs:
                # The epoch is derived from the global step counter rather
                # than counted locally.
                step = sess.run(global_step, feed_dict=None)
                epoch = step // args.epoch_size
                # Train for one epoch
                train(args, sess, epoch, image_list, label_list,
                      index_dequeue_op, enqueue_op, image_paths_placeholder,
                      labels_placeholder, learning_rate_placeholder,
                      phase_train_placeholder, batch_size_placeholder,
                      global_step, total_loss, train_op, summary_op,
                      summary_writer, regularization_losses,
                      args.learning_rate_schedule_file)

                # Save variables and the metagraph if it doesn't exist already
                save_variables_and_metagraph(sess, saver, summary_writer,
                                             model_dir, subdir, step)

                # # Evaluate on LFW
                # if args.lfw_dir:
                #     evaluate(sess, enqueue_op, image_paths_placeholder, labels_placeholder, phase_train_placeholder, batch_size_placeholder,
                #         embeddings, label_batch, lfw_paths, actual_issame, args.lfw_batch_size, args.lfw_nrof_folds, log_dir, step, summary_writer)
    return model_dir
def embeding(model_dir='20170512-110547',
             data_dir='database_aligned',
             is_aligned=True,
             image_size=160,
             margin=44,
             gpu_memory_fraction=1.0,
             image_batch=1000,
             embeddings_name='embeddings.npy',
             labels_name='labels.npy',
             labels_strings_name='label_strings.npy',
             return_image_list=False):
    """Compute facenet embeddings for every image under ``data_dir`` and save them.

    The model in ``model_dir`` is loaded into a fresh graph/session, the
    images are pushed through it in batches of ``image_batch`` and the
    results are written with ``np.save``:

    * ``embeddings_name`` -- the embedding matrix (always written).
    * ``labels_name`` -- integer labels shifted so the smallest is 0.
    * ``labels_strings_name`` -- the class-directory name for each image.
    * ``'image_list.npy'`` -- the image paths.

    The last three are only written when at least one embedding exists.

    Args:
        model_dir: Model location handed to ``facenet.load_model``.
        data_dir: Directory containing one sub-directory per class.
        is_aligned: When exactly ``True`` images are read with
            ``facenet.load_data``; otherwise ``load_and_align_data`` is used.
        image_size: Image size passed to the loader.
        margin: Margin passed to ``load_and_align_data``.
        gpu_memory_fraction: Passed to ``load_and_align_data``.
        image_batch: Number of images per forward pass.
        embeddings_name: Output path for the embedding matrix.
        labels_name: Output path for the integer labels.
        labels_strings_name: Output path for the per-image class names.
        return_image_list: When truthy, also save the image paths to
            ``'validation_image_list.npy'`` and return a result.

    Returns:
        ``(image_list, emb_array)`` when ``return_image_list`` is truthy,
        otherwise ``None``.
    """
    train_set = facenet.get_dataset(data_dir)
    image_list, label_list = facenet.get_image_paths_and_labels(train_set)

    # Class names (labels as strings), listed the way get_dataset lists them.
    # os.listdir returns entries in arbitrary order, so the listing must be
    # sorted for position i to name the class that carries integer label i.
    # NOTE(review): relies on facenet.get_dataset enumerating the class
    # directories in sorted order (upstream facenet does) -- confirm for the
    # copy of facenet used here.
    path_exp = os.path.expanduser(data_dir)
    label_strings = sorted(
        name for name in os.listdir(path_exp)
        if os.path.isdir(os.path.join(path_exp, name)))

    with tf.Graph().as_default():
        with tf.Session() as sess:
            # Load model
            facenet.load_model(model_dir)

            # Get input and output tensors
            graph = tf.get_default_graph()
            images_placeholder = graph.get_tensor_by_name("input:0")
            embeddings = graph.get_tensor_by_name("embeddings:0")
            phase_train_placeholder = graph.get_tensor_by_name(
                "phase_train:0")

            # Run forward pass to calculate embeddings
            nrof_images = len(image_list)
            print('Number of images: ', nrof_images)
            batch_size = image_batch
            # Ceiling division: a final partial batch still counts as one.
            nrof_batches = (nrof_images + batch_size - 1) // batch_size
            print('Number of batches: ', nrof_batches)
            embedding_size = embeddings.get_shape()[1]
            emb_array = np.zeros((nrof_images, embedding_size))
            start_time = time.time()

            for batch_no, start in enumerate(
                    range(0, nrof_images, batch_size), 1):
                # The last batch is clipped to the number of images.
                stop = min(start + batch_size, nrof_images)
                batch_paths = image_list[start:stop]

                # Get images for the batch
                if is_aligned is True:
                    images = facenet.load_data(batch_paths, False, False,
                                               image_size)
                else:
                    images = load_and_align_data(batch_paths, image_size,
                                                 margin, gpu_memory_fraction)

                feed_dict = {
                    images_placeholder: images,
                    phase_train_placeholder: False,
                }
                # Use the facenet model to calculate embeddings
                emb_array[start:stop, :] = sess.run(embeddings,
                                                    feed_dict=feed_dict)
                print('Completed batch', batch_no, 'of', nrof_batches)

            run_time = time.time() - start_time
            print('Run time: ', run_time)

            # export embeddings and labels
            label_list = np.array(label_list)
            np.save(embeddings_name, emb_array)
            if emb_array.size > 0:
                # Shift the labels so that the smallest one becomes 0.
                labels_final = label_list - np.min(label_list)
                np.save(labels_name, labels_final)
                label_strings = np.array(label_strings)
                np.save(labels_strings_name, label_strings[labels_final])
                np.save('image_list.npy', image_list)
            if return_image_list:
                np.save('validation_image_list.npy', image_list)
                return image_list, emb_array
def like_or_dislike_users(self, users):
    """Like or dislike every user in ``users`` according to ``self.model``.

    For each user the photos are downloaded, aligned into
    ``'temp_images_aligned'``, embedded in batches through ``self.sess``
    using the tensors of the default graph, reduced with
    ``calc_avg_emb_temp`` and classified with ``self.model.predict``.  A
    first prediction equal to 1 means 'Like'; any other prediction, or a
    profile that produced no embeddings, means 'Dislike'.

    Every decision is appended to ``self.al_database`` and the database is
    saved to ``'al_database.npy'``.  ``self.likes_left`` is decremented
    after each like.

    If the model likes the same user id twice in a row, the method sets
    ``self.likes_left`` to -1 and returns immediately without recording
    that user.

    Args:
        users: Iterable of user objects; the code uses their ``id``,
            ``name``, ``age``, ``bio``, ``distance_km``, ``jobs`` and
            ``schools`` attributes and their ``get_photos``, ``like`` and
            ``dislike`` methods.
    """
    # facenet settings, mirroring the ones used by export_embeddings
    aligned_dir = 'temp_images_aligned'
    emb_file = 'temp_embeddings.npy'
    is_aligned = True
    image_size = 160
    margin = 44
    gpu_memory_fraction = 1.0
    batch_size = 1000

    last_liked_id = None
    for user in users:
        clean_temp_images()
        photo_urls = user.get_photos(width='640')
        downloaded = download_url_photos(photo_urls, user.id, is_temp=True)

        # align the freshly downloaded photos
        tindetheus_align.main(input_dir='temp_images',
                              output_dir='temp_images_aligned')

        # collect the aligned images that will be embedded
        aligned_set = facenet.get_dataset(aligned_dir)
        aligned_paths, labels = facenet.get_image_paths_and_labels(
            aligned_set)

        # input and output tensors of the loaded model
        graph = tf.get_default_graph()
        images_placeholder = graph.get_tensor_by_name("input:0")
        embeddings = graph.get_tensor_by_name("embeddings:0")
        phase_train_placeholder = graph.get_tensor_by_name("phase_train:0")

        # forward pass, one batch at a time
        nrof_images = len(aligned_paths)
        print('Number of images: ', nrof_images)
        # ceiling division: a final partial batch still counts as one
        nrof_batches = (nrof_images + batch_size - 1) // batch_size
        print('Number of batches: ', nrof_batches)
        embedding_size = embeddings.get_shape()[1]
        emb_array = np.zeros((nrof_images, embedding_size))
        start_time = time.time()

        for batch_no, lo in enumerate(range(0, nrof_images, batch_size), 1):
            hi = min(lo + batch_size, nrof_images)
            batch_paths = aligned_paths[lo:hi]
            if is_aligned is True:
                images = facenet.load_data(batch_paths, False, False,
                                           image_size)
            else:
                images = load_and_align_data(batch_paths, image_size,
                                             margin, gpu_memory_fraction)
            embed = self.sess.run(
                embeddings,
                feed_dict={
                    images_placeholder: images,
                    phase_train_placeholder: False,
                })
            emb_array[lo:hi, :] = embed
            print('Completed batch', batch_no, 'of', nrof_batches)

        run_time = time.time() - start_time
        print('Run time: ', run_time)

        # export the embeddings
        labels = np.array(labels)
        np.save(emb_file, emb_array)

        # 'Dislike' unless the model says otherwise; a profile without any
        # detected face never reaches the model at all
        verdict = 'Dislike'
        if emb_array.size > 0:
            # average embedding of the profile, then ask the model
            profile_emb = calc_avg_emb_temp(emb_array)
            prediction = self.model.predict(profile_emb)
            if prediction[0] == 1:
                verdict = 'Like'
                # the same user showing up again right after being liked
                if last_liked_id == user.id:
                    clean_temp_images_aligned()
                    print('\n\n You have already liked this user!!! \n \n')
                    print('This typically means you have used all of your'
                          ' free likes. Exiting program!!! \n\n')
                    self.likes_left = -1
                    return
                last_liked_id = user.id

        print('**************************************************')
        print(user.name, user.age, verdict)
        print('**************************************************')

        dbase_names = move_images_temp(downloaded, user.id)

        if verdict == 'Like':
            print(user.like())
            self.likes_left -= 1
        else:
            print(user.dislike())

        # record the decision and persist the whole database
        self.al_database.append([
            user.id,
            user.name,
            user.age,
            user.bio,
            user.distance_km,
            user.jobs,
            user.schools,
            user.get_photos(width='640'),
            dbase_names,
            verdict,
        ])
        np.save('al_database.npy', self.al_database)
        clean_temp_images_aligned()