Exemple #1
0
def split_dataset(dataset, min_nrof_images_per_class,
                  nrof_test_images_per_class):
    """
    split_dataset - partition every sufficiently large class into a test part and a train part

    args    dataset - iterable of class objects exposing `name` and `image_paths`
            min_nrof_images_per_class - classes with fewer images than this are left out
            nrof_test_images_per_class - num of (shuffled) images per class placed in the test set;
                                         all remaining images of the class go to the train set

    returns train_set - dataset for training
            test_set - dataset for testing
            num_classes - number of classes that were kept

    note    the image_paths list of every kept class is shuffled in place
    """
    train_set, test_set = [], []

    for image_class in dataset:
        image_paths = image_class.image_paths
        if len(image_paths) < min_nrof_images_per_class:
            # Too few images: this class appears in neither set
            continue

        np.random.shuffle(image_paths)
        held_out = image_paths[:nrof_test_images_per_class]
        remainder = image_paths[nrof_test_images_per_class:]
        test_set.append(facenet.ImageClass(image_class.name, held_out))
        train_set.append(facenet.ImageClass(image_class.name, remainder))

    num_classes = len(test_set)
    print('Classes (Number of Faces): %d' % num_classes)

    return train_set, test_set, num_classes
Exemple #2
0
def split_dataset(dataset, min_nrof_images_per_class,
                  nrof_train_images_per_class):
    """Split each sufficiently large class into a training and a testing part.

    Classes with fewer than `min_nrof_images_per_class` images are skipped.
    For every kept class the image paths are shuffled in place; the first
    `nrof_train_images_per_class` paths form the training class and the
    remaining paths form the testing class.

    Returns a `(train_set, test_set)` pair of lists of facenet.ImageClass.
    Progress information is printed to stdout.
    """
    print("Inside split_dataset function call!")
    print(min_nrof_images_per_class)
    print(nrof_train_images_per_class)

    train_set, test_set = [], []
    for image_class in dataset:
        image_paths = image_class.image_paths
        if len(image_paths) < min_nrof_images_per_class:
            # Not enough images for this class: drop it from both sets
            continue

        np.random.shuffle(image_paths)
        # Head of the shuffled list is used for training, the tail for testing
        head = image_paths[:nrof_train_images_per_class]
        tail = image_paths[nrof_train_images_per_class:]
        train_set.append(facenet.ImageClass(image_class.name, head))
        test_set.append(facenet.ImageClass(image_class.name, tail))

    print("Len of training set: ", len(train_set))
    print("Len of testing set: ", len(test_set))
    return train_set, test_set
Exemple #3
0
def split_dataset(dataset, min_nrof_images_per_class, nrof_train_images_per_class):
    """Return `(train_set, test_set)` built from the classes in `dataset`.

    A class is used only if it has at least `min_nrof_images_per_class`
    images. Its image paths are shuffled in place, then the first
    `nrof_train_images_per_class` paths go to the training class and the
    rest to the testing class.
    """
    train_set, test_set = [], []
    for image_class in dataset:
        image_paths = image_class.image_paths
        if len(image_paths) < min_nrof_images_per_class:
            # Class is too small to be used
            continue
        np.random.shuffle(image_paths)
        train_part = image_paths[:nrof_train_images_per_class]
        test_part = image_paths[nrof_train_images_per_class:]
        train_set.append(facenet.ImageClass(image_class.name, train_part))
        test_set.append(facenet.ImageClass(image_class.name, test_part))
    return train_set, test_set
def Remove_duplicate_names(args, dataset):
    """Drop classes whose names are already present in the saved features file.

    Args:
        args: object with a `features_filename` attribute; the path ('~' is
            expanded) of a pickle file holding an
            `(embeddings, labels, class_names)` tuple.
        dataset: iterable of class objects exposing `name` and `image_paths`.

    Returns:
        A `(stak_host, dataset_n)` pair. `stak_host` is the
        `(embeddings, labels, class_names)` tuple loaded from the file.
        `dataset_n` is a list with the classes of `dataset` whose name
        (underscores replaced by spaces) is NOT among the stored class names;
        the class objects themselves are passed through unchanged.

    A summary of what was removed is printed to stdout.
    """
    features_filename = os.path.expanduser(args.features_filename)

    print("load_features from file :'%s'" % args.features_filename)
    # NOTE(review): pickle.load can execute arbitrary code contained in the
    # file; only point this at feature files you trust.
    with open(features_filename, 'rb') as infile:
        (emb_old, labels_old, class_old) = pickle.load(infile)
    stak_host = emb_old, labels_old, class_old

    # Build the lookup once so each membership test is O(1)
    known_names = set(class_old)

    dataset_n = []
    name_deleted = []
    image_del = 0
    for cls in dataset:
        name = cls.name.replace('_', ' ')  # stored names use spaces
        if name in known_names:
            name_deleted.append(name)
            image_del += len(cls.image_paths)
        else:
            dataset_n.append(cls)

    if image_del > 0:
        # Both values must be passed as ONE tuple; `"..." % a, b` would apply
        # `%` to `a` alone and raise TypeError for the two placeholders.
        print(
            "About (%s) files of data were deleted for about '%s' people whose names already existd"
            % (image_del, len(set(name_deleted))))
    else:
        print("No one has been deleted(%s)" % len(name_deleted))

    # Return the filtered list; the input `dataset` still contains duplicates
    return stak_host, dataset_n
Exemple #5
0
def get_unsupervised_dataset(path):
    """Gather all image paths of each domain directory into one ImageClass.

    Every sub-directory of `path` (after '~' expansion) is treated as a
    domain, except the one named "id+camera", which is skipped. For a domain
    the result combines:
      * the paths returned by facenet.get_image_paths for the domain
        directory itself, restricted to the extensions '.jpg' and '.png';
      * the paths returned by facenet.get_image_paths for each of its
        sub-directories (in sorted order, no extension filtering).

    Returns a dict mapping domain name -> facenet.ImageClass(domain name, paths).
    """
    root = os.path.expanduser(path)
    domain_names = sorted(
        entry for entry in os.listdir(root)
        if os.path.isdir(os.path.join(root, entry)))

    domain_unsupervised_dataset = {}
    for domain_name in domain_names:
        if domain_name == "id+camera":
            continue

        domain_dir = os.path.join(root, domain_name)

        # Entries found for the domain directory itself: keep .jpg/.png only
        image_paths = [
            image_path
            for image_path in facenet.get_image_paths(domain_dir)
            if os.path.splitext(os.path.basename(image_path))[1]
            in ('.jpg', '.png')
        ]

        # Append everything found for each class sub-directory
        class_names = sorted(
            entry for entry in os.listdir(domain_dir)
            if os.path.isdir(os.path.join(domain_dir, entry)))
        for class_name in class_names:
            class_dir = os.path.join(domain_dir, class_name)
            image_paths += facenet.get_image_paths(class_dir)

        domain_unsupervised_dataset[domain_name] = facenet.ImageClass(
            domain_name, image_paths)

    return domain_unsupervised_dataset
Exemple #6
0
def get_dataset(path, has_class_directories=True):
    """Return a one-element dataset holding the class "nil".

    The image paths are those facenet.get_image_paths reports for the "nil"
    sub-directory of `path`. `has_class_directories` is accepted but not used.
    """
    # Expand "~" and "~user" in the path to the user's home directory
    nil_dir = os.path.join(os.path.expanduser(path), "nil")
    nil_paths = facenet.get_image_paths(nil_dir)
    return [facenet.ImageClass("nil", nil_paths)]
Exemple #7
0
def get_supervised_dataset_single(path, nrof_data_augmentation):
    """Build a list with one facenet.ImageClass per class sub-directory of `path`.

    Sub-directories are visited in sorted order; plain files directly inside
    `path` are ignored. `nrof_data_augmentation` is forwarded unchanged to
    facenet.get_image_paths for every class directory.
    """
    root = os.path.expanduser(path)
    class_names = sorted(
        entry for entry in os.listdir(root)
        if os.path.isdir(os.path.join(root, entry)))

    dataset = []
    for class_name in class_names:
        class_dir = os.path.join(root, class_name)
        class_images = facenet.get_image_paths(class_dir,
                                               nrof_data_augmentation)
        dataset.append(facenet.ImageClass(class_name, class_images))

    return dataset
def get_dataset(path, has_class_directories=True):
    """Load a two-level directory tree as {top-level dir name: [ImageClass, ...]}.

    Each sub-directory of `path` becomes a key; its value is a list with one
    facenet.ImageClass per sub-sub-directory, in sorted order. A top-level
    directory without class sub-directories maps to an empty list.
    `has_class_directories` is accepted but not used.
    """
    root = os.path.expanduser(path)
    group_names = sorted(
        entry for entry in os.listdir(root)
        if os.path.isdir(os.path.join(root, entry)))

    datadict = {}
    for group_name in group_names:
        group_dir = os.path.join(root, group_name)
        class_names = sorted(
            entry for entry in os.listdir(group_dir)
            if os.path.isdir(os.path.join(group_dir, entry)))

        group_classes = []
        for class_name in class_names:
            class_dir = os.path.join(group_dir, class_name)
            group_classes.append(
                facenet.ImageClass(class_name,
                                   facenet.get_image_paths(class_dir)))
        datadict[group_name] = group_classes

    return datadict
def get_dataset(paths):
    """Build one facenet.ImageClass per video directory under the given roots.

    `paths` is a ':'-separated string of root directories laid out as
    <root>/<class>/<video>/<image>. For every video directory, at most the
    first 100 entries returned by os.listdir are used, and the class is named
    "<class>/<video>" (joined with os.path.join). Non-directory entries at
    the class and video levels are ignored. Directory listings are used in
    os.listdir order (not sorted).
    """
    dataset = []
    for root in paths.split(':'):
        root_dir = os.path.expanduser(root)
        class_entries = os.listdir(root_dir)
        nrof_classes = len(class_entries)
        print('Number of classes:', nrof_classes)

        for class_entry in class_entries:
            class_dir = os.path.join(root_dir, class_entry)
            if not os.path.isdir(class_dir):
                continue

            for video_entry in os.listdir(class_dir):
                video_dir = os.path.join(class_dir, video_entry)
                if not os.path.isdir(video_dir):
                    continue

                # Cap the number of images taken from a single video
                frame_names = os.listdir(video_dir)[:100]
                frame_paths = [
                    os.path.join(video_dir, frame_name)
                    for frame_name in frame_names
                ]
                label = os.path.join(class_entry, video_entry)
                dataset.append(facenet.ImageClass(label, frame_paths))
    return dataset
Exemple #10
0
def get_supervised_dataset_multiple(path, nrof_data_augmentation):
    """Load a <path>/<domain>/<class>/ tree as {domain name: [ImageClass, ...]}.

    Domains and classes are processed in sorted order.
    `nrof_data_augmentation` is forwarded unchanged to
    facenet.get_image_paths for every class directory. Domains that contain
    no class sub-directory are omitted from the result. Classes with the same
    name in different domains are kept as separate entries (no merging).
    """
    root = os.path.expanduser(path)
    domain_names = sorted(
        entry for entry in os.listdir(root)
        if os.path.isdir(os.path.join(root, entry)))

    domain_supervised_dataset = {}
    for domain_name in domain_names:
        domain_dir = os.path.join(root, domain_name)
        class_names = sorted(
            entry for entry in os.listdir(domain_dir)
            if os.path.isdir(os.path.join(domain_dir, entry)))

        domain_classes = []
        for class_name in class_names:
            class_dir = os.path.join(domain_dir, class_name)
            class_images = facenet.get_image_paths(class_dir,
                                                   nrof_data_augmentation)
            domain_classes.append(facenet.ImageClass(class_name, class_images))

        # Only domains that actually contain classes are reported
        if len(domain_classes) > 0:
            domain_supervised_dataset[domain_name] = domain_classes

    return domain_supervised_dataset
Exemple #11
0
def train():
   """Interactively train an SVC classifier on face embeddings and pickle it.

   All settings are read from stdin with input(); the function takes no
   parameters. Prompts, in order (default used when ENTER is pressed):
     * image directory ('output')
     * GPU memory fraction (read but not used by the active code below)
     * folder containing the 20180402-114759 model folder (current directory)
     * batch size (90) and image size (160)
     * classifier file name without extension ('classifier'); '.pkl' is appended
     * whether to split the dataset ('y' = yes) and, if so, the training
       percentage (70)
     * minimum number of images per class (0)

   The dataset is loaded with facenet.get_dataset, embeddings are obtained
   from get_embeddings, an SVC (presumably scikit-learn's -- confirm against
   the file's imports) with a linear kernel is fitted, and the tuple
   (model, class_names) is pickled to the classifier file. When the dataset
   was split, test(...) is run on the held-out part; otherwise the user is
   asked whether to call test() or exit via sys.exit().
   """
   # All settings are asked for on every run; no function parameters are passed.
   # The dataset is split only if the user asks for it; the test step then runs on the split-off test set, otherwise (optionally) on the whole dataset.
   
   path = input("\nEnter the path to the face images directory inside which multiple user folders are present or press ENTER if the default created output folder is present in this code directory only: ")
   if path == "":
      path = 'output'

   # NOTE(review): gpu_fraction is read here but never used afterwards; the
   # code that would convert it is disabled inside the string literal below.
   gpu_fraction = input("\nEnter the gpu memory fraction u want to allocate out of 1 or press ENTER for default 0.8: ").rstrip()
   
   ''' 
   if gpu_fraction == "":
      gpu_fraction = 0.8
   else:
      gpu_fraction = round(float(gpu_fraction), 1)
   '''

   # Resolve the path of the frozen model file (.pb) inside the model folder
   model = input("\nEnter the FOLDER PATH inside which 20180402-114759 FOLDER is present. Press ENTER stating that the FOLDER 20180402-114759 is present in this code directory itself: ").rstrip()
   if model == "":
      model = "20180402-114759/20180402-114759.pb"
   else:
      model += "/20180402-114759/20180402-114759.pb"

   batch_size = 90
   ask = input("\nEnter the batch size of images to process at once OR press ENTER for default 90: ").rstrip().lstrip()
   if ask != "":
     batch_size = int(ask)

   image_size = 160
   ask = input("\nEnter the width_size of face images OR press ENTER for default 160: ").rstrip().lstrip()
   if ask != "":
     image_size = int(ask)

   classifier_filename = input("Enter the output SVM classifier filename OR press ENTER for default name= classifier: ")
   if classifier_filename == "":
      classifier_filename = 'classifier.pkl'
   else:
      classifier_filename += '.pkl'
   classifier_filename = os.path.expanduser(classifier_filename)

   # Local string flag ('y' means split). Inside this function the name hides
   # any module-level function that is also called split_dataset.
   split_dataset = input("\nPress Y if you want to split the dataset for Training and Testing: ").rstrip().lstrip().lower()

   # If yes, ask for the percentage of images that go to the training set.
   percentage = 70
   if split_dataset == 'y':
      ask = input("\nEnter the percentage of training dataset for splitting OR press ENTER for default 70: ").rstrip().lstrip()
      if ask != "":
        percentage = float(ask)

   # NOTE(review): this minimum is only applied in the split branch below;
   # without a split every class of the dataset is used.
   min_nrof_images_per_class = 0
   ask = input("\nEnter the minimum number of images that much be present for a single user to include him for classification. Press ENTER for default value 0: ")
   if ask != "":
     min_nrof_images_per_class = int(ask)

   dataset = facenet.get_dataset(path)
   train_set = []
   test_set = []
   
   if split_dataset == 'y':
     for cls in dataset:
         paths = cls.image_paths
         # Remove classes with less than min_nrof_images_per_class
         if len(paths) >= min_nrof_images_per_class:
            np.random.shuffle(paths)

            # Number of training images for this class; int() truncates, so
            # a small class can end up with 0 training images.
            no_train_images = int(percentage * len(paths) * 0.01)

            train_set.append(facenet.ImageClass(cls.name, paths[:no_train_images]))
            test_set.append(facenet.ImageClass(cls.name, paths[no_train_images:]))
     
     # NOTE(review): the triple-quoted string below is disabled code kept as a
     # no-op string expression; the sanity checks inside it never run.
     ''' Check that there are at least one training image per class
     for cls in train_set:
        assert(len(cls.image_paths)>0, '\nUnable to have at least one image in train set for one of the class. Change parameter values.')
     for cls in test_set:
        assert(len(cls.image_paths)>0, '\nUnable to have at least one image in test set for one of the class. Change parameter values.')

   else:
       # Check that there are at least one training image per class
       for cls in dataset:
          assert(len(cls.image_paths)>0, '\nThere must be at least one image for each class in the dataset')
     '''
   paths_train = []
   labels_train = []
   paths_test = []
   labels_test = []
   emb_array = []
   class_names = []

   if split_dataset == 'y':
      paths_train, labels_train = facenet.get_image_paths_and_labels(train_set)
      paths_test, labels_test = facenet.get_image_paths_and_labels(test_set)
      print('\nNumber of classes: %d' % len(train_set))
      print('\nNumber of images in TRAIN set: %d' % len(paths_train))
      print('\nNumber of images in TEST set: %d' % len(paths_test))
   else:
      paths_train, labels_train = facenet.get_image_paths_and_labels(dataset)  
      print('\nNumber of classes: %d' % len(dataset))
      print('\nNumber of images: %d' % len(paths_train))

   # Compute the embeddings of the training images
   emb_array = get_embeddings(model, paths_train, batch_size, image_size)

   # Train the classifier
   print('\nTraining classifier')
   model_svc = SVC(kernel='linear', probability=True)
   model_svc.fit(emb_array, labels_train)

   # Create a list of class names (underscores in directory names become spaces)
   if split_dataset == 'y':
      class_names = [ cls.name.replace('_', ' ') for cls in train_set]
   else:
      class_names = [cls.name.replace('_', ' ') for cls in dataset]

   # Save the classifier model together with the class names
   with open(classifier_filename, 'wb') as outfile:
        pickle.dump((model_svc, class_names), outfile)
  
   print('\nSaved classifier model to file: "%s"' % classifier_filename)
   
   if split_dataset == 'y':
     # Compute the embeddings of the test images
     emb_array = get_embeddings(model, paths_test, batch_size, image_size)
     
     # Call test on the test set.
     test(classifier_filename, emb_array, labels_test, model, batch_size, image_size)

   else:
     # Ask the user whether to run the test on the whole dataset
     ask = input("Press y if you want to run the TEST on whole dataset or press ENTER to exit: ").rstrip().lstrip().lower()
     if ask == 'y':
        # NOTE(review): test() is called without arguments here but with six
        # arguments above -- confirm that its signature provides defaults.
        test()
     else:
        sys.exit()
Exemple #12
0
def main(args):
    """Report identity accuracy on the full dataset and on its single-face subset.

    Reads `args.input_dir`, `args.gpu_memory_fraction` and `args.random_order`.
    Every image of the dataset is passed through the MTCNN face detector
    created by align.detect_face.create_mtcnn; an image counts as
    successfully aligned only when exactly one face is detected. The images
    that pass are regrouped per class name into a filtered dataset.
    test_identity is then called on the full and on the filtered dataset and
    its results are printed as accuracies, together with the fraction of
    successfully aligned images.
    """
    dataset = facenet.get_dataset(args.input_dir)
    print('Creating networks and loading parameters')

    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=args.gpu_memory_fraction)
        session_config = tf.ConfigProto(gpu_options=gpu_options,
                                        log_device_placement=False)
        sess = tf.Session(config=session_config)
        with sess.as_default():
            pnet, rnet, onet = align.detect_face.create_mtcnn(sess, None)

    # Detector settings
    minsize = 20  # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # thresholds of the three detection steps
    factor = 0.709  # scale factor

    nrof_images_total = 0
    nrof_successfully_aligned = 0
    filtered_dct = {}

    if args.random_order:
        random.shuffle(dataset)

    for image_class in dataset:
        accepted_paths = []
        filtered_dct[image_class.name] = accepted_paths

        for image_path in image_class.image_paths:
            nrof_images_total += 1

            try:
                image = misc.imread(image_path)
            except (IOError, ValueError, IndexError) as err:
                # Unreadable image: report it and move on
                print('{}: {}'.format(image_path, err))
                continue

            if image.ndim < 2:
                # Not an image array: counted as an alignment error
                print('Unable to align "%s"' % image_path)
                continue
            if image.ndim == 2:
                image = facenet.to_rgb(image)
            image = image[:, :, 0:3]

            bounding_boxes, _ = align.detect_face.detect_face(
                image, minsize, pnet, rnet, onet, threshold, factor)

            # Accept only images with exactly one detected face
            if bounding_boxes.shape[0] != 1:
                print('Unable to align "%s"' % image_path)
                continue

            nrof_successfully_aligned += 1
            accepted_paths.append(image_path)

    print('Total number of images: %d' % nrof_images_total)
    print('Number of successfully aligned images: %d' %
          nrof_successfully_aligned)

    # From here on the filtered data holds only single-face images
    filtered_dataset = [
        facenet.ImageClass(name, kept_paths)
        for name, kept_paths in filtered_dct.items()
    ]

    accuracy_aligned = 1. * nrof_successfully_aligned / nrof_images_total
    accuracy_full = test_identity(args, dataset)
    accuracy_filtered = test_identity(args, filtered_dataset)

    print()
    # Fraction of images with exactly one detected face
    print('Aligned accuracy: %.3f' % accuracy_aligned)
    # Result for the full dataset
    print('Identity accuracy for full test set: %.3f' % accuracy_full)
    # Result for the filtered dataset
    print('Identity accuracy for successful aligned images: %.3f' %
          accuracy_filtered)