Example no. 1
def dataset_creation():
   path = input("\nEnter the output folder location or simply press ENTER to create a dataset folder in the current directory: ").rstrip()

   if os.path.isdir(path):
     # The user-given path exists.
     path += '/output'
     if os.path.isdir(path):
       print("Directory already exists. Using it \n")
     else:
       os.makedirs(path)
       print("Directory successfully made in: " + path + "\n")

   # Either the user pressed ENTER or gave a non-existent location.
   else:
      if path == "":
        print("Making an output folder in the current directory. \n")
      else:
        print("No such directory exists. Making an output folder in the current code directory instead. \n")

      path = 'output'
      if os.path.isdir(path):
        print("Directory already exists. Using it \n")
      else:
        try:
          os.makedirs(path)
          print("Directory successfully made: " + path + "\n")
        except OSError:
          print("error in making directory. \n")
          sys.exit()

   # Ask for webcam resolution
   res = input("\nEnter your webcam SUPPORTED resolution for face detection. For eg. 640x480 OR press ENTER for default 640x480: ").rstrip().lower()
   if res == "":
      res = (640, 480)
   else:
      res = tuple(map(int, res.split('x'))) 
   # Start MTCNN face detection and pose estimation module.
  
   # Take gpu fraction values
   gpu_fraction = input("\nEnter the gpu memory fraction you want to allocate out of 1 or press ENTER for default 0.8: ").rstrip()
   if gpu_fraction == "":
     gpu_fraction = 0.8
   else:
     gpu_fraction = round(float(gpu_fraction), 1)

   # Some more MTCNN parameters
   minsize = 20 # minimum size of face
   threshold = [0.6, 0.7, 0.7]  # three steps' thresholds
   factor = 0.709 # scale factor
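   # NOTE: the three thresholds are the confidence cut-offs for MTCNN's P-Net, R-Net and
   # O-Net stages respectively; 0.709 is the commonly used image-pyramid scale factor.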
  
   with tf.Graph().as_default():
       gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_fraction)
       sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
       with sess.as_default():
            pnet, rnet, onet = align.detect_face.create_mtcnn(sess, None)
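             # In facenet's align.detect_face, passing None as the model path loads the
             # bundled det1/det2/det3 .npy weights; pnet, rnet and onet are callables
             # bound to this session.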

   # Create an object of face aligner module
   face_size = input("\nEnter desired face width and height in WidthxHeight format OR press ENTER for default 160x160 pixel: ").rstrip().lower()
   if face_size == "":
     face_size = (160, 160)
   else:
     face_size = tuple(map(int, face_size.split('x'))) 
   affine = FaceAligner(desiredLeftEye=(0.33, 0.33), desiredFaceWidth=face_size[0], desiredFaceHeight=face_size[1])
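   # desiredLeftEye=(0.33, 0.33) presumably places the left eye about a third of the way
   # in from the left and top edges of the aligned crop (imutils-style convention).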

   # 'Create dataset' was chosen before, so start collecting the dataset.
   personNo = 1
   while True:
    ask = input("\nEnter the user name for CREATING FOLDER with given username and image naming inside with username_xx.png numbered format or press ENTER to use default person_xx naming format: ").rstrip()
     # replace all spaces with underscores
    ask = ask.replace(" ", "_")    

     if ask == "":
       folder_name = 'person_' + str(personNo)
     else:
       folder_name = ask

    # Creating new user specific variables    
    personNo += 1
    users_folder = path + "/" + folder_name
    image_no = 1

    # Create folder with the given location and the given username.
     if os.path.isdir(users_folder):
       print("Directory already exists. Using it \n")
     else:
       try:
         os.makedirs(users_folder)
         print("Directory successfully made: " + users_folder + "\n")
       except OSError:
         print("error in making directory. \n")
         sys.exit()

    # Start webcam or videofile according to user.
    data_type = input("Press ENTER for detecting " + folder_name + " with webcam or write video path to open and create dataset of " + folder_name + " : ").rstrip()

    # default webcam which uses infinite loop and video variable to find total frames
    loop_type = False
    total_frames = 0
    
    if data_type == "":
       data_type = 0
       loop_type = True

    # Initialize webcam or video
    device = cv2.VideoCapture(data_type)

    # If webcam set resolution
    if data_type == 0:
      device.set(3, res[0])
      device.set(4, res[1])
    else:
       # Finding total number of frames of video.
       total_frames = int(device.get(cv2.CAP_PROP_FRAME_COUNT))
       # Shutting down webcam variable
       loop_type = False
      
    # Start web cam or start video and start creating dataset by user.
    while loop_type or (total_frames > 0):
         
         # If video selected dec counter
         if loop_type == False:
           total_frames -= 1

         ret, image = device.read()

          # Run MTCNN and do face detection while the 's' key is pressed
         if (cv2.waitKey(1) & 0xFF) == ord("s"):

           # DETECT FACES. We get the bounding boxes as well as the points for the face
           bb, points = align.detect_face.detect_face(image, minsize, pnet, rnet, onet, threshold, factor)
           
           # See if face is detected
           if bb.shape[0] > 0:
             
             # align the detected faces
             for col in range(points.shape[1]):
                aligned_image = affine.align(image, points[:,col])
                
                # Save the image
                image_name = users_folder + "/" + folder_name + "_" + str(image_no).zfill(4) + ".png"
                cv2.imwrite(image_name, aligned_image)
                image_no += 1

             # Draw the bounding boxes and pose landmarks on the image
             # Draw functions to show rectangles on the faces and circles on the landmarks
             for i in range(bb.shape[0]):
                cv2.rectangle(image, (int(bb[i][0]),int(bb[i][1])), (int(bb[i][2]),int(bb[i][3])), (0, 255, 0), 2)

             # loop over the (x, y)-coordinates for the facial landmarks
             # and draw each of them
             for col in range(points.shape[1]):
                for i in range(5):
                   cv2.circle(image, (int(points[i][col]), int(points[i+5][col])), 1, (0, 255, 0), -1)

         # Show the output video to user
         cv2.imshow("Output", image)

          # Break this loop if the 'q' key is pressed to go to the next user.
         if (cv2.waitKey(20) & 0xFF) == ord("q"):
           device.release()
           cv2.destroyAllWindows()
           break

    # Ask for more user using webcam or video else exit.
    ask = input("Press ENTER if you want to add more users or press the keyword 'q' to stop dataset creation: ")
    ask = ask.rstrip().lstrip().lower()
    if ask != "":
      if ask[0] == 'q':
        break

   # This means dataset creation is complete. Ask the user to train now or exit.
   ask = input("Press ENTER to exit or \nPress T to TRAIN and 'maybe' TEST later by creating a classifier on the facenet model OR \nPress W to test the dataset folder on a classifier model: ").rstrip().lstrip().lower()
   if ask == 't':
     train()

   elif ask == 'w':
      test()

   else:
     if ask == "":
       print("Cleaning and exiting. Thank You \n")
     else:
       print("\n wrong keyword pressed. Cleaning and exiting. \n Thank You \n")
def main(args):

    print('Creating networks and loading parameters')
    # Building separate graphs for the two tf architectures
    #g1 = tf.Graph()
    g2 = tf.Graph()
    '''
    with g1.as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
        with tf.Session() as sess:
        	# Load the model for FaceNet image recognition
            facenet.load_model(args.model)
    '''

    with g2.as_default():
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=args.gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        with sess.as_default():
            pnet, rnet, onet = align.detect_face.create_mtcnn(sess, None)

    minsize = 20  # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # three steps's threshold
    factor = 0.709  # scale factor

    # Create an object of face aligner module
    affine = FaceAligner(desiredLeftEye=(0.33, 0.33),
                         desiredFaceWidth=160,
                         desiredFaceHeight=160)

    # Taking the video and creating an object of it.
    print("[INFO] Taking the video input.")
    vs = cv2.VideoCapture(os.path.expanduser(args.video))

    # Finding the file format, size and the fps rate
    fps = vs.get(cv2.CAP_PROP_FPS)
    video_format = int(vs.get(cv2.CAP_PROP_FOURCC))
    frame_size = (int(vs.get(cv2.CAP_PROP_FRAME_WIDTH)),
                  int(vs.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    total_frames = int(vs.get(cv2.CAP_PROP_FRAME_COUNT))
    output_video = cv2.VideoWriter("Output_" + args.video, video_format, fps,
                                   frame_size)

    # Create the output_faces directory by user or default arguments
    path = os.path.expanduser(args.output)
    path = path + "/output_faces"

    if not os.path.isdir(path):
        os.makedirs(path)

    image_numbers = 0

    print("Total number of frames \n" + str(total_frames) + "\n")
    #for i in range(total_frames):
    for i in range(total_frames):

        # Print the present frame / total frames to know how much we have completed
        print("\n" + str(i) + " / " + str(total_frames) + "\n")

        ret, image = vs.read()

        # Run MTCNN model to detect faces
        g2.as_default()
        with tf.Session(graph=g2) as sess:
            # we get the bounding boxes as well as the points for the face
            bb, points = align.detect_face.detect_face(image, minsize, pnet,
                                                       rnet, onet, threshold,
                                                       factor)

        # See if face is detected
        if bb.shape[0] > 0:

            # ALIGNMENT - use the bounding boxes and facial landmarks to align images
            # create a numpy array to feed the network
            img_list = []
            images = np.empty([bb.shape[0], image.shape[0], image.shape[1]])

            for col in range(points.shape[1]):
                aligned_image = affine.align(image, points[:, col])

                if args.show_video == True:
                    cv2.imshow("aligned", aligned_image)

                # Prewhiten the image for facenet architecture to give better results
                #mean = np.mean(aligned_image)
                #std  = np.std(aligned_image)
                #std_adj = np.maximum(std, 1.0/np.sqrt(aligned_image.size))
                #ready_image = np.multiply(np.subtract(aligned_image, mean), 1/std_adj)
                # Save the found out images
                place = path + "/" + "output_faces_" + str(
                    image_numbers) + ".png"
                print("saved to: " + place + "\n")
                cv2.imwrite(place, aligned_image)
                image_numbers += 1

            # if we want to show or save the video then draw the box and the points on the image
            if args.show_video == True or args.save_video == True:

                for i in range(bb.shape[0]):
                    cv2.rectangle(image, (int(bb[i][0]), int(bb[i][1])),
                                  (int(bb[i][2]), int(bb[i][3])), (0, 255, 0),
                                  2)

                # loop over the (x, y)-coordinates for the facial landmarks
                # and draw each of them
                for col in range(points.shape[1]):
                    for i in range(5):
                        cv2.circle(
                            image,
                            (int(points[i][col]), int(points[i + 5][col])), 1,
                            (255, 0, 0), -1)

        if args.save_video == True:
            output_video.write(image)

        if args.show_video == True:
            cv2.imshow("Output", image)

        # Save the final aligned face image in given format
        """   # Show the image
                #cv2.imshow(str(col), aligned_image)
                img_list.append(ready_image)
                images = np.stack(img_list)


          g1.as_default()
          with tf.Session(graph=g1) as sess:
          # Run forward pass on FaceNet to get the embeddings
              images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
              embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
              phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
              feed_dict = { images_placeholder: images, phase_train_placeholder:False }
              embedding = sess.run(embeddings, feed_dict=feed_dict)
          
              print("Here is the embedding \n")
              print(embedding.shape)
              print("\n")

        """

        key = cv2.waitKey(1) & 0xFF
        # if the `q` key was pressed, break from the loop
        if key == ord("q"):
            #if keyboard.is_pressed('q'):
            # do a bit of cleanup
            vs.release()
            output_video.release()
            cv2.destroyAllWindows()
            break
Example no. 3
def recognize():

   # Taking the parameters for recognition from the user
   classifier_filename = input("\nEnter the path of the classifier .pkl file or press ENTER if a filename 'classifier.pkl' is present in this code directory itself: ")
   if classifier_filename == "":
      classifier_filename = 'classifier.pkl'
   classifier_filename = os.path.expanduser(classifier_filename)

   model = input("\nEnter the FOLDER PATH inside which 20180402-114759 FOLDER is present. Press ENTER stating that the FOLDER 20180402-114759 is present in this code directory itself: ").rstrip()
   if model == "":
      model = "20180402-114759/20180402-114759.pb"

   # Create an object of face aligner module
   image_size = (160, 160)
   ask = input("\nEnter desired face width and height in WidthxHeight format for face aligner to take OR press ENTER for default 160x160 pixel: ").rstrip().lower()
   if ask != "":
      image_size = tuple(map(int, ask.split('x'))) 
   
   # Take gpu fraction values
   gpu_fraction = input("\nEnter the gpu memory fraction you want to allocate out of 1 or press ENTER for default 0.8: ").rstrip()
   if gpu_fraction == "":
      gpu_fraction = 0.8
   else:
      gpu_fraction = round(float(gpu_fraction), 1)

   input_type = input("\nPress I for image input OR\nPress V for video input OR\nPress W for webcam input OR\nPress ENTER for default webcam: ").lstrip().rstrip().lower()
   if input_type == "":
      input_type = 'w'

   # Load the face aligner model
   affine = FaceAligner(desiredLeftEye=(0.33, 0.33), desiredFaceWidth=image_size[0], desiredFaceHeight=image_size[1])

   # Building separate graphs for the two tf architectures
   g1 = tf.Graph()
   g2 = tf.Graph()

   # Load the model for FaceNet image recognition    
   with g1.as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
        with tf.Session() as sess:
            facenet.load_model(model)

   # Load the model of MTCNN face detection.
   with g2.as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
        with sess.as_default():
            pnet, rnet, onet = align.detect_face.create_mtcnn(sess, None)

   # Some MTCNN network parameters
   minsize = 20 # minimum size of face
   threshold = [0.6, 0.7, 0.8]  # three steps' thresholds
   factor = 0.709 # scale factor
   ask = input("\nEnter the threshold FACE DETECTION CONFIDENCE SCORE to consider detection by MTCNN OR press ENTER for default 0.80: ")
   if ask != "" and float(ask) < 1:
      threshold[2] = round(float(ask),2)

   classifier_threshold = 0.50
   ask = input("\nEnter the threshold FACE RECOGNITION CONFIDENCE SCORE to consider face is recognised OR press ENTER for default 0.50: ")
   if ask != "":
      classifier_threshold = float(ask)

   # Loading the classifier model
   with open(classifier_filename, 'rb') as infile:
       (modelSVM, class_names) = pickle.load(infile)
   print('\nLoaded classifier model from file "%s"' % classifier_filename)

   # default webcam which uses infinite loop or set video or image setting
   loop_type = False
   image_input = 0
   total_frames = 0
   save_video = False
   frame_no = 1
   output_video = []
   image = []
   display_output = True
   res = (640, 480)

   # If web cam is selected
   if input_type == "w":
        data_type = 0
        loop_type = True
        # Ask for webcam resolution
        ask = input("\nEnter your webcam SUPPORTED resolution for face detection. For eg. 640x480 OR press ENTER for default 640x480: ").rstrip().lower()
        if ask != "":
           res = tuple(map(int, ask.split('x')))

   # If image selected, trying to represent it as video with 1 frame
   elif input_type == "i":
        loop_type = False
        total_frames = 0
        data_type = input("\nWrite the image path file to open: ").rstrip().lstrip()
        image = cv2.imread(data_type)
        # Jump directly into the code to go through a single pass
        goto(581)

   # Video is selected
   else:
        loop_type = False
        data_type = input("\nWrite the video path file to open: ").rstrip().lstrip()
        ask = input("\nPress y to save the output video OR simply press ENTER to ignore it: ").lstrip().rstrip().lower()
        if ask == "y":
           save_video = True
        ask = input("\nSimply press ENTER to see the output video frames OR press N to switch off the output display: ").lstrip().rstrip().lower()
        if ask == "n":
           display_output = False

   # Initialize webcam or video
   device = cv2.VideoCapture(data_type)

   # If webcam set resolution
   if input_type == "w":
      device.set(3, res[0])
      device.set(4, res[1])
    
   elif input_type == "v":
      # Finding total number of frames of video.
      total_frames = int(device.get(cv2.CAP_PROP_FRAME_COUNT))
      # Shutting down webcam variable
      loop_type = False
      # save video feature.
      if save_video:
         # Finding the file format, size and the fps rate
         fps = device.get(cv2.CAP_PROP_FPS)
         video_format = int(device.get(cv2.CAP_PROP_FOURCC))
         frame_size = (int(device.get(cv2.CAP_PROP_FRAME_WIDTH)), int(device.get(cv2.CAP_PROP_FRAME_HEIGHT)))
         # Creating video writer to save the video after process if needed
         output_video = cv2.VideoWriter("Output_" + data_type, video_format, fps, frame_size)
         
      
   # Start web cam or start video and start creating dataset by user.
   while loop_type or (frame_no <= total_frames):
         
       # If video selected dec counter
       if loop_type == False:
          frame_no += 1
          # Display the progress
          print("\nProgress: %.2f" %(100*frame_no/total_frames) + "%") 

       ret, image = device.read()

       # Run MTCNN model to detect faces
       g2.as_default()
       with tf.Session(graph=g2) as sess:
           # we get the bounding boxes as well as the points for the face
           bb, points = align.detect_face.detect_face(image, minsize, pnet, rnet, onet, threshold, factor)

       # See if face is detected
       if bb.shape[0] > 0:

         # ALIGNMENT - use the bounding boxes and facial landmarks points to align images
          
         # create a numpy array to feed the network
         img_list = []
         images = np.empty([bb.shape[0], image.shape[0], image.shape[1]])

         for col in range(points.shape[1]):
             aligned_image = affine.align(image, points[:,col])

             # Prewhiten the image for facenet architecture to give better results
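              # This mirrors facenet's prewhiten(): zero-mean the pixels and scale by
              # max(std, 1/sqrt(num_pixels)) so nearly uniform crops do not blow up numerically.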
             mean = np.mean(aligned_image)
             std  = np.std(aligned_image)
             std_adj = np.maximum(std, 1.0/np.sqrt(aligned_image.size))
             ready_image = np.multiply(np.subtract(aligned_image, mean), 1/std_adj)
             img_list.append(ready_image)
             images = np.stack(img_list)

         # EMBEDDINGS: Use the processed aligned images for Facenet embeddings
         
         g1.as_default()
         with tf.Session(graph=g1) as sess:
          # Run forward pass on FaceNet to get the embeddings
              images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
              embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
              phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
              feed_dict = { images_placeholder: images, phase_train_placeholder:False }
              embedding = sess.run(embeddings, feed_dict=feed_dict)
          
         
         # PREDICTION: use the classifier to predict the most likely class (person).
         predictions = modelSVM.predict_proba(embedding)
         best_class_indices = np.argmax(predictions, axis=1)
         best_class_probabilities = predictions[np.arange(len(best_class_indices)), best_class_indices]
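          # predict_proba gives one row of class probabilities per aligned face; argmax
          # selects the most likely person and its probability for each detection.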


         # DRAW: draw bounding boxes, landmarks and predicted names

         if save_video or display_output:
           for i in range(bb.shape[0]):
              cv2.rectangle(image, (int(bb[i][0]),int(bb[i][1])), (int(bb[i][2]),int(bb[i][3])), (255,0, 0), 1)

              # Put name and probability of detection only if given threshold is crossed
              if best_class_probabilities[i] > classifier_threshold:
                 cv2.putText(image, class_names[best_class_indices[i]], (int(bb[i][0]),int(bb[i][1])-7), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1,(0,0,255), 1, cv2.LINE_AA)
                 cv2.putText(image, str(round(best_class_probabilities[i]*100, 2) ) + "%", (int(bb[i][0]), int(bb[i][3])+7), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1,(0,0,255), 1, cv2.LINE_AA)

           # loop over the (x, y)-coordinates for the facial landmarks
           for col in range(points.shape[1]):
              for i in range(5):
                 cv2.circle(image, (int(points[i][col]), int(points[i+5][col])), 1, (0, 255, 0), -1)

       if display_output:
          cv2.imshow("Output", image)
       if save_video:
          output_video.write(image)

       key = cv2.waitKey(1) & 0xFF
       # if the `q` key was pressed, break from the loop
       if key == ord("q"):
          # do a bit of cleanup
          device.release()
          if save_video:
             output_video.release()
          cv2.destroyAllWindows()
          break
Example no. 4
def main(args):

    print('Creating networks and loading parameters')

    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=args.gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        with sess.as_default():
            pnet, rnet, onet = align.detect_face.create_mtcnn(sess, None)

    minsize = 20  # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # three steps's threshold
    factor = 0.709  # scale factor

    # Create an object of face aligner module
    affine = FaceAligner(desiredLeftEye=(0.39, 0.39),
                         desiredFaceWidth=256,
                         desiredFaceHeight=256)

    print("[INFO] camera sensor warming up...")
    vs = cv2.VideoCapture(0)
    vs.set(3, 1280)
    vs.set(4, 720)
    time.sleep(2.0)

    while True:
        ret, img = vs.read()

        # we get the bounding boxes as well as the points for the face
        bb, points = align.detect_face.detect_face(img, minsize, pnet, rnet,
                                                   onet, threshold, factor)
        #print("here they are \n")
        #print(points)

        # See if face is detected
        if bb.shape[0] > 0:

            # Draw rectangles on the faces and circles on the landmarks
            for i in range(bb.shape[0]):
                cv2.rectangle(img, (int(bb[i][0]), int(bb[i][1])),
                              (int(bb[i][2]), int(bb[i][3])), (0, 255, 0), 2)

            # loop over the (x, y)-coordinates for the facial landmarks
            # and draw each of them
            for col in range(points.shape[1]):
                for i in range(5):
                    cv2.circle(img,
                               (int(points[i][col]), int(points[i + 5][col])),
                               1, (255, 0, 0), -1)

            # ALIGNMENT - use the bounding boxes and facial landmarks to align images
            aligned_image = affine.align(img, points)

            # Show the image only if alignment is there
            cv2.imshow("Alignment", aligned_image)

        cv2.imshow("Output", img)

        key = cv2.waitKey(1) & 0xFF
        # if the `q` key was pressed, break from the loop
        if key == ord("q"):
            break
    print(img)
    # load the input image, resize it, and convert it to grayscale
    image = cv2.imread(img)
    image = imutils.resize(image, width=800)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    rects = detector(gray, 2)

    # loop over the face detections
    for rect in rects:
        count += 1
        # extract the ROI of the *original* face, then align the face
        # using facial landmarks
        (x, y, w, h) = rect_to_bb(rect)
        faceOrig = imutils.resize(image[y:y + h, x:x + w], width=256)
        faceAligned = fa.align(image, gray, rect)
        faceAligned = cv2.resize(faceAligned, (160, 160))
        if (args["class"] == "base"):
            if (count < 10):
                cv2.imwrite(
                    "aligned_faces/base_image/user_{}_0{}.jpg".format(
                        uid, count), faceAligned)
            else:
                cv2.imwrite(
                    "aligned_faces/base_image/user_{}_{}.jpg".format(
                        uid, count), faceAligned)
        else:
            if (count < 10):
                cv2.imwrite(
                    "aligned_faces/verify/user_{}_0{}.jpg".format(uid, count),
                    faceAligned)
Example no. 6
def PatchExtraction(video_path, landmarks_path, output_dir, patch_size=32):
    print("Input: ", video_path)
    print("Output:", output_dir)
    frames = []
    frame_number = []
    if os.path.exists(landmarks_path) == False:
        return
    df = pd.read_csv(landmarks_path)
    cap = cv2.VideoCapture(video_path)
    count = 0
    while(cap.isOpened()):
        ret, frame = cap.read()
        if not ret:
            break
        # if count % 6 == 0 and df[' success'][count] == 1:
        # if df[' success'][count] == 1:
        if count % 6 == 0 and len(df[' success']) > count:
            if df[' success'][count] == 1:
                frame = frame[:,:,::-1]
                frames.append(frame)
                frame_number.append(count)
        count += 1
    cap.release()

    folders = ["aligned_face", "left_eye", "right_eye", "mouth", "nose"]
    for folder in folders:
        directory = os.path.join(output_dir, folder)
        if not os.path.exists(directory):
            os.makedirs(directory)

    for idx, frame in enumerate(frames):
        x = np.array(df.iloc[frame_number[idx],299:299+68]).reshape(68,-1)
        y = np.array(df.iloc[frame_number[idx],299+68:299+68*2]).reshape(68,-1)
        z = np.ones(68).reshape(68,-1)
        landmarks = np.concatenate((x,y), axis=1)
        aligner = FaceAligner(desiredLeftEye=(0.35, 0.35), desiredFaceWidth=128, desiredFaceHeight=int(128*2))
        aligned_face, M = aligner.align(frame, landmarks)
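        # The ones column appended below lifts the landmarks to homogeneous coordinates,
        # so the 2x3 affine matrix M returned by the aligner maps them into the
        # aligned-face frame with a single matrix multiplication.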

        landmarks_z = np.concatenate((landmarks, z), axis=1)
        affined_landmarks = np.matmul(landmarks_z, M.transpose())

        regions = ["left_eye", "right_eye", "mouth", "nose"]
        regions_image = []
        for region in regions:
            start, end = FACIAL_LANDMARKS_68_IDXS[region]
            Pts = affined_landmarks[start:end]
            Center = Pts.mean(axis=0)
            try:
                img = extract_patch(aligned_face, Center, patch_size)
            except:
                break
            if img.shape != (32, 32, 3):
                break
            regions_image.append(img)
        
        if len(regions_image) == len(regions):
            for i, region in enumerate(regions):
                filename = os.path.join(output_dir, region, str(frame_number[idx]).zfill(4) + '.bmp')
                img = regions_image[i]
                save(img, filename)
            filename = os.path.join(output_dir, 'aligned_face', str(frame_number[idx]).zfill(4)  + '.bmp')
            np.save(os.path.join(output_dir, 'aligned_face', str(frame_number[idx]).zfill(4)  + '.npy'), affined_landmarks)
            save(aligned_face, filename)
Example no. 7
    k = cv2.waitKey(1)

    if k % 256 == 27:
        # ESC pressed
        print("Quit")
        break
    elif k % 256 == 32:
        # SPACE pressed
        lm = "error"
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        face_rect = []
        detections = get_face_recs(frame)
        for detection in detections:
            clahe_image = clahe.apply(gray)
            aligned = fa.align(clahe_image, detection)
            lm = get_landmarks(aligned)
            face_rect = detection
            break

        if lm != "error":
            sample = np.array([lm])
            sample.reshape(1,  -1)
            emotion = SVM.predict(sample)
            print("Emotion detected: {}".format(emotion.capitalize()))
            cv2.putText(frame, emotion.capitalize(),
                        (50, 50), cv2.FONT_HERSHEY_PLAIN, 3, (255, 0, 0), 2)
            cv2.imshow("Frame", frame)


# When everything is done, release the capture
Example no. 8
def main(args):

    print('Creating networks and loading parameters')

    # Building separate graphs for both the networks
    g1 = tf.Graph()
    g2 = tf.Graph()
    #images_placeholder = tf.placeholder(tf.int32)
    #embeddings = tf.Variable()
    #phase_train_placeholder = tf.placeholder(tf.bool)

    with g1.as_default():
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=args.gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        with tf.Session() as sess:
            facenet.load_model(args.model)
    #with tf.Graph().as_default():
    #with tf.Session() as sess:

    with g2.as_default():
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=args.gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        with sess.as_default():
            pnet, rnet, onet = align.detect_face.create_mtcnn(sess, None)

    minsize = 20  # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # three steps's threshold
    factor = 0.709  # scale factor

    # Create an object of face aligner module
    affine = FaceAligner(desiredLeftEye=(0.33, 0.33),
                         desiredFaceWidth=160,
                         desiredFaceHeight=160)

    # Load the model for FaceNet image recognition and get the tensors

    print("[INFO] camera sensor warming up...")
    vs = cv2.VideoCapture(0)
    vs.set(3, 640)
    vs.set(4, 480)
    time.sleep(2.0)

    while True:
        ret, img = vs.read()

        # we get the bounding boxes as well as the points for the face
        g2.as_default()
        with tf.Session(graph=g2) as sess:
            bb, points = align.detect_face.detect_face(img, minsize, pnet,
                                                       rnet, onet, threshold,
                                                       factor)
        #print("here they are \n")
        #print(points)

        # See if face is detected
        if bb.shape[0] > 0:

            # Draw rectangles on the faces and circles on the landmarks
            for i in range(bb.shape[0]):
                cv2.rectangle(img, (int(bb[i][0]), int(bb[i][1])),
                              (int(bb[i][2]), int(bb[i][3])), (0, 255, 0), 2)

            # loop over the (x, y)-coordinates for the facial landmarks
            # and draw each of them
            for col in range(points.shape[1]):
                for i in range(5):
                    cv2.circle(img,
                               (int(points[i][col]), int(points[i + 5][col])),
                               1, (255, 0, 0), -1)

            # ALIGNMENT - use the bounding boxes and facial landmarks to align images
            aligned_image = affine.align(img, points)

            # Show the image only if alignment is there
            cv2.imshow("Alignment", aligned_image)

            # Prewhiten the image for facenet architecture to give better results
            mean = np.mean(aligned_image)
            std = np.std(aligned_image)
            std_adj = np.maximum(std, 1.0 / np.sqrt(aligned_image.size))
            facenet_image = np.multiply(np.subtract(aligned_image, mean),
                                        1 / std_adj)
            img_list = []
            img_list.append(facenet_image)
            img_list.append(facenet_image)
            images = np.stack(img_list)

            g1.as_default()
            with tf.Session(graph=g1) as sess:
                # Run forward pass on FaceNet to get the embeddings
                images_placeholder = tf.get_default_graph().get_tensor_by_name(
                    "input:0")
                embeddings = tf.get_default_graph().get_tensor_by_name(
                    "embeddings:0")
                phase_train_placeholder = tf.get_default_graph(
                ).get_tensor_by_name("phase_train:0")
                feed_dict = {
                    images_placeholder: images,
                    phase_train_placeholder: False
                }
                embedding = sess.run(embeddings, feed_dict=feed_dict)

                print("Here is the embedding \n")
                print(embedding)
                print("\n")

        cv2.imshow("Output", img)

        key = cv2.waitKey(1) & 0xFF
        # if the `q` key was pressed, break from the loop
        if key == ord("q"):
            break
Example no. 9
def main():

  print("\n*********************************************************************************************** \n")
  print("              Welcome to the Face detection and recognition program. \n")
  print("\n*********************************************************************************************** \n")
  print("GUIDELINES TO USE THIS SOFTWARE: \n\nThis code gives the user to:\n\n1) CREATE DATASET using MTCNN face detection and alignment. or\n2) TRAIN FaceNet for face recognition. or \n3) Do both.\n\n The user will multiple times get option to choose webcam (default option) or video file to do face detection and will be asked for output folder, username on folder and image files etc also (default options exists for that too)\n\n **************   IMPORTANT   *************\n1) Whenever webcam or video starts press 's' keyword to start face detection in video or webcam frames and save the faces in the folder for a single user. This dataset creation will stop the moment you release the 's' key. This can be done multiple times.\n\n2) Press 'q' to close it when you are done with one person, and want to detect face for another person. \n\n3) Make sure you press the keywords on the image window and not the terminal window.\n")
  mode = input("Press T to train the facenet for recognition OR \nPress D to first create dataset and then 'maybe' train later: ")

  # Some variables that will be used through out the code
  path = ""
  res = ()
  personNo = 1
  folder_name = ""


  # This means user went for Creating of dataset
  if mode == 'D':
    path = input("Enter the output folder location or simply press ENTER create a dataset folder in this directory only: ")

    if os.path.isdir(path):
     
     # User given path is present.
      path += '/output'
      if os.path.isdir(path):
        print("Directory already exists. Using it \n")
      else:
        if not os.makedirs(path):
          print("Directory successfully made in: " + path + "\n")

    # Either the user pressed ENTER or gave a non-existent location.
    else:
      if path == "":
        print("Making an output folder in the current directory. \n")
      else:
        print("No such directory exists. Making an output folder in the current code directory instead. \n")

      path = 'output'
      if os.path.isdir(path):
        print("Directory already exists. Using it \n")
      else:
        try:
          os.makedirs(path)
          print("Directory successfully made: " + path + "\n")
        except OSError:
          print("error in making directory. \n")
          sys.exit()

    # Ask for webcam resolution
    ask = input("Enter your webcam SUPPORTED resolution for face detection. For eg. 640x480 OR press ENTER for default 640x480: ").rstrip().lower()
    if ask == "":
      res = (640, 480)
    else:
      res = tuple(map(int, ask.split("x")))

    # Start MTCNN face detection and pose estimation module.
    
    # Take gpu fraction values
    gpu_fraction = input("\nEnter the gpu memory fraction you want to allocate out of 1 or press ENTER for default 0.8: ")
    if gpu_fraction == "":
      gpu_fraction = 0.8
    else:
      gpu_fraction = round(float(gpu_fraction), 1)

    # Some more MTCNN parameters
    minsize = 20 # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # three steps' thresholds
    factor = 0.709 # scale factor
    
    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
        with sess.as_default():
            pnet, rnet, onet = align.detect_face.create_mtcnn(sess, None)

    # Create an object of face aligner module
    ask = input("Enter desired face width and height in widthxheight format OR press ENTER for default 160x160 pixels: ").rstrip().lower()
    if ask == "":
      face_size = (160, 160)
    else:
      face_size = tuple(map(int, ask.split("x")))
    affine = FaceAligner(desiredLeftEye=(0.33, 0.33), desiredFaceWidth=face_size[0], desiredFaceHeight=face_size[1])



  # This means user went for the train part
  elif mode == 'T':
     train()

  else:
  	print("No correct keyword entered. Exiting")
  	sys.exit()

  # 'Create dataset' was chosen before, so start collecting the dataset.
  while True:

    ask = input("\n Enter the user name for CREATING FOLDER with given username and image naming inside with username_xx.png numbered format or press ENTER to use default person_xx naming format: ")
    # replace all spaces with underscores
    ask = ask.replace(" ", "_")    

    if ask == "":
      folder_name = 'person_' + str(personNo)
    else:
      folder_name = ask

    # Creating new user specific variables   	
    personNo += 1
    users_folder = path + "/" + folder_name
    image_no = 0

    # Create folder with the given location and the given username.
    if os.path.isdir(users_folder):
      print("Directory already exists. Using it \n")
    else:
      try:
        os.makedirs(users_folder)
        print("Directory successfully made: " + users_folder + "\n")
      except OSError:
        print("error in making directory. \n")
        sys.exit()

    # Start webcam or videofile according to user.
    data_type = input("Press ENTER for detecting " + folder_name + " with webcam or write video path to open and create dataset of " + folder_name + " : ")

    # default webcam which uses infinite loop and video variable to find total frames
    loop_type = False
    total_frames = 0
    
    if data_type == "":
       data_type = 0
       loop_type = True

    # Initialize webcam or video
    device = cv2.VideoCapture(data_type)

    # If webcam set resolution
    if data_type == 0:
      device.set(3, res[0])
      device.set(4, res[1])
    else:
       # Finding total number of frames of video.
       total_frames = int(device.get(cv2.CAP_PROP_FRAME_COUNT))

    # Start web cam or start video and start creating dataset by user.
    while loop_type or (total_frames > 0):

        # If a video was selected, decrement the frame counter
        if not loop_type:
            total_frames -= 1

        ret, image = device.read()

        # Run MTCNN and do face detection while the 's' key is pressed
        if (cv2.waitKey(1) & 0xFF) == ord("s"):

            # DETECT FACES. We get the bounding boxes as well as the points for the face
            bb, points = align.detect_face.detect_face(image, minsize, pnet, rnet, onet, threshold, factor)

            # See if face is detected
            if bb.shape[0] > 0:

                # align the detected faces
                for col in range(points.shape[1]):
                    aligned_image = affine.align(image, points[:, col])

                    # Save the image
                    image_name = users_folder + "/" + folder_name + "_" + str(image_no).zfill(3) + ".png"
                    cv2.imwrite(image_name, aligned_image)
                    image_no += 1

                # Draw the bounding boxes and pose landmarks on the image
                # Draw functions to show rectangles on the faces and circles on the landmarks
                for i in range(bb.shape[0]):
                    cv2.rectangle(image, (int(bb[i][0]), int(bb[i][1])), (int(bb[i][2]), int(bb[i][3])), (0, 255, 0), 2)

                # loop over the (x, y)-coordinates for the facial landmarks and draw each of them
                for col in range(points.shape[1]):
                    for i in range(5):
                        cv2.circle(image, (int(points[i][col]), int(points[i+5][col])), 1, (0, 255, 0), -1)

        # Show the output video to user
        cv2.imshow("Output", image)

        # Break this loop if the 'q' key is pressed to go to the next user.
        if (cv2.waitKey(1) & 0xFF) == ord("q"):
            device.release()
            cv2.destroyAllWindows()
            break

    # Ask for more user using webcam or video else exit.
    ask = input("Press ENTER if you want to add more users or press the keyword 'q' to stop dataset creation: ")
    if ask == 'q':
      break

  # This means dataset creation is complete. Ask the user to train now or exit.
  ask = input("Press ENTER to exit or press 'T' to train the Facenet model on the dataset: ")
  if ask == "T":
    train()
Example no. 10
class OpenvinoFaceVectorizer:
    def __init__(self,
                 cpu_lib="/opt/intel/openvino_2019.3.376/deployment_tools/inference_engine/lib/intel64/libcpu_extension_avx2.so",
                 landmarks_xml="openvino_detectors/landmarks-regression/FP32/model.xml",
                 features_xml="openvino_detectors/face-reidentification/FP32/model.xml"):

        # Plugin initialization for specified device and load extensions library if specified
        plugin = IEPlugin(device="CPU")
        plugin.add_cpu_extension(cpu_lib)

        # Read landmarks IR
        landmarks_bin = os.path.splitext(landmarks_xml)[0] + ".bin"
        log.info("Loading landmarks network files:\n\t{}\n\t{}".format(landmarks_xml, landmarks_bin))
        landmarks_net = IENetwork.from_ir(model=landmarks_xml, weights=landmarks_bin)

        # Read features IR
        features_bin = os.path.splitext(features_xml)[0] + ".bin"
        log.info("Loading features network files:\n\t{}\n\t{}".format(features_xml, features_bin))
        features_net = IENetwork.from_ir(model=features_xml, weights=features_bin)
        self.l_in = next(iter(landmarks_net.inputs))
        self.l_out = next(iter(landmarks_net.outputs))
        landmarks_net.batch_size = 1

        self.f_in = next(iter(features_net.inputs))
        self.f_out = next(iter(features_net.outputs))
        features_net.batch_size = 1
        cur = landmarks_net.inputs[self.l_in]
        self.l_n = cur.layout
        self.l_c, self.l_h, self.l_w = cur.shape[1:]
        # self.l_n = NCHW it is 1
        self.l_images = np.ndarray(shape=(1, self.l_c, self.l_h, self.l_w))

        cur = features_net.inputs[self.f_in]
        self.f_n = cur.layout
        self.f_c, self.f_h, self.f_w = cur.shape[1:]

        self.f_images = np.ndarray(shape=(1, self.f_c, self.f_h, self.f_w))

        # Loading models to the plugin
        log.info("Loading models to the plugin")
        self.l_exec_net = plugin.load(network=landmarks_net)
        self.f_exec_net = plugin.load(network=features_net)

        self.face_aligner = FaceAligner(face_width=self.f_w, face_height=self.f_h)
        self.vectors = {}

    def face_to_vector(self, face):
        height, width = face.shape[:-1]
        landmark_face = cv2.resize(face, (self.l_w, self.l_h))
        self.l_images[0] = landmark_face.transpose((2, 0, 1))
        l_res = np.squeeze(self.l_exec_net.infer(inputs={self.l_in: self.l_images})[self.l_out])
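        # The landmarks network returns 5 (x, y) pairs flattened as [x0, y0, x1, y1, ...]
        # and normalized to [0, 1]; scale x by the crop width and y by its height to get
        # pixel coordinates (presumably what this FaceAligner expects).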
        for i in range(10):
            if i % 2 == 0:
                l_res[i] = width * l_res[i]
            else:
                l_res[i] = height * l_res[i]
        aligned_face = self.face_aligner.align(face, l_res)
        self.f_images[0] = aligned_face.transpose((2, 0, 1))
        # self.f_images[0] = cv2.resize(face, (self.f_w, self.f_h)).transpose((2, 0, 1))
        f_res = np.squeeze(self.f_exec_net.infer(inputs={self.f_in: self.f_images})[self.f_out])
        # print(f_res)
        # cv2.imshow('frame', face)
        # cv2.waitKey(1000)
        return np.array(f_res)

    def searcher(self, face_img, top=3):

        face_vector = self.face_to_vector(face_img)
        nearest = PriorityQueue()

        for id_people, faces in self.vectors.items():
            for face in faces:
                similarity = self.face_similarity(face, face_vector)
                nearest.put((similarity, id_people))
                if nearest.qsize() > top:
                    nearest.get()
                # if similarity > max_similarity:
                #     max_similarity = similarity
                #     max_id = id_people
        res = sorted(nearest.queue, key = lambda x:x[0], reverse=True)
        return res

    def add_face(self, face, face_name):
        self.vectors[face_name] = [self.face_to_vector(face)]

    def face_similarity(self, v1, v2):
        return 1.0 - spatial.distance.cosine(v1, v2)
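

# Usage sketch (a minimal illustration, not part of the original file): assumes the
# OpenVINO IR files referenced in __init__ exist at those paths, and that known_face /
# query_face are BGR face crops produced by an upstream face detector.
#
#   vectorizer = OpenvinoFaceVectorizer()
#   vectorizer.add_face(known_face, "alice")         # register one reference crop per person
#   print(vectorizer.searcher(query_face, top=3))    # [(similarity, person_id), ...], best first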