# project_face_clustering.py

# -*- coding: utf-8 -*-
"""project_face_clustering.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1gGXJz8RKR2mFABT8suowUkuES90VpB-a
"""

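# Shell command (not Python): run `pip install face_recognition` in a terminal,
# or `!pip install face_recognition` in a notebook cell.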

"""# The entire project has two modules
##1. Face Recognition
##2. Face Clustering

#1. Face Extraction(Recognition) Module
"""

import numpy as np

# for extracting faces from image repository
import face_recognition

import argparse

# Shell command (not Python): run `pip install imutils` in a terminal,
# or `!pip install imutils` in a notebook cell.

#importing necessary libraries

from imutils import paths
import face_recognition
import argparse
import pickle
import cv2
import os

# Unpack the dataset archive into the current working directory.
from zipfile import ZipFile
file_name = "/content/face-clustering.zip"
with ZipFile(file_name, mode="r") as archive:
  # No target path is given, so members are extracted relative to the cwd.
  archive.extractall()
  print("Done")

# Collect the paths of every image in the dataset folder.
print("quantifying faces")
imagePaths = list(paths.list_images("/content/face-clustering/dataset"))
data = []

# Detect the faces in each image and compute one encoding per face.
for (i, imagePath) in enumerate(imagePaths):
  print("processing image{}/{}".format(i+1, len(imagePaths)))
  print(imagePath)
  image = cv2.imread(imagePath)
  if image is None:
    # cv2.imread returns None (it does not raise) when a file cannot be
    # decoded; skip it so a single bad file does not abort the whole run.
    print("[WARN] could not read image, skipping: {}".format(imagePath))
    continue
  # OpenCV loads images as BGR; the image is converted to RGB before it is
  # handed to face_recognition.
  rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
  boxes = face_recognition.face_locations(rgb, model='cnn')
  encodings = face_recognition.face_encodings(rgb, boxes)
  # Build one record per detected face: the source image path, the face
  # bounding box, and its encoding. Images without faces add nothing.
  d = [{"imagePath": imagePath, "loc": box, "encoding": enc}
       for (box, enc) in zip(boxes, encodings)]
  data.extend(d)

# Store the face records in a pickle file,
# which will be used later by the face clustering module.
print("serialising encodings...")
# The context manager closes the file even if pickling fails part-way.
with open("face_encodings.pickle", "wb") as f:
  pickle.dump(data, f)

"""## 2 . Face Clustering Module"""

# import the necessary packages
from sklearn.cluster import DBSCAN
from sklearn.cluster import KMeans
from imutils import build_montages
import numpy as np
import argparse
import pickle
import cv2

# Load the face records written by the extraction module.
print("[INFO] loading encodings...")
# NOTE: unpickling executes arbitrary code; only load a pickle file that
# this pipeline produced itself.
# NOTE(review): the file was written to a cwd-relative path earlier in this
# script; this absolute path only matches when the cwd is /content.
with open("/content/face_encodings.pickle", "rb") as f:
  data = pickle.load(f)
# Wrapped in an array so the records can be indexed with NumPy index values
# later on.
data = np.array(data)
encodings = [d["encoding"] for d in data]

import tensorflow as tf
# Cluster the face encodings.
print("[INFO] clustering...")
# n_jobs=-1 uses every available core instead of requesting a fixed 100
# workers regardless of the machine.
clt = DBSCAN(metric="euclidean", n_jobs=-1)
# Alternative kept for comparison purposes:
#clt = KMeans(n_clusters=5)
clt.fit(encodings)
labels = clt.labels_
# Determine the total number of unique faces found in the dataset.
# DBSCAN labels noise samples as -1, so that label is not counted as a face.
labelIDs = np.unique(clt.labels_)
numUniqueFaces = int(np.count_nonzero(labelIDs > -1))
print("[INFO] # unique faces: {}".format(numUniqueFaces))

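# Shell command (not Python): run `pip install pytest-shutil` in a terminal,
# or `!pip install pytest-shutil` in a notebook cell.
# NOTE: the `shutil` module imported below is part of the standard library.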

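# Shell command (not Python): run `pip install python-resize-image` in a
# terminal, or `!pip install python-resize-image` in a notebook cell.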

from resizeimage import resizeimage
import shutil

# Save the full source image of every clustered face, one folder per cluster.
print(os.getcwd())
OutputFolderName = "Clustered Faces"
output_directory = os.getcwd()
OutputFolder = os.path.join(output_directory, OutputFolderName)
# Start from an empty output folder on every run.
if os.path.exists(OutputFolder):
  shutil.rmtree(OutputFolder)
os.makedirs(OutputFolder)

for labelID in labelIDs:
  print("[INFO] faces for face ID: {}".format(labelID))
  FaceFolder = os.path.join(OutputFolder, "Face_" + str(labelID))
  os.mkdir(FaceFolder)
  # Indexes of every face record assigned to this cluster.
  idxs = np.where(clt.labels_ == labelID)[0]
  counter = 1
  for i in idxs:
    # Load the image the face was found in; the whole image is saved.
    image = cv2.imread(data[i]["imagePath"])
    if image is None:
      # cv2.imread returns None for unreadable files; writing None would fail.
      print("[WARN] could not read image, skipping: {}".format(
          data[i]["imagePath"]))
      continue
    FaceFilename = "face_" + str(counter) + ".jpg"
    FaceImagePath = os.path.join(FaceFolder, FaceFilename)
    cv2.imwrite(FaceImagePath, image)
    counter += 1

# Loop over the unique face IDs and show a montage of each cluster
# to give instant visual output.
import matplotlib.pyplot as plt

for labelID in labelIDs:
  print("[INFO] faces for face ID: {}".format(labelID))
  idxs = np.where(clt.labels_ == labelID)[0]
  # Sample at most 25 faces, enough to fill the 5x5 montage grid.
  idxs = np.random.choice(idxs, size=min(25, len(idxs)), replace=False)
  # Faces to include in the montage.
  faces = []
  for i in idxs:
    image = cv2.imread(data[i]["imagePath"])
    (top, right, bottom, left) = data[i]["loc"]
    face = image[top:bottom, left:right]
    # OpenCV loads BGR while matplotlib displays RGB; swap the channels.
    face = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)
    face = cv2.resize(face, (96, 96))
    faces.append(face)

  montage = build_montages(faces, (96, 96), (5, 5))[0]
  title = "Face ID #{}".format(labelID)
  title = "UnKnown Faces" if labelID == -1 else title
  plt.figure()
  plt.title(title)
  plt.imshow(montage)
  # The montage is a matplotlib figure, so it is shown with matplotlib
  # rather than by waiting on an OpenCV window key press.
  plt.show()

print(os.getcwd())
# Save a cropped portrait of every clustered face, one folder per cluster.
OutputFolderName = "Clustered Faces"
output_directory = os.getcwd()
OutputFolder = os.path.join(output_directory, OutputFolderName)
# Start from an empty output folder on every run.
if os.path.exists(OutputFolder):
  shutil.rmtree(OutputFolder)
os.makedirs(OutputFolder)

# Extra pixels kept around each face box so the crop is a portrait rather
# than a tight face cut-out.
widthMargin = 100
heightMargin = 150

for labelID in labelIDs:
  print("[INFO] faces for face ID: {}".format(labelID))
  FaceFolder = os.path.join(OutputFolder, "Face_" + str(labelID))
  os.mkdir(FaceFolder)
  # Indexes of every face record assigned to this cluster.
  idxs = np.where(clt.labels_ == labelID)[0]
  counter = 1
  for i in idxs:
    # Load the input image and extract the face ROI.
    image = cv2.imread(data[i]["imagePath"])
    if image is None:
      # cv2.imread returns None for unreadable files; skip instead of crashing.
      print("[WARN] could not read image, skipping: {}".format(
          data[i]["imagePath"]))
      continue
    (o_top, o_right, o_bottom, o_left) = data[i]["loc"]
    height, width = image.shape[:2]
    # Grow the face box by the margins, clamped to the image bounds.
    top = max(o_top - heightMargin, 0)
    bottom = min(o_bottom + heightMargin, height)
    left = max(o_left - widthMargin, 0)
    right = min(o_right + widthMargin, width)
    portrait = image[top:bottom, left:right]
    FaceFilename = "face_" + str(counter) + ".jpg"
    FaceImagePath = os.path.join(FaceFolder, FaceFilename)
    cv2.imwrite(FaceImagePath, portrait)
    counter += 1