def handle_image_array_faces(image, base_image_name="", output_directory="manual_filter"):
    face_locations = face_recognition.face_locations(
        image, number_of_times_to_upsample=0, model="cnn")
    face_landmarks_list = face_recognition.face_landmarks(image)

    if len(face_locations) != len(face_landmarks_list):
        print("landmarks and face_locations do not match! Found faces: {}".format(
            len(face_locations)))
        save_faces(Image.fromarray(image), face_locations,
                   output_path=output_directory)
        return

    aligner = FaceAligner(output_directory=output_directory)
    for i in range(len(face_landmarks_list)):
        face_location = face_locations[i]
        face_landmarks = face_landmarks_list[i]
        if base_image_name:
            aligner.save_rotated_face(face_location, face_landmarks, image,
                                      file_name="{}_{}.jpg".format(base_image_name, i))
        else:
            aligner.save_rotated_face(face_location, face_landmarks, image,
                                      file_name="{}_{}.jpg".format(get_new_file_name(), i))
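# The snippet above assumes two helpers that are not shown. The versions below are
# hypothetical sketches (not the original implementations): save_faces() crops each
# detected box out of a PIL image, and get_new_file_name() produces a unique base name.
import os
import uuid

def save_faces(pil_image, face_locations, output_path="manual_filter"):
    # face_recognition returns boxes as (top, right, bottom, left) tuples
    os.makedirs(output_path, exist_ok=True)
    for i, (top, right, bottom, left) in enumerate(face_locations):
        face = pil_image.crop((left, top, right, bottom))
        face.save(os.path.join(output_path, "{}_{}.jpg".format(get_new_file_name(), i)))

def get_new_file_name():
    # assumption: any collision-resistant name works here
    return uuid.uuid4().hex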
def __init__(self,
             cpu_lib="/opt/intel/openvino_2019.3.376/deployment_tools/inference_engine/lib/intel64/libcpu_extension_avx2.so",
             landmarks_xml="openvino_detectors/landmarks-regression/FP32/model.xml",
             features_xml="openvino_detectors/face-reidentification/FP32/model.xml"):
    # Plugin initialization for specified device and load extensions library if specified
    plugin = IEPlugin(device="CPU")
    plugin.add_cpu_extension(cpu_lib)

    # Read landmarks IR
    landmarks_bin = os.path.splitext(landmarks_xml)[0] + ".bin"
    log.info("Loading landmarks network files:\n\t{}\n\t{}".format(landmarks_xml, landmarks_bin))
    landmarks_net = IENetwork.from_ir(model=landmarks_xml, weights=landmarks_bin)

    # Read features IR
    features_bin = os.path.splitext(features_xml)[0] + ".bin"
    log.info("Loading features network files:\n\t{}\n\t{}".format(features_xml, features_bin))
    features_net = IENetwork.from_ir(model=features_xml, weights=features_bin)

    self.l_in = next(iter(landmarks_net.inputs))
    self.l_out = next(iter(landmarks_net.outputs))
    landmarks_net.batch_size = 1

    self.f_in = next(iter(features_net.inputs))
    self.f_out = next(iter(features_net.outputs))
    features_net.batch_size = 1

    cur = landmarks_net.inputs[self.l_in]
    self.l_n = cur.layout
    self.l_c, self.l_h, self.l_w = cur.shape[1:]  # self.l_n = NCHW, batch dim is 1
    self.l_images = np.ndarray(shape=(1, self.l_c, self.l_h, self.l_w))

    cur = features_net.inputs[self.f_in]
    self.f_n = cur.layout
    self.f_c, self.f_h, self.f_w = cur.shape[1:]
    self.f_images = np.ndarray(shape=(1, self.f_c, self.f_h, self.f_w))

    # Loading models to the plugin
    log.info("Loading models to the plugin")
    self.l_exec_net = plugin.load(network=landmarks_net)
    self.f_exec_net = plugin.load(network=features_net)

    self.face_aligner = FaceAligner(face_width=self.f_w, face_height=self.f_h)
    self.vectors = {}
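# A minimal sketch (not part of the original class) of how the loaded re-identification
# network might be driven per face: resize to the network's HxW, reorder HWC -> CHW, and
# run a synchronous inference. The method name extract_descriptor() and the cv2 import
# are assumptions for illustration.
def extract_descriptor(self, face_bgr):
    resized = cv2.resize(face_bgr, (self.f_w, self.f_h))
    self.f_images[0] = resized.transpose((2, 0, 1))  # HWC -> CHW
    result = self.f_exec_net.infer(inputs={self.f_in: self.f_images})
    return result[self.f_out].flatten()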
def main():
    args = parse_args()
    det_json = args.det_json
    save_dir = args.save_dir
    model_dir = args.mtcnn_model_dir
    gpu_id = args.gpu_id

    aligner = FaceAligner(model_dir, gpu_id=gpu_id)

    index = 0
    with open(det_json, "r") as f:
        for line in f:
            index += 1
            print("Processing img %d" % index)
            line = json.loads(line.strip())
            # one url corresponds to one pts
            url = str(line["url"])
            if not line['det']:
                continue
            pts = line['det'][0]['boundingBox']['pts']
            # image names are prefixed with the person's name; if missing, the prefix is "neg"
            name = str(url.split('/')[-2])
            img_name = url.split('/')[-1]
            sub_save_dir = os.path.join(save_dir, name)
            if not os.path.exists(sub_save_dir):
                os.makedirs(sub_save_dir)
            img = _pull_img(url)
            if img is None:
                continue
            # crop only one face
            face_chip = aligner.get_face_chips(img, [pts], output_square=default_square)
            save_name = os.path.join(sub_save_dir, img_name)
            cv2.imwrite(save_name, face_chip[0])
def main(img_list_file, root_dir, mtcnn_model_dir, save_dir=None):
    if not save_dir:
        save_dir = './aligned_images'
    if not osp.exists(save_dir):
        print('mkdir for aligned faces, aligned root dir: ', save_dir)
        os.makedirs(save_dir)

    aligned_save_dir = osp.join(save_dir, 'aligned_faces')
    if not osp.exists(aligned_save_dir):
        print('mkdir for aligned faces, aligned images dir: ', aligned_save_dir)
        os.makedirs(aligned_save_dir)

    # aligner = MtcnnAligner(mtcnn_model_dir, False)
    aligner = FaceAligner(mtcnn_model_dir)

    fp = open(img_list_file, 'r')
    fn_rlt = osp.join(save_dir, 'fd_rlt.json')
    fp_rlt = open(fn_rlt, 'w')
    fp_rlt.write('[\n')

    count = 0
    for line in fp:
        print line
        line_split = line.split()
        img_fn = line_split[0]
        id_num = line_split[1]

        img_fn_split = img_fn.split('/')
        img_fn = osp.join(root_dir, img_fn)
        print 'process image: ', img_fn, " id_num: ", id_num
        # for root,dirs,files in path_walk:
        err_msg = ''

        # write a separating comma before every record except the first
        if count:
            fp_rlt.write(',\n')
        count = count + 1
        print 'count: ', count

        overlap_thresh_0 = overlap_thresh

        save_subdir = osp.join(aligned_save_dir, img_fn_split[-2])
        save_img_fn = osp.join(save_subdir, img_fn_split[-1])
        if not osp.exists(save_subdir):
            os.makedirs(save_subdir)

        image = cv2.imread(img_fn)
        print image.shape

        boxes, points = aligner.align_face(image, [GT_RECT])
        box = boxes[0]
        pts = points[0]

        facial5points = np.reshape(points, (2, -1))
        # dst_img = warp_and_crop_face(image, facial5points, reference_5pts, output_size)
        dst_img = aligner.get_face_chips(image, [box], [pts])[0]
        cv2.imwrite(save_img_fn, dst_img)

        item = {}
        tmp = {'rect': box[0:4], 'score': box[4], 'pts': pts, 'id': id_num}
        item['faces'] = tmp
        # item['id'] = data[u'url'].line_splitit('/')[-3]
        item['shape'] = image.shape

        json_str = json.dumps(item, indent=2)
        fp_rlt.write(json_str + '\n')
        fp_rlt.flush()

    fp_rlt.write(']\n')
    fp_rlt.close()
    fp.close()
# import the necessary packages
from face_aligner import FaceAligner
from helpers import rect_to_bb
import argparse
import glob
import imutils
import dlib
import cv2

count = 0
uid = 69

# initialize dlib's face detector (HOG-based) and then create
# the facial landmark predictor and the face aligner
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat")
fa = FaceAligner(predictor, desiredFaceWidth=256)

ap = argparse.ArgumentParser()
ap.add_argument("-c", "--class", type=str, default="all",
                help="test-images or base-image")
args = vars(ap.parse_args())

if args["class"] == "base":
    images = glob.glob("test_images/*")
else:
    images = glob.glob("input_dir/*")

for img in images:
    print(img)
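# The per-image loop body is cut off above. The loop below is a plausible continuation,
# not the original code: it assumes the pyimagesearch-style FaceAligner.align(image, gray, rect)
# signature and the rect_to_bb() helper already imported above.
for img_path in images:
    image = cv2.imread(img_path)
    image = imutils.resize(image, width=800)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # detect faces in the grayscale image, then align each one
    rects = detector(gray, 2)
    for rect in rects:
        (x, y, w, h) = rect_to_bb(rect)
        face_aligned = fa.align(image, gray, rect)
        cv2.imwrite("aligned_{}_{}.jpg".format(uid, count), face_aligned)
        count += 1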
# Path Arguments
parser.add_argument(
    '--predictor_path',
    type=str,
    required=True,
    help='location of the dlib facial landmark predictor where shape_predictor_68_face_landmarks.dat is located')
parser.add_argument('--gallery_path',
                    type=str,
                    required=True,
                    help='location of the gallery')
parser.add_argument('--port', type=int, default=8000, help='which port to use')
args = parser.parse_args()

face_aligner = FaceAligner(args.predictor_path)
face_recognizer = FaceRecognizer(args.gallery_path, OpenCVAlgorithm, face_aligner)
register_handler = RegisterHandler(args.gallery_path, face_aligner)
recognize_handler = RecognizeHandler(args.gallery_path, face_aligner, face_recognizer)


class S(BaseHTTPRequestHandler):
    def _set_response(self, message=None):
        self.send_response(200)
        if message is not None:
            # self.send_header('Content-type', 'text/html')
            self.send_header('Content-type', 'application/json')
        self.end_headers()
def main(args):
    print('Creating networks and loading parameters')
    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=args.gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        with sess.as_default():
            pnet, rnet, onet = align.detect_face.create_mtcnn(sess, None)

    minsize = 20                 # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # three steps' threshold
    factor = 0.709               # scale factor

    # Create an object of the face aligner module
    affine = FaceAligner(desiredLeftEye=(0.39, 0.39),
                         desiredFaceWidth=256,
                         desiredFaceHeight=256)

    print("[INFO] camera sensor warming up...")
    vs = cv2.VideoCapture(0)
    vs.set(3, 1280)
    vs.set(4, 720)
    time.sleep(2.0)

    while True:
        ret, img = vs.read()

        # we get the bounding boxes as well as the points for the face
        bb, points = align.detect_face.detect_face(img, minsize, pnet, rnet,
                                                   onet, threshold, factor)
        # print("here they are \n")
        # print(points)

        # See if a face is detected
        if bb.shape[0] > 0:
            # Draw rectangles on the faces and circles on the landmarks
            for i in range(bb.shape[0]):
                cv2.rectangle(img, (int(bb[i][0]), int(bb[i][1])),
                              (int(bb[i][2]), int(bb[i][3])), (0, 255, 0), 2)

            # loop over the (x, y)-coordinates for the facial landmarks
            # and draw each of them
            for col in range(points.shape[1]):
                for i in range(5):
                    cv2.circle(img, (int(points[i][col]), int(points[i + 5][col])),
                               1, (255, 0, 0), -1)

            # ALIGNMENT - use the bounding boxes and facial landmarks to align images
            aligned_image = affine.align(img, points)

            # Show the image only if alignment is there
            cv2.imshow("Alignment", aligned_image)

        cv2.imshow("Output", img)
        key = cv2.waitKey(1) & 0xFF
        # if the `q` key was pressed, break from the loop
        if key == ord("q"):
            break
def create_net(configs):
    use_gpu = False
    roi_scale = 1.0

    CTX.logger.info("===> Input app configs: %s\n", str(configs))
    if not configs:
        configs = {}

    CTX.logger.info(
        "===> Try to load default app configs from: %s and use them to update configs\n",
        DEFAULT_APP_CONFIG_FNAME)
    try:
        fp = open(DEFAULT_APP_CONFIG_FNAME, 'r')
        _configs = json.load(fp)
        fp.close()

        CTX.logger.info("===> Loaded default app configs: %s\n", str(_configs))
        _configs.update(configs)
        configs = _configs
        CTX.logger.info("===> Updated app configs: %s\n", str(configs))

        mtcnn_model_path = ''
        feature_model_path = ''

        if "model_files" in configs:
            # print 'configs["model_files"]: ', configs["model_files"]
            for k, v in configs["model_files"].iteritems():
                if not mtcnn_model_path and k.startswith("mtcnn"):
                    if osp.isfile(v):
                        mtcnn_model_path = osp.dirname(v)
                    elif osp.isdir(v):
                        mtcnn_model_path = v
                if not feature_model_path and k.startswith("feature"):
                    if osp.isfile(v):
                        feature_model_path = osp.dirname(v)
                    elif osp.isdir(v):
                        feature_model_path = v

        if not mtcnn_model_path:
            raise Exception("Error: empty mtcnn_model_path\n")
        if not feature_model_path:
            raise Exception("Error: empty feature_model_path\n")

        configs["model_params"]["mtcnn_model_path"] = mtcnn_model_path
        configs["model_params"]["feature_model_path"] = feature_model_path
        configs["model_params"]["network_model"] = osp.join(
            feature_model_path, 'model,0')

        use_gpu = configs["use_device"].upper() == 'GPU'
        CTX.logger.info("===> use_gpu: %s", str(use_gpu))

        if 'gpu_id' not in configs["model_params"]:
            configs["model_params"]["gpu_id"] = 0
        if use_gpu:
            CTX.logger.info("===> gpu_id: %s",
                            str(configs["model_params"]["gpu_id"]))

        if 'roi_scale' in configs["model_params"]:
            roi_scale = configs["model_params"]['roi_scale']
    except Exception as e:
        CTX.logger.error("Error when load and update app configs: %s\n",
                         traceback.format_exc())
        return {}, 521, str(e)

    CTX.logger.info("===> Updated app configs: %s\n", str(configs))
    CTX.logger.info(
        "===> Try to load default extractor_config from: %s and update it by configs['model_params']\n",
        DEFAULT_EXTRACTOR_CONFIG_FNAME)
    try:
        fp = open(DEFAULT_EXTRACTOR_CONFIG_FNAME, 'r')
        extractor_config = json.load(fp)
        fp.close()

        CTX.logger.info("===> Loaded feature extractor configs: %s\n",
                        str(extractor_config))
        if 'model_params' in configs:
            extractor_config.update(configs["model_params"])
        # if 'feature_model' in configs["model_params"]:
        #     extractor_config["network_model"] = configs["model_params"]["feature_model"]
        if 'batch_size' in configs:
            extractor_config["batch_size"] = configs["batch_size"]

        if use_gpu:
            extractor_config["cpu_only"] = False
        else:
            extractor_config["cpu_only"] = True
    except Exception as e:
        CTX.logger.error("Error when load and update extractor configs: %s\n",
                         traceback.format_exc())
        return {}, 522, str(e)

    CTX.logger.info("===> Updated feature extractor configs: %s",
                    str(extractor_config))

    try:
        feature_extractor = MxnetFeatureExtractor(extractor_config)
    except Exception as e:
        CTX.logger.error("Error when init face feature extractor: %s\n",
                         traceback.format_exc())
        return {}, 523, str(e)

    try:
        face_aligner = FaceAligner(
            str(configs["model_params"]["mtcnn_model_path"]),
            configs["model_params"]["gpu_id"] if use_gpu else -1)
    except Exception as e:
        CTX.logger.error("Error when init face aligner: %s\n",
                         traceback.format_exc())
        return {}, 524, str(e)

    model = {
        "feature_extractor": feature_extractor,
        "face_aligner": face_aligner,
        "batch_size": configs["batch_size"],
        "input_height": extractor_config["input_height"],
        "input_width": extractor_config["input_width"],
        "workspace": configs["workspace"],
        "roi_scale": roi_scale
    }
    return model, 0, 'Success'
def dataset_creation():
    path = input("\nEnter the output folder location or simply press ENTER to create a dataset folder in this directory only: ").rstrip()

    if os.path.isdir(path):
        # User-given path is present.
        path += '/output'
        if os.path.isdir(path):
            print("Directory already exists. Using it \n")
        else:
            if not os.makedirs(path):
                print("Directory successfully made in: " + path + "\n")
    # either the user pressed ENTER or gave a wrong location.
    else:
        if path == "":
            print("Making an output folder in this directory only. \n")
        else:
            print("No such directory exists. Making an output folder in this current code directory only. \n")
        path = 'output'
        if os.path.isdir(path):
            print("Directory already exists. Using it \n")
        else:
            if os.makedirs(path):
                print("error in making directory. \n")
                sys.exit()
            else:
                print("Directory successfully made: " + path + "\n")

    # Ask for webcam resolution
    res = input("\nEnter your webcam SUPPORTED resolution for face detection. For eg. 640x480 OR press ENTER for default 640x480: ").rstrip().lower()
    if res == "":
        res = (640, 480)
    else:
        res = tuple(map(int, res.split('x')))

    # Start MTCNN face detection and pose estimation module.
    # Take gpu fraction values
    gpu_fraction = input("\nEnter the gpu memory fraction you want to allocate out of 1 or press ENTER for default 0.8: ").rstrip()
    if gpu_fraction == "":
        gpu_fraction = 0.8
    else:
        gpu_fraction = round(float(gpu_fraction), 1)

    # Some more MTCNN parameters
    minsize = 20                 # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # three steps' threshold
    factor = 0.709               # scale factor

    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        with sess.as_default():
            pnet, rnet, onet = align.detect_face.create_mtcnn(sess, None)

    # Create an object of the face aligner module
    face_size = input("\nEnter desired face width and height in WidthxHeight format OR press ENTER for default 160x160 pixel: ").rstrip().lower()
    if face_size == "":
        face_size = (160, 160)
    else:
        face_size = tuple(map(int, face_size.split('x')))

    affine = FaceAligner(desiredLeftEye=(0.33, 0.33),
                         desiredFaceWidth=face_size[0],
                         desiredFaceHeight=face_size[1])

    # Dataset creation was chosen before, so start collecting the dataset.
    while True:
        ask = input("\nEnter the user name for CREATING FOLDER with given username and image naming inside with username_xx.png numbered format or press ENTER to use default person_xx naming format: ").rstrip()
        # replace all spaces with underscores
        ask = ask.replace(" ", "_")
        if ask == "":
            folder_name = 'person' + str(personNo)
        else:
            folder_name = ask

        # Creating new user-specific variables
        personNo += 1
        users_folder = path + "/" + folder_name
        image_no = 1

        # Create the folder at the given location with the given username.
        if os.path.isdir(users_folder):
            print("Directory already exists. Using it \n")
        else:
            if os.makedirs(users_folder):
                print("error in making directory. \n")
                sys.exit()
            else:
                print("Directory successfully made: " + users_folder + "\n")

        # Start webcam or video file according to the user.
        data_type = input("Press ENTER for detecting " + folder_name + " with webcam or write video path to open and create dataset of " + folder_name + " : ").rstrip()

        # default webcam which uses an infinite loop; the video variable is used to find total frames
        loop_type = False
        total_frames = 0
        if data_type == "":
            data_type = 0
            loop_type = True

        # Initialize webcam or video
        device = cv2.VideoCapture(data_type)

        # If webcam, set resolution
        if data_type == 0:
            device.set(3, res[0])
            device.set(4, res[1])
        else:
            # Finding total number of frames of the video.
            total_frames = int(device.get(cv2.CAP_PROP_FRAME_COUNT))
            # Shutting down the webcam loop
            loop_type = False

        # Start the webcam or video and create the dataset for this user.
        while loop_type or (total_frames > 0):
            # If a video was selected, decrement the counter
            if loop_type == False:
                total_frames -= 1

            ret, image = device.read()

            # Run MTCNN and do face detection while the 's' key is pressed
            if (cv2.waitKey(1) & 0xFF) == ord("s"):
                # DETECT FACES. We get the bounding boxes as well as the points for the face
                bb, points = align.detect_face.detect_face(image, minsize, pnet, rnet, onet, threshold, factor)

                # See if a face is detected
                if bb.shape[0] > 0:
                    # align the detected faces
                    for col in range(points.shape[1]):
                        aligned_image = affine.align(image, points[:, col])
                        # Save the image
                        image_name = users_folder + "/" + folder_name + "_" + str(image_no).zfill(4) + ".png"
                        cv2.imwrite(image_name, aligned_image)
                        image_no += 1

                    # Draw the bounding boxes and pose landmarks on the image
                    for i in range(bb.shape[0]):
                        cv2.rectangle(image, (int(bb[i][0]), int(bb[i][1])),
                                      (int(bb[i][2]), int(bb[i][3])), (0, 255, 0), 2)

                    # loop over the (x, y)-coordinates for the facial landmarks
                    # and draw each of them
                    for col in range(points.shape[1]):
                        for i in range(5):
                            cv2.circle(image, (int(points[i][col]), int(points[i + 5][col])), 1, (0, 255, 0), -1)

            # Show the output video to the user
            cv2.imshow("Output", image)

            # Break this loop if the 'q' key is pressed, to go to the next user.
            if (cv2.waitKey(20) & 0xFF) == ord("q"):
                device.release()
                cv2.destroyAllWindows()
                break

        # Ask whether to add more users (webcam or video) or exit.
        ask = input("Press ENTER if you want to add more users or press the keyword 'q' to stop dataset creation: ")
        ask = ask.rstrip().lstrip().lower()
        if ask != "":
            if ask[0] == 'q':
                break

    # Dataset creation is complete. Ask the user to train now or exit.
    ask = input("Press ENTER to exit or \nPress T keyword to TRAIN and 'maybe' TEST later by creating a classifier on the facenet model OR \nPress W to test the dataset folder on a classifier model: ").rstrip().lstrip().lower()
    if ask == 't':
        train()
    elif ask == 'w':
        test()
    else:
        if ask == "":
            print("Cleaning and exiting. Thank You \n")
        else:
            print("\n wrong keyword pressed. Cleaning and exiting. \n Thank You \n")
def main(json_file, save_dir=None, save_img=True, show_img=True):
    if not osp.exists(json_file):
        print 'Cannot find json file: ' + json_file
        return

    if save_dir is None:
        save_dir = './fa_facex_rlt'

    save_json = 'mtcnn_align_rlt.json'
    model_path = "../../model"

    fp_json = open(json_file, 'r')
    facex_response = json.load(fp_json)
    fp_json.close()

    if (not facex_response or not isinstance(facex_response, dict)
            or 'facex_det' not in facex_response):
        print 'Invalid json file: ' + json_file
        return

    facex_det_response = facex_response['facex_det']

    if not osp.exists(save_dir):
        os.makedirs(save_dir)

    fp_rlt = open(osp.join(save_dir, save_json), 'w')
    results = []

    for item in facex_det_response:
        img_path = item['name']
        print '===> Processing image: ' + img_path

        if 'detections' not in item:
            continue

        face_rects = []
        for face in item['detections']:
            face_rects.append(face['pts'])

        img = cv2.imread(img_path)

        aligner = FaceAligner(model_path, False)

        rlt = {}
        rlt["filename"] = img_path
        rlt["faces"] = []
        rlt['face_count'] = 0

        t1 = time.clock()
        bboxes, points = aligner.align_face(img, face_rects)
        t2 = time.clock()

        n_boxes = len(face_rects)
        print("-->Alignment cost %f seconds, processed %d face rects, avg time: %f seconds"
              % ((t2 - t1), n_boxes, (t2 - t1) / n_boxes))

        if bboxes is not None and len(bboxes) > 0:
            for (box, pts) in zip(bboxes, points):
                # box = box.tolist()
                # pts = pts.tolist()
                tmp = {'rect': box[0:4], 'score': box[4], 'pts': pts}
                rlt['faces'].append(tmp)
            rlt['face_count'] = len(bboxes)

        rlt['message'] = 'success'
        results.append(rlt)

        spl = osp.split(img_path)
        sub_dir = osp.split(spl[0])[1]
        base_name = spl[1]

        save_img_subdir = osp.join(save_dir, sub_dir)
        if not osp.exists(save_img_subdir):
            os.mkdir(save_img_subdir)

        # save_rect_subdir = osp.join(save_dir, sub_dir)
        # if not osp.exists(save_rect_subdir):
        #     os.mkdir(save_rect_subdir)
        # print pts

        save_img_fn = osp.join(save_img_subdir, base_name)
        print 'save face chip into ', save_img_fn

        # facial5points = np.reshape(pts, (2, -1))
        # dst_img = warp_and_crop_face(
        #     img, facial5points, reference_5pts, output_size)
        dst_img = aligner.get_face_chips(img, [box], [pts], True)[0]
        cv2.imwrite(save_img_fn, dst_img)

    json.dump(results, fp_rlt, indent=2)
    fp_rlt.close()
def main(args):
    print('Creating networks and loading parameters')

    # Building separate graphs for both the networks
    g1 = tf.Graph()
    g2 = tf.Graph()

    # images_placeholder = tf.placeholder(tf.int32)
    # embeddings = tf.Variable()
    # phase_train_placeholder = tf.placeholder(tf.bool)

    with g1.as_default():
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=args.gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        with tf.Session() as sess:
            facenet.load_model(args.model)

    # with tf.Graph().as_default():
    #     with tf.Session() as sess:
    with g2.as_default():
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=args.gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        with sess.as_default():
            pnet, rnet, onet = align.detect_face.create_mtcnn(sess, None)

    minsize = 20                 # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # three steps' threshold
    factor = 0.709               # scale factor

    # Create an object of the face aligner module
    affine = FaceAligner(desiredLeftEye=(0.33, 0.33),
                         desiredFaceWidth=160,
                         desiredFaceHeight=160)

    # Load the model for FaceNet image recognition and get the tensors
    print("[INFO] camera sensor warming up...")
    vs = cv2.VideoCapture(0)
    vs.set(3, 640)
    vs.set(4, 480)
    time.sleep(2.0)

    while True:
        ret, img = vs.read()

        # we get the bounding boxes as well as the points for the face
        g2.as_default()
        with tf.Session(graph=g2) as sess:
            bb, points = align.detect_face.detect_face(img, minsize, pnet, rnet,
                                                       onet, threshold, factor)
            # print("here they are \n")
            # print(points)

            # See if a face is detected
            if bb.shape[0] > 0:
                # Draw rectangles on the faces and circles on the landmarks
                for i in range(bb.shape[0]):
                    cv2.rectangle(img, (int(bb[i][0]), int(bb[i][1])),
                                  (int(bb[i][2]), int(bb[i][3])), (0, 255, 0), 2)

                # loop over the (x, y)-coordinates for the facial landmarks
                # and draw each of them
                for col in range(points.shape[1]):
                    for i in range(5):
                        cv2.circle(img, (int(points[i][col]), int(points[i + 5][col])),
                                   1, (255, 0, 0), -1)

                # ALIGNMENT - use the bounding boxes and facial landmarks to align images
                aligned_image = affine.align(img, points)

                # Show the image only if alignment is there
                cv2.imshow("Alignment", aligned_image)

                # Prewhiten the image for the facenet architecture to give better results
                mean = np.mean(aligned_image)
                std = np.std(aligned_image)
                std_adj = np.maximum(std, 1.0 / np.sqrt(aligned_image.size))
                facenet_image = np.multiply(np.subtract(aligned_image, mean), 1 / std_adj)

                img_list = []
                img_list.append(facenet_image)
                img_list.append(facenet_image)
                images = np.stack(img_list)

                g1.as_default()
                with tf.Session(graph=g1) as sess:
                    # Run forward pass on FaceNet to get the embeddings
                    images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
                    embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
                    phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
                    feed_dict = {
                        images_placeholder: images,
                        phase_train_placeholder: False
                    }
                    embedding = sess.run(embeddings, feed_dict=feed_dict)
                    print("Here is the embedding \n")
                    print(embedding)
                    print("\n")

        cv2.imshow("Output", img)
        key = cv2.waitKey(1) & 0xFF
        # if the `q` key was pressed, break from the loop
        if key == ord("q"):
            break
def main():
    print("\n*********************************************************************************************** \n")
    print("          Welcome to the Face detection and recognition program. \n")
    print("\n*********************************************************************************************** \n")
    print("GUIDELINES TO USE THIS SOFTWARE: \n\nThis code lets the user:\n\n1) CREATE DATASET using MTCNN face detection and alignment. or\n2) TRAIN FaceNet for face recognition. or \n3) Do both.\n\n The user will repeatedly get the option to choose webcam (default option) or video file to do face detection and will be asked for output folder, username on folder and image files etc also (default options exist for that too)\n\n ************** IMPORTANT *************\n1) Whenever webcam or video starts press 's' keyword to start face detection in video or webcam frames and save the faces in the folder for a single user. This dataset creation will stop the moment you release the 's' key. This can be done multiple times.\n\n2) Press 'q' to close it when you are done with one person, and want to detect faces for another person. \n\n3) Make sure you press the keywords on the image window and not the terminal window.\n")

    mode = input("Press T to train the facenet for recognition OR \nPress D to first create dataset and then 'maybe' train later: ")

    # Some variables that will be used throughout the code
    path = ""
    res = ()
    personNo = 1
    folder_name = ""

    # This means the user chose dataset creation
    if mode == 'D':
        path = input("Enter the output folder location or simply press ENTER to create a dataset folder in this directory only: ")
        if os.path.isdir(path):
            # User-given path is present.
            path += '/output'
            if os.path.isdir(path):
                print("Directory already exists. Using it \n")
            else:
                if not os.makedirs(path):
                    print("Directory successfully made in: " + path + "\n")
        # either the user pressed ENTER or gave a wrong location.
        else:
            if path == "":
                print("Making an output folder in this directory only. \n")
            else:
                print("No such directory exists. Making an output folder in this current code directory only. \n")
            path = 'output'
            if os.path.isdir(path):
                print("Directory already exists. Using it \n")
            else:
                if os.makedirs(path):
                    print("error in making directory. \n")
                    sys.exit()
                else:
                    print("Directory successfully made: " + path + "\n")

        # Ask for webcam resolution
        res = input("Enter your webcam SUPPORTED resolution for face detection. For eg. 640x480 OR press ENTER for default 640x480: ")
        if res == "":
            res = (640, 480)
        else:
            res = tuple(map(int, res.split("x")))

        # Start MTCNN face detection and pose estimation module.
        # Take gpu fraction values
        gpu_fraction = input("\nEnter the gpu memory fraction you want to allocate out of 1 or press ENTER for default 0.8: ")
        if gpu_fraction == "":
            gpu_fraction = 0.8
        else:
            gpu_fraction = round(float(gpu_fraction), 1)

        # Some more MTCNN parameters
        minsize = 20                 # minimum size of face
        threshold = [0.6, 0.7, 0.7]  # three steps' threshold
        factor = 0.709               # scale factor

        with tf.Graph().as_default():
            gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_fraction)
            sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                    log_device_placement=False))
            with sess.as_default():
                pnet, rnet, onet = align.detect_face.create_mtcnn(sess, None)

        # Create an object of the face aligner module
        face_size = input("Enter desired face width and height in widthxheight format OR press ENTER for default 160x160 pixel: ")
        if face_size == "":
            face_size = (160, 160)
        else:
            face_size = tuple(map(int, face_size.split("x")))

        affine = FaceAligner(desiredLeftEye=(0.33, 0.33),
                             desiredFaceWidth=face_size[0],
                             desiredFaceHeight=face_size[1])

    # This means the user chose the training part
    elif mode == 'T':
        train()
    else:
        print("No correct keyword entered. Exiting")
        sys.exit()

    # Dataset creation was chosen before, so start collecting the dataset.
    while True:
        ask = input("\n Enter the user name for CREATING FOLDER with given username and image naming inside with username_xx.png numbered format or press ENTER to use default person_xx naming format: ")
        # replace all spaces with underscores
        ask = ask.replace(" ", "_")
        if ask == "":
            folder_name = 'person_' + str(personNo)
        else:
            folder_name = ask

        # Creating new user-specific variables
        personNo += 1
        users_folder = path + "/" + folder_name
        image_no = 0

        # Create the folder at the given location with the given username.
        if os.path.isdir(users_folder):
            print("Directory already exists. Using it \n")
        else:
            if os.makedirs(users_folder):
                print("error in making directory. \n")
                sys.exit()
            else:
                print("Directory successfully made: " + users_folder + "\n")

        # Start webcam or video file according to the user.
        data_type = input("Press ENTER for detecting " + folder_name + " with webcam or write video path to open and create dataset of " + folder_name + " : ")

        # default webcam which uses an infinite loop; the video variable is used to find total frames
        loop_type = False
        total_frames = 0
        if data_type == "":
            data_type = 0
            loop_type = True

        # Initialize webcam or video
        device = cv2.VideoCapture(data_type)

        # If webcam, set resolution
        if data_type == 0:
            device.set(3, res[0])
            device.set(4, res[1])
        else:
            # Finding total number of frames of the video.
            total_frames = int(device.get(cv2.CAP_PROP_FRAME_COUNT))

        # Start the webcam or video and create the dataset for this user.
        while loop_type or (total_frames > 0):
            total_frames -= 1
            ret, image = device.read()

            # Run MTCNN and do face detection while the 's' key is pressed
            if (cv2.waitKey(1) & 0xFF) == ord("s"):
                # DETECT FACES. We get the bounding boxes as well as the points for the face
                bb, points = align.detect_face.detect_face(image, minsize, pnet, rnet, onet, threshold, factor)

                # See if a face is detected
                if bb.shape[0] > 0:
                    # align the detected faces
                    for col in range(points.shape[1]):
                        aligned_image = affine.align(image, points[:, col])
                        # Save the image
                        image_name = users_folder + "/" + folder_name + "_" + str(image_no).zfill(3) + ".png"
                        cv2.imwrite(image_name, aligned_image)
                        image_no += 1

                    # Draw the bounding boxes and pose landmarks on the image
                    for i in range(bb.shape[0]):
                        cv2.rectangle(image, (int(bb[i][0]), int(bb[i][1])),
                                      (int(bb[i][2]), int(bb[i][3])), (0, 255, 0), 2)

                    # loop over the (x, y)-coordinates for the facial landmarks
                    # and draw each of them
                    for col in range(points.shape[1]):
                        for i in range(5):
                            cv2.circle(image, (int(points[i][col]), int(points[i + 5][col])), 1, (0, 255, 0), -1)

            # Show the output video to the user
            cv2.imshow("Output", image)

            # Break this loop if the 'q' key is pressed, to go to the next user.
            if (cv2.waitKey(1) & 0xFF) == ord("q"):
                device.release()
                cv2.destroyAllWindows()
                break

        # Ask whether to add more users (webcam or video) or exit.
        ask = input("Press ENTER if you want to add more users or press the keyword 'q' to stop dataset creation: ")
        if ask == 'q':
            break

    # Dataset creation is complete. Ask the user to train now or exit.
    ask = input("Press ENTER to exit or press T keyword to train the data by Facenet model on dataset: ")
    if ask == "T":
        train()
img_path = r'C:\zyf\00_Ataraxia\facex\facex_cluster_test_imgs-wlc\3\3.jpg'
face_rect1 = [[490, 353], [767, 353], [767, 757], [490, 757]]
face_rects = [face_rect1]

base_name = osp.basename(img_path)
name, ext = osp.splitext(base_name)
ext = '.png'

# fp_rlt = open(osp.join(save_dir, save_json), 'w')
# results = []

img = cv2.imread(img_path)
aligner = FaceAligner(caffe_model_path)

t1 = time.clock()
# bboxes, points = aligner.align_face(img, face_rects)
# face_chips = aligner.get_face_chips(img, bboxes, points)
face_chips = aligner.get_face_chips(img, face_rects)
t2 = time.clock()

for i, chip in enumerate(face_chips):
    save_name = osp.join(save_dir, 'face_chip_%s_%d' % (name, i) + ext)
    cv2.imwrite(save_name, chip)
    if show_img:
        cv2.imshow('face_chip', chip)
def main(argv):
    args = parse_arguments(argv)
    print '===> args:\n', args

    config = load_config(args.config)
    print '===> config:\n', config

    max_faces = config['max_faces']
    extractor_config = config['face_feature']
    mtcnn_model_path = str(config['mtcnn_model_dir'])

    do_detect = not args.no_detect
    do_align = not args.no_align

    save_dir = args.save_dir
    if not osp.exists(save_dir):
        os.makedirs(save_dir)

    pair_save_dir = osp.join(save_dir, 'img_pairs')
    if not osp.exists(pair_save_dir):
        os.mkdir(pair_save_dir)

    save_img = args.save_image
    show_img = args.show_image

    detector = None
    aligner = None

    if do_detect:
        detector = MtcnnDetector(mtcnn_model_path)

    if do_align:
        if not do_detect:
            aligner = FaceAligner(mtcnn_model_path)
        else:
            aligner = FaceAligner(None)
    else:
        aligner = None

    feature_extractor = CaffeFeatureExtractor(extractor_config)

    ctx_static = {}
    # ctx_static['args'] = args
    ctx_static['detector'] = detector
    ctx_static['aligner'] = aligner
    ctx_static['feature_extractor'] = feature_extractor
    ctx_static['do_detect'] = do_detect
    ctx_static['do_align'] = do_align
    ctx_static['save_img'] = save_img
    ctx_static['show_img'] = show_img
    ctx_static['save_dir'] = save_dir
    ctx_static['max_faces'] = max_faces

    # result_list = []
    img_cnt = 0
    faces_cnt = 0
    ttl_det_time = 0.0
    ttl_feat_time = 0.0

    ctx_active = {}
    # ctx_active['result_list'] = result_list
    ctx_active['img_cnt'] = img_cnt
    ctx_active['faces_cnt'] = faces_cnt
    ctx_active['ttl_det_time'] = ttl_det_time
    ctx_active['ttl_feat_time'] = ttl_feat_time

    fp = open(args.img_list_file, 'r')
    fp_rlt = open(osp.join(save_dir, 'face_feature.json'), 'w')
    fp_rlt.write('[\n')
    write_comma_flag = False

    while True:
        line = fp.readline().strip()
        print '---> line: ', line
        if not line:
            break

        img_path = get_image_path(line, args.image_root_dir)
        print '---> img_path: ', img_path

        (rlt, features, face_chips) = detect_faces_and_extract_features(
            img_path, ctx_static, ctx_active)
        # print 'features: ', features
        # print 'id(features): ', id(features)

        # result_list.append(rlt)
        if write_comma_flag:
            fp_rlt.write(',\n')
        else:
            write_comma_flag = True
        json_str = json.dumps(rlt, indent=2)
        fp_rlt.write(json_str)
        fp_rlt.flush()

        line = fp.readline().strip()
        print '---> line: ', line
        if not line:
            break

        img_path2 = get_image_path(line, args.image_root_dir)
        print '---> img_path2: ', img_path2

        (rlt2, features2, face_chips2) = detect_faces_and_extract_features(
            img_path2, ctx_static, ctx_active)
        # print 'features2: ', features2
        # print 'features: ', features
        # print 'id(features): ', id(features)
        # print 'id(features2): ', id(features2)
        # print 'features.data: ', id(features.data)
        # print 'features2.data: ', id(features2.data)

        # result_list.append(rlt2)
        json_str = json.dumps(rlt2, indent=2)
        fp_rlt.write(',\n' + json_str)
        fp_rlt.flush()

        if rlt['face_count'] and rlt2['face_count']:
            # sim = calc_similarity(features[0], features2[0])
            # img_pair = np.hstack((face_chips[0], face_chips2[0]))
            # img_pair_fn = '%s_%d_vs_%s_%d_%5.4f.jpg' % (osp.basename(img_path), 0, osp.basename(img_path2), 0, sim)
            # img_pair_fn = osp.join(pair_save_dir, img_pair_fn)
            # cv2.imwrite(img_pair_fn, img_pair)
            # print '---> similarity: ', sim
            for j in range(rlt['face_count']):
                for i in range(rlt2['face_count']):
                    sim = calc_similarity(features[j], features2[i])
                    print 'features[%d]: ' % j, features[j]
                    print 'features2[%d]: ' % i, features2[i]

                    img_pair = np.hstack((face_chips[j], face_chips2[i]))
                    img_pair_fn = '%s_%d_vs_%s_%d_%5.4f.jpg' % (osp.basename(img_path), j,
                                                                osp.basename(img_path2), i, sim)
                    img_pair_fn = osp.join(pair_save_dir, img_pair_fn)

                    sim_txt = '%5.4f' % sim
                    cv2_put_text_to_image(img_pair, sim_txt, 40, 5, 30, (0, 0, 255))
                    cv2.imwrite(img_pair_fn, img_pair)

                    print '---> similarity: ', sim

    # json.dump(result_list, fp_rlt, indent=2)
    fp_rlt.write('\n]\n')
    fp_rlt.close()
    fp.close()

    if show_img:
        cv2.destroyAllWindows()
def main(args):
    print('Creating networks and loading parameters')

    # Building separate graphs for both the tf architectures
    # g1 = tf.Graph()
    g2 = tf.Graph()

    '''
    with g1.as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
        with tf.Session() as sess:
            # Load the model for FaceNet image recognition
            facenet.load_model(args.model)
    '''

    with g2.as_default():
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=args.gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        with sess.as_default():
            pnet, rnet, onet = align.detect_face.create_mtcnn(sess, None)

    minsize = 20                 # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # three steps' threshold
    factor = 0.709               # scale factor

    # Create an object of the face aligner module
    affine = FaceAligner(desiredLeftEye=(0.33, 0.33),
                         desiredFaceWidth=160,
                         desiredFaceHeight=160)

    # Taking the video and creating an object of it.
    print("[INFO] Taking the video input.")
    vs = cv2.VideoCapture(os.path.expanduser(args.video))

    # Finding the file format, size and the fps rate
    fps = vs.get(cv2.CAP_PROP_FPS)
    video_format = int(vs.get(cv2.CAP_PROP_FOURCC))
    frame_size = (int(vs.get(cv2.CAP_PROP_FRAME_WIDTH)),
                  int(vs.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    total_frames = int(vs.get(cv2.CAP_PROP_FRAME_COUNT))

    output_video = cv2.VideoWriter("Output_" + args.video, video_format, fps, frame_size)

    # Create the output_faces directory from the user-given or default arguments
    path = os.path.expanduser(args.output)
    path = path + "/output_faces"
    if not os.path.isdir(path):
        os.makedirs(path)

    image_numbers = 0

    print("Total number of frames \n" + str(total_frames) + "\n")

    # for i in range(total_frames):
    for i in range(total_frames):
        # Print the current frame / total frames to track progress
        print("\n" + str(i) + " / " + str(total_frames) + "\n")

        ret, image = vs.read()

        # Run the MTCNN model to detect faces
        g2.as_default()
        with tf.Session(graph=g2) as sess:
            # we get the bounding boxes as well as the points for the face
            bb, points = align.detect_face.detect_face(image, minsize, pnet, rnet,
                                                       onet, threshold, factor)

            # See if a face is detected
            if bb.shape[0] > 0:
                # ALIGNMENT - use the bounding boxes and facial landmarks to align images
                # create a numpy array to feed the network
                img_list = []
                images = np.empty([bb.shape[0], image.shape[0], image.shape[1]])
                for col in range(points.shape[1]):
                    aligned_image = affine.align(image, points[:, col])

                    if args.show_video == True:
                        cv2.imshow("aligned", aligned_image)

                    # Prewhiten the image for the facenet architecture to give better results
                    # mean = np.mean(aligned_image)
                    # std = np.std(aligned_image)
                    # std_adj = np.maximum(std, 1.0/np.sqrt(aligned_image.size))
                    # ready_image = np.multiply(np.subtract(aligned_image, mean), 1/std_adj)

                    # Save the detected face images
                    place = path + "/" + "output_faces_" + str(image_numbers) + ".png"
                    print("saved to: " + place + "\n")
                    cv2.imwrite(place, aligned_image)
                    image_numbers += 1

                # if we want to show or save the video then draw the boxes and the points on the image
                if args.show_video == True or args.save_video == True:
                    for i in range(bb.shape[0]):
                        cv2.rectangle(image, (int(bb[i][0]), int(bb[i][1])),
                                      (int(bb[i][2]), int(bb[i][3])), (0, 255, 0), 2)

                    # loop over the (x, y)-coordinates for the facial landmarks
                    # and draw each of them
                    for col in range(points.shape[1]):
                        for i in range(5):
                            cv2.circle(image,
                                       (int(points[i][col]), int(points[i + 5][col])),
                                       1, (255, 0, 0), -1)

                    if args.save_video == True:
                        output_video.write(image)
                    if args.show_video == True:
                        cv2.imshow("Output", image)

        # Save the final aligned face image in given format
        """
        # Show the image
        # cv2.imshow(str(col), aligned_image)
        img_list.append(ready_image)
        images = np.stack(img_list)

        g1.as_default()
        with tf.Session(graph=g1) as sess:
            # Run forward pass on FaceNet to get the embeddings
            images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
            embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
            phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
            feed_dict = {images_placeholder: images, phase_train_placeholder: False}
            embedding = sess.run(embeddings, feed_dict=feed_dict)
            print("Here is the embedding \n")
            print(embedding.shape)
            print("\n")
        """

        key = cv2.waitKey(1) & 0xFF
        # if the `q` key was pressed, break from the loop
        if key == ord("q"):
            # if keyboard.is_pressed('q'):
            # do a bit of cleanup
            vs.release()
            output_video.release()
            cv2.destroyAllWindows()
            break
img_path = '../../test_imgs/Marilyn_Monroe_0002.jpg'
face_rect1 = [[91, 57], [173, 57], [173, 180], [91, 180]]
face_rects = [face_rect1]

base_name = osp.basename(img_path)
name, ext = osp.splitext(base_name)
ext = '.png'

# fp_rlt = open(osp.join(save_dir, save_json), 'w')
# results = []

img = cv2.imread(img_path)
aligner = FaceAligner(model_path)

t1 = time.clock()
# You can align the faces in two steps like this:
#     bboxes, points = aligner.align_face(img, face_rects)
#     face_chips = aligner.get_face_chips(img, bboxes, points)
# OR just align them in one step by calling the following function,
# which combines the last two functions
face_chips = aligner.get_face_chips(img, face_rects)
t2 = time.clock()

for i, chip in enumerate(face_chips):
    print('---> chip.shape: ', chip.shape)
    save_name = osp.join(save_dir, 'face_chip_%s_%d' % (name, i) + ext)
    cv2.imwrite(save_name, chip)
def main(argv):
    args = parse_arguments(argv)
    print '===> args:\n', args

    config = load_config(args.config)
    print '===> config:\n', config

    extractor_config = config['face_feature']
    mtcnn_model_path = str(config['mtcnn_model_dir'])

    do_detect = not args.no_detect
    do_align = not args.no_align

    save_dir = args.save_dir
    if not osp.exists(save_dir):
        os.makedirs(save_dir)

    save_img = args.save_image
    show_img = args.show_image

    detector = None
    aligner = None

    if do_detect:
        detector = MtcnnDetector(mtcnn_model_path)

    if do_align:
        if not do_detect:
            aligner = FaceAligner(mtcnn_model_path)
        else:
            aligner = FaceAligner(None)
    else:
        aligner = None

    feature_extractor = CaffeFeatureExtractor(extractor_config)
    feat_layer = feature_extractor.get_feature_layers()[0]

    fp = open(args.img_list_file, 'r')
    fp_rlt = open(osp.join(save_dir, 'face_feature.json'), 'w')
    fp_rlt.write('[\n')
    write_comma_flag = False

    # result_list = []
    img_cnt = 0
    faces_cnt = 0
    ttl_det_time = 0.0
    ttl_feat_time = 0.0

    for line in fp:
        img_path = line.strip()
        print("\n===>" + img_path)
        if img_path == '':
            print 'empty line, not a file name, skip to next'
            continue
        if img_path[0] == '#':
            print 'skip line starts with #, skip to next'
            continue

        # result_list.append(rlt)
        if write_comma_flag:
            fp_rlt.write(',\n')
        else:
            write_comma_flag = True

        rlt = {}
        rlt["filename"] = img_path
        rlt["faces"] = []
        rlt['face_count'] = 0

        try:
            if args.image_root_dir:
                img = cv2.imread(osp.join(args.image_root_dir, img_path))
            else:
                img = cv2.imread(img_path)
            print '\n---> img.shape: ', img.shape
        except:
            print('failed to load image: ' + img_path)
            # rlt["message"] = "failed to load"
            json_str = json.dumps(rlt, indent=2)
            fp_rlt.write(json_str)
            fp_rlt.flush()
            continue

        if img is None:
            print('failed to load image: ' + img_path)
            rlt["message"] = "failed to load"
            # result_list.append(rlt)
            json_str = json.dumps(rlt, indent=2)
            fp_rlt.write(json_str)
            fp_rlt.flush()
            continue

        img_cnt += 1

        if do_detect:
            t1 = time.clock()
            bboxes, points = detector.detect_face(img)
            t2 = time.clock()
            ttl_det_time += t2 - t1
            print("detect_face() costs %f seconds" % (t2 - t1))
        else:
            print '---> Will not do detection because of option "--no_detect"'
            shp = img.shape
            rect = [0, 0, shp[1] - 1, shp[0] - 1, 1.0]
            bboxes = [rect]
            points = [None]

        n_faces = 0
        if bboxes is not None:
            n_faces = len(bboxes)

        if n_faces > 0:
            for (box, pts) in zip(bboxes, points):
                # box = box.tolist()
                # pts = pts.tolist()
                tmp = {'rect': box[0:4], 'score': box[4], 'pts': pts}
                rlt['faces'].append(tmp)
            rlt['face_count'] = n_faces

        # print('output bboxes: ' + str(bboxes))
        # print('output points: ' + str(points))
        # toc()

        if do_detect:
            print("\n===> Detect %d images, costs %f seconds, avg time: %f seconds"
                  % (img_cnt, ttl_det_time, ttl_det_time / img_cnt))
        print "---> %d faces detected" % n_faces

        if not n_faces:
            continue

        t1 = time.clock()
        if do_align:
            if points is None or points[0] is None:
                face_chips = aligner.get_face_chips(img, bboxes, None)
            else:
                face_chips = aligner.get_face_chips(img, bboxes, points)
            # face_chips = aligner.get_face_chips(img, bboxes, None)
            # face_chips = [im.astype(np.float) for im in face_chips_ubyte]
        else:
            print '---> Will not do alignment because of option "--no_align"'
            face_chips = [img.astype(np.float)]

        features = feature_extractor.extract_features_batch(face_chips)[feat_layer]
        t2 = time.clock()
        ttl_feat_time += t2 - t1
        print("Cropping and extracting features for %d faces cost %f seconds"
              % (n_faces, t2 - t1))

        faces_cnt += n_faces
        print("\n===> Extracting features for %d faces, costs %f seconds, avg time: %f seconds"
              % (faces_cnt, ttl_feat_time, ttl_feat_time / faces_cnt))

        for i, box in enumerate(bboxes):
            # feat_file = '%s_%d_rect[%d_%d_%d_%d].npy' % (
            #     osp.basename(img_path), i, box[0], box[1], box[2], box[3])
            # feat_file = osp.join(save_dir, feat_file)
            # np.save(feat_file, features[i])
            base_name = osp.basename(img_path)
            face_fn_prefix = '%s_face_%d' % (osp.splitext(base_name)[0], i)

            feat_file = face_fn_prefix + '.npy'
            np.save(osp.join(save_dir, feat_file), features[i])

            face_chip_fn = face_fn_prefix + '.jpg'
            cv2.imwrite(osp.join(save_dir, face_chip_fn), face_chips[i])

            rlt['faces'][i]['feat'] = feat_file
            rlt['faces'][i]['face_chip'] = face_chip_fn

        rlt['message'] = 'success'
        # result_list.append(rlt)
        json_str = json.dumps(rlt, indent=2)
        fp_rlt.write(json_str)
        fp_rlt.flush()

        if save_img or show_img:
            draw_faces(img, bboxes, points)

        if save_img:
            save_name = osp.join(save_dir, osp.basename(img_path))
            cv2.imwrite(save_name, img)

        if show_img:
            cv2.imshow('img', img)
            ch = cv2.waitKey(0) & 0xFF
            if ch == 27:
                break

    # json.dump(result_list, fp_rlt, indent=4)
    fp_rlt.write('\n]\n')
    fp_rlt.close()
    fp.close()

    if show_img:
        cv2.destroyAllWindows()
def recognize():
    # Taking the parameters for recognition from the user
    classifier_filename = input("\nEnter the path of the classifier .pkl file or press ENTER if a filename 'classifier.pkl' is present in this code directory itself: ")
    if classifier_filename == "":
        classifier_filename = 'classifier.pkl'
    classifier_filename = os.path.expanduser(classifier_filename)

    model = input("\nEnter the FOLDER PATH inside which 20180402-114759 FOLDER is present. Press ENTER stating that the FOLDER 20180402-114759 is present in this code directory itself: ").rstrip()
    if model == "":
        model = "20180402-114759/20180402-114759.pb"

    # Create an object of the face aligner module
    image_size = (160, 160)
    ask = input("\nEnter desired face width and height in WidthxHeight format for the face aligner OR press ENTER for default 160x160 pixel: ").rstrip().lower()
    if ask != "":
        image_size = tuple(map(int, ask.split('x')))

    # Take gpu fraction values
    gpu_fraction = input("\nEnter the gpu memory fraction you want to allocate out of 1 or press ENTER for default 0.8: ").rstrip()
    if gpu_fraction == "":
        gpu_fraction = 0.8
    else:
        gpu_fraction = round(float(gpu_fraction), 1)

    input_type = input("\nPress I for image input OR\nPress V for video input OR\nPress W for webcam input OR\nPress ENTER for default webcam: ").lstrip().rstrip().lower()
    if input_type == "":
        input_type = 'w'

    # Load the face aligner model
    affine = FaceAligner(desiredLeftEye=(0.33, 0.33),
                         desiredFaceWidth=image_size[0],
                         desiredFaceHeight=image_size[1])

    # Building separate graphs for both the tf architectures
    g1 = tf.Graph()
    g2 = tf.Graph()

    # Load the model for FaceNet image recognition
    with g1.as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        with tf.Session() as sess:
            facenet.load_model(model)

    # Load the model of MTCNN face detection.
    with g2.as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        with sess.as_default():
            pnet, rnet, onet = align.detect_face.create_mtcnn(sess, None)

    # Some MTCNN network parameters
    minsize = 20                 # minimum size of face
    threshold = [0.6, 0.7, 0.8]  # three steps' threshold
    factor = 0.709               # scale factor

    ask = input("\nEnter the threshold FACE DETECTION CONFIDENCE SCORE to consider detection by MTCNN OR press ENTER for default 0.80: ")
    if ask != "" and float(ask) < 1:
        threshold[2] = round(float(ask), 2)

    classifier_threshold = 0.50
    ask = input("\nEnter the threshold FACE RECOGNITION CONFIDENCE SCORE to consider face is recognised OR press ENTER for default 0.50: ")
    if ask != "":
        classifier_threshold = float(ask)

    # Loading the classifier model
    with open(classifier_filename, 'rb') as infile:
        (modelSVM, class_names) = pickle.load(infile)
    print('\nLoaded classifier model from file "%s"' % classifier_filename)

    # default webcam which uses an infinite loop, or video/image settings
    loop_type = False
    image_input = 0
    total_frames = 0
    save_video = False
    frame_no = 1
    output_video = []
    image = []
    display_output = True
    res = (640, 480)

    # If webcam is selected
    if input_type == "w":
        data_type = 0
        loop_type = True

        # Ask for webcam resolution
        ask = input("\nEnter your webcam SUPPORTED resolution for face detection. For eg. 640x480 OR press ENTER for default 640x480: ").rstrip().lower()
        if ask != "":
            res = tuple(map(int, ask.split('x')))

    # If image is selected, try to treat it as a video with a single frame
    elif input_type == "i":
        loop_type = False
        total_frames = 0
        data_type = input("\nWrite the image path file to open: ").rstrip().lstrip()
        image = cv2.imread(data_type)
        # Jump directly into the code to go through a single pass
        goto(581)

    # Video is selected
    else:
        loop_type = False
        data_type = input("\nWrite the video path file to open: ").rstrip().lstrip()

        ask = input("\nPress y to save the output video OR simply press ENTER to ignore it: ").lstrip().rstrip().lower()
        if ask == "y":
            save_video = True

        ask = input("\nSimply press ENTER to see the output video frames OR press N to switch off the output display: ").lstrip().rstrip().lower()
        if ask == "n":
            display_output = False

    # Initialize webcam or video
    device = cv2.VideoCapture(data_type)

    # If webcam, set resolution
    if input_type == "w":
        device.set(3, res[0])
        device.set(4, res[1])
    elif input_type == "v":
        # Finding total number of frames of the video.
        total_frames = int(device.get(cv2.CAP_PROP_FRAME_COUNT))
        # Shutting down the webcam loop
        loop_type = False

        # save video feature.
        if save_video:
            # Finding the file format, size and the fps rate
            fps = device.get(cv2.CAP_PROP_FPS)
            video_format = int(device.get(cv2.CAP_PROP_FOURCC))
            frame_size = (int(device.get(cv2.CAP_PROP_FRAME_WIDTH)),
                          int(device.get(cv2.CAP_PROP_FRAME_HEIGHT)))

            # Creating a video writer to save the processed video if needed
            output_video = cv2.VideoWriter("Output_" + data_type, video_format, fps, frame_size)

    # Start the webcam or video and run recognition frame by frame.
    while loop_type or (frame_no <= total_frames):
        # If a video was selected, advance the frame counter
        if loop_type == False:
            frame_no += 1
            # Display the progress
            print("\nProgress: %.2f" % (100 * frame_no / total_frames) + "%")

        ret, image = device.read()

        # Run the MTCNN model to detect faces
        g2.as_default()
        with tf.Session(graph=g2) as sess:
            # we get the bounding boxes as well as the points for the face
            bb, points = align.detect_face.detect_face(image, minsize, pnet, rnet, onet, threshold, factor)

        # See if a face is detected
        if bb.shape[0] > 0:
            # ALIGNMENT - use the bounding boxes and facial landmark points to align images
            # create a numpy array to feed the network
            img_list = []
            images = np.empty([bb.shape[0], image.shape[0], image.shape[1]])
            for col in range(points.shape[1]):
                aligned_image = affine.align(image, points[:, col])

                # Prewhiten the image for the facenet architecture to give better results
                mean = np.mean(aligned_image)
                std = np.std(aligned_image)
                std_adj = np.maximum(std, 1.0 / np.sqrt(aligned_image.size))
                ready_image = np.multiply(np.subtract(aligned_image, mean), 1 / std_adj)
                img_list.append(ready_image)
            images = np.stack(img_list)

            # EMBEDDINGS: Use the processed aligned images for Facenet embeddings
            g1.as_default()
            with tf.Session(graph=g1) as sess:
                # Run forward pass on FaceNet to get the embeddings
                images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
                embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
                phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
                feed_dict = {images_placeholder: images, phase_train_placeholder: False}
                embedding = sess.run(embeddings, feed_dict=feed_dict)

            # PREDICTION: use the classifier to predict the most likely class (person).
            predictions = modelSVM.predict_proba(embedding)
            best_class_indices = np.argmax(predictions, axis=1)
            best_class_probabilities = predictions[np.arange(len(best_class_indices)), best_class_indices]

            # DRAW: draw bounding boxes, landmarks and predicted names
            if save_video or display_output:
                for i in range(bb.shape[0]):
                    cv2.rectangle(image, (int(bb[i][0]), int(bb[i][1])),
                                  (int(bb[i][2]), int(bb[i][3])), (255, 0, 0), 1)

                    # Put the name and probability of detection only if the given threshold is crossed
                    if best_class_probabilities[i] > classifier_threshold:
                        cv2.putText(image, class_names[best_class_indices[i]],
                                    (int(bb[i][0]), int(bb[i][1]) - 7),
                                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 1, cv2.LINE_AA)
                        cv2.putText(image, str(round(best_class_probabilities[i] * 100, 2)) + "%",
                                    (int(bb[i][0]), int(bb[i][3]) + 7),
                                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 1, cv2.LINE_AA)

                # loop over the (x, y)-coordinates for the facial landmarks
                for col in range(points.shape[1]):
                    for i in range(5):
                        cv2.circle(image, (int(points[i][col]), int(points[i + 5][col])), 1, (0, 255, 0), -1)

        if display_output:
            cv2.imshow("Output", image)
        if save_video:
            output_video.write(image)

        key = cv2.waitKey(1) & 0xFF
        # if the `q` key was pressed, break from the loop
        if key == ord("q"):
            # do a bit of cleanup
            device.release()
            if save_video:
                output_video.release()
            cv2.destroyAllWindows()
            break
def main(nsplits, split_id, list_file, img_root_dir, mtcnn_model_dir, save_dir=None):
    if not save_dir:
        save_dir = './aligned_root_dir'

    if not osp.exists(save_dir):
        print('mkdir for aligned root dir: ', save_dir)
        os.makedirs(save_dir)

    save_aligned_dir = osp.join(save_dir, 'aligned_imgs')
    if not osp.exists(save_aligned_dir):
        print('mkdir for aligned/cropped face imgs: ', save_aligned_dir)
        os.makedirs(save_aligned_dir)

    save_rects_dir = osp.join(save_dir, 'face_rects')
    if not osp.exists(save_rects_dir):
        print('mkdir for face rects/landmarks: ', save_rects_dir)
        os.makedirs(save_rects_dir)

    # aligner = MtcnnAligner(mtcnn_model_dir, False)
    aligner = FaceAligner(mtcnn_model_dir, False)

    fp = open(list_file, 'r')
    all_lines = fp.readlines()
    fp.close()

    total_line_cnt = len(all_lines)
    print('--->%d imgs in total' % total_line_cnt)

    if nsplits < 2:
        if split_id > 0:
            print('===> Will only process first %d imgs' % split_id)
            start_line = 0
            end_line = split_id
        else:
            print('===> Will process all of the images')
            start_line = 0
            end_line = total_line_cnt
    else:
        assert (split_id < nsplits)
        lines_per_split = float(total_line_cnt) / nsplits
        start_line = int(lines_per_split * split_id)
        end_line = int(lines_per_split * (split_id + 1))
        if end_line + 1 >= total_line_cnt:
            end_line = total_line_cnt
        print('===> Will only process imgs in the range [%d, %d)' % (start_line, end_line))

    count = start_line
    for line in all_lines[start_line:end_line]:
        line = line.strip()
        print count
        count = count + 1

        img_fn = osp.join(img_root_dir, line)
        print('===> Processing img: ' + img_fn)
        img = cv2.imread(img_fn)

        ht = img.shape[0]
        wd = img.shape[1]
        print 'image.shape:', img.shape

        # GT_RECT = [0, 0, img.shape[0], img.shape[1]]
        GT_RECT = [
            int(wd * 0.25), int(ht * 0.25),
            int(wd * 0.75), int(ht * 0.72)
        ]
        # print 'face rect: ', GT_RECT

        boxes, points = aligner.align_face(img, [GT_RECT])
        box = boxes[0]
        pts = points[0]

        spl = osp.split(line)
        sub_dir = spl[0]
        base_name = spl[1]

        save_img_subdir = osp.join(save_aligned_dir, sub_dir)
        if not osp.exists(save_img_subdir):
            os.mkdir(save_img_subdir)

        save_rect_subdir = osp.join(save_rects_dir, sub_dir)
        if not osp.exists(save_rect_subdir):
            os.mkdir(save_rect_subdir)
        # print pts

        save_img_fn = osp.join(save_img_subdir, base_name)
        facial5points = np.reshape(pts, (2, -1))
        # dst_img = warp_and_crop_face(
        #     img, facial5points, reference_5pts, output_size)
        dst_img = aligner.get_face_chips(img, [box], [pts])[0]
        cv2.imwrite(save_img_fn, dst_img)

        save_rect_fn = osp.join(save_rect_subdir,
                                osp.splitext(base_name)[0] + '.txt')
        fp_rect = open(save_rect_fn, 'w')
        for it in box:
            fp_rect.write('%5.2f\t' % it)
        fp_rect.write('\n')
        for i in range(5):
            fp_rect.write('%5.2f\t%5.2f\n' %
                          (facial5points[0][i], facial5points[1][i]))
        fp_rect.close()
def main(nsplits, split_id, list_file, img_root_dir, mtcnn_model_dir,
         save_dir=None, rects_fn=None):
    if not save_dir:
        save_dir = './facescrub_mtcnn_aligned'

    if not osp.exists(save_dir):
        print('mkdir for aligned root dir: ', save_dir)
        os.makedirs(save_dir)

    save_aligned_dir = osp.join(save_dir, 'aligned_imgs')
    if not osp.exists(save_aligned_dir):
        print('mkdir for aligned/cropped face imgs: ', save_aligned_dir)
        os.makedirs(save_aligned_dir)

    save_rects_dir = osp.join(save_dir, 'face_rects')
    if not osp.exists(save_rects_dir):
        print('mkdir for face rects/landmarks: ', save_rects_dir)
        os.makedirs(save_rects_dir)

    aligner = FaceAligner(mtcnn_model_dir)

    # fp = open(list_file, 'r')
    # all_lines = fp.readlines()
    # fp.close()

    rects_list = load_rect_list(rects_fn)
    all_lines = rects_list

    total_line_cnt = len(all_lines)
    print('--->%d imgs in total' % total_line_cnt)

    if nsplits < 2:
        if split_id > 0:
            print('===> Will only process first %d imgs' % split_id)
            start_line = 0
            end_line = split_id
        else:
            print('===> Will process all of the images')
            start_line = 0
            end_line = total_line_cnt
    else:
        assert (split_id < nsplits)
        lines_per_split = float(total_line_cnt) / nsplits
        start_line = int(lines_per_split * split_id)
        end_line = int(lines_per_split * (split_id + 1))
        if end_line + 1 >= total_line_cnt:
            end_line = total_line_cnt
        print('===> Will only process imgs in the range [%d, %d)' % (start_line, end_line))

    count = start_line
    fp_log = open(osp.join(save_dir, 'missing_imgs_split_%d.txt' % split_id), 'w')

    for line in all_lines[start_line:end_line]:
        # line = line.strip()
        print count
        count = count + 1

        img_fn = osp.join(img_root_dir, line['image'])
        print('===> Processing img: ' + img_fn)
        img = cv2.imread(img_fn)
        if img is None:
            print 'failed to read image: ', img_fn
            fp_log.write(img_fn + '\n')
            continue

        ht = img.shape[0]
        wd = img.shape[1]
        print 'image.shape:', img.shape

        spl = osp.split(line['image'])
        # sub_dir = osp.split(spl[0])[1]
        sub_dir = spl[0]
        print 'sub_dir: ', sub_dir

        if CHINESE_2_PINYIN:
            sub_dir = pinyin.get(sub_dir, format="strip")
            # replace the dot sign in names
            sub_dir = sub_dir.replace(u'\xb7', '-').encode('utf-8')

        base_name = osp.splitext(spl[1])[0]

        save_img_subdir = osp.join(save_aligned_dir, sub_dir)
        if not osp.exists(save_img_subdir):
            os.mkdir(save_img_subdir)

        save_rect_subdir = osp.join(save_rects_dir, sub_dir)
        if not osp.exists(save_rect_subdir):
            os.mkdir(save_rect_subdir)
        # print pts

        save_rects_fn = osp.join(save_rect_subdir, base_name + '.txt')
        fp_rect = open(save_rects_fn, 'w')

        # rect = get_rects_for_image(rects_list, base_name)
        rect = line['pts']
        boxes, points = aligner.align_face(img, [rect])

        nfaces = len(boxes)
        fp_rect.write('%d\n' % nfaces)

        for i in range(nfaces):
            box = boxes[i]
            pts = points[i]

            if i:
                save_img_fn = osp.join(save_img_subdir, base_name + '_%d.jpg' % (i + 1))
            else:
                save_img_fn = osp.join(save_img_subdir, base_name + '.jpg')

            facial5points = np.reshape(pts, (2, -1))
            # dst_img = warp_and_crop_face(
            #     img, facial5points, reference_5pts, output_size)
            dst_img = aligner.get_face_chips(img, [box], [pts])[0]
            cv2.imwrite(save_img_fn, dst_img)
            print 'aligned face saved into: ', save_img_fn

            for it in box:
                fp_rect.write('%5.2f\t' % it)
            fp_rect.write('\n')
            for j in range(5):
                fp_rect.write('%5.2f\t%5.2f\n' %
                              (facial5points[0][j], facial5points[1][j]))
        fp_rect.close()

    fp_log.close()
def PatchExtraction(video_path, landmarks_path, output_dir, patch_size=32):
    print("Input: ", video_path)
    print("Output:", output_dir)

    frames = []
    frame_number = []

    if os.path.exists(landmarks_path) == False:
        return

    df = pd.read_csv(landmarks_path)
    cap = cv2.VideoCapture(video_path)

    count = 0
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        # if count % 6 == 0 and df[' success'][count] == 1:
        # if df[' success'][count] == 1:
        if count % 6 == 0 and len(df[' success']) > count:
            if df[' success'][count] == 1:
                frame = frame[:, :, ::-1]
                frames.append(frame)
                frame_number.append(count)
        count += 1
    cap.release()

    folders = ["aligned_face", "left_eye", "right_eye", "mouth", "nose"]
    for folder in folders:
        directory = os.path.join(output_dir, folder)
        if not os.path.exists(directory):
            os.makedirs(directory)

    for idx, frame in enumerate(frames):
        x = np.array(df.iloc[frame_number[idx], 299:299 + 68]).reshape(68, -1)
        y = np.array(df.iloc[frame_number[idx], 299 + 68:299 + 68 * 2]).reshape(68, -1)
        z = np.ones(68).reshape(68, -1)
        landmarks = np.concatenate((x, y), axis=1)

        aligner = FaceAligner(desiredLeftEye=(0.35, 0.35),
                              desiredFaceWidth=128,
                              desiredFaceHeight=int(128 * 2))
        aligned_face, M = aligner.align(frame, landmarks)

        landmarks_z = np.concatenate((landmarks, z), axis=1)
        affined_landmarks = np.matmul(landmarks_z, M.transpose())

        regions = ["left_eye", "right_eye", "mouth", "nose"]
        regions_image = []
        for region in regions:
            start, end = FACIAL_LANDMARKS_68_IDXS[region]
            Pts = affined_landmarks[start:end]
            Center = Pts.mean(axis=0)
            try:
                img = extract_patch(aligned_face, Center, patch_size)
            except:
                break
            if img.shape != (32, 32, 3):
                break
            regions_image.append(img)

        if len(regions_image) == len(regions):
            for i, region in enumerate(regions):
                filename = os.path.join(output_dir, region,
                                        str(frame_number[idx]).zfill(4) + '.bmp')
                img = regions_image[i]
                save(img, filename)

            filename = os.path.join(output_dir, 'aligned_face',
                                    str(frame_number[idx]).zfill(4) + '.bmp')
            np.save(os.path.join(output_dir, 'aligned_face',
                                 str(frame_number[idx]).zfill(4) + '.npy'),
                    affined_landmarks)
            save(aligned_face, filename)
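# The helpers extract_patch() and save() are referenced above but not shown. The
# following are hypothetical stand-ins for illustration only: extract_patch() crops a
# patch_size x patch_size window centred on a landmark point, and save() writes an RGB
# array to disk with OpenCV (which expects BGR).
import cv2
import numpy as np

def extract_patch(image, center, patch_size=32):
    # assumption: center is (x, y) in pixel coordinates of the aligned face
    half = patch_size // 2
    cx, cy = int(round(center[0])), int(round(center[1]))
    return image[cy - half:cy + half, cx - half:cx + half]

def save(img, filename):
    # frames were converted to RGB above, so convert back to BGR before writing
    cv2.imwrite(filename, cv2.cvtColor(np.asarray(img, dtype=np.uint8), cv2.COLOR_RGB2BGR))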