Example #1
def main():
    align = align_dlib.AlignDlib(os.path.expanduser(FLAGS.dlib_face_predictor))
    landmarkIndices = align_dlib.AlignDlib.OUTER_EYES_AND_NOSE
    dataset = facenet.get_dataset(FLAGS.input_dir)
    # Scale the image such that the face fills the frame when cropped to crop_size
    scale = float(FLAGS.face_size) / FLAGS.image_size
    for cls in dataset:
        output_class_dir = os.path.join(os.path.expanduser(FLAGS.output_dir), cls.name)
        if not os.path.exists(output_class_dir):
            os.makedirs(output_class_dir)
        for image_path in cls.image_paths:
            filename = os.path.splitext(os.path.split(image_path)[1])[0]
            output_filename = os.path.join(output_class_dir, filename+'.png')
            if not os.path.exists(output_filename):
                print(image_path)
                try:
                    img = misc.imread(image_path)
                except (IOError, ValueError, IndexError) as e:
                    errorMessage = '{}: {}'.format(image_path, e)
                    print(errorMessage)
                else:
                    if img.ndim == 2:
                        img = facenet.to_rgb(img)
                    if FLAGS.use_new_alignment:
                        aligned = align.align_new(FLAGS.image_size, img, landmarkIndices=landmarkIndices,
                                                  skipMulti=True, scale=scale)
                    else:
                        aligned = align.align(FLAGS.image_size, img, landmarkIndices=landmarkIndices, 
                                              skipMulti=True, scale=scale)
                    if aligned is not None:
                        misc.imsave(output_filename, aligned)
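Example #1 reads its configuration from a module-level FLAGS object that the snippet does not show. Below is a minimal sketch of the definitions it assumes, written with TensorFlow 1.x's tf.app.flags; the default values here are hypothetical, not taken from the original script:

import tensorflow as tf

FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_string('input_dir', '~/datasets/raw', 'Directory with unaligned images.')
tf.app.flags.DEFINE_string('output_dir', '~/datasets/aligned', 'Directory for the aligned output.')
tf.app.flags.DEFINE_string('dlib_face_predictor', '~/models/shape_predictor_68_face_landmarks.dat',
                           'Path to the dlib face landmark predictor.')
tf.app.flags.DEFINE_integer('image_size', 110, 'Size (height, width) in pixels of the output image.')
tf.app.flags.DEFINE_integer('face_size', 96, 'Size of the face thumbnail within the output image.')
tf.app.flags.DEFINE_boolean('use_new_alignment', False, 'Use the align_new() code path.')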
Example #2
def main(args):
    sleep(random.random())
    output_dir = os.path.expanduser(args.output_dir)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    # Store some git revision info in a text file in the log directory
    src_path, _ = os.path.split(os.path.realpath(__file__))
    facenet.store_revision_info(src_path, output_dir, ' '.join(sys.argv))
    dataset = facenet.get_dataset(args.input_dir)

    print('Creating networks and loading parameters')

    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=args.gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        with sess.as_default():
            pnet, rnet, onet = align.detect_face.create_mtcnn(sess, None)

    minsize = 20  # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # score thresholds for the three cascade stages
    factor = 0.709  # scale factor

    # Add a random key to the filename to allow alignment using multiple processes
    random_key = np.random.randint(0, high=99999)
    bounding_boxes_filename = os.path.join(
        output_dir, 'bounding_boxes_%05d.txt' % random_key)

    with open(bounding_boxes_filename, "w") as text_file:
        nrof_images_total = 0
        nrof_successfully_aligned = 0
        if args.random_order:
            random.shuffle(dataset)
        for cls in dataset:
            output_class_dir = os.path.join(output_dir, cls.name)
            if not os.path.exists(output_class_dir):
                os.makedirs(output_class_dir)
                if args.random_order:
                    random.shuffle(cls.image_paths)
            for image_path in cls.image_paths:
                nrof_images_total += 1
                filename = os.path.splitext(os.path.split(image_path)[1])[0]
                output_filename = os.path.join(output_class_dir,
                                               filename + '.png')
                print(image_path)
                if not os.path.exists(output_filename):
                    try:
                        img = misc.imread(image_path)
                    except (IOError, ValueError, IndexError) as e:
                        errorMessage = '{}: {}'.format(image_path, e)
                        print(errorMessage)
                    else:
                        if img.ndim < 2:
                            print('Unable to align "%s"' % image_path)
                            text_file.write('%s\n' % (output_filename))
                            continue
                        if img.ndim == 2:
                            img = facenet.to_rgb(img)
                        img = img[:, :, 0:3]

                        bounding_boxes, _ = align.detect_face.detect_face(
                            img, minsize, pnet, rnet, onet, threshold, factor)
                        nrof_faces = bounding_boxes.shape[0]
                        if nrof_faces > 0:
                            det = bounding_boxes[:, 0:4]
                            det_arr = []
                            img_size = np.asarray(img.shape)[0:2]
                            if nrof_faces > 1:
                                if args.detect_multiple_faces:
                                    for i in range(nrof_faces):
                                        det_arr.append(np.squeeze(det[i]))
                                else:
                                    bounding_box_size = (
                                        det[:, 2] - det[:, 0]) * (det[:, 3] -
                                                                  det[:, 1])
                                    img_center = img_size / 2
                                    offsets = np.vstack([
                                        (det[:, 0] + det[:, 2]) / 2 -
                                        img_center[1],
                                        (det[:, 1] + det[:, 3]) / 2 -
                                        img_center[0]
                                    ])
                                    offset_dist_squared = np.sum(
                                        np.power(offsets, 2.0), 0)
                                    index = np.argmax(
                                        bounding_box_size -
                                        offset_dist_squared * 2.0
                                    )  # some extra weight on the centering
                                    det_arr.append(det[index, :])
                            else:
                                det_arr.append(np.squeeze(det))

                            for i, det in enumerate(det_arr):
                                det = np.squeeze(det)
                                bb = np.zeros(4, dtype=np.int32)
                                bb[0] = np.maximum(
                                    det[0] * (1 - args.margin / 2), 0)
                                bb[1] = np.maximum(
                                    det[1] * (1 - args.margin / 2), 0)
                                bb[2] = np.minimum(
                                    det[2] * (1 + args.margin / 2),
                                    img_size[1])
                                bb[3] = np.minimum(
                                    det[3] * (1 + args.margin / 2),
                                    img_size[0])
                                cropped = img[bb[1]:bb[3], bb[0]:bb[2], :]
                                new_size = list(cropped.shape[0:2])
                                if cropped.shape[0] > cropped.shape[1]:
                                    new_size[0] = int(args.image_size *
                                                      cropped.shape[0] /
                                                      cropped.shape[1])
                                    new_size[1] = args.image_size
                                else:
                                    new_size[0] = args.image_size
                                    new_size[1] = int(args.image_size *
                                                      cropped.shape[1] /
                                                      cropped.shape[0])
                                scaled = misc.imresize(cropped,
                                                       new_size,
                                                       interp='bilinear')
                                nrof_successfully_aligned += 1
                                filename_base, file_extension = os.path.splitext(
                                    output_filename)
                                if args.detect_multiple_faces:
                                    output_filename_n = "{}_{}{}".format(
                                        filename_base, i, file_extension)
                                else:
                                    output_filename_n = "{}{}".format(
                                        filename_base, file_extension)
                                misc.imsave(output_filename_n, scaled)
                                text_file.write('%s %d %d %d %d\n' %
                                                (output_filename_n, bb[0],
                                                 bb[1], bb[2], bb[3]))
                        else:
                            print('Unable to align "%s"' % image_path)
                            text_file.write('%s\n' % (output_filename))

    print('Total number of images: %d' % nrof_images_total)
    print('Number of successfully aligned images: %d' %
          nrof_successfully_aligned)
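When several faces are found and detect_multiple_faces is off, the loop above keeps the box that maximizes bounding_box_size - 2 * offset_dist_squared: large faces are preferred, but boxes far from the image center are penalized. A standalone sketch of that heuristic with two made-up boxes:

import numpy as np

det = np.array([[10., 10., 60., 60.],     # 50x50 box near the top-left corner
                [80., 60., 120., 110.]])  # 40x50 box near the image center
img_size = np.array([180, 240])           # (height, width)

bounding_box_size = (det[:, 2] - det[:, 0]) * (det[:, 3] - det[:, 1])
img_center = img_size / 2
offsets = np.vstack([(det[:, 0] + det[:, 2]) / 2 - img_center[1],
                     (det[:, 1] + det[:, 3]) / 2 - img_center[0]])
offset_dist_squared = np.sum(np.power(offsets, 2.0), 0)
index = np.argmax(bounding_box_size - offset_dist_squared * 2.0)
print(index)  # 1: the centered box wins despite its smaller area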
Example #3
def one_by_one(rel_path, url=False):
    print('Start Recognition!')
    prevTime = 0
    # TODO: support multiple url
    if url: img_list = [None]
    else: img_list = glob.glob(os.path.join(rel_path, '*'))
    results = list()
    # cnt = 0
    # ok_list = list()
    for img_path in img_list:  # for each image in the list
        res = None
        # print('===', url)
        if url:

            try:
                rsp = urlget(rel_path)
                # print(rsp)
                if rsp.status_code == 200:
                    frame = np.array(Image.open(BytesIO(rsp.content)))
                else:
                    print('status code: ', rsp.status_code)
                    exit(-1)
            except Exception as e:
                print(repr(e))
                exit(-1)

        else:
            frame = cv2.imread(img_path)
        # ret, frame = video_capture.read()

        # frame = cv2.resize(frame, (0,0), fx=0.5, fy=0.5)    #resize frame (optional)
        if frame is None:
            print(
                f'failure in reading image {img_path}, do not use chinese characters in file name!'
            )
            continue
        curTime = time.time()  # calc fps
        timeF = frame_interval  # frame_interval and c are module-level globals

        if (c % timeF == 0):  # detect faces in the current image
            find_results = []

            if frame.ndim == 2:
                frame = facenet.to_rgb(frame)
            frame = frame[:, :, 0:3]
            bounding_boxes, _ = detect_face.detect_face(
                frame, minsize, pnet, rnet, onet, threshold, factor)
            nrof_faces = bounding_boxes.shape[0]
            print('Detected_FaceNum: %d' % nrof_faces)

            if nrof_faces > 0:

                det = bounding_boxes[:, 0:4]
                img_size = np.asarray(frame.shape)[0:2]

                cropped = []
                scaled = []
                scaled_reshape = []
                bb = np.zeros((nrof_faces, 4), dtype=np.int32)

                for i in range(nrof_faces):  # crop all the faces
                    emb_array = np.zeros((1, embedding_size))

                    bb[i][0] = det[i][0]
                    bb[i][1] = det[i][1]
                    bb[i][2] = det[i][2]
                    bb[i][3] = det[i][3]

                    # inner exception
                    if bb[i][0] <= 0 or bb[i][1] <= 0 or bb[i][2] >= len(
                            frame[0]) or bb[i][3] >= len(frame):
                        print('face is out of range!')
                        continue

                    cropped.append(frame[bb[i][1]:bb[i][3],
                                         bb[i][0]:bb[i][2], :])
                    # index -1: operate on the crop just appended for this face
                    cropped[-1] = facenet.flip(cropped[-1], False)
                    scaled.append(
                        misc.imresize(cropped[-1], (image_size, image_size),
                                      interp='bilinear'))
                    scaled[-1] = cv2.resize(
                        scaled[-1], (input_image_size, input_image_size),
                        interpolation=cv2.INTER_CUBIC)
                    scaled[-1] = facenet.prewhiten(scaled[-1])
                    scaled_reshape.append(scaled[-1].reshape(
                        -1, input_image_size, input_image_size, 3))

                    feed_dict = {
                        images_placeholder: scaled_reshape[-1],
                        phase_train_placeholder: False
                    }
                    emb_array[0, :] = sess.run(embeddings, feed_dict=feed_dict)
                    predictions = model.predict_proba(emb_array)
                    best_class_indices = np.argmax(predictions, axis=1)
                    best_class_probabilities = predictions[
                        np.arange(len(best_class_indices)), best_class_indices]
                    if i == 0:
                        res = best_class_indices[0]
                        # ok_list.append(cnt)
                        # cnt += 1
                    cv2.rectangle(frame, (bb[i][0], bb[i][1]),
                                  (bb[i][2], bb[i][3]), (0, 255, 0),
                                  2)  # boxing face

                    # plot result idx under box
                    text_x = bb[i][0]
                    text_y = bb[i][3] + 20
                    # print('result: ', best_class_indices[0])
                    if show_flag:
                        result_names = class_names[best_class_indices[0]]
                        cv2.putText(frame,
                                    result_names, (text_x, text_y),
                                    cv2.FONT_HERSHEY_COMPLEX_SMALL,
                                    1, (0, 0, 255),
                                    thickness=1,
                                    lineType=2)
            else:
                print('No face detected.')
                exit(-1)

        sec = curTime - prevTime
        prevTime = curTime
        fps = 1 / sec
        fps_text = 'FPS: %2.3f' % fps  # avoid shadowing the built-in str
        text_fps_x = len(frame[0]) - 150
        text_fps_y = 20
        if show_flag:
            cv2.putText(frame,
                        fps_text, (text_fps_x, text_fps_y),
                        cv2.FONT_HERSHEY_COMPLEX_SMALL,
                        1, (0, 0, 0),
                        thickness=1,
                        lineType=2)
            # c+=1
            cv2.imshow('Video', frame)

            if cv2.waitKey(0) & 0xFF == ord('q'):
                break
        a, b, m, n = bb[0]  # first face box: x1, y1, x2, y2
        if res is not None:
            results.append([res] + list(predictions[0]) + [m - a, n - b])
        else:
            # results.append([res]*10)
            print(f'cannot detect any face for {img_path}, skip')
            continue

    # video_capture.release()
    # #video writer
    # out.release()
    try:
        cv2.destroyAllWindows()
    except Exception:
        pass  # ignore errors when no window was ever created
    # pred = np.zeros_like(img_list)
    # print(len(ok_list),len(results))
    # pred[ok_list] = results
    # print(pred)
    if len(results) == 0:
        return None
    results = np.array(results)
    # print(results.shape)
    # print(results)
    # labels = [class_names[int(i)] if i is not None else None for i in results[:,0]]
    # comb = np.concatenate([np.array(img_list).reshape((-1,1)),np.array(labels).reshape((-1,1)), results[:,1:]], axis=1)#list(zip(img_list, results))
    # pd.DataFrame(comb).to_csv(args.output_file + '.csv', index=False, header=header)
    comb = results[:, 1:]  # drop the class index; keep probabilities and box width/height
    df = pd.DataFrame(comb)
    ret = df.apply(proc_line, axis=1)
    # return df.iloc[:,:-2].values, ret.values
    return ret.values
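one_by_one() depends on module-level state created elsewhere, typically by a setup block like _main() in Example #4. A sketch of the globals it expects; the values are assumptions, not the original configuration:

frame_interval = 3       # process a frame when c % frame_interval == 0
c = 0                    # global frame counter
image_size = 182         # crop size before the final resize
input_image_size = 160   # network input size
show_flag = False        # draw boxes/labels and open a window when True
minsize, threshold, factor = 20, [0.6, 0.7, 0.7], 0.709  # MTCNN parameters
# sess, images_placeholder, embeddings, phase_train_placeholder,
# embedding_size, pnet, rnet, onet, model and class_names are assumed to be
# built the same way as in Example #4's _main().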
Example #4
def main(args):
  
    output_dir = os.path.expanduser(args.output_dir)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    # Store some git revision info in a text file in the log directory
    src_path,_ = os.path.split(os.path.realpath(__file__))
    facenet.store_revision_info(src_path, output_dir, ' '.join(sys.argv))
    dataset = facenet.get_dataset(args.input_dir)
    
    print('Creating networks and loading parameters')
    
    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
        with sess.as_default():
            pnet, rnet, onet = align.detect_face.create_mtcnn(sess, '../../data/')
    
    minsize = 20 # minimum size of face
    threshold = [ 0.6, 0.7, 0.7 ]  # score thresholds for the three cascade stages
    factor = 0.709 # scale factor

    # Add a random key to the filename to allow alignment using multiple processes
    random_key = np.random.randint(0, high=99999)
    bounding_boxes_filename = os.path.join(output_dir, 'bounding_boxes_%05d.txt' % random_key)
    
    with open(bounding_boxes_filename, "w") as text_file:
        nrof_images_total = 0
        nrof_successfully_aligned = 0
        if args.random_order:
            random.shuffle(dataset)
        for cls in dataset:
            output_class_dir = os.path.join(output_dir, cls.name)
            if not os.path.exists(output_class_dir):
                os.makedirs(output_class_dir)
                if args.random_order:
                    random.shuffle(cls.image_paths)
            for image_path in cls.image_paths:
                nrof_images_total += 1
                filename = os.path.splitext(os.path.split(image_path)[1])[0]
                output_filename = os.path.join(output_class_dir, filename+'.png')
                print(image_path)
                if not os.path.exists(output_filename):
                    try:
                        img = misc.imread(image_path)
                    except (IOError, ValueError, IndexError) as e:
                        errorMessage = '{}: {}'.format(image_path, e)
                        print(errorMessage)
                    else:
                        if img.ndim<2:
                            print('Unable to align "%s"' % image_path)
                            text_file.write('%s\n' % (output_filename))
                            continue
                        if img.ndim == 2:
                            img = facenet.to_rgb(img)
                        img = img[:,:,0:3]
    
                        bounding_boxes, _ = align.detect_face.detect_face(img, minsize, pnet, rnet, onet, threshold, factor)
                        nrof_faces = bounding_boxes.shape[0]
                        if nrof_faces>0:
                            det = bounding_boxes[:,0:4]
                            img_size = np.asarray(img.shape)[0:2]
                            if nrof_faces>1:
                                bounding_box_size = (det[:,2]-det[:,0])*(det[:,3]-det[:,1])
                                img_center = img_size / 2
                                offsets = np.vstack([ (det[:,0]+det[:,2])/2-img_center[1], (det[:,1]+det[:,3])/2-img_center[0] ])
                                offset_dist_squared = np.sum(np.power(offsets,2.0),0)
                                index = np.argmax(bounding_box_size-offset_dist_squared*2.0) # some extra weight on the centering
                                det = det[index,:]
                            det = np.squeeze(det)
                            bb = np.zeros(4, dtype=np.int32)
                            bb[0] = np.maximum(det[0]-args.margin/2, 0)
                            bb[1] = np.maximum(det[1]-args.margin/2, 0)
                            bb[2] = np.minimum(det[2]+args.margin/2, img_size[1])
                            bb[3] = np.minimum(det[3]+args.margin/2, img_size[0])
                            cropped = img[bb[1]:bb[3],bb[0]:bb[2],:]
                            scaled = misc.imresize(cropped, (args.image_size, args.image_size), interp='bilinear')
                            nrof_successfully_aligned += 1
                            misc.imsave(output_filename, scaled)
                            text_file.write('%s %d %d %d %d\n' % (output_filename, bb[0], bb[1], bb[2], bb[3]))
                        else:
                            print('Unable to align "%s"' % image_path)
                            text_file.write('%s\n' % (output_filename))
                            
    print('Total number of images: %d' % nrof_images_total)
    print('Number of successfully aligned images: %d' % nrof_successfully_aligned)
def _main():
    args = get_args()

    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
        with sess.as_default():
            # pnet, rnet, onet = detect_face.create_mtcnn(sess, './models/')

            minsize = 20  # minimum size of face
            threshold = [0.6, 0.7, 0.7]  # score thresholds for the three cascade stages
            factor = 0.709  # scale factor
            margin = 44
            frame_interval = 3
            batch_size = 1000
            image_size = 182
            input_image_size = 160

            print('Loading feature extraction model')
            modeldir = './models/facenet/20190310-055158'
            facenet.load_model(modeldir)

            images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
            embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
            phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
            embedding_size = embeddings.get_shape()[1]

            classifier_filename = './myclassifier/my_classifier.pkl'
            classifier_filename_exp = os.path.expanduser(classifier_filename)
            with open(classifier_filename_exp, 'rb') as infile:
                (model, class_names) = pickle.load(infile)
                print('load classifier file-> %s' % classifier_filename_exp)

            video_capture = cv2.VideoCapture(0)
            c = 0

            print('Start Recognition!')
            prevTime = 0
            myYolo = YOLO(args)
            while True:
                ret, frame = video_capture.read()
                if not ret:  # stop when the camera read fails
                    break

                # frame = cv2.resize(frame, (0,0), fx=0.5, fy=0.5)    #resize frame (optional)

                curTime = time.time()    # calc fps
                timeF = frame_interval

                if (c % timeF == 0):
                    find_results = []

                    if frame.ndim == 2:
                        frame = facenet.to_rgb(frame)
                    frame = frame[:, :, 0:3]
                    #print(frame.shape[0])
                    #print(frame.shape[1])
                    
                    image = Image.fromarray(frame)
                    img, bounding_boxes = myYolo.detect_image(image)

                    # Remove the bounding boxes with low confidence
                    nrof_faces = len(bounding_boxes)
                    ## Use MTCNN to get the bounding boxes
                    # bounding_boxes, _ = detect_face.detect_face(frame, minsize, pnet, rnet, onet, threshold, factor)
                    # nrof_faces = bounding_boxes.shape[0]
                    #print('Detected_FaceNum: %d' % nrof_faces)

                    if nrof_faces > 0:
                        # det = bounding_boxes[:, 0:4]
                        img_size = np.asarray(frame.shape)[0:2]

                        # cropped = []
                        # scaled = []
                        # scaled_reshape = []
                        bb = np.zeros((nrof_faces,4), dtype=np.int32)

                        for i in range(nrof_faces):
                            emb_array = np.zeros((1, embedding_size))

                            bb[i][0] = bounding_boxes[i][0]
                            bb[i][1] = bounding_boxes[i][1]
                            bb[i][2] = bounding_boxes[i][2]
                            bb[i][3] = bounding_boxes[i][3]

                            if bb[i][0] <= 0 or bb[i][1] <= 0 or bb[i][2] >= len(frame[0]) or bb[i][3] >= len(frame):
                                print('face is out of range!')
                                continue

                            # cropped.append(frame[bb[i][1]:bb[i][3], bb[i][0]:bb[i][2], :])
                            # cropped[0] = facenet.flip(cropped[0], False)
                            # scaled.append(misc.imresize(cropped[0], (image_size, image_size), interp='bilinear'))
                            # scaled[0] = cv2.resize(scaled[0], (input_image_size,input_image_size),
                            #                        interpolation=cv2.INTER_CUBIC)
                            # scaled[0] = facenet.prewhiten(scaled[0])
                            # scaled_reshape.append(scaled[0].reshape(-1,input_image_size,input_image_size,3))
                            # feed_dict = {images_placeholder: scaled_reshape[0], phase_train_placeholder: False}

                            cropped = (frame[bb[i][1]:bb[i][3], bb[i][0]:bb[i][2], :])
                            print("{0} {1} {2} {3}".format(bb[i][0], bb[i][1], bb[i][2], bb[i][3]))
                            cropped = facenet.flip(cropped, False)
                            scaled = (misc.imresize(cropped, (image_size, image_size), interp='bilinear'))
                            scaled = cv2.resize(scaled, (input_image_size,input_image_size),
                                                interpolation=cv2.INTER_CUBIC)
                            scaled = facenet.prewhiten(scaled)
                            scaled_reshape = (scaled.reshape(-1,input_image_size,input_image_size,3))
                            feed_dict = {images_placeholder: scaled_reshape, phase_train_placeholder: False}

                            emb_array[0, :] = sess.run(embeddings, feed_dict=feed_dict)

                            predictions = model.predict_proba(emb_array)
                            best_class_indices = np.argmax(predictions, axis=1)
                            best_class_probabilities = predictions[np.arange(len(best_class_indices)), best_class_indices]
                            print(best_class_probabilities)
                            cv2.rectangle(frame, (bb[i][0], bb[i][1]), (bb[i][2], bb[i][3]), (0, 255, 0), 2)
                            text_x = bb[i][0]
                            text_y = bb[i][3] + 20

                            # for H_i in HumanNames:
                            #     if HumanNames[best_class_indices[0]] == H_i:
                            result_names = class_names[best_class_indices[0]] if best_class_probabilities[0] > 0.45 else "Unknown"
                            #print(result_names)
                            cv2.putText(frame, result_names, (text_x, text_y), cv2.FONT_HERSHEY_COMPLEX_SMALL,
                                        1, (0, 0, 255), thickness=1, lineType=2)
                    else:
                        print('Unable to align')

                sec = curTime - prevTime
                prevTime = curTime
                fps = 1 / sec
                fps_text = 'FPS: %2.3f' % fps  # avoid shadowing the built-in str
                text_fps_x = len(frame[0]) - 150
                text_fps_y = 20
                cv2.putText(frame, fps_text, (text_fps_x, text_fps_y),
                            cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 0), thickness=1, lineType=2)
                # c+=1
                cv2.imshow('Video', frame)

                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break

            video_capture.release()
            # #video writer
            # out.release()
            cv2.destroyAllWindows()
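_main() falls back to the label "Unknown" when the classifier's best probability is at or below 0.45. The same decision, factored into a small helper; the 0.45 cutoff comes from the snippet, the rest is a sketch:

import numpy as np

def label_face(predictions, class_names, cutoff=0.45):
    """Return the best class name, or 'Unknown' below the confidence cutoff."""
    best = int(np.argmax(predictions[0]))
    return class_names[best] if predictions[0][best] > cutoff else 'Unknown'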
Example #6
def main(argv=None):
    align = align_dlib.AlignDlib(os.path.expanduser(FLAGS.dlib_face_predictor))
    landmarkIndices = align_dlib.AlignDlib.OUTER_EYES_AND_NOSE
    output_dir = os.path.expanduser(FLAGS.output_dir)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    # Store some git revision info in a text file in the log directory
    src_path,_ = os.path.split(os.path.realpath(__file__))
    store_revision_info(src_path, output_dir, ' '.join(argv))
    dataset = facenet.get_dataset(FLAGS.input_dir)
    # Scale the image such that the face fills the frame when cropped to crop_size
    scale = float(FLAGS.face_size) / FLAGS.image_size
    for cls in dataset:
        output_class_dir = os.path.join(output_dir, cls.name)
        if not os.path.exists(output_class_dir):
            os.makedirs(output_class_dir)
        for image_path in cls.image_paths:
            filename = os.path.splitext(os.path.split(image_path)[1])[0]
            output_filename = os.path.join(output_class_dir, filename+'.png')
            if not os.path.exists(output_filename):
                try:
                    img = misc.imread(image_path)
                except (IOError, ValueError, IndexError) as e:
                    errorMessage = '{}: {}'.format(image_path, e)
                    print(errorMessage)
                else:
                    if img.ndim == 2:
                        img = facenet.to_rgb(img)
                    if FLAGS.use_new_alignment:
                        aligned = align.align_new(FLAGS.image_size, img, landmarkIndices=landmarkIndices,
                                                  skipMulti=True, scale=scale)
                    else:
                        aligned = align.align(FLAGS.image_size, img, landmarkIndices=landmarkIndices, 
                                              skipMulti=True, scale=scale)
                    if aligned is not None:
                        print(image_path)
                        misc.imsave(output_filename, aligned)
                    elif FLAGS.prealigned_path:
                        # Face detection failed. Use center crop from pre-aligned dataset
                        class_name = os.path.split(output_class_dir)[1]
                        image_path_without_ext = os.path.join(os.path.expanduser(FLAGS.prealigned_path), 
                                                              class_name, filename)
                        # Find the extension of the image
                        exts = ('jpg', 'png', 'gif')
                        image_path = ''
                        for ext in exts:
                            temp_path = image_path_without_ext + '.' + ext
                            if os.path.exists(temp_path):
                                image_path = temp_path
                                break
                        try:
                            img = misc.imread(image_path)
                        except (IOError, ValueError, IndexError) as e:
                            errorMessage = '{}: {}'.format(image_path, e)
                            print(errorMessage)
                        else:
                            scaled = misc.imresize(img, FLAGS.prealigned_scale, interp='bilinear')
                            sz1 = scaled.shape[1] // 2  # integer division keeps the slice indices integral
                            sz2 = FLAGS.image_size // 2
                            cropped = scaled[(sz1-sz2):(sz1+sz2),(sz1-sz2):(sz1+sz2),:]
                            print(image_path)
                            misc.imsave(output_filename, cropped)
Example #7
    def collect_data(self):
        output_dir = os.path.expanduser(self.output_datadir)
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        dataset = facenet.get_dataset(self.input_datadir)
        with tf.Graph().as_default():
            gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.5)
            sess = tf.Session(config=tf.ConfigProto(
                gpu_options=gpu_options, log_device_placement=False))
            with sess.as_default():
                pnet, rnet, onet = detect_face.create_mtcnn(sess, './npy')

        minsize = 20  # minimum size of face
        threshold = [0.6, 0.7, 0.7]  # score thresholds for the three cascade stages
        factor = 0.709  # scale factor
        margin = 44
        image_size = 182

        # Add a random key to the filename to allow alignment using multiple processes
        random_key = np.random.randint(0, high=99999)
        bounding_boxes_filename = os.path.join(
            output_dir, 'bounding_boxes_%05d.txt' % random_key)

        with open(bounding_boxes_filename, "w") as text_file:
            nrof_images_total = 0
            nrof_successfully_aligned = 0
            for cls in dataset:
                output_class_dir = os.path.join(output_dir, cls.name)
                if not os.path.exists(output_class_dir):
                    os.makedirs(output_class_dir)
                for image_path in cls.image_paths:
                    nrof_images_total += 1
                    filename = os.path.splitext(
                        os.path.split(image_path)[1])[0]
                    output_filename = os.path.join(output_class_dir,
                                                   filename + '.png')
                    print("Image: %s" % image_path)
                    if not os.path.exists(output_filename):
                        try:
                            img = misc.imread(image_path)
                        except (IOError, ValueError, IndexError) as e:
                            errorMessage = '{}: {}'.format(image_path, e)
                            print(errorMessage)
                        else:
                            if img.ndim < 2:
                                print('Unable to align "%s"' % image_path)
                                text_file.write('%s\n' % (output_filename))
                                continue
                            if img.ndim == 2:
                                img = facenet.to_rgb(img)
                                print('to_rgb data dimension: ', img.ndim)
                            img = img[:, :, 0:3]

                            bounding_boxes, _ = detect_face.detect_face(
                                img, minsize, pnet, rnet, onet, threshold,
                                factor)
                            nrof_faces = bounding_boxes.shape[0]
                            print('No of Detected Face: %d' % nrof_faces)
                            if nrof_faces > 0:
                                det = bounding_boxes[:, 0:4]
                                img_size = np.asarray(img.shape)[0:2]
                                if nrof_faces > 1:
                                    bounding_box_size = (
                                        det[:, 2] - det[:, 0]) * (det[:, 3] -
                                                                  det[:, 1])
                                    img_center = img_size / 2
                                    offsets = np.vstack([
                                        (det[:, 0] + det[:, 2]) / 2 -
                                        img_center[1],
                                        (det[:, 1] + det[:, 3]) / 2 -
                                        img_center[0]
                                    ])
                                    offset_dist_squared = np.sum(
                                        np.power(offsets, 2.0), 0)
                                    index = np.argmax(
                                        bounding_box_size -
                                        offset_dist_squared * 2.0
                                    )  # some extra weight on the centering
                                    det = det[index, :]
                                det = np.squeeze(det)
                                bb_temp = np.zeros(4, dtype=np.int32)

                                bb_temp[0] = det[0]
                                bb_temp[1] = det[1]
                                bb_temp[2] = det[2]
                                bb_temp[3] = det[3]

                                cropped_temp = img[bb_temp[1]:bb_temp[3],
                                                   bb_temp[0]:bb_temp[2], :]
                                scaled_temp = misc.imresize(
                                    cropped_temp, (image_size, image_size),
                                    interp='bilinear')

                                nrof_successfully_aligned += 1
                                misc.imsave(output_filename, scaled_temp)
                                text_file.write(
                                    '%s %d %d %d %d\n' %
                                    (output_filename, bb_temp[0], bb_temp[1],
                                     bb_temp[2], bb_temp[3]))
                            else:
                                print('Unable to align "%s"' % image_path)
                                text_file.write('%s\n' % (output_filename))

        return (nrof_images_total, nrof_successfully_aligned)
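collect_data() is a method, so it needs an owner object exposing input_datadir and output_datadir. A hypothetical wrapper showing one way to invoke it; the class name and directory paths are made up:

class FacePreprocessor:
    def __init__(self, input_datadir, output_datadir):
        self.input_datadir = input_datadir
        self.output_datadir = output_datadir

    # ... collect_data(self) as defined above goes here ...

prep = FacePreprocessor('./train_img', './aligned_img')
total, aligned = prep.collect_data()
print('Successfully aligned %d of %d images' % (aligned, total))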
Example #8
def main(args):

    # count = 1

    gpuid = 0
    detector = RetinaFace(
        '/home/tmt/Documents/insightface/RetinaFace/model/retinaface-R50/', 0,
        gpuid, 'net3')

    sleep(random.random())
    output_dir = os.path.expanduser(args.output_dir)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    # Store some git revision info in a text file in the log directory
    src_path, _ = os.path.split(os.path.realpath(__file__))
    facenet.store_revision_info(src_path, output_dir, ' '.join(sys.argv))
    dataset = facenet.get_dataset(args.input_dir)

    print('Creating networks and loading parameters')

    # with tf.Graph().as_default():
    #     gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_memory_fraction)
    #     sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
    #     with sess.as_default():
    #         pnet, rnet, onet = align.detect_face.create_mtcnn(sess, None)

    minsize = 20  # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # score thresholds for the three cascade stages
    factor = 0.709  # scale factor

    # Add a random key to the filename to allow alignment using multiple processes
    random_key = np.random.randint(0, high=99999)
    bounding_boxes_filename = os.path.join(
        output_dir, 'bounding_boxes_%05d.txt' % random_key)

    with open(bounding_boxes_filename, "w") as text_file:
        nrof_images_total = 0
        nrof_successfully_aligned = 0
        if args.random_order:
            random.shuffle(dataset)
        for cls in tqdm(dataset):
            output_class_dir = os.path.join(output_dir, cls.name)
            if not os.path.exists(output_class_dir):
                os.makedirs(output_class_dir)
                if args.random_order:
                    random.shuffle(cls.image_paths)
            for image_path in cls.image_paths:
                thresh = 0.8
                scales = [1024, 1980]
                nrof_images_total += 1
                filename = os.path.splitext(os.path.split(image_path)[1])[0]
                output_filename = os.path.join(output_class_dir,
                                               filename + '.png')
                print(image_path)
                if not os.path.exists(output_filename):
                    try:
                        # img = misc.imread(image_path)
                        img = cv2.imread(image_path)  # note: OpenCV loads images in BGR order
                        if img is None:
                            raise IOError('cv2.imread could not read the image')
                        print('image shape', img.shape)
                        im_shape = img.shape
                        target_size = scales[0]
                        max_size = scales[1]
                        im_size_min = np.min(im_shape[0:2])
                        im_size_max = np.max(im_shape[0:2])
                        #im_scale = 1.0
                        # if im_size_min>target_size or im_size_max>max_size:
                        im_scale = float(target_size) / float(im_size_min)
                        # prevent bigger axis from being more than max_size:
                        if np.round(im_scale * im_size_max) > max_size:
                            im_scale = float(max_size) / float(im_size_max)

                        print('im_scale', im_scale)

                        scales = [im_scale]
                        flip = False

                    except (IOError, ValueError, IndexError) as e:
                        errorMessage = '{}: {}'.format(image_path, e)
                        print(errorMessage)
                    else:
                        if img.ndim < 2:
                            print('Unable to align "%s"' % image_path)
                            text_file.write('%s\n' % (output_filename))
                            continue
                        if img.ndim == 2:
                            img = facenet.to_rgb(img)
                        img = img[:, :, 0:3]
                        bounding_boxes, landmarks = detector.detect(
                            img, thresh, scales=scales, do_flip=flip)

                        # bounding_boxes, _ = align.detect_face.detect_face(img, minsize, pnet, rnet, onet, threshold, factor)

                        print('bounding boxes:', bounding_boxes)
                        nrof_faces = bounding_boxes.shape[0]
                        if nrof_faces > 0:
                            det = bounding_boxes[:, 0:4]
                            det_arr = []
                            img_size = np.asarray(img.shape)[0:2]
                            if nrof_faces > 1:
                                if args.detect_multiple_faces:
                                    for i in range(nrof_faces):
                                        det_arr.append(np.squeeze(det[i]))
                                else:
                                    bounding_box_size = (
                                        det[:, 2] - det[:, 0]) * (det[:, 3] -
                                                                  det[:, 1])
                                    img_center = img_size / 2
                                    offsets = np.vstack([
                                        (det[:, 0] + det[:, 2]) / 2 -
                                        img_center[1],
                                        (det[:, 1] + det[:, 3]) / 2 -
                                        img_center[0]
                                    ])
                                    offset_dist_squared = np.sum(
                                        np.power(offsets, 2.0), 0)
                                    # some extra weight on the centering
                                    index = np.argmax(bounding_box_size -
                                                      offset_dist_squared *
                                                      2.0)
                                    det_arr.append(det[index, :])
                            else:
                                det_arr.append(np.squeeze(det))

                            for i, det in enumerate(det_arr):
                                det = np.squeeze(det)
                                bb = np.zeros(4, dtype=np.int32)
                                bb[0] = np.maximum(det[0] - args.margin / 2, 0)
                                bb[1] = np.maximum(det[1] - args.margin / 2, 0)
                                bb[2] = np.minimum(det[2] + args.margin / 2,
                                                   img_size[1])
                                bb[3] = np.minimum(det[3] + args.margin / 2,
                                                   img_size[0])
                                cropped = img[bb[1]:bb[3], bb[0]:bb[2], :]
                                scaled = misc.imresize(
                                    cropped,
                                    (args.image_size, args.image_size),
                                    interp='bilinear')
                                nrof_successfully_aligned += 1
                                filename_base, file_extension = os.path.splitext(
                                    output_filename)
                                if args.detect_multiple_faces:
                                    output_filename_n = "{}_{}{}".format(
                                        filename_base, i, file_extension)
                                else:
                                    output_filename_n = "{}{}".format(
                                        filename_base, file_extension)
                                misc.imsave(output_filename_n, scaled)
                                text_file.write('%s %d %d %d %d\n' %
                                                (output_filename_n, bb[0],
                                                 bb[1], bb[2], bb[3]))
                        else:
                            print('Unable to align "%s"' % image_path)
                            text_file.write('%s\n' % (output_filename))

    print('Total number of images: %d' % nrof_images_total)
    print('Number of successfully aligned images: %d' %
          nrof_successfully_aligned)
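Unlike the MTCNN examples, this one detects with insightface's RetinaFace. Assuming that API, detector.detect() returns the boxes as an (n, 5) array whose fifth column is the detection score (filtered at thresh = 0.8 above), plus a landmark array; a quick shape check, hedged as an assumption about that library:

# Hypothetical inspection of the RetinaFace output for one image.
boxes, landmarks = detector.detect(img, thresh, scales=scales, do_flip=flip)
print(boxes.shape)      # (n, 5): x1, y1, x2, y2, score
print(landmarks.shape)  # (n, 5, 2): five landmark points per face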
Example #9
def align_frames(input_dir,
                 output_dir,
                 image_size=182,
                 margin=44,
                 gpu_memory_fraction=1.0):
    sleep(random.random())
    output_dir = os.path.expanduser(output_dir)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    # Store some git revision info in a text file in the log directory
    src_path, _ = os.path.split(os.path.realpath(__file__))
    facenet.store_revision_info(src_path, output_dir, ' '.join(sys.argv))
    image_paths = facenet.get_image_paths(input_dir)

    tf.logging.set_verbosity(tf.logging.ERROR)
    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        with sess.as_default():
            pnet, rnet, onet = detect_face.create_mtcnn(sess, None)

    minsize = 20  # minimum size of face

    # MTCNN stages: (1) a shallow CNN proposes face windows,
    # (2) a deeper CNN rejects non-face windows, (3) landmarks are detected.
    # Raising the second-stage threshold can help throw out non-face windows.
    threshold = [0.6, 0.7, 0.7]  # per-stage score thresholds

    factor = 0.709  # scale factor

    # Add a random key to the filename to allow alignment using multiple processes
    random_key = np.random.randint(0, high=99999)
    bounding_boxes_filename = os.path.join(
        output_dir, 'bounding_boxes_%05d.txt' % random_key)
    bar = progressbar.ProgressBar(maxval=len(image_paths),
                                  widgets=[
                                      progressbar.Bar('=', '[', ']'), ' ',
                                      progressbar.Percentage()
                                  ])
    bar.start()
    with open(bounding_boxes_filename, "w") as text_file:
        nrof_images_total = 0
        nrof_successfully_aligned = 0
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)
        for image_path in image_paths:
            nrof_images_total += 1
            bar.update(nrof_images_total)
            filename = os.path.splitext(os.path.split(image_path)[1])[0]
            output_filename_prefix = os.path.join(output_dir, filename)
            # print(image_path)
            if not os.path.exists(output_filename_prefix):
                try:
                    img = misc.imread(image_path)
                except (IOError, ValueError, IndexError) as e:
                    errorMessage = '{}: {}'.format(image_path, e)
                    print(errorMessage)
                else:
                    if img.ndim < 2:
                        # print('Unable to align "%s"' % image_path)
                        text_file.write('%s\n' % (output_filename_prefix))
                        continue
                    if img.ndim == 2:
                        img = facenet.to_rgb(img)
                    img = img[:, :, 0:3]

                    bounding_boxes, _ = detect_face.detect_face(
                        img, minsize, pnet, rnet, onet, threshold, factor)
                    nrof_faces = bounding_boxes.shape[0]
                    if nrof_faces > 0:
                        for i in range(nrof_faces):  # NEW
                            det = bounding_boxes[i, 0:4]
                            img_size = np.asarray(img.shape)[0:2]
                            '''if nrof_faces>1:
                                bounding_box_size = (det[:,2]-det[:,0])*(det[:,3]-det[:,1])
                                img_center = img_size / 2
                                offsets = np.vstack([ (det[:,0]+det[:,2])/2-img_center[1], (det[:,1]+det[:,3])/2-img_center[0] ])
                                offset_dist_squared = np.sum(np.power(offsets,2.0),0)
                                index = np.argmax(bounding_box_size-offset_dist_squared*2.0) # some extra weight on the centering
                                det = det[index,:]'''
                            det = np.squeeze(det)
                            bb = np.zeros(4, dtype=np.int32)
                            bb[0] = np.maximum(det[0] - margin / 2, 0)
                            bb[1] = np.maximum(det[1] - margin / 2, 0)
                            bb[2] = np.minimum(det[2] + margin / 2,
                                               img_size[1])
                            bb[3] = np.minimum(det[3] + margin / 2,
                                               img_size[0])
                            cropped = img[bb[1]:bb[3], bb[0]:bb[2], :]
                            scaled = misc.imresize(cropped,
                                                   (image_size, image_size),
                                                   interp='bilinear')
                            nrof_successfully_aligned += 1
                            output_filename = output_filename_prefix + '_' + str(
                                i) + '.png'
                            misc.imsave(output_filename, scaled)
                            text_file.write(
                                '%s %d %d %d %d\n' %
                                (output_filename, bb[0], bb[1], bb[2], bb[3]))
                    else:
                        # print('Unable to align "%s"' % image_path)
                        text_file.write('%s\n' % (output_filename_prefix))
    bar.finish()
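A hypothetical invocation of align_frames(); the directories are made up, and the keyword values repeat the signature's defaults apart from the GPU fraction:

if __name__ == '__main__':
    align_frames('./video_frames',          # hypothetical input frames
                 './video_frames_aligned',  # hypothetical output directory
                 image_size=182,
                 margin=44,
                 gpu_memory_fraction=0.5)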
Example #10
def main(args):
    if len(args) < 2:
        print("Usage: " + args[0] + " <image>")
        return

    img_path = args[1]

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Graph().as_default():
        with tf.Session(config=config) as sess:

            np.random.seed(666)

            # 0. Read image
            try:
                img = misc.imread(img_path)
            except (IOError, ValueError, IndexError) as e:
                errorMessage = '{}: {}'.format(img_path, e)
                print(errorMessage)
                return

            if img.ndim < 2:
                print('Unable to align "%s"' % img_path)
                return
            elif img.ndim == 2:
                img = facenet.to_rgb(img)
            elif len(img.shape) > 2 and img.shape[2] == 4:
                img = cv2.cvtColor(img, cv2.COLOR_BGRA2BGR)
            [h, w] = np.asarray(img.shape)[0:2]

            # 1. Detect Face
            pnet, rnet, onet = align.detect_face.create_mtcnn(sess, None)

            minsize = 20  # minimum size of face
            threshold = [0.8, 0.85, 0.85]  # score thresholds for the three cascade stages
            factor = 0.709  # scale factor

            bounding_boxes, _ = align.detect_face.detect_face(
                img, minsize, pnet, rnet, onet, threshold, factor)
            nrof_faces = bounding_boxes.shape[0]
            faces = []
            boxes = []
            if nrof_faces > 0:
                for i in range(nrof_faces):
                    det = np.squeeze(bounding_boxes[i, 0:4])

                    #y0 = int(det[1] * h)
                    #y1 = int(det[3] * h)
                    #x0 = int(det[0] * w)
                    #x1 = int(det[2] * w)
                    x0 = max(int(det[0]) - 20, 0)
                    x1 = min(int(det[2]) + 20, w - 1)
                    y0 = max(int(det[1]) - 20, 0)
                    y1 = min(int(det[3]) + 20, h - 1)

                    [x0, y0, x1, y1] = get_square_box(x0, y0, x1, y1, w, h)
                    print(
                        str(x0) + " " + str(y0) + " " + str(x1) + " " +
                        str(y1))
                    cropped = img[y0:y1, x0:x1, :]
                    scaled = misc.imresize(cropped, (160, 160),
                                           interp='bilinear')
                    prew = facenet.prewhiten(scaled)
                    faces.append(prew)
                    boxes.append([x0, y0, x1, y1])
                    misc.imsave("roi" + str(i) + ".png", prew)

            # 2. Recognize Face

            # Load the model
            print('Loading feature extraction model')
            facenet.load_model(
                '../models/facenet/20170512-110547/20170512-110547.pb')
            # facenet.load_model('../models/facenet/20170511-185253/20170511-185253.pb')

            # Get input and output tensors
            images_placeholder = tf.get_default_graph().get_tensor_by_name(
                "input:0")
            embeddings = tf.get_default_graph().get_tensor_by_name(
                "embeddings:0")
            phase_train_placeholder = tf.get_default_graph(
            ).get_tensor_by_name("phase_train:0")
            embedding_size = embeddings.get_shape()[1]
            emb_array = np.zeros((len(faces), embedding_size))

            # Run forward pass to calculate embeddings
            print('Calculating features for images')
            feed_dict = {
                images_placeholder: faces,
                phase_train_placeholder: False
            }
            emb_array[:, :] = sess.run(embeddings, feed_dict=feed_dict)

            # Load embeddings from file and concatenate with computed embeddings
            # with open('../models/emb_array.bin', 'rb') as infile:
            #    emb_array_cls = pickle.load(infile)
            # print(emb_array_cls)
            # emb_arrys = np.concatenate((emb_array, emb_array_cls), axis=0)

            img = cv2.imread(img_path, cv2.IMREAD_UNCHANGED)
            with open('../models/facenet/lfw_classifier-20170512-110547.pkl',
                      'rb') as infile:
                # alternative: '../models/facenet/lfw_classifier-20170511-185253.pkl'
                (model, class_names) = pickle.load(infile)

                predictions = model.predict_proba(emb_array)

                # Print all predictions in sorted order:
                # sorted_class_indices = np.argsort(predictions, axis=1)
                # for i in range(len(predictions)):
                #     for j in range(len(class_names)):
                #         print('%.4f %s' % (predictions[i][sorted_class_indices[i][j]],
                #                            class_names[sorted_class_indices[i][j]]))
                #     print("----------")

                # print(predictions)

                best_class_indices = np.argmax(predictions, axis=1)
                best_class_probabilities = predictions[
                    np.arange(len(best_class_indices)), best_class_indices]

                #predictions = model.predict(emb_array)
                #best_class_indices = predictions
                #best_class_probabilities = predictions

                for i in range(len(best_class_indices)):
                    print('%4d  %s: %.3f' %
                          (i, class_names[best_class_indices[i]],
                           best_class_probabilities[i]))
                    vis_util.draw_bounding_box_on_image_array(
                        img, boxes[i][1], boxes[i][0], boxes[i][3],
                        boxes[i][2], "red", 3, [
                            class_names[best_class_indices[i]],
                            "{:.3f}".format(best_class_probabilities[i])
                        ], False)

            cv2.imwrite("img.png", img)
Example #11
def main(args):
    sleep(random.random())
    # Create the output directory for the aligned face images
    # if it does not exist yet
    output_dir = os.path.expanduser(args.output_dir)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    # 在日志目录的文本文件中存储一些Git修订信息
    # Store some git revision info in a text file in the log directory
    src_path,_ = os.path.split(os.path.realpath(__file__))
    # 在output_dir文件夹下创建revision_info.txt文件,里面存的是执行该命令时的参数信息
    # 当前使用的tensorflow版本,git hash,git diff
    facenet.store_revision_info(src_path, output_dir, ' '.join(sys.argv))
    # 获取数据集下所有人名和其人名目录下是所有图片,
    # 放到ImageClass类中,再将类存到dataset列表里

    dataset = facenet.get_dataset(args.input_dir)
    
    print('Creating networks and loading parameters')
    '''2. Build the MTCNN network, initialized with pretrained parameters'''
    with tf.Graph().as_default():
        # Session GPU options: fraction of GPU memory allocated per process
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
        with sess.as_default():
            # Create the P-Net, R-Net and O-Net networks
            pnet, rnet, onet = align.detect_face.create_mtcnn(sess, None)

    minsize = 20  # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # thresholds for the three stages
    factor = 0.709  # scale factor

    # Add a random key to the filename to allow alignment using multiple processes

    # Draw a random number used to build the filename below
    random_key = np.random.randint(0, high=99999)
    # Image paths and their bounding boxes go into bounding_boxes_XXXXX.txt
    bounding_boxes_filename = os.path.join(output_dir, 'bounding_boxes_%05d.txt' % random_key)
    '''3. Write the bounding box of the face in each image to the record file'''
    with open(bounding_boxes_filename, "w") as text_file:
        # Total number of images processed
        nrof_images_total = 0
        nrof_successfully_aligned = 0
        # Optionally shuffle the list of classes
        if args.random_order:
            random.shuffle(dataset)
        # Iterate over each person and the absolute paths of their images
        for cls in dataset:
            # Output folder for this person
            output_class_dir = os.path.join(output_dir, cls.name)
            # Create the person's folder in the destination directory if
            # it does not exist yet
            if not os.path.exists(output_class_dir):
                os.makedirs(output_class_dir)
                if args.random_order:
                    random.shuffle(cls.image_paths)
            # Iterate over every image
            for image_path in cls.image_paths:
                nrof_images_total += 1
                # Filename of the aligned image
                filename = os.path.splitext(os.path.split(image_path)[1])[0]
                output_filename = os.path.join(output_class_dir, filename+'.png')
                print(image_path)
                if not os.path.exists(output_filename):
                    try:
                        # Read the image file
                        img = misc.imread(image_path)
                    except (IOError, ValueError, IndexError) as e:
                        errorMessage = '{}: {}'.format(image_path, e)
                        print(errorMessage)
                    else:
                        if img.ndim<2:
                            print('Unable to align "%s"' % image_path)
                            text_file.write('%s\n' % (output_filename))
                            continue
                        if img.ndim == 2:
                            img = facenet.to_rgb(img)
                        img = img[:,:,0:3]
                        # img = misc.imresize(img,0.8)
                        #plt.imshow(img)
                        #plt.show()
                        # Detect faces. bounding_boxes may contain data for
                        # several faces; each box holds 5 values: the first
                        # two are the top-left corner, the next two the
                        # bottom-right corner, and the last the confidence.

                        # bounding_boxes: the boxes, shape [n, 5] with x1, y1, x2, y2, score
                        # _: the facial landmark coordinates, shape [10, n]
                        bounding_boxes, _ = align.detect_face.detect_face(img, minsize, pnet, rnet, onet, threshold, factor)
                        # ################### Affine transform ###################
                        rows, cols, hn = img.shape
                        _new = np.transpose(_)  # (10, n) -> (n, 10): one row of landmarks per face
                        dst = img  # fallback in case no landmarks were found (avoids a NameError below)
                        for i in range(len(_new)):
                            # print("Left eye position (%s, %s)" % (_new[i, 0], _new[i, 5]))
                            # print("Right eye position (%s, %s)" % (_new[i, 1], _new[i, 6]))
                            eye_center_x = (_new[i, 0] + _new[i, 1]) * 0.5
                            eye_center_y = (_new[i, 5] + _new[i, 6]) * 0.5
                            dy = _new[i, 5] - _new[i, 6]
                            dx = _new[i, 0] - _new[i, 1]
                            angle = math.atan2(dy, dx) * 180.0 / math.pi + 180.0
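                            # atan2(dy, dx) measures the tilt of the
                            # inter-eye line; adding 180 degrees normalizes
                            # its direction so that rotating by `angle`
                            # around the eye center levels the eyes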
                            #print("旋转角度为%s" % angle)
                            M = cv2.getRotationMatrix2D((eye_center_x, eye_center_y), angle, 1)
                            dst = cv2.warpAffine(img, M, (cols, rows))
                        ####################################################
                        bounding_boxes, _ = align.detect_face.detect_face(dst, minsize,
                                                                          pnet, rnet, onet,
                                                                          threshold, factor)


                        # Number of detected faces (number of bounding boxes)
                        nrof_faces = bounding_boxes.shape[0]
                        if nrof_faces>0:
                            # [n, 4] face boxes
                            det = bounding_boxes[:,0:4]
                            # Collects the selected face boxes
                            det_arr = []
                            # Size of the source image
                            img_size = np.asarray(dst.shape)[0:2]
                            #img_size = np.asarray(img.shape)[0:2]
                            if nrof_faces>1:
                                # Several faces were detected in this image
                                if args.detect_multiple_faces:
                                    # Keep every detected face
                                    for i in range(nrof_faces):
                                        det_arr.append(np.squeeze(det[i]))
                                else:
                                    # Even with several faces, keep only one
                                    # Area of each face box
                                    bounding_box_size = (det[:,2]-det[:,0])*(det[:,3]-det[:,1])
                                    # Center coordinates of the source image
                                    img_center = img_size / 2
                                    # Offset of each box center from the image center;
                                    # ((det[:,0]+det[:,2])/2, (det[:,1]+det[:,3])/2) is exactly the box center
                                    offsets = np.vstack([ (det[:,0]+det[:,2])/2-img_center[1], (det[:,1]+det[:,3])/2-img_center[0] ])
                                    # Squared distance from each box center to the image center.
                                    # E.g. offsets=[[   4.20016056  145.02849352 -134.53862838] [ -22.14250919  -26.74770141  -30.76835772]]
                                    # gives offset_dist_squared=[  507.93206189 21748.70346425 19047.33436466]
                                    offset_dist_squared = np.sum(np.power(offsets,2.0),0)
                                    # Pick the box maximizing area minus twice the squared offset,
                                    # i.e. trade size off against position: prefer a large box close to the image center
                                    index = np.argmax(bounding_box_size-offset_dist_squared*2.0) # some extra weight on the centering
                                    det_arr.append(det[index,:])
                            else:
                                # With a single face box there is nothing to choose
                                det_arr.append(np.squeeze(det))
                            # Iterate over every selected face box
                            for i, det in enumerate(det_arr):
                                # [4,] expand the bounding box by a margin, then crop
                                det = np.squeeze(det)
                                bb = np.zeros(4, dtype=np.int32)
                                # Crop a region slightly larger than the box MTCNN
                                # returned; how much larger is set by the margin argument
                                bb[0] = np.maximum(det[0]-args.margin/2, 0)
                                bb[1] = np.maximum(det[1]-args.margin/2, 0)
                                bb[2] = np.minimum(det[2]+args.margin/2, img_size[1])
                                bb[3] = np.minimum(det[3]+args.margin/2, img_size[0])
                                # Crop the face box, then resize
                                cropped = dst[bb[1]:bb[3],bb[0]:bb[2],:]
                                #cropped = img[bb[1]:bb[3], bb[0]:bb[2], :]
                                # Resize to the target size; the image and the
                                # bounding box position are saved below
                                scaled = misc.imresize(cropped, (args.image_size, args.image_size), interp='bilinear')
                                nrof_successfully_aligned += 1
                                filename_base, file_extension = os.path.splitext(output_filename)  # split filename and extension
                                if args.detect_multiple_faces:
                                    output_filename_n = "{}_{}{}".format(filename_base, i, file_extension)
                                else:
                                    output_filename_n = "{}{}".format(filename_base, file_extension)
                                # Save the image
                                misc.imsave(output_filename_n, scaled)
                                # Record the entry in bounding_boxes_XXXXX.txt
                                text_file.write('%s %d %d %d %d\n' % (output_filename_n, bb[0], bb[1], bb[2], bb[3]))
                        else:
                            print('Unable to align "%s"' % image_path)
                            text_file.write('%s\n' % (output_filename))
                            
    print('Total number of images: %d' % nrof_images_total)
    print('Number of successfully aligned images: %d' % nrof_successfully_aligned)
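
The size-versus-centering rule above picks a single face when several are
detected. A minimal standalone check with made-up boxes in a 200x200 image:

import numpy as np

# Three hypothetical boxes (x1, y1, x2, y2) in a 200x200 image
det = np.array([[80.0, 80.0, 120.0, 120.0],   # small, centered
                [10.0, 10.0, 90.0, 90.0],     # large, off-center
                [60.0, 60.0, 150.0, 150.0]])  # large, near the center
img_center = np.array([100.0, 100.0])

bounding_box_size = (det[:, 2] - det[:, 0]) * (det[:, 3] - det[:, 1])
offsets = np.vstack([(det[:, 0] + det[:, 2]) / 2 - img_center[1],
                     (det[:, 1] + det[:, 3]) / 2 - img_center[0]])
offset_dist_squared = np.sum(np.power(offsets, 2.0), 0)
# Area minus twice the squared center offset: large AND central wins
index = np.argmax(bounding_box_size - offset_dist_squared * 2.0)
print(index)  # 2: the large box near the center
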
예제 #12
0
def main(args):
    #Load MTCNN model for detecting and aligning Faces in the Captured Photos
    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=1)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        with sess.as_default():
            pnet, rnet, onet = align.detect_face.create_mtcnn(sess, None)

    minsize = 20  # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # thresholds for the three stages
    factor = 0.709  # scale factor

    nrof_successfully_aligned = 0

    # Save face files locally just to verify. You may want to remove this once your system is set up.
    output_filename = r'd:\PhotoCaptured.png'  # raw string so backslashes are never treated as escapes

    with tf.Graph().as_default():
        # Keep the session open: a `with tf.Session()` block would close it
        # before the sess.run calls further down
        sess = tf.Session()
        # args.seed defaulted to 666
        np.random.seed(seed=666)

        # Load the model once
        print('Loading feature extraction model')

        # Use your path where you have saved pretrained facenet model
        facenet.load_model('./models/20170512-110547.pb')

        # Get input and output tensors
        images_placeholder = tf.get_default_graph().get_tensor_by_name(
            "input:0")
        embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
        phase_train_placeholder = tf.get_default_graph().get_tensor_by_name(
            "phase_train:0")
        embedding_size = embeddings.get_shape()[1]

        # your custom classifier, with the last layer trained on your own image database. Please refer to the Facenet repo for training a custom classifier
        classifier_filename_exp = os.path.expanduser(
            './models/my_classifier.pkl')

        # Classify images
        print('Testing classifier')
        with open(classifier_filename_exp, 'rb') as infile:
            (model, class_names) = pickle.load(infile)

        print('Loaded classifier model from file "%s"' %
              classifier_filename_exp)

    #Start Video Capture
    video_capture = cv2.VideoCapture(0)

    #All the pre-loading is done. Now loop through capturing photos and recognizing faces in the frames
    while True:

        try:
            ret, frame = video_capture.read()
            img = frame

        except (IOError, ValueError, IndexError) as e:
            print("Error")
        else:
            if img.ndim < 2:
                # image_path does not exist in this scope; just report and skip
                print('Unable to align the captured frame')
                continue
            if img.ndim == 2:
                img = facenet.to_rgb(img)
            img = img[:, :, 0:3]

            bounding_boxes, box_cord = align.detect_face.detect_face(
                img, minsize, pnet, rnet, onet, threshold, factor)

            nrof_faces = bounding_boxes.shape[0]
            #face_array = np.array((160, 160, 3))  # could hold the faces in a preallocated ndarray instead
            face_list = []
            print('Number of faces: %s' % nrof_faces)
            #for rectangle in range(0, nrof_faces):
            #    cv2.rectangle(img, box_cord[rectangle], (0, 255, 0), 5)
            print('Type of box_cord: %s' % type(box_cord))
            print('Shape of box_cord: %s' % str(box_cord.shape))
            # Display the resulting frame
            cv2.imshow('Video', img)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

            if nrof_faces > 0:
                det = bounding_boxes[:, 0:4]
                det_arr = []
                img_size = np.asarray(img.shape)[0:2]
                if nrof_faces > 1:
                    #if args.detect_multiple_faces:
                    for i in range(nrof_faces):
                        det_arr.append(np.squeeze(det[i]))
                else:
                    det_arr.append(np.squeeze(det))

                for i, det in enumerate(det_arr):
                    det = np.squeeze(det)
                    bb = np.zeros(4, dtype=np.int32)
                    # Hardcoding
                    # args.margin = 32 image_size 160
                    bb[0] = np.maximum(det[0] - 32 / 2, 0)
                    bb[1] = np.maximum(det[1] - 32 / 2, 0)
                    bb[2] = np.minimum(det[2] + 32 / 2, img_size[1])
                    bb[3] = np.minimum(det[3] + 32 / 2, img_size[0])
                    cropped = img[bb[1]:bb[3], bb[0]:bb[2], :]
                    scaled = misc.imresize(cropped, (160, 160),
                                           interp='bilinear')
                    nrof_successfully_aligned += 1
                    filename_base, file_extension = os.path.splitext(
                        output_filename)

                    #if args.detect_multiple_faces:  # Try keeping it in an nparray instead of writing
                    output_filename_n = "{}_{}{}".format(
                        filename_base, i, file_extension)
                    #else:
                    #output_filename_n = "{}{}".format(filename_base, file_extension)
                    misc.imsave(output_filename_n, scaled)
                    print('Type of scaled: %s' % type(scaled))
                    #Appending each face to face_array
                    face_list.append(scaled)
            else:
                print('No Image or - Unable to align')
                continue

            #Invoke Classifier Code

            # Run forward pass to calculate embeddings
            print('Calculating features for images')

            nrof_images = nrof_faces
            nrof_batches_per_epoch = int(math.ceil(1.0 * nrof_images / 1000))
            emb_array = np.zeros((nrof_images, embedding_size))
            for i in range(nrof_batches_per_epoch):
                # batch size hardcoded to 1000 (args.batch_size in the original)
                start_index = i * 1000
                end_index = min((i + 1) * 1000, nrof_images)

                # Load only the faces belonging to this batch
                images = Face_load_data(face_list[start_index:end_index], False,
                                        False, 160)

                feed_dict = {
                    images_placeholder: images,
                    phase_train_placeholder: False
                }
                emb_array[start_index:end_index, :] = sess.run(
                    embeddings, feed_dict=feed_dict)

            predictions = model.predict_proba(emb_array)
            best_class_indices = np.argmax(predictions, axis=1)
            best_class_probabilities = predictions[
                np.arange(len(best_class_indices)), best_class_indices]

            # Print the face recognition result for each face in the frame
            for i in range(len(best_class_indices)):
                print('%4d  %s: %.3f' % (i, class_names[best_class_indices[i]],
                                         best_class_probabilities[i]))

    video_capture.release()
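
The batch loop above covers a count that need not be a multiple of the
batch size via ceil-division; a minimal standalone sketch with a made-up
batch size of 4:

import math

items = list(range(10))  # 10 items with batch size 4 -> 3 batches
batch_size = 4
nrof_batches = int(math.ceil(1.0 * len(items) / batch_size))
for i in range(nrof_batches):
    start = i * batch_size
    end = min((i + 1) * batch_size, len(items))
    print(items[start:end])  # [0, 1, 2, 3], [4, 5, 6, 7], [8, 9]
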
예제 #13
0
            print(class_names)

        print('Loaded classifier model from file "%s"' %
              classifier_filename_exp)

        video_capture = cv2.VideoCapture(0)
        capture_interval = 5
        capture_count = 0
        frame_count = 0

        while True:
            ret, frame = video_capture.read()
            # if(capture_count % capture_interval == 0):
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            if gray.ndim == 2:
                gray = facenet.to_rgb(gray)

                bounding_boxes, points = detect_face.detect_face(
                    gray, minsize, pnet, rnet, onet, threshold, factor)
                nrof_faces = bounding_boxes.shape[0]

            for face_position in bounding_boxes:
                face_position = face_position.astype(int)

                cropped = gray[face_position[1]:face_position[3],
                               face_position[0]:face_position[2], :]

                if cropped.shape[0] == 0 or cropped.shape[1] == 0:
                    continue

                scaled = cv2.resize(cropped, (image_size, image_size),
예제 #14
0
def main(args):
    sleep(random.random())
    output_dir = os.path.expanduser(args.output_dir)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    dataset = facenet.get_dataset(args.input_dir)

    print('Creating networks and loading parameters')

    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=args.gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        with sess.as_default():
            pnet, rnet, onet = detect_face.create_mtcnn(sess, None)

    minsize = 20  # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # thresholds for the three stages
    factor = 0.709  # scale factor

    nrof_images_total = 0
    nrof_successfully_aligned = 0
    if args.random_order:
        random.shuffle(dataset)
    for cls in dataset:
        output_class_dir = os.path.join(output_dir, cls.name)
        if not os.path.exists(output_class_dir):
            os.makedirs(output_class_dir)
            if args.random_order:
                random.shuffle(cls.image_paths)
        for image_path in cls.image_paths:
            nrof_images_total += 1
            filename = os.path.splitext(os.path.split(image_path)[1])[0]
            output_filename = os.path.join(output_class_dir, filename + '.png')
            print(image_path)
            if not os.path.exists(output_filename):
                try:
                    img = misc.imread(image_path)
                except (IOError, ValueError, IndexError) as e:
                    errorMessage = '{}: {}'.format(image_path, e)
                    print(errorMessage)
                else:
                    if img.ndim < 2:
                        print('Unable to align "%s"' % image_path)
                        continue
                    if img.ndim == 2:
                        img = facenet.to_rgb(img)
                    img = img[:, :, 0:3]

                    bounding_boxes, landmarks = detect_face.detect_face(
                        img, minsize, pnet, rnet, onet, threshold, factor)
                    nrof_faces = bounding_boxes.shape[0]
                    if nrof_faces > 0:
                        det = bounding_boxes[:, 0:4]
                        landmark = landmarks
                        img_size = np.asarray(img.shape)[0:2]
                        if nrof_faces > 1:
                            bounding_box_size = (det[:, 2] - det[:, 0]) * (
                                det[:, 3] - det[:, 1])
                            img_center = img_size / 2
                            offsets = np.vstack([
                                (det[:, 0] + det[:, 2]) / 2 - img_center[1],
                                (det[:, 1] + det[:, 3]) / 2 - img_center[0]
                            ])
                            offset_dist_squared = np.sum(
                                np.power(offsets, 2.0), 0)
                            index = np.argmax(
                                bounding_box_size - offset_dist_squared *
                                2.0)  # some extra weight on the centering
                            det = det[index, :]
                            landmark = landmark[:, index]
                        det = np.squeeze(det)
                        landmark = np.squeeze(landmark)

                        if args.align_face_image == 'off':
                            bb = np.zeros(4, dtype=np.int32)
                            bb[0] = np.maximum(det[0] - args.margin / 2, 0)
                            bb[1] = np.maximum(det[1] - args.margin / 2, 0)
                            bb[2] = np.minimum(det[2] + args.margin / 2,
                                               img_size[1])
                            bb[3] = np.minimum(det[3] + args.margin / 2,
                                               img_size[0])
                            cropped = img[bb[1]:bb[3], bb[0]:bb[2], :]
                            scaled = misc.imresize(
                                cropped, (args.image_size, args.image_size),
                                interp='bilinear')
                            nrof_successfully_aligned += 1
                            misc.imsave(output_filename, scaled)
                        else:
                            cv_img = cv2.imread(image_path)
                            cv_img = face_alignment(cv_img, args.image_size,
                                                    landmark)
                            cv2.imwrite(output_filename, cv_img)
                            nrof_successfully_aligned += 1

                        if args.landmark_image == 'on':
                            cv_img_landmark = cv2.imread(image_path)
                            # TODO : Write marked image with landmark and bounding box
                    else:
                        print('Unable to align "%s"' % image_path)

    print('Total number of images: %d' % nrof_images_total)
    print('Number of successfully aligned images: %d' %
          nrof_successfully_aligned)
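
The margin expansion that keeps recurring in these examples clamps the
enlarged box to the image bounds; a minimal standalone sketch (box and
image size are made up):

import numpy as np

det = np.array([10.0, 5.0, 170.0, 190.0])  # hypothetical x1, y1, x2, y2
img_size = (200, 180)                      # height, width
margin = 32

bb = np.zeros(4, dtype=np.int32)
bb[0] = np.maximum(det[0] - margin / 2, 0)            # left, clamped to 0
bb[1] = np.maximum(det[1] - margin / 2, 0)            # top, clamped to 0
bb[2] = np.minimum(det[2] + margin / 2, img_size[1])  # right, clamped to width
bb[3] = np.minimum(det[3] + margin / 2, img_size[0])  # bottom, clamped to height
print(bb)  # [0 0 180 200]: the margin is cut off where it would leave the image
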
예제 #15
0
            output_filename = os.path.join(output_class_dir, filename + '.png')
            print(image_path)
            if not os.path.exists(output_filename):
                try:
                    img = misc.imread(image_path)
                    print('read data dimension: ', img.ndim)
                except (IOError, ValueError, IndexError) as e:
                    errorMessage = '{}: {}'.format(image_path, e)
                    print(errorMessage)
                else:
                    if img.ndim < 2:
                        print('Unable to align "%s"' % image_path)
                        text_file.write('%s\n' % (output_filename))
                        continue
                    if img.ndim == 2:
                        img = facenet.to_rgb(img)
                        print('to_rgb data dimension: ', img.ndim)
                    img = img[:, :, 0:3]
                    print('after data dimension: ', img.ndim)

                    bounding_boxes, _ = detect_face.detect_face(img, minsize, pnet, rnet, onet, threshold, factor)
                    nrof_faces = bounding_boxes.shape[0]
                    print('detected_face: %d' % nrof_faces)
                    if nrof_faces > 0:
                        det = bounding_boxes[:, 0:4]
                        img_size = np.asarray(img.shape)[0:2]
                        if nrof_faces > 1:
                            bounding_box_size = (det[:, 2] - det[:, 0]) * (det[:, 3] - det[:, 1])
                            img_center = img_size / 2
                            offsets = np.vstack([(det[:, 0] + det[:, 2]) / 2 - img_center[1],
                                                 (det[:, 1] + det[:, 3]) / 2 - img_center[0]])
예제 #16
0
def detect_faces(args):
    """
    :param args: parsed arguments; args.img_dir holds the input image path(s)
    :return: the input image, the bounding boxes and the cropped faces
    """

    minsize = 20  # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # thresholds for the three stages
    factor = 0.709  # scale factor
    nrof_successfully_aligned = 0

    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=args.gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        with sess.as_default():
            pnet, rnet, onet = align.detect_face.create_mtcnn(sess, None)

    try:
        img = misc.imread(args.img_dir[0])
    except (IOError, ValueError, IndexError) as e:
        errorMessage = '{}: {}'.format(args.img_dir, e)
        print(errorMessage)
        return None, [], []  # img would be undefined below, so bail out here
    else:
        if img.ndim < 2:
            print('Unable to align "%s"' % args.img_dir)
        if img.ndim == 2:
            img = facenet.to_rgb(img)
        img = img[:, :, 0:3]

    bounding_boxes, _ = align.detect_face.detect_face(img, minsize, pnet, rnet,
                                                      onet, threshold, factor)
    nrof_faces = bounding_boxes.shape[0]
    print('number of faces is {}'.format(nrof_faces))
    det_arr = []  # initialized before the branch so the final return
    scaled = []   # is safe when no face is detected
    if nrof_faces > 0:
        det = bounding_boxes[:, 0:4]
        img_size = np.asarray(img.shape)[0:2]
        if nrof_faces > 1:
            if args.detect_multiple_faces:
                for i in range(nrof_faces):
                    det_arr.append(np.squeeze(det[i]))
                    # print('type of det_arr {} and {}'.format(type(det_arr), det_arr))
            else:
                bounding_box_size = (det[:, 2] - det[:, 0]) * (det[:, 3] -
                                                               det[:, 1])
                img_center = img_size / 2
                offsets = np.vstack([
                    (det[:, 0] + det[:, 2]) / 2 - img_center[1],
                    (det[:, 1] + det[:, 3]) / 2 - img_center[0]
                ])
                offset_dist_squared = np.sum(np.power(offsets, 2.0), 0)
                index = np.argmax(bounding_box_size - offset_dist_squared *
                                  2.0)  # some extra weight on the centering
                det_arr.append(det[index, :])
        else:
            det_arr.append(np.squeeze(det))
        # print('det shape is {}'.format(np.shape(det_arr)))

        # Save the cropped faces
        for i, det in enumerate(det_arr):
            det = np.squeeze(det)
            bb = np.zeros(4, dtype=np.int32)
            bb[0] = np.maximum(det[0] - args.margin / 2, 0)
            bb[1] = np.maximum(det[1] - args.margin / 2, 0)
            bb[2] = np.minimum(det[2] + args.margin / 2, img_size[1])
            bb[3] = np.minimum(det[3] + args.margin / 2, img_size[0])
            cropped = img[bb[1]:bb[3], bb[0]:bb[2], :]
            crop_img = misc.imresize(cropped,
                                     (args.image_size, args.image_size),
                                     interp='bilinear')
            # print(type(crop_img))
            prewhitened = facenet.prewhiten(crop_img)
            # print(np.shape(crop_img))
            scaled.append(prewhitened)
            nrof_successfully_aligned += 1
            # print('size of scaled faces is {}'.format(np.shape(scaled)))
    else:
        print('Unable to align "%s"' % args.img_dir)
    return img, det_arr, scaled
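
facenet.prewhiten, used on each crop above before it is fed to the
embedding network, normalizes pixel statistics. A sketch of the usual
implementation, written here from memory of the davidsandberg/facenet
code, so treat it as an approximation:

import numpy as np

def prewhiten(x):
    # Zero mean, unit variance, with the std clipped from below so a
    # constant image does not divide by ~0
    mean = np.mean(x)
    std = np.std(x)
    std_adj = np.maximum(std, 1.0 / np.sqrt(x.size))
    return (x - mean) / std_adj

crop = np.random.randint(0, 255, (160, 160, 3)).astype(np.float32)
y = prewhiten(crop)
print(y.mean(), y.std())  # approximately 0.0 and 1.0
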
예제 #17
0
def main(args):
    print('Creating networks and loading parameters')

    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=args.gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        with sess.as_default():
            pnet, rnet, onet = align.detect_face.create_mtcnn(sess, None)
            facenet.load_model(args.model_trained)

            # Get input and output tensors
            images_placeholder = \
                tf.get_default_graph().get_tensor_by_name("input:0")
            embeddings = \
                tf.get_default_graph().get_tensor_by_name("embeddings:0")
            phase_train_placeholder = \
                tf.get_default_graph().get_tensor_by_name("phase_train:0")
            # (?, 128)
            print(">>> Embedding size: ", embeddings.get_shape())

            labels, class_names, embed_arrays = joblib.load(
                args.model_filename)
            # Classify images
            model = joblib.load(args.classifier_filename)

            # =================================================================
            minsize = 20  # minimum size of face
            threshold = [0.6, 0.7, 0.7]  # thresholds for the three stages
            factor = 0.709  # scale factor

            # Get a reference to webcam #0 (the default one)
            video_capture = cv2.VideoCapture(0)
            while True:
                ret, img = video_capture.read()
                nrof_successfully_aligned = 0

                if img.ndim == 2:
                    img = facenet.to_rgb(img)
                img = img[:, :, 0:3]
                # print("Ndim: %d" % img.ndim)
                # print("Shape: %d" % img.shape)

                bounding_boxes, _ = align.detect_face.detect_face(
                    img, minsize, pnet, rnet, onet, threshold, factor)
                nrof_faces = bounding_boxes.shape[0]
                if nrof_faces > 0:
                    det = bounding_boxes[:, 0:4]
                    det_arr = []
                    img_size = np.asarray(img.shape)[0:2]
                    if nrof_faces > 1:
                        if args.detect_multiple_faces:
                            for i in range(nrof_faces):
                                det_arr.append(np.squeeze(det[i]))
                        else:
                            bounding_box_size = (det[:, 2] - det[:, 0]) * \
                                                (det[:, 3] - det[:, 1])
                            img_center = img_size / 2
                            offsets = np.vstack([
                                (det[:, 0] + det[:, 2]) / 2 - img_center[1],
                                (det[:, 1] + det[:, 3]) / 2 - img_center[0]
                            ])
                            offset_dist_squared = np.sum(
                                np.power(offsets, 2.0), 0)
                            # some extra weight on the centering
                            index = np.argmax(bounding_box_size -
                                              offset_dist_squared * 2.0)
                            det_arr.append(det[index, :])
                    else:
                        det_arr.append(np.squeeze(det))

                    for i, det in enumerate(det_arr):
                        det = np.squeeze(det)
                        bb = np.zeros(4, dtype=np.int32)
                        bb[0] = np.maximum(det[0] - args.margin / 2, 0)
                        bb[1] = np.maximum(det[1] - args.margin / 2, 0)
                        bb[2] = np.minimum(det[2] + args.margin / 2,
                                           img_size[1])
                        bb[3] = np.minimum(det[3] + args.margin / 2,
                                           img_size[0])

                        # CROP IMAGE
                        print(">>> Resize image")
                        img_croped = misc.imresize(
                            img[bb[1]:bb[3], bb[0]:bb[2], :],
                            (args.image_size, args.image_size),
                            interp='bilinear')
                        img_croped = facenet.load_test_web_data(
                            img_croped, False, False, args.image_size)
                        # RUN
                        print(">>> Feed dict")
                        feed_dict = {
                            images_placeholder: img_croped,  # ndarray
                            phase_train_placeholder: False
                        }
                        emb_array = sess.run(embeddings, feed_dict=feed_dict)
                        # print(emb_array)

                        nrof_successfully_aligned += 1

                        # left, top, right, bottom
                        print(bb[0], bb[1], bb[2], bb[3])
                        cv2.rectangle(img, (bb[0], bb[1]), (bb[2], bb[3]),
                                      (0, 0, 255), 2)
                        # Draw a label with a name below the face
                        cv2.rectangle(img, (bb[0], bb[3] - 35), (bb[2], bb[1]),
                                      (0, 0, 255))

                        print("\t>>> Embed shape: ", emb_array.shape)
                        distances, indexes = model.kneighbors(
                            emb_array.reshape(1, -1), return_distance=True)
                        print(emb_array.tolist())

                        # PREDICTION
                        predictions = model.predict(emb_array)
                        print("\t>>> Index (non threshold): ",
                              class_names[predictions[0]])
                        print("\t>>> Predictions (non threshold): ",
                              predictions[0])

                        checked = any(d < CONST_DIST for d in distances[0])
                        print("\t>>> Distance: ", distances[0])
                        font = cv2.FONT_HERSHEY_DUPLEX
                        if checked:
                            # max_dist = np.argmin(distances[0])
                            # class_name = \
                            #     class_names[labels[indexes[0][max_dist]]]
                            # cv2.putText(
                            #     img, "{} ".format(i) + class_name,
                            #     (bb[0] + 6, bb[3] - 6), font,
                            #     1.0, (255, 255, 255), 1
                            # )
                            cv2.putText(
                                img,
                                "{} ".format(i) + class_names[predictions[0]],
                                (bb[0] + 6, bb[3] - 6), font, 1.0,
                                (255, 255, 255), 1)
                            print("\t>>> Label: %s" %
                                  class_names[predictions[0]])
                        else:
                            cv2.putText(img, "{} ".format(i) + "---",
                                        (bb[0] + 6, bb[3] - 6), font, 1.0,
                                        (255, 255, 255), 1)
                            print("\t>>> Label: %s" % "Unknown")

                        # emb_array = np.array(emb_array).reshape(1, -1)
                        # print(">>> Prediction")

                        # predictions = model.predict(emb_array)
                        # print("Predictions: ", predictions[0])
                        # font = cv2.FONT_HERSHEY_DUPLEX
                        # cv2.putText(
                        #     img, class_names[predictions[0]],
                        #     (bb[0] + 6, bb[3] - 6), font,
                        #     1.0, (255, 255, 255), 1
                        # )
                        # print("Label: %s" % class_names[predictions[0]])

                        del img_croped, emb_array
                    del det, det_arr
                else:
                    print('Unable to align')

                cv2.imshow('Video', img)
                print("Show frame")

                # Hit 'q' on the keyboard to quit!
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break

    video_capture.release()
    cv2.destroyAllWindows()
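
The distance test against CONST_DIST above implements open-set rejection:
a face only gets a label when a training embedding lies close enough. A
minimal sketch in a toy 2-D embedding space; the threshold value and the
points are made up:

import numpy as np
from sklearn.neighbors import KNeighborsClassifier

CONST_DIST = 0.6  # made-up rejection threshold
embeds = np.array([[0.0, 0.0], [0.1, 0.0], [1.0, 1.0]])
labels = np.array([0, 0, 1])  # two known identities

model = KNeighborsClassifier(n_neighbors=1).fit(embeds, labels)

for query in (np.array([[0.05, 0.02]]), np.array([[3.0, 3.0]])):
    distances, indexes = model.kneighbors(query, return_distance=True)
    if any(d < CONST_DIST for d in distances[0]):
        print('identity:', model.predict(query)[0])
    else:
        print('Unknown')  # no neighbor close enough
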
예제 #18
0
        print('Start Recognition!')
        prevTime = 0
        while True:
            ret, frame = video_capture.read()

            frame = cv2.resize(frame, (0, 0), fx=0.3,
                               fy=0.3)  #resize frame (optional)

            curTime = time.time()  # calc fps
            timeF = frame_interval

            if (c % timeF == 0):
                find_results = []

                if frame.ndim == 2:
                    frame = facenet.to_rgb(frame)
                frame = frame[:, :, 0:3]
                #print(frame.shape[0])
                #print(frame.shape[1])

                # Use YOLO to get bounding boxes
                blob = cv2.dnn.blobFromImage(frame,
                                             1 / 255, (IMG_WIDTH, IMG_HEIGHT),
                                             [0, 0, 0],
                                             1,
                                             crop=False)

                # Sets the input to the network
                net.setInput(blob)

                # Runs the forward pass to get output of the output layers
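
The excerpt is cut off just before the forward pass. For reference, a
typical continuation with OpenCV's dnn module looks like the sketch below;
the output-layer lookup is the usual OpenCV idiom rather than this
example's own code, and the post-processing (confidence filtering and NMS)
depends on the model, so it is omitted:

import cv2

def get_output_names(net):
    # Names of the unconnected output layers (OpenCV returns 1-based indices)
    layer_names = net.getLayerNames()
    return [layer_names[i - 1] for i in net.getUnconnectedOutLayers().flatten()]

# net = cv2.dnn.readNetFromDarknet('yolov3.cfg', 'yolov3.weights')  # hypothetical paths
# net.setInput(blob)
# outs = net.forward(get_output_names(net))  # one ndarray per output layer
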
예제 #19
0
def main(argv=None):
    align = align_dlib.AlignDlib(os.path.expanduser(FLAGS.dlib_face_predictor))
    landmarkIndices = align_dlib.AlignDlib.OUTER_EYES_AND_NOSE
    output_dir = os.path.expanduser(FLAGS.output_dir)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    # Store some git revision info in a text file in the log directory
    src_path, _ = os.path.split(os.path.realpath(__file__))
    facenet.store_revision_info(src_path, output_dir, ' '.join(argv))
    dataset = facenet.get_dataset(FLAGS.input_dir)
    random.shuffle(dataset)
    # Scale the image such that the face fills the frame when cropped to crop_size
    scale = float(FLAGS.face_size) / FLAGS.image_size
    nrof_images_total = 0
    nrof_prealigned_images = 0
    nrof_successfully_aligned = 0
    for cls in dataset:
        output_class_dir = os.path.join(output_dir, cls.name)
        if not os.path.exists(output_class_dir):
            os.makedirs(output_class_dir)
        random.shuffle(cls.image_paths)
        for image_path in cls.image_paths:
            nrof_images_total += 1
            filename = os.path.splitext(os.path.split(image_path)[1])[0]
            output_filename = os.path.join(output_class_dir, filename + '.png')
            if not os.path.exists(output_filename):
                try:
                    img = misc.imread(image_path)
                except (IOError, ValueError, IndexError) as e:
                    errorMessage = '{}: {}'.format(image_path, e)
                    print(errorMessage)
                else:
                    if img.ndim == 2:
                        img = facenet.to_rgb(img)
                    if FLAGS.use_new_alignment:
                        aligned = align.align_new(
                            FLAGS.image_size,
                            img,
                            landmarkIndices=landmarkIndices,
                            skipMulti=False,
                            scale=scale)
                    else:
                        aligned = align.align(FLAGS.image_size,
                                              img,
                                              landmarkIndices=landmarkIndices,
                                              skipMulti=False,
                                              scale=scale)
                    if aligned is not None:
                        print(image_path)
                        nrof_successfully_aligned += 1
                        misc.imsave(output_filename, aligned)
                    elif FLAGS.prealigned_path:
                        # Face detection failed. Use center crop from pre-aligned dataset
                        class_name = os.path.split(output_class_dir)[1]
                        image_path_without_ext = os.path.join(
                            os.path.expanduser(FLAGS.prealigned_path),
                            class_name, filename)
                        # Find the extension of the image
                        exts = ('jpg', 'png')
                        image_path = ''
                        for ext in exts:
                            temp_path = image_path_without_ext + '.' + ext
                            if os.path.exists(temp_path):
                                image_path = temp_path
                                break
                        try:
                            img = misc.imread(image_path)
                        except (IOError, ValueError, IndexError) as e:
                            errorMessage = '{}: {}'.format(image_path, e)
                            print(errorMessage)
                        else:
                            scaled = misc.imresize(img,
                                                   FLAGS.prealigned_scale,
                                                   interp='bilinear')
                            # Floor division so the slice indices stay integers
                            sz1 = scaled.shape[1] // 2
                            sz2 = FLAGS.image_size // 2
                            cropped = scaled[(sz1 - sz2):(sz1 + sz2),
                                             (sz1 - sz2):(sz1 + sz2), :]
                            print(image_path)
                            nrof_prealigned_images += 1
                            misc.imsave(output_filename, cropped)
                    else:
                        print('Unable to align "%s"' % image_path)

    print('Total number of images: %d' % nrof_images_total)
    print('Number of successfully aligned images: %d' %
          nrof_successfully_aligned)
    print('Number of pre-aligned images: %d' % nrof_prealigned_images)
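
The pre-aligned fallback above is a plain center crop: slice symmetric
half-widths around the image midpoint. A minimal standalone sketch with
made-up sizes (it assumes a roughly square image, as the original does):

import numpy as np

img = np.zeros((250, 250, 3))  # hypothetical pre-aligned image
image_size = 160

sz1 = img.shape[1] // 2   # image center: 125
sz2 = image_size // 2     # half the crop: 80
cropped = img[(sz1 - sz2):(sz1 + sz2), (sz1 - sz2):(sz1 + sz2), :]
print(cropped.shape)      # (160, 160, 3)
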
예제 #20
0
def align_image(input_image,
                image_size=182,
                margin=44,
                gpu_memory_fraction=0.5):
    sleep(random.random())
    #print('Creating networks and loading parameters')

    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        with sess.as_default():
            pnet, rnet, onet = align.detect_face.create_mtcnn(sess, None)

    minsize = 20  # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # thresholds for the three stages
    factor = 0.709  # scale factor
    img = input_image
    if img.ndim < 2:
        # image_path/text_file from the batch version of this code do not exist here
        print('Unable to align the input image')
        return img, None
    if img.ndim == 2:
        img = facenet.to_rgb(img)
    img = img[:, :, 0:3]

    bounding_boxes, points = align.detect_face.detect_face(
        img, minsize, pnet, rnet, onet, threshold, factor)
    nrof_faces = bounding_boxes.shape[0]
    if nrof_faces > 0:
        det = bounding_boxes[:, 0:4]
        img_size = np.asarray(img.shape)[0:2]
        if nrof_faces > 1:
            bounding_box_size = (det[:, 2] - det[:, 0]) * (det[:, 3] -
                                                           det[:, 1])
            img_center = img_size / 2
            offsets = np.vstack([(det[:, 0] + det[:, 2]) / 2 - img_center[1],
                                 (det[:, 1] + det[:, 3]) / 2 - img_center[0]])
            offset_dist_squared = np.sum(np.power(offsets, 2.0), 0)
            index = np.argmax(bounding_box_size - offset_dist_squared *
                              2.0)  # some extra weight on the centering
            det = det[index, :]
        det = np.squeeze(det)
        bb = np.zeros(4, dtype=np.int32)
        bb[0] = np.maximum(det[0] - margin / 2, 0)
        bb[1] = np.maximum(det[1] - margin / 2, 0)
        bb[2] = np.minimum(det[2] + margin / 2, img_size[1])
        bb[3] = np.minimum(det[3] + margin / 2, img_size[0])
        cropped = img[bb[1]:bb[3], bb[0]:bb[2], :]
        #bb2 = dlib.rectangle(left=int(bb[0]), top=int(bb[1]), right=int(bb[2]), bottom=int(bb[3]))
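        # Rounded horizontal center of the expanded box; the +1 and the
        # sign fix-ups mimic round-toward-zero division, and temp_y is
        # computed but never used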
        temp_x = (int(bb[0]) + int(bb[2]) + 1)
        if temp_x < 0:
            temp_x -= 1
        temp_y = (int(bb[1]) + int(bb[3] + 1))
        if temp_y < 0:
            temp_y -= 1
        bb2_center_x = temp_x / 2
        scaled = (extract_image_chips.extract_image_chips(
            img, np.transpose(points), image_size, 0.37))[0]
        #scaled = misc.imresize(scaled, (image_size, image_size), interp='bilinear')
        return scaled, bb2_center_x
    else:
        print("no face detected..\n")
        # Return the same arity as the success branch so callers can unpack
        return img, None
예제 #21
0
def main(args):
    sleep(random.random())

    # get the complete path of output directory
    output_dir = os.path.expanduser(args.output_dir)

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # get the path of this program file
    src_path, _ = os.path.split(os.path.realpath(__file__))

    # Store some git revision info in a text file in the log directory
    facenet.store_revision_info(src_path, output_dir, ' '.join(sys.argv))

    # get_dataset returns a list of objects, each holding two elements:
    # the class name and the paths of all entries belonging to that class
    dataset = facenet.get_dataset(args.input_dir)
    # dataset = facenet.get_dataset_from_difference_sources(args.input_dir)

    print('Creating networks and loading parameters')

    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=args.gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        with sess.as_default():
            pnet, rnet, onet = align.detect_face.create_mtcnn(sess, None)

    minsize = 20  # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # thresholds for the three stages
    factor = 0.709  # scale factor

    # Add a random key to the filename to allow alignment using multiple processes
    random_key = np.random.randint(0, high=99999)
    bounding_boxes_filename = os.path.join(
        output_dir, 'bounding_boxes_%05d.txt' % random_key)

    with open(bounding_boxes_filename, "w") as text_file:
        nrof_images_total = 0
        nrof_successfully_aligned = 0
        if args.random_order:
            random.shuffle(dataset)  # classes shuffle
        for cls in dataset:
            output_class_dir = os.path.join(output_dir, cls.name)
            if not os.path.exists(output_class_dir):
                os.makedirs(output_class_dir)
                if args.random_order:
                    random.shuffle(cls.image_paths)  # images shuffle
            for image_path in cls.image_paths:
                nrof_images_total += 1
                filename = os.path.splitext(os.path.split(image_path)[1])[0]
                output_filename = os.path.join(output_class_dir,
                                               filename + '.png')
                print(image_path)
                if not os.path.exists(output_filename):
                    try:
                        img = misc.imread(image_path)
                        # img = cv2.imread(image_path)
                    except (IOError, ValueError, IndexError) as e:
                        errorMessage = '{}: {}'.format(image_path, e)
                        print(errorMessage)
                    else:
                        # make sure that all images are normal
                        # ----------------------------------------------
                        if img.ndim < 2:  # a normal image has at least two dimensions (width and height)
                            print('Unable to align "%s"' % image_path)
                            text_file.write('%s\n' % (output_filename))
                            continue
                        if img.ndim == 2:  # a grayscale image with a single channel
                            img = facenet.to_rgb(img)
                        img = img[:, :, 0:3]
                        # ----------------------------------------------

                        # bounding_boxes: faces of all person in this image
                        # _: five landmarks of each person in this image
                        bounding_boxes, _ = align.detect_face.detect_face(
                            img, minsize, pnet, rnet, onet, threshold, factor)
                        nrof_faces = bounding_boxes.shape[0]

                        # at least one face
                        if nrof_faces > 0:
                            det = bounding_boxes[:, 0:4]
                            img_size = np.asarray(img.shape)[0:2]
                            # more than one face in this image
                            #  -------------------------------------------------------------------
                            # if nrof_faces > 1:
                            #     bounding_box_size = (det[:, 2]-det[:, 0])*(det[:, 3]-det[:, 1])  # width * height
                            #     img_center = img_size / 2
                            #
                            #     # how far each face center is from the center of this image
                            #     offsets = np.vstack([(det[:, 0]+det[:, 2])/2-img_center[1], (det[:, 1]+det[:, 3])/2-img_center[0]])
                            #     offset_dist_squared = np.sum(np.power(offsets, 2.0), 0)
                            #
                            #     # choose the most important face in this image
                            #     index = np.argmax(bounding_box_size-offset_dist_squared*2.0)  # some extra weight on the centering
                            #     det = det[index, :]
                            # -------------------------------------------------------------------
                            for det_no in range(nrof_faces):
                                each_det = np.squeeze(det[det_no])
                                bb = np.zeros(4, dtype=np.int32)
                                bb[0] = np.maximum(
                                    each_det[0] - args.margin / 2, 0)
                                bb[1] = np.maximum(
                                    each_det[1] - args.margin / 2, 0)
                                bb[2] = np.minimum(
                                    each_det[2] + args.margin / 2, img_size[1])
                                bb[3] = np.minimum(
                                    each_det[3] + args.margin / 2, img_size[0])
                                cropped = img[bb[1]:bb[3], bb[0]:bb[2], :]
                                scaled = misc.imresize(
                                    cropped,
                                    (args.image_size, args.image_size),
                                    interp='bilinear')

                                if nrof_faces > 1:
                                    output_filename = os.path.join(
                                        output_class_dir,
                                        filename + '_%d.png' % (det_no))
                                misc.imsave(output_filename, scaled)
                                text_file.write('%s %d %d %d %d\n' %
                                                (output_filename, bb[0], bb[1],
                                                 bb[2], bb[3]))
                            nrof_successfully_aligned += 1
                        else:
                            print('Unable to align "%s"' % image_path)
                            text_file.write('%s\n' % (output_filename))

    print('Total number of images: %d' % nrof_images_total)
    print('Number of successfully aligned images: %d' %
          nrof_successfully_aligned)
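
When several faces come out of one source image, the examples above
disambiguate the output files with an index suffix; a minimal sketch of
that naming scheme (the paths are made up):

import os

output_filename = 'aligned/Alice/img001.png'  # hypothetical output path
nrof_faces = 3

base, ext = os.path.splitext(output_filename)
for det_no in range(nrof_faces):
    print('{}_{}{}'.format(base, det_no, ext))
# aligned/Alice/img001_0.png, aligned/Alice/img001_1.png, aligned/Alice/img001_2.png
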
예제 #22
0
            output_filename = os.path.join(output_class_dir, filename + '.png')
            print(image_path)
            if not os.path.exists(output_filename):
                try:
                    img = misc.imread(image_path)
                    print('read data dimension: ', img.ndim)
                except (IOError, ValueError, IndexError) as e:
                    errorMessage = '{}: {}'.format(image_path, e)
                    print(errorMessage)
                else:
                    if img.ndim < 2:
                        print('Unable to align "%s"' % image_path)
                        text_file.write('%s\n' % (output_filename))
                        continue
                    if img.ndim == 2:
                        img = facenet.to_rgb(img)
                        print('to_rgb data dimension: ', img.ndim)
                    img = img[:, :, 0:3]
                    print('after data dimension: ', img.ndim)

                    bounding_boxes, _ = detect_face.detect_face(img, minsize, pnet, rnet, onet, threshold, factor)
                    nrof_faces = bounding_boxes.shape[0]
                    print('detected_face: %d' % nrof_faces)
                    if nrof_faces > 0:
                        det = bounding_boxes[:, 0:4]
                        img_size = np.asarray(img.shape)[0:2]
                        if nrof_faces > 1:
                            bounding_box_size = (det[:, 2] - det[:, 0]) * (det[:, 3] - det[:, 1])
                            img_center = img_size / 2
                            offsets = np.vstack([(det[:, 0] + det[:, 2]) / 2 - img_center[1],
                                                 (det[:, 1] + det[:, 3]) / 2 - img_center[0]])
예제 #23
0
def face_image(filename):
    img_path = filename
    modeldir = './models/20170511-185253.pb'
    classifier_filename = './class/classifier.pkl'
    npy = './npy'
    train_img = "./train_img"
    find_final_results = []  #final results
    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        with sess.as_default():
            pnet, rnet, onet = detect_face.create_mtcnn(sess, npy)

            minsize = 20  # minimum size of face
            threshold = [0.6, 0.7, 0.7]  # thresholds for the three stages
            factor = 0.709  # scale factor
            margin = 44
            frame_interval = 3
            batch_size = 1000
            image_size = 182
            input_image_size = 160

            HumanNames = os.listdir(train_img)
            HumanNames.sort()

            print('Loading feature extraction model')
            facenet.load_model(modeldir)

            images_placeholder = tf.get_default_graph().get_tensor_by_name(
                "input:0")
            embeddings = tf.get_default_graph().get_tensor_by_name(
                "embeddings:0")
            phase_train_placeholder = tf.get_default_graph(
            ).get_tensor_by_name("phase_train:0")
            embedding_size = embeddings.get_shape()[1]

            classifier_filename_exp = os.path.expanduser(classifier_filename)
            with open(classifier_filename_exp, 'rb') as infile:
                (model, class_names) = pickle.load(infile)

            # video_capture = cv2.VideoCapture("akshay_mov.mp4")
            c = 0

            print('Start Recognition!')
            prevTime = 0
            # ret, frame = video_capture.read()
            frame = cv2.imread(img_path, 0)

            frame = cv2.resize(frame, (0, 0), fx=0.5,
                               fy=0.5)  #resize frame (optional)

            curTime = time.time() + 1  # calc fps
            timeF = frame_interval

            if (c % timeF == 0):
                find_results = []

                if frame.ndim == 2:
                    frame = facenet.to_rgb(frame)
                frame = frame[:, :, 0:3]
                bounding_boxes, _ = detect_face.detect_face(
                    frame, minsize, pnet, rnet, onet, threshold, factor)
                nrof_faces = bounding_boxes.shape[0]
                print('Face Detected: %d' % nrof_faces)

                if nrof_faces > 0:

                    det = bounding_boxes[:, 0:4]
                    img_size = np.asarray(frame.shape)[0:2]

                    cropped = []
                    scaled = []
                    scaled_reshape = []
                    bb = np.zeros((nrof_faces, 4), dtype=np.int32)

                    for i in range(nrof_faces):
                        emb_array = np.zeros((1, embedding_size))

                        bb[i][0] = det[i][0]
                        bb[i][1] = det[i][1]
                        bb[i][2] = det[i][2]
                        bb[i][3] = det[i][3]

                        # inner exception
                        if bb[i][0] <= 0 or bb[i][1] <= 0 or bb[i][2] >= len(
                                frame[0]) or bb[i][3] >= len(frame):
                            print('face is too close')
                            continue

                        if len(cropped) >= i:
                            cropped.append(frame[bb[i][1]:bb[i][3],
                                                 bb[i][0]:bb[i][2], :])
                            cropped[i] = facenet.flip(cropped[i], False)
                            scaled.append(
                                misc.imresize(cropped[i],
                                              (image_size, image_size),
                                              interp='bilinear'))
                            scaled[i] = cv2.resize(
                                scaled[i],
                                (input_image_size, input_image_size),
                                interpolation=cv2.INTER_CUBIC)
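                            # prewhiten standardizes the crop (zero mean, scaled
                            # variance) before it is fed to the FaceNet model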
                            scaled[i] = facenet.prewhiten(scaled[i])
                            scaled_reshape.append(scaled[i].reshape(
                                -1, input_image_size, input_image_size, 3))
                            feed_dict = {
                                images_placeholder: scaled_reshape[i],
                                phase_train_placeholder: False
                            }
                            emb_array[0, :] = sess.run(embeddings,
                                                       feed_dict=feed_dict)
                            predictions = model.predict_proba(emb_array)
                            print("predictions: ", predictions)
                            best_class_indices = np.argmax(predictions, axis=1)
                            #print(best_class_indices)
                            best_class_probabilities = predictions[
                                np.arange(len(best_class_indices)),
                                best_class_indices]
                            print(best_class_probabilities)
                            cv2.rectangle(frame, (bb[i][0], bb[i][1]),
                                          (bb[i][2], bb[i][3]), (0, 255, 0),
                                          2)  #boxing face

                            #plot result idx under box
                            text_x = bb[i][0]
                            text_y = bb[i][3] + 20
                            #print(best_class_indices[0] > 0.80)
                            if predictions[0][best_class_indices[0]] > 0.80:
                                print('Result Indices: ',
                                      best_class_indices[0])
                                print(HumanNames)
                                for H_i in HumanNames:
                                    # print(H_i)
                                    if HumanNames[
                                            best_class_indices[0]] == H_i:
                                        result_names = HumanNames[
                                            best_class_indices[0]]
                                        if result_names not in find_final_results:
                                            find_final_results.append(
                                                result_names)
                                        cv2.putText(
                                            frame,
                                            result_names, (text_x, text_y),
                                            cv2.FONT_HERSHEY_COMPLEX_SMALL,
                                            2, (0, 255, 0),
                                            thickness=2,
                                            lineType=2)
                            else:
                                print('Result Indices: ', -1)
                                print(HumanNames)
                                cv2.putText(frame,
                                            "Unkown", (text_x, text_y),
                                            cv2.FONT_HERSHEY_COMPLEX_SMALL,
                                            2, (0, 255, 0),
                                            thickness=2,
                                            lineType=2)
                else:
                    print('Unable to align')
            frame = cv2.resize(frame, (0, 0), fx=0.5, fy=0.5)
            cv2.imshow('Image', frame)

            if cv2.waitKey(1000000) & 0xFF == ord('q'):
                sys.exit("Thanks")
            cv2.destroyAllWindows()
            return find_final_results
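A minimal call sketch for face_image (it assumes the model file, classifier pickle, npy directory, and train_img folder hard-coded above actually exist; 'test.jpg' is a hypothetical input image):

if __name__ == '__main__':
    recognized = face_image('test.jpg')
    print('Recognized:', recognized)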
Example #24
def main(args):
    align = align_dlib.AlignDlib(os.path.expanduser(args.dlib_face_predictor))
    landmarkIndices = align_dlib.AlignDlib.OUTER_EYES_AND_NOSE
    output_dir = os.path.expanduser(args.output_dir)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    # Store some git revision info in a text file in the log directory
    src_path,_ = os.path.split(os.path.realpath(__file__))
    facenet.store_revision_info(src_path, output_dir, ' '.join(sys.argv))
    dataset = facenet.get_dataset(args.input_dir)
    random.shuffle(dataset)
    # Scale the image such that the face fills the frame when cropped to crop_size
    scale = float(args.face_size) / args.image_size
    nrof_images_total = 0
    nrof_prealigned_images = 0
    nrof_successfully_aligned = 0
    for cls in dataset:
        output_class_dir = os.path.join(output_dir, cls.name)
        if not os.path.exists(output_class_dir):
            os.makedirs(output_class_dir)
        random.shuffle(cls.image_paths)
        for image_path in cls.image_paths:
            nrof_images_total += 1
            filename = os.path.splitext(os.path.split(image_path)[1])[0]
            output_filename = os.path.join(output_class_dir, filename+'.png')
            if not os.path.exists(output_filename):
                try:
                    img = misc.imread(image_path)
                except (IOError, ValueError, IndexError) as e:
                    errorMessage = '{}: {}'.format(image_path, e)
                    print(errorMessage)
                else:
                    if img.ndim == 2:
                        img = facenet.to_rgb(img)
                    if args.use_center_crop:
                        scaled = misc.imresize(img, args.prealigned_scale, interp='bilinear')
                        sz1 = scaled.shape[1]//2
                        sz2 = args.image_size//2
                        aligned = scaled[(sz1-sz2):(sz1+sz2),(sz1-sz2):(sz1+sz2),:]
                    else:
                        aligned = align.align(args.image_size, img, landmarkIndices=landmarkIndices, 
                                              skipMulti=False, scale=scale)
                    if aligned is not None:
                        print(image_path)
                        nrof_successfully_aligned += 1
                        misc.imsave(output_filename, aligned)
                    elif args.prealigned_dir:
                        # Face detection failed. Use center crop from pre-aligned dataset
                        class_name = os.path.split(output_class_dir)[1]
                        image_path_without_ext = os.path.join(os.path.expanduser(args.prealigned_dir), 
                                                              class_name, filename)
                        # Find the extension of the image
                        exts = ('jpg', 'png')
                        image_path = ''
                        for ext in exts:
                            temp_path = image_path_without_ext + '.' + ext
                            if os.path.exists(temp_path):
                                image_path = temp_path
                                break
                        try:
                            img = misc.imread(image_path)
                        except (IOError, ValueError, IndexError) as e:
                            errorMessage = '{}: {}'.format(image_path, e)
                            print(errorMessage)
                        else:
                            scaled = misc.imresize(img, args.prealigned_scale, interp='bilinear')
                            sz1 = scaled.shape[1]//2
                            sz2 = args.image_size//2
                            cropped = scaled[(sz1-sz2):(sz1+sz2),(sz1-sz2):(sz1+sz2),:]
                            print(image_path)
                            nrof_prealigned_images += 1
                            misc.imsave(output_filename, cropped)
                    else:
                        print('Unable to align "%s"' % image_path)
                            
    print('Total number of images: %d' % nrof_images_total)
    print('Number of successfully aligned images: %d' % nrof_successfully_aligned)
    print('Number of pre-aligned images: %d' % nrof_prealigned_images)
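main() reads input_dir, output_dir, dlib_face_predictor, image_size, face_size, use_center_crop, prealigned_scale, and prealigned_dir from args. A minimal parser sketch covering those attributes (the defaults are illustrative assumptions, not values from the original script):

import argparse
import sys

def parse_arguments(argv):
    parser = argparse.ArgumentParser()
    parser.add_argument('input_dir', type=str, help='Directory with unaligned images.')
    parser.add_argument('output_dir', type=str, help='Directory for aligned face thumbnails.')
    parser.add_argument('--dlib_face_predictor', type=str,
                        default='shape_predictor_68_face_landmarks.dat')
    parser.add_argument('--image_size', type=int, default=182)
    parser.add_argument('--face_size', type=int, default=160)
    parser.add_argument('--use_center_crop', action='store_true',
                        help='Center-crop a scaled copy instead of dlib alignment.')
    parser.add_argument('--prealigned_dir', type=str, default='',
                        help='Fallback dataset of pre-aligned images.')
    parser.add_argument('--prealigned_scale', type=float, default=0.87)
    return parser.parse_args(argv)

if __name__ == '__main__':
    main(parse_arguments(sys.argv[1:]))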
Example #25
def crop_my_baby(vid_image):
    ##    print(vid_image)
    sleep(random.random())

    print('Creating networks and loading parameters')

    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=1.0)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        with sess.as_default():
            pnet, rnet, onet = align.detect_face.create_mtcnn(sess, None)

    minsize = 20
    threshold = [0.6, 0.7, 0.7]
    factor = 0.709

    random_key = np.random.randint(0, high=99999)
    ##    bounding_boxes_filename = os.path.join(
    scaled = preprocess_image(vid_image)
    try:
        ##        img = misc.imread(vid_image)
        img = vid_image
    except (IOError, ValueError, IndexError) as e:
        errorMessage = '{}: {}'.format(vid_image, e)
        print(errorMessage)
    else:
        if img.ndim == 2:
            img = facenet.to_rgb(img)
        img = img[:, :, 0:3]
        ##        print(img)

        bounding_boxes, _ = align.detect_face.detect_face(
            img, minsize, pnet, rnet, onet, threshold, factor)

        nrof_faces = bounding_boxes.shape[0]
        #        print('nrof_faces')
        #        print(nrof_faces)

        if nrof_faces > 0:
            print('nrof_faces' + str(nrof_faces))
            det = bounding_boxes[:, 0:4]
            det_arr = []
            img_size = np.asarray(img.shape)[0:2]
            if nrof_faces > 1:
                for i in range(nrof_faces):
                    det_arr.append(np.squeeze(det[i]))
            else:
                det_arr.append(np.squeeze(det))

            for i, det in enumerate(det_arr):
                det = np.squeeze(det)
                bb = np.zeros(4, dtype=np.int32)
                bb[0] = np.maximum(det[0] - 22, 0)
                bb[1] = np.maximum(det[1] - 22, 0)
                bb[2] = np.minimum(det[2] + 22, img_size[1])
                bb[3] = np.minimum(det[3] + 22, img_size[0])
                cropped = img[bb[1]:bb[3], bb[0]:bb[2], :]
                scaled = misc.imresize(cropped, (160, 160), interp='bilinear')
                print('Success aligning')
                print('scaled_type' + str(type(scaled)) + 'scaled_dim')
                print(scaled.shape)
        else:
            print('scaled_type' + str(type(scaled)) + 'scaled_dim')
            print(scaled.shape)
    return scaled
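crop_my_baby() takes an image array rather than a file path, and relies on a preprocess_image() helper that is not defined in this snippet; the caller must supply one. A usage sketch under those assumptions ('baby.jpg' is a hypothetical file):

if __name__ == '__main__':
    frame = cv2.imread('baby.jpg')   # an ndarray, not a path string
    face = crop_my_baby(frame)       # 160x160 face crop if detection succeeded
    misc.imsave('baby_face.png', face)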
Example #26
        print('Start Recognition!')
        prevTime = 0
        while True:
            ret, frame = video_capture.read()

            # frame = cv2.resize(frame, (0,0), fx=0.5, fy=0.5)    #resize frame (optional)

            curTime = time.time()    # calc fps
            timeF = frame_interval

            if (c % timeF == 0):
                find_results = []

                if frame.ndim == 2:
                    frame = facenet.to_rgb(frame)
                frame = frame[:, :, 0:3]
                #print(frame.shape[0])
                #print(frame.shape[1])

                # Use YOLO to get bounding boxes
                blob = cv2.dnn.blobFromImage(frame, 1 / 255, (IMG_WIDTH, IMG_HEIGHT), [0, 0, 0], 1, crop=False)

                # Sets the input to the network
                net.setInput(blob)

                # Runs the forward pass to get output of the output layers
                outs = net.forward(get_outputs_names(net))

                # Remove the bounding boxes with low confidence
                bounding_boxes = post_process(frame, outs, CONF_THRESHOLD, NMS_THRESHOLD)
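The fragment above calls get_outputs_names() and post_process(), which are defined elsewhere. get_outputs_names is the usual OpenCV DNN helper for finding the YOLO output layers; a sketch that copes with OpenCV versions where getUnconnectedOutLayers() returns either scalars or Nx1 arrays:

import numpy as np

def get_outputs_names(net):
    # List all layer names, then pick those whose outputs are unconnected
    # (the YOLO detection layers). flatten() normalizes the index shape
    # across OpenCV versions.
    layer_names = net.getLayerNames()
    out_idx = np.asarray(net.getUnconnectedOutLayers()).flatten()
    return [layer_names[int(i) - 1] for i in out_idx]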
Example #27
def batch_inp(rel_path):
    print('Start Recognition!')
    prevTime = 0
    img_list = glob.glob(os.path.join(rel_path, '*'))
    results = list()
    cnt = 0
    ok_ind = list()
    for img_path in img_list:  # for each image in the list
        res = None
        frame = cv2.imread(img_path)
        # ret, frame = video_capture.read()

        # frame = cv2.resize(frame, (0,0), fx=0.5, fy=0.5)    #resize frame (optional)

        curTime = time.time()  # calc fps
        timeF = frame_interval

        if (c % timeF == 0):  # detect faces in the current image
            find_results = []

            if frame.ndim == 2:
                frame = facenet.to_rgb(frame)
            frame = frame[:, :, 0:3]
            bounding_boxes, _ = detect_face.detect_face(
                frame, minsize, pnet, rnet, onet, threshold, factor)
            nrof_faces = bounding_boxes.shape[0]
            print('Detected_FaceNum: %d' % nrof_faces)

            if nrof_faces > 0:
                det = bounding_boxes[:, 0:4]
                img_size = np.asarray(frame.shape)[0:2]
                scaled_reshape = []

                bb = [int(np.round(i)) for i in det[0]]
                # inner exception
                if bb[0] <= 0 or bb[1] <= 0 or bb[2] >= len(
                        frame[0]) or bb[3] >= len(frame):
                    print('face is out of range!')
                    continue

                cropped = frame[bb[1]:bb[3], bb[0]:bb[2], :]
                cropped = facenet.flip(cropped, False)
                scaled = misc.imresize(cropped, (image_size, image_size),
                                       interp='bilinear')
                scaled = cv2.resize(scaled,
                                    (input_image_size, input_image_size),
                                    interpolation=cv2.INTER_CUBIC)
                scaled = facenet.prewhiten(scaled)
                scaled_reshape.append(
                    scaled.reshape(input_image_size, input_image_size, 3))
                ok_ind.append(cnt)
        cnt += 1

    feed_dict = {
        images_placeholder: scaled_reshape,
        phase_train_placeholder: False
    }
    emb_array = sess.run(embeddings, feed_dict=feed_dict)  # n,n_emb
    predictions = model.predict_proba(emb_array)
    best_class_indices = np.argmax(predictions, axis=1)  # n,1
    # best_class_probabilities = np.max(predictions, axis=1)

    results = np.zeros_like(img_list)
    results[ok_ind] = [class_names[i] for i in best_class_indices]
    comb = list(zip(img_list, results))
    pd.DataFrame(comb).to_csv('test_results.csv')
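batch_inp() is another fragment: it assumes sess, model, class_names, pnet/rnet/onet, minsize, threshold, factor, frame_interval, c, image_size, input_image_size, images_placeholder, phase_train_placeholder, and embeddings are already set up as in the earlier examples, plus glob, pandas (as pd), cv2, and the facenet/detect_face modules imported. Given that, a call sketch ('./test_imgs' is a hypothetical folder of images):

if __name__ == '__main__':
    batch_inp('./test_imgs')   # writes test_results.csv mapping each image to a name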