Exemples Python de MtcnnDetector (mtcnn.core.detect.MtcnnDetector)

Exemple #1

0

Afficher le fichier

def test_onet(inoutDir, outputDir, model):
    """Run the raw ONet stage on at most 50 images and save the visualisations.

    The stock PNet/RNet weights and the ONet checkpoint
    ``./original_model/<model>.pt`` are loaded on CPU. Every entry of
    ``inoutDir`` is read with OpenCV, passed to ``detect_onet_raw`` and the
    result is handed to ``vis_face_test``.

    :param inoutDir: directory whose entries are read as input images.
    :param outputDir: output path prefix; it is concatenated as-is, so no
        path separator is inserted between it and the file name.
    :param model: ONet checkpoint name without the ".pt" suffix; also used
        as the prefix of every output file name.
    """
    p_net, r_net, o_net = create_mtcnn_net(
        p_model_path="./original_model/pnet_epoch.pt",
        r_model_path="./original_model/rnet_epoch.pt",
        o_model_path="./original_model/" + model + ".pt",
        use_cuda=False)
    detector = MtcnnDetector(pnet=p_net,
                             rnet=r_net,
                             onet=o_net,
                             min_face_size=24)

    max_images = 50
    for count, file_name in enumerate(os.listdir(inoutDir), start=1):
        bgr = cv2.imread(os.path.join(inoutDir, file_name))
        # The detector receives the BGR frame; the RGB copy is only drawn on.
        rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)

        raw_landmarks = detector.detect_onet_raw(bgr)

        save_to = outputDir + model + "-" + str(count) + ".jpg"
        vis_face_test(rgb, raw_landmarks, save_to)
        if count == max_images:
            break

Exemple #2

0

Afficher le fichier

def gen_onet_data(data_dir,
                  anno_file,
                  pnet_model_file,
                  rnet_model_file,
                  prefix_path='',
                  use_cuda=True,
                  vis=False):
    """Run PNet + RNet over the annotated images and build the ONet samples.

    Each image yielded by ``TestImageLoader`` goes through ``detect_pnet``
    and ``detect_rnet``. The aligned RNet boxes (or an empty array when RNet
    returns ``None``) are collected in loader order, pickled to
    ``./model_store/detections_<unix time>.pkl`` and that file is passed to
    ``gen_onet_sample_data``.

    :param data_dir: forwarded unchanged to ``gen_onet_sample_data``.
    :param anno_file: annotation file given to ``ImageDB``.
    :param pnet_model_file: PNet checkpoint path.
    :param rnet_model_file: RNet checkpoint path.
    :param prefix_path: image path prefix given to ``ImageDB``.
    :param use_cuda: forwarded to ``create_mtcnn_net``.
    :param vis: when true, show raw vs. aligned RNet boxes for each image.
    """
    pnet, rnet, _ = create_mtcnn_net(p_model_path=pnet_model_file,
                                     r_model_path=rnet_model_file,
                                     use_cuda=use_cuda)
    mtcnn_detector = MtcnnDetector(pnet=pnet, rnet=rnet, min_face_size=12)

    imagedb = ImageDB(anno_file, mode="test", prefix_path=prefix_path)
    imdb = imagedb.load_imdb()
    image_reader = TestImageLoader(imdb, 1, False)

    all_boxes = []

    print('size:%d' % image_reader.size)
    # enumerate() replaces the hand-maintained counter that had to be bumped
    # on every exit path of the loop body.
    for batch_idx, im in enumerate(image_reader):
        if batch_idx % 50 == 0:
            print("%d images done" % batch_idx)

        # Only the aligned PNet boxes feed the RNet stage.
        _, p_boxes_align = mtcnn_detector.detect_pnet(im=im)
        boxes, boxes_align = mtcnn_detector.detect_rnet(im=im,
                                                        dets=p_boxes_align)

        if boxes_align is None:
            # Keep one entry per image so indices stay aligned with imdb.
            all_boxes.append(np.array([]))
            continue
        if vis:
            rgb_im = cv2.cvtColor(np.asarray(im), cv2.COLOR_BGR2RGB)
            vision.vis_two(rgb_im, boxes, boxes_align)

        all_boxes.append(boxes_align)

    save_path = './model_store'
    if not os.path.exists(save_path):
        os.mkdir(save_path)

    save_file = os.path.join(save_path, "detections_%d.pkl" % int(time.time()))
    with open(save_file, 'wb') as f:
        cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)

    gen_onet_sample_data(data_dir, anno_file, save_file, prefix_path)

Exemple #3

0

Afficher le fichier

def gen_rnet_data(data_dir, anno_dir, pnet_model_file, use_cuda=True):
    """Collect PNet detections for every annotated image and build RNet samples.

    The annotation list ``<anno_dir>/anno_store/wide_anno_train.txt`` is
    loaded, each image is run through ``detect_pnet`` and the per-image
    results are pickled to ``<anno_dir>/rnet/detections_<unix time>.pkl``,
    which is then passed to ``gen_rnet_sample_data``.

    :param data_dir: forwarded unchanged to ``gen_rnet_sample_data``.
    :param anno_dir: directory holding ``anno_store/``; the ``rnet``
        output folder is created inside it.
    :param pnet_model_file: PNet checkpoint path.
    :param use_cuda: forwarded to ``create_mtcnn_net``.
    """
    # Only the first of the three returned networks is needed here.
    pnet, _, _ = create_mtcnn_net(p_model_path=pnet_model_file,
                                  use_cuda=use_cuda)
    mtcnn_detector = MtcnnDetector(pnet=pnet, min_face_size=12)

    # TODO :: [local_wide_anno, wide_anno_train]
    anno_file = os.path.join(anno_dir, 'anno_store/wide_anno_train.txt')
    imdb = ImageDB(anno_file, mode='test', prefix_path='').load_imdb()

    image_reader = TestImageLoader(imdb, 1, False)
    print('size:%d' % image_reader.size)

    all_boxes = list()
    for done, im in enumerate(image_reader, start=1):
        if done % 100 == 0:
            print("%d images done" % done)

        # NOTE(review): the result is stored without unpacking; confirm that
        # this project's detect_pnet returns only the aligned boxes (or None).
        boxes_align = mtcnn_detector.detect_pnet(im=im)  # Time costly

        if boxes_align is None:
            # One entry per image keeps the list aligned with imdb.
            all_boxes.append(np.array([]))
        else:
            all_boxes.append(boxes_align)

    save_path = os.path.join(anno_dir, 'rnet')
    if not os.path.exists(save_path):
        os.mkdir(save_path)

    stamp = int(time.time())
    save_file = os.path.join(save_path, "detections_%d.pkl" % stamp)
    with open(save_file, 'wb') as f:
        cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)

    gen_rnet_sample_data(data_dir, anno_dir, save_file)

Exemple #4

0

Afficher le fichier

Fichier : mtcnn_test.py Projet : lozim/mtcnn_face_flask

def index():
    """Detect faces in the base64 image of the JSON request and return the crops.

    Reads ``request.json['image']``, decodes it with ``base64_to_image``,
    runs the three-stage MTCNN detector and answers with a JSON object
    holding ``success`` (true when at least one face was found) and
    ``faces_detected`` (``face_<i>`` -> base64 crop, empty when nothing was
    found).
    """
    pnet, rnet, onet = create_mtcnn_net(
        p_model_path="./original_model/pnet_epoch.pt",
        r_model_path="./original_model/rnet_epoch.pt",
        o_model_path="./original_model/onet_epoch.pt",
        use_cuda=False)
    mtcnn_detector = MtcnnDetector(pnet=pnet,
                                   rnet=rnet,
                                   onet=onet,
                                   min_face_size=24)

    print(request.data)
    base_data = request.json['image']

    img = base64_to_image(base_data)
    bboxs, landmarks = mtcnn_detector.detect_face(img)

    # One entry per detected face; an image may contain several.
    faces = {}
    for i in range(bboxs.shape[0]):
        # Nothing here guarantees the boxes lie inside the image. A negative
        # slice bound would count from the far edge and yield a wrong crop,
        # so clamp at 0 (bounds past the far edge are already safe).
        x1 = max(0, int(bboxs[i][0]))
        y1 = max(0, int(bboxs[i][1]))
        x2 = max(0, int(bboxs[i][2]))
        y2 = max(0, int(bboxs[i][3]))
        face = img[y1:y2, x1:x2]

        faces["face_" + str(i)] = image_to_base64(face)

    res = {"success": bboxs.shape[0] >= 1, "faces_detected": faces}
    return flask.jsonify(res)

Exemple #5

0

Afficher le fichier

def test(inoutDir, outputDir, model):
    """Detect faces with stock PNet/RNet plus a custom ONet and save the results.

    Every entry of ``inoutDir`` is read, run through ``detect_face`` and the
    visualisation is written to ``outputDir + model + "-" + <n> + ".jpg"``,
    where ``n`` is the 1-based position of the entry in the directory
    listing.

    :param inoutDir: directory containing the input images.
    :param outputDir: output path prefix; concatenated as-is, no separator
        is inserted.
    :param model: ONet checkpoint name without ".pt" (looked up under
        ``./original_model/``); also used in the output file names.
    """
    pnet, rnet, onet_jiang = create_mtcnn_net(
        p_model_path="./original_model/pnet_epoch.pt",
        r_model_path="./original_model/rnet_epoch.pt",
        o_model_path="./original_model/" + model + ".pt",
        use_cuda=False)
    mtcnn_detector = MtcnnDetector(pnet=pnet,
                                   rnet=rnet,
                                   onet=onet_jiang,
                                   min_face_size=24)

    for i, file_name in enumerate(os.listdir(inoutDir), start=1):
        # Open the file from the directory that was listed; previously the
        # names were joined onto a hard-coded "./lfpw_test/" folder.
        image_path = os.path.join(inoutDir, file_name)

        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread signals an unreadable file by returning None.
            print("skip unreadable image: %s" % image_path)
            continue
        img_bg = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        # The detector runs on the original BGR frame.
        bboxs, landmarks1 = mtcnn_detector.detect_face(img)

        # Save the annotated picture.
        vis_face(img_bg, bboxs, landmarks1,
                 outputDir + model + "-" + str(i) + ".jpg")

Exemple #6

0

Afficher le fichier

    def detect(self):
        """Run face detection on every image of every event folder.

        For each sub-directory of ``self.image_dir`` a matching directory is
        created under ``self.result_dir``. Each image produces a text file
        (image name with its last four characters removed, plus ".txt")
        containing that stem, the number of boxes, and one
        ``x y w h score`` line per box.
        """
        pnet, rnet, onet = create_mtcnn_net(p_model_path=self.p_net_m,
                                            r_model_path=self.r_net_m,
                                            o_model_path=self.o_net_m,
                                            use_cuda=True)
        mtcnn_detector = MtcnnDetector(pnet=pnet,
                                       rnet=rnet,
                                       onet=onet,
                                       min_face_size=24,
                                       threshold=[0.1, 0.1, 0.1])

        for event in os.listdir(self.image_dir):
            print(event)
            event_dir = os.path.join(self.image_dir, event)
            res_dir = os.path.join(self.result_dir, event)
            if not os.path.exists(res_dir):
                os.makedirs(res_dir)
            for images in os.listdir(event_dir):
                images_path = os.path.join(event_dir, images)
                img = cv2.imread(images_path)
                bboxs, landmarks = mtcnn_detector.detect_face(img)
                if bboxs.shape[0] != 0:
                    # Convert corner form (x1, y1, x2, y2) to (x, y, w, h).
                    bboxs[:, 2] = bboxs[:, 2] - bboxs[:, 0]
                    bboxs[:, 3] = bboxs[:, 3] - bboxs[:, 1]
                    bboxs[:, :4] = np.round(bboxs[:, :4])

                # Drops the last four characters, i.e. assumes a 3-letter
                # extension such as ".jpg".
                stem = images[:-4]
                fpath = os.path.join(res_dir, stem + '.txt')
                # The context manager closes the file even if a write fails.
                with open(fpath, 'w') as f:
                    f.write(stem + '\n')
                    f.write(str(bboxs.shape[0]) + '\n')
                    for i in range(bboxs.shape[0]):
                        f.write('{:.0f} {:.0f} {:.0f} {:.0f} {:.3f}\n'.format(
                            bboxs[i, 0], bboxs[i, 1], bboxs[i, 2], bboxs[i, 3],
                            bboxs[i, 4]))

Exemple #7

0

Afficher le fichier

import cv2
from mtcnn.core.detect import create_mtcnn_net, MtcnnDetector
from mtcnn.core.vision import vis_face

if __name__ == '__main__':
    # Checkpoints from our own training run. The stock weights sit in the
    # same folder without the "_train" suffix (pnet_epoch.pt, rnet_epoch.pt,
    # onet_epoch.pt).
    checkpoints = dict(
        p_model_path="./original_model/pnet_epoch_train.pt",
        r_model_path="./original_model/rnet_epoch_train.pt",
        o_model_path="./original_model/onet_epoch_train.pt",
    )
    p_net, r_net, o_net = create_mtcnn_net(use_cuda=False, **checkpoints)
    detector = MtcnnDetector(pnet=p_net,
                             rnet=r_net,
                             onet=o_net,
                             min_face_size=24,
                             threshold=[0.6, 0.7, 0.7])

    bgr = cv2.imread("1.jpg")
    # Detection runs on the BGR frame; the RGB copy is used for drawing.
    rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
    boxes, points = detector.detect_face(bgr)
    vis_face(rgb, boxes, points, 'r_1.jpg')

Exemple #8

0

Afficher le fichier

class VisSingleCase(object):
    """Detect faces in a single image and display the largest one."""

    def __init__(self, args):
        """Build the wrapped ``MtcnnDetector`` from ``args``."""
        self.model = MtcnnDetector(args)

    def _fliter_doc_bbox(self, bboxes, landmarks):
        """Return the box with the largest area and its landmark row.

        Columns 0-3 of ``bboxes`` are read as x1, y1, x2, y2; the area is
        computed with inclusive pixel counts (+1 on each side).
        """
        widths = bboxes[:, 2] - bboxes[:, 0] + 1
        heights = bboxes[:, 3] - bboxes[:, 1] + 1
        # Sorting the negated areas ascending puts the biggest box first.
        negated_area = widths * heights * -1
        biggest = negated_area.argsort()[0]
        return bboxes[biggest], landmarks[biggest]

    def _fetch_block(self, bbox, landmark, lmk_flag=True):
        """Return ``(x1, y1, x2, y2)`` of the block to crop, as ints.

        With ``lmk_flag`` the block is derived from landmark points
        6 / 10 (horizontal extent), 33 (top) and 8 (bottom); otherwise the
        detection box itself is used.
        """
        if not lmk_flag:
            return (int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]))

        points = landmark.reshape(-1, 2)
        left_x = int(points[6][0])     # default : 6
        top_y = int(points[33][1])     # point-34 | nose
        right_x = int(points[10][0])   # default : 10
        bottom_y = int(points[8][1])   # point-9  | chin
        return (left_x, top_y, right_x, bottom_y)

    def _vis_result(self, img, bbox, landmark):
        """Show the image with the face box, the block and 68 landmark dots."""
        canvas = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        pen = ImageDraw.Draw(canvas)
        bx1, by1, bx2, by2 = self._fetch_block(bbox, landmark)

        pen.rectangle([(bbox[0], bbox[1]), (bbox[2], bbox[3])],
                      outline='yellow')
        pen.rectangle([(bx1, by1), (bx2, by2)], outline='red')

        # Each landmark is drawn as a tiny circle of radius one pixel.
        for px, py in landmark.reshape((68, 2)):
            pen.ellipse((px - 1, py - 1, px + 1, py + 1),
                        fill=None,
                        outline='green')
        canvas.show()

    def check_case(self, img_path):
        """Detect faces in ``img_path`` and display the largest one.

        Any exception raised while reading or detecting is printed and the
        call returns without displaying anything.
        """
        try:
            img = cv2.imread(img_path)
            bboxes, landmarks = self.model.detect_face(img, verbose=False)
            print((img_path, bboxes.shape))
        except Exception as e:
            print(e)
        else:
            if bboxes.shape[0] == 0:
                print('No face detected in %s' % img_path)
                return
            bbox, landmark = self._fliter_doc_bbox(bboxes, landmarks)
            self._vis_result(img, bbox, landmark)

Exemple #9

0

Afficher le fichier

Fichier : mtcnn_test.py Projet : 124451/eye_copen_close_other

                        for j in range(point_nums):
                            cv2.circle(frame, (int(landmarks_one[j, 0]),
                                               int(landmarks_one[j, 1])), 2,
                                       (255, 0, 0), -1)
                out.write(frame)
            else:
                break

        cap.release()
        out.release()
        cv2.destroyAllWindows()


# Script entry point: build the three-stage MTCNN detector on GPU and run it
# over the test videos via detect_video().
if __name__ == '__main__':
    # NOTE(review): the landmark-drawing loop earlier in this file reads a
    # name `point_nums`; presumably it is this module-level value, so confirm
    # before renaming or moving it.
    point_nums = 24
    # One score threshold per network passed to MtcnnDetector.
    threshold = [0.6, 0.7, 0.7]  # [0.99, 0.1, 0.6]  #
    pnet, rnet, onet = create_mtcnn_net(
        p_model_path=r'model_store/final/pnet_epoch_19.pt',
        r_model_path=r'model_store/final/rnet_epoch_7.pt',
        o_model_path=r'model_store/final/onet_epoch_92.pt',
        use_cuda=True)

    mtcnn_detector = MtcnnDetector(pnet=pnet,
                                   rnet=rnet,
                                   onet=onet,
                                   min_face_size=24,
                                   threshold=threshold)
    # Input and output locations handed to detect_video().
    videos_root_path = 'test_video/'
    save_path_root = 'result_video'
    detect_video(mtcnn_detector, videos_root_path, save_path_root)

Exemple #10

0

Afficher le fichier

import argparse
from mtcnn.core.detect import MtcnnDetector,create_mtcnn_net
import cv2
import time
from  mtcnn.config import *

# Webcam demo: run the PNet and RNet stages on each frame of camera 0 and
# draw the aligned RNet boxes plus an FPS read-out.
pnet, rnet, onet = create_mtcnn_net(p_model_path=PNET_MODEL_PATH,r_model_path=RNET_MODEL_PATH,
                                 o_model_path=ONET_MODEL_PATH,use_cuda=False)
mtcnn_detector = MtcnnDetector(pnet=pnet, rnet=rnet, onet=onet, min_face_size=48)



cap = cv2.VideoCapture(0)
f = 0  # frame counter used for the FPS estimate
stime = time.time()  # start of the current FPS measurement window
while (True):
    # NOTE(review): `ret` is never checked in the visible code; a failed read
    # would hand a bad frame to cv2.resize.
    ret, frame = cap.read()  # read one frame
    frame = cv2.resize(frame,(480,360))
    # frame = cv2.imread('face.jpg')
    boxes, boxes_align = mtcnn_detector.detect_pnet(im=frame)
    rboxes, rboxes_align = mtcnn_detector.detect_rnet(im=frame, dets=boxes_align)
    # oboxes,olandmark = mtcnn_detector.detect_onet(im=frame,dets=rboxes_align)
    if rboxes_align is not None:
        for box in rboxes_align:
            cv2.rectangle(frame, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), (255, 0, 128), 2)
    # The FPS formula assumes 20 frames elapsed since `stime`.
    # NOTE(review): no `f += 1` is visible in this excerpt, so as shown this
    # branch runs on every frame; the increment (and a cv2.waitKey call,
    # which imshow needs to refresh the window) may sit below the cut-off.
    if f % 20 == 0:
        fps = int(20/(time.time()-stime))
        f = 0
        stime = time.time()
    cv2.putText(frame, '{:d}fps'.format(fps), (20, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.8,(255, 0, 255), 2)
    cv2.imshow('Face Recognition', frame)

Exemple #11

0

Afficher le fichier

    def __init__(self, args):
        """Store an ``MtcnnDetector`` built from ``args`` as ``self.model``."""

        self.model = MtcnnDetector(args)

Exemple #12

0

Afficher le fichier

Fichier : demo.py Projet : Ontheway361/occlusion_detection

    #     parser.add_argument('--onet_file',  type=str,  default='model/onet.pt')
    #     parser.add_argument('--onet_file',  type=str,  default='model/onet_v1/onet_epoch_7.pt')
    #     parser.add_argument('--onet_file',  type=str,  default='model/onet_v2/onet_epoch_6.pt')
    parser.add_argument('--onet_file',
                        type=str,
                        default='model/onet_v3/onet_epoch_10.pt')
    parser.add_argument('--use_cuda', type=bool, default=True)  # TODO
    parser.add_argument('--gpu_ids', type=list, default=[0, 1])  # TODO
    parser.add_argument('--prob_thres', type=list, default=[0.6, 0.7, 0.7])

    args = parser.parse_args()

    return args


# Script entry point: detect faces in every image of aku_imgs/ and save the
# visualisation under result/.
if __name__ == '__main__':

    # Keep only the text before the first '.' of each directory entry; the
    # loop below re-appends ".jpg", so other extensions or dotted names will
    # not be found.
    imglists = [s.split('.')[0] for s in os.listdir('aku_imgs/')]
    mtcnn_detector = MtcnnDetector(parse_args())

    for img_name in imglists:

        # Best-effort per image: any failure is printed and the loop moves
        # on to the next image.
        try:
            img = cv2.imread('aku_imgs/%s.jpg' % img_name)
            bboxs, landmarks = mtcnn_detector.detect_face(img, verbose=False)
            save_name = 'result/r3_%s.jpg' % img_name
            print('save img name : %s' % save_name)
            visual_face(img, bboxs, landmarks, save_name)
        except Exception as e:
            print(e)

Exemple #13

0

Afficher le fichier

Fichier : test4mtcnn_camera.py Projet : LiuHF1999/FaceRecognitionProj

                cv2.circle(frame,(int(landmarks_one[j,0]),int(landmarks_one[j,1])),radius=2,color=(0, 0, 255))

def save_face(image, tag, num):
    """Save ``image`` as a grayscale JPEG under ``./Data/FaceID/<tag>/``.

    The file is named ``<unix time>_<num>.jpg``. The per-tag directory is
    created when missing so the captures can later serve as one class
    folder for classifier training.

    :param image: BGR image accepted by ``cv2.cvtColor``.
    :param tag: label of the captured face; used as the sub-directory name.
    :param num: sequence number appended to the file name.
    """
    # DATA_TRAIN is the root directory for captured faces.
    DATA_TRAIN = './Data/FaceID'
    tag_dir = os.path.join(DATA_TRAIN, str(tag))
    # cv2.imwrite does not create missing directories, so create the
    # per-tag folder here before writing.
    os.makedirs(tag_dir, exist_ok=True)
    img_name = os.path.join(tag_dir,
                            '{}_{}.jpg'.format(int(time.time()), num))
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    cv2.imwrite(img_name, image)


# Script entry point: open the camera and run catch_face() on every frame.
if __name__ == '__main__':
    # NOTE(review): catch_face(frame) receives only the frame, so it
    # presumably reads `mtcnn_detector` as a module-level name; confirm
    # before renaming.
    pnet, rnet, onet = create_mtcnn_net(p_model_path="./original_model/pnet_epoch.pt",
                                        r_model_path="./original_model/rnet_epoch.pt",
                                        o_model_path="./original_model/onet_epoch.pt", use_cuda=False)
    mtcnn_detector = MtcnnDetector(pnet=pnet, rnet=rnet, onet=onet, min_face_size=24)

    window_name = 'main'
    camera_idx = 0
    cv2.namedWindow(window_name)
    # The source could be a stored video file or, as here, a live camera.
    cap = cv2.VideoCapture(camera_idx, cv2.CAP_DSHOW)
    while cap.isOpened():
        # Read one frame.
        ok, frame = cap.read()
        if not ok:
            break
        # Detect the face in the frame.
        catch_face(frame)
        # Pressing 'q' is meant to quit the program.
        # NOTE(review): no cv2.waitKey call is visible in this excerpt; the
        # key handling may sit below the cut-off.
        cv2.imshow(window_name, frame)

Exemple #14

0

Afficher le fichier

Fichier : landmark_48_data.py Projet : lsding/FaceRecognition

def gen_landmark48_data(data_dir,
                        anno_file,
                        pnet_model_file,
                        rnet_model_file,
                        prefix_path='',
                        use_cuda=True,
                        vis=False):
    """Run PNet + RNet over the annotated images and pickle the detections.

    Images whose first or second dimension is 1200 or more are skipped (an
    empty array is stored for them). The aligned RNet boxes of every other
    image are collected in loader order and written to
    ``<config.MODEL_STORE_DIR>/detections_celeba.pkl``.

    :param data_dir: dataset root; ``anno_file`` is resolved inside it and
        images are looked up under ``<data_dir>/img``.
    :param anno_file: annotation file name relative to ``data_dir``.
    :param pnet_model_file: PNet checkpoint path.
    :param rnet_model_file: RNet checkpoint path.
    :param prefix_path: accepted for signature compatibility; not used.
    :param use_cuda: forwarded to ``create_mtcnn_net``.
    :param vis: when true, show raw vs. aligned RNet boxes for each image.
    """
    anno_file = os.path.join(data_dir, anno_file)
    pnet, rnet, _ = create_mtcnn_net(p_model_path=pnet_model_file,
                                     r_model_path=rnet_model_file,
                                     use_cuda=use_cuda)
    mtcnn_detector = MtcnnDetector(pnet=pnet, rnet=rnet, min_face_size=12)

    imagedb = ImageDB(anno_file,
                      mode="test",
                      prefix_path=os.path.join(data_dir, 'img'))
    imdb = imagedb.load_imdb()
    image_reader = TestImageLoader(imdb, 1, False)

    all_boxes = []

    # enumerate() replaces the hand-maintained counter that had to be bumped
    # on every exit path of the loop body.
    for batch_idx, im in enumerate(image_reader):
        if batch_idx % 500 == 0:
            print("%d images done" % batch_idx)

        if im.shape[0] >= 1200 or im.shape[1] >= 1200:
            # Oversized image: store a placeholder so the list stays
            # aligned with imdb.
            all_boxes.append(np.array([]))
            continue

        # Only the aligned PNet boxes feed the RNet stage.
        _, p_boxes_align = mtcnn_detector.detect_pnet(im=im)
        boxes, boxes_align = mtcnn_detector.detect_rnet(im=im,
                                                        dets=p_boxes_align)

        if boxes_align is None:
            all_boxes.append(np.array([]))
            continue
        if vis:
            rgb_im = cv2.cvtColor(np.asarray(im), cv2.COLOR_BGR2RGB)
            vision.vis_two(rgb_im, boxes, boxes_align)

        all_boxes.append(boxes_align)

    save_path = config.MODEL_STORE_DIR
    if not os.path.exists(save_path):
        os.mkdir(save_path)

    save_file = os.path.join(save_path, "detections_celeba.pkl")
    with open(save_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

Exemple #15

0

Afficher le fichier

Fichier : mtcnn_test.py Projet : khannb/Computer-Insight-A05

import cv2
from mtcnn.core.detect import create_mtcnn_net, MtcnnDetector
from mtcnn.core.vision import vis_face

import warnings
warnings.filterwarnings("ignore")

if __name__ == '__main__':
    # Detect faces in one sample image on GPU, enlarge the boxes and save
    # the visualisation.
    run_on_gpu = True
    p_net, r_net, o_net = create_mtcnn_net(
        p_model_path="./original_model/pnet_epoch.pt",
        r_model_path="./original_model/rnet_epoch.pt",
        o_model_path="./original_model/onet_epoch.pt",
        use_cuda=run_on_gpu)
    detector = MtcnnDetector(pnet=p_net,
                             rnet=r_net,
                             onet=o_net,
                             min_face_size=24)

    bgr = cv2.imread("./img/part2_002268.jpg")
    # Detection runs on the BGR frame; the RGB copy is used for drawing.
    rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)

    boxes, points = detector.detect_face(bgr)
    print(boxes)
    # The boxes are printed as detected and enlarged only for the drawing.
    enlarged = detector.box_expand(boxes, 0.3, 0.25)
    vis_face(rgb, enlarged, points, './img/result.jpg')

Exemple #16

0

Afficher le fichier

import cv2
from mtcnn.core.detect import create_mtcnn_net, MtcnnDetector
from mtcnn.core.vision import vis_face

if __name__ == '__main__':
    # Detect faces in one sample image on CPU and save the visualisation.
    p_net, r_net, o_net = create_mtcnn_net(
        p_model_path="./original_model/pnet_epoch.pt",
        r_model_path="./original_model/rnet_epoch.pt",
        o_model_path="./original_model/onet_epoch.pt",
        use_cuda=False)
    detector = MtcnnDetector(pnet=p_net,
                             rnet=r_net,
                             onet=o_net,
                             min_face_size=24)

    bgr = cv2.imread("./s_l.jpg")
    # Detection runs on the BGR frame; the RGB copy is used for drawing.
    rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)

    boxes, points = detector.detect_face(bgr)
    vis_face(rgb, boxes, points, 'r_4.jpg')

Exemple #17

0

Afficher le fichier

def gen_rnet_data(data_dir,
                  anno_file,
                  pnet_model_file,
                  prefix_path='',
                  use_cuda=True,
                  vis=False):
    """Run PNet over the annotated images and build the RNet samples.

    The aligned PNet boxes of each image (or an empty array when PNet
    returns ``None``) are collected in loader order, pickled to
    ``./model_store/detections_<unix time>.pkl`` and that file is passed to
    ``gen_rnet_sample_data``.

    :param data_dir: forwarded unchanged to ``gen_rnet_sample_data``.
    :param anno_file: annotation file given to ``ImageDB``.
    :param pnet_model_file: PNet checkpoint path.
    :param prefix_path: image path prefix given to ``ImageDB``.
    :param use_cuda: forwarded to ``create_mtcnn_net``.
    :param vis: when true, show raw vs. aligned PNet boxes for each image.
    :return: None
    """
    # Only the first of the three returned networks is needed here.
    pnet, _, _ = create_mtcnn_net(p_model_path=pnet_model_file,
                                  use_cuda=use_cuda)
    mtcnn_detector = MtcnnDetector(pnet=pnet, min_face_size=12)

    imagedb = ImageDB(anno_file, mode="test", prefix_path=prefix_path)
    imdb = imagedb.load_imdb()
    image_reader = TestImageLoader(imdb, 1, False)

    all_boxes = []

    print('size:%d' % image_reader.size)
    # enumerate() replaces the hand-maintained counter that had to be bumped
    # on every exit path of the loop body.
    for batch_idx, im in enumerate(image_reader):
        if batch_idx % 100 == 0:
            print("%d images done" % batch_idx)

        boxes, boxes_align = mtcnn_detector.detect_pnet(im=im)
        if boxes_align is None:
            # Keep one entry per image so indices stay aligned with imdb.
            all_boxes.append(np.array([]))
            continue
        if vis:
            rgb_im = cv2.cvtColor(np.asarray(im), cv2.COLOR_BGR2RGB)
            vision.vis_two(rgb_im, boxes, boxes_align)

        all_boxes.append(boxes_align)

    save_path = './model_store'
    if not os.path.exists(save_path):
        os.mkdir(save_path)

    save_file = os.path.join(save_path, "detections_%d.pkl" % int(time.time()))
    with open(save_file, 'wb') as f:
        cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)

    gen_rnet_sample_data(data_dir, anno_file, save_file, prefix_path)

Exemple #18

0

Afficher le fichier

Fichier : signal_eye_test_with_zhang_landmark.py Projet : 124451/eye_copen_close_other

def dete_picture():
    """Classify the eyes found in a folder of JPEGs and save annotated copies.

    For each ``*.jpg`` in the hard-coded input folder the function runs the
    MTCNN detector, derives one box per eye from the 24-point landmarks,
    squares and pads each box, classifies every crop with MixNet
    (0 = open, 1 = closed, anything else = other) and writes the annotated
    frame to the hard-coded output folder under the same file name.

    Only the first two eye boxes (the first detected face) receive a class
    label and a score summary. Images without landmarks, or with fewer than
    two eye crops, are saved with whatever was drawn up to that point.
    """
    point_nums = 24
    threshold = [0.6, 0.7, 0.7]
    data_trans = Transforms.Compose([
        Transforms.Resize((24, 24)),
        Transforms.ToTensor(),
        Transforms.Normalize((0.45, 0.448, 0.455), (0.082, 0.082, 0.082)),
    ])
    mixnet = MixNet(input_size=(24, 24), num_classes=3)
    weight_dict = torch.load(
        "/media/omnisky/D4T/JSH/faceFenlei/Projects/hul_eye_class/weight/relabel_mix_24_24_20210302/Mixnet_epoch_59.pth"
    )
    # Every key loses its first 7 characters before loading. Presumably that
    # is the "module." prefix of a DataParallel checkpoint; confirm against
    # the file.
    new_state_dict = OrderedDict()
    for k, v in weight_dict.items():
        new_state_dict[k[7:]] = v

    mixnet.load_state_dict(new_state_dict)
    mixnet.to('cuda:0')
    mixnet.eval()

    pnet, rnet, onet = create_mtcnn_net(
        p_model_path=r'model_store/final/pnet_epoch_19.pt',
        r_model_path=r'model_store/final/rnet_epoch_7.pt',
        o_model_path=r'model_store/final/onet_epoch_92.pt',
        use_cuda=True)
    mtcnn_detector = MtcnnDetector(pnet=pnet,
                                   rnet=rnet,
                                   onet=onet,
                                   min_face_size=24,
                                   threshold=threshold)
    img_file = "/media/omnisky/D4T/JSH/faceFenlei/Projects/hul_eye_class/test_video/caiji_0123"
    img_save = "/media/omnisky/D4T/JSH/faceFenlei/Projects/hul_eye_class/result_video/relabel_img_result_adma_01"
    # glob already returns paths that include img_file, so no second join.
    img_path = glob.glob(os.path.join(img_file, "*.jpg"))

    # Landmark rows that outline each eye.
    eye_point_rows = ([6, 8, 10, 11, 14], [7, 9, 12, 13, 15])
    # Class id -> (label text, BGR colour); any other id is drawn as "other".
    label_styles = {
        0: ("open_eye", (255, 0, 255)),
        1: ("close_eye", (0, 255, 0)),
    }
    other_style = ("other", (0, 0, 255))
    font = cv2.FONT_HERSHEY_COMPLEX
    pad = 0.1  # fixed padding ratio added around every eye box
    bias = 30  # vertical offset of the class label above the eye box

    for img_p in tqdm(img_path):
        save_to = os.path.join(img_save, os.path.split(img_p)[-1])
        frame = cv2.imread(img_p)
        if frame is None:
            # cv2.imread signals an unreadable file by returning None.
            print("skip unreadable image: %s" % img_p)
            continue

        copy_frame = frame.copy()  # clean copy used for cropping
        _, landmarks, _ = mtcnn_detector.detect_face(frame, rgb=True)

        if landmarks is None:
            cv2.imwrite(save_to, frame)
            continue

        # Two boxes per face, left eye first, as [xmin, ymin, xmax, ymax].
        left_right_eye = []
        for face_idx in range(landmarks.shape[0]):
            landmarks_one = landmarks[face_idx, :].reshape((point_nums, 2))
            for rows in eye_point_rows:
                eye = np.array(landmarks_one[rows, :])
                left_right_eye.append([
                    np.min(eye[:, 0]),
                    np.min(eye[:, 1]),
                    np.max(eye[:, 0]),
                    np.max(eye[:, 1]),
                ])
                for px, py in eye:
                    cv2.circle(frame, (int(px), int(py)), 2, (255, 0, 0), -1)

        crop_img = []
        for xmin, ymin, xmax, ymax in left_right_eye:
            w, h = xmax - xmin, ymax - ymin
            # Grow the shorter side so the box becomes square, then pad both
            # sides by `pad` times the longer side.
            if h / w > 1:
                grow_x = (h / w - 1) / 2 * w + pad * h
                grow_y = pad * h
            else:
                grow_x = pad * w
                grow_y = (w / h - 1) / 2 * h + pad * w
            xmin, xmax = xmin - grow_x, xmax + grow_x
            ymin, ymax = ymin - grow_y, ymax + grow_y
            cv2.rectangle(frame, (int(xmin), int(ymin)),
                          (int(xmax), int(ymax)), (0, 255, 255), 2)
            # A negative slice bound would count from the far edge and give
            # a wrong crop, so clamp at 0.
            crop_img.append(copy_frame[max(0, int(ymin)):max(0, int(ymax)),
                                       max(0, int(xmin)):max(0, int(xmax))])

        if len(crop_img) < 2:
            cv2.imwrite(save_to, frame)
            continue

        result_buff = []  # (class id, top score) per crop
        score_buff = []   # full softmax row per crop
        for crop in crop_img:
            rgb_crop = cv2.cvtColor(crop, cv2.COLOR_BGR2RGB)
            batch = data_trans(Image.fromarray(rgb_crop)).unsqueeze(0)
            with torch.no_grad():
                outputs = mixnet(batch.to('cuda:0'))
                soft_max = torch.nn.functional.softmax(outputs, dim=1)
            score_buff.append(soft_max.cpu().numpy())
            score, result = torch.max(soft_max, 1)
            result_buff.append((result.item(), score.item()))

        for i in range(2):
            class_id, class_score = result_buff[i]
            label, colour = label_styles.get(class_id, other_style)
            # The first label is shifted left so the two texts do not overlap.
            eye_bias = 100 if i == 0 else 0
            origin = (int(left_right_eye[i][0]) - eye_bias,
                      int(left_right_eye[i][1]) - bias)
            cv2.putText(frame, "{}:{:.2f}".format(label, class_score), origin,
                        font, 0.6, colour, thickness=2)

        # Score summary in the top-left corner, drawn once per image. The
        # right-eye column used to be captioned "left_*" as well.
        for side, column, scores in (("left", 10, score_buff[0][0]),
                                     ("right", 200, score_buff[1][0])):
            for row, (state, value) in enumerate(
                    zip(("open", "close", "other"), scores)):
                cv2.putText(frame,
                            "{}_{}:{:.2f}".format(side, state, value),
                            (column, 20 + 20 * row), font, 0.6, (20, 150, 0),
                            thickness=2)

        cv2.imwrite(save_to, frame)

Exemple #19

0

Afficher le fichier

Fichier : signal_eye_test_with_zhang_landmark.py Projet : 124451/eye_copen_close_other

def show_with_camera():
    """Run eye-state classification on the default webcam and record the result.

    Each frame from camera 0 is passed to the MTCNN detector. For every
    detected face, the landmark rows the code treats as the left and right
    eye are boxed, squared up, enlarged by 10% and cropped from an untouched
    copy of the frame. Every crop is classified by MixNet and one frame-level
    label is drawn: "open_eye" if any crop is class 0, otherwise "close_eye"
    if any crop is class 1, otherwise "other". Annotated frames are shown in
    a window named "test" and appended to
    ``result_video/camera_test_20210301.avi``.

    The loop ends when the camera stops delivering frames, or when ``q`` is
    pressed (the key is only polled on frames for which the detector
    returned landmarks). The capture, the writer and the preview window are
    released on every exit path.
    """
    point_nums = 24
    threshold = [0.6, 0.7, 0.7]
    data_trans = Transforms.Compose([
        Transforms.Resize((24, 24)),
        Transforms.ToTensor(),
        Transforms.Normalize((0.45, 0.448, 0.455), (0.082, 0.082, 0.082)),
    ])
    mixnet = MixNet(input_size=(24, 24), num_classes=3)
    weight_dict = torch.load(
        "/media/omnisky/D4T/JSH/faceFenlei/Projects/hul_eye_class/weight/mix_mbhk_change_signal_eye_24_24/Mixnet_epoch_59.pth"
    )
    # Drop the first 7 characters of every key (presumably the "module."
    # prefix written by nn.DataParallel -- confirm against the checkpoint).
    new_state_dict = OrderedDict(
        (key[7:], value) for key, value in weight_dict.items())

    mixnet.load_state_dict(new_state_dict)
    mixnet.to('cuda:0')
    mixnet.eval()

    pnet, rnet, onet = create_mtcnn_net(
        p_model_path=r'model_store/final/pnet_epoch_19.pt',
        r_model_path=r'model_store/final/rnet_epoch_7.pt',
        o_model_path=r'model_store/final/onet_epoch_92.pt',
        use_cuda=True)
    mtcnn_detector = MtcnnDetector(pnet=pnet,
                                   rnet=rnet,
                                   onet=onet,
                                   min_face_size=24,
                                   threshold=threshold)
    save_path_root = 'result_video/camera_test_20210301.avi'

    cap = cv2.VideoCapture(0)
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    fps = cap.get(cv2.CAP_PROP_FPS)
    if not fps or fps <= 0:
        # Some camera backends report 0 for CAP_PROP_FPS; a writer opened
        # with that value cannot produce a playable file.
        fps = 25.0
    size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
            int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    out = cv2.VideoWriter(save_path_root, fourcc, fps, size)

    font = cv2.FONT_HERSHEY_COMPLEX
    # Expansion factor applied to every eye box (fixed, not random).
    k = 0.1

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                print("finish")
                break

            # Crops are taken from this copy so drawn overlays never leak
            # into the classifier input.
            copy_frame = frame.copy()
            left_right_eye = []
            bboxs, landmarks, wearmask = mtcnn_detector.detect_face(frame,
                                                                    rgb=True)

            if landmarks is None:
                out.write(frame)
                continue

            for face_idx in range(landmarks.shape[0]):
                landmarks_one = landmarks[face_idx, :].reshape((point_nums, 2))
                left_eye = np.array(landmarks_one[[6, 8, 10, 11, 14], :])
                right_eye = np.array(landmarks_one[[7, 9, 12, 13, 15], :])
                for eye_pts in (left_eye, right_eye):
                    left_right_eye.append([
                        np.min(eye_pts[:, 0]),
                        np.min(eye_pts[:, 1]),
                        np.max(eye_pts[:, 0]),
                        np.max(eye_pts[:, 1]),
                    ])
                for point in [*left_eye, *right_eye]:
                    cv2.circle(frame, (int(point[0]), int(point[1])), 2,
                               (255, 0, 0), -1)

            crop_img = []
            for xmin, ymin, xmax, ymax in left_right_eye:
                w, h = xmax - xmin, ymax - ymin
                # Pad the shorter side so the box becomes square, then grow
                # every edge by k times the longer side.
                ratio = h / w
                if ratio > 1:
                    ratio = ratio - 1
                    xmin -= (ratio / 2 * w + k * h)
                    ymin -= (k * h)
                    xmax += (ratio / 2 * w + k * h)
                    ymax += (k * h)
                else:
                    ratio = w / h - 1
                    xmin -= (k * w)
                    ymin -= (ratio / 2 * h + k * w)
                    xmax += (k * w)
                    ymax += (ratio / 2 * h + k * w)
                cv2.rectangle(frame, (int(xmin), int(ymin)),
                              (int(xmax), int(ymax)), (0, 255, 255), 2)
                crop_img.append(copy_frame[int(ymin):int(ymax),
                                           int(xmin):int(xmax)])

            # The displayed/recorded frame is grey (stored as 3-channel BGR);
            # only the text drawn below keeps its colour.
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2BGR)

            if len(crop_img) < 2:
                cv2.imshow("test", frame)
                # Mask to the low byte: some platforms set higher bits in
                # the value returned by waitKey.
                if cv2.waitKey(10) & 0xFF == ord('q'):
                    print("get out")
                    break
                out.write(frame)
                continue

            t_result = []
            for crop in crop_img:
                # The classifier input is a grey image replicated to 3
                # channels.
                crop = cv2.cvtColor(crop, cv2.COLOR_BGR2GRAY)
                crop = cv2.cvtColor(crop, cv2.COLOR_GRAY2RGB)
                img = data_trans(Image.fromarray(crop)).unsqueeze(0)
                with torch.no_grad():
                    outputs = mixnet(img.to('cuda:0'))
                    t_result.append(torch.max(outputs, 1)[1].item())

            if 0 in t_result:
                eye_class, colour = "open_eye", (255, 0, 255)
            elif 1 in t_result:
                eye_class, colour = "close_eye", (0, 255, 0)
            else:
                eye_class, colour = "other", (0, 0, 255)
            # xmax / ymax still hold the expanded box of the last eye
            # processed above, so the label is anchored next to that box.
            cv2.putText(frame, eye_class, (int(xmax), int(ymax) - 20), font,
                        1.0, colour, thickness=2)

            cv2.imshow("test", frame)
            if cv2.waitKey(10) & 0xFF == ord('q'):
                print("get out")
                break
            out.write(frame)
    finally:
        # Release the devices even on 'q' or on an exception so the AVI
        # container is finalised and the camera is freed.
        cap.release()
        out.release()
        cv2.destroyAllWindows()

Exemple #20

0

Afficher le fichier

Fichier : signal_eye_test_with_zhang_landmark.py Projet : 124451/eye_copen_close_other

def dete_signal_video():
    """Annotate a video file with per-eye and per-frame eye-state labels.

    Every frame of the hard-coded input video is passed to the MTCNN
    detector. The landmark rows the code treats as the left and right eye are
    boxed, squared up, enlarged by 10% and cropped from an untouched copy of
    the frame. MixNet scores each crop over three classes (0 -> "open_eye",
    1 -> "close_eye", otherwise "other"). For the first two eye boxes the
    function draws the box width, the winning class with its softmax score
    and the full score vector, then derives one frame-level verdict
    ("other" / "close" / "open") from the widths and scores. Every frame is
    appended to ``result_video/debug_test.avi``.

    Frames without landmarks, or with fewer than two eye crops, are written
    to the output without classification. Only the first two eye boxes
    (i.e. the first detected face) take part in the overlays and verdict.
    """
    point_nums = 24
    threshold = [0.6, 0.7, 0.7]
    data_trans = Transforms.Compose([
        Transforms.Resize((24, 24)),
        Transforms.ToTensor(),
        Transforms.Normalize((0.45, 0.448, 0.455), (0.082, 0.082, 0.082)),
    ])
    mixnet = MixNet(input_size=(24, 24), num_classes=3)
    weight_dict = torch.load(
        "/media/omnisky/D4T/JSH/faceFenlei/Projects/hul_eye_class/weight/relabel_04_mix_SGD_mutillabel_24_24_20210302/Mixnet_epoch_49.pth"
    )
    # Drop the first 7 characters of every key (presumably the "module."
    # prefix written by nn.DataParallel -- confirm against the checkpoint).
    new_state_dict = OrderedDict(
        (key[7:], value) for key, value in weight_dict.items())

    mixnet.load_state_dict(new_state_dict)
    mixnet.to('cuda:0')
    mixnet.eval()

    pnet, rnet, onet = create_mtcnn_net(
        p_model_path=r'model_store/final/pnet_epoch_19.pt',
        r_model_path=r'model_store/final/rnet_epoch_7.pt',
        o_model_path=r'model_store/final/onet_epoch_92.pt',
        use_cuda=True)
    mtcnn_detector = MtcnnDetector(pnet=pnet,
                                   rnet=rnet,
                                   onet=onet,
                                   min_face_size=24,
                                   threshold=threshold)
    videos_root_path = 'test_video/hhh/02_65_6504_0_be4ba2aeac264ed992aae74c15b91b18.mp4'
    save_path_root = 'result_video/debug_test.avi'

    cap = cv2.VideoCapture(videos_root_path)
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    fps = cap.get(cv2.CAP_PROP_FPS)
    size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
            int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    out = cv2.VideoWriter(save_path_root, fourcc, fps, size)

    font = cv2.FONT_HERSHEY_COMPLEX
    # Expansion factor applied to every eye box (fixed, not random).
    k = 0.1
    # Per-eye overlay: class id -> (label prefix, BGR colour); any other id
    # is drawn as "other".
    eye_styles = {0: ("open_eye", (255, 0, 255)), 1: ("close_eye", (0, 255, 0))}
    other_style = ("other", (0, 0, 255))
    score_names = ("open", "close", "other")

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                print("finish")
                break

            # Crops are taken from this copy so drawn overlays never leak
            # into the classifier input.
            copy_frame = frame.copy()
            left_right_eye = []
            bboxs, landmarks, wearmask = mtcnn_detector.detect_face(frame,
                                                                    rgb=True)
            if landmarks is None:
                # Nothing to classify: keep the frame in the output instead
                # of falling through with undefined or stale crops.
                out.write(frame)
                continue

            for face_idx in range(landmarks.shape[0]):
                landmarks_one = landmarks[face_idx, :].reshape((point_nums, 2))
                left_eye = np.array(landmarks_one[[6, 8, 10, 11, 14], :])
                right_eye = np.array(landmarks_one[[7, 9, 12, 13, 15], :])
                for eye_pts in (left_eye, right_eye):
                    left_right_eye.append([
                        np.min(eye_pts[:, 0]),
                        np.min(eye_pts[:, 1]),
                        np.max(eye_pts[:, 0]),
                        np.max(eye_pts[:, 1]),
                    ])

            crop_img = []
            # Width of each eye box before expansion, in pixels.
            eye_wild_buf = []
            for xmin, ymin, xmax, ymax in left_right_eye:
                w, h = xmax - xmin, ymax - ymin
                # Pad the shorter side so the box becomes square, then grow
                # every edge by k times the longer side.
                ratio = h / w
                if ratio > 1:
                    ratio = ratio - 1
                    xmin -= (ratio / 2 * w + k * h)
                    ymin -= (k * h)
                    xmax += (ratio / 2 * w + k * h)
                    ymax += (k * h)
                else:
                    ratio = w / h - 1
                    xmin -= (k * w)
                    ymin -= (ratio / 2 * h + k * w)
                    xmax += (k * w)
                    ymax += (ratio / 2 * h + k * w)
                eye_wild_buf.append(w)
                cv2.rectangle(frame, (int(xmin), int(ymin)),
                              (int(xmax), int(ymax)), (0, 255, 255), 1)
                crop_img.append(copy_frame[int(ymin):int(ymax),
                                           int(xmin):int(xmax)])

            if len(crop_img) < 2:
                # Both eyes are required for the verdict below; record the
                # frame as-is.
                out.write(frame)
                continue

            # result_buff[i] = (winning class id, its softmax score);
            # score_buff[i] = the three class probabilities of crop i.
            result_buff = []
            score_buff = []
            for crop in crop_img:
                crop = cv2.cvtColor(crop, cv2.COLOR_BGR2RGB)
                img = data_trans(Image.fromarray(crop)).unsqueeze(0)
                with torch.no_grad():
                    outputs = mixnet(img.to('cuda:0'))
                    soft_max = torch.nn.functional.softmax(outputs, dim=1)
                    score_buff.append(soft_max.cpu().numpy()[0])
                    score, result = torch.max(soft_max, 1)
                    result_buff.append((result.item(), score.item()))

            # Per-eye overlays. The first eye's text is shifted 100 px to
            # the left, the second is anchored on its box.
            bias = 30
            for eye_idx, eye_bias in ((0, 100), (1, 0)):
                class_id, class_score = result_buff[eye_idx]
                anchor_x = int(left_right_eye[eye_idx][0]) - eye_bias
                anchor_y = int(left_right_eye[eye_idx][1])
                cv2.putText(frame,
                            "w:{}".format(int(eye_wild_buf[eye_idx])),
                            (anchor_x, anchor_y - 50), font, 0.6,
                            (255, 0, 255), thickness=2)
                label, colour = eye_styles.get(class_id, other_style)
                cv2.putText(frame, "{}:{:.2f}".format(label, class_score),
                            (anchor_x, anchor_y - bias), font, 0.6, colour,
                            thickness=2)

            # Full score vectors: first eye in the left column, second eye
            # in the right column (now labelled "right_*", not "left_*").
            for side, column_x, scores in (("left", 10, score_buff[0]),
                                           ("right", 200, score_buff[1])):
                for row, (name, value) in enumerate(zip(score_names, scores)):
                    cv2.putText(frame,
                                "{}_{}:{:.2f}".format(side, name, value),
                                (column_x, 20 + 20 * row), font, 0.6,
                                (20, 150, 0), thickness=2)

            # Eye with the higher winning score (a tie picks the second).
            if result_buff[0][1] > result_buff[1][1]:
                max_id, max_score = result_buff[0]
            else:
                max_id, max_score = result_buff[1]

            # Index of the wider / narrower of the first two eye boxes.
            wide = 0 if eye_wild_buf[0] > eye_wild_buf[1] else 1
            narrow = 1 - wide
            wide_id, wide_score = result_buff[wide]
            narrow_id, narrow_score = result_buff[narrow]
            widths = np.array(eye_wild_buf[:2])

            eye_wild_buf_info = "w:[{:.2f},{:.2f}]".format(
                eye_wild_buf[0], eye_wild_buf[1])
            score_buff_info = "score:[left: {:.2f}] [right: {:.2f}]".format(
                score_buff[0][2], score_buff[1][2])
            cv2.putText(frame, eye_wild_buf_info, (400, 80), font, 0.6,
                        (255, 0, 0), thickness=2)
            cv2.putText(frame, score_buff_info, (400, 100), font, 0.6,
                        (255, 0, 0), thickness=2)

            # Frame-level verdict. "other" when any of these holds:
            #  * an eye wider than 23 px is confidently (>0.8) "other"
            #    (the wide-eye clause used to compare the *score* with 2,
            #    which can never be true; it now checks the class id);
            #  * an eye is narrower than 17 px and the wide eye's score is
            #    below 0.8;
            #  * one eye is "other" above 0.5 while the other eye's score
            #    is below 0.9;
            #  * an eye is narrower than 10 px.
            is_other = (
                (eye_wild_buf[narrow] > 23 and narrow_score > 0.8
                 and narrow_id == 2)
                or (eye_wild_buf[wide] > 23 and wide_score > 0.8
                    and wide_id == 2)
                or (np.any(widths < 17.0) and wide_score < 0.8)
                or (wide_id == 2 and wide_score > 0.5 and narrow_score < 0.9)
                or (narrow_id == 2 and narrow_score > 0.5
                    and wide_score < 0.9)
                or np.any(widths < 10.0)
            )
            # "close" when the wide eye is closed with score >= 0.80, or the
            # most confident eye is closed with score > 0.75.
            is_close = ((wide_id == 1 and wide_score >= 0.80)
                        or (max_id == 1 and max_score > 0.750))

            if is_other:
                verdict, colour = "other", (0, 0, 255)
            elif is_close:
                verdict, colour = "close", (0, 255, 0)
            else:
                verdict, colour = "open", (255, 0, 0)
            cv2.putText(frame, verdict, (400, 60), font, 0.6, colour,
                        thickness=2)

            out.write(frame)
    finally:
        # Finalise the AVI container and free the input even on error.
        cap.release()
        out.release()

Exemple #21

0

Afficher le fichier

Fichier : landmark_test.py Projet : JiangShaoYin/Onet_alignment

    def test_Onet_without_PRnet(self, annotation, outputDir, test_moudel, xxyy,
                                savePic):
        """Measure the mean normalised landmark error of an O-Net checkpoint.

        Ground-truth boxes from ``annotation`` are fed straight to the O-Net
        stage (``detect_onet_xxyy`` when ``xxyy`` is truthy, ``detect_onet``
        otherwise). For every image that yields landmarks, the error is the
        mean point-to-point distance over 5 landmarks divided by the distance
        between ground-truth points 0 and 1. A running mean is printed after
        every batch; once more than 999 images have been scored the result is
        appended to ``landmark_test.txt`` and the method returns.

        Args:
            annotation: Annotation file passed to ``ImageDB``.
            outputDir: Not read by this method.
                NOTE(review): images are saved under ``self.output_dir``
                instead -- confirm which one is intended.
            test_moudel: Checkpoint name; ``./original_model/<name>.pt`` is
                loaded as the O-Net.
            xxyy: Selects ``detect_onet_xxyy`` over ``detect_onet``.
            savePic: When truthy, save a visualisation of each scored image.
        """
        imagedb = ImageDB(annotation)
        gt_imdb = imagedb.load_imdb()
        pnet, rnet, onet_jiang = create_mtcnn_net(
            p_model_path="./original_model/pnet_epoch.pt",
            r_model_path="./original_model/rnet_epoch.pt",
            o_model_path="./original_model/" + test_moudel + ".pt",
            use_cuda=False)
        mtcnn_detector = MtcnnDetector(pnet=pnet,
                                       rnet=rnet,
                                       onet=onet_jiang,
                                       min_face_size=24)

        # Batches of 100 samples, in annotation order.
        test_data = TrainImageReader(gt_imdb,
                                     48,
                                     batch_size=100,
                                     shuffle=False)
        detect = (mtcnn_detector.detect_onet_xxyy
                  if xxyy else mtcnn_detector.detect_onet)

        def mean_error():
            # Avoid dividing by zero while no image has been scored yet.
            return total_errors / cnt if cnt else float("nan")

        total_errors = 0
        cnt = 0
        for images, (gt_labels, gt_bboxes, gt_landmarks) in test_data:
            batch_errors = []

            # Iterate the batch along its first axis, one sample at a time.
            for img, gt_bbox, gt_landmark in zip(images, gt_bboxes,
                                                 gt_landmarks):
                bboxs, landmarks = detect(img, gt_bbox)
                if not landmarks.size:
                    continue

                cnt += 1
                # Keep only the first detection when several are returned.
                bboxs = bboxs[:1]
                landmarks = landmarks[:1]
                if savePic:
                    vis_face(img, bboxs, landmarks,
                             self.output_dir + str(cnt) + ".jpg")
                gt_landmark = np.array(gt_landmark).reshape(5, 2)
                landmarks = np.array(landmarks).reshape(5, 2)

                # Normalise by the distance between ground-truth points 0
                # and 1 (the two eyes, per the original author's comment).
                normDist = np.linalg.norm(gt_landmark[1] - gt_landmark[0])
                error = np.mean(
                    np.sqrt(np.sum(
                        (landmarks - gt_landmark)**2, axis=1))) / normDist
                batch_errors.append(error)

            total_errors += np.array(batch_errors).sum()
            print("%s:   %s pics mean error is %s" %
                  (datetime.datetime.now(), cnt, mean_error()))
            if cnt > 999:
                print("%s:%s pics mean error is %s" %
                      (datetime.datetime.now(), cnt, mean_error()))
                # The context manager closes the log even if the write fails.
                with open("landmark_test.txt", "a+") as f:
                    f.write(
                        "%s, moudel_name:%s.pt, %s pics mean error is %s\n" %
                        (datetime.datetime.now(), test_moudel, cnt,
                         np.array(total_errors).reshape(1, -1).sum() / cnt))
                return

        print("%s:%s pics mean error is %s" %
              (datetime.datetime.now(), cnt, mean_error()))

Exemple #22

0

Afficher le fichier

Fichier : occ_utils.py Projet : Ontheway361/occlusion_detection

class HistOccBlock(object):
    """Collect per-channel pixel histograms of a face block over many images.

    The pipeline (see ``runner``) builds a list of image paths, detects
    faces with ``MtcnnDetector``, keeps the largest face box per image, crops
    a block from it and stores the normalised B/G/R histograms of that block.
    """

    def __init__(self, args):
        """Store ``args`` and build the detector from it.

        ``imgs_list`` and ``pdf_list`` start as ``None`` and are filled by
        the ``_prepare_*`` methods and ``_hist_go`` respectively.
        """
        self.args      = args
        self.imgs_list = None
        self.model     = MtcnnDetector(self.args)
        self.pdf_list  = None


    def _prepare_folder(self):
        ''' Set ``imgs_list`` to every entry of ``data_dir/folder`` and return it. '''

        folder_path = os.path.join(self.args.data_dir, self.args.folder)
        imgs_list = [
            os.path.join(folder_path, img_name)
            for img_name in os.listdir(folder_path)
        ]
        self.imgs_list = imgs_list
        print('there are %d imgs in %s' % (len(imgs_list), self.args.folder))
        return imgs_list


    def _prepare_csv(self):
        '''
        Build the image list from ``data_dir/csv_raw/<csv_file>``.

        ``args.check_mode`` selects the rows:
        'pos' keeps rows with ``anno_label == -1``, 'neg' keeps rows with
        ``anno_label == 0``; both store the resolved paths in
        ``self.imgs_list`` and return ``None``. Any other value is
        evaluate-mode: a DataFrame of [path, img_type, anno_label] rows is
        returned and ``self.imgs_list`` is left untouched.
        NOTE(review): ``runner`` calls ``_hist_go`` right after this method,
        which iterates ``self.imgs_list`` -- in evaluate-mode that is still
        ``None`` unless it was set elsewhere; confirm the intended flow.
        '''

        csv_file = os.path.join(self.args.data_dir, 'csv_raw', self.args.csv_file)
        df_csv = pd.read_csv(csv_file)
        print('csv_file.shape : ', df_csv.shape)
        df_test  = None

        if self.args.check_mode == 'pos':
            check_mode = -1
        elif self.args.check_mode == 'neg':
            check_mode = 0
        else:
            check_mode = 1
            print('attention, evaluate-mode was started ...')

        def resolve(raw_path):
            # Keep only the last two path components and re-root them under
            # data_dir.
            return os.path.join(self.args.data_dir,
                                '/'.join(raw_path.split('/')[-2:]))

        if check_mode < 1:
            df_csv = df_csv[df_csv['anno_label'] == check_mode]
            self.imgs_list = [
                resolve(row['img_path']) for _, row in df_csv.iterrows()
            ]
        else:
            df_test = [
                [resolve(row['img_path']), row['img_type'], row['anno_label']]
                for _, row in df_csv.iterrows()
            ]
            df_test = pd.DataFrame(df_test, columns=df_csv.columns)
            print('after filtering, df_test.shape : ', df_test.shape)
            print('imgs_list was prepared ...')
        return df_test


    def _fliter_doc_bbox(self, bboxes, landmarks):
        ''' Return the box with the largest area and the landmarks at the same index. '''

        area = (bboxes[:, 2] - bboxes[:, 0] + 1) * (bboxes[:, 3] - bboxes[:, 1] + 1)
        largest = int(np.argmax(area))
        return bboxes[largest], landmarks[largest]


    def _fetch_block(self, img, bbox, landmark, lmk_flag = True):
        '''
        Crop a rectangular block (both end coordinates inclusive) from ``img``.

        With ``lmk_flag`` the block spans x from landmark 6 to landmark 10
        and y from landmark 33 to landmark 8 (nose-to-chin, per the original
        comments); otherwise the block is the face box ``bbox[:4]`` itself.
        '''

        landmark = landmark.reshape(-1,2)

        if lmk_flag:
            left_down  = landmark[6]
            nose_point = landmark[33] # point-34 | nose
            right_down = landmark[10]
            chin_point = landmark[8]  # point-9  | chin

            x1, y1 = int(left_down[0]), int(nose_point[1])
            x2, y2 = int(right_down[0]), int(chin_point[1])
        else:
            x1, y1 = int(bbox[0]), int(bbox[1])
            x2, y2 = int(bbox[2]), int(bbox[3])

        crop_block = img[y1:y2 + 1, x1:x2 + 1, :]

        return crop_block


    def _statistics(self, block):
        '''
        Return the normalised histogram of each of the first three channels.

        ``block`` is an (H, W, C) array of integer pixel values in 0..255.
        The result has shape (3, 256); row ``c`` holds, for every intensity,
        the fraction of pixels of channel ``c`` with that value.
        '''

        height, width, _ = block.shape
        num_pixels = height * width
        # np.int was removed in NumPy 1.24; the builtin int is the same dtype.
        bgr_prob   = np.zeros((3, 256), dtype=int)

        # One C-level counting pass per channel instead of a Python loop
        # over every pixel.
        for channel in range(3):
            bgr_prob[channel] = np.bincount(block[:, :, channel].ravel(),
                                            minlength=256)
        bgr_prob = bgr_prob / num_pixels

        return bgr_prob


    def _hist_go(self):
        '''
        Statistic the info of resz_block
        step - 1. detect face with trained model
        step - 2. filter the doc_bbox
        step - 3. crop and resize the target block
        step - 4. get the statistics_info

        The flattened histograms are kept in ``self.pdf_list`` and saved to
        ``pdf/npy_data/<check_mode>_details_<date>.npy``, where ``<date>`` is
        taken from the csv file name. Images whose detection raises are
        reported and skipped.
        '''

        pdf_list = []
        for img_path in self.imgs_list:

            try:
                img = cv2.imread(img_path)
                bboxes, landmarks = self.model.detect_face(img, verbose=False)
                print((img_path, bboxes.shape))
            except Exception as e:
                # Best effort: one unreadable image must not stop the run.
                print(e)
            else:
                if bboxes.shape[0] == 0:
                    print('No face detected in %s' % img_path)
                    continue
                bbox, landmark = self._fliter_doc_bbox(bboxes, landmarks)
                block = self._fetch_block(img, bbox, landmark)
                bgr_prob = self._statistics(block)
                pdf_list.append(bgr_prob.reshape(1, -1)[0])

        self.pdf_list = pdf_list
        date_stamp = self.args.csv_file.split('_')[-1].split('.')[0]
        save_name  = 'pdf/npy_data/%s_details_%s.npy' % (self.args.check_mode, date_stamp)
        print('npy_data was savd in %s' % save_name)
        np.save(save_name, pdf_list)
        print('hist-module was finished ...')


    def runner(self, vis = False):
        ''' Pipeline of HistOccBlock: build the image list from the csv, then run the histogram pass. ``vis`` is not used. '''

        self._prepare_csv()
        self._hist_go()

Exemple #23

0

Afficher le fichier

Fichier : occ_utils.py Projet : Ontheway361/occlusion_detection

    def __init__(self, args):
        """Keep ``args``, build the detector from it, and start with empty results."""
        self.args = args
        # Neither the image list nor the histogram list exists yet.
        self.imgs_list = self.pdf_list = None
        self.model = MtcnnDetector(self.args)

Exemple #24

0

Afficher le fichier

Fichier : test.py Projet : 666DZY666/Lightweight-facial-expression-recognition

import torch.nn.functional as F
from torch.autograd import Variable
import transforms as transform
from mtcnn.core.detect import create_mtcnn_net, MtcnnDetector
from mtcnn.core.vision import vis_face
from models import vgg_prune
#from models import resnet_prune

# Module-level configuration for the test script.
use_cuda = False  # passed to create_mtcnn_net below
cut_size = 46     # crop size given to TenCrop in transform_test
# NOTE(review): fps1, fps2 and v are not used in the visible part of the
# file; v looks like a small epsilon -- confirm at the point of use.
fps1 = 0.0
fps2 = 0.0
v = 0.0000000001

# Build the three MTCNN stages from local checkpoints and wrap them in a
# detector.
pnet, rnet, onet = create_mtcnn_net(p_model_path="mtcnn_models/pnet.pt", r_model_path="mtcnn_models/rnet.pt", o_model_path="mtcnn_models/onet.pt", use_cuda=use_cuda)
mtcnn_detector = MtcnnDetector(pnet=pnet, rnet=rnet, onet=onet, min_face_size = 48, stride=2, threshold=[0.66, 0.7, 0.7], scale_factor=0.709)

# Seven display labels; the commented-out list below holds the emotion names
# they replace, position by position.
#class_names = ['Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral']
class_names = ['just so so', 'just so so', 'just so so', 'good', 'common', 'just so so', 'common']

# Test-time transform: take the crops produced by TenCrop(cut_size), convert
# each one to a tensor and stack them into a single tensor.
transform_test = transform.Compose([
    transform.TenCrop(cut_size),
    transform.Lambda(lambda crops: torch.stack([transform.ToTensor()(crop) for crop in crops])),
])

# Instantiate the VGG model and load its weights from the checkpoint's
# 'state_dict' entry.
# NOTE(review): torch.load is called without map_location while use_cuda is
# False -- confirm the checkpoint loads on a CPU-only machine.
print('==>  ori_model ...')
net = vgg_prune.VGG()
checkpoint = torch.load('ori_models/fer_Pri_vgg16.pth')
net.load_state_dict(checkpoint['state_dict'])
#print('==>  ori_model ...')
#net = resnet_prune_1.resnet()

Exemple #25

0

Afficher le fichier

def _eye_bbox(points):
    """Return [xmin, ymin, xmax, ymax] spanning the rows of an (N, 2) point array."""
    return [np.min(points[:, 0]), np.min(points[:, 1]),
            np.max(points[:, 0]), np.max(points[:, 1])]


def _square_and_expand(xmin, ymin, xmax, ymax, k=0.1):
    """Pad a box to a square and grow it by ``k`` times its longer side.

    The shorter side is padded symmetrically until it matches the longer one,
    then every side is pushed outwards by ``k * max(w, h)``.

    Returns:
        The expanded ``(xmin, ymin, xmax, ymax)``, or ``None`` when the box has
        no positive width and height (the aspect ratio would be inf/NaN and the
        later ``int()`` conversions would raise).
    """
    w, h = xmax - xmin, ymax - ymin
    if not (w > 0 and h > 0):
        return None
    ratio = h / w
    if ratio > 1:
        # Taller than wide: pad horizontally.
        ratio = ratio - 1
        dx = ratio / 2 * w + k * h
        dy = k * h
    else:
        # Wider than tall (or square): pad vertically.
        ratio = w / h - 1
        dx = k * w
        dy = ratio / 2 * h + k * w
    return xmin - dx, ymin - dy, xmax + dx, ymax + dy


def _crop_resized(image, xmin, ymin, xmax, ymax, size=(24, 24)):
    """Crop ``image`` to the box, clamped to the image bounds, and resize it.

    Clamping matters because a negative start index would be read by NumPy as
    "counted from the end", which silently selects the wrong region or an empty
    one; an empty crop makes ``cv2.resize`` raise.

    Returns:
        The resized crop, or ``None`` when the clamped box is empty.
    """
    height, width = image.shape[:2]
    x0, y0 = max(int(xmin), 0), max(int(ymin), 0)
    x1, y1 = min(int(xmax), width), min(int(ymax), height)
    if x1 <= x0 or y1 <= y0:
        return None
    return cv2.resize(image[y0:y1, x0:x1], size)


def main():
    """Classify the eye state on every frame of a video and save the result.

    Reads ``test_video/20200522164730261_0.avi`` frame by frame, runs the MTCNN
    detector to get facial landmarks, builds a box around each eye, feeds the
    two side-by-side 24x24 eye crops of the first detected pair to MixNet and
    writes the predicted class name onto the frame. Every frame read is written
    to ``result_video/20200522164730261_0.avi``.

    Frames whose first two eye crops are unusable (degenerate landmark box or a
    box entirely outside the frame) are written without a class label instead
    of aborting the run.
    """
    eye_class_dict = {0: "open_eye", 1: "close_eye", 2: "other"}
    point_nums = 24
    threshold = [0.6, 0.7, 0.7]
    # Landmark rows used for each eye (named after the original variables).
    left_eye_idx = [6, 8, 10, 11, 14]
    right_eye_idx = [7, 9, 12, 13, 15]

    data_trans = Transforms.Compose([
        Transforms.ToTensor(),
        Transforms.Normalize((0.407, 0.405, 0.412), (0.087, 0.087, 0.087)),
    ])
    mixnet = MixNet(input_size=(24, 48), num_classes=3)
    weight_dict = torch.load("weight/change_mix_data_0202/Mixnet_epoch_59.pth")
    new_state_dict = OrderedDict()
    for k, v in weight_dict.items():
        # Drops the first 7 characters of every key.
        # NOTE(review): presumably a "module." prefix left by nn.DataParallel --
        # confirm against the checkpoint before changing this.
        name = k[7:]
        new_state_dict[name] = v

    mixnet.load_state_dict(new_state_dict)
    mixnet.to('cuda:0')
    mixnet.eval()

    pnet, rnet, onet = create_mtcnn_net(
        p_model_path=r'model_store/final/pnet_epoch_19.pt',
        r_model_path=r'model_store/final/rnet_epoch_7.pt',
        o_model_path=r'model_store/final/onet_epoch_92.pt',
        use_cuda=True)
    mtcnn_detector = MtcnnDetector(pnet=pnet,
                                   rnet=rnet,
                                   onet=onet,
                                   min_face_size=24,
                                   threshold=threshold)
    videos_root_path = 'test_video/20200522164730261_0.avi'
    save_path_root = 'result_video/20200522164730261_0.avi'

    cap = cv2.VideoCapture(videos_root_path)
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    fps = cap.get(cv2.CAP_PROP_FPS)
    size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
            int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))

    out = cv2.VideoWriter(save_path_root, fourcc, fps, size)
    while True:
        ret, frame = cap.read()

        if ret:
            # Crops come from an untouched copy so the overlays drawn on
            # `frame` never end up in the classifier input.
            copy_frame = frame.copy()
            left_right_eye = []
            _bboxs, landmarks, _wearmask = mtcnn_detector.detect_face(
                frame, rgb=True)

            if landmarks is not None:
                for face_landmarks in landmarks:
                    landmarks_one = face_landmarks.reshape((point_nums, 2))
                    left_eye = np.array(landmarks_one[left_eye_idx, :])
                    right_eye = np.array(landmarks_one[right_eye_idx, :])
                    left_right_eye.append(_eye_bbox(left_eye))
                    left_right_eye.append(_eye_bbox(right_eye))
                    for point in [*left_eye, *right_eye]:
                        cv2.circle(frame, (int(point[0]), int(point[1])), 2,
                                   (255, 0, 0), -1)

                # One entry per eye box, in order; None marks an unusable box
                # so that the left/right pairing by position is preserved.
                crop_img = []
                for box in left_right_eye:
                    # Fixed expansion factor (the original note mentions a
                    # random 0.05-0.15 range; 0.1 is what the code uses).
                    expanded = _square_and_expand(*box, k=0.1)
                    if expanded is None:
                        crop_img.append(None)
                        continue
                    xmin, ymin, xmax, ymax = expanded
                    cv2.rectangle(frame, (int(xmin), int(ymin)),
                                  (int(xmax), int(ymax)), (0, 255, 255), 2)
                    crop_img.append(
                        _crop_resized(copy_frame, xmin, ymin, xmax, ymax))

                # Only the first two crops are classified.
                if (len(crop_img) < 2 or crop_img[0] is None
                        or crop_img[1] is None):
                    out.write(frame)
                    continue

                # Two 24x24 crops side by side give the 24x48 MixNet input.
                compose_img = np.hstack((crop_img[0], crop_img[1]))
                compose_img = cv2.cvtColor(compose_img, cv2.COLOR_BGR2RGB)
                compose_img = Image.fromarray(compose_img)
                img = data_trans(compose_img).unsqueeze(0)
                with torch.no_grad():
                    outputs = mixnet(img.to('cuda:0'))
                    result = torch.max(outputs, 1)[1]
                label = result.item()
                eye_class = eye_class_dict[label]
                text_color = (255, 0, 255) if label == 0 else (255, 255, 0)
                cv2.putText(frame, eye_class, (0, 20),
                            cv2.FONT_HERSHEY_COMPLEX, 1.3, text_color,
                            thickness=2)
            out.write(frame)
        else:
            print("finish")
            break