    # method of a recognizer class: self.sess, self.logits, and the
    # placeholders are created elsewhere; numpy (np) and facenet_ext are
    # assumed imported at module level
    def recog_cap(self, image):
        image_size = self.args.image_size

        #img = cv2.resize(images,(image_size,image_size))

        #images = facenet_ext.load_data(images_path, False, False, 160)
        # use the configured input size rather than hard-coding 160
        images = facenet_ext.load_data_im(image, False, False, image_size)
        if len(images.shape) == 3:
            images = np.expand_dims(images, axis=0)

        feed_dict = {
            self.phase_train_placeholder: False,
            self.phase_train_placeholder_expression: False,
            self.images_placeholder: images,
            self.keep_probability_placeholder: 1.0
        }

        # self.logits is wrapped in a list, so sess.run returns a one-element list
        logits_array = self.sess.run([self.logits], feed_dict=feed_dict)

        logits0 = logits_array[0]
        # softmax over the class logits (assumes a single image in the batch)
        cap_probs = np.exp(logits0) / np.sum(np.exp(logits0))
        iscap = np.argmax(logits0)
        cap_prob = cap_probs[0][iscap]

        return iscap, cap_prob
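    # A hedged usage sketch (assumes `recognizer` is an instance of this class
    # and `frame` is a BGR frame from OpenCV; `iscap` is the predicted class index):
    #
    #   frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    #   iscap, cap_prob = recognizer.recog_cap(frame_rgb)
    #   print('class %d with prob %.3f' % (iscap, cap_prob))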
    def recog_hand(self, imgs):
        image_size = self.args.image_size

        #img = cv2.resize(images,(image_size,image_size))

        #images = facenet_ext.load_data(images_path, False, False, 160)
        images = facenet_ext.load_data_im(imgs, False, False, image_size)
        if len(images.shape) == 3:
            images = np.expand_dims(images, axis=0)

        feed_dict = {self.phase_train_placeholder: False, self.phase_train_placeholder_expression: False,
                     self.images_placeholder: images, self.keep_probability_placeholder: 1.0}

        logits_ = self.sess.run([self.logits], feed_dict=feed_dict)

        # logits0 = logits_array[0]
        # hand_probs = np.exp(logits0) / np.sum(np.exp(logits0))
        # IDs = np.argmax(logits0)
        # probs = hand_probs[0][IDs]
        # logits_ is a one-element list; drop that leading axis first
        logits_array = np.array(logits_)
        logits_array = np.squeeze(logits_array, 0)
        # row-wise softmax; keepdims=True lets the denominator broadcast per image
        exp_logit = np.exp(logits_array)
        imgs_sf_denominator = np.sum(exp_logit, 1, keepdims=True)
        imgs_sf = exp_logit / imgs_sf_denominator
        IDs = np.argmax(imgs_sf, 1)
        probs = np.max(imgs_sf, 1)

        return IDs, probs
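    # A minimal alternative sketch of the row-wise softmax computed in
    # recog_hand; subtracting the per-row max first is the numerically stable
    # form and leaves the probabilities unchanged. Not part of the original module.
    @staticmethod
    def softmax_rows(logits):
        shifted = logits - np.max(logits, axis=1, keepdims=True)  # guard against overflow
        exp_logit = np.exp(shifted)
        return exp_logit / np.sum(exp_logit, axis=1, keepdims=True)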
def face_embeddings(img_refs_, args, sess, args_model, Expr_dataset):

    # Load images
    image_size = args.image_size

    images = facenet_ext.load_data_im(img_refs_, False, False, image_size)
    if len(images.shape)==3:
        images = np.expand_dims(images, axis=0)

    if Expr_dataset == 'CK+' or Expr_dataset == 'FER2013':
        feed_dict = {args_model.phase_train_placeholder: False,
                     args_model.images_placeholder: images,
                     args_model.keep_probability_placeholder: 1.0}
    else:
        raise ValueError('Unsupported expression dataset: %s' % Expr_dataset)

    t2 = time.time()
    emb_array = sess.run([args_model.embeddings], feed_dict=feed_dict)
    t3 = time.time()
    print('Embedding calculation FPS: %d' % int(1 / max(t3 - t2, 1e-6)))

    return emb_array
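# A hedged usage sketch for face_embeddings; `args`, `sess`, and `args_model`
# are assumed to come from face_verification_verif.load_models_forward_v2 (as
# in verification_test below), and `ref_imgs` is a hypothetical (N, H, W, 3) array:
#
#   emb_ref = face_embeddings(ref_imgs, args, sess, args_model, 'FER2013')
#   print(emb_ref[0].shape)  # (N, embedding_dim)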
def face_expression_multiref_forward(face_img_, emb_ref, args, sess, args_model, Expr_dataset):

    nrof_imgs = 1
    imgs = np.zeros((nrof_imgs, args.image_size, args.image_size, 3))
    imgs[0, :, :, :]=face_img_

    # Load images
    image_size = args.image_size

    images = facenet_ext.load_data_im(imgs, False, False, image_size)
    if len(images.shape) == 3:
        images = np.expand_dims(images,axis=0)

    if Expr_dataset == 'CK+':
        feed_dict = {args_model.phase_train_placeholder: False,
                     args_model.images_placeholder: images,
                     args_model.keep_probability_placeholder: 1.0}
    elif Expr_dataset == 'FER2013':
        feed_dict = {args_model.phase_train_placeholder: False,
                     args_model.phase_train_placeholder_expression: False,
                     args_model.images_placeholder: images,
                     args_model.keep_probability_placeholder: 1.0}
    else:
        raise ValueError('Unsupported expression dataset: %s' % Expr_dataset)

    t2 = time.time()
    emb_array, logits_array = sess.run([args_model.embeddings, args_model.logits], feed_dict=feed_dict)
    #emb_array = sess.run([args_model.embeddings], feed_dict=feed_dict)

    t3 = time.time()
    print('Embedding calculation FPS: %d' % int(1 / max(t3 - t2, 1e-6)))
    embeddings1 = emb_array[0]
    embeddings2 = emb_ref[0]


    # Calculate the squared L2 distance between the probe embedding and each
    # reference embedding, then threshold it to decide whether the faces match
    assert embeddings1.shape[0] == embeddings2[0].shape[0]
    diff = np.subtract(embeddings1, embeddings2)
    if len(diff.shape) == 2:
        dist = np.sum(np.square(diff), 1)
    elif len(diff.shape) == 1:
        dist = np.sum(np.square(diff), 0)
    else:
        raise ValueError("Dimension of the embeddings2 is not correct!")


    predict_issame = np.less(dist, args.threshold)

    logits0 = logits_array[0]
    # softmax over the expression logits
    express_probs = np.exp(logits0) / np.sum(np.exp(logits0))
    return predict_issame, dist, express_probs
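# A minimal sketch of the thresholded-distance decision above, using made-up
# 4-d embeddings (real face embeddings are typically 128- or 512-d):
#
#   probe = np.array([0.1, 0.2, 0.3, 0.4])
#   refs = np.array([[0.1, 0.2, 0.3, 0.4],    # same identity
#                    [0.9, -0.5, 0.0, 0.2]])  # different identity
#   dists = np.sum(np.square(probe - refs), axis=1)  # [0.0, 1.26]
#   matches = np.less(dists, 0.8)  # hypothetical threshold -> [True, False]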
def verification_test(args):

    rect_len = 120
    offset_x = 50

    # French display labels: Neutral, Anger, Disgust, Fear, Happy, Sad, Surprise
    Expr_str = [
        'Neutre', 'Colere', 'Degoute', 'Peur', 'Content', 'Triste', 'Surprise'
    ]  # FER2013+ EXPRESSIONS_TYPE_fusion
    Expr_dataset = 'FER2013'

    c_red = (0, 0, 255)
    c_green = (0, 255, 0)
    font = cv2.FONT_HERSHEY_SIMPLEX

    scale_size = 3  # downscale factor applied to each frame before face alignment

    ## load the models for face detection and verification
    pnet, rnet, onet, sess, args_model = face_verification_verif.load_models_forward_v2(
        args, Expr_dataset)

    face_img_refs_ = []
    img_ref_paths = []
    for img_ref_path in os.listdir(args.img_ref):
        img_ref_paths.append(img_ref_path)
        img_ref = misc.imread(os.path.join(args.img_ref,
                                           img_ref_path))  # python format
        img_size = img_ref.shape[0:2]

        bb, probs = align.face_align_mtcnn.align_mtcnn_realplay(
            img_ref, pnet, rnet, onet)
        if len(bb) == 0:  # no face detected in this reference image
            continue

        bb_face = []
        probs_face = []
        for i, prob in enumerate(probs):
            if prob > args.face_detect_threshold:
                bb_face.append(bb[i])
                probs_face.append(prob)

        bb = np.asarray(bb_face)

        det = bb

        if det.shape[0] > 1:
            bounding_box_size = (det[:, 2] - det[:, 0]) * (det[:, 3] -
                                                           det[:, 1])
            img_center = np.array(img_size) / 2
            offsets = np.vstack([(det[:, 0] + det[:, 2]) / 2 - img_center[1],
                                 (det[:, 1] + det[:, 3]) / 2 - img_center[0]])
            offset_dist_squared = np.sum(np.power(offsets, 2.0), 0)
            index = np.argmax(bounding_box_size - offset_dist_squared *
                              2.0)  # some extra weight on the centering
            det = det[index, :]

        det = np.squeeze(det)
        x0 = det[0]
        y0 = det[1]

        bb_tmp = np.zeros(4, dtype=np.int32)
        bb_tmp[0] = np.maximum(det[0] - args.margin / 2, 0)
        bb_tmp[1] = np.maximum(det[1] - args.margin / 2, 0)
        bb_tmp[2] = np.minimum(det[2] + args.margin / 2, img_size[1])
        bb_tmp[3] = np.minimum(det[3] + args.margin / 2, img_size[0])

        face_img_ref = img_ref[bb_tmp[1]:bb_tmp[3], bb_tmp[0]:bb_tmp[2], :]
        face_img_ref = misc.imresize(face_img_ref,
                                     (args.image_size, args.image_size),
                                     interp='bilinear')
        face_img_ref_ = facenet_ext.load_data_im(face_img_ref, False, False,
                                                 args.image_size)
        face_img_refs_.append(face_img_ref_)

        img_ref_cv = cv2.cvtColor(img_ref, cv2.COLOR_BGR2RGB)
        cv2.rectangle(img_ref_cv, (int(det[0]), int(det[1])),
                      (int(det[2]), int(det[3])), c_red, 2, 8, 0)
        img_ref_name = img_ref_path.split('.')[0]
        cv2.putText(img_ref_cv, "%s" % img_ref_name, (int(x0), int(y0 - 10)),
                    font, 1, c_red, 2)
        cv2.imshow('%s' % img_ref_path, img_ref_cv)
        cv2.waitKey(20)

    face_img_refs_ = np.array(face_img_refs_)

    emb_ref = face_verification_verif.face_embeddings(face_img_refs_, args,
                                                      sess, args_model,
                                                      Expr_dataset)

    ################ capture the camera for realplay #############################################
    if args.video == '0':
        video = 0  # index 0 selects the default camera
    else:
        video = args.video  # otherwise treat it as a video file path
    cap = cv2.VideoCapture(video)
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, 800)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 600)

    realplay_window = "Realplay"
    cv2.namedWindow(realplay_window, cv2.WINDOW_NORMAL)

    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter('output.avi', fourcc, 20.0, (800, 600))

    while cap.isOpened():
        # WND_PROP_VISIBLE drops below 1 once the user closes the window
        if cv2.getWindowProperty(realplay_window, cv2.WND_PROP_VISIBLE) < 1:
            return
        # Capture frame-by-frame
        ret, frame = cap.read()
        if not ret:
            break

        cv2_im = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        im_np = cv2_im
        img_size = im_np.shape[0:2]
        im_np_scale = cv2.resize(
            im_np,
            (int(img_size[1] / scale_size), int(img_size[0] / scale_size)),
            interpolation=cv2.INTER_LINEAR)
        bb, probs = align.face_align_mtcnn.align_mtcnn_realplay(
            im_np_scale, pnet, rnet, onet)

        bb_face = []
        probs_face = []
        for i, prob in enumerate(probs):
            if prob > args.face_detect_threshold:
                bb_face.append(bb[i])
                probs_face.append(prob)

        bb = np.asarray(bb_face)
        probs = np.asarray(probs_face)

        bb = bb * scale_size  # rescale boxes from the downscaled image back to the original frame

        if len(bb) > 0:
            for i in range(bb.shape[0]):
                prob = probs[i]
                det = bb[i]
                bb_tmp = np.zeros(4, dtype=np.int32)
                bb_tmp[0] = np.maximum(det[0] - args.margin / 2, 0)
                bb_tmp[1] = np.maximum(det[1] - args.margin / 2, 0)
                bb_tmp[2] = np.minimum(det[2] + args.margin / 2, img_size[1])
                bb_tmp[3] = np.minimum(det[3] + args.margin / 2, img_size[0])

                face_img = im_np[bb_tmp[1]:bb_tmp[3], bb_tmp[0]:bb_tmp[2], :]
                face_img_ = misc.imresize(face_img,
                                          (args.image_size, args.image_size),
                                          interp='bilinear')
                face_img_ = facenet_ext.load_data_im(face_img_, False, False,
                                                     args.image_size)

                #########
                x0 = bb[i][0]
                y0 = bb[i][1]
                x1 = bb[i][2]
                y1 = bb[i][3]
                offset_y = int((y1 - y0) / 7)

                # face expression
                ##### 0=neutral, 1=anger, 2=contempt, 3=disgust, 4=fear, 5=happy, 6=sadness, 7=surprise   ############
                t2 = time.time()
                predict_issames, dists, express_probs = face_verification_verif.face_expression_multiref_forward(
                    face_img_, emb_ref, args, sess, args_model, Expr_dataset)
                t3 = time.time()

                print('face verif FPS: %d' % int(1 / max(t3 - t2, 1e-6)))

                predict_issame_idxes = [
                    idx for idx, predict_issame in enumerate(predict_issames)
                    if predict_issame
                ]

                # if several references match, keep the one with the smallest distance
                if len(predict_issame_idxes) > 1:
                    predict_issame_idx = np.argmin(dists)
                elif len(predict_issame_idxes) == 1:
                    predict_issame_idx = predict_issame_idxes[0]
                if len(predict_issame_idxes):
                    i = predict_issame_idx
                    dist = dists[i]
                    img_ref_name = img_ref_paths[i].split('.')[0]

                    cv2.rectangle(frame, (int(x0), int(y0)),
                                  (int(x1), int(y1)), c_green, 2, 8, 0)
                    cv2.putText(frame, "%.4f" % prob, (int(x0), int(y0)), font,
                                0.5, c_green, 1)
                    cv2.putText(frame, "%.2f" % dist, (int(x1), int(y1)), font,
                                0.5, c_green, 1)
                    cv2.putText(frame, "%s" % img_ref_name, (int(
                        (x1 + x0) / 2), int(y0 - 10)), font, 1, c_green, 2)

                else:
                    dist = min(dists)
                    cv2.rectangle(frame, (int(x0), int(y0)),
                                  (int(x1), int(y1)), c_red, 2, 8, 0)
                    cv2.putText(frame, "%.4f" % prob, (int(x0), int(y0)), font,
                                0.5, c_red, 1)
                    cv2.putText(frame, "%.2f" % dist, (int(x1), int(y1)), font,
                                0.5, c_red, 1)

        # visualisation: record and display the annotated frame
        out.write(frame)
        cv2.imshow(realplay_window, frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    # When everything done, release the capture
    cap.release()
    out.release()
    cv2.destroyAllWindows()

    return
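# A hedged entry-point sketch; the option names mirror the attributes used in
# verification_test (img_ref, video, image_size, margin, threshold,
# face_detect_threshold), but the defaults here are illustrative assumptions:
#
#   import argparse
#
#   if __name__ == '__main__':
#       parser = argparse.ArgumentParser()
#       parser.add_argument('--img_ref', default='refs/')
#       parser.add_argument('--video', default='0')
#       parser.add_argument('--image_size', type=int, default=160)
#       parser.add_argument('--margin', type=int, default=32)
#       parser.add_argument('--threshold', type=float, default=0.8)
#       parser.add_argument('--face_detect_threshold', type=float, default=0.8)
#       verification_test(parser.parse_args())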