# Example #1
# 0
def videoVivoRec():
    """Recognise faces from camera 0 and greet each recognised person once.

    The function loads the MTCNN detector weights from ``./npy``, the model
    graph from ``./modelo/modelo_preentrenado_caras.pb`` and the pickled
    ``(model, class_names)`` classifier from ``./clase/clasificador.pkl``.
    Every captured frame is searched for faces. Each face whose bounding box
    lies strictly inside the frame is embedded and classified; when the best
    class probability exceeds 0.53 the face is boxed and labelled with the
    entry of the sorted ``./imagenes_entrenamiento`` listing at the predicted
    index. The first time a name is recognised, ``./voces/<name>.wav`` is
    played; playback is synchronous, so the video loop waits for it.

    The loop ends when ``q`` is pressed in the preview window or when the
    camera stops delivering frames. The camera, the preview windows and the
    TensorFlow session are released even if an error interrupts the loop.

    Returns:
        None.
    """
    CHUNK = 1024  # audio frames handed to the output stream per write

    dir_voces = './voces/'
    nombres_hablados = []  # names whose greeting has already been played

    modeldir = './modelo/modelo_preentrenado_caras.pb'
    classifier_filename = './clase/clasificador.pkl'
    npy = './npy'
    train_img = "./imagenes_entrenamiento"

    def speech(wf):
        """Play every frame of the open ``wave`` reader ``wf``."""
        p = pyaudio.PyAudio()
        try:
            stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
                            channels=wf.getnchannels(),
                            rate=wf.getframerate(),
                            output=True)
            try:
                data = wf.readframes(CHUNK)
                while len(data) > 0:
                    stream.write(data)
                    data = wf.readframes(CHUNK)
                stream.stop_stream()
            finally:
                stream.close()
        finally:
            # Always give the audio backend back, even if playback failed.
            p.terminate()

    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.4)
        config = tf.ConfigProto(gpu_options=gpu_options,
                                log_device_placement=False)
        # Entering the session installs it as the default session and closes
        # it on exit, so its resources are not leaked.
        with tf.Session(config=config) as sess:
            pnet, rnet, onet = detect_face.create_mtcnn(sess, npy)

            minsize = 20  # minimum face size
            threshold = [0.6, 0.7, 0.7]  # thresholds of the three steps
            factor = 0.709  # scale factor
            image_size = 182
            input_image_size = 160

            HumanNames = sorted(os.listdir(train_img))

            print('Cargando modelo')
            facenet.load_model(modeldir)
            graph = tf.get_default_graph()
            images_placeholder = graph.get_tensor_by_name("input:0")
            embeddings = graph.get_tensor_by_name("embeddings:0")
            phase_train_placeholder = graph.get_tensor_by_name(
                "phase_train:0")
            embedding_size = embeddings.get_shape()[1]

            classifier_filename_exp = os.path.expanduser(classifier_filename)
            # NOTE: unpickling executes arbitrary code; this file must come
            # from a trusted source.
            with open(classifier_filename_exp, 'rb') as infile:
                (model, _class_names) = pickle.load(infile)

            def classify_face(face_pixels):
                """Return (best class indices, best probabilities) for a crop."""
                face_pixels = facenet.flip(face_pixels, False)
                face_pixels = misc.imresize(face_pixels,
                                            (image_size, image_size),
                                            interp='bilinear')
                face_pixels = cv2.resize(
                    face_pixels, (input_image_size, input_image_size),
                    interpolation=cv2.INTER_CUBIC)
                face_pixels = facenet.prewhiten(face_pixels)
                feed_dict = {
                    images_placeholder: face_pixels.reshape(
                        -1, input_image_size, input_image_size, 3),
                    phase_train_placeholder: False
                }
                emb_array = np.zeros((1, embedding_size))
                emb_array[0, :] = sess.run(embeddings, feed_dict=feed_dict)
                predictions = model.predict_proba(emb_array)
                print(predictions)
                best_class_indices = np.argmax(predictions, axis=1)
                best_class_probabilities = predictions[
                    np.arange(len(best_class_indices)), best_class_indices]
                print(best_class_indices, ' con una precisión de ',
                      best_class_probabilities)
                return best_class_indices, best_class_probabilities

            video_capture = cv2.VideoCapture(0)

            print('Comenzando Reconocimiento :D!')
            try:
                while True:
                    ret, frame = video_capture.read()
                    if not ret or frame is None:
                        # Without this guard a failed grab reaches
                        # cv2.resize as None and crashes.
                        print('No se pudo leer un frame de la cámara')
                        break

                    # Optional rescale of the frame; a factor of 1 keeps
                    # the captured size.
                    frame = cv2.resize(frame, (0, 0), fx=1, fy=1)

                    if frame.ndim == 2:
                        frame = facenet.to_rgb(frame)
                    frame = frame[:, :, 0:3]
                    bounding_boxes, _ = detect_face.detect_face(
                        frame, minsize, pnet, rnet, onet, threshold, factor)
                    nrof_faces = bounding_boxes.shape[0]
                    print('Número de caras detectadas: %d' % nrof_faces)

                    if nrof_faces > 0:
                        frame_height, frame_width = frame.shape[:2]
                        boxes = bounding_boxes[:, 0:4].astype(np.int32)

                        for box in boxes:
                            # Plain ints keep the OpenCV drawing calls happy.
                            x1, y1, x2, y2 = (int(v) for v in box)

                            # Skip faces touching or crossing the border.
                            if (x1 <= 0 or y1 <= 0 or x2 >= frame_width
                                    or y2 >= frame_height):
                                print('La cara esta muy cerca!')
                                continue

                            # Per-face locals (instead of lists indexed by
                            # the detection number) stay correct after a
                            # skipped face.
                            (best_class_indices,
                             best_class_probabilities) = classify_face(
                                 frame[y1:y2, x1:x2, :])

                            if best_class_probabilities[0] <= 0.53:
                                continue

                            cv2.rectangle(frame, (x1, y1), (x2, y2),
                                          (66, 153, 236), 1)  # box the face

                            print('Índices de resultados: ',
                                  best_class_indices[0])
                            print(HumanNames)
                            if best_class_indices[0] >= len(HumanNames):
                                # Classifier knows a class with no matching
                                # training directory: nothing to label.
                                continue
                            result_names = HumanNames[best_class_indices[0]]

                            # Probability above the box, name below it.
                            dec = np.round(best_class_probabilities, 4)
                            cv2.putText(frame,
                                        str(dec), (x1 + 15, y1 - 10),
                                        cv2.FONT_ITALIC,
                                        0.5, (30, 103, 202),
                                        thickness=1,
                                        lineType=1)
                            cv2.putText(frame,
                                        result_names, (x1, y2 + 20),
                                        cv2.FONT_HERSHEY_DUPLEX,
                                        0.7, (8, 6, 98),
                                        thickness=1,
                                        lineType=0)

                            if result_names not in nombres_hablados:
                                nombres_hablados.append(result_names)
                                voice_path = os.path.join(
                                    dir_voces, f'{result_names}.wav')
                                # Open the greeting only when it is going to
                                # be played, and close it afterwards.
                                with wave.open(voice_path, 'rb') as wf:
                                    speech(wf)
                                print('hola')
                    else:
                        print('Fallo de alineación')

                    marco_display = cv2.resize(frame, (1200, 650),
                                               interpolation=cv2.INTER_CUBIC)
                    cv2.imshow('Detectando rostros en vivo..', marco_display)

                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break
            finally:
                video_capture.release()
                cv2.destroyAllWindows()
def video_svm():
    """Run live face recognition on camera 0 and return the index page.

    The function loads the MTCNN detector weights from ``./npy``, the model
    graph from ``./modelo_transferlearning/20170511-185253.pb`` and the
    pickled ``(model, class_names)`` classifier from
    ``./resultados/classifier2.pkl``. Every captured frame is searched for
    faces. Each face whose bounding box lies strictly inside the frame is
    embedded, classified, boxed and labelled ``"<name>: <probability>%"``,
    where the name is the entry of the sorted ``./static/photos`` listing at
    the predicted index. No probability threshold is applied.

    The loop ends when ``q`` is pressed in the preview window or when the
    camera stops delivering frames. The camera, the preview windows and the
    TensorFlow session are released even if an error interrupts the loop.

    Returns:
        The result of ``render_template('index.html')``.
    """
    modeldir = './modelo_transferlearning/20170511-185253.pb'
    classifier_filename = './resultados/classifier2.pkl'
    npy = './npy'
    train_img = './static/photos'

    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
        config = tf.ConfigProto(gpu_options=gpu_options,
                                log_device_placement=False)
        # Entering the session installs it as the default session and closes
        # it on exit, so its resources are not leaked.
        with tf.Session(config=config) as sess:
            pnet, rnet, onet = detect_face.create_mtcnn(sess, npy)

            minsize = 20  # minimum size of face
            threshold = [0.6, 0.7, 0.7]  # thresholds of the three steps
            factor = 0.709  # scale factor
            image_size = 182
            input_image_size = 160

            HumanNames = sorted(os.listdir(train_img))

            print('Loading Modal')
            facenet.load_model(modeldir)
            graph = tf.get_default_graph()
            images_placeholder = graph.get_tensor_by_name("input:0")
            embeddings = graph.get_tensor_by_name("embeddings:0")
            phase_train_placeholder = graph.get_tensor_by_name(
                "phase_train:0")
            embedding_size = embeddings.get_shape()[1]

            classifier_filename_exp = os.path.expanduser(classifier_filename)
            # NOTE: unpickling executes arbitrary code; this file must come
            # from a trusted source.
            with open(classifier_filename_exp, 'rb') as infile:
                (model, _class_names) = pickle.load(infile)

            def classify_face(face_pixels):
                """Return (best class indices, best probabilities) for a crop."""
                face_pixels = facenet.flip(face_pixels, False)
                face_pixels = misc.imresize(face_pixels,
                                            (image_size, image_size),
                                            interp='bilinear')
                face_pixels = cv2.resize(
                    face_pixels, (input_image_size, input_image_size),
                    interpolation=cv2.INTER_CUBIC)
                face_pixels = facenet.prewhiten(face_pixels)
                feed_dict = {
                    images_placeholder: face_pixels.reshape(
                        -1, input_image_size, input_image_size, 3),
                    phase_train_placeholder: False
                }
                emb_array = np.zeros((1, embedding_size))
                emb_array[0, :] = sess.run(embeddings, feed_dict=feed_dict)
                predictions = model.predict_proba(emb_array)
                print(predictions)
                best_class_indices = np.argmax(predictions, axis=1)
                best_class_probabilities = predictions[
                    np.arange(len(best_class_indices)), best_class_indices]
                print(best_class_indices, ' with accuracy ',
                      best_class_probabilities)
                return best_class_indices, best_class_probabilities

            video_capture = cv2.VideoCapture(0)

            print('Comieza el reconocimiento')
            try:
                while True:
                    ret, frame = video_capture.read()
                    if not ret or frame is None:
                        # Without this guard a failed grab crashes on
                        # frame.ndim below.
                        print('No se pudo leer un frame de la cámara')
                        break

                    if frame.ndim == 2:
                        frame = facenet.to_rgb(frame)
                    frame = frame[:, :, 0:3]
                    bounding_boxes, _ = detect_face.detect_face(
                        frame, minsize, pnet, rnet, onet, threshold, factor)
                    nrof_faces = bounding_boxes.shape[0]
                    print('Nro de Caras detectadas: %d' % nrof_faces)

                    if nrof_faces > 0:
                        frame_height, frame_width = frame.shape[:2]
                        boxes = bounding_boxes[:, 0:4].astype(np.int32)

                        for box in boxes:
                            # Plain ints keep the OpenCV drawing calls happy.
                            x1, y1, x2, y2 = (int(v) for v in box)

                            # Skip faces touching or crossing the border.
                            if (x1 <= 0 or y1 <= 0 or x2 >= frame_width
                                    or y2 >= frame_height):
                                print('Face is very close!')
                                continue

                            # Per-face locals mean a skipped face no longer
                            # forces the remaining faces of the frame to be
                            # dropped.
                            (best_class_indices,
                             best_class_probabilities) = classify_face(
                                 frame[y1:y2, x1:x2, :])

                            cv2.rectangle(frame, (x1, y1), (x2, y2),
                                          (0, 255, 0), 2)  # box the face

                            print('Resultado Indice Vector: ',
                                  best_class_indices[0])
                            print(HumanNames)
                            if best_class_indices[0] >= len(HumanNames):
                                # Classifier knows a class with no matching
                                # photo directory: leave the box unlabelled.
                                continue
                            result_names = HumanNames[best_class_indices[0]]

                            # Label goes just under the box.
                            text = '{}: {:.2f}%'.format(
                                result_names,
                                best_class_probabilities[0] * 100)
                            cv2.putText(frame,
                                        text, (x1, y2 + 20),
                                        cv2.FONT_HERSHEY_COMPLEX_SMALL,
                                        1, (0, 0, 255),
                                        thickness=1,
                                        lineType=2)
                    else:
                        print('Alignment Failure')

                    cv2.imshow('Presione la tecla "q" para cerrar ', frame)

                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break
            finally:
                video_capture.release()
                cv2.destroyAllWindows()

            return render_template('index.html')
# Example #3
# 0
def main(args):
    """Detect, recognise and track faces in the stream ``args.video_link``.

    Every frame is halved in size and searched for faces. On every
    ``args.frame_interval``-th frame each in-bounds face is also embedded and
    classified, and the result is fed to a ``Face.MyFace`` tracker: either an
    existing one whose stored coordinates are within
    ``args.target_distance`` pixels of the box's top-left corner on both
    axes and whose ``getDone()`` is ``False``, or a newly created one. On
    the other frames faces are only matched against existing trackers and
    re-labelled.

    A tracker's label is the most frequent entry of
    ``Counter(tracker.getName())``, skipping ``'Unk'`` when a second entry
    exists. It is shown only once ``tracker.getAge() > 1``. An FPS counter is
    drawn on every frame. The loop ends when ``q`` is pressed or the stream
    runs out of frames.

    Args:
        args: Namespace providing ``video_link``, ``frame_interval``,
            ``max_age``, ``modeldir``, ``debug`` (debug output is enabled
            only by the string ``'True'``), ``classifier_filename`` and
            ``target_distance``.

    Returns:
        None.
    """
    video_link = args.video_link
    print('Creating networks and loading parameters')
    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
        config = tf.ConfigProto(gpu_options=gpu_options,
                                log_device_placement=False)
        # Entering the session installs it as the default session and closes
        # it on exit, so its resources are not leaked.
        with tf.Session(config=config) as sess:
            pnet, rnet, onet = detect_face.create_mtcnn(sess, None)

            minsize = 20  # minimum size of face
            threshold = [0.6, 0.7, 0.7]  # thresholds of the three steps
            factor = 0.709  # scale factor
            frame_interval = args.frame_interval
            image_size = 182
            input_image_size = 160
            max_age = args.max_age

            print('Loading feature extraction model')
            modeldir = args.modeldir
            print("Debug: ", args.debug)
            debug = args.debug == 'True'
            if debug:
                print("videoLink: ", video_link)
            facenet.load_model(modeldir)

            graph = tf.get_default_graph()
            images_placeholder = graph.get_tensor_by_name("input:0")
            embeddings = graph.get_tensor_by_name("embeddings:0")
            phase_train_placeholder = graph.get_tensor_by_name(
                "phase_train:0")
            embedding_size = embeddings.get_shape()[1]

            classifier_filename_exp = os.path.expanduser(
                args.classifier_filename)
            # NOTE: unpickling executes arbitrary code and the path comes
            # from the caller; only pass classifier files you trust.
            with open(classifier_filename_exp, 'rb') as infile:
                (model, class_names) = pickle.load(infile)
                print('load classifier file-> %s' % classifier_filename_exp)

            video_capture = cv2.VideoCapture(video_link)
            c = 0  # frames shown so far; selects the frames to classify
            fid = 0  # id handed to the next new tracker
            faces = []  # every tracker created so far
            target_distance = args.target_distance

            def classify_face(face_pixels):
                """Return (class index, probability) of the best match."""
                face_pixels = facenet.flip(face_pixels, False)
                face_pixels = misc.imresize(face_pixels,
                                            (image_size, image_size),
                                            interp='bilinear')
                face_pixels = cv2.resize(
                    face_pixels, (input_image_size, input_image_size),
                    interpolation=cv2.INTER_CUBIC)
                face_pixels = facenet.prewhiten(face_pixels)
                feed_dict = {
                    images_placeholder: face_pixels.reshape(
                        -1, input_image_size, input_image_size, 3),
                    phase_train_placeholder: False
                }
                emb_array = np.zeros((1, embedding_size))
                emb_array[0, :] = sess.run(embeddings, feed_dict=feed_dict)
                predictions = model.predict_proba(emb_array)
                best_class_indices = np.argmax(predictions, axis=1)
                best_class_probabilities = predictions[
                    np.arange(len(best_class_indices)), best_class_indices]
                return best_class_indices[0], best_class_probabilities[0]

            def is_same_face(tracker, x, y):
                """Tell whether an active tracker sits close to (x, y)."""
                return (abs(x - tracker.getX()) <= target_distance
                        and abs(y - tracker.getY()) <= target_distance
                        and tracker.getDone() is False)

            def print_match(tracker, x, y):
                """Debug output for a tracker matched at (x, y)."""
                print(tracker.getAge(), 'X Diff: ', abs(x - tracker.getX()),
                      'Y Diff: ', abs(y - tracker.getY()))

            def pick_name(most_common):
                """Pick the label from ``Counter.most_common()`` output."""
                if not most_common:
                    return 'Unknown'
                first_name = most_common[0][0]
                if first_name == 'Unk' and len(most_common) >= 2:
                    # Prefer the runner-up over the 'Unk' placeholder.
                    return most_common[1][0]
                return first_name

            def draw_label(image, box, color, label):
                """Box the face and write ``label`` just under the box."""
                left, top, right, bottom = box
                cv2.rectangle(image, (left, top), (right, bottom), color,
                              2)  # boxing face
                if label == 'Unk':
                    label = 'Unknown'
                if debug:
                    print('Detected As: ', label)
                cv2.putText(image,
                            label, (left, bottom + 20),
                            cv2.FONT_HERSHEY_COMPLEX_SMALL,
                            1,
                            color,
                            thickness=1,
                            lineType=2)

            print('Start Recognition!')
            prevTime = 0
            try:
                while True:
                    ret, frame = video_capture.read()
                    if not ret or frame is None:
                        # End of the file/stream: leave the loop instead of
                        # crashing in cv2.resize.
                        if debug:
                            print('End of video stream')
                        break

                    frame = cv2.resize(frame, (0, 0), fx=0.5,
                                       fy=0.5)  # resize frame (optional)

                    curTime = time.time() + 1  # calc fps
                    for tracker in faces:
                        tracker.age_one()

                    processing = c % frame_interval == 0
                    if not processing and debug:
                        print('Processing Status: NOT PROCESSING FRAME')

                    if frame.ndim == 2:
                        frame = facenet.to_rgb(frame)
                    frame = frame[:, :, 0:3]
                    bounding_boxes, _ = detect_face.detect_face(
                        frame, minsize, pnet, rnet, onet, threshold, factor)
                    nrof_faces = bounding_boxes.shape[0]

                    if nrof_faces > 0:
                        frame_height, frame_width = frame.shape[:2]
                        boxes = bounding_boxes[:, 0:4].astype(np.int32)

                        for box in boxes:
                            # Plain ints keep the OpenCV drawing calls happy.
                            x1, y1, x2, y2 = (int(v) for v in box)

                            # Skip faces touching or crossing the border.
                            if (x1 <= 0 or y1 <= 0 or x2 >= frame_width
                                    or y2 >= frame_height):
                                if debug:
                                    print('face is inner of range!')
                                continue

                            # These flags are per detection; keeping them
                            # per frame let the first face's state leak
                            # into the other faces of the same frame.
                            show = False

                            if not processing:
                                # Only re-label faces that are already
                                # tracked.
                                for tracker in faces:
                                    if not is_same_face(tracker, x1, y1):
                                        continue
                                    if debug:
                                        print_match(tracker, x1, y1)
                                    if tracker.getAge() > 1:
                                        show = True
                                    color = tracker.getRGB()
                                    most_common = Counter(
                                        tracker.getName()).most_common()
                                    if debug:
                                        print('Show: ', show)
                                        print(most_common)
                                    if show:
                                        draw_label(frame, (x1, y1, x2, y2),
                                                   color,
                                                   pick_name(most_common))
                                continue

                            if debug:
                                print('Processing Status: PROCESSING FRAME')
                            best_index, best_probability = classify_face(
                                frame[y1:y2, x1:x2, :])
                            if debug:
                                print('frame_interval: ', frame_interval)

                            # The code below requires class names shaped
                            # like "<staff id> <name>".
                            result_names = class_names[best_index]
                            is_new = True
                            name_to_show = ''

                            for tracker in faces:
                                if not is_same_face(tracker, x1, y1):
                                    continue
                                if debug:
                                    print_match(tracker, x1, y1)
                                is_new = False
                                if best_probability > 0.20:
                                    tracker.updateCoords(x1, y1)
                                    tracker.updateConfidence(
                                        best_probability)
                                    tracker.updateStaffID(
                                        result_names.split(' ')[0])
                                    tracker.updateName(
                                        result_names.split(' ')[1])

                                if tracker.getAge() > 1:
                                    show = True

                                color = tracker.getRGB()
                                most_common = Counter(
                                    tracker.getName()).most_common()
                                if debug:
                                    print('Show: ', show)
                                    print(most_common)
                                if show:
                                    name_to_show = pick_name(most_common)

                            if is_new:
                                # No tracker close enough: start a new one,
                                # boxed without a label for now.
                                tracker = Face.MyFace(fid, x1, y1, max_age)
                                tracker.updateConfidence(best_probability)
                                tracker.updateStaffID(
                                    result_names.split(' ')[0])
                                tracker.updateName(
                                    result_names.split(' ')[1])
                                color = tracker.getRGB()
                                faces.append(tracker)
                                fid += 1

                            draw_label(frame, (x1, y1, x2, y2), color,
                                       name_to_show)
                    elif processing and debug:
                        print('Unable to align')

                    sec = curTime - prevTime
                    prevTime = curTime
                    # Two frames can share a timestamp on coarse clocks.
                    fps = 1 / sec if sec > 0 else 0.0
                    fps_text = 'FPS: %2.3f' % fps
                    text_fps_x = len(frame[0]) - 150
                    text_fps_y = 20
                    cv2.putText(frame,
                                fps_text, (text_fps_x, text_fps_y),
                                cv2.FONT_HERSHEY_COMPLEX_SMALL,
                                1, (0, 0, 0),
                                thickness=1,
                                lineType=2)
                    c += 1
                    if frame.shape[0] < 1000:
                        frame = cv2.resize(frame, (0, 0), fx=1.5, fy=1.5)
                    cv2.imshow('Video', frame)

                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break
            finally:
                video_capture.release()
                cv2.destroyAllWindows()
def Recognize(idList):
    """Recognise faces in the ``video`` stream and record the ids that were seen.

    Creates a TensorFlow graph and session, loads the MTCNN detector, the
    FaceNet embedding model and the pickled classifier, then reads frames from
    ``cv2.VideoCapture(video)`` until the stream stops delivering frames or
    ``q`` is pressed in the preview window.

    A face whose best class probability exceeds 0.87 is labelled with
    ``GetProfile(<directory name>)[1]`` (``"?"`` when no profile is returned)
    and, when a profile exists, its integer id is appended to ``idList``.
    Other faces are labelled ``"?"``.

    Relies on the module-level names ``npy``, ``train_img``, ``modeldir``,
    ``classifier_filename``, ``video`` and ``GetProfile``.

    Args:
        idList: list that is mutated in place; receives ``int(<directory
            name>)`` of every recognised person that is not already present.
    """
    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        with sess.as_default():
            pnet, rnet, onet = detect_face.create_mtcnn(sess, npy)
            minsize = 30  # minimum size of face
            threshold = [0.7, 0.8, 0.8]  # three steps's threshold
            factor = 0.709  # scale factor
            image_size = 182
            input_image_size = 160
            min_probability = 0.87  # below this a face is labelled "?"
            HumanNames = sorted(os.listdir(train_img))
            print('Loading Model')
            facenet.load_model(modeldir)
            graph = tf.get_default_graph()
            images_placeholder = graph.get_tensor_by_name("input:0")
            embeddings = graph.get_tensor_by_name("embeddings:0")
            phase_train_placeholder = graph.get_tensor_by_name("phase_train:0")
            embedding_size = embeddings.get_shape()[1]
            classifier_filename_exp = os.path.expanduser(classifier_filename)
            # NOTE: pickle executes arbitrary code; only load trusted files.
            with open(classifier_filename_exp, 'rb') as infile:
                (model, _) = pickle.load(infile, encoding='latin1')

            video_capture = cv2.VideoCapture(video)
            print('Start Recognition')
            try:
                while True:
                    ret, frame = video_capture.read()
                    if not ret or frame is None:
                        # End of stream or device failure: stop instead of
                        # crashing on ``frame.ndim`` below.
                        break
                    timer = time.time()
                    if frame.ndim == 2:
                        frame = facenet.to_rgb(frame)
                    bounding_boxes, _ = detect_face.detect_face(
                        frame, minsize, pnet, rnet, onet, threshold, factor)
                    frame_h, frame_w = frame.shape[0], frame.shape[1]

                    for i in range(bounding_boxes.shape[0]):
                        emb_array = np.zeros((1, embedding_size))
                        xmin, ymin, xmax, ymax = (
                            int(v) for v in bounding_boxes[i, 0:4])
                        try:
                            if (xmin <= 0 or ymin <= 0 or xmax >= frame_w
                                    or ymax >= frame_h):
                                print('Face is very close!')
                                continue
                            # Per-face locals: parallel lists indexed by ``i``
                            # go out of step once a face has been skipped.
                            face = facenet.flip(
                                frame[ymin:ymax, xmin:xmax, :], False)
                            face = np.array(
                                Image.fromarray(face).resize(
                                    (image_size, image_size)))
                            face = cv2.resize(
                                face, (input_image_size, input_image_size),
                                interpolation=cv2.INTER_CUBIC)
                            face = facenet.prewhiten(face)
                            feed_dict = {
                                images_placeholder: face.reshape(
                                    -1, input_image_size, input_image_size, 3),
                                phase_train_placeholder: False
                            }
                            emb_array[0, :] = sess.run(embeddings,
                                                       feed_dict=feed_dict)
                            predictions = model.predict_proba(emb_array)
                            best_index = int(np.argmax(predictions, axis=1)[0])
                            best_probability = predictions[0, best_index]

                            cv2.rectangle(frame, (xmin, ymin), (xmax, ymax),
                                          (0, 255, 0), 2)  # boxing face
                            result_names = "?"
                            if best_probability > min_probability:
                                result_ids = HumanNames[best_index]
                                profile = GetProfile(result_ids)
                                if profile is not None:
                                    result_names = profile[1]
                                    if int(result_ids) not in idList:
                                        idList.append(int(result_ids))
                                print(
                                    "Predictions : [ name: {} , accuracy: {:.3f} ]"
                                    .format(result_ids, best_probability))
                            # Filled label strip just above the face box.
                            cv2.rectangle(frame, (xmin, ymin - 20),
                                          (xmax, ymin - 2), (0, 255, 255), -1)
                            cv2.putText(frame,
                                        result_names, (xmin, ymin - 5),
                                        cv2.FONT_HERSHEY_COMPLEX_SMALL,
                                        1, (0, 0, 0),
                                        thickness=1,
                                        lineType=1)
                        except Exception as exc:
                            # Best effort per face: report and keep going with
                            # the remaining faces of this frame.
                            print("error", repr(exc))

                    elapsed = time.time() - timer
                    fps = 1 / elapsed if elapsed > 0 else 0.0
                    cv2.rectangle(frame, (15, 30), (135, 60), (0, 255, 255),
                                  -1)
                    cv2.putText(frame, "fps: {:.2f}".format(fps), (20, 50),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2)
                    cv2.imshow('Face Recognition', frame)
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break
            finally:
                # Release the capture and windows even if a frame raised.
                video_capture.release()
                cv2.destroyAllWindows()
def predict(img_path):
    """Detect and label the faces of one image file, then show it in a window.

    The image is read with ``cv2.imread``, scaled to half size, run through
    the MTCNN detector and, for every face fully inside the frame, through
    the embedding network and classifier. Each such face gets a green box and
    the predicted entry of ``HumanNames`` written below it. The annotated
    image is displayed in the ``'Image'`` window; pressing ``q`` closes it.

    Reads the module-level names ``sess``, ``pnet``, ``rnet``, ``onet``,
    ``embedding_size``, ``images_placeholder``, ``embeddings``, ``model``,
    ``HumanNames`` and ``phase_train_placeholder``.

    Args:
        img_path: path of the image to analyse.

    Returns:
        None. If the image cannot be read a message is printed and the
        function returns early.
    """
    minsize = 20  # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # three steps's threshold
    factor = 0.709  # scale factor
    image_size = 182
    input_image_size = 160
    print('Start Recognition!')

    frame = cv2.imread(img_path)
    if frame is None:
        # cv2.imread signals failure with None rather than an exception.
        print('Unable to read image: %s' % img_path)
        return
    frame = cv2.resize(frame, (0, 0), fx=0.5, fy=0.5)  # resize frame (optional)

    if frame.ndim == 2:
        frame = facenet.to_rgb(frame)
    frame = frame[:, :, 0:3]
    bounding_boxes, _ = detect_face.detect_face(frame, minsize, pnet, rnet,
                                                onet, threshold, factor)
    nrof_faces = bounding_boxes.shape[0]  # number of detections
    print('Face Detected: %d' % nrof_faces)

    if nrof_faces == 0:
        print('Unable to align')

    frame_h, frame_w = frame.shape[0], frame.shape[1]
    for i in range(nrof_faces):
        emb_array = np.zeros((1, embedding_size))
        x1, y1, x2, y2 = (int(v) for v in bounding_boxes[i, 0:4])

        # Skip faces whose box touches or leaves the frame border.
        if x1 <= 0 or y1 <= 0 or x2 >= frame_w or y2 >= frame_h:
            print('face is too close')
            continue

        # Per-face locals: lists indexed by ``i`` go out of step (IndexError)
        # as soon as an earlier face has been skipped.
        face = facenet.flip(frame[y1:y2, x1:x2, :], False)
        # NOTE(review): presumably ``misc`` is scipy.misc; ``imresize`` was
        # removed in SciPy 1.3, so this needs an older SciPy - confirm.
        face = misc.imresize(face, (image_size, image_size),
                             interp='bilinear')
        face = cv2.resize(face, (input_image_size, input_image_size),
                          interpolation=cv2.INTER_CUBIC)
        face = facenet.prewhiten(face)
        feed_dict = {
            images_placeholder: face.reshape(-1, input_image_size,
                                             input_image_size, 3),
            phase_train_placeholder: False
        }
        emb_array[0, :] = sess.run(embeddings, feed_dict=feed_dict)
        predictions = model.predict_proba(emb_array)

        best_index = np.argmax(predictions, axis=1)[0]
        result_names = HumanNames[best_index]
        print(result_names)
        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0),
                      2)  # boxing face
        print('Result Indices: ', best_index)
        # Plot the predicted name under the box.
        cv2.putText(frame,
                    result_names, (x1, y2 + 20),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL,
                    1, (0, 0, 255),
                    thickness=1,
                    lineType=2)

    cv2.imshow('Image', frame)

    if cv2.waitKey(1000000) & 0xFF == ord('q'):
        cv2.destroyAllWindows()
def _main():
    """Run webcam face recognition with a YOLO detector and FaceNet embeddings.

    Parses arguments with ``get_args()``, loads the FaceNet model from
    ``./models/facenet/20190310-055158`` and the pickled classifier from
    ``./myclassifier/my_classifier.pkl``, then processes frames of camera 0.
    Faces come from ``YOLO(args).detect_image``; each face fully inside the
    frame is embedded, classified and labelled with its class name, or
    ``"Unknown"`` when the best probability is not above 0.45. The loop ends
    when ``q`` is pressed or the camera stops delivering frames.
    """
    args = get_args()

    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
        with sess.as_default():
            image_size = 182
            input_image_size = 160
            min_probability = 0.45  # at or below this the face is "Unknown"

            print('Loading feature extraction model')
            modeldir = './models/facenet/20190310-055158'
            facenet.load_model(modeldir)

            graph = tf.get_default_graph()
            images_placeholder = graph.get_tensor_by_name("input:0")
            embeddings = graph.get_tensor_by_name("embeddings:0")
            phase_train_placeholder = graph.get_tensor_by_name("phase_train:0")
            embedding_size = embeddings.get_shape()[1]

            classifier_filename = './myclassifier/my_classifier.pkl'
            classifier_filename_exp = os.path.expanduser(classifier_filename)
            # NOTE: pickle executes arbitrary code; only load trusted files.
            with open(classifier_filename_exp, 'rb') as infile:
                (model, class_names) = pickle.load(infile)
                print('load classifier file-> %s' % classifier_filename_exp)

            video_capture = cv2.VideoCapture(0)
            print('Start Recognition!')
            prevTime = 0
            try:
                myYolo = YOLO(args)
                while True:
                    ret, frame = video_capture.read()
                    if not ret or frame is None:
                        # The camera delivered nothing; stop instead of
                        # failing on ``frame.ndim`` below.
                        print('Unable to read frame from camera')
                        break

                    curTime = time.time()  # calc fps

                    if frame.ndim == 2:
                        frame = facenet.to_rgb(frame)
                    frame = frame[:, :, 0:3]

                    image = Image.fromarray(frame)
                    _, bounding_boxes = myYolo.detect_image(image)
                    if len(bounding_boxes) == 0:
                        print('Unable to align')

                    frame_h, frame_w = frame.shape[0], frame.shape[1]
                    for box in bounding_boxes:
                        emb_array = np.zeros((1, embedding_size))
                        x1, y1, x2, y2 = (int(v) for v in box[:4])

                        # Skip boxes touching or leaving the frame border.
                        if x1 <= 0 or y1 <= 0 or x2 >= frame_w or y2 >= frame_h:
                            print('face is inner of range!')
                            continue

                        cropped = frame[y1:y2, x1:x2, :]
                        print("{0} {1} {2} {3}".format(x1, y1, x2, y2))
                        cropped = facenet.flip(cropped, False)
                        # NOTE(review): presumably ``misc`` is scipy.misc;
                        # ``imresize`` was removed in SciPy 1.3 - confirm.
                        scaled = misc.imresize(cropped, (image_size, image_size), interp='bilinear')
                        scaled = cv2.resize(scaled, (input_image_size, input_image_size),
                                            interpolation=cv2.INTER_CUBIC)
                        scaled = facenet.prewhiten(scaled)
                        feed_dict = {
                            images_placeholder: scaled.reshape(-1, input_image_size, input_image_size, 3),
                            phase_train_placeholder: False,
                        }
                        emb_array[0, :] = sess.run(embeddings, feed_dict=feed_dict)

                        predictions = model.predict_proba(emb_array)
                        best_class_indices = np.argmax(predictions, axis=1)
                        best_class_probabilities = predictions[np.arange(len(best_class_indices)), best_class_indices]
                        print(best_class_probabilities)
                        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

                        if best_class_probabilities[0] > min_probability:
                            result_names = class_names[best_class_indices[0]]
                        else:
                            result_names = "Unknown"
                        cv2.putText(frame, result_names, (x1, y2 + 20), cv2.FONT_HERSHEY_COMPLEX_SMALL,
                                    1, (0, 0, 255), thickness=1, lineType=2)

                    sec = curTime - prevTime
                    prevTime = curTime
                    # Two reads can share a timestamp on coarse clocks.
                    fps = 1 / sec if sec > 0 else 0.0
                    fps_text = 'FPS: %2.3f' % fps
                    cv2.putText(frame, fps_text, (frame_w - 150, 20),
                                cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 0), thickness=1, lineType=2)
                    cv2.imshow('Video', frame)

                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break
            finally:
                # Release the camera and windows even if a frame raised.
                video_capture.release()
                cv2.destroyAllWindows()
Beispiel #7
0
def one_by_one(rel_path, url=False):
    """Classify the first detected face of each image and post-process the scores.

    With ``url`` false every file matching ``<rel_path>/*`` is read with
    ``cv2.imread``; with ``url`` true ``rel_path`` itself is fetched once via
    ``urlget`` and decoded with PIL. For every image all faces are detected
    and classified (and drawn when ``show_flag`` is set), but only face
    number 0 contributes a result row: its class probabilities followed by
    the width and height of its bounding box. The rows are passed through
    ``proc_line`` via ``DataFrame.apply``.

    Relies on module-level state (``c``, ``frame_interval``, ``show_flag``,
    the detector/embedding/classifier objects, ``class_names``,
    ``proc_line``, ``urlget``).

    Args:
        rel_path: directory of images, or the URL when ``url`` is true.
        url: treat ``rel_path`` as a single URL instead of a directory.

    Returns:
        ``numpy`` array with one ``proc_line`` result per usable image, or
        ``None`` when no image produced a result.

    Raises:
        SystemExit: with code -1 when the download fails or an image
            contains no face.
    """
    print('Start Recognition!')
    prevTime = 0
    # TODO: support multiple url
    img_list = [None] if url else glob.glob(os.path.join(rel_path, '*'))
    results = []
    for img_path in img_list:  # for each image in the list
        if url:
            try:
                rsp = urlget(rel_path)
                if rsp.status_code == 200:
                    frame = np.array(Image.open(BytesIO(rsp.content)))
                else:
                    print('status code: ', rsp.status_code)
                    raise SystemExit(-1)
            except Exception as e:
                print(repr(e))
                raise SystemExit(-1)
        else:
            frame = cv2.imread(img_path)

        if frame is None:
            print(
                f'failure in reading image {img_path}, do not use chinese characters in file name!'
            )
            continue
        curTime = time.time()  # calc fps

        # (class index, class probabilities, box) of face number 0, if usable.
        first_face = None

        if c % frame_interval == 0:  # detect faces in the current image
            if frame.ndim == 2:
                frame = facenet.to_rgb(frame)
            frame = frame[:, :, 0:3]
            bounding_boxes, _ = detect_face.detect_face(
                frame, minsize, pnet, rnet, onet, threshold, factor)
            nrof_faces = bounding_boxes.shape[0]
            print('Detected_FaceNum: %d' % nrof_faces)

            if nrof_faces == 0:
                print('No face detected.')
                raise SystemExit(-1)

            frame_h, frame_w = frame.shape[0], frame.shape[1]
            for i in range(nrof_faces):  # crop all the faces
                emb_array = np.zeros((1, embedding_size))
                x1, y1, x2, y2 = (int(v) for v in bounding_boxes[i, 0:4])

                # Skip faces whose box touches or leaves the frame border.
                if x1 <= 0 or y1 <= 0 or x2 >= frame_w or y2 >= frame_h:
                    print('face is out of range!')
                    continue

                # Work on this face only; indexing shared lists with [0]
                # made every later face reuse the first face's tensor.
                face = facenet.flip(frame[y1:y2, x1:x2, :], False)
                face = facenet.imresize(face, (image_size, image_size),
                                        interp='bilinear')
                face = cv2.resize(face, (input_image_size, input_image_size),
                                  interpolation=cv2.INTER_CUBIC)
                face = facenet.prewhiten(face)
                feed_dict = {
                    images_placeholder: face.reshape(
                        -1, input_image_size, input_image_size, 3),
                    phase_train_placeholder: False
                }
                emb_array[0, :] = sess.run(embeddings, feed_dict=feed_dict)
                predictions = model.predict_proba(emb_array)
                best_index = np.argmax(predictions, axis=1)[0]
                if i == 0:
                    first_face = (best_index, list(predictions[0]),
                                  (x1, y1, x2, y2))
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0),
                              2)  # boxing face

                if show_flag:
                    # plot the predicted name under the box
                    cv2.putText(frame,
                                class_names[best_index], (x1, y2 + 20),
                                cv2.FONT_HERSHEY_COMPLEX_SMALL,
                                1, (0, 0, 255),
                                thickness=1,
                                lineType=2)

        sec = curTime - prevTime
        prevTime = curTime
        if show_flag:
            fps = 1 / sec if sec > 0 else 0.0
            fps_text = 'FPS: %2.3f' % fps
            cv2.putText(frame,
                        fps_text, (frame.shape[1] - 150, 20),
                        cv2.FONT_HERSHEY_COMPLEX_SMALL,
                        1, (0, 0, 0),
                        thickness=1,
                        lineType=2)
            cv2.imshow('Video', frame)

            if cv2.waitKey(0) & 0xFF == ord('q'):
                break

        if first_face is None:
            print(f'cannot detect any face for {img_path}, skip')
            continue
        res, probabilities, (a, b, m, n) = first_face
        results.append([res] + probabilities + [m - a, n - b])

    try:
        cv2.destroyAllWindows()
    except cv2.error:
        # Headless OpenCV builds have no window support; nothing to close.
        pass

    if len(results) == 0:
        return None
    results = np.array(results)
    comb = results[:, 1:]  # drop the class index, keep scores and box size
    df = pd.DataFrame(comb)
    ret = df.apply(proc_line, axis=1)
    return ret.values
 def identify_face_video(self):
     """Recognise faces from camera 1 and publish the last match via globals.

     Loads the MTCNN detector (``./npy``), the FaceNet model
     (``./model/20170511-185253.pb``) and the pickled classifier
     (``./class/classifier.pkl``), then processes half-size frames of
     ``cv2.VideoCapture(1)`` until ``q`` is pressed or the camera stops
     delivering frames.

     For every face with a best class probability above 0.85 the module
     globals are updated: ``getName`` (class index), ``name`` (matching
     entry of the sorted ``./train_img`` listing) and ``fetch``
     (``name[-16:-1]``); ``name[:-17]`` is drawn under the face box.
     """
     global getName, name, fetch

     modeldir = './model/20170511-185253.pb'
     classifier_filename = './class/classifier.pkl'
     npy = './npy'
     train_img = "./train_img"

     with tf.Graph().as_default():
         gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
         sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
         with sess.as_default():
             pnet, rnet, onet = detect_face.create_mtcnn(sess, npy)

             minsize = 20  # minimum size of face
             threshold = [0.6, 0.7, 0.7]  # three steps's threshold
             factor = 0.709  # scale factor
             image_size = 182
             input_image_size = 160
             min_probability = 0.85

             HumanNames = sorted(os.listdir(train_img))

             print('Loading Modal')
             facenet.load_model(modeldir)
             graph = tf.get_default_graph()
             images_placeholder = graph.get_tensor_by_name("input:0")
             embeddings = graph.get_tensor_by_name("embeddings:0")
             phase_train_placeholder = graph.get_tensor_by_name("phase_train:0")
             embedding_size = embeddings.get_shape()[1]

             classifier_filename_exp = os.path.expanduser(classifier_filename)
             # NOTE: pickle executes arbitrary code; only load trusted files.
             with open(classifier_filename_exp, 'rb') as infile:
                 (model, _) = pickle.load(infile)

             video_capture = cv2.VideoCapture(1)
             print('Start Recognition')
             try:
                 while True:
                     ret, frame = video_capture.read()
                     if not ret or frame is None:
                         # No frame delivered; cv2.resize would fail on it.
                         print('Unable to read frame from camera')
                         break

                     frame = cv2.resize(frame, (0, 0), fx=0.5, fy=0.5)  # resize frame (optional)

                     if frame.ndim == 2:
                         frame = facenet.to_rgb(frame)
                     frame = frame[:, :, 0:3]
                     bounding_boxes, _ = detect_face.detect_face(frame, minsize, pnet, rnet, onet, threshold, factor)
                     nrof_faces = bounding_boxes.shape[0]
                     print('Detected_FaceNum: %d' % nrof_faces)

                     if nrof_faces == 0:
                         print('Alignment Failure')

                     frame_h, frame_w = frame.shape[0], frame.shape[1]
                     for i in range(nrof_faces):
                         emb_array = np.zeros((1, embedding_size))
                         x1, y1, x2, y2 = (int(v) for v in bounding_boxes[i, 0:4])

                         # Skip faces touching or leaving the frame border.
                         if x1 <= 0 or y1 <= 0 or x2 >= frame_w or y2 >= frame_h:
                             print('Face is very close!')
                             continue

                         # Per-face locals: lists indexed by ``i`` raised
                         # IndexError once an earlier face had been skipped.
                         face = facenet.flip(frame[y1:y2, x1:x2, :], False)
                         # NOTE(review): presumably ``misc`` is scipy.misc;
                         # ``imresize`` was removed in SciPy 1.3 - confirm.
                         face = misc.imresize(face, (image_size, image_size), interp='bilinear')
                         face = cv2.resize(face, (input_image_size, input_image_size),
                                           interpolation=cv2.INTER_CUBIC)
                         face = facenet.prewhiten(face)
                         feed_dict = {
                             images_placeholder: face.reshape(-1, input_image_size, input_image_size, 3),
                             phase_train_placeholder: False,
                         }
                         emb_array[0, :] = sess.run(embeddings, feed_dict=feed_dict)
                         predictions = model.predict_proba(emb_array)
                         print(predictions)
                         best_class_indices = np.argmax(predictions, axis=1)
                         best_class_probabilities = predictions[np.arange(len(best_class_indices)), best_class_indices]
                         print(best_class_indices, ' with accuracy ', best_class_probabilities)

                         if not best_class_probabilities[0] > min_probability:
                             continue

                         cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)  # boxing face
                         print('Result Indices: ', best_class_indices[0])
                         print(HumanNames)
                         getName = best_class_indices[0]
                         try:
                             name = HumanNames[getName]
                         except IndexError:
                             # Classifier knows more classes than train_img
                             # has entries; skip this face only.
                             print("Oops! IndexError : class index out of range for HumanNames")
                             continue
                         predict_name = name[:-17]
                         fetch = name[-16:-1]
                         # Plot the predicted name under the box.
                         cv2.putText(frame, predict_name, (x1, y2 + 20), cv2.FONT_HERSHEY_COMPLEX_SMALL,
                                     1, (0, 0, 255), thickness=1, lineType=2)

                     cv2.imshow('Video', frame)

                     if cv2.waitKey(1) & 0xFF == ord('q'):
                         break
             finally:
                 # Release the camera and windows even if a frame raised.
                 video_capture.release()
                 cv2.destroyAllWindows()
Beispiel #9
0
def Face_Recognize(frame):
    """Detect, classify and annotate every face in ``frame``, then show it.

    The function relies on module-level state prepared elsewhere: the MTCNN
    networks (``pnet``, ``rnet``, ``onet``) and their parameters, the
    TensorFlow session and FaceNet tensors, the classifier ``model`` and the
    ``names`` list indexed by predicted class.

    Args:
        frame: image array; a 2-D (grayscale) frame is expanded with
            ``facenet.to_rgb`` and only the first three channels are used.

    Returns:
        None. The annotated frame is displayed in the ``'Video'`` window.
    """
    global minsize, pnet, rnet, onet, threshold, factor, sess
    global embedding_size, image_size, phase_train_placeholder, embeddings

    if frame.ndim == 2:
        frame = facenet.to_rgb(frame)
    frame = frame[:, :, 0:3]
    bounding_boxes, _ = detect_face.detect_face(frame, minsize, pnet, rnet,
                                                onet, threshold, factor)
    nrof_faces = bounding_boxes.shape[0]

    print('Detected Faces: %d' % nrof_faces)

    if nrof_faces > 0:
        det = bounding_boxes[:, 0:4]
        bb = np.zeros((nrof_faces, 4), dtype=np.int32)
        frame_height, frame_width = frame.shape[0], frame.shape[1]

        for i in range(nrof_faces):
            # Assigning into the int32 array truncates the float coordinates.
            bb[i] = det[i]
            left, top, right, bottom = bb[i]

            # Skip boxes that touch or cross the frame border.
            if (left <= 0 or top <= 0 or right >= frame_width
                    or bottom >= frame_height):
                print('face is inner of range!')
                continue

            # Per-face locals instead of lists indexed by ``i``: the lists
            # fell out of step with ``i`` as soon as one face was skipped
            # above, raising IndexError (or using the wrong crop).
            face = facenet.flip(frame[top:bottom, left:right, :], False)
            face = misc.imresize(face, (image_size, image_size),
                                 interp='bilinear')
            face = cv2.resize(face, (input_image_size, input_image_size),
                              interpolation=cv2.INTER_CUBIC)
            face = facenet.prewhiten(face)
            face_batch = face.reshape(-1, input_image_size,
                                      input_image_size, 3)

            feed_dict = {
                images_placeholder: face_batch,
                phase_train_placeholder: False
            }
            emb_array = np.zeros((1, embedding_size))
            emb_array[0, :] = sess.run(embeddings, feed_dict=feed_dict)
            predictions = model.predict_proba(emb_array)
            print("Distances:")
            print(predictions)
            best_class_indices = np.argmax(predictions, axis=1)
            cv2.rectangle(frame, (left, top), (right, bottom),
                          (0, 255, 0), 2)  # boxing face

            # Plot the predicted name under the box.
            text_x = left
            text_y = bottom + 20
            print('Names: ')
            print(names)
            result_names = names[best_class_indices[0]]
            print("Person: " + result_names)
            cv2.putText(frame,
                        result_names, (text_x, text_y),
                        cv2.FONT_HERSHEY_COMPLEX_SMALL,
                        1, (0, 0, 255),
                        thickness=1,
                        lineType=2)
    else:
        print('Unable to align')

    cv2.imshow('Video', frame)
Beispiel #10
0
        def predict():
            """Return the name of the first confidently recognised face.

            The frame is decoded from ``request.data`` with ``YUVtoRGB``,
            halved in size and rotated by 270 degrees before MTCNN detection
            and FaceNet classification.

            Returns:
                ``"<name> <probability * 100>%"`` for the first detected face
                whose best class probability exceeds 0.8, otherwise ``""``.
            """
            frame = YUVtoRGB(list(request.data))

            frame = cv2.resize(frame, (0, 0), fx=0.5, fy=0.5)  # resize frame (optional)

            # Rotate 270 degrees about the frame centre.
            # NOTE(review): the output size is (h, w) while the rotation is
            # about the original centre; for non-square frames this looks
            # like it clips part of the image -- confirm whether a plain
            # 90-degree rotation was intended.
            (h, w) = frame.shape[:2]
            center = (w / 2, h / 2)
            M = cv2.getRotationMatrix2D(center, 270, 1.0)
            frame = cv2.warpAffine(frame, M, (h, w))

            # Only every ``frame_interval``-th frame is analysed.
            if c % frame_interval != 0:
                return ""

            if frame.ndim == 2:
                frame = facenet.to_rgb(frame)
            frame = frame[:, :, 0:3]
            bounding_boxes, _ = detect_face.detect_face(frame, minsize, pnet, rnet, onet, threshold, factor)
            nrof_faces = bounding_boxes.shape[0]
            if nrof_faces == 0:
                return ""

            print('Detected_FaceNum: %d' % nrof_faces)
            det = bounding_boxes[:, 0:4]
            bb = np.zeros((nrof_faces, 4), dtype=np.int32)
            frame_height, frame_width = frame.shape[0], frame.shape[1]

            for i in range(nrof_faces):
                # Assigning into the int32 array truncates the float coordinates.
                bb[i] = det[i]
                left, top, right, bottom = bb[i]

                # Skip boxes that touch or cross the frame border.
                if left <= 0 or top <= 0 or right >= frame_width or bottom >= frame_height:
                    print('face is inner of range!')
                    continue

                # Per-face locals instead of lists indexed by ``i``: the lists
                # went out of step with ``i`` once a face had been skipped,
                # which raised IndexError for the following faces.
                face = facenet.flip(frame[top:bottom, left:right, :], False)
                face = misc.imresize(face, (image_size, image_size), interp='bilinear')
                face = cv2.resize(face, (input_image_size, input_image_size),
                                  interpolation=cv2.INTER_CUBIC)
                face = facenet.prewhiten(face)
                face_batch = face.reshape(-1, input_image_size, input_image_size, 3)

                feed_dict = {images_placeholder: face_batch, phase_train_placeholder: False}
                emb_array = np.zeros((1, embedding_size))
                emb_array[0, :] = sess.run(embeddings, feed_dict=feed_dict)
                predictions = model.predict_proba(emb_array)
                best_class_indices = np.argmax(predictions, axis=1)
                best_class_probabilities = predictions[np.arange(len(best_class_indices)), best_class_indices]
                cv2.rectangle(frame, (left, top), (right, bottom), (0, 255, 0), 2)  # boxing face

                if best_class_probabilities[0] > 0.8:
                    # The probability is formatted from the one-element array
                    # on purpose so the returned text stays unchanged.
                    result_names = HumanNames[best_class_indices[0]] + " " + str(best_class_probabilities * 100) + "%"
                    print(result_names)
                    return str(result_names)
            return ""
Beispiel #11
0
def Detect():
    """Identify the single face in the image at the module-level ``img_path``.

    The image is read as grayscale, halved in size and passed through MTCNN
    detection, FaceNet embedding and the pickled classifier. When exactly one
    face was processed, or when no face was found, the (possibly annotated)
    frame is written to ``result.jpg``.

    Returns:
        The entry of the sorted ``train_img`` directory listing selected by
        the classifier, or ``"Unknown"`` when the image cannot be read, no
        face or more than one face is detected, or the face touches the
        frame border.
    """
    ans = "Unknown"
    print(sys.path)

    # Read the image before building any network so an unreadable file fails
    # fast; cv2.imread signals failure by returning None.
    frame = cv2.imread(img_path, 0)
    if frame is None:
        print('Unable to read image: %s' % img_path)
        return ans

    frame = cv2.resize(frame, (0, 0), fx=0.5,
                       fy=0.5)  # resize frame (optional)
    if frame.ndim == 2:
        frame = facenet.to_rgb(frame)
    frame = frame[:, :, 0:3]

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
    session_config = tf.ConfigProto(gpu_options=gpu_options,
                                    log_device_placement=False)

    # A fresh graph per call keeps repeated calls from piling networks into
    # the process-wide default graph, and using the session as a context
    # manager closes it on every exit path (it was never closed before).
    with tf.Graph().as_default(), tf.Session(config=session_config) as sess:
        pnet, rnet, onet = detect_face.create_mtcnn(sess, npy)

        minsize = 20  # minimum size of face
        threshold = [0.6, 0.7, 0.7]  # three steps's threshold
        factor = 0.709  # scale factor
        image_size = 182
        input_image_size = 160

        HumanNames = sorted(os.listdir(train_img))

        print('Loading feature extraction model')
        facenet.load_model(modeldir)

        graph = tf.get_default_graph()
        images_placeholder = graph.get_tensor_by_name("input:0")
        embeddings = graph.get_tensor_by_name("embeddings:0")
        phase_train_placeholder = graph.get_tensor_by_name("phase_train:0")
        embedding_size = embeddings.get_shape()[1]

        # NOTE: pickle.load executes arbitrary code; the classifier file must
        # come from a trusted source.
        classifier_filename_exp = os.path.expanduser(classifier_filename)
        with open(classifier_filename_exp, 'rb') as infile:
            model, _class_names = pickle.load(infile)

        print('Start Recognition!')

        bounding_boxes, _ = detect_face.detect_face(
            frame, minsize, pnet, rnet, onet, threshold, factor)
        nrof_faces = bounding_boxes.shape[0]
        print('Face Detected: %d' % nrof_faces)

        # Only unambiguous images (exactly one face) are classified.
        if nrof_faces > 1:
            return ans

        if nrof_faces == 0:
            print('Unable to align')
        else:
            # astype truncates the float coordinates like the int32 array did.
            left, top, right, bottom = bounding_boxes[0, 0:4].astype(np.int32)

            # Reject a box that touches or crosses the frame border.
            if (left <= 0 or top <= 0 or right >= frame.shape[1]
                    or bottom >= frame.shape[0]):
                print('face is too close')
                return ans

            face = facenet.flip(frame[top:bottom, left:right, :], False)
            face = misc.imresize(face, (image_size, image_size),
                                 interp='bilinear')
            face = cv2.resize(face, (input_image_size, input_image_size),
                              interpolation=cv2.INTER_CUBIC)
            face = facenet.prewhiten(face)
            face_batch = face.reshape(-1, input_image_size,
                                      input_image_size, 3)

            feed_dict = {
                images_placeholder: face_batch,
                phase_train_placeholder: False
            }
            emb_array = np.zeros((1, embedding_size))
            emb_array[0, :] = sess.run(embeddings, feed_dict=feed_dict)
            predictions = model.predict_proba(emb_array)
            print(predictions)
            best_class_indices = np.argmax(predictions, axis=1)
            best_class_probabilities = predictions[
                np.arange(len(best_class_indices)), best_class_indices]
            print(best_class_probabilities)
            cv2.rectangle(frame, (left, top), (right, bottom), (0, 255, 0),
                          2)  # boxing face

            # Plot the predicted name under the box.
            text_x = left
            text_y = bottom + 20

            result_names = HumanNames[best_class_indices[0]]
            print('Result Indices: ', result_names)
            ans = result_names
            cv2.putText(frame,
                        result_names, (text_x, text_y),
                        cv2.FONT_HERSHEY_COMPLEX_SMALL,
                        1, (0, 0, 255),
                        thickness=1,
                        lineType=2)

    cv2.imwrite('result.jpg', frame)

    return ans
Beispiel #12
0
    def runTest(self):
        """Run live face recognition on camera 0 until 'q' is pressed.

        Builds the MTCNN detector from ``self.alignDirectory`` and the FaceNet
        model from ``self.modelFilePath``, classifies every detected face with
        ``self.getModel()`` and overlays the name and probability of faces
        whose best class probability is above 30 %. Each frame is shown in
        the ``'Video'`` window together with a face count and an FPS readout.

        Errors raised while capturing or recognising are printed rather than
        propagated; the camera, the video writer and the windows are released
        in every case.
        """
        print('Creating networks and loading parameters')
        with tf.Graph().as_default():
            gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
            session_config = tf.ConfigProto(
                gpu_options=gpu_options, log_device_placement=False)
            # Context-managed session: installed as default and closed on
            # exit (it was never closed before).
            with tf.Session(config=session_config) as sess:
                pnet, rnet, onet = align.detect_face.create_mtcnn(
                    sess, self.alignDirectory)
                minimunSizeOfFace = 20
                scaleFactor = 0.709
                threshold = [0.6, 0.7, 0.7]  # three steps's threshold
                frame_interval = 2
                image_size = 182
                input_image_size = 160
                facesList = self.getFacesList()
                print('Listado de rostros', facesList)
                facenet.load_model(self.modelFilePath)
                graph = tf.get_default_graph()
                images_placeholder = graph.get_tensor_by_name("input:0")
                embeddings = graph.get_tensor_by_name("embeddings:0")
                phase_train_placeholder = graph.get_tensor_by_name(
                    "phase_train:0")
                embedding_size = embeddings.get_shape()[1]

                video_capture = None
                out = None
                try:
                    model = self.getModel()

                    video_capture = cv2.VideoCapture(0)  # './test.mp4'
                    # NOTE(review): ``c`` is never incremented, so the
                    # ``frame_interval`` gate below lets every frame through.
                    c = 0

                    # NOTE(review): the writer is opened and released but no
                    # frame is ever written to it -- confirm whether
                    # recording was intended.
                    fourcc = cv2.VideoWriter_fourcc(*'DIVX')
                    out = cv2.VideoWriter('3F_0726.avi',
                                          fourcc,
                                          fps=30,
                                          frameSize=(4920, 3080))

                    print('Start Recognition!')
                    prevTime = 0
                    while True:
                        ret, frame = video_capture.read()
                        if not ret or frame is None:
                            # Camera unavailable or stream ended: stop
                            # cleanly instead of failing inside cv2.resize.
                            print('No video')
                            break

                        frame = cv2.resize(frame, (0, 0), fx=2,
                                           fy=2)  # resize frame (optional)
                        curTime = time.time()  # calc fps

                        if c % frame_interval == 0:
                            if frame.ndim == 2:
                                frame = facenet.to_rgb(frame)
                            frame = frame[:, :, 0:3]
                            detections, _ = align.detect_face.detect_face(
                                frame, minimunSizeOfFace, pnet, rnet, onet,
                                threshold, scaleFactor)
                            numberOfFacesDeteted = detections.shape[0]
                            print("----------------------")

                            self.printTextToImage(
                                frame,
                                "No. Faces " + str(numberOfFacesDeteted), 20,
                                20, "black")

                            if numberOfFacesDeteted > 0:
                                faceCoordinates = detections[:, 0:4]
                                boundingBoxesOfDetectedFace = np.zeros(
                                    (numberOfFacesDeteted, 4), dtype=np.int32)
                                frame_height = frame.shape[0]
                                frame_width = frame.shape[1]

                                for indexOfFaceDetected in range(
                                        numberOfFacesDeteted):
                                    boundingBoxesOfDetectedFace[
                                        indexOfFaceDetected] = self.getBoundingBoxesOfDetectedFaceFromCameraFrame(
                                            faceCoordinates,
                                            indexOfFaceDetected)
                                    left, top, right, bottom = boundingBoxesOfDetectedFace[
                                        indexOfFaceDetected]

                                    # Skip boxes that touch or cross the
                                    # frame border.
                                    if (left <= 0 or top <= 0
                                            or right >= frame_width
                                            or bottom >= frame_height):
                                        continue

                                    # Per-face locals instead of lists
                                    # indexed by the face number: the lists
                                    # went out of step with the index once a
                                    # face had been skipped, raising
                                    # IndexError for the following faces.
                                    face = facenet.flip(
                                        frame[top:bottom, left:right, :],
                                        False)
                                    face = misc.imresize(
                                        face, (image_size, image_size),
                                        interp='bilinear')
                                    face = cv2.resize(
                                        face,
                                        (input_image_size, input_image_size),
                                        interpolation=cv2.INTER_CUBIC)
                                    face = facenet.prewhiten(face)
                                    face_batch = face.reshape(
                                        -1, input_image_size,
                                        input_image_size, 3)

                                    feed_dict = {
                                        images_placeholder: face_batch,
                                        phase_train_placeholder: False
                                    }
                                    emb_array = np.zeros((1, embedding_size))
                                    emb_array[0, :] = sess.run(
                                        embeddings, feed_dict=feed_dict)
                                    predictions = model.predict_proba(
                                        emb_array)
                                    best_class_indices = np.argmax(predictions,
                                                                   axis=1)
                                    best_class_probabilities = predictions[
                                        np.arange(len(best_class_indices)),
                                        best_class_indices]
                                    faceName = self.getFaceNameFromFacesListByIndex(
                                        facesList, best_class_indices[0])
                                    probabilityPercent = int(
                                        best_class_probabilities[0] * 100)

                                    if probabilityPercent > 30:
                                        faceNameWithProbability = (
                                            faceName + " " +
                                            str(probabilityPercent) + "%")
                                        self.printTextAndBox(
                                            frame, boundingBoxesOfDetectedFace,
                                            indexOfFaceDetected,
                                            faceNameWithProbability)

                                    print('Predicción: ', predictions)
                                    print("face ", faceName,
                                          probabilityPercent, "%", " indice ",
                                          best_class_indices[0])

                        sec = curTime - prevTime
                        prevTime = curTime
                        # Guard against two reads landing on the same clock
                        # tick.
                        fps = 1 / sec if sec > 0 else 0.0
                        strFPS = 'FPS: %2.3f' % fps
                        self.printTextToImage(frame, strFPS, 20, 50, "black")
                        cv2.imshow('Video', frame)
                        if cv2.waitKey(1) & 0xFF == ord('q'):
                            break
                except Exception as e:
                    print(
                        'Error on line {}'.format(
                            sys.exc_info()[-1].tb_lineno),
                        type(e).__name__, e)
                finally:
                    # Release the camera, writer and windows on the error
                    # path too; previously they leaked after any exception.
                    if video_capture is not None:
                        video_capture.release()
                    if out is not None:
                        out.release()
                    cv2.destroyAllWindows()
Beispiel #13
0
                    emb_array = np.zeros((1, embedding_size))

                    bb[i][0] = det[i][0]
                    bb[i][1] = det[i][1]
                    bb[i][2] = det[i][2]
                    bb[i][3] = det[i][3]

                    if bb[i][0] <= 0 or bb[i][1] <= 0 or bb[i][2] >= len(
                            frame[0]) or bb[i][3] >= len(frame):
                        print('face is inner of range!')
                        continue

                    cropped = (frame[bb[i][1]:bb[i][3], bb[i][0]:bb[i][2], :])
                    print("{0} {1} {2} {3}".format(bb[i][0], bb[i][1],
                                                   bb[i][2], bb[i][3]))
                    cropped = facenet.flip(cropped, False)
                    scaled = (misc.imresize(cropped, (image_size, image_size),
                                            interp='bilinear'))
                    scaled = cv2.resize(scaled,
                                        (input_image_size, input_image_size),
                                        interpolation=cv2.INTER_CUBIC)
                    scaled = facenet.prewhiten(scaled)
                    scaled_reshape = (scaled.reshape(-1, input_image_size,
                                                     input_image_size, 3))
                    feed_dict = {
                        images_placeholder: scaled_reshape,
                        phase_train_placeholder: False
                    }

                    emb_array[0, :] = sess.run(embeddings, feed_dict=feed_dict)
Beispiel #14
0
def show_frame():
    """Grab one camera frame, label or blur each face, and refresh the GUI.

    Every in-frame face is preprocessed and posted to ``server + "video"``;
    the JSON reply supplies ``name`` and ``cos_sim``. A face is boxed and
    labelled when ``cos_sim`` is at least 0.5 and the ``button_flag`` entry
    for that name is even; otherwise the face region is blurred. The result
    is displayed on ``mv_label`` and the function reschedules itself after
    10 ms.
    """
    print(button_flag)
    grabbed, cv2image = cap.read()
    if not grabbed or cv2image is None:
        # Keep the refresh loop alive when a single read fails instead of
        # crashing in the detector and never rescheduling.
        mv_label.after(10, show_frame)
        return

    bounding_boxes, cv2image = detector.run_mtcnn(cv2image)
    nrof_faces = bounding_boxes.shape[0]
    print('Detected_FaceNum: %d' % nrof_faces)

    if nrof_faces > 0:
        det = bounding_boxes[:, 0:4]
        bb = np.zeros((nrof_faces, 4), dtype=np.int32)
        frame_height, frame_width = cv2image.shape[0], cv2image.shape[1]
        URL = server + "video"

        for i in range(nrof_faces):
            # Assigning into the int32 array truncates the float coordinates.
            bb[i] = det[i]
            left, top, right, bottom = bb[i]

            # Skip boxes that touch or cross the frame border.
            if (left <= 0 or top <= 0 or right >= frame_width
                    or bottom >= frame_height):
                print('Face is very close! 0:', left, '    1:', top,
                      '      2:', right, '          3:', bottom)
                continue

            # Per-face locals instead of lists indexed by ``i``: the lists
            # went out of step with ``i`` once a face had been skipped,
            # raising IndexError for the following faces.
            face = facenet.flip(cv2image[top:bottom, left:right, :], False)
            face = misc.imresize(face, (image_size, image_size),
                                 interp='bilinear')
            face = cv2.resize(face, (input_image_size, input_image_size),
                              interpolation=cv2.INTER_CUBIC)
            face = facenet.prewhiten(face)
            face_batch = face.reshape(-1, input_image_size,
                                      input_image_size, 3)

            # Hand the preprocessed face to the server for identification.
            json_feed = {'images_placeholder': face_batch.tolist()}
            response = requests.post(URL, json=json_feed)
            img_data = response.json()

            print("name : ", img_data["name"], "\nsimilarity : ",
                  img_data["cos_sim"])

            # The button lookup only happens for recognised faces, exactly
            # as in the nested form this condition replaces.
            show_identity = (
                img_data["cos_sim"] >= 0.5
                and button_flag[button_name.index(img_data["name"])] % 2 == 0)

            if show_identity:
                cv2.rectangle(cv2image, (left, top), (right, bottom),
                              (0, 255, 0), 2)  # boxing face

                # Plot the name under the box.
                text_x = left
                text_y = bottom + 20
                cv2.putText(cv2image,
                            img_data["name"], (text_x, text_y),
                            cv2.FONT_HERSHEY_COMPLEX_SMALL,
                            1, (0, 0, 255),
                            thickness=1,
                            lineType=2)
            else:
                # Unrecognised or toggled-off faces are blurred.
                cv2image[top:bottom, left:right] = cv2.blur(
                    cv2image[top:bottom, left:right], (23, 23))

    cv2image = cv2.cvtColor(cv2image, cv2.COLOR_BGR2RGBA)
    webcam_img = ImageTk.PhotoImage(image=Image.fromarray(cv2image))
    # Keep a reference on the widget so the image is not garbage-collected.
    mv_label.imgtk = webcam_img
    mv_label.configure(image=webcam_img)
    mv_label.after(10, show_frame)
                        emb_array = np.zeros((1, embedding_size))

                        bb[i][0] = det[i][0]
                        bb[i][1] = det[i][1]
                        bb[i][2] = det[i][2]
                        bb[i][3] = det[i][3]

                        # inner exception
                        if bb[i][0] <= 0 or bb[i][1] <= 0 or bb[i][2] >= len(
                                frame[0]) or bb[i][3] >= len(frame):
                            print('Face is very close!')
                            continue

                        cropped.append(frame[bb[i][1]:bb[i][3],
                                             bb[i][0]:bb[i][2], :])
                        cropped[i] = facenet.flip(cropped[i], False)
                        scaled.append(
                            misc.imresize(cropped[i], (image_size, image_size),
                                          interp='bilinear'))
                        scaled[i] = cv2.resize(
                            scaled[i], (input_image_size, input_image_size),
                            interpolation=cv2.INTER_CUBIC)
                        scaled[i] = facenet.prewhiten(scaled[i])
                        scaled_reshape.append(scaled[i].reshape(
                            -1, input_image_size, input_image_size, 3))
                        feed_dict = {
                            images_placeholder: scaled_reshape[i],
                            phase_train_placeholder: False
                        }
                        emb_array[0, :] = sess.run(embeddings,
                                                   feed_dict=feed_dict)
Beispiel #16
0
def _fetch_frame(url, timeout=10):
    """Download one image from *url* and decode it into a BGR array.

    Returns ``None`` when the request fails, the response body is empty or
    the payload cannot be decoded, so the caller can simply retry.
    """
    try:
        resp = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        print('Frame request failed:', exc)
        return None

    buf = np.frombuffer(resp.content, dtype=np.uint8)
    if buf.size == 0:
        return None
    # IMREAD_COLOR always yields a 3-channel BGR image (or None on failure),
    # so no grayscale/alpha handling is needed afterwards.
    return cv2.imdecode(buf, cv2.IMREAD_COLOR)


def main():
    """Run the face-recognition loop on frames fetched over HTTP.

    Each iteration downloads an image from ``args.url``, detects faces on a
    half-size copy with MTCNN, embeds every face with the loaded FaceNet
    model and classifies the embedding with the pickled classifier.  Faces
    whose normalised box area exceeds ``args.bb_area`` are boxed; those that
    also pass the ``args.yframe`` test have their full-resolution crop saved
    under ``cluster_dir`` and, when the class probability exceeds
    ``args.class_probability``, are labelled and logged to the
    ``args.log`` table (at most once every 5 seconds).

    The loop ends when ``q`` is pressed in the preview window.
    """
    # Command-line options cannot change while running: parse them once.
    args = parse_args()
    half_margin = args.margin / 2

    with tf.Graph().as_default():
        last_log = datetime.now()
        mycursor = mydb.cursor()
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
        with sess.as_default():
            pnet, rnet, onet = detect_face.create_mtcnn(sess, './npy')

            minsize = 20  # minimum size of face
            threshold = [0.6, 0.7, 0.7]  # three steps's threshold
            factor = 0.709  # scale factor
            image_size = 182
            input_image_size = 160

            print('Loading Modal')
            facenet.load_model(modeldir)
            graph = tf.get_default_graph()
            images_placeholder = graph.get_tensor_by_name("input:0")
            embeddings = graph.get_tensor_by_name("embeddings:0")
            phase_train_placeholder = graph.get_tensor_by_name("phase_train:0")
            embedding_size = embeddings.get_shape()[1]

            HumanNames = sorted(os.listdir(pre_img))

            classifier_filename_exp = os.path.expanduser(classifier_filename)
            # NOTE(security): pickle.load executes arbitrary code; the
            # classifier file must come from a trusted source.
            with open(classifier_filename_exp, 'rb') as infile:
                model, _ = pickle.load(infile)

            print('Start Recognition')
            try:
                with no_ssl_verification():
                    while True:
                        frame_raw = _fetch_frame(args.url)
                        if frame_raw is None:
                            continue

                        # Detection runs on a half-size copy; crops that get
                        # saved are taken from the full-resolution frame.
                        frame = cv2.resize(frame_raw, (0, 0), fx=0.5, fy=0.5)
                        raw_h, raw_w = frame_raw.shape[:2]
                        img_h, img_w = frame.shape[:2]

                        bounding_boxes, _ = detect_face.detect_face(
                            frame, minsize, pnet, rnet, onet, threshold, factor)
                        nrof_faces = bounding_boxes.shape[0]
                        print('Detected_FaceNum: %d' % nrof_faces)
                        if nrof_faces == 0:
                            print('Alignment Failure')

                        for box in bounding_boxes[:, 0:4]:
                            # Truncate towards zero, as an int32 array store would.
                            x1, y1, x2, y2 = (int(v) for v in box)

                            # Skip boxes touching or crossing the frame border.
                            if x1 <= 0 or y1 <= 0 or x2 >= img_w or y2 >= img_h:
                                print('Face is very close!')
                                continue

                            # Map the box to the full-resolution frame (x by
                            # width, y by height), pad it by half the margin on
                            # each side and clamp it to the image.
                            rx1 = max(int(x1 / img_w * raw_w - half_margin), 0)
                            ry1 = max(int(y1 / img_h * raw_h - half_margin), 0)
                            rx2 = min(int(x2 / img_w * raw_w + half_margin), raw_w)
                            ry2 = min(int(y2 / img_h * raw_h + half_margin), raw_h)

                            # Per-face work is best effort: a failure on one
                            # face must not stop the stream, but it is reported.
                            try:
                                # Swap channel order (BGR from OpenCV) before saving.
                                face_raw = cv2.cvtColor(
                                    frame_raw[ry1:ry2, rx1:rx2, :], cv2.COLOR_RGB2BGR)

                                face = facenet.flip(frame[y1:y2, x1:x2, :], False)
                                face = misc.imresize(face, (image_size, image_size), interp='bilinear')
                                face = cv2.resize(face, (input_image_size, input_image_size),
                                                  interpolation=cv2.INTER_CUBIC)
                                face = facenet.prewhiten(face)
                                feed_dict = {
                                    images_placeholder: face.reshape(
                                        -1, input_image_size, input_image_size, 3),
                                    phase_train_placeholder: False,
                                }
                                emb_array = np.zeros((1, embedding_size))
                                emb_array[0, :] = sess.run(embeddings, feed_dict=feed_dict)

                                predictions = model.predict_proba(emb_array)
                                print(predictions)
                                best_class_indices = np.argmax(predictions, axis=1)
                                best_class_probabilities = predictions[
                                    np.arange(len(best_class_indices)), best_class_indices]
                                print(best_class_indices, ' with accuracy ', best_class_probabilities)
                                best_idx = int(best_class_indices[0])

                                # Fraction of the detection frame covered by the box.
                                bb_area = ((x2 - x1) * (y2 - y1)) / (img_w * img_h)
                                # NOTE(review): the top edge is normalised by the
                                # frame *width* here, as before; this looks like an
                                # axis mix-up but args.yframe thresholds depend on
                                # it, so it is kept until confirmed.
                                posY = (y1 / img_w) / 0.5

                                if bb_area <= args.bb_area:
                                    continue
                                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

                                if posY <= args.yframe:
                                    continue

                                if best_class_probabilities[0] > args.class_probability:
                                    print('Result Indices:', best_idx)
                                    print(HumanNames)

                                    if best_idx < len(HumanNames):
                                        result_names = HumanNames[best_idx]
                                        print('Face recognized:', result_names)

                                        now = datetime.now()
                                        elapsed_ms = int((now - last_log).total_seconds() * 1000)
                                        if elapsed_ms > 5000:
                                            last_log = now
                                            currdatetime = time.strftime('%Y-%m-%d %H:%M:%S')
                                            # NOTE(security): the table name comes from
                                            # the command line and cannot be bound as a
                                            # parameter; it must be a trusted identifier.
                                            sql = "INSERT INTO " + args.log + " (id_num, date) VALUES (%s, %s)"
                                            mycursor.execute(sql, (result_names, currdatetime))
                                            mydb.commit()

                                        # plot result name under box
                                        cv2.putText(frame, result_names, (x1, y2 + 20),
                                                    cv2.FONT_HERSHEY_COMPLEX_SMALL,
                                                    1, (0, 0, 255), thickness=1, lineType=2)
                                    else:
                                        print('Class index %d has no matching name' % best_idx)

                                timestr = time.strftime('%Y%m%d%H%M%S')
                                misc.imsave(os.path.join(cluster_dir, timestr + '.png'), face_raw)
                            except Exception as exc:
                                print('Face processing failed:', exc)

                        cv2.imshow('Video', frame)

                        if cv2.waitKey(1) & 0xFF == ord('q'):
                            break
            finally:
                # No capture device is opened here (frames come over HTTP),
                # so only the cursor and the preview windows need releasing.
                mycursor.close()
                cv2.destroyAllWindows()
Beispiel #17
0
                    for i in range(nrof_faces):
                        emb_array = np.zeros((1, embedding_size))

                        bb[i][0] = det[i][0]
                        bb[i][1] = det[i][1]
                        bb[i][2] = det[i][2]
                        bb[i][3] = det[i][3]

                        # inner exception
                        if bb[i][0] <= 0 or bb[i][1] <= 0 or bb[i][2] >= len(frame[0]) or bb[i][3] >= len(frame):
                            print('face is inner of range!')
                            continue

                        cropped.append(frame[bb[i][1]:bb[i][3], bb[i][0]:bb[i][2], :])
                        cropped[0] = facenet.flip(cropped[0], False)
                        scaled.append(misc.imresize(cropped[0], (image_size, image_size), interp='bilinear'))
                        scaled[0] = cv2.resize(scaled[0], (input_image_size,input_image_size),
                                               interpolation=cv2.INTER_CUBIC)
                        scaled[0] = facenet.prewhiten(scaled[0])
                        scaled_reshape.append(scaled[0].reshape(-1,input_image_size,input_image_size,3))
                        feed_dict = {images_placeholder: scaled_reshape[0], phase_train_placeholder: False}
                        emb_array[0, :] = sess.run(embeddings, feed_dict=feed_dict)
                        predictions = model.predict_proba(emb_array)
                        best_class_indices = np.argmax(predictions, axis=1)
                        best_class_probabilities = predictions[np.arange(len(best_class_indices)), best_class_indices]
                        cv2.rectangle(frame, (bb[i][0], bb[i][1]), (bb[i][2], bb[i][3]), (0, 255, 0), 2)    #boxing face

                        #plot result idx under box
                        text_x = bb[i][0]
                        text_y = bb[i][3] + 20
Beispiel #18
0
            for i in range(number_of_faces):
                emb_array = np.zeros((1, embedding_size))

                bb[i][0] = det[i][0]
                bb[i][1] = det[i][1]
                bb[i][2] = det[i][2]
                bb[i][3] = det[i][3]

                # inner exception
                if bb[i][0] <= 0 or bb[i][1] <= 0 or bb[i][2] >= len(
                        frame[0]) or bb[i][3] >= len(frame):
                    # print('face is inner of range!')
                    continue

                cropped.append(frame[bb[i][1]:bb[i][3], bb[i][0]:bb[i][2], :])
                cropped[0] = facenet.flip(cropped[0], False)
                scaled.append(
                    misc.imresize(cropped[0], (image_size, image_size),
                                  interp='bilinear'))
                scaled[0] = cv2.resize(scaled[0],
                                       (input_image_size, input_image_size),
                                       interpolation=cv2.INTER_CUBIC)
                scaled[0] = facenet.prewhiten(scaled[0])
                scaled_reshape.append(scaled[0].reshape(
                    -1, input_image_size, input_image_size, 3))
                feed_dict = {
                    images_placeholder: scaled_reshape[0],
                    phase_train_placeholder: False
                }
                emb_array[0, :] = sess.run(embeddings, feed_dict=feed_dict)
                predictions = model.predict_proba(emb_array)
                        if bb[i][0] <= 0 or bb[i][1] <= 0 or bb[i][2] >= len(frame[0]) or bb[i][3] >= len(frame):
                            print('face is inner of range!')
                            continue

                        # cropped.append(frame[bb[i][1]:bb[i][3], bb[i][0]:bb[i][2], :])
                        # cropped[0] = facenet.flip(cropped[0], False)
                        # scaled.append(misc.imresize(cropped[0], (image_size, image_size), interp='bilinear'))
                        # scaled[0] = cv2.resize(scaled[0], (input_image_size,input_image_size),
                        #                        interpolation=cv2.INTER_CUBIC)
                        # scaled[0] = facenet.prewhiten(scaled[0])
                        # scaled_reshape.append(scaled[0].reshape(-1,input_image_size,input_image_size,3))
                        # feed_dict = {images_placeholder: scaled_reshape[0], phase_train_placeholder: False}

                        cropped = (frame[bb[i][1]:bb[i][3], bb[i][0]:bb[i][2], :])
                        print("{0} {1} {2} {3}".format(bb[i][0], bb[i][1], bb[i][2], bb[i][3]))
                        cropped = facenet.flip(cropped, False)
                        scaled = (misc.imresize(cropped, (image_size, image_size), interp='bilinear'))
                        scaled = cv2.resize(scaled, (input_image_size,input_image_size),
                                            interpolation=cv2.INTER_CUBIC)
                        scaled = facenet.prewhiten(scaled)
                        scaled_reshape = (scaled.reshape(-1,input_image_size,input_image_size,3))
                        feed_dict = {images_placeholder: scaled_reshape, phase_train_placeholder: False}

                        emb_array[0, :] = sess.run(embeddings, feed_dict=feed_dict)

                        predictions = model.predict_proba(emb_array)
                        best_class_indices = np.argmax(predictions, axis=1)
                        best_class_probabilities = predictions[np.arange(len(best_class_indices)), best_class_indices]
                        cv2.rectangle(frame, (bb[i][0], bb[i][1]), (bb[i][2], bb[i][3]), (0, 255, 0), 2)
                        text_x = bb[i][0]
                        text_y = bb[i][3] + 20
Beispiel #20
0
def batch_inp(rel_path):
    """Classify the first detected face of every image in *rel_path*.

    Every file matched by ``rel_path/*`` is read, the first MTCNN detection
    is cropped, resized and prewhitened, and all usable faces are embedded
    and classified in a single batch.  The result is written to
    ``test_results.csv`` as (image path, predicted class name) rows; images
    that could not be read, had no face, or whose face box touches the
    image border get an empty class name.

    Args:
        rel_path: directory containing the images to classify.
    """
    print('Start Recognition!')
    img_list = glob.glob(os.path.join(rel_path, '*'))

    # Both lists are built across the whole loop so that face_batch[k]
    # always belongs to image index ok_ind[k].
    face_batch = []
    ok_ind = []

    for idx, img_path in enumerate(img_list):  # for each image in the list
        frame = cv2.imread(img_path)
        if frame is None:
            # Unreadable or non-image file: keep its row, leave it unlabelled.
            print('Could not read image: %s' % img_path)
            continue

        if frame.ndim == 2:
            frame = facenet.to_rgb(frame)
        frame = frame[:, :, 0:3]

        bounding_boxes, _ = detect_face.detect_face(
            frame, minsize, pnet, rnet, onet, threshold, factor)
        nrof_faces = bounding_boxes.shape[0]
        print('Detected_FaceNum: %d' % nrof_faces)
        if nrof_faces == 0:
            continue

        # Only the first detection of each image is classified.
        bb = [int(np.round(v)) for v in bounding_boxes[0, 0:4]]

        # Skip boxes touching or crossing the image border.
        if bb[0] <= 0 or bb[1] <= 0 or bb[2] >= len(
                frame[0]) or bb[3] >= len(frame):
            print('face is out of range!')
            continue

        cropped = frame[bb[1]:bb[3], bb[0]:bb[2], :]
        cropped = facenet.flip(cropped, False)
        scaled = facenet.imresize(cropped, (image_size, image_size),
                                  interp='bilinear')
        scaled = cv2.resize(scaled,
                            (input_image_size, input_image_size),
                            interpolation=cv2.INTER_CUBIC)
        scaled = facenet.prewhiten(scaled)
        face_batch.append(
            scaled.reshape(input_image_size, input_image_size, 3))
        ok_ind.append(idx)

    # A plain list of strings avoids the fixed-width string dtype that
    # np.zeros_like(img_list) would impose on the class names.
    results = [''] * len(img_list)

    if face_batch:  # nothing to embed when no usable face was found
        feed_dict = {
            images_placeholder: face_batch,
            phase_train_placeholder: False
        }
        emb_array = sess.run(embeddings, feed_dict=feed_dict)  # n,n_emb
        predictions = model.predict_proba(emb_array)
        best_class_indices = np.argmax(predictions, axis=1)  # n,
        for idx, best in zip(ok_ind, best_class_indices):
            results[idx] = class_names[best]

    comb = list(zip(img_list, results))
    pd.DataFrame(comb).to_csv('test_results.csv')