Beispiel #1
0
def load_and_align_data(config, imgs):
    """Detect, crop, resize and prewhiten one face per input image.

    An MTCNN detector is created in a private TensorFlow graph/session. For
    every path in ``imgs`` the first bounding box returned by the detector is
    enlarged by ``config.mtcnn.margin`` (half on each side, clipped to the
    image borders), the crop is resized to a square of side
    ``config.mtcnn.image_size`` and passed through ``prewhiten``.

    Args:
        config: Configuration object. This function reads
            ``gpu_memory_fraction``, ``mtcnn.margin`` and
            ``mtcnn.image_size``; it is also handed to ``MTCNN``.
        imgs: Iterable of image file paths (``~`` is expanded).

    Returns:
        The prewhitened crops combined with ``np.stack``, one entry per
        input path, in input order.

    Raises:
        IndexError: If the detector returns no bounding box for an image.
        ValueError: From ``np.stack`` when ``imgs`` is empty.
    """
    print('Creating networks and loading parameters')
    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=config.gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        with sess.as_default():
            detector = MTCNN(config, sess)

    # Loop invariants.
    half_margin = config.mtcnn.margin / 2
    target_size = (config.mtcnn.image_size, config.mtcnn.image_size)

    img_list = []
    try:
        for image_path in imgs:
            img = misc.imread(os.path.expanduser(image_path))
            img_size = np.asarray(img.shape)[0:2]
            bounding_boxes, _ = detector.detect_face(img)
            if len(bounding_boxes) < 1:
                # Same exception type as the out-of-bounds index this would
                # otherwise hit, but with the offending file in the message.
                raise IndexError('No face detected in "%s"' % image_path)
            det = np.squeeze(bounding_boxes[0, 0:4])
            # Box layout is (x1, y1, x2, y2); img_size is (height, width).
            bb = np.zeros(4, dtype=np.int32)
            bb[0] = np.maximum(det[0] - half_margin, 0)
            bb[1] = np.maximum(det[1] - half_margin, 0)
            bb[2] = np.minimum(det[2] + half_margin, img_size[1])
            bb[3] = np.minimum(det[3] + half_margin, img_size[0])
            cropped = img[bb[1]:bb[3], bb[0]:bb[2], :]
            aligned = misc.imresize(cropped, target_size, interp='bilinear')
            img_list.append(prewhiten(aligned))
    finally:
        # The session is private to this call; `as_default()` does not close
        # it, so release its resources explicitly.
        sess.close()
    images = np.stack(img_list)
    return images
def main(config):
    """Crawl images per listed name and drop those unlike the reference face.

    Loads the MTCNN detector and the embedding model, then reads one entry
    per line from ``config.parse_path``. For each entry, result pages are
    fetched until the entry's directory holds at least 20 files or no next
    page URL is returned. Every downloaded image is run through ``detect``;
    when a face is found its embedding is computed. The first embedding
    obtained for an entry is the reference: any later image whose Euclidean
    embedding distance to it exceeds ``config.max_dist`` is deleted. Images
    that raise during processing are deleted as well.

    Args:
        config: Configuration object. This function reads
            ``gpu_memory_fraction``, ``lfw.valid_model_path``,
            ``parse_path``, ``output_dir`` and ``max_dist``; it is also
            passed to ``MTCNN``, ``detect`` and (``output_dir``)
            ``get_keyword``.
    """
    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=config.gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        with sess.as_default():
            detector = MTCNN(config, sess)
            # Load the model
            load_model(config.lfw.valid_model_path)
            # Get input and output tensors
            graph = tf.get_default_graph()
            images_placeholder = graph.get_tensor_by_name("input:0")
            embeddings = graph.get_tensor_by_name("embeddings:0")
            phase_train_placeholder = graph.get_tensor_by_name(
                "phase_train:0")

            with open(config.parse_path) as f:
                names = f.readlines()

            for line in names:
                keyword, name, saved_dir = get_keyword(line, config.output_dir)
                fanye_url = url_init_first + urllib.quote(keyword, safe='/')
                # Embeddings of all faces seen for this entry; feats[0] is
                # the reference every later face is compared against.
                feats = []
                page = 0
                while len(os.listdir(saved_dir)) < 20 and fanye_url != '':
                    onepage_urls, fanye_url = get_onepage_urls(fanye_url)
                    for index, url in enumerate(onepage_urls):
                        img_path = download(url,
                                            page * len(onepage_urls) + index,
                                            saved_dir)
                        try:
                            img = misc.imread(img_path)
                            nrof_faces, face = detect(img, detector, config)
                            face = np.expand_dims(face, 0)
                            if not nrof_faces:
                                continue
                            feed_dict = {
                                images_placeholder: face,
                                phase_train_placeholder: False
                            }
                            feats.append(
                                sess.run(embeddings, feed_dict=feed_dict))
                            # Compare whenever a reference other than the
                            # current image exists. Testing `index != 0`
                            # skipped the first image of every later page,
                            # because `index` restarts at 0 on each page.
                            if len(feats) > 1:
                                dist = np.sqrt(
                                    np.sum(
                                        np.square(
                                            np.subtract(feats[0],
                                                        feats[-1]))))
                                print('the %d-th %s image dist: %f' %
                                      (index, name, dist))
                                if dist > config.max_dist:
                                    os.remove(img_path)
                        except Exception as e:
                            # Best effort: report, discard the offending
                            # file and carry on with the next URL.
                            print(e)
                            # The file may never have been written; do not
                            # let the cleanup itself raise.
                            if img_path and os.path.exists(img_path):
                                os.remove(img_path)
                            continue
                    page += 1
Beispiel #3
0
def main(config):
    """Detect faces in one image and show it with the boxes drawn.

    Reads ``config.image`` with OpenCV, runs the MTCNN detector on it, draws
    a green rectangle around every detected face whose box lies strictly
    inside the image, and displays the result until a key is pressed.

    Args:
        config: Configuration object. This function reads
            ``gpu_memory_fraction`` and ``image``; it is also passed to
            ``MTCNN``.
    """
    print('Creating networks and loading parameters')

    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=config.gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        with sess.as_default():
            detector = MTCNN(config, sess)
            frame = cv2.imread(config.image)
            if frame is None:
                # cv2.imread reports an unreadable/missing file by returning
                # None instead of raising.
                print('Unable to read image "%s"' % config.image)
                return
            if frame.ndim == 2:
                frame = to_rgb(frame)
            frame = frame[:, :, 0:3]
            bounding_boxes, _ = detector.detect_face(frame)
            nrof_faces = bounding_boxes.shape[0]

            print('Detected_FaceNum: %d' % nrof_faces)

            if nrof_faces > 0:
                frame_height, frame_width = frame.shape[0:2]
                # Columns 0..3 are (x1, y1, x2, y2); truncate to integers.
                bb = bounding_boxes[:, 0:4].astype(np.int32)
                for i in range(nrof_faces):
                    x1, y1, x2, y2 = bb[i]
                    # Skip boxes touching or crossing the image border.
                    if (x1 <= 0 or y1 <= 0 or x2 >= frame_width
                            or y2 >= frame_height):
                        print('face is inner of range!')
                        continue
                    cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            else:
                print('Unable to align')
            cv2.imshow('Image', frame)
            # Block until any key is pressed, then always close the window.
            cv2.waitKey(0)
            cv2.destroyAllWindows()
def main(config):
    """Run live face detection on the default camera until 'q' is pressed.

    Each captured frame is downscaled by half, passed through the MTCNN
    detector, annotated with a green rectangle per face lying strictly
    inside the frame plus an FPS read-out, and shown in a window.

    Args:
        config: Configuration object. This function reads
            ``gpu_memory_fraction`` and ``mtcnn.frame_interval``; it is also
            passed to ``MTCNN``.
    """
    print('Creating networks and loading parameters')

    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=config.gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        with sess.as_default():
            detector = MTCNN(config, sess)
            video_capture = cv2.VideoCapture(0)
            prevTime = 0
            # NOTE: c is never incremented in this variant, so detection
            # runs on every frame; frame_interval only has to be non-zero.
            c = 0
            try:
                while True:
                    ret, frame = video_capture.read()
                    if not ret:
                        # No frame delivered (camera missing/unplugged);
                        # resizing would fail on the empty frame.
                        print('Unable to read frame from camera')
                        break
                    frame = cv2.resize(frame, (0, 0), fx=0.5,
                                       fy=0.5)  # resize frame (optional)
                    curTime = time.time()  # calc fps
                    timeF = config.mtcnn.frame_interval
                    if c % timeF == 0:
                        if frame.ndim == 2:
                            frame = to_rgb(frame)
                        frame = frame[:, :, 0:3]
                        bounding_boxes, _ = detector.detect_face(frame)
                        nrof_faces = bounding_boxes.shape[0]

                        print('Detected_FaceNum: %d' % nrof_faces)

                        if nrof_faces > 0:
                            frame_height, frame_width = frame.shape[0:2]
                            # Columns 0..3 are (x1, y1, x2, y2).
                            bb = bounding_boxes[:, 0:4].astype(np.int32)
                            for i in range(nrof_faces):
                                x1, y1, x2, y2 = bb[i]
                                # Skip boxes touching/crossing the border.
                                if (x1 <= 0 or y1 <= 0 or x2 >= frame_width
                                        or y2 >= frame_height):
                                    print('face is inner of range!')
                                    continue
                                cv2.rectangle(frame, (x1, y1), (x2, y2),
                                              (0, 255, 0), 2)
                        else:
                            print('Unable to align')
                    sec = curTime - prevTime
                    prevTime = curTime
                    # Two frames can share a timestamp on coarse clocks.
                    fps = 1 / sec if sec > 0 else 0.0
                    string = 'FPS: %2.3f' % fps
                    text_fps_x = len(frame[0]) - 150
                    text_fps_y = 20
                    cv2.putText(frame,
                                string, (text_fps_x, text_fps_y),
                                cv2.FONT_HERSHEY_COMPLEX_SMALL,
                                1, (0, 255, 0),
                                thickness=1,
                                lineType=2)
                    cv2.imshow('Video', frame)
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break
            finally:
                # Free the camera and windows even if a frame fails midway.
                video_capture.release()
                cv2.destroyAllWindows()
def main(config):
    """Crop one face per dataset image and log the bounding boxes.

    For every image of every class in ``DataGenerator(config).dataset`` that
    has no output file yet, the image is read, downscaled by half and run
    through the MTCNN detector. When several faces are found, the one
    maximising ``area - 2 * squared_distance_to_image_centre`` is kept. The
    box is clipped to the image, the crop is resized to a square of side
    ``config.mtcnn.image_size`` and saved as
    ``<output_dir>/<class>/<name>.png``.

    A text file ``bounding_boxes_<random>.txt`` in ``config.output_dir``
    receives one line per processed image: the output path followed by the
    box ``x1 y1 x2 y2`` on success, or the output path alone when the image
    could not be aligned. Box coordinates refer to the half-size image.

    Args:
        config: Configuration object. This function reads
            ``gpu_memory_fraction``, ``output_dir`` and ``mtcnn.image_size``;
            it is also passed to ``DataGenerator`` and ``MTCNN``.
    """
    print('Creating networks and loading parameters')

    Data = DataGenerator(config)

    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=config.gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
        with sess.as_default():
            detector = MTCNN(config, sess)

    random_key = np.random.randint(0, high=99999)
    bounding_boxes_filename = os.path.join(config.output_dir, 'bounding_boxes_%05d.txt' % random_key)
    target_size = (config.mtcnn.image_size, config.mtcnn.image_size)

    with open(bounding_boxes_filename, "w") as text_file:
        nrof_images_total = 0
        nrof_successfully_aligned = 0

        for cls in Data.dataset:
            output_class_dir = os.path.join(config.output_dir, cls.name)
            create_dirs([output_class_dir])

            for image_path in cls.image_paths:
                nrof_images_total += 1
                filename = os.path.splitext(os.path.split(image_path)[1])[0]
                output_filename = os.path.join(output_class_dir, filename + '.png')
                print(image_path)
                if os.path.exists(output_filename):
                    # Already aligned in a previous run.
                    continue

                try:
                    img = misc.imread(image_path)
                    img = cv2.resize(img, (0, 0), fx=0.5, fy=0.5)
                except (IOError, ValueError, IndexError) as e:
                    errorMessage = '{}: {}'.format(image_path, e)
                    print(errorMessage)
                    continue

                if img.ndim < 2:
                    print('Unable to align "%s"' % image_path)
                    text_file.write('%s\n' % (output_filename))
                    continue

                if img.ndim == 2:
                    img = to_rgb(img)
                    print('to_rgb data dimension: ', img.ndim)
                # Drop any alpha channel.
                img = img[:, :, 0:3]

                bounding_boxes, _ = detector.detect_face(img)
                nrof_faces = bounding_boxes.shape[0]

                if nrof_faces <= 0:
                    print('Unable to align "%s"' % image_path)
                    text_file.write('%s\n' % (output_filename))
                    continue

                det = bounding_boxes[:, 0:4]
                img_size = np.asarray(img.shape)[0:2]  # (height, width)
                if nrof_faces > 1:
                    bounding_box_size = (det[:, 2] - det[:, 0]) * (det[:, 3] - det[:, 1])
                    img_center = img_size / 2
                    offsets = np.vstack([(det[:, 0] + det[:, 2]) / 2 - img_center[1],
                                         (det[:, 1] + det[:, 3]) / 2 - img_center[0]])
                    offset_dist_squared = np.sum(np.power(offsets, 2.0), 0)
                    index = np.argmax(bounding_box_size - offset_dist_squared * 2.0)  # some extra weight on the centering
                    det = det[index, :]
                det = np.squeeze(det)

                # Clip the box to the image. Without this a negative x1/y1
                # becomes a from-the-end slice index and the crop of a face
                # at the left/top border comes out empty or wrong.
                bb_temp = np.zeros(4, dtype=np.int32)
                bb_temp[0] = np.maximum(det[0], 0)
                bb_temp[1] = np.maximum(det[1], 0)
                bb_temp[2] = np.minimum(det[2], img_size[1])
                bb_temp[3] = np.minimum(det[3], img_size[0])

                cropped_temp = img[bb_temp[1]:bb_temp[3], bb_temp[0]:bb_temp[2], :]

                try:
                    scaled_temp = misc.imresize(cropped_temp, target_size, interp='bilinear')
                except (IOError, ValueError, IndexError) as e:
                    # Typically a degenerate (empty) crop; skip but say so.
                    print('Unable to resize crop of "%s": %s' % (image_path, e))
                    continue
                nrof_successfully_aligned += 1

                misc.imsave(output_filename, scaled_temp)
                text_file.write('%s %d %d %d %d\n' % (output_filename, bb_temp[0], bb_temp[1], bb_temp[2], bb_temp[3]))

    print('Total number of images: %d' % nrof_images_total)
    print('Number of successfully aligned images: %d' % nrof_successfully_aligned)
Beispiel #6
0
def main(config):
    """Crop every detected face of every dataset image to its own file.

    For every image of every class in ``DataGenerator(config, False).dataset``
    whose output file does not exist yet, the image is read and run through
    the MTCNN detector. Each detected box is clipped to the image, cropped
    and resized to a square of side ``config.mtcnn.image_size``. A single
    face is saved as ``<output_dir>/<class>/<name>.png``; several faces are
    saved as ``<name>_<i>.png`` where ``i`` is the detection index.

    Args:
        config: Configuration object. This function reads
            ``gpu_memory_fraction``, ``output_dir`` and ``mtcnn.image_size``;
            it is also passed to ``DataGenerator`` and ``MTCNN``.
    """
    print('Creating networks and loading parameters')

    Data = DataGenerator(config, False)

    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=config.gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        with sess.as_default():
            detector = MTCNN(config, sess)

    nrof_images_total = 0
    nrof_successfully_aligned = 0
    target_size = (config.mtcnn.image_size, config.mtcnn.image_size)

    for cls in Data.dataset:
        output_class_dir = os.path.join(config.output_dir, cls.name)
        create_dirs([output_class_dir])

        for image_path in cls.image_paths:
            nrof_images_total += 1
            filename = os.path.splitext(os.path.split(image_path)[1])[0]
            output_filename = os.path.join(output_class_dir, filename + '.png')
            print(image_path)
            if os.path.exists(output_filename):
                # Already aligned in a previous run.
                continue

            try:
                img = misc.imread(image_path)
            except (IOError, ValueError, IndexError) as e:
                errorMessage = '{}: {}'.format(image_path, e)
                print(errorMessage)
                continue

            if img.ndim < 2:
                print('Unable to align "%s"' % image_path)
                continue

            if img.ndim == 2:
                img = to_rgb(img)
                print('to_rgb data dimension: ', img.ndim)
            # Drop any alpha channel.
            img = img[:, :, 0:3]

            bounding_boxes, _ = detector.detect_face(img)
            nrof_faces = bounding_boxes.shape[0]

            if nrof_faces <= 0:
                print('Unable to align "%s"' % image_path)
                continue

            det = bounding_boxes[:, 0:4]
            img_height, img_width = img.shape[0:2]
            # Strip only the extension. Splitting the whole path on '.'
            # would cut at the first dot anywhere in it (e.g. './out/...').
            output_bodyname = os.path.splitext(output_filename)[0]
            bb_temp = np.zeros((nrof_faces, 4), dtype=np.int32)
            for i in range(nrof_faces):
                # Clip the box to the image. Without this a negative x1/y1
                # becomes a from-the-end slice index and the crop of a face
                # at the left/top border comes out empty or wrong.
                bb_temp[i][0] = np.maximum(det[i][0], 0)
                bb_temp[i][1] = np.maximum(det[i][1], 0)
                bb_temp[i][2] = np.minimum(det[i][2], img_width)
                bb_temp[i][3] = np.minimum(det[i][3], img_height)

                cropped_temp = img[bb_temp[i][1]:bb_temp[i][3],
                                   bb_temp[i][0]:bb_temp[i][2], :]

                try:
                    scaled_temp = misc.imresize(cropped_temp, target_size,
                                                interp='bilinear')
                except (IOError, ValueError, IndexError) as e:
                    # Typically a degenerate (empty) crop; skip but say so.
                    print('Unable to resize crop %d of "%s": %s' %
                          (i, image_path, e))
                    continue

                nrof_successfully_aligned += 1

                if nrof_faces == 1:
                    misc.imsave(output_filename, scaled_temp)
                else:
                    output_indexname = '_%d' % i
                    misc.imsave(
                        output_bodyname + output_indexname + '.png',
                        scaled_temp)

    print('Total number of images: %d' % nrof_images_total)
    print('Number of successfully aligned images: %d' %
          nrof_successfully_aligned)
def main(config):
    """Run live face recognition on the default camera until 'q' is pressed.

    Loads the MTCNN detector, the embedding model and a pickled classifier.
    Each captured frame is downscaled by half; on every
    ``config.mtcnn.frame_interval``-th frame the detector is run and, for
    each face lying strictly inside the frame, the crop is flipped, resized
    to 160x160, prewhitened, embedded and classified. The frame is annotated
    with the face rectangle, the predicted name and an FPS read-out.

    Args:
        config: Configuration object. This function reads
            ``gpu_memory_fraction``, ``lfw.valid_model_path``,
            ``classifier_path``, ``input_dir`` and ``mtcnn.frame_interval``;
            it is also passed to ``MTCNN``.
    """
    print('Creating networks and loading parameters')

    graph_detect = tf.Graph()
    with graph_detect.as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=config.gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
        with sess.as_default():
            detector = MTCNN(config, sess)

            load_model(config.lfw.valid_model_path)
            graph = tf.get_default_graph()
            images_placeholder = graph.get_tensor_by_name("input:0")
            embeddings = graph.get_tensor_by_name("embeddings:0")
            phase_train_placeholder = graph.get_tensor_by_name("phase_train:0")
            embedding_size = embeddings.get_shape()[1]

            classifier_filename = config.classifier_path
            classifier_filename_exp = os.path.expanduser(classifier_filename)
            print('load classifier file-> %s' % classifier_filename_exp)
            # NOTE: unpickling executes arbitrary code; only load classifier
            # files that come from a trusted source.
            with open(classifier_filename_exp, 'rb') as infile:
                (model, class_names) = pickle.load(infile)

            # NOTE(review): labels are taken from the directory listing, not
            # from `class_names`; this presumably relies on os.listdir
            # returning the classes in the order used at training time.
            # TODO confirm.
            HumanNames = os.listdir(config.input_dir)

            video_capture = cv2.VideoCapture(0)
            prevTime = 0
            c = 0
            try:
                while True:
                    ret, frame = video_capture.read()
                    if not ret:
                        # No frame delivered (camera missing/unplugged);
                        # resizing would fail on the empty frame.
                        print('Unable to read frame from camera')
                        break
                    frame = cv2.resize(frame, (0, 0), fx=0.5, fy=0.5)    # resize frame (optional)
                    curTime = time.time()    # calc fps
                    timeF = config.mtcnn.frame_interval
                    if c % timeF == 0:
                        if frame.ndim == 2:
                            frame = to_rgb(frame)
                        frame = frame[:, :, 0:3]
                        bounding_boxes, _ = detector.detect_face(frame)
                        nrof_faces = bounding_boxes.shape[0]

                        print('Detected_FaceNum: %d' % nrof_faces)

                        if nrof_faces > 0:
                            frame_height, frame_width = frame.shape[0:2]
                            # Columns 0..3 are (x1, y1, x2, y2).
                            bb = bounding_boxes[:, 0:4].astype(np.int32)
                            for i in range(nrof_faces):
                                emb_array = np.zeros((1, embedding_size))
                                x1, y1, x2, y2 = bb[i]

                                # Skip boxes touching/crossing the border.
                                if (x1 <= 0 or y1 <= 0 or x2 >= frame_width
                                        or y2 >= frame_height):
                                    print('face is inner of range!')
                                    continue

                                # Per-face locals instead of lists indexed
                                # by i: a skipped face appends nothing, so
                                # list positions and i would drift apart.
                                face = frame[y1:y2, x1:x2, :]
                                face = flip(face, False)
                                face = misc.imresize(face, (182, 182), interp='bilinear')
                                face = cv2.resize(face, (160, 160), interpolation=cv2.INTER_CUBIC)
                                face = prewhiten(face)
                                face_batch = face.reshape(-1, 160, 160, 3)

                                feed_dict = {images_placeholder: face_batch, phase_train_placeholder: False}
                                emb_array[0, :] = sess.run(embeddings, feed_dict=feed_dict)
                                predictions = model.predict_proba(emb_array)
                                best_class_indices = np.argmax(predictions, axis=1)
                                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

                                text_x = x1
                                text_y = y2 + 20
                                best_index = best_class_indices[0]
                                print('result: ', best_index)
                                if best_index < len(HumanNames):
                                    result_names = HumanNames[best_index]
                                    cv2.putText(frame, result_names, (text_x, text_y), cv2.FONT_HERSHEY_COMPLEX_SMALL,
                                                1, (0, 0, 255), thickness=1, lineType=2)
                                else:
                                    print('no name for class index %d' % best_index)
                        else:
                            print('Unable to align')
                    sec = curTime - prevTime
                    prevTime = curTime
                    # Two frames can share a timestamp on coarse clocks.
                    fps = 1 / sec if sec > 0 else 0.0
                    string = 'FPS: %2.3f' % fps
                    text_fps_x = len(frame[0]) - 150
                    text_fps_y = 20
                    cv2.putText(frame, string, (text_fps_x, text_fps_y), cv2.FONT_HERSHEY_COMPLEX_SMALL,
                                1, (0, 255, 0), thickness=1, lineType=2)
                    c += 1
                    cv2.imshow('Video', frame)
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break
            finally:
                # Free the camera and windows even if a frame fails midway.
                video_capture.release()
                cv2.destroyAllWindows()