Ejemplo n.º 1
0
def main(args):
    """Generate 48x48 positive / part / negative training crops.

    Restores P-Net and R-Net from ``args.pnet_model`` and
    ``args.rnet_model``, runs ``detect_face_24net`` on every image listed in
    ``wider_face_train.txt`` and, for each returned box that is at least
    48x48 pixels, saves the crop resized to 48x48 under ``48/negative``,
    ``48/positive`` or ``48/part`` depending on its best IoU against the
    annotated boxes (< 0.3, >= 0.65, >= 0.4 respectively; boxes between 0.3
    and 0.4 are dropped).  A label line is written to ``48/neg_48.txt``,
    ``48/pos_48.txt`` or ``48/part_48.txt``; positive and part lines also
    carry the four box-regression offsets.

    Each annotation line is parsed as
    ``<image path> x1 y1 x2 y2 [x1 y1 x2 y2 ...]`` split on single spaces.

    Args:
        args: namespace providing ``pnet_model`` and ``rnet_model``, the
            checkpoint paths handed to ``tf.train.Saver.restore``.
    """
    image_size = 48
    save_dir = str(image_size)
    anno_file = 'wider_face_train.txt'
    neg_save_dir = save_dir+'/negative'
    pos_save_dir = save_dir+'/positive'
    part_save_dir = save_dir+'/part'
    for directory in (save_dir, pos_save_dir, part_save_dir, neg_save_dir):
        if not os.path.exists(directory):
            os.mkdir(directory)

    threshold = [0.6, 0.6]
    with open(anno_file, 'r') as f:
        annotations = f.readlines()
    num = len(annotations)
    print('%d pics in total' % num)

    p_idx = 0  # positive
    n_idx = 0  # negative
    d_idx = 0  # dont care
    image_idx = 0
    minsize = 20
    factor = 0.709
    model_file_pnet = args.pnet_model
    model_file_rnet = args.rnet_model

    # The label files are managed by ``with`` so they are flushed and closed
    # even when detection or image I/O raises half-way through the dataset.
    with open(save_dir+'/pos_48.txt', 'w') as f1, \
            open(save_dir+'/neg_48.txt', 'w') as f2, \
            open(save_dir+'/part_48.txt', 'w') as f3:
        with tf.device('/gpu:0'), tf.Graph().as_default():
            config = tf.ConfigProto(allow_soft_placement=True)
            config.gpu_options.per_process_gpu_memory_fraction = 0.5
            config.gpu_options.allow_growth = True
            with tf.Session(config=config) as sess:
                image_pnet = tf.placeholder(tf.float32, [None, None, None, 3])
                pnet = PNet({'data': image_pnet}, mode='test')
                out_tensor_pnet = pnet.get_all_output()
                image_rnet = tf.placeholder(tf.float32, [None, 24, 24, 3])
                rnet = RNet({'data': image_rnet}, mode='test')
                out_tensor_rnet = rnet.get_all_output()

                # One saver per network: each checkpoint is restored only
                # into the variables whose name starts with that prefix.
                saver_pnet = tf.train.Saver([v for v in tf.global_variables()
                                             if v.name[0:4] == 'pnet'])
                saver_rnet = tf.train.Saver([v for v in tf.global_variables()
                                             if v.name[0:4] == 'rnet'])
                saver_pnet.restore(sess, model_file_pnet)
                saver_rnet.restore(sess, model_file_rnet)

                def pnet_fun(img): return sess.run(
                    out_tensor_pnet, feed_dict={image_pnet: img})

                def rnet_fun(img): return sess.run(
                    out_tensor_rnet, feed_dict={image_rnet: img})

                for annotation in annotations:
                    annotation = annotation.strip().split(' ')
                    bbox = list(map(float, annotation[1:]))
                    gts = np.array(bbox, dtype=np.float32).reshape(-1, 4)
                    # the first field is used as the image path as-is
                    img_path = annotation[0]
                    img = cv2.imread(img_path)
                    if img is None:
                        # cv2.imread returns None instead of raising when
                        # the file is missing or unreadable; produce no
                        # crops for it but keep the progress bar moving.
                        rectangles = []
                    else:
                        rectangles = detect_face_24net(img, minsize,
                                                       pnet_fun, rnet_fun,
                                                       threshold, factor)
                    image_idx += 1
                    view_bar(image_idx, num)
                    for box in rectangles:
                        lis = box.astype(np.int32)
                        # clamp negative values to 0
                        lis[lis < 0] = 0
                        x_left, y_top, x_right, y_bottom, _ = lis
                        crop_w = x_right - x_left + 1
                        crop_h = y_bottom - y_top + 1
                        # ignore boxes smaller than the target size
                        # NOTE(review): boxes past the right/bottom border
                        # are not rejected here; the slice below clips them.
                        if crop_w < image_size or crop_h < image_size:
                            continue

                        Iou = IoU(box, gts)
                        max_iou = np.max(Iou)
                        cropped_im = img[y_top: y_bottom+1, x_left: x_right+1]
                        resized_im = cv2.resize(cropped_im,
                                                (image_size, image_size),
                                                interpolation=cv2.INTER_LINEAR)

                        # save negative images and write label
                        if max_iou < 0.3:
                            # Iou with all gts must be below 0.3
                            save_file = os.path.join(neg_save_dir,
                                                     '%s.jpg' % n_idx)
                            f2.write('%s/negative/%s' %
                                     (image_size, n_idx) + ' 0\n')
                            cv2.imwrite(save_file, resized_im)
                            n_idx += 1
                            continue

                        # find gt_box with the highest iou
                        idx = np.argmax(Iou)
                        assigned_gt = gts[idx]
                        x1, y1, x2, y2 = assigned_gt

                        # bbox regression label: corner offsets relative to
                        # the crop, normalised by the crop size
                        offset_x1 = (x1 - x_left) / float(crop_w)
                        offset_y1 = (y1 - y_top) / float(crop_h)
                        offset_x2 = (x2 - x_right) / float(crop_w)
                        offset_y2 = (y2 - y_bottom) / float(crop_h)

                        if max_iou >= 0.65:
                            save_file = os.path.join(pos_save_dir,
                                                     '%s.jpg' % p_idx)
                            f1.write('%s/positive/%s' %
                                     (image_size, p_idx) +
                                     ' 1 %.2f %.2f %.2f %.2f\n' %
                                     (offset_x1, offset_y1,
                                      offset_x2, offset_y2))
                            cv2.imwrite(save_file, resized_im)
                            p_idx += 1

                        elif max_iou >= 0.4:
                            save_file = os.path.join(part_save_dir,
                                                     '%s.jpg' % d_idx)
                            f3.write('%s/part/%s' % (image_size, d_idx) +
                                     ' -1 %.2f %.2f %.2f %.2f\n' %
                                     (offset_x1, offset_y1,
                                      offset_x2, offset_y2))
                            cv2.imwrite(save_file, resized_im)
                            d_idx += 1
Ejemplo n.º 2
0
def main(args):
    """Detect faces in one image, annotate it, log the boxes and show it.

    Loads the MTCNN networks from ``args.model_dir`` (either three separate
    checkpoints or a single meta-graph + checkpoint pair, depending on how
    many paths ``get_model_filenames`` returns), runs ``detect_face`` on
    ``args.image_path``, draws every rectangle with ``rectangle[4]`` as its
    text label plus the five landmark points, appends the image path, face
    count and integer boxes to ``args.save_file`` and displays the result
    until a key is pressed.

    Args:
        args: namespace with ``image_path``, ``model_dir``, ``minsize``,
            ``threshold``, ``factor``, ``save_file``, ``save_image`` and
            ``save_name``.

    Raises:
        IOError: if ``args.image_path`` cannot be read as an image.
    """
    img = cv2.imread(args.image_path)
    if img is None:
        # cv2.imread returns None on failure; fail early with a clear
        # message instead of an obscure error inside the detector.
        raise IOError('cannot read image: %s' % args.image_path)
    file_paths = get_model_filenames(args.model_dir)
    count = 0
    with tf.device('/gpu:0'):
        with tf.Graph().as_default():
            config = tf.ConfigProto(allow_soft_placement=True)
            with tf.Session(config=config) as sess:
                if len(file_paths) == 3:
                    # Separate checkpoints: build each network, then restore
                    # it from its own file via a prefix-filtered saver.
                    image_pnet = tf.placeholder(
                        tf.float32, [None, None, None, 3])
                    pnet = PNet({'data': image_pnet}, mode='test')
                    out_tensor_pnet = pnet.get_all_output()

                    image_rnet = tf.placeholder(tf.float32, [None, 24, 24, 3])
                    rnet = RNet({'data': image_rnet}, mode='test')
                    out_tensor_rnet = rnet.get_all_output()

                    image_onet = tf.placeholder(tf.float32, [None, 48, 48, 3])
                    onet = ONet({'data': image_onet}, mode='test')
                    out_tensor_onet = onet.get_all_output()

                    saver_pnet = tf.train.Saver(
                                    [v for v in tf.global_variables()
                                     if v.name[0:5] == "pnet/"])
                    saver_rnet = tf.train.Saver(
                                    [v for v in tf.global_variables()
                                     if v.name[0:5] == "rnet/"])
                    saver_onet = tf.train.Saver(
                                    [v for v in tf.global_variables()
                                     if v.name[0:5] == "onet/"])

                    saver_pnet.restore(sess, file_paths[0])

                    def pnet_fun(img): return sess.run(
                        out_tensor_pnet, feed_dict={image_pnet: img})

                    saver_rnet.restore(sess, file_paths[1])

                    def rnet_fun(img): return sess.run(
                        out_tensor_rnet, feed_dict={image_rnet: img})

                    saver_onet.restore(sess, file_paths[2])

                    def onet_fun(img): return sess.run(
                        out_tensor_onet, feed_dict={image_onet: img})

                else:
                    # Combined model: import the saved graph and address the
                    # inputs/outputs by their tensor names.
                    saver = tf.train.import_meta_graph(file_paths[0])
                    saver.restore(sess, file_paths[1])

                    def pnet_fun(img): return sess.run(
                        ('softmax/Reshape_1:0',
                         'pnet/conv4-2/BiasAdd:0'),
                        feed_dict={
                            'Placeholder:0': img})

                    def rnet_fun(img): return sess.run(
                        ('softmax_1/softmax:0',
                         'rnet/conv5-2/rnet/conv5-2:0'),
                        feed_dict={
                            'Placeholder_1:0': img})

                    def onet_fun(img): return sess.run(
                        ('softmax_2/softmax:0',
                         'onet/conv6-2/onet/conv6-2:0',
                         'onet/conv6-3/onet/conv6-3:0'),
                        feed_dict={
                            'Placeholder_2:0': img})

                start_time = time.time()
                rectangles, points = detect_face(img, args.minsize,
                                                 pnet_fun, rnet_fun, onet_fun,
                                                 args.threshold, args.factor)
                duration = time.time() - start_time

                # one row of 10 landmark values per face after transposing
                points = np.transpose(points)
                for rectangle in rectangles:
                    cv2.putText(img, str(rectangle[4]),
                                (int(rectangle[0]), int(rectangle[1])),
                                cv2.FONT_HERSHEY_SIMPLEX,
                                0.5, (0, 255, 0))
                    cv2.rectangle(img, (int(rectangle[0]), int(rectangle[1])),
                                  (int(rectangle[2]), int(rectangle[3])),
                                  (255, 0, 0), 1)
                    count += 1
                for point in points:
                    for i in range(0, 10, 2):
                        cv2.circle(img, (int(point[i]), int(
                            point[i + 1])), 2, (0, 255, 0))
                print(duration)
                print(type(rectangles))
                print(args.image_path)
                print(count)
                print(np.int_(rectangles))
                data = [args.image_path, "\n", str(count), "\n",
                        str(np.int_(rectangles)), "\n"]
                # ``with`` guarantees the appended record is flushed and the
                # handle released before the blocking imshow/waitKey below.
                with open(args.save_file, "a+") as log_file:
                    log_file.writelines(data)
                cv2.imshow("test", img)
                if args.save_image:
                    cv2.imwrite(args.save_name, img)
                if cv2.waitKey(0) & 0xFF == ord('q'):
                    cv2.destroyAllWindows()
Ejemplo n.º 3
0
def detect_frame(capture_count, img, file_paths, minsize, threshold, factor,
                 save_path):
    """Run MTCNN on one frame, export face crops and an annotated copy.

    A fresh graph and session are created on every call.  The networks are
    loaded either from three separate checkpoints (``len(file_paths) == 3``)
    or from a meta-graph / checkpoint pair.  For every detected face the box
    is widened by 16 pixels on each side (clipped to the image), cropped,
    resized to 160x160 and written to ``./datasets/mtcnn_160_face/img/``;
    the crop path and box are logged to a randomly numbered file under
    ``./datasets/mtcnn_160_face/bbox/``.  Finally boxes and landmarks are
    drawn onto ``img`` in place and the result is written to
    ``save_path + str(capture_count) + '.jpg'``.

    Args:
        capture_count: frame index, used in the output file names.
        img: image array passed to ``detect_face`` and drawn on in place.
        file_paths: model file paths (three checkpoints, or meta + ckpt).
        minsize, threshold, factor: forwarded unchanged to ``detect_face``.
        save_path: prefix of the annotated output image.

    Returns:
        The ``rectangles`` array returned by ``detect_face``.
    """
    output_dir_img = './datasets/mtcnn_160_face/img/'
    if not os.path.exists(output_dir_img):
        os.makedirs(output_dir_img)

    with tf.device('/gpu:0'):
        with tf.Graph().as_default():
            session_config = tf.ConfigProto()
            session_config.gpu_options.allow_growth = True
            with tf.Session(config=session_config) as sess:
                if len(file_paths) == 3:
                    # Build the three networks in P, R, O order.
                    image_pnet = tf.placeholder(tf.float32,
                                                [None, None, None, 3])
                    out_tensor_pnet = PNet({'data': image_pnet},
                                           mode='test').get_all_output()

                    image_rnet = tf.placeholder(tf.float32, [None, 24, 24, 3])
                    out_tensor_rnet = RNet({'data': image_rnet},
                                           mode='test').get_all_output()

                    image_onet = tf.placeholder(tf.float32, [None, 48, 48, 3])
                    out_tensor_onet = ONet({'data': image_onet},
                                           mode='test').get_all_output()

                    # One saver per network, selected by variable prefix.
                    all_vars = tf.global_variables()
                    saver_pnet = tf.train.Saver(
                        [v for v in all_vars if v.name.startswith("pnet/")])
                    saver_rnet = tf.train.Saver(
                        [v for v in all_vars if v.name.startswith("rnet/")])
                    saver_onet = tf.train.Saver(
                        [v for v in all_vars if v.name.startswith("onet/")])

                    saver_pnet.restore(sess, file_paths[0])
                    saver_rnet.restore(sess, file_paths[1])
                    saver_onet.restore(sess, file_paths[2])

                    def pnet_fun(batch):
                        return sess.run(out_tensor_pnet,
                                        feed_dict={image_pnet: batch})

                    def rnet_fun(batch):
                        return sess.run(out_tensor_rnet,
                                        feed_dict={image_rnet: batch})

                    def onet_fun(batch):
                        return sess.run(out_tensor_onet,
                                        feed_dict={image_onet: batch})

                else:
                    # Combined model: tensors are addressed by name.
                    saver = tf.train.import_meta_graph(file_paths[0])
                    saver.restore(sess, file_paths[1])

                    pnet_outputs = ('softmax/Reshape_1:0',
                                    'pnet/conv4-2/BiasAdd:0')
                    rnet_outputs = ('softmax_1/softmax:0',
                                    'rnet/conv5-2/rnet/conv5-2:0')
                    onet_outputs = ('softmax_2/softmax:0',
                                    'onet/conv6-2/onet/conv6-2:0',
                                    'onet/conv6-3/onet/conv6-3:0')

                    def pnet_fun(batch):
                        return sess.run(pnet_outputs,
                                        feed_dict={'Placeholder:0': batch})

                    def rnet_fun(batch):
                        return sess.run(rnet_outputs,
                                        feed_dict={'Placeholder_1:0': batch})

                    def onet_fun(batch):
                        return sess.run(onet_outputs,
                                        feed_dict={'Placeholder_2:0': batch})

                random_key = np.random.randint(0, high=99999)
                output_dir_bbox = './datasets/mtcnn_160_face/bbox/'
                if not os.path.exists(output_dir_bbox):
                    os.makedirs(output_dir_bbox)
                bounding_boxes_filename = os.path.join(
                    output_dir_bbox, 'bounding_boxes_%05d.txt' % random_key)

                with open(bounding_boxes_filename, "w") as text_file:
                    tic = time.time()
                    rectangles, points = detect_face(img, minsize, pnet_fun,
                                                     rnet_fun, onet_fun,
                                                     threshold, factor)
                    duration = time.time() - tic

                    print("detect time:", duration)

                    nrof_faces = rectangles.shape[0]
                    if nrof_faces > 0:
                        img_size = np.asarray(img.shape)[0:2]
                        half_margin = 32 / 2
                        for face_idx in range(nrof_faces):
                            # first four columns are the box corners
                            corners = np.squeeze(rectangles[face_idx, 0:4])
                            output_filename = "{}{}{}{}{}".format(
                                output_dir_img, capture_count, '_',
                                face_idx, '.jpg')
                            # widen the box, clipped to the image bounds
                            bb = np.zeros(4, dtype=np.int32)
                            bb[0] = np.maximum(corners[0] - half_margin, 0)
                            bb[1] = np.maximum(corners[1] - half_margin, 0)
                            bb[2] = np.minimum(corners[2] + half_margin,
                                               img_size[1])
                            bb[3] = np.minimum(corners[3] + half_margin,
                                               img_size[0])
                            cropped = img[bb[1]:bb[3], bb[0]:bb[2], :]
                            scaled = misc.imresize(cropped, (160, 160),
                                                   interp='bilinear')
                            scaled = cv2.cvtColor(scaled, cv2.COLOR_BGR2RGB)
                            misc.imsave(output_filename, scaled)
                            text_file.write(
                                '%s %d %d %d %d\n' %
                                (output_filename, bb[0], bb[1], bb[2], bb[3]))
                    else:
                        print('NO FACE in capture %d' % (capture_count))
                        text_file.write('%s\n' % (output_dir_img))

                # Draw boxes (labelled with rectangle[4]) and landmarks.
                points = np.transpose(points)
                for rectangle in rectangles:
                    top_left = (int(rectangle[0]), int(rectangle[1]))
                    bottom_right = (int(rectangle[2]), int(rectangle[3]))
                    cv2.putText(img, str(rectangle[4]), top_left,
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0))
                    cv2.rectangle(img, top_left, bottom_right,
                                  (255, 0, 0), 2)
                for point in points:
                    for k in range(0, 10, 2):
                        cv2.circle(img, (int(point[k]), int(point[k + 1])),
                                   4, (255, 0, 255),
                                   thickness=2)
                cv2.imwrite(save_path + str(capture_count) + '.jpg', img)

    return rectangles
def _camera_detection_loop(args, video_capture, pnet_fun, rnet_fun, onet_fun,
                           fw, output_directory, source_directory):
    """Detect faces frame by frame until 'q' is pressed or reading fails.

    Args:
        args: namespace with ``net``, ``resize``, ``minsize``, ``threshold``
            and ``factor``.
        video_capture: an opened ``cv2.VideoCapture``.
        pnet_fun, rnet_fun, onet_fun: network callables for the detectors.
        fw: open text file for box coordinates, or ``None`` to skip logging.
        output_directory: where annotated frames go (used when ``fw`` is set).
        source_directory: where raw frames go, or ``None`` to skip.

    Returns:
        ``(frame_count, total_detect_ms)``.

    Raises:
        ValueError: if ``args.net`` is not "ALL", "PR" or "P".
    """
    frame_count = 0
    total_detect_ms = 0.0

    while True:
        ret, frame = video_capture.read()
        if not ret:
            # No frame was delivered (stream ended / device lost); stop
            # instead of dereferencing a missing frame or spinning forever.
            break

        # keep an untouched copy only when raw frames are to be saved,
        # because add_overlays below is handed ``frame`` itself
        original_img = frame.copy() if source_directory else None

        width = int(video_capture.get(cv2.CAP_PROP_FRAME_WIDTH)*args.resize)
        height = int(video_capture.get(cv2.CAP_PROP_FRAME_HEIGHT)*args.resize)
        resized_image = cv2.resize(frame, (width, height))

        points = None
        start_time = time.time()*1000

        # P-Net + R-Net + O-Net
        if args.net == "ALL":
            rectangles, points = detect_face(resized_image, args.minsize,
                                             pnet_fun, rnet_fun, onet_fun,
                                             args.threshold, args.factor)
        # P-Net + R-Net without faces' landmarks
        elif args.net == "PR":
            rectangles = detect_face_24net(resized_image, args.minsize,
                                           pnet_fun, rnet_fun,
                                           args.threshold, args.factor)
        # Only P-Net
        elif args.net == "P":
            rectangles = detect_face_12net(resized_image, args.minsize,
                                           pnet_fun, args.threshold,
                                           args.factor)
        else:
            raise ValueError("unsupported net: %r" % (args.net,))

        end_time = time.time()*1000
        elapsed_ms = end_time - start_time
        total_detect_ms = total_detect_ms + elapsed_ms

        if points is not None:
            # The outputs of O-Net which are faces' landmarks
            points = np.transpose(points)

        # time.time() can be coarse enough to report 0 ms for a fast
        # detection; report 0 fps rather than dividing by zero.
        fps = 1000/elapsed_ms if elapsed_ms > 0 else 0.0
        add_overlays(frame, rectangles, points, fps,
                     1/args.resize, 1/args.resize)
        cv2.imshow("MTCNN-Tensorflow wangbm", frame)

        print("ID: {:d}, cost time: {:.1f}ms".format(frame_count, elapsed_ms))

        if points is not None:
            # scale landmarks back to the coordinates of the unresized frame
            for point in points:
                for i in range(0, 10, 2):
                    point[i] = point[i] * (1/args.resize)
                    point[i+1] = point[i+1] * (1/args.resize)
                    print("\tID: {:d}, face landmarks x = {:.1f}, y = {:.1f}".format(int(i/2+1), point[i], point[i+1]))

        if fw is not None:
            outputFilePath = os.path.join(output_directory,
                                          str(frame_count) + ".jpg")
            cv2.imwrite(outputFilePath, frame)
            for rectangle in rectangles:
                fw.write('{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'.format(str(frame_count), rectangle[4], rectangle[0], rectangle[1], rectangle[2], rectangle[3]))
            # the file stays open for the following frames; main() closes it

        if source_directory:
            sourceFilePath = os.path.join(source_directory,
                                          str(frame_count) + ".jpg")
            cv2.imwrite(sourceFilePath, original_img)

        frame_count = frame_count + 1

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cv2.destroyAllWindows()
    return frame_count, total_detect_ms


def main(args):
    """Run MTCNN face detection live on camera 0.

    Optionally stores annotated frames plus a ``*_dets.txt`` box log in
    ``args.save_image`` and raw frames in ``args.save_camera_images``; both
    directories are deleted recursively and recreated first.  ``args.net``
    selects the cascade: "ALL" (P+R+O with landmarks), "PR" or "P".
    Prints the average detection time and fps when the loop ends.

    Args:
        args: namespace with ``save_image``, ``save_bbox_coordinates``,
            ``save_camera_images``, ``model_dir``, ``net``, ``resize``,
            ``minsize``, ``threshold`` and ``factor``.
    """
    if args.net not in ("ALL", "PR", "P"):
        # Reject a bad selection before any directory is wiped or any
        # model is loaded.
        print("ERROR: WRONG NET INPUT")
        return

    fw = None
    output_directory = None
    source_directory = None

    # If result images are requested, recreate the output directory from
    # scratch and open the box-coordinate log inside it.
    if args.save_image:
        output_directory = args.save_image
        print(args.save_image)
        if os.path.exists(output_directory):
            shutil.rmtree(output_directory)
        os.mkdir(output_directory)
        fw = open(os.path.join(output_directory, args.save_bbox_coordinates + '_dets.txt'), 'w')

    try:
        # Same procedure for the directory receiving the raw camera frames.
        if args.save_camera_images:
            source_directory = args.save_camera_images
            if os.path.exists(source_directory):
                shutil.rmtree(source_directory)
            os.mkdir(source_directory)

        with tf.device('/cpu:0'), tf.Graph().as_default():
            config = tf.ConfigProto(allow_soft_placement=True)
            with tf.Session(config=config) as sess:

                file_paths = get_model_filenames(args.model_dir)
                print(file_paths, len(file_paths))

                # Three paths mean separate P-Net, R-Net and O-Net
                # checkpoints (e.g. "--model_dir model/separate"); otherwise
                # a combined meta-graph + checkpoint pair is loaded.
                if len(file_paths) == 3:
                    image_pnet = tf.placeholder(
                        tf.float32, [None, None, None, 3])
                    pnet = PNet({'data': image_pnet}, mode='test')
                    out_tensor_pnet = pnet.get_all_output()

                    image_rnet = tf.placeholder(tf.float32, [None, 24, 24, 3])
                    rnet = RNet({'data': image_rnet}, mode='test')
                    out_tensor_rnet = rnet.get_all_output()

                    image_onet = tf.placeholder(tf.float32, [None, 48, 48, 3])
                    onet = ONet({'data': image_onet}, mode='test')
                    out_tensor_onet = onet.get_all_output()

                    saver_pnet = tf.train.Saver(
                        [v for v in tf.global_variables()
                         if v.name[0:5] == "pnet/"])
                    saver_rnet = tf.train.Saver(
                        [v for v in tf.global_variables()
                         if v.name[0:5] == "rnet/"])
                    saver_onet = tf.train.Saver(
                        [v for v in tf.global_variables()
                         if v.name[0:5] == "onet/"])

                    saver_pnet.restore(sess, file_paths[0])

                    def pnet_fun(img): return sess.run(
                        out_tensor_pnet, feed_dict={image_pnet: img})

                    saver_rnet.restore(sess, file_paths[1])

                    def rnet_fun(img): return sess.run(
                        out_tensor_rnet, feed_dict={image_rnet: img})

                    saver_onet.restore(sess, file_paths[2])

                    def onet_fun(img): return sess.run(
                        out_tensor_onet, feed_dict={image_onet: img})

                else:
                    saver = tf.train.import_meta_graph(file_paths[0])
                    saver.restore(sess, file_paths[1])

                    def pnet_fun(img): return sess.run(
                        ('softmax/Reshape_1:0',
                         'pnet/conv4-2/BiasAdd:0'),
                        feed_dict={
                            'Placeholder:0': img})

                    def rnet_fun(img): return sess.run(
                        ('softmax_1/softmax:0',
                         'rnet/conv5-2/rnet/conv5-2:0'),
                        feed_dict={
                            'Placeholder_1:0': img})

                    def onet_fun(img): return sess.run(
                        ('softmax_2/softmax:0',
                         'onet/conv6-2/onet/conv6-2:0',
                         'onet/conv6-3/onet/conv6-3:0'),
                        feed_dict={
                            'Placeholder_2:0': img})

                video_capture = cv2.VideoCapture(0)
                try:
                    print(video_capture.get(cv2.CAP_PROP_FRAME_WIDTH), video_capture.get(cv2.CAP_PROP_FRAME_HEIGHT))

                    if not video_capture.isOpened():
                        print("ERROR: NO VIDEO STREAM OR NO CAMERA DEVICE.")
                    else:
                        print(video_capture.get(cv2.CAP_PROP_FPS))

                        frameCount, detect_totalTime = _camera_detection_loop(
                            args, video_capture,
                            pnet_fun, rnet_fun, onet_fun,
                            fw, output_directory, source_directory)

                        print("*" * 50)
                        # guard both divisions: no frame may have been read,
                        # and the summed time may be 0 on a coarse clock
                        if frameCount > 0:
                            detect_average_time = detect_totalTime/frameCount
                            print("detection average time: " + str(detect_average_time) + "ms" )
                            if detect_average_time > 0:
                                print("detection fps: " + str(1000/detect_average_time))
                finally:
                    # release the camera even if detection raised
                    video_capture.release()
    finally:
        if fw is not None:
            fw.close()
def main(args):
    """Generate 24x24 positive / part / negative training crops.

    Restores P-Net from ``args.pnet_model``, runs ``detect_face_12net`` on
    every image listed in ``wider_face_train.txt`` (resolved as
    ``WIDER_train/images/<name>.jpg``) and, for each returned box that is at
    least 24x24 pixels, saves the crop resized to 24x24 under
    ``24/negative``, ``24/positive`` or ``24/part`` depending on its best
    IoU against the annotated boxes (< 0.3, >= 0.65, >= 0.4 respectively;
    boxes between 0.3 and 0.4 are dropped).  A label line is written to
    ``24/neg_24.txt``, ``24/pos_24.txt`` or ``24/part_24.txt``; positive and
    part lines also carry the four box-regression offsets.

    Each annotation line is parsed as
    ``<image name> x1 y1 x2 y2 [x1 y1 x2 y2 ...]`` split on single spaces.

    Args:
        args: namespace providing ``pnet_model``, the checkpoint path handed
            to ``tf.train.Saver.restore``.
    """
    image_size = 24
    save_dir = str(image_size)
    anno_file = 'wider_face_train.txt'
    im_dir = 'WIDER_train/images/'

    neg_save_dir = save_dir+'/negative'
    pos_save_dir = save_dir+'/positive'
    part_save_dir = save_dir+'/part'
    for directory in (save_dir, pos_save_dir, part_save_dir, neg_save_dir):
        if not os.path.exists(directory):
            os.mkdir(directory)

    threshold = 0.6
    with open(anno_file, 'r') as f:
        annotations = f.readlines()
    num = len(annotations)
    print('%d pics in total' % num)

    p_idx = 0  # positive
    n_idx = 0  # negative
    d_idx = 0  # dont care
    image_idx = 0
    minsize = 20
    factor = 0.709
    model_file = args.pnet_model

    # The label files are managed by ``with`` so they are flushed and closed
    # even when detection or image I/O raises half-way through the dataset.
    with open(save_dir+'/pos_24.txt', 'w') as f1, \
            open(save_dir+'/neg_24.txt', 'w') as f2, \
            open(save_dir+'/part_24.txt', 'w') as f3:
        with tf.device('/gpu:0'), tf.Graph().as_default():
            config = tf.ConfigProto(allow_soft_placement=True)
            config.gpu_options.per_process_gpu_memory_fraction = 0.5
            with tf.Session(config=config) as sess:
                image = tf.placeholder(tf.float32, [None, None, None, 3])
                pnet = PNet({'data': image}, mode='test')
                out_tensor = pnet.get_all_output()
                init_op = tf.global_variables_initializer()
                sess.run(init_op)
                saver = tf.train.Saver()
                saver.restore(sess, model_file)

                def pnet_fun(img): return sess.run(
                    out_tensor, feed_dict={image: img})

                for annotation in annotations:
                    annotation = annotation.strip().split(' ')
                    bbox = list(map(float, annotation[1:]))
                    gts = np.array(bbox, dtype=np.float32).reshape(-1, 4)
                    img_path = im_dir + annotation[0] + '.jpg'
                    img = cv2.imread(img_path)
                    if img is None:
                        # cv2.imread returns None instead of raising when
                        # the file is missing or unreadable; produce no
                        # crops for it but keep the progress bar moving.
                        rectangles = []
                    else:
                        rectangles = detect_face_12net(img, minsize, pnet_fun,
                                                       threshold, factor)
                    image_idx += 1

                    view_bar(image_idx, num)
                    for box in rectangles:
                        lis = box.astype(np.int32)
                        # clamp negative values to 0
                        lis[lis < 0] = 0
                        x_left, y_top, x_right, y_bottom, _ = lis
                        crop_w = x_right - x_left + 1
                        crop_h = y_bottom - y_top + 1
                        # ignore boxes smaller than the target size
                        # NOTE(review): boxes past the right/bottom border
                        # are not rejected here; the slice below clips them.
                        if crop_w < image_size or crop_h < image_size:
                            continue

                        Iou = IoU(box, gts)
                        max_iou = np.max(Iou)
                        cropped_im = img[y_top: y_bottom+1, x_left: x_right+1]
                        resized_im = cv2.resize(cropped_im,
                                                (image_size, image_size),
                                                interpolation=cv2.INTER_LINEAR)

                        # save negative images and write label
                        if max_iou < 0.3:
                            # Iou with all gts must be below 0.3
                            save_file = os.path.join(neg_save_dir,
                                                     '%s.jpg' % n_idx)
                            f2.write('%s/negative/%s' %
                                     (save_dir, n_idx) + ' 0\n')
                            cv2.imwrite(save_file, resized_im)
                            n_idx += 1
                            continue

                        # find gt_box with the highest iou
                        idx = np.argmax(Iou)
                        assigned_gt = gts[idx]
                        x1, y1, x2, y2 = assigned_gt

                        # bbox regression label: corner offsets relative to
                        # the crop, normalised by the crop size
                        offset_x1 = (x1 - x_left) / float(crop_w)
                        offset_y1 = (y1 - y_top) / float(crop_h)
                        offset_x2 = (x2 - x_right) / float(crop_w)
                        offset_y2 = (y2 - y_bottom) / float(crop_h)

                        if max_iou >= 0.65:
                            save_file = os.path.join(pos_save_dir,
                                                     '%s.jpg' % p_idx)
                            f1.write('%s/positive/%s' % (save_dir, p_idx) +
                                     ' 1 %.2f %.2f %.2f %.2f\n' %
                                     (offset_x1, offset_y1,
                                      offset_x2, offset_y2))
                            cv2.imwrite(save_file, resized_im)
                            p_idx += 1

                        elif max_iou >= 0.4:
                            save_file = os.path.join(part_save_dir,
                                                     '%s.jpg' % d_idx)
                            f3.write('%s/part/%s' % (save_dir, d_idx) +
                                     ' -1 %.2f %.2f %.2f %.2f\n' %
                                     (offset_x1, offset_y1,
                                      offset_x2, offset_y2))
                            cv2.imwrite(save_file, resized_im)
                            d_idx += 1
def main(args):
    """Read the input image and build the P-Net/R-Net/O-Net callables.

    ``args.image_path`` is read with ``cv2.imread`` and ``args.model_dir`` is
    resolved to model file paths with ``get_model_filenames``.  When three
    paths come back, each network is built from scratch and restored from its
    own checkpoint; otherwise the first path is imported as a meta graph and
    the second is restored as its checkpoint, and the networks are addressed
    by tensor name.

    NOTE(review): the body visible here ends once ``pnet_fun``, ``rnet_fun``
    and ``onet_fun`` are defined; ``img`` and the callables are not used
    afterwards -- confirm whether the detection step was lost.

    Args:
        args: namespace providing ``image_path`` and ``model_dir``.
    """
    img = cv2.imread(args.image_path)
    print("\n"+"LOCATION!!!image get"+"\n")
    file_paths = get_model_filenames(args.model_dir)
    with tf.device('/gpu:0'):
        with tf.Graph().as_default():
            config = tf.ConfigProto(allow_soft_placement=True)
            with tf.Session(config=config) as sess:
                print("\n"+"LOCATION!!!tf config done"+"\n")
                if len(file_paths) == 3:
                    print("\n"+"LOCATION!!!file_paths(model_dir)=3"+"\n")
                    image_pnet = tf.placeholder(
                        tf.float32, [None, None, None, 3])
                    pnet = PNet({'data': image_pnet}, mode='test')
                    out_tensor_pnet = pnet.get_all_output()

                    image_rnet = tf.placeholder(tf.float32, [None, 24, 24, 3])
                    rnet = RNet({'data': image_rnet}, mode='test')
                    out_tensor_rnet = rnet.get_all_output()

                    image_onet = tf.placeholder(tf.float32, [None, 48, 48, 3])
                    onet = ONet({'data': image_onet}, mode='test')
                    out_tensor_onet = onet.get_all_output()

                    print("\n"+"LOCATION!!!placeholder and out_tensor done"+"\n")

                    # One saver per network so that each checkpoint only
                    # touches the variables under its own name scope.
                    saver_pnet = tf.train.Saver(
                        [v for v in tf.global_variables()
                         if v.name.startswith("pnet/")])
                    saver_rnet = tf.train.Saver(
                        [v for v in tf.global_variables()
                         if v.name.startswith("rnet/")])
                    saver_onet = tf.train.Saver(
                        [v for v in tf.global_variables()
                         if v.name.startswith("onet/")])

                    print("\n"+"LOCATION!!!saver done"+"\n")

                    # This statement used to be indented with tabs inside a
                    # space-indented block, which Python 3 rejects.
                    saver_pnet.restore(sess, file_paths[0])

                    def pnet_fun(img):
                        return sess.run(out_tensor_pnet,
                                        feed_dict={image_pnet: img})

                    saver_rnet.restore(sess, file_paths[1])

                    def rnet_fun(img):
                        return sess.run(out_tensor_rnet,
                                        feed_dict={image_rnet: img})

                    saver_onet.restore(sess, file_paths[2])

                    def onet_fun(img):
                        return sess.run(out_tensor_onet,
                                        feed_dict={image_onet: img})
                    print("\n"+"LOCATION!!!def net_fun done"+"\n")

                else:
                    print("LOCATION!!!ifile_paths(model_dir)!=3"+"\n")
                    saver = tf.train.import_meta_graph(file_paths[0])
                    saver.restore(sess, file_paths[1])

                    # The imported graph is addressed by tensor name.
                    def pnet_fun(img):
                        return sess.run(
                            ('softmax/Reshape_1:0',
                             'pnet/conv4-2/BiasAdd:0'),
                            feed_dict={'Placeholder:0': img})

                    def rnet_fun(img):
                        return sess.run(
                            ('softmax_1/softmax:0',
                             'rnet/conv5-2/rnet/conv5-2:0'),
                            feed_dict={'Placeholder_1:0': img})

                    def onet_fun(img):
                        return sess.run(
                            ('softmax_2/softmax:0',
                             'onet/conv6-2/onet/conv6-2:0',
                             'onet/conv6-3/onet/conv6-3:0'),
                            feed_dict={'Placeholder_2:0': img})
Ejemplo n.º 7
0
def detect_frame(dist_thre, capture_count, nrof_successfully_aligned, img,
                 img_list, emb_list, file_paths, minsize, threshold, factor,
                 save_path):
    """Detect faces in one frame, save unseen ones and annotate the frame.

    The MTCNN networks are loaded from ``file_paths`` and run through
    ``detect_face``.  Each detected face is cropped with a 16-pixel margin,
    resized to 160x160 and embedded with the facenet model.  The embedding is
    compared (Euclidean distance) against ``emb_list``; a face with no
    embedding closer than ``dist_thre`` is saved under
    ``./datasets/mtcnn_160/img/`` and appended to ``img_list``/``emb_list``.
    Bounding boxes are written to a text file under
    ``./datasets/mtcnn_160/bbox/`` and the frame, with boxes, scores and
    landmarks drawn on it, is written to ``save_path + str(capture_count) +
    '.jpg'``.

    Args:
        dist_thre: distance below which a face counts as already saved.
        capture_count: number of the current capture, used in file names and
            log messages.
        nrof_successfully_aligned: running count of saved faces; the next
            saved face is named after the incremented value.
        img: frame to process; it is drawn on in place.  Presumably a BGR
            array as produced by OpenCV -- TODO confirm with the caller.
        img_list: list receiving the reshaped image of every new face.
        emb_list: list of embeddings of the faces saved so far; extended in
            place.
        file_paths: model file paths; three entries select the separate
            P-Net/R-Net/O-Net checkpoints, otherwise a meta graph plus
            checkpoint is expected.
        minsize, threshold, factor: forwarded unchanged to ``detect_face``.
        save_path: prefix of the annotated output image path.

    Returns:
        Tuple ``(rectangles, nrof_successfully_aligned, img_list, emb_list)``.
    """
    output_dir_img = './datasets/mtcnn_160/img/'
    if not os.path.exists(output_dir_img):
        os.makedirs(output_dir_img)
    with tf.device('/gpu:0'):
        with tf.Graph().as_default():
            config = tf.ConfigProto()
            config.gpu_options.allow_growth = True
            with tf.Session(config=config) as sess:
                if len(file_paths) == 3:
                    image_pnet = tf.placeholder(tf.float32,
                                                [None, None, None, 3])
                    pnet = PNet({'data': image_pnet}, mode='test')
                    out_tensor_pnet = pnet.get_all_output()

                    image_rnet = tf.placeholder(tf.float32, [None, 24, 24, 3])
                    rnet = RNet({'data': image_rnet}, mode='test')
                    out_tensor_rnet = rnet.get_all_output()

                    image_onet = tf.placeholder(tf.float32, [None, 48, 48, 3])
                    onet = ONet({'data': image_onet}, mode='test')
                    out_tensor_onet = onet.get_all_output()

                    # One saver per network so each checkpoint only restores
                    # the variables under its own name scope.
                    saver_pnet = tf.train.Saver([
                        v for v in tf.global_variables()
                        if v.name[0:5] == "pnet/"
                    ])
                    saver_rnet = tf.train.Saver([
                        v for v in tf.global_variables()
                        if v.name[0:5] == "rnet/"
                    ])
                    saver_onet = tf.train.Saver([
                        v for v in tf.global_variables()
                        if v.name[0:5] == "onet/"
                    ])

                    saver_pnet.restore(sess, file_paths[0])

                    def pnet_fun(img):
                        return sess.run(out_tensor_pnet,
                                        feed_dict={image_pnet: img})

                    saver_rnet.restore(sess, file_paths[1])

                    def rnet_fun(img):
                        return sess.run(out_tensor_rnet,
                                        feed_dict={image_rnet: img})

                    saver_onet.restore(sess, file_paths[2])

                    def onet_fun(img):
                        return sess.run(out_tensor_onet,
                                        feed_dict={image_onet: img})

                else:
                    saver = tf.train.import_meta_graph(file_paths[0])
                    saver.restore(sess, file_paths[1])

                    def pnet_fun(img):
                        return sess.run(
                            ('softmax/Reshape_1:0', 'pnet/conv4-2/BiasAdd:0'),
                            feed_dict={'Placeholder:0': img})

                    def rnet_fun(img):
                        return sess.run(('softmax_1/softmax:0',
                                         'rnet/conv5-2/rnet/conv5-2:0'),
                                        feed_dict={'Placeholder_1:0': img})

                    def onet_fun(img):
                        return sess.run(('softmax_2/softmax:0',
                                         'onet/conv6-2/onet/conv6-2:0',
                                         'onet/conv6-3/onet/conv6-3:0'),
                                        feed_dict={'Placeholder_2:0': img})

                # Add a random key to the filename to allow alignment using
                # multiple processes
                random_key = np.random.randint(0, high=99999)
                output_dir_bbox = './datasets/mtcnn_160/bbox/'
                if not os.path.exists(output_dir_bbox):
                    os.makedirs(output_dir_bbox)
                bounding_boxes_filename = os.path.join(
                    output_dir_bbox, 'bounding_boxes_%05d.txt' % random_key)

                with open(bounding_boxes_filename, "w") as text_file:
                    start_time = time.time()
                    rectangles, points = detect_face(img, minsize, pnet_fun,
                                                     rnet_fun, onet_fun,
                                                     threshold, factor)
                    duration = time.time() - start_time

                    print("detect time:", duration)
                    print(type(rectangles))

                    nrof_faces = rectangles.shape[0]
                    if nrof_faces > 0:
                        facenet.load_model(
                            '20180408-102900/20180408-102900.pb')
                        # Alternative model:
                        # '20190218-164145/20190218-164145.pb'
                        graph = tf.get_default_graph()
                        image_placeholder = graph.get_tensor_by_name(
                            "input:0")
                        embeddings = graph.get_tensor_by_name("embeddings:0")
                        phase_train_placeholder = graph.get_tensor_by_name(
                            "phase_train:0")
                        embedding_size = embeddings.get_shape()[1]

                        face_boxes = rectangles[:, 0:4]
                        img_size = np.asarray(img.shape)[0:2]

                        for i, face_box in enumerate(face_boxes):
                            output_filename = "{}{}{}".format(
                                output_dir_img, capture_count, '.png')
                            face_box = np.squeeze(face_box)
                            # Grow the box by 16 px per side, clipped to the
                            # image borders.
                            bb = np.zeros(4, dtype=np.int32)
                            bb[0] = np.maximum(face_box[0] - 32 / 2, 0)
                            bb[1] = np.maximum(face_box[1] - 32 / 2, 0)
                            bb[2] = np.minimum(face_box[2] + 32 / 2,
                                               img_size[1])
                            bb[3] = np.minimum(face_box[3] + 32 / 2,
                                               img_size[0])
                            cropped = img[bb[1]:bb[3], bb[0]:bb[2], :]
                            scaled = misc.imresize(cropped, (160, 160),
                                                   interp='bilinear')
                            scaled = cv2.cvtColor(scaled, cv2.COLOR_BGR2RGB)
                            image = facenet.prewhiten(scaled)
                            image_reshaped = image.reshape(-1, 160, 160, 3)
                            emb_temp = np.zeros((1, embedding_size))
                            emb_temp[0, :] = sess.run(
                                embeddings,
                                feed_dict={
                                    image_placeholder: image_reshaped,
                                    phase_train_placeholder: False
                                })[0]

                            if len(os.listdir(output_dir_img)) == 0:
                                nrof_successfully_aligned += 1
                                output_peoplename = "{}{}{}".format(
                                    output_dir_img, nrof_successfully_aligned,
                                    '.png')
                                misc.imsave(output_peoplename, scaled)
                                print("\n save new.")
                                img_list.append(image_reshaped)
                                emb_list.append(emb_temp[0, :])
                            else:
                                x = len(os.listdir(output_dir_img))
                                is_exist = False
                                print(i + 1, 'face in capture', capture_count,
                                      ':')
                                # Slice instead of indexing by the file
                                # count: the directory may hold more files
                                # than there are embeddings (e.g. images left
                                # from a previous run), which used to raise
                                # IndexError.
                                for known_emb in emb_list[:x]:
                                    dist = np.sqrt(
                                        np.sum(
                                            np.square(
                                                np.subtract(
                                                    emb_temp[0, :],
                                                    known_emb))))
                                    print(' %1.4f  ' % dist, end='')
                                    if (dist < dist_thre and dist > 0):
                                        print("\n already existed.")
                                        is_exist = True
                                        break

                                if not is_exist:
                                    nrof_successfully_aligned += 1
                                    output_peoplename = "{}{}{}".format(
                                        output_dir_img,
                                        nrof_successfully_aligned, '.png')
                                    misc.imsave(output_peoplename, scaled)
                                    print("\n save new.")
                                    emb_list.append(emb_temp[0, :])
                                    img_list.append(image_reshaped)

                            text_file.write(
                                '%s %d %d %d %d\n' %
                                (output_filename, bb[0], bb[1], bb[2], bb[3]))
                    else:
                        print('NO FACE in capture %d' % (capture_count))
                        text_file.write('%s\n' % (output_dir_img))

            # Draw score, box and the five landmark points of every
            # detection on the frame, then store the annotated frame.
            points = np.transpose(points)
            for rectangle in rectangles:
                cv2.putText(img, str(rectangle[4]),
                            (int(rectangle[0]), int(rectangle[1])),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0))
                cv2.rectangle(img, (int(rectangle[0]), int(rectangle[1])),
                              (int(rectangle[2]), int(rectangle[3])),
                              (255, 0, 0), 2)
            for point in points:
                for i in range(0, 10, 2):
                    cv2.circle(img, (int(point[i]), int(point[i + 1])),
                               4, (255, 0, 255),
                               thickness=2)
            cv2.imwrite(save_path + str(capture_count) + '.jpg', img)

    return rectangles, nrof_successfully_aligned, img_list, emb_list
Ejemplo n.º 8
0
def main(args):
    """Run MTCNN face detection on every image found in ``args.image_path``.

    Each image is resized by ``args.resize``, passed through the networks
    selected by ``args.net`` ("ALL", "PR" or "P"), annotated with
    ``add_overlays`` and shown in a window; pressing 'q' stops the run.  When
    ``args.save_image`` is set it is used as the output directory: the
    directory is wiped and recreated, annotated images are written to it and
    every detection is appended to ``<args.save_bbox_coordinates>_dets.txt``.
    Average detection time and fps are printed at the end.

    Args:
        args: namespace providing ``save_image``, ``save_bbox_coordinates``,
            ``model_dir``, ``image_path``, ``resize``, ``net``, ``minsize``,
            ``threshold`` and ``factor``.
    """
    detect_totalTime = 0.0
    frameCount = 0
    fw = None

    # Does there need store result images or not
    # If yes, any existing output directory is deleted recursively and then
    # recreated, so results of earlier runs never mix with this one.
    if args.save_image:
        output_directory = args.save_image
        print(args.save_image)
        if os.path.exists(output_directory):
            shutil.rmtree(output_directory)
        os.mkdir(output_directory)
        fw = open(
            os.path.join(output_directory,
                         args.save_bbox_coordinates + '_dets.txt'), 'w')

    try:
        with tf.device('/cpu:0'):
            with tf.Graph().as_default():
                config = tf.ConfigProto(allow_soft_placement=True)
                with tf.Session(config=config) as sess:

                    file_paths = get_model_filenames(args.model_dir)

                    # Three paths mean separate P-Net, R-Net and O-Net
                    # checkpoints (e.g. "--model_dir model/separate");
                    # otherwise a single meta graph plus checkpoint is used.
                    if len(file_paths) == 3:
                        image_pnet = tf.placeholder(tf.float32,
                                                    [None, None, None, 3])
                        pnet = PNet({'data': image_pnet}, mode='test')
                        out_tensor_pnet = pnet.get_all_output()

                        image_rnet = tf.placeholder(tf.float32,
                                                    [None, 24, 24, 3])
                        rnet = RNet({'data': image_rnet}, mode='test')
                        out_tensor_rnet = rnet.get_all_output()

                        image_onet = tf.placeholder(tf.float32,
                                                    [None, 48, 48, 3])
                        onet = ONet({'data': image_onet}, mode='test')
                        out_tensor_onet = onet.get_all_output()

                        saver_pnet = tf.train.Saver([
                            v for v in tf.global_variables()
                            if v.name[0:5] == "pnet/"
                        ])
                        saver_rnet = tf.train.Saver([
                            v for v in tf.global_variables()
                            if v.name[0:5] == "rnet/"
                        ])
                        saver_onet = tf.train.Saver([
                            v for v in tf.global_variables()
                            if v.name[0:5] == "onet/"
                        ])

                        saver_pnet.restore(sess, file_paths[0])

                        def pnet_fun(img):
                            return sess.run(out_tensor_pnet,
                                            feed_dict={image_pnet: img})

                        saver_rnet.restore(sess, file_paths[1])

                        def rnet_fun(img):
                            return sess.run(out_tensor_rnet,
                                            feed_dict={image_rnet: img})

                        saver_onet.restore(sess, file_paths[2])

                        def onet_fun(img):
                            return sess.run(out_tensor_onet,
                                            feed_dict={image_onet: img})

                    else:
                        saver = tf.train.import_meta_graph(file_paths[0])
                        saver.restore(sess, file_paths[1])

                        def pnet_fun(img):
                            return sess.run(
                                ('softmax/Reshape_1:0',
                                 'pnet/conv4-2/BiasAdd:0'),
                                feed_dict={'Placeholder:0': img})

                        def rnet_fun(img):
                            return sess.run(
                                ('softmax_1/softmax:0',
                                 'rnet/conv5-2/rnet/conv5-2:0'),
                                feed_dict={'Placeholder_1:0': img})

                        def onet_fun(img):
                            return sess.run(
                                ('softmax_2/softmax:0',
                                 'onet/conv6-2/onet/conv6-2:0',
                                 'onet/conv6-3/onet/conv6-3:0'),
                                feed_dict={'Placeholder_2:0': img})

                    for filename in os.listdir(args.image_path):

                        img = cv2.imread(
                            os.path.join(args.image_path, filename))
                        # cv2.imread returns None for anything it cannot
                        # decode (sub-directories, non-image files).
                        if img is None:
                            print("WARNING: cannot read image, skipped: " +
                                  filename)
                            continue
                        height, width, _ = img.shape
                        width = int(width * args.resize)
                        height = int(height * args.resize)
                        resized_image = cv2.resize(img, (width, height))

                        start_time = time.time() * 1000

                        # P-Net + R-Net + O-Net
                        if args.net == "ALL":
                            rectangles, points = detect_face(
                                resized_image, args.minsize, pnet_fun,
                                rnet_fun, onet_fun, args.threshold,
                                args.factor)

                        # P-Net + R-Net without faces' landmarks
                        elif args.net == "PR":
                            rectangles = detect_face_24net(
                                resized_image, args.minsize, pnet_fun,
                                rnet_fun, args.threshold, args.factor)

                        # Only P-Net
                        elif args.net == "P":
                            rectangles = detect_face_12net(
                                resized_image, args.minsize, pnet_fun,
                                args.threshold, args.factor)

                        else:
                            # No detector ran, so there is nothing to draw
                            # or save; stop instead of failing on the
                            # undefined ``rectangles`` below.
                            print("ERROR: WRONG NET INPUT")
                            break

                        end_time = time.time() * 1000
                        detect_totalTime = detect_totalTime + (end_time -
                                                               start_time)

                        print(
                            str(frameCount) + " time : " +
                            str(end_time - start_time) + "ms")

                        if args.net == "ALL":
                            # The outputs of O-Net which are faces' landmarks
                            points = np.transpose(points)
                        else:
                            points = None  # the others

                        # Detection ran on the resized image, so overlays
                        # are scaled back by 1 / resize onto the original.
                        add_overlays(img, rectangles, points,
                                     1000 / (end_time - start_time),
                                     1 / args.resize, 1 / args.resize)
                        cv2.imshow("MTCNN-Tenssorflow wangbm", img)
                        frameCount = frameCount + 1

                        if args.save_image:
                            outputFilePath = os.path.join(output_directory,
                                                          filename)
                            cv2.imwrite(outputFilePath, img)
                            # splitext keeps the stem intact for extensions
                            # that are not exactly three characters long.
                            image_id = os.path.splitext(filename)[0]
                            for rectangle in rectangles:
                                fw.write(
                                    '{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'
                                    .format(image_id, rectangle[4],
                                            rectangle[0], rectangle[1],
                                            rectangle[2], rectangle[3]))

                        if cv2.waitKey(1) & 0xFF == ord('q'):
                            cv2.destroyAllWindows()
                            break
    finally:
        # Close the detections file even when detection raises.
        if fw is not None:
            fw.close()

    print("*" * 50)
    if frameCount == 0:
        # Nothing was processed; avoid dividing by zero below.
        print("detection average time: n/a (no image processed)")
        return
    detect_average_time = detect_totalTime / frameCount
    print("detection average time: " + str(detect_average_time) + "ms")
    print("detection fps: " + str(1000 / detect_average_time))
def main(args):
    """Run MTCNN face detection on frames grabbed from camera device 0.

    Every frame is resized to 640x360, passed through the networks selected
    by ``args.net`` ("ALL", "PR" or "P"), annotated with scores, boxes and
    (for "ALL") landmarks, and shown in a window; pressing 'q' stops the
    loop.  When ``args.save_image`` is set, annotated frames are written to
    ``args.save_path`` as ``<frame index>.jpg``; the directory is wiped and
    recreated first.  Average detection time and fps are printed at the end.

    Args:
        args: namespace providing ``save_image``, ``save_path``,
            ``model_dir``, ``net``, ``minsize``, ``threshold`` and
            ``factor``.
    """
    detect_totalTime = 0.0
    frameCount = 0

    if args.save_image:
        output_directory = args.save_path
        print(args.save_image)
        # Always recreate the directory: previously an existing directory
        # was removed and never created again, so no frame could be saved.
        if os.path.exists(output_directory):
            shutil.rmtree(output_directory)
        os.mkdir(output_directory)

    with tf.device('/cpu:0'):
        with tf.Graph().as_default():
            config = tf.ConfigProto(allow_soft_placement=True)
            with tf.Session(config=config) as sess:

                file_paths = get_model_filenames(args.model_dir)

                if len(file_paths) == 3:
                    image_pnet = tf.placeholder(tf.float32,
                                                [None, None, None, 3])
                    pnet = PNet({'data': image_pnet}, mode='test')
                    out_tensor_pnet = pnet.get_all_output()

                    image_rnet = tf.placeholder(tf.float32, [None, 24, 24, 3])
                    rnet = RNet({'data': image_rnet}, mode='test')
                    out_tensor_rnet = rnet.get_all_output()

                    image_onet = tf.placeholder(tf.float32, [None, 48, 48, 3])
                    onet = ONet({'data': image_onet}, mode='test')
                    out_tensor_onet = onet.get_all_output()

                    saver_pnet = tf.train.Saver([
                        v for v in tf.global_variables()
                        if v.name[0:5] == "pnet/"
                    ])
                    saver_rnet = tf.train.Saver([
                        v for v in tf.global_variables()
                        if v.name[0:5] == "rnet/"
                    ])
                    saver_onet = tf.train.Saver([
                        v for v in tf.global_variables()
                        if v.name[0:5] == "onet/"
                    ])

                    saver_pnet.restore(sess, file_paths[0])

                    def pnet_fun(img):
                        return sess.run(out_tensor_pnet,
                                        feed_dict={image_pnet: img})

                    saver_rnet.restore(sess, file_paths[1])

                    def rnet_fun(img):
                        return sess.run(out_tensor_rnet,
                                        feed_dict={image_rnet: img})

                    saver_onet.restore(sess, file_paths[2])

                    def onet_fun(img):
                        return sess.run(out_tensor_onet,
                                        feed_dict={image_onet: img})

                else:
                    saver = tf.train.import_meta_graph(file_paths[0])
                    saver.restore(sess, file_paths[1])

                    def pnet_fun(img):
                        return sess.run(
                            ('softmax/Reshape_1:0', 'pnet/conv4-2/BiasAdd:0'),
                            feed_dict={'Placeholder:0': img})

                    def rnet_fun(img):
                        return sess.run(('softmax_1/softmax:0',
                                         'rnet/conv5-2/rnet/conv5-2:0'),
                                        feed_dict={'Placeholder_1:0': img})

                    def onet_fun(img):
                        return sess.run(('softmax_2/softmax:0',
                                         'onet/conv6-2/onet/conv6-2:0',
                                         'onet/conv6-3/onet/conv6-3:0'),
                                        feed_dict={'Placeholder_2:0': img})

                video_capture = cv2.VideoCapture(0)

                if not video_capture.isOpened():
                    print("ERROR: NO VIDEO STREAM OR NO CAMERA DEVICE.")

                else:

                    # Property ids 3 and 4 are the capture frame width and
                    # height.
                    video_capture.set(3, 1280)
                    video_capture.set(4, 720)

                    try:
                        while True:

                            ret, frame = video_capture.read()

                            if not ret:
                                # Without this the loop would spin forever
                                # once the camera stops delivering frames.
                                print("ERROR: FAILED TO READ FRAME "
                                      "FROM CAMERA.")
                                break

                            resized_image = cv2.resize(frame, (640, 360))

                            start_time = time.time() * 1000

                            # P-Net + R-Net + O-Net
                            if args.net == "ALL":
                                rectangles, points = detect_face(
                                    resized_image, args.minsize, pnet_fun,
                                    rnet_fun, onet_fun, args.threshold,
                                    args.factor)

                            # P-Net + R-Net without faces' landmarks
                            elif args.net == "PR":
                                rectangles = detect_face_24net(
                                    resized_image, args.minsize, pnet_fun,
                                    rnet_fun, args.threshold, args.factor)

                            # Only P-Net
                            elif args.net == "P":
                                rectangles = detect_face_12net(
                                    resized_image, args.minsize, pnet_fun,
                                    args.threshold, args.factor)

                            else:
                                # No detector ran; stop instead of failing
                                # on the undefined ``rectangles`` below.
                                print("ERROR: WRONG NET INPUT")
                                break
                            end_time = time.time() * 1000
                            detect_totalTime = detect_totalTime + (end_time -
                                                                   start_time)

                            print(
                                str(frameCount) + " time : " +
                                str(end_time - start_time) + "ms")

                            if args.net == "ALL":
                                # The outputs of O-Net which are faces'
                                # landmarks
                                points = np.transpose(points)
                            for rectangle in rectangles:
                                cv2.putText(
                                    resized_image, str(rectangle[4]),
                                    (int(rectangle[0]), int(rectangle[1])),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0))
                                cv2.rectangle(
                                    resized_image,
                                    (int(rectangle[0]), int(rectangle[1])),
                                    (int(rectangle[2]), int(rectangle[3])),
                                    (255, 0, 0), 1)

                            if args.net == "ALL":
                                for point in points:
                                    for i in range(0, 10, 2):
                                        cv2.circle(
                                            resized_image,
                                            (int(point[i]), int(point[i + 1])),
                                            2, (0, 255, 0))
                            cv2.imshow("MTCNN-Tensorflow wangbm",
                                       resized_image)

                            if args.save_image:
                                outputFilePath = os.path.join(
                                    output_directory,
                                    str(frameCount) + ".jpg")
                                cv2.imwrite(outputFilePath, resized_image)

                            # Count the frame before the quit check so the
                            # frame whose time was just added is included
                            # in the average.
                            frameCount = frameCount + 1

                            if cv2.waitKey(1) & 0xFF == ord('q'):
                                cv2.destroyAllWindows()
                                break
                    finally:
                        # Free the camera even when detection raises.
                        video_capture.release()

                    if frameCount == 0:
                        # Nothing was timed; avoid dividing by zero.
                        print("detection average time: n/a "
                              "(no frame processed)")
                    else:
                        detect_average_time = detect_totalTime / frameCount
                        print("detection average time: " +
                              str(detect_average_time) + "ms")
                        print("detection fps: " +
                              str(1 / (detect_average_time / 1000)))
def _save_sample(label_file, sample_dir, index, image, label):
    """Write one training crop to disk and append its line to a label file.

    The crop is stored as ``<index>.jpg`` inside ``sample_dir``. The line
    appended to ``label_file`` is that path followed by ``label``, which must
    carry its own leading space and trailing newline.
    """
    save_file = os.path.join(sample_dir, str(index) + '.jpg')
    label_file.write(save_file + label)
    cv2.imwrite(save_file, image)


def main(annotation_fp, image_dir, model_fp, output_dir):
    """Generate 24x24 training crops from PNet detections.

    Every annotated image is run through a PNet restored from ``model_fp``.
    Each detected box that is at least 24 pixels wide and high is cropped,
    resized to 24x24 and sorted by its highest IoU with the ground truth:

    * IoU < 0.3   -> ``negative`` (label ``0``)
    * IoU >= 0.65 -> ``positive`` (label ``1`` plus four box offsets)
    * IoU >= 0.4  -> ``part``     (label ``-1`` plus four box offsets)

    Boxes with an IoU in [0.3, 0.4) are dropped.

    Args:
        annotation_fp: Path of the annotation text file. Each line is split
            on single spaces; field 0 is an image path relative to
            ``image_dir`` and fields 1-4 are the ground-truth box
            ``x1 y1 x2 y2``.
        image_dir: Directory the annotated image paths are relative to.
        model_fp: Checkpoint path handed to ``tf.train.Saver.restore``.
        output_dir: Directory under which ``24/`` is created, holding the
            ``positive``, ``negative`` and ``part`` image folders and the
            ``pos_24.txt``, ``neg_24.txt`` and ``part_24.txt`` label files.
    """
    image_size = 24
    save_dir = os.path.join(output_dir, str(image_size))

    neg_save_dir = save_dir + '/negative'
    pos_save_dir = save_dir + '/positive'
    part_save_dir = save_dir + '/part'
    # makedirs (not mkdir) so that a missing output_dir is created as well.
    for directory in (save_dir, pos_save_dir, part_save_dir, neg_save_dir):
        if not os.path.exists(directory):
            os.makedirs(directory)

    threshold = 0.6
    minsize = 20
    factor = 0.709

    p_idx = 0  # positive
    n_idx = 0  # negative
    d_idx = 0  # dont care
    image_idx = 0

    # The label files are held by a with-statement so they are flushed and
    # closed even when detection fails part-way through the data set.
    with open(save_dir + '/pos_24.txt', 'w') as f1, \
            open(save_dir + '/neg_24.txt', 'w') as f2, \
            open(save_dir + '/part_24.txt', 'w') as f3:
        with open(annotation_fp, 'r') as f:
            annotations = f.readlines()
        num = len(annotations)
        print('%d pics in total' % num)

        # Same nesting order as before: device scope first, then a new graph.
        with tf.device('/gpu:0'), tf.Graph().as_default():
            config = tf.ConfigProto(allow_soft_placement=True)
            config.gpu_options.per_process_gpu_memory_fraction = 0.5
            with tf.Session(config=config) as sess:
                image = tf.placeholder(tf.float32, [None, None, None, 3])
                pnet = PNet({'data': image}, mode='test')
                out_tensor = pnet.get_all_output()
                init_op = tf.global_variables_initializer()
                sess.run(init_op)
                saver = tf.train.Saver()
                saver.restore(sess, model_fp)

                def pnet_fun(img):
                    return sess.run(out_tensor, feed_dict={image: img})

                for annotation in annotations:
                    annotation = annotation.strip().split(' ')
                    bbox = list(map(float, annotation[1:5]))
                    gts = np.array(bbox, dtype=np.float32).reshape(-1, 4)
                    img_path = os.path.join(image_dir, annotation[0])
                    img = cv2.imread(img_path)
                    if img is None:
                        # cv2.imread reports an unreadable file by returning
                        # None; skip it instead of aborting the whole run.
                        print('\nskipping unreadable image: %s' % img_path)
                        rectangles = []
                    else:
                        rectangles = detect_face_12net(img, minsize, pnet_fun,
                                                       threshold, factor)
                    image_idx += 1

                    view_bar(image_idx, num)
                    for box in rectangles:
                        # Integer pixel coordinates; negative values are
                        # clamped to the top/left image edge.
                        lis = box.astype(np.int32)
                        lis[lis < 0] = 0
                        x_left, y_top, x_right, y_bottom, _ = lis
                        crop_w = x_right - x_left + 1
                        crop_h = y_bottom - y_top + 1
                        # ignore box that is too small
                        if crop_w < image_size or crop_h < image_size:
                            continue

                        cropped_im = img[y_top:y_bottom + 1,
                                         x_left:x_right + 1]
                        # A box lying completely past the right/bottom border
                        # yields an empty crop, which cv2.resize rejects.
                        if cropped_im.size == 0:
                            continue

                        iou = IoU(box, gts)
                        max_iou = np.max(iou)
                        resized_im = cv2.resize(cropped_im,
                                                (image_size, image_size),
                                                interpolation=cv2.INTER_LINEAR)

                        # IoU with all gts must be below 0.3 for a negative
                        if max_iou < 0.3:
                            _save_sample(f2, neg_save_dir, n_idx, resized_im,
                                         ' 0\n')
                            n_idx += 1
                            continue

                        # find gt_box with the highest iou
                        x1, y1, x2, y2 = gts[np.argmax(iou)]

                        # bbox regression label: corner offsets of the
                        # ground truth relative to the crop, in crop units
                        reg_label = ' %.2f %.2f %.2f %.2f\n' % (
                            (x1 - x_left) / float(crop_w),
                            (y1 - y_top) / float(crop_h),
                            (x2 - x_right) / float(crop_w),
                            (y2 - y_bottom) / float(crop_h))

                        if max_iou >= 0.65:
                            _save_sample(f1, pos_save_dir, p_idx, resized_im,
                                         ' 1' + reg_label)
                            p_idx += 1
                        elif max_iou >= 0.4:
                            _save_sample(f3, part_save_dir, d_idx, resized_im,
                                         ' -1' + reg_label)
                            d_idx += 1
Ejemplo n.º 11
0
def main(args):
    """Run the detection cascade over ``mAP/images`` and score it with mAP.

    For every ``mAP/images/*.jpg`` the image is blacked out except for a
    fixed region of interest, passed to ``detect_face`` and the resulting
    rectangles are written to ``mAP/predicted/<image name>.txt``, one line
    per rectangle: ``head`` followed by ``rectangle[4]`` and
    ``rectangle[0]`` .. ``rectangle[3]`` (presumably the confidence and the
    box corners; confirm against ``detect_face``). Afterwards
    ``python main.py -na`` is executed inside ``mAP/``.

    When no image is found an error is printed and the function returns
    before any model is loaded or the evaluation is started.

    Args:
        args: Object providing ``model_dir`` (passed to
            ``get_model_filenames``) and ``minsize``, ``threshold`` and
            ``factor`` (passed through to ``detect_face``).
    """
    # get image list; fail fast before touching the (expensive) models
    jpg_list = glob.glob(r'mAP/images/*.jpg')
    if not jpg_list:
        print("Error: no .jpg files found in ground-truth")
        return

    file_paths = get_model_filenames(args.model_dir)

    with tf.device('/gpu:2'):
        with tf.Graph().as_default():
            config = tf.ConfigProto(allow_soft_placement=True)
            with tf.Session(config=config) as sess:
                print("LOCATION!!!tf config done" + "\n")
                if len(file_paths) == 3:
                    # Three paths: one checkpoint per network, restored into
                    # freshly built PNet / RNet / ONet graphs.
                    print("LOCATION!!!file_paths(model_dir)=3" + "\n")
                    image_pnet = tf.placeholder(tf.float32,
                                                [None, None, None, 3])
                    pnet = PNet({'data': image_pnet}, mode='test')
                    out_tensor_pnet = pnet.get_all_output()

                    image_rnet = tf.placeholder(tf.float32, [None, 24, 24, 3])
                    rnet = RNet({'data': image_rnet}, mode='test')
                    out_tensor_rnet = rnet.get_all_output()

                    image_onet = tf.placeholder(tf.float32, [None, 48, 48, 3])
                    onet = ONet({'data': image_onet}, mode='test')
                    out_tensor_onet = onet.get_all_output()

                    print("LOCATION!!!placeholder and out_tensor done" + "\n")

                    # Each saver only handles the variables of its own
                    # network, selected by the variable-name prefix.
                    saver_pnet = tf.train.Saver([
                        v for v in tf.global_variables()
                        if v.name[0:5] == "pnet/"
                    ])
                    saver_rnet = tf.train.Saver([
                        v for v in tf.global_variables()
                        if v.name[0:5] == "rnet/"
                    ])
                    saver_onet = tf.train.Saver([
                        v for v in tf.global_variables()
                        if v.name[0:5] == "onet/"
                    ])

                    saver_pnet.restore(sess, file_paths[0])

                    print("LOCATION!!!saver done" + "\n")

                    def pnet_fun(img):
                        return sess.run(out_tensor_pnet,
                                        feed_dict={image_pnet: img})

                    saver_rnet.restore(sess, file_paths[1])

                    def rnet_fun(img):
                        return sess.run(out_tensor_rnet,
                                        feed_dict={image_rnet: img})

                    saver_onet.restore(sess, file_paths[2])

                    def onet_fun(img):
                        return sess.run(out_tensor_onet,
                                        feed_dict={image_onet: img})

                    print("LOCATION!!!def net_fun done" + "\n")

                else:
                    # Otherwise file_paths[0] is imported as a meta graph and
                    # file_paths[1] restored into it; tensors are then
                    # addressed by name.
                    print("LOCATION!!!ifile_paths(model_dir)!=3" + "\n")
                    saver = tf.train.import_meta_graph(file_paths[0])
                    saver.restore(sess, file_paths[1])

                    def pnet_fun(img):
                        return sess.run(
                            ('softmax/Reshape_1:0', 'pnet/conv4-2/BiasAdd:0'),
                            feed_dict={'Placeholder:0': img})

                    def rnet_fun(img):
                        return sess.run(('softmax_1/softmax:0',
                                         'rnet/conv5-2/rnet/conv5-2:0'),
                                        feed_dict={'Placeholder_1:0': img})

                    def onet_fun(img):
                        return sess.run(('softmax_2/softmax:0',
                                         'onet/conv6-2/onet/conv6-2:0',
                                         'onet/conv6-3/onet/conv6-3:0'),
                                        feed_dict={'Placeholder_2:0': img})

                # Region of interest as [row_start, row_end, col_start,
                # col_end], matching how it is used to slice the image below.
                ROI_idx = [0, 300, 40, 310]
                for tmp_file in jpg_list:
                    img = cv2.imread(tmp_file)
                    if img is None:
                        # cv2.imread returns None for an unreadable file.
                        print("Error: could not read image " + tmp_file)
                        continue

                    # Black out everything outside the ROI.
                    ROI_temp = img[ROI_idx[0]:ROI_idx[1],
                                   ROI_idx[2]:ROI_idx[3]].copy()
                    img[:, :, :] = 0
                    img[ROI_idx[0]:ROI_idx[1],
                        ROI_idx[2]:ROI_idx[3]] = ROI_temp

                    # Derive the prediction file from the path components
                    # rather than substring replacement, so names that
                    # contain "jpg" or "images" are not mangled.
                    stem_path = os.path.splitext(tmp_file)[0]
                    txt_filename = os.path.join(
                        "mAP", "predicted",
                        os.path.basename(stem_path) + ".txt")
                    print("LOACTION!!!predict:" + stem_path + ".txt")

                    rectangles, _ = detect_face(img, args.minsize,
                                                pnet_fun, rnet_fun,
                                                onet_fun, args.threshold,
                                                args.factor)

                    with open(txt_filename, 'w') as result_file:
                        for rectangle in rectangles:
                            fields = [str(rectangle[i])
                                      for i in (4, 0, 1, 2, 3)]
                            result_file.write(
                                "head " + " ".join(fields) + "\n")
                print(ROI_idx)
                os.chdir("mAP/")
                os.system("python main.py -na")
Ejemplo n.º 12
0
    def __init__(self):
        file_paths = get_model_filenames('save_model/all_in_one')
        with tf.Session() as sess:
            if len(file_paths) == 3:
                image_pnet = tf.placeholder(tf.float32, [None, None, None, 3])
                pnet = PNet({'data': image_pnet}, mode='test')
                out_tensor_pnet = pnet.get_all_output()

                image_rnet = tf.placeholder(tf.float32, [None, 24, 24, 3])
                rnet = RNet({'data': image_rnet}, mode='test')
                out_tensor_rnet = rnet.get_all_output()

                image_onet = tf.placeholder(tf.float32, [None, 48, 48, 3])
                onet = ONet({'data': image_onet}, mode='test')
                out_tensor_onet = onet.get_all_output()

                saver_pnet = tf.train.Saver([
                    v for v in tf.global_variables() if v.name[0:5] == "pnet/"
                ])
                saver_rnet = tf.train.Saver([
                    v for v in tf.global_variables() if v.name[0:5] == "rnet/"
                ])
                saver_onet = tf.train.Saver([
                    v for v in tf.global_variables() if v.name[0:5] == "onet/"
                ])

                saver_pnet.restore(sess, file_paths[0])

                def pnet_fun(img):
                    return sess.run(out_tensor_pnet,
                                    feed_dict={image_pnet: img})

                saver_rnet.restore(sess, file_paths[1])

                def rnet_fun(img):
                    return sess.run(out_tensor_rnet,
                                    feed_dict={image_rnet: img})

                saver_onet.restore(sess, file_paths[2])

                def onet_fun(img):
                    return sess.run(out_tensor_onet,
                                    feed_dict={image_onet: img})

            else:
                saver = tf.train.import_meta_graph(file_paths[0])
                saver.restore(sess, file_paths[1])

                def pnet_fun(img):
                    return sess.run(
                        ('softmax/Reshape_1:0', 'pnet/conv4-2/BiasAdd:0'),
                        feed_dict={'Placeholder:0': img})

                def rnet_fun(img):
                    return sess.run(
                        ('softmax_1/softmax:0', 'rnet/conv5-2/rnet/conv5-2:0'),
                        feed_dict={'Placeholder_1:0': img})

                def onet_fun(img):
                    return sess.run(
                        ('softmax_2/softmax:0', 'onet/conv6-2/onet/conv6-2:0',
                         'onet/conv6-3/onet/conv6-3:0'),
                        feed_dict={'Placeholder_2:0': img})