Example #1
def test_pnet(img, min_img_size, net_size, net):
    norm_img = (img.copy() - 127.5) / 127.5
    h, w, c = norm_img.shape
    scales = gen_scales(w, h, min_img_size, net_size)
    rects = []
    for scale in scales:
        sh = int(h * scale)
        sw = int(w * scale)
        scale_img = cv2.resize(norm_img, (sw, sh))
        scale_img = cv2.transpose(scale_img)
        scale_img = np.swapaxes(scale_img, 0, 2)
        net.blobs['data'].reshape(1, 3, sh, sw)
        net.blobs['data'].data[...] = scale_img
        out = net.forward()
        label_prob = out[config.NET_OUTPUTS['pnet']['label']][0]
        bbox = out[config.NET_OUTPUTS['pnet']['bbox']][0]
        out_h, out_w = label_prob[1].shape
        out_side = max(out_h, out_w)
        rect = tools.detect_face_12net(label_prob[1], bbox, out_side,
                                       1 / scale, w, h, 0.65)
        rects += rect

    rects = tools.NMS(rects, 0.7, 'iou')
    return rects
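
A minimal usage sketch for Example #1 follows. The prototxt/caffemodel file names are hypothetical, and gen_scales, config and tools are assumed to come from the surrounding project; only test_pnet itself is taken from the example above.

import caffe
import cv2

caffe.set_mode_cpu()
# Hypothetical model files; substitute your own PNet definition and weights.
net = caffe.Net('pnet.prototxt', 'pnet.caffemodel', caffe.TEST)
img = cv2.imread('test.jpg')
rects = test_pnet(img, min_img_size=20, net_size=12, net=net)
print('%d candidate boxes' % len(rects))
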
Example #2
def detectFace(img_path, threshold):
    img = cv2.imread(img_path)
    caffe_img = img.copy() - 128
    origin_h, origin_w, ch = caffe_img.shape
    scales = tools.calculateScales(img)
    out = []
    for scale in scales:
        hs = int(origin_h * scale)
        ws = int(origin_w * scale)
        scale_img = cv2.resize(caffe_img, (ws, hs))
        scale_img = np.swapaxes(scale_img, 0, 2)
        net_12.blobs['data'].reshape(1, 3, ws, hs)
        net_12.blobs['data'].data[...] = scale_img
        caffe.set_device(0)
        caffe.set_mode_gpu()
        out_ = net_12.forward()
        out.append(out_)
    image_num = len(scales)
    rectangles = []
    for i in range(image_num):
        cls_prob = out[i]['cls_score'][0][1]
        roi = out[i]['conv4-2'][0]
        out_h, out_w = cls_prob.shape
        out_side = max(out_h, out_w)
        rectangle = tools.detect_face_12net(cls_prob, roi, out_side,
                                            1 / scales[i], origin_w, origin_h,
                                            threshold[0])
        rectangles.extend(rectangle)
    return rectangles
Example #3
def detectFace(img_path, threshold):
    img = cv2.imread(img_path)
    caffe_img = img.copy() - 128
    origin_h, origin_w, ch = caffe_img.shape
    scales = tools.calculateScales(img)
    out = []
    for scale in scales:
        hs = int(origin_h * scale)
        ws = int(origin_w * scale)
        scale_img = cv2.resize(caffe_img, (ws, hs))
        scale_img = np.swapaxes(scale_img, 0, 2)
        net_12.blobs['data'].reshape(1, 3, ws, hs)
        net_12.blobs['data'].data[...] = scale_img
        # caffe.set_device(0)
        # caffe.set_mode_gpu()
        caffe.set_mode_cpu()
        out_ = net_12.forward()
        out.append(out_)
    image_num = len(scales)
    rectangles = []
    for i in range(image_num):
        cls_prob = out[i]['prob1'][0][1]
        roi = out[i]['conv4-2'][0]
        out_h, out_w = cls_prob.shape
        out_side = max(out_h, out_w)
        rectangle = tools.detect_face_12net(cls_prob, roi, out_side,
                                            1 / scales[i], origin_w, origin_h,
                                            threshold[0])
        rectangles.extend(rectangle)
    return rectangles
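
The reshape to (1, 3, ws, hs) in Examples #2 and #3 looks transposed at first glance, but it matches what np.swapaxes(scale_img, 0, 2) actually produces. A self-contained shape check:

import numpy as np

# OpenCV images are (H, W, C); swapping axes 0 and 2 yields (C, W, H),
# which is why the data blob is (1, 3, ws, hs) rather than (1, 3, hs, ws).
hs, ws = 180, 320
scale_img = np.zeros((hs, ws, 3), dtype=np.float32)
blob = np.swapaxes(scale_img, 0, 2)
assert blob.shape == (3, ws, hs)
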
Example #4
def detectFace(img_path, threshold):
    img = cv2.imread(img_path)
    caffe_img = img.copy() - 128
    origin_h, origin_w, ch = caffe_img.shape
    scales = tools.calculateScales(img)
    out = []
    for scale in scales:
        hs = int(origin_h * scale)
        ws = int(origin_w * scale)
        scale_img = cv2.resize(caffe_img, (ws, hs))
        scale_img = np.swapaxes(scale_img, 0, 2)
        net_12.blobs['data'].reshape(1, 3, ws, hs)
        net_12.blobs['data'].data[...] = scale_img
        caffe.set_device(0)
        caffe.set_mode_gpu()
        out_ = net_12.forward()
        out.append(out_)
    image_num = len(scales)
    rectangles = []
    for i in range(image_num):
        cls_prob = out[i]['prob1'][0][1]
        roi = out[i]['conv4-2'][0]
        out_h, out_w = cls_prob.shape
        out_side = max(out_h, out_w)
        rectangle = tools.detect_face_12net(cls_prob, roi, out_side,
                                            1 / scales[i], origin_w, origin_h,
                                            threshold[0])
        rectangles.extend(rectangle)
    if len(rectangles) == 0:
        return rectangles
    net_24.blobs['data'].reshape(len(rectangles), 3, 24, 24)
    crop_number = 0
    for rectangle in rectangles:
        crop_img = caffe_img[int(rectangle[1]):int(rectangle[3]),
                             int(rectangle[0]):int(rectangle[2])]
        scale_img = cv2.resize(crop_img, (24, 24))
        scale_img = np.swapaxes(scale_img, 0, 2)
        net_24.blobs['data'].data[crop_number] = scale_img
        crop_number += 1
    out = net_24.forward()
    cls_prob = out['prob1']
    roi_prob = out['conv5-2']
    rectangles = tools.filter_face_24net(cls_prob, roi_prob, rectangles,
                                         origin_w, origin_h, threshold[1])
    return rectangles
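
Examples #2 to #4 assume the globals net_12 (and, in #4, net_24) are already constructed. A minimal loading sketch, with hypothetical file names:

import caffe

caffe.set_mode_cpu()
# Hypothetical file names; use the prototxt/caffemodel pairs that ship with
# your MTCNN checkout.
net_12 = caffe.Net('det1.prototxt', 'det1.caffemodel', caffe.TEST)
net_24 = caffe.Net('det2.prototxt', 'det2.caffemodel', caffe.TEST)
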
Example #5
def detectFace(img_path, threshold):
    fs = cv2.FileStorage(img_path, cv2.FileStorage_READ)
    img = fs.getNode('depth').mat()
    fs.release()

    if img is None:
        print("Fail to read XML file: " + depth_path)
        return None

    origin_h, origin_w = img.shape
    scales = tools.calculateScales(img)
    out = []
    for scale in scales:
        hs = int(origin_h * scale)
        ws = int(origin_w * scale)
        net_12.blobs['data'].reshape(1, 1, hs, ws)
        net_12.blobs['data'].data[...] = preprocess(img, (ws, hs))
        out_ = net_12.forward()
        out.append(out_)
    image_num = len(scales)
    rectangles = []
    for i in range(image_num):
        cls_prob = out[i]['prob1'][0][1]
        # print "cls_prob.shape: ", cls_prob.shape
        roi = out[i]['conv4-2'][0]
        out_h, out_w = cls_prob.shape
        # print "out_h: ", out_h
        # print "out_w: ", out_w
        out_side = max(out_h, out_w)
        rectangle = tools.detect_face_12net(cls_prob, roi, out_side,
                                            1 / scales[i], origin_w, origin_h,
                                            threshold)
        rectangles.extend(rectangle)

    rectangles = tools.NMS(rectangles, 0.7, 'iou')

    if len(rectangles) == 0:
        print "rect drop to 0 at 12net"
        return rectangles

    rectangles_new = []
    for rectangle in rectangles:
        rectangles_new.append(rectangular2square(rectangle, img))
    return rectangles_new
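
Examples #5 and #6 call rectangular2square, which is not shown. A plausible sketch, under the assumption that a rectangle is [x1, y1, x2, y2, score] and should be expanded to a square clipped to the image bounds:

def rectangular2square(rectangle, img):
    # Sketch only: grow the shorter side so the box becomes square,
    # keeping the top-left anchored and clipping to the image.
    h, w = img.shape[:2]
    x1, y1, x2, y2 = rectangle[:4]
    side = max(x2 - x1, y2 - y1)
    cx, cy = (x1 + x2) / 2.0, (y1 + y2) / 2.0
    x1 = max(0, cx - side / 2.0)
    y1 = max(0, cy - side / 2.0)
    x2 = min(w, x1 + side)
    y2 = min(h, y1 + side)
    return [x1, y1, x2, y2] + list(rectangle[4:])
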
Example #6
def detectFace(img_path, threshold):
    # img = cv2.imread(img_path)
    fs = cv2.FileStorage(img_path, cv2.FileStorage_READ)
    img = fs.getNode('depth').mat()
    fs.release()
    origin_h, origin_w = img.shape
    scales = tools.calculateScales(img)
    # scales = [0.1, 0.07]
    out = []
    for scale in scales:
        hs = int(origin_h * scale)
        ws = int(origin_w * scale)
        net_12.blobs['data'].reshape(1, 1, hs, ws)
        net_12.blobs['data'].data[...] = preprocess(img, (ws, hs))
        out_ = net_12.forward()
        out.append(out_)
    image_num = len(scales)
    rectangles = []
    for i in range(image_num):
        cls_prob = out[i]['prob1'][0][1]
        print "cls_prob.shape: ", cls_prob.shape
        roi = out[i]['conv4-2'][0]
        out_h, out_w = cls_prob.shape
        print "out_h: ", out_h
        print "out_w: ", out_w
        out_side = max(out_h, out_w)
        rectangle = tools.detect_face_12net(
            cls_prob, roi, out_side, 1 / scales[i], origin_w, origin_h, threshold[0])
        rectangles.extend(rectangle)

    rectangles = tools.NMS(rectangles, 0.7, 'iou')

    if len(rectangles) == 0:
        print "rect drop to 0 at 12net"
        return rectangles

    with open('/home/xingduan/YupengHan/inference/saving_docs/12/12doc.txt', 'w') as doc12:
        for temp_rectangle in rectangles:
            doc12.write('%d %d %d %d %f\n' % (temp_rectangle[0], temp_rectangle[1],
                                              temp_rectangle[2], temp_rectangle[3],
                                              temp_rectangle[4]))

    # Here might be a problem
    net_24.blobs['data'].reshape(len(rectangles), 1, 24, 24)
    crop_number = 0
    for rectangle in rectangles:
        rectangle = rectangular2square(rectangle, img)
        crop_img = img[int(rectangle[1]):int(
            rectangle[3]), int(rectangle[0]):int(rectangle[2])]
        net_24.blobs['data'].data[crop_number] = preprocess(crop_img, (24, 24))
        crop_number += 1
    # Here might be a problem
    out = net_24.forward()
    cls_prob = out['prob1']
    roi_prob = out['ip_roi']
    rectangles = tools.filter_face_24net(
        cls_prob, roi_prob, rectangles, origin_w, origin_h, threshold[1])
    with open('/home/xingduan/YupengHan/inference/saving_docs/24/24doc.txt', 'w') as doc24:
        for temp_rectangle in rectangles:
            doc24.write('%d %d %d %d %f\n' % (temp_rectangle[0], temp_rectangle[1],
                                              temp_rectangle[2], temp_rectangle[3],
                                              temp_rectangle[4]))
    if len(rectangles) == 0:
        print "rect drop to 0 at 24net"
        return rectangles
    net_48.blobs['data'].reshape(len(rectangles), 1, 48, 48)
    crop_number = 0
    for rectangle in rectangles:
        rectangle = rectangular2square(rectangle, img)
        crop_img = img[int(rectangle[1]):int(
            rectangle[3]), int(rectangle[0]):int(rectangle[2])]
        net_48.blobs['data'].data[crop_number] = preprocess(crop_img, (48, 48))
        crop_number += 1
    out = net_48.forward()
    cls_prob = out['prob1']
    roi_prob = out['ip_roi']
    rectangles = tools.filter_face_48net(
        cls_prob, roi_prob, rectangles, origin_w, origin_h, threshold[2])

    return rectangles
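
The preprocess helper used in Examples #5 and #6 is also not shown. Since the nets take a single-channel (1, 1, h, w) blob, a minimal sketch might look like this; the normalization constants are placeholders, not values from the original code:

import cv2

def preprocess(img, size):
    # Sketch only: resize the single-channel depth map to (ws, hs) and
    # scale it into a small range; pick constants to match your training.
    resized = cv2.resize(img.astype('float32'), size)  # size is (ws, hs)
    return (resized - 127.5) / 127.5
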
Example #7
def main(args):

    image_size = 24
    save_dir = str(image_size)
    anno_file = 'wider_face_train.txt'
    im_dir = 'WIDER_train/images/'

    neg_save_dir = save_dir + '/negative'
    pos_save_dir = save_dir + '/positive'
    part_save_dir = save_dir + '/part'
    if not os.path.exists(save_dir):
        os.mkdir(save_dir)
    if not os.path.exists(pos_save_dir):
        os.mkdir(pos_save_dir)
    if not os.path.exists(part_save_dir):
        os.mkdir(part_save_dir)
    if not os.path.exists(neg_save_dir):
        os.mkdir(neg_save_dir)

    f1 = open(save_dir + '/pos_24.txt', 'w')
    f2 = open(save_dir + '/neg_24.txt', 'w')
    f3 = open(save_dir + '/part_24.txt', 'w')
    threshold = 0.6
    with open(anno_file, 'r') as f:
        annotations = f.readlines()
    num = len(annotations)
    print('%d pics in total' % num)

    p_idx = 0  # positive
    n_idx = 0  # negative
    d_idx = 0  # dont care
    image_idx = 0
    with tf.device('/gpu:0'):
        minsize = 20
        factor = 0.709
        model_file = args.pnet_model
        with tf.Graph().as_default():
            config = tf.ConfigProto(allow_soft_placement=True)
            config.gpu_options.per_process_gpu_memory_fraction = 0.5
            with tf.Session(config=config) as sess:
                image = tf.placeholder(tf.float32, [None, None, None, 3])
                pnet = PNet({'data': image}, mode='test')
                out_tensor = pnet.get_all_output()
                init_op = tf.global_variables_initializer()
                sess.run(init_op)
                saver = tf.train.Saver()
                saver.restore(sess, model_file)

                def pnet_fun(img):
                    return sess.run(out_tensor, feed_dict={image: img})

                for annotation in annotations:
                    annotation = annotation.strip().split(' ')
                    bbox = list(map(float, annotation[1:]))
                    gts = np.array(bbox, dtype=np.float32).reshape(-1, 4)
                    img_path = im_dir + annotation[0] + '.jpg'
                    img = cv2.imread(img_path)
                    rectangles = detect_face_12net(img, minsize, pnet_fun,
                                                   threshold, factor)
                    image_idx += 1

                    view_bar(image_idx, num)
                    for box in rectangles:
                        lis = box.astype(np.int32)
                        mask = lis < 0
                        lis[mask] = 0
                        x_left, y_top, x_right, y_bottom, _ = lis
                        crop_w = x_right - x_left + 1
                        crop_h = y_bottom - y_top + 1
                        # ignore box that is too small or beyond image border
                        if crop_w < image_size or crop_h < image_size:
                            continue

                        Iou = IoU(box, gts)
                        cropped_im = img[y_top:y_bottom + 1,
                                         x_left:x_right + 1]
                        resized_im = cv2.resize(cropped_im,
                                                (image_size, image_size),
                                                interpolation=cv2.INTER_LINEAR)

                        # save negative images and write label
                        if np.max(Iou) < 0.3:
                            # Iou with all gts must below 0.3
                            save_file = os.path.join(neg_save_dir,
                                                     '%s.jpg' % n_idx)
                            f2.write('%s/negative/%s' % (save_dir, n_idx) +
                                     ' 0\n')
                            cv2.imwrite(save_file, resized_im)
                            n_idx += 1
                        else:
                            # find gt_box with the highest iou
                            idx = np.argmax(Iou)
                            assigned_gt = gts[idx]
                            x1, y1, x2, y2 = assigned_gt

                            # compute bbox reg label
                            offset_x1 = (x1 - x_left) / float(crop_w)
                            offset_y1 = (y1 - y_top) / float(crop_h)
                            offset_x2 = (x2 - x_right) / float(crop_w)
                            offset_y2 = (y2 - y_bottom) / float(crop_h)

                            if np.max(Iou) >= 0.65:
                                save_file = os.path.join(
                                    pos_save_dir, '%s.jpg' % p_idx)
                                f1.write('%s/positive/%s' % (save_dir, p_idx) +
                                         ' 1 %.2f %.2f %.2f %.2f\n' %
                                         (offset_x1, offset_y1, offset_x2,
                                          offset_y2))
                                cv2.imwrite(save_file, resized_im)
                                p_idx += 1

                            elif np.max(Iou) >= 0.4:
                                save_file = os.path.join(
                                    part_save_dir, '%s.jpg' % d_idx)
                                f3.write('%s/part/%s' % (save_dir, d_idx) +
                                         ' -1 %.2f %.2f %.2f %.2f\n' %
                                         (offset_x1, offset_y1, offset_x2,
                                          offset_y2))
                                cv2.imwrite(save_file, resized_im)
                                d_idx += 1

    f1.close()
    f2.close()
    f3.close()
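
Examples #7 and #8 rely on an IoU helper that scores one candidate box against all ground-truth boxes. A standard vectorized sketch, assuming box is [x1, y1, x2, y2, score] and gts is an (N, 4) array:

import numpy as np

def IoU(box, gts):
    # Intersection-over-union of one box against every ground-truth box.
    box_area = (box[2] - box[0] + 1) * (box[3] - box[1] + 1)
    gt_area = (gts[:, 2] - gts[:, 0] + 1) * (gts[:, 3] - gts[:, 1] + 1)
    xx1 = np.maximum(box[0], gts[:, 0])
    yy1 = np.maximum(box[1], gts[:, 1])
    xx2 = np.minimum(box[2], gts[:, 2])
    yy2 = np.minimum(box[3], gts[:, 3])
    w = np.maximum(0, xx2 - xx1 + 1)
    h = np.maximum(0, yy2 - yy1 + 1)
    inter = w * h
    return inter / (box_area + gt_area - inter)
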
Example #8
def main(args):

    image_size = 24
    save_dir = str(image_size)
    anno_file = 'wider_face_train.txt'
    im_dir = 'WIDER_train/images/'

    neg_save_dir = save_dir+'/negative'
    pos_save_dir = save_dir+'/positive'
    part_save_dir = save_dir+'/part'
    if not os.path.exists(save_dir):
        os.mkdir(save_dir)
    if not os.path.exists(pos_save_dir):
        os.mkdir(pos_save_dir)
    if not os.path.exists(part_save_dir):
        os.mkdir(part_save_dir)
    if not os.path.exists(neg_save_dir):
        os.mkdir(neg_save_dir)

    f1 = open(save_dir+'/pos_24.txt', 'w')
    f2 = open(save_dir+'/neg_24.txt', 'w')
    f3 = open(save_dir+'/part_24.txt', 'w')
    threshold = 0.6
    with open(anno_file, 'r') as f:
        annotations = f.readlines()
    num = len(annotations)
    print('%d pics in total' % num)

    p_idx = 0  # positive
    n_idx = 0  # negative
    d_idx = 0  # dont care
    image_idx = 0
    with tf.device('/gpu:0'):
        minsize = 20
        factor = 0.709
        model_file = args.pnet_model
        with tf.Graph().as_default():
            config = tf.ConfigProto(allow_soft_placement=True)
            config.gpu_options.per_process_gpu_memory_fraction = 0.5
            with tf.Session(config=config) as sess:
                image = tf.placeholder(tf.float32, [None, None, None, 3])
                pnet = PNet({'data': image}, mode='test')
                out_tensor = pnet.get_all_output()
                init_op = tf.global_variables_initializer()
                sess.run(init_op)
                saver = tf.train.Saver()
                saver.restore(sess, model_file)

                def pnet_fun(img):
                    return sess.run(out_tensor, feed_dict={image: img})

                for annotation in annotations:
                    annotation = annotation.strip().split(' ')
                    bbox = list(map(float, annotation[1:]))
                    gts = np.array(bbox, dtype=np.float32).reshape(-1, 4)
                    img_path = im_dir + annotation[0] + '.jpg'
                    img = cv2.imread(img_path)
                    rectangles = detect_face_12net(img, minsize, pnet_fun,
                                                   threshold, factor)
                    image_idx += 1

                    view_bar(image_idx, num)
                    for box in rectangles:
                        lis = box.astype(np.int32)
                        mask = lis < 0
                        lis[mask] = 0
                        x_left, y_top, x_right, y_bottom, _ = lis
                        crop_w = x_right - x_left + 1
                        crop_h = y_bottom - y_top + 1
                        # ignore box that is too small or beyond image border
                        if crop_w < image_size or crop_h < image_size:
                            continue

                        Iou = IoU(box, gts)
                        cropped_im = img[y_top: y_bottom+1, x_left: x_right+1]
                        resized_im = cv2.resize(cropped_im,
                                                (image_size, image_size),
                                                interpolation=cv2.INTER_LINEAR)

                        # save negative images and write label
                        if np.max(Iou) < 0.3:
                            # Iou with all gts must below 0.3
                            save_file = os.path.join(neg_save_dir,
                                                     '%s.jpg' % n_idx)
                            f2.write('%s/negative/%s' %
                                     (save_dir, n_idx) + ' 0\n')
                            cv2.imwrite(save_file, resized_im)
                            n_idx += 1
                        else:
                            # find gt_box with the highest iou
                            idx = np.argmax(Iou)
                            assigned_gt = gts[idx]
                            x1, y1, x2, y2 = assigned_gt

                            # compute bbox reg label
                            offset_x1 = (x1 - x_left) / float(crop_w)
                            offset_y1 = (y1 - y_top) / float(crop_h)
                            offset_x2 = (x2 - x_right) / float(crop_w)
                            offset_y2 = (y2 - y_bottom) / float(crop_h)

                            if np.max(Iou) >= 0.65:
                                save_file = os.path.join(pos_save_dir,
                                                         '%s.jpg' % p_idx)
                                f1.write('%s/positive/%s' % (save_dir, p_idx) +
                                         ' 1 %.2f %.2f %.2f %.2f\n' %
                                         (offset_x1, offset_y1,
                                          offset_x2, offset_y2))
                                cv2.imwrite(save_file, resized_im)
                                p_idx += 1

                            elif np.max(Iou) >= 0.4:
                                save_file = os.path.join(part_save_dir,
                                                         '%s.jpg' % d_idx)
                                f3.write('%s/part/%s' % (save_dir, d_idx) +
                                         ' -1 %.2f %.2f %.2f %.2f\n' %
                                         (offset_x1, offset_y1,
                                          offset_x2, offset_y2))
                                cv2.imwrite(save_file, resized_im)
                                d_idx += 1

    f1.close()
    f2.close()
    f3.close()
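
The view_bar progress indicator called in Examples #7 and #8 is not shown either; a simple console sketch (the real helper may format differently):

import sys

def view_bar(num, total, width=30):
    # Sketch only: render a one-line text progress bar.
    done = int(width * num / total)
    sys.stdout.write('\r[%s%s] %d/%d' % ('#' * done, '.' * (width - done),
                                         num, total))
    sys.stdout.flush()
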
Example #9
def main(args):

    detect_totalTime = 0.0
    frameCount = 0

    # Decide whether result images should be stored.
    # If yes, check whether the directory that stores results already exists.
    # If it exists, delete it recursively, then recreate it.
    if args.save_image:
        output_directory = args.save_image
        print(args.save_image)
        if os.path.exists(output_directory):
            shutil.rmtree(output_directory)
        os.mkdir(output_directory)
        fw = open(os.path.join(output_directory, args.save_bbox_coordinates + '_dets.txt'), 'w')

    # Create the directory that stores the raw camera frames.
    # The steps are similar to "store result images" above.
    if args.save_camera_images is not False:
        source_directory = args.save_camera_images
        if os.path.exists(source_directory):
            shutil.rmtree(source_directory)
        os.mkdir(source_directory)

    with tf.device('/cpu:0'):
        with tf.Graph().as_default():
            config = tf.ConfigProto(allow_soft_placement=True)
            with tf.Session(config=config) as sess:

                file_paths = get_model_filenames(args.model_dir)
                print(file_paths, len(file_paths))

                # The if/else statement checks which type of model the user loaded.
                # If the condition is true, the user loaded separate P-Net, R-Net and O-Net models,
                # e.g. via "python test_camera.py --model_dir model/separate",
                # where the "separate" directory contains three folders: P-Net, R-Net and O-Net.
                if len(file_paths) == 3:
                    image_pnet = tf.placeholder(
                        tf.float32, [None, None, None, 3])
                    pnet = PNet({'data': image_pnet}, mode='test')
                    out_tensor_pnet = pnet.get_all_output()

                    image_rnet = tf.placeholder(tf.float32, [None, 24, 24, 3])
                    rnet = RNet({'data': image_rnet}, mode='test')
                    out_tensor_rnet = rnet.get_all_output()

                    image_onet = tf.placeholder(tf.float32, [None, 48, 48, 3])
                    onet = ONet({'data': image_onet}, mode='test')
                    out_tensor_onet = onet.get_all_output()

                    saver_pnet = tf.train.Saver(
                                    [v for v in tf.global_variables()
                                    if v.name[0:5] == "pnet/"])
                    saver_rnet = tf.train.Saver(
                                    [v for v in tf.global_variables()
                                    if v.name[0:5] == "rnet/"])
                    saver_onet = tf.train.Saver(
                                    [v for v in tf.global_variables()
                                    if v.name[0:5] == "onet/"])

                    saver_pnet.restore(sess, file_paths[0])

                    def pnet_fun(img):
                        return sess.run(out_tensor_pnet,
                                        feed_dict={image_pnet: img})

                    saver_rnet.restore(sess, file_paths[1])

                    def rnet_fun(img):
                        return sess.run(out_tensor_rnet,
                                        feed_dict={image_rnet: img})

                    saver_onet.restore(sess, file_paths[2])

                    def onet_fun(img):
                        return sess.run(out_tensor_onet,
                                        feed_dict={image_onet: img})

                else:
                    saver = tf.train.import_meta_graph(file_paths[0])
                    saver.restore(sess, file_paths[1])

                    def pnet_fun(img):
                        return sess.run(
                            ('softmax/Reshape_1:0', 'pnet/conv4-2/BiasAdd:0'),
                            feed_dict={'Placeholder:0': img})

                    def rnet_fun(img):
                        return sess.run(
                            ('softmax_1/softmax:0',
                             'rnet/conv5-2/rnet/conv5-2:0'),
                            feed_dict={'Placeholder_1:0': img})

                    def onet_fun(img):
                        return sess.run(
                            ('softmax_2/softmax:0',
                             'onet/conv6-2/onet/conv6-2:0',
                             'onet/conv6-3/onet/conv6-3:0'),
                            feed_dict={'Placeholder_2:0': img})

                video_capture = cv2.VideoCapture(0)
                print(video_capture.get(cv2.CAP_PROP_FRAME_WIDTH), video_capture.get(cv2.CAP_PROP_FRAME_HEIGHT))

                if not video_capture.isOpened():
                    print("ERROR: NO VIDEO STREAM OR NO CAMERA DEVICE.")

                else:

                    print(video_capture.get(cv2.CAP_PROP_FPS))

                    while True:

                        ret, frame = video_capture.read()
                        original_img = frame.copy()

                        if ret:

                            width  = int(video_capture.get(cv2.CAP_PROP_FRAME_WIDTH)*args.resize)
                            height = int(video_capture.get(cv2.CAP_PROP_FRAME_HEIGHT)*args.resize)
                            resized_image = cv2.resize(frame, (width, height))

                            start_time = time.time()*1000

                            # P-Net + R-Net + O-Net
                            if args.net == "ALL":
                                rectangles, points = detect_face(resized_image, args.minsize,
                                                                pnet_fun, rnet_fun, onet_fun,
                                                                args.threshold, args.factor)

                            # P-Net + R-Net without faces' landmarks
                            elif args.net == "PR":
                                rectangles = detect_face_24net(resized_image, args.minsize, 
                                                                pnet_fun, rnet_fun,
                                                                args.threshold, args.factor)

                            # Only P-Net
                            elif args.net == "P":
                                rectangles = detect_face_12net(resized_image, args.minsize,
                                                                pnet_fun, args.threshold, args.factor)

                            else:
                                print("ERROR: WRONG NET INPUT")

                            end_time = time.time()*1000
                            detect_totalTime = detect_totalTime + (end_time - start_time)

                            if args.net == "ALL":
                                points = np.transpose(points) # The outputs of O-Net which are faces' landmarks
                            else:
                                points = None # the others 

                            add_overlays(frame, rectangles, points, 1000/(end_time - start_time), 1/args.resize, 1/args.resize)
                            cv2.imshow("MTCNN-Tensorflow wangbm", frame)

                            print("ID: {:d}, cost time: {:.1f}ms".format(frameCount, (end_time - start_time))) s

                            if points is not None:
                                for point in points:
                                    for i in range(0, 10, 2):
                                        point[i]   = point[i] * (1/args.resize)
                                        point[i+1] = point[i+1] * (1/args.resize)
                                        print("\tID: {:d}, face landmarks x = {:.1f}, y = {:.1f}".format(int(i/2+1), point[i], point[i+1]))

                            if args.save_image:
                                outputFilePath = os.path.join(output_directory, str(frameCount) + ".jpg")
                                cv2.imwrite(outputFilePath, frame)
                                for rectangle in rectangles:
                                    fw.write('{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'.format(str(frameCount), rectangle[4], rectangle[0], rectangle[1], rectangle[2], rectangle[3]))

                            if args.save_camera_images:
                                sourceFilePath = os.path.join(source_directory, str(frameCount) + ".jpg")
                                cv2.imwrite(sourceFilePath, original_img)

                            frameCount = frameCount + 1

                            if cv2.waitKey(1) & 0xFF == ord('q'):
                                cv2.destroyAllWindows()
                                break

                    video_capture.release()
                    if args.save_image:
                        fw.close()
                    detect_average_time = detect_totalTime/frameCount
                    print("*" * 50)
                    print("detection average time: " + str(detect_average_time) + "ms" )
                    print("detection fps: " + str(1000/detect_average_time))
Example #10
def detectFace(img, threshold):

    caffe_img = (img.copy() - 127.5) / 127.5
    origin_h, origin_w, ch = caffe_img.shape
    scales = tools.calculateScales(img)
    out = []
    t0 = time.time()
    # del scales[:4]

    for scale in scales:
        hs = int(origin_h * scale)
        ws = int(origin_w * scale)
        scale_img = cv2.resize(caffe_img, (ws, hs))
        input_img = scale_img.reshape(1, *scale_img.shape)
        # .transpose(0, 2, 1, 3) should arguably be applied here, but the
        # post-processing below then seems to go wrong.
        output = Pnet.predict(input_img)
        out.append(output)
    image_num = len(scales)
    rectangles = []
    for i in range(image_num):
        # i = scale index; out[i][0] is the cls-score output, [0] the batch
        # index (always 0); channel 1 is the one-hot "face" class.
        cls_prob = out[i][0][0][:, :, 1]
        roi = out[i][1][0]
        out_h, out_w = cls_prob.shape
        out_side = max(out_h, out_w)
        # print('calculating img scale #:', i)
        cls_prob = np.swapaxes(cls_prob, 0, 1)
        roi = np.swapaxes(roi, 0, 2)
        rectangle = tools.detect_face_12net(cls_prob, roi, out_side,
                                            1 / scales[i], origin_w, origin_h,
                                            threshold[0])
        rectangles.extend(rectangle)
    rectangles = tools.NMS(rectangles, 0.7, 'iou')

    t1 = time.time()
    print('time for 12 net is: ', t1 - t0)

    if len(rectangles) == 0:
        return rectangles

    crop_number = 0
    out = []
    predict_24_batch = []
    for rectangle in rectangles:
        crop_img = caffe_img[int(rectangle[1]):int(rectangle[3]),
                             int(rectangle[0]):int(rectangle[2])]
        scale_img = cv2.resize(crop_img, (24, 24))
        predict_24_batch.append(scale_img)
        crop_number += 1

    predict_24_batch = np.array(predict_24_batch)

    out = Rnet.predict(predict_24_batch)

    cls_prob = np.array(out[0])  # output 0 holds the classification scores
    roi_prob = np.array(out[1])  # output 1 holds the bbox regression values
    rectangles = tools.filter_face_24net(cls_prob, roi_prob, rectangles,
                                         origin_w, origin_h, threshold[1])
    t2 = time.time()
    print('time for 24 net is: ', t2 - t1)

    if len(rectangles) == 0:
        return rectangles

    crop_number = 0
    predict_batch = []
    for rectangle in rectangles:
        # print('calculating net 48 crop_number:', crop_number)
        crop_img = caffe_img[int(rectangle[1]):int(rectangle[3]),
                             int(rectangle[0]):int(rectangle[2])]
        scale_img = cv2.resize(crop_img, (48, 48))
        predict_batch.append(scale_img)
        crop_number += 1

    predict_batch = np.array(predict_batch)

    output = Onet.predict(predict_batch)
    cls_prob = output[0]
    roi_prob = output[1]
    pts_prob = output[2]  # output 2 holds the landmark points
    # rectangles = tools.filter_face_48net_newdef(cls_prob, roi_prob, pts_prob, rectangles, origin_w, origin_h,
    #                                             threshold[2])
    rectangles = tools.filter_face_48net(cls_prob, roi_prob, pts_prob,
                                         rectangles, origin_w, origin_h,
                                         threshold[2])
    t3 = time.time()
    print('time for 48 net is: ', t3 - t2)

    return rectangles
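
Examples #10 and #11 assume the Keras models Pnet, Rnet and Onet are already in scope. A minimal loading sketch; the weight-file names are hypothetical:

from keras.models import load_model

# Hypothetical file names; load whichever PNet/RNet/ONet models you trained.
Pnet = load_model('pnet.h5')
Rnet = load_model('rnet.h5')
Onet = load_model('onet.h5')
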
Example #11
def detectFace(img, threshold):
    caffe_img = (img.copy() - 127.5) / 127.5
    origin_h, origin_w, ch = caffe_img.shape
    scales = calculateScales(img)
    out = []
    for scale in scales:
        hs = int(origin_h * scale)
        ws = int(origin_w * scale)
        scale_img = cv2.resize(caffe_img, (ws, hs))
        input_img = scale_img.reshape(1, *scale_img.shape)
        output = Pnet.predict(input_img)
        out.append(output)
    image_num = len(scales)
    rectangles = []
    for i in range(image_num):
        cls_prob = out[i][0][0][:, :, 1]
        roi = out[i][1][0]
        out_h, out_w = cls_prob.shape
        out_side = max(out_h, out_w)
        cls_prob = np.swapaxes(cls_prob, 0, 1)
        roi = np.swapaxes(roi, 0, 2)
        rectangle = detect_face_12net(cls_prob, roi, out_side, 1 / scales[i],
                                      origin_w, origin_h, threshold[0])
        rectangles.extend(rectangle)
    rectangles = NMS(rectangles, 0.7, 'iou')

    if len(rectangles) == 0:
        return rectangles

    crop_number = 0
    out = []
    predict_24_batch = []
    for rectangle in rectangles:
        crop_img = caffe_img[int(rectangle[1]):int(rectangle[3]),
                             int(rectangle[0]):int(rectangle[2])]
        scale_img = cv2.resize(crop_img, (24, 24))
        predict_24_batch.append(scale_img)
        crop_number += 1

    predict_24_batch = np.array(predict_24_batch)
    out = Rnet.predict(predict_24_batch)

    cls_prob = np.array(out[0])  # output 0 holds the classification scores
    roi_prob = np.array(out[1])  # output 1 holds the bbox regression values
    rectangles = filter_face_24net(cls_prob, roi_prob, rectangles, origin_w,
                                   origin_h, threshold[1])

    if len(rectangles) == 0:
        return rectangles

    crop_number = 0
    predict_batch = []
    for rectangle in rectangles:
        crop_img = caffe_img[int(rectangle[1]):int(rectangle[3]),
                             int(rectangle[0]):int(rectangle[2])]
        scale_img = cv2.resize(crop_img, (48, 48))
        predict_batch.append(scale_img)
        crop_number += 1

    predict_batch = np.array(predict_batch)
    output = Onet.predict(predict_batch)
    cls_prob = output[0]
    roi_prob = output[1]
    pts_prob = output[2]
    rectangles = filter_face_48net(cls_prob, roi_prob, pts_prob, rectangles,
                                   origin_w, origin_h, threshold[2])
    return rectangles
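
A minimal usage sketch for Example #11; the image path and per-stage thresholds are assumptions:

import cv2

img = cv2.imread('test.jpg')
threshold = [0.6, 0.6, 0.7]  # P-Net, R-Net, O-Net score thresholds
rectangles = detectFace(img, threshold)
for r in rectangles:
    cv2.rectangle(img, (int(r[0]), int(r[1])), (int(r[2]), int(r[3])),
                  (255, 0, 0), 1)
cv2.imwrite('result.jpg', img)
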
Example #12
def main(args):

    detect_totalTime = 0.0
    frameCount = 0

    # Decide whether result images should be stored.
    # If yes, check whether the directory that stores results already exists.
    # If it exists, delete it recursively, then recreate it.
    if args.save_image:
        output_directory = args.save_image
        print(args.save_image)
        if os.path.exists(output_directory):
            shutil.rmtree(output_directory)
        os.mkdir(output_directory)
        fw = open(
            os.path.join(output_directory,
                         args.save_bbox_coordinates + '_dets.txt'), 'w')

    with tf.device('/cpu:0'):
        with tf.Graph().as_default():
            config = tf.ConfigProto(allow_soft_placement=True)
            with tf.Session(config=config) as sess:

                file_paths = get_model_filenames(args.model_dir)

                # The if/else statement checks which type of model the user loaded.
                # If the condition is true, the user loaded separate P-Net, R-Net and O-Net models,
                # e.g. via "python test_camera.py --model_dir model/separate",
                # where the "separate" directory contains three folders: P-Net, R-Net and O-Net.
                if len(file_paths) == 3:
                    image_pnet = tf.placeholder(tf.float32,
                                                [None, None, None, 3])
                    pnet = PNet({'data': image_pnet}, mode='test')
                    out_tensor_pnet = pnet.get_all_output()

                    image_rnet = tf.placeholder(tf.float32, [None, 24, 24, 3])
                    rnet = RNet({'data': image_rnet}, mode='test')
                    out_tensor_rnet = rnet.get_all_output()

                    image_onet = tf.placeholder(tf.float32, [None, 48, 48, 3])
                    onet = ONet({'data': image_onet}, mode='test')
                    out_tensor_onet = onet.get_all_output()

                    saver_pnet = tf.train.Saver([
                        v for v in tf.global_variables()
                        if v.name[0:5] == "pnet/"
                    ])
                    saver_rnet = tf.train.Saver([
                        v for v in tf.global_variables()
                        if v.name[0:5] == "rnet/"
                    ])
                    saver_onet = tf.train.Saver([
                        v for v in tf.global_variables()
                        if v.name[0:5] == "onet/"
                    ])

                    saver_pnet.restore(sess, file_paths[0])

                    def pnet_fun(img):
                        return sess.run(out_tensor_pnet,
                                        feed_dict={image_pnet: img})

                    saver_rnet.restore(sess, file_paths[1])

                    def rnet_fun(img):
                        return sess.run(out_tensor_rnet,
                                        feed_dict={image_rnet: img})

                    saver_onet.restore(sess, file_paths[2])

                    def onet_fun(img):
                        return sess.run(out_tensor_onet,
                                        feed_dict={image_onet: img})

                else:
                    saver = tf.train.import_meta_graph(file_paths[0])
                    saver.restore(sess, file_paths[1])

                    def pnet_fun(img):
                        return sess.run(
                            ('softmax/Reshape_1:0', 'pnet/conv4-2/BiasAdd:0'),
                            feed_dict={'Placeholder:0': img})

                    def rnet_fun(img):
                        return sess.run(('softmax_1/softmax:0',
                                         'rnet/conv5-2/rnet/conv5-2:0'),
                                        feed_dict={'Placeholder_1:0': img})

                    def onet_fun(img):
                        return sess.run(('softmax_2/softmax:0',
                                         'onet/conv6-2/onet/conv6-2:0',
                                         'onet/conv6-3/onet/conv6-3:0'),
                                        feed_dict={'Placeholder_2:0': img})

                for filename in os.listdir(args.image_path):

                    img = cv2.imread(os.path.join(args.image_path, filename))
                    height, width, _ = img.shape
                    original_img = img.copy()
                    width = int(width * args.resize)
                    height = int(height * args.resize)
                    resized_image = cv2.resize(img, (width, height))

                    start_time = time.time() * 1000

                    # P-Net + R-Net + O-Net
                    if args.net == "ALL":
                        rectangles, points = detect_face(
                            resized_image, args.minsize, pnet_fun, rnet_fun,
                            onet_fun, args.threshold, args.factor)

                    # P-Net + R-Net without faces' landmarks
                    elif args.net == "PR":
                        rectangles = detect_face_24net(resized_image,
                                                       args.minsize, pnet_fun,
                                                       rnet_fun,
                                                       args.threshold,
                                                       args.factor)

                    # Only P-Net
                    elif args.net == "P":
                        rectangles = detect_face_12net(resized_image,
                                                       args.minsize, pnet_fun,
                                                       args.threshold,
                                                       args.factor)

                    else:
                        print("ERROR: WRONG NET INPUT")

                    end_time = time.time() * 1000
                    detect_totalTime = detect_totalTime + (end_time -
                                                           start_time)

                    print(
                        str(frameCount) + " time : " +
                        str(end_time - start_time) + "ms")

                    if args.net == "ALL":
                        points = np.transpose(
                            points
                        )  # The outputs of O-Net which are faces' landmarks
                    else:
                        points = None  # the others

                    add_overlays(img, rectangles, points,
                                 1000 / (end_time - start_time),
                                 1 / args.resize, 1 / args.resize)
                    cv2.imshow("MTCNN-Tenssorflow wangbm", img)
                    frameCount = frameCount + 1

                    if args.save_image:
                        outputFilePath = os.path.join(output_directory,
                                                      filename)
                        cv2.imwrite(outputFilePath, img)
                        for rectangle in rectangles:
                            fw.write(
                                '{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'.
                                format(filename[:-4], rectangle[4],
                                       rectangle[0], rectangle[1],
                                       rectangle[2], rectangle[3]))

                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        cv2.destroyAllWindows()
                        break
                if args.save_image:
                    fw.close()
    detect_average_time = detect_totalTime / frameCount
    print("*" * 50)
    print("detection average time: " + str(detect_average_time) + "ms")
    print("detection fps: " + str(1000 / detect_average_time))
Example #13
def main(args):

    detect_totalTime = 0.0
    totalTime = 0.0
    frameCount = 0

    if args.save_image:
        output_directory = args.save_path
        print(args.save_image)
        if os.path.exists(output_directory):
            shutil.rmtree(output_directory)
        os.mkdir(output_directory)

    with tf.device('/cpu:0'):
        with tf.Graph().as_default():
            config = tf.ConfigProto(allow_soft_placement=True)
            with tf.Session(config=config) as sess:

                file_paths = get_model_filenames(args.model_dir)

                if len(file_paths) == 3:
                    image_pnet = tf.placeholder(tf.float32,
                                                [None, None, None, 3])
                    pnet = PNet({'data': image_pnet}, mode='test')
                    out_tensor_pnet = pnet.get_all_output()

                    image_rnet = tf.placeholder(tf.float32, [None, 24, 24, 3])
                    rnet = RNet({'data': image_rnet}, mode='test')
                    out_tensor_rnet = rnet.get_all_output()

                    image_onet = tf.placeholder(tf.float32, [None, 48, 48, 3])
                    onet = ONet({'data': image_onet}, mode='test')
                    out_tensor_onet = onet.get_all_output()

                    saver_pnet = tf.train.Saver([
                        v for v in tf.global_variables()
                        if v.name[0:5] == "pnet/"
                    ])
                    saver_rnet = tf.train.Saver([
                        v for v in tf.global_variables()
                        if v.name[0:5] == "rnet/"
                    ])
                    saver_onet = tf.train.Saver([
                        v for v in tf.global_variables()
                        if v.name[0:5] == "onet/"
                    ])

                    saver_pnet.restore(sess, file_paths[0])

                    def pnet_fun(img):
                        return sess.run(out_tensor_pnet,
                                        feed_dict={image_pnet: img})

                    saver_rnet.restore(sess, file_paths[1])

                    def rnet_fun(img):
                        return sess.run(out_tensor_rnet,
                                        feed_dict={image_rnet: img})

                    saver_onet.restore(sess, file_paths[2])

                    def onet_fun(img):
                        return sess.run(out_tensor_onet,
                                        feed_dict={image_onet: img})

                else:
                    saver = tf.train.import_meta_graph(file_paths[0])
                    saver.restore(sess, file_paths[1])

                    def pnet_fun(img):
                        return sess.run(
                            ('softmax/Reshape_1:0', 'pnet/conv4-2/BiasAdd:0'),
                            feed_dict={'Placeholder:0': img})

                    def rnet_fun(img):
                        return sess.run(('softmax_1/softmax:0',
                                         'rnet/conv5-2/rnet/conv5-2:0'),
                                        feed_dict={'Placeholder_1:0': img})

                    def onet_fun(img):
                        return sess.run(('softmax_2/softmax:0',
                                         'onet/conv6-2/onet/conv6-2:0',
                                         'onet/conv6-3/onet/conv6-3:0'),
                                        feed_dict={'Placeholder_2:0': img})

                # for filename in os.listdir(args.image_path):

                video_capture = cv2.VideoCapture(0)

                if not video_capture.isOpened():
                    print("ERROR: NO VIDEO STREAM OR NO CAMERA DEVICE.")

                else:

                    video_capture.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
                    video_capture.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)

                    while True:

                        ret, frame = video_capture.read()

                        if ret:

                            resized_image = cv2.resize(frame, (640, 360))
                            # resized_image = cv2.resize(frame, (640, 480))

                            start_time = time.time() * 1000

                            # P-Net + R-Net + O-Net
                            if args.net == "ALL":
                                rectangles, points = detect_face(
                                    resized_image, args.minsize, pnet_fun,
                                    rnet_fun, onet_fun, args.threshold,
                                    args.factor)

                            # P-Net + R-Net without faces' landmarks
                            elif args.net == "PR":
                                rectangles = detect_face_24net(
                                    resized_image, args.minsize, pnet_fun,
                                    rnet_fun, args.threshold, args.factor)

                            # Only P-Net
                            elif args.net == "P":
                                rectangles = detect_face_12net(
                                    resized_image, args.minsize, pnet_fun,
                                    args.threshold, args.factor)

                            else:
                                print("ERROR: WRONG NET INPUT")
                            end_time = time.time() * 1000
                            detect_totalTime = detect_totalTime + (end_time -
                                                                   start_time)

                            print(
                                str(frameCount) + " time : " +
                                str(end_time - start_time) + "ms")

                            # print(type(rectangles))
                            if args.net == "ALL":
                                points = np.transpose(
                                    points
                                )  # The outputs of O-Net which are faces' landmarks
                            for rectangle in rectangles:
                                cv2.putText(
                                    resized_image, str(rectangle[4]),
                                    (int(rectangle[0]), int(rectangle[1])),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0))
                                cv2.rectangle(
                                    resized_image,
                                    (int(rectangle[0]), int(rectangle[1])),
                                    (int(rectangle[2]), int(rectangle[3])),
                                    (255, 0, 0), 1)

                            if args.net == "ALL":
                                for point in points:
                                    for i in range(0, 10, 2):
                                        cv2.circle(
                                            resized_image,
                                            (int(point[i]), int(point[i + 1])),
                                            2, (0, 255, 0))
                            cv2.imshow("MTCNN-Tensorflow wangbm",
                                       resized_image)

                            if args.save_image:
                                outputFilePath = os.path.join(
                                    output_directory,
                                    str(frameCount) + ".jpg")
                                cv2.imwrite(outputFilePath, resized_image)
                            if cv2.waitKey(1) & 0xFF == ord('q'):
                                cv2.destroyAllWindows()
                                break

                            frameCount = frameCount + 1

                    video_capture.release()

                    detect_average_time = detect_totalTime / frameCount
                    print("detection average time: " +
                          str(detect_average_time) + "ms")
                    print("detection fps: " +
                          str(1 / (detect_average_time / 1000)))
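
The main(args) entry points in Examples #9, #12 and #13 expect a parsed-arguments object. A sketch of a matching parser; the defaults are assumptions, not values from the original repositories:

import argparse

def parse_arguments():
    # Sketch only: covers the attributes read by the main() functions above.
    parser = argparse.ArgumentParser()
    parser.add_argument('--model_dir', type=str, default='model/all_in_one')
    parser.add_argument('--image_path', type=str, default='images')
    parser.add_argument('--net', type=str, default='ALL', help='ALL, PR or P')
    parser.add_argument('--minsize', type=int, default=20)
    parser.add_argument('--threshold', type=float, nargs=3,
                        default=[0.6, 0.7, 0.7])
    parser.add_argument('--factor', type=float, default=0.709)
    parser.add_argument('--resize', type=float, default=1.0)
    parser.add_argument('--save_image', type=str, default='')
    parser.add_argument('--save_path', type=str, default='output')
    parser.add_argument('--save_bbox_coordinates', type=str, default='camera')
    parser.add_argument('--save_camera_images', type=str, default='')
    return parser.parse_args()

if __name__ == '__main__':
    main(parse_arguments())
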