예제 #1
0
def test_net(gpuId):
    """Build the full MTCNN cascade (PNet/RNet/ONet) on the given GPU.

    Parameters
    ----------
    gpuId : int
        CUDA device index passed to ``mx.gpu``.

    Returns
    -------
    MtcnnDetector
        Three-stage detector using a fully-convolutional PNet.

    NOTE(review): relies on the module-level ``mtnnDir`` pointing at the
    checkpoint directory.
    """
    prefix = [
        os.path.join(mtnnDir, 'pnet'),
        os.path.join(mtnnDir, 'rnet'),
        os.path.join(mtnnDir, 'onet')
    ]
    epoch = [16, 16, 16]
    batch_size = [2048, 256, 16]
    ctx = mx.gpu(gpuId)
    thresh = [0.5, 0.5, 0.7]
    min_face_size = 40
    stride = 2

    # load pnet model (fully-convolutional, no sliding window)
    args_p, auxs_p = load_param(prefix[0], epoch[0], convert=True, ctx=ctx)
    PNet = FcnDetector(P_Net("test"), ctx, args_p, auxs_p)

    # load rnet model
    # BUGFIX: was epoch[0]; each net must load its own checkpoint epoch
    # (harmless while all epochs are 16, wrong as soon as they differ).
    args_r, auxs_r = load_param(prefix[1], epoch[1], convert=True, ctx=ctx)
    RNet = Detector(R_Net("test"), 24, batch_size[1], ctx, args_r, auxs_r)

    # load onet model
    args_o, auxs_o = load_param(prefix[2], epoch[2], convert=True, ctx=ctx)
    ONet = Detector(O_Net("test"), 48, batch_size[2], ctx, args_o, auxs_o)
    return MtcnnDetector(
        detectors=[PNet, RNet, ONet],
        ctx=ctx,
        min_face_size=min_face_size,
        stride=stride,
        threshold=thresh,
        slide_window=False)
def test_net(prefix=['model/pnet', 'model/rnet', 'model/onet'], epoch=[16, 16, 16], batch_size=[2048, 256, 16], ctx=mx.cpu(0),
             thresh=[0.6, 0.6, 0.7], min_face_size=24,
             stride=2, camera_path='0'):
    """Run the MTCNN cascade on a camera/video stream, display detections,
    and record the annotated stream to ``test.mkv``.

    ``prefix``/``epoch``/``batch_size`` are per-net (pnet, rnet, onet)
    triples.  ``camera_path`` is either a device index ('0') or a path/URL
    accepted by ``cv2.VideoCapture``.  Press Esc or 'q' to stop.

    NOTE: the list defaults are the classic mutable-default pitfall; they are
    never mutated here and are kept for interface stability.
    """
    # load pnet model (fully-convolutional, no sliding window)
    args, auxs = load_param(prefix[0], epoch[0], convert=True, ctx=ctx)
    PNet = FcnDetector(P_Net("test"), ctx, args, auxs)

    # load rnet model
    # BUGFIX: was epoch[0]; rnet must load its own checkpoint epoch.
    args, auxs = load_param(prefix[1], epoch[1], convert=True, ctx=ctx)
    RNet = Detector(R_Net("test"), 24, batch_size[1], ctx, args, auxs)

    # load onet model
    args, auxs = load_param(prefix[2], epoch[2], convert=True, ctx=ctx)
    ONet = Detector(O_Net("test"), 48, batch_size[2], ctx, args, auxs)

    mtcnn_detector = MtcnnDetector(detectors=[PNet, RNet, ONet], ctx=ctx, min_face_size=min_face_size,
                                   stride=stride, threshold=thresh, slide_window=False)

    # Interpret camera_path as a device index when possible, else as a path/URL.
    try:
        capture = cv2.VideoCapture(int(camera_path))
    except ValueError:
        capture = cv2.VideoCapture(camera_path)

    writer = None  # created lazily once the first frame's size is known
    try:
        while capture.isOpened():
            ret, img = capture.read()
            if not ret or img is None:  # BUGFIX: also honour the read() status flag
                continue

            # Initialize video writing on the first valid frame.
            if writer is None:
                fourcc = cv2.VideoWriter_fourcc(*'H264')
                h, w = img.shape[:2]
                writer = cv2.VideoWriter('test.mkv', fourcc, 10, (w, h), True)

            t1 = time.time()

            # three-stage cascade: each stage refines the previous stage's boxes
            boxes, boxes_c = mtcnn_detector.detect_pnet(img)
            boxes, boxes_c = mtcnn_detector.detect_rnet(img, boxes_c)
            boxes, boxes_c = mtcnn_detector.detect_onet(img, boxes_c)

            print('shape: ', img.shape, '--', 'time: ', time.time() - t1)

            draw = img.copy()
            if boxes_c is not None:
                font = cv2.FONT_HERSHEY_SIMPLEX
                for b in boxes_c:
                    cv2.rectangle(draw, (int(b[0]), int(b[1])), (int(b[2]), int(b[3])), (0, 255, 255), 1)
                    cv2.putText(draw, '%.3f' % b[4], (int(b[0]), int(b[1])), font, 0.4, (255, 255, 255), 1)

            cv2.imshow("detection result", draw)
            writer.write(draw)

            k = cv2.waitKey(1)
            if k == 27 or k == 113:  # Esc or q key to stop
                break
    finally:
        # BUGFIX: always release resources, even when the loop exits
        # abnormally (previously the writer leaked unless Esc/q was pressed,
        # and the capture was never released at all).
        if writer is not None:
            writer.release()
        capture.release()
        cv2.destroyAllWindows()
예제 #3
0
def mtcnn_model(prefix, epoch, batch_size, ctx, thresh, min_face, stride,
                slide_window):
    """Build and return the full three-stage MtcnnDetector.

    ``prefix``/``epoch``/``batch_size`` are per-net (pnet, rnet, onet)
    triples.  ``slide_window`` selects the sliding-window PNet instead of the
    fully-convolutional one.
    """
    detectors = [None, None, None]

    # load pnet model
    args, auxs = load_param(prefix[0], epoch[0], convert=True, ctx=ctx)
    if slide_window:
        PNet = Detector(P_Net("test"), 12, batch_size[0], ctx, args, auxs)
    else:
        PNet = FcnDetector(P_Net("test"), ctx, args, auxs)
    detectors[0] = PNet

    # load rnet model
    # BUGFIX: was epoch[0]; rnet must load its own checkpoint epoch.
    args, auxs = load_param(prefix[1], epoch[1], convert=True, ctx=ctx)
    RNet = Detector(R_Net("test"), 24, batch_size[1], ctx, args, auxs)
    detectors[1] = RNet

    # load onet model
    args, auxs = load_param(prefix[2], epoch[2], convert=True, ctx=ctx)
    ONet = Detector(O_Net("test"), 48, batch_size[2], ctx, args, auxs)
    detectors[2] = ONet

    mtcnn_detector = MtcnnDetector(detectors=detectors,
                                   ctx=ctx,
                                   min_face_size=min_face,
                                   stride=stride,
                                   threshold=thresh,
                                   slide_window=slide_window)
    return mtcnn_detector
예제 #4
0
def creat_mtcnn_detector(prefix, epoch, batch_size, test_mode, thresh,
                         min_face_size, ctx):
    """Assemble an MtcnnDetector whose later stages depend on `test_mode`.

    PNet (20x20 variant, fully-convolutional) is always loaded; RNet is added
    for "onet"/"hardrnet"/"hardonet", and ONet only for "hardonet".
    """
    # Stage 1 is unconditional.
    pnet_args, pnet_auxs = load_param(prefix[0], epoch[0], convert=True, ctx=ctx)
    detectors = [FcnDetector(P_Net20("test"), ctx, pnet_args, pnet_auxs),
                 None, None]

    # Stage 2 only for modes that run past PNet.
    if test_mode in ["onet", "hardrnet", "hardonet"]:
        rnet_args, rnet_auxs = load_param(prefix[1], epoch[1], convert=True, ctx=ctx)
        detectors[1] = Detector(R_Net("test"), 24, batch_size[1], ctx,
                                rnet_args, rnet_auxs)

    # Stage 3 only for the full hard-onet mode.
    if test_mode == "hardonet":
        onet_args, onet_auxs = load_param(prefix[2], epoch[2], convert=True, ctx=ctx)
        detectors[2] = Detector(O_Net("test", False), 48, batch_size[2], ctx,
                                onet_args, onet_auxs)

    return MtcnnDetector(detectors=detectors,
                         ctx=ctx,
                         min_face_size=min_face_size,
                         stride=4,
                         threshold=thresh,
                         slide_window=False)
예제 #5
0
def test_net(prefix, epoch, batch_size, ctx,
             thresh=[0.6, 0.6, 0.7], min_face_size=24,
             stride=2, slide_window=False, camera_path='0'):
    """Run the MTCNN cascade live on a camera/video stream and display results.

    ``prefix``/``epoch``/``batch_size`` are per-net (pnet, rnet, onet)
    triples.  ``camera_path`` is a device index string ('0') or a file
    path/URL.  Press Esc or 'q' to stop.
    """
    detectors = [None, None, None]

    # load pnet model (sliding-window detector or fully-convolutional net)
    args, auxs = load_param(prefix[0], epoch[0], convert=True, ctx=ctx)
    if slide_window:
        PNet = Detector(P_Net("test"), 12, batch_size[0], ctx, args, auxs)
    else:
        PNet = FcnDetector(P_Net("test"), ctx, args, auxs)
    detectors[0] = PNet

    # load rnet model
    # BUGFIX: was epoch[0]; rnet must load its own checkpoint epoch.
    args, auxs = load_param(prefix[1], epoch[1], convert=True, ctx=ctx)
    RNet = Detector(R_Net("test"), 24, batch_size[1], ctx, args, auxs)
    detectors[1] = RNet

    # load onet model
    args, auxs = load_param(prefix[2], epoch[2], convert=True, ctx=ctx)
    ONet = Detector(O_Net("test"), 48, batch_size[2], ctx, args, auxs)
    detectors[2] = ONet

    mtcnn_detector = MtcnnDetector(detectors=detectors, ctx=ctx, min_face_size=min_face_size,
                                   stride=stride, threshold=thresh, slide_window=slide_window)

    # Interpret camera_path as a device index when possible, else as a path/URL.
    try:
        capture = cv2.VideoCapture(int(camera_path))
    except ValueError:
        capture = cv2.VideoCapture(camera_path)

    try:
        while capture.isOpened():
            ret, img = capture.read()
            if not ret or img is None:  # BUGFIX: also honour the read() status flag
                continue
            t1 = time.time()

            # three-stage cascade: each stage refines the previous stage's boxes
            boxes, boxes_c = mtcnn_detector.detect_pnet(img)
            boxes, boxes_c = mtcnn_detector.detect_rnet(img, boxes_c)
            boxes, boxes_c = mtcnn_detector.detect_onet(img, boxes_c)

            print('shape: ', img.shape, '--', 'time: ', time.time() - t1)

            if boxes_c is not None:
                draw = img.copy()
                font = cv2.FONT_HERSHEY_SIMPLEX
                for b in boxes_c:
                    cv2.rectangle(draw, (int(b[0]), int(b[1])), (int(b[2]), int(b[3])), (0, 255, 255), 1)
                    cv2.putText(draw, '%.3f' % b[4], (int(b[0]), int(b[1])), font, 0.4, (255, 255, 255), 1)

                cv2.imshow("detection result", draw)
            else:
                cv2.imshow("detection result", img)

            k = cv2.waitKey(1)
            if k == 27 or k == 113:  # Esc or q key to stop
                break
    finally:
        # BUGFIX: release the capture and close windows on exit (previously
        # leaked on break and on exceptions).
        capture.release()
        cv2.destroyAllWindows()
예제 #6
0
def test_net(root_path,
             dataset_path,
             image_set,
             prefix,
             epoch,
             batch_size,
             ctx,
             test_mode="onet",
             thresh=[0.6, 0.6, 0.7],
             min_face_size=24,
             stride=2,
             slide_window=False,
             shuffle=False,
             vis=False):
    """Run the MTCNN cascade over an FDDB image set and pickle the detections.

    ``test_mode`` controls cascade depth ("pnet"/"rnet"/"onet"); ``prefix``,
    ``epoch`` and ``batch_size`` are per-net (pnet, rnet, onet) triples.
    ``shuffle`` is accepted but unused here.

    NOTE(review): the output path below is hard-coded to one developer's
    macOS home directory -- should be a parameter.
    """

    detectors = [None, None, None]

    # load pnet model (sliding-window detector or fully-convolutional net)
    args, auxs = load_param(prefix[0], epoch[0], convert=True, ctx=ctx)
    if slide_window:
        PNet = Detector(P_Net("test"), 12, batch_size[0], ctx, args, auxs)
    else:
        PNet = FcnDetector(P_Net("test"), ctx, args, auxs)
    detectors[0] = PNet

    # load rnet model (only when the cascade runs past PNet)
    if test_mode in ["rnet", "onet"]:
        args, auxs = load_param(prefix[1], epoch[1], convert=True, ctx=ctx)
        RNet = Detector(R_Net("test"), 24, batch_size[1], ctx, args, auxs)
        detectors[1] = RNet

    # load onet model (full cascade only)
    if test_mode == "onet":
        args, auxs = load_param(prefix[2], epoch[2], convert=True, ctx=ctx)
        ONet = Detector(O_Net("test"), 48, batch_size[2], ctx, args, auxs)
        detectors[2] = ONet

    mtcnn_detector = MtcnnDetector(detectors=detectors,
                                   ctx=ctx,
                                   min_face_size=min_face_size,
                                   stride=stride,
                                   threshold=thresh,
                                   slide_window=slide_window)

    imdb = IMDB("fddb", image_set, root_path, dataset_path, 'test')
    gt_imdb = imdb.gt_imdb()

    test_data = TestLoader(gt_imdb)
    print('1')  # debug marker
    # detections format: [[all boxes of image 1], [all boxes of image 2], ...,
    # [all boxes of the last image]]; each box is [x1, y1, x2, y2, score],
    # where score is the face-confidence of that box.
    detections = mtcnn_detector.detect_face(imdb, test_data, vis=vis)
    save_path = "/Users/qiuxiaocong/Downloads/mtcnn1"
    if not os.path.exists(save_path):
        os.mkdir(save_path)
    save_file = os.path.join(save_path, "detections_onet_0009_givenPRnet.pkl")
    with open(save_file, 'wb') as f:
        pickle.dump(detections, f, pickle.HIGHEST_PROTOCOL)
    print('detections saved done!')
예제 #7
0
def test_net(prefix,
             epoch,
             batch_size,
             ctx,
             thresh=[0.6, 0.6, 0.7],
             min_face_size=24,
             stride=4,
             slide_window=False):
    """Detect faces in 'test01.jpg' with a 20x20-PNet MTCNN cascade and
    display the annotated result until a key is pressed.

    ``prefix``/``epoch``/``batch_size`` are per-net (pnet, rnet, onet)
    triples.
    """
    detectors = [None, None, None]

    # load pnet model (20x20 variant; sliding-window or fully-convolutional)
    args, auxs = load_param(prefix[0], epoch[0], convert=True, ctx=ctx)
    if slide_window:
        PNet = Detector(P_Net20("test"), 20, batch_size[0], ctx, args, auxs)
    else:
        PNet = FcnDetector(P_Net20("test"), ctx, args, auxs)
    detectors[0] = PNet

    # load rnet model
    args, auxs = load_param(prefix[1], epoch[1], convert=True, ctx=ctx)
    RNet = Detector(R_Net("test"), 24, batch_size[1], ctx, args, auxs)
    detectors[1] = RNet

    # load onet model
    args, auxs = load_param(prefix[2], epoch[2], convert=True, ctx=ctx)
    ONet = Detector(O_Net("test"), 48, batch_size[2], ctx, args, auxs)
    detectors[2] = ONet

    mtcnn_detector = MtcnnDetector(detectors=detectors,
                                   ctx=ctx,
                                   min_face_size=min_face_size,
                                   stride=stride,
                                   threshold=thresh,
                                   slide_window=slide_window)

    img = cv2.imread('test01.jpg')
    t1 = time.time()

    boxes, boxes_c = mtcnn_detector.detect_pnet20(img)
    boxes, boxes_c = mtcnn_detector.detect_rnet(img, boxes_c)
    boxes, boxes_c = mtcnn_detector.detect_onet(img, boxes_c)

    # BUGFIX: Python-2 `print` statement (a SyntaxError under Python 3)
    # replaced with the call form used by the rest of this file.
    print('time: ', time.time() - t1)

    if boxes_c is not None:
        draw = img.copy()
        font = cv2.FONT_HERSHEY_SIMPLEX
        for b in boxes_c:
            cv2.rectangle(draw, (int(b[0]), int(b[1])), (int(b[2]), int(b[3])),
                          (0, 255, 255), 1)
            cv2.putText(draw, '%.3f' % b[4], (int(b[0]), int(b[1])), font, 0.4,
                        (255, 255, 255), 1)

        cv2.imshow("detection result", draw)
        cv2.waitKey(0)
예제 #8
0
def test_net(root_path,
             dataset_path,
             prefix,
             epoch,
             batch_size,
             ctx,
             test_mode="onet",
             thresh=[0.6, 0.6, 0.7],
             min_face_size=24,
             stride=2,
             slide_window=False,
             shuffle=False,
             vis=False):
    """Evaluate the MTCNN cascade on all ten FDDB folds and write the
    per-fold result files via ``IMDB.write_results``.

    ``test_mode`` controls cascade depth ("pnet"/"rnet"/"onet").
    NOTE(review): checkpoints are loaded with convert=False here, unlike most
    loaders in this file -- presumably these params are already in the target
    layout; confirm against the checkpoints actually used.
    """
    detectors = [None, None, None]

    # load pnet model (sliding-window detector or fully-convolutional net)
    args, auxs = load_param(prefix[0], epoch[0], convert=False, ctx=ctx)
    if slide_window:
        PNet = Detector(P_Net("test"), 12, batch_size[0], ctx, args, auxs)
    else:
        PNet = FcnDetector(P_Net("test"), ctx, args, auxs)
    detectors[0] = PNet

    # load rnet model
    if test_mode in ["rnet", "onet"]:
        # BUGFIX: was epoch[0]; rnet must load its own checkpoint epoch.
        args, auxs = load_param(prefix[1], epoch[1], convert=False, ctx=ctx)
        RNet = Detector(R_Net("test"), 24, batch_size[1], ctx, args, auxs)
        detectors[1] = RNet

    # load onet model
    if test_mode == "onet":
        args, auxs = load_param(prefix[2], epoch[2], convert=False, ctx=ctx)
        ONet = Detector(O_Net("test"), 48, batch_size[2], ctx, args, auxs)
        detectors[2] = ONet

    mtcnn_detector = MtcnnDetector(detectors=detectors,
                                   ctx=ctx,
                                   min_face_size=min_face_size,
                                   stride=stride,
                                   threshold=thresh,
                                   slide_window=slide_window)

    # FDDB folds are named fold-01 .. fold-10.
    for i in range(1, 11):
        image_set = "fold-" + str(i).zfill(2)
        imdb = IMDB("fddb", image_set, root_path, dataset_path, 'test')
        gt_imdb = imdb.gt_imdb()

        test_data = TestLoader(gt_imdb)
        all_boxes = mtcnn_detector.detect_face(imdb, test_data, vis=vis)
        imdb.write_results(all_boxes)
예제 #9
0
def test_net(root_path, dataset_path, image_set, prefix, epoch,
             batch_size, ctx, test_mode="rnet",
             thresh=[0.6, 0.6, 0.7], min_face_size=24,
             stride=2, slide_window=False, shuffle=False, vis=False):
    """Run a partial MTCNN cascade over a WIDER image set and dump the
    detections that seed hard-example mining for the NEXT stage.

    ``test_mode`` selects how deep the cascade runs and therefore which net
    the mined examples are for: "pnet" -> mine for rnet, "rnet" -> mine for
    onet.
    """
    # BUGFIX: validate up front -- the original left `net` unbound for any
    # other mode and crashed with UnboundLocalError only AFTER the full
    # (expensive) detection pass.
    if test_mode not in ("pnet", "rnet"):
        raise ValueError(
            "test_mode must be 'pnet' or 'rnet', got %r" % (test_mode,))

    detectors = [None, None, None]

    # load pnet model (sliding-window detector or fully-convolutional net)
    args, auxs = load_param(prefix[0], epoch[0], convert=True, ctx=ctx)
    if slide_window:
        PNet = Detector(P_Net("test"), 12, batch_size[0], ctx, args, auxs)
    else:
        PNet = FcnDetector(P_Net("test"), ctx, args, auxs)
    detectors[0] = PNet

    # load rnet model (only when the cascade runs past PNet)
    if test_mode == "rnet":
        # BUGFIX: was epoch[0]; rnet must load its own checkpoint epoch.
        args, auxs = load_param(prefix[1], epoch[1], convert=True, ctx=ctx)
        RNet = Detector(R_Net("test"), 24, batch_size[1], ctx, args, auxs)
        detectors[1] = RNet

    mtcnn_detector = MtcnnDetector(detectors=detectors, ctx=ctx, min_face_size=min_face_size,
                                   stride=stride, threshold=thresh, slide_window=slide_window)

    imdb = IMDB("wider", image_set, root_path, dataset_path, 'test')
    gt_imdb = imdb.gt_imdb()

    test_data = TestLoader(gt_imdb)
    detections = mtcnn_detector.detect_face(imdb, test_data, vis=vis)

    # The detections of stage N become the training negatives/positives of
    # stage N+1.
    if test_mode == "pnet":
        net = "rnet"
    else:  # test_mode == "rnet" (validated above)
        net = "onet"

    save_path = "./prepare_data/%s" % net
    if not os.path.exists(save_path):
        os.mkdir(save_path)
    save_file = os.path.join(save_path, "detections.pkl")
    with open(save_file, 'wb') as f:
        cPickle.dump(detections, f, cPickle.HIGHEST_PROTOCOL)

    save_hard_example(net)
예제 #10
0
    def onet_detector(image):
        """Score a 48x48 face crop with ONet.

        NOTE(review): declared without ``self`` -- apparently used as an
        unbound helper; confirm against callers.  Also rebuilds the network
        and reloads the checkpoint on every call, which is expensive.

        :param image: 48x48 image crop (caller is expected to resize).
        :return: list of stacked network outputs; out[0] holds class
                 probabilities and out[1] the bbox regression (per the usage
                 in doingLandmark_onet).
        """
        sym = O_Net('test')
        ctx = mx.cpu()
        # ctx = mx.gpu()
        # ctx = [mx.gpu(int(i)) for i in [0,1,2,3]]

        # Hard-coded checkpoint: model/onet, epoch 9, no layout conversion.
        args, auxs = load_param('model/onet', 9, convert=False, ctx=ctx)
        data_size = 48  # ONet input size is 48x48
        data_shapes = {'data': (1, 3, data_size, data_size)}
        # # img_resized = cv2.resize(image, (48, 48))

        newimg = transform(image)
        args['data'] = mx.nd.array(newimg, ctx)
        executor = sym.simple_bind(ctx, grad_req='null', **dict(data_shapes))
        executor.copy_params_from(args, auxs)
        executor.forward(is_train=False)  # inference
        # Collect each output head as a numpy array.
        out_list = [[] for _ in range(len(executor.outputs))]
        for o_list, o_nd in zip(out_list, executor.outputs):
            o_list.append(o_nd.asnumpy())
        out = list()
        for o in out_list:
            out.append(np.vstack(o))
        cls_pro = out[0][0][1]  # face probability; dead store -- callers index `out` directly
        return out
예제 #11
0
파일: demo.py 프로젝트: kidkid168/mtcnn
def test_net(prefix, epoch, batch_size, ctx,
             thresh=[0.6, 0.6, 0.7], min_face_size=24,
             stride=2, slide_window=False, filename='test01.jpg'):
    """Detect faces in ``filename``, display them, and save the annotated
    image next to the original as ``<name>_annotated.<ext>``.

    ``prefix``/``epoch``/``batch_size`` are per-net (pnet, rnet, onet)
    triples.
    """
    detectors = [None, None, None]

    # load pnet model (sliding-window detector or fully-convolutional net)
    args, auxs = load_param(prefix[0], epoch[0], convert=True, ctx=ctx)
    if slide_window:
        PNet = Detector(P_Net("test"), 12, batch_size[0], ctx, args, auxs)
    else:
        PNet = FcnDetector(P_Net("test"), ctx, args, auxs)
    detectors[0] = PNet

    # load rnet model
    # BUGFIX: was epoch[0]; rnet must load its own checkpoint epoch.
    args, auxs = load_param(prefix[1], epoch[1], convert=True, ctx=ctx)
    RNet = Detector(R_Net("test"), 24, batch_size[1], ctx, args, auxs)
    detectors[1] = RNet

    # load onet model
    args, auxs = load_param(prefix[2], epoch[2], convert=True, ctx=ctx)
    ONet = Detector(O_Net("test"), 48, batch_size[2], ctx, args, auxs)
    detectors[2] = ONet

    mtcnn_detector = MtcnnDetector(detectors=detectors, ctx=ctx, min_face_size=min_face_size,
                                   stride=stride, threshold=thresh, slide_window=slide_window)

    img = cv2.imread(filename)
    t1 = time.time()

    boxes, boxes_c = mtcnn_detector.detect_pnet(img)
    boxes, boxes_c = mtcnn_detector.detect_rnet(img, boxes_c)
    boxes, boxes_c = mtcnn_detector.detect_onet(img, boxes_c)

    print('time: ', time.time() - t1)

    if boxes_c is not None:
        draw = img.copy()
        font = cv2.FONT_HERSHEY_SIMPLEX
        for b in boxes_c:
            cv2.rectangle(draw, (int(b[0]), int(b[1])), (int(b[2]), int(b[3])), (0, 255, 255), 1)
            cv2.putText(draw, '%.3f' % b[4], (int(b[0]), int(b[1])), font, 0.4, (255, 255, 255), 1)

        cv2.imshow("detection result", draw)
        # BUGFIX: the old split('.')/join dance dropped interior dots from
        # names like 'a.b.jpg' ('ab_annotated.jpg'); splitext keeps the stem
        # intact and only peels off the extension.
        root, ext = os.path.splitext(filename)
        cv2.imwrite(root + "_annotated" + ext, draw)
        cv2.waitKey(0)
예제 #12
0
def test_net(root_path, dataset_path, prefix, epoch,
             batch_size, ctx, test_mode="onet",
             thresh=[0.6, 0.6, 0.7], min_face_size=24,
             stride=2, slide_window=False, shuffle=False, vis=False):
    """Evaluate the MTCNN cascade on all ten FDDB folds and write per-fold
    results via ``IMDB.write_results``.

    ``test_mode`` controls cascade depth ("pnet"/"rnet"/"onet"); ``prefix``,
    ``epoch`` and ``batch_size`` are per-net (pnet, rnet, onet) triples.
    """
    detectors = [None, None, None]

    # load pnet model (sliding-window detector or fully-convolutional net)
    args, auxs = load_param(prefix[0], epoch[0], convert=True, ctx=ctx)
    if slide_window:
        PNet = Detector(P_Net("test"), 12, batch_size[0], ctx, args, auxs)
    else:
        PNet = FcnDetector(P_Net("test"), ctx, args, auxs)
    detectors[0] = PNet

    # load rnet model
    if test_mode in ["rnet", "onet"]:
        # BUGFIX: was epoch[0]; rnet must load its own checkpoint epoch.
        args, auxs = load_param(prefix[1], epoch[1], convert=True, ctx=ctx)
        RNet = Detector(R_Net("test"), 24, batch_size[1], ctx, args, auxs)
        detectors[1] = RNet

    # load onet model
    if test_mode == "onet":
        args, auxs = load_param(prefix[2], epoch[2], convert=True, ctx=ctx)
        ONet = Detector(O_Net("test"), 48, batch_size[2], ctx, args, auxs)
        detectors[2] = ONet

    mtcnn_detector = MtcnnDetector(detectors=detectors, ctx=ctx, min_face_size=min_face_size,
                                   stride=stride, threshold=thresh, slide_window=slide_window)

    # FDDB folds are named fold-01 .. fold-10.
    for i in range(1, 11):
        image_set = "fold-" + str(i).zfill(2)
        imdb = IMDB("fddb", image_set, root_path, dataset_path, 'test')
        gt_imdb = imdb.gt_imdb()

        test_data = TestLoader(gt_imdb)
        all_boxes = mtcnn_detector.detect_face(imdb, test_data, vis=vis)
        imdb.write_results(all_boxes)
예제 #13
0
    def loadModel(self):
        """Load the face-embedding model and the MTCNN detector, then preload
        the identity gallery.

        Sets ``self.model``, ``self.mtcnn_detector``, ``self.id_dataset`` and
        ``self.idnums``.
        """
        self.model = face_embedding.FaceModel(self.args)
        detectors = [None, None, None]
        ctx = mx.gpu(0)
        prefix = ['mtcnnmodel/pnet', 'mtcnnmodel/rnet', 'mtcnnmodel/onet']
        epoch = [16, 16, 16]
        batch_size = [2048, 256, 16]
        thresh = [0.6, 0.6, 0.7]
        min_face_size = 24
        stride = 2
        slide_window = False

        # load pnet model (sliding-window detector or fully-convolutional net)
        args, auxs = load_param(prefix[0], epoch[0], convert=True, ctx=ctx)
        if slide_window:
            PNet = Detector(P_Net("test"), 12, batch_size[0], ctx, args, auxs)
        else:
            PNet = FcnDetector(P_Net("test"), ctx, args, auxs)
        detectors[0] = PNet

        # load rnet model
        # BUGFIX: was epoch[0]; rnet must load its own checkpoint epoch
        # (harmless while all epochs are 16, wrong as soon as they differ).
        args, auxs = load_param(prefix[1], epoch[1], convert=True, ctx=ctx)
        RNet = Detector(R_Net("test"), 24, batch_size[1], ctx, args, auxs)
        detectors[1] = RNet

        # load onet model
        args, auxs = load_param(prefix[2], epoch[2], convert=True, ctx=ctx)
        ONet = Detector(O_Net("test"), 48, batch_size[2], ctx, args, auxs)
        detectors[2] = ONet

        self.mtcnn_detector = MtcnnDetector(detectors=detectors,
                                            ctx=ctx,
                                            min_face_size=min_face_size,
                                            stride=stride,
                                            threshold=thresh,
                                            slide_window=slide_window)
        self.id_dataset, self.idnums = self.get_id_data(self.args.id_dir)
예제 #14
0
def get_net(prefix, epoch, ctx):
    """Load the VGG test symbol and its checkpoint, wrapped in a Detector."""
    arg_params, aux_params = load_param(prefix, epoch, convert=True, ctx=ctx)
    return Detector(get_symbol_vgg_test(), ctx, arg_params, aux_params)
예제 #15
0
파일: demo.py 프로젝트: 1132520084/mxnet
def get_net(prefix, epoch, ctx):
    """Build a Detector around the VGG test symbol loaded from `prefix`/`epoch`."""
    symbol = get_symbol_vgg_test()
    params = load_param(prefix, epoch, convert=True, ctx=ctx)
    detector = Detector(symbol, ctx, params[0], params[1])
    return detector
예제 #16
0
    def doingLandmark_onet(self, image, trackBox):
        """Re-score a tracked face crop with ONet and predict landmarks with LNet.

        :param image: BGR crop of the tracked face region (resized to 48x48
                      internally).
        :param trackBox: [x1, y1, x2, y2] of the crop in the full frame.
        :return: (cls_pro, bbox_new, disp_landmark) -- face probability from
                 ONet, regression-calibrated box, and landmark coordinates
                 mapped back into full-frame pixels.

        NOTE(review): rebuilds LNet and reloads its checkpoint on every call,
        which is expensive; consider caching the bound executor.
        """
        # x1 = trackBox[0]
        # y1 = trackBox[1]
        #
        # cv2.imwrite('error.jpg', image)
        # mtcnn_result = MTCNN(image)
        # print(mtcnn_result)
        # cls_pro = mtcnn_result[0][2]  # 0 -> 5 points, 1 -> bbox, 2 ->score
        # bbox = mtcnn_result[0][1]
        # bbox[0] = bbox[0] + x1
        # bbox[1] = bbox[1] + y1
        # bbox[2] = bbox[2] + x1
        # bbox[3] = bbox[3] + y1
        # landmarks = mtcnn_result[0][0]
        # landmarks[0] = landmarks[0] + x1
        # landmarks[1] = landmarks[1] + y1
        # landmarks[2] = landmarks[2] + x1
        # landmarks[3] = landmarks[3] + y1
        # landmarks[4] = landmarks[4] + x1
        # landmarks[5] = landmarks[5] + y1
        # landmarks[6] = landmarks[6] + x1
        # landmarks[7] = landmarks[7] + y1
        # landmarks[8] = landmarks[8] + x1
        # landmarks[9] = landmarks[9] + y1

        # bbox = list(bbox)

        # return cls_pro, bbox, landmarks

        detect_length = min(image.shape[0], image.shape[1])
        ctx = mx.cpu()
        # ctx = mx.gpu()
        # ctx = [mx.gpu(int(i)) for i in [0,1,2,3]]

        sym = L_Net('test')
        # Hard-coded LNet checkpoint; 4390 chosen among 1990/3330/4390.
        args, auxs = load_param('model/lnet', 4390, convert=False, ctx=ctx)

        data_size = 48  # landmark net input size is 48x48
        imshow_size = 48  # display size used to scale landmark outputs
        data_shapes = {'data': (1, 3, data_size, data_size)}
        img_resized = cv2.resize(image, (48, 48))
        result = self.onet_detector(img_resized)  # ONet pass: class prob + bbox regression
        cls_pro = result[0][0][1]  # probability that the crop is a face
        reg_m = result[1][0]       # bbox regression offsets
        bbox_new = self.calibrate_box(trackBox, reg_m)
        newimg = transform(img_resized)
        args['data'] = mx.nd.array(newimg, ctx)
        executor = sym.simple_bind(ctx, grad_req='null', **dict(data_shapes))
        executor.copy_params_from(args, auxs)
        out_list = [[] for _ in range(len(executor.outputs))]
        executor.forward(is_train=False)  # inference
        for o_list, o_nd in zip(out_list, executor.outputs):
            o_list.append(o_nd.asnumpy())
        out = list()
        for o in out_list:
            out.append(np.vstack(o))
        landmarks = out[0]

        # Clamp normalized landmark coordinates to [0, 1]; the indexing here
        # assumes an interleaved [x0, y0, x1, y1, ...] layout.
        # NOTE(review): if out[0] is still 2-D (1, 10) at this point,
        # len(landmarks) is 1 and this loop never runs -- the flattening
        # reshape only happens below.  Verify the intended order.
        for j in range(int(len(landmarks) / 2)):
            if landmarks[2 * j] > 1:
                landmarks[2 * j] = 1
            if landmarks[2 * j] < 0:
                landmarks[2 * j] = 0
            if landmarks[2 * j + 1] > 1:
                landmarks[2 * j + 1] = 1
            if landmarks[2 * j + 1] < 0:
                landmarks[2 * j + 1] = 0

        landmarks = landmarks * imshow_size  # outputs are in [0, 1]; scale to pixels
        landmarks = np.reshape(landmarks, -1)

        fator = float(detect_length) / 48.0
        disp_landmark = []

        # Map landmarks back into full-frame coordinates.
        # NOTE(review): this indexing assumes a planar [x0..x4, y0..y4]
        # layout (landmarks[j] / landmarks[j + 5]), which conflicts with the
        # interleaved layout assumed by the clamping loop above -- verify the
        # LNet output ordering.
        for j in range(int(len(landmarks) / 2)):
            display_landmark_x = int(landmarks[j] * fator + trackBox[0])
            display_landmark_y = int(landmarks[j + 5] * fator + trackBox[1])
            disp_landmark.append(display_landmark_x)
            disp_landmark.append(display_landmark_y)

        # for j in range(int(len(landmarks) / 2)):
        #     cv2.circle(frame, (int(disp_landmark[j * 2]), int(disp_landmark[j * 2 + 1])), 2, (0, 255, 0), -1)  # b g r
        # cv2.rectangle(frame, (int(trackBox[0]), int(trackBox[1])), (int(trackBox[2]), int(trackBox[3])), (0, 255, 0), 2)  #
        # cv2.imshow('frame', frame)
        # cv2.waitKey(0)

        return cls_pro, bbox_new, disp_landmark
예제 #17
0
def train_net(sym, prefix, ctx, pretrained, epoch, begin_epoch, end_epoch, imdb, batch_size, thread_num,
              net=12, with_cls = True, with_bbox = True, with_landmark = False, frequent=50, initialize=True, base_lr=0.01, lr_epoch = [6,14]):
    """Train `sym` with SGD and a multi-step LR schedule, checkpointing to `prefix`.

    Python 2 code (print statements).  Either initializes weights with Xavier
    (initialize=True) or resumes from `pretrained`/`epoch`.  `lr_epoch` lists
    the epochs at which the learning rate is multiplied by 0.1.

    NOTE(review): with_cls / with_bbox / with_landmark are accepted but never
    used in this body.
    """
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    train_data = ImageLoader(imdb, net, batch_size, thread_num, True, shuffle=True, ctx=ctx)

    # Resume path: load args/auxs from the pretrained checkpoint.
    if not initialize:
        args, auxs = load_param(pretrained, epoch, convert=True)

    # Fresh-init path: Xavier-init every argument that is not a data/label input.
    if initialize:
        print "init weights and bias:"
        data_shape_dict = dict(train_data.provide_data + train_data.provide_label)
        arg_shape, _, aux_shape = sym.infer_shape(**data_shape_dict)
        arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape))
        aux_shape_dict = dict(zip(sym.list_auxiliary_states(), aux_shape))
        init = mx.init.Xavier(factor_type="in", rnd_type='gaussian', magnitude=2)
        args = dict()
        auxs = dict()
        print 'hello3'

        for k in sym.list_arguments():
            if k in data_shape_dict:
                continue

            #print 'init', k

            args[k] = mx.nd.zeros(arg_shape_dict[k])
            init(k, args[k])
            # Fully-connected layers start 10x smaller than the Xavier draw.
            if k.startswith('fc'):
                args[k][:] /= 10

            '''
            if k.endswith('weight'):
                if k.startswith('conv'):
                    args[k] = mx.random.normal(loc=0, scale=0.001, shape=arg_shape_dict[k])
                else:
                    args[k] = mx.random.normal(loc=0, scale=0.01, shape=arg_shape_dict[k])
            else: # bias
                args[k] = mx.nd.zeros(shape=arg_shape_dict[k])
            '''

        for k in sym.list_auxiliary_states():
            auxs[k] = mx.nd.zeros(aux_shape_dict[k])
            #print aux_shape_dict[k]
            init(k, auxs[k])

    lr_factor = 0.1
    image_num = len(imdb)

    # Convert epoch milestones to iteration counts for the scheduler.
    # NOTE(review): in Python 2 this comprehension leaks its loop variable and
    # clobbers the `epoch` argument after this line (unused afterwards here).
    lr_epoch_diff = [epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch]
    lr = base_lr * (lr_factor ** (len(lr_epoch) - len(lr_epoch_diff)))
    lr_iters = [int(epoch * image_num / batch_size) for epoch in lr_epoch_diff]
    print 'lr', lr, 'lr_epoch', lr_epoch, 'lr_epoch_diff', lr_epoch_diff
    lr_scheduler = mx.lr_scheduler.MultiFactorScheduler(lr_iters, lr_factor)

    data_names = [k[0] for k in train_data.provide_data]
    label_names = [k[0] for k in train_data.provide_label]

    # Progress logging + checkpoint every 10 epochs.
    batch_end_callback = mx.callback.Speedometer(train_data.batch_size, frequent=frequent)
    epoch_end_callback = mx.callback.do_checkpoint(prefix,period=10)
    eval_metrics = mx.metric.CompositeEvalMetric()
    eval_metrics.add(metric_human14.LANDMARK_MSE())
    eval_metrics.add(metric_human14.LANDMARK_L1())

    optimizer_params = {'momentum': 0.9,
                        'wd': 0.00001,
                        'learning_rate': lr,
                        'lr_scheduler': lr_scheduler,
                        'rescale_grad': 1.0}

    mod = Module(sym, data_names=data_names, label_names=label_names, logger=logger, context=ctx)
    mod.fit(train_data, eval_metric=eval_metrics, epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback,
            optimizer='sgd', optimizer_params=optimizer_params,
            arg_params=args, aux_params=auxs, begin_epoch=begin_epoch, num_epoch=end_epoch)
예제 #18
0
def test_net(prefix,
             epoch,
             batch_size,
             ctx,
             thresh=[0.6, 0.6, 0.7],
             min_face_size=24,
             stride=2,
             slide_window=False):
    """Run the MTCNN cascade on a (hard-coded) test image and collect the
    square crops to be fed to the 48x48 landmark net.

    Returns
    -------
    (crop_list, detect_len_list, original_detect, idx, img, nd_array)
        crop_list       : squared boxes handed to the landmark net
        detect_len_list : shorter side of each non-empty crop
        original_detect : raw [x1, y1, x2, y2] detections
        idx             : number of valid 48x48 crops collected
        img             : the image that was processed
        nd_array        : the resized 48x48 crops
    """
    detectors = [None, None, None]

    # load pnet model
    # (sliding window or fully-convolutional; MTCNN normally uses the FCN form)
    args, auxs = load_param(prefix[0], epoch[0], convert=True, ctx=ctx)
    if slide_window:
        PNet = Detector(P_Net("test"), 12, batch_size[0], ctx, args, auxs)
    else:
        PNet = FcnDetector(P_Net("test"), ctx, args, auxs)
    detectors[0] = PNet

    # load rnet model
    args, auxs = load_param(prefix[1], epoch[1], convert=True, ctx=ctx)
    RNet = Detector(R_Net("test"), 24, batch_size[1], ctx, args, auxs)
    detectors[1] = RNet

    # load onet model
    args, auxs = load_param(prefix[2], epoch[2], convert=True, ctx=ctx)
    ONet = Detector(O_Net("test"), 48, batch_size[2], ctx, args, auxs)
    detectors[2] = ONet

    mtcnn_detector = MtcnnDetector1(detectors=detectors,
                                    ctx=ctx,
                                    min_face_size=min_face_size,
                                    stride=stride,
                                    threshold=thresh,
                                    slide_window=slide_window)

    # NOTE(review): hard-coded input image -- should become a parameter.
    img = cv2.imread('/Users/qiuxiaocong/Downloads/facetrack_python/error.jpg')

    boxes, boxes_c = mtcnn_detector.detect_pnet(img)
    boxes, boxes_c = mtcnn_detector.detect_rnet(img, boxes_c)
    boxes, boxes_c = mtcnn_detector.detect_onet(img, boxes_c)

    original_detect = []   # raw detection boxes [x1, y1, x2, y2]
    crop_list = []         # squared boxes handed to the landmark net
    detect_len_list = []   # shorter side of each valid crop
    nd_array = []          # the 48x48 resized crops
    score_list = []        # confidences (collected but not returned)
    # BUGFIX: idx was only assigned inside the `if` below, so the return
    # raised UnboundLocalError whenever boxes_c was None.
    idx = 0

    if boxes_c is not None:
        for b in boxes_c:  # boxes after NMS and bounding-box regression
            b_new0 = np.array(b[0:4])
            original_detect.append(b_new0)
            b_new = convert_to_square(b_new0)  # square crop for the 48x48 landmark net
            crop_list.append(b_new)
            score_list.append(b[4])

            img_draw = img[int(b_new[1]):int(b_new[3]),
                           int(b_new[0]):int(b_new[2])]
            detect_len = min(img_draw.shape[0], img_draw.shape[1])
            if detect_len != 0:  # skip degenerate (empty) crops
                detect_len_list.append(detect_len)
                nd_array.append(cv2.resize(img_draw, (48, 48)))
                idx = idx + 1

    return crop_list, detect_len_list, original_detect, idx, img, nd_array
예제 #19
0
def test_landmark_net(crop_list, detect_len_list, original_detect, idx, img0,
                      img_array):
    """Run the landmark network (LNet) on each 48x48 face crop, map the
    predicted landmarks back to the original image, and draw landmarks plus
    detection boxes onto img0 (shown in a window at the end).

    Args:
        crop_list: square crop boxes [x1, y1, x2, y2] in img0 coordinates.
        detect_len_list: side length (pixels) of each crop in img0.
        original_detect: pre-squaring MTCNN detection boxes to draw.
        idx: number of valid crops/detections.
        img0: original BGR image, drawn on in place.
        img_array: list of 48x48 BGR crops, one per detection.
    """
    sym = L_Net('test')
    ctx = mx.cpu()

    # load lnet model (checkpoints tried: 1990, 3330, 4390)
    args, auxs = load_param('model/lnet', 4390, convert=False,
                            ctx=ctx)

    data_size = 48    # LNet input resolution
    imshow_size = 48  # display size used when scaling landmarks

    data_shapes = {'data': (1, 3, data_size, data_size)}
    disp_landmarks = []

    for idx_ in range(idx):
        img = img_array[idx_]  # already 48x48; no resize needed
        newimg = transform(img)
        args['data'] = mx.nd.array(newimg, ctx)
        executor = sym.simple_bind(ctx, grad_req='null', **dict(data_shapes))
        executor.copy_params_from(args, auxs)

        # Forward pass; collect each network output as a numpy array.
        out_list = [[] for _ in range(len(executor.outputs))]
        executor.forward(is_train=False)
        for o_list, o_nd in zip(out_list, executor.outputs):
            o_list.append(o_nd.asnumpy())
        out = list()
        for o in out_list:
            out.append(np.vstack(o))
        landmarks = out[0]

        # Clamp normalised landmark coordinates into [0, 1].
        for j in range(int(len(landmarks) / 2)):
            if landmarks[2 * j] > 1:
                landmarks[2 * j] = 1
            if landmarks[2 * j] < 0:
                landmarks[2 * j] = 0
            if landmarks[2 * j + 1] > 1:
                landmarks[2 * j + 1] = 1
            if landmarks[2 * j + 1] < 0:
                landmarks[2 * j + 1] = 0

        imshow_img = cv2.resize(img, (imshow_size, imshow_size))
        landmarks = landmarks * imshow_size
        landmarks = np.reshape(landmarks, -1)

        # Map landmarks from the 48x48 crop back into img0 coordinates.
        # Flat layout appears to be [x0..x4, y0..y4]: x at j, its y at j + 5.
        factor = detect_len_list[idx_] / 48.0  # fixed typo: was `fator`
        disp_landmark = []

        for j in range(int(len(landmarks) / 2)):
            display_landmark_x = int(landmarks[j] * factor + crop_list[idx_][0])
            display_landmark_y = int(landmarks[j + 5] * factor +
                                     crop_list[idx_][1])
            disp_landmark.append(display_landmark_x)
            disp_landmark.append(display_landmark_y)

        disp_landmarks.append(disp_landmark)

    # Draw every face's landmarks and its original detection box on img0.
    for i in range(idx):
        # BUGFIX: iterate over this face's own stored (x, y) pairs instead of
        # the loop-leftover `landmarks` value from the last iteration above.
        for j in range(len(disp_landmarks[i]) // 2):
            cv2.circle(img0, (int(
                disp_landmarks[i][j * 2]), int(disp_landmarks[i][j * 2 + 1])),
                       4, (0, 255, 0), -1)  # b g r
        cv2.rectangle(img0,
                      (int(original_detect[i][0]), int(original_detect[i][1])),
                      (int(original_detect[i][2]), int(original_detect[i][3])),
                      (0, 255, 0), 4)

    cv2.imshow("landmarks_10_total", img0)
    cv2.waitKey(0)
예제 #20
0
def test_net(prefix,
             epoch,
             batch_size,
             ctx,
             thresh=[0.6, 0.6, 0.7],
             min_face_size=24,
             stride=2,
             slide_window=False,
             camera_path='0'):
    """Run the MTCNN P/R/O-Net cascade on a camera (or video file) stream and
    display the detections until Esc or 'q' is pressed.

    Args:
        prefix: list of three checkpoint path prefixes [pnet, rnet, onet].
        epoch: list of three checkpoint epochs, one per network.
        batch_size: list of three test batch sizes [pnet, rnet, onet].
        ctx: mx.cpu()/mx.gpu() context used for inference.
        thresh: per-stage score thresholds.
        min_face_size: smallest face (pixels) the image pyramid will detect.
        stride: P-Net sliding stride.
        slide_window: use a sliding-window P-Net instead of the FCN version.
        camera_path: camera index (as a string/int) or a video file path.
    """
    detectors = [None, None, None]

    # load pnet model
    args, auxs = load_param(prefix[0], epoch[0], convert=True, ctx=ctx)
    if slide_window:
        PNet = Detector(P_Net("test"), 12, batch_size[0], ctx, args, auxs)
    else:
        PNet = FcnDetector(P_Net("test"), ctx, args, auxs)
    detectors[0] = PNet

    # load rnet model (fix: use epoch[1], not epoch[0], for the R-Net checkpoint)
    args, auxs = load_param(prefix[1], epoch[1], convert=True, ctx=ctx)
    RNet = Detector(R_Net("test"), 24, batch_size[1], ctx, args, auxs)
    detectors[1] = RNet

    # load onet model
    args, auxs = load_param(prefix[2], epoch[2], convert=True, ctx=ctx)
    ONet = Detector(O_Net("test"), 48, batch_size[2], ctx, args, auxs)
    detectors[2] = ONet

    mtcnn_detector = MtcnnDetector(detectors=detectors,
                                   ctx=ctx,
                                   min_face_size=min_face_size,
                                   stride=stride,
                                   threshold=thresh,
                                   slide_window=slide_window)

    # An integer string selects a device index; anything else is a file path.
    try:
        capture = cv2.VideoCapture(int(camera_path))
    except ValueError:
        capture = cv2.VideoCapture(camera_path)

    try:
        while capture.isOpened():
            ret, img = capture.read()
            if img is None:
                continue

            t1 = time.time()

            boxes, boxes_c = mtcnn_detector.detect_pnet(img)
            boxes, boxes_c = mtcnn_detector.detect_rnet(img, boxes_c)
            boxes, boxes_c = mtcnn_detector.detect_onet(img, boxes_c)

            print('shape: ', img.shape, '--', 'time: ', time.time() - t1)

            if boxes_c is not None:
                draw = img.copy()
                font = cv2.FONT_HERSHEY_SIMPLEX
                for b in boxes_c:
                    cv2.rectangle(draw, (int(b[0]), int(b[1])),
                                  (int(b[2]), int(b[3])), (0, 255, 255), 1)
                    cv2.putText(draw, '%.3f' % b[4], (int(b[0]), int(b[1])),
                                font, 0.4, (255, 255, 255), 1)

                cv2.imshow("detection result", draw)
            else:
                cv2.imshow("detection result", img)

            k = cv2.waitKey(1)
            if k == 27 or k == 113:  # Esc or q key to stop
                break
    finally:
        # Release the camera/file handle even on error or interrupt.
        capture.release()
        cv2.destroyAllWindows()
예제 #21
0
def test_net(prefix,
             epoch,
             batch_size,
             ctx,
             thresh=[0.6, 0.6, 0.7],
             min_face_size=24,
             stride=2,
             slide_window=False):
    """Build the three-stage MTCNN cascade, run it on 'physics.jpg', and
    display the final detections with their confidence scores.

    Args:
        prefix: list of three checkpoint path prefixes [pnet, rnet, onet].
        epoch: list of three checkpoint epochs, one per network.
        batch_size: list of three test batch sizes [pnet, rnet, onet].
        ctx: mx context used for inference.
        thresh: per-stage score thresholds.
        min_face_size: smallest face (pixels) the image pyramid will detect.
        stride: P-Net sliding stride.
        slide_window: use a sliding-window P-Net instead of the FCN version.
    """
    # Stage 1: P-Net, either sliding-window or (default) fully convolutional.
    args, auxs = load_param(prefix[0], epoch[0], convert=True, ctx=ctx)
    if slide_window:
        pnet = Detector(P_Net("test"), 12, batch_size[0], ctx, args, auxs)
    else:
        pnet = FcnDetector(P_Net("test"), ctx, args, auxs)

    # Stage 2: R-Net refines the P-Net proposals on 24x24 crops.
    args, auxs = load_param(prefix[1], epoch[1], convert=True, ctx=ctx)
    rnet = Detector(R_Net("test"), 24, batch_size[1], ctx, args, auxs)

    # Stage 3: O-Net produces the final boxes from 48x48 crops.
    args, auxs = load_param(prefix[2], epoch[2], convert=True, ctx=ctx)
    onet = Detector(O_Net("test"), 48, batch_size[2], ctx, args, auxs)

    mtcnn_detector = MtcnnDetector(detectors=[pnet, rnet, onet],
                                   ctx=ctx,
                                   min_face_size=min_face_size,
                                   stride=stride,
                                   threshold=thresh,
                                   slide_window=slide_window)

    img = cv2.imread('physics.jpg')

    t1 = time.time()  # time the full three-stage cascade

    boxes, boxes_c = mtcnn_detector.detect_pnet(img)
    boxes, boxes_c = mtcnn_detector.detect_rnet(img, boxes_c)
    boxes, boxes_c = mtcnn_detector.detect_onet(img, boxes_c)

    print(boxes_c)

    print('time: ', time.time() - t1)

    if boxes_c is not None:
        draw = img.copy()
        font = cv2.FONT_HERSHEY_SIMPLEX
        for box in boxes_c:  # boxes after NMS and bounding-box regression
            top_left = (int(box[0]), int(box[1]))
            bottom_right = (int(box[2]), int(box[3]))
            cv2.rectangle(draw, top_left, bottom_right, (0, 255, 255), 1)
            # annotate each box with its confidence score
            cv2.putText(draw, '%.3f' % box[4], top_left, font, 0.4,
                        (255, 255, 255), 1)

        cv2.imshow("detection result", draw)
        cv2.waitKey(0)
예제 #22
0
def train_net(sym,
              prefix,
              ctx,
              pretrained,
              epoch,
              begin_epoch,
              end_epoch,
              imdb,
              net=12,
              frequent=50,
              initialize=True,
              base_lr=0.01):
    """Train an MTCNN sub-network (P/R/O-Net) with SGD.

    Args:
        sym: mxnet symbol of the network to train.
        prefix: checkpoint path prefix for per-epoch saves.
        ctx: training context(s), e.g. mx.gpu(0).
        pretrained: checkpoint prefix to resume from when initialize=False.
        epoch: checkpoint epoch to resume from when initialize=False.
        begin_epoch: first epoch number of this run.
        end_epoch: final epoch count passed to Module.fit.
        imdb: image database (list of annotated samples).
        net: network input size (12/24/48).
        frequent: batches between progress log lines.
        initialize: True to init weights from scratch, False to resume.
        base_lr: initial learning rate before scheduled decay.
    """
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)  # log to standard output

    # training data loader
    train_data = ImageLoader(imdb,
                             net,
                             config.BATCH_SIZE,
                             shuffle=True,
                             ctx=ctx)

    if not initialize:  # resume: load parameters from an existing checkpoint
        args, auxs = load_param(pretrained, epoch, convert=True)

    if initialize:
        print("init weights and bias:")
        data_shape_dict = dict(train_data.provide_data +
                               train_data.provide_label)
        arg_shape, _, aux_shape = sym.infer_shape(**data_shape_dict)
        arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape))
        aux_shape_dict = dict(zip(sym.list_auxiliary_states(), aux_shape))

        # Xavier initializer for all learnable weights
        init = mx.init.Xavier(factor_type="in",
                              rnd_type='gaussian',
                              magnitude=2)
        args = dict()  # network weights
        auxs = dict()  # auxiliary states (e.g. BatchNorm moving statistics)

        for k in sym.list_arguments():
            if k in data_shape_dict:
                continue

            print('init', k)

            args[k] = mx.nd.zeros(arg_shape_dict[k])
            init(k, args[k])
            if k.startswith('fc'):
                # damp fully-connected weights so early logits stay small
                args[k][:] /= 10

        for k in sym.list_auxiliary_states():
            # BUGFIX: mx.nd.zeros() requires a shape argument; allocate with
            # the inferred auxiliary-state shape (was `mx.nd.zeros()`).
            auxs[k] = mx.nd.zeros(aux_shape_dict[k])
            init(k, auxs[k])

    # LR schedule: decay by lr_factor at each config.LR_EPOCH milestone that
    # lies after begin_epoch; milestones are converted to iteration counts.
    lr_factor = 0.1
    lr_epoch = config.LR_EPOCH
    lr_epoch_diff = [
        epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch
    ]
    lr = base_lr * (lr_factor**(len(lr_epoch) - len(lr_epoch_diff)))
    lr_iters = [
        int(epoch * len(imdb) / config.BATCH_SIZE) for epoch in lr_epoch_diff
    ]
    print('lr:{},lr_epoch:{},lr_epoch_diff:{}'.format(lr, lr_epoch,
                                                      lr_epoch_diff))

    # learning rate becomes lr * lr_factor after each milestone in lr_iters
    lr_scheduler = mx.lr_scheduler.MultiFactorScheduler(lr_iters, lr_factor)

    data_names = [k[0] for k in train_data.provide_data]
    label_names = [k[0] for k in train_data.provide_label]

    # progress logging every `frequent` batches
    batch_end_callback = mx.callback.Speedometer(train_data.batch_size,
                                                 frequent=frequent)
    # checkpoint the model after every epoch
    epoch_end_callback = mx.callback.do_checkpoint(prefix)
    # composite evaluation: classification accuracy, log loss, bbox MSE
    eval_metrics = mx.metric.CompositeEvalMetric()
    for child_metric in [metric.Accuracy(), metric.LogLoss(),
                         metric.BBOX_MSE()]:
        eval_metrics.add(child_metric)
    # SGD hyper-parameters
    optimizer_params = {
        'momentum': 0.9,
        'wd': 0.00001,
        'learning_rate': lr,
        'lr_scheduler': lr_scheduler,
        'rescale_grad': 1.0,
        'clip_gradient': 5
    }
    # build the trainable module and fit it
    mod = Module(sym,
                 data_names=data_names,
                 label_names=label_names,
                 logger=logger,
                 context=ctx)
    mod.fit(train_data,
            eval_metric=eval_metrics,
            epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback,
            optimizer='sgd',
            optimizer_params=optimizer_params,
            arg_params=args,
            aux_params=auxs,
            begin_epoch=begin_epoch,
            num_epoch=end_epoch)
예제 #23
0
from tools.load_model import load_param
from tools.image_processing import transform
from core.symbol import L106_Net112
import cv2
import numpy as np
import mxnet as mx

# Load the 106-point landmark network (112x112 input), run a single forward
# pass on ./00_.jpg, and collect the raw network outputs in `out`.
sym = L106_Net112('test')
pretrained = 'model/lnet106_112'
epoch = 4070
data_size = 112    # network input resolution
imshow_size = 640  # intended display resolution (not used below)
ctx = mx.cpu()

args, auxs = load_param(pretrained, epoch, convert=True, ctx=ctx)
data_shapes = {'data': (1, 3, data_size, data_size)}

img = cv2.imread('./00_.jpg')
img = cv2.resize(img, (data_size, data_size))
print(img.shape)

newimg1 = transform(img, False)
args['data'] = mx.nd.array(newimg1, ctx)

executor = sym.simple_bind(ctx, grad_req='null', **dict(data_shapes))
executor.copy_params_from(args, auxs)
executor.forward(is_train=False)

# One list per network output head, each stacked into a numpy array.
out_list = [[head.asnumpy()] for head in executor.outputs]
out = [np.vstack(collected) for collected in out_list]
예제 #24
0
def train_net(mode,
              sym,
              prefix,
              ctx,
              pretrained,
              epoch,
              begin_epoch,
              end_epoch,
              imdb,
              batch_size,
              thread_num,
              im_size,
              net=112,
              frequent=50,
              initialize=True,
              base_lr=0.01,
              lr_epoch=[6, 14]):
    """Train a gender / gender+age network with SGD.

    NOTE(review): this block uses Python 2 `print` statements, unlike other
    blocks in this file, and `lr_epoch` is a mutable default argument.

    mode          -- "gender_age" adds an age-MAE metric; otherwise gender only.
    sym           -- mxnet symbol to train.
    prefix        -- checkpoint path prefix for per-epoch saves.
    ctx           -- training context(s).
    pretrained    -- checkpoint prefix to resume from when initialize=False.
    epoch         -- checkpoint epoch to resume from when initialize=False.
    begin_epoch   -- first epoch number of this run.
    end_epoch     -- final epoch count passed to Module.fit.
    imdb          -- image database (list of annotated samples).
    batch_size    -- training batch size.
    thread_num    -- loader worker threads.
    im_size       -- unused in this body; kept for caller compatibility.
    net           -- network input size (112 by default).
    frequent      -- batches between progress log lines.
    initialize    -- True to init weights from scratch, False to resume.
    base_lr       -- initial learning rate.
    lr_epoch      -- epochs at which the learning rate is decayed.
    """
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    train_data = ImageLoader(imdb,
                             net,
                             batch_size,
                             thread_num,
                             shuffle=True,
                             ctx=ctx)

    # Resume: load weights and auxiliary states from an existing checkpoint.
    if not initialize:
        args, auxs = load_param(pretrained, epoch, convert=True)

    if initialize:
        print "init weights and bias:"
        data_shape_dict = dict(train_data.provide_data +
                               train_data.provide_label)
        print(data_shape_dict)
        arg_shape, _, aux_shape = sym.infer_shape(**data_shape_dict)
        #print(arg_shape)
        #print(aux_shape)
        arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape))
        aux_shape_dict = dict(zip(sym.list_auxiliary_states(), aux_shape))
        # Xavier initializer for all learnable weights.
        init = mx.init.Xavier(factor_type="in",
                              rnd_type='gaussian',
                              magnitude=2)
        args = dict()  # network weights
        auxs = dict()  # auxiliary states (e.g. BatchNorm moving statistics)
        #print 'hello3'

        for k in sym.list_arguments():
            # Skip data/label entries; only learnable parameters are created.
            if k in data_shape_dict:
                continue

            #print 'init', k

            args[k] = mx.nd.zeros(arg_shape_dict[k])
            init(k, args[k])
            # Damp fully-connected weights so early logits stay small.
            if k.startswith('fc'):
                args[k][:] /= 10

        for k in sym.list_auxiliary_states():
            auxs[k] = mx.nd.zeros(aux_shape_dict[k])
            #print aux_shape_dict[k]
            init(k, auxs[k])

    # LR schedule: decay by lr_factor at each lr_epoch milestone after
    # begin_epoch; milestones are converted to iteration counts below.
    lr_factor = 0.1
    #lr_epoch = config.LR_EPOCH
    lr_epoch_diff = [
        epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch
    ]
    lr = base_lr * (lr_factor**(len(lr_epoch) - len(lr_epoch_diff)))
    lr_iters = [int(epoch * len(imdb) / batch_size) for epoch in lr_epoch_diff]
    print 'lr', lr, 'lr_epoch', lr_epoch, 'lr_epoch_diff', lr_epoch_diff
    lr_scheduler = mx.lr_scheduler.MultiFactorScheduler(lr_iters, lr_factor)

    data_names = [k[0] for k in train_data.provide_data]
    label_names = [k[0] for k in train_data.provide_label]

    # Progress logging every `frequent` batches; checkpoint after each epoch.
    batch_end_callback = mx.callback.Speedometer(train_data.batch_size,
                                                 frequent=frequent)
    epoch_end_callback = mx.callback.do_checkpoint(prefix)
    eval_metrics = mx.metric.CompositeEvalMetric()

    # Gender metrics always; age MAE only in "gender_age" mode.
    metric1 = metric.GenderAccuracy()
    metric2 = metric.GenderLogLoss()
    if mode == "gender_age":
        metric3 = metric.AGE_MAE()
        for child_metric in [metric1, metric2, metric3]:
            eval_metrics.add(child_metric)
    else:
        for child_metric in [metric1, metric2]:
            eval_metrics.add(child_metric)
    #eval_metrics = mx.metric.CompositeEvalMetric([metric.AccMetric(), metric.MAEMetric(), metric.CUMMetric()])
    # SGD hyper-parameters.
    optimizer_params = {
        'momentum': 0.9,
        'wd': 0.00001,
        'learning_rate': lr,
        'lr_scheduler': lr_scheduler,
        'rescale_grad': 1.0
    }

    # Build the trainable module and fit it.
    mod = Module(sym,
                 data_names=data_names,
                 label_names=label_names,
                 logger=logger,
                 context=ctx)
    mod.fit(train_data,
            eval_metric=eval_metrics,
            epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback,
            optimizer='sgd',
            optimizer_params=optimizer_params,
            arg_params=args,
            aux_params=auxs,
            begin_epoch=begin_epoch,
            num_epoch=end_epoch)
예제 #25
0
파일: train.py 프로젝트: kidkid168/mtcnn
def train_net(sym, prefix, ctx, pretrained, epoch, begin_epoch, end_epoch, imdb,
              net=12, frequent=50, initialize=True, base_lr=0.01):
    """Train an MTCNN sub-network (P/R/O-Net) with SGD.

    Args:
        sym: mxnet symbol of the network to train.
        prefix: checkpoint path prefix for per-epoch saves.
        ctx: training context(s).
        pretrained, epoch: checkpoint to resume from when initialize=False.
        begin_epoch, end_epoch: epoch range for this run.
        imdb: image database (list of annotated samples).
        net: network input size (12/24/48).
        frequent: batches between progress log lines.
        initialize: True to init weights from scratch, False to resume.
        base_lr: initial learning rate before scheduled decay.
    """
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    train_data = ImageLoader(imdb, net, config.BATCH_SIZE, shuffle=True, ctx=ctx)

    if not initialize:  # resume from an existing checkpoint
        args, auxs = load_param(pretrained, epoch, convert=True)

    if initialize:
        print("init weights and bias:")
        data_shape_dict = dict(train_data.provide_data + train_data.provide_label)
        arg_shape, _, aux_shape = sym.infer_shape(**data_shape_dict)
        arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape))
        aux_shape_dict = dict(zip(sym.list_auxiliary_states(), aux_shape))
        # Xavier initializer for all learnable weights
        init = mx.init.Xavier(factor_type="in", rnd_type='gaussian', magnitude=2)
        args = dict()
        auxs = dict()

        for k in sym.list_arguments():
            if k in data_shape_dict:
                continue

            print('init', k)

            args[k] = mx.nd.zeros(arg_shape_dict[k])
            init(k, args[k])
            if k.startswith('fc'):
                # damp fully-connected weights so early logits stay small
                args[k][:] /= 10

        for k in sym.list_auxiliary_states():
            # BUGFIX: mx.nd.zeros() requires a shape argument; allocate with
            # the inferred auxiliary-state shape (was `mx.nd.zeros()`).
            auxs[k] = mx.nd.zeros(aux_shape_dict[k])
            init(k, auxs[k])

    # LR schedule: decay by lr_factor at each config.LR_EPOCH milestone after
    # begin_epoch; milestones are converted to iteration counts.
    lr_factor = 0.1
    lr_epoch = config.LR_EPOCH
    lr_epoch_diff = [epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch]
    lr = base_lr * (lr_factor ** (len(lr_epoch) - len(lr_epoch_diff)))
    lr_iters = [int(epoch * len(imdb) / config.BATCH_SIZE) for epoch in lr_epoch_diff]
    print('lr', lr, 'lr_epoch', lr_epoch, 'lr_epoch_diff', lr_epoch_diff)
    lr_scheduler = mx.lr_scheduler.MultiFactorScheduler(lr_iters, lr_factor)

    data_names = [k[0] for k in train_data.provide_data]
    label_names = [k[0] for k in train_data.provide_label]

    # progress logging every `frequent` batches; checkpoint every epoch
    batch_end_callback = mx.callback.Speedometer(train_data.batch_size, frequent=frequent)
    epoch_end_callback = mx.callback.do_checkpoint(prefix)
    # composite evaluation: classification accuracy, log loss, bbox MSE
    eval_metrics = mx.metric.CompositeEvalMetric()
    metric1 = metric.Accuracy()
    metric2 = metric.LogLoss()
    metric3 = metric.BBOX_MSE()
    for child_metric in [metric1, metric2, metric3]:
        eval_metrics.add(child_metric)
    # SGD hyper-parameters
    optimizer_params = {'momentum': 0.9,
                        'wd': 0.00001,
                        'learning_rate': lr,
                        'lr_scheduler': lr_scheduler,
                        'rescale_grad': 1.0}

    mod = Module(sym, data_names=data_names, label_names=label_names, logger=logger, context=ctx)
    mod.fit(train_data, eval_metric=eval_metrics, epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback,
            optimizer='sgd', optimizer_params=optimizer_params,
            arg_params=args, aux_params=auxs, begin_epoch=begin_epoch, num_epoch=end_epoch)
예제 #26
0
def test_net(prefix,
             epoch,
             batch_size,
             ctx,
             thresh=[0.6, 0.6, 0.7],
             min_face_size=24,
             stride=2,
             slide_window=False):

    detectors = [None, None, None]

    # load pnet model
    args, auxs = load_param(prefix[0], epoch[0], convert=True, ctx=ctx)
    if slide_window:
        PNet = Detector(P_Net("test"), 12, batch_size[0], ctx, args, auxs)
    else:
        PNet = FcnDetector(P_Net("test"), ctx, args, auxs)
    detectors[0] = PNet

    # load rnet model
    args, auxs = load_param(prefix[1], epoch[0], convert=True, ctx=ctx)
    RNet = Detector(R_Net("test"), 24, batch_size[1], ctx, args, auxs)
    detectors[1] = RNet

    # load onet model
    args, auxs = load_param(prefix[2], epoch[2], convert=True, ctx=ctx)
    ONet = Detector(O_Net("test"), 48, batch_size[2], ctx, args, auxs)
    detectors[2] = ONet

    mtcnn_detector = MtcnnDetector(detectors=detectors,
                                   ctx=ctx,
                                   min_face_size=min_face_size,
                                   stride=stride,
                                   threshold=thresh,
                                   slide_window=slide_window)

    video_capture = cv2.VideoCapture(0)
    boxes = []
    boxes_c = []
    while True:
        #img = cv2.imread('/home/zzg/Opensource/mtcnn-master/data/custom/02.jpg')
        _, img = video_capture.read()
        t1 = time.time()

        boxes, boxes_c = mtcnn_detector.detect_pnet(img)
        if boxes_c is None:
            continue
        boxes, boxes_c = mtcnn_detector.detect_rnet(img, boxes_c)
        if boxes_c is None:
            continue
        boxes, boxes_c = mtcnn_detector.detect_onet(img, boxes_c)

        print 'time: ', time.time() - t1

        if boxes_c is not None:
            draw = img.copy()
            font = cv2.FONT_HERSHEY_SIMPLEX
            for b in boxes_c:
                cv2.rectangle(draw, (int(b[0]), int(b[1])),
                              (int(b[2]), int(b[3])), (0, 255, 255), 1)
                #cv2.putText(draw, '%.3f'%b[4], (int(b[0]), int(b[1])), font, 0.4, (255, 255, 255), 1)

            cv2.imshow("detection result", draw)
            #cv2.imwrite("o12.jpg",draw)
            cv2.waitKey(10)
예제 #27
0
def test_net(prefix=['model/pnet', 'model/rnet', 'model/onet'],
             epoch=[16, 16, 16],
             batch_size=[2048, 256, 16],
             ctx=mx.cpu(0),
             thresh=[0.6, 0.6, 0.7],
             min_face_size=24,
             stride=2,
             camera_path='0'):
    """Run the MTCNN cascade on a camera (or video file) stream and record the
    annotated frames to 'test.mkv' until the stream ends or Ctrl-C is pressed.

    Args:
        prefix: checkpoint path prefixes [pnet, rnet, onet].
        epoch: checkpoint epochs, one per network.
        batch_size: per-stage test batch sizes.
        ctx: mx context used for inference.
        thresh: per-stage score thresholds.
        min_face_size: smallest detectable face in pixels.
        stride: P-Net sliding stride.
        camera_path: camera index (as a string/int) or a video file path.
    """
    # load pnet model
    args, auxs = load_param(prefix[0], epoch[0], convert=True, ctx=ctx)
    PNet = FcnDetector(P_Net("test"), ctx, args, auxs)

    # load rnet model (fix: use epoch[1], not epoch[0], for the R-Net checkpoint)
    args, auxs = load_param(prefix[1], epoch[1], convert=True, ctx=ctx)
    RNet = Detector(R_Net("test"), 24, batch_size[1], ctx, args, auxs)

    # load onet model
    args, auxs = load_param(prefix[2], epoch[2], convert=True, ctx=ctx)
    ONet = Detector(O_Net("test"), 48, batch_size[2], ctx, args, auxs)

    mtcnn_detector = MtcnnDetector(detectors=[PNet, RNet, ONet],
                                   ctx=ctx,
                                   min_face_size=min_face_size,
                                   stride=stride,
                                   threshold=thresh,
                                   slide_window=False)

    # An integer string selects a device index; anything else is a file path.
    try:
        capture = cv2.VideoCapture(int(camera_path))
    except ValueError:
        capture = cv2.VideoCapture(camera_path)

    # BUGFIX: `writer` was referenced in the interrupt handler before being
    # necessarily assigned (NameError if interrupted before the first frame)
    # and was never released on normal loop exit. Guard with None + finally.
    writer = None
    try:
        while capture.isOpened():
            ret, img = capture.read()
            if img is None:
                continue

            # Lazily initialize video writing from the first frame's size.
            if writer is None:
                fourcc = cv2.VideoWriter_fourcc(*'H264')
                h, w = img.shape[:2]
                writer = cv2.VideoWriter('test.mkv', fourcc, 10, (w, h), True)

            t1 = time.time()

            boxes, boxes_c = mtcnn_detector.detect_pnet(img)
            boxes, boxes_c = mtcnn_detector.detect_rnet(img, boxes_c)
            boxes, boxes_c = mtcnn_detector.detect_onet(img, boxes_c)

            print('shape: ', img.shape, '--', 'time: ', time.time() - t1)

            draw = img.copy()
            if boxes_c is not None:
                font = cv2.FONT_HERSHEY_SIMPLEX
                for b in boxes_c:
                    cv2.rectangle(draw, (int(b[0]), int(b[1])),
                                  (int(b[2]), int(b[3])), (0, 255, 255), 1)
                    cv2.putText(draw, '%.3f' % b[4], (int(b[0]), int(b[1])),
                                font, 0.4, (255, 255, 255), 1)

            writer.write(draw)

    except KeyboardInterrupt:
        print("KeyboardInterrupt")
    finally:
        # Release the encoder and the capture handle on every exit path.
        if writer is not None:
            writer.release()
        capture.release()