Example #1
def main(model_path, img_path, backend):
    fashion_model = FashionClassifier(model_path, backend, 1.0, (32, 32), (), False)

    avg_time = 0.0
    if os.path.isdir(img_path):
        images = os.listdir(img_path)
        folder_size = len(images)
        total_time = 0.0
        for img in images:
            image = cv2.imread(os.path.join(img_path, img))
            t1 = cv2.TickMeter()
            t1.start()
            print(img, " ", fashion_model.predict(image))
            t1.stop()
            print(t1.getTimeMilli())
            total_time = total_time + t1.getTimeMilli()
        avg_time = total_time / folder_size
    else:
        t1 = cv2.TickMeter()
        t1.start()
        image = cv2.imread(img_path)
        t1.stop()
        avg_time = t1.getTimeMilli()
        image_name = img_path.split("/")[-1]
        print(image_name, " ", fashion_model.predict(image))
    print("\nAVG TIME PER IMAGE: %.2f ms" % avg_time)
Example #2
def noise_estimate(im, pch_size=8):
    '''
    Implementation of the noise level estimation method from:
    Chen G., Zhu F., Heng P. A. An Efficient Statistical Method for Image Noise Level Estimation.
    2015 IEEE International Conference on Computer Vision (ICCV). IEEE Computer Society, 2015.
    Input:
        im: the noisy image, H x W x 3 or H x W numpy array, range [0, 1]
        pch_size: patch size
    Output:
        noise_level: the estimated noise level
    '''

    if im.ndim == 3:
        im = im.transpose((2, 0, 1))
    else:
        im = np.expand_dims(im, axis=0)

    # image to patches
    pch = im2patch(im, pch_size, 16)  # C x pch_size x pch_size x num_pch tensor
    num_pch = pch.shape[3]
    pch = pch.reshape((-1, num_pch))  # d x num_pch matrix
    d = pch.shape[0]

    # Centre the patches and eigen-decompose their covariance matrix,
    # timing the step with a TickMeter.
    tm = cv2.TickMeter()
    tm.start()
    mu = pch.mean(axis=1, keepdims=True)  # d x 1
    X = pch - mu
    sigma_X = np.matmul(X, X.transpose()) / num_pch  # d x d covariance
    sig_value, _ = np.linalg.eigh(sigma_X)
    tm.stop()
    print('covariance + eigen-decomposition took {:.3f} ms'.format(tm.getTimeMilli()))

    sig_value.sort()

    for ii in range(-1, -d - 1, -1):
        tau = np.mean(sig_value[:ii])
        if np.sum(sig_value[:ii] > tau) == np.sum(sig_value[:ii] < tau):
            return np.sqrt(tau)
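
A minimal driver for noise_estimate, assuming the im2patch helper from the same reference implementation is in scope; the image path and noise level are placeholders:

import cv2
import numpy as np

img = cv2.imread('lenna.bmp', cv2.IMREAD_GRAYSCALE).astype(np.float64) / 255.0
sigma = 20 / 255.0  # ground-truth noise level for the synthetic test
noisy = img + np.random.randn(*img.shape) * sigma

print('true sigma: %.4f, estimated: %.4f' % (sigma, noise_estimate(noisy)))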
Example #3
    def __init__(self, source=0):
        self._stream = cv.VideoCapture(source)
        self._counter = cv.TickMeter()
        self._frames = 0
        self._runtime = 0
        self._fps = 0
        self._frame_time = 0
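
Only the constructor of this FPS-counter class made it into the snippet. A sketch of how the remaining fields might be driven, assuming the class wraps VideoCapture.read(); the method name and field semantics are guesses from the constructor:

    def read(self):
        # Hypothetical update: time one frame grab and refresh the running stats.
        self._counter.reset()
        self._counter.start()
        ok, frame = self._stream.read()
        self._counter.stop()
        self._frame_time = self._counter.getTimeMilli()
        self._frames += 1
        self._runtime += self._frame_time
        self._fps = 1000.0 * self._frames / self._runtime if self._runtime else 0
        return ok, frame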
Example #4
    def DetectQRFrmImage(self, inputfile):
        inputimg = cv.imread(inputfile, cv.IMREAD_COLOR)
        if inputimg is None:
            print('ERROR: cannot read image: {}'.format(inputfile))
            return
        print('Run {:s} on image [{:d}x{:d}]'.format(self.getQRModeString(),
                                                     inputimg.shape[1],
                                                     inputimg.shape[0]))
        qrCode = cv.QRCodeDetector()
        count = 10
        timer = cv.TickMeter()
        for _ in range(count):
            timer.start()
            points, decode_info = self.runQR(qrCode, inputimg)
            timer.stop()
        fps = count / timer.getTimeSec()
        print('FPS: {}'.format(fps))
        result = inputimg
        self.drawQRCodeResults(result, points, decode_info, fps)
        cv.imshow("QR", result)
        cv.waitKey(1)
        if self.out != '':
            outfile = self.fname + self.fext
            print("Saving Result: {}".format(outfile))
            cv.imwrite(outfile, result)

        print("Press any key to exit ...")
        cv.waitKey(0)
        print("Exit")
Example #5
def init_model(transform):
    global mark_detector, box_process, img_queue, box_queue, pose_estimator, pose_stabilizers, tm
    # Introduce mark_detector to detect landmarks.
    mark_detector = MarkDetector()

    # Setup process and queues for multiprocessing.
    img_queue = Queue()
    box_queue = Queue()

    # img_queue.put(sample_frame)
    box_process = Process(target=get_face,
                          args=(
                              mark_detector,
                              img_queue,
                              box_queue,
                          ))
    box_process.start()

    # Introduce scalar stabilizers for pose.
    pose_stabilizers = [
        Stabilizer(state_num=2,
                   measure_num=1,
                   cov_process=0.1,
                   cov_measure=0.1) for _ in range(6)
    ]

    tm = cv2.TickMeter()
    return None, None
Example #6
def test_one(title, a, b):
    # Create the hash class
    if "AverageHash" == title:
        hashFun = cv2.img_hash.AverageHash_create()
    elif "PHash" == title:
        hashFun = cv2.img_hash.PHash_create()
    elif "MarrHildrethHash" == title:
        hashFun = cv2.img_hash.MarrHildrethHash_create()
    elif "RadialVarianceHash" == title:
        hashFun = cv2.img_hash.RadialVarianceHash_create()
    elif "BlockMeanHash" == title:
        hashFun = cv2.img_hash.BlockMeanHash_create()
    elif "ColorMomentHash" == title:
        hashFun = cv2.img_hash.ColorMomentHash_create()
    else:
        raise ValueError("unknown hash algorithm: " + title)

    tick = cv2.TickMeter()
    print("=== " + title + " ===")

    tick.reset()
    tick.start()
    # Compute the hash of image a
    hashA = hashFun.compute(a)
    tick.stop()
    print("compute1: " + str(tick.getTimeMilli()) + " ms")

    tick.reset()
    tick.start()
    # Compute the hash of image b
    hashB = hashFun.compute(b)
    tick.stop()
    print("compute2: " + str(tick.getTimeMilli()) + " ms")
    # Compare the distance between the two image hashes
    print("compare: " + str(hashFun.compare(hashA, hashB)))
Example #7
def time_inverse():
    src = cv2.imread('lenna.bmp', cv2.IMREAD_GRAYSCALE)

    if src is None:
        print('Image load failed!')
        return

    dst = np.empty(src.shape, dtype=src.dtype)

    tm = cv2.TickMeter()
    tm.start()

    for y in range(src.shape[0]):
        for x in range(src.shape[1]): 
            dst[y, x] = 255 - src[y, x]
    
    tm.stop()
    print('Image inverse implementation took %4.3f ms' % tm.getTimeMilli())

    cv2.imshow('src', src)
    cv2.imshow('dst', dst) 
    cv2.waitKey()
    cv2.destroyAllWindows()   
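
The per-pixel loop above is exactly the kind of Python code a TickMeter exposes as slow; for comparison, a vectorized version of the same inverse:

import cv2
import numpy as np

src = cv2.imread('lenna.bmp', cv2.IMREAD_GRAYSCALE)

tm = cv2.TickMeter()
tm.start()
dst = 255 - src  # vectorized; cv2.bitwise_not(src) is equivalent for uint8
tm.stop()
print('Vectorized inverse took %4.3f ms' % tm.getTimeMilli())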
Example #8
def speed_test():

    images = get_images()

    detect_time = 0
    detect_num = 0
    timer = cv2.TickMeter()
    predictor = Predictor()

    # Skip the first prediction; the warm-up run tends to take longer
    predictor.predict_bounding_boxes(images[0])

    for image in images:
        image = cv2.resize(image, (640, 480))
        timer.start()
        predictor.predict_bounding_boxes(image)
        timer.stop()

        detect_time += timer.getTimeMilli()
        detect_num += 1

        timer.reset()

    average_time = detect_time / detect_num
    print("average mtcnn prediction_time {} msec".format(average_time))

    return
Example #9
    def processQRCodeDetection(self, qrcode, frame):
        if len(frame.shape) == 2:
            result = cv.cvtColor(frame, cv.COLOR_GRAY2BGR)
        else:
            result = frame
        print('Run {:s} on video frame [{:d}x{:d}]'.format(
            self.getQRModeString(), frame.shape[1], frame.shape[0]))
        timer = cv.TickMeter()
        timer.start()
        points, decode_info = self.runQR(qrcode, frame)
        timer.stop()

        fps = 1 / timer.getTimeSec()
        self.drawQRCodeResults(result, points, decode_info, fps)
        return fps, result, points
Example #10
	def __init__(self):
		# Load the parameters
		self.conf = config()
		# initialize dlib's face detector (HOG-based) and then create the
		# facial landmark predictor
		print("[INFO] loading facial landmark predictor...")
		self.detector = dlib.get_frontal_face_detector()
		self.predictor = dlib.shape_predictor(self.conf.shape_predictor_path)
		
		# grab the indexes of the facial landmarks for the left and
		# right eye, respectively
		(self.lStart, self.lEnd) = face_utils.FACIAL_LANDMARKS_IDXS["left_eye"]
		(self.rStart, self.rEnd) = face_utils.FACIAL_LANDMARKS_IDXS["right_eye"]
		
		# initialize the video stream and sleep for a bit, allowing the
		# camera sensor to warm up
		self.cap = cv2.VideoCapture(0)
		if self.conf.vedio_path == 0:
			self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
		_, sample_frame = self.cap.read()
		
		# Introduce mark_detector to detect landmarks.
		self.mark_detector = MarkDetector()
		
		# Setup process and queues for multiprocessing.
		self.img_queue = Queue()
		self.box_queue = Queue()
		self.img_queue.put(sample_frame)
		self.box_process = Process(target=get_face, args=(
			self.mark_detector, self.img_queue, self.box_queue,))
		self.box_process.start()
		
		# Introduce pose estimator to solve pose. Get one frame to setup the
		# estimator according to the image size.
		self.height, self.width = sample_frame.shape[:2]
		self.pose_estimator = PoseEstimator(img_size=(self.height, self.width))
		
		# Introduce scalar stabilizers for pose.
		self.pose_stabilizers = [Stabilizer(
			state_num=2,
			measure_num=1,
			cov_process=0.1,
			cov_measure=0.1) for _ in range(6)]
		
		self.tm = cv2.TickMeter()
		# Gaze tracking
		self.gaze = GazeTracking()
Example #11
def init():
    """MAIN"""
    # Video source from webcam or video file.
    video_src = args.cam if args.cam is not None else args.video
    if video_src is None:
        print(
            "Warning: video source not assigned, default webcam will be used.")
        video_src = 0

    cap = cv2.VideoCapture(video_src)
    if video_src == 0:
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    _, sample_frame = cap.read()

    # Introduce mark_detector to detect landmarks.
    mark_detector = MarkDetector()

    # Setup process and queues for multiprocessing.
    img_queue = Queue()
    box_queue = Queue()
    img_queue.put(sample_frame)
    box_process = Process(target=get_face,
                          args=(
                              mark_detector,
                              img_queue,
                              box_queue,
                          ))
    box_process.start()

    # Introduce pose estimator to solve pose. Get one frame to setup the
    # estimator according to the image size.
    height, width = sample_frame.shape[:2]
    pose_estimator = PoseEstimator(img_size=(height, width))

    # Introduce scalar stabilizers for pose.
    pose_stabilizers = [
        Stabilizer(state_num=2,
                   measure_num=1,
                   cov_process=0.1,
                   cov_measure=0.1) for _ in range(6)
    ]

    tm = cv2.TickMeter()

    return cap, video_src, img_queue, box_queue, tm, mark_detector, pose_estimator
Example #12
def dnn(frame):
        '''
        Apply GoogLeNet to a single frame.
        '''
        if frame is None:
            return
        img = cv2.resize(frame, (224, 224), interpolation=cv2.INTER_CUBIC)
        inputBlob = cv2.dnn.blobFromImage(img, 1.0, (224, 224), (104, 117, 123), False)
        t = cv2.TickMeter()
        t.start()
        net.setInput(inputBlob, "data")
        prob = net.forward("prob")
        t.stop()
        classId,_ = getMaxClass(prob)
        classNames = readClassNames()
        print(classNames[classId].rstrip())
        return classNames[classId].rstrip()
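
This snippet references a module-level net and the helpers getMaxClass/readClassNames without defining them. A plausible sketch of those pieces, assuming a Caffe GoogLeNet model (all file names are placeholders):

import cv2
import numpy as np

# Hypothetical model files; any GoogLeNet deploy prototxt/caffemodel pair fits.
net = cv2.dnn.readNetFromCaffe('bvlc_googlenet.prototxt',
                               'bvlc_googlenet.caffemodel')

def getMaxClass(prob):
    # Return the index and confidence of the best-scoring class.
    prob = prob.flatten()
    classId = int(np.argmax(prob))
    return classId, prob[classId]

def readClassNames(path='synset_words.txt'):
    # One class name per line, as in the OpenCV DNN samples.
    with open(path) as f:
        return [line for line in f]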
Example #13
def detect_video(video_file):
    cap = cv2.VideoCapture(video_file)
    assert cap.isOpened()
    
    face_detector = pydetector.FaceDetector(model_path='./models', num_thread=1, scale=0.25)
    meter = cv2.TickMeter()
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        # frame = cv2.rotate(frame, cv2.ROTATE_90_COUNTERCLOCKWISE)
        meter.reset()
        meter.start()
        boxes = face_detector.detect(img_bgr=frame)
        meter.stop()
        for item in boxes:
            cv2.rectangle(frame, (item.x, item.y), (item.x + item.width, item.y + item.height), (0, 255, 255), 2)
        cv2.imshow('face-detect', frame)
        print('time={:.3f} ms'.format(meter.getTimeMilli()))
        cv2.waitKeyEx(33)
Example #14
    def on_current_plugin_update_needed(self):
        print("on_current_plugin_update_needed called")
        if self.originalMat is not None:
            print("originalMat exists")
            self.processedMat = self.originalMat.copy()
        if self.currentPluginGui is not None:
            print("starting to time process")
            print(type(self.currentPlugin))
            meter = cv.TickMeter()
            meter.start()
            self.processedMat = self.currentPluginGui.process_image(
                self.originalMat, self.processedMat)
            meter.stop()
            print("The process took ", meter.getTimeMilli(), " milliseconds")
            # cv.imshow("processed:", self.processedMat)
            # cv.imshow("original:", self.originalMat)
            # cv.waitKey(0); cv.destroyAllWindows()
        # Pass the row stride (bytesPerLine) so padded rows render correctly.
        self.originalImage = QImage(self.originalMat.data,
                                    self.originalMat.shape[1],
                                    self.originalMat.shape[0],
                                    self.originalMat.strides[0],
                                    QImage.Format_RGB888)
        temp_original_pixmap = QPixmap.fromImage(
            self.originalImage.rgbSwapped())
        self.originalPixmap.setPixmap(temp_original_pixmap)
        print(type(self.originalPixmap))
        self.processedImage = QImage(self.processedMat.data,
                                     self.processedMat.shape[1],
                                     self.processedMat.shape[0],
                                     self.processedMat.strides[0],
                                     QImage.Format_RGB888)
        temp_processed_pixmap = QPixmap.fromImage(
            self.processedImage.rgbSwapped())
        self.processedPixmap.setPixmap(temp_processed_pixmap)
        print("last line completed")
        return
Example #15
def main():
    backends = (cv.dnn.DNN_BACKEND_DEFAULT,
                cv.dnn.DNN_BACKEND_HALIDE,
                cv.dnn.DNN_BACKEND_INFERENCE_ENGINE,
                cv.dnn.DNN_BACKEND_OPENCV)
    targets = (cv.dnn.DNN_TARGET_CPU,
               cv.dnn.DNN_TARGET_OPENCL,
               cv.dnn.DNN_TARGET_OPENCL_FP16,
               cv.dnn.DNN_TARGET_MYRIAD)

    parser = argparse.ArgumentParser(description='A demo for running libfacedetection using OpenCV\'s DNN module.')
    parser.add_argument('--backend', choices=backends, default=cv.dnn.DNN_BACKEND_DEFAULT, type=int,
                        help='Choose one of computation backends: '
                             '%d: automatically (by default), '
                             '%d: Halide language (http://halide-lang.org/), '
                             '%d: Intel\'s Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit), '
                             '%d: OpenCV implementation' % backends)
    parser.add_argument('--target', choices=targets, default=cv.dnn.DNN_TARGET_CPU, type=int,
                        help='Choose one of target computation devices: '
                             '%d: CPU target (by default), '
                             '%d: OpenCL, '
                             '%d: OpenCL fp16 (half-float precision), '
                             '%d: VPU' % targets)
    # Location
    parser.add_argument('--input', '-i', help='Path to the image. Omit to call default camera')
    parser.add_argument('--model', '-m', type=str, help='Path to .onnx model file.')
    # Inference parameters
    parser.add_argument('--score_threshold', default=0.6, type=float, help='Threshold for filtering out faces with conf < conf_thresh.')
    parser.add_argument('--nms_threshold', default=0.3, type=float, help='Threshold for non-max suppression.')
    parser.add_argument('--top_k', default=5000, type=int, help='Keep the top_k highest-scoring detections before NMS.')
    # Result
    parser.add_argument('--vis', default=True, type=str2bool, help='Set true to visualize the result image. Ignored when using a camera.')
    parser.add_argument('--save', default=False, type=str2bool, help='Set true to save the result as result.jpg. Ignored when using a camera.')
    args = parser.parse_args()

    # Instantiate yunet
    yunet = cv.FaceDetectorYN.create(
        model=args.model,
        config='',
        input_size=(320, 320),
        score_threshold=args.score_threshold,
        nms_threshold=args.nms_threshold,
        top_k=args.top_k,
        backend_id=args.backend,
        target_id=args.target
    )

    if args.input is not None:
        image = cv.imread(args.input)

        yunet.setInputSize((image.shape[1], image.shape[0]))
        _, faces = yunet.detect(image) # faces: None, or nx15 np.array

        vis_image = visualize(image, faces)
        if args.save:
            cv.imwrite('result.jpg', vis_image)
            print('result.jpg saved.')
        if args.vis:
            cv.namedWindow(args.input, cv.WINDOW_AUTOSIZE)
            cv.imshow(args.input, vis_image)
            cv.waitKey(0)
    else:
        device_id = 0
        cap = cv.VideoCapture(device_id)
        frame_w = int(cap.get(cv.CAP_PROP_FRAME_WIDTH))
        frame_h = int(cap.get(cv.CAP_PROP_FRAME_HEIGHT))
        yunet.setInputSize([frame_w, frame_h])

        tm = cv.TickMeter()
        while cv.waitKey(1) < 0:
            has_frame, frame = cap.read()
            if not has_frame:
                print('No frames grabbed!')
                break

            tm.start()
            _, faces = yunet.detect(frame)  # faces: None, or nx15 np.array
            tm.stop()

            frame = visualize(frame, faces, fps=tm.getFPS())
            cv.imshow('libfacedetection demo', frame)

            tm.reset()
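
This sample also leans on two helpers defined elsewhere in the script: visualize for drawing detections and str2bool for the boolean flags. A common str2bool sketch (the accepted spellings are an assumption):

import argparse

def str2bool(v):
    if isinstance(v, bool):
        return v
    if v.lower() in ('true', 't', 'yes', 'y', '1'):
        return True
    if v.lower() in ('false', 'f', 'no', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError('Boolean value expected.')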
Example #16
def main():
    """MAIN"""
    # Video source from webcam or video file.
    video_src = args.cam if args.cam is not None else args.video
    if video_src is None:
        print(
            "Warning: video source not assigned, default webcam will be used.")
        video_src = 0

    cap = cv2.VideoCapture(video_src)
    if video_src == 0:
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    _, sample_frame = cap.read()

    # Introduce mark_detector to detect landmarks.
    mark_detector = MarkDetector()

    # Setup process and queues for multiprocessing.
    img_queue = Queue()
    box_queue = Queue()
    img_queue.put(sample_frame)
    box_process = Process(target=get_face,
                          args=(
                              mark_detector,
                              img_queue,
                              box_queue,
                          ))
    box_process.start()

    # Introduce pose estimator to solve pose. Get one frame to setup the
    # estimator according to the image size.
    height, width = sample_frame.shape[:2]
    pose_estimator = PoseEstimator(img_size=(height, width))

    # Introduce scalar stabilizers for pose.
    pose_stabilizers = [
        Stabilizer(state_num=2,
                   measure_num=1,
                   cov_process=0.1,
                   cov_measure=0.1) for _ in range(6)
    ]

    tm = cv2.TickMeter()

    # Uncomment to prepare for preview, define the codec and create VideoWriter object for output video
    # fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    # out = cv2.VideoWriter('output.mp4', fourcc, 20.0, (width, height))

    while True:
        # Read frame, crop it, flip it, suits your needs.
        frame_got, frame = cap.read()

        # Break if failure or video finished
        if frame_got is False:
            break

        # Crop it if frame is larger than expected.
        # frame = frame[0:480, 300:940]

        # If frame comes from webcam, flip it so it looks like a mirror.
        if video_src == 0:
            frame = cv2.flip(frame, 2)

        # Pose estimation by 3 steps:
        # 1. detect face;
        # 2. detect landmarks;
        # 3. estimate pose

        # Feed frame to image queue.
        img_queue.put(frame)

        # Get face from box queue.
        facebox = box_queue.get()

        if facebox is not None:
            # Detect landmarks from image of 128x128.
            face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]]
            face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
            face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)

            # Get marks
            marks = mark_detector.detect_marks([face_img])

            # Convert the marks locations from local CNN to global image.
            marks *= (facebox[2] - facebox[0])
            marks[:, 0] += facebox[0]
            marks[:, 1] += facebox[1]

            # Uncomment following line to show raw marks.
            # mark_detector.draw_marks(
            #     frame, marks, color=(0, 255, 0))

            # Try pose estimation with 68 points.
            pose = pose_estimator.solve_pose_by_68_points(marks)

            # Stabilize the pose.
            steady_pose = []
            pose_np = np.array(pose).flatten()
            for value, ps_stb in zip(pose_np, pose_stabilizers):
                ps_stb.update([value])
                steady_pose.append(ps_stb.state[0])
            steady_pose = np.reshape(steady_pose, (-1, 3))

            # Draw the stabilized pose annotation on the frame.
            pose_estimator.draw_annotation_box(frame,
                                               steady_pose[0],
                                               steady_pose[1],
                                               color=(128, 255, 128))

            # Draw the head axes on the frame.
            pose_estimator.draw_axis(frame, steady_pose[0], steady_pose[1])

            # Uncomment following line to get the length of the line on the face
            # faced_line_length = pose_estimator.get_faced_line_length(frame, steady_pose[0], steady_pose[1])

            # Print status "FACING"
            # cv2.putText(frame, "FACING", (height//50, width//50), cv2.FONT_HERSHEY_DUPLEX, 1, (0, 0, 0), 1)
            print("FACING")
        else:
            # Print status "UNFACED"
            # cv2.putText(frame, "UNFACED", (height//50, width//50), cv2.FONT_HERSHEY_DUPLEX, 1, (0, 0, 0), 1)
            print("UNFACED")

        # Uncomment to write frame
        # out.write(frame)

        # Uncomment to show preview.
        # cv2.imshow("Preview", frame)

        # Wait
        if cv2.waitKey(10) == 27:
            break

    # Uncomment to release output file
    # out.release()

    # Clean up the multiprocessing process.
    box_process.terminate()
    box_process.join()
Example #17
    if video_src == 0:
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)

    # Get the real frame resolution.
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    frame_rate = cap.get(cv2.CAP_PROP_FPS)

    # Video output by video writer.
    if args.write_video:
        video_writer = cv2.VideoWriter('output.avi',
                                       cv2.VideoWriter_fourcc(*'avc1'),
                                       frame_rate, (frame_width, frame_height))

    # Introduce a meter to measure the FPS.
    tm_detection = cv2.TickMeter()
    tm_identification = cv2.TickMeter()

    # Loop through the video frames.
    while True:
        # Read frame, crop it, flip it, suits your needs.
        frame_got, frame = cap.read()
        if not frame_got:
            break

        # Crop it if frame is larger than expected.
        # frame = frame[0:480, 300:940]

        # If frame comes from webcam, flip it so it looks like a mirror.
        if video_src == 0:
            frame = cv2.flip(frame, 2)
Example #18
def main():
    # Image path
    img_path = "./image/image.png"
    # Algorithm name: edsr, espcn, fsrcnn or lapsrn
    algorithm = "lapsrn"
    # Model path, determined by the chosen algorithm
    model = "./model/LapSRN_x2.pb"
    # Upscaling factor
    scale = 2
    # Timing results
    perf = []

    img = cv2.imread(img_path)

    if img is None:
        print("Couldn't load image: " + str(img_path))
        return

    # Crop the image so its dimensions are divisible by the scale factor
    height = img.shape[0] - (img.shape[0] % scale)
    width = img.shape[1] - (img.shape[1] % scale)
    cropped = img[0:height, 0:width]

    # Downscale the image for benchmarking
    img_downscaled = cv2.resize(cropped, None, fx=1.0 / scale, fy=1.0 / scale)

    # Make dnn super resolution instance
    sr = dnn_superres.DnnSuperResImpl_create()

    # Read and set the dnn model
    sr.readModel(model)
    sr.setModel(algorithm, scale)

    timer = cv2.TickMeter()
    timer.start()
    # Upscale the image with the DNN model
    img_new = sr.upsample(img_downscaled)
    timer.stop()
    # Average seconds per start/stop cycle
    elapsed = timer.getTimeSec() / timer.getCounter()
    perf.append(elapsed)
    print(sr.getAlgorithm() + " : " + str(elapsed))

    # INTER_CUBIC - bicubic interpolation
    timer.reset()  # reset, otherwise the DNN time above would be averaged in
    timer.start()
    bicubic = cv2.resize(img_downscaled,
                         None,
                         fx=scale,
                         fy=scale,
                         interpolation=cv2.INTER_CUBIC)
    timer.stop()
    # Average seconds per start/stop cycle
    elapsed = timer.getTimeSec() / timer.getCounter()
    perf.append(elapsed)
    print("Bicubic" + " : " + str(elapsed))

    # INTER_NEAREST - nearest-neighbor interpolation
    timer.reset()
    timer.start()
    nearest = cv2.resize(img_downscaled,
                         None,
                         fx=scale,
                         fy=scale,
                         interpolation=cv2.INTER_NEAREST)
    timer.stop()
    # Average seconds per start/stop cycle
    elapsed = timer.getTimeSec() / timer.getCounter()
    perf.append(elapsed)
    print("Nearest" + " : " + str(elapsed))

    # INTER_LANCZOS4 - Lanczos interpolation
    timer.reset()
    timer.start()
    lanczos = cv2.resize(img_downscaled,
                         None,
                         fx=scale,
                         fy=scale,
                         interpolation=cv2.INTER_LANCZOS4)
    timer.stop()
    # Average seconds per start/stop cycle
    elapsed = timer.getTimeSec() / timer.getCounter()
    perf.append(elapsed)
    print("Lanczos" + " : " + str(elapsed))

    imgs = [img_new, bicubic, nearest, lanczos]
    titles = [sr.getAlgorithm(), "Bicubic", "Nearest neighbor", "Lanczos"]
    showBenchmark(imgs, titles, perf)
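
The showBenchmark helper called above is not part of the snippet; a hypothetical matplotlib-based stand-in that displays each result next to its runtime (function name and layout are assumptions):

import cv2
import matplotlib.pyplot as plt

def showBenchmark(imgs, titles, perf):
    # Hypothetical helper: one panel per upscaled image, titled with its timing.
    fig, axes = plt.subplots(1, len(imgs), figsize=(4 * len(imgs), 4))
    for ax, img, title, t in zip(axes, imgs, titles, perf):
        ax.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        ax.set_title('{}: {:.3f} s'.format(title, t))
        ax.axis('off')
    plt.show()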
Example #19
def main():
    # Read and store arguments
    confThreshold = args.thr
    nmsThreshold = args.nms
    inpWidth = args.width
    inpHeight = args.height
    modelDetector = args.model
    modelRecognition = args.ocr

    # Load network
    detector = cv.dnn.readNet(modelDetector)
    recognizer = cv.dnn.readNet(modelRecognition)

    # Create a new named window
    kWinName = "EAST: An Efficient and Accurate Scene Text Detector"
    cv.namedWindow(kWinName, cv.WINDOW_NORMAL)
    outNames = []
    outNames.append("feature_fusion/Conv_7/Sigmoid")
    outNames.append("feature_fusion/concat_3")

    # Open a video file or an image file or a camera stream
    cap = cv.VideoCapture(args.input if args.input else 0)

    tickmeter = cv.TickMeter()
    while cv.waitKey(1) < 0:
        # Read frame
        hasFrame, frame = cap.read()
        if not hasFrame:
            cv.waitKey()
            break

        # Get frame height and width
        height_ = frame.shape[0]
        width_ = frame.shape[1]
        rW = width_ / float(inpWidth)
        rH = height_ / float(inpHeight)

        # Create a 4D blob from frame.
        blob = cv.dnn.blobFromImage(frame, 1.0, (inpWidth, inpHeight), (123.68, 116.78, 103.94), True, False)

        # Run the detection model
        detector.setInput(blob)

        tickmeter.start()
        outs = detector.forward(outNames)
        tickmeter.stop()

        # Get scores and geometry
        scores = outs[0]
        geometry = outs[1]
        [boxes, confidences] = decodeBoundingBoxes(scores, geometry, confThreshold)

        # Apply NMS
        indices = cv.dnn.NMSBoxesRotated(boxes, confidences, confThreshold, nmsThreshold)
        for i in indices:
            # get 4 corners of the rotated rect
            vertices = cv.boxPoints(boxes[i[0]])
            # scale the bounding box coordinates based on the respective ratios
            for j in range(4):
                vertices[j][0] *= rW
                vertices[j][1] *= rH


            # get cropped image using perspective transform
            if modelRecognition:
                cropped = fourPointsTransform(frame, vertices)
                cropped = cv.cvtColor(cropped, cv.COLOR_BGR2GRAY)

                # Create a 4D blob from cropped image
                blob = cv.dnn.blobFromImage(cropped, size=(100, 32), mean=127.5, scalefactor=1 / 127.5)
                recognizer.setInput(blob)

                # Run the recognition model
                tickmeter.start()
                result = recognizer.forward()
                tickmeter.stop()

                # decode the result into text
                wordRecognized = decodeText(result)
                cv.putText(frame, wordRecognized, (int(vertices[1][0]), int(vertices[1][1])), cv.FONT_HERSHEY_SIMPLEX,
                           0.5, (255, 0, 0))

            for j in range(4):
                p1 = (vertices[j][0], vertices[j][1])
                p2 = (vertices[(j + 1) % 4][0], vertices[(j + 1) % 4][1])
                cv.line(frame, p1, p2, (0, 255, 0), 1)

        # Put efficiency information
        label = 'Inference time: %.2f ms' % (tickmeter.getTimeMilli())
        cv.putText(frame, label, (0, 15), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0))

        # Display the frame
        cv.imshow(kWinName, frame)
        tickmeter.reset()
Example #20
def main():
    #bagreader = BagFileReader(args.video, 640,480,848,480,30,30)
    bagreader = BagFileReader(args.video, 640, 480, 640, 480, 15, 15)

    # Introduce mark_detector to detect landmarks.
    mark_detector = MarkDetector()
    sample_frame = bagreader.get_color_frame()
    sample_frame = cv2.cvtColor(sample_frame, cv2.COLOR_BGR2RGB)
    height, width, _ = sample_frame.shape
    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    out = cv2.VideoWriter('output-%s.avi' % args.name_output, fourcc, args.fps,
                          (width, height))

    # Setup process and queues for multiprocessing.
    img_queue = Queue()
    box_queue = Queue()
    img_queue.put(sample_frame)
    box_process = Process(target=get_face,
                          args=(
                              mark_detector,
                              img_queue,
                              box_queue,
                          ))
    box_process.start()

    # Introduce pose estimator to solve pose. Get one frame to setup the
    # estimator according to the image size.
    height, width = sample_frame.shape[:2]
    pose_estimator = PoseEstimator(img_size=(height, width))

    # Introduce scalar stabilizers for pose.
    pose_stabilizers = [
        Stabilizer(state_num=2,
                   measure_num=1,
                   cov_process=0.1,
                   cov_measure=0.1) for _ in range(6)
    ]

    tm = cv2.TickMeter()

    while True:

        t1 = time.time()
        # Read frame, crop it, flip it, suits your needs.
        frame = bagreader.get_color_frame()
        # An ndarray is never `is False`; assume the reader yields None at end of stream.
        if frame is None:
            break
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Crop it if frame is larger than expected.
        # frame = frame[0:480, 300:940]

        # If frame comes from webcam, flip it so it looks like a mirror.
        # if video_src == 0:
        #     frame = cv2.flip(frame, 2)

        # Pose estimation by 3 steps:
        # 1. detect face;
        # 2. detect landmarks;
        # 3. estimate pose

        # Feed frame to image queue.
        img_queue.put(frame)

        # Get face from box queue.
        faceboxes = box_queue.get()
        print(faceboxes)

        mess = "Not detect pose"

        if faceboxes is not None:
            if isinstance(faceboxes[1], int):
                faceboxes = [faceboxes]
            for facebox in faceboxes:
                # Detect landmarks from image of 128x128.
                face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]]
                face_img = cv2.resize(face_img,
                                      (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
                face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)

                tm.start()
                marks = mark_detector.detect_marks([face_img])
                tm.stop()

                # Convert the marks locations from local CNN to global image.
                marks *= (facebox[2] - facebox[0])
                marks[:, 0] += facebox[0]
                marks[:, 1] += facebox[1]

                # Uncomment following line to show raw marks.
                # mark_detector.draw_marks(
                #     frame, marks, color=(0, 255, 0))

                # Uncomment following line to show facebox.
                # mark_detector.draw_box(frame, [facebox])

                # Try pose estimation with 68 points.
                pose = pose_estimator.solve_pose_by_68_points(marks)

                # Stabilize the pose.
                steady_pose = []
                pose_np = np.array(pose).flatten()
                for value, ps_stb in zip(pose_np, pose_stabilizers):
                    ps_stb.update([value])
                    steady_pose.append(ps_stb.state[0])
                steady_pose = np.reshape(steady_pose, (-1, 3))

                # Draw the (unstabilized) pose annotation on the frame.
                pose_estimator.draw_annotation_box(frame,
                                                   pose[0],
                                                   pose[1],
                                                   color=(255, 128, 128))

                # Uncomment the following lines to draw the stable pose annotation on the frame.
                t2 = time.time()
                mess = round(1 / (t2 - t1), 2)
                # pose_estimator.draw_annotation_box(
                #     frame, steady_pose[0], steady_pose[1], color=(128, 255, 128))

                # Uncomment following line to draw head axes on frame.
                # pose_estimator.draw_axes(frame, steady_pose[0], steady_pose[1])

        cv2.putText(frame,
                    "FPS: " + "{}".format(mess), (20, 20),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.75, (0, 255, 0),
                    thickness=2)
        # Show preview.
        cv2.imshow("Preview", frame)
        out.write(frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    out.release()
    # Clean up the multiprocessing process.
    box_process.terminate()
    box_process.join()
Example #21
def main(strargument):
    if os.path.isdir("./test"):
        shutil.rmtree("./test")
    os.mkdir("./test")
    if os.path.exists("result.txt"):
        os.remove("result.txt")
    f = open("result.txt", "a")
    cap = cv2.VideoCapture(strargument)
    # cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    #cap = cv2.VideoCapture("NTQ.mkv")
    #cap = cv2.VideoCapture("/home/fitmta/Real-Time-Face-Detection-OpenCV-GPU/videos/video/out1.1.avi")
    #cap = cv2.VideoCapture("http://*****:*****@#[email protected]:8932/mjpg/video.mjpg")
    # cap = cv2.VideoCapture("http://*****:*****@[email protected]:8933/Streaming/channels/102/preview")
    success, frame = cap.read()
    startId = countIdFolder("./face_db/")
    # quit if unable to read the video file
    if not success:
        print('Failed to read video')
        sys.exit(1)
    #The color of the rectangle we draw around the face
    rectangleColor = (0, 165, 255)
    #variables holding the current frame number and the current faceid
    frameCounter = 0
    currentFaceID = 0
    #Variables holding the correlation trackers and the name per faceid
    conf = configparser.ConfigParser()
    conf.read("config/main.cfg")
    mtcnn_detector = load_mtcnn(conf)
    MODEL_PATH = conf.get("MOBILEFACENET", "MODEL_PATH")
    VERIFICATION_THRESHOLD = float(
        conf.get("MOBILEFACENET", "VERIFICATION_THRESHOLD"))
    FACE_DB_PATH = conf.get("MOBILEFACENET", "FACE_DB_PATH")
    BLUR_THRESH = int(conf.get("CUSTOM", "BLUR_THRESH"))
    MIN_FACE_SIZE = int(conf.get("MTCNN", "MIN_FACE_SIZE"))
    MAX_BLACK_PIXEL = int(conf.get("CUSTOM", "MAX_BLACK_PIXEL"))
    YAWL = int(conf.get("CUSTOM", "YAWL"))
    YAWR = int(conf.get("CUSTOM", "YAWR"))
    PITCHL = int(conf.get("CUSTOM", "PITCHL"))
    PITCHR = int(conf.get("CUSTOM", "PITCHR"))
    ROLLL = int(conf.get("CUSTOM", "ROLLL"))
    ROLLR = int(conf.get("CUSTOM", "ROLLR"))
    MAXDISAPPEARED = int(conf.get("CUSTOM", "MAXDISAPPEARED"))
    IS_FACE_THRESH = float(conf.get("CUSTOM", "IS_FACE_THRESH"))
    EXTEND_Y = int(conf.get("CUSTOM", "EXTEND_Y"))
    EXTEND_X = int(conf.get("CUSTOM", "EXTEND_X"))
    SIMILAR_THRESH = float(conf.get("CUSTOM", "SIMILAR_THRESH"))
    MAX_LIST_LEN = int(conf.get("CUSTOM", "MAX_LIST_LEN"))
    MIN_FACE_FOR_SAVE = int(conf.get("CUSTOM", "MIN_FACE_FOR_SAVE"))
    LIVE_TIME = int(conf.get("CUSTOM", "LIVE_TIME"))
    ROIXL = int(conf.get("CUSTOM", "ROIXL"))
    ROIXR = int(conf.get("CUSTOM", "ROIXR"))
    ROIYB = int(conf.get("CUSTOM", "ROIYB"))
    ROIYA = int(conf.get("CUSTOM", "ROIYA"))
    maxDisappeared = MAXDISAPPEARED  # a track may be absent for at most this many frames
    faces_db = load_faces(FACE_DB_PATH, mtcnn_detector)
    # load_face_db = ThreadingUpdatefacedb(FACE_DB_PATH,mtcnn_detector)
    time.sleep(10)
    for item in faces_db:
        print(item["name"])
    listTrackedFace = []
    mark_detector = MarkDetector()
    tm = cv2.TickMeter()
    _, sample_frame = cap.read()
    height, width = sample_frame.shape[:2]
    pose_estimator = PoseEstimator(img_size=(height, width))
    with tf.Graph().as_default():
        with tf.Session() as sess:
            load_mobilefacenet(MODEL_PATH)
            inputs_placeholder = tf.get_default_graph().get_tensor_by_name(
                "input:0")
            embeddings = tf.get_default_graph().get_tensor_by_name(
                "embeddings:0")
            try:
                start = time.time()
                while True:
                    start1 = time.time()
                    retval, frame = cap.read()

                    #Increase the framecounter
                    frameCounter += 1
                    if retval:
                        _frame = frame[ROIYA:ROIYB, ROIXL:ROIXR]
                        cv2.rectangle(frame, (ROIXL, ROIYA), (ROIXR, ROIYB),
                                      (0, 0, 255), 2)
                        good_face_index = []
                        # faces_db = load_face_db.face_db
                        if (frameCounter % 1) == 0:
                            ### embed and compare name
                            for i, face_db in enumerate(faces_db):
                                if not os.path.isdir(
                                        "./face_db/" +
                                        face_db["name"].split("_")[0]):
                                    faces_db.pop(i)
                            faces, landmarks = mtcnn_detector.detect(_frame)
                            if faces.shape[0] != 0:
                                input_images = np.zeros(
                                    (faces.shape[0], 112, 112, 3))
                                save_images = np.zeros(
                                    (faces.shape[0], 112, 112, 3))
                                (yaw, pitch, roll) = (0, 0, 0)
                                for i, face in enumerate(faces):
                                    if round(faces[i, 4], 6) > IS_FACE_THRESH:
                                        bbox = faces[i, 0:4]
                                        points = landmarks[i, :].reshape(
                                            (5, 2))
                                        nimg = face_preprocess.preprocess(
                                            _frame,
                                            bbox,
                                            points,
                                            image_size='112,112')
                                        save_images[i, :] = nimg
                                        nimg = nimg - 127.5
                                        nimg = nimg * 0.0078125
                                        input_images[i, :] = nimg
                                        (x1, y1, x2, y2) = bbox.astype("int")
                                        if x1 < 0 or y1 < 0 or x2 < 0 or y2 < 0 or x1 >= x2 or y1 >= y2:
                                            continue
                                        temp = int((y2 - y1) / EXTEND_Y)
                                        y1 = y1 + temp
                                        y2 = y2 + temp
                                        temp = int((x2 - x1) / EXTEND_X)
                                        if x1 > temp:
                                            x1 = x1 - temp
                                        x2 = x2 + temp
                                        # cv2.imshow("mainframe",frame)
                                        # cv2.imwrite("temp2.jpg",frame[y1:y2,x1:x2])
                                        face_img = cv2.resize(
                                            _frame[y1:y2, x1:x2], (128, 128))
                                        # cv2.imshow("ok",face_img)
                                        face_img = cv2.cvtColor(
                                            face_img, cv2.COLOR_BGR2RGB)
                                        tm.start()
                                        marks = mark_detector.detect_marks(
                                            [face_img])
                                        tm.stop()
                                        marks *= (x2 - x1)
                                        marks[:, 0] += x1
                                        marks[:, 1] += y1
                                        # mark_detector.draw_marks(
                                        #         frame, marks, color=(0, 255, 0))
                                        pose, (
                                            yaw, pitch, roll
                                        ) = pose_estimator.solve_pose_by_68_points(
                                            marks)
                                        # temp = frame
                                        # cv2.putText(temp,"yaw:  "+str(yaw),(x2,y1),cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 255), thickness=2)
                                        # cv2.putText(temp,"pitch: "+str(pitch),(x2,y1+25),cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 255), thickness=2)
                                        # cv2.putText(temp,"roll:   "+str(roll),(x2,y1+50),cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 255), thickness=2)
                                        # cv2.imshow("frame",temp)
                                        # if measureGoodFace(MIN_FACE_SIZE,MAX_BLACK_PIXEL,frame[y1:y2,x1:x2],yaw,pitch,roll,BLUR_THRESH,YAWL,YAWR,PITCHL,PITCHR,ROLLL,ROLLR):
                                        #     good_face_index.append(i)
                                        # cv2.waitKey(0)
                                # print(good_face_index)
                                feed_dict = {inputs_placeholder: input_images}
                                emb_arrays = sess.run(embeddings,
                                                      feed_dict=feed_dict)
                                emb_arrays = sklearn.preprocessing.normalize(
                                    emb_arrays)
                                names = []
                                sims = []
                                for i, embedding in enumerate(emb_arrays):
                                    # if len(listTrackedFace)>i and RepresentsInt(listTrackedFace[i].name)==False:
                                    #     names.append(listTrackedFace[i].name)
                                    #     continue
                                    embedding = embedding.flatten()
                                    temp_dict = {}
                                    for com_face in faces_db:
                                        ret, sim = feature_compare(
                                            embedding, com_face["feature"],
                                            0.65)
                                        temp_dict[com_face["name"]] = sim
                                    # print(temp_dict)
                                    dictResult = sorted(temp_dict.items(),
                                                        key=lambda d: d[1],
                                                        reverse=True)
                                    # print(dictResult[:5])
                                    name = ""
                                    if len(dictResult) > 0 and dictResult[0][
                                            1] > VERIFICATION_THRESHOLD:
                                        name = dictResult[0][
                                            0]  #.split("_")[0]
                                        sim = dictResult[0][1]
                                        ## wite log
                                        t = time.time()
                                        f.write(name + "___" +
                                                str((t - start) // 60) + ":" +
                                                str(int(t - start) % 60) +
                                                "\n")
                                    else:
                                        name = "unknown"
                                        sim = 0
                                    names.append(name)
                                    sims.append(sim)

                                    # cv2.imwrite("./test/"+name+"_"+str(frameCounter//60)+":"+str(frameCounter%60)+".jpg",save_images[i,:])
                                    # if len(dictResult)>0 :
                                    # cv2.imwrite("./test/"+names[i]+"_"+str(frameCounter//60)+":"+str(frameCounter%60)+"_"+str(dictResult[0][1])+".jpg",save_images[i,:])
                                    ################################ tracker
                                for i, embedding in enumerate(emb_arrays):
                                    embedding = embedding.flatten()
                                    ResultDict = {}
                                    for objectTrackFace in listTrackedFace:
                                        tempList = []
                                        (x1, y1, x2,
                                         y2) = objectTrackFace.latestBox
                                        for com_face in objectTrackFace.listEmbedding:
                                            ret, sim = feature_compare(
                                                embedding, com_face, 0.65)
                                            tempList.append(sim)
                                        tempList.sort(reverse=True)
                                        if len(tempList) > 0:
                                            if tempList[0] > 0.9 or (
                                                    similarIOU(
                                                        faces[i, :4].astype(
                                                            "int"),
                                                        objectTrackFace.
                                                        latestBox) and
                                                (frameCounter - objectTrackFace
                                                 .latestFrameCounter) < 3):
                                                ResultDict[objectTrackFace.
                                                           name] = tempList[0]
                                    dictResult = sorted(ResultDict.items(),
                                                        key=lambda d: d[1],
                                                        reverse=True)
                                    if True:
                                        if len(
                                                ResultDict
                                        ) > 0 and dictResult[0][
                                                1] > SIMILAR_THRESH:  # matched an existing track -- 0.5
                                            # for ik in range(len(dict)):
                                            #     if dict[ik][1]>SIMILAR_THRESH:

                                            nameTrackCurrent = dictResult[0][0]
                                            for index, tempFaceTrack in enumerate(
                                                    listTrackedFace):
                                                if tempFaceTrack.name == nameTrackCurrent:
                                                    if len(tempFaceTrack.
                                                           listImage
                                                           ) > MAX_LIST_LEN:
                                                        tempFaceTrack.listImage.pop(
                                                            0)
                                                        tempFaceTrack.listEmbedding.pop(
                                                            0)
                                                        if measureGoodFace(
                                                                MIN_FACE_SIZE,
                                                                MAX_BLACK_PIXEL,
                                                                save_images[
                                                                    i, :], yaw,
                                                                pitch, roll,
                                                                BLUR_THRESH,
                                                                YAWL, YAWR,
                                                                PITCHL, PITCHR,
                                                                ROLLL, ROLLR):
                                                            tempFaceTrack.listImage.append(
                                                                save_images[
                                                                    i, :])
                                                            tempFaceTrack.listEmbedding.append(
                                                                emb_arrays[i])
                                                    else:
                                                        if measureGoodFace(
                                                                MIN_FACE_SIZE,
                                                                MAX_BLACK_PIXEL,
                                                                save_images[
                                                                    i, :], yaw,
                                                                pitch, roll,
                                                                BLUR_THRESH,
                                                                YAWL, YAWR,
                                                                PITCHL, PITCHR,
                                                                ROLLL, ROLLR):
                                                            tempFaceTrack.listImage.append(
                                                                save_images[
                                                                    i, :])
                                                            tempFaceTrack.listEmbedding.append(
                                                                emb_arrays[i])
                                                    if names[i] != "unknown":
                                                        if RepresentsInt(
                                                                nameTrackCurrent
                                                        ):
                                                            tempFaceTrack.name = names[
                                                                i]
                                                        # else: #################
                                                        #     names[i] = nameTrackCurrent
                                                    else:
                                                        if not RepresentsInt(
                                                                nameTrackCurrent
                                                        ):
                                                            names[
                                                                i] = nameTrackCurrent
                                                    tempFaceTrack.countDisappeared = 0
                                                    tempFaceTrack.latestBox = faces[
                                                        i, 0:4].astype("int")
                                                    tempFaceTrack.latestFrameCounter = frameCounter
                                                    tempFaceTrack.liveTime = 0
                                                    tempFaceTrack.justAdded = True  # but we still act on it
                                                    break

                                        else:  # no match: create a new track, but only keep the image when the face is good enough
                                            if len(ResultDict) > 0:
                                                print(dictResult[0][1])
                                            if names[i] != "unknown":
                                                newTrackFace = trackedFace(
                                                    names[i])
                                            else:
                                                newTrackFace = trackedFace(
                                                    str(currentFaceID))
                                                currentFaceID = currentFaceID + 1
                                            if measureGoodFace(
                                                    MIN_FACE_SIZE,
                                                    MAX_BLACK_PIXEL,
                                                    save_images[i, :], yaw,
                                                    pitch, roll, BLUR_THRESH,
                                                    YAWL, YAWR, PITCHL, PITCHR,
                                                    ROLLL, ROLLR):
                                                newTrackFace.listImage.append(
                                                    save_images[i, :])
                                                newTrackFace.listEmbedding.append(
                                                    emb_arrays[i])
                                            newTrackFace.latestBox = faces[
                                                i, 0:4].astype("int")
                                            newTrackFace.latestFrameCounter = frameCounter
                                            # print(newTrackFace.latestBox)
                                            newTrackFace.justAdded = True
                                            listTrackedFace.append(
                                                newTrackFace)  ## add list
                                ### disappeared
                                for index, trackFace in enumerate(
                                        listTrackedFace):
                                    if trackFace.justAdded == False:
                                        trackFace.countDisappeared = trackFace.countDisappeared + 1
                                        trackFace.liveTime = trackFace.liveTime + 1
                                    else:
                                        trackFace.justAdded = False
                                    if trackFace.liveTime > LIVE_TIME:
                                        t = listTrackedFace.pop(index)
                                        del t
                                    if trackFace.countDisappeared > maxDisappeared:
                                        if len(
                                                trackFace.listImage
                                        ) < MIN_FACE_FOR_SAVE:  # if fewer than MIN_FACE_FOR_SAVE good faces were collected, drop the track
                                            trackFace.countDisappeared = 0
                                            continue
                                        if trackFace.saveTrackedFace(
                                                "./temp/", startId):
                                            startId = startId + 1
                                        t = listTrackedFace.pop(index)
                                        del t
                                for i, face in enumerate(faces):
                                    x1, y1, x2, y2 = faces[i][0], faces[i][
                                        1], faces[i][2], faces[i][3]
                                    x1 = max(int(x1), 0)
                                    y1 = max(int(y1), 0)
                                    x2 = min(int(x2), _frame.shape[1])
                                    y2 = min(int(y2), _frame.shape[0])
                                    cv2.rectangle(frame,
                                                  (x1 + ROIXL, y1 + ROIYA),
                                                  (x2 + ROIXL, y2 + ROIYA),
                                                  (0, 255, 0), 2)
                                    # if i in good_face_index:
                                    # if not RepresentsInt(names[i]):
                                    cv2.putText(frame, names[i].split("_")[0],
                                                (int(x1 / 2 + x2 / 2 + ROIXL),
                                                 int(y1 + ROIYA)),
                                                cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                                                (255, 255, 255), 2)
                            else:
                                for trackFace in list(
                                        listTrackedFace):  ## iterate over a copy so removal is safe
                                    trackFace.countDisappeared = trackFace.countDisappeared + 1
                                    trackFace.liveTime = trackFace.liveTime + 1
                                    if trackFace.liveTime > LIVE_TIME:
                                        listTrackedFace.remove(trackFace)
                                        continue
                                    if trackFace.countDisappeared > maxDisappeared:
                                        if len(
                                                trackFace.listImage
                                        ) < MIN_FACE_FOR_SAVE:  ## fewer than MIN_FACE_FOR_SAVE images: reset the counter and keep the track alive
                                            trackFace.countDisappeared = 0
                                            continue
                                        if trackFace.saveTrackedFace(
                                                "./temp/", startId):
                                            startId = startId + 1
                                        listTrackedFace.remove(trackFace)
                            end = time.time()
                            cv2.putText(frame,
                                        "FPS: " + str(1 // (end - start1)),
                                        (400, 30), cv2.FONT_HERSHEY_SIMPLEX, 1,
                                        (0, 0, 0), 3)
                            cv2.putText(
                                frame, "Time: " + str(int(end - start) // 60) +
                                ":" + str(int(end - start) % 60), (200, 30),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0), 3)
                        cv2.imshow("frame", frame)
                        key = cv2.waitKey(30)
                        if key & 0xFF == ord('q'):
                            break
                        if key == 32:
                            cv2.waitKey(0)
                    else:
                        break
            except KeyboardInterrupt as e:
                pass
    gc.collect()
    cv2.destroyAllWindows()
    exit(0)
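
A compact, self-contained sketch of the track-aging logic used above. The class and threshold names are illustrative stand-ins for the original trackedFace machinery; iterating over a copy of the list is what makes removal during the scan safe.

LIVE_TIME = 300        # assumed total frame budget before a track expires
MAX_DISAPPEARED = 30   # assumed frames a track may go unseen before flushing

class Track:
    def __init__(self, name):
        self.name = name
        self.live_time = 0
        self.disappeared = 0
        self.just_added = True

def age_tracks(tracks):
    for track in list(tracks):        # copy: we may remove while iterating
        if track.just_added:
            track.just_added = False  # matched this frame, do not age it
        else:
            track.disappeared += 1
            track.live_time += 1
        if track.live_time > LIVE_TIME or track.disappeared > MAX_DISAPPEARED:
            tracks.remove(track)      # expired or lost: drop the track

tracks = [Track("alice"), Track("bob")]
age_tracks(tracks)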
Beispiel #22
0
def main():
    """MAIN"""
    # Video source from webcam or video file.
    video_src = 0
    #video_src = "./head-pose-face-detection-female.mp4"
    cam = cv2.VideoCapture(video_src)
    if video_src == 0:
        cam.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    _, sample_frame = cam.read()

    # Introduce mark_detector to detect landmarks.
    mark_detector = MarkDetector()

    # Set up a worker thread and queues for face detection.
    img_queue = Queue()
    box_queue = Queue()
    img_queue.put(sample_frame)

    thread = threading.Thread(target=get_face,
                              args=(mark_detector, img_queue, box_queue))
    thread.daemon = True
    thread.start()

    # Introduce pose estimator to solve pose. Get one frame to setup the
    # estimator according to the image size.
    height, width = sample_frame.shape[:2]
    pose_estimator = PoseEstimator(img_size=(height, width))

    # Introduce scalar stabilizers for pose.
    pose_stabilizers = [
        Stabilizer(state_num=2,
                   measure_num=1,
                   cov_process=0.1,
                   cov_measure=0.1) for _ in range(6)
    ]

    tm = cv2.TickMeter()

    while True:
        # Read a frame; crop and flip it to suit your needs.
        frame_got, frame = cam.read()
        if frame_got is False:
            break

        # Crop it if frame is larger than expected.
        # frame = frame[0:480, 300:940]

        # If frame comes from webcam, flip it so it looks like a mirror.
        if video_src == 0:
            frame = cv2.flip(frame, 2)

        # Pose estimation by 3 steps:
        # 1. detect face;
        # 2. detect landmarks;
        # 3. estimate pose

        # Feed frame to image queue.
        img_queue.put(frame)

        # Get face from box queue.
        facebox = box_queue.get()

        if facebox is not None:
            # Detect landmarks from image of 128x128.
            face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]]
            face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
            face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)

            tm.start()
            marks = mark_detector.detect_marks(face_img)
            tm.stop()
            # print(tm.getTimeSec() / tm.getCounter())

            # Convert the marks locations from local CNN to global image.
            marks *= (facebox[2] - facebox[0])
            marks[:, 0] += facebox[0]
            marks[:, 1] += facebox[1]

            # Uncomment following line to show raw marks.
            # mark_detector.draw_marks(
            #     frame, marks, color=(0, 255, 0))

            # Try pose estimation with 68 points.
            pose = pose_estimator.solve_pose_by_68_points(marks)

            # Stabilize the pose.
            stable_pose = []
            pose_np = np.array(pose).flatten()
            for value, ps_stb in zip(pose_np, pose_stabilizers):
                ps_stb.update([value])
                stable_pose.append(ps_stb.state[0])
            stable_pose = np.reshape(stable_pose, (-1, 3))

            # Uncomment following line to draw pose annotation on frame.
            # pose_estimator.draw_annotation_box(
            #     frame, pose[0], pose[1], color=(255, 128, 128))

            # Draw the stabilized pose annotation on the frame.
            pose_estimator.draw_annotation_box(frame,
                                               stable_pose[0],
                                               stable_pose[1],
                                               color=(128, 255, 128))

        # Show preview.
        cv2.imshow("Preview", frame)
        if cv2.waitKey(10) == 27:
            break
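
A worked example of the mark-rescaling arithmetic above: detect_marks returns landmarks normalized to the square face crop, so scaling by the crop width and adding the box origin places them back in the full frame. The facebox values here are made up for illustration.

import numpy as np

facebox = (100, 50, 228, 178)      # x1, y1, x2, y2 -- a 128 px square crop
marks = np.array([[0.5, 0.5]])     # one landmark, normalized to the crop
marks = marks * (facebox[2] - facebox[0])  # scale to crop pixels
marks[:, 0] += facebox[0]          # shift x by the crop origin
marks[:, 1] += facebox[1]          # shift y by the crop origin
print(marks)                       # [[164. 114.]]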
def process_video_capture(
        video_capture,
        save_file,
        inverter_filename='models/inverter/inverter_randforest_7000.pkl'):
    Z_DIM = 512
    CNN_INPUT_SIZE = 128
    sess = tf.InteractiveSession()

    video_capture.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    # Check success
    if not video_capture.isOpened():
        raise Exception("Could not open video device")
    _, sample_frame = video_capture.read()

    # Introduce mark_detector to detect landmarks.
    mark_detector = MarkDetector()

    # Introduce pose estimator to solve pose. Get one frame to setup the
    # estimator according to the image size.
    height, width = sample_frame.shape[:2]
    pose_estimator = PoseEstimator(img_size=(height, width))

    # Introduce scalar stabilizers for pose.
    pose_stabilizers = [
        Stabilizer(state_num=2,
                   measure_num=1,
                   cov_process=0.1,
                   cov_measure=0.1) for _ in range(6)
    ]

    tm = cv2.TickMeter()

    G, I = load_models(inverter_filename=inverter_filename)
    #print(I.coef_)
    #print(I.intercept_)
    age_net = cv2.dnn.readNetFromCaffe('models/race_age/deploy_age.prototxt',
                                       'models/race_age/age_net.caffemodel')
    gender_net = cv2.dnn.readNetFromCaffe(
        'models/race_age/deploy_gender.prototxt',
        'models/race_age/gender_net.caffemodel')

    i = 0
    black_img = np.zeros((1024, 1024, 3), np.uint8)
    while True:
        #I.intercept_ += np.random.normal(scale=0.1, size=I.intercept_.shape).astype(np.float32)
        # Read a frame; crop and flip it to suit your needs.
        frame_got, frame = video_capture.read()
        if frame_got is False:
            break
        # Crop it if frame is larger than expected.
        #frame = frame[0:480, 300:940]
        # If frame comes from webcam, flip it so it looks like a mirror.
        #frame = cv2.flip(frame, 2)

        # Feed frame to image queue.
        facebox = mark_detector.extract_cnn_facebox(frame)

        if facebox is not None:
            feats = calculate_facial_features(frame, facebox, tm,
                                              CNN_INPUT_SIZE, mark_detector,
                                              age_net, gender_net)
            viewer_latent = I.predict(feats.reshape(1, -1))
            labels = np.zeros([viewer_latent.shape[0], 0], np.float32)
            viewer_generated = G.run(viewer_latent,
                                     labels,
                                     out_mul=127.5,
                                     out_add=127.5,
                                     out_dtype=np.uint8)
            viewer_generated = np.squeeze(viewer_generated)
            viewer_generated = np.transpose(viewer_generated, (1, 2, 0))
            viewer_generated = cv2.cvtColor(viewer_generated,
                                            cv2.COLOR_BGR2RGB)
            facebox = mark_detector.extract_cnn_facebox(viewer_generated)
            if facebox is not None:
                feats_gen = calculate_facial_features(viewer_generated,
                                                      facebox, tm,
                                                      CNN_INPUT_SIZE,
                                                      mark_detector, age_net,
                                                      gender_net)
                print("Frame: {}, mean diff in features: {} ".format(
                    i, np.mean(np.abs(feats_gen - feats))))
            # Show preview.
        else:
            print("Frame: {}, didn't generate face".format(i))
            viewer_generated = black_img

        frame = cv2.resize(frame, (1024, 1024))
        result_both_imgs = np.concatenate((viewer_generated, frame), axis=0)
        cv2.imwrite("images/output_{}.png".format(i), result_both_imgs)
        #last_image = viewer_generated
        #cv2.imshow("Preview", viewer_generated)
        if cv2.waitKey(10) == 27:
            break
        i += 1

    # Release the capture and assemble the saved frames into a video.
    video_capture.release()
    frames_to_video.create_video_from_frames(dir_path='images',
                                             ext='png',
                                             output=save_file)
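
The Stabilizer class used in both functions is defined elsewhere in that repository; a plausible reading of its parameters (state_num=2, measure_num=1) is one Kalman filter per scalar with a value-plus-velocity state. A minimal sketch on top of cv2.KalmanFilter, under that assumption:

import cv2
import numpy as np

def make_scalar_kalman(cov_process=0.1, cov_measure=0.1):
    kf = cv2.KalmanFilter(2, 1, 0)  # state: [value, velocity], 1 measurement
    kf.transitionMatrix = np.array([[1., 1.], [0., 1.]], np.float32)
    kf.measurementMatrix = np.array([[1., 0.]], np.float32)
    kf.processNoiseCov = np.eye(2, dtype=np.float32) * cov_process
    kf.measurementNoiseCov = np.eye(1, dtype=np.float32) * cov_measure
    return kf

kf = make_scalar_kalman()
for raw in [0.0, 0.2, 0.1, 0.4]:           # noisy scalar readings
    kf.predict()
    smoothed = kf.correct(np.array([[raw]], np.float32))
    print(float(smoothed[0, 0]))           # stabilized value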
Beispiel #24
0
def ai_obj_det(capd_img_list):
    # Input image size for the AI model
    IN_WIDTH = 300  # must be 300x300 for this model
    IN_HEIGHT = 300
    frame_width = 640
    frame_height = 480
    # Label list of the trained model (MobileNet-SSD trained on COCO)
    CLASS_LABELS = {
        0: 'background',
        1: 'person',
        2: 'bicycle',
        3: 'car',
        4: 'motorcycle',
        5: 'airplane',
        6: 'bus',
        7: 'train',
        8: 'truck',
        9: 'boat',
        10: 'traffic light',
        11: 'fire hydrant',
        13: 'stop sign',
        14: 'parking meter',
        15: 'bench',
        16: 'bird',
        17: 'cat',
        18: 'dog',
        19: 'horse',
        20: 'sheep',
        21: 'cow',
        22: 'elephant',
        23: 'bear',
        24: 'zebra',
        25: 'giraffe',
        27: 'backpack',
        28: 'umbrella',
        31: 'handbag',
        32: 'tie',
        33: 'suitcase',
        34: 'frisbee',
        35: 'skis',
        36: 'snowboard',
        37: 'sports ball',
        38: 'kite',
        39: 'baseball bat',
        40: 'baseball glove',
        41: 'skateboard',
        42: 'surfboard',
        43: 'tennis racket',
        44: 'bottle',
        46: 'wine glass',
        47: 'cup',
        48: 'fork',
        49: 'knife',
        50: 'spoon',
        51: 'bowl',
        52: 'banana',
        53: 'apple',
        54: 'sandwich',
        55: 'orange',
        56: 'broccoli',
        57: 'carrot',
        58: 'hot dog',
        59: 'pizza',
        60: 'donut',
        61: 'cake',
        62: 'chair',
        63: 'couch',
        64: 'potted plant',
        65: 'bed',
        67: 'dining table',
        70: 'toilet',
        72: 'tv',
        73: 'laptop',
        74: 'mouse',
        75: 'remote',
        76: 'keyboard',
        77: 'cell phone',
        78: 'microwave',
        79: 'oven',
        80: 'toaster',
        81: 'sink',
        82: 'refrigerator',
        84: 'book',
        85: 'clock',
        86: 'vase',
        87: 'scissors',
        88: 'teddy bear',
        89: 'hair drier',
        90: 'toothbrush'
    }
    # Define "Argument(Set Command Option)"
    ap = argparse.ArgumentParser()
    ap.add_argument('-p', '--pbtxt', required=True, help='path to pbtxt file')
    ap.add_argument('-w',
                    '--weights',
                    required=True,
                    help='path to TensorFlow inference graph')
    ap.add_argument('-c',
                    '--confidence',
                    type=float,
                    default=0.3,
                    help='minimum probability')
    ap.add_argument('-i',
                    '--interval',
                    type=float,
                    default=0,
                    help='process interval to reduce CPU usage')
    args = vars(ap.parse_args())

    # Stopwatch function from OpenCV as tm
    # kW: may not be necessary
    tm = cv2.TickMeter()
    colors = {}
    # Assign a random box color to each label
    random.seed()

    for key in CLASS_LABELS.keys():
        colors[key] = (random.randrange(255), random.randrange(255),
                       random.randrange(255))
    # Loading AI model
    print('Loading AI Model...')
    net = cv2.dnn.readNet(args['weights'], args['pbtxt'])

    for j in range(len(capd_img_list)):
        image = cv2.imread(capd_img_list[j])
        # Convert the input image to blob format
        blob = cv2.dnn.blobFromImage(image,
                                     size=(IN_WIDTH, IN_HEIGHT),
                                     swapRB=False,
                                     crop=False)
        # Feed the blob to the AI model
        net.setInput(blob)
        # Run inference and time it
        tm.reset()
        tm.start()
        detections = net.forward()  # forward pass through the loaded network
        tm.stop()
        # Iterate over the detections
        for i in range(detections.shape[2]):
            # Confidence score of the i-th detected object
            confidence = detections[0, 0, i, 2]
            # Skip detections below the confidence threshold
            if confidence < args['confidence']:
                continue
            # Obtain the class id and box corners of the detected object
            class_id = int(detections[0, 0, i, 1])
            start_x = int(detections[0, 0, i, 3] * frame_width)
            start_y = int(detections[0, 0, i, 4] * frame_height)
            end_x = int(detections[0, 0, i, 5] * frame_width)
            end_y = int(detections[0, 0, i, 6] * frame_height)

            # Compose the name label, e.g. "person" or "cat", with confidence
            label = CLASS_LABELS[class_id]
            label += ': ' + str(round(confidence * 100, 2)) + '%'
            label_size, base_line = cv2.getTextSize(label,
                                                    cv2.FONT_HERSHEY_SIMPLEX,
                                                    0.5, 1)

            # Draw a rectangle around the detection
            #cv2.rectangle(image, (start_x, start_y), (end_x, end_y), colors[class_id], -1)  # thickness -1: filled
            cv2.rectangle(image, (start_x, start_y), (end_x, end_y),
                          colors[class_id], 2)  # thickness 2: outline

            # Debug
            print("Label: %s," % label, "Confidence: %f," % confidence)

            # Draw a filled white background for the label
            cv2.rectangle(image, (start_x, start_y - label_size[1]),
                          (start_x + label_size[0], start_y + base_line),
                          (255, 255, 255), cv2.FILLED)
            # Draw the label text
            cv2.putText(image, label, (start_x, start_y),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0))

        # Measure the inference time for this image
        ai_time = tm.getTimeMilli()
        # Show AI processing time
        #cv2.putText(image, '{:.2f}(ms)'.format(ai_time), (10,30), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 255, 0), thickness=2)
        '''
        # Debug: show image
        cv2.imshow(capd_img_list[j], image)
        cv2.waitKey(0)
        cv2.destroyAllWindows()
        '''

        # Save the AI-overlaid image (overwrites the captured file)
        cv2.imwrite(capd_img_list[j], image)

        # Set an interval between images to reduce CPU usage
        time.sleep(args['interval'])

    # Close process
    print('Close AI Process')
    time.sleep(3)
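
For reference, the detections blob returned by net.forward() for this family of SSD models has shape 1x1xNx7, each row holding [image_id, class_id, confidence, left, top, right, bottom] with box coordinates normalized to [0, 1]. A small decoding helper in the same spirit as the loop above (a sketch, not the original code):

def decode_ssd(detections, frame_w, frame_h, conf_thresh=0.3):
    boxes = []
    for i in range(detections.shape[2]):
        image_id, class_id, conf, x1, y1, x2, y2 = detections[0, 0, i]
        if conf < conf_thresh:
            continue  # below the confidence threshold
        boxes.append((int(class_id), float(conf),
                      int(x1 * frame_w), int(y1 * frame_h),
                      int(x2 * frame_w), int(y2 * frame_h)))
    return boxes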
def main():
    """MAIN"""
    # Video source from webcam or video file.
    video_src = args.cam if args.cam is not None else args.video
    if video_src is None:
        print("Warning: video source not assigned, default webcam will be used.")
        video_src = 0

    cap = cv2.VideoCapture(video_src)
    if video_src == 0:
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    _, sample_frame = cap.read()

    # Introduce mark_detector to detect landmarks.
    mark_detector = MarkDetector()

    # Setup process and queues for multiprocessing.
    img_queue = Queue()
    box_queue = Queue()
    img_queue.put(sample_frame)
    box_process = Process(target=get_face, args=(
        mark_detector, img_queue, box_queue,))
    box_process.start()

    # Introduce pose estimator to solve pose. Get one frame to setup the
    # estimator according to the image size.
    height, width = sample_frame.shape[:2]
    pose_estimator = PoseEstimator(img_size=(height, width))

    if args.out is not None:
        fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
        output_movie = cv2.VideoWriter(args.out, fourcc, 30, (width, height))

    # Introduce scalar stabilizers for pose.
    pose_stabilizers = [Stabilizer(
        state_num=2,
        measure_num=1,
        cov_process=0.1,
        cov_measure=0.1) for _ in range(6)]

    tm = cv2.TickMeter()

    cnt = 0

    input_path = args.input_path
    listdir = os.listdir(input_path)
    for v_name in listdir:
        v_path = os.path.join(input_path, v_name)
        cap = cv2.VideoCapture(v_path)

        while True:
            # Read a frame; crop and flip it to suit your needs.
            frame_got, frame = cap.read()
            if frame_got is False:
                break

            # Crop it if frame is larger than expected.
            # frame = frame[0:480, 300:940]

            # If frame comes from webcam, flip it so it looks like a mirror.
            if video_src == 0:
                frame = cv2.flip(frame, 2)

            # Pose estimation by 3 steps:
            # 1. detect face;
            # 2. detect landmarks;
            # 3. estimate pose

            # Feed frame to image queue.
            img_queue.put(frame)

            # Get face from box queue.
            facebox = box_queue.get()

            if facebox is not None:
                # Detect landmarks from image of 128x128.
                face_img = frame[facebox[1]: facebox[3],
                           facebox[0]: facebox[2]]
                face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
                face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)

                tm.start()
                marks = mark_detector.detect_marks(face_img)
                tm.stop()

                # Convert the marks locations from local CNN to global image.
                marks *= (facebox[2] - facebox[0])
                marks[:, 0] += facebox[0]
                marks[:, 1] += facebox[1]

                # Uncomment following line to show raw marks.
                # mark_detector.draw_marks(frame, marks, color=(0, 255, 0))

                # Uncomment following line to show facebox.
                # mark_detector.draw_box(frame, [facebox])

                # Try pose estimation with 68 points.
                pose = pose_estimator.solve_pose_by_68_points(marks)

                # Stabilize the pose.
                steady_pose = []
                pose_np = np.array(pose).flatten()
                for value, ps_stb in zip(pose_np, pose_stabilizers):
                    ps_stb.update([value])
                    steady_pose.append(ps_stb.state[0])
                steady_pose = np.reshape(steady_pose, (-1, 3))

                # Uncomment following line to draw pose annotation on frame.
                # pose_estimator.draw_annotation_box(
                #     frame, pose[0], pose[1], color=(255, 128, 128))

                # Draw the stabilized pose annotation on the frame.
                pose_estimator.draw_annotation_box(
                    frame, steady_pose[0], steady_pose[1], color=(128, 255, 128))

                # Uncomment following line to draw head axes on frame.
                # pose_estimator.draw_axes(frame, steady_pose[0], steady_pose[1])

            # Show preview.
            # cv2.imshow("Preview", frame)
            # if cv2.waitKey(10) == 27:
            #     break
            if args.out is not None:
                output_movie.write(frame)
            else:
                cv2.imshow("Preview", frame)

            cnt = cnt + 1
            if cnt % 100 == 0:
                print(str(cnt), flush=True)

    # Clean up the multiprocessing process.
    box_process.terminate()
    box_process.join()
    cv2.destroyAllWindows()
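
get_face is not shown in this example; a minimal worker consistent with how it is used above would pull frames from img_queue, run the detector, and push the facebox (or None) to box_queue:

def get_face(detector, img_queue, box_queue):
    # Runs in a separate process; detector is a MarkDetector instance.
    while True:
        image = img_queue.get()
        box = detector.extract_cnn_facebox(image)  # None when no face found
        box_queue.put(box)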
Beispiel #26
0
    if not cap.isOpened():
        print('Error opening input video: {}'.format(args.video))
    else:
        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # Use video writer to write processed video file.
    if args.output:
        video_writer = cv2.VideoWriter(
            args.output, cv2.VideoWriter_fourcc('a', 'v', 'c', '1'),
            cap.get(cv2.CAP_PROP_FPS),
            (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
             int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))))

    # Use meter to calculate FPS.
    meter = cv2.TickMeter()

    # Capture frame-by-frame
    while cap.isOpened():
        # Start the timer.
        meter.start()

        # Read a frame.
        ret, frame = cap.read()
        if not ret:
            break

        # Prepare the input image.
        frame_raw = detector.preprocess(frame)

        # Run the model
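
The example is cut off here. A minimal complete version of the per-frame timing pattern it sets up looks like the following sketch (the video source is assumed; getFPS() requires OpenCV 4.x):

import cv2

cap = cv2.VideoCapture(0)       # assumed source
meter = cv2.TickMeter()
while cap.isOpened():
    meter.start()
    ret, frame = cap.read()
    if not ret:
        break
    # ... run the model on `frame` here ...
    meter.stop()
    cv2.putText(frame, 'FPS: {:.1f}'.format(meter.getFPS()), (10, 30),
                cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 255, 0), 2)
    cv2.imshow('preview', frame)
    if cv2.waitKey(1) == 27:    # ESC to quit
        break
cap.release()
cv2.destroyAllWindows()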
Beispiel #27
0
# tm.reset() : reset the time measurement

# tm.getTimeSec()   : return the measured time in seconds
# tm.getTimeMilli() : return the measured time in milliseconds
# tm.getTimeMicro() : return the measured time in microseconds


import sys
import time
import numpy as np
import cv2

img = cv2.imread('ch02/hongkong.jpg')

if img is None:
    print('Image load failed!')
    sys.exit()

tm = cv2.TickMeter()

tm.reset()
tm.start()
t1 = time.time()

edge = cv2.Canny(img, 50, 150)  # the code to time must sit between tm.start() and tm.stop()

tm.stop()
ms = tm.getTimeMilli()

print('time : ', (time.time() - t1) * 1000)
print('Elapsed time : {}ms'.format(ms))
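
One TickMeter can also accumulate over repeated start()/stop() pairs, and recent OpenCV 4.x exposes the call count and per-call average directly. A sketch with a synthetic image:

import cv2
import numpy as np

img = np.random.randint(0, 256, (480, 640), np.uint8)  # synthetic grayscale
tm = cv2.TickMeter()
for _ in range(10):
    tm.start()
    cv2.Canny(img, 50, 150)
    tm.stop()               # without reset(), times keep accumulating
print('calls     :', tm.getCounter())
print('total (ms):', tm.getTimeMilli())
print('avg   (ms):', tm.getAvgTimeMilli())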
Beispiel #28
0
            print('Results saved to result.jpg\n')
            cv.imwrite('result.jpg', result)

        # Visualize results in a new window
        if args.vis:
            cv.namedWindow(args.input, cv.WINDOW_AUTOSIZE)
            cv.imshow(args.input, result)
            cv.waitKey(0)
    else:  # Omit input to call default camera
        deviceId = 0
        cap = cv.VideoCapture(deviceId)
        frameWidth = int(cap.get(cv.CAP_PROP_FRAME_WIDTH))
        frameHeight = int(cap.get(cv.CAP_PROP_FRAME_HEIGHT))
        detector.setInputSize([frameWidth, frameHeight])

        tm = cv.TickMeter()
        while cv.waitKey(1) < 0:
            hasFrame, frame = cap.read()
            if not hasFrame:
                print('No frames grabbed!')
                break

            # Inference
            tm.start()
            faces = detector.detect(frame)  # faces is a tuple
            tm.stop()

            # Draw results on the input image
            frame = visualize(frame, faces)

            cv.putText(frame, 'FPS: {}'.format(tm.getFPS()), (0, 15),
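
The detector in this example matches the cv.FaceDetectorYN interface (setInputSize, and detect returning a tuple). Its construction is not shown; a plausible one, with a hypothetical local model file, would be:

import cv2 as cv

detector = cv.FaceDetectorYN.create(
    'face_detection_yunet.onnx',  # assumed local YuNet ONNX model file
    '',                           # no separate config file
    (320, 320))                   # placeholder size, reset via setInputSize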
Beispiel #29
0
import cv2
import myCV
import net

face_net = net.face_detection()
landmark_net = net.face_lanmark()
face_reid_net = net.face_reid()
body_net = myCV.Net("mo_mobilenet-ssd.xml", "mo_mobilenet-ssd.bin", (300, 300))

stream = cv2.VideoCapture(0)
counter = cv2.TickMeter()
faces_data = {}

while True:
    counter.stop()   # close out the previous iteration (no-op before the first start)
    counter.start()  # time the full loop iteration

    grab, frame = stream.read()
    if not grab:
        raise Exception('Frame not grabbed')

    img = frame.copy()

    # class id 15 = 'person' in the MobileNet-SSD label list
    bodies = myCV.detect(body_net, frame, 0.7, 15)
    for bxmin, bymin, bxmax, bymax in bodies:
        cv2.rectangle(img, (bxmin, bymin), (bxmax, bymax), (255, 255, 0), 2)

        bchip = frame[bymin:bymax, bxmin:bxmax]

        face = myCV.detect(face_net, bchip, 0.7)
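
The loop is truncated here. The pattern it is building is cascaded detection: boxes found inside the body chip are local to that chip and must be shifted by the chip origin before drawing on the full frame. A sketch of that offset step:

def offset_boxes(local_boxes, x_origin, y_origin):
    # Shift boxes detected inside a crop back into full-frame coordinates.
    return [(x1 + x_origin, y1 + y_origin, x2 + x_origin, y2 + y_origin)
            for x1, y1, x2, y2 in local_boxes]

# e.g. a face at (10, 12, 40, 48) inside a body chip anchored at
# (bxmin, bymin) maps to (10 + bxmin, 12 + bymin, 40 + bxmin, 48 + bymin).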
Beispiel #30
0
def main():
    """MAIN"""
    cv2.namedWindow("Test")  # Create a named window
    cv2.moveWindow("Test", 900, 600)  # Move it to (900, 600)

    screenWidth, screenHeight = pyautogui.size()
    st = 'Last command'

    cap = cv2.VideoCapture(0)
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    _, sample_frame = cap.read()

    # Introduce mark_detector to detect landmarks.
    mark_detector = MarkDetector()

    # Setup process and queues for multiprocessing.
    img_queue = Queue()
    box_queue = Queue()
    img_queue.put(sample_frame)
    box_process = Process(target=get_face,
                          args=(
                              mark_detector,
                              img_queue,
                              box_queue,
                          ))
    box_process.start()

    # Set up a thread listening for voice commands
    voice_command_queue = Q()
    stt_process = Thread(target=get_voice_command,
                         args=(voice_command_queue, ))
    stt_process.daemon = True
    stt_process.start()

    # Introduce pose estimator to solve pose. Get one frame to setup the
    # estimator according to the image size.
    height, width = sample_frame.shape[:2]
    pose_estimator = PoseEstimator(img_size=(height, width))

    # Introduce scalar stabilizers for pose.
    pose_stabilizers = [
        Stabilizer(state_num=2,
                   measure_num=1,
                   cov_process=0.1,
                   cov_measure=0.1) for _ in range(6)
    ]

    tm = cv2.TickMeter()

    while True:
        # Read a frame; crop and flip it to suit your needs.
        frame_got, frame = cap.read()
        if frame_got is False:
            break

        # Crop it if frame is larger than expected.
        # frame = frame[0:480, 300:940]

        # If frame comes from webcam, flip it so it looks like a mirror.
        frame = cv2.flip(frame, 2)

        # Pose estimation by 3 steps:
        # 1. detect face;
        # 2. detect landmarks;
        # 3. estimate pose

        # Feed frame to image queue.
        img_queue.put(frame)

        # Get face from box queue.
        facebox = box_queue.get()

        if facebox is not None:
            # Detect landmarks from image of 128x128.
            face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]]
            face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
            face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)

            tm.start()
            marks = mark_detector.detect_marks([face_img])
            tm.stop()

            # Convert the marks locations from local CNN to global image.
            marks *= (facebox[2] - facebox[0])
            marks[:, 0] += facebox[0]
            marks[:, 1] += facebox[1]

            # Uncomment following line to show raw marks.
            # mark_detector.draw_marks(
            #     frame, marks, color=(0, 255, 0))

            # Uncomment following line to show facebox.
            # mark_detector.draw_box(frame, [facebox])

            # Try pose estimation with 68 points.
            pose = pose_estimator.solve_pose_by_68_points(marks)

            # Stabilize the pose.
            steady_pose = []
            pose_np = np.array(pose).flatten()
            for value, ps_stb in zip(pose_np, pose_stabilizers):
                ps_stb.update([value])
                steady_pose.append(ps_stb.state[0])
            steady_pose = np.reshape(steady_pose, (-1, 3))

            # Uncomment following line to draw pose annotation on frame.
            # pose_estimator.draw_annotation_box(
            #     frame, pose[0], pose[1], color=(255, 128, 128))

            # Draw the stabilized pose annotation on the frame.
            pose_estimator.draw_annotation_box(frame,
                                               steady_pose[0],
                                               steady_pose[1],
                                               color=(255, 128, 128))

            # Compute the head-axis endpoints used to drive the cursor.
            endpoints = pose_estimator.getEndPoints(frame, steady_pose[0],
                                                    steady_pose[1])

            deltax = endpoints[1][0] - endpoints[0][0]
            deltay = endpoints[1][1] - endpoints[0][1]

            xpos = math.floor((deltax + 44) * screenWidth / 88)
            ypos = math.floor((deltay + 14) * screenHeight / 58)

            # print(xpos, ypos)

            pyautogui.moveTo(xpos, ypos)

            if not voice_command_queue.empty():

                command = voice_command_queue.get_nowait()
                if 'click' in command or 'select' in command:
                    pyautogui.click()
                    st = 'Click'
                elif 'double' in command or 'in' in command:
                    pyautogui.doubleClick()
                    st = 'Double Click'
                elif 'right' in command or 'menu' in command or 'light' in command:
                    pyautogui.rightClick()
                    st = 'Right Click'

                print(command)

        cv2.putText(frame, st, (0, 100), cv2.FONT_HERSHEY_SIMPLEX, 20, 255)
        scale_percent = 30
        # calculate scale_percent of the original dimensions
        width = int(frame.shape[1] * scale_percent / 100)
        height = int(frame.shape[0] * scale_percent / 100)

        # dsize
        dsize = (width, height)

        # resize image
        output = cv2.resize(frame, dsize)

        cv2.moveWindow("Test", screenWidth - width, screenHeight - height)

        # Show preview.
        cv2.imshow("Test", output)
        if cv2.waitKey(10) == 27:
            break

    # Clean up the multiprocessing process.
    box_process.terminate()
    box_process.join()
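
The cursor mapping above is a plain linear rescale: deltax in roughly [-44, 44] and deltay in roughly [-14, 44] (empirical constants from this snippet) are stretched across the screen. Generalized and clamped, as a sketch:

import math

def to_screen(delta, lo, hi, size):
    # Map delta from [lo, hi] onto [0, size - 1], clamping out-of-range input.
    return max(0, min(size - 1, math.floor((delta - lo) * size / (hi - lo))))

screen_w, screen_h = 1920, 1080               # example screen size
print(to_screen(0, -44, 44, screen_w))        # centered head -> x = 960
print(to_screen(-14, -14, 44, screen_h))      # top of range  -> y = 0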