def main(model_path, img_path, backend):
    fashion_model = FashionClassifier(model_path, backend, 1.0, (32, 32), (), False)
    avg_time = 0.0
    if os.path.isdir(img_path):
        images = os.listdir(img_path)
        folder_size = len(images)
        total_time = 0.0
        for img in images:
            image = cv2.imread(img_path + "/" + img)
            t1 = cv2.TickMeter()
            t1.start()
            print(img, " ", fashion_model.predict(image))
            t1.stop()
            print(t1.getTimeMilli())
            total_time = total_time + t1.getTimeMilli()
        avg_time = total_time / folder_size
    else:
        t1 = cv2.TickMeter()
        t1.start()
        image = cv2.imread(img_path)
        t1.stop()
        avg_time = t1.getTimeMilli()
        image_name = img_path.split("/")[-1]
        print(image_name, " ", fashion_model.predict(image))
    print("\n AVG TIME PER IMAGE: %.2f" % avg_time + "ms")
def noise_estimate(im, pch_size=8):
    '''
    Implementation of the noise level estimation from the following paper:
    Chen G, Zhu F, Heng P A. An Efficient Statistical Method for Image Noise Level Estimation.
    2015 IEEE International Conference on Computer Vision (ICCV). IEEE Computer Society, 2015.
    Input:
        im: the noisy image, H x W x 3 or H x W numpy tensor, range [0, 1]
        pch_size: patch size
    Output:
        noise_level: the estimated noise level
    '''
    if im.ndim == 3:
        im = im.transpose((2, 0, 1))
    else:
        im = np.expand_dims(im, axis=0)

    # image to patches
    pch = im2patch(im, pch_size, 16)  # C x pch_size x pch_size x num_pch tensor
    num_pch = pch.shape[3]
    pch = pch.reshape((-1, num_pch))  # d x num_pch matrix
    d = pch.shape[0]

    # pch = np.ones([32400, 64], np.float32)
    # mu = pch.mean(axis=1, keepdims=True)  # d x 1
    tm1 = cv2.TickMeter()
    tm1.start()
    # mean, eigenvectors, eigenvalues = cv2.PCACompute2(pch.transpose(), mean=None)
    tm1.stop()

    tm = cv2.TickMeter()
    # start = cv2.getTickCount()
    # mu = pch.mean(axis=1, keepdims=True)  # d x 1
    # XX = np.ones([32400, 64], np.float32)
    tm.start()
    mu = pch.mean(axis=1, keepdims=True)  # d x 1
    X = pch - mu
    sigma_X = np.matmul(X, X.transpose()) / num_pch
    sig_value, _ = np.linalg.eigh(sigma_X)
    tm.stop()

    tcv = tm.getTimeMilli()
    tcv1 = tm1.getTimeMilli()
    print('tcv {} ms, tcv1 {} ms'.format(tcv / 100, tcv1 / 100))

    sig_value.sort()
    for ii in range(-1, -d - 1, -1):
        tau = np.mean(sig_value[:ii])
        if np.sum(sig_value[:ii] > tau) == np.sum(sig_value[:ii] < tau):
            return np.sqrt(tau)
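# A minimal usage sketch for noise_estimate() above (not part of the original
# snippet): it assumes noise_estimate() and its im2patch() helper are importable
# from a local module, here hypothetically named noise_level, and checks the
# estimate against a known synthetic noise sigma.
import cv2
import numpy as np

from noise_level import noise_estimate  # assumed local module name

clean = cv2.imread('lenna.bmp', cv2.IMREAD_GRAYSCALE).astype(np.float32) / 255.0
sigma = 15 / 255.0                                     # known noise level
noisy = clean + np.random.randn(*clean.shape) * sigma  # add Gaussian noise

est = noise_estimate(noisy, pch_size=8)
print('true sigma: %.4f, estimated: %.4f' % (sigma, est))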
def __init__(self, source=0):
    self._stream = cv.VideoCapture(source)
    self._counter = cv.TickMeter()
    self._frames = 0
    self._runtime = 0
    self._fps = 0
    self._frame_time = 0
def DetectQRFrmImage(self, inputfile):
    inputimg = cv.imread(inputfile, cv.IMREAD_COLOR)
    if inputimg is None:
        print('ERROR: Can not read image: {}'.format(inputfile))
        return
    print('Run {:s} on image [{:d}x{:d}]'.format(
        self.getQRModeString(), inputimg.shape[1], inputimg.shape[0]))

    qrCode = cv.QRCodeDetector()
    count = 10
    timer = cv.TickMeter()
    for _ in range(count):
        timer.start()
        points, decode_info = self.runQR(qrCode, inputimg)
        timer.stop()

    fps = count / timer.getTimeSec()
    print('FPS: {}'.format(fps))

    result = inputimg
    self.drawQRCodeResults(result, points, decode_info, fps)
    cv.imshow("QR", result)
    cv.waitKey(1)

    if self.out != '':
        outfile = self.fname + self.fext
        print("Saving Result: {}".format(outfile))
        cv.imwrite(outfile, result)

    print("Press any key to exit ...")
    cv.waitKey(0)
    print("Exit")
def init_model(transform):
    global mark_detector, box_process, img_queue, box_queue, pose_estimator, pose_stabilizers, tm

    # Introduce mark_detector to detect landmarks.
    mark_detector = MarkDetector()

    # Setup process and queues for multiprocessing.
    img_queue = Queue()
    box_queue = Queue()
    # img_queue.put(sample_frame)
    box_process = Process(target=get_face, args=(mark_detector, img_queue, box_queue,))
    box_process.start()

    # Introduce scalar stabilizers for pose.
    pose_stabilizers = [
        Stabilizer(state_num=2, measure_num=1, cov_process=0.1, cov_measure=0.1)
        for _ in range(6)
    ]

    tm = cv2.TickMeter()

    return None, None
def test_one(title, a, b):
    # Create the hash algorithm instance
    if "AverageHash" == title:
        hashFun = cv2.img_hash.AverageHash_create()
    elif "PHash" == title:
        hashFun = cv2.img_hash.PHash_create()
    elif "MarrHildrethHash" == title:
        hashFun = cv2.img_hash.MarrHildrethHash_create()
    elif "RadialVarianceHash" == title:
        hashFun = cv2.img_hash.RadialVarianceHash_create()
    elif "BlockMeanHash" == title:
        hashFun = cv2.img_hash.BlockMeanHash_create()
    elif "ColorMomentHash" == title:
        hashFun = cv2.img_hash.ColorMomentHash_create()

    tick = cv2.TickMeter()
    print("=== " + title + " ===")

    tick.reset()
    tick.start()
    # Compute the hash of image a
    hashA = hashFun.compute(a)
    tick.stop()
    print("compute1: " + str(tick.getTimeMilli()) + " ms")

    tick.reset()
    tick.start()
    # Compute the hash of image b
    hashB = hashFun.compute(b)
    tick.stop()
    print("compute2: " + str(tick.getTimeMilli()) + " ms")

    # Compare the distance between the two image hashes
    print("compare: " + str(hashFun.compare(hashA, hashB)))
def time_inverse():
    src = cv2.imread('lenna.bmp', cv2.IMREAD_GRAYSCALE)

    if src is None:
        print('Image load failed!')
        return

    print(src.shape)
    print(src)

    dst = np.empty(src.shape, dtype=src.dtype)

    tm = cv2.TickMeter()
    tm.start()

    for y in range(src.shape[0]):
        for x in range(src.shape[1]):
            dst[y, x] = 255 - src[y, x]

    tm.stop()
    print('Image inverse implementation took %4.3f ms' % tm.getTimeMilli())
    print(dst)

    cv2.imshow('src', src)
    cv2.imshow('dst', dst)
    cv2.waitKey()
    cv2.destroyAllWindows()
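# For comparison, a minimal sketch (not from the original code) timing the same
# inversion as a single vectorized NumPy expression with the same TickMeter
# pattern; the per-pixel Python loop above is typically orders of magnitude slower.
import cv2
import numpy as np

src = cv2.imread('lenna.bmp', cv2.IMREAD_GRAYSCALE)
assert src is not None, 'Image load failed!'

tm = cv2.TickMeter()
tm.start()
dst = 255 - src  # vectorized inversion, equivalent to the nested pixel loop
tm.stop()

print('Vectorized inverse took %4.3f ms' % tm.getTimeMilli())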
def speed_test():
    images = get_images()
    detect_time = 0
    detect_num = 0
    timer = cv2.TickMeter()
    predictor = Predictor()

    # Skip the first prediction because the warm-up run may take longer.
    predictor.predict_bounding_boxes(images[0])

    for image in images:
        image = cv2.resize(image, (640, 480))
        timer.start()
        predictor.predict_bounding_boxes(image)
        timer.stop()
        detect_time += timer.getTimeMilli()
        detect_num += 1
        timer.reset()

    average_time = detect_time / detect_num
    print("average mtcnn prediction_time {} msec".format(average_time))
    return
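# An alternative sketch (not from the original code) that relies on the fact that
# cv2.TickMeter accumulates elapsed time over successive start()/stop() pairs,
# so the manual detect_time/detect_num bookkeeping can be dropped; get_images()
# and Predictor are the same assumed helpers used above.
import cv2

def speed_test_accumulated():
    images = get_images()
    predictor = Predictor()
    predictor.predict_bounding_boxes(images[0])  # warm-up run, not timed

    timer = cv2.TickMeter()
    for image in images:
        image = cv2.resize(image, (640, 480))
        timer.start()
        predictor.predict_bounding_boxes(image)
        timer.stop()  # elapsed time keeps accumulating across iterations

    # total accumulated time / number of start-stop cycles = average per prediction
    average_time = timer.getTimeMilli() / timer.getCounter()
    print("average mtcnn prediction_time {} msec".format(average_time))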
def processQRCodeDetection(self, qrcode, frame):
    if len(frame.shape) == 2:
        result = cv.cvtColor(frame, cv.COLOR_GRAY2BGR)
    else:
        result = frame
    print('Run {:s} on video frame [{:d}x{:d}]'.format(
        self.getQRModeString(), frame.shape[1], frame.shape[0]))

    timer = cv.TickMeter()
    timer.start()
    points, decode_info = self.runQR(qrcode, frame)
    timer.stop()

    fps = 1 / timer.getTimeSec()
    self.drawQRCodeResults(result, points, decode_info, fps)
    return fps, result, points
def __init__(self):
    # Load the parameters
    self.conf = config()

    # Initialize dlib's face detector (HOG-based) and then create the
    # facial landmark predictor.
    print("[INFO] loading facial landmark predictor...")
    self.detector = dlib.get_frontal_face_detector()
    self.predictor = dlib.shape_predictor(self.conf.shape_predictor_path)

    # Grab the indexes of the facial landmarks for the left and
    # right eye, respectively.
    (self.lStart, self.lEnd) = face_utils.FACIAL_LANDMARKS_IDXS["left_eye"]
    (self.rStart, self.rEnd) = face_utils.FACIAL_LANDMARKS_IDXS["right_eye"]

    # Initialize the video stream and sleep for a bit, allowing the
    # camera sensor to warm up.
    self.cap = cv2.VideoCapture(0)
    if self.conf.vedio_path == 0:
        self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    _, sample_frame = self.cap.read()

    # Introduce mark_detector to detect landmarks.
    self.mark_detector = MarkDetector()

    # Setup process and queues for multiprocessing.
    self.img_queue = Queue()
    self.box_queue = Queue()
    self.img_queue.put(sample_frame)
    self.box_process = Process(target=get_face, args=(
        self.mark_detector, self.img_queue, self.box_queue,))
    self.box_process.start()

    # Introduce pose estimator to solve pose. Get one frame to setup the
    # estimator according to the image size.
    self.height, self.width = sample_frame.shape[:2]
    self.pose_estimator = PoseEstimator(img_size=(self.height, self.width))

    # Introduce scalar stabilizers for pose.
    self.pose_stabilizers = [Stabilizer(
        state_num=2, measure_num=1, cov_process=0.1, cov_measure=0.1) for _ in range(6)]

    self.tm = cv2.TickMeter()

    # Gaze tracking
    self.gaze = GazeTracking()
def init():
    """MAIN"""
    # Video source from webcam or video file.
    video_src = args.cam if args.cam is not None else args.video
    if video_src is None:
        print("Warning: video source not assigned, default webcam will be used.")
        video_src = 0

    cap = cv2.VideoCapture(video_src)
    if video_src == 0:
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    _, sample_frame = cap.read()

    # Introduce mark_detector to detect landmarks.
    mark_detector = MarkDetector()

    # Setup process and queues for multiprocessing.
    img_queue = Queue()
    box_queue = Queue()
    img_queue.put(sample_frame)
    box_process = Process(target=get_face, args=(mark_detector, img_queue, box_queue,))
    box_process.start()

    # Introduce pose estimator to solve pose. Get one frame to setup the
    # estimator according to the image size.
    height, width = sample_frame.shape[:2]
    pose_estimator = PoseEstimator(img_size=(height, width))

    # Introduce scalar stabilizers for pose.
    pose_stabilizers = [
        Stabilizer(state_num=2, measure_num=1, cov_process=0.1, cov_measure=0.1)
        for _ in range(6)
    ]

    tm = cv2.TickMeter()

    return cap, video_src, img_queue, box_queue, tm, mark_detector, pose_estimator
def dnn(frame):
    '''
    Applies GoogLeNet to a window.
    '''
    if frame is None:
        return
    # print(frame)
    img = cv2.resize(frame, (224, 224), interpolation=cv2.INTER_CUBIC)

    inputBlob = cv2.dnn.blobFromImage(img, 1.0, (224, 224), (104, 117, 123), False)

    t = cv2.TickMeter()
    t.start()
    net.setInput(inputBlob, "data")
    prob = net.forward("prob")
    t.stop()

    classId, _ = getMaxClass(prob)
    classNames = readClassNames()
    print(classNames[classId].rstrip())
    return classNames[classId].rstrip()
def detect_video(video_file):
    cap = cv2.VideoCapture(video_file)
    assert cap.isOpened()

    face_detector = pydetector.FaceDetector(model_path='./models', num_thread=1, scale=0.25)
    meter = cv2.TickMeter()

    while True:
        ret, frame = cap.read()
        if not ret:
            break
        # frame = cv2.rotate(frame, cv2.ROTATE_90_COUNTERCLOCKWISE)

        meter.reset()
        meter.start()
        boxes = face_detector.detect(img_bgr=frame)
        meter.stop()

        for item in boxes:
            cv2.rectangle(frame, (item.x, item.y),
                          (item.x + item.width, item.y + item.height), (0, 255, 255), 2)

        cv2.imshow('face-detect', frame)
        print('time={:.3}ms'.format(meter.getTimeMilli()))
        cv2.waitKeyEx(33)
def on_current_plugin_update_needed(self):
    print("on_current_plugin_update_needed called")
    if not (self.originalMat is None):
        print("originalMat exists")
        self.processedMat = self.originalMat.copy()
    # else:

    if self.currentPluginGui is not None:
        print("starting to time process")
        print(type(self.currentPlugin))
        meter = cv.TickMeter()
        meter.start()
        self.processedMat = self.currentPluginGui.process_image(
            self.originalMat, self.processedMat)
        meter.stop()
        print("The process took ", meter.getTimeMilli(), " milliseconds")
        # cv.imshow("processed:", self.processedMat)
        # cv.imshow("original:", self.originalMat)
        # cv.waitKey(0); cv.destroyAllWindows()

    self.originalImage = QImage(self.originalMat.data, self.originalMat.shape[1],
                                self.originalMat.shape[0], QImage.Format_RGB888)
    temp_original_pixmap = QPixmap.fromImage(self.originalImage.rgbSwapped())
    self.originalPixmap.setPixmap(temp_original_pixmap)
    # self.originalPixmap.fromImage(self.originalImage.rgbSwapped())
    print(type(self.originalPixmap))

    self.processedImage = QImage(self.processedMat.data, self.processedMat.shape[1],
                                 self.processedMat.shape[0], QImage.Format_RGB888)
    temp_processed_pixmap = QPixmap.fromImage(self.processedImage.rgbSwapped())
    self.processedPixmap.setPixmap(temp_processed_pixmap)
    print("last line completed")
    return
def main():
    backends = (cv.dnn.DNN_BACKEND_DEFAULT,
                cv.dnn.DNN_BACKEND_HALIDE,
                cv.dnn.DNN_BACKEND_INFERENCE_ENGINE,
                cv.dnn.DNN_BACKEND_OPENCV)
    targets = (cv.dnn.DNN_TARGET_CPU,
               cv.dnn.DNN_TARGET_OPENCL,
               cv.dnn.DNN_TARGET_OPENCL_FP16,
               cv.dnn.DNN_TARGET_MYRIAD)

    parser = argparse.ArgumentParser(description='A demo for running libfacedetection using OpenCV\'s DNN module.')
    parser.add_argument('--backend', choices=backends, default=cv.dnn.DNN_BACKEND_DEFAULT, type=int,
                        help='Choose one of computation backends: '
                             '%d: automatically (by default), '
                             '%d: Halide language (http://halide-lang.org/), '
                             '%d: Intel\'s Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit), '
                             '%d: OpenCV implementation' % backends)
    parser.add_argument('--target', choices=targets, default=cv.dnn.DNN_TARGET_CPU, type=int,
                        help='Choose one of target computation devices: '
                             '%d: CPU target (by default), '
                             '%d: OpenCL, '
                             '%d: OpenCL fp16 (half-float precision), '
                             '%d: VPU' % targets)
    # Location
    parser.add_argument('--input', '-i', help='Path to the image. Omit to call default camera')
    parser.add_argument('--model', '-m', type=str, help='Path to .onnx model file.')
    # Inference parameters
    parser.add_argument('--score_threshold', default=0.6, type=float,
                        help='Threshold for filtering out faces with conf < conf_thresh.')
    parser.add_argument('--nms_threshold', default=0.3, type=float,
                        help='Threshold for non-max suppression.')
    parser.add_argument('--top_k', default=5000, type=int,
                        help='Keep top_k bounding boxes before NMS.')
    # Result
    parser.add_argument('--vis', default=True, type=str2bool,
                        help='Set True to visualize the result image. Invalid when using camera.')
    parser.add_argument('--save', default=False, type=str2bool,
                        help='Set True to save as result.jpg. Invalid when using camera.')
    args = parser.parse_args()

    # Instantiate yunet
    yunet = cv.FaceDetectorYN.create(
        model=args.model,
        config='',
        input_size=(320, 320),
        score_threshold=args.score_threshold,
        nms_threshold=args.nms_threshold,
        top_k=args.top_k,
        backend_id=args.backend,
        target_id=args.target
    )

    if args.input is not None:
        image = cv.imread(args.input)
        yunet.setInputSize((image.shape[1], image.shape[0]))
        _, faces = yunet.detect(image)  # faces: None, or nx15 np.array

        vis_image = visualize(image, faces)
        if args.save:
            print('result.jpg saved.')
            cv.imwrite('result.jpg', vis_image)
        if args.vis:
            cv.namedWindow(args.input, cv.WINDOW_AUTOSIZE)
            cv.imshow(args.input, vis_image)
            cv.waitKey(0)
    else:
        device_id = 0
        cap = cv.VideoCapture(device_id)
        frame_w = int(cap.get(cv.CAP_PROP_FRAME_WIDTH))
        frame_h = int(cap.get(cv.CAP_PROP_FRAME_HEIGHT))
        yunet.setInputSize([frame_w, frame_h])

        tm = cv.TickMeter()
        while cv.waitKey(1) < 0:
            has_frame, frame = cap.read()
            if not has_frame:
                print('No frames grabbed!')
                break

            tm.start()
            _, faces = yunet.detect(frame)  # faces: None, or nx15 np.array
            tm.stop()

            frame = visualize(frame, faces, fps=tm.getFPS())
            cv.imshow('libfacedetection demo', frame)

            tm.reset()
def main(): """MAIN""" # Video source from webcam or video file. video_src = args.cam if args.cam is not None else args.video if video_src is None: print( "Warning: video source not assigned, default webcam will be used.") video_src = 0 cap = cv2.VideoCapture(video_src) if video_src == 0: cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640) _, sample_frame = cap.read() # Introduce mark_detector to detect landmarks. mark_detector = MarkDetector() # Setup process and queues for multiprocessing. img_queue = Queue() box_queue = Queue() img_queue.put(sample_frame) box_process = Process(target=get_face, args=( mark_detector, img_queue, box_queue, )) box_process.start() # Introduce pose estimator to solve pose. Get one frame to setup the # estimator according to the image size. height, width = sample_frame.shape[:2] pose_estimator = PoseEstimator(img_size=(height, width)) # Introduce scalar stabilizers for pose. pose_stabilizers = [ Stabilizer(state_num=2, measure_num=1, cov_process=0.1, cov_measure=0.1) for _ in range(6) ] tm = cv2.TickMeter() # Uncomment to prepare for preview, define the codec and create VideoWriter object for output video # fourcc = cv2.VideoWriter_fourcc(*'mp4v') # out = cv2.VideoWriter('output.mp4', fourcc, 20.0, (width, height)) while True: # Read frame, crop it, flip it, suits your needs. frame_got, frame = cap.read() # Break if failure or video finished if frame_got is False: break # Crop it if frame is larger than expected. # frame = frame[0:480, 300:940] # If frame comes from webcam, flip it so it looks like a mirror. if video_src == 0: frame = cv2.flip(frame, 2) # Pose estimation by 3 steps: # 1. detect face; # 2. detect landmarks; # 3. estimate pose # Feed frame to image queue. img_queue.put(frame) # Get face from box queue. facebox = box_queue.get() if facebox is not None: # Detect landmarks from image of 128x128. face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]] face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE)) face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB) # Get marks marks = mark_detector.detect_marks([face_img]) # Convert the marks locations from local CNN to global image. marks *= (facebox[2] - facebox[0]) marks[:, 0] += facebox[0] marks[:, 1] += facebox[1] # Uncomment following line to show raw marks. # mark_detector.draw_marks( # frame, marks, color=(0, 255, 0)) # Try pose estimation with 68 points. pose = pose_estimator.solve_pose_by_68_points(marks) # Stabilize the pose. steady_pose = [] pose_np = np.array(pose).flatten() for value, ps_stb in zip(pose_np, pose_stabilizers): ps_stb.update([value]) steady_pose.append(ps_stb.state[0]) steady_pose = np.reshape(steady_pose, (-1, 3)) # Uncomment following line to draw stable pose annotation on frame. pose_estimator.draw_annotation_box(frame, steady_pose[0], steady_pose[1], color=(128, 255, 128)) # Uncomment following line to draw head axes on frame. pose_estimator.draw_axis(frame, steady_pose[0], steady_pose[1]) # Uncomment following line to get the length of the line on the face # faced_line_length = pose_estimator.get_faced_line_length(frame, steady_pose[0], steady_pose[1]) # Print status "FACING" # cv2.putText(frame, "FACING", (height//50, width//50), cv2.FONT_HERSHEY_DUPLEX, 1, (0, 0, 0), 1) print("FACING") else: # Print status "UNFACED" # cv2.putText(frame, "UNFACED", (height//50, width//50), cv2.FONT_HERSHEY_DUPLEX, 1, (0, 0, 0), 1) print("UNFACED") # Uncomment to write frame # out.write(frame) # Uncommet to show preview. 
# cv2.imshow("Preview", frame) # Wait if cv2.waitKey(10) == 27: break # Uncomment to release output file # out.release() # Clean up the multiprocessing process. box_process.terminate() box_process.join()
if video_src == 0:
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)

# Get the real frame resolution.
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
frame_rate = cap.get(cv2.CAP_PROP_FPS)

# Video output by video writer.
if args.write_video:
    video_writer = cv2.VideoWriter('output.avi',
                                   cv2.VideoWriter_fourcc(*'avc1'),
                                   frame_rate, (frame_width, frame_height))

# Introduce meters to measure the FPS.
tm_detection = cv2.TickMeter()
tm_identification = cv2.TickMeter()

# Loop through the video frames.
while True:
    # Read frame, crop it, flip it, suits your needs.
    frame_got, frame = cap.read()
    if not frame_got:
        break

    # Crop it if frame is larger than expected.
    # frame = frame[0:480, 300:940]

    # If frame comes from webcam, flip it so it looks like a mirror.
    if video_src == 0:
        frame = cv2.flip(frame, 2)
def main():
    # Image path
    img_path = "./image/image.png"
    # Algorithm name: edsr, espcn, fsrcnn or lapsrn
    algorithm = "lapsrn"
    # Model path, chosen to match the algorithm
    model = "./model/LapSRN_x2.pb"
    # Upscaling factor
    scale = 2
    # Timing results
    perf = []

    img = cv2.imread(img_path)
    if img is None:
        print("Couldn't load image: " + str(img_path))
        return

    # Crop the image so the images will be aligned
    height = img.shape[0] - (img.shape[0] % scale)
    width = img.shape[1] - (img.shape[1] % scale)
    cropped = img[0:height, 0:width]

    # Downscale the image for benchmarking
    img_downscaled = cv2.resize(cropped, None, fx=1.0 / scale, fy=1.0 / scale)

    # Make dnn super resolution instance
    sr = dnn_superres.DnnSuperResImpl_create()

    # Read and set the dnn model
    sr.readModel(model)
    sr.setModel(algorithm, scale)

    timer = cv2.TickMeter()

    # Upscale with the super-resolution model
    timer.start()
    img_new = sr.upsample(img_downscaled)
    timer.stop()
    elapsed = timer.getTimeSec() / timer.getCounter()  # elapsed time in seconds
    perf.append(elapsed)
    print(sr.getAlgorithm() + " : " + str(elapsed))
    timer.reset()

    # INTER_CUBIC - bicubic interpolation
    timer.start()
    bicubic = cv2.resize(img_downscaled, None, fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
    timer.stop()
    elapsed = timer.getTimeSec() / timer.getCounter()  # elapsed time in seconds
    perf.append(elapsed)
    print("Bicubic" + " : " + str(elapsed))
    timer.reset()

    # INTER_NEAREST - nearest-neighbor interpolation
    timer.start()
    nearest = cv2.resize(img_downscaled, None, fx=scale, fy=scale, interpolation=cv2.INTER_NEAREST)
    timer.stop()
    elapsed = timer.getTimeSec() / timer.getCounter()  # elapsed time in seconds
    perf.append(elapsed)
    print("Nearest" + " : " + str(elapsed))
    timer.reset()

    # INTER_LANCZOS4 - Lanczos interpolation
    timer.start()
    lanczos = cv2.resize(img_downscaled, None, fx=scale, fy=scale, interpolation=cv2.INTER_LANCZOS4)
    timer.stop()
    elapsed = timer.getTimeSec() / timer.getCounter()  # elapsed time in seconds
    perf.append(elapsed)
    print("Lanczos" + " : " + str(elapsed))
    timer.reset()

    imgs = [img_new, bicubic, nearest, lanczos]
    titles = [sr.getAlgorithm(), "Bicubic", "Nearest neighbor", "Lanczos"]
    showBenchmark(imgs, titles, perf)
def main():
    # Read and store arguments
    confThreshold = args.thr
    nmsThreshold = args.nms
    inpWidth = args.width
    inpHeight = args.height
    modelDetector = args.model
    modelRecognition = args.ocr

    # Load networks
    detector = cv.dnn.readNet(modelDetector)
    recognizer = cv.dnn.readNet(modelRecognition)

    # Create a new named window
    kWinName = "EAST: An Efficient and Accurate Scene Text Detector"
    cv.namedWindow(kWinName, cv.WINDOW_NORMAL)
    outNames = []
    outNames.append("feature_fusion/Conv_7/Sigmoid")
    outNames.append("feature_fusion/concat_3")

    # Open a video file or an image file or a camera stream
    cap = cv.VideoCapture(args.input if args.input else 0)

    tickmeter = cv.TickMeter()
    while cv.waitKey(1) < 0:
        # Read frame
        hasFrame, frame = cap.read()
        if not hasFrame:
            cv.waitKey()
            break

        # Get frame height and width
        height_ = frame.shape[0]
        width_ = frame.shape[1]
        rW = width_ / float(inpWidth)
        rH = height_ / float(inpHeight)

        # Create a 4D blob from frame.
        blob = cv.dnn.blobFromImage(frame, 1.0, (inpWidth, inpHeight), (123.68, 116.78, 103.94), True, False)

        # Run the detection model
        detector.setInput(blob)
        tickmeter.start()
        outs = detector.forward(outNames)
        tickmeter.stop()

        # Get scores and geometry
        scores = outs[0]
        geometry = outs[1]
        [boxes, confidences] = decodeBoundingBoxes(scores, geometry, confThreshold)

        # Apply NMS
        indices = cv.dnn.NMSBoxesRotated(boxes, confidences, confThreshold, nmsThreshold)
        for i in indices:
            # Get 4 corners of the rotated rect
            vertices = cv.boxPoints(boxes[i[0]])
            # Scale the bounding box coordinates based on the respective ratios
            for j in range(4):
                vertices[j][0] *= rW
                vertices[j][1] *= rH

            # Get cropped image using perspective transform
            if modelRecognition:
                cropped = fourPointsTransform(frame, vertices)
                cropped = cv.cvtColor(cropped, cv.COLOR_BGR2GRAY)

                # Create a 4D blob from cropped image
                blob = cv.dnn.blobFromImage(cropped, size=(100, 32), mean=127.5, scalefactor=1 / 127.5)
                recognizer.setInput(blob)

                # Run the recognition model
                tickmeter.start()
                result = recognizer.forward()
                tickmeter.stop()

                # Decode the result into text
                wordRecognized = decodeText(result)
                cv.putText(frame, wordRecognized, (int(vertices[1][0]), int(vertices[1][1])),
                           cv.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0))

            for j in range(4):
                p1 = (vertices[j][0], vertices[j][1])
                p2 = (vertices[(j + 1) % 4][0], vertices[(j + 1) % 4][1])
                cv.line(frame, p1, p2, (0, 255, 0), 1)

        # Put efficiency information
        label = 'Inference time: %.2f ms' % (tickmeter.getTimeMilli())
        cv.putText(frame, label, (0, 15), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0))

        # Display the frame
        cv.imshow(kWinName, frame)
        tickmeter.reset()
def main(): #bagreader = BagFileReader(args.video, 640,480,848,480,30,30) bagreader = BagFileReader(args.video, 640, 480, 640, 480, 15, 15) # Introduce mark_detector to detect landmarks. mark_detector = MarkDetector() sample_frame = bagreader.get_color_frame() sample_frame = cv2.cvtColor(sample_frame, cv2.COLOR_BGR2RGB) height, width, _ = sample_frame.shape fourcc = cv2.VideoWriter_fourcc(*'MJPG') out = cv2.VideoWriter('output-%s.avi' % args.name_output, fourcc, args.fps, (width, height)) # Setup process and queues for multiprocessing. img_queue = Queue() box_queue = Queue() img_queue.put(sample_frame) box_process = Process(target=get_face, args=( mark_detector, img_queue, box_queue, )) box_process.start() # Introduce pose estimator to solve pose. Get one frame to setup the # estimator according to the image size. height, width = sample_frame.shape[:2] pose_estimator = PoseEstimator(img_size=(height, width)) # Introduce scalar stabilizers for pose. pose_stabilizers = [ Stabilizer(state_num=2, measure_num=1, cov_process=0.1, cov_measure=0.1) for _ in range(6) ] tm = cv2.TickMeter() while True: t1 = time.time() # Read frame, crop it, flip it, suits your needs. frame = bagreader.get_color_frame() frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) if frame is False: break # Crop it if frame is larger than expected. # frame = frame[0:480, 300:940] # If frame comes from webcam, flip it so it looks like a mirror. # if video_src == 0: # frame = cv2.flip(frame, 2) # Pose estimation by 3 steps: # 1. detect face; # 2. detect landmarks; # 3. estimate pose # Feed frame to image queue. img_queue.put(frame) # Get face from box queue. faceboxes = box_queue.get() print(faceboxes) mess = "Not detect pose" if faceboxes is not None: if isinstance(faceboxes[1], int): faceboxes = [faceboxes] for facebox in faceboxes: # Detect landmarks from image of 128x128. face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]] face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE)) face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB) tm.start() marks = mark_detector.detect_marks([face_img]) tm.stop() # Convert the marks locations from local CNN to global image. marks *= (facebox[2] - facebox[0]) marks[:, 0] += facebox[0] marks[:, 1] += facebox[1] # Uncomment following line to show raw marks. # mark_detector.draw_marks( # frame, marks, color=(0, 255, 0)) # Uncomment following line to show facebox. # mark_detector.draw_box(frame, [facebox]) # Try pose estimation with 68 points. pose = pose_estimator.solve_pose_by_68_points(marks) # Stabilize the pose. steady_pose = [] pose_np = np.array(pose).flatten() for value, ps_stb in zip(pose_np, pose_stabilizers): ps_stb.update([value]) steady_pose.append(ps_stb.state[0]) steady_pose = np.reshape(steady_pose, (-1, 3)) # Uncomment following line to draw pose annotation on frame. pose_estimator.draw_annotation_box(frame, pose[0], pose[1], color=(255, 128, 128)) # Uncomment following line to draw stabile pose annotation on frame. t2 = time.time() mess = round(1 / (t2 - t1), 2) # pose_estimator.draw_annotation_box( # frame, steady_pose[0], steady_pose[1], color=(128, 255, 128)) # Uncomment following line to draw head axes on frame. # pose_estimator.draw_axes(frame, stabile_pose[0], stabile_pose[1]) cv2.putText(frame, "FPS: " + "{}".format(mess), (20, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0, 255, 0), thickness=2) # Show preview. cv2.imshow("Preview", frame) out.write(frame) if cv2.waitKey(1) & 0xFF == ord('q'): break out.release() # Clean up the multiprocessing process. 
box_process.terminate() box_process.join()
def main(strargument): shutil.rmtree("./test") os.mkdir("./test") os.remove("result.txt") f = open("result.txt", "a") cap = cv2.VideoCapture(strargument) # cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640) #cap = cv2.VideoCapture("NTQ.mkv") #cap = cv2.VideoCapture("/home/fitmta/Real-Time-Face-Detection-OpenCV-GPU/videos/video/out1.1.avi") #cap = cv2.VideoCapture("http://*****:*****@#[email protected]:8932/mjpg/video.mjpg") # cap = cv2.VideoCapture("http://*****:*****@[email protected]:8933/Streaming/channels/102/preview") success, frame = cap.read() startId = countIdFolder("./face_db/") # quit if unable to read the video file if not success: print('Failed to read video') sys.exit(1) #The color of the rectangle we draw around the face rectangleColor = (0, 165, 255) #variables holding the current frame number and the current faceid frameCounter = 0 currentFaceID = 0 #Variables holding the correlation trackers and the name per faceid conf = configparser.ConfigParser() conf.read("config/main.cfg") mtcnn_detector = load_mtcnn(conf) MODEL_PATH = conf.get("MOBILEFACENET", "MODEL_PATH") VERIFICATION_THRESHOLD = float( conf.get("MOBILEFACENET", "VERIFICATION_THRESHOLD")) FACE_DB_PATH = conf.get("MOBILEFACENET", "FACE_DB_PATH") BLUR_THRESH = int(conf.get("CUSTOM", "BLUR_THRESH")) MIN_FACE_SIZE = int(conf.get("MTCNN", "MIN_FACE_SIZE")) MAX_BLACK_PIXEL = int(conf.get("CUSTOM", "MAX_BLACK_PIXEL")) YAWL = int(conf.get("CUSTOM", "YAWL")) YAWR = int(conf.get("CUSTOM", "YAWR")) PITCHL = int(conf.get("CUSTOM", "PITCHL")) PITCHR = int(conf.get("CUSTOM", "PITCHR")) ROLLL = int(conf.get("CUSTOM", "ROLLL")) ROLLR = int(conf.get("CUSTOM", "ROLLR")) MAXDISAPPEARED = int(conf.get("CUSTOM", "MAXDISAPPEARED")) IS_FACE_THRESH = float(conf.get("CUSTOM", "IS_FACE_THRESH")) EXTEND_Y = int(conf.get("CUSTOM", "EXTEND_Y")) EXTEND_X = int(conf.get("CUSTOM", "EXTEND_X")) SIMILAR_THRESH = float(conf.get("CUSTOM", "SIMILAR_THRESH")) MAX_LIST_LEN = int(conf.get("CUSTOM", "MAX_LIST_LEN")) MIN_FACE_FOR_SAVE = int(conf.get("CUSTOM", "MIN_FACE_FOR_SAVE")) LIVE_TIME = int(conf.get("CUSTOM", "LIVE_TIME")) ROIXL = int(conf.get("CUSTOM", "ROIXL")) ROIXR = int(conf.get("CUSTOM", "ROIXR")) ROIYB = int(conf.get("CUSTOM", "ROIYB")) ROIYA = int(conf.get("CUSTOM", "ROIYA")) maxDisappeared = MAXDISAPPEARED ## khong xuat hien toi da 100 frame faces_db = load_faces(FACE_DB_PATH, mtcnn_detector) # load_face_db = ThreadingUpdatefacedb(FACE_DB_PATH,mtcnn_detector) time.sleep(10) for item in faces_db: print(item["name"]) listTrackedFace = [] mark_detector = MarkDetector() tm = cv2.TickMeter() _, sample_frame = cap.read() height, width = sample_frame.shape[:2] pose_estimator = PoseEstimator(img_size=(height, width)) with tf.Graph().as_default(): with tf.Session() as sess: load_mobilefacenet(MODEL_PATH) inputs_placeholder = tf.get_default_graph().get_tensor_by_name( "input:0") embeddings = tf.get_default_graph().get_tensor_by_name( "embeddings:0") try: start = time.time() while True: start1 = time.time() retval, frame = cap.read() #Increase the framecounter frameCounter += 1 if retval: _frame = frame[ROIYA:ROIYB, ROIXL:ROIXR] cv2.rectangle(frame, (ROIXL, ROIYA), (ROIXR, ROIYB), (0, 0, 255), 2) good_face_index = [] # faces_db = load_face_db.face_db if (frameCounter % 1) == 0: ### embed and compare name for i, face_db in enumerate(faces_db): if not os.path.isdir( "./face_db/" + face_db["name"].split("_")[0]): faces_db.pop(i) faces, landmarks = mtcnn_detector.detect(_frame) if faces.shape[0] is not 0: input_images = np.zeros( (faces.shape[0], 112, 112, 3)) 
save_images = np.zeros( (faces.shape[0], 112, 112, 3)) (yaw, pitch, roll) = (0, 0, 0) for i, face in enumerate(faces): if round(faces[i, 4], 6) > IS_FACE_THRESH: bbox = faces[i, 0:4] points = landmarks[i, :].reshape( (5, 2)) nimg = face_preprocess.preprocess( _frame, bbox, points, image_size='112,112') save_images[i, :] = nimg nimg = nimg - 127.5 nimg = nimg * 0.0078125 input_images[i, :] = nimg (x1, y1, x2, y2) = bbox.astype("int") if x1 < 0 or y1 < 0 or x2 < 0 or y2 < 0 or x1 >= x2 or y1 >= y2: continue temp = int((y2 - y1) / EXTEND_Y) y1 = y1 + temp y2 = y2 + temp temp = int((x2 - x1) / EXTEND_X) if x1 > temp: x1 = x1 - temp x2 = x2 + temp # cv2.imshow("mainframe",frame) # cv2.imwrite("temp2.jpg",frame[y1:y2,x1:x2]) face_img = cv2.resize( _frame[y1:y2, x1:x2], (128, 128)) # cv2.imshow("ok",face_img) face_img = cv2.cvtColor( face_img, cv2.COLOR_BGR2RGB) tm.start() marks = mark_detector.detect_marks( [face_img]) tm.stop() marks *= (x2 - x1) marks[:, 0] += x1 marks[:, 1] += y1 # mark_detector.draw_marks( # frame, marks, color=(0, 255, 0)) pose, ( yaw, pitch, roll ) = pose_estimator.solve_pose_by_68_points( marks) # temp = frame # cv2.putText(temp,"yaw: "+str(yaw),(x2,y1),cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 255), thickness=2) # cv2.putText(temp,"pitch: "+str(pitch),(x2,y1+25),cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 255), thickness=2) # cv2.putText(temp,"roll: "+str(roll),(x2,y1+50),cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 255), thickness=2) # cv2.imshow("frame",temp) # if measureGoodFace(MIN_FACE_SIZE,MAX_BLACK_PIXEL,frame[y1:y2,x1:x2],yaw,pitch,roll,BLUR_THRESH,YAWL,YAWR,PITCHL,PITCHR,ROLLL,ROLLR): # good_face_index.append(i) # cv2.waitKey(0) # print(good_face_index) feed_dict = {inputs_placeholder: input_images} emb_arrays = sess.run(embeddings, feed_dict=feed_dict) emb_arrays = sklearn.preprocessing.normalize( emb_arrays) names = [] sims = [] for i, embedding in enumerate(emb_arrays): # if len(listTrackedFace)>i and RepresentsInt(listTrackedFace[i].name)==False: # names.append(listTrackedFace[i].name) # continue embedding = embedding.flatten() temp_dict = {} for com_face in faces_db: ret, sim = feature_compare( embedding, com_face["feature"], 0.65) temp_dict[com_face["name"]] = sim # print(temp_dict) dictResult = sorted(temp_dict.items(), key=lambda d: d[1], reverse=True) # print(dictResult[:5]) name = "" if len(dictResult) > 0 and dictResult[0][ 1] > VERIFICATION_THRESHOLD: name = dictResult[0][ 0] #.split("_")[0] sim = dictResult[0][1] ## wite log t = time.time() f.write(name + "___" + str((t - start) // 60) + ":" + str(int(t - start) % 60) + "\n") else: name = "unknown" sim = 0 names.append(name) sims.append(sim) # cv2.imwrite("./test/"+name+"_"+str(frameCounter//60)+":"+str(frameCounter%60)+".jpg",save_images[i,:]) # if len(dictResult)>0 : # cv2.imwrite("./test/"+names[i]+"_"+str(frameCounter//60)+":"+str(frameCounter%60)+"_"+str(dictResult[0][1])+".jpg",save_images[i,:]) ################################ tracker for i, embedding in enumerate(emb_arrays): embedding = embedding.flatten() ResultDict = {} for objectTrackFace in listTrackedFace: tempList = [] (x1, y1, x2, y2) = objectTrackFace.latestBox for com_face in objectTrackFace.listEmbedding: ret, sim = feature_compare( embedding, com_face, 0.65) tempList.append(sim) tempList.sort(reverse=True) if len(tempList) > 0: if tempList[0] > 0.9 or ( similarIOU( faces[i, :4].astype( "int"), objectTrackFace. latestBox) and (frameCounter - objectTrackFace .latestFrameCounter) < 3): ResultDict[objectTrackFace. 
name] = tempList[0] dictResult = sorted(ResultDict.items(), key=lambda d: d[1], reverse=True) if True: if len( ResultDict ) > 0 and dictResult[0][ 1] > SIMILAR_THRESH: ## neu khop -- 0.5 # for ik in range(len(dict)): # if dict[ik][1]>SIMILAR_THRESH: nameTrackCurrent = dictResult[0][0] for index, tempFaceTrack in enumerate( listTrackedFace): if tempFaceTrack.name == nameTrackCurrent: if len(tempFaceTrack. listImage ) > MAX_LIST_LEN: tempFaceTrack.listImage.pop( 0) tempFaceTrack.listEmbedding.pop( 0) if measureGoodFace( MIN_FACE_SIZE, MAX_BLACK_PIXEL, save_images[ i, :], yaw, pitch, roll, BLUR_THRESH, YAWL, YAWR, PITCHL, PITCHR, ROLLL, ROLLR): tempFaceTrack.listImage.append( save_images[ i, :]) tempFaceTrack.listEmbedding.append( emb_arrays[i]) else: if measureGoodFace( MIN_FACE_SIZE, MAX_BLACK_PIXEL, save_images[ i, :], yaw, pitch, roll, BLUR_THRESH, YAWL, YAWR, PITCHL, PITCHR, ROLLL, ROLLR): tempFaceTrack.listImage.append( save_images[ i, :]) tempFaceTrack.listEmbedding.append( emb_arrays[i]) if names[i] != "unknown": if RepresentsInt( nameTrackCurrent ): tempFaceTrack.name = names[ i] # else: ################# # names[i] = nameTrackCurrent else: if not RepresentsInt( nameTrackCurrent ): names[ i] = nameTrackCurrent tempFaceTrack.countDisappeared = 0 tempFaceTrack.latestBox = faces[ i, 0:4].astype("int") tempFaceTrack.latestFrameCounter = frameCounter tempFaceTrack.liveTime = 0 tempFaceTrack.justAdded = True ## but we still action with it break else: ## neu khong khop thi tao moi nhung chi them anh khi mat du tot if len(ResultDict) > 0: print(dictResult[0][1]) if names[i] != "unknown": newTrackFace = trackedFace( names[i]) else: newTrackFace = trackedFace( str(currentFaceID)) currentFaceID = currentFaceID + 1 if measureGoodFace( MIN_FACE_SIZE, MAX_BLACK_PIXEL, save_images[i, :], yaw, pitch, roll, BLUR_THRESH, YAWL, YAWR, PITCHL, PITCHR, ROLLL, ROLLR): newTrackFace.listImage.append( save_images[i, :]) newTrackFace.listEmbedding.append( emb_arrays[i]) newTrackFace.latestBox = faces[ i, 0:4].astype("int") newTrackFace.latestFrameCounter = frameCounter # print(newTrackFace.latestBox) newTrackFace.justAdded = True listTrackedFace.append( newTrackFace) ## add list ### disappeared for index, trackFace in enumerate( listTrackedFace): if trackFace.justAdded == False: trackFace.countDisappeared = trackFace.countDisappeared + 1 trackFace.liveTime = trackFace.liveTime + 1 else: trackFace.justAdded = False if trackFace.liveTime > LIVE_TIME: t = listTrackedFace.pop(index) del t if trackFace.countDisappeared > maxDisappeared: if len( trackFace.listImage ) < MIN_FACE_FOR_SAVE: ## neu chua duoc it nhat 5 mat thi xoa luon trackedFace.countDisappeared = 0 continue if trackFace.saveTrackedFace( "./temp/", startId): startId = startId + 1 t = listTrackedFace.pop(index) del t for i, face in enumerate(faces): x1, y1, x2, y2 = faces[i][0], faces[i][ 1], faces[i][2], faces[i][3] x1 = max(int(x1), 0) y1 = max(int(y1), 0) x2 = min(int(x2), _frame.shape[1]) y2 = min(int(y2), _frame.shape[0]) cv2.rectangle(frame, (x1 + ROIXL, y1 + ROIYA), (x2 + ROIXL, y2 + ROIYA), (0, 255, 0), 2) # if i in good_face_index: # if not RepresentsInt(names[i]): cv2.putText(frame, names[i].split("_")[0], (int(x1 / 2 + x2 / 2 + ROIXL), int(y1 + ROIYA)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2) else: for index, trackFace in enumerate( listTrackedFace): trackFace.countDisappeared = trackFace.countDisappeared + 1 trackFace.liveTime = trackFace.liveTime + 1 if trackFace.liveTime > LIVE_TIME: t = listTrackedFace.pop(index) del t continue 
if trackFace.countDisappeared > maxDisappeared: if len( trackFace.listImage ) < MIN_FACE_FOR_SAVE: ## neu chua duoc it nhat 5 mat thi xoa luon trackedFace.countDisappeared = 0 continue if trackFace.saveTrackedFace( "./temp/", startId): startId = startId + 1 t = listTrackedFace.pop(index) del t end = time.time() cv2.putText(frame, "FPS: " + str(1 // (end - start1)), (400, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0), 3) cv2.putText( frame, "Time: " + str((end - start) // 60) + ":" + str(int(end - start) % 60), (200, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0), 3) cv2.imshow("frame", frame) key = cv2.waitKey(30) if key & 0xFF == ord('q'): break if key == 32: cv2.waitKey(0) else: break except KeyboardInterrupt as e: pass gc.collect() cv2.destroyAllWindows() exit(0)
def main(): """MAIN""" # Video source from webcam or video file. video_src = 0 #video_src = "./head-pose-face-detection-female.mp4" cam = cv2.VideoCapture(video_src) if video_src == 0: cam.set(cv2.CAP_PROP_FRAME_WIDTH, 640) _, sample_frame = cam.read() # Introduce mark_detector to detect landmarks. mark_detector = MarkDetector() # Setup process and queues for multiprocessing. img_queue = Queue() box_queue = Queue() img_queue.put(sample_frame) thread = threading.Thread(target=get_face, args=(mark_detector, img_queue, box_queue)) thread.daemon = True thread.start() # Introduce pose estimator to solve pose. Get one frame to setup the # estimator according to the image size. height, width = sample_frame.shape[:2] pose_estimator = PoseEstimator(img_size=(height, width)) # Introduce scalar stabilizers for pose. pose_stabilizers = [ Stabilizer(state_num=2, measure_num=1, cov_process=0.1, cov_measure=0.1) for _ in range(6) ] tm = cv2.TickMeter() while True: # Read frame, crop it, flip it, suits your needs. frame_got, frame = cam.read() if frame_got is False: break # Crop it if frame is larger than expected. # frame = frame[0:480, 300:940] # If frame comes from webcam, flip it so it looks like a mirror. if video_src == 0: frame = cv2.flip(frame, 2) # Pose estimation by 3 steps: # 1. detect face; # 2. detect landmarks; # 3. estimate pose # Feed frame to image queue. img_queue.put(frame) # Get face from box queue. facebox = box_queue.get() if facebox is not None: # Detect landmarks from image of 128x128. face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]] face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE)) face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB) tm.start() marks = mark_detector.detect_marks(face_img) tm.stop() #print(tm.getTimeSec()/tm.count()) # Convert the marks locations from local CNN to global image. marks *= (facebox[2] - facebox[0]) marks[:, 0] += facebox[0] marks[:, 1] += facebox[1] # Uncomment following line to show raw marks. # mark_detector.draw_marks( # frame, marks, color=(0, 255, 0)) # Try pose estimation with 68 points. pose = pose_estimator.solve_pose_by_68_points(marks) # Stabilize the pose. stabile_pose = [] pose_np = np.array(pose).flatten() for value, ps_stb in zip(pose_np, pose_stabilizers): ps_stb.update([value]) stabile_pose.append(ps_stb.state[0]) stabile_pose = np.reshape(stabile_pose, (-1, 3)) # Uncomment following line to draw pose annotation on frame. # pose_estimator.draw_annotation_box( # frame, pose[0], pose[1], color=(255, 128, 128)) # Uncomment following line to draw stabile pose annotation on frame. pose_estimator.draw_annotation_box(frame, stabile_pose[0], stabile_pose[1], color=(128, 255, 128)) # Show preview. cv2.imshow("Preview", frame) if cv2.waitKey(10) == 27: break
def process_video_capture( video_capture, save_file, inverter_filename='models/inverter/inverter_randforest_7000.pkl'): Z_DIM = 512 CNN_INPUT_SIZE = 128 sess = tf.InteractiveSession() video_capture.set(cv2.CAP_PROP_FRAME_WIDTH, 640) # Check success if not video_capture.isOpened(): raise Exception("Could not open video device") _, sample_frame = video_capture.read() # Introduce mark_detector to detect landmarks. mark_detector = MarkDetector() # Setup process and queues for multiprocessing. # Introduce pose estimator to solve pose. Get one frame to setup the # estimator according to the image size. height, width = sample_frame.shape[:2] pose_estimator = PoseEstimator(img_size=(height, width)) # Introduce scalar stabilizers for pose. pose_stabilizers = [ Stabilizer(state_num=2, measure_num=1, cov_process=0.1, cov_measure=0.1) for _ in range(6) ] tm = cv2.TickMeter() G, I = load_models(inverter_filename=inverter_filename) #print(I.coef_) #print(I.intercept_) age_net = cv2.dnn.readNetFromCaffe('models/race_age/deploy_age.prototxt', 'models/race_age/age_net.caffemodel') gender_net = cv2.dnn.readNetFromCaffe( 'models/race_age/deploy_gender.prototxt', 'models/race_age/gender_net.caffemodel') i = 0 black_img = np.zeros((1024, 1024, 3)) while True: #I.intercept_ += np.random.normal(scale=0.1, size=I.intercept_.shape).astype(np.float32) # Read frame, crop it, flip it, suits your needs. frame_got, frame = video_capture.read() if frame_got is False: break # Crop it if frame is larger than expected. #frame = frame[0:480, 300:940] # If frame comes from webcam, flip it so it looks like a mirror. #frame = cv2.flip(frame, 2) # Feed frame to image queue. facebox = mark_detector.extract_cnn_facebox(frame) if facebox is not None: feats = calculate_facial_features(frame, facebox, tm, CNN_INPUT_SIZE, mark_detector, age_net, gender_net) viewer_latent = I.predict(feats.reshape(1, -1)) labels = np.zeros([viewer_latent.shape[0], 0], np.float32) viewer_generated = G.run(viewer_latent, labels, out_mul=127.5, out_add=127.5, out_dtype=np.uint8) viewer_generated = np.squeeze(viewer_generated) viewer_generated = np.transpose(viewer_generated, (1, 2, 0)) viewer_generated = cv2.cvtColor(viewer_generated, cv2.COLOR_BGR2RGB) facebox = mark_detector.extract_cnn_facebox(viewer_generated) if facebox is not None: feats_gen = calculate_facial_features(viewer_generated, facebox, tm, CNN_INPUT_SIZE, mark_detector, age_net, gender_net) print("Frame: {}, mean diff in features: {} ".format( i, np.mean(np.abs(feats_gen - feats)))) # Show preview. else: print("Frame: {}, didn't generate face".format(i)) viewer_generated = black_img frame = cv2.resize(frame, (1024, 1024)) result_both_imgs = np.concatenate((viewer_generated, frame), axis=0) cv2.imwrite("images/output_{}.png".format(i), result_both_imgs) #last_image = viewer_generated #cv2.imshow("Preview", viewer_generated) if cv2.waitKey(10) == 27: break i += 1 # Clean up the multiprocessing process. video_capture.release() frames_to_video.create_video_from_frames(dir_path='images', ext='png', output=save_file)
def ai_obj_det(capd_img_list):
    # Input image size for the AI model (fixed at 300x300)
    IN_WIDTH = 300
    IN_HEIGHT = 300
    frame_width = 640
    frame_height = 480

    # Label list of the trained model (MobileNet SSD, COCO)
    CLASS_LABELS = {
        0: 'background', 1: 'person', 2: 'bicycle', 3: 'car', 4: 'motorcycle',
        5: 'airplane', 6: 'bus', 7: 'train', 8: 'truck', 9: 'boat',
        10: 'traffic light', 11: 'fire hydrant', 13: 'stop sign', 14: 'parking meter',
        15: 'bench', 16: 'bird', 17: 'cat', 18: 'dog', 19: 'horse', 20: 'sheep',
        21: 'cow', 22: 'elephant', 23: 'bear', 24: 'zebra', 25: 'giraffe',
        27: 'backpack', 28: 'umbrella', 31: 'handbag', 32: 'tie', 33: 'suitcase',
        34: 'frisbee', 35: 'skis', 36: 'snowboard', 37: 'sports ball', 38: 'kite',
        39: 'baseball bat', 40: 'baseball glove', 41: 'skateboard', 42: 'surfboard',
        43: 'tennis racket', 44: 'bottle', 46: 'wine glass', 47: 'cup', 48: 'fork',
        49: 'knife', 50: 'spoon', 51: 'bowl', 52: 'banana', 53: 'apple',
        54: 'sandwich', 55: 'orange', 56: 'broccoli', 57: 'carrot', 58: 'hot dog',
        59: 'pizza', 60: 'donut', 61: 'cake', 62: 'chair', 63: 'couch',
        64: 'potted plant', 65: 'bed', 67: 'dining table', 70: 'toilet', 72: 'tv',
        73: 'laptop', 74: 'mouse', 75: 'remote', 76: 'keyboard', 77: 'cell phone',
        78: 'microwave', 79: 'oven', 80: 'toaster', 81: 'sink', 82: 'refrigerator',
        84: 'book', 85: 'clock', 86: 'vase', 87: 'scissors', 88: 'teddy bear',
        89: 'hair drier', 90: 'toothbrush'
    }

    # Define command-line arguments
    ap = argparse.ArgumentParser()
    ap.add_argument('-p', '--pbtxt', required=True, help='path to pbtxt file')
    ap.add_argument('-w', '--weights', required=True, help='path to TensorFlow inference graph')
    ap.add_argument('-c', '--confidence', type=float, default=0.3, help='minimum probability')
    ap.add_argument('-i', '--interval', type=float, default=0, help='process interval to reduce CPU usage')
    args = vars(ap.parse_args())

    # Stopwatch from OpenCV (may not be necessary)
    tm = cv2.TickMeter()

    # Pick a random frame color for each label
    colors = {}
    random.seed()
    for key in CLASS_LABELS.keys():
        colors[key] = (random.randrange(255), random.randrange(255), random.randrange(255))

    # Load the AI model
    print('Loading AI Model...')
    net = cv2.dnn.readNet(args['weights'], args['pbtxt'])

    for j in range(len(capd_img_list)):
        image = cv2.imread(capd_img_list[j])

        # Convert the input image to blob format
        blob = cv2.dnn.blobFromImage(image, size=(IN_WIDTH, IN_HEIGHT), swapRB=False, crop=False)

        # Set the captured image (as a blob) as the network input
        net.setInput(blob)

        # Run inference and measure the time it takes
        tm.reset()
        tm.start()
        detections = net.forward()  # net = cv2.dnn.readNet(...)
        tm.stop()

        # Loop over the detections
        for i in range(detections.shape[2]):
            # Confidence of the i-th detected object
            confidence = detections[0, 0, i, 2]

            # Skip detections below the confidence threshold
            if confidence < args['confidence']:
                continue

            # Class id and bounding box of the detected object
            class_id = int(detections[0, 0, i, 1])
            start_x = int(detections[0, 0, i, 3] * frame_width)
            start_y = int(detections[0, 0, i, 4] * frame_height)
            end_x = int(detections[0, 0, i, 5] * frame_width)
            end_y = int(detections[0, 0, i, 6] * frame_height)

            # Build the label text to identify the object, e.g. "person: 87.5%"
            label = CLASS_LABELS[class_id]
            label += ': ' + str(round(confidence * 100, 2)) + '%'
            label_size, base_line = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)

            # Draw the bounding box
            if CLASS_LABELS[class_id] == 'person':  # person or not
                # cv2.rectangle(image, (start_x, start_y), (end_x, end_y), colors[class_id], -1)  # thickness -1: filled
                cv2.rectangle(image, (start_x, start_y), (end_x, end_y), colors[class_id], 2)
            else:
                cv2.rectangle(image, (start_x, start_y), (end_x, end_y), colors[class_id], 2)  # thickness: 2

            # Debug
            print("Label: %s," % label, "Confidence: %f," % confidence)

            # Draw the label background
            cv2.rectangle(image, (start_x, start_y - label_size[1]),
                          (start_x + label_size[0], start_y + base_line),
                          (255, 255, 255), cv2.FILLED)
            # Draw the label text
            cv2.putText(image, label, (start_x, start_y),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0))

        # Measured inference time
        ai_time = tm.getTimeMilli()
        # Show AI processing time on the image
        # cv2.putText(image, '{:.2f}(ms)'.format(ai_time), (10, 30),
        #             cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 255, 0), thickness=2)

        '''
        # Debug: show image
        cv2.imshow(capd_img_list[j], image)
        cv2.waitKey(0)
        cv2.destroyAllWindows()
        '''

        # Save the image with detections overlaid
        cv2.imwrite(capd_img_list[j], image)

        time.sleep(args['interval'])  # interval between images to reduce CPU usage

    # Close process
    print('Close AI Process')
    time.sleep(3)
def main(): """MAIN""" # Video source from webcam or video file. video_src = args.cam if args.cam is not None else args.video if video_src is None: print("Warning: video source not assigned, default webcam will be used.") video_src = 0 cap = cv2.VideoCapture(video_src) if video_src == 0: cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640) _, sample_frame = cap.read() # Introduce mark_detector to detect landmarks. mark_detector = MarkDetector() # Setup process and queues for multiprocessing. img_queue = Queue() box_queue = Queue() img_queue.put(sample_frame) box_process = Process(target=get_face, args=( mark_detector, img_queue, box_queue,)) box_process.start() # Introduce pose estimator to solve pose. Get one frame to setup the # estimator according to the image size. height, width = sample_frame.shape[:2] pose_estimator = PoseEstimator(img_size=(height, width)) if args.out != None: fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v') output_movie = cv2.VideoWriter(args.out, fourcc, 30, (width, height)) # Introduce scalar stabilizers for pose. pose_stabilizers = [Stabilizer( state_num=2, measure_num=1, cov_process=0.1, cov_measure=0.1) for _ in range(6)] tm = cv2.TickMeter() cnt = 0 input_path = args.input_path listdir = os.listdir(input_path) for v_name in listdir: v_path = os.path.join(input_path, v_name) cap = cv2.VideoCapture(v_path) while True: # Read frame, crop it, flip it, suits your needs. frame_got, frame = cap.read() if frame_got is False: break # Crop it if frame is larger than expected. # frame = frame[0:480, 300:940] # If frame comes from webcam, flip it so it looks like a mirror. if video_src == 0: frame = cv2.flip(frame, 2) # Pose estimation by 3 steps: # 1. detect face; # 2. detect landmarks; # 3. estimate pose # Feed frame to image queue. img_queue.put(frame) # Get face from box queue. facebox = box_queue.get() if facebox is not None: # Detect landmarks from image of 128x128. face_img = frame[facebox[1]: facebox[3], facebox[0]: facebox[2]] face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE)) face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB) tm.start() marks = mark_detector.detect_marks(face_img) tm.stop() # Convert the marks locations from local CNN to global image. marks *= (facebox[2] - facebox[0]) marks[:, 0] += facebox[0] marks[:, 1] += facebox[1] # Uncomment following line to show raw marks. # mark_detector.draw_marks(frame, marks, color=(0, 255, 0)) # Uncomment following line to show facebox. # mark_detector.draw_box(frame, [facebox]) # Try pose estimation with 68 points. pose = pose_estimator.solve_pose_by_68_points(marks) # Stabilize the pose. steady_pose = [] pose_np = np.array(pose).flatten() for value, ps_stb in zip(pose_np, pose_stabilizers): ps_stb.update([value]) steady_pose.append(ps_stb.state[0]) steady_pose = np.reshape(steady_pose, (-1, 3)) # Uncomment following line to draw pose annotation on frame. # pose_estimator.draw_annotation_box( # frame, pose[0], pose[1], color=(255, 128, 128)) # Uncomment following line to draw stabile pose annotation on frame. pose_estimator.draw_annotation_box( frame, steady_pose[0], steady_pose[1], color=(128, 255, 128)) # Uncomment following line to draw head axes on frame. # pose_estimator.draw_axes(frame, steady_pose[0], steady_pose[1]) # Show preview. # cv2.imshow("Preview", frame) # if cv2.waitKey(10) == 27: # break if args.out != None: output_movie.write(frame) else: cv2.imshow("Preview", frame) cnt = cnt + 1 if cnt % 100 == 0: print(str(cnt), flush=True) # Clean up the multiprocessing process. 
box_process.terminate() box_process.join() cv2.destroyAllWindows()
if not cap.isOpened():
    print('Error opening input video: {}'.format(args.video))
else:
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Use video writer to write processed video file.
if args.output:
    video_writer = cv2.VideoWriter(
        args.output, cv2.VideoWriter_fourcc('a', 'v', 'c', '1'),
        cap.get(cv2.CAP_PROP_FPS),
        (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
         int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))))

# Use meter to calculate FPS.
meter = cv2.TickMeter()

# Capture frame-by-frame
while cap.isOpened():
    # Start the timer.
    meter.start()

    # Read a frame.
    ret, frame = cap.read()
    if not ret:
        break

    # Prepare the input image.
    frame_raw = detector.preprocess(frame)

    # Run the model
# tm.reset()        : reset the time measurement
# tm.getTimeSec()   : return the measured time in seconds
# tm.getTimeMilli() : return the measured time in milliseconds
# tm.getTimeMicro() : return the measured time in microseconds
import sys
import time

import numpy as np
import cv2

img = cv2.imread('ch02/hongkong.jpg')

if img is None:
    print('Image load failed!')
    sys.exit()

tm = cv2.TickMeter()

tm.reset()
tm.start()
t1 = time.time()

edge = cv2.Canny(img, 50, 150)  # the code to measure must sit between tm.start() and tm.stop()

tm.stop()
ms = tm.getTimeMilli()

print('time : ', (time.time() - t1) * 1000)
print('Elapsed time : {}ms'.format(ms))
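# For reference, a minimal sketch (not part of the original snippet) timing the
# same Canny call with cv2.getTickCount()/cv2.getTickFrequency(), the low-level
# tick counters that cv2.TickMeter wraps.
import cv2

img = cv2.imread('ch02/hongkong.jpg')

t0 = cv2.getTickCount()
edge = cv2.Canny(img, 50, 150)
t1 = cv2.getTickCount()

ms = (t1 - t0) / cv2.getTickFrequency() * 1000
print('Elapsed time : {}ms'.format(ms))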
    print('Results saved to result.jpg\n')
    cv.imwrite('result.jpg', result)

    # Visualize results in a new window
    if args.vis:
        cv.namedWindow(args.input, cv.WINDOW_AUTOSIZE)
        cv.imshow(args.input, result)
        cv.waitKey(0)
else:
    # Omit input to call default camera
    deviceId = 0
    cap = cv.VideoCapture(deviceId)
    frameWidth = int(cap.get(cv.CAP_PROP_FRAME_WIDTH))
    frameHeight = int(cap.get(cv.CAP_PROP_FRAME_HEIGHT))
    detector.setInputSize([frameWidth, frameHeight])

    tm = cv.TickMeter()
    while cv.waitKey(1) < 0:
        hasFrame, frame = cap.read()
        if not hasFrame:
            print('No frames grabbed!')
            break

        # Inference
        tm.start()
        faces = detector.detect(frame)  # faces is a tuple
        tm.stop()

        # Draw results on the input image
        frame = visualize(frame, faces)

        cv.putText(frame, 'FPS: {}'.format(tm.getFPS()), (0, 15),
import cv2

import myCV
import net

face_net = net.face_detection()
landmark_net = net.face_lanmark()
face_reid_net = net.face_reid()
body_net = myCV.Net("mo_mobilenet-ssd.xml", "mo_mobilenet-ssd.bin", (300, 300))

stream = cv2.VideoCapture(0)
counter = cv2.TickMeter()
faces_data = {}

while True:
    counter.stop()
    counter.start()

    grab, frame = stream.read()
    if not grab:
        raise Exception('Image not found')
    img = frame.copy()

    # 15 = person id in the MobileNet SSD label list
    bodies = myCV.detect(body_net, frame, 0.7, 15)
    for bxmin, bymin, bxmax, bymax in bodies:
        cv2.rectangle(img, (bxmin, bymin), (bxmax, bymax), (255, 255, 0), 2)
        bchip = frame[bymin:bymax, bxmin:bxmax]
        face = myCV.detect(face_net, bchip, 0.7)
def main(): """MAIN""" cv2.namedWindow("Test") # Create a named window cv2.moveWindow("Test", 900, 600) # Move it to (40,30) screenWidth, screenHeight = pyautogui.size() st = 'Last command' cap = cv2.VideoCapture(0) cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640) _, sample_frame = cap.read() # Introduce mark_detector to detect landmarks. mark_detector = MarkDetector() # Setup process and queues for multiprocessing. img_queue = Queue() box_queue = Queue() img_queue.put(sample_frame) box_process = Process(target=get_face, args=( mark_detector, img_queue, box_queue, )) box_process.start() # Setting up process for listening to audio commands voice_command_queue = Q() stt_process = Thread(target=get_voice_command, args=(voice_command_queue, )) stt_process.setDaemon(True) stt_process.start() # Introduce pose estimator to solve pose. Get one frame to setup the # estimator according to the image size. height, width = sample_frame.shape[:2] pose_estimator = PoseEstimator(img_size=(height, width)) # Introduce scalar stabilizers for pose. pose_stabilizers = [ Stabilizer(state_num=2, measure_num=1, cov_process=0.1, cov_measure=0.1) for _ in range(6) ] tm = cv2.TickMeter() while True: # Read frame, crop it, flip it, suits your needs. frame_got, frame = cap.read() if frame_got is False: break # Crop it if frame is larger than expected. # frame = frame[0:480, 300:940] # If frame comes from webcam, flip it so it looks like a mirror. frame = cv2.flip(frame, 2) # Pose estimation by 3 steps: # 1. detect face; # 2. detect landmarks; # 3. estimate pose # Feed frame to image queue. img_queue.put(frame) # Get face from box queue. facebox = box_queue.get() if facebox is not None: # Detect landmarks from image of 128x128. face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]] face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE)) face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB) tm.start() marks = mark_detector.detect_marks([face_img]) tm.stop() # Convert the marks locations from local CNN to global image. marks *= (facebox[2] - facebox[0]) marks[:, 0] += facebox[0] marks[:, 1] += facebox[1] # Uncomment following line to show raw marks. # mark_detector.draw_marks( # frame, marks, color=(0, 255, 0)) # Uncomment following line to show facebox. # mark_detector.draw_box(frame, [facebox]) # Try pose estimation with 68 points. pose = pose_estimator.solve_pose_by_68_points(marks) # Stabilize the pose. steady_pose = [] pose_np = np.array(pose).flatten() for value, ps_stb in zip(pose_np, pose_stabilizers): ps_stb.update([value]) steady_pose.append(ps_stb.state[0]) steady_pose = np.reshape(steady_pose, (-1, 3)) # Uncomment following line to draw pose annotation on frame. # pose_estimator.draw_annotation_box( # frame, pose[0], pose[1], color=(255, 128, 128)) # Uncomment following line to draw stabile pose annotation on frame. pose_estimator.draw_annotation_box(frame, steady_pose[0], steady_pose[1], color=(255, 128, 128)) # Uncomment following line to draw head axes on frame. 
endpoints = pose_estimator.getEndPoints(frame, steady_pose[0], steady_pose[1]) deltax = endpoints[1][0] - endpoints[0][0] deltay = endpoints[1][1] - endpoints[0][1] xpos = math.floor((deltax + 44) * screenWidth / 88) ypos = math.floor((deltay + 14) * screenHeight / 58) # print(xpos, ypos) pyautogui.moveTo(xpos, ypos) if not voice_command_queue.empty(): command = voice_command_queue.get_nowait() if 'click' in command or 'select' in command: pyautogui.click() st = 'Click' elif 'double' in command or 'in' in command: pyautogui.doubleClick() st = 'Double Click' elif 'right' in command or 'menu' in command or 'light' in command: pyautogui.rightClick() st = 'Right Click' print(command) cv2.putText(frame, st, (0, 100), cv2.FONT_HERSHEY_SIMPLEX, 20, 255) scale_percent = 30 # calculate the 50 percent of original dimensions width = int(frame.shape[1] * scale_percent / 100) height = int(frame.shape[0] * scale_percent / 100) # dsize dsize = (width, height) # resize image output = cv2.resize(frame, dsize) cv2.moveWindow("Test", screenWidth - width, screenHeight - height) # Show preview. cv2.imshow("Test", output) if cv2.waitKey(10) == 27: break # Clean up the multiprocessing process. box_process.terminate() box_process.join()