def performDetect(self, imagePath, txt_tag=True):
    # Open the image file
    # cap = VideoCapture(imagePath)
    # get frame from the video
    # hasFrame, self.frame = cap.read()
    # self.frame = Image.open(imagePath)
    # print(imagePath)
    self.frame = cv2.imdecode(np.fromfile(imagePath, dtype=np.uint8), -1)
    # cv2.imshow('', self.frame)
    # cv2.waitKey()
    # Create a 4D blob from a frame.
    blob = dnn.blobFromImage(self.frame, 1 / 255, (self.inpWidth, self.inpHeight),
                             [0, 0, 0], 1, crop=False)
    # Sets the input to the network
    self.net.setInput(blob)
    # Runs the forward pass to get output of the output layers
    outs = self.net.forward(self.getOutputsNames())
    # Remove the bounding boxes with low confidence
    detections = self.postprocess(outs, txt_tag)
    result = {"detections": detections, "image": self.frame}
    # cv2.imwrite('detect/' + imagePath[imagePath.rindex('\\'):], self.frame)
    return result

def getFaceBB(self, image: Image_Type, passThrough: PushPipe.PassThrough) -> Image_Type:
    height, width, channels = image.shape[0:3]
    assert channels == 3
    blob = dnn.blobFromImage(resize(image, (300, 300)), 1.0, (300, 300),
                             [104, 117, 123], True, False)
    self.net.setInput(blob)
    detections = self.net.forward()
    # get largest face
    largest = [0, 0, 0, 0]
    prevArea = 0
    for i in range(0, detections.shape[2]):
        if detections[0, 0, i, 2] < self.confidenceThreshold:
            continue
        (startX, startY, endX, endY) = detections[0, 0, i, 3:7]
        area = (startX - endX) * (startY - endY)
        if prevArea < area:
            prevArea = area
            largest = [startX, startY, endX, endY]
    largest = [1 if X > 1 else X for X in largest]
    largest[0] *= width
    largest[2] *= width
    largest[1] *= height
    largest[3] *= height
    largest = [int(round(X)) for X in largest]
    if largest[0] + largest[2] < 1 or largest[1] + largest[3] < 1:
        # no faces found, won't push forward
        self.setErrored("No face found.")
    else:
        rect = BoundingBox_twopoint(*largest)
        return rect
    return image

def detect_and_predict_mask(frame, faceNet, maskNet):
    # grab the dimensions of the frame and then construct a blob from it
    (h, w) = frame.shape[:2]
    blob = dnn.blobFromImage(frame, 1.0, (224, 224), (104.0, 177.0, 123.0))
    # pass the blob through the network and obtain the face detections
    faceNet.setInput(blob)
    detections = faceNet.forward()
    print(detections.shape)
    # initialize our list of faces, their corresponding locations,
    # and the list of predictions from our face mask network
    faces = []
    locs = []
    preds = []
    # loop over the detections
    for i in range(0, detections.shape[2]):
        # extract the confidence (i.e., probability) associated with the detection
        confidence = detections[0, 0, i, 2]
        # filter out weak detections by ensuring the confidence is
        # greater than the minimum confidence
        if confidence > 0.5:
            # compute the (x, y)-coordinates of the bounding box for the object
            box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
            (startX, startY, endX, endY) = box.astype("int")
            # ensure the bounding boxes fall within the dimensions of the frame
            (startX, startY) = (max(0, startX), max(0, startY))
            (endX, endY) = (min(w - 1, endX), min(h - 1, endY))
            # extract the face ROI, convert it from BGR to RGB channel
            # ordering, resize it to 224x224, and preprocess it
            face = frame[startY:endY, startX:endX]
            face = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)
            face = cv2.resize(face, (224, 224))
            face = img_to_array(face)
            face = preprocess_input(face)
            # add the face and bounding box to their respective lists
            faces.append(face)
            locs.append((startX, startY, endX, endY))
    # only make predictions if at least one face was detected
    if len(faces) > 0:
        # for faster inference we'll make batch predictions on *all*
        # faces at the same time rather than one-by-one predictions
        # in the above `for` loop
        faces = np.array(faces, dtype="float32")
        preds = maskNet.predict(faces, batch_size=32)
    # return a 2-tuple of the face locations and their corresponding predictions
    return (locs, preds)

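# Hedged usage sketch (not part of the original source): assumes `faceNet` and
# `maskNet` have already been loaded elsewhere and that `frame` is a BGR image.
# It only draws the detected face boxes with the top class probability, since the
# class ordering of maskNet's output is not known from the snippet above.
def demo_detect_and_predict_mask(frame, faceNet, maskNet):
    locs, preds = detect_and_predict_mask(frame, faceNet, maskNet)
    for (startX, startY, endX, endY), pred in zip(locs, preds):
        cv2.rectangle(frame, (startX, startY), (endX, endY), (0, 255, 0), 2)
        cv2.putText(frame, "p=%.2f" % float(np.max(pred)), (startX, startY - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
    return frame
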
def inference():
    proto = '/Users/hongyanma/gitspace/python/python/Ultra-Light-Fast-Generic-Face-Detector-1MB/caffe/model/RFB-320/RFB-320.prototxt'
    weights = '/Users/hongyanma/gitspace/python/python/Ultra-Light-Fast-Generic-Face-Detector-1MB/caffe/model/RFB-320/RFB-320.caffemodel'
    net = cv2.dnn.readNetFromCaffe(proto, weights)  # caffe model converted from onnx
    # net = dnn.readNetFromCaffe(args.caffe_prototxt_path, args.caffe_model_path)  # onnx version
    input_size = [int(v.strip()) for v in args.input_size.split(",")]
    width = input_size[0]
    height = input_size[1]
    priors = define_img_size(input_size)
    result_path = args.results_path
    # video_url = '/Users/hongyanma/Desktop/liu-wu.mp4'
    video_url = 0
    cap = cv2.VideoCapture(video_url)
    while cap.isOpened():
        ret, img_ori = cap.read()
        if ret:
            rect = cv2.resize(img_ori, (width, height))
            rect = cv2.cvtColor(rect, cv2.COLOR_BGR2RGB)
            net.setInput(dnn.blobFromImage(rect, 1 / image_std, (width, height), 127))
            time_time = time.time()
            boxes, scores = net.forward(["boxes", "scores"])
            print("inference time: {} s".format(round(time.time() - time_time, 4)))
            boxes = np.expand_dims(np.reshape(boxes, (-1, 4)), axis=0)
            scores = np.expand_dims(np.reshape(scores, (-1, 2)), axis=0)
            boxes = convert_locations_to_boxes(boxes, priors, center_variance, size_variance)
            boxes = center_form_to_corner_form(boxes)
            boxes, labels, probs = predict(img_ori.shape[1], img_ori.shape[0],
                                           scores, boxes, args.threshold)
            for i in range(boxes.shape[0]):
                box = boxes[i, :]
                cv2.rectangle(img_ori, (box[0], box[1]), (box[2], box[3]), (0, 255, 0), 2)
            cv2.imshow("ultra_face_ace_opencvdnn_py", img_ori)
            key = cv2.waitKey(1)
            if key == ord("q"):
                break
    cv2.destroyAllWindows()

def _process_body(self, img: Image = None) -> (Image, FaceDetectorResult):
    img_array = img.get_work_img_array()
    h, w = img.get_height(), img.get_width()
    blob = dnn.blobFromImage(cv2.resize(img_array, (300, 300)), 1.0, (300, 300),
                             (104.0, 117.0, 123.0))
    self._net.setInput(blob)
    faces_coffee = self._net.forward()
    faces_rectangles = []
    faces_dictionary = {}
    for i in range(faces_coffee.shape[2]):
        confidence = faces_coffee[0, 0, i, 2]
        if confidence > 0.5:
            box = faces_coffee[0, 0, i, 3:7] * np.array([w, h, w, h])
            if self._find_best:
                faces_dictionary[confidence] = box.astype("int")
            else:
                (x, y, x1, y1) = box.astype("int")
                faces_rectangles.append(Rectangle(Point(x, y), Point(x1, y1)))
    if self._find_best and faces_dictionary:
        key = max(faces_dictionary.keys())
        (x, y, x1, y1) = faces_dictionary[key]  # best item
        faces_rectangles.append(Rectangle(Point(x, y), Point(x1, y1)))
    return img, FaceDetectorResult(self, rectangles=faces_rectangles)

def get_facebox(image=None, threshold=0.5):
    """
    Get the bounding boxes of faces in an image.
    """
    rows = image.shape[0]
    cols = image.shape[1]
    confidences = []
    faceboxes = []
    NET.setInput(dnn.blobFromImage(
        image, 1.0, (WIDTH, HEIGHT), (104.0, 177.0, 123.0), False, False))
    detections = NET.forward()
    for result in detections[0, 0, :, :]:
        confidence = result[2]
        if confidence > threshold:
            x_left_bottom = int(result[3] * cols)
            y_left_bottom = int(result[4] * rows)
            x_right_top = int(result[5] * cols)
            y_right_top = int(result[6] * rows)
            confidences.append(confidence)
            faceboxes.append(
                [x_left_bottom, y_left_bottom, x_right_top, y_right_top])
    return confidences, faceboxes

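# Hedged usage sketch (not part of the original source): assumes the module-level
# NET, WIDTH and HEIGHT used by get_facebox() are already configured and that
# `image` is a BGR array from cv2.imread. The helper name draw_faceboxes is
# hypothetical.
def draw_faceboxes(image, threshold=0.5):
    confidences, faceboxes = get_facebox(image, threshold)
    for conf, (x1, y1, x2, y2) in zip(confidences, faceboxes):
        cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv2.putText(image, "%.2f" % conf, (x1, y1 - 5),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
    return image
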
def detect_face(image, net):
    """
    Face detection.
    :param: numpy, RGB image
    :return: list, best face box [x1, y1, x2, y2]
    """
    # net = dnn.readNetFromONNX(args.onnx_path)  # onnx version
    width = input_size[0]
    height = input_size[1]
    priors = define_img_size(input_size)
    rect = cv2.resize(image, (width, height))
    net.setInput(dnn.blobFromImage(rect, 1 / image_std, (width, height), 127))
    boxes, scores = net.forward(["boxes", "scores"])
    boxes = np.expand_dims(np.reshape(boxes, (-1, 4)), axis=0)
    scores = np.expand_dims(np.reshape(scores, (-1, 2)), axis=0)
    boxes = convert_locations_to_boxes(boxes, priors, center_variance, size_variance)
    boxes = center_form_to_corner_form(boxes)
    boxes, labels, probs = predict(image.shape[1], image.shape[0], scores, boxes, threshold)
    if boxes.shape[0] == 0:
        return None
    else:
        best_id = np.argmax(probs)
        best_box = boxes[best_id]
        best_box = [
            max(0, best_box[0]),
            max(0, best_box[1]),
            min(image.shape[1], best_box[2]),
            min(image.shape[0], best_box[3])
        ]
        return best_box

def inference():
    net = dnn.readNetFromONNX(args.onnx_path)  # onnx version
    # net = dnn.readNetFromCaffe(args.caffe_prototxt_path, args.caffe_model_path)  # caffe model converted from onnx
    input_size = [int(v.strip()) for v in args.input_size.split(",")]
    width = input_size[0]
    height = input_size[1]
    priors = define_img_size(input_size)
    result_path = args.results_path
    imgs_path = args.imgs_path
    if not os.path.exists(result_path):
        os.makedirs(result_path)
    listdir = os.listdir(imgs_path)
    for file_path in listdir:
        img_path = os.path.join(imgs_path, file_path)
        img_ori = cv2.imread(img_path)
        rect = cv2.resize(img_ori, (width, height))
        rect = cv2.cvtColor(rect, cv2.COLOR_BGR2RGB)
        net.setInput(dnn.blobFromImage(rect, 1 / image_std, (width, height), 127))
        time_time = time.time()
        boxes, scores = net.forward(["boxes", "scores"])
        print("inference time: {} s".format(round(time.time() - time_time, 4)))
        boxes = np.expand_dims(np.reshape(boxes, (-1, 4)), axis=0)
        scores = np.expand_dims(np.reshape(scores, (-1, 2)), axis=0)
        boxes = convert_locations_to_boxes(boxes, priors, center_variance, size_variance)
        boxes = center_form_to_corner_form(boxes)
        boxes, labels, probs = predict(img_ori.shape[1], img_ori.shape[0],
                                       scores, boxes, args.threshold)
        for i in range(boxes.shape[0]):
            box = boxes[i, :]
            cv2.rectangle(img_ori, (box[0], box[1]), (box[2], box[3]), (0, 255, 0), 2)
        cv2.imwrite(os.path.join(result_path, file_path), img_ori)
        print("result_pic is written to {}".format(os.path.join(result_path, file_path)))
        cv2.imshow("ultra_face_ace_opencvdnn_py", img_ori)
        cv2.waitKey(-1)
    cv2.destroyAllWindows()

def recognition_word(img, boxes, rec_modelPath):
    deploy = os.path.join(rec_modelPath, 'deploy.prototxt')
    weights = os.path.join(rec_modelPath, 'weights.caffemodel')
    labelPath = os.path.join(rec_modelPath, 'label.txt')
    text_ = []
    Confidence_ = []
    row = open(labelPath, encoding='gbk').read().strip().split("\n")
    class_label = row
    net = dnn.readNetFromCaffe(deploy, weights)
    for box in boxes:
        img_word = img[int(box[1]):int(box[5]), int(box[0]):int(box[4])].copy()
        img_word = cv.cvtColor(img_word, cv.COLOR_RGB2GRAY)
        # img_word = img_word[:, :, 0]
        img_word = cv.resize(img_word, (64, 64))
        blob = dnn.blobFromImage(img_word, 1, (64, 64), (0.))
        text, Confidence = model_predict(blob, net, class_label, 1)
        text_.append(text[0])
        Confidence_.append(round(Confidence[0], 2))
    # print(text_)
    # print(Confidence_)
    return text_, Confidence_

def inference(frame, show=False):
    # net = dnn.readNetFromONNX(args.onnx_path)  # onnx version
    input_size = [int(v.strip()) for v in args.input_size.split(",")]
    # print("input size", input_size)
    img_ori = frame
    img_cln = frame.copy()
    ori_size = img_ori.shape
    width = input_size[0]
    height = input_size[1]
    priors = define_img_size(input_size)
    # video_midpoint = (int(input_size[0] / 2), int(input_size[1] / 2))
    video_midpoint = (int(ori_size[1] / 2), int(ori_size[0] / 2))
    # print("ori size", ori_size)
    rect = cv2.resize(img_ori, (width, height))
    # img_cln = rect
    # cv2.circle(img_ori, video_midpoint, 4, (250, 200, 0), 6)
    rect = cv2.cvtColor(rect, cv2.COLOR_BGR2RGB)
    net.setInput(dnn.blobFromImage(rect, 1 / image_std, (width, height), 127))
    time_time = time.time()
    boxes, scores = net.forward(["boxes", "scores"])
    # print("inference time: {} s".format(round(time.time() - time_time, 4)))
    boxes = np.expand_dims(np.reshape(boxes, (-1, 4)), axis=0)
    scores = np.expand_dims(np.reshape(scores, (-1, 2)), axis=0)
    boxes = convert_locations_to_boxes(boxes, priors, center_variance, size_variance)
    boxes = center_form_to_corner_form(boxes)
    boxes, labels, probs = predict(img_ori.shape[1], img_ori.shape[0],
                                   scores, boxes, args.threshold)
    rectangles = []
    locations = []
    for i in range(boxes.shape[0]):
        box = boxes[i, :]
        cv2.rectangle(img_ori, (box[0], box[1]), (box[2], box[3]), (0, 255, 0), 2)
        rectangle = [box[0], box[1], box[2], box[3]]
        loc = (int(box[0] + (box[2] - box[0]) / 2), int(box[1] + (box[3] - box[1]) / 2))
        loc_from_center = (loc[0] - video_midpoint[0], loc[1] - video_midpoint[1])
        # cv2.line(img_ori, video_midpoint, loc, (0, 200, 0), 5)
        # cv2.circle(img_ori, loc, 4, (0, 200, 200), 3)
        rectangles.append(rectangle)
        locations.append(loc_from_center)
    if show:
        cv2.imshow("ultra_face_ace_opencvdnn_py", img_ori)
        cv2.waitKey(1)
    # print(locations)
    # print(rectangles)
    # print(img_ori)
    return locations, rectangles, img_ori, img_cln

def get_face_bounding_boxes(image, threshold=0.7, max_faces=400, min_face_size=(25, 25)):
    """
    Gets a list of tuples describing the bounding boxes of faces found in an image.
    The tuples are of the form (confidence, min x, min y, max x, max y).
    If no faces are found, an empty list is returned.

    @param image RGB image to extract faces from.
    @param threshold The minimum confidence required to extract a face.
    @param max_faces The maximum number of faces to extract. Cannot be more than 400.
    @param min_face_size Minimum size of an extracted face. If the face bounds are
        smaller than this, the face is discarded.
    @return A list of tuples where each tuple corresponds to a face instance. The first
        tuple entry is the confidence, and the remaining four entries are the minimum x
        coordinate, minimum y coordinate, maximum x coordinate, and maximum y coordinate
        of the face's bounding box.
    """
    (height, width) = image.shape[:2]
    padded_image = np.zeros((height * 2, width * 2, image.shape[2]), dtype=np.uint8)
    padded_image[:height, :width] = image
    blob = blobFromImage(resize(padded_image, (300, 300)), 1, (300, 300),
                         (103.93, 116.77, 123.68), swapRB=True)
    net.setInput(blob)
    detections = net.forward()
    face_bounds_data = []
    possible_faces = detections[0, 0, detections[0, 0, :, 2] > threshold, 2:7]
    possible_faces = possible_faces[0:max_faces]
    # Filter the possible faces to make sure they fit in the non-padded image.
    # If part (but not all) of a face lies in the padded region, clip its bounds so it
    # lies completely in the non-padded region.
    for possible_face in possible_faces:
        (confidence, min_x, min_y, max_x, max_y) = possible_face
        min_x = max(int(min_x * width * 2), 0)
        min_y = max(int(min_y * height * 2), 0)
        max_x = min(int(max_x * width * 2), width * 2)
        max_y = min(int(max_y * height * 2), height * 2)
        # Skip the face if its minimum coordinates fall in the padded section,
        # or if the face is too small.
        if (min_x > width or min_y > height
                or max_x - min_x < min_face_size[0]
                or max_y - min_y < min_face_size[1]):
            continue
        face_bounds_data.append((confidence, min_x, min_y, max_x, max_y))
    return face_bounds_data

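# Hedged usage sketch (not part of the original source): assumes the module-level
# `net` used by get_face_bounding_boxes() is already loaded and `image` is an RGB
# array. The crop helper below is hypothetical; it simply slices out each detected
# face using the (confidence, min x, min y, max x, max y) tuples described above.
def crop_detected_faces(image, threshold=0.7):
    crops = []
    for confidence, min_x, min_y, max_x, max_y in get_face_bounding_boxes(image, threshold):
        crops.append(image[min_y:max_y, min_x:max_x])
    return crops
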
def detect(cpp):
    frame_origin = cv2.imread(cpp)
    cv2.imshow("detection_or", frame_origin)
    frame = cv2.resize(frame_origin, (1024, 720), interpolation=cv2.INTER_CUBIC)
    cv2.imshow("detection_res", frame)
    blob = dnn.blobFromImage(frame, inScaleFactor, (inWidth, inHeight), meanVal)
    net.setInput(blob)
    t0 = time.time()
    detections = net.forward()
    print(time.time() - t0)
    cols = frame.shape[1]
    rows = frame.shape[0]
    ## if cols / float(rows) > WHRatio:
    ##     cropSize = (int(rows * WHRatio), rows)
    ## else:
    ##     cropSize = (cols, int(cols / WHRatio))
    ##
    ## y1 = (rows - cropSize[1]) // 2
    ## y2 = y1 + cropSize[1]
    ## x1 = (cols - cropSize[0]) // 2
    ## x2 = x1 + cropSize[0]
    ## frame = frame[y1:y2, x1:x2]
    ##
    ## cols = frame.shape[1]
    ## rows = frame.shape[0]
    for i in range(detections.shape[2]):
        confidence = detections[0, 0, i, 2]
        if confidence > 0.0:
            class_id = int(detections[0, 0, i, 1])
            xLeftBottom = int(detections[0, 0, i, 3] * cols)
            yLeftBottom = int(detections[0, 0, i, 4] * rows)
            xRightTop = int(detections[0, 0, i, 5] * cols)
            yRightTop = int(detections[0, 0, i, 6] * rows)
            cv2.rectangle(frame, (xLeftBottom, yLeftBottom), (xRightTop, yRightTop),
                          (0, 255, 255))
            image_sub = frame[yLeftBottom:yRightTop, xLeftBottom:xRightTop]
            print(confidence)
            print(yLeftBottom, yRightTop, xLeftBottom, xRightTop)
            # e2e.recognizeOne(image_sub)
            label = classNames[class_id] + ": " + str(confidence)
            labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
            # cv2.rectangle(frame, (xLeftBottom, yLeftBottom - labelSize[1]),
            #               (xLeftBottom + labelSize[0], yLeftBottom + baseLine),
            #               (255, 255, 255), 2, cv2.FILLED)
            cv2.putText(frame, label, (xLeftBottom, yLeftBottom),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0))
    cv2.imshow("detection", frame)
    return frame

def detect(frame):
    blob = dnn.blobFromImage(frame, 1, (1024, 768), (0, 0, 0), True)
    net = dnn.readNetFromCaffe(cm_path + 'density.prototxt',
                               cm_path + 'density.caffemodel')
    net.setInput(blob)
    density = net.forward()
    density = density / 1000.0
    person_num = np.sum(density[:])
    return int(person_num)

def text_pehchano(image, min_confidence=0.85, width=320, height=320, padding=0.00):
    '''
    image = input image (type=numpy.ndarray)
    min_confidence = minimum confidence threshold to detect text from image (default=0.85)
    width = resizing width of image (default=320)
    height = resizing height of image (default=320)
    padding = fractional padding added around each detected text box (default=0.00)
    '''
    east = 'freshlybuiltimagebol/models/frozen_east_text_detection.pb'
    orig = image.copy()
    (origH, origW) = image.shape[:2]
    (newW, newH) = (width, height)
    rW = origW / float(newW)
    rH = origH / float(newH)
    image = resize(image, (newW, newH))
    (H, W) = image.shape[:2]
    layerNames = [
        "feature_fusion/Conv_7/Sigmoid",
        "feature_fusion/concat_3"]
    net = readNet(east)
    blob = blobFromImage(image, 1.0, (W, H), (123.68, 116.78, 103.94),
                         swapRB=True, crop=False)
    net.setInput(blob)
    (scores, geometry) = net.forward(layerNames)
    (rects, confidences) = NaturalPhotoShabd.result_vyakhya_kro(scores, geometry, min_confidence)
    boxes = non_max_suppression(array(rects), probs=confidences)
    results = []
    for (startX, startY, endX, endY) in boxes:
        startX = int(startX * rW)
        startY = int(startY * rH)
        endX = int(endX * rW)
        endY = int(endY * rH)
        dX = int((endX - startX) * padding)
        dY = int((endY - startY) * padding)
        startX = max(0, startX - dX)
        startY = max(0, startY - dY)
        endX = min(origW, endX + (dX * 2))
        endY = min(origH, endY + (dY * 2))
        roi = orig[startY:endY, startX:endX]
        config = ("-l eng --psm 7")
        text = image_to_string(roi, config=config)
        results.append(((startX, startY, endX, endY), text))
    results = sorted(results, key=lambda r: r[0][1])
    output = orig.copy()
    for ((startX, startY, endX, endY), text) in results:
        # print("OCR TEXT : ", text)
        text = "".join([c if ord(c) < 128 else "" for c in text]).strip()
        # ShabdDhwani.shabd_se_dhwani(text, 'english', "out.mp3")
        # playsound('out.mp3')
        rectangle(output, (startX, startY), (endX, endY), (0, 0, 255), 1)
        putText(output, text, (startX, startY - 20),
                FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)
    return output

def detect(cpp):
    frame = cv2.imread(cpp)
    blob = dnn.blobFromImage(frame, inScaleFactor, (inWidth, inHeight), meanVal)
    net.setInput(blob)
    t0 = time.time()
    detections = net.forward()
    print(time.time() - t0)
    cols = frame.shape[1]
    rows = frame.shape[0]
    if cols / float(rows) > WHRatio:
        cropSize = (int(rows * WHRatio), rows)
    else:
        cropSize = (cols, int(cols / WHRatio))
    y1 = (rows - cropSize[1]) // 2
    y2 = y1 + cropSize[1]
    x1 = (cols - cropSize[0]) // 2
    x2 = x1 + cropSize[0]
    frame = frame[y1:y2, x1:x2]
    cols = frame.shape[1]
    rows = frame.shape[0]
    for i in range(detections.shape[2]):
        confidence = detections[0, 0, i, 2]
        if confidence > 0.2:
            class_id = int(detections[0, 0, i, 1])
            xLeftBottom = int(detections[0, 0, i, 3] * cols)
            yLeftBottom = int(detections[0, 0, i, 4] * rows)
            xRightTop = int(detections[0, 0, i, 5] * cols)
            yRightTop = int(detections[0, 0, i, 6] * rows)
            cv2.rectangle(frame, (xLeftBottom, yLeftBottom), (xRightTop, yRightTop),
                          (0, 255, 255))
            image_sub = frame[yLeftBottom:yRightTop, xLeftBottom:xRightTop]
            print(yLeftBottom, yRightTop, xLeftBottom, xRightTop)
            # e2e.recognizeOne(image_sub)
            label = classNames[class_id] + ": " + str(confidence)
            labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
            # cv2.rectangle(frame, (xLeftBottom, yLeftBottom - labelSize[1]),
            #               (xLeftBottom + labelSize[0], yLeftBottom + baseLine),
            #               (255, 255, 255), 2, cv2.FILLED)
            cv2.putText(frame, label, (xLeftBottom, yLeftBottom),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0))
    return frame

def _run(self, image):
    """
    image: input image
    return: detections, perf_stats
    """
    self.net.setInput(dnn.blobFromImage(image, 1.0, (self.inWidth, self.inHeight),
                                        (104.0, 177.0, 123.0), False, False))
    self.detections = self.net.forward()
    self.perf_stats = self.net.getPerfProfile()
    return self.detections, self.perf_stats

def createBlobFromImage(image, newSize, scale=(1.0 / 255), meanSubtract=(0, 0, 0),
                        swapRB=True, crop=False):
    return dnn.blobFromImage(image, scale, newSize, meanSubtract, swapRB=swapRB, crop=crop)

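# Hedged usage sketch (not part of the original source): shows how the
# createBlobFromImage() wrapper above would typically feed a loaded network.
# `some_net` and the 300x300 input size are placeholders, not values taken from
# the original code.
def classify_with_blob(some_net, image):
    blob = createBlobFromImage(image, (300, 300))
    some_net.setInput(blob)
    return some_net.forward()
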
def detect(cpp):
    frame = cv2.imread(cpp)
    blob = dnn.blobFromImage(frame, inScaleFactor, (inWidth, inHeight), meanVal)
    net.setInput(blob)
    t0 = time.time()
    detections = net.forward()
    print(time.time() - t0)
    cols = frame.shape[1]
    rows = frame.shape[0]
    if cols / float(rows) > WHRatio:
        cropSize = (int(rows * WHRatio), rows)
    else:
        cropSize = (cols, int(cols / WHRatio))
    y1 = (rows - cropSize[1]) // 2
    y2 = y1 + cropSize[1]
    x1 = (cols - cropSize[0]) // 2
    x2 = x1 + cropSize[0]
    frame = frame[y1:y2, x1:x2]
    cols = frame.shape[1]
    rows = frame.shape[0]
    for i in range(detections.shape[2]):
        confidence = detections[0, 0, i, 2]
        if confidence > 0.2:
            class_id = int(detections[0, 0, i, 1])
            xLeftBottom = int(detections[0, 0, i, 3] * cols)
            yLeftBottom = int(detections[0, 0, i, 4] * rows)
            xRightTop = int(detections[0, 0, i, 5] * cols)
            yRightTop = int(detections[0, 0, i, 6] * rows)
            cv2.rectangle(frame, (xLeftBottom, yLeftBottom), (xRightTop, yRightTop),
                          (0, 255, 255))
            image_sub = frame[yLeftBottom:yRightTop, xLeftBottom:xRightTop]
            print(yLeftBottom, yRightTop, xLeftBottom, xRightTop)
            # e2e.recognizeOne(image_sub)
            label = classNames[class_id] + ": " + str(confidence)
            labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
            # cv2.rectangle(frame, (xLeftBottom, yLeftBottom - labelSize[1]),
            #               (xLeftBottom + labelSize[0], yLeftBottom + baseLine),
            #               (255, 255, 255), 2, cv2.FILLED)
            cv2.putText(frame, label, (xLeftBottom, yLeftBottom),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0))
    return frame

def face_detection():
    net = dnn.readNetFromCaffe(prototxt, caffemodel)
    # cap = cv.VideoCapture(0)
    # cap = cv.VideoCapture("E:/视频库/srcImage/OneStopMoveEnter1cor.avi")
    frame = cv.imread("face01.jpg")
    while True:
        # ret, frame = cap.read()
        cols = frame.shape[1]
        rows = frame.shape[0]
        net.setInput(
            dnn.blobFromImage(frame, 1.0, (inWidth, inHeight),
                              (104.0, 177.0, 123.0), False, False))
        detections = net.forward()
        perf_stats = net.getPerfProfile()
        print('Inference time: %.2f ms' % (perf_stats[0] / cv.getTickFrequency() * 1000))
        for i in range(detections.shape[2]):
            confidence = detections[0, 0, i, 2]
            if confidence > confThreshold:
                global count
                count += 1
                print(confidence)
                xLeftBottom = int(detections[0, 0, i, 3] * cols)
                yLeftBottom = int(detections[0, 0, i, 4] * rows)
                xRightTop = int(detections[0, 0, i, 5] * cols)
                yRightTop = int(detections[0, 0, i, 6] * rows)
                cv.rectangle(frame, (xLeftBottom, yLeftBottom),
                             (xRightTop, yRightTop), (0, 255, 0))
                label = "face: %.4f" % confidence
                labelSize, baseLine = cv.getTextSize(label, cv.FONT_HERSHEY_SIMPLEX, 0.5, 1)
                cv.rectangle(
                    frame, (xLeftBottom, yLeftBottom - labelSize[1]),
                    (xLeftBottom + labelSize[0], yLeftBottom + baseLine),
                    (255, 255, 255), cv.FILLED)
                cv.putText(frame, label, (xLeftBottom, yLeftBottom),
                           cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0))
        cv.imshow("detections", frame)
        print('the num of face: %d ' % count)
        if cv.waitKey(0) != -1:
            break

def detect_res10(net, frame):
    inWidth = 300
    inHeight = 300
    means = (104., 177., 123.)
    ratio = 1.0
    # net.setInput(dnn.blobFromImage(cv2.resize(frame, (inWidth, inHeight)), ratio,
    #                                (inWidth, inHeight), means))
    net.setInput(
        dnn.blobFromImage(frame, ratio, (inWidth, inHeight), means,
                          swapRB=True, crop=False))
    detections = net.forward()
    return detections

def detect_mobilenet_widerface(net, frame):
    inWidth = 300
    inHeight = 300
    means = (127.5, 127.5, 127.5)
    ratio = 1.0 / 127.5
    # net.setInput(dnn.blobFromImage(cv2.resize(frame, (inWidth, inHeight)), ratio,
    #                                (inWidth, inHeight), means))
    net.setInput(
        dnn.blobFromImage(frame, ratio, (inWidth, inHeight), means,
                          swapRB=True, crop=False))
    detections = net.forward()
    return detections

def detect_faces(image, detector="cnn"):
    '''detect every face inside image. By default it uses the cnn detector,
    pass "dlib" to use the dlib frontal face detector.
    Returns a list of tuple\\rectangles: (top, left, right, bottom)'''
    # detect faces with dlib.frontal_face_detector
    if detector == "dlib":
        # load detector if needed
        global face_det
        if face_det is None:
            face_det = dlib.get_frontal_face_detector()
        dets = face_det(image, 1)
        boxes = []
        for d in dets:
            boxes.append(Region.dlib(d))
        return boxes
    # detect faces with opencv caffe cnn detector
    assert(caffeNet)
    # get image dimensions
    (h, w) = image.shape[:2]
    np_arr = np.array([w, h, w, h])
    if h <= 0 or w <= 0:
        return []
    # convert image to blob (this does some preprocessing)
    blob = blobFromImage(cv2.resize(image, cf_size), cf_scale, size=cf_size,
                         mean=cf_values, swapRB=True)
    # obtain detections and predictions
    caffeNet.setInput(blob)
    detections = caffeNet.forward()
    # detected face-boxes
    boxes = []
    for i in range(0, detections.shape[2]):
        confidence = detections[0, 0, i, 2]
        if confidence >= confidence_threshold:
            # compute the bounding box of the face
            box = detections[0, 0, i, 3:7] * np_arr
            boxes.append(Region.tuple(box.astype("int")))
    return boxes

def detect(self, frame):
    """
    detect faces in an image
    :param frame: image in opencv format
    :return: face information array in shape of [N * 7]
    """
    self.net.setInput(dnn.blobFromImage(frame, 1.0, (self.inWidth, self.inHeight),
                                        (104.0, 177.0, 123.0), False, False))
    detections = self.net.forward()
    detections = detections[detections[:, :, :, 2] > 0.5]
    perf_stats = self.net.getPerfProfile()
    return detections

def dnn_predict(net: dnn_Net, input_shape, frame: Image, classes):
    """
    https://github.com/opencv/opencv/tree/master/samples/dnn
    https://github.com/tensorflow/models/tree/master/research
        e.g. object_detection (https://github.com/opencv/opencv/wiki/TensorFlow-Object-Detection-API), TF-slim
    https://github.com/opencv/opencv_extra/blob/master/testdata/dnn/download_models.py
    """
    blob = blobFromImage(frame, size=input_shape)

    # Run the model
    net.setInput(blob)
    out = net.forward()

    # Single object classification: class with the highest score
    flat_out = out.flatten()
    class_id = argmax(flat_out)
    confidence = flat_out[class_id]

    # Predicted class
    info('%s: %.4f' % (classes[class_id] if classes else 'Class #%d' % class_id, confidence))

    # Multiple object detection:
    bbox_color = (0, 255, 0)
    threshold = 0.5  # 0.3 for detection
    for detection in out[0, 0, :, :]:
        score = float(detection[2])
        if score > threshold:
            left = detection[3] * frame.width
            top = detection[4] * frame.height
            right = detection[5] * frame.width
            bottom = detection[6] * frame.height
            tl = (uint16(left), uint16(top))
            br = (uint16(right), uint16(bottom))
            rectangle(frame, tl, br, bbox_color)

    # Efficiency information
    t, _ = net.getPerfProfile()
    info('Inference time: %.2f ms' % (t * 1000.0 / getTickFrequency()))

def detect(self, image):
    net = self.classifier
    height, width = image.shape[:2]
    blob = blobFromImage(resize(image, (300, 300)), 1.0, (300, 300),
                         (104.0, 177.0, 123.0))
    net.setInput(blob)
    detections = net.forward()
    faces = []
    for i in range(0, detections.shape[2]):
        confidence = detections[0, 0, i, 2]
        if confidence < self.confidenceThreshold:
            continue
        box = detections[0, 0, i, 3:7] * np.array([width, height, width, height])
        startX, startY, endX, endY = box.astype("int")
        faces.append(np.array([startX, startY, endX - startX, endY - startY]))
    return faces

def detect(self, image):
    result = []
    (h, w, c) = image.shape
    blob = dnn.blobFromImage(cv2.resize(image, (self.width, self.height)),
                             self.scale, (self.width, self.height), self.mean_val)
    self.net.setInput(blob)
    detections = self.net.forward()
    for i in range(detections.shape[2]):
        confidence = detections[0, 0, i, 2]
        if confidence > self.threshold:
            idx = int(detections[0, 0, i, 1])
            box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
            box = box.astype("int")
            obj = Target()
            obj.class_name = self.dict[idx]
            obj.box = box
            obj.conf = confidence
            result.append(obj)
    return result

def recognize_face(self, face):
    # construct a blob for the face ROI, then pass the blob
    # through our face embedding model to obtain the 128-d
    # quantification of the face
    faceBlob = blobFromImage(face, 1.0 / 255, (96, 96), (0, 0, 0),
                             swapRB=True, crop=False)
    self.embedder.setInput(faceBlob)
    vector = self.embedder.forward()
    # perform classification to recognize the face
    face_recognizer_preds = self.recognizer.predict_proba(vector)[0]
    proba = face_recognizer_preds[np.argmax(face_recognizer_preds)]
    name = (self.le.classes_[np.argmax(face_recognizer_preds)]
            if proba > self.confidenceThreshold else 'Unknown')
    return name

def detect(self, image):
    """
    Takes an image and, using the neural network, provides the locations of
    human hands in that image.

    Args:
        image: image/frame to make the detections on.

    Returns:
        indexes: indexes of the detected hands kept after non-maximum suppression
            (in order to provide multiple hand detections).
        boxes: location(s) of the hand(s) in the frame.
        confidences: confidence score for each box.
    """
    # ref: https://github.com/darshanadakane/yolov3_objectdetection
    height, width, _ = image.shape
    blob = dnn.blobFromImage(image, 0.00392, (416, 416), swapRB=True, crop=False)
    self.network.setInput(blob)
    outs = self.network.forward(self.output_layers)
    confidences = []
    boxes = []
    for out in outs:
        for detection in out:
            scores = detection[5:]
            confidence = scores[0]
            box = detection[0:4] * np.array([width, height, width, height])
            (center_x, center_y, w, h) = box.astype("int")
            x = int(center_x - w / 2)
            y = int(center_y - h / 2)
            boxes.append([x, y, int(w), int(h)])
            confidences.append(float(confidence))
    indexes = dnn.NMSBoxes(boxes, confidences, 0.4, 0.6)
    return indexes, boxes, confidences

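# Hedged usage sketch (not part of the original source): cv2.dnn.NMSBoxes returns
# only the indices of the boxes to keep (its shape varies between OpenCV versions),
# so the raw boxes usually need to be filtered before drawing. `detector` stands in
# for an instance of the class that owns the detect() method above.
def draw_hand_detections(detector, image):
    indexes, boxes, confidences = detector.detect(image)
    for i in np.array(indexes).flatten():
        x, y, w, h = boxes[int(i)]
        cv2.rectangle(image, (x, y), (x + w, y + h), (255, 0, 0), 2)
    return image
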
def face_detection(self):
    net = dnn.readNetFromCaffe(prototxt, caffemodel)
    net.setInput(
        dnn.blobFromImage(self.image, 1.0, (inWidth, inHeight),
                          (104.0, 177.0, 123.0), False))
    detections = net.forward()
    # print(detections.shape)
    # print(detections)
    cols = self.image.shape[1]
    rows = self.image.shape[0]
    for i in range(detections.shape[2]):
        confidence = detections[0, 0, i, 2]
        if confidence > confThreshold:
            self.count += 1
            # print(confidence)
            xLeftBottom = int(detections[0, 0, i, 3] * cols)
            yLeftBottom = int(detections[0, 0, i, 4] * rows)
            xRightTop = int(detections[0, 0, i, 5] * cols)
            yRightTop = int(detections[0, 0, i, 6] * rows)
            cv2.rectangle(self.image, (xLeftBottom, yLeftBottom),
                          (xRightTop, yRightTop), (0, 255, 0))
            label = "face: %.4f" % confidence
            # labelSize, baseLine = cv.getTextSize(label, cv.FONT_HERSHEY_SIMPLEX, 0.5, 1)
            cv2.putText(self.image, label, (xLeftBottom, yLeftBottom),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0))
    cvRGBImg = cv2.cvtColor(self.image, cv2.COLOR_BGR2RGB)
    qi = QImage(cvRGBImg.data, cvRGBImg.shape[1], cvRGBImg.shape[0],
                cvRGBImg.shape[1] * 3, QImage.Format_RGB888)
    pix = QPixmap.fromImage(qi)
    self.label2.setPixmap(pix)
    self.label2.show()
    self.label3.clear()
    if self.count > 0:
        self.label3.setText("Detected {} face(s) in the image".format(self.count))
    else:
        self.label3.setText("No face detected in the image")

def load_data(self, data):
    self.output_layer_names = self.model.getLayerNames()
    self.output_layer_names = [
        self.output_layer_names[i[0] - 1]
        for i in self.model.getUnconnectedOutLayers()
    ]
    if len(data.shape) == 4:
        self.height, self.width = data.shape[1:3]
        # TODO: image shape should not be hard coded.
        blob = blobFromImages(data, SCALE_FACTOR,
                              (IMG_SIDE_SIZE, IMG_SIDE_SIZE),
                              swapRB=True, crop=False)
        self.model.setInput(blob)
    else:
        self.height, self.width = data.shape[:2]
        blob = blobFromImage(data, SCALE_FACTOR,
                             (IMG_SIDE_SIZE, IMG_SIDE_SIZE),
                             swapRB=True, crop=False)
        self.model.setInput(blob)

def detect(self, image):
    """method to detect faces in input image"""
    classifier = self.classifier
    height, width = image.shape[:2]
    image_blob = blobFromImage(resize(image, (300, 300)), 1.0, (300, 300),
                               (103.93, 116.77, 123.68))
    classifier.setInput(image_blob)
    detections = classifier.forward()
    faces = []
    # loop over the detections
    for i in range(0, detections.shape[2]):
        confidence = detections[0, 0, i, 2]
        # filter out weak detections by ensuring the 'confidence' is greater
        # than the minimum confidence
        if confidence > self.confidence_threshold:
            # compute the coordinates of the bounding box for the object
            box = detections[0, 0, i, 3:7] * np.array([width, height, width, height])
            start_x, start_y, end_x, end_y = box.astype("int")
            # ensuring the bounding boxes fall within the dimensions of the frame
            faces.append(np.array([start_x, start_y, end_x - start_x, end_y - start_y]))
    return faces

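# Hedged usage sketch (not part of the original source): the detector above returns
# boxes as (x, y, width, height) arrays, so drawing them needs the width/height
# converted back into a corner point. `face_detector` is a placeholder instance and
# cv2 is assumed to be imported in the surrounding module.
def draw_detected_faces(face_detector, image):
    for (x, y, w, h) in face_detector.detect(image):
        cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)
    return image
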
from __future__ import print_function
import numpy as np
import cv2
from cv2 import dnn
import timeit


def timeit_forward(net):
    print("Runtime:", timeit.timeit(lambda: net.forward(), number=10))


def get_class_list():
    with open('synset_words.txt', 'rt') as f:
        return [x[x.find(" ") + 1:] for x in f]


blob = dnn.blobFromImage(cv2.imread('space_shuttle.jpg'), 1, (224, 224),
                         (104, 117, 123), False)
print("Input:", blob.shape, blob.dtype)
net = dnn.readNetFromCaffe('bvlc_googlenet.prototxt', 'bvlc_googlenet.caffemodel')
net.setInput(blob)
prob = net.forward()
# timeit_forward(net)  # Uncomment to check performance
print("Output:", prob.shape, prob.dtype)
classes = get_class_list()
print("Best match", classes[prob.argmax()])

inWidth = 300
inHeight = 300
confThreshold = 0.5
prototxt = 'face_detector/deploy.prototxt'
caffemodel = 'face_detector/res10_300x300_ssd_iter_140000.caffemodel'

if __name__ == '__main__':
    net = dnn.readNetFromCaffe(prototxt, caffemodel)
    cap = cv.VideoCapture(0)
    while True:
        ret, frame = cap.read()
        cols = frame.shape[1]
        rows = frame.shape[0]
        net.setInput(dnn.blobFromImage(frame, 1.0, (inWidth, inHeight),
                                       (104.0, 177.0, 123.0), False, False))
        detections = net.forward()
        perf_stats = net.getPerfProfile()
        print('Inference time, ms: %.2f' % (perf_stats[0] / cv.getTickFrequency() * 1000))
        for i in range(detections.shape[2]):
            confidence = detections[0, 0, i, 2]
            if confidence > confThreshold:
                xLeftBottom = int(detections[0, 0, i, 3] * cols)
                yLeftBottom = int(detections[0, 0, i, 4] * rows)
                xRightTop = int(detections[0, 0, i, 5] * cols)
                yRightTop = int(detections[0, 0, i, 6] * rows)
                cv.rectangle(frame, (xLeftBottom, yLeftBottom), (xRightTop, yRightTop),