Example #1
 def performDetect(self, imagePath, txt_tag=True):
     # Read the image file; np.fromfile + cv2.imdecode also handles paths
     # containing non-ASCII characters, which cv2.imread may not.
     self.frame = cv2.imdecode(np.fromfile(imagePath, dtype=np.uint8), -1)
     # cv2.imshow('', self.frame)
     # cv2.waitKey()
     # Create a 4D blob from a frame.
     blob = dnn.blobFromImage(self.frame,
                              1 / 255, (self.inpWidth, self.inpHeight),
                              [0, 0, 0],
                              1,
                              crop=False)
     # Sets the input to the network
     self.net.setInput(blob)
     # Runs the forward pass to get output of the output layers
     outs = self.net.forward(self.getOutputsNames())
     # Remove the bounding boxes with low confidence
     detections = self.postprocess(outs, txt_tag)
     result = {"detections": detections, "image": self.frame}
     # cv2.imwrite('detect/' + imagePath[imagePath.rindex('\\'):], self.frame)
     return result
Example #2
 def getFaceBB(self, image: Image_Type,
               passThrough: PushPipe.PassThrough) -> Image_Type:
     height, width, channels = image.shape[0:3]
     assert channels == 3
     blob = dnn.blobFromImage(resize(image, (300, 300)), 1.0, (300, 300),
                              [104, 117, 123], True, False)
     self.net.setInput(blob)
     detections = self.net.forward()
     # get largest face
     largest = [0, 0, 0, 0]
     prevArea = 0
     for i in range(0, detections.shape[2]):
         if (detections[0, 0, i, 2] < self.confidenceThreshold):
             continue
         (startX, startY, endX, endY) = detections[0, 0, i, 3:7]
         area = (startX - endX) * (startY - endY)
         if (prevArea < area):
             prevArea = area
             largest = [startX, startY, endX, endY]
     largest = [1 if X > 1 else X for X in largest]
     largest[0] *= width
     largest[2] *= width
     largest[1] *= height
     largest[3] *= height
     largest = [int(round(X)) for X in largest]
     if (largest[0] + largest[2] < 1 or largest[1] + largest[3] < 1):
         # no faces found, won't push forward
         self.setErrored("No face found.")
     else:
         rect = BoundingBox_twopoint(*largest)
         return rect
     return image
Example #3
def detect_and_predict_mask(frame, faceNet, maskNet):
    # grab the dimensions of the frame and then construct a blob
    # from it
    (h, w) = frame.shape[:2]
    blob = dnn.blobFromImage(frame, 1.0, (224, 224), (104.0, 177.0, 123.0))

    # pass the blob through the network and obtain the face detections
    faceNet.setInput(blob)
    detections = faceNet.forward()
    print(detections.shape)

    # initialize our list of faces, their corresponding locations,
    # and the list of predictions from our face mask network
    faces = []
    locs = []
    preds = []

    # loop over the detections
    for i in range(0, detections.shape[2]):
        # extract the confidence (i.e., probability) associated with
        # the detection
        confidence = detections[0, 0, i, 2]

        # filter out weak detections by ensuring the confidence is
        # greater than the minimum confidence
        if confidence > 0.5:
            # compute the (x, y)-coordinates of the bounding box for
            # the object
            box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
            (startX, startY, endX, endY) = box.astype("int")

            # ensure the bounding boxes fall within the dimensions of
            # the frame
            (startX, startY) = (max(0, startX), max(0, startY))
            (endX, endY) = (min(w - 1, endX), min(h - 1, endY))

            # extract the face ROI, convert it from BGR to RGB channel
            # ordering, resize it to 224x224, and preprocess it
            face = frame[startY:endY, startX:endX]
            face = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)
            face = cv2.resize(face, (224, 224))
            face = img_to_array(face)
            face = preprocess_input(face)

            # add the face and bounding boxes to their respective
            # lists
            faces.append(face)
            locs.append((startX, startY, endX, endY))

    # only make predictions if at least one face was detected
    if len(faces) > 0:
        # for faster inference we'll make batch predictions on *all*
        # faces at the same time rather than one-by-one predictions
        # in the above `for` loop
        faces = np.array(faces, dtype="float32")
        preds = maskNet.predict(faces, batch_size=32)

    # return a 2-tuple of the face locations and their corresponding
    # predictions
    return (locs, preds)
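Below is a minimal usage sketch for detect_and_predict_mask(); the model file names, the drawing logic, and the two-class (mask, withoutMask) output layout are assumptions rather than part of the original example, and the function itself additionally needs img_to_array and preprocess_input from tensorflow.keras.

import cv2
from tensorflow.keras.models import load_model

# assumed model files: OpenCV res10 SSD face detector + Keras mask classifier
faceNet = cv2.dnn.readNetFromCaffe("deploy.prototxt",
                                   "res10_300x300_ssd_iter_140000.caffemodel")
maskNet = load_model("mask_detector.model")

frame = cv2.imread("example.jpg")
locs, preds = detect_and_predict_mask(frame, faceNet, maskNet)
for (startX, startY, endX, endY), pred in zip(locs, preds):
    mask, withoutMask = pred  # assumed two-class output of the mask model
    label = "Mask" if mask > withoutMask else "No Mask"
    cv2.rectangle(frame, (startX, startY), (endX, endY), (0, 255, 0), 2)
    cv2.putText(frame, label, (startX, startY - 10),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
cv2.imwrite("example_out.jpg", frame)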
Example #4
def inference():
    proto = '/Users/hongyanma/gitspace/python/python/Ultra-Light-Fast-Generic-Face-Detector-1MB/caffe/model/RFB-320/RFB-320.prototxt'
    weights = '/Users/hongyanma/gitspace/python/python/Ultra-Light-Fast-Generic-Face-Detector-1MB/caffe/model/RFB-320/RFB-320.caffemodel'
    net = cv2.dnn.readNetFromCaffe(proto, weights)  # caffe model converted from onnx
    # net = dnn.readNetFromCaffe(args.caffe_prototxt_path, args.caffe_model_path)  # caffe model converted from onnx
    input_size = [int(v.strip()) for v in args.input_size.split(",")]
    width = input_size[0]
    height = input_size[1]
    priors = define_img_size(input_size)
    result_path = args.results_path
    # video_url = '/Users/hongyanma/Desktop/liu-wu.mp4'
    video_url = 0
    cap = cv2.VideoCapture(video_url)
    while cap.isOpened():
        ret, img_ori = cap.read()
        if ret:
            rect = cv2.resize(img_ori, (width, height))
            rect = cv2.cvtColor(rect, cv2.COLOR_BGR2RGB)
            net.setInput(dnn.blobFromImage(rect, 1 / image_std, (width, height), 127))
            time_time = time.time()
            boxes, scores = net.forward(["boxes", "scores"])
            print("inference time: {} s".format(round(time.time() - time_time, 4)))
            boxes = np.expand_dims(np.reshape(boxes, (-1, 4)), axis=0)
            scores = np.expand_dims(np.reshape(scores, (-1, 2)), axis=0)
            boxes = convert_locations_to_boxes(boxes, priors, center_variance, size_variance)
            boxes = center_form_to_corner_form(boxes)
            boxes, labels, probs = predict(img_ori.shape[1], img_ori.shape[0], scores, boxes, args.threshold)
            for i in range(boxes.shape[0]):
                box = boxes[i, :]
                cv2.rectangle(img_ori, (box[0], box[1]), (box[2], box[3]), (0, 255, 0), 2)
            cv2.imshow("ultra_face_ace_opencvdnn_py", img_ori)
            key = cv2.waitKey(1)
            if key == ord("q"):
                break
        else:
            break
    cv2.destroyAllWindows()
Example #5
    def _process_body(self, img: Image = None) -> (Image, FaceDetectorResult):
        img_array = img.get_work_img_array()
        h, w = img.get_height(), img.get_width()
        blob = dnn.blobFromImage(cv2.resize(img_array, (300, 300)), 1.0,
                                 (300, 300), (104.0, 117.0, 123.0))
        self._net.setInput(blob)
        faces_coffee = self._net.forward()
        faces_rectangles = []
        faces_dictionary = {}

        for i in range(faces_coffee.shape[2]):
            confidence = faces_coffee[0, 0, i, 2]
            if confidence > 0.5:
                box = faces_coffee[0, 0, i, 3:7] * np.array([w, h, w, h])
                if self._find_best:
                    faces_dictionary[confidence] = box.astype("int")
                else:
                    (x, y, x1, y1) = box.astype("int")
                    faces_rectangles.append(
                        Rectangle(Point(x, y), Point(x1, y1)))

        if self._find_best and faces_dictionary:
            key = max(faces_dictionary.keys())
            (x, y, x1, y1) = faces_dictionary[key]  # best item
            faces_rectangles.append(Rectangle(Point(x, y), Point(x1, y1)))

        return img, FaceDetectorResult(self, rectangles=faces_rectangles)
Example #6
def get_facebox(image=None, threshold=0.5):
    """
    Get the bounding box of faces in image.
    """
    rows = image.shape[0]
    cols = image.shape[1]

    confidences = []
    faceboxes = []

    NET.setInput(dnn.blobFromImage(
        image, 1.0, (WIDTH, HEIGHT), (104.0, 177.0, 123.0), False, False))
    detections = NET.forward()

    for result in detections[0, 0, :, :]:
        confidence = result[2]
        if confidence > threshold:
            x_left_bottom = int(result[3] * cols)
            y_left_bottom = int(result[4] * rows)
            x_right_top = int(result[5] * cols)
            y_right_top = int(result[6] * rows)
            confidences.append(confidence)
            faceboxes.append(
                [x_left_bottom, y_left_bottom, x_right_top, y_right_top])
    return confidences, faceboxes
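A usage sketch for get_facebox(); the module-level NET, WIDTH and HEIGHT it relies on are not shown above, so the values below (the standard res10 300x300 SSD face detector) are assumptions.

import cv2
from cv2 import dnn

WIDTH, HEIGHT = 300, 300
NET = dnn.readNetFromCaffe("deploy.prototxt",
                           "res10_300x300_ssd_iter_140000.caffemodel")  # assumed model files

image = cv2.imread("face.jpg")
confidences, faceboxes = get_facebox(image, threshold=0.5)
for conf, (x1, y1, x2, y2) in zip(confidences, faceboxes):
    cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
    print("face %.2f at (%d, %d)-(%d, %d)" % (conf, x1, y1, x2, y2))
cv2.imwrite("face_out.jpg", image)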
Example #7
def detect_face(image, net):
    """
    Face detection
    :param: numpy, RGB image
    :return: list, best face box [x1, y1, x2, y2]
    """
    # net = dnn.readNetFromONNX(args.onnx_path)  # onnx version
    width = input_size[0]
    height = input_size[1]
    priors = define_img_size(input_size)

    rect = cv2.resize(image, (width, height))
    net.setInput(dnn.blobFromImage(rect, 1 / image_std, (width, height), 127))
    boxes, scores = net.forward(["boxes", "scores"])
    boxes = np.expand_dims(np.reshape(boxes, (-1, 4)), axis=0)
    scores = np.expand_dims(np.reshape(scores, (-1, 2)), axis=0)
    boxes = convert_locations_to_boxes(boxes, priors, center_variance,
                                       size_variance)
    boxes = center_form_to_corner_form(boxes)
    boxes, labels, probs = predict(image.shape[1], image.shape[0], scores,
                                   boxes, threshold)
    if boxes.shape[0] == 0:
        return None
    else:
        best_id = np.argmax(probs)
        best_box = boxes[best_id]
        best_box = [
            max(0, best_box[0]),
            max(0, best_box[1]),
            min(image.shape[1], best_box[2]),
            min(image.shape[0], best_box[3])
        ]
        return best_box
Example #8
def inference():
    net = dnn.readNetFromONNX(args.onnx_path)  # onnx version
    # net = dnn.readNetFromCaffe(args.caffe_prototxt_path, args.caffe_model_path)  # caffe model converted from onnx
    input_size = [int(v.strip()) for v in args.input_size.split(",")]
    width = input_size[0]
    height = input_size[1]
    priors = define_img_size(input_size)
    result_path = args.results_path
    imgs_path = args.imgs_path
    if not os.path.exists(result_path):
        os.makedirs(result_path)
    listdir = os.listdir(imgs_path)
    for file_path in listdir:
        img_path = os.path.join(imgs_path, file_path)
        img_ori = cv2.imread(img_path)
        rect = cv2.resize(img_ori, (width, height))
        rect = cv2.cvtColor(rect, cv2.COLOR_BGR2RGB)
        net.setInput(dnn.blobFromImage(rect, 1 / image_std, (width, height), 127))
        time_time = time.time()
        boxes, scores = net.forward(["boxes", "scores"])
        print("inference time: {} s".format(round(time.time() - time_time, 4)))
        boxes = np.expand_dims(np.reshape(boxes, (-1, 4)), axis=0)
        scores = np.expand_dims(np.reshape(scores, (-1, 2)), axis=0)
        boxes = convert_locations_to_boxes(boxes, priors, center_variance, size_variance)
        boxes = center_form_to_corner_form(boxes)
        boxes, labels, probs = predict(img_ori.shape[1], img_ori.shape[0], scores, boxes, args.threshold)
        for i in range(boxes.shape[0]):
            box = boxes[i, :]
            cv2.rectangle(img_ori, (box[0], box[1]), (box[2], box[3]), (0, 255, 0), 2)
        cv2.imwrite(os.path.join(result_path, file_path), img_ori)
        print("result_pic is written to {}".format(os.path.join(result_path, file_path)))
        cv2.imshow("ultra_face_ace_opencvdnn_py", img_ori)
        cv2.waitKey(-1)
    cv2.destroyAllWindows()
Example #9
def recognition_word(img, boxes, rec_modelPath):
    deploy = os.path.join(rec_modelPath, 'deploy.prototxt')
    weights = os.path.join(rec_modelPath, 'weights.caffemodel')
    labelPath = os.path.join(rec_modelPath, 'label.txt')

    text_ = []
    Confidence_ = []


    row = open(labelPath, encoding='gbk').read().strip().split("\n")
    class_label = row

    net = dnn.readNetFromCaffe(deploy, weights)

    for box in boxes:
        img_word = img[int(box[1]):int(box[5]), int(box[0]):int(box[4])].copy()
        img_word = cv.cvtColor(img_word, cv.COLOR_RGB2GRAY)
        #img_word = img_word[:, :, 0]
        img_word = cv.resize(img_word, (64, 64))

        blob = dnn.blobFromImage(img_word, 1, (64, 64), (0.))
        text, Confidence = model_predict(blob, net, class_label, 1)

        text_.append(text[0])
        Confidence_.append(round(Confidence[0], 2))

    # print(text_)
    # print(Confidence_)
    return text_, Confidence_
Example #10
def inference(frame, show=False):
    #net = dnn.readNetFromONNX(args.onnx_path)  # onnx version
    input_size = [int(v.strip()) for v in args.input_size.split(",")]
    #print("input size", input_size)
    img_ori = frame
    img_cln = frame.copy()
    ori_size = img_ori.shape

    width = input_size[0]
    height = input_size[1]
    priors = define_img_size(input_size)
    #video_midpoint = (int(input_size[0] / 2),int(input_size[1] / 2))

    video_midpoint = (int(ori_size[1] / 2), int(ori_size[0] / 2))
    #print("ori size", ori_size)
    rect = cv2.resize(img_ori, (width, height))
    #img_cln = rect
    #cv2.circle(img_ori, video_midpoint, 4, (250, 200, 0), 6)
    rect = cv2.cvtColor(rect, cv2.COLOR_BGR2RGB)

    net.setInput(dnn.blobFromImage(rect, 1 / image_std, (width, height), 127))
    time_time = time.time()
    boxes, scores = net.forward(["boxes", "scores"])

    # print("inference time: {} s".format(round(time.time() - time_time, 4)))
    boxes = np.expand_dims(np.reshape(boxes, (-1, 4)), axis=0)
    scores = np.expand_dims(np.reshape(scores, (-1, 2)), axis=0)
    boxes = convert_locations_to_boxes(boxes, priors, center_variance,
                                       size_variance)
    boxes = center_form_to_corner_form(boxes)
    boxes, labels, probs = predict(img_ori.shape[1], img_ori.shape[0], scores,
                                   boxes, args.threshold)

    rectangles = []
    locations = []
    for i in range(boxes.shape[0]):

        box = boxes[i, :]
        cv2.rectangle(img_ori, (box[0], box[1]), (box[2], box[3]), (0, 255, 0),
                      2)
        rectangle = [box[0], box[1], box[2], box[3]]
        loc = (int(box[0] + (box[2] - box[0]) / 2),
               int(box[1] + (box[3] - box[1]) / 2))
        loc_from_center = (loc[0] - video_midpoint[0],
                           loc[1] - video_midpoint[1])

        #cv2.line(img_ori, video_midpoint, loc, (0, 200, 0), 5)
        #cv2.circle(img_ori, loc, 4, (0, 200, 200), 3)
        rectangles.append(rectangle)
        locations.append(loc_from_center)
    if show:
        cv2.imshow("ultra_face_ace_opencvdnn_py", img_ori)
        cv2.waitKey(1)

    #print(locations)
    #print(rectangles)
    #print(img_ori)
    return locations, rectangles, img_ori, img_cln
Example #11
def get_face_bounding_boxes(image,
                            threshold=0.7,
                            max_faces=400,
                            min_face_size=(25, 25)):
    """
    Gets a list of tuples that contains information on the bounding boxes of faces found in an image.
    The tuples are of the form (confidence, min x, min y, max x, max y). If no faces are found, an
    empty list is returned. 

    @param image RGB image to extract faces from.
    @param threshold The minimum confidence required to extract a face.
    @param max_faces The maximum number of faces one wants to extract. Can not be more than 400.
    @param min_face_size Minimum size of extracted face. If face bounds is less than this it is discarded.

    @return A list of tuples array where each tuple corresponds to face instances. The first tuple entry is the confidence, and the remaining
            four entries are the mininum x coordinate, minimum y coordinate, maximum x coordinate, and maximum y coordinate
            of the faces bounding box.
    """

    (height, width) = image.shape[:2]

    padded_image = np.zeros((height * 2, width * 2, image.shape[2]),
                            dtype=np.uint8)
    padded_image[:height, :width] = image

    blob = blobFromImage(resize(padded_image, (300, 300)),
                         1, (300, 300), (103.93, 116.77, 123.68),
                         swapRB=True)

    net.setInput(blob)
    detections = net.forward()

    face_bounds_data = []
    possible_faces = detections[0, 0, detections[0, 0, :, 2] > threshold, 2:7]
    possible_faces = possible_faces[0:max_faces]

    # Filter the possible faces to make sure they fit in the non-padded image.
    # If part (but not all) of the face is in the padded region, then clip the bounds so it
    # lies completely in the non-padded region.

    for possible_face in possible_faces:

        (confidence, min_x, min_y, max_x, max_y) = possible_face

        min_x = max(int(min_x * width * 2), 0)
        min_y = max(int(min_y * height * 2), 0)
        max_x = min(int(max_x * width * 2), width * 2)
        max_y = min(int(max_y * height * 2), height * 2)

        # Check if the minimum face coordinates are in the padded section, or if the face is too small.
        if min_x > width or min_y > height or max_x - min_x < min_face_size[
                0] or max_y - min_y < min_face_size[1]:
            continue

        face_bounds_data.append((confidence, min_x, min_y, max_x, max_y))

    return face_bounds_data
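A usage sketch; the module-level net, resize and blobFromImage that the function depends on are not defined in the snippet, so they are filled in here with assumed values (the usual 300x300 SSD face detector).

import cv2
import numpy as np
from cv2 import resize
from cv2.dnn import blobFromImage, readNetFromCaffe

net = readNetFromCaffe("deploy.prototxt",
                       "res10_300x300_ssd_iter_140000.caffemodel")  # assumed model files

image = cv2.cvtColor(cv2.imread("people.jpg"), cv2.COLOR_BGR2RGB)  # the docstring expects RGB
for (conf, min_x, min_y, max_x, max_y) in get_face_bounding_boxes(image):
    print("face %.2f: (%d, %d) -> (%d, %d)" % (conf, min_x, min_y, max_x, max_y))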
Example #12
def detect(cpp):
    frame_origin = cv2.imread(cpp)
    cv2.imshow("detection_or", frame_origin)
    frame = cv2.resize(frame_origin, (1024, 720), interpolation=cv2.INTER_CUBIC)
    cv2.imshow("detection_res", frame)
    blob = dnn.blobFromImage(frame, inScaleFactor, (inWidth, inHeight), meanVal)
    net.setInput(blob)
    t0 = time.time()
    detections = net.forward()
    print(time.time() - t0)

    cols = frame.shape[1]
    rows = frame.shape[0]

##    if cols / float(rows) > WHRatio:
##        cropSize = (int(rows * WHRatio), rows)
##    else:
##        cropSize = (cols, int(cols / WHRatio))
##
##    y1 = (rows - cropSize[1]) // 2
##    y2 = y1 + cropSize[1]
##    x1 = (cols - cropSize[0]) // 2
##    x2 = x1 + cropSize[0]
##    frame = frame[y1:y2, x1:x2]
##
##    cols = frame.shape[1]
##    rows = frame.shape[0]

    for i in range(detections.shape[2]):
        confidence = detections[0, 0, i, 2]
        if confidence > 0.0:
            class_id = int(detections[0, 0, i, 1])

            xLeftBottom = int(detections[0, 0, i, 3] * cols)
            yLeftBottom = int(detections[0, 0, i, 4] * rows)
            xRightTop = int(detections[0, 0, i, 5] * cols)
            yRightTop = int(detections[0, 0, i, 6] * rows)

            cv2.rectangle(frame, (xLeftBottom, yLeftBottom), (xRightTop, yRightTop),
                          (0, 255, 255))

            image_sub = frame[yLeftBottom:yRightTop, xLeftBottom:xRightTop]
            print(confidence)
            print(yLeftBottom, yRightTop, xLeftBottom, xRightTop)
            # e2e.recognizeOne(image_sub)

            label = classNames[class_id] + ": " + str(confidence)
            labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)

            #cv2.rectangle(frame, (xLeftBottom, yLeftBottom - labelSize[1]),
            #               (xLeftBottom + labelSize[0], yLeftBottom + baseLine),
            #               (255, 255, 255), 2,cv2.FILLED)
            cv2.putText(frame, label, (xLeftBottom, yLeftBottom),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0))
        cv2.imshow("detection", frame)
    return frame
Example #13
def detect(frame):
    blob = dnn.blobFromImage(frame, 1, (1024, 768), (0, 0, 0), True)
    net = dnn.readNetFromCaffe(cm_path + 'density.prototxt', cm_path + 'density.caffemodel')

    net.setInput(blob)
    density = net.forward()

    density = density/1000.0

    person_num = np.sum(density[:])
    return int(person_num)
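A short, hypothetical driver for the density-map counter above; cm_path and the input image are assumptions. Note that detect() reloads the Caffe model on every call, so for video streams it is cheaper to load the network once outside the function.

import cv2
import numpy as np
from cv2 import dnn

cm_path = "./models/"  # assumed directory containing density.prototxt / density.caffemodel
frame = cv2.imread("crowd.jpg")
print("estimated person count:", detect(frame))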
Example #14
 def text_pehchano(image, min_confidence=0.85, width=320, height=320, padding=0.00):
     '''
     image = input image (type=numpy.ndarray)
     min_confidence = minimum confidence threshold to detect text in the image (default=0.85)
     width = resizing width of the image (default=320)
     height = resizing height of the image (default=320)
     padding = fractional padding added around each detected text box (default=0.00)
     '''
     east='freshlybuiltimagebol/models/frozen_east_text_detection.pb'
     orig = image.copy()
     (origH, origW) = image.shape[:2]
     (newW, newH) = (width, height)
     rW = origW / float(newW)
     rH = origH / float(newH)
     image = resize(image, (newW, newH))
     (H, W) = image.shape[:2]
     layerNames = [
         "feature_fusion/Conv_7/Sigmoid",
         "feature_fusion/concat_3"]
     net = readNet(east)
     blob = blobFromImage(image, 1.0, (W, H),
         (123.68, 116.78, 103.94), swapRB=True, crop=False)
     net.setInput(blob)
     (scores, geometry) = net.forward(layerNames)
     (rects, confidences) = NaturalPhotoShabd.result_vyakhya_kro(scores, geometry,min_confidence)
     boxes = non_max_suppression(array(rects), probs=confidences)
     results = []
     for (startX, startY, endX, endY) in boxes:
         startX = int(startX * rW)
         startY = int(startY * rH)
         endX = int(endX * rW)
         endY = int(endY * rH)
         dX = int((endX - startX) * padding)
         dY = int((endY - startY) * padding)
         startX = max(0, startX - dX)
         startY = max(0, startY - dY)
         endX = min(origW, endX + (dX * 2))
         endY = min(origH, endY + (dY * 2))
         roi = orig[startY:endY, startX:endX]
         config = ("-l eng --psm 7")
         text = image_to_string(roi, config=config)
         results.append(((startX, startY, endX, endY), text))
     results = sorted(results, key=lambda r:r[0][1])
     output = orig.copy()
     for ((startX, startY, endX, endY), text) in results:
         #print("OCR TEXT : ",text)
         text = "".join([c if ord(c) < 128 else "" for c in text]).strip()
         #ShabdDhwani.shabd_se_dhwani(text,'english',"out.mp3")
         #playsound('out.mp3')
         rectangle(output, (startX, startY), (endX, endY),
             (0, 0, 255), 1)
         putText(output, text, (startX, startY - 20),
             FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)
     return output
Example #15
def detect(cpp):
    frame = cv2.imread(cpp)
    blob = dnn.blobFromImage(frame, inScaleFactor, (inWidth, inHeight),
                             meanVal)
    net.setInput(blob)
    t0 = time.time()
    detections = net.forward()
    print(time.time() - t0)

    cols = frame.shape[1]
    rows = frame.shape[0]

    if cols / float(rows) > WHRatio:
        cropSize = (int(rows * WHRatio), rows)
    else:
        cropSize = (cols, int(cols / WHRatio))

    y1 = (rows - cropSize[1]) // 2
    y2 = y1 + cropSize[1]
    x1 = (cols - cropSize[0]) // 2
    x2 = x1 + cropSize[0]
    frame = frame[y1:y2, x1:x2]

    cols = frame.shape[1]
    rows = frame.shape[0]

    for i in range(detections.shape[2]):
        confidence = detections[0, 0, i, 2]
        if confidence > 0.2:
            class_id = int(detections[0, 0, i, 1])

            xLeftBottom = int(detections[0, 0, i, 3] * cols)
            yLeftBottom = int(detections[0, 0, i, 4] * rows)
            xRightTop = int(detections[0, 0, i, 5] * cols)
            yRightTop = int(detections[0, 0, i, 6] * rows)

            cv2.rectangle(frame, (xLeftBottom, yLeftBottom),
                          (xRightTop, yRightTop), (0, 255, 255))

            image_sub = frame[yLeftBottom:yRightTop, xLeftBottom:xRightTop]
            print(yLeftBottom, yRightTop, xLeftBottom, xRightTop)
            # e2e.recognizeOne(image_sub)

            label = classNames[class_id] + ": " + str(confidence)
            labelSize, baseLine = cv2.getTextSize(label,
                                                  cv2.FONT_HERSHEY_SIMPLEX,
                                                  0.5, 1)

            # cv2.rectangle(frame, (xLeftBottom, yLeftBottom - labelSize[1]),
            #               (xLeftBottom + labelSize[0], yLeftBottom + baseLine),
            #               (255, 255, 255), 2,cv2.FILLED)
            cv2.putText(frame, label, (xLeftBottom, yLeftBottom),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0))
    return frame
Example #16
    def _run(self, image):
        """
        image: input image
        return: detections, perf_stats
        """

        self.net.setInput(dnn.blobFromImage(image, 1.0, (self.inWidth, self.inHeight), (104.0, 177.0, 123.0), False, False))
        self.detections = self.net.forward()

        self.perf_stats = self.net.getPerfProfile()
        return self.detections, self.perf_stats
Example #17
def createBlobFromImage(image,
                        newSize,
                        scale=(1.0 / 255),
                        meanSubtract=(0, 0, 0),
                        swapRB=True,
                        crop=False):
    return dnn.blobFromImage(image,
                             scale,
                             newSize,
                             meanSubtract,
                             swapRB=swapRB,
                             crop=crop)
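An example call for the wrapper above (a sketch; the image file name is an assumption). The result is a 4-D NCHW blob that can be passed directly to net.setInput().

import cv2

frame = cv2.imread("sample.jpg")  # assumed input image
blob = createBlobFromImage(frame, newSize=(300, 300), meanSubtract=(104, 117, 123))
print(blob.shape)  # (1, 3, 300, 300)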
Example #18
def detect(cpp):
    frame = cv2.imread(cpp)
    blob = dnn.blobFromImage(frame, inScaleFactor, (inWidth, inHeight), meanVal)
    net.setInput(blob)
    t0 = time.time()
    detections = net.forward()
    print(time.time() - t0)

    cols = frame.shape[1]
    rows = frame.shape[0]

    if cols / float(rows) > WHRatio:
        cropSize = (int(rows * WHRatio), rows)
    else:
        cropSize = (cols, int(cols / WHRatio))

    y1 = (rows - cropSize[1]) // 2
    y2 = y1 + cropSize[1]
    x1 = (cols - cropSize[0]) // 2
    x2 = x1 + cropSize[0]
    frame = frame[y1:y2, x1:x2]

    cols = frame.shape[1]
    rows = frame.shape[0]

    for i in range(detections.shape[2]):
        confidence = detections[0, 0, i, 2]
        if confidence > 0.2:
            class_id = int(detections[0, 0, i, 1])

            xLeftBottom = int(detections[0, 0, i, 3] * cols)
            yLeftBottom = int(detections[0, 0, i, 4] * rows)
            xRightTop = int(detections[0, 0, i, 5] * cols)
            yRightTop = int(detections[0, 0, i, 6] * rows)

            cv2.rectangle(frame, (xLeftBottom, yLeftBottom), (xRightTop, yRightTop),
                          (0, 255, 255))

            image_sub = frame[yLeftBottom:yRightTop, xLeftBottom:xRightTop]
            print(yLeftBottom, yRightTop, xLeftBottom, xRightTop)
            # e2e.recognizeOne(image_sub)

            label = classNames[class_id] + ": " + str(confidence)
            labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)

            # cv2.rectangle(frame, (xLeftBottom, yLeftBottom - labelSize[1]),
            #               (xLeftBottom + labelSize[0], yLeftBottom + baseLine),
            #               (255, 255, 255), 2,cv2.FILLED)
            cv2.putText(frame, label, (xLeftBottom, yLeftBottom),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0))
    return frame
Example #19
def face_detection():
    net = dnn.readNetFromCaffe(prototxt, caffemodel)
    #cap = cv.VideoCapture(0)
    #cap = cv.VideoCapture("E:/视频库/srcImage/OneStopMoveEnter1cor.avi")
    frame = cv.imread("face01.jpg")

    while True:
        #ret, frame = cap.read()

        cols = frame.shape[1]
        rows = frame.shape[0]

        net.setInput(
            dnn.blobFromImage(frame, 1.0, (inWidth, inHeight),
                              (104.0, 177.0, 123.0), False, False))
        detections = net.forward()

        perf_stats = net.getPerfProfile()
        print('Inference time:  %.2f ms' %
              (perf_stats[0] / cv.getTickFrequency() * 1000))

        for i in range(detections.shape[2]):
            confidence = detections[0, 0, i, 2]
            if confidence > confThreshold:
                global count
                count += 1
                print(confidence)

                xLeftBottom = int(detections[0, 0, i, 3] * cols)
                yLeftBottom = int(detections[0, 0, i, 4] * rows)
                xRightTop = int(detections[0, 0, i, 5] * cols)
                yRightTop = int(detections[0, 0, i, 6] * rows)

                cv.rectangle(frame, (xLeftBottom, yLeftBottom),
                             (xRightTop, yRightTop), (0, 255, 0))
                label = "face: %.4f" % confidence
                labelSize, baseLine = cv.getTextSize(label,
                                                     cv.FONT_HERSHEY_SIMPLEX,
                                                     0.5, 1)

                cv.rectangle(
                    frame, (xLeftBottom, yLeftBottom - labelSize[1]),
                    (xLeftBottom + labelSize[0], yLeftBottom + baseLine),
                    (255, 255, 255), cv.FILLED)
                cv.putText(frame, label, (xLeftBottom, yLeftBottom),
                           cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0))

        cv.imshow("detections", frame)
        print('the num of face: %d ' % count)
        if cv.waitKey(0) != -1:
            break
Example #20
def detect_res10(net, frame):
    inWidth = 300
    inHeight = 300
    means = (104., 177., 123.)
    ratio = 1.0
    #net.setInput(dnn.blobFromImage(cv2.resize(frame, (inWidth, inHeight)), ratio, (inWidth, inHeight), means))
    net.setInput(
        dnn.blobFromImage(frame,
                          ratio, (inWidth, inHeight),
                          means,
                          swapRB=True,
                          crop=False))
    detections = net.forward()
    return detections
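A usage sketch for detect_res10(), assuming the standard res10 300x300 SSD face model; the file names and the 0.5 confidence cut-off are illustrative choices, not part of the original example.

import cv2
import numpy as np
from cv2 import dnn

net = dnn.readNetFromCaffe("deploy.prototxt",
                           "res10_300x300_ssd_iter_140000.caffemodel")  # assumed model files
frame = cv2.imread("group.jpg")
h, w = frame.shape[:2]
detections = detect_res10(net, frame)
for i in range(detections.shape[2]):
    confidence = detections[0, 0, i, 2]
    if confidence > 0.5:  # illustrative threshold
        box = (detections[0, 0, i, 3:7] * np.array([w, h, w, h])).astype(int)
        cv2.rectangle(frame, (box[0], box[1]), (box[2], box[3]), (0, 255, 0), 2)
cv2.imwrite("group_out.jpg", frame)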
Example #21
def detect_mobilenet_widerface(net, frame):
    inWidth = 300
    inHeight = 300
    means = (127.5, 127.5, 127.5)
    ratio = 1.0 / 127.5
    #net.setInput(dnn.blobFromImage(cv2.resize(frame, (inWidth, inHeight)), ratio, (inWidth, inHeight), means))
    net.setInput(
        dnn.blobFromImage(frame,
                          ratio, (inWidth, inHeight),
                          means,
                          swapRB=True,
                          crop=False))
    detections = net.forward()
    return detections
Example #22
def detect_faces(image, detector="cnn"):
    '''detect every face inside image. By default it uses the cnn detector;
       pass "dlib" to use the dlib frontal face detector.
       Returns a list of tuples/rectangles: (top, left, right, bottom)'''

    # detect faces with dlib.frontal_face_detector
    if detector == "dlib":
        # load detector if needed
        global face_det

        if face_det is None:
            face_det = dlib.get_frontal_face_detector()

        dets = face_det(image, 1)
        boxes = []

        for d in dets:
            boxes.append(Region.dlib(d))

        return boxes

    # detect faces with opencv caffe cnn detector
    assert(caffeNet)

    # get image dimension
    (h, w) = image.shape[:2]
    np_arr = np.array([w, h, w, h])

    if h <= 0 or w <= 0:
        return []

    # convert image to blob (that do some preprocessing..)
    blob = blobFromImage(cv2.resize(image, cf_size), cf_scale,
                         size=cf_size, mean=cf_values, swapRB=True)

    # obtain detections and predictions
    caffeNet.setInput(blob)
    detections = caffeNet.forward()

    # detected face-boxes
    boxes = []
    for i in range(0, detections.shape[2]):
        confidence = detections[0, 0, i, 2]

        if confidence >= confidence_threshold:
            # compute the bounding box of the face
            box = detections[0, 0, i, 3:7] * np_arr
            boxes.append(Region.tuple(box.astype("int")))

    return boxes
Example #23
    def detect(self, frame):
        """
        detect face on image
        :param frame: image in opencv format
        :return: face information array in shape of [N * 7]
        """

        self.net.setInput(dnn.blobFromImage(frame, 1.0, (self.inWidth, self.inHeight), (104.0, 177.0, 123.0), False, False))
        detections = self.net.forward()

        detections = detections[detections[:, :, :, 2] > 0.5]

        perf_stats = self.net.getPerfProfile()

        return detections
Example #24
def dnn_predict(net: dnn_Net, input_shape, frame: Image, classes):
    """
    https://github.com/opencv/opencv/tree/master/samples/dnn

    https://github.com/tensorflow/models/tree/master/research
    e.g. object_detection (https://github.com/opencv/opencv/wiki/TensorFlow-Object-Detection-API), TF-slim

    https://github.com/opencv/opencv_extra/blob/master/testdata/dnn/download_models.py
    """
    blob = blobFromImage(frame, size=input_shape)

    # Run the model
    net.setInput(blob)
    out = net.forward()

    # Single object classification: Class with the highest score
    flat_out = out.flatten()
    class_id = argmax(flat_out)
    confidence = flat_out[class_id]

    # Predicted class
    info(
        '%s: %.4f' %
        (classes[class_id] if classes else 'Class #%d' % class_id, confidence))

    # Multiple object detection:
    bbox_color = (0, 255, 0)
    threshold = 0.5  # 0.3
    for detection in out[0, 0, :, :]:
        score = float(detection[2])
        if score > threshold:
            left = detection[3] * frame.width
            top = detection[4] * frame.height

            right = detection[5] * frame.width
            bottom = detection[6] * frame.height

            tl = (uint16(left), uint16(top))
            br = (uint16(right), uint16(bottom))

            rectangle(frame, tl, br, bbox_color)

    # Efficiency information
    t, _ = net.getPerfProfile()
    info('Inference time: %.2f ms' % (t * 1000.0 / getTickFrequency()))
Example #25
 def detect(self, image):
     net = self.classifier
     height, width = image.shape[:2]
     blob = blobFromImage(resize(image, (300, 300)), 1.0, (300, 300),
                          (104.0, 177.0, 123.0))
     net.setInput(blob)
     detections = net.forward()
     faces = []
     for i in range(0, detections.shape[2]):
         confidence = detections[0, 0, i, 2]
         if confidence < self.confidenceThreshold:
             continue
         box = detections[0, 0, i, 3:7] * np.array(
             [width, height, width, height])
         startX, startY, endX, endY = box.astype("int")
         faces.append(
             np.array([startX, startY, endX - startX, endY - startY]))
     return faces
Example #26
 def detect(self, image):
     result = []
     (h, w, c) = image.shape
     blob = dnn.blobFromImage(cv2.resize(image, (self.width, self.height)), self.scale,
                              (self.width, self.height), self.mean_val)
     self.net.setInput(blob)
     detections = self.net.forward()
     for i in range(detections.shape[2]):
         confidence = detections[0, 0, i, 2]
         if confidence > self.threshold:
             idx = int(detections[0, 0, i, 1])
             box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
             box = box.astype("int")
             obj = Target()
             obj.class_name = self.dict[idx]
             obj.box = box
             obj.conf = confidence
             result.append(obj)
     return result
Example #27
    def recognize_face(self, face):
        # load the serialized face embedding model from disk
        # construct a blob for the face ROI, then pass the blob
        # through our face embedding model to obtain the 128-d
        # quantification of the face
        faceBlob = blobFromImage(face,
                                 1.0 / 255, (96, 96), (0, 0, 0),
                                 swapRB=True,
                                 crop=False)
        self.embedder.setInput(faceBlob)
        vector = self.embedder.forward()

        # perform classification to recognize the face
        face_recognizer_preds = self.recognizer.predict_proba(vector)[0]
        proba = face_recognizer_preds[np.argmax(face_recognizer_preds)]
        name = self.le.classes_[
            np.argmax(face_recognizer_preds
                      )] if proba > self.confidenceThreshold else 'Unknown'
        return name
Example #28
    def detect(self, image):
        """
        Takes an image and, using the neural network, provides the locations of
        human hands in that image.
        Args:
            image: image/frame to make the detections on.

        Returns:
            indexes: indexes of the detected hands. (In order to provide
            multiple hand detections.)
            boxes: location(s) of the hand(s) in the frame.
        """

        # ref: https://github.com/darshanadakane/yolov3_objectdetection
        height, width, _ = image.shape
        blob = dnn.blobFromImage(image,
                                 0.00392, (416, 416),
                                 swapRB=True,
                                 crop=False)
        self.network.setInput(blob)
        outs = self.network.forward(self.output_layers)

        confidences = []
        boxes = []

        for out in outs:
            for detection in out:
                scores = detection[5:]
                confidence = scores[0]

                box = detection[0:4] * np.array([width, height, width, height])
                (center_x, center_y, w, h) = box.astype("int")

                x = int(center_x - w / 2)
                y = int(center_y - h / 2)

                boxes.append([x, y, int(w), int(h)])
                confidences.append(float(confidence))

        indexes = dnn.NMSBoxes(boxes, confidences, 0.4, 0.6)

        return indexes, boxes, confidences
Example #29
    def face_detection(self):
        net = dnn.readNetFromCaffe(prototxt, caffemodel)
        net.setInput(
            dnn.blobFromImage(self.image, 1.0, (inWidth, inHeight),
                              (104.0, 177.0, 123.0), False))
        detections = net.forward()
        # print(detections.shape)
        # print(detections)
        cols = self.image.shape[1]
        rows = self.image.shape[0]
        for i in range(detections.shape[2]):
            confidence = detections[0, 0, i, 2]
            if confidence > confThreshold:
                self.count += 1
                # print(confidence)

                xLeftBottom = int(detections[0, 0, i, 3] * cols)
                yLeftBottom = int(detections[0, 0, i, 4] * rows)
                xRightTop = int(detections[0, 0, i, 5] * cols)
                yRightTop = int(detections[0, 0, i, 6] * rows)

                cv2.rectangle(self.image, (xLeftBottom, yLeftBottom),
                              (xRightTop, yRightTop), (0, 255, 0))
                label = "face: %.4f" % confidence
                # labelSize, baseLine = cv.getTextSize(label, cv.FONT_HERSHEY_SIMPLEX, 0.5, 1)
                cv2.putText(self.image, label, (xLeftBottom, yLeftBottom),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0))

        cvRGBImg = cv2.cvtColor(self.image, cv2.COLOR_BGR2RGB)
        qi = QImage(cvRGBImg.data, cvRGBImg.shape[1], cvRGBImg.shape[0],
                    cvRGBImg.shape[1] * 3, QImage.Format_RGB888)
        pix = QPixmap.fromImage(qi)

        self.label2.setPixmap(pix)
        self.label2.show()

        self.label3.clear()
        if self.count > 0:
            self.label3.setText("Detected {} face(s) in the image".format(self.count))
        else:
            self.label3.setText("No face detected in the image")
Example #30
 def load_data(self, data):
     self.output_layer_names = self.model.getLayerNames()
     self.output_layer_names = [
         self.output_layer_names[i[0] - 1]
         for i in self.model.getUnconnectedOutLayers()
     ]
     if len(data.shape) == 4:
         self.height, self.width = data.shape[1:3]
         # TODO: image shape should not be hard coded.
         blob = blobFromImages(data,
                               SCALE_FACTOR, (IMG_SIDE_SIZE, IMG_SIDE_SIZE),
                               swapRB=True,
                               crop=False)
         self.model.setInput(blob)
     else:
         self.height, self.width = data.shape[:2]
         blob = blobFromImage(data,
                              SCALE_FACTOR, (IMG_SIDE_SIZE, IMG_SIDE_SIZE),
                              swapRB=True,
                              crop=False)
         self.model.setInput(blob)
Example #31
    def detect(self, image):
        """method to detect faces in input image"""
        classifier = self.classifier
        height, width = image.shape[:2]
        image_blob = blobFromImage(resize(image, (300, 300)), 1.0, (300, 300), (103.93, 116.77, 123.68))
        classifier.setInput(image_blob)
        detections = classifier.forward()
        faces = []

        # loop over the detections
        for i in range(0, detections.shape[2]):
            confidence = detections[0, 0, i, 2]
            # filter out weak detections by ensuring the 'confidence' is greater than the minimum confidence
            if confidence > self.confidence_threshold:
                # compute the coordinates of the bounding box for the object
                box = detections[0, 0, i, 3:7] * np.array([width, height, width, height])
                start_x, start_y, end_x, end_y = box.astype("int")
                # ensuring the bounding boxes fall within the dimensions of the frame
                faces.append(np.array([start_x, start_y, end_x - start_x, end_y - start_y]))

        return faces
Example #32
from __future__ import print_function
import numpy as np
import cv2
from cv2 import dnn
import timeit

def timeit_forward(net):
    print("Runtime:", timeit.timeit(lambda: net.forward(), number=10))

def get_class_list():
    with open('synset_words.txt', 'rt') as f:
        return [x[x.find(" ") + 1:].rstrip() for x in f]

blob = dnn.blobFromImage(cv2.imread('space_shuttle.jpg'), 1, (224, 224), (104, 117, 123), False)
print("Input:", blob.shape, blob.dtype)

net = dnn.readNetFromCaffe('bvlc_googlenet.prototxt', 'bvlc_googlenet.caffemodel')
net.setInput(blob)
prob = net.forward()
#timeit_forward(net)        #Uncomment to check performance

print("Output:", prob.shape, prob.dtype)
classes = get_class_list()
print("Best match", classes[prob.argmax()])
Example #33
inWidth = 300
inHeight = 300
confThreshold = 0.5

prototxt = 'face_detector/deploy.prototxt'
caffemodel = 'face_detector/res10_300x300_ssd_iter_140000.caffemodel'

if __name__ == '__main__':
    net = dnn.readNetFromCaffe(prototxt, caffemodel)
    cap = cv.VideoCapture(0)
    while True:
        ret, frame = cap.read()
        cols = frame.shape[1]
        rows = frame.shape[0]

        net.setInput(dnn.blobFromImage(frame, 1.0, (inWidth, inHeight), (104.0, 177.0, 123.0), False, False))
        detections = net.forward()

        perf_stats = net.getPerfProfile()

        print('Inference time, ms: %.2f' % (perf_stats[0] / cv.getTickFrequency() * 1000))

        for i in range(detections.shape[2]):
            confidence = detections[0, 0, i, 2]
            if confidence > confThreshold:
                xLeftBottom = int(detections[0, 0, i, 3] * cols)
                yLeftBottom = int(detections[0, 0, i, 4] * rows)
                xRightTop = int(detections[0, 0, i, 5] * cols)
                yRightTop = int(detections[0, 0, i, 6] * rows)

                cv.rectangle(frame, (xLeftBottom, yLeftBottom), (xRightTop, yRightTop),