Example #1
def calculate_social_distancing(vid_path, net, output_dir, output_vid, ln1):
    
    count = 0
    vs = cv2.VideoCapture(vid_path)    

    # Get video height, width and fps
    height = int(vs.get(cv2.CAP_PROP_FRAME_HEIGHT))
    width = int(vs.get(cv2.CAP_PROP_FRAME_WIDTH))
    fps = int(vs.get(cv2.CAP_PROP_FPS))
    
    # Set scale for birds eye view
    # Bird's eye view will only show ROI
    scale_w, scale_h = utills.get_scale(width, height)

    fourcc = cv2.VideoWriter_fourcc(*"XVID")
    output_movie = cv2.VideoWriter("./output_vid/distancing.avi", fourcc, fps, (width, height))
    bird_movie = cv2.VideoWriter("./output_vid/bird_eye_view.avi", fourcc, fps, (int(width * scale_w), int(height * scale_h)))
        
    points = []
    global image
    
    while True:

        (grabbed, frame) = vs.read()

        if not grabbed:
            print('here')
            break
            
        (H, W) = frame.shape[:2]
        
        # The first frame is used to mark the ROI and the horizontal and vertical 180 cm distances (unit length in both directions)
        if count == 0:
            while True:
                image = frame
                cv2.imshow("image", image)
                cv2.waitKey(1)
                if len(mouse_pts) == 8:
                    cv2.destroyWindow("image")
                    break
               
            points = mouse_pts      
                 
        # The first 4 points define the perspective transformation. The region marked by these 4 points is
        # treated as the ROI. This polygon-shaped ROI is warped into a rectangle, which becomes the bird's-eye view.
        # In the bird's-eye view, points are distributed uniformly in both the horizontal and vertical directions
        # (the scales for the two directions will differ), which is not the case in the original camera view.
        src = np.float32(np.array(points[:4]))
        dst = np.float32([[0, H], [W, H], [W, 0], [0, 0]])
        prespective_transform = cv2.getPerspectiveTransform(src, dst)

        # The next 3 points mark the horizontal and vertical unit length (in this case 180 cm)
        pts = np.float32(np.array([points[4:7]]))
        warped_pt = cv2.perspectiveTransform(pts, prespective_transform)[0]
        
        # Because distances are uniform in the bird's-eye view, distance_w and distance_h give the number of
        # pixels that correspond to 180 cm in the horizontal and vertical directions respectively, which is
        # later used to compute the distance between two people in the transformed (bird's-eye) view.
        distance_w = np.sqrt((warped_pt[0][0] - warped_pt[1][0]) ** 2 + (warped_pt[0][1] - warped_pt[1][1]) ** 2)
        distance_h = np.sqrt((warped_pt[0][0] - warped_pt[2][0]) ** 2 + (warped_pt[0][1] - warped_pt[2][1]) ** 2)
        pnts = np.array(points[:4], np.int32)
        cv2.polylines(frame, [pnts], True, (70, 70, 70), thickness=2)
    
    ####################################################################################
    
        # YOLO v3
        blob = cv2.dnn.blobFromImage(frame, 1 / 255.0, (416, 416), swapRB=True, crop=False)
        net.setInput(blob)
        start = time.time()
        layerOutputs = net.forward(ln1)
        end = time.time()
        boxes = []
        confidences = []
        classIDs = []   
    
        for output in layerOutputs:
            for detection in output:
                scores = detection[5:]
                classID = np.argmax(scores)
                confidence = scores[classID]
                # detecting humans in frame
                if classID == 0:

                    if confidence > confid:

                        box = detection[0:4] * np.array([W, H, W, H])
                        (centerX, centerY, width, height) = box.astype("int")

                        x = int(centerX - (width / 2))
                        y = int(centerY - (height / 2))

                        boxes.append([x, y, int(width), int(height)])
                        confidences.append(float(confidence))
                        classIDs.append(classID)
                    
        idxs = cv2.dnn.NMSBoxes(boxes, confidences, confid, thresh)
        font = cv2.FONT_HERSHEY_PLAIN
        boxes1 = []
        for i in range(len(boxes)):
            if i in idxs:
                boxes1.append(boxes[i])
                x,y,w,h = boxes[i]
                
        if len(boxes1) == 0:
            count = count + 1
            continue
            
        # The bottom-centre point of each bounding box is used as the person's position and is
        # transformed into the bird's-eye view
        person_points = utills.get_transformed_points(boxes1, prespective_transform)
        
        # Calculate the distances between the transformed points (people)
        distances_mat, bxs_mat = utills.get_distances(boxes1, person_points, distance_w, distance_h)
        risk_count = utills.get_count(distances_mat)
    
        frame1 = np.copy(frame)
        
        # Draw the bird's-eye view and the frame with bounding boxes around people according to risk factor
        bird_image = plot.bird_eye_view(frame, distances_mat, person_points, scale_w, scale_h, risk_count)
        img = plot.social_distancing_view(frame1, bxs_mat, boxes1, risk_count)
        
        # Show/write image and videos
        if count != 0:
            output_movie.write(img)
            bird_movie.write(bird_image)
    
            cv2.imshow('Bird Eye View', bird_image)
            cv2.imwrite(output_dir+"frame%d.jpg" % count, img)
            cv2.imwrite(output_dir+"bird_eye_view/frame%d.jpg" % count, bird_image)
    
        count = count + 1
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
     
    vs.release()
    cv2.destroyAllWindows() 
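A hedged sketch of the projection that utills.get_transformed_points is expected to perform (the helper itself is not shown in these examples, so its exact implementation is an assumption): the bottom-centre point of each box approximates the person's position on the ground and is mapped into the bird's-eye plane with the homography computed above.

import cv2
import numpy as np

def get_transformed_points_sketch(boxes, matrix):
    # boxes  : list of [x, y, w, h] detections in the camera view
    # matrix : 3x3 homography returned by cv2.getPerspectiveTransform
    transformed = []
    for (x, y, w, h) in boxes:
        # bottom-centre of the bounding box ~ feet position on the ground plane
        pt = np.array([[[x + w / 2, y + h]]], dtype=np.float32)
        warped = cv2.perspectiveTransform(pt, matrix)[0][0]
        transformed.append([int(warped[0]), int(warped[1])])
    return transformed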
Example #2
def calculate_social_distancing(vid_path, net, ln1):
    count = 0
    vs = cv2.VideoCapture(vid_path)

    points = []
    global image

    while True:

        (grabbed, frame) = vs.read()

        if not grabbed:
            print('here')
            break

        # if changing width, must also change offset of face
        frame = imutils.resize(frame, width=500)
        (H, W) = frame.shape[:2]

        # The first frame is used to mark the ROI and the horizontal and vertical 180 cm distances (unit length in both directions)
        if count == 0:
            while True:
                image = frame
                cv2.imshow("image", image)
                cv2.waitKey(1)
                if len(mouse_pts) == 8:
                    cv2.destroyWindow("image")
                    start1 = time.time()
                    break

            points = mouse_pts

        if count % 50 == 0:
            (locs, preds) = detect_and_predict_mask(frame, faceNet, maskNet)
            # The first 4 points define the perspective transformation. The region marked by these 4 points is
            # treated as the ROI. This polygon-shaped ROI is warped into a rectangle, which becomes the bird's-eye view.
            # In the bird's-eye view, points are distributed uniformly in both the horizontal and vertical directions
            # (the scales for the two directions will differ), which is not the case in the original camera view.
            src = np.float32(np.array(points[:4]))
            dst = np.float32([[0, H], [W, H], [W, 0], [0, 0]])
            prespective_transform = cv2.getPerspectiveTransform(src, dst)

            # using next 3 points for horizontal and vertical unit length(in this case 180 cm)
            pts = np.float32(np.array([points[4:7]]))
            warped_pt = cv2.perspectiveTransform(pts, prespective_transform)[0]

            # Because distances are uniform in the bird's-eye view, distance_w and distance_h give the number of
            # pixels that correspond to 180 cm in the horizontal and vertical directions respectively, which is
            # later used to compute the distance between two people in the transformed (bird's-eye) view.
            distance_w = np.sqrt((warped_pt[0][0] - warped_pt[1][0]) ** 2 + (warped_pt[0][1] - warped_pt[1][1]) ** 2)
            distance_h = np.sqrt((warped_pt[0][0] - warped_pt[2][0]) ** 2 + (warped_pt[0][1] - warped_pt[2][1]) ** 2)
            pnts = np.array(points[:4], np.int32)
            cv2.polylines(frame, [pnts], True, (70, 70, 70), thickness=2)

            ####################################################################################

            # YOLO v3
            blob = cv2.dnn.blobFromImage(frame, 1 / 255.0, (416, 416), swapRB=True, crop=False)
            net.setInput(blob)
            layerOutputs = net.forward(ln1)
            boxes = []
            confidences = []
            classIDs = []

            for (box, pred) in zip(locs, preds):
                # unpack the bounding box and predictions
                (startX, startY, endX, endY) = box
                (mask, withoutMask) = pred

                # determine the class label and color we'll use to draw the bounding box and text
                if mask > withoutMask:
                    if mask > 0.90:
                        label = "Mask"
                        color = (0, 255, 0)  # green
                        img1 = cv2.imread("emoji_happy.png", -1)

                    else:
                        label = "Mask not on Properly"
                        color = (0, 220, 220)  # yellow
                        img1 = cv2.imread("emoji_middle.png", -1)
                else:
                    label = "No Mask"
                    color = (0, 0, 255)  # red
                    img1 = cv2.imread("emoji_worried.png", -1)

            # Guard against frames with no detected faces (label, img1 etc. would otherwise be undefined)
            if len(locs) > 0:
                img1 = cv2.resize(img1, (int(img1.shape[1] * 0.25), int(img1.shape[0] * 0.25)))

                label = "{}: {:.2f}%".format(label, max(mask, withoutMask) * 100)
                cv2.putText(frame, label, (startX, startY - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.45, color, 2)
                cv2.rectangle(frame, (startX, startY), (endX, endY), color, 2)
                x_offset = y_offset = 50
                y1, y2 = y_offset, y_offset + img1.shape[0]
                x1, x2 = x_offset, x_offset + img1.shape[1]

                alpha_s = img1[:, :, 3] / 255.0
                alpha_l = 1.0 - alpha_s

                for c in range(0, 3):
                    frame[y1:y2, x1:x2, c] = (alpha_s * img1[:, :, c] +
                                              alpha_l * frame[y1:y2, x1:x2, c])
            for output in layerOutputs:
                for detection in output:
                    scores = detection[5:]
                    classID = np.argmax(scores)
                    confidence = scores[classID]
                    # detecting humans in frame
                    if classID == 0:

                        if confidence > confid:
                            box = detection[0:4] * np.array([W, H, W, H])
                            (centerX, centerY, width, height) = box.astype("int")
                            x = int(centerX - (width / 2))
                            y = int(centerY - (height / 2))

                            boxes.append([x, y-10, int(width), int(height)+25])
                            confidences.append(float(confidence))
                            classIDs.append(classID)

            idxs = cv2.dnn.NMSBoxes(boxes, confidences, confid, thresh)
            boxes1 = []
            for i in range(len(boxes)):
                if i in idxs:
                    boxes1.append(boxes[i])
                    x, y, w, h = boxes[i]

            if len(boxes1) == 0:
                count = count + 1
                continue

            # The bottom-centre point of each bounding box is used as the person's position and is
            # transformed into the bird's-eye view
            person_points = utills.get_transformed_points(boxes1, prespective_transform)

            # Calculate the distances between the transformed points (people)
            distances_mat, bxs_mat = utills.get_distances(boxes1, person_points, distance_w, distance_h)
            risk_count = utills.get_count(distances_mat)

            frame1 = np.copy(frame)

            # Draw the frame with bounding boxes around people according to risk factor
            img = plot.social_distancing_view(frame1, bxs_mat, boxes1, risk_count)
            # Show/write image and videos
            cv2.imshow('Camera View', img)


        count += 1
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    print(time.time()-start1)
    vs.release()
    cv2.destroyAllWindows()
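Example #2 composites an RGBA emoji (loaded with cv2.imread(path, -1), which keeps the alpha channel) onto the BGR frame by weighting each colour channel with the emoji's alpha mask. The same step, pulled out into a self-contained sketch; the file name, offsets and scale are hypothetical:

import cv2

def overlay_rgba(frame, overlay_path, x_offset=50, y_offset=50, scale=0.25):
    # Load with the alpha channel intact (equivalent to the -1 flag used above)
    img1 = cv2.imread(overlay_path, cv2.IMREAD_UNCHANGED)
    if img1 is None or img1.ndim != 3 or img1.shape[2] != 4:
        return frame  # nothing usable to blend
    img1 = cv2.resize(img1, (int(img1.shape[1] * scale), int(img1.shape[0] * scale)))

    y1, y2 = y_offset, y_offset + img1.shape[0]
    x1, x2 = x_offset, x_offset + img1.shape[1]

    alpha_s = img1[:, :, 3] / 255.0   # per-pixel opacity of the overlay
    alpha_l = 1.0 - alpha_s           # remaining weight for the background

    for c in range(3):                # blend the B, G and R channels
        frame[y1:y2, x1:x2, c] = (alpha_s * img1[:, :, c] +
                                  alpha_l * frame[y1:y2, x1:x2, c])
    return frame

# usage (hypothetical): frame = overlay_rgba(frame, "emoji_happy.png")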
Example #3
def calculate_social_distancing(vid_path, net, output_dir, output_vid, ln1):

    count = 0
    vs = cv2.VideoCapture(vid_path)

    # Get video height, width and fps
    height = int(vs.get(cv2.CAP_PROP_FRAME_HEIGHT))
    width = int(vs.get(cv2.CAP_PROP_FRAME_WIDTH))
    fps = int(vs.get(cv2.CAP_PROP_FPS))

    # Set scale for birds eye view
    scale_w, scale_h = utills.get_scale(width, height)

    fourcc = cv2.VideoWriter_fourcc(*"XVID")
    # Initialize writer objects
    output_movie = cv2.VideoWriter("Output.avi", fourcc, fps, (width, height))
    output_movie2 = cv2.VideoWriter("Output2.avi", fourcc, fps, (1920, 1080))
    bird_movie = cv2.VideoWriter("./output_vid/bird_eye_view.avi", fourcc, fps,
                                 (int(width * scale_w), int(height * scale_h)))

    points = []
    global image

    while True:
        # Read frames
        (grabbed, frame) = vs.read()

        if not grabbed:
            print('here')
            break

        (H, W) = frame.shape[:2]

        if count == 0:
            while True:
                image = frame
                cv2.imshow("image", image)
                cv2.waitKey(1)
                if len(mouse_pts) == 8:
                    cv2.destroyWindow("image")
                    break

            points = mouse_pts

        src = np.float32(np.array(points[:4]))
        dst = np.float32([[0, H], [W, H], [W, 0], [0, 0]])
        # Transform perspective using opencv method
        prespective_transform = cv2.getPerspectiveTransform(src, dst)

        # The next 3 points mark the horizontal and vertical unit length (in this case 6 feet ~= 180 cm)
        pts = np.float32(np.array([points[4:7]]))
        warped_pt = cv2.perspectiveTransform(pts, prespective_transform)[0]

        # Calculate distance scale using marked points by user
        distance_w = np.sqrt((warped_pt[0][0] - warped_pt[1][0])**2 +
                             (warped_pt[0][1] - warped_pt[1][1])**2)
        distance_h = np.sqrt((warped_pt[0][0] - warped_pt[2][0])**2 +
                             (warped_pt[0][1] - warped_pt[2][1])**2)
        pnts = np.array(points[:4], np.int32)
        cv2.polylines(frame, [pnts], True, (70, 70, 70), thickness=2)

        # Using YOLO v3 model using dnn method
        blob = cv2.dnn.blobFromImage(frame,
                                     1 / 255.0, (416, 416),
                                     swapRB=True,
                                     crop=False)
        net.setInput(blob)
        start = time.time()
        layerOutputs = net.forward(ln1)
        end = time.time()
        boxes = []
        confidences = []
        classIDs = []

        for output in layerOutputs:
            for detection in output:
                scores = detection[5:]
                classID = np.argmax(scores)
                confidence = scores[classID]
                # detecting humans in frame
                if classID == 0:

                    if confidence > confid:
                        # Finding bounding boxes dimensions
                        box = detection[0:4] * np.array([W, H, W, H])
                        (centerX, centerY, width, height) = box.astype("int")

                        x = int(centerX - (width / 2))
                        y = int(centerY - (height / 2))

                        boxes.append([x, y, int(width), int(height)])
                        confidences.append(float(confidence))
                        classIDs.append(classID)
        # Apply Non-Maximum Suppression to remove multiple bounding boxes around the same object
        idxs = cv2.dnn.NMSBoxes(boxes, confidences, confid, thresh)
        font = cv2.FONT_HERSHEY_PLAIN
        boxes1 = []
        for i in range(len(boxes)):
            if i in idxs:
                boxes1.append(boxes[i])
                x, y, w, h = boxes[i]

        if len(boxes1) == 0:
            count = count + 1
            continue

        # Get transformed points using perspective transform
        person_points = utills.get_transformed_points(boxes1,
                                                      prespective_transform)

        # Get distances between the points
        distances_mat, bxs_mat = utills.get_distances(boxes1, person_points,
                                                      distance_w, distance_h)

        # Get the risk counts
        risk_count = utills.get_count(distances_mat)

        frame1 = np.copy(frame)

        bird_image = plot.bird_eye_view(frame, distances_mat, person_points,
                                        scale_w, scale_h, risk_count)
        img = plot.social_distancing_view(frame1, bxs_mat, boxes1, risk_count)
        if count != 0:

            bird_movie.write(bird_image)

            cv2.imshow('Social Distancing Detect', img)
            output_movie.write(img)
            output_movie2.write(img)
            cv2.imwrite(output_dir + "frame%d.jpg" % count, img)
            cv2.imwrite(output_dir + "bird_eye_view/frame%d.jpg" % count,
                        bird_image)

        count = count + 1
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    vs.release()
    cv2.destroyAllWindows()
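The actual distance check is hidden inside utills.get_distances. A minimal sketch of the underlying conversion for a single pair of bird's-eye points, assuming distance_w and distance_h are the pixel lengths that correspond to 180 cm horizontally and vertically (the real helper builds a full pairwise matrix and risk categories, which this sketch does not):

import math

def too_close(p1, p2, distance_w, distance_h, limit_cm=180.0):
    # convert the pixel offsets to centimetres using the per-axis scales
    dx_cm = abs(p1[0] - p2[0]) / distance_w * 180.0
    dy_cm = abs(p1[1] - p2[1]) / distance_h * 180.0
    return math.hypot(dx_cm, dy_cm) < limit_cm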
Example #4
    cv2.polylines(frame, [pnts], True, (70, 70, 70), thickness=2)

    font = cv2.FONT_HERSHEY_PLAIN
    boxes1 = []
    for i in range(len(boxes)):
        if i in idxs:
            boxes1.append(boxes[i])
            x, y, w, h = boxes[i]

    if len(boxes1) == 0:
        count = count + 1
        continue

    # The bottom-centre point of each bounding box is used as the person's position and is
    # transformed into the bird's-eye view
    person_points = utills.get_transformed_points(boxes1,
                                                  perspective_transform)

    # Calculate the distances between the transformed points (people)
    distances_mat, bxs_mat = utills.get_distances(boxes1, person_points,
                                                  distance_w, distance_h)
    risk_count = utills.get_count(distances_mat)

    frame1 = np.copy(frame)

    # Draw the bird's-eye view and the frame with bounding boxes around people according to risk factor
    bird_image = draw.bird_eye_view(frame, distances_mat, person_points,
                                    scale_w, scale_h, risk_count)
    img = draw.social_distancing_view(frame1, bxs_mat, boxes1, risk_count)

    # closing part
    count = count + 1
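Every example waits for len(mouse_pts) == 8 while showing the first frame, which implies a mouse callback that records the clicked points; none of the snippets include it. A hedged sketch of what such a callback could look like (the window name "image" and the global names match the examples above; the colours and circle radius are assumptions):

import cv2

mouse_pts = []

def get_mouse_points(event, x, y, flags, param):
    # First 4 clicks: ROI corners. Next 3 clicks: points marking the 180 cm
    # unit lengths. The examples wait for 8 clicks but only use the first 7.
    global image, mouse_pts
    if event == cv2.EVENT_LBUTTONDOWN:
        colour = (0, 255, 0) if len(mouse_pts) < 4 else (255, 0, 0)
        cv2.circle(image, (x, y), 5, colour, -1)
        mouse_pts.append((x, y))

# registered once before the processing loop:
# cv2.namedWindow("image")
# cv2.setMouseCallback("image", get_mouse_points)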
Example #5
def calculate_social_distancing(vid_path, net, output_dir, output_vid, ln1):

    count = 0
    vs = cv2.VideoCapture(vid_path)

    height = int(vs.get(cv2.CAP_PROP_FRAME_HEIGHT))
    width = int(vs.get(cv2.CAP_PROP_FRAME_WIDTH))
    fps = int(vs.get(cv2.CAP_PROP_FPS))

    scale_w, scale_h = utills.get_scale(width, height)

    fourcc = cv2.VideoWriter_fourcc(*"XVID")
    output_movie = cv2.VideoWriter("./output_vid/distancing.avi", fourcc, fps,
                                   (width, height))
    bird_movie = cv2.VideoWriter("./output_vid/bird_eye_view.avi", fourcc, fps,
                                 (int(width * scale_w), int(height * scale_h)))

    points = []
    global image

    while True:

        (grabbed, frame) = vs.read()

        if not grabbed:
            print('here')
            break

        (H, W) = frame.shape[:2]

        if count == 0:
            while True:
                image = frame
                cv2.imshow("image", image)
                cv2.waitKey(1)
                if len(mouse_pts) == 8:
                    cv2.destroyWindow("image")
                    break

            points = mouse_pts

        src = np.float32(np.array(points[:4]))
        dst = np.float32([[0, H], [W, H], [W, 0], [0, 0]])
        prespective_transform = cv2.getPerspectiveTransform(src, dst)

        pts = np.float32(np.array([points[4:7]]))
        warped_pt = cv2.perspectiveTransform(pts, prespective_transform)[0]

        distance_w = np.sqrt((warped_pt[0][0] - warped_pt[1][0])**2 +
                             (warped_pt[0][1] - warped_pt[1][1])**2)
        distance_h = np.sqrt((warped_pt[0][0] - warped_pt[2][0])**2 +
                             (warped_pt[0][1] - warped_pt[2][1])**2)
        pnts = np.array(points[:4], np.int32)
        cv2.polylines(frame, [pnts], True, (70, 70, 70), thickness=2)

        blob = cv2.dnn.blobFromImage(frame,
                                     1 / 255.0, (416, 416),
                                     swapRB=True,
                                     crop=False)
        net.setInput(blob)
        start = time.time()
        layerOutputs = net.forward(ln1)
        end = time.time()
        boxes = []
        confidences = []
        classIDs = []

        for output in layerOutputs:
            for detection in output:
                scores = detection[5:]
                classID = np.argmax(scores)
                confidence = scores[classID]
                if classID == 0:

                    if confidence > confid:

                        box = detection[0:4] * np.array([W, H, W, H])
                        (centerX, centerY, width, height) = box.astype("int")

                        x = int(centerX - (width / 2))
                        y = int(centerY - (height / 2))

                        boxes.append([x, y, int(width), int(height)])
                        confidences.append(float(confidence))
                        classIDs.append(classID)

        idxs = cv2.dnn.NMSBoxes(boxes, confidences, confid, thresh)
        font = cv2.FONT_HERSHEY_PLAIN
        boxes1 = []
        for i in range(len(boxes)):
            if i in idxs:
                boxes1.append(boxes[i])
                x, y, w, h = boxes[i]

        if len(boxes1) == 0:
            count = count + 1
            continue

        person_points = utills.get_transformed_points(boxes1,
                                                      prespective_transform)

        distances_mat, bxs_mat = utills.get_distances(boxes1, person_points,
                                                      distance_w, distance_h)
        risk_count = utills.get_count(distances_mat)

        frame1 = np.copy(frame)

        bird_image = plot.bird_eye_view(frame, distances_mat, person_points,
                                        scale_w, scale_h, risk_count)
        img = plot.social_distancing_view(frame1, bxs_mat, boxes1, risk_count)

        if count != 0:
            output_movie.write(img)
            bird_movie.write(bird_image)

            cv2.imshow('Bird Eye View', bird_image)
            cv2.imwrite(output_dir + "frame%d.jpg" % count, img)
            cv2.imwrite(output_dir + "bird_eye_view/frame%d.jpg" % count,
                        bird_image)

        count = count + 1
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    vs.release()
    cv2.destroyAllWindows()
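All of the examples receive the YOLO network net and its output-layer names ln1 as arguments, and rely on module-level confid and thresh thresholds. A sketch of the setup a caller might use; the file paths and threshold values are assumptions, not taken from the examples:

import cv2

confid = 0.5   # minimum detection confidence (assumed value)
thresh = 0.5   # NMS threshold (assumed value)

config_path = "yolov3.cfg"        # hypothetical paths to the YOLOv3 files
weights_path = "yolov3.weights"

net = cv2.dnn.readNetFromDarknet(config_path, weights_path)
ln1 = net.getUnconnectedOutLayersNames()   # names of the YOLO output layers

# calculate_social_distancing("input.mp4", net, "./output/", "./output_vid/", ln1)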