Code example #1
# Imports assumed from the standard deep_sort + YOLO project layout; the
# module paths are a guess and may need adjusting to this repository.
import cv2
import numpy as np
import face_recognition

import filevideostream
from yolo import YOLO
from deep_sort import nn_matching, preprocessing
from deep_sort.detection import Detection
from deep_sort.tracker import Tracker
from tools import generate_detections as gdet


class videoSession(object):
    def __init__(self, videoPath, visualize=False, cnn=False):
        self.visualizeFlag = visualize
        self.cnnFlag = cnn
        self.doFace = False
        # Yolo
        self.yolo = YOLO()
        # Definition of the parameters
        max_cosine_distance = 0.3
        nn_budget = None
        self.nms_max_overlap = 1.0

        # deep_sort
        model_filename = 'model_data/mars-small128.pb'
        self.encoder = gdet.create_box_encoder(model_filename, batch_size=1)

        metric = nn_matching.NearestNeighborDistanceMetric(
            "cosine", max_cosine_distance, nn_budget)
        self.tracker = Tracker(metric)

        self.video_capture = filevideostream.FileVideoStream(videoPath)

        # Shot-cut counter; incremented each time the stream reports a cut.
        self.cut_id = 0

    def start(self):
        self.video_capture.start()

    def release(self):
        self.video_capture.stopprocess()
        self.video_capture.stream.release()

    def nextFrame(self):
        ret, frame_no, frame, isCut = self.video_capture.read()
        resDict = {}
        if not ret:
            return None

        # get yolo boxes
        boxs = self.yolo.detect_image(frame)
        # print("box_num",len(boxs))
        features = self.encoder(frame, boxs)
        # This YOLO wrapper does not return confidences, so every detection
        # score is set to 1.0 here.
        detections = [
            Detection(bbox, 1.0, feature)
            for bbox, feature in zip(boxs, features)
        ]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes,
                                                    self.nms_max_overlap,
                                                    scores)
        detections = [detections[i] for i in indices]

        # Call the tracker. On a shot cut, drop all existing tracks first,
        # since identities do not persist across cuts.
        if isCut:
            self.tracker.delete_all()
            self.cut_id += 1
        self.tracker.predict()
        self.tracker.update(detections)

        resDict['frame_no'] = frame_no
        resDict['is_cut'] = isCut
        resDict['cut_id'] = self.cut_id
        resDict['person'] = []

        for track in self.tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            # Clamp the box to the frame; a 1920x1080 input is assumed by
            # these hard-coded bounds.
            x1 = max(int(bbox[0]), 0)
            y1 = max(int(bbox[1]), 0)
            x2 = min(int(bbox[2]), 1920)
            y2 = min(int(bbox[3]), 1080)
            pifDict = {
                'trackId': track.track_id,
                'x1': x1,
                'y1': y1,
                'x2': x2,
                'y2': y2
            }
            # Skip crops smaller than 500 square pixels; they are too small
            # for face detection.
            if (y2 - y1) * (x2 - x1) < 500:
                continue
            peopleFrame = frame[y1:y2, x1:x2]
            faceDict = None
            face_locations = []
            if self.doFace:
                if self.cnnFlag:
                    try:
                        face_locations = face_recognition.face_locations(
                            peopleFrame, model='cnn')
                    except RuntimeError as e:
                        # dlib's CNN detector can raise RuntimeError (e.g. on
                        # GPU memory exhaustion); fall back to no faces.
                        print('face detection failed:', e)
                        face_locations = []
                else:
                    face_locations = face_recognition.face_locations(
                        peopleFrame)
            # face_recognition returns boxes as (top, right, bottom, left);
            # keep only the first face found in the person crop.
            for top, right, bottom, left in face_locations:
                faceDict = {'x1': left, 'x2': right, 'y1': top, 'y2': bottom}
                break
            pifDict['face'] = faceDict
            resDict['person'].append(pifDict)
        if self.visualizeFlag:
            cv2.putText(
                frame,
                'frame %d, cut %d' % (resDict['frame_no'], resDict['cut_id']),
                (0, 20), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            for people in resDict['person']:
                cv2.rectangle(frame, (people['x1'], people['y1']),
                              (people["x2"], people['y2']), (255, 255, 255), 2)
                if people['face']:
                    x1 = people['x1'] + people['face']['x1']
                    x2 = people['x1'] + people['face']['x2']
                    y1 = people['y1'] + people['face']['y1']
                    y2 = people['y1'] + people['face']['y2']
                    cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 255), 2)
            cv2.imshow('', frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                return
        return resDict
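
A minimal driver for the class above might look like the following sketch. The import path video_session is hypothetical, and the sketch assumes the model files referenced in __init__ are present; the video path is the one used in code example #2.

from video_session import videoSession  # hypothetical module path

session = videoSession('./testVideo/604_0_new.mp4', visualize=True)
session.start()
try:
    while True:
        res = session.nextFrame()
        if res is None:  # end of stream (or 'q' pressed in the preview)
            break
        for person in res['person']:
            print('frame %d cut %d track %d box (%d,%d)-(%d,%d)' %
                  (res['frame_no'], res['cut_id'], person['trackId'],
                   person['x1'], person['y1'], person['x2'], person['y2']))
finally:
    session.release()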
Code example #2
File: demo.py  Project: zixiiu/ProjectNaCut
# Imports assumed as in the previous example, plus time for the per-stage
# timing below; module paths are a guess and may need adjusting.
import time

import cv2
import numpy as np
import face_recognition

import filevideostream
from yolo import YOLO
from deep_sort import nn_matching, preprocessing
from deep_sort.detection import Detection
from deep_sort.tracker import Tracker
from tools import generate_detections as gdet


def main():
    yolo = YOLO()
    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # deep_sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    writeVideo_flag = False
    doFace_flag = True

    video_capture = filevideostream.FileVideoStream(
        "./testVideo/604_0_new.mp4")
    video_capture.start()
    # cv2.VideoCapture("/media/seb101-user/DATA/TestV_videos/447_1_old.mp4")

    #CutDetector
    #cutDetector = Util.CutDetectior.CutDetector(threshold=0.3)

    if writeVideo_flag:
        # Define the codec and create a VideoWriter object. FileVideoStream
        # wraps a cv2.VideoCapture exposed as .stream (see release() in the
        # previous example), so query the frame size through it.
        w = int(video_capture.stream.get(cv2.CAP_PROP_FRAME_WIDTH))
        h = int(video_capture.stream.get(cv2.CAP_PROP_FRAME_HEIGHT))
        print(w, h)
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        out = cv2.VideoWriter('output.avi', fourcc, 15, (w, h))
        list_file = open('detection.txt', 'w')
        frame_index = -1

    fps = 0.0

    #yolo multiprocess queues/values

    while True:
        tfps = time.time()
        tget = time.time()
        ret, frame_no, frame, isCut = video_capture.read()
        if not ret:
            break
        tget = time.time() - tget

        # isCut = cutDetector.putFrame(frame)
        # people_frame = []
        #face_locations = face_recognition.face_locations(frame, model="cnn")
        # image = Image.fromarray(frame)
        #image = Image.fromarray(frame[...,::-1]) #bgr to rgb

        tyolo = time.time()
        boxs = yolo.detect_image(frame)
        # print("box_num",len(boxs))
        features = encoder(frame, boxs)
        # This YOLO wrapper does not return confidences, so every detection
        # score is set to 1.0 here.
        detections = [
            Detection(bbox, 1.0, feature)
            for bbox, feature in zip(boxs, features)
        ]
        tyolo = time.time() - tyolo

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap,
                                                    scores)
        detections = [detections[i] for i in indices]

        # Call the tracker; on a shot cut, drop all existing tracks first,
        # since identities do not persist across cuts.
        ttrack = time.time()
        if isCut:
            tracker.delete_all()
        tracker.predict()
        tracker.update(detections)
        ttrack = time.time() - ttrack

        # Face detection inside each detected person crop.
        tface = time.time()
        for det in detections:
            bbox = det.to_tlbr()
            # Clamp the top-left corner so the crop below is a valid slice.
            x1 = max(int(bbox[0]), 0)
            y1 = max(int(bbox[1]), 0)
            x2 = int(bbox[2])
            y2 = int(bbox[3])
            peopleFrame = frame[y1:y2, x1:x2]
            if doFace_flag:
                face_locations = face_recognition.face_locations(peopleFrame,
                                                                 model='cnn')
                for top, right, bottom, left in face_locations:
                    top += y1
                    bottom += y1
                    right += x1
                    left += x1
                    cv2.rectangle(frame, (left, top), (right, bottom),
                                  (0, 0, 255), 2)
        tface = time.time() - tface

        tvis = time.time()
        # Raw detection boxes are intentionally left undrawn; only confirmed
        # tracks are visualized below.
        #for det in detections:
        #    bbox = det.to_tlbr()
        #    cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
        #                  (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            cv2.putText(frame, str(track.track_id),
                        (int(bbox[0]) + 50, int(bbox[1]) + 50),
                        cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 255, 0), 2)

        cv2.imshow('', frame)
        tvis = time.time() - tvis

        if writeVideo_flag:
            # Save the annotated frame and log the raw YOLO boxes per frame.
            out.write(frame)
            frame_index += 1
            list_file.write(str(frame_index) + ' ')
            for box in boxs:
                list_file.write('%s %s %s %s ' % (box[0], box[1], box[2], box[3]))
            list_file.write('\n')

        fps = 1.0 / (time.time() - tfps)
        print(
            "fps= %.2f, frame:%.0f, tget:%.2f tyolo:%.2f, ttrack:%.2f, tface:%.2f, tvis: %.2f"
            % (fps, frame_no, tget * 1000, tyolo * 1000, ttrack * 1000,
               tface * 1000, tvis * 1000))
        if isCut:
            print(
                "=============================================================================================="
            )
        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    video_capture.stop()
    if writeVideo_flag:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
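
As a usage note, each line of detection.txt written above is a frame index followed by the raw YOLO boxes flattened four values at a time; given how deep_sort's Detection consumes them, the values are read here as tlwh (top-left x, top-left y, width, height). A small reader sketch under that assumption:

def read_detections(path='detection.txt'):
    # Parse detection.txt back into {frame_index: [(x, y, w, h), ...]}.
    frames = {}
    with open(path) as f:
        for line in f:
            parts = line.split()
            if not parts:
                continue
            vals = [float(v) for v in parts[1:]]
            # Boxes were written as flat quadruples.
            frames[int(parts[0])] = [tuple(vals[i:i + 4])
                                     for i in range(0, len(vals), 4)]
    return frames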