Code example #1
File: demo.py  Project: dapladoc/people_counter
def main():
    input_video_filepath = sys.argv[1]
    write_video_flag = False
    output_video_filepath = ""
    if len(sys.argv) > 2:
        output_video_filepath = sys.argv[2]
        write_video_flag = True
    async_video_flag = False

    config = OmegaConf.load("config.yaml")
    detector = LineCrossingDetector(config)
    counters = [PeopleCounter(**c) for c in config.people_counter]

    if async_video_flag:
        video_capture = VideoCaptureAsync(input_video_filepath)
    else:
        video_capture = cv2.VideoCapture(input_video_filepath)

    if async_video_flag:
        video_capture.start()

    if write_video_flag:
        w = int(video_capture.get(3))
        h = int(video_capture.get(4))

        fourcc = cv2.VideoWriter_fourcc(*"XVID")
        output_writer = cv2.VideoWriter(output_video_filepath, fourcc, 30,
                                        (w, h))

    frame_index = 0
    pbar = tqdm(total=int(video_capture.get(cv2.CAP_PROP_FRAME_COUNT)))
    # hard-coded window: frames before 10000 are skipped, processing stops at 12000
    while frame_index < 12000:
        ret, frame = video_capture.read()
        frame_index = frame_index + 1
        if not ret:
            break
        if frame_index < 10000:
            continue

        detections = detector.detect(frame, visualize=True)
        for counter in counters:
            counter.update(detections, frame_index)
            counter.visualize(frame)
        for d in detections:
            print(
                f"Frame: {frame_index}. Track id: {d.track_id}. Line id: {d.line_id}"
            )
        if write_video_flag:
            output_writer.write(frame)
        pbar.update()

    if async_video_flag:
        video_capture.stop()
    else:
        video_capture.release()

    if write_video_flag:
        output_writer.release()
Code example #2
def main(yolo):
    # Definition of the parameters
    max_cosine_distance = 0.2
    nn_budget = None
    nms_max_overlap = 1.0

    output_format = 'mp4'

    initialize_door_by_yourself = False
    door_array = None
    # Deep SORT
    model_filename = '../model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    show_detections = True
    writeVideo_flag = True
    asyncVideo_flag = False

    error_values = []
    check_gpu()
    files = sorted(os.listdir('data_files/videos'))

    for video_name in files:
        print("opening video: {}".format(video_name))
        file_path = join('data_files/videos', video_name)
        output_name = 'save_data/out_' + video_name[0:-3] + output_format
        counter = Counter(counter_in=0, counter_out=0, track_id=0)
        truth = get_truth(video_name)

        if asyncVideo_flag:
            video_capture = VideoCaptureAsync(file_path)
        else:
            video_capture = cv2.VideoCapture(file_path)

        if asyncVideo_flag:
            video_capture.start()

        if writeVideo_flag:
            if asyncVideo_flag:
                w = int(video_capture.cap.get(3))
                h = int(video_capture.cap.get(4))
            else:
                w = int(video_capture.get(3))
                h = int(video_capture.get(4))
            fourcc = cv2.VideoWriter_fourcc(*'XVID')
            out = cv2.VideoWriter(output_name, fourcc, 15, (w, h))
            frame_index = -1

        fps = 0.0
        fps_imutils = imutils.video.FPS().start()

        all_doors = read_door_info('data_files/doors_info_links.json')
        door_array = all_doors[video_name]
        rect_door = Rectangle(door_array[0], door_array[1], door_array[2],
                              door_array[3])

        border_door = door_array[3]
        while True:
            ret, frame = video_capture.read()  # frame shape 640*480*3
            if not ret:
                y1 = (counter.counter_in - truth.inside)**2
                y2 = (counter.counter_out - truth.outside)**2
                total_count = counter.return_total_count()
                true_total = truth.inside + truth.outside
                if true_total != 0:
                    err = abs(total_count - true_total) / true_total
                else:
                    err = abs(total_count - true_total)
                mse = (y1 + y2) / 2
                log_res = "in video: {}\n predicted / true\n counter in: {} / {}\n counter out: {} / {}\n" \
                          " total: {} / {}\n error: {}\n mse error: {}\n______________\n".format(video_name,
                                                                                                 counter.counter_in,
                                                                                                 truth.inside,
                                                                                                 counter.counter_out,
                                                                                                 truth.outside,
                                                                                                 total_count,
                                                                                                 true_total, err, mse)
                with open('../log_results.txt', 'a') as log:
                    log.write(log_res)
                print(log_res)
                error_values.append(err)
                break

            t1 = time.time()

            image = Image.fromarray(frame[..., ::-1])  # bgr to rgb
            boxes, confidence, classes = yolo.detect_image(image)

            features = encoder(frame, boxes)
            detections = [
                Detection(bbox, confidence, cls, feature)
                for bbox, confidence, cls, feature in zip(
                    boxes, confidence, classes, features)
            ]

            # Run non-maxima suppression.
            boxes = np.array([d.tlwh for d in detections])
            scores = np.array([d.confidence for d in detections])
            classes = np.array([d.cls for d in detections])
            indices = preprocessing.non_max_suppression(
                boxes, nms_max_overlap, scores)
            detections = [detections[i] for i in indices]

            # Call the tracker
            tracker.predict()
            tracker.update(detections)

            cv2.rectangle(frame, (int(door_array[0]), int(door_array[1])),
                          (int(door_array[2]), int(door_array[3])),
                          (23, 158, 21), 3)
            for det in detections:
                bbox = det.to_tlbr()
                if show_detections and len(classes) > 0:
                    score = "%.2f" % (det.confidence * 100) + "%"
                    # rect_head = Rectangle(bbox[0], bbox[1], bbox[2], bbox[3])
                    # rect_door = Rectangle( int(door_array[0]), int(door_array[1]), int(door_array[2]), int(door_array[3]) )
                    # intersection = rect_head & rect_door
                    #
                    # if intersection:
                    #     squares_coeff = rect_square(*intersection)/ rect_square(*rect_head)
                    #     cv2.putText(frame, score + " inter: " + str(round(squares_coeff, 3)), (int(bbox[0]), int(bbox[3])), 0,
                    #             1e-3 * frame.shape[0], (0, 100, 255), 5)
                    cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[2]), int(bbox[3])), (255, 0, 0), 3)

            for track in tracker.tracks:
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                bbox = track.to_tlbr()
                # first appearance of an object with this track.track_id

                if track.track_id not in counter.people_init or counter.people_init[
                        track.track_id] == 0:
                    counter.obj_initialized(track.track_id)
                    rect_head = Rectangle(bbox[0], bbox[1], bbox[2], bbox[3])

                    intersection = rect_head & rect_door
                    if intersection:

                        intersection_square = rect_square(*intersection)
                        head_square = rect_square(*rect_head)
                        rat = intersection_square / head_square


                        #     was initialized in door, probably going in
                        if rat >= 0.7:
                            counter.people_init[track.track_id] = 2
                            #     initialized in the bus, mb going out
                        elif rat <= 0.4 or bbox[3] > border_door:
                            counter.people_init[track.track_id] = 1
                        #     initialized between the exit and bus, not obvious state
                        elif rat > 0.4 and rat < 0.7:
                            counter.people_init[track.track_id] = 3
                            counter.rat_init[track.track_id] = rat
                    # res is None, means that object is not in door contour
                    else:
                        counter.people_init[track.track_id] = 1
                    counter.people_bbox[track.track_id] = bbox
                counter.cur_bbox[track.track_id] = bbox

                adc = "%.2f" % (track.adc *
                                100) + "%"  # Average detection confidence
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
                cv2.putText(frame, "ID: " + str(track.track_id),
                            (int(bbox[0]), int(bbox[1])), 0,
                            1e-3 * frame.shape[0], (0, 255, 0), 5)

                if not show_detections:
                    track_cls = track.cls
                    cv2.putText(frame, str(track_cls),
                                (int(bbox[0]), int(bbox[3])), 0,
                                1e-3 * frame.shape[0], (0, 255, 0), 1)
                    cv2.putText(
                        frame, 'ADC: ' + adc,
                        (int(bbox[0]), int(bbox[3] + 2e-2 * frame.shape[1])),
                        0, 1e-3 * frame.shape[0], (0, 255, 0), 1)

            id_get_lost = [
                track.track_id for track in tracker.tracks
                if track.time_since_update >= 35
            ]
            # and track.age >= 29]
            # id_inside_tracked = [track.track_id for track in tracker.tracks if track.age > 60]
            for val in counter.people_init.keys():
                # check bbox also
                inter_square = 0
                cur_square = 0
                ratio = 0
                cur_c = find_centroid(counter.cur_bbox[val])
                init_c = find_centroid(counter.people_bbox[val])
                vector_person = (cur_c[0] - init_c[0], cur_c[1] - init_c[1])

                if val in id_get_lost and counter.people_init[val] != -1:
                    rect_cur = Rectangle(counter.cur_bbox[val][0],
                                         counter.cur_bbox[val][1],
                                         counter.cur_bbox[val][2],
                                         counter.cur_bbox[val][3])
                    inter = rect_cur & rect_door
                    if inter:

                        inter_square = rect_square(*inter)
                        cur_square = rect_square(*rect_cur)
                        try:
                            ratio = inter_square / cur_square
                        except ZeroDivisionError:
                            ratio = 0

                    # if vector_person[1] < 0, the current y-coordinate is smaller than the initial one,
                    # i.e. the person is moving towards the exit
                    if vector_person[1] > 70 and counter.people_init[val] == 2 \
                            and ratio < 0.9:  # and counter.people_bbox[val][3] > border_door \
                        counter.get_in()

                    elif vector_person[1] < -70 and counter.people_init[val] == 1 \
                            and ratio >= 0.6:
                        counter.get_out()

                    elif vector_person[1] < -70 and counter.people_init[val] == 3 \
                            and ratio > counter.rat_init[val] and ratio >= 0.6:
                        counter.get_out()
                    elif vector_person[1] > 70 and counter.people_init[val] == 3 \
                            and ratio < counter.rat_init[val] and ratio < 0.6:
                        counter.get_in()

                    counter.people_init[val] = -1
                    del val

            ins, outs = counter.show_counter()
            cv2.rectangle(frame, (0, 0), (250, 50), (0, 0, 0), -1, 8)
            cv2.putText(frame, "in: {}, out: {} ".format(ins, outs), (10, 35),
                        0, 1e-3 * frame.shape[0], (255, 255, 255), 3)

            cv2.namedWindow('video', cv2.WINDOW_NORMAL)
            cv2.resizeWindow('video', 1422, 800)
            cv2.imshow('video', frame)

            if writeVideo_flag:
                # save a frame
                out.write(frame)
                frame_index = frame_index + 1

            fps_imutils.update()

            if not asyncVideo_flag:
                fps = (fps + (1. / (time.time() - t1))) / 2
                # print("FPS = %f" % fps)

            # Press Q to stop!
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

        fps_imutils.stop()
        print('imutils FPS: {}'.format(fps_imutils.fps()))

        if asyncVideo_flag:
            video_capture.stop()
        else:
            video_capture.release()

        if writeVideo_flag:
            out.release()

        cv2.destroyAllWindows()
        del door_array

    mean_error = np.mean(error_values)
    print("mean error for {} videos: {}".format(len(files), mean_error))
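
Examples #2, #4, and #8 lean on small geometry helpers (Rectangle, rect_square, find_centroid) that are imported from the projects and not shown on this page. The following is only a minimal sketch of compatible implementations, inferred from how they are used above (a Rectangle that supports & for intersection and unpacks into four coordinates, an area helper, and a bounding-box centroid); the originals in the repositories may differ.

from collections import namedtuple

class Rectangle(namedtuple('Rectangle', 'xmin ymin xmax ymax')):
    # a & b returns the intersection Rectangle, or None if the boxes do not overlap
    def __and__(self, other):
        xmin = max(self.xmin, other.xmin)
        ymin = max(self.ymin, other.ymin)
        xmax = min(self.xmax, other.xmax)
        ymax = min(self.ymax, other.ymax)
        if xmin < xmax and ymin < ymax:
            return Rectangle(xmin, ymin, xmax, ymax)
        return None

def rect_square(xmin, ymin, xmax, ymax):
    # area of an axis-aligned rectangle given by four coordinates
    return (xmax - xmin) * (ymax - ymin)

def find_centroid(bbox):
    # centre of a (x1, y1, x2, y2) box
    return ((bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2)
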
Code example #3
File: baseline.py  Project: anminhhung/test_top2
    def run(self):
        asyncVideo_flag = self.args.asyncVideo_flag
        writeVideo_flag = self.args.writeVideo_flag

        if asyncVideo_flag:
            video_capture = VideoCaptureAsync(self.video_path)
        else:
            video_capture = cv2.VideoCapture(self.video_path)

        if asyncVideo_flag:
            video_capture.start()

        if writeVideo_flag:
            if asyncVideo_flag:
                w = int(video_capture.cap.get(3))
                h = int(video_capture.cap.get(4))
            else:
                w = int(video_capture.get(3))
                h = int(video_capture.get(4))
            fourcc = cv2.VideoWriter_fourcc(*'XVID')
            out = cv2.VideoWriter('output_yolov4.avi', fourcc, 30, (w, h))
            frame_index = -1

        fps = 0.0
        fps_imutils = imutils.video.FPS().start()

        count_frame = 0
        while True:
            count_frame += 1
            t1 = time.time()
            ret, frame = video_capture.read()
            if not ret:
                break

            _frame = frame
            # process
            frame = self.process(frame, _frame, count_frame)

            # visualize
            if self.args.visualize:
                frame = imutils.resize(frame, width=1000)
                cv2.imshow("Final result", frame)

            if writeVideo_flag:  # and not asyncVideo_flag:
                # save a frame
                out.write(frame)
                frame_index = frame_index + 1

            fps_imutils.update()

            if not asyncVideo_flag:
                fps = (fps + (1. / (time.time() - t1))) / 2
                print("FPS = %f" % (fps))

            # Press Q to stop!
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

        fps_imutils.stop()
        print('imutils FPS: {}'.format(fps_imutils.fps()))

        if asyncVideo_flag:
            video_capture.stop()
        else:
            video_capture.release()

        if writeVideo_flag:
            out.release()

        cv2.destroyAllWindows()
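
Every example switches between cv2.VideoCapture and a VideoCaptureAsync class depending on asyncVideo_flag; the class ships with these projects and is not reproduced here. A minimal sketch of such a threaded capture wrapper, matching the way it is called above (start() / read() / stop() and a .cap attribute), might look roughly like this:

import threading

import cv2

class VideoCaptureAsync:
    # Reads frames on a background thread so the processing loop never blocks on I/O.
    def __init__(self, src):
        self.cap = cv2.VideoCapture(src)
        self.grabbed, self.frame = self.cap.read()
        self.started = False
        self.lock = threading.Lock()

    def start(self):
        self.started = True
        self.thread = threading.Thread(target=self._update, daemon=True)
        self.thread.start()
        return self

    def _update(self):
        while self.started:
            grabbed, frame = self.cap.read()
            with self.lock:
                self.grabbed, self.frame = grabbed, frame

    def read(self):
        with self.lock:
            frame = self.frame.copy() if self.frame is not None else None
            return self.grabbed, frame

    def stop(self):
        self.started = False
        self.thread.join()
        self.cap.release()
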
Code example #4
def main(yolo):
    # Definition of the parameters
    max_cosine_distance = 0.2
    nn_budget = None
    nms_max_overlap = 1.0
    output_format = 'mp4'

    # Deep SORT
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    show_detections = True
    writeVideo_flag = True
    asyncVideo_flag = False

    fpeses = []

    check_gpu()
    video_name = 'test1.mp4'

    print("opening video: {}".format(video_name))
    file_path = join('data_files/videos', video_name)
    output_name = 'save_data/out_' + video_name[0:-3] + output_format
    counter = Counter(counter_in=0, counter_out=0, track_id=0)

    if asyncVideo_flag:
        video_capture = VideoCaptureAsync(file_path)
    else:
        video_capture = cv2.VideoCapture(file_path)

    if asyncVideo_flag:
        video_capture.start()
        w = int(video_capture.cap.get(3))
        h = int(video_capture.cap.get(4))
    else:
        w = int(video_capture.get(3))
        h = int(video_capture.get(4))

    if writeVideo_flag:
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter(output_name, fourcc, 15, (w, h))

    left_array = [0, 0, w / 2, h]
    fps = 0.0
    fps_imutils = imutils.video.FPS().start()

    rect_left = Rectangle(left_array[0], left_array[1], left_array[2],
                          left_array[3])

    border_door = left_array[3]
    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if not ret:
            with open('log_results.txt', 'a') as log:
                log.write('1')
            break

        t1 = time.time()

        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb
        boxes, confidence, classes = yolo.detect_image(image)

        features = encoder(frame, boxes)
        detections = [
            Detection(bbox, confidence, cls,
                      feature) for bbox, confidence, cls, feature in zip(
                          boxes, confidence, classes, features)
        ]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.cls for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap,
                                                    scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        cv2.rectangle(frame, (int(left_array[0]), int(left_array[1])),
                      (int(left_array[2]), int(left_array[3])), (23, 158, 21),
                      3)
        if len(detections) != 0:
            counter.someone_inframe()
            for det in detections:
                bbox = det.to_tlbr()
                if show_detections and len(classes) > 0:
                    score = "%.2f" % (det.confidence * 100) + "%"
                    cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[2]), int(bbox[3])), (255, 0, 0), 3)
        else:
            if counter.need_to_clear():
                counter.clear_all()

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()

            if track.track_id not in counter.people_init or counter.people_init[
                    track.track_id] == 0:
                counter.obj_initialized(track.track_id)
                ratio_init = find_ratio_ofbboxes(bbox=bbox,
                                                 rect_compare=rect_left)

                if ratio_init > 0:
                    if ratio_init >= 0.8 and bbox[3] < left_array[3]:
                        counter.people_init[
                            track.track_id] = 2  # man in left side
                    elif ratio_init < 0.8 and bbox[3] > left_array[
                            3]:  # initialized in the bus, mb going out
                        counter.people_init[track.track_id] = 1
                else:
                    counter.people_init[track.track_id] = 1
                counter.people_bbox[track.track_id] = bbox
            counter.cur_bbox[track.track_id] = bbox

            adc = "%.2f" % (track.adc *
                            100) + "%"  # Average detection confidence
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            cv2.putText(frame, "ID: " + str(track.track_id),
                        (int(bbox[0]), int(bbox[1]) + 50), 0,
                        1e-3 * frame.shape[0], (0, 255, 0), 5)

            if not show_detections:
                track_cls = track.cls
                cv2.putText(frame, str(track_cls),
                            (int(bbox[0]), int(bbox[3])), 0,
                            1e-3 * frame.shape[0], (0, 255, 0), 1)
                cv2.putText(
                    frame, 'ADC: ' + adc,
                    (int(bbox[0]), int(bbox[3] + 2e-2 * frame.shape[1])), 0,
                    1e-3 * frame.shape[0], (0, 255, 0), 1)

        id_get_lost = [
            track.track_id for track in tracker.tracks
            if track.time_since_update >= 5
        ]

        # TODO clear people_init and other dicts
        for val in counter.people_init.keys():
            ratio = 0
            cur_c = find_centroid(counter.cur_bbox[val])
            init_c = find_centroid(counter.people_bbox[val])
            vector_person = (cur_c[0] - init_c[0], cur_c[1] - init_c[1])

            if val in id_get_lost and counter.people_init[val] != -1:
                ratio = find_ratio_ofbboxes(bbox=counter.cur_bbox[val],
                                            rect_compare=rect_left)

                if vector_person[0] > 200 and counter.people_init[val] == 2 \
                        and ratio < 0.7:  # and counter.people_bbox[val][3] > border_door \
                    counter.get_out()
                    print(vector_person[0], counter.people_init[val], ratio)

                elif vector_person[0] < -100 and counter.people_init[val] == 1 \
                        and ratio >= 0.7:
                    counter.get_in()
                    print(vector_person[0], counter.people_init[val], ratio)

                counter.people_init[val] = -1
                del val

        ins, outs = counter.show_counter()
        cv2.rectangle(frame, (700, 0), (950, 50), (0, 0, 0), -1, 8)
        cv2.putText(frame, "in: {}, out: {} ".format(ins, outs), (710, 35), 0,
                    1e-3 * frame.shape[0], (255, 255, 255), 3)

        cv2.namedWindow('video', cv2.WINDOW_NORMAL)
        # cv2.resizeWindow('video', 1422, 800)
        cv2.imshow('video', frame)

        if writeVideo_flag:
            out.write(frame)

        fps_imutils.update()

        if not asyncVideo_flag:
            fps = (fps + (1. / (time.time() - t1))) / 2
            print("FPS = %f" % fps)

            if len(fpeses) < 15:
                fpeses.append(round(fps, 2))

            elif len(fpeses) == 15:
                # take the median of the first 15 measurements, then override it
                # with a hard-coded value that is passed to the counter
                median_fps = float(np.median(np.array(fpeses)))
                fps = round(median_fps, 1)
                print('median fps: ', fps)
                fps = 20
                counter.fps = fps
                fpeses.append(fps)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    if asyncVideo_flag:
        video_capture.stop()
    else:
        video_capture.release()

    if writeVideo_flag:
        out.release()

    cv2.destroyAllWindows()
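
Example #4 replaces the explicit Rectangle intersection of example #2 with find_ratio_ofbboxes(bbox, rect_compare), which is also imported from the project. Judging from how its return value is used (0 when there is no overlap, otherwise the fraction of the box that lies inside rect_compare), a compatible sketch, reusing the Rectangle and rect_square helpers sketched after example #2, would be:

def find_ratio_ofbboxes(bbox, rect_compare):
    # fraction of the (x1, y1, x2, y2) box that lies inside rect_compare; 0 if no overlap
    rect_box = Rectangle(bbox[0], bbox[1], bbox[2], bbox[3])
    inter = rect_box & rect_compare
    if not inter:
        return 0
    box_area = rect_square(*rect_box)
    return rect_square(*inter) / box_area if box_area else 0
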
Code example #5
def main(yolo):
    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0
    frame_wid = 1280
    frame_hei = 720

    # Deep SORT
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    # tracker = Tracker(metric)

    show_detections = True
    writeVideo_flag = True
    asyncVideo_flag = False

    # Parent directory that holds all the video data
    #root_video_path = '/home/yhzn/ws/clip/'
    root_video_path = '/home/yhzn/ws/citywalks/'
    all_video_path = []
    read_video(root_video_path, all_video_path)

    break_flag = 0
    result = []
    myvideo_result = []
    print("Video number = ", len(all_video_path))
    just_video_cnt = 0

    # Iterate over all video paths in turn and read them; each_video is a str path
    for each_video in all_video_path:

        tracker = Tracker(metric)

        file_path = each_video
        # Extract the city name and video number from the path
        video_name_split = file_path.split('/')
        city_name = video_name_split[-2]
        video_number = video_name_split[-1]
        print(city_name, video_number)

        if asyncVideo_flag:
            video_capture = VideoCaptureAsync(file_path)
        else:
            video_capture = cv2.VideoCapture(file_path)

        if asyncVideo_flag:
            video_capture.start()

        if writeVideo_flag:
            if asyncVideo_flag:
                w = int(video_capture.cap.get(3))
                h = int(video_capture.cap.get(4))
            else:
                w = int(video_capture.get(3))
                h = int(video_capture.get(4))
            fourcc = cv2.VideoWriter_fourcc(*'XVID')
            out = cv2.VideoWriter('output_yolov4.avi', fourcc, 30, (w, h))
            frame_index = -1

        fps = 0.0
        fps_imutils = imutils.video.FPS().start()

        frame_num = 0

        # Make sure the output directories exist
        city_dir = ''.join(['/home/yhzn/ws/crop_image/', city_name])
        if not os.path.exists(city_dir):
            os.mkdir(city_dir)
        video_dir = ''.join(
            ['/home/yhzn/ws/crop_image/', city_name, '/', video_number])
        if not os.path.exists(video_dir):
            os.mkdir(video_dir)

        # Counters used only to show processing progress
        just_frame_cnt = 0
        just_video_cnt += 1
        while True:
            ret, frame = video_capture.read()
            if not ret:
                break

            t1 = time.time()

            image = Image.fromarray(frame[..., ::-1])  # bgr to rgb
            boxes, confidence, classes = yolo.detect_image(image)

            features = encoder(frame, boxes)
            detections = [
                Detection(bbox, confidence, cls, feature)
                for bbox, confidence, cls, feature in zip(
                    boxes, confidence, classes, features)
            ]

            # Run non-maxima suppression.
            boxes = np.array([d.tlwh for d in detections])
            scores = np.array([d.confidence for d in detections])
            classes = np.array([d.cls for d in detections])
            indices = preprocessing.non_max_suppression(
                boxes, nms_max_overlap, scores)
            detections = [detections[i] for i in indices]

            # Call the tracker
            tracker.predict()
            tracker.update(detections)

            # Whether to draw the raw detection boxes (kept commented out)
            # for det in detections:
            #     bbox = det.to_tlbr()
            #     if show_detections and len(classes) > 0:
            #         det_cls = det.cls
            #         score = "%.2f" % (det.confidence * 100) + "%"
            #         cv2.putText(frame, str(det_cls) + " " + score, (int(bbox[0]), int(bbox[3])), 0,
            #                     1e-3 * frame.shape[0], (0, 0, 255), 1)
            #         cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)

            # Only the tracking boxes are used
            tmp_frame = frame.copy()

            for track in tracker.tracks:
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                bbox = track.to_tlbr()

                # print("ord1: ", bbox[0], bbox[1], bbox[2], bbox[3])
                # Clamp the box to the frame boundaries
                bbox[0] = min(max(bbox[0], 0), frame_wid - 1)
                bbox[1] = min(max(bbox[1], 0), frame_hei - 1)
                bbox[2] = min(max(bbox[2], 0), frame_wid - 1)
                bbox[3] = min(max(bbox[3], 0), frame_hei - 1)
                # print("ord2: ", bbox[0], bbox[1], bbox[2], bbox[3])

                # if int(bbox[0]) < 0 or int(bbox[1]) < 0 or int(bbox[2]) < 0 or int(bbox[3]) < 0:
                #     continue

                # Crop the pedestrian image
                crop_image = frame[int(bbox[1]):int(bbox[3]),
                                   int(bbox[0]):int(bbox[2])]
                crop_image = cv2.resize(crop_image, (128, 256))
                cv2.imwrite(
                    video_dir + '/frame_' + str(frame_num).zfill(4) + '_ped_' +
                    str(track.track_id) + '.png', crop_image)

                # Average detection confidence
                adc = "%.2f" % (track.adc * 100) + "%"
                cv2.rectangle(tmp_frame, (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])), (0, 255, 0), 2)
                cv2.putText(tmp_frame, "ID: " + str(track.track_id),
                            (int(bbox[0]), int(bbox[1])), 0,
                            1e-3 * frame.shape[0], (0, 0, 255), 1)

                if not show_detections:
                    track_cls = track.cls
                    cv2.putText(tmp_frame, str(track_cls),
                                (int(bbox[0]), int(bbox[3])), 0,
                                1e-3 * frame.shape[0], (0, 255, 0), 1)
                    cv2.putText(
                        tmp_frame, 'ADC: ' + adc,
                        (int(bbox[0]), int(bbox[3] + 2e-2 * frame.shape[1])),
                        0, 1e-3 * frame.shape[0], (0, 255, 0), 1)

                cx = int((int(bbox[0]) + int(bbox[2])) / 2)
                cy = int((int(bbox[1]) + int(bbox[3])) / 2)
                w = int(int(bbox[2]) - int(bbox[0]))
                h = int(int(bbox[3]) - int(bbox[1]))

                # The appends below collect every tracking result so it can later be written to the .npy files
                tmp_result = []
                tmp_result.append(video_number)
                tmp_result.append(city_name)
                tmp_result.append(str(frame_num))
                tmp_result.append(str(track.track_id))
                tmp_result.append(str(cx))
                tmp_result.append(str(cy))
                tmp_result.append(str(w))
                tmp_result.append(str(h))
                tmp_result.append('0')
                tmp_result.append('0')
                tmp_result.append('0')
                if h > 50:
                    result.append(tmp_result)

                # my video detection
                # Saved in the format required by the DTP project, similar to the record above; used for testing
                tmp_myvideo = []
                tmp_myvideo.append(city_name + '/' + video_number)
                tmp_myvideo.append(str(frame_num))
                tmp_myvideo.append(int(bbox[0]))
                tmp_myvideo.append(int(bbox[1]))
                tmp_myvideo.append(int(bbox[2]))
                tmp_myvideo.append(int(bbox[3]))
                # track
                tmp_myvideo.append(str(track.track_id))
                # detection length
                tmp_myvideo.append(str(0))
                # Height
                tmp_myvideo.append(str(h))
                if h > 50:
                    myvideo_result.append(tmp_myvideo)

            frame_num += 1

            # cv2.imshow('', tmp_frame)

            if writeVideo_flag:
                # save a frame
                out.write(frame)
                frame_index = frame_index + 1

            fps_imutils.update()

            # Print some progress information
            just_frame_cnt += 1
            fps = (fps + (1. / (time.time() - t1))) / 2
            print("[%d / %d]" % (just_video_cnt, len(all_video_path)), end=' ')
            print("[frame: %d] [fps: %f]" %
                  (just_frame_cnt, (fps + (1. / (time.time() - t1))) / 2))

            # Show FPS (kept commented out)
            # if not asyncVideo_flag:
            #     fps = (fps + (1. / (time.time() - t1))) / 2
            #     print("FPS = %f" % (fps))

            # Press Q to stop!
            #if cv2.waitKey(1) & 0xFF == ord('q'):
            #   break_flag = 1
            #  break
        #if break_flag == 1:
        #   break
    '''
        Save the labelled tracking results
    '''
    f = open('./clip_data/tracking_result.csv', 'w')
    csv_writer = csv.writer(f)
    # Write the CSV header row
    csv_writer.writerow([
        "vid", "filename", "frame_num", "track", "cx", "cy", "w", "h",
        "track_length", "labeled", "requires_features"
    ])
    # Sort by city name and track id; frame numbers are already in order
    result.sort(key=lambda x: (x[1], eval(x[3])))

    # Fill in the track_length column
    for i in range(len(result)):
        if i == 0:
            pass
        else:
            if result[i][3] == result[i - 1][3] and eval(
                    result[i][2]) == eval(result[i - 1][2]) + 1:
                result[i][8] = str(eval(result[i - 1][8]) + 1)
            else:
                result[i][8] = '0'

    # Fill in the labeled and requires_features columns
    for i in range(len(result)):
        # Rows without 30 past frames and 60 future frames are not predicted, since there is not enough data
        if i <= 28 or i >= len(result) - 60:
            pass
            # csv_writer.writerow(result[i])
        else:
            track_index_now = eval(result[i][8])
            track_index_pre29 = eval(result[i - 29][8])
            track_index_post60 = eval(result[i + 60][8])
            if result[i][3] == result[i - 29][3] and result[i][3] == result[i + 60][
                    3] and track_index_now == track_index_pre29 + 29 and track_index_now == track_index_post60 - 60:
                result[i][9] = '1'
                result[i][10] = '1'
                for j in range(i - 29, i):
                    result[j][10] = '1'
            # csv_writer.writerow(result[i])

    for i in range(len(result)):
        csv_writer.writerow(result[i])

    # Write the DTP tracking-result CSVs
    train_file1 = open('./clip_data/myvideo_yolo_detection_train1.csv', 'w')
    train_file2 = open('./clip_data/myvideo_yolo_detection_train2.csv', 'w')
    train_file3 = open('./clip_data/myvideo_yolo_detection_train3.csv', 'w')
    val_file1 = open('./clip_data/myvideo_yolo_detection_val1.csv', 'w')
    val_file2 = open('./clip_data/myvideo_yolo_detection_val2.csv', 'w')
    val_file3 = open('./clip_data/myvideo_yolo_detection_val3.csv', 'w')
    test_file1 = open('./clip_data/myvideo_yolo_detection_test1.csv', 'w')
    test_file2 = open('./clip_data/myvideo_yolo_detection_test2.csv', 'w')
    test_file3 = open('./clip_data/myvideo_yolo_detection_test3.csv', 'w')

    # Create the csv writers
    csv_train1 = csv.writer(train_file1)
    csv_train1.writerow([
        "filename", "frame_num", "bb1", "bb2", "bb3", "bb4", "track",
        "detection_length", "Height"
    ])
    csv_train2 = csv.writer(train_file2)
    csv_train2.writerow([
        "filename", "frame_num", "bb1", "bb2", "bb3", "bb4", "track",
        "detection_length", "Height"
    ])
    csv_train3 = csv.writer(train_file3)
    csv_train3.writerow([
        "filename", "frame_num", "bb1", "bb2", "bb3", "bb4", "track",
        "detection_length", "Height"
    ])

    csv_val1 = csv.writer(val_file1)
    csv_val1.writerow([
        "filename", "frame_num", "bb1", "bb2", "bb3", "bb4", "track",
        "detection_length", "Height"
    ])
    csv_val2 = csv.writer(val_file2)
    csv_val2.writerow([
        "filename", "frame_num", "bb1", "bb2", "bb3", "bb4", "track",
        "detection_length", "Height"
    ])
    csv_val3 = csv.writer(val_file3)
    csv_val3.writerow([
        "filename", "frame_num", "bb1", "bb2", "bb3", "bb4", "track",
        "detection_length", "Height"
    ])

    csv_test1 = csv.writer(test_file1)
    csv_test1.writerow([
        "filename", "frame_num", "bb1", "bb2", "bb3", "bb4", "track",
        "detection_length", "Height"
    ])
    csv_test2 = csv.writer(test_file2)
    csv_test2.writerow([
        "filename", "frame_num", "bb1", "bb2", "bb3", "bb4", "track",
        "detection_length", "Height"
    ])
    csv_test3 = csv.writer(test_file3)
    csv_test3.writerow([
        "filename", "frame_num", "bb1", "bb2", "bb3", "bb4", "track",
        "detection_length", "Height"
    ])

    for i in range(len(myvideo_result)):
        tmp_path = myvideo_result[i][0]
        city = tmp_path.split('/')[0]
        # train
        if city in Train_cities1:
            csv_train1.writerow(myvideo_result[i])
        if city in Train_cities2:
            csv_train2.writerow(myvideo_result[i])
        if city in Train_cities3:
            csv_train3.writerow(myvideo_result[i])

        # val
        if city in Validation_cities1:
            csv_val1.writerow(myvideo_result[i])
        if city in Validation_cities2:
            csv_val2.writerow(myvideo_result[i])
        if city in Validation_cities3:
            csv_val3.writerow(myvideo_result[i])

        # test
        if city in Test_cities1:
            csv_test1.writerow(myvideo_result[i])
        if city in Test_cities2:
            csv_test2.writerow(myvideo_result[i])
        if city in Test_cities3:
            csv_test3.writerow(myvideo_result[i])

    f.close()
    train_file1.close()
    train_file2.close()
    train_file3.close()
    val_file1.close()
    val_file2.close()
    val_file3.close()
    test_file1.close()
    test_file2.close()
    test_file3.close()
    '''
        Write the .npy files required by the STED model
        size = (n, 8, 25)
    '''
    # Save the bounding-box numpy data
    train_val_test_box(result)
    # Save the output label data
    train_val_test_label(result)
    """
        my video detection write
        Saved in the tracking format required by DTP
    """
    f = open('./clip_data/myvideo_yolo_detection.csv', 'w')
    csv_writer = csv.writer(f)
    csv_writer.writerow([
        "filename", "frame_num", "bb1", "bb2", "bb3", "bb4", "track",
        "detection_length", "Height"
    ])
    # Sort by track id
    myvideo_result.sort(key=lambda x: x[6])

    # Fill in the detection_length column
    for i in range(len(myvideo_result)):
        if i == 0:
            pass
        else:
            # If the filename and track id match and the frame numbers are consecutive, the detection extends the track
            if myvideo_result[i][6] == myvideo_result[i - 1][6] and eval(
                    myvideo_result[i][1]) == eval(
                        myvideo_result[i - 1][1]
                    ) + 1 and myvideo_result[i][0] == myvideo_result[i - 1][0]:
                myvideo_result[i][7] = str(eval(myvideo_result[i - 1][7]) + 1)
            else:
                myvideo_result[i][7] = '0'

    for i in range(len(myvideo_result)):
        csv_writer.writerow(myvideo_result[i])

    f.close()

    fps_imutils.stop()
    print('imutils FPS: {}'.format(fps_imutils.fps()))

    if asyncVideo_flag:
        video_capture.stop()
    else:
        video_capture.release()

    if writeVideo_flag:
        out.release()
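
Example #5 collects its input with read_video(root_video_path, all_video_path), which is not shown. A sketch consistent with how the resulting paths are split afterwards (.../<city_name>/<video_number>) could simply walk the directory tree; the extension list here is an assumption:

import os

def read_video(root_dir, out_paths, extensions=('.mp4', '.avi', '.webm')):
    # Append the full path of every video file below root_dir to out_paths.
    for dirpath, _, filenames in os.walk(root_dir):
        for name in sorted(filenames):
            if name.lower().endswith(extensions):
                out_paths.append(os.path.join(dirpath, name))
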
Code example #6
def main(yolo):

    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0
    
    # Deep SORT
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename,batch_size=1)
    
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    writeVideo_flag = True
    asyncVideo_flag = False

    file_path = 'video.webm'
    if asyncVideo_flag :
        video_capture = VideoCaptureAsync(file_path)
    else:
        video_capture = cv2.VideoCapture(file_path)

    if asyncVideo_flag:
        video_capture.start()

    if writeVideo_flag:
        if asyncVideo_flag:
            w = int(video_capture.cap.get(3))
            h = int(video_capture.cap.get(4))
        else:
            w = int(video_capture.get(3))
            h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter('output_yolov4.avi', fourcc, 30, (w, h))
        frame_index = -1

    fps = 0.0
    fps_imutils = imutils.video.FPS().start()

    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if not ret:
            break

        t1 = time.time()

        image = Image.fromarray(frame[...,::-1])  # bgr to rgb
        # run detection once and unpack boxes and confidences
        boxs, confidence = yolo.detect_image(image)[:2]

        features = encoder(frame,boxs)

        detections = [Detection(bbox, confidence, feature) for bbox, confidence, feature in zip(boxs, confidence, features)]
        
        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]
        
        # Call the tracker
        tracker.predict()
        tracker.update(detections)
        
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue 
            bbox = track.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),(255,255,255), 2)
            cv2.putText(frame, str(track.track_id),(int(bbox[0]), int(bbox[1])),0, 5e-3 * 200, (0,255,0),2)

        for det in detections:
            bbox = det.to_tlbr()
            score = "%.2f" % round(det.confidence * 100, 2)
            cv2.rectangle(frame,(int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),(255,0,0), 2)
            cv2.putText(frame, score + '%', (int(bbox[0]), int(bbox[3])), 0, 5e-3 * 130, (0,255,0),2)
            
        cv2.imshow('', frame)

        if writeVideo_flag: # and not asyncVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1

        fps_imutils.update()

        fps = (fps + (1./(time.time()-t1))) / 2
        print("FPS = %f"%(fps))
        
        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    fps_imutils.stop()
    print('imutils FPS: {}'.format(fps_imutils.fps()))

    if asyncVideo_flag:
        video_capture.stop()
    else:
        video_capture.release()

    if writeVideo_flag:
        out.release()

    cv2.destroyAllWindows()
Code example #7
File: demo.py  Project: aj1m0n/Deep-SORT-YOLOv4
def main(yolo):

    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0
    
    # Deep SORT
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    tracking = True
    writeVideo_flag = False
    asyncVideo_flag = False
    webcamera_flag = True
    ipcamera_flag = False
    udp_flag = True

    file_path = '/workspace/data/C0133_v4.mp4'
    if asyncVideo_flag :
        video_capture = VideoCaptureAsync(file_path)
    elif ipcamera_flag :
        video_capture = cv2.VideoCapture('rtsp://*****:*****@192.168.2.201/ONVIF/MediaInput?profile=def_profile1')
    elif webcamera_flag :
        video_capture = cv2.VideoCapture(0)
    else:
        video_capture = cv2.VideoCapture(file_path)
        
    if asyncVideo_flag:
        video_capture.start()

    if writeVideo_flag:
        if asyncVideo_flag:
            w = int(video_capture.cap.get(3))
            h = int(video_capture.cap.get(4))
        else:
            w = int(video_capture.get(3))
            h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter('output_yolov4.avi', fourcc, 30, (w, h))
        frame_index = -1

    if udp_flag:
        HOST = ''
        PORT = 5000
        address = '192.168.2.255'
        sock = socket(AF_INET, SOCK_DGRAM)
        sock.setsockopt(SOL_SOCKET, SO_BROADCAST, 1)
        sock.bind((HOST, PORT))


    fps = 0.0
    fps_imutils = imutils.video.FPS().start()
    
    savetime = 0

    while True:
        nowtime = datetime.datetime.now().isoformat()
        ret, frame = video_capture.read()  # frame shape 640*480*3
        t1 = time.time()

        if time.time() - savetime >= 30: 
            print('save data') 
            cv2.imwrite("/workspace/images/image.png", frame)
            savetime = time.time()
        image = Image.fromarray(frame[...,::-1])  # bgr to rgb
        boxes, confidence, classes = yolo.detect_image(image)

        if tracking:
            features = encoder(frame, boxes)

            detections = [Detection(bbox, confidence, cls, feature) for bbox, confidence, cls, feature in
                        zip(boxes, confidence, classes, features)]
        else:
            detections = [Detection_YOLO(bbox, confidence, cls) for bbox, confidence, cls in
                        zip(boxes, confidence, classes)]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        if tracking:
            # Call the tracker
            tracker.predict()
            tracker.update(detections)

            for track in tracker.tracks:
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                bbox = track.to_tlbr()
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
                cv2.putText(frame, "ID: " + str(track.track_id), (int(bbox[0]), int(bbox[1])), 0,
                            1.5e-3 * frame.shape[0], (0, 255, 0), 1)
                # broadcast "timestamp,track_id,x1,y1,x2,y2" over UDP
                message = ",".join([nowtime, str(track.track_id),
                                    str(int(bbox[0])), str(int(bbox[1])),
                                    str(int(bbox[2])), str(int(bbox[3]))])
                if udp_flag:
                    sock.sendto(message.encode('utf-8'), (address, PORT))


        for det in detections:
            bbox = det.to_tlbr()
            score = "%.2f" % round(det.confidence * 100, 2) + "%"
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)
            if len(classes) > 0:
                cls = det.cls
                cv2.putText(frame, str(cls) + " " + score, (int(bbox[0]), int(bbox[3])), 0,
                            1.5e-3 * frame.shape[0], (0, 255, 0), 1)

        #cv2.imshow('', frame)

        if writeVideo_flag: # and not asyncVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1

        fps_imutils.update()

        if not asyncVideo_flag:
            fps = (fps + (1./(time.time()-t1))) / 2
            print("FPS = %f"%(fps))
        
        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    fps_imutils.stop()
    print('imutils FPS: {}'.format(fps_imutils.fps()))

    if asyncVideo_flag:
        video_capture.stop()
    else:
        video_capture.release()

    if writeVideo_flag:
        out.release()

    cv2.destroyAllWindows()
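
Example #7 broadcasts one comma-separated line per confirmed track over UDP (ISO timestamp, track id, and the four box coordinates). A minimal listener for another machine on the same subnet could look like this; the port mirrors the value hard-coded above and has to match your network setup:

from socket import socket, AF_INET, SOCK_DGRAM

PORT = 5000  # must match the sender in example #7

sock = socket(AF_INET, SOCK_DGRAM)
sock.bind(('', PORT))  # listen on all interfaces for the broadcast

while True:
    data, addr = sock.recvfrom(1024)
    # each datagram is "isotime,track_id,x1,y1,x2,y2"
    nowtime, track_id, x1, y1, x2, y2 = data.decode('utf-8').split(',')
    print(addr, nowtime, track_id, (x1, y1, x2, y2))
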
Code example #8
File: demo.py  Project: alievilya/deep-sort-yolov4
def main(yolo):
    # Definition of the parameters
    max_cosine_distance = 0.2
    nn_budget = None
    nms_max_overlap = 1.0

    output_format = 'mp4'
    video_name = 'bus4_2in_4out.mp4'
    file_path = join('data_files/videos', video_name)
    output_name = 'save_data/out_' + video_name[0:-3] + output_format
    initialize_door_by_yourself = False
    door_array = None
    # Deep SORT
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    show_detections = True
    writeVideo_flag = True
    asyncVideo_flag = False

    counter = Counter(counter_in=0, counter_out=0, track_id=0)

    if asyncVideo_flag:
        video_capture = VideoCaptureAsync(file_path)
    else:
        video_capture = cv2.VideoCapture(file_path)

    if asyncVideo_flag:
        video_capture.start()

    if writeVideo_flag:
        if asyncVideo_flag:
            w = int(video_capture.cap.get(3))
            h = int(video_capture.cap.get(4))
        else:
            w = int(video_capture.get(3))
            h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter(output_name, fourcc, 15, (w, h))
        frame_index = -1

    fps = 0.0
    fps_imutils = imutils.video.FPS().start()

    ret, first_frame = video_capture.read()

    if door_array is None:
        if initialize_door_by_yourself:
            door_array = select_object(first_frame)[0]
            print(door_array)
        else:
            all_doors = read_door_info('data_files/doors_info.csv')
            door_array = all_doors[video_name]

    border_door = door_array[3]
    error_values = []
    truth = get_truth(video_name)
    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if not ret:
            total_count = counter.return_total_count()
            true_total = truth.inside + truth.outside
            # guard against an empty ground truth, as in example #2
            if true_total != 0:
                err = abs(total_count - true_total) / true_total
            else:
                err = abs(total_count - true_total)
            log_res = "in video: {}\n predicted / true\n counter in: {} / {}\n counter out: {} / {}\n" \
                      " total: {} / {}\n error: {}\n______________\n".format(video_name, counter.counter_in,
                                                                             truth.inside,
                                                                             counter.counter_out, truth.outside,
                                                                             total_count, true_total, err)
            with open('log_results.txt', 'w') as file:
                file.write(log_res)
            print(log_res)
            error_values.append(err)
            break

        t1 = time.time()

        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb
        boxes, confidence, classes = yolo.detect_image(image)

        features = encoder(frame, boxes)
        detections = [
            Detection(bbox, confidence, cls,
                      feature) for bbox, confidence, cls, feature in zip(
                          boxes, confidence, classes, features)
        ]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.cls for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap,
                                                    scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        cv2.rectangle(frame, (int(door_array[0]), int(door_array[1])),
                      (int(door_array[2]), int(door_array[3])), (23, 158, 21),
                      2)

        for det in detections:
            bbox = det.to_tlbr()
            if show_detections and len(classes) > 0:
                score = "%.2f" % (det.confidence * 100) + "%"
                rect_head = Rectangle(bbox[0], bbox[1], bbox[2], bbox[3])
                rect_door = Rectangle(int(door_array[0]), int(door_array[1]),
                                      int(door_array[2]), int(door_array[3]))
                intersection = rect_head & rect_door

                if intersection:
                    squares_coeff = rect_square(*intersection) / rect_square(
                        *rect_head)
                    cv2.putText(
                        frame,
                        score + " inter: " + str(round(squares_coeff, 3)),
                        (int(bbox[0]), int(bbox[3])), 0, 1e-3 * frame.shape[0],
                        (0, 100, 255), 5)
                    cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[2]), int(bbox[3])), (255, 0, 0), 3)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            # first appearance of an object with this track.track_id

            if track.track_id not in counter.people_init or counter.people_init[
                    track.track_id] == 0:
                counter.obj_initialized(track.track_id)
                rect_head = Rectangle(bbox[0], bbox[1], bbox[2], bbox[3])
                rect_door = Rectangle(door_array[0], door_array[1],
                                      door_array[2], door_array[3])
                res = rect_head & rect_door
                if res:

                    inter_square = rect_square(*res)
                    head_square = rect_square(*rect_head)
                    #     was initialized in door, probably going in
                    if (inter_square / head_square) >= 0.8:
                        counter.people_init[track.track_id] = 2
                        #     initialized in the bus, mb going out
                    elif (inter_square /
                          head_square) <= 0.4 or bbox[3] > border_door:
                        counter.people_init[track.track_id] = 1
                # res is None, means that object is not in door contour
                else:
                    counter.people_init[track.track_id] = 1

                counter.people_bbox[track.track_id] = bbox
            counter.cur_bbox[track.track_id] = bbox

            adc = "%.2f" % (track.adc *
                            100) + "%"  # Average detection confidence
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            cv2.putText(frame, "ID: " + str(track.track_id),
                        (int(bbox[0]), int(bbox[1])), 0, 1e-3 * frame.shape[0],
                        (0, 255, 0), 5)

            if not show_detections:
                track_cls = track.cls
                cv2.putText(frame, str(track_cls),
                            (int(bbox[0]), int(bbox[3])), 0,
                            1e-3 * frame.shape[0], (0, 255, 0), 1)
                cv2.putText(
                    frame, 'ADC: ' + adc,
                    (int(bbox[0]), int(bbox[3] + 2e-2 * frame.shape[1])), 0,
                    1e-3 * frame.shape[0], (0, 255, 0), 1)

        id_get_lost = [
            track.track_id for track in tracker.tracks
            if track.time_since_update >= 25 and track.age >= 29
        ]
        id_inside_tracked = [
            track.track_id for track in tracker.tracks if track.age > 60
        ]
        for val in counter.people_init.keys():
            # check bbox also
            cur_c = find_centroid(counter.cur_bbox[val])
            init_c = find_centroid(counter.people_bbox[val])
            vector_person = (cur_c[0] - init_c[0], cur_c[1] - init_c[1])

            if val in id_get_lost and counter.people_init[val] != -1:
                # if vector_person[1] < 0, the current y-coordinate is smaller than the initial one,
                # i.e. the person is moving towards the exit
                if vector_person[1] > 70 and counter.people_init[
                        val] == 2:  # and counter.people_bbox[val][3] > border_door \
                    counter.get_in()

                elif vector_person[1] < -70 and counter.people_init[val] == 1:
                    counter.get_out()

                counter.people_init[val] = -1
                print(f"person left frame")
                print(f"current centroid - init : {cur_c} - {init_c}\n")
                print(f"vector: {vector_person}\n")

            # elif val in id_inside_tracked and val not in id_get_lost and counter.people_init[val] == 1 \
            #         and bb_intersection_over_union(counter.cur_bbox[val], door_array) <= 0.3 \
            #         and vector_person[1] > 0:  # and \
            #     # counter.people_bbox[val][3] > border_door:
            #     counter.get_in()
            #
            #     counter.people_init[val] = -1
            #     print(f"person is tracked for a long time")
            #     print(f"current centroid - init : {cur_c} - {init_c}\n")
            #     print(f"vector: {vector_person}\n")
            #     imaggg = cv2.line(frame, find_centroid(counter.cur_bbox[val]),
            #                       find_centroid(counter.people_bbox[val]),
            #                       (0, 0, 255), 7)

            # cv2.imshow('frame', imaggg)
            # cv2.waitKey(0)

        ins, outs = counter.show_counter()
        cv2.putText(frame, "in: {}, out: {} ".format(ins, outs), (10, 30), 0,
                    1e-3 * frame.shape[0], (255, 0, 0), 5)

        cv2.namedWindow('image', cv2.WINDOW_NORMAL)
        cv2.resizeWindow('image', 1400, 800)
        cv2.imshow('image', frame)

        if writeVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1

        fps_imutils.update()

        if not asyncVideo_flag:
            fps = (fps + (1. / (time.time() - t1))) / 2
            # print("FPS = %f" % (fps))

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    fps_imutils.stop()
    print('imutils FPS: {}'.format(fps_imutils.fps()))

    if asyncVideo_flag:
        video_capture.stop()
    else:
        video_capture.release()

    if writeVideo_flag:
        out.release()

    cv2.destroyAllWindows()

    mean_error = np.mean(error_values)
    print("mean error for {} video: {}".format(video_name, mean_error))
コード例 #9
0
def main():
    parser = argparse.ArgumentParser(
        description='Run Deep SORT tracking on pre-computed detections.')
    parser.add_argument('input', type=str, help='Input video path')
    parser.add_argument('bbox', type=str, help='Input bounding box path')
    parser.add_argument('output', type=str, help='Output video path')
    args = parser.parse_args()

    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # Deep SORT
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    tracking = True
    writeVideo_flag = True
    asyncVideo_flag = False

    file_path = args.input
    if asyncVideo_flag:
        video_capture = VideoCaptureAsync(file_path)
    else:
        video_capture = cv2.VideoCapture(file_path)

    if asyncVideo_flag:
        video_capture.start()

    if writeVideo_flag:
        if asyncVideo_flag:
            w = int(video_capture.cap.get(3))
            h = int(video_capture.cap.get(4))
        else:
            w = int(video_capture.get(3))
            h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter(args.output, fourcc, 30, (w, h))
        frame_index = -1

    fps = 0.0
    # fps_imutils = imutils.video.FPS().start()

    with open(args.bbox) as f:
        data = json.load(f)
    frame_index = 0
    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if ret != True:
            break

        t1 = time.time()

        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb
        boxes = np.asarray(
            [pred['bbox'] for pred in data[frame_index]['annotations']])
        confidence = np.asarray(
            [pred['score'] for pred in data[frame_index]['annotations']])
        classes = np.asarray(
            [pred['label'] for pred in data[frame_index]['annotations']])

        if tracking:
            features = encoder(frame, boxes)

            detections = [
                Detection(bbox, confidence, cls, feature)
                for bbox, confidence, cls, feature in zip(
                    boxes, confidence, classes, features)
            ]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap,
                                                    scores)
        detections = [detections[i] for i in indices]

        if tracking:
            # Call the tracker
            tracker.predict()
            tracker.update(detections)

            for track in tracker.tracks:
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                bbox = track.to_tlbr()
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
                cv2.putText(frame, "ID: " + str(track.track_id),
                            (int(bbox[0]), int(bbox[1])), 0,
                            1.5e-3 * frame.shape[0], (0, 255, 0), 1)

        for det in detections:
            bbox = det.to_tlbr()
            score = "%.2f" % round(det.confidence * 100, 2) + "%"
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)
            if len(classes) > 0:
                cls = det.cls
                cv2.putText(frame,
                            str(cls) + " " + score,
                            (int(bbox[0]), int(bbox[3])), 0,
                            1.5e-3 * frame.shape[0], (0, 255, 0), 1)

        # cv2.imshow('', frame)

        if writeVideo_flag:  # and not asyncVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1

        # fps_imutils.update()

        # if not asyncVideo_flag:
        #     fps = (fps + (1./(time.time()-t1))) / 2
        #     print("FPS = %f"%(fps))

        # Press Q to stop!
        # if cv2.waitKey(1) & 0xFF == ord('q'):
        #     break

    # fps_imutils.stop()
    # print('imutils FPS: {}'.format(fps_imutils.fps()))

    if asyncVideo_flag:
        video_capture.stop()
    else:
        video_capture.release()

    if writeVideo_flag:
        out.release()
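The bounding-box file read above is not included. Judging from the indexing (data[frame_index]['annotations'] with 'bbox', 'score' and 'label' keys, and Detection expecting tlwh boxes), it is a per-frame JSON list; the following is an assumed, illustrative example rather than real data:

import json

# One list entry per frame; "bbox" is [x, y, w, h] in pixels (tlwh).
data = [
    {"annotations": [
        {"bbox": [100, 50, 40, 120], "score": 0.92, "label": "person"},
        {"bbox": [300, 60, 38, 115], "score": 0.81, "label": "person"},
    ]},
    {"annotations": []},  # a frame with no detections
]

with open("bboxes.json", "w") as f:
    json.dump(data, f, indent=2)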
コード例 #10
0
ファイル: main.py プロジェクト: AngeloChen14/Deep-SORT-YOLOv4
def main(yolo):
    # Definition of the parameters
    max_cosine_distance = 0.25
    max_cross_cosine_distance = 0.5
    nn_budget = None
    nms_max_overlap = 0.5
    frame_rate = 12

    file_path = 'reid-wide2'
    file_path2 = 'reid-long2'

    show_detections = False
    writeVideo_flag = False
    asyncVideo_flag = False
    beta_calulate_flag = False
    predict_ns_flag = False
    predict_time = 0.5
    multi_camera_flag = True

    # Deep SORT
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric, frame_rate=frame_rate)

    if multi_camera_flag:
        metric2 = nn_matching.NearestNeighborDistanceMetric(
            "cosine", max_cosine_distance, nn_budget)
        tracker2 = Tracker(metric2, frame_rate=frame_rate)

    if asyncVideo_flag:
        video_capture = VideoCaptureAsync(file_path + ".mp4")
    else:
        video_capture = cv2.VideoCapture(file_path + ".mp4")

    if multi_camera_flag:
        video_capture2 = cv2.VideoCapture(file_path2 + ".mp4")

    if asyncVideo_flag:
        video_capture.start()

    if asyncVideo_flag:
        w = int(video_capture.cap.get(3))
        h = int(video_capture.cap.get(4))
    else:
        w = int(video_capture.get(3))
        h = int(video_capture.get(4))

    if writeVideo_flag:
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter(file_path2 + "-output.avi", fourcc, frame_rate,
                              (w, h))
        frame_index = -1

        if multi_camera_flag:
            out2 = cv2.VideoWriter(file_path + "-output2.avi", fourcc,
                                   frame_rate, (w, h))

    fps = 0.0
    fps_imutils = imutils.video.FPS().start()
    alpha = np.arctan((326 + (369 - 326) * (250 - 110) /
                       (300 - 110) - w / 2) / w * 6.4 / 3.6) * 180 / np.pi
    beta = 0
    beta_last = 0
    x_temp = 0
    track_angle = np.array([])
    x_axis = np.array([])
    x_predict = np.array([])
    x_current = np.array([])
    x_kalman = np.array([])
    matches_id = []
    alert_mode_flag = False

    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if ret != True:
            break

        t1 = time.time()

        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb
        boxes, confidence, classes = yolo.detect_image(image)

        features = encoder(frame, boxes)
        detections = [
            Detection(bbox, confidence, cls,
                      feature) for bbox, confidence, cls, feature in zip(
                          boxes, confidence, classes, features)
        ]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.cls for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap,
                                                    scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)
        if predict_ns_flag:
            tracker.predict_ns(predict_time)

        if alert_mode_flag:
            if len(tracker.tracks) > track1_idx:
                ret = tracker.tracks[track1_idx].is_confirmed()
                if not ret:
                    alert_mode_flag = False
            else:
                alert_mode_flag = False

        if not alert_mode_flag:
            for det in detections:
                bbox = det.to_tlbr()
                if show_detections and len(classes) > 0:
                    det_cls = det.cls
                    score = "%.2f" % (det.confidence * 100) + "%"
                    cv2.putText(frame,
                                str(det_cls) + " " + score,
                                (int(bbox[0]), int(bbox[3])), 0,
                                1e-3 * frame.shape[0], (0, 255, 0), 1)
                    cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)

            for track in tracker.tracks:
                if not track.is_confirmed() or track.time_since_update > 5:
                    continue
                bbox = track.to_tlbr()

                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
                cv2.putText(frame, "ID: " + str(track.track_id),
                            (int(bbox[0]), int(bbox[1])), 0,
                            1e-3 * frame.shape[0], (0, 255, 0), 1)

                if predict_ns_flag:
                    if track.x_predict > 0 and track.x_predict < w:
                        cv2.circle(frame, (track.x_predict, int(h / 2)), 15,
                                   (0, 0, 255), -1)
                    x_temp += 1 / frame_rate
                    x_axis = np.append(x_axis, x_temp)
                    x_predict = np.append(x_predict, track.x_predict)
                    x_current = np.append(x_current, track.x_current)
                    x_kalman = np.append(x_kalman, track.mean[0])

                if beta_calulate_flag:
                    beta = np.arctan(
                        (track.mean[0] - w / 2) / w * 6.4 / 16) * 180 / np.pi
        else:
            bbox = tracker.tracks[track1_idx].to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (0, 0, 255), 2)
            cv2.putText(frame, "ID: " + str(track.track_id),
                        (int(bbox[0]), int(bbox[1])), 0, 1e-3 * frame.shape[0],
                        (0, 255, 0), 1)

            # if not show_detections:
            #     track_cls = track.cls
            #     adc = "%.2f" % (track.adc * 100) + "%"  # Average detection confidence
            #     cv2.putText(frame, str(track_cls), (int(bbox[0]), int(bbox[3])), 0, 1e-3 * frame.shape[0], (0, 255, 0),
            #                 1)
            #     cv2.putText(frame, 'ADC: ' + adc, (int(bbox[0]), int(bbox[3] + 2e-2 * frame.shape[1])), 0,
            #                 1e-3 * frame.shape[0], (0, 255, 0), 1)

        if beta_calulate_flag:
            if np.isclose(beta, beta_last) and abs(beta) > 7:
                beta = np.sign(beta) * np.arctan(3.2 / 16) * 180 / np.pi
            cv2.putText(frame, "Beta_angle: " + '{:4.2f}'.format(beta),
                        (20, 20), 0, 1e-3 * frame.shape[0], (0, 0, 255), 1)
            beta_last = beta
            x_temp += 1 / frame_rate
            x_axis = np.append(x_axis, x_temp)
            track_angle = np.append(track_angle, alpha + beta)

        cv2.imshow('camera1', frame)

        if writeVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1

        if multi_camera_flag:
            ret, frame = video_capture2.read()  # frame shape 640*480*3
            if ret != True:
                break

            image = Image.fromarray(frame[..., ::-1])  # bgr to rgb
            boxes, confidence, classes = yolo.detect_image(image)

            features = encoder(frame, boxes)
            detections = [
                Detection(bbox, confidence, cls, feature)
                for bbox, confidence, cls, feature in zip(
                    boxes, confidence, classes, features)
            ]

            # Run non-maxima suppression.
            boxes = np.array([d.tlwh for d in detections])
            scores = np.array([d.confidence for d in detections])
            classes = np.array([d.cls for d in detections])
            indices = preprocessing.non_max_suppression(
                boxes, nms_max_overlap, scores)
            detections = [detections[i] for i in indices]

            # Call the tracker
            tracker2.predict()
            tracker2.update(detections)
            matches, unmatched_tracks_1, unmatched_tracks_2 = cross_tracker_match(
                tracker, tracker2, max_cross_cosine_distance)
            matches_id.clear()
            for track_1_idx, track_2_idx in matches:
                matches_id.append((tracker.tracks[track_1_idx].track_id,
                                   tracker2.tracks[track_2_idx].track_id))
            print("Matches:", matches_id)

            if alert_mode_flag:
                if len(tracker2.tracks) > track2_idx:
                    ret = tracker2.tracks[track2_idx].is_confirmed()
                    if not ret:
                        alert_mode_flag = False
                else:
                    alert_mode_flag = False

            if not alert_mode_flag:
                for det in detections:
                    bbox = det.to_tlbr()
                    if show_detections and len(classes) > 0:
                        det_cls = det.cls
                        score = "%.2f" % (det.confidence * 100) + "%"
                        cv2.putText(frame,
                                    str(det_cls) + " " + score,
                                    (int(bbox[0]), int(bbox[3])), 0,
                                    1e-3 * frame.shape[0], (0, 255, 0), 1)
                        cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                                      (int(bbox[2]), int(bbox[3])),
                                      (255, 0, 0), 2)

                for track in tracker2.tracks:
                    if not track.is_confirmed() or track.time_since_update > 5:
                        continue
                    bbox = track.to_tlbr()
                    cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[2]), int(bbox[3])),
                                  (255, 255, 255), 2)
                    cv2.putText(frame, "ID: " + str(track.track_id),
                                (int(bbox[0]), int(bbox[1])), 0,
                                1e-3 * frame.shape[0], (0, 255, 0), 1)

                if cv2.waitKey(1) & 0xFF == ord('t'):
                    roi = cv2.selectROI("Select Target", frame)
                    Roi = [Detection(roi, 0, None, 0)]
                    ret, track2_idx = ROI_target_match(tracker2, Roi)
                    cv2.destroyWindow("Select Target")

                    if ret:
                        t_list = [i for (i, j) in matches if j == track2_idx]
                        if t_list != []:
                            track1_idx = t_list[0]
                            alert_mode_flag = True
            else:
                bbox = tracker2.tracks[track2_idx].to_tlbr()
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])), (0, 0, 255), 2)
                cv2.putText(frame, "ID: " + str(track.track_id),
                            (int(bbox[0]), int(bbox[1])), 0,
                            1e-3 * frame.shape[0], (0, 255, 0), 1)

            cv2.imshow('camera2', frame)

            if writeVideo_flag:
                # save a frame
                out2.write(frame)

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

        # if not asyncVideo_flag:
        #     fps = (fps + (1. / (time.time() - t1))) / 2
        #     print("FPS = %.2f" % (fps))
        fps_imutils.update()

    fps_imutils.stop()
    print('imutils FPS: {:.2f}'.format(fps_imutils.fps()))

    if asyncVideo_flag:
        video_capture.stop()
    else:
        video_capture.release()

    if writeVideo_flag:
        out.release()
        if multi_camera_flag:
            out2.release()

    if predict_ns_flag:
        np.save("x_axis.npy", x_axis)
        np.save("x_predict.npy", x_predict)
        np.save("x_current.npy", x_current)
        np.save("x_kalman.npy", x_kalman)
        plt.plot(x_axis, x_current, label='X_Measured')
        plt.plot(x_axis, x_kalman, label='X_Filtered')
        plt.plot(x_axis, x_predict, label='X_Predicted')
        plt.legend()
        plt.xlabel('Time (s)')
        plt.ylabel('Pixel')
        plt.title('Motion Prediction')
        plt.show()

    if beta_calulate_flag:
        np.save("x_axis.npy", x_axis)
        np.save("track_angle.npy", track_angle)
        plt.plot(x_axis, track_angle)
        plt.xlabel('Time (s)')
        plt.ylabel('Angle (deg)')
        plt.title('Target Angle')
        plt.show()

    cv2.destroyAllWindows()
    file_path = '/home/sohaibrabbani/PycharmProjects/Strong_Baseline_of_Pedestrian_Attribute_Recognition/videos/abc.mp4'
    if asyncVideo_flag:
        video_capture = VideoCaptureAsync(file_path)
    else:
        video_capture = cv2.VideoCapture(file_path)

    if asyncVideo_flag:
        video_capture.start()

    if writeVideo_flag:
        if asyncVideo_flag:
            w = int(video_capture.cap.get(3))
            h = int(video_capture.cap.get(4))
        else:
            w = int(video_capture.get(3))
            h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter('output_custom.avi', fourcc, 30, (w, h))
        frame_index = -1

    fps = 0.0
    fps_imutils = imutils.video.FPS().start()
    # for img_path in tqdm(img_paths):
    count = 0
    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if ret != True:
            break
        t1 = time.time()
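cross_tracker_match and ROI_target_match used above are project utilities that are not part of this listing. Purely as an assumed sketch, the cross-camera association could be posed as an assignment problem over appearance distances; this presumes each track exposes a recent appearance vector in track.features, which may differ from the real implementation:

import numpy as np
from scipy.optimize import linear_sum_assignment

def cross_tracker_match(tracker1, tracker2, max_distance):
    # Assumed sketch: pair confirmed tracks from the two trackers whose cosine
    # distance between normalized appearance features stays below max_distance.
    tracks1 = [t for t in tracker1.tracks if t.is_confirmed() and t.features]
    tracks2 = [t for t in tracker2.tracks if t.is_confirmed() and t.features]
    if not tracks1 or not tracks2:
        return [], list(range(len(tracker1.tracks))), list(range(len(tracker2.tracks)))

    def feat(track):
        f = np.asarray(track.features[-1], dtype=float)
        return f / (np.linalg.norm(f) + 1e-12)

    cost = np.array([[1.0 - float(feat(a) @ feat(b)) for b in tracks2]
                     for a in tracks1])
    rows, cols = linear_sum_assignment(cost)

    matches, used1, used2 = [], set(), set()
    for r, c in zip(rows, cols):
        if cost[r, c] <= max_distance:
            matches.append((tracker1.tracks.index(tracks1[r]),
                            tracker2.tracks.index(tracks2[c])))
            used1.add(r)
            used2.add(c)
    unmatched1 = [tracker1.tracks.index(t) for i, t in enumerate(tracks1) if i not in used1]
    unmatched2 = [tracker2.tracks.index(t) for i, t in enumerate(tracks2) if i not in used2]
    return matches, unmatched1, unmatched2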
コード例 #12
0
def main(yolo):
    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # Deep SORT
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    writeVideo_flag = True
    asyncVideo_flag = False
    showImg_flag = True

    file_path = 'C:\\Users\\Divided\\Desktop\\test_vid.mp4'
    if asyncVideo_flag:
        video_capture = VideoCaptureAsync(file_path)
    else:
        video_capture = cv2.VideoCapture(file_path)

    if asyncVideo_flag:
        video_capture.start()

    if writeVideo_flag:
        if asyncVideo_flag:
            w = int(video_capture.cap.get(3))
            h = int(video_capture.cap.get(4))
        else:
            w = int(video_capture.get(3))
            h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter('output_yolov4.avi', fourcc, 30, (w, h))
        frame_index = -1

    fps = 0.0
    fps_imutils = imutils.video.FPS().start()

    while True:
        preprocess_time_start = get_current_ms()
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if ret != True:
            break

        overlay = frame.copy()
        t1 = time.time()

        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb
        preprocess_time = get_current_ms() - preprocess_time_start
        # print("Preprocessing time:\t{}".format(preprocess_time))

        detection_time_start = get_current_ms()
        # run detection once per frame and reuse the result (two detect_image
        # calls here would double the inference cost)
        boxs, confidence = yolo.detect_image(image)[:2]

        detection_time = get_current_ms() - detection_time_start
        # print("Detection time:\t{}".format(detection_time))

        features = encoder(frame, boxs)

        detections = [Detection(bbox, confidence, feature) for bbox, confidence, feature in
                      zip(boxs, confidence, features)]

        nms_start_time = get_current_ms()
        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        nms_time = get_current_ms() - nms_start_time
        # print("NMS time:\t{}".format(nms_time))

        tracker_start_time = get_current_ms()
        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        tracker_time = get_current_ms() - tracker_start_time
        # print("Tracker time:\t{}".format(tracker_time))

        postprocess_start_time = get_current_ms()

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            track_bbox = track.to_tlbr()
            cv2.rectangle(frame, (int(track_bbox[0]), int(track_bbox[1])),
                          (int(track_bbox[2]), int(track_bbox[3])), (255, 255, 255), 1)

            if track.det_hist:
                det_curr = track.det_hist[-1]
                det_bbox = det_curr.to_tlbr()
                score = "%.2f" % round(det_curr.confidence, 2)
                cv2.rectangle(overlay, (int(det_bbox[0]), int(det_bbox[1])), (int(det_bbox[2]), int(det_bbox[3])),
                              track.color, 2)
                cv2.putText(overlay, score, (int(det_bbox[0]) + 5, int(det_bbox[3]) - 5), 0, 0.5, (255, 255, 255),
                            2)
                cv2.putText(overlay, str(track.track_id), (int(det_bbox[0]) + 5, int(det_bbox[1]) - 5), 0, 0.5,
                            (255, 255, 255), 2)

                # centroid = det_curr.get_centroid()

                # cv2.circle(overlay, (int(centroid[0]), int(centroid[1])), radius=3,
                #           color=(0, 0, 255), thickness=-1)

        alpha = 0.5  # Transparency factor.

        # Following line overlays transparent rectangle over the image
        frame = cv2.addWeighted(overlay, alpha, frame, 1 - alpha, 0)

        if showImg_flag:
            cv2.imshow('', frame)

        postprocess_time = get_current_ms() - postprocess_start_time
        # print("Postprocess time:\t{}".format(postprocess_time))

        if writeVideo_flag:  # and not asyncVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1

        fps_imutils.update()

        fps = (fps + (1. / (time.time() - t1))) / 2
        print("{}/{}/{}/{}/{}".format(preprocess_time, detection_time, nms_time, tracker_time, postprocess_time))
        print("FPS = %f" % fps)

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    fps_imutils.stop()
    print('imutils FPS: {}'.format(fps_imutils.fps()))

    if asyncVideo_flag:
        video_capture.stop()
    else:
        video_capture.release()

    if writeVideo_flag:
        out.release()

    cv2.destroyAllWindows()
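get_current_ms is not defined in this listing; a minimal stand-in consistent with the per-stage timing prints above is:

import time

def get_current_ms():
    # wall-clock time in milliseconds, so `get_current_ms() - start` above
    # yields the elapsed milliseconds for each stage
    return int(round(time.time() * 1000))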
コード例 #13
0
def main(yolo):
    # Definition of the parameters
    h = 0
    w = 0
    frame_index = -1
    fps = 0.0
    flows = OrderedDict()

    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # Deep SORT
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    writeVideo_flag = True
    asyncVideo_flag = False

    file_path = '/home/sohaibrabbani/Downloads/overhead_people_clips/baggageclaim.mp4'
    if asyncVideo_flag:
        video_capture = VideoCaptureAsync(file_path)
    else:
        video_capture = cv2.VideoCapture(file_path)

    if asyncVideo_flag:
        video_capture.start()

    if writeVideo_flag:
        if asyncVideo_flag:
            w = int(video_capture.cap.get(3))
            h = int(video_capture.cap.get(4))
        else:
            w = int(video_capture.get(3))
            h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter('output_yolov3.avi', fourcc,
                              video_capture.get(cv2.CAP_PROP_FPS), (w, h))
        # frame_index = -1

    fps_imutils = imutils.video.FPS().start()
    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if ret != True:
            break

        t1 = time.time()

        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb
        # run detection once and reuse the result
        boxs, confidence = yolo.detect_image(image)[:2]

        features = encoder(frame, boxs)

        detections = [
            Detection(bbox, confidence, feature)
            for bbox, confidence, feature in zip(boxs, confidence, features)
        ]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap,
                                                    scores)
        detections = [detections[i] for i in indices]
        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 60:
                continue
            bbox = track.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            cv2.putText(frame, str(track.track_id),
                        (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200,
                        (0, 255, 0), 2)

            # Initializing the flow list for saving the flow data of each person - Sohaib
            if track.track_id not in flows:
                flows[track.track_id] = OrderedDict()

            # Saving location of a person in a frame - Sohaib
            flows[track.track_id][frame_index + 1] = np.array(
                [int(bbox[0]), int(bbox[1])])

        for det in detections:
            bbox = det.to_tlbr()
            score = "%.2f" % round(det.confidence * 100, 2)
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)
            cv2.putText(frame, score + '%', (int(bbox[0]), int(bbox[3])), 0,
                        5e-3 * 130, (0, 255, 0), 2)

        cv2.imshow('', frame)

        if writeVideo_flag:  # and not asyncVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1

        fps_imutils.update()

        fps = (fps + (1. / (time.time() - t1))) / 2
        print("FPS = %f" % (fps))

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    # Calling the serialize function to save the pickle file - Sohaib
    serialize_flow_data(flows=flows,
                        height=h,
                        width=w,
                        file_path=file_path,
                        frame_count=video_capture.get(
                            cv2.CAP_PROP_FRAME_COUNT),
                        fps=video_capture.get(cv2.CAP_PROP_FPS))

    fps_imutils.stop()
    print('imutils FPS: {}'.format(fps_imutils.fps()))

    if asyncVideo_flag:
        video_capture.stop()
    else:
        video_capture.release()

    if writeVideo_flag:
        out.release()

    cv2.destroyAllWindows()
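serialize_flow_data is referenced but not shown. Given the comment about saving a pickle file, an assumed minimal version could simply bundle the flows with the video metadata:

import pickle

def serialize_flow_data(flows, height, width, file_path, frame_count, fps):
    # Assumed implementation: pickle the per-track flow dictionaries together
    # with the source video's metadata next to the input file.
    payload = {
        "flows": flows,
        "height": height,
        "width": width,
        "file_path": file_path,
        "frame_count": frame_count,
        "fps": fps,
    }
    with open(file_path + "_flows.pkl", "wb") as f:
        pickle.dump(payload, f)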
コード例 #14
0
def main(yolo):
    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # Deep SORT
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    show_detections = True
    writeVideo_flag = True
    asyncVideo_flag = False

    file_path = '/media/wangsen/新加卷/MOF_data/citywalks/clips/PADUA/clip_000001.mp4'

    if asyncVideo_flag:
        video_capture = VideoCaptureAsync(file_path)
    else:
        video_capture = cv2.VideoCapture(file_path)

    if asyncVideo_flag:
        video_capture.start()

    if writeVideo_flag:
        if asyncVideo_flag:
            w = int(video_capture.cap.get(3))
            h = int(video_capture.cap.get(4))
        else:
            w = int(video_capture.get(3))
            h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter('output_yolov4.avi', fourcc, 30, (w, h))
        frame_index = -1

    fps = 0.0
    fps_imutils = imutils.video.FPS().start()

    frame_num = 0
    result = []
    ped_cnt = 0
    ped_index_store = []
    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if ret != True:
            break

        t1 = time.time()

        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb
        boxes, confidence, classes = yolo.detect_image(image)

        features = encoder(frame, boxes)
        detections = [
            Detection(bbox, confidence, cls,
                      feature) for bbox, confidence, cls, feature in zip(
                          boxes, confidence, classes, features)
        ]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.cls for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap,
                                                    scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        # for det in detections:
        #     bbox = det.to_tlbr()
        #     if show_detections and len(classes) > 0:
        #         det_cls = det.cls
        #         score = "%.2f" % (det.confidence * 100) + "%"
        #         cv2.putText(frame, str(det_cls) + " " + score, (int(bbox[0]), int(bbox[3])), 0,
        #                     1e-3 * frame.shape[0], (0, 255, 0), 1)
        #         cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()

            if int(bbox[0]) < 0 or int(bbox[1]) < 0 or int(bbox[2]) < 0 or int(
                    bbox[3]) < 0:
                continue
            # crop the pedestrian image from the frame
            crop_image = frame[int(bbox[1]):int(bbox[3]),
                               int(bbox[0]):int(bbox[2])]
            crop_image = cv2.resize(crop_image, (128, 256))
            cv2.imwrite(
                './crop_image/frame_' + str(frame_num).zfill(4) + '_ped_' +
                str(track.track_id) + '.png', crop_image)

            adc = "%.2f" % (track.adc *
                            100) + "%"  # Average detection confidence
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            cv2.putText(frame, "ID: " + str(track.track_id),
                        (int(bbox[0]), int(bbox[1])), 0, 1e-3 * frame.shape[0],
                        (0, 255, 0), 1)

            if not show_detections:
                track_cls = track.cls
                cv2.putText(frame, str(track_cls),
                            (int(bbox[0]), int(bbox[3])), 0,
                            1e-3 * frame.shape[0], (0, 255, 0), 1)
                cv2.putText(
                    frame, 'ADC: ' + adc,
                    (int(bbox[0]), int(bbox[3] + 2e-2 * frame.shape[1])), 0,
                    1e-3 * frame.shape[0], (0, 255, 0), 1)

            w = int(int(bbox[2]) - int(bbox[0]))
            h = int(int(bbox[3]) - int(bbox[1]))
            tmp_result = []
            # filename
            tmp_result.append("clip_000001.mp4")
            # frame_num
            tmp_result.append(str(frame_num))
            # bb1~bb4
            tmp_result.append(int(bbox[0]))
            tmp_result.append(int(bbox[1]))
            tmp_result.append(int(bbox[2]))
            tmp_result.append(int(bbox[3]))
            # track
            tmp_result.append(str(track.track_id))
            # detection length
            tmp_result.append(str(0))
            # Height
            tmp_result.append(str(h))
            result.append(tmp_result)
        frame_num += 1

        cv2.imshow('', frame)

        if writeVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1

        fps_imutils.update()

        if not asyncVideo_flag:
            fps = (fps + (1. / (time.time() - t1))) / 2
            print("FPS = %f" % (fps))

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    # write the results out to CSV
    f = open('myvideo_yolo_detection.csv', 'w')
    csv_writer = csv.writer(f)
    # write the header row
    csv_writer.writerow([
        "filename", "frame_num", "bb1", "bb2", "bb3", "bb4", "track",
        "detection_length", "Height"
    ])
    # sort by track id
    result.sort(key=lambda x: x[6])

    # Fill in detection_length: how many consecutive frames each track has been detected
    for i in range(1, len(result)):
        # if the filename and track id match and the frame numbers are consecutive,
        # this row continues the previous track's run
        if result[i][6] == result[i - 1][6] and result[i][0] == result[i - 1][0] \
                and int(result[i][1]) == int(result[i - 1][1]) + 1:
            result[i][7] = str(int(result[i - 1][7]) + 1)
        else:
            result[i][7] = '0'

    for i in range(len(result)):
        csv_writer.writerow(result[i])

    f.close()

    fps_imutils.stop()
    print('imutils FPS: {}'.format(fps_imutils.fps()))

    if asyncVideo_flag:
        video_capture.stop()
    else:
        video_capture.release()

    if writeVideo_flag:
        out.release()

    cv2.destroyAllWindows()
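As a side note, the detection_length column written above can also be reproduced after the fact with pandas; this is only a sketch that assumes the CSV produced by this script:

import pandas as pd

df = pd.read_csv('myvideo_yolo_detection.csv')
df = df.sort_values(['track', 'frame_num']).reset_index(drop=True)

# a new run starts whenever the file or track changes, or frames are not consecutive
new_run = ((df['filename'] != df['filename'].shift())
           | (df['track'] != df['track'].shift())
           | (df['frame_num'].diff() != 1))
df['detection_length'] = df.groupby(new_run.cumsum()).cumcount()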
def main(yolo):
    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # Deep SORT
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    tracking = True
    writeVideo_flag = True
    asyncVideo_flag = False

    file_path = 'video.webm'
    if asyncVideo_flag:
        video_capture = VideoCaptureAsync(file_path)
    else:
        video_capture = cv2.VideoCapture(file_path)

    if asyncVideo_flag:
        video_capture.start()

    if writeVideo_flag:
        if asyncVideo_flag:
            w = int(video_capture.cap.get(3))
            h = int(video_capture.cap.get(4))
        else:
            w = int(video_capture.get(3))
            h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter('output_yolov4.avi', fourcc, 30, (w, h))
        frame_index = -1

    fps = 0.0
    fps_imutils = imutils.video.FPS().start()
    model_par, valid_transform = model_init_par()
    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if ret != True:
            break

        t1 = time.time()

        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb
        boxes, confidence, classes = yolo.detect_image(image)

        if tracking:
            features = encoder(frame, boxes)

            detections = [
                Detection(bbox, confidence, cls, feature)
                for bbox, confidence, cls, feature in zip(
                    boxes, confidence, classes, features)
            ]
        else:
            detections = [
                Detection_YOLO(bbox, confidence, cls)
                for bbox, confidence, cls in zip(boxes, confidence, classes)
            ]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap,
                                                    scores)
        detections = [detections[i] for i in indices]

        for det in detections:
            bbox = det.to_tlbr()
            score = "%.2f" % round(det.confidence * 100, 2) + "%"
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)
            if len(classes) > 0:
                cls = det.cls
                cv2.putText(frame,
                            str(cls) + " " + score,
                            (int(bbox[0]), int(bbox[3])), 0,
                            1e-3 * frame.shape[0], (0, 255, 0), 1)

        if tracking:
            # Call the tracker
            tracker.predict()
            tracker.update(detections)

            for track in tracker.tracks:
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                bbox = track.to_tlbr()
                #crop_img = frame[int(bbox[1]):int(bbox[3]),int(bbox[0]):int(bbox[2])]
                crop_img = image.crop(
                    [int(bbox[0]),
                     int(bbox[1]),
                     int(bbox[2]),
                     int(bbox[3])])
                #res_txt = demo_par(model_par, valid_transform, crop_img)

                #draw.rectangle(xy=person_bbox[:-1], outline='red', width=1)

                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
                cv2.putText(frame, "ID: " + str(track.track_id),
                            (int(bbox[0]), int(bbox[1])), 0,
                            1e-3 * frame.shape[0], (0, 255, 0), 1)
                font = ImageFont.truetype(
                    '/home/sohaibrabbani/PycharmProjects/Strong_Baseline_of_Pedestrian_Attribute_Recognition/arial.ttf',
                    size=10)
                # positive_cnt = 1
                # for txt in res_txt:
                #     if 'personal' in txt:
                #         #draw.text((x1, y1 + 20 * positive_cnt), txt, (255, 0, 0), font=font)
                #         cv2.putText(frame, txt, (int(bbox[0]), int(bbox[1]) + 20 * positive_cnt), 0,
                #                     1e-3 * frame.shape[0], (0, 255, 0), 1)
                #         positive_cnt += 1

        cv2.imshow('', frame)

        if writeVideo_flag:  # and not asyncVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1

        fps_imutils.update()

        if not asyncVideo_flag:
            fps = (fps + (1. / (time.time() - t1))) / 2
            print("FPS = %f" % (fps))

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    fps_imutils.stop()
    print('imutils FPS: {}'.format(fps_imutils.fps()))

    if asyncVideo_flag:
        video_capture.stop()
    else:
        video_capture.release()

    if writeVideo_flag:
        out.release()

    cv2.destroyAllWindows()
コード例 #16
0
def main(yolo):

    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0
    
    # Deep SORT
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    # Calculate cosine Distance Metric 
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    # Flags for process
    tracking = True # Set False if you only want to do detection
    writeVideo_flag = True # Set False if you don't want to write frames locally
    asyncVideo_flag = False # Use asynchronous frame reading for better FPS. Warning: may cause stuttering

    # Video File Path
    file_path = '/mydrive/test.mp4'
    # Check if asyncVideo flag set to True
    if asyncVideo_flag :
        video_capture = VideoCaptureAsync(file_path)
    else:
        video_capture = cv2.VideoCapture(file_path)

    if asyncVideo_flag:
        video_capture.start()

    if writeVideo_flag:
        if asyncVideo_flag:
            w = int(video_capture.cap.get(3))
            h = int(video_capture.cap.get(4))
        else:
            w = int(video_capture.get(3))
            h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        output_file = os.path.basename(file_path)[:-4]
        out = cv2.VideoWriter('./Output/' + output_file + "-prueba-test.avi", fourcc, 30, (w, h))
        frame_index = -1

    fps = 0.0
    fps_imutils = imutils.video.FPS().start()

    while True:
        ret, frame = video_capture.read() # Capture frames
        if ret != True:
             break

        t1 = time.time()
        
        # bgr to rgb frame conversion
        image = Image.fromarray(frame[...,::-1])  
        # YOLOv4 Detection
        boxes, confidence, classes = yolo.detect_image(image) 

        if tracking:
            # Encodes the frame and boxes for DeepSORT
            features = encoder(frame, boxes)
            # DeepSORT Detection
            detections = [Detection(bbox, confidence, cls, feature) for bbox, confidence, cls, feature in
                          zip(boxes, confidence, classes, features)]
        else:
            # Only YOLOv4 Detection
            detections = [Detection_YOLO(bbox, confidence, cls) for bbox, confidence, cls in
                          zip(boxes, confidence, classes)]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        if tracking:
            # Call the tracker
            tracker.predict()
            tracker.update(detections)

            for track in tracker.tracks:
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                bbox = track.to_tlbr()
                # Draw white bbox for DeepSORT
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
                cv2.putText(frame, "ID: " + str(track.track_id), (int(bbox[0]), int(bbox[1])), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1.5,(0, 255, 0), 1)

        for det in detections:
            bbox = det.to_tlbr()
            score = "%.2f" % round(det.confidence * 100, 2)
            # Check the class for colored bbox
            if len(classes) > 0:
                cls = det.cls
                center_bbox = (int((bbox[0] + bbox[2]) / 2), int((bbox[1] + bbox[3]) / 2))
                if str(cls) == 'person':
                    # Draw Blue bbox for YOLOv4 person detection
                    cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (219, 152, 52), 2)
                elif str(cls) in ('backpack', 'handbag', 'suitcase'):
                    # Draw Orange bbox for YOLOv4 handbag, backpack and suitcase detection
                    cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (65, 176, 245), 2)

        if not asyncVideo_flag:
            fps = (fps + (1./(time.time()-t1))) / 2
            print("FPS = %f"%(fps))
            cv2.putText(frame, "GPU: NVIDIA Tesla P100", (5, 70), cv2.FONT_HERSHEY_COMPLEX_SMALL, 0.8, (0, 255, 0), 1)
            cv2.putText(frame, "FPS: %.2f" % fps, (5, 50), cv2.FONT_HERSHEY_COMPLEX_SMALL, 0.8, (0, 255, 0), 1)

            # draw the timestamp on the frame
            timestamp = datetime.datetime.now()
            ts = timestamp.strftime("%d/%m/%Y, %H:%M:%S")
            cv2.putText(frame, ts, (5, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 0.8, (0, 255, 0), 1)            

        #cv2.imshow('', frame)

        if writeVideo_flag: # and not asyncVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1

        fps_imutils.update()
          
        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    fps_imutils.stop()
    print('imutils FPS: {}'.format(fps_imutils.fps()))

    if asyncVideo_flag:
        video_capture.stop()
    else:
        video_capture.release()

    if writeVideo_flag:
        out.release()
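The per-class coloring above can also be expressed as a lookup table, which is easier to extend to more COCO classes; a small sketch (color values taken from the code above, the function name is illustrative):

import cv2

# BGR colors per class, matching the values used above
CLASS_COLORS = {
    "person": (219, 152, 52),
    "backpack": (65, 176, 245),
    "handbag": (65, 176, 245),
    "suitcase": (65, 176, 245),
}

def draw_class_box(frame, bbox, cls, thickness=2):
    color = CLASS_COLORS.get(str(cls))
    if color is not None:
        cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                      (int(bbox[2]), int(bbox[3])), color, thickness)
    return frame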
コード例 #17
0
class CameraSource(object):
  def __init__(self, cameraSource, height, output_file=None, startFrame=0,
               async_read=False, outputToServer=False, capture_size=None):
    if async_read:
      self.camera = VideoCaptureAsync(cameraSource)
    else:
      self.camera = cv2.VideoCapture(cameraSource)

    if capture_size is not None:
      print(capture_size)
      self.camera.set(cv2.CAP_PROP_FRAME_WIDTH, capture_size[0])
      self.camera.set(cv2.CAP_PROP_FRAME_HEIGHT, capture_size[1])

    if async_read:
      self.ORIGINAL_WIDTH = self.camera.width
      self.ORIGINAL_HEIGHT = self.camera.height
    else:
      self.ORIGINAL_WIDTH = int(self.camera.get(cv2.CAP_PROP_FRAME_WIDTH))
      self.ORIGINAL_HEIGHT = int(self.camera.get(cv2.CAP_PROP_FRAME_HEIGHT))

    print('CameraSource')
    print('Requested capture size', capture_size)
    print('Actual capture size', self.ORIGINAL_WIDTH, self.ORIGINAL_HEIGHT)

    self.HEIGHT = height
    self.WIDTH = self.ORIGINAL_WIDTH * self.HEIGHT // self.ORIGINAL_HEIGHT
    self.WIDTH = self.WIDTH + self.WIDTH % 2  # Make it even.

    self.startFrame = startFrame
    self.nFrames = 0
    self.writer = VideoWriter(output_file)

    if async_read:
      self.camera.start()

    self.outputToServer = outputToServer
    if outputToServer:
      # https://robotpy.readthedocs.io/en/stable/vision/code.html
      pass
      #self.outputStream = CameraServer.getInstance().putVideo(
      #    'ProcessedVisionFrame', self.WIDTH, self.HEIGHT)


  def GetFrame(self):  
    # Processing on first call. 
    if self.nFrames == 0:
      # Skip some frames if requested.
      if self.startFrame > 0:
        skippedFrames = 0
        while True:
          ret, frame = self.camera.read()
          if not ret or frame is None:
            print('No more frames')
            return None
          skippedFrames += 1
          if skippedFrames >= self.startFrame:
            break
      # Start timer for first frame.
      self.startTime = time.time()

    # Get frame.
    frame = None
    frameTime = time.time() 
    if self.nFrames > 0 and self.nFrames % 50 == 0:
      print('FPS: ', self.nFrames / (frameTime - self.startTime))
    self.nFrames += 1
    ret, frame = self.camera.read()
    if ret and frame is not None:
      if frame.shape[0] != self.HEIGHT:
        frame = cv2.resize(frame, (self.WIDTH, self.HEIGHT))
    return frame


  def ImageSize(self):
    return (self.WIDTH, self.HEIGHT)


  def OutputFrameAndTestContinue(self, message, frame, height=None):
    self.writer.OutputFrame(frame)
    if self.outputToServer:
      self.outputStream.putFrame(frame)
    return ShowFrameAndTestContinue(message, frame, height)


  def __del__(self):
    self.camera.release()
    cv2.destroyAllWindows()
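ShowFrameAndTestContinue and the VideoWriter wrapper used by CameraSource are external helpers not included here. A plausible minimal version of the display helper, assumed from how it is called, is:

import cv2

def ShowFrameAndTestContinue(message, frame, height=None):
    # Assumed helper: optionally shrink the frame for display, show it in a
    # window named `message`, and return False once the user presses 'q'.
    if height is not None and frame.shape[0] != height:
        width = frame.shape[1] * height // frame.shape[0]
        frame = cv2.resize(frame, (width, height))
    cv2.imshow(message, frame)
    return (cv2.waitKey(1) & 0xFF) != ord('q')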
コード例 #18
0
def main():

    PATH_TO_CKPT_PERSON = 'models/faster_rcnn_restnet50.pb'

    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = 200
    nms_max_overlap = 1.0
    yolo = YOLO()
    reid = PERSON_REID()
    frozen_person = FROZEN_GRAPH_INFERENCE(PATH_TO_CKPT_PERSON)

    # # Deep SORT
    # model_filename = 'model_data/mars-small128.pb'
    # encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    # file_path = 0
    if VIDEO_CAPTURE == 0 and asyncVideo_flag == True:
        video_capture = VideoCaptureAsync(file_path)
    elif VIDEO_CAPTURE == 1 and asyncVideo_flag == True:
        video_capture = myVideoCapture(file_path)
    else:
        video_capture = cv2.VideoCapture(file_path)
        im_width = int(video_capture.get(3))
        im_height = int(video_capture.get(4))

    if asyncVideo_flag:
        video_capture.start()

    if writeVideo_flag:
        if asyncVideo_flag:
            w = int(video_capture.cap.get(3))
            h = int(video_capture.cap.get(4))
        else:
            w = int(video_capture.get(3))
            h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter(output_filename, fourcc, 30, (w, h))
        frame_index = -1

    fps = 0.0
    fps_imutils = imutils.video.FPS().start()
    boxs = list()
    confidence = list()
    persons = list()
    frame_count = 0
    track_count = 0
    num_files = 0

    while True:
        t1 = time.time()
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if not ret:
            break
        frame_org = frame.copy()
        frame_count += 1
        # print('Frame count: {}'.format(frame_count))

        # Person detection using Frozen Graph
        persons = frozen_person.run_frozen_graph(frame, im_width, im_height)
        boxs = [[
            person['left'], person['top'], person['width'], person['height']
        ] for person in persons]
        confidence = [person['confidence'] for person in persons]
        cropped_persons = list(person['cropped'] for person in persons)

        # # Person detection using YOLO - Keras-converted model
        # image = Image.fromarray(frame[...,::-1])  # bgr to rgb
        # boxs = yolo.detect_image(image)[0]
        # confidence = yolo.detect_image(image)[1]
        # cropped_persons = [np.array(frame[box[1]:box[1]+box[3], box[0]:box[0]+box[2]]) for box in boxs] #[x,y,w,h]

        # features = encoder(frame, boxs)
        if len(cropped_persons) > 0:
            features = reid.extract_feature_imgTensor(cropped_persons)
            # print(features.shape)
            detections = [
                Detection(bbox, confidence,
                          feature) for bbox, confidence, feature in zip(
                              boxs, confidence, features)
            ]

            # Run non-maxima suppression.
            boxes = np.array([d.tlwh for d in detections])
            scores = np.array([d.confidence for d in detections])
            indices = preprocessing.non_max_suppression(
                boxes, nms_max_overlap, scores)
            detections = [detections[i] for i in indices]

            # Call the tracker
            tracker.predict()
            tracker.update(detections)

            for track in tracker.tracks:
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                bbox = track.to_tlbr()
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])), (0, 0, 255), 2)
                cv2.putText(frame, str(track.track_id),
                            (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200,
                            (0, 255, 0), 2)

                directory = os.path.join('output', str(track.track_id))
                if not os.path.exists(directory):
                    os.makedirs(directory)
                # file_count = len([name for name in os.listdir(directory+'/') if os.path.isfile(name)])
                file_count = sum(
                    [len(files) for root, dirs, files in os.walk(directory)])
                # print(file_count)

                if file_count == 0:
                    cv2.imwrite(
                        directory + '/' + str(file_count + 1) + '.jpg',
                        frame_org[int(bbox[1]):int(bbox[3]),
                                  int(bbox[0]):int(bbox[2])])
                elif file_count > 0 and track_count % 10 == 0:
                    cv2.imwrite(
                        directory + '/' + str(file_count + 1) + '.jpg',
                        frame_org[int(bbox[1]):int(bbox[3]),
                                  int(bbox[0]):int(bbox[2])])

                track_count += 1

            for det in detections:
                bbox = det.to_tlbr()
                score = "%.2f" % round(det.confidence * 100, 2)
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)
                cv2.putText(frame, score + '%', (int(bbox[0]), int(bbox[3])),
                            0, 5e-3 * 130, (0, 255, 0), 2)

        cv2.imshow('YOLO DeepSort', frame)

        if writeVideo_flag:  # and not asyncVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1

        fps_imutils.update()

        fps = (fps + (1. / (time.time() - t1))) / 2
        # print("FPS = %f"%(fps))

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    fps_imutils.stop()
    # print('imutils FPS: {}'.format(fps_imutils.fps()))

    if asyncVideo_flag:
        video_capture.stop()
    else:
        video_capture.release()

    if writeVideo_flag:
        out.release()

    cv2.destroyAllWindows()
コード例 #19
0
ファイル: main.py プロジェクト: OguchiLab/Deep_Sort
def main(yolo):

    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    width = 1280
    height = 720
    rfps = 10

    # Deep SORT
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    tracking = args.tracking
    writeVideo_flag = args.writeVideo_flag
    asyncVideo_flag = args.asyncVideo_flag
    webcamera_flag = args.webcamera_flag
    ipcamera_flag = args.ipcamera_flag
    udp_flag = args.udp_flag

    full_cam_addr, key = sd.set_address(args.ipaddress, args.cam_ip,
                                        args.cam_cmd, args.key)
    cam_ip = full_cam_addr.replace(args.cam_cmd,
                                   "").replace("rtsp://*****:*****@", "")
    if args.jpegmode:
        full_cam_addr = full_cam_addr.replace("rtsp", "http")

    print(full_cam_addr)
    print(key)

    if asyncVideo_flag:
        print("load videofile")
        video_capture = VideoCaptureAsync(args.videofile)
    elif ipcamera_flag or args.jpegmode:
        print("load ipcamera")
        video_capture = cv2.VideoCapture(full_cam_addr)
        # width = video_capture.get(cv2.CAP_PROP_FRAME_WIDTH)
        # height = video_capture.get(cv2.CAP_PROP_FRAME_HEIGHT)
        # rfps = video_capture.get(cv2.CAP_PROP_FPS)
        print("fps:{}width:{}height:{}".format(rfps, width, height))
    elif webcamera_flag:
        print("load webcamera")
        video_capture = cv2.VideoCapture(0)
    else:
        print("load videofile")
        video_capture = cv2.VideoCapture(args.videofile)

    # video_capture.start()

    if writeVideo_flag:
        if asyncVideo_flag:
            w = int(video_capture.cap.get(3))
            h = int(video_capture.cap.get(4))
        else:
            w = int(video_capture.get(3))
            h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter('output_yolov4.avi', fourcc, 30, (w, h))
        frame_index = -1

    if udp_flag:
        HOST = ''
        PORT = 5000
        address = '192.168.2.255'
        sock = socket(AF_INET, SOCK_DGRAM)
        sock.setsockopt(SOL_SOCKET, SO_BROADCAST, 1)
        sock.bind((HOST, PORT))

    fps = 0.0

    i = 0

    if not args.maskoff:
        maskbgi = Image.new('RGB', (int(width), int(height)), (0, 0, 0))
        mask = Image.open(args.maskdir + 'mask' + args.ipaddress[-1] +
                          '.png').convert("L").resize(size=(int(width),
                                                            int(height)),
                                                      resample=Image.NEAREST)

    while True:
        nowtime = datetime.datetime.now(
            timezone('Asia/Tokyo')).strftime('%Y-%m-%d %H:%M:%S.%f%z')
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if not ret:
            print("can't read frame, reopening stream")
            video_capture = cv2.VideoCapture(full_cam_addr)
            continue

        t1 = time.time()

        try:
            image = Image.fromarray(frame[..., ::-1])  # bgr to rgb
        except TypeError:
            video_capture = cv2.VideoCapture(full_cam_addr)
            continue
        if not args.maskoff:
            image = Image.composite(maskbgi, image, mask)
        boxes, confidence, classes = yolo.detect_image(image)

        if tracking:
            features = encoder(frame, boxes)

            detections = [
                Detection(bbox, confidence, cls, feature)
                for bbox, confidence, cls, feature in zip(
                    boxes, confidence, classes, features)
            ]
        else:
            detections = [
                Detection_YOLO(bbox, confidence, cls)
                for bbox, confidence, cls in zip(boxes, confidence, classes)
            ]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap,
                                                    scores)
        detections = [detections[i] for i in indices]
        car_data = {}
        if tracking:
            # Call the tracker
            tracker.predict()
            tracker.update(detections)

            for track in tracker.tracks:
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                bbox = track.to_tlbr()
                car_data[str(track.track_id)] = [
                    int(bbox[0]),
                    int(bbox[1]),
                    int(bbox[2]),
                    int(bbox[3])
                ]
            sd.send_amqp(
                sd.create_jsondata(cam_ip, nowtime,
                                   time.time() - t1, car_data, args.jsonfile,
                                   args.json_path, i), key, args.AMQPHost)
            i += 1

        if not asyncVideo_flag:
            fps = (fps + (1. / (time.time() - t1))) / 2
            print("FPS = %f" % (fps))

        ### Added frame-skipping logic ###
        if not args.jsonfile and args.skip:
            if fps <= 10:
                for _i in range(int(math.ceil(rfps / fps)) - 1):
                    ret, frame = video_capture.read()

    if asyncVideo_flag:
        video_capture.stop()
    else:
        video_capture.release()

    if writeVideo_flag:
        out.release()

    cv2.destroyAllWindows()
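
Example #19 blanks out parts of the frame with a grayscale mask before detection (Image.composite(maskbgi, image, mask)). Below is a minimal sketch of that masking step using a synthetic mask; the mask files and naming scheme in the example (args.maskdir, mask<N>.png) are project-specific and not reproduced here.

import numpy as np
from PIL import Image

def mask_frame(frame_bgr, mask_l):
    """Black out regions where the L-mode mask is white; keep the frame elsewhere."""
    image = Image.fromarray(frame_bgr[..., ::-1])    # BGR -> RGB
    black = Image.new('RGB', image.size, (0, 0, 0))
    masked = Image.composite(black, image, mask_l)   # mask=255 -> black, mask=0 -> original frame
    return np.asarray(masked)[..., ::-1].copy()      # back to BGR for OpenCV

# toy usage: mask out the top half of a dummy 720p frame
frame = np.full((720, 1280, 3), 127, dtype=np.uint8)
mask = Image.new('L', (1280, 720), 0)
mask.paste(255, (0, 0, 1280, 360))
out = mask_frame(frame, mask)
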
コード例 #20
0
ファイル: doorman.py プロジェクト: alievilya/yolov4-doorman
def main(yolo):
    # Definition of the parameters
    with open("cfg/detection_tracker_cfg.json") as detection_config:
        detect_config = json.load(detection_config)
    with open("cfg/doors_info.json") as doors_config:
        doors_config = json.load(doors_config)
    with open("cfg/around_doors_info.json") as around_doors_config:
        around_doors_config = json.load(around_doors_config)
    model_filename = detect_config["tracking_model"]
    input_folder, output_folder = detect_config["input_folder"], detect_config[
        "output_folder"]
    meta_folder = detect_config["meta_folder"]
    output_format = detect_config["output_format"]

    # Deep SORT
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    show_detections = True
    asyncVideo_flag = False

    check_gpu()

    # from here should start loop to process videos from folder
    # for video_name in os.listdir(input_folder):

    HOST = "localhost"
    PORT = 8075
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
        sock.bind((HOST, PORT))
        sock.listen()
        conn, addr = sock.accept()
        with conn:
            print('Connected by', addr)
            #  loop over all videos
            while True:
                data = conn.recv(1000)
                video_motion_list = data.decode("utf-8").split(';')
                videos_que = deque()
                for video_motion in video_motion_list:
                    videos_que.append(video_motion)
                video_name = videos_que.popleft()

                if not video_name.endswith(output_format):
                    continue

                print('elements in que', len(videos_que))
                print("opening video: {}".format(video_name))
                full_video_path = join(input_folder, video_name)
                # full_video_path = "rtsp://*****:*****@192.168.1.52:554/1/h264major"

                meta_name = meta_folder + video_name[:-4] + ".json"
                with open(meta_name) as meta_config_json:
                    meta_config = json.load(meta_config_json)
                camera_id = meta_config["camera_id"]
                if not os.path.exists(output_folder + str(camera_id)):
                    os.mkdir(output_folder + str(camera_id))

                output_name = output_folder + str(camera_id) + '/out_' + video_name
                counter = Counter(counter_in=0, counter_out=0, track_id=0)
                tracker = Tracker(metric)

                if asyncVideo_flag:
                    video_capture = VideoCaptureAsync(full_video_path)
                    video_capture.start()
                    w = int(video_capture.cap.get(3))
                    h = int(video_capture.cap.get(4))
                else:
                    video_capture = cv2.VideoCapture(full_video_path)
                    w = int(video_capture.get(3))
                    h = int(video_capture.get(4))

                fourcc = cv2.VideoWriter_fourcc(*'XVID')
                out = cv2.VideoWriter(output_name, fourcc, 25, (w, h))

                door_array = doors_config["{}".format(camera_id)]
                around_door_array = tuple(
                    around_doors_config["{}".format(camera_id)])
                rect_door = Rectangle(door_array[0], door_array[1],
                                      door_array[2], door_array[3])
                border_door = door_array[3]
                #  loop over video
                save_video_flag = False
                fps_imutils = imutils.video.FPS().start()
                while True:
                    ret, frame = video_capture.read()
                    if not ret:
                        with open('videos_saved/log_results.txt', 'a') as log:
                            log.write(
                                'processed (ret). Video: {}, camera id: {}\n'.
                                format(video_name, camera_id))
                        break
                    t1 = time.time()
                    # lost_ids = counter.return_lost_ids()
                    image = Image.fromarray(frame[..., ::-1])  # bgr to rgb
                    # image = image.crop(around_door_array)
                    boxes, confidence, classes = yolo.detect_image(image)

                    features = encoder(frame, boxes)
                    detections = [
                        Detection(bbox, confidence, cls, feature)
                        for bbox, confidence, cls, feature in zip(
                            boxes, confidence, classes, features)
                    ]

                    # Run non-maxima suppression.
                    boxes = np.array([d.tlwh for d in detections])
                    scores = np.array([d.confidence for d in detections])
                    classes = np.array([d.cls for d in detections])
                    indices = preprocessing.non_max_suppression(
                        boxes, nms_max_overlap, scores)
                    detections = [detections[i] for i in indices]

                    # Call the tracker
                    tracker.predict()
                    tracker.update(detections)

                    cv2.rectangle(frame,
                                  (int(door_array[0]), int(door_array[1])),
                                  (int(door_array[2]), int(door_array[3])),
                                  (23, 158, 21), 3)
                    if len(detections) != 0:
                        counter.someone_inframe()
                        for det in detections:
                            bbox = det.to_tlbr()
                            if show_detections and len(classes) > 0:
                                score = "%.2f" % (det.confidence * 100) + "%"
                                cv2.rectangle(frame,
                                              (int(bbox[0]), int(bbox[1])),
                                              (int(bbox[2]), int(bbox[3])),
                                              (255, 0, 0), 3)
                    else:
                        if counter.need_to_clear():
                            counter.clear_all()
                    # identities = [track.track_id for track in tracker.tracks]
                    # counter.update_identities(identities)

                    for track in tracker.tracks:
                        if not track.is_confirmed(
                        ) or track.time_since_update > 1:
                            continue
                        bbox = track.to_tlbr()

                        if track.track_id not in counter.people_init or counter.people_init[
                                track.track_id] == 0:
                            # counter.obj_initialized(track.track_id)
                            ratio_init = find_ratio_ofbboxes(
                                bbox=bbox, rect_compare=rect_door)

                            if ratio_init > 0:
                                if ratio_init >= 0.5:  # and bbox[3] < door_array[3]:
                                    counter.people_init[
                                        track.track_id] = 2  # man in the door
                                elif ratio_init < 0.5:  # and bbox[3] > door_array[3]:  # initialized in the outside
                                    counter.people_init[track.track_id] = 1
                            else:
                                counter.people_init[track.track_id] = 1
                            counter.people_bbox[track.track_id] = bbox
                        counter.cur_bbox[track.track_id] = bbox

                        adc = "%.2f" % (track.adc * 100
                                        ) + "%"  # Average detection confidence
                        cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                                      (int(bbox[2]), int(bbox[3])),
                                      (255, 255, 255), 2)
                        cv2.putText(frame, "ID: " + str(track.track_id),
                                    (int(bbox[0]), int(bbox[1]) + 50), 0,
                                    1e-3 * frame.shape[0], (0, 255, 0), 3)

                        if not show_detections:
                            track_cls = track.cls
                            cv2.putText(frame, str(track_cls),
                                        (int(bbox[0]), int(bbox[3])), 0,
                                        1e-3 * frame.shape[0], (0, 255, 0), 3)
                            cv2.putText(frame, 'ADC: ' + adc,
                                        (int(bbox[0]),
                                         int(bbox[3] + 2e-2 * frame.shape[1])),
                                        0, 1e-3 * frame.shape[0], (0, 255, 0),
                                        3)
                        # if track.time_since_update >= 15:
                        #     id_get_lost.append(track.track_id)
                    id_get_lost = [
                        track.track_id for track in tracker.tracks
                        if track.time_since_update >= 15
                    ]

                    for val in counter.people_init.keys():
                        ratio = 0
                        cur_c = find_centroid(counter.cur_bbox[val])
                        init_c = find_centroid(counter.people_bbox[val])
                        if val in id_get_lost and counter.people_init[
                                val] != -1:
                            ratio = find_ratio_ofbboxes(
                                bbox=counter.cur_bbox[val],
                                rect_compare=rect_door)
                            if counter.people_init[val] == 2 \
                                    and ratio < 0.6:  # and counter.people_bbox[val][3] > border_door \
                                counter.get_out()
                                save_video_flag = True
                                print(counter.people_init[val], ratio)
                            elif counter.people_init[val] == 1 \
                                    and ratio >= 0.6:
                                counter.get_in()
                                save_video_flag = True
                                print(counter.people_init[val], ratio)
                            counter.people_init[val] = -1

                    ins, outs = counter.return_counter()
                    cv2.rectangle(frame, (frame.shape[1] - 150, 0),
                                  (frame.shape[1], 50), (0, 0, 0), -1, 8)
                    cv2.putText(frame, "in: {}, out: {} ".format(ins, outs),
                                (frame.shape[1] - 140, 20), 0,
                                1e-3 * frame.shape[0], (255, 255, 255), 3)
                    out.write(frame)
                    fps_imutils.update()
                    if not asyncVideo_flag:
                        pass
                        # fps = (1. / (time.time() - t1))
                        # print("FPS = %f" % fps)

                        # if len(fpeses) < 15:
                        #     fpeses.append(round(fps, 2))
                        #
                        # elif len(fpeses) == 15:
                        #     # fps = round(np.median(np.array(fpeses)))
                        #     median_fps = float(np.median(np.array(fpeses)))
                        #     fps = round(median_fps, 1)
                        #     print('max fps: ', fps)
                        #     # fps = 20
                        #     counter.fps = fps
                        #     fpeses.append(fps)

                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break

                if asyncVideo_flag:
                    video_capture.stop()
                    del video_capture
                else:
                    video_capture.release()

                if save_video_flag:
                    with open('videos_saved/log_results.txt', 'a') as log:
                        log.write(
                            'detected! video: {}, camera id: {}, move in: {}, out: {}\n'
                            .format(video_name, camera_id, ins, outs))
                        log.write('video written {}\n\n'.format(output_name))
                    out.release()
                else:
                    if out.isOpened():
                        out.release()
                        if os.path.isfile(output_name):
                            os.remove(output_name)

                if os.path.isfile(full_video_path):
                    os.remove(full_video_path)
                if os.path.isfile(meta_name):
                    os.remove(meta_name)
                save_video_flag = False
                cv2.destroyAllWindows()
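
The in/out decision in example #20 hinges on find_ratio_ofbboxes, which is project code not shown here. A plausible reading, sketched below purely as an assumption, is the fraction of a track's box that lies inside the door rectangle (intersection area divided by box area); the 0.5 / 0.6 thresholds in the example would then be compared against this fraction.

def bbox_door_ratio(bbox, door):
    """Fraction of bbox (x1, y1, x2, y2) covered by the door rectangle (x1, y1, x2, y2).

    Hypothetical stand-in for the project's find_ratio_ofbboxes helper.
    """
    ix1, iy1 = max(bbox[0], door[0]), max(bbox[1], door[1])
    ix2, iy2 = min(bbox[2], door[2]), min(bbox[3], door[3])
    inter = max(0, ix2 - ix1) * max(0, iy2 - iy1)
    area = max(1e-9, (bbox[2] - bbox[0]) * (bbox[3] - bbox[1]))
    return inter / area

# a person fully inside the door region yields 1.0, fully outside yields 0.0
print(bbox_door_ratio((100, 100, 150, 200), (80, 80, 300, 300)))   # 1.0
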
コード例 #21
0
ファイル: 30frames_draw.py プロジェクト: haiyang9zhang/csgo
def main(yolo):
    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # Deep SORT
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    np.random.seed(42)
    COLORS = np.random.randint(0, 255, size=(200, 3),
                               dtype="uint8")

    writeVideo_flag = True
    asyncVideo_flag = False

    file_path = 'testvideo2.avi'

    # load the COCO class labels our YOLO model was trained on
    labelsPath = os.path.sep.join(["model_data", "coco_classes.txt"])
    LABELS = open(labelsPath).read().strip().split("\n")
    # print(str(len(LABELS))+"load successfully")
    # print(LABELS)
    class_nums = np.zeros(80)
    counter = np.zeros(80)
    track_id_max = -1
    if asyncVideo_flag:
        video_capture = VideoCaptureAsync(file_path)
    else:
        video_capture = cv2.VideoCapture(file_path)

    if asyncVideo_flag:
        video_capture.start()

    if writeVideo_flag:
        if asyncVideo_flag:
            w = int(video_capture.cap.get(3))
            h = int(video_capture.cap.get(4))
        else:
            w = int(video_capture.get(3))
            h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter('testvideo2_out.avi', fourcc, 30, (w, h))
        frame_index = -1

    fps = 0.0
    fps_imutils = imutils.video.FPS().start()

    img_num = 0
    frame_cnt = 0
    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if not ret or frame_cnt > 30:
            break
        frame_copy = frame.copy()

        t1 = time.time()

        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb

        boxs, confidence, class_names = yolo.detect_image(image)

        features = encoder(frame, boxs)

        detections = [Detection(bbox, confidence, feature) for bbox, confidence, feature in
                      zip(boxs, confidence, features)]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        # print("print indices!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
        # print(indices)

        # for i in indices:
        #    print(str(i)+class_names[i][0])

        # print(indices.shape)

        detections = [detections[i] for i in indices]
        class_names = [class_names[i] for i in indices]
        print("class_name:" + str(class_names))

        class_IDs = []
        current_nums = np.zeros(80)
        # class_IDs=[]
        for class_name in class_names:
            for i, LABEL in enumerate(LABELS):
                if class_name[0] == LABEL:
                    current_nums[i] += 1
                    class_IDs.append(i)
        # print("person:"+str(current_nums[0]))

        cv2.putText(frame, 'Current', (20, 70), cv2.FONT_HERSHEY_DUPLEX, 0.75, (255, 255, 255), 2)
        cv2.putText(frame, 'Total', (180, 70), cv2.FONT_HERSHEY_DUPLEX, 0.75, (0, 255, 255), 2)

        x1 = 20
        y1 = 100
        for i, cl in enumerate(current_nums):
            if cl > 0:
                cv2.putText(frame, LABELS[i] + "=" + str(cl), (x1, y1), cv2.FONT_HERSHEY_DUPLEX, 0.6, (255, 255, 255),
                            1)  # per-class counts for the current frame
                y1 = y1 + 20

        for i, det in enumerate(detections):
            bbox = det.to_tlbr()
            score = "%.2f" % round(det.confidence * 100, 2)
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 1)
            cv2.putText(frame, score + '%', (int(bbox[0]), int(bbox[1]) + 10), cv2.FONT_HERSHEY_DUPLEX, 0.3,
                        (255, 255, 255), 1)
            # cv2.putText(frame, class_names[i],(int(bbox[0]), int(bbox[1])-5), 0, 5e-3 * 130, (0, 255, 0), 2)
            # cv2.putText(frame, class_names[i], (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 130,(0, 255, 255), 2)

        print("Total of detections:" + str(len(detections)))
        # Call the tracker
        tracker.predict()
        tracker.update(detections, class_IDs)

        # for i, cl in enumerate(class_nums):
        #     if cl > 0:
        #         print("add: " + LABELS[i] + str(cl - class_last_nums[i]))

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                print("not track.is_confirmed() or track.time_since_update > 1: " + str(track.track_id))
                continue
            bbox = track.to_tlbr()
            color = [int(c) for c in COLORS[track.class_id % len(COLORS)]]
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2)

            cv2.putText(frame, str(track.track_id) + ' ' + LABELS[track.class_id], (int(bbox[0]), int(bbox[1]) - 5),
                        cv2.FONT_HERSHEY_DUPLEX, 0.4, color, 1)

            if track.track_id > track_id_max:
                counter[track.class_id] = counter[track.class_id] + 1
                track_id_max = track.track_id
                dest = frame_copy[int(bbox[1]):int(bbox[3]), int(bbox[0]):int(bbox[2])]
                pdest = "./output_image/" + str(LABELS[track.class_id]) + str(int(counter[track.class_id])) + ".png"
                cv2.imwrite(pdest, dest)
                img_num += 1
            # class_nums[track.class_id].append(track.track_id)
            # print(str(LABELS[track.class_id])+":"+class_nums[track.class_id])
            # print("track.id: " + str(track.track_id))
            # print("track.class_name: " + str(LABELS[track.class_id]))

        print(str(counter))
        print("--------------------------该帧输出完毕!--------------------------------------")

        # cv2.putText(frame, 'Total', (200, 60), cv2.FONT_HERSHEY_DUPLEX, 0.75, (255, 0, 0), 2)
        # x2 = 200
        # y2 = 100
        # for i, cl in enumerate(class_nums):
        #     if cl > 0:
        #         cv2.putText(frame, LABELS[i] + "=" + str(cl), (x2, y2), cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 0, 0), 2)
        #         y2 = y2 + 20

        cv2.putText(frame, "FPS: %f" % (fps), (int(20), int(40)), 0, 5e-3 * 200, (0, 255, 0), 3)  # !!!!!!!!!输出FPS
        x2 = 180
        y2 = 100
        for i, cl in enumerate(counter):
            if cl > 0:
                cv2.putText(frame, LABELS[i] + "=" + str(cl), (x2, y2), cv2.FONT_HERSHEY_DUPLEX, 0.6, (0, 255, 255), 1)
                y2 = y2 + 20
        # cv2.imshow('', frame)

        if writeVideo_flag:  # and not asyncVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1

        fps_imutils.update()

        fps = (fps + (1. / (time.time() - t1))) / 2

        # print("FPS = %f"%(fps))

        frame_cnt += 1
        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    fps_imutils.stop()
    print('imutils FPS: {}'.format(fps_imutils.fps()))

    if asyncVideo_flag:
        video_capture.stop()
    else:
        video_capture.release()

    if writeVideo_flag:
        out.release()

    cv2.destroyAllWindows()
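
Example #21 saves one crop the first time a new track id appears (the track_id_max check above). Below is a minimal sketch of that "save a crop per new track" pattern, independent of the tracker; the directory name and file naming here are illustrative, not the example's.

import os
import cv2

def save_new_track_crops(frame, tracks, seen_ids, out_dir="output_image"):
    """Write one crop per previously unseen track id; tracks are (track_id, (x1, y1, x2, y2))."""
    os.makedirs(out_dir, exist_ok=True)
    for track_id, (x1, y1, x2, y2) in tracks:
        if track_id in seen_ids:
            continue
        seen_ids.add(track_id)
        crop = frame[int(y1):int(y2), int(x1):int(x2)]
        if crop.size:  # skip empty crops from boxes outside the frame
            cv2.imwrite(os.path.join(out_dir, "track_{}.png".format(track_id)), crop)

# usage inside the frame loop (assuming a DeepSORT-style tracker object):
# seen_ids = set()
# save_new_track_crops(frame_copy, [(t.track_id, t.to_tlbr()) for t in tracker.tracks], seen_ids)
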
コード例 #22
0
def main(yolo):

    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # Deep SORT
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    tracking = True
    writeVideo_flag = True
    asyncVideo_flag = False

    file_path = 'IMG_3326.MOV'
    dfObj = pd.DataFrame(
        columns=['frame_num', 'track', 'cx', 'cy', 'w', 'h', 'track_temp'])
    dfObjDTP = pd.DataFrame(columns=[
        'filename', 'frame_num', 'bb1', 'bb2', 'bb3', 'bb4', 'track',
        'track_temp', 'Height'
    ])

    if asyncVideo_flag:
        video_capture = VideoCaptureAsync(file_path)
    else:
        video_capture = cv2.VideoCapture(file_path)

    if asyncVideo_flag:
        video_capture.start()

    if writeVideo_flag:
        if asyncVideo_flag:
            w = int(video_capture.cap.get(3))
            h = int(video_capture.cap.get(4))
        else:
            w = int(video_capture.get(3))
            h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter('output_yolov4.avi', fourcc, 30, (w, h))
        frame_index = -1

    fps = 0.0
    fps_imutils = imutils.video.FPS().start()

    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if ret != True:
            break

        t1 = time.time()

        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb
        boxes, confidence, classes = yolo.detect_image(image)

        if tracking:
            features = encoder(frame, boxes)

            detections = [
                Detection(bbox, confidence, cls, feature)
                for bbox, confidence, cls, feature in zip(
                    boxes, confidence, classes, features)
            ]
        else:
            detections = [
                Detection_YOLO(bbox, confidence, cls)
                for bbox, confidence, cls in zip(boxes, confidence, classes)
            ]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap,
                                                    scores)
        detections = [detections[i] for i in indices]

        if tracking:
            # Call the tracker

            tracker.predict()
            tracker.update(detections)

            for track in tracker.tracks:
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                bbox = track.to_tlbr()

                # This crops the image for each frame

                #cropped_image = frame[int(bbox[1]):int(bbox[1])+(int(bbox[3])-int(bbox[1])),int(bbox[0]):int(bbox[0])+(int(bbox[2])-int(bbox[0]))]
                cropped_image = frame[int(bbox[1]):int(bbox[1]) + 256,
                                      int(bbox[0]):int(bbox[0]) + 128]
                # cropped_image = frame[2:5,6:10]

                # Disable or comment out so no white box is drawn
                # cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
                #
                # cv2.putText(frame, "ID: " + str(track.track_id), (int(bbox[0]), int(bbox[1])), 0,
                #             1.5e-3 * frame.shape[0], (0, 255, 0), 1)

                # print(cropped_image)
                dirname = "output_crop/{}".format(track.track_id)
                if not os.path.exists(dirname):
                    os.makedirs(dirname)

                if (cropped_image.size == 0):
                    continue
                else:
                    writeStatus = cv2.imwrite(
                        "output_crop/{}/frame_{}.png".format(
                            track.track_id, frame_index), cropped_image)
                    print("output_crop/{}/frame_{}.png".format(
                        track.track_id, frame_index))

                # Write CSV
                dfObj = dfObj.append(pd.Series([
                    frame_index, track.track_id,
                    int(bbox[0]),
                    int(bbox[1]),
                    int(bbox[2]) - int(bbox[0]),
                    int(bbox[3]) - int(bbox[1]), track.time_since_update
                ],
                                               index=dfObj.columns),
                                     ignore_index=True)

                dfObjDTP = dfObjDTP.append(pd.Series([
                    file_path, frame_index,
                    int(bbox[0]),
                    int(bbox[1]),
                    int(bbox[2]),
                    int(bbox[3]), track.track_id, track.time_since_update,
                    int(bbox[3]) - int(bbox[1])
                ],
                                                     index=dfObjDTP.columns),
                                           ignore_index=True)

        for det in detections:
            bbox = det.to_tlbr()
            score = "%.2f" % round(det.confidence * 100, 2) + "%"

            # Disable or comment out so no white box appears in the cropped image
            # cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)

            # if len(classes) > 0:
            #     cls = det.cls
            #     cv2.putText(frame, str(cls) + " " + score, (int(bbox[0]), int(bbox[3])), 0,
            #                 1.5e-3 * frame.shape[0], (0, 255, 0), 1)

        cv2.imshow('', frame)

        if writeVideo_flag:  # and not asyncVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1

        fps_imutils.update()

        if not asyncVideo_flag:
            fps = (fps + (1. / (time.time() - t1))) / 2
            print("FPS = %f" % (fps))

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    fps_imutils.stop()
    print('imutils FPS: {}'.format(fps_imutils.fps()))

    if asyncVideo_flag:
        video_capture.stop()
    else:
        video_capture.release()

    if writeVideo_flag:
        out.release()

    dfObj = dfObj.sort_values(["track", "frame_num"], ascending=(True, True))
    dfObj.to_csv(r'result_temp.csv', index=False)
    dfObjDTP = dfObjDTP.sort_values(["track", "frame_num"],
                                    ascending=(True, True))
    dfObjDTP.to_csv(r'result_temp_dtp.csv', index=False)
    convert_to_final()
    cv2.destroyAllWindows()
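
Example #22 grows its DataFrames with DataFrame.append inside the loop; that method is deprecated and was removed in pandas 2.0, and row-by-row appends are also slow. An equivalent pattern collects plain dicts and builds the DataFrame once after the loop (column names taken from the example):

import pandas as pd

rows = []
# inside the tracking loop, instead of dfObj = dfObj.append(...):
# rows.append({
#     'frame_num': frame_index, 'track': track.track_id,
#     'cx': int(bbox[0]), 'cy': int(bbox[1]),
#     'w': int(bbox[2]) - int(bbox[0]), 'h': int(bbox[3]) - int(bbox[1]),
#     'track_temp': track.time_since_update,
# })

# after the loop:
dfObj = pd.DataFrame(rows, columns=['frame_num', 'track', 'cx', 'cy', 'w', 'h', 'track_temp'])
dfObj = dfObj.sort_values(['track', 'frame_num'])
dfObj.to_csv('result_temp.csv', index=False)

The same change applies to dfObjDTP before convert_to_final() is called.
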
コード例 #23
0
    def run_video(self):
        # init for classify module
        clf_model = None
        clf_labels = None

        encoder = gdet.create_box_encoder(self.cfg.DEEPSORT.MODEL,
                                          batch_size=4)
        metric = nn_matching.NearestNeighborDistanceMetric(
            "cosine", self.cfg.DEEPSORT.MAX_COSINE_DISTANCE,
            self.cfg.DEEPSORT.NN_BUDGET)
        tracker = Tracker(self.cfg, metric)

        tracking = True
        writeVideo_flag = self.args.out_video
        asyncVideo_flag = False

        list_classes = ['loai_1', 'loai_2', 'loai_3', 'loai_4']
        arr_cnt_class = np.zeros((len(list_classes), self.number_MOI),
                                 dtype=int)

        fps = 0.0
        fps_imutils = imutils.video.FPS().start()
        counted_obj = []
        count_frame = 0
        objs_dict = {}

        if asyncVideo_flag:
            video_capture = VideoCaptureAsync(self.video_path)
        else:
            video_capture = cv2.VideoCapture(self.video_path)

        if asyncVideo_flag:
            video_capture.start()

        if writeVideo_flag:
            if asyncVideo_flag:
                w = int(video_capture.cap.get(3))
                h = int(video_capture.cap.get(4))
            else:
                w = int(video_capture.get(3))
                h = int(video_capture.get(4))
            fourcc = cv2.VideoWriter_fourcc(*'XVID')
            output_camname = 'output' + '_' + self.video_name + '.avi'
            out = cv2.VideoWriter(output_camname, fourcc, 10, (1280, 720))
            frame_index = -1

        while True:
            count_frame += 1
            print('frame processing .......: ', count_frame)
            ret, frame = video_capture.read()
            if ret != True:
                break

            frame_info = self.video_name + "_" + str(count_frame - 1)

            t1 = time.time()
            # frame = cv2.flip(frame, -1)

            _frame = self.process(frame, count_frame, frame_info, encoder,
                                  tracking, tracker, objs_dict, counted_obj,
                                  arr_cnt_class, clf_model, clf_labels)

            if writeVideo_flag:  # and not asyncVideo_flag:
                # save a frame
                out.write(_frame)
                frame_index = frame_index + 1

            # visualize
            if self.args.visualize:
                _frame = imutils.resize(_frame, width=1000)
                cv2.imshow("Final result", _frame)

            fps_imutils.update()

            if not asyncVideo_flag:
                fps = (fps + (1. / (time.time() - t1))) / 2
                print("FPS = %f" % (fps))

            # Press Q to stop!
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

        fps_imutils.stop()
        print('imutils FPS: {}'.format(fps_imutils.fps()))

        if asyncVideo_flag:
            video_capture.stop()
        else:
            video_capture.release()

        if writeVideo_flag:
            out.release()

        cv2.destroyAllWindows()
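
All of these loops measure throughput with imutils.video.FPS (start/update/stop), and several also keep a running average of instantaneous FPS. For reference, a minimal stand-alone use of the imutils helper; the frame-producing loop here is a dummy stand-in for the detect/track work.

import time
from imutils.video import FPS

fps = FPS().start()
for _ in range(100):        # stand-in for the video loop
    time.sleep(0.01)        # stand-in for per-frame detection and tracking
    fps.update()
fps.stop()
print("elapsed: {:.2f}s, approx FPS: {:.2f}".format(fps.elapsed(), fps.fps()))
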
コード例 #24
0
    def run_video(self):
        # init for classify module
        clf_model = None
        clf_labels = None
        if self.use_classify:
            clf_model, clf_labels = mobileNet.load_model_clf(self.cfg)

        encoder = gdet.create_box_encoder(self.cfg.DEEPSORT.MODEL,
                                          batch_size=4)
        metric = nn_matching.NearestNeighborDistanceMetric(
            "cosine", self.cfg.DEEPSORT.MAX_COSINE_DISTANCE,
            self.cfg.DEEPSORT.NN_BUDGET)
        tracker = Tracker(metric)

        tracking = True
        writeVideo_flag = True
        asyncVideo_flag = False

        list_classes = ['loai_1', 'loai_2', 'loai_3', 'loai_4']
        arr_cnt_class = np.zeros((len(list_classes), self.number_MOI),
                                 dtype=int)

        fps = 0.0
        fps_imutils = imutils.video.FPS().start()
        counted_obj = []
        count_frame = 0
        objs_dict = {}

        # file_path = 'data/demo.MOV'
        if asyncVideo_flag:
            video_capture = VideoCaptureAsync(self.video_path)
        else:
            video_capture = cv2.VideoCapture(self.video_path)

        if asyncVideo_flag:
            video_capture.start()

        if writeVideo_flag:
            if asyncVideo_flag:
                w = int(video_capture.cap.get(3))
                h = int(video_capture.cap.get(4))
            else:
                w = int(video_capture.get(3))
                h = int(video_capture.get(4))
            fourcc = cv2.VideoWriter_fourcc(*'XVID')
            out = cv2.VideoWriter('output_yolov4.avi', fourcc, 30, (w, h))
            frame_index = -1

        while True:
            count_frame += 1
            ret, frame = video_capture.read()
            if ret != True:
                break

            t1 = time.time()
            # frame = cv2.flip(frame, -1)

            _frame = frame
            _frame = MOI.config_cam(_frame, self.cfg)

            # draw board
            ROI_board = np.zeros((150, 170, 3), np.uint8)  # np.int was removed in recent NumPy; use an image dtype
            _frame[0:150, 0:170] = ROI_board
            _frame, list_col = init_board(_frame, self.number_MOI)

            _frame_height, _frame_width = _frame.shape[:2]
            cropped_frame = frame
            # cv2.rectangle(_frame, (int(frame_width*0), int(_frame_height*0.1)), (int(_frame_width*0.98), int(_frame_height*0.98)), (255, 0, 0), 2)

            print("[INFO] Detecting.....")
            detections, detections_in_ROI = self.run_detection(
                cropped_frame, encoder, tracking, count_frame)
            print("[INFO] Tracking....")
            _, objs_dict = self.draw_tracking(cropped_frame, tracker, tracking,
                                              detections_in_ROI, count_frame,
                                              objs_dict)
            print("[INFO] Counting....")
            _frame, arr_cnt_class, vehicles_detection_list = self.counting(count_frame, cropped_frame, _frame, \
                                                                            objs_dict, counted_obj,
                                                                            arr_cnt_class, clf_model, clf_labels)
            # delete counted id
            for track in tracker.tracks:
                if int(track.track_id) in counted_obj:
                    track.delete()

            # write result to txt
            with open(self.result_filename, 'a+') as result_file:
                for frame_id, movement_id, vehicle_class_id in vehicles_detection_list:
                    result_file.write('{} {} {} {}\n'.format(
                        self.video_name, frame_id, movement_id,
                        vehicle_class_id))

            # write number to scoreboard
            _frame = write_board(_frame, arr_cnt_class, list_col,
                                 self.number_MOI)

            # visualize
            if self.args.visualize:
                _frame = imutils.resize(_frame, width=1000)
                cv2.imshow("Final result", _frame)

            if writeVideo_flag:  # and not asyncVideo_flag:
                # save a frame
                out.write(_frame)
                frame_index = frame_index + 1

            fps_imutils.update()

            if not asyncVideo_flag:
                fps = (fps + (1. / (time.time() - t1))) / 2
                print("FPS = %f" % (fps))

            # Press Q to stop!
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

        fps_imutils.stop()
        print('imutils FPS: {}'.format(fps_imutils.fps()))

        if asyncVideo_flag:
            video_capture.stop()
        else:
            video_capture.release()

        if writeVideo_flag:
            out.release()

        cv2.destroyAllWindows()
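
Most examples toggle between cv2.VideoCapture and a VideoCaptureAsync wrapper exposing start(), read(), stop() and a .cap attribute. The wrapper itself is not shown in this listing; below is a rough, illustrative sketch of such a threaded reader, assuming the interface used above, not the project's actual implementation.

import threading
import cv2

class SimpleAsyncCapture:
    """Minimal threaded reader approximating how VideoCaptureAsync is used in these examples."""

    def __init__(self, src):
        self.cap = cv2.VideoCapture(src)          # examples query size via video_capture.cap.get(3/4)
        self.ret, self.frame = self.cap.read()
        self.running = False
        self.lock = threading.Lock()

    def start(self):
        self.running = True
        self.thread = threading.Thread(target=self._update, daemon=True)
        self.thread.start()
        return self

    def _update(self):
        # keep grabbing frames in the background so read() never blocks on I/O
        while self.running:
            ret, frame = self.cap.read()
            with self.lock:
                self.ret, self.frame = ret, frame

    def read(self):
        with self.lock:
            frame = self.frame.copy() if self.frame is not None else None
            return self.ret, frame

    def stop(self):
        self.running = False
        self.thread.join()
        self.cap.release()
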