def main(args):
    logger.info('Start Tracking...')

    ctx = mx.gpu(0) if args.gpu else mx.cpu()
    fps = max(0, min(BASE_FPS, args.fps))
    net = model_zoo.get_model(args.network, pretrained=True, ctx=ctx)
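    # Keep only the 'person' class (reusing its pretrained weights) so the
    # detector below emits person boxes exclusively.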
    net.reset_class(classes=['person'], reuse_weights=['person'])

    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0
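    # max_cosine_distance is the matching threshold on the cosine distance
    # between appearance features, nn_budget caps each track's stored feature
    # gallery (None = unbounded), and nms_max_overlap is the overlap threshold
    # for the NMS pass run before tracking.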

    # feature extractor for deepsort re-id
    encoder = gdet.BoxEncoder()

    metric = nn_matching.NearestNeighborDistanceMetric('cosine',
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    capture = cv2.VideoCapture(args.src)
    frame_interval = BASE_FPS // fps if fps > 0 else 0
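    # When fps > 0, only every frame_interval-th frame is passed to the
    # detector; fps == 0 disables the frame skipping below.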
    frame_index = 0
    while True:
        ret, frame = capture.read()
        if not ret:
            break

        if 0 < fps and frame_index % frame_interval != 0:
            frame_index += 1
            continue

        x, img = gcv.data.transforms.presets.yolo.transform_test(
            mx.nd.array(frame).astype('uint8'),
            short=args.short,
        )

        class_IDs, det_scores, det_boxes = net(x.as_in_context(ctx))

        boxs = []
        person = 0  # class index of 'person' (the only class after reset_class)
        score_threshold = 0.5
        for i, class_ID in enumerate(class_IDs[0]):
            # compare as Python scalars so the check works on CPU and GPU alike
            if (class_ID.asscalar() == person
                    and det_scores[0][i].asscalar() >= score_threshold):
                boxs.append(det_boxes[0][i].asnumpy())

        if boxs:
            features = encoder(img, boxs)
        else:
            features = np.array([])

        # Build DeepSORT Detection objects; the detector score is not kept,
        # so the confidence is set to 1.0 here.
        detections = [
            Detection(bbox, 1.0, feature)
            for bbox, feature in zip(boxs, features)
        ]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap,
                                                    scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        frame_index += 1

        # store original scene
        cv2.imwrite(os.path.join(args.out_dir, f'{frame_index}.jpg'), img)

        show_img = img.copy()
        # check missed
        for track in tracker.tracks:
            bbox = [max(0, int(x)) for x in track.to_tlbr()]
            if not track.is_confirmed() or track.time_since_update > 1:
                if 2 <= track.time_since_update < 10:
                    try:
                        cv2.imwrite(
                            os.path.join(
                                args.out_dir,
                                f'missed-{frame_index}-{track.track_id}.jpg'),
                            img,
                        )
                    except Exception:
                        traceback.print_exc()
                logger.info('Skipped by time_since_update')
                continue

            logger.info(f'Frame #{frame_index} - Id: {track.track_id}')
            cv2.rectangle(show_img, (bbox[0], bbox[1]), (bbox[2], bbox[3]),
                          (255, 255, 255), 2)
            cv2.putText(show_img, str(track.track_id), (bbox[0], bbox[1] + 30),
                        0, 5e-3 * 200, (0, 255, 0), 2)

        # show image
        cv2.imwrite(os.path.join(args.out_dir, f'anno-{frame_index}.jpg'),
                    show_img)
        cv2.imshow('', show_img)

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    logger.info(
        f'Missed obj: {tracker.missed_obj}, Missed frame: {tracker.missed_frame}'
    )

    capture.release()
    cv2.destroyAllWindows()
Example 2
def main(yolo):

    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # deep_sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    writeVideo_flag = True

    video_capture = cv2.VideoCapture(0)

    if writeVideo_flag:
        # Define the codec and create VideoWriter object
        w = int(video_capture.get(3))
        h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        out = cv2.VideoWriter('output.avi', fourcc, 15, (w, h))
        list_file = open('detection.txt', 'w')
        frame_index = -1

    fps = 0.0
    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if not ret:
            break
        t1 = time.time()

        # image = Image.fromarray(frame)
        image = Image.fromarray(frame[..., ::-1])  #bgr to rgb
        boxs = yolo.detect_image(image)
        # print("box_num",len(boxs))
        features = encoder(frame, boxs)

        # Build DeepSORT Detection objects; the detector score is not kept,
        # so the confidence is set to 1.0 here.
        detections = [
            Detection(bbox, 1.0, feature)
            for bbox, feature in zip(boxs, features)
        ]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap,
                                                    scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            cv2.putText(frame, str(track.track_id),
                        (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200,
                        (0, 255, 0), 2)

        for det in detections:
            bbox = det.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)

        cv2.imshow('', frame)

        if writeVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(boxs) != 0:
                for i in range(0, len(boxs)):
                    list_file.write(
                        str(boxs[i][0]) + ' ' + str(boxs[i][1]) + ' ' +
                        str(boxs[i][2]) + ' ' + str(boxs[i][3]) + ' ')
            list_file.write('\n')

        fps = (fps + (1. / (time.time() - t1))) / 2
        print("fps= %f" % (fps))

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    video_capture.release()
    if writeVideo_flag:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
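Example 3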
def main(yolo):

    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # deep_sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)
    #video_path ="C:/Users/Rashid Ali/Desktop/Person counting/V05.mp4"
    writeVideo_flag = True

    video_capture = cv2.VideoCapture(0)

    if writeVideo_flag:
        # Define the codec and create VideoWriter object
        video_width = int(video_capture.get(3))
        video_height = int(video_capture.get(4))
        video_fps = int(video_capture.get(5))
        video_size = (int(video_width), int(video_height))
        fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
        out = cv2.VideoWriter('output_V09.mp4', fourcc, video_fps, video_size)
        list_file = open('detection_v2.txt', 'w')
        frame_index = -1

    fps = 0.0

    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if not ret:
            break
        t1 = time.time()
        # image = Image.fromarray(frame)
        image = Image.fromarray(frame[..., ::-1])  #bgr to rgb
        boxs = yolo.detect_image(image)
        # print("box_num",len(boxs))
        features = encoder(frame, boxs)
        # Build DeepSORT Detection objects; the detector score is not kept,
        # so the confidence is set to 1.0 here.
        detections = [
            Detection(bbox, 1.0, feature)
            for bbox, feature in zip(boxs, features)
        ]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap,
                                                    scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)
        '''
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),(255,255,255), 2)
            cv2.putText(frame, str(track.track_id),(int(bbox[0]), int(bbox[1])),0, 5e-3 * 200, (0,255,0),2)
        
        '''

        person_count = 0
        count1 = 0
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            person_count = person_count + 1
            track_id = '{} {}'.format('Track_ID', track.track_id)
            count1 = '{} {}'.format('Total Persons Count', person_count)
            bbox = track.to_tlbr()

            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            cv2.putText(frame, str(track_id), (int(bbox[0]), int(bbox[1])), 0,
                        5e-3 * 100, (0, 255, 0), 2)

        cv2.putText(frame, str(count1), (20, 50), 0, 5e-3 * 100, (247, 7, 7),
                    2)
        cv2.putText(frame,
                    '{:.2f}ms'.format((time.time() - t1) * 1000), (20, 20),
                    fontFace=cv2.FONT_HERSHEY_COMPLEX,
                    fontScale=0.5,
                    color=(247, 7, 7),
                    thickness=1)
        cv2.imshow('Detections Window', frame)
        '''
        #cv2.namedWindow("Detections Window", cv2.WINDOW_AUTOSIZE)
        #cv2.rectangle(frame,(int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),(247, 7, 7), 1)
            #cv2.putText(frame, str(count), (int(bbox[0]), int(bbox[1])), fontFace=cv2.FONT_HERSHEY_COMPLEX, 
            #            fontScale=0.5, color=(247, 7, 7), thickness=1)
        person_count=0
        count1=0
        for det in detections:
            person_count = person_count+1
            count = '{} {:.1f}'.format('Count', person_count)
            count1 = '{} {:.1f}'.format('Total Count', person_count)
            bbox = det.to_tlbr()
            cv2.rectangle(frame,(int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),(247, 7, 7), 2)
            cv2.putText(frame, str(count), (int(bbox[0]), int(bbox[1])), fontFace=cv2.FONT_HERSHEY_COMPLEX, 
                        fontScale=0.5, color=(247, 7, 7), thickness=2)
        '''

        if writeVideo_flag:
            # save a frame
            print("Writing detections in file")
            out.write(frame)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(boxs) != 0:
                for i in range(0, len(boxs)):
                    list_file.write(
                        str(boxs[i][0]) + ' ' + str(boxs[i][1]) + ' ' +
                        str(boxs[i][2]) + ' ' + str(boxs[i][3]) + ' ')
            list_file.write(str(person_count) + '\n')

        fps = (fps + (1. / (time.time() - t1))) / 2
        print("fps= %f" % (fps))

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    video_capture.release()
    if writeVideo_flag:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
Example 4
def Display():
    print("Start Displaying")
    yolo = YOLO()
    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # deep_sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)
    writeVideo_flag = True
    w = 768
    h = 432
    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    out = cv2.VideoWriter('output.avi', fourcc, 15, (w, h))
    list_file = open('detection.txt', 'w')
    frame_index = -1

    fps = 0.0
    max_boxs = 0
    person_track = {}
    yolo2 = YOLO2()
    while True:
        if not q.empty():
            # read the check-in records
            face = []
            cur1 = conn.cursor()  # get a cursor
            sql1 = "select * from worker"
            cur1.execute(sql1)
            data = cur1.fetchall()
            for d in data:
                # note: int fields must be converted with str()
                name = str(d[1]) + '_' + d[2]
                face.append(name)
            cur1.close()  # close the cursor
            # take a frame from the queue
            frame = q.get()
            t1 = time.time()

            # run safety-gear detection
            image = Image.fromarray(frame[..., ::-1])  # bgr to rgb
            img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            frame, wear = yolo2.detect_image(img)
            frame = np.array(frame)
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            # get the warning line
            #cv2.line(frame, (132,368), (229, 368), (0, 255, 255), 3)
            cv2.line(frame, (275,360), (378, 360), (0, 255, 255), 1)
            transboundaryline = t.line_detect_possible_demo(frame)
            # YOLO object detection
            boxs = yolo.detect_image(image)
            features = encoder(frame, boxs)
            # Build DeepSORT Detection objects; the detector score is not kept,
            # so the confidence is set to 1.0 here.
            detections = [Detection(bbox, 1.0, feature) for bbox, feature in zip(boxs, features)]
            # Run non-maxima suppression.
            boxes = np.array([d.tlwh for d in detections])
            scores = np.array([d.confidence for d in detections])
            indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
            detections = [detections[i] for i in indices]
            if len(boxs) > max_boxs:
                max_boxs = len(boxs)

            # Call the tracker
            tracker.predict()
            tracker.update(detections)

            for track in tracker.tracks:
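                # Hack: drop any track whose id exceeds the largest detection
                # count seen so far and reset the id counter; note this mutates
                # tracker.tracks while iterating over it.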
                if max_boxs < track.track_id:
                    tracker.tracks.remove(track)
                    tracker._next_id = max_boxs + 1
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                bbox = track.to_tlbr()
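                # the bottom-centre of the box is used as the person's ground point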
                PointX = bbox[0] + ((bbox[2] - bbox[0]) / 2)
                PointY = bbox[3]

                if track.track_id not in person_track:
                    track2 = copy.deepcopy(track)
                    person_track[track.track_id] = track2

                else:
                    track2 = copy.deepcopy(track)
                    bbox2 = person_track[track.track_id].to_tlbr()
                    PointX2 = bbox2[0] + ((bbox2[2] - bbox2[0]) / 2)
                    PointY2 = bbox2[3]
                    distance = math.sqrt(pow(PointX - PointX2, 2) + pow(PointY - PointY2, 2))
                    # correction: re-associate the id if the jump is implausibly large
                    if distance < 120:
                        person_track[track.track_id] = track2
                    else:
                        # print('last',track.track_id)
                        dis = {}
                        for key in person_track:
                            bbox3 = person_track[key].to_tlbr()
                            PointX3 = bbox3[0] + ((bbox3[2] - bbox3[0]) / 2)
                            PointY3 = bbox3[3]
                            d = math.sqrt(pow(PointX3 - PointX, 2) + pow(PointY3 - PointY, 2))
                            dis[key] = d
                        dis = sorted(dis.items(), key=operator.itemgetter(1), reverse=False)
                        track2.track_id = dis[0][0]
                        person_track[dis[0][0]] = track2
                        tracker.tracks.remove(track)
                        tracker.tracks.append(person_track[track.track_id])

                # assign the worker label

                try:
                    box_title = face[track2.track_id - 1]
                except Exception as e:
                    box_title = str(track2.track_id) + "_" + "unknown"
                if box_title not in workers:
                    wid = box_title.split('_')[0]
                    localtime = time.asctime(time.localtime(time.time()))
                    workers[box_title] = wk.Worker()
                    workers[box_title].set(box_title, localtime, (int(PointX), int(PointY)))
                    cur2 = conn.cursor()  # get a cursor
                    sql2 = "UPDATE worker SET in_time='" + localtime + "' WHERE worker_id= '" + wid + "'"
                    cur2.execute(sql2)
                    cur2.close()  # close the cursor
                else:
                    localtime = time.asctime(time.localtime(time.time()))
                    yoloPoint = (int(PointX), int(PointY))
                    wear_dic = {}
                    workers[box_title].current_point = yoloPoint
                    workers[box_title].track_point.append(workers[box_title].current_point)
                    mytrack = str(workers[box_title].track_point)
                    wid = box_title.split('_')[0]
                    # Kalman filter prediction
                    if wid not in utils.KalmanNmae:
                        utils.myKalman(wid)
                    if wid not in utils.lmp:
                        utils.setLMP(wid)
                    cpx, cpy = utils.predict(workers[box_title].current_point[0], workers[box_title].current_point[1], wid)

                    if cpx[0] == 0.0 or cpy[0] == 0.0:
                        cpx[0] = workers[box_title].current_point[0]
                        cpy[0] = workers[box_title].current_point[1]
                    workers[box_title].next_point = (int(cpx), int(cpy))

                    cur3 = conn.cursor()  # get a cursor
                    sql3 = "UPDATE worker SET current_point= '" + str(workers[box_title].current_point) + "' ,track_point = '" + mytrack + "',next_point = '" + str(workers[box_title].next_point) + "' WHERE worker_id= '" + wid + "'"
                    cur3.execute(sql3)
                    cur3.close()
                    # record safety-gear violations
                    if len(wear) > 0:
                        for w in wear:
                            wear_dis = int(math.sqrt(pow(w[0] - yoloPoint[0], 2) + pow(w[1] - yoloPoint[1], 2)))
                            wear_dic[wear_dis] = w
                        wear_dic = sorted(wear_dic.items(), key=operator.itemgetter(0), reverse=False)

                        if wear_dic[0][0] < 120:
                            cur4 = conn.cursor()  # get a cursor

                            if wear[wear_dic[0][1]] == 1:
                                if len(workers[box_title].wear['no helmet']) == 0:

                                    workers[box_title].wear['no helmet'].append(localtime)
                                    sql = "INSERT INTO wear SET worker_id = '" + wid + "', type = 'no_helmet',abnormal_time = '" + localtime + "'"
                                    cur4.execute(sql)
                                    cur4.close()  # close the cursor

                                else:
                                    print(box_title,workers[box_title].wear['no helmet'])
                                    if localtime not in workers[box_title].wear['no helmet']:
                                        workers[box_title].wear['no helmet'].append(localtime)
                                        sql = "INSERT INTO wear SET worker_id = '" + wid + "', type = 'no_helmet',abnormal_time = '" + localtime + "'"
                                        cur4.execute(sql)
                                        cur4.close()  # close the cursor


                            elif wear[wear_dic[0][1]] == 2:
                                if len(workers[box_title].wear['no work cloths']) == 0:
                                    workers[box_title].wear['no work cloths'].append(localtime)
                                    sql = "INSERT INTO wear SET worker_id = '" + wid + "', type = 'no work cloths',abnormal_time = '" + localtime + "'"
                                    cur4.execute(sql)
                                    cur4.close()  # close the cursor
                                else:
                                    if localtime not in workers[box_title].wear['no work cloths']:
                                        workers[box_title].wear['no work cloths'].append(localtime)
                                        sql = "INSERT INTO wear SET worker_id = '" + wid + "', type = 'no work cloths',abnormal_time = '" + localtime + "'"
                                        cur4.execute(sql)
                                        cur4.close()  # close the cursor
                            elif wear[wear_dic[0][1]] == 3:
                                if len(workers[box_title].wear['unsafe wear']) == 0:
                                    workers[box_title].wear['unsafe wear'].append(localtime)
                                    sql = "INSERT INTO wear SET worker_id = '" + wid + "', type = 'unsafe wear',abnormal_time = '" + localtime + "'"
                                    cur4.execute(sql)
                                    cur4.close()  # close the cursor
                                else:
                                    if localtime not in workers[box_title].wear['unsafe wear']:
                                        workers[box_title].wear['unsafe wear'].append(localtime)
                                        sql = "INSERT INTO wear SET worker_id = '" + wid + "', type = 'unsafe wear',abnormal_time = '" + localtime + "'"
                                        cur4.execute(sql)
                                        cur4.close()  # close the cursor

                    # record line-crossing events
                    if len(workers[box_title].track_point) > 4:

                        for i in range(len(transboundaryline)):
                            p1 = (transboundaryline[i][0], transboundaryline[i][1])
                            p2 = (transboundaryline[i][2], transboundaryline[i][3])
                            p3 = workers[box_title].track_point[-2]
                            p4 = workers[box_title].track_point[-1]
                            a = t.IsIntersec(p1, p2, p3, p4)
                            if a == '有交点':  # '有交点' means "has an intersection"
                                cur5 = conn.cursor()  # get a cursor
                                cur6 = conn.cursor()  # get a cursor
                                cur5.execute(
                                    "select time from transboundary where worker_id = '" + wid + "' ")

                                qurrytime = cur5.fetchone()
                                cur5.close()  # close the cursor
                                if qurrytime is None:
                                    print('line crossing detected')
                                    sql = "INSERT INTO transboundary SET worker_id = '" + wid + "',time = '" + localtime + "'"
                                    cur6.execute(sql)
                                    cur6.close()  # close the cursor
                                else:
                                    temp1 = 0
                                    for qt in qurrytime:
                                        if qt == localtime:
                                            temp1 = 1
                                    if temp1 == 0:
                                        print('line crossing detected')
                                        sql = "INSERT INTO transboundary SET worker_id = '" + wid + "',time = '" + localtime + "'"
                                        cur6.execute(sql)
                                        cur6.close()  # close the cursor
                    if len(workers[box_title].track_point) >= 20:
                        workers[box_title].previous_point = workers[box_title].track_point[-5]
                conn.commit()
                try:
                    cv2.putText(frame, face[track2.track_id - 1], (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200, (0, 255, 0),
                                2)
                except Exception as e:
                    cv2.putText(frame, "unknow", (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200,
                                (0, 255, 0), 2)

            cv2.imshow('', frame)

        if writeVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(boxs) != 0:
                for i in range(0, len(boxs)):
                    list_file.write(str(boxs[i][0]) + ' ' + str(boxs[i][1]) + ' ' + str(boxs[i][2]) + ' ' + str(
                        boxs[i][3]) + ' ')
            list_file.write('\n')

        fps = (fps + (1. / (time.time() - t1))) / 2
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
Example 5
def main(yolo):

    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # Deep SORT
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    tracking = True
    writeVideo_flag = False
    asyncVideo_flag = False

    file_path = 'D:\\video/[Sala Outside][2020-05-28T16-01-39][2020-05-28T18-02-09].mp4'
    if asyncVideo_flag:
        video_capture = VideoCaptureAsync(file_path)
    else:
        video_capture = cv2.VideoCapture(file_path)

    if asyncVideo_flag:
        video_capture.start()

    if writeVideo_flag:
        if asyncVideo_flag:
            w = int(video_capture.cap.get(3))
            h = int(video_capture.cap.get(4))
        else:
            w = int(video_capture.get(3))
            h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter('output_yolov4.avi', fourcc, 30, (w, h))
        frame_index = -1

    fps = 0.0
    fps_imutils = imutils.video.FPS().start()

    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if not ret:
            break

        t1 = time.time()

        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb
        boxes, confidence, classes = yolo.detect_image(image)

        if tracking:
            features = encoder(frame, boxes)

            detections = [
                Detection(bbox, confidence, cls, feature)
                for bbox, confidence, cls, feature in zip(
                    boxes, confidence, classes, features)
            ]
        else:
            detections = [
                Detection_YOLO(bbox, confidence, cls)
                for bbox, confidence, cls in zip(boxes, confidence, classes)
            ]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap,
                                                    scores)
        detections = [detections[i] for i in indices]

        if tracking:
            # Call the tracker
            tracker.predict()
            tracker.update(detections)

            for track in tracker.tracks:
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                bbox = track.to_tlbr()
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
                cv2.putText(frame, "ID: " + str(track.track_id),
                            (int(bbox[0]), int(bbox[1])), 0,
                            1.5e-3 * frame.shape[0], (0, 255, 0), 1)

        for det in detections:
            bbox = det.to_tlbr()
            score = "%.2f" % round(det.confidence * 100, 2) + "%"
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)
            if len(classes) > 0:
                cls = det.cls
                cv2.putText(frame,
                            str(cls) + " " + score,
                            (int(bbox[0]), int(bbox[3])), 0,
                            1.5e-3 * frame.shape[0], (0, 255, 0), 1)

        cv2.imshow('', frame)

        if writeVideo_flag:  # and not asyncVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1

        fps_imutils.update()

        if not asyncVideo_flag:
            fps = (fps + (1. / (time.time() - t1))) / 2
            print("FPS = %f" % (fps))

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    fps_imutils.stop()
    print('imutils FPS: {}'.format(fps_imutils.fps()))

    if asyncVideo_flag:
        video_capture.stop()
    else:
        video_capture.release()

    if writeVideo_flag:
        out.release()

    cv2.destroyAllWindows()
Example 6
def main(yolo, input):

    # file name including the extension
    basename = os.path.basename(input)
    print(" START YOLOv4 + DeepSort input file is ", basename)

    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # Deep SORT
    model_filename = '../model/mars-small128.pb'
    cencoder = gdet.create_box_encoder(model_filename, batch_size=1)
    pencoder = gdet.create_box_encoder(model_filename, batch_size=1)

    cmetric = nn_matching.NearestNeighborDistanceMetric(
        "cosine", max_cosine_distance, nn_budget)
    pmetric = nn_matching.NearestNeighborDistanceMetric(
        "cosine", max_cosine_distance, nn_budget)
    ctracker = Tracker(cmetric)
    ptracker = Tracker(pmetric)
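    # Separate encoders, metrics and trackers are kept for Car and Pedestrian
    # so that the two classes never share track ids.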

    tracking = True
    writeVideo_flag = True

    # categories to run inference for
    cl_list = ['Pedestrian', 'Car']

    video_capture = cv2.VideoCapture(input)

    fps = 0.0
    fps_imutils = imutils.video.FPS().start()
    if writeVideo_flag:
        basename_without_ext = os.path.splitext(os.path.basename(input))[0]
        fname = basename_without_ext + 'output_yolov4.mp4'
        output_path = '../output/' + fname
        video_FourCC = int(video_capture.get(cv2.CAP_PROP_FOURCC))
        video_fps = video_capture.get(cv2.CAP_PROP_FPS)
        video_size = (int(video_capture.get(cv2.CAP_PROP_FRAME_WIDTH)),
                      int(video_capture.get(cv2.CAP_PROP_FRAME_HEIGHT)))

        out = cv2.VideoWriter(output_path, video_FourCC, video_fps, video_size)
        frame_index = -1

    Nm_fr = 0
    all_result = []

    while True:

        Car_result_ALL = []
        Pedestrian_result_ALL = []

        Nm_fr = Nm_fr + 1

        ret, frame = video_capture.read()  # frame shape 1920*1216*3

        if not ret:
            break

        print("Frame no. = ", Nm_fr)
        t1 = time.time()
        image = Image.fromarray(frame[..., ::-1])

        cboxes, cconfidence, cclasses = yolo.detect_image(image, cl_list[1])
        pboxes, pconfidence, pclasses = yolo.detect_image(image, cl_list[0])

        if tracking:
            cfeatures = cencoder(frame, cboxes)
            pfeatures = pencoder(frame, pboxes)

            cdetections = [Detection(cbbox, cconfidence, ceach_class, cfeature) for cbbox, cconfidence, ceach_class, cfeature in \
                          zip(cboxes, cconfidence, cclasses, cfeatures)]
            pdetections = [Detection(pbbox, pconfidence, peach_class, pfeature) for pbbox, pconfidence, peach_class, pfeature in \
                          zip(pboxes, pconfidence, pclasses, pfeatures)]
            #else:
            #    detections = [Detection_YOLO(bbox, confidence, each_class) for bbox, confidence, each_class in \
            #                 zip(boxes, confidence, classes)]

            # Run non-maxima suppression.
            cboxes = np.array([d.tlwh for d in cdetections])
            cscores = np.array([d.confidence for d in cdetections])
            cindices = preprocessing.non_max_suppression(
                cboxes, nms_max_overlap, cscores)
            cdetections = [cdetections[i] for i in cindices]

            pboxes = np.array([d.tlwh for d in pdetections])
            pscores = np.array([d.confidence for d in pdetections])
            pindices = preprocessing.non_max_suppression(
                pboxes, nms_max_overlap, pscores)
            pdetections = [pdetections[i] for i in pindices]

            if tracking:
                # Call the tracker
                ctracker.predict()
                ctracker.update(cdetections)

                ptracker.predict()
                ptracker.update(pdetections)

                for ctrack in ctracker.tracks:
                    if not ctrack.is_confirmed(
                    ) or ctrack.time_since_update > 1:
                        continue
                    cbbox = ctrack.to_tlbr()
                    cv2.rectangle(frame, (int(cbbox[0]), int(cbbox[1])),
                                  (int(cbbox[2]), int(cbbox[3])), (0, 0, 255),
                                  3)
                    cv2.putText(frame, "ID: " + str(ctrack.track_id), (int(cbbox[0]), int(cbbox[1])), 0, \
                                1.5e-3 * frame.shape[0], (0, 0, 255), 3)

                    #OUTPUT TRACKING
                    ID = int(ctrack.track_id)
                    left = int(cbbox[0])
                    top = int(cbbox[1])
                    right = int(cbbox[2])
                    bottom = int(cbbox[3])

                    Car_result = {
                        'id': ID,
                        'box2d': [left, top, right, bottom]
                    }  # prediction result
                    print("Car_result = ", Car_result)
                    Car_result_ALL.append(Car_result)

                for ptrack in ptracker.tracks:
                    if not ptrack.is_confirmed(
                    ) or ptrack.time_since_update > 1:
                        continue
                    pbbox = ptrack.to_tlbr()
                    cv2.rectangle(frame, (int(pbbox[0]), int(pbbox[1])),
                                  (int(pbbox[2]), int(pbbox[3])), (255, 0, 0),
                                  3)
                    cv2.putText(frame, "ID: " + str(ptrack.track_id), (int(pbbox[0]), int(pbbox[1])), 0, \
                                1.5e-3 * frame.shape[0], (255, 0, 0), 3)

                    #OUTPUT TRACKING
                    ID = int(ptrack.track_id)
                    left = int(pbbox[0])
                    top = int(pbbox[1])
                    right = int(pbbox[2])
                    bottom = int(pbbox[3])

                    Pedestrian_result = {
                        'id': ID,
                        'box2d': [left, top, right, bottom]
                    }  # prediction result
                    print("Pedestrian_result = ", Pedestrian_result)
                    Pedestrian_result_ALL.append(Pedestrian_result)

            #YOLOv4 output to frame for Car
            for cdet in cdetections:
                cbbox = cdet.to_tlbr()
                cscore = "%.2f" % round(cdet.confidence * 100, 2) + "%"
                cv2.rectangle(frame, (int(cbbox[0]), int(cbbox[1])),
                              (int(cbbox[2]), int(cbbox[3])), (255, 255, 255),
                              2)
                if len(cclasses) > 0:
                    ceach_class = cdet.cls
                    cv2.putText(frame, str(ceach_class) + " " + cscore, (int(cbbox[0]), int(cbbox[3])), 0, \
                                1.5e-3 * frame.shape[0], (255, 255, 255), 2)

            #YOLOv4 output to frame for Pedestrian
            for pdet in pdetections:
                pbbox = pdet.to_tlbr()
                pscore = "%.2f" % round(pdet.confidence * 100, 2) + "%"
                cv2.rectangle(frame, (int(pbbox[0]), int(pbbox[1])),
                              (int(pbbox[2]), int(pbbox[3])), (127, 127, 127),
                              2)
                if len(pclasses) > 0:
                    peach_class = pdet.cls
                    cv2.putText(frame, str(peach_class) + " " + pscore, (int(pbbox[0]), int(pbbox[3])), 0, \
                                1.5e-3 * frame.shape[0], (127, 127, 127), 2)

            # Each frame result
            all_result.append({
                'Car': Car_result_ALL,
                'Pedestrian': Pedestrian_result_ALL
            })

            if writeVideo_flag:
                # save a frame
                out.write(frame)
                frame_index = frame_index + 1

            fps_imutils.update()

            fps = (fps + (1. / (time.time() - t1))) / 2
            print("     FPS = %f" % (fps))

    if writeVideo_flag:
        out.release()

    video_capture.release()

    fps_imutils.stop()
    print('imutils FPS: {}'.format(fps_imutils.fps()))

    return {basename: all_result}
Example 7
def read(stack):
    print('Process to read: %s' % os.getpid())
    yolo = YOLO()
    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0
    # deep_sort
    d = os.path.dirname(__file__)
    model_filename = d + '/model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)
    max_boxs = 0
    face = ['17208019']

    # each target's point in the previous frame
    history = {}
    # dict mapping track id to label
    person = {}
    # list of ids that have already been given a new label
    change = []
    while True:
        if len(stack) != 0:
            frame = stack.pop()
            t1 = time.time()
            frame_count = 0
            localtime = time.asctime(time.localtime(time.time()))
            utils.draw(frame, line.readline())
            # get the warning line
            transboundaryline = line.readline()
            utils.draw(frame, transboundaryline)
            img = Image.fromarray(frame)
            #img.save('frame.jpg')
            '''
            cv2.line(frame, (837, 393), (930, 300), (0, 255, 255), 3)
            transboundaryline = t.line_detect_possible_demo(frame)
            '''
            # image = Image.fromarray(frame)
            image = Image.fromarray(frame[..., ::-1])  # bgr to rgb
            boxs = yolo.detect_image(image)
            # print("box_num",len(boxs))
            features = encoder(frame, boxs)

            # Build DeepSORT Detection objects; the detector score is not kept,
            # so the confidence is set to 1.0 here.
            detections = [
                Detection(bbox, 1.0, feature)
                for bbox, feature in zip(boxs, features)
            ]

            # Run non-maxima suppression.
            boxes = np.array([d.tlwh for d in detections])
            scores = np.array([d.confidence for d in detections])
            indices = preprocessing.non_max_suppression(
                boxes, nms_max_overlap, scores)
            detections = [detections[i] for i in indices]
            if len(boxs) > max_boxs:
                max_boxs = len(boxs)
            # Call the tracker
            tracker.predict()
            tracker.update(detections)
            # info for this frame
            info = {}
            target = []
            for track in tracker.tracks:
                # one target in the frame
                per_info = {}
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                if track.track_id not in person:
                    person[track.track_id] = str(track.track_id)
                bbox = track.to_tlbr()
                PointX = bbox[0] + ((bbox[2] - bbox[0]) / 2)
                PointY = bbox[3]
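                # horizontal distance to x = 1200, presumably the check-in line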
                dis = int(PointX) - 1200
                try:
                    if dis < 15:
                        if track.track_id not in change:
                            person[track.track_id] = face.pop(0)
                            change.append(track.track_id)
                except Exception:
                    print('unauthorized entry')  # '非法入侵'
                # current target
                if track.track_id not in change:
                    per_info['worker_id'] = 'unknown' + str(track.track_id)
                else:
                    per_info['worker_id'] = person[track.track_id]
                # current target coordinates
                yoloPoint = (int(PointX), int(PointY))
                per_info['current_point'] = yoloPoint

                # Kalman filter prediction
                if per_info['worker_id'] not in utils.KalmanNmae:
                    utils.myKalman(per_info['worker_id'])
                if per_info['worker_id'] not in utils.lmp:
                    utils.setLMP(per_info['worker_id'])
                cpx, cpy = utils.predict(yoloPoint[0], yoloPoint[1],
                                         per_info['worker_id'])

                if cpx[0] == 0.0 or cpy[0] == 0.0:
                    cpx[0] = yoloPoint[0]
                    cpy[0] = yoloPoint[1]
                if frame_count > 20:
                    per_info['next_point'] = (int(cpx), int(cpy))
                else:
                    per_info['next_point'] = yoloPoint

                # record line-crossing status
                if per_info['worker_id'] in history:
                    per_info['transboundary'] = 'no'
                    #print(transboundaryline)

                    line1 = [
                        per_info['next_point'], history[per_info['worker_id']]
                    ]
                    a = line.IsIntersec2(transboundaryline, line1)

                    if a == '有交点':  # '有交点' means "has an intersection"
                        print('line-crossing alert')

                        per_info['transboundary'] = 'yes'

                history[per_info['worker_id']] = per_info['current_point']

                frame_count = frame_count + 1
                #print(per_info)
                # draw the target box
                #cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
                cv2.putText(frame, per_info['worker_id'],
                            (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200,
                            (0, 255, 0), 2)
                target.append(per_info)
            info['time'] = localtime
            #info['frame'] = str(img.tolist()).encode('base64')
            info['frame'] = 'frame'
            info['target'] = target
            # serialize to JSON
            info_json = json.dumps(info)
            info_queue.put(info_json)
            getInfo(info_queue)
            cv2.imshow("img", frame)
            key = cv2.waitKey(1) & 0xFF
            if key == ord('q'):
                break
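Example 8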
    def main(self, frame_data):
        # Definition of the parameters
        nms_max_overlap = 1.0

        # set HyperParams
        size = 416
        iou = 0.45
        score = 0.50
        info = False

        input_size = size

        self.person1.is_used = 0
        self.person2.is_used = 0
        self.person3.is_used = 0
        self.person4.is_used = 0

        out = None

        frame_data = cv2.cvtColor(frame_data, cv2.COLOR_BGR2RGB)
        image_data = cv2.resize(frame_data, (input_size, input_size))
        image_data = image_data / 255.
        image_data = image_data[np.newaxis, ...].astype(np.float32)
        # start_time = time.time()

        batch_data = tf.constant(image_data)
        pred_bbox = self.infer(batch_data)  # run the YOLO model to get bounding-box predictions
        for key, value in pred_bbox.items():
            boxes = value[:, :, 0:4]  # box coordinates
            pred_conf = value[:, :, 4:]  # class-confidence scores

        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores=tf.reshape(
                pred_conf,
                (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
            max_output_size_per_class=50,
            max_total_size=50,
            iou_threshold=iou,
            score_threshold=score)

        # convert data to numpy arrays and slice out unused elements
        num_objects = valid_detections.numpy()[0]
        bboxes = boxes.numpy()[0]
        bboxes = bboxes[0:int(num_objects)]
        scores = scores.numpy()[0]
        scores = scores[0:int(num_objects)]
        classes = classes.numpy()[0]
        classes = classes[0:int(num_objects)]

        # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height
        original_h, original_w, _ = frame_data.shape
        bboxes = utils.format_boxes(bboxes, original_h, original_w)

        # store all predictions in one parameter for simplicity when calling functions
        pred_bbox = [bboxes, scores, classes, num_objects]

        # read in all class names from config
        class_names = utils.read_class_names(cfg.YOLO.CLASSES)

        # by default allow all classes in .names file
        # allowed_classes = list(class_names.values())

        # custom allowed classes (uncomment line below to customize tracker for only people)
        allowed_classes = ['person']

        # loop through objects and use class index to get class name, allow only classes in allowed_classes list
        names = []
        deleted_indx = []
        for i in range(num_objects):
            class_indx = int(classes[i])
            class_name = class_names[class_indx]
            if class_name not in allowed_classes:
                deleted_indx.append(i)
            else:
                names.append(class_name)
        names = np.array(names)
        count = len(names)
        if count:
            cv2.putText(frame_data, "Objects being tracked: {}".format(count),
                        (5, 35), cv2.FONT_HERSHEY_COMPLEX_SMALL, 2,
                        (0, 255, 0), 2)
            # print("Objects being tracked: {}".format(count))
        # delete detections that are not in allowed_classes
        bboxes = np.delete(bboxes, deleted_indx, axis=0)
        scores = np.delete(scores, deleted_indx, axis=0)

        # encode yolo detections and feed to tracker
        features = self.encoder(frame_data, bboxes)
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                bboxes, scores, names, features)
        ]

        # initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maxima suppression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes,
                                                    nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # DeepSort Tracking Start

        # Call the tracker
        self.tracker.predict()  # load tracker
        self.tracker.update(detections)

        match_person = 0
        # reset unmatched for center compare
        unmatched = []

        # update tracks
        for track in self.tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue

            # draw bbox on screen (this should probably be finished before the frame is sent on)
            bbox = track.to_tlbr()
            class_name = track.get_class()

            # Matching index with index_stack
            if self.person1.is_exist(track.track_id):
                self.person1.centerX, self.person1.centerY = self.getCenter(
                    bbox)
                self.draw_box(frame_data, self.person1.index_stack[0], colors,
                              bbox)
                self.person1.is_used = 1
                match_person += 1
            elif self.person2.is_exist(track.track_id):
                self.person2.centerX, self.person2.centerY = self.getCenter(
                    bbox)
                self.draw_box(frame_data, self.person2.index_stack[0], colors,
                              bbox)
                self.person2.is_used = 1
                match_person += 1
            elif self.person3.is_exist(track.track_id):
                self.person3.centerX, self.person3.centerY = self.getCenter(
                    bbox)
                self.draw_box(frame_data, self.person3.index_stack[0], colors,
                              bbox)
                self.person3.is_used = 1
                match_person += 1
            elif self.person4.is_exist(track.track_id):
                self.person4.centerX, self.person4.centerY = self.getCenter(
                    bbox)
                self.draw_box(frame_data, self.person4.index_stack[0], colors,
                              bbox)
                self.person4.is_used = 1
                match_person += 1
            else:
                unmatched.append([track.track_id, bbox])
                print('found new object!')

        unmatched = np.array(unmatched, dtype=object)
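        # Re-attach unmatched DeepSORT ids to the four fixed person slots: a
        # single miss goes to the first unused slot; with several misses the
        # slot whose previous centre is nearest (Euclidean distance) wins.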

        # Missed Person Only 1
        if match_person == 3 and len(unmatched) == 1:
            if self.person1.is_used == 0:
                self.person1.centerX, self.person1.centerY = self.getCenter(
                    unmatched[0][1])
                self.person1.index_stack.append(unmatched[0][0])
                self.draw_box(frame_data, self.person1.index_stack[0], colors,
                              unmatched[0][1])
                self.person1.is_used = 1
                match_person += 1
            elif self.person2.is_used == 0:
                self.person2.centerX, self.person2.centerY = self.getCenter(
                    unmatched[0][1])
                self.person2.index_stack.append(unmatched[0][0])
                self.draw_box(frame_data, self.person2.index_stack[0], colors,
                              unmatched[0][1])
                self.person2.is_used = 1
                match_person += 1
            elif self.person3.is_used == 0:
                self.person3.centerX, self.person3.centerY = self.getCenter(
                    unmatched[0][1])
                self.person3.index_stack.append(unmatched[0][0])
                self.draw_box(frame_data, self.person3.index_stack[0], colors,
                              unmatched[0][1])
                self.person3.is_used = 1
                match_person += 1
            elif self.person4.is_used == 0:
                self.person4.centerX, self.person4.centerY = self.getCenter(
                    unmatched[0][1])
                self.person4.index_stack.append(unmatched[0][0])
                self.draw_box(frame_data, self.person4.index_stack[0], colors,
                              unmatched[0][1])
                self.person4.is_used = 1
                match_person += 1
            else:
                print("ERROR : Something problem on object.is_used")

        # Missed Person Over 2
        if match_person <= 3 and len(unmatched) >= 1:
            for unmatch in unmatched:
                if match_person >= 4:
                    break
                else:
                    # Apply center location Euclidean Distance
                    EUD_min = self.get_EuclideanDistance(unmatch)
                    print(EUD_min)
                    if not len(str(EUD_min)) == 0:
                        self.draw_box(frame_data, EUD_min, colors, unmatch[1])
                        match_person += 1

        # if enable info flag then print details about each track
        if info:
            print(
                "Tracker ID: {}, Class: {},  BBox Coords (xmin, ymin, xmax, ymax): {}"
                .format(
                    str(track.track_id), class_name,
                    (int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]))))

        result = cv2.cvtColor(frame_data, cv2.COLOR_RGB2BGR)

        return result
Example 9
def main(yolo):

    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # deep_sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    writeVideo_flag = False

    #video_capture = cv2.VideoCapture("rtsp://*****:*****@192.168.30.81:554/3")

    #video_capture = cv2.VideoCapture("test.mp4")

    video_capture = cv2.VideoCapture(0)

    if writeVideo_flag:
        # Define the codec and create VideoWriter object
        w = int(video_capture.get(3))
        h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        out = cv2.VideoWriter('output.avi', fourcc, 15, (w, h))
        list_file = open('detection.txt', 'w')
        frame_index = -1

    fps = 0.0

    htr = HumanTraceRecorder()
    frameCounter = 0
    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3

        #frame = cv2.resize(frame,(640,360), interpolation=cv2.INTER_LINEAR)

        if not ret:
            break
        t1 = time.time()

        image = Image.fromarray(frame)
        boxs = yolo.detect_image(image)
        # print("box_num",len(boxs))
        features = encoder(frame, boxs)

        # Build DeepSORT Detection objects; the detector score is not kept,
        # so the confidence is set to 1.0 here.
        detections = [
            Detection(bbox, 1.0, feature)
            for bbox, feature in zip(boxs, features)
        ]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap,
                                                    scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            print(track.name_id)

            if (frameCounter % 5) == 0:
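                # attempt (re-)identification only every 5th frame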

                x = int((bbox[0] + bbox[2]) / 2)
                y = int((bbox[1] + bbox[3]) / 2)
                new_time = frameCounter

                if track.name_id != 0:  # this is a tracked person
                    htr.updatePerson(track.name_id, x, y, new_time)
                else:
                    subimage = image.crop((int(bbox[0]), int(bbox[1]),
                                           int(bbox[2]), int(bbox[3])))
                    subimagearr = np.asarray(subimage)
                    faceID = getFaceID(
                        subimagearr,
                        predictor_path=
                        "./deep_sort/traceRecording/shape_predictor_5_face_landmarks.dat",
                        face_rec_model_path=
                        "./deep_sort/traceRecording/dlib_face_recognition_resnet_model_v1.dat"
                    )
                    if faceID == 0:
                        print("No face found")
                    else:
                        find, name_id = htr.checkPerson(faceID)
                        if find:
                            track.name_id = htr.updatePerson(
                                name_id, x, y, new_time)
                        else:
                            track.name_id = htr.addNewPerson(
                                faceID, x, y, new_time)

            cv2.putText(frame, str(track.name_id),
                        (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200,
                        (0, 255, 0), 2)
        frameCounter += 1
        """
            if track.track_id != 0: # this is a tracked person
                dataBase.addPersonTrace(track.track_id, location) # add the new location to the person's trace
            else :  # new detected person, need to verify the identity
            
            
                subimage = image(cv2.Rect(int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])))
                result = faceDetect(subimage) # result should be the face data detected


                if result: # make sure there is a face detection
                    if dataBase.checkPerson(result): # check if the person was recorded before
                        name = dataBase.checkName(result)
                    else: # new person, add to dataBase
                        newName = XXXXXXXXX # XXXXXXXXX is the new name
                        dataBase.addPerson(result, newName) 
                        name = newName
                    dataBase.addPersonTrace(name, location) # add the new location to the person's trace

                    track.track_id = name
                else : # no face detected, cannot identify the person
                    # do nothing, leave the track unchanged and wait for the next chance
        """

        for det in detections:
            bbox = det.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)

        cv2.imshow('', frame)

        if writeVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(boxs) != 0:
                for i in range(0, len(boxs)):
                    list_file.write(
                        str(boxs[i][0]) + ' ' + str(boxs[i][1]) + ' ' +
                        str(boxs[i][2]) + ' ' + str(boxs[i][3]) + ' ')
            list_file.write('\n')

        fps = (fps + (1. / (time.time() - t1))) / 2
        print("fps= %f" % (fps))

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    video_capture.release()
    if writeVideo_flag:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()

    htr.saveToFile()
Example no. 10
def main(yolo):
    t0 = time.time()
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    counter = []
    #deep_sort
    model_filename = 'model_data/market1501.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)  # used for tracking

    find_objects = ['person']
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    writeVideo_flag = True
    video_capture = cv2.VideoCapture(args["input"])

    if writeVideo_flag:
        # Define the codec and create VideoWriter object
        w = int(video_capture.get(3))
        h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        if args["ids"] == False:
            out = cv2.VideoWriter('./output/output%s.avi' % args["camera"][1],
                                  fourcc, 50, (w, h))
        else:
            out = cv2.VideoWriter(
                './output/output%s_reid.avi' % args["camera"][1], fourcc, 50,
                (w, h))
        list_file = open('detection_rslt.txt', 'w')
        frame_index = -1
        nump = 1
    #fps = 0.0

    while True:

        ret, frame = video_capture.read()  # frame shape 640*480*3
        if ret != True:
            break
        t1 = time.time()
        frame2 = copy.deepcopy(frame)
        #image = Image.fromarray(frame)
        image = Image.fromarray(frame[..., ::-1])  # BGR to RGB, used only by YOLO
        boxs, confidence, class_names = yolo.detect_image(image)
        print(boxs)
        features = encoder(frame, boxs)
        # score to 1.0 here).
        detections = [
            Detection(bbox, 1.0, feature)
            for bbox, feature in zip(boxs, features)
        ]
        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap,
                                                    scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        i = int(0)
        indexIDs = []
        c = []
        boxes = []
        makequery = True
        for det in detections:
            bbox = det.to_tlbr()

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            indexIDs.append(int(track.track_id))
            counter.append(int(track.track_id))
            bbox = track.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 255, 255),
                          2)  # tracking box
            color = [int(c) for c in COLORS[indexIDs[i] % len(COLORS)]]
            list_file.write(str(frame_index) + ',')  #3-5-7-9
            list_file.write(str(track.track_id) + ',')  # IDs of everyone in the frame
            b0 = str(bbox[0])
            b1 = str(bbox[1])
            b2 = str(bbox[2] - bbox[0])
            b3 = str(bbox[3] - bbox[1])

            # write the box as x, y, width, height
            list_file.write(b0 + ',' + b1 + ',' + b2 + ',' + b3)
            list_file.write('\n')
            if len(class_names) > 0:
                class_name = class_names[0]
                cv2.putText(frame, str(class_names[0]),
                            (int(bbox[0]), int(bbox[1] - 20)), 0, 5e-3 * 150,
                            (255, 255, 255), 2)  #person

            i += 1
            center = (int(
                ((bbox[0]) + (bbox[2])) / 2), int(((bbox[1]) + (bbox[3])) / 2))

            pts[track.track_id].append(center)

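            # draw the track's motion trail from its stored centre points;
            # segment thickness tapers along the trail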
            thickness1 = 5
            for j in range(1, len(pts[track.track_id])):
                if pts[track.track_id][j - 1] is None or pts[
                        track.track_id][j] is None:
                    continue
                thickness = int(np.sqrt(64 / float(j + 1)) * 2)
                cv2.line(frame, (pts[track.track_id][j - 1]),
                         (pts[track.track_id][j]), (255, 255, 255), thickness)
            if args["ids"] == False:
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])), (color), 3)
                cv2.putText(frame, str(track.track_id),
                            (int(bbox[0]), int(bbox[1] - 50)), 0, 5e-3 * 150,
                            (color), 2)  #id
                cv2.circle(frame, (center), 1, color, thickness1)
                for j in range(1, len(pts[track.track_id])):
                    if pts[track.track_id][j - 1] is None or pts[
                            track.track_id][j] is None:
                        continue
                    thickness = int(np.sqrt(64 / float(j + 1)) * 2)
                    cv2.line(frame, (pts[track.track_id][j - 1]),
                             (pts[track.track_id][j]), (color), thickness)
                try:
                    num = (int(args["camera"][1]) - 1) * 200
                    path = 'Z:\\pro2\\whole\\person\\gallery\\%04d' % int(
                        track.track_id + num)
                    if not os.path.exists(path):
                        os.makedirs(path)
                    if len(os.listdir(path)) <= 150:  # store at most 150 crops per identity
                        crop = frame2[int(bbox[1]):int(bbox[3]),
                                      int(bbox[0]):int(bbox[2])]
                        crop = cv2.resize(crop, (64, 128),
                                          interpolation=cv2.INTER_AREA
                                          )  # INTER_CUBIC for enlarging, INTER_AREA for shrinking
                        filepath = path + '\\' + '%04d' % int(
                            track.track_id +
                            num) + '_%s_' % args["camera"] + '%04d' % int(
                                len(os.listdir(path)) +
                                1) + '_%.2f' % (video_capture.get(0) /
                                                1000) + '.jpg'  #%04d
                        cv2.imwrite(filepath, crop)
                except:
                    continue

            # single-ID query mode
            else:
                makequery = False
                id1 = int(args["ids"])
                if int(track.track_id) == id1:
                    cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[2]), int(bbox[3])), (color), 3)
                    cv2.putText(frame, str(track.track_id),
                                (int(bbox[0]), int(bbox[1] - 50)), 0,
                                5e-3 * 150, (color), 2)  #id
                    cv2.circle(frame, (center), 1, color, thickness1)
                    for j in range(1, len(pts[track.track_id])):
                        if pts[track.track_id][j - 1] is None or pts[
                                track.track_id][j] is None:
                            continue
                        thickness = int(np.sqrt(64 / float(j + 1)) * 2)
                        cv2.line(frame, (pts[track.track_id][j - 1]),
                                 (pts[track.track_id][j]), (color), thickness)
                    cv2.putText(frame, str(class_names[0]),
                                (int(bbox[0]), int(bbox[1] - 20)), 0,
                                5e-3 * 150, (color), 2)  #person
                else:
                    continue
        count = len(set(counter))
        cv2.putText(frame, "Total Pedestrian Counter: " + str(count),
                    (int(20), int(80)), 0, 5e-3 * 200, (0, 255, 0), 2)
        cv2.putText(frame, "Current Pedestrian Counter: " + str(i),
                    (int(20), int(40)), 0, 5e-3 * 200, (0, 255, 0), 2)
        cv2.namedWindow("YOLO4_Deep_SORT", 0)
        cv2.resizeWindow('YOLO4_Deep_SORT', 1024, 768)
        cv2.imshow('YOLO4_Deep_SORT', frame)

        if writeVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    #makequery
    if makequery == True:
        root_path = 'Z:\\pro2\\whole\\person\\gallery\\'
        copy_path = 'Z:\\pro2\\whole\\person\\query\\'
        ids = os.listdir(root_path)
        #print(ids)
        for i in ids:
            img_path = root_path + i
            img = os.listdir(img_path)
            indeximg = img[int(len(img) / 2)]
            old_name = img_path + '\\' + indeximg
            new_path = copy_path + i
            new_name = new_path + '\\' + indeximg
            if not os.path.exists(new_path):
                os.makedirs(new_path)
            shutil.copyfile(old_name, new_name)
    print(" ")
    print("[Finish]")
    end = time.time()
    print("the whole time ", end - t0)
    if count > 0:
        print(args["input"][43:57] + ": " + str(count) + " " +
              str(class_name) + ' Found')
    else:
        print("[None Found]")
    video_capture.release()
    if writeVideo_flag:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
Example no. 11
def main(yolo):
    pics = os.listdir('pictures')
    for g in pics:
        os.remove(os.path.join('pictures/', g))

    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    nms_max_overlap = 1.0

    video_capture = cv2.VideoCapture('videos/walk1.mp4')
    # frame1 = cv2.imread('combine.jpg')
    # video_capture.set(cv2.CAP_PROP_POS_FRAMES, 189)
    # ret, frame2 = video_capture.read()
    # cv2.imwrite('189.jpg', frame2)
    # combine_pic(frame1, frame2, yolo, encoder, 'combine2.jpg')
    frame_cnt = 0
    w = int(video_capture.get(cv2.CAP_PROP_FRAME_WIDTH))
    h = int(video_capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
    # init_w = w
    init_w = 0
    add_cnt = 0
    all_boxes = []
    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if ret != True:
            break
        image = Image.fromarray(frame[..., ::-1])  #bgr to rgb
        boxs = yolo.detect_image(image)
        features = encoder(frame, boxs)

        # score to 1.0 here).
        detections = [
            Detection(bbox, 1.0, feature)
            for bbox, feature in zip(boxs, features)
        ]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap,
                                                    scores)
        detections = [detections[i] for i in indices]

        out_boxes = []
        for det in detections:
            bbox = det.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)
            out_boxes.append([
                int(bbox[0] - 25),
                int(bbox[1] - 25),
                int(bbox[2] + 25),
                int(bbox[3] + 25)
            ])
        all_boxes.append(out_boxes)

        if len(out_boxes) == 0:
            continue

        # if out_boxes[0][0] < 0:
        #     break
        # if out_boxes[0][2] < init_w:
        #     init_w = out_boxes[0][0]
        #     if add_cnt == 0:
        #         output_frame = frame.copy()
        #     else:
        #         output_frame = combine_pic(output_frame, frame, out_boxes)
        #     add_cnt += 1
        #     print(add_cnt)

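        # stop once the subject's padded box leaves the right edge of the frame;
        # otherwise stitch the current frame in whenever the subject has moved
        # past the previously stitched region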
        if out_boxes[0][2] > w:
            break
        if out_boxes[0][0] >= init_w:
            init_w = out_boxes[0][2]
            if add_cnt == 0:
                output_frame = frame.copy()
            else:
                output_frame = combine_pic(output_frame, frame, out_boxes)
            add_cnt += 1
            print(add_cnt)

    all_boxes = np.array(all_boxes)
    np.save('boxes.npy', all_boxes)
def main(yolo):
    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # deep_sort
    model_filename = './models/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)


    writeVideo_flag = True

    video_capture = cv2.VideoCapture(args.input_video)

    if writeVideo_flag:
        # Define the codec and create VideoWriter object
        w = int(video_capture.get(3))
        h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        out = cv2.VideoWriter('/media/rudy/C0E28D29E28D252E/2017_taroko/终点前/video_YDXJ0101.avi', fourcc, 15, (w, h))
        list_file = open('/media/rudy/C0E28D29E28D252E/2017_taroko/终点前/detection_YDXJ0101.txt', 'w')
        frame_index = -1 
        
    fps = 0.0
    n = 0

    id_frame = 0
    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if ret != True:
            break

        if int(args.skip_frame) != n:
            n += 1
            continue
        n = 0

        t1 = time.time()

        image = Image.fromarray(frame)
        boxs, classes = yolo.detect_image(image)

        for idb, box in enumerate(boxs):
            cv2.rectangle(frame, (int(box[0]), int(box[1])), (int(box[0])+int(box[2]), int(box[1])+int(box[3])),(255,255,255), 2)
            cv2.putText(frame, str(classes[idb]),(int(box[0]), int(box[1])),0, 5e-3 * 100, (0,255,0),2)

        features = encoder(frame, boxs)

        # score to 1.0 here).
        detections = [Detection(bbox, 1.0, feature) for bbox, feature in zip(boxs, features)]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            # cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            # cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200, (0, 255, 0), 2)

        for idb, det in enumerate(detections):
            bbox = det.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)
            # cv2.putText(frame, str(classes[idb]), (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 100, (0, 255, 0), 2)


        # cv2.imshow('gallery', frame)
        
        if writeVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1
            list_file.write(str(frame_index)+' ')
            if len(boxs) != 0:
                for i in range(0,len(boxs)):
                    list_file.write(str(boxs[i][0]) + ' '+str(boxs[i][1]) + ' '+str(boxs[i][2]) + ' '+str(boxs[i][3]) + ';')
            list_file.write('\n')
            
        fps  = ( fps + (1./(time.time()-t1)) ) / 2

        id_frame +=1
        print("idx_frame-%d, fps= %f"%(id_frame, fps))
        
        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    video_capture.release()
    if writeVideo_flag:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
def deteksi_ds(yolo):

    # Define the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0
    tulis_video_output = True

    # The DEEP SORT appearance model is loaded here
    namafile_model = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(namafile_model, batch_size=1)

    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    video_capture = cv2.VideoCapture("demo-1.mp4")
    video_capture.set(cv2.CAP_PROP_FRAME_WIDTH, 160)
    video_capture.set(cv2.CAP_PROP_FRAME_HEIGHT, 120)

    #
    # If the output video should be written, create a VideoWriter object with its codec
    # and also write the detection results to a txt file
    #
    if tulis_video_output:

        w = int(video_capture.get(3))
        h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        out = cv2.VideoWriter('hasil_output2.avi', fourcc, 15, (w, h))
        list_file = open('hasil_deteksi.txt', 'w')
        frame_index = -1

    # --------------------
    # START VIDEO CAPTURE
    # --------------------
    # initial FPS
    fps = 0.0
    jum_track = set()

    while True:
        ret, frame = video_capture.read()

        # Grayscale
        # frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # check that the frame was read
        if ret != True:
            break

        # start the timer
        t1 = time.time()

        # convert BGR to RGB
        image = Image.fromarray(frame[..., ::-1])
        kotak = yolo.detect_image(image)

        # print("[-] Kotak : ",len(kotak))
        features = encoder(frame, kotak)

        # score to 1.0 here).
        deteksi_box = [
            Detection(bbox, 1.0, feature)
            for bbox, feature in zip(kotak, features)
        ]

        # run non-maxima suppression
        boxes = np.array([d.tlwh for d in deteksi_box])
        scores = np.array([d.confidence for d in deteksi_box])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap,
                                                    scores)
        deteksi_box = [deteksi_box[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(deteksi_box)

        # Track the detection results
        for track in tracker.tracks:
            # was this track confirmed and recently updated?
            if not track.is_confirmed() or track.time_since_update > 1:
                continue

            # get its bounding box
            bbox = track.to_tlbr()

            # box
            # This is the prediction box
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)

            # text for the box id
            cv2.putText(frame, str(track.track_id),
                        (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200,
                        (0, 255, 0), 1)

            jum_track.add(track.track_id)

            cv2.putText(frame, "> People Count : " + str(max(jum_track)),
                        (10, 25), cv2.FONT_HERSHEY_DUPLEX, .5, (0, 0, 255), 1)

            print(">> Hits (People) :", max(jum_track))

        #
        # keep the detection boxes visible as well
        # these are the detection boxes
        for det in deteksi_box:
            bbox = det.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)

        # display - DEBUG
        cv2.imshow('Test Video', frame)

        #
        # Write the video output?
        #
        if tulis_video_output:

            out.write(frame)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(kotak) != 0:
                for i in range(0, len(kotak)):
                    list_file.write(
                        str(kotak[i][0]) + ' ' + str(kotak[i][1]) + ' ' +
                        str(kotak[i][2]) + ' ' + str(kotak[i][3]) + ' ')
            list_file.write('\n')

        # show fps
        fps = (fps + (1. / (time.time() - t1))) / 2
        print(">> fps= %f" % (fps))

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    # clean up video_capture
    video_capture.release()
    if tulis_video_output:
        # release video
        out.release()
        # close the file
        list_file.close()
    # release everything
    cv2.destroyAllWindows()
Example no. 14
def main(_argv):
    # Definition of the parameters
    max_cosine_distance = 0.5
    nn_budget = None
    nms_max_overlap = 1

    # initialize deep sort
    model_filename = 'deep_sort/feature_extractor/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    # calculate cosine distance metric
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    # initialize tracker
    tracker = Tracker(metric)

    # configuration of object detector
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    input_size = FLAGS.size
    video_path = FLAGS.video

    saved_model_loaded = tf.saved_model.load(FLAGS.weights,
                                             tags=[tag_constants.SERVING])
    infer = saved_model_loaded.signatures['serving_default']

    # Capture video
    try:
        vid = cv2.VideoCapture(int(video_path))
    except:
        vid = cv2.VideoCapture(video_path)

    out = None

    if FLAGS.output:
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

    frame_num = 0
    # looping over each frame of the video
    while True:
        return_value, frame = vid.read()
        if return_value:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image = Image.fromarray(frame)
        else:
            print('End!')
            break
        frame_num += 1
        print('Frame #: ', frame_num)
        frame_size = frame.shape[:2]
        image_data = cv2.resize(frame, (input_size, input_size))
        image_data = image_data / 255.
        image_data = image_data[np.newaxis, ...].astype(np.float32)
        start_time = time.time()

        # run the detections
        batch_data = tf.constant(image_data)
        pred_bbox = infer(batch_data)
        for key, value in pred_bbox.items():
            boxes = value[:, :, 0:4]
            pred_conf = value[:, :, 4:]

        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores=tf.reshape(
                pred_conf,
                (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
            max_output_size_per_class=50,
            max_total_size=50,
            iou_threshold=FLAGS.iou,
            score_threshold=FLAGS.score)

        # convert data to numpy arrays and slice out unused elements
        num_objects = valid_detections.numpy()[0]
        bboxes = boxes.numpy()[0]
        bboxes = bboxes[0:int(num_objects)]
        scores = scores.numpy()[0]
        scores = scores[0:int(num_objects)]
        classes = classes.numpy()[0]
        classes = classes[0:int(num_objects)]

        # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height
        original_h, original_w, _ = frame.shape
        bboxes = utils.format_boxes(bboxes, original_h, original_w)

        # store all predictions in one parameter for simplicity when calling functions
        pred_bbox = [bboxes, scores, classes, num_objects]

        # Class allowed to be tracked
        class_names = utils.read_class_names(cfg.YOLO.CLASSES)

        # allowed_classes = list(class_names.values())
        allowed_classes = ['person', 'car']

        names = []
        deleted_indx = []
        for i in range(num_objects):
            class_indx = int(classes[i])
            class_name = class_names[class_indx]
            if class_name not in allowed_classes:
                deleted_indx.append(i)
            else:
                names.append(class_name)
        names = np.array(names)

        # remove detections that are not in allowed_classes
        bboxes = np.delete(bboxes, deleted_indx, axis=0)
        scores = np.delete(scores, deleted_indx, axis=0)

        # encode yolo detections and feed to tracker
        features = encoder(frame, bboxes)
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                bboxes, scores, names, features)
        ]

        #initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maxima supression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes,
                                                    nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # The tracker
        tracker.predict()
        tracker.update(detections)

        # update tracks
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()

            # draw bbox on screen
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), color, 2)
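            # filled label background, width scaled roughly to the label text length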
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1] - 30)),
                          (int(bbox[0]) +
                           (len(class_name) + len(str(track.track_id))) * 17,
                           int(bbox[1])), color, -1)
            cv2.putText(frame, class_name + "-" + str(track.track_id),
                        (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75,
                        (255, 255, 255), 2)

        # calculate frames per second of running detections
        fps = 1.0 / (time.time() - start_time)
        print("FPS: %.2f" % fps)
        result = np.asarray(frame)
        result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

        if FLAGS.show:
            cv2.imshow("Output Video", result)

        # if output flag is set, save video file
        if FLAGS.output:
            out.write(result)
        if cv2.waitKey(1) & 0xFF == ord('q'): break
    cv2.destroyAllWindows()
Example no. 15
def main(yolo):

    # Definition of the parameters
    max_cosine_distance = 0.2
    nn_budget = None
    nms_max_overlap = 1.0

    # deep_sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    video_capture = cv2.VideoCapture(0)
    fps = 0.0
    while True:
        _, frame = video_capture.read()  # frame shape 640*480*3
        t1 = time.time()

        image = Image.fromarray(frame)
        boxs = yolo.detect_image(image)

        features = encoder(frame, boxs)

        # score to 1.0 here).
        detections = [
            Detection(bbox, 1.0, feature)
            for bbox, feature in zip(boxs, features)
        ]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap,
                                                    scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

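        # draw track boxes (white) and detection boxes (blue) pairwise and label with the track id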
        for track, det in zip(tracker.tracks, detections):
            bbox = track.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 255, 255), 4)
            bbox = det.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 0, 0), 4)
            cv2.putText(frame, str(track.track_id),
                        (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 480,
                        (124, 252, 0), 4)
        cv2.imshow('', frame)

        fps = (fps + (1. / (time.time() - t1))) / 2
        print("fps= %f" % (fps))

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    video_capture.release()
    cv2.destroyAllWindows()
Example no. 16
    def Display(self, yolo):
        self.ui.Open.setEnabled(False)
        self.ui.Close.setEnabled(True)

        # Definition of the parameters
        max_cosine_distance = 0.3
        nn_budget = None
        nms_max_overlap = 1.0

        # deep_sort
        model_filename = 'model_data/mars-small128.pb'
        encoder = gdet.create_box_encoder(model_filename, batch_size=1)

        metric = nn_matching.NearestNeighborDistanceMetric(
            "cosine", max_cosine_distance, nn_budget)
        tracker = Tracker(metric)

        writeVideo_flag = True

        if writeVideo_flag:
            # Define the codec and create VideoWriter object
            w = int(self.cap.get(3))
            h = int(self.cap.get(4))
            fourcc = cv2.VideoWriter_fourcc(*'MJPG')
            out = cv2.VideoWriter("output.avi", fourcc, 15, (w, h))
            list_file = open('detection.txt', 'w')
            frame_index = -1

        fps = 0.0

        while self.cap.isOpened():
            ret, frame = self.cap.read()

            if ret != True:
                break
            t1 = time.time()

            image = Image.fromarray(frame)
            boxs = yolo.detect_image(image)
            # print("box_num",len(boxs))
            features = encoder(frame, boxs)

            # score to 1.0 here).
            detections = [
                Detection(bbox, 1.0, feature)
                for bbox, feature in zip(boxs, features)
            ]

            # Run non-maxima suppression.
            boxes = np.array([d.tlwh for d in detections])
            scores = np.array([d.confidence for d in detections])
            indices = preprocessing.non_max_suppression(
                boxes, nms_max_overlap, scores)
            detections = [detections[i] for i in indices]

            # Call the tracker
            tracker.predict()
            tracker.update(detections)

            str1 = "Identification List:" + '\n\n'

            for track in tracker.tracks:
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                bbox = track.to_tlbr()
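                # hard-coded demo case: in reid.mp4, track 2 is shown as a recognized person with a mock confidence score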
                import random
                if track.track_id == 2 and self.fileName.split(
                        "/")[-1] == "reid.mp4":
                    str1 = str1 + "      person" + str(
                        track.track_id) + " —— LaiSiyu (" + str(
                            random.random() * 0.1 + 0.85) + ")" + '\n\n'
                    # str1 += "      person{0} - LaiSiyu ({:%.2f})".format(track.track_id, random.random() * 0.13 + 0.85)
                    ui.label_2.setText(str1)
                    cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[2]), int(bbox[3])), (0, 0, 255), 2)
                    cv2.putText(frame, str(track.track_id),
                                (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200,
                                (0, 0, 255), 2)

                else:
                    str1 = str1 + "      person" + str(track.track_id) + '\n\n'
                    ui.label_2.setText(str1)
                    cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[2]), int(bbox[3])),
                                  (255, 255, 255), 2)
                    cv2.putText(frame, str(track.track_id),
                                (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200,
                                (0, 255, 0), 2)

            for det in detections:
                bbox = det.to_tlbr()
                # cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)

            cv2.imshow('', frame)  # TODO
            # RGB to BGR
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            img = QImage(frame.data, frame.shape[1], frame.shape[0],
                         QImage.Format_RGB888)
            self.ui.DisplayLabel.setPixmap(QPixmap.fromImage(img))

            if writeVideo_flag:
                # save a frame
                out.write(frame)
                frame_index = frame_index + 1
                list_file.write(str(frame_index) + ' ')
                if len(boxs) != 0:
                    for i in range(0, len(boxs)):
                        list_file.write(
                            str(boxs[i][0]) + ' ' + str(boxs[i][1]) + ' ' +
                            str(boxs[i][2]) + ' ' + str(boxs[i][3]) + ' ')
                list_file.write('\n')

            fps = (fps + (1. / (time.time() - t1))) / 2
            print("fps= %f" % (fps))

            # Press Q to stop!
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

            if self.isCamera:
                cv2.waitKey(1)
            else:
                cv2.waitKey(int(1000 / self.frameRate))

            # check whether the stop event has been triggered
            if self.stopEvent.is_set():
                # reset the stop event and clear the display labels
                self.stopEvent.clear()
                self.ui.DisplayLabel.clear()
                self.ui.label_2.clear()
                self.ui.Close.setEnabled(False)
                self.ui.Open.setEnabled(True)
                break

        self.cap.release()
        if writeVideo_flag:
            out.release()
            list_file.close()
        cv2.destroyAllWindows()
def main(_argv):
    # Definition of the parameters
    max_cosine_distance = 0.5
    nn_budget = None
    nms_max_overlap = 1

    #initialize deep sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric, max_age=40)

    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    yolo.load_weights(FLAGS.weights)
    logging.info('weights loaded')

    class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')

    try:
        vid = cv2.VideoCapture(int(FLAGS.video))
    except:
        vid = cv2.VideoCapture(FLAGS.video)

    out = None

    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))
        list_file = open('detection.txt', 'w')
        frame_index = -1

    fps = 0.0
    count = 0
    while True:
        _, img = vid.read()

        if img is None:
            logging.warning("Empty Frame")
            time.sleep(0.1)
            count += 1
            if count < 3:
                continue
            else:
                break

        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, FLAGS.size)

        t1 = time.time()
        boxes, scores, classes, nums = yolo.predict(img_in)
        classes = classes[0]
        names = []
        for i in range(len(classes)):
            names.append(class_names[int(classes[i])])
        names = np.array(names)
        converted_boxes = convert_boxes(img, boxes[0])
        features = encoder(img, converted_boxes)
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                converted_boxes, scores[0], names, features)
        ]

        #initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maxima suppresion
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes,
                                                    nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        objects = 0

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue

            bbox = track.to_tlbr()
            class_name = track.get_class()

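            # draw every class when class_1 is 'all'; otherwise draw only the selected class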
            if (FLAGS.class_1 == 'all'):
                objects += 1
                color = colors[int(track.track_id) % len(colors)]
                color = [i * 255 for i in color]
                cv2.rectangle(img, (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])), color, 2)
                cv2.rectangle(
                    img, (int(bbox[0]), int(bbox[1] - 30)),
                    (int(bbox[0]) +
                     (len(class_name) + len(str(track.track_id))) * 17,
                     int(bbox[1])), color, -1)
                cv2.putText(img, class_name + "-" + str(track.track_id),
                            (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75,
                            (255, 255, 255), 2)
            elif (FLAGS.class_1 != 'all'):
                if (class_name == FLAGS.class_1):
                    objects += 1
                    color = colors[int(track.track_id) % len(colors)]
                    color = [i * 255 for i in color]
                    cv2.rectangle(img, (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[2]), int(bbox[3])), color, 2)
                    cv2.rectangle(
                        img, (int(bbox[0]), int(bbox[1] - 30)),
                        (int(bbox[0]) +
                         (len(class_name) + len(str(track.track_id))) * 17,
                         int(bbox[1])), color, -1)
                    cv2.putText(img, class_name + "-" + str(track.track_id),
                                (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75,
                                (255, 255, 255), 2)

        # print("Filtered objects: {}".format(objects))
        # print the number of tracked objects on screen
        cv2.putText(img, "# Objects: {}".format(objects), (0, 30),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (255, 0, 0), 2)

        ### UNCOMMENT BELOW IF YOU WANT CONSTANTLY CHANGING YOLO DETECTIONS TO BE SHOWN ON SCREEN
        for det in detections:
            bbox = det.to_tlbr()
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)

        # print fps on screen
        #fps  = ( fps + (1./(time.time()-t1)) ) / 2
        #cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30),
        #                  cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
        cv2.imshow('output', img)
        if FLAGS.output:
            out.write(img)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(converted_boxes) != 0:
                for i in range(0, len(converted_boxes)):
                    list_file.write(
                        str(converted_boxes[i][0]) + ' ' +
                        str(converted_boxes[i][1]) + ' ' +
                        str(converted_boxes[i][2]) + ' ' +
                        str(converted_boxes[i][3]) + ' ')
            list_file.write('\n')

        # press q to quit
        if cv2.waitKey(1) == ord('q'):
            break
    vid.release()
    if FLAGS.output:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
Example no. 18
def video_to_json(cap_name, jfile, cam_id, start_time):
    # video capture
    vcap = cv.VideoCapture(cap_name)
    fps = int(math.ceil(vcap.get(cv.CAP_PROP_FPS)))

    frame_idx = 0
    proc_fps = 0.0
    output_data = []

    seconds = 1
    yolo_detect_avg = 0.0
    fps_avvg = 0.0
    feature_encode_avg = 0.0
    feature_encode_avg_25 = 0.0
    yolo_detect_avg_25 = 0.0
    fps_avg_25 = 0.0

    initial = time.time()
    timestamp = int(start_time)

    while True:
        ret, frame = vcap.read()  # frame shape 640*480*3
        print(ret)
        if ret != True:
            break
        t1 = time.time()

        image = Image.fromarray(frame)
        t1_yolo_start = time.time()
        boxs = yolo.detect_image(image)  # x,y,w,h, score, class
        t2_yolo_end = time.time()
        yolo_detect_avg += t2_yolo_end - t1_yolo_start
        yolo_detect_avg_25 += t2_yolo_end - t1_yolo_start
        # separate out score, class info from loc
        scores_classes = [box[-2:] for box in boxs]
        boxs = [box[0:4] for box in boxs]
        f_t_start = time.time()
        features = encoder(frame, boxs)
        f_t_end = time.time()
        feature_encode_avg += f_t_end - f_t_start
        feature_encode_avg_25 += f_t_end - f_t_start

        # score to 1.0 here).
        detections = [
            Detection(bbox, 1.0, feature)
            for bbox, feature in zip(boxs, features)
        ]

        # filter unwanted obj by classfication
        for idx, det in list(enumerate(detections)):
            det.score = "%.2f" % scores_classes[idx][0]
            det.cls = scores_classes[idx][1]
        detections = [
            det for det in detections if yolo.class_names[det.cls] in [
                'person', 'bicycle', 'car', 'motorbike', 'aeroplane', 'bus',
                'train', 'truck'
            ]
        ]
        # filter detections with roi
        detections = [
            det for det in detections
            if 0 < cv.pointPolygonTest(np.array(roi), (
                det.to_xyah()[0], det.to_xyah()[1]), False)
        ]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap,
                                                    scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            # update track speed in y direction, in pixels/seconds
            if not hasattr(track, 'first_frame'):
                track.first_frame = frame_idx
                track.first_bbox = track.to_tlwh()
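            # only once at least one second of frames has elapsed since the track
            # first appeared, estimate speed from the vertical displacement of the box centre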
            if frame_idx > track.first_frame + 1 * fps:
                cur_bbox = track.to_tlwh()
                cur_c_y = (cur_bbox[1] + cur_bbox[3] / 2)
                first_c_y = (track.first_bbox[1] + track.first_bbox[3] / 2)
                track.speed = (cur_c_y - first_c_y) / (frame_idx -
                                                       track.first_frame) * fps

        # Add aggregated data
        speeds = [
            abs(track.speed) for track in tracker.tracks
            if hasattr(track, 'speed')
        ]
        avg_spd = (np.sum(speeds) / len(speeds)
                   if speeds else 0.0) * 0.0005787 * 3600  # km/hr
        lane_area = cv.contourArea(np.array(roi))
        box_area = np.sum([det.tlwh[2] * det.tlwh[3] for det in detections])
        occupancy = box_area / lane_area

        proc_fps = (proc_fps + (1. / (time.time() - t1))) / 2
        # print("fps= %f"%(proc_fps))
        fps_avvg += proc_fps
        fps_avg_25 += proc_fps

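        # emit a fixed number of aggregated records per second of video (controlled by samples_per_sec)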
        if frame_idx % fps in np.linspace(0, fps,
                                          samples_per_sec).astype(int)[:-1]:
            output_data.append({
                "data_event_name":
                "vehicle_detection",
                "camera_id":
                cam_id,
                "lane_id":
                1,
                "frame_idx":
                frame_idx,
                "average_speed":
                avg_spd,
                "lane_occupancy":
                occupancy,
                "time_stamp":
                float(start_time) + frame_idx / fps,
                "vehicles": [track_encoder(trk) for trk in tracker.tracks]
            })

        frame_idx += 1

        if frame_idx % 25 == 0:
            print("Second {}:".format(seconds))
            seconds += 1
            print("Total frame {}".format(str(frame_idx)))
            print(
                "Average FPS is {}, average YOLO detection time is {} s, average feature encoding time is {} s"
                .format(fps_avg_25 / float(25.0),
                        yolo_detect_avg_25 / float(25.0),
                        feature_encode_avg_25 / float(25.0)))
            yolo_detect_avg_25 = 0
            feature_encode_avg_25 = 0
            fps_avg_25 = 0

        #if frame_idx > 30: break

    vcap.release()
    t_final = time.time()
    #print("Final Average Speed for FPS is {}, for Yolo computing is {} second, for feature encoding is {} second"
    #    .format(fps_avvg/frame_idx, yolo_detect_avg / frame_idx, feature_encode_avg / frame_idx))
    #print("The total time for this process is {}".format(t_final-initial))
    with open(jfile, 'w') as jout:
        json.dump(output_data, jout)
Example no. 19
def main(_argv):

    with open("./config_birdview.yml", "r") as ymlfile:
        bird_view_cfg = yaml.load(ymlfile, Loader=yaml.SafeLoader)

    width_og, height_og = 0, 0
    corner_points = []
    for section in bird_view_cfg:
        corner_points.append(bird_view_cfg["image_parameters"]["p1"])
        corner_points.append(bird_view_cfg["image_parameters"]["p2"])
        corner_points.append(bird_view_cfg["image_parameters"]["p3"])
        corner_points.append(bird_view_cfg["image_parameters"]["p4"])
        width_og = int(bird_view_cfg["image_parameters"]["width_og"])
        height_og = int(bird_view_cfg["image_parameters"]["height_og"])
        img_path = bird_view_cfg["image_parameters"]["img_path"]
        size_height = bird_view_cfg["image_parameters"]["size_height"]
        size_width = bird_view_cfg["image_parameters"]["size_width"]

    tr = np.array([
        bird_view_cfg["image_parameters"]["p4"][0],
        bird_view_cfg["image_parameters"]["p4"][1],
    ])
    tl = np.array([
        bird_view_cfg["image_parameters"]["p2"][0],
        bird_view_cfg["image_parameters"]["p2"][1],
    ])
    br = np.array([
        bird_view_cfg["image_parameters"]["p3"][0],
        bird_view_cfg["image_parameters"]["p3"][1],
    ])
    bl = np.array([
        bird_view_cfg["image_parameters"]["p1"][0],
        bird_view_cfg["image_parameters"]["p1"][1],
    ])

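    # edge lengths of the marked ground quadrilateral; the bird's-eye warp uses the longer of each opposing pair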
    widthA = np.sqrt(((br[0] - bl[0])**2) + ((br[1] - bl[1])**2))
    widthB = np.sqrt(((tr[0] - tl[0])**2) + ((tr[1] - tl[1])**2))
    maxWidth = max(int(widthA), int(widthB))

    heightA = np.sqrt(((tr[0] - br[0])**2) + ((tr[1] - br[1])**2))
    heightB = np.sqrt(((tl[0] - bl[0])**2) + ((tl[1] - bl[1])**2))
    maxHeight = max(int(heightA), int(heightB))

    matrix, imgOutput = compute_perspective_transform(corner_points, maxWidth,
                                                      maxHeight,
                                                      cv2.imread(img_path))
    height, width, _ = imgOutput.shape
    dim = (width, height)

    # Definition of the parameters
    max_cosine_distance = 0.4
    nn_budget = None
    nms_max_overlap = 1.0

    # initialize deep sort
    model_filename = "model_data/mars-small128.pb"
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    # calculate cosine distance metric
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    # initialize tracker
    tracker = Tracker(metric)

    # load configuration for object detector
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    input_size = FLAGS.size
    video_path = FLAGS.video

    # load tflite model if flag is set
    if FLAGS.framework == "tflite":
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)
    # otherwise load standard tensorflow saved model
    else:
        saved_model_loaded = tf.saved_model.load(FLAGS.weights,
                                                 tags=[tag_constants.SERVING])
        infer = saved_model_loaded.signatures["serving_default"]

    # begin video capture
    try:
        vid = cv2.VideoCapture(int(video_path))
    except:
        vid = cv2.VideoCapture(video_path)

    output_video_1, output_video_2 = None, None

    # get video ready to save locally if flag is set
    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        """
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        """
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

    frame_num = 0
    # while video is running
    while True:

        black_img = cv2.imread("./black_bg.png")
        black_img = cv2.resize(black_img, dim, interpolation=cv2.INTER_AREA)

        return_value, frame = vid.read()

        if return_value:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image = Image.fromarray(frame)
        else:
            print("Video has ended or failed, try a different video format!")
            break
        frame_num += 1
        print("Frame #: ", frame_num)
        frame_size = frame.shape[:2]
        image_data = cv2.resize(frame, (input_size, input_size))
        image_data = image_data / 255.0
        image_data = image_data[np.newaxis, ...].astype(np.float32)
        start_time = time.time()

        # run detections on tflite if flag is set
        if FLAGS.framework == "tflite":
            interpreter.set_tensor(input_details[0]["index"], image_data)
            interpreter.invoke()
            pred = [
                interpreter.get_tensor(output_details[i]["index"])
                for i in range(len(output_details))
            ]
            # run detections using yolov3 if flag is set
            if FLAGS.model == "yolov3" and FLAGS.tiny == True:
                boxes, pred_conf = filter_boxes(
                    pred[1],
                    pred[0],
                    score_threshold=0.25,
                    input_shape=tf.constant([input_size, input_size]),
                )
            else:
                boxes, pred_conf = filter_boxes(
                    pred[0],
                    pred[1],
                    score_threshold=0.25,
                    input_shape=tf.constant([input_size, input_size]),
                )
        else:
            batch_data = tf.constant(image_data)
            pred_bbox = infer(batch_data)
            for key, value in pred_bbox.items():
                boxes = value[:, :, 0:4]
                pred_conf = value[:, :, 4:]

        (
            boxes,
            scores,
            classes,
            valid_detections,
        ) = tf.image.combined_non_max_suppression(
            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores=tf.reshape(
                pred_conf,
                (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
            max_output_size_per_class=50,
            max_total_size=50,
            iou_threshold=FLAGS.iou,
            score_threshold=FLAGS.score,
        )

        # convert data to numpy arrays and slice out unused elements
        num_objects = valid_detections.numpy()[0]
        bboxes = boxes.numpy()[0]
        bboxes = bboxes[0:int(num_objects)]
        scores = scores.numpy()[0]
        scores = scores[0:int(num_objects)]
        classes = classes.numpy()[0]
        classes = classes[0:int(num_objects)]

        # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height
        original_h, original_w, _ = frame.shape
        bboxes = utils.format_boxes(bboxes, original_h, original_w)

        # store all predictions in one parameter for simplicity when calling functions
        pred_bbox = [bboxes, scores, classes, num_objects]

        # read in all class names from config
        class_names = utils.read_class_names(cfg.YOLO.CLASSES)

        # by default allow all classes in .names file
        # allowed_classes = list(class_names.values())

        # restrict the tracker to people only
        allowed_classes = ["person"]

        # loop through objects and use class index to get class name, allow only classes in allowed_classes list
        names = []
        deleted_indx = []
        for i in range(num_objects):
            class_indx = int(classes[i])
            class_name = class_names[class_indx]
            if class_name not in allowed_classes:
                deleted_indx.append(i)
            else:
                names.append(class_name)
        names = np.array(names)
        count = len(names)
        if FLAGS.count:
            cv2.putText(
                frame,
                "Objects being tracked: {}".format(count),
                (5, 35),
                cv2.FONT_HERSHEY_COMPLEX_SMALL,
                2,
                (0, 255, 0),
                2,
            )
            print("Objects being tracked: {}".format(count))
        # delete detections that are not in allowed_classes
        bboxes = np.delete(bboxes, deleted_indx, axis=0)
        scores = np.delete(scores, deleted_indx, axis=0)

        # encode yolo detections and feed to tracker
        features = encoder(frame, bboxes)
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                bboxes, scores, names, features)
        ]

        # initialize color map
        cmap = plt.get_cmap("tab20b")
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maxima supression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes,
                                                    nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        bbox_array = []
        # update tracks
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            bbox_array.append(
                (int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])))
            class_name = track.get_class()

            # draw bbox on screen
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]
            cv2.rectangle(
                frame,
                (int(bbox[0]), int(bbox[1])),
                (int(bbox[2]), int(bbox[3])),
                color,
                2,
            )
            cv2.rectangle(
                frame,
                (int(bbox[0]), int(bbox[1] - 30)),
                (
                    int(bbox[0]) +
                    (len(class_name) + len(str(track.track_id))) * 17,
                    int(bbox[1]),
                ),
                color,
                -1,
            )
            cv2.putText(
                frame,
                class_name + "-" + str(track.track_id),
                (int(bbox[0]), int(bbox[1] - 10)),
                0,
                0.75,
                (255, 255, 255),
                2,
            )

            # if enable info flag then print details about each track
            if FLAGS.info:
                print(
                    "Tracker ID: {}, Class: {},  BBox Coords (xmin, ymin, xmax, ymax): {}"
                    .format(
                        str(track.track_id),
                        class_name,
                        (int(bbox[0]), int(bbox[1]), int(bbox[2]), int(
                            bbox[3])),
                    ))

        if len(bbox_array) >= 1:
            array_centroids, array_groundpoints = get_centroids_and_groundpoints(
                bbox_array)
            transformed_downoids = compute_point_perspective_transformation(
                matrix, array_centroids)

            # Show every point on the top view image
            for point in transformed_downoids:
                x, y = point
                cv2.circle(black_img, (x, y), 60, (0, 255, 0), 2)
                cv2.circle(black_img, (x, y), 3, (0, 255, 0), -1)

        # calculate frames per second of running detections
        fps = 1.0 / (time.time() - start_time)
        print("FPS: %.2f" % fps)
        result = np.asarray(frame)
        #         result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

        if not FLAGS.dont_show:
            cv2.imshow("Output Video", result)

        # if output flag is set, save video file
        if FLAGS.output:
            if output_video_1 is None and output_video_2 is None:
                fourcc1 = cv2.VideoWriter_fourcc(*"MJPG")
                output_video_1 = cv2.VideoWriter(
                    "./video.avi", fourcc1, 25,
                    (frame.shape[1], frame.shape[0]), True)
                fourcc2 = cv2.VideoWriter_fourcc(*"MJPG")
                output_video_2 = cv2.VideoWriter(
                    "./bird_view.avi",
                    fourcc2,
                    25,
                    (black_img.shape[1], black_img.shape[0]),
                    True,
                )

            elif output_video_1 is not None and output_video_2 is not None:
                output_video_1.write(frame)
                output_video_2.write(black_img)
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
    cv2.destroyAllWindows()
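The example above maps each track's centroid onto a top-view canvas with compute_point_perspective_transformation and a precomputed homography matrix, neither of which is shown here. A minimal sketch of what that pair might look like, assuming the matrix comes from cv2.getPerspectiveTransform over four hand-picked ground-plane corners (the coordinates below are placeholders, not values from the example):

import cv2
import numpy as np

# Hypothetical calibration: four ground-plane points in the camera image and
# where they should land on the top-view canvas (placeholder values).
src_corners = np.float32([[480, 360], [1440, 360], [1920, 1080], [0, 1080]])
dst_corners = np.float32([[0, 0], [960, 0], [960, 960], [0, 960]])
matrix = cv2.getPerspectiveTransform(src_corners, dst_corners)

def compute_point_perspective_transformation(matrix, points):
    # Map a list of (x, y) image points to top-view coordinates.
    pts = np.float32(points).reshape(-1, 1, 2)
    warped = cv2.perspectiveTransform(pts, matrix)
    return [(int(x), int(y)) for x, y in warped.reshape(-1, 2)]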
Example 20
def main(_argv):
    # Definition of the parameters
    max_cosine_distance = 0.4
    nn_budget = None
    nms_max_overlap = 1.0

    # initialize deep sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    # calculate cosine distance metric
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    # initialize tracker
    tracker = Tracker(metric)

    # load configuration for object detector
    config = ConfigProto()
    config.gpu_options.allow_growth = False
    config.gpu_options.per_process_gpu_memory_fraction = 0.1

    _ = InteractiveSession(config=config)
    utils.load_config(FLAGS)
    input_size = FLAGS.size
    video_path = FLAGS.video

    # load tflite model if flag is set
    if FLAGS.framework == 'tflite':
        interpreter = tf.lite.Interpreter(
            model_path=f'{FLAGS.weights}_{FLAGS.size}')
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)
    # otherwise load standard tensorflow saved model
    else:
        saved_model_loaded = tf.saved_model.load(
            f'{FLAGS.weights}_{FLAGS.size}', tags=[tag_constants.SERVING])
        infer = saved_model_loaded.signatures['serving_default']

    # begin video capture
    try:
        vid = cv2.VideoCapture(int(video_path))
    except:
        vid = cv2.VideoCapture(video_path)

    out = None

    # get video ready to save locally if flag is set
    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

    all_start_time = None
    frame_num = 0
    # while video is running
    while True:
        return_value, frame = vid.read()
        if return_value:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            Image.fromarray(frame)
        else:
            if all_start_time is not None:
                fps = float(frame_num) / (time.time() - all_start_time)
                print("fps=%.2f size=%d frames=%d deep=%s output=%s" %
                      (fps, FLAGS.size, frame_num,
                       "true" if FLAGS.deep else "false", FLAGS.output))
            break
        frame_num += 1
        if FLAGS.info:
            print("frame_num=%d" % frame_num)
        start_time = time.time()
        if all_start_time is None:
            all_start_time = time.time()
        image_data = cv2.resize(frame, (input_size, input_size))
        image_data = image_data / 255.
        image_data = image_data[np.newaxis, ...].astype(np.float32)

        # run detections on tflite if flag is set
        if FLAGS.framework == 'tflite':
            interpreter.set_tensor(input_details[0]['index'], image_data)
            interpreter.invoke()
            pred = [
                interpreter.get_tensor(output_details[i]['index'])
                for i in range(len(output_details))
            ]
            # run detections using yolov3 if flag is set
            if FLAGS.model == 'yolov3' and FLAGS.tiny:
                boxes, pred_conf = filter_boxes(pred[1],
                                                pred[0],
                                                score_threshold=0.25,
                                                input_shape=tf.constant(
                                                    [input_size, input_size]))
            else:
                boxes, pred_conf = filter_boxes(pred[0],
                                                pred[1],
                                                score_threshold=0.25,
                                                input_shape=tf.constant(
                                                    [input_size, input_size]))
        else:
            batch_data = tf.constant(image_data)
            pred_bbox = infer(batch_data)
            for _, value in pred_bbox.items():
                boxes = value[:, :, 0:4]
                pred_conf = value[:, :, 4:]

        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores=tf.reshape(
                pred_conf,
                (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
            max_output_size_per_class=50,
            max_total_size=50,
            iou_threshold=FLAGS.iou,
            score_threshold=FLAGS.score)

        # convert data to numpy arrays and slice out unused elements
        num_objects = valid_detections.numpy()[0]
        bboxes = boxes.numpy()[0]
        bboxes = bboxes[0:int(num_objects)]
        scores = scores.numpy()[0]
        scores = scores[0:int(num_objects)]
        classes = classes.numpy()[0]
        classes = classes[0:int(num_objects)]

        # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height
        original_h, original_w, _ = frame.shape
        bboxes = utils.format_boxes(bboxes, original_h, original_w)

        # store all predictions in one parameter for simplicity when calling functions
        pred_bbox = [bboxes, scores, classes, num_objects]

        # read in all class names from config
        class_names = utils.read_class_names(cfg.YOLO.CLASSES)

        # by default allow all classes in .names file
        # allowed_classes = list(class_names.values())

        # restrict the tracker to people only
        allowed_classes = ['person']

        # loop through objects and use class index to get class name, allow only classes in allowed_classes list
        names = []
        deleted_indx = []
        for i in range(num_objects):
            class_indx = int(classes[i])
            class_name = class_names[class_indx]
            if class_name not in allowed_classes:
                deleted_indx.append(i)
            else:
                names.append(class_name)
        names = np.array(names)
        count = len(names)
        if FLAGS.count:
            cv2.putText(frame, "Objects being tracked: {}".format(count),
                        (5, 35), cv2.FONT_HERSHEY_COMPLEX_SMALL, 2,
                        (0, 255, 0), 2)
            print("Objects being tracked: {}".format(count))
        # delete detections that are not in allowed_classes
        bboxes = np.delete(bboxes, deleted_indx, axis=0)
        scores = np.delete(scores, deleted_indx, axis=0)

        # encode yolo detections and feed to tracker
        if FLAGS.deep:
            features = encoder(frame, bboxes)
        else:
            features = np.empty((len(bboxes), 0), np.float32)

        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                bboxes, scores, names, features)
        ]

        #initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maxima supression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes,
                                                    nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        # update tracks
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()

            # draw bbox on screen
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), color, 2)
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1] - 30)),
                          (int(bbox[0]) +
                           (len(class_name) + len(str(track.track_id))) * 17,
                           int(bbox[1])), color, -1)
            cv2.putText(frame, class_name + "-" + str(track.track_id),
                        (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75,
                        (255, 255, 255), 2)

            # if enable info flag then print details about each track
            if FLAGS.info:
                print(
                    "Tracker ID: {}, Class: {}, BBox Coords (xmin, ymin, xmax, ymax): {}"
                    .format(str(track.track_id), class_name, (int(
                        bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]))))

        result = np.asarray(frame)
        result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

        if not FLAGS.dont_show:
            cv2.imshow("Output Video", result)

        # if output flag is set, save video file
        if FLAGS.output:
            out.write(result)

        # calculate frames per second of running detections
        if FLAGS.info:
            fps = 1.0 / (time.time() - start_time)
            print("fps=%.2f" % fps)

        if not FLAGS.dont_show:
            if cv2.waitKey(1) & 0xFF == ord('q'): break
    if not FLAGS.dont_show:
        cv2.destroyAllWindows()
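Both variants above rely on utils.format_boxes to turn the normalized (ymin, xmin, ymax, xmax) output of combined_non_max_suppression into the pixel (xmin, ymin, width, height) boxes that DeepSORT's Detection expects. The project's own implementation is not shown here; a minimal sketch of that conversion, under the assumption that it only rescales and reorders the coordinates:

import numpy as np

def format_boxes_sketch(bboxes, image_h, image_w):
    # normalized (ymin, xmin, ymax, xmax) -> pixel (xmin, ymin, width, height)
    formatted = []
    for ymin, xmin, ymax, xmax in bboxes:
        x = int(xmin * image_w)
        y = int(ymin * image_h)
        w = int((xmax - xmin) * image_w)
        h = int((ymax - ymin) * image_h)
        formatted.append([x, y, w, h])
    return np.array(formatted)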
Example 21
def main(yolo):
    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # Deep SORT
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    np.random.seed(42)
    COLORS = np.random.randint(0, 255, size=(200, 3),
                               dtype="uint8")

    writeVideo_flag = True
    asyncVideo_flag = False

    file_path = 'testvideo2.avi'

    # load the COCO class labels our YOLO model was trained on
    labelsPath = os.path.sep.join(["model_data", "coco_classes.txt"])
    LABELS = open(labelsPath).read().strip().split("\n")
    # print(str(len(LABELS))+"load successfully")
    # print(LABELS)
    class_nums = np.zeros(80)
    counter = np.zeros(80)
    track_id_max = -1
    if asyncVideo_flag:
        video_capture = VideoCaptureAsync(file_path)
    else:
        video_capture = cv2.VideoCapture(file_path)

    if asyncVideo_flag:
        video_capture.start()

    if writeVideo_flag:
        if asyncVideo_flag:
            w = int(video_capture.cap.get(3))
            h = int(video_capture.cap.get(4))
        else:
            w = int(video_capture.get(3))
            h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter('testvideo2_out.avi', fourcc, 30, (w, h))
        frame_index = -1

    fps = 0.0
    fps_imutils = imutils.video.FPS().start()

    img_num = 0
    frame_cnt = 0
    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if not ret or frame_cnt > 30:
            break
        frame_copy = frame.copy()

        t1 = time.time()

        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb

        boxs, confidence, class_names = yolo.detect_image(image)

        features = encoder(frame, boxs)

        detections = [Detection(bbox, confidence, feature) for bbox, confidence, feature in
                      zip(boxs, confidence, features)]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        # print("print indices!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
        # print(indices)

        # for i in indices:
        #    print(str(i)+class_names[i][0])

        # print(indices.shape)

        detections = [detections[i] for i in indices]
        class_names = [class_names[i] for i in indices]
        print("class_name:" + str(class_names))

        class_IDs = []
        current_nums = np.zeros(80)
        # class_IDs=[]
        for class_name in class_names:
            for i, LABEL in enumerate(LABELS):
                if class_name[0] == LABEL:
                    current_nums[i] += 1
                    class_IDs.append(i)
        # print("person:"+str(current_nums[0]))

        cv2.putText(frame, 'Current', (20, 70), cv2.FONT_HERSHEY_DUPLEX, 0.75, (255, 255, 255), 2)
        cv2.putText(frame, 'Total', (180, 70), cv2.FONT_HERSHEY_DUPLEX, 0.75, (0, 255, 255), 2)

        x1 = 20
        y1 = 100
        for i, cl in enumerate(current_nums):
            if cl > 0:
                cv2.putText(frame, LABELS[i] + "=" + str(cl), (x1, y1), cv2.FONT_HERSHEY_DUPLEX, 0.6, (255, 255, 255),
                            1)  # per-class counts in the current frame
                y1 = y1 + 20

        for i, det in enumerate(detections):
            bbox = det.to_tlbr()
            score = "%.2f" % round(det.confidence * 100, 2)
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 1)
            cv2.putText(frame, score + '%', (int(bbox[0]), int(bbox[1]) + 10), cv2.FONT_HERSHEY_DUPLEX, 0.3,
                        (255, 255, 255), 1)
            # cv2.putText(frame, class_names[i],(int(bbox[0]), int(bbox[1])-5), 0, 5e-3 * 130, (0, 255, 0), 2)
            # cv2.putText(frame, class_names[i], (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 130,(0, 255, 255), 2)

        print("Total of detections:" + str(len(detections)))
        # Call the tracker
        tracker.predict()
        tracker.update(detections, class_IDs)

        # for i, cl in enumerate(class_nums):
        #     if cl > 0:
        #         print("add: " + LABELS[i] + str(cl - class_last_nums[i]))

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                print("not track.is_confirmed() or track.time_since_update > 1: " + str(track.track_id))
                continue
            bbox = track.to_tlbr()
            color = [int(c) for c in COLORS[track.class_id % len(COLORS)]]
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2)

            cv2.putText(frame, str(track.track_id) + ' ' + LABELS[track.class_id], (int(bbox[0]), int(bbox[1]) - 5),
                        cv2.FONT_HERSHEY_DUPLEX, 0.4, color, 1)

            if track.track_id > track_id_max:
                counter[track.class_id] = counter[track.class_id] + 1
                track_id_max = track.track_id
                dest = frame_copy[int(bbox[1]):int(bbox[3]), int(bbox[0]):int(bbox[2])]
                pdest = "./output_image/" + str(LABELS[track.class_id]) + str(int(counter[track.class_id])) + ".png"
                cv2.imwrite(pdest, dest)
                img_num += 1
            # class_nums[track.class_id].append(track.track_id)
            # print(str(LABELS[track.class_id])+":"+class_nums[track.class_id])
            # print("track.id: " + str(track.track_id))
            # print("track.class_name: " + str(LABELS[track.class_id]))

        print(str(counter))
        print("--------------------------该帧输出完毕!--------------------------------------")

        # cv2.putText(frame, 'Total', (200, 60), cv2.FONT_HERSHEY_DUPLEX, 0.75, (255, 0, 0), 2)
        # x2 = 200
        # y2 = 100
        # for i, cl in enumerate(class_nums):
        #     if cl > 0:
        #         cv2.putText(frame, LABELS[i] + "=" + str(cl), (x2, y2), cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 0, 0), 2)
        #         y2 = y2 + 20

        cv2.putText(frame, "FPS: %f" % (fps), (int(20), int(40)), 0, 5e-3 * 200, (0, 255, 0), 3)  # !!!!!!!!!输出FPS
        x2 = 180
        y2 = 100
        for i, cl in enumerate(counter):
            if cl > 0:
                cv2.putText(frame, LABELS[i] + "=" + str(cl), (x2, y2), cv2.FONT_HERSHEY_DUPLEX, 0.6, (0, 255, 255), 1)
                y2 = y2 + 20
        # cv2.imshow('', frame)

        if writeVideo_flag:  # and not asyncVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1

        fps_imutils.update()

        fps = (fps + (1. / (time.time() - t1))) / 2

        # print("FPS = %f"%(fps))

        frame_cnt += 1
        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    fps_imutils.stop()
    print('imutils FPS: {}'.format(fps_imutils.fps()))

    if asyncVideo_flag:
        video_capture.stop()
    else:
        video_capture.release()

    if writeVideo_flag:
        out.release()

    cv2.destroyAllWindows()
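The loop above crops every newly seen track with frame_copy[y1:y2, x1:x2] before writing it to disk. Kalman-predicted boxes can have negative coordinates, and negative indices silently wrap around in NumPy slicing, so a clamped crop is safer. A small sketch (the helper name is mine, not from the example):

def crop_track(frame, bbox):
    # bbox is (x1, y1, x2, y2) from track.to_tlbr(); clamp it to the frame first.
    h, w = frame.shape[:2]
    x1, y1, x2, y2 = (int(v) for v in bbox)
    x1, x2 = max(0, x1), min(w, x2)
    y1, y2 = max(0, y1), min(h, y2)
    if x2 <= x1 or y2 <= y1:
        return None  # box lies entirely outside the frame
    return frame[y1:y2, x1:x2]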
def main(_argv):
    # Definition of the parameters
    max_cosine_distance = 0.4
    nn_budget = None
    nms_max_overlap = 1.0

    # initialize deep sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    # calculate cosine distance metric
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    # initialize tracker
    tracker = Tracker(metric)

    # load configuration for object detector
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    input_size = FLAGS.size
    video_path = FLAGS.video

    # load tflite model if flag is set
    if FLAGS.framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)
    # otherwise load standard tensorflow saved model
    else:
        saved_model_loaded = tf.saved_model.load(FLAGS.weights,
                                                 tags=[tag_constants.SERVING])
        infer = saved_model_loaded.signatures['serving_default']

    # begin video capture
    try:
        vid = cv2.VideoCapture(int(video_path))
    except:
        vid = cv2.VideoCapture(video_path)

    out = None

    # get video ready to save locally if flag is set
    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

    frame_num = 0
    # while video is running
    while True:
        return_value, frame = vid.read()
        if return_value:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image = Image.fromarray(frame)
        else:
            print('Video has ended or failed, try a different video format!')
            break
        frame_num += 1
        print('Frame #: ', frame_num)
        frame_size = frame.shape[:2]
        image_data = cv2.resize(frame, (input_size, input_size))
        image_data = image_data / 255.
        image_data = image_data[np.newaxis, ...].astype(np.float32)
        start_time = time.time()

        # run detections on tflite if flag is set
        if FLAGS.framework == 'tflite':
            interpreter.set_tensor(input_details[0]['index'], image_data)
            interpreter.invoke()
            pred = [
                interpreter.get_tensor(output_details[i]['index'])
                for i in range(len(output_details))
            ]
            # run detections using yolov3 if flag is set
            if FLAGS.model == 'yolov3' and FLAGS.tiny:
                boxes, pred_conf = filter_boxes(pred[1],
                                                pred[0],
                                                score_threshold=0.25,
                                                input_shape=tf.constant(
                                                    [input_size, input_size]))
            else:
                boxes, pred_conf = filter_boxes(pred[0],
                                                pred[1],
                                                score_threshold=0.25,
                                                input_shape=tf.constant(
                                                    [input_size, input_size]))
        else:
            batch_data = tf.constant(image_data)
            pred_bbox = infer(batch_data)
            for key, value in pred_bbox.items():
                boxes = value[:, :, 0:4]
                pred_conf = value[:, :, 4:]

        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores=tf.reshape(
                pred_conf,
                (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
            max_output_size_per_class=50,
            max_total_size=50,
            iou_threshold=FLAGS.iou,
            score_threshold=FLAGS.score)

        # convert data to numpy arrays and slice out unused elements
        num_objects = valid_detections.numpy()[0]
        bboxes = boxes.numpy()[0]
        bboxes = bboxes[0:int(num_objects)]
        scores = scores.numpy()[0]
        scores = scores[0:int(num_objects)]
        classes = classes.numpy()[0]
        classes = classes[0:int(num_objects)]

        # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height
        original_h, original_w, _ = frame.shape
        bboxes = utils.format_boxes(bboxes, original_h, original_w)

        # store all predictions in one parameter for simplicity when calling functions
        pred_bbox = [bboxes, scores, classes, num_objects]

        # read in all class names from config
        class_names = utils.read_class_names(cfg.YOLO.CLASSES)

        # by default allow all classes in .names file
        allowed_classes = list(class_names.values())

        # custom allowed classes (uncomment line below to customize tracker for only people)
        #allowed_classes = ['person']

        # loop through objects and use class index to get class name, allow only classes in allowed_classes list
        names = []
        deleted_indx = []
        for i in range(num_objects):
            class_indx = int(classes[i])
            class_name = class_names[class_indx]
            if class_name not in allowed_classes:
                deleted_indx.append(i)
            else:
                names.append(class_name)
        names = np.array(names)
        count = len(names)
        if FLAGS.count:
            cv2.putText(frame, "Objects being tracked: {}".format(count),
                        (5, 35), cv2.FONT_HERSHEY_COMPLEX_SMALL, 2,
                        (0, 255, 0), 2)
            print("Objects being tracked: {}".format(count))
        # delete detections that are not in allowed_classes
        bboxes = np.delete(bboxes, deleted_indx, axis=0)
        scores = np.delete(scores, deleted_indx, axis=0)

        # encode yolo detections and feed to tracker
        features = encoder(frame, bboxes)
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                bboxes, scores, names, features)
        ]

        # initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maxima supression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes,
                                                    nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        # update tracks
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()

            # draw bbox on screen
            # names = {'6_d': 'Thomas Delaney',
            #          '10_b': 'Leroy Sane',
            #          '18_b': 'Leon Goretzka',
            #          '25_b': 'Thomas Muller',
            #          '5_d': 'Dan-Axel Zagadou',
            #          '12_d': 'Zaragoza',
            #          '4_b': 'Niklas Sule',
            #          '14_d': 'Nico Schulz',
            #          '11_d': 'Marco Reus',
            #          'Referee': 'Referee',
            #          'ball': 'ball',
            #          '10_d': 'Thorgan Hazard',
            #          '6_b': 'Joshua Kimmich ',
            #          'gk_b': 'Ron-Thorben Hoffmann(GK)',
            #          '17_b': 'Jérôme Boateng',
            #          '27_b': 'David Alaba',
            #          '9_d': 'Erling Haaland',
            #          '8_d': 'Mahmoud Dahoud',
            #          'gk_d': 'Luca Unbehaun(GK)',
            #          '19_b': 'Alphonso Davies',
            #          '29_b': 'Kingsley Coman',
            #          '24_d': 'Marcel Schmelzer',
            #          '9_b': 'Robert Lewandowski',
            #          "23_d": 'Emre Can',
            #          }
            # if class_name == 'Referee':
            #     color = (0, 0, 0)
            if class_name == 'ball':
                # color = (255, 255, 255)
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])), (255, 255, 255), 1)
            # else:

            # try:
            #     colors = {'b': (252, 3, 78), 'd': (250, 247, 80)}
            #     color = colors[str(class_name.split('_')[-1])]
            # except KeyError:
            #     pass

            # class_name = names[str(class_name)]
            # color = (250, 247, 80)

            # color = colors[int(track.track_id) % len(colors)]
            # color = [i * 255 for i in color]
            # cv2.rectangle(frame, (int(bbox[0]), int(
            #     bbox[1])), (int(bbox[2]), int(bbox[3])), color, 1)
            # cv2.rectangle(frame, (int(bbox[0]), int(
            #     bbox[1]-30)), (int(bbox[0])+(len(str(class_name)))*17, int(bbox[1])), color, -1)
            cv2.putText(frame, class_name, (int(bbox[0]), int(bbox[1] - 10)),
                        0, 0.75, (255, 251, 46), 2)

            # if enable info flag then print details about each track
            if FLAGS.info:
                print(
                    "Tracker ID: {}, Class: {},  BBox Coords (xmin, ymin, xmax, ymax): {}"
                    .format(str(track.track_id), class_name, (int(
                        bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]))))

        # calculate frames per second of running detections
        fps = 1.0 / (time.time() - start_time)
        print("FPS: %.2f" % fps)
        result = np.asarray(frame)
        result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

        if not FLAGS.dont_show:
            cv2.imshow("Output Video", result)

        # if output flag is set, save video file
        if FLAGS.output:
            out.write(result)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cv2.destroyAllWindows()
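The yolov4-deepsort variants above call preprocessing.non_max_suppression with an extra classes argument so that boxes only suppress other boxes of the same class. If only the classic three-argument non_max_suppression(boxes, max_overlap, scores) were available, the same effect could be sketched by grouping indices per class (nms_fn and the wrapper name below are assumptions, not the project's API):

import numpy as np

def class_aware_nms(boxes_tlwh, classes, max_overlap, scores, nms_fn):
    # Run plain NMS within each class and merge the surviving indices.
    keep = []
    for cls in np.unique(classes):
        cls_idx = np.where(classes == cls)[0]
        kept_local = nms_fn(boxes_tlwh[cls_idx], max_overlap, scores[cls_idx])
        keep.extend(int(cls_idx[i]) for i in kept_local)
    return sorted(keep)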
def main(yolo):
    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # Deep SORT
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    writeVideo_flag = True
    asyncVideo_flag = False

    file_path = 'input_video.avi'
    if asyncVideo_flag:
        video_capture = VideoCaptureAsync(file_path)
    else:
        video_capture = cv2.VideoCapture(file_path)

    if asyncVideo_flag:
        video_capture.start()

    if writeVideo_flag:
        if asyncVideo_flag:
            w = int(video_capture.cap.get(3))
            h = int(video_capture.cap.get(4))
        else:
            w = int(video_capture.get(3))
            h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter('output_yolov3.mp4', fourcc, 25, (w, h))
        frame_index = -1

    fps = 0.0
    fps_imutils = imutils.video.FPS().start()

    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if not ret:
            break

        t1 = time.time()

        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb
        boxes, confidences = yolo.detect_image(image)

        boxes = np.array(boxes)
        confidences = np.array(confidences)

        # Run non-maxima suppression.
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap,
                                                    confidences)

        boxes = [boxes[i] for i in indices]
        confidences = [confidences[i] for i in indices]

        time1 = time.time()

        features = encoder(frame, boxes)

        time2 = time.time()

        detections = [
            Detection(bbox, confidence, feature)
            for bbox, confidence, feature in zip(boxes, confidences, features)
        ]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        time3 = time.time()

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            cv2.putText(frame, str(track.track_id),
                        (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200,
                        (0, 255, 0), 2)

        for det in detections:
            bbox = det.to_tlbr()
            score = "%.2f" % round(det.confidence * 100, 2)
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)
            cv2.putText(frame, score + '%', (int(bbox[0]), int(bbox[3])), 0,
                        5e-3 * 130, (0, 255, 0), 2)

        cv2.imshow('', frame)

        if writeVideo_flag:  # and not asyncVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1

        fps_imutils.update()

        fps = (fps + (1. / (time.time() - t1))) / 2
        print("FPS = %f" % (fps))

        time4 = time.time()

        time_sum = time4 - t1
        print("time:", (time1 - t1) / time_sum, (time2 - time1) / time_sum,
              (time3 - time2) / time_sum, (time4 - time3) / time_sum)

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    fps_imutils.stop()
    print('imutils FPS: {}'.format(fps_imutils.fps()))

    if asyncVideo_flag:
        video_capture.stop()
    else:
        video_capture.release()

    if writeVideo_flag:
        out.release()

    cv2.destroyAllWindows()
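The update fps = (fps + (1. / (time.time() - t1))) / 2 used above is an exponential moving average of the instantaneous frame rate with a fixed weight of 0.5 on the newest sample. A small sketch that makes the smoothing factor explicit (the helper is illustrative, not part of the example):

def smooth_fps(prev_fps, frame_seconds, alpha=0.5):
    # alpha=0.5 reproduces the (fps + 1/dt) / 2 update from the loop above.
    if frame_seconds <= 0:
        return prev_fps
    instantaneous = 1.0 / frame_seconds
    return (1.0 - alpha) * prev_fps + alpha * instantaneous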
            '''Timer start'''
            # out.write(original_frame)

            # original_frame = original_frame[:, 240:1680]
            fframe = cv2.resize(original_frame, (640, 480))
            # frame = fframe.copy()
            # frame = infer(frame)
            ti1 = time.time()
            '''yolov3 + deepsort'''
            image = Image.fromarray(fframe[..., ::-1])  # bgr to rgb
            boxs = yolo.detect_image(image)
            features = encoder(fframe, boxs)
            detections = [Detection(bbox, 1.0, feature) for bbox, feature in zip(boxs, features)]
            boxes = np.array([d.tlwh for d in detections])
            scores = np.array([d.confidence for d in detections])
            indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
            detections = [detections[i] for i in indices]
            tracker.predict()
            tracker.update(detections)

            for track in tracker.tracks:
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                bbox = track.to_tlbr()
                # bbox -----> (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3]))
                # corresponding coordinates
                bbox = np.maximum(bbox, 0)
                ID = track.track_id
                x1 = int(bbox[0])
                y1 = int(bbox[1])
                box_h = int(bbox[3] - y1)
def startRecording_YOLO():
    date_and_time = time.strftime("%Y%m%d-%H-%M-%S") #Stores current date and time in YYYY-MM-DD-HH:MM format
    vid_out_path = os.path.join(PROJECT_DIR, 'YoloV4', 'outputs', date_and_time + '.avi')
    
    
    #vid = cv2.VideoCapture(test_drive) #0 for webcam/Raspberry Pi Cam
    videothread = VideoThread(resolution=(640,480), framerate=30).start()

    width = int(videothread.stream.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(videothread.stream.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(videothread.stream.get(cv2.CAP_PROP_FPS))
    codec = cv2.VideoWriter_fourcc(*'XVID')
    output_video = cv2.VideoWriter(vid_out_path, codec, fps, (width,height))
    
    #width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
    #height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    #fps = int(vid.get(cv2.CAP_PROP_FPS))
    #codec = cv2.VideoWriter_fourcc(*'XVID')
    #output_video = cv2.VideoWriter(vid_out_path, codec, fps, (width,height))
    frame_number = 0
    freq = cv2.getTickFrequency()
    avg_fps = 0

    #while video is running/recording
    while True:
        return_val, frame = videothread.read()
        #return_val, frame = vid.read()
        
        if return_val:
            #frame = cv2.flip(frame, -1)
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image = Image.fromarray(frame)
        else:
            print('Video error, try another format')
            break
        
        frame_number += 1
        #print('Frame #: ', frame_number)
        frame_size = frame.shape[:2]
        image_data = cv2.resize(frame, (input_size, input_size))
        image_data = image_data / 255.
        # image_data = np.expand_dims(frame_resized, axis=0)

        # if floating_model:
        #     image_data = (np.float32(image_data) - 127.5) / 127.5
        image_data = image_data[np.newaxis, ...].astype(np.float32)  # convert image data to float32
        start_time = time.time()

        #TFLite Detections
        interpreter.set_tensor(input_details[0]['index'], image_data)
        interpreter.invoke()
        prediction = [interpreter.get_tensor(output_details[i]['index']) for i in range(len(output_details))]
        #box = interpreter.get_tensor(output_details[0]['index'])[0]
        #scores = interpreter.get_tensor(output_details[2]['index'])[0]
        boxes, prediction_conf = filter_boxes(prediction[0], prediction[1], score_threshold=0.4, input_shape=tf.constant([input_size, input_size]))

        #Reshape = returns a new tensor that has the same values as tensor in the same order, but with a new shape given by shape
        #Shape = returns a 1-D integer tensor, represents the shape of the input 
        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
            boxes = tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores = tf.reshape(prediction_conf, (tf.shape(prediction_conf)[0], -1, tf.shape(prediction_conf)[-1])),
            max_output_size_per_class = 50,
            max_total_size = 50,
            iou_threshold = 0.45,
            score_threshold = 0.5
        )

        #convert the received data into numpy arrays, then slice out unused elements
        number_of_objects = valid_detections.numpy()[0]
        bboxes = boxes.numpy()[0]
        bboxes = bboxes[0 : int(number_of_objects)]
        scores = scores.numpy()[0]
        scores = scores[0 : int(number_of_objects)]
        classes = classes.numpy()[0]
        classes = classes[0 : int(number_of_objects)]

        #format bounding boxes with normalized minimums and maximums of x and y
        original_h, original_w, _ = frame.shape
        bboxes = utils.format_boxes(bboxes, original_h, original_w)

        prediction_bbox = [bboxes, scores, classes, number_of_objects]

        #Read in all the class names from config and only allow certain ones to be detected (eases computation power)
        class_names = utils.read_class_names(cfg.YOLO.CLASSES)
        allowed_classes = ['traffic light', 'person', 'car', 'stop sign']

        #loop through objects and get classification name, using only the ones allows in allowed_classes
        names = []
        deleted_indx = []
        for i in range(number_of_objects):
            classification_index = int(classes[i])
            class_name = class_names[classification_index]
            if class_name not in allowed_classes: deleted_indx.append(i)
            else: names.append(class_name)
        names = np.array(names)
        count = len(names)

        #delete irrelevant detections (not in allowed_classes)
        bboxes = np.delete(bboxes, deleted_indx, axis = 0)
        scores = np.delete(scores, deleted_indx, axis = 0)

        #Feed tracker with encoded yolo detections
        detections_features = encoder(frame, bboxes)
        detections = [Detection(bbox, score, class_name, detection_feature) for bbox, score, class_name, detection_feature in zip(bboxes, scores, names, detections_features)]

        #initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        #run non-maxima supression (reduces amount of detected entities to as little as possible)
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        #Call tracker
        tracker.predict()
        tracker.update(detections)

        #update tracks
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1: continue
            bbox = track.to_tlbr()
            class_name = track.get_class()

            # if class_name == 'person': print('person found')

            # change frame to the one that showcases the lane detection
            # frame = lane_detect.detect_edges(frame)  # comment out if/when an error occurs

            # distance approximation (barebones, needs more adjusting)
            cam_parameter = 18  # change with different cameras; brings the detected distance closer to the actual distance
            distance = np.pi / (bbox[2].item() + bbox[3].item()) * 1000 + cam_parameter
            det_dest = str(int(distance))

            # draw bounding box on screen
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2)
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1] - 30)), (int(bbox[0]) + (len(class_name) + len(det_dest)) * 18, int(bbox[1])), color, -1)
            #cv2.putText(frame, class_name + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2)
            cv2.putText(frame, class_name + ": " + str(int(distance)), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2)
        
        #calculate fps of running detections
        fps = 1.0/ (time.time() - start_time)
        avg_fps = avg_fps + fps
        #print("FPS: %.2f" % fps)
        cv2.putText(frame, "FPS: " + str(int(fps)), (width - 100, height - 20),0, 0.75, (255,255,255),2)
        result = np.asarray(frame)
        result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        cv2.imshow("Output Video", result)

        output_video.write(result)
        if cv2.waitKey(1) & 0xFF == ord('q'): break
    cv2.destroyAllWindows()
    print('Average FPS: ', (avg_fps/frame_number))
    print('Number of Frames: ', frame_number)
    videothread.stop()
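The distance heuristic above scales inversely with the sum of two box coordinates plus a per-camera offset and is flagged in the code as barebones. A more conventional single-camera estimate uses the pinhole model, distance ≈ focal_length_px * real_height / box_height_px; the constants below are placeholders that would need calibration for a real camera:

def approximate_distance(box_height_px, real_height_m=1.7, focal_length_px=800.0):
    # Similar-triangles pinhole estimate; returns metres, assuming the object's
    # real height and the camera's focal length in pixels are roughly known.
    if box_height_px <= 0:
        return float('inf')
    return focal_length_px * real_height_m / box_height_px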
Example 26
def iterate(lines, model, vid, frame_num):
    tracks = []

    return_value, frame = vid.read()
    if return_value:
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        image = Image.fromarray(frame)
    else:
        print('Video has ended or failed, try a different video format!')
        cv2.destroyAllWindows()
        return False, tracks

    frame_size = frame.shape[:2]
    image_data = cv2.resize(frame, (FLAGS.size, FLAGS.size))
    image_data = image_data / 255.
    image_data = image_data[np.newaxis, ...].astype(np.float32)
    start_time = time.time()

    # run detections on tflite if flag is set
    if FLAGS.framework == 'tflite':
        interpreter.set_tensor(input_details[0]['index'], image_data)
        interpreter.invoke()
        pred = [
            interpreter.get_tensor(output_details[i]['index'])
            for i in range(len(output_details))
        ]
        # run detections using yolov3 if flag is set
        if FLAGS.model == 'yolov3' and FLAGS.tiny:
            boxes, pred_conf = filter_boxes(pred[1],
                                            pred[0],
                                            score_threshold=0.25,
                                            input_shape=tf.constant(
                                                [FLAGS.size, FLAGS.size]))
        else:
            boxes, pred_conf = filter_boxes(pred[0],
                                            pred[1],
                                            score_threshold=0.25,
                                            input_shape=tf.constant(
                                                [FLAGS.size, FLAGS.size]))
    else:
        batch_data = tf.constant(image_data)
        pred_bbox = model.signatures['serving_default'](batch_data)
        for key, value in pred_bbox.items():
            boxes = value[:, :, 0:4]
            pred_conf = value[:, :, 4:]

    boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
        boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
        scores=tf.reshape(
            pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
        max_output_size_per_class=50,
        max_total_size=50,
        iou_threshold=FLAGS.iou,
        score_threshold=FLAGS.score)

    # convert data to numpy arrays and slice out unused elements
    num_objects = valid_detections.numpy()[0]
    bboxes = boxes.numpy()[0]
    bboxes = bboxes[0:int(num_objects)]
    scores = scores.numpy()[0]
    scores = scores[0:int(num_objects)]
    classes = classes.numpy()[0]
    classes = classes[0:int(num_objects)]

    # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height
    original_h, original_w, _ = frame.shape
    bboxes = utils.format_boxes(bboxes, original_h, original_w)

    # store all predictions in one parameter for simplicity when calling functions
    pred_bbox = [bboxes, scores, classes, num_objects]

    # read in all class names from config
    class_names = utils.read_class_names(cfg.YOLO.CLASSES)

    # by default allow all classes in .names file
    #allowed_classes = list(class_names.values())

    # custom allowed classes (uncomment line below to customize tracker for only people)
    #allowed_classes = ['person']
    allowed_classes = ['car', 'bus', 'truck']

    # loop through objects and use class index to get class name, allow only classes in allowed_classes list
    names = []
    deleted_indx = []
    for i in range(num_objects):
        class_indx = int(classes[i])
        class_name = class_names[class_indx]
        if class_name not in allowed_classes:
            deleted_indx.append(i)
        else:
            names.append(class_name)

    names = np.array(names)
    count = len(names)
    if FLAGS.count:
        cv2.putText(frame, "Objects being tracked: {}".format(count), (5, 35),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (0, 255, 0), 2)
        print("Objects being tracked: {}".format(count))

    # delete detections that are not in allowed_classes
    bboxes = np.delete(bboxes, deleted_indx, axis=0)
    scores = np.delete(scores, deleted_indx, axis=0)

    # encode yolo detections and feed to tracker
    features = encoder(frame, bboxes)
    detections = [
        Detection(bbox, score, class_name,
                  feature) for bbox, score, class_name, feature in zip(
                      bboxes, scores, names, features)
    ]

    #initialize color map
    cmap = plt.get_cmap('tab20b')
    colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

    # run non-maxima supression
    boxs = np.array([d.tlwh for d in detections])
    scores = np.array([d.confidence for d in detections])
    classes = np.array([d.class_name for d in detections])
    indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap,
                                                scores)
    detections = [detections[i] for i in indices]

    # Call the tracker
    tracker.predict()
    tracker.update(detections)

    # update tracks
    for track in tracker.tracks:
        if not track.is_confirmed() or track.time_since_update > 1:
            continue
        bbox = track.to_tlbr()
        class_name = track.get_class()

        tracks.append(
            Rect(track.track_id, (int(bbox[0]), int(bbox[1])),
                 (int(bbox[2]) - int(bbox[0]), int(bbox[3]) - int(bbox[1]))))

        # draw bbox on screen
        color = colors[int(track.track_id) % len(colors)]
        color = [i * 255 for i in color]
        cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                      (int(bbox[2]), int(bbox[3])), color, 2)
        cv2.rectangle(
            frame, (int(bbox[0]), int(bbox[1] - 30)),
            (int(bbox[0]) +
             (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])),
            color, -1)
        cv2.putText(frame, class_name + "-" + str(track.track_id),
                    (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75,
                    (255, 255, 255), 2)

        # if enable info flag then print details about each track
        #if FLAGS.info:
        #    print("Tracker ID: {}, Class: {},  BBox Coords (xmin, ymin, xmax, ymax): {}".format(str(track.track_id), class_name, (int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]))))

    for line in lines:
        cv2.line(frame, line.pt1, line.pt2, line.color, 3)
        cv2.line(frame, line.vertor_pt1, line.vertor_pt2, (255, 255, 0), 2)
        cv2.putText(frame, str(line.count), line.center,
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

    cv2.putText(frame, str(frame_num), (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1,
                (255, 255, 0), 2)

    # calculate frames per second of running detections
    fps = 1.0 / (time.time() - start_time)
    print("FPS: %.2f" % fps)
    result = np.asarray(frame)
    result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

    if not FLAGS.dont_show:
        cv2.imshow("Output Video", result)

    # if output flag is set, save video file
    if FLAGS.output:
        out.write(result)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        cv2.destroyAllWindows()
        return False, tracks

    return True, tracks
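iterate() hands the confirmed tracks back as Rect objects and draws each counting line with its running line.count, but the crossing test itself happens outside this snippet. One common way to implement it is to keep the previous centroid of every track and check whether it switched sides of the line between frames, using the sign of a 2-D cross product (the helpers below are a sketch, not the project's code):

def side_of_line(point, a, b):
    # Sign of the cross product (b - a) x (point - a): >0 one side, <0 the other.
    return (b[0] - a[0]) * (point[1] - a[1]) - (b[1] - a[1]) * (point[0] - a[0])

def crossed_line(prev_center, cur_center, line_pt1, line_pt2):
    # A track is counted when its centroid changes sides between two frames.
    s_prev = side_of_line(prev_center, line_pt1, line_pt2)
    s_cur = side_of_line(cur_center, line_pt1, line_pt2)
    return s_prev * s_cur < 0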
def main(yolo):
    global p1_flag, p2_flag, vline1, mid_line, vline2, lane_1, lane_2, lane_3, lane_4, lane_5, lane_6, mask, preset
    global cnt_lane_1, cnt_lane_2, cnt_lane_3, cnt_lane_4, cnt_lane_5, cnt_lane_6, global_point
    global speed_lane_1, speed_lane_2, speed_lane_3, speed_lane_4, speed_lane_5, speed_lane_6

    ################# parameters ######################
    track_len = 2
    detect_interval = 4
    of_track = []
    preset = 0
    alpha = 0.3
    mm1, mm2, mm3, mm4, mm5, mm6 = 0, 0, 0, 0, 0, 0
    v1, v2, v3, v4, v5, v6 = 0, 0, 0, 0, 0,0
    ptn1, ptn2, ptn3, ptn4, ptn5, ptn6 = 0, 0, 0, 0, 0,0
    prv1, prv2, prv3, prv4, prv5, prv6 = 0, 0, 0, 0, 0, 0
    ms2kmh = 3.6
    fps = 30
    
    max_cosine_distance = 0.5
    nn_budget = None
    nms_max_overlap = 0.3
    
    counter = []
    ##################################################

    # Deep SORT
    model_filename = 'model_data/market1501.pb'
    encoder = gdet.create_box_encoder(model_filename,batch_size=1)

    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    writeVideo_flag = True
    #video_path = "./output/output.avi"
    video_capture = cv2.VideoCapture(args["input"])

    if writeVideo_flag:
        # Define the codec and create VideoWriter object
        w = int(video_capture.get(3))
        h = int(video_capture.get(4))
        total_frame = int(video_capture.get(cv2.CAP_PROP_FRAME_COUNT))
        fourcc = cv2.VideoWriter_fourcc(*'FMP4')
        video_fps = video_capture.get(cv2.CAP_PROP_FPS)
        out = cv2.VideoWriter('output/CH4_output.avi', fourcc, video_fps, (w, h))
        list_file = open('detection.txt', 'w')
        frame_index = -1

    fps = 0.0
    frame_idx = 0
    speed_dict = OrderedDict()

    ret, first_frame = video_capture.read()  
    cal_mask1 = np.zeros_like(first_frame[:, :, 0])
    cal_mask2 = np.zeros_like(first_frame[:, :, 0])

    while True:
        ret, frame = video_capture.read()
        if not ret:
            break
        glob = frame.copy()
        cmask = frame.copy()
        
        # Channel and preset setting
        if ch == 8:
            if 0 <= frame_idx <= 480 or 2415 <= frame_idx <= 4203 or 6140 <=frame_idx<=7925 or 9864 <=frame_idx<=11648 or 13585 <= frame_idx <= 15370 or frame_idx >=17306:
                preset = 1
            elif 559 <= frame_idx <= 2340 or 4275 <= frame_idx<=6064 or 8000 <= frame_idx<=9787 or 11730 <=frame_idx<=13513 or 15450 <=frame_idx<=17237:
                preset = 2
            else:
                preset = 0

            if preset == 1:
                vline1 = p1_vline1
                mid_line = p1_mid_line
                vline2 = p1_vline2

                lane_1 = p1_lane_1
                lane_2 = p1_lane_2
                lane_3 = p1_lane_3
                lane_4 = p1_lane_4
                lane_5 = p1_lane_5
                lane_6 = p1_lane_6

                global_point = p1_global_point

                mask = p1_mask
                
                # Polyline
                cv2.polylines(frame, [lane_1], True, (153,255,255))
                cv2.polylines(frame, [lane_2], True, (255,204,204))
                cv2.polylines(frame, [lane_3], True, (204,255,204))
                cv2.polylines(frame, [lane_4], True, (255,204,255))
                cv2.polylines(frame, [lane_5], True, (153,153,255))
                cv2.polylines(frame, [lane_6], True, (102,255,153))

                frame = cv2.line(frame, vline1[0], vline1[1], (0,0,255), 1)
                frame = cv2.line(frame, mid_line[0], mid_line[1], (0,0,255), 1)
                frame = cv2.line(frame, vline2[0], vline2[1], (0,0,255), 1)

                p1_flag = True

                view_polygon = np.array([[10, 1920], [380, 250], [680, 250], [1080, 480], [1080, 1920]])
                cal_polygon = np.array([[361, 304], [755, 293], [1076, 480], [1077, 1067], [163, 1051]])

                pg1 = np.array([[382, 347], [359, 347], [236, 833], [272, 832]])  # RT, LT, LB, RB
                pg2 = np.array([[460, 347], [434, 346], [456, 833], [505, 833]])  # LB, RT, LT, RB
                pg3 = np.array([[544, 345], [514, 345], [686, 833], [755, 832]])  # LB, LT, RT, RB
                pg4 = np.array([[630, 342], [598, 343], [924, 829], [991, 829]])  # LB, LT, LB, RB
                pg5 = np.array([[725, 343], [696, 345], [996, 650], [1056, 646]])  # RT, LB, LT, RB
                pg6 = np.array([[798, 340], [761, 340], [1037, 535], [1070, 530]])  # RT, LB, LT, RB

                cv2.fillConvexPoly(cal_mask1, cal_polygon, 1)
                
                frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                frame_gray = cv2.bitwise_and(frame_gray, frame_gray, mask=cal_mask1)

                if pg1.size > 0:
                    cv2.fillPoly(cmask, [pg1], (120, 0, 120), cv2.LINE_AA)
                if pg2.size > 0:
                    cv2.fillPoly(cmask, [pg2], (120, 120, 0), cv2.LINE_AA)
                if pg3.size > 0:
                    cv2.fillPoly(cmask, [pg3], (0, 120, 120), cv2.LINE_AA)
                if pg4.size > 0:
                    cv2.fillPoly(cmask, [pg4], (80, 0, 255), cv2.LINE_AA)
                if pg5.size > 0:
                    cv2.fillPoly(cmask, [pg5], (255, 0, 80), cv2.LINE_AA)
                if pg6.size > 0:
                    cv2.fillPoly(cmask, [pg6], (120, 0, 0), cv2.LINE_AA)

            elif preset == 2:
                vline1 = p2_vline1
                mid_line = p2_mid_line
                vline2 = p2_vline2

                lane_1 = p2_lane_1
                lane_2 = p2_lane_2
                lane_3 = p2_lane_3
                lane_4 = p2_lane_4
                lane_5 = p2_lane_5
                lane_6 = p2_lane_6
                global_point = p2_global_point

                mask = p2_mask
                
                # Polyline
                cv2.polylines(frame, [lane_1], True, (153,255,255))
                cv2.polylines(frame, [lane_2], True, (255,204,204))
                cv2.polylines(frame, [lane_3], True, (204,255,204))
                cv2.polylines(frame, [lane_4], True, (255,204,255))
                cv2.polylines(frame, [lane_5], True, (153,153,255))
                cv2.polylines(frame, [lane_6], True, (102,255,153))

                frame = cv2.line(frame, vline1[0], vline1[1], (0,0,255), 1)
                frame = cv2.line(frame, mid_line[0], mid_line[1], (0,0,255), 1)
                frame = cv2.line(frame, vline2[0], vline2[1], (0,0,255), 1)

                p2_flag = True

                view_polygon = np.array([[284, 649], [0, 1629], [1076, 1574], [1079, 888], [676, 634]])
                cal_polygon = np.array([[896, 778], [244, 794], [105, 1271], [1077, 1245], [1077, 879]])

                pg1 = np.array([[276, 846], [234, 847], [134, 1200], [199, 1198]])  # RT, LT, LB, RB
                pg2 = np.array([[418, 844], [375, 844], [384, 1196], [442, 1198]])  # LB, RT, LT, RB
                pg3 = np.array([[553, 843], [508, 844], [637, 1194], [706, 1194]])  # LB, LT, RT, RB
                pg4 = np.array([[686, 841], [637, 843], [886, 1190], [968, 1189]])  # LB, LT, LB, RB
                pg5 = np.array([[817, 837], [773, 841], [1005, 1051], [1060, 1047]])  # RT, LB, LT, RB
                pg6 = np.array([[966, 837], [919, 840], [1043, 929], [1087, 927]])  # RT, LT, LB, RB

                cv2.fillConvexPoly(cal_mask2, cal_polygon, 1)

                frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                frame_gray = cv2.bitwise_and(frame_gray, frame_gray, mask=cal_mask2)

                if pg1.size > 0:
                    cv2.fillPoly(cmask, [pg1], (120, 0, 120), cv2.LINE_AA)
                if pg2.size > 0:
                    cv2.fillPoly(cmask, [pg2], (120, 120, 0), cv2.LINE_AA)
                if pg3.size > 0:
                    cv2.fillPoly(cmask, [pg3], (0, 120, 120), cv2.LINE_AA)
                if pg4.size > 0:
                    cv2.fillPoly(cmask, [pg4], (80, 0, 255), cv2.LINE_AA)
                if pg5.size > 0:
                    cv2.fillPoly(cmask, [pg5], (255, 0, 80), cv2.LINE_AA)
                if pg6.size > 0:
                    cv2.fillPoly(cmask, [pg6], (120, 0, 0), cv2.LINE_AA)

        elif ch == 4:
            if 0 <= frame_idx <= 1751 or frame_idx >= 3655:
                preset = 1
            elif 1797 <= frame_idx <= 3600:
                preset = 2
            else:
                preset = 0

            if preset == 1:
                lane_1 = p1_lane_1
                lane_2 = p1_lane_2
                lane_3 = p1_lane_3
                lane_4 = p1_lane_4
                lane_5 = p1_lane_5
                lane_6 = p1_lane_6

                global_point = p1_global_point

                mask = p1_mask

                # Polyline
                # cv2.polylines(frame, [lane_1], True, (153,255,255))
                # cv2.polylines(frame, [lane_2], True, (255,204,204))
                # cv2.polylines(frame, [lane_3], True, (204,255,204))
                # cv2.polylines(frame, [lane_4], True, (255,204,255))
                # cv2.polylines(frame, [lane_5], True, (153,153,255))
                # cv2.polylines(frame, [lane_6], True, (102,255,153))

                p1_flag = True

                view_polygon = np.array([[731, 563], [385, 567], [33, 1260], [1077, 1254], [1078, 812]])
                cal_polygon = np.array([[914, 669],[286, 675],  [89, 1083], [1078, 1083], [1078, 772]])

                pg6 = np.array([[346, 686], [313, 686], [163, 992], [244, 996]])  # RT, LT, LB, RB
                pg5 = np.array([[430, 684], [401, 685], [338, 998], [420, 1000]])  # LB, RT, LT, RB
                pg4 = np.array([[534, 685], [506, 685], [547, 999], [631, 999]])  # LB, LT, RT, RB
                pg3 = np.array([[654, 685], [609, 684], [760, 1000], [839, 999]])  # LB, LT, LB, RB
                pg2 = np.array([[770, 685], [723, 684], [979, 999], [1051, 998]])  # RT, LB, LT, RB
                pg1 = np.array([[858, 683], [815, 683], [1031, 860], [1077, 857]])  # RT, LB, LT, RB

                cv2.fillConvexPoly(cal_mask1, cal_polygon, 1)

                frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                frame_gray = cv2.bitwise_and(frame_gray, frame_gray, mask=cal_mask1)

                if pg1.size > 0:
                    cv2.fillPoly(cmask, [pg1], (120, 0, 120), cv2.LINE_AA)
                if pg2.size > 0:
                    cv2.fillPoly(cmask, [pg2], (120, 120, 0), cv2.LINE_AA)
                if pg3.size > 0:
                    cv2.fillPoly(cmask, [pg3], (0, 120, 120), cv2.LINE_AA)
                if pg4.size > 0:
                    cv2.fillPoly(cmask, [pg4], (80, 0, 255), cv2.LINE_AA)
                if pg5.size > 0:
                    cv2.fillPoly(cmask, [pg5], (255, 0, 80), cv2.LINE_AA)
                if pg6.size > 0:
                    cv2.fillPoly(cmask, [pg6], (120, 0, 0), cv2.LINE_AA)

            elif preset == 2:
                lane_1 = p2_lane_1
                lane_2 = p2_lane_2
                lane_3 = p2_lane_3
                lane_4 = p2_lane_4
                lane_5 = p2_lane_5
                lane_6 = p2_lane_6
                global_point = p2_global_point

                mask = p2_mask

                # Polyline
                # cv2.polylines(frame, [lane_1], True, (153,255,255))
                # cv2.polylines(frame, [lane_2], True, (255,204,204))
                # cv2.polylines(frame, [lane_3], True, (204,255,204))
                # cv2.polylines(frame, [lane_4], True, (255,204,255))
                # cv2.polylines(frame, [lane_5], True, (153,153,255))
                # cv2.polylines(frame, [lane_6], True, (102,255,153))

                p2_flag = True

                view_polygon = np.array([[547, 609], [0, 1109], [1, 1271], [1078, 1278], [1079, 594]])
                cal_polygon = np.array([[529, 611], [8, 1105], [1077, 1110], [1078, 599]])

                pg6 = np.array([[556, 609], [493, 607], [108, 1033], [190, 1030]])  # RT, LT, LB, RB
                pg5 = np.array([[693, 604], [642, 602], [356, 1020], [455, 1020]])  # LB, RT, LT, RB
                pg4 = np.array([[812, 633], [765, 633], [604, 1026], [702, 1026]])  # LB, LT, RT, RB
                pg3 = np.array([[932, 638], [882, 636], [883, 1007], [953, 1001]])  # LB, LT, LB, RB
                pg2 = np.array([[1059, 641], [978, 638], [1028, 941], [1079, 916]])  # RT, LB, LT, RB
                pg1 = np.array([])

                cv2.fillConvexPoly(cal_mask2, cal_polygon, 1)

                frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                frame_gray = cv2.bitwise_and(frame_gray, frame_gray, mask=cal_mask2)

                if pg1.size > 0:
                    cv2.fillPoly(cmask, [pg1], (120, 0, 120), cv2.LINE_AA)
                if pg2.size > 0:
                    cv2.fillPoly(cmask, [pg2], (120, 120, 0), cv2.LINE_AA)
                if pg3.size > 0:
                    cv2.fillPoly(cmask, [pg3], (0, 120, 120), cv2.LINE_AA)
                if pg4.size > 0:
                    cv2.fillPoly(cmask, [pg4], (80, 0, 255), cv2.LINE_AA)
                if pg5.size > 0:
                    cv2.fillPoly(cmask, [pg5], (255, 0, 80), cv2.LINE_AA)
                if pg6.size > 0:
                    cv2.fillPoly(cmask, [pg6], (120, 0, 0), cv2.LINE_AA)

        t1 = time.time()
        
        cnt_lane_1 = cnt_lane_2 = cnt_lane_3 = cnt_lane_4 = cnt_lane_5 = cnt_lane_6 = 0

        image = Image.fromarray(frame[...,::-1]) #bgr to rgb
        boxs,class_names = yolo.detect_image(image)
        
        #features is 128-dimension vector for each bounding box
        features = encoder(frame,boxs)        
        
        # score to 1.0 here.
        # score 1.0 means that bbox's class score after yolo is 100%
        detections = [Detection(bbox, 1.0, feature) for bbox, feature in zip(boxs, features)]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)

        # detections is object detection result at current frame
        # it may contain many bbox or no bbox
        detections = [detections[i] for i in indices]

        # calculate lane by lane avg speed
        ## swhan method (optical flow)
        mm1, mm2, mm3, mm4, mm5, mm6 = 0, 0, 0, 0, 0, 0

        if len(of_track) > 0:
            img0, img1 = prev_gray, frame_gray
            p0 = np.float32([tr[-1] for tr in of_track]).reshape(-1, 1, 2)
            p1, _st, _err = cv2.calcOpticalFlowPyrLK(img0, img1, p0, None, **lk_params)
            p0r, _st, _err = cv2.calcOpticalFlowPyrLK(img1, img0, p1, None, **lk_params)
            d = abs(p0-p0r).reshape(-1, 2).max(-1)
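            # Forward-backward consistency check: each point is tracked from the previous
            # frame to the current one and back again; only points that land within one
            # pixel of their original location are kept as reliable optical-flow tracks.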
            good = d < 1
            new_of_tracks = []
            for tr, (x, y), good_flag in zip(of_track, p1.reshape(-1, 2), good):
                if not good_flag:
                    continue
                tr.append((x, y))
                if len(tr) > track_len:
                    del tr[0]
                new_of_tracks.append(tr)
                cv2.circle(frame, (int(x), int(y)), 3, (0, 255, 0), -1)
            of_track = new_of_tracks

            
            for idx, tr in enumerate(of_track):
                #print(frame_idx, tr)
                if pg1.size > 0:
                    result_pg1 = cv2.pointPolygonTest(pg1, tr[0],True)
                else:
                    result_pg1 = -999
                if pg2.size > 0:
                    result_pg2 = cv2.pointPolygonTest(pg2, tr[0], True)
                else:
                    result_pg2 = -999
                if pg3.size > 0:
                    result_pg3 = cv2.pointPolygonTest(pg3, tr[0], True)
                else:
                    result_pg3 = -999
                if pg4.size > 0:
                    result_pg4 = cv2.pointPolygonTest(pg4, tr[0], True)
                else:
                    result_pg4 = -999
                if pg5.size > 0:
                    result_pg5 = cv2.pointPolygonTest(pg5, tr[0], True)
                else:
                    result_pg5 = -999
                if pg6.size > 0:
                    result_pg6 = cv2.pointPolygonTest(pg6, tr[0], True)
                else:
                    result_pg6 = -999

                if frame_idx % detect_interval == 0:
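                    # Every detect_interval frames, convert the per-frame pixel displacement
                    # of each tracked corner inside a lane polygon to metres via convPtoR_1 /
                    # convPtoR_2 (per preset), average it over the points in that lane, and
                    # scale to km/h as mean_metres * fps * 3.6 * 6. The factor 6 looks like an
                    # empirical calibration constant, and note that `fps` here holds the
                    # running processing-rate estimate (it is reset to 0.0 before the loop),
                    # not the nominal 30 fps set at the top of main().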
                    if result_pg1 > 0:
                        ptn1 += 1
                        if preset == 1:
                            mm1 += convPtoR_1(tr[0][0],tr[0][1],tr[1][0],tr[1][1])   
                        elif preset == 2:
                            mm1 += convPtoR_2(tr[0][0],tr[0][1],tr[1][0],tr[1][1])          
                        mmm1 = mm1/ptn1
                        v1 = mmm1*fps*ms2kmh*6
                    if result_pg2 > 0:
                        ptn2 += 1
                        if preset == 1:
                            mm2 += convPtoR_1(tr[0][0],tr[0][1],tr[1][0],tr[1][1])           
                        elif preset == 2:
                            mm2 += convPtoR_2(tr[0][0],tr[0][1],tr[1][0],tr[1][1])          
                        mmm2 = mm2/ptn2
                        v2 = mmm2*fps*ms2kmh*6
                    if result_pg3 > 0:
                        ptn3 += 1
                        if preset == 1:
                            mm3 += convPtoR_1(tr[0][0],tr[0][1],tr[1][0],tr[1][1])           
                        elif preset == 2:
                            mm3 += convPtoR_2(tr[0][0],tr[0][1],tr[1][0],tr[1][1])          
                        mmm3 = mm3/ptn3
                        v3 = mmm3*fps*ms2kmh*6
                    if result_pg4 > 0:
                        ptn4 += 1
                        if preset == 1:
                            mm4 += convPtoR_1(tr[0][0],tr[0][1],tr[1][0],tr[1][1])           
                        elif preset == 2:
                            mm4 += convPtoR_2(tr[0][0],tr[0][1],tr[1][0],tr[1][1])          
                        mmm4 = mm4/ptn4
                        v4 = mmm4*fps*ms2kmh*6
                    if result_pg5 > 0:
                        ptn5 += 1
                        if preset == 1:
                            mm5 += convPtoR_1(tr[0][0],tr[0][1],tr[1][0],tr[1][1])           
                        elif preset == 2:
                            mm5 += convPtoR_2(tr[0][0],tr[0][1],tr[1][0],tr[1][1])          
                        mmm5 = mm5/ptn5
                        v5 = mmm5*fps*ms2kmh*6
                    if result_pg6 > 0:
                        ptn6 += 1
                        if preset == 1:
                            mm6 += convPtoR_1(tr[0][0],tr[0][1],tr[1][0],tr[1][1])           
                        elif preset == 2:
                            mm6 += convPtoR_2(tr[0][0],tr[0][1],tr[1][0],tr[1][1])          
                        mmm6 = mm6/ptn6
                        v6 = mmm6*fps*ms2kmh*6
        
        mask = np.zeros_like(frame_gray)
        mask[:] = 255
        for x, y in [np.int32(tr[-1]) for tr in of_track]:
            cv2.circle(mask, (x, y), 3, 0, -1)
        p = cv2.goodFeaturesToTrack(frame_gray, mask = mask, **feature_params)
        if p is not None:
            for x, y in np.float32(p).reshape(-1, 2):
                of_track.append([(x, y)])
        prev_gray = frame_gray

        ## swhan method
        if frame_idx % detect_interval == 0:
            if ptn1 > 0:
                avg_speed_lane_1 = v1
                prv1=v1
            elif ptn1 == 0:
                avg_speed_lane_1 = 0 
                prv1 = 0
            if ptn2 > 0:
                avg_speed_lane_2 = v2
                prv2=v2
            elif ptn2 == 0:
                avg_speed_lane_2 = 0 
                prv2 = 0
            if ptn3 > 0:
                avg_speed_lane_3 = v3
                prv3=v3
            elif ptn3 == 0:
                avg_speed_lane_3 = 0 
                prv3 = 0
            if ptn4 > 0:
                avg_speed_lane_4 = v4
                prv4=v4
            elif ptn4 == 0:
                avg_speed_lane_4 = 0 
                prv4 = 0
            if ptn5 > 0:
                avg_speed_lane_5 = v5
                prv5=v5
            elif ptn5 == 0:
                avg_speed_lane_5 = 0 
                prv5 = 0
            if ptn6 > 0:
                avg_speed_lane_6 = v6
                prv6=v6
            elif ptn6 == 0:
                avg_speed_lane_6 = 0 
                prv6 = 0
        else:
            avg_speed_lane_1 = prv1 
            avg_speed_lane_2 = prv2 
            avg_speed_lane_3 = prv3 
            avg_speed_lane_4 = prv4 
            avg_speed_lane_5 = prv5 
            avg_speed_lane_6 = prv6 
        
        ptn1, ptn2, ptn3, ptn4, ptn5, ptn6 = 0, 0, 0, 0, 0, 0

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        i = int(0)
        indexIDs = []
        c = []
        boxes = []
        
        for det in detections:
            bbox = det.to_tlbr()
        
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue

            indexIDs.append(int(track.track_id))
            counter.append(int(track.track_id))
            bbox = track.to_tlbr()

            color = [int(c) for c in COLORS[indexIDs[i] % len(COLORS)]]

            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),(color), 3)
            if len(class_names) > 0:
                class_name = class_names[0]
            
            i += 1
            center = (int(((bbox[0])+(bbox[2]))/2),int(((bbox[1])+(bbox[3]))/2))
            
            # global point matching
            track.matching_point[0] = center[0]
            track.matching_point[1] = center[1]
            
            temp_list = [track.matching_point[0],track.matching_point[1],frame_idx]

            if len(track.matching_point_list) == 4:
                track.matching_point_list.append(temp_list)

                x1 = track.matching_point_list[0][0]
                y1 = track.matching_point_list[0][1]
                x2 = track.matching_point_list[-1][0]
                y2 = track.matching_point_list[-1][1]
            
                # If the pixel position has not changed, keep the previous speed;
                # otherwise convert the pixel displacement to a real-world distance
                # and derive the speed from the frame gap.
                if x1 != x2 or y1 != y2:
                    time1 = track.matching_point_list[0][2]
                    time2 = track.matching_point_list[-1][2]

                    R_dist1 = None
                    if preset == 1:
                        R_dist1 = convPtoR_1(x1, y1, x2, y2)
                    elif preset == 2:
                        R_dist1 = convPtoR_2(x1, y1, x2, y2)

                    if R_dist1 is not None:
                        t_time1 = (time2 - time1) / 30
                        speed = int(3.6 * R_dist1 // t_time1)
                        if frame_idx % 6 == 1:
                            track.speed = speed

                track.matching_point_list.pop(0)
                cv2.putText(frame, str(int(track.speed)) + 'km/h', (int(bbox[0]), int(bbox[1] + ((bbox[3] - bbox[1]) / 2))), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 4)
            elif len(track.matching_point_list) == 0 :
                track.matching_point_list.append(temp_list)
            elif track.matching_point_list[-1][0] != track.matching_point[0] and track.matching_point_list[-1][1] != track.matching_point[1]: 
                track.matching_point_list.append(temp_list)

            cv2.circle(frame, (track.matching_point[0], track.matching_point[1]), 5, (0,0,255),-1)
            
            # traffic lane by lane
            if cv2.pointPolygonTest(lane_1, center, False) >= 0:
                cnt_lane_1 += 1
                track.driving_lane = 1
            elif cv2.pointPolygonTest(lane_2, center, False) >= 0:
                cnt_lane_2 += 1 
                track.driving_lane = 2
            elif cv2.pointPolygonTest(lane_3, center, False) >= 0:
                cnt_lane_3 += 1
                track.driving_lane = 3
            elif cv2.pointPolygonTest(lane_4, center, False) >= 0:
                cnt_lane_4 += 1 
                track.driving_lane = 4
            elif cv2.pointPolygonTest(lane_5, center, False) >= 0:
                cnt_lane_5 += 1 
                track.driving_lane = 5
            elif cv2.pointPolygonTest(lane_6, center, False) >= 0:
                cnt_lane_6 += 1 
                track.driving_lane = 6

            cv2.putText(frame, "ID:"+str(track.track_id) + "/"+ str(track.driving_lane), (int(bbox[0]), int(bbox[1])-20),cv2.FONT_HERSHEY_SIMPLEX, 1, color, 2)

            pts[track.track_id].append(center)
            ctime[track.time].append(time.time())

            thickness = 5
            
            #center point
            cv2.circle(frame,  (center), 1, color, thickness)

            #draw motion path
            for j in range(1, len(pts[track.track_id])):
                if pts[track.track_id][j - 1] is None or pts[track.track_id][j] is None:
                    continue
                thickness = int(np.sqrt(64 / float(j + 1)) * 2)
                cv2.line(frame,(pts[track.track_id][j-1]), (pts[track.track_id][j]),(color),thickness)
                # cv2.putText(frame, str(class_names[j]),(int(bbox[0]), int(bbox[1] -20)),0, 5e-3 * 150, (255,255,255),2)
 
        cv2.putText(frame, "Current Object Counter: "+str(i),(int(20), int(80)),0, 5e-3 * 200, (0,255,0),2)
        cv2.putText(frame, "FPS: %f"%(fps),(int(20), int(40)),0, 5e-3 * 200, (0,255,0),3)

        cv2.putText(frame, "Traffic, Avg_speed", (570,40), 0, 1, (0,255,0),  2)
        cv2.putText(frame, "Lane_1: " + str(cnt_lane_1)+', '+str(int(avg_speed_lane_1)), (500,80), 0, 1, (0,255,0),  2)
        cv2.putText(frame, "Lane_2: " + str(cnt_lane_2)+', '+str(int(avg_speed_lane_2)), (500,120), 0, 1, (0,255,0),  2)
        cv2.putText(frame, "Lane_3: " + str(cnt_lane_3)+', '+str(int(avg_speed_lane_3)), (500,160), 0, 1, (0,255,0),  2)
        cv2.putText(frame, "Lane_4: " + str(cnt_lane_4)+', '+str(int(avg_speed_lane_4)), (500,200), 0, 1, (0,255,0),  2)
        cv2.putText(frame, "Lane_5: " + str(cnt_lane_5)+', '+str(int(avg_speed_lane_5)), (500,240), 0, 1, (0,255,0),  2)
        cv2.putText(frame, "Lane_6: " + str(cnt_lane_6)+', '+str(int(avg_speed_lane_6)), (500,280), 0, 1, (0,255,0),  2)
       
        frame_idx += 1 

        if writeVideo_flag:
            #save a frame
            # cv2.fillPoly(frame, [mask], (0,0,0))
            # for i,v in enumerate(global_point):            
            #     cv2.circle(frame, v, 1, (0,255,255),-1)
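            # Overlay the coloured lane polygons drawn on cmask onto the frame with
            # transparency alpha before writing the frame out.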
            cv2.addWeighted(cmask, alpha, frame, 1 - alpha, 0, frame)
            out.write(frame)
            frame_index = frame_index + 1
            list_file.write(str(frame_index)+' ')
            if len(boxs) != 0:
                for i in range(0,len(boxs)):
                    list_file.write(str(boxs[i][0]) + ' '+str(boxs[i][1]) + ' '+str(boxs[i][2]) + ' '+str(boxs[i][3]) + ' ')
            list_file.write('\n')                
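            # Smooth the reported processing rate: average the previous estimate with the
            # new 1/(frame time) measurement (roughly exponential smoothing, factor 0.5).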
            fps  = ( fps + (1./(time.time()-t1)) ) / 2

    print(" ")
    print("[Finish]")
    end = time.time()

    video_capture.release()

    if writeVideo_flag:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
Example 28
        #     av_time = 1
        #     continue
        # else:
        #     av_time +=end
        #     ccnt +=1
        #     continue

        detections = []
        for bbox, score, classe, color, feature in zip(boxes, scores, classes,
                                                       colors, features):
            detections.append(Detection(bbox, score, classe, color, feature))

        # Run non-maximum suppression
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.score for d in detections])
        indices = preprocessing.non_max_suppression(boxes, 1.0, scores)
        detections = [detections[i] for i in indices]

        start_tr = time.time()
        # Update the tracker
        tracker.predict()
        tracker.update(detections)
        print("tracker 更新:", float(time.time() - start_tr))

        # Iterate over the tracks and draw their tracking info
        track_count = 0
        track_total = 0
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            y1, x1, y2, x2 = np.array(track.to_tlbr(), dtype=np.int32)
def process_frame(dataset):
    # Definition of the parameters
    to_process = {}
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0
    producer = KafkaProducer(
        bootstrap_servers='master:6667',
        value_serializer=lambda m: json.dumps(m).encode('utf8'))

    data = dataset.collect()

    # deep_sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)
    dt_now = dt.datetime.now()
    for datum in data:
        event = json.loads(datum[1])
        dt_event = dt.datetime.strptime(event['timestamp'],
                                        '%Y-%m-%dT%H:%M:%S.%f')
        delta = dt_now - dt_event
        print("timestamp = " + str(dt_event))
        if delta.seconds > 5:
            continue
        to_process[event['camera_id']] = event
    for key, event in to_process.items():
        t1 = time.time()
        try:
            decoded = base64.b64decode(event['image'])
        except TypeError:
            return

        filename = '/home/haohsiang/Vigilancia-Distributed/codev1frame.jpg'  # I assume you have a way of picking unique filenames
        with open(filename, 'wb') as f:
            f.write(decoded)
        frame = cv2.imread(filename)
        #ret, frame = video_capture.read()  # frame shape 640*480*3
        image = Image.fromarray(frame[..., ::-1])  #bgr to rgb
        boxs = yolo.detect_image(image)
        print("box_num", len(boxs))
        features = encoder(frame, boxs)

        # score to 1.0 here).
        detections = [
            Detection(bbox, 1.0, feature)
            for bbox, feature in zip(boxs, features)
        ]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap,
                                                    scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            cv2.putText(frame, str(track.track_id),
                        (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200,
                        (0, 255, 0), 2)
        for det in detections:
            bbox = det.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)

        #sent result to kafka
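        # Note: `track` and `bbox` below are simply whatever the last iterations of the
        # loops above left behind, so only the most recently drawn box is reported; if no
        # track has ever been confirmed they may be undefined.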
        if len(boxs) > 0:
            print(
                str(track.track_id) + ' :' + str(bbox[0]) + ' ' +
                str(bbox[1]) + ' ' + str(bbox[2]) + ' ' + str(bbox[3]))
            print(dt.datetime.now().time())
            result = {
                'ID': str(track.track_id),
                'timestamp': dt.datetime.now().isoformat(),
                'location_x': str(bbox[0]),
                'w': str(bbox[2])
            }
            producer.send('position', result)
            producer.flush()

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
Example 30
def main(yolo):

    # Determining the FPS of a video having variable frame rate
    # cv2.CAP_PROP_FPS is not used since it returns 'infinity' for variable frame rate videos
    filename = "clip1.mp4"
    # Determining the total duration of the video
    clip = VideoFileClip(filename)

    cap2 = cv2.VideoCapture(filename)
    co = 0
    ret2 = True
    while ret2:
        ret2, frame2 = cap2.read()
        # Determining the total number of frames
        co += 1
    cap2.release()

    # Computing the average FPS of the video
    Input_FPS = co / clip.duration
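    # A possible shortcut (just a sketch, not used here): if the container reports a
    # reliable frame count, the decode-every-frame loop above could be replaced by
    #   co = int(cap2.get(cv2.CAP_PROP_FRAME_COUNT))
    #   Input_FPS = co / clip.duration
    # Some variable-frame-rate files misreport CAP_PROP_FRAME_COUNT, which is presumably
    # why the frames are counted by hand above.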

    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0
    frame_count = 0

    # Implementing Deep Sort algorithm
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    # Cosine distance is used as the metric
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    video_capture = cv2.VideoCapture(filename)

    # Define the codec and create a VideoWriter object to save the output video
    out = cv2.VideoWriter(
        'output.mp4', cv2.VideoWriter_fourcc(*'MP4V'), Input_FPS,
        (int(video_capture.get(3)), int(video_capture.get(4))))

    # To calculate the frames processed by the deep sort algorithm per second
    fps = 0.0

    # Initializing empty variables for counting and tracking purpose
    queue_track_dict = {}  # Count time in queue
    queue_track_dict_2 = {}
    alley_track_dict = {}  # Count time in alley
    store_track_dict = {}  # Count total time in store
    latest_frame = {}  # Track the last frame in which a person was identified
    reidentified = {}  # Yes or No: whether the person has been re-identified at a later point in time
    plot_head_count_store = []  # y-axis for Footfall Analysis
    plot_head_count_queue = []  # y-axis for Footfall Analysis
    plot_time = []  # x-axis for Footfall Analysis

    # Loop to process each frame and track people
    while True:
        ret, frame = video_capture.read()
        if ret != True:
            break

        maxIntensity = 255.0
        phi = 1
        theta = 1
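        # Power-law (gamma) brightness adjustment: with phi = theta = 1 this computes
        # 255 * (frame / 255) ** 1.3, which slightly darkens mid-tones before detection.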
        newImage1 = (maxIntensity / phi) * (frame /
                                            (maxIntensity / theta))**1.3
        frame = array(newImage1, dtype=uint8)
        cv2.imwrite('testing.jpg', frame)

        if frame_count == 5000:
            break

        head_count_store = 0
        head_count_queue = 0
        t1 = time.time()

        image = Image.fromarray(frame[..., ::-1])  # BGR to RGB conversion
        boxs = yolo.detect_image(image)
        features = encoder(frame, boxs)

        # Getting the detections having score of 0.0 to 1.0
        detections = [
            Detection(bbox, 1.0, feature)
            for bbox, feature in zip(boxs, features)
        ]

        # Run non-maxima suppression on the bounding boxes
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap,
                                                    scores)
        detections = [detections[i] for i in indices]

        # Call the tracker to associate tracking boxes to detection boxes
        tracker.predict()
        tracker.update(detections)

        # Defining the co-ordinates of the area of interest
        pts3 = np.array([[133, 120], [303, 120], [297, 175], [127, 175]],
                        np.int32)
        pts3 = pts3.reshape((-1, 1, 2))
        pts2 = np.array(
            [[380, 250], [380, 365], [175, 480], [0, 480], [0, 380]], np.int32)
        pts2 = pts2.reshape((-1, 1, 2))  # Queue Area - 1
        pts = np.array([[0, 380], [0, 0], [640, 0], [640, 480], [170, 480],
                        [380, 360], [380, 250]], np.int32)
        pts = pts.reshape((-1, 1, 2))  # Alley Region
        cv2.polylines(frame, [pts], True, (255, 0, 255), thickness=1)
        cv2.polylines(frame, [pts3], True, (248, 197, 39), thickness=2)
        cv2.polylines(frame, [pts2], True, (0, 255, 255), thickness=2)

        # Drawing tracker boxes and frame count for people inside the areas of interest
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()

            # Checking if the person is within an area of interest
            queue_point_test_2 = center_point_inside_polygon(bbox, pts3)
            queue_point_test = center_point_inside_polygon(bbox, pts2)
            alley_point_test = center_point_inside_polygon(bbox, pts)

            # Checking if a person has been reidentified in a later frame
            if queue_point_test == 'inside' or alley_point_test == 'inside' or queue_point_test_2 == 'inside':
                if track.track_id in latest_frame.keys():
                    if latest_frame[track.track_id] != frame_count - 1:
                        reidentified[track.track_id] = 1

            # Initializing variables incase a new person has been seen by the model
            if queue_point_test == 'inside' or alley_point_test == 'inside' or queue_point_test_2 == 'inside':
                head_count_store += 1
                if track.track_id not in store_track_dict.keys():
                    store_track_dict[track.track_id] = 0
                    queue_track_dict[track.track_id] = 0
                    queue_track_dict_2[track.track_id] = 0
                    alley_track_dict[track.track_id] = 0
                    reidentified[track.track_id] = 0

            # Processing for people inside the Queue Area - 1
            if queue_point_test == 'inside':
                head_count_queue += 1
                queue_track_dict[track.track_id] += 1
                latest_frame[track.track_id] = frame_count
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])), (0, 255, 255), 2)
                cv2.rectangle(frame, (int(bbox[0]) - 1, int(bbox[1]) - 15),
                              (int(bbox[2]) + 1, int(bbox[1])), (0, 255, 255),
                              -1)
                wait_time = round(
                    (queue_track_dict[track.track_id] / Input_FPS), 2)
                cv2.putText(frame,
                            str(wait_time) + "s", (int(bbox[0]), int(bbox[1])),
                            5, 0.9, (0, 0, 0), 1)

            # Processing for people inside the Queue Area - 2
            if queue_point_test_2 == 'inside':
                head_count_queue += 1
                queue_track_dict_2[track.track_id] += 1
                latest_frame[track.track_id] = frame_count
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])), (248, 197, 39), 2)
                cv2.rectangle(frame, (int(bbox[0]) - 1, int(bbox[1]) - 15),
                              (int(bbox[2]) + 1, int(bbox[1])), (248, 197, 39),
                              -1)
                wait_time = round(
                    (queue_track_dict_2[track.track_id] / Input_FPS), 2)
                cv2.putText(frame,
                            str(wait_time) + "s", (int(bbox[0]), int(bbox[1])),
                            5, 0.9, (0, 0, 0), 1)

            # Processing for people inside the Alley Region
            if alley_point_test == 'inside':
                alley_track_dict[track.track_id] += 1
                latest_frame[track.track_id] = frame_count
                # cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),(255,255,255), 2)
                # cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 0.8, (0, 0, 0), 4)
                # cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 0.8, (0, 255, 77), 2)

            # Getting the total Store time for a person
            if track.track_id in store_track_dict.keys():
                store_track_dict[track.track_id] = queue_track_dict[
                    track.track_id] + alley_track_dict[track.track_id]

        # Drawing bounding box detections for people inside the store
        for det in detections:
            bbox = det.to_tlbr()

            # Checking if the person is within an area of interest
            queue_point_test = center_point_inside_polygon(bbox, pts)
            alley_point_test = center_point_inside_polygon(bbox, pts2)

            # if queue_point_test == 'inside' or alley_point_test == 'inside':
            # cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255,0,0), 	2)

        # Video Overlay - Head Count Data at that instant
        cv2.putText(frame, "Count: " + str(head_count_store), (30, 610),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1.5, (0, 0, 0), 3,
                    cv2.LINE_AA, False)
        cv2.putText(frame, "Count: " + str(head_count_store), (30, 610),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1.5, (0, 255, 77), 2,
                    cv2.LINE_AA, False)

        # Calculating the average wait time in queue
        total_people = len([v for v in queue_track_dict.values() if v > 0])
        total_queue_frames = sum(v for v in queue_track_dict.values() if v > 0)
        avg_queue_frames = 0
        if total_people != 0:
            avg_queue_frames = total_queue_frames / total_people
        avg_queue_time = round((avg_queue_frames / Input_FPS), 2)

        # Video Overlay - Average Wait Time in Queue
        cv2.putText(frame, "Avg Queue Time: " + str(avg_queue_time) + 's',
                    (30, 690), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1.5, (0, 0, 0),
                    3, cv2.LINE_AA, False)
        cv2.putText(frame, "Avg Queue Time: " + str(avg_queue_time) + 's',
                    (30, 690), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1.5,
                    (0, 255, 77), 2, cv2.LINE_AA, False)

        # Calculating the average wait time in the store
        total_people = len(store_track_dict)
        total_store_frames = sum(store_track_dict.values())
        avg_store_frames = 0
        if total_people != 0:
            avg_store_frames = total_store_frames / total_people
        avg_store_time = round((avg_store_frames / Input_FPS), 2)

        # Video Overlay - Average Store time
        cv2.putText(frame, "Avg Store Time: " + str(avg_store_time) + 's',
                    (30, 650), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1.5, (0, 0, 0),
                    3, cv2.LINE_AA, False)
        cv2.putText(frame, "Avg Store Time: " + str(avg_store_time) + 's',
                    (30, 650), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1.5,
                    (0, 255, 77), 2, cv2.LINE_AA, False)

        # Write the frame onto the VideoWriter object
        out.write(frame)

        # Calculating the frames processed per second by the model
        fps = (fps + (1. / (time.time() - t1))) / 2
        frame_count += 1

        # Printing processing status to track completion
        op = "FPS_" + str(frame_count) + "/" + str(co) + ": " + str(
            round(fps, 2))
        print("\r" + op, end="")

        # Adding plot values for Footfall Analysis every 2 seconds (hard coded for now)
        if frame_count % 50 == 0:
            plot_time.append(round((frame_count / Input_FPS), 2))
            plot_head_count_store.append(head_count_store)
            plot_head_count_queue.append(head_count_queue)

        # Press Q to stop the video
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    # Defining data to be written into the csv file - Detailed Report
    csv_columns = ['Unique Person ID', 'Queue Time in AOI', 'Total Store Time']
    csv_data = []
    csv_row = {}
    detailed_csv_file = 'Detailed_Store_Report.csv'
    queue_unique_count = 0
    for k, v in store_track_dict.items():
        csv_row = {}
        csv_row = {
            csv_columns[0]: k,
            csv_columns[1]: round((queue_track_dict[k] / Input_FPS), 2),
            csv_columns[2]: round((v / Input_FPS), 2)
        }
        if round((queue_track_dict[k] / Input_FPS), 2) > 1:
            queue_unique_count += 1
            csv_data.append(csv_row)

    # Writing the data into the csv file - Detailed Report
    with open(detailed_csv_file, 'w') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=csv_columns)
        writer.writeheader()
        for data in csv_data:
            writer.writerow(data)

    # Defining data to be written into the csv file - Detailed Report
    csv_columns = ['Unique Person ID', 'Queue Time in AOI', 'Total Store Time']
    csv_data = []
    csv_row = {}
    detailed_csv_file = 'Detailed_Store_Report_2.csv'
    queue_unique_count = 0
    for k, v in store_track_dict.items():
        csv_row = {}
        csv_row = {
            csv_columns[0]: k,
            csv_columns[1]: round((queue_track_dict_2[k] / Input_FPS), 2),
            csv_columns[2]: round((v / Input_FPS), 2)
        }
        if round((queue_track_dict_2[k] / Input_FPS), 2) > 1:
            queue_unique_count += 1
            csv_data.append(csv_row)

    # Writing the data into the csv file - Detailed Report
    with open(detailed_csv_file, 'w') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=csv_columns)
        writer.writeheader()
        for data in csv_data:
            writer.writerow(data)

    # Defining data to be written into the csv file - Brief Report
    csv_columns_brief = [
        'Total Head Count', 'Total Queue Time', 'Average Queue Time',
        'Total Store Time', 'Average Store Time'
    ]
    brief_csv_file = 'Brief_Store_Report.csv'
    csv_data_brief = {
        csv_columns_brief[0]:
        queue_unique_count,
        csv_columns_brief[1]:
        round((sum(queue_track_dict.values()) / Input_FPS), 2),
        csv_columns_brief[2]:
        avg_queue_time,
        csv_columns_brief[3]:
        round((sum(store_track_dict.values()) / Input_FPS), 2),
        csv_columns_brief[4]:
        avg_store_time
    }

    # Writing the data into the csv file - Brief Report
    with open(brief_csv_file, 'w') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=csv_columns_brief)
        writer.writeheader()
        writer.writerow(csv_data_brief)

    # Releasing objects created
    video_capture.release()
    out.release()
    cv2.destroyAllWindows()
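
# The helper center_point_inside_polygon used in the example above is not part of this
# snippet. A minimal sketch of what it is assumed to do (return the string 'inside' when
# the centre of a tlbr bounding box falls inside the given polygon) could look like this:

import cv2

def center_point_inside_polygon(bbox, polygon):
    # bbox is (x1, y1, x2, y2); take its centre point.
    cx = (bbox[0] + bbox[2]) / 2.0
    cy = (bbox[1] + bbox[3]) / 2.0
    # pointPolygonTest returns > 0 inside, 0 on the edge, < 0 outside.
    if cv2.pointPolygonTest(polygon, (cx, cy), False) >= 0:
        return 'inside'
    return 'outside'
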
Example 31
def main(yolo):

    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0
    
    # deep_sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename,batch_size=1)
    
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    writeVideo_flag = True 
    
    video_capture = cv2.VideoCapture(0)

    if writeVideo_flag:
        # Define the codec and create VideoWriter object
        w = int(video_capture.get(3))
        h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        out = cv2.VideoWriter('output.avi', fourcc, 15, (w, h))
        list_file = open('detection.txt', 'w')
        frame_index = -1 
        
    fps = 0.0
    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if ret != True:
            break
        t1 = time.time()

        image = Image.fromarray(frame)
        boxs = yolo.detect_image(image)
       # print("box_num",len(boxs))
        features = encoder(frame,boxs)
        
        # score to 1.0 here).
        detections = [Detection(bbox, 1.0, feature) for bbox, feature in zip(boxs, features)]
        
        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]
        
        # Call the tracker
        tracker.predict()
        tracker.update(detections)
        
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue 
            bbox = track.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),(255,255,255), 2)
            cv2.putText(frame, str(track.track_id),(int(bbox[0]), int(bbox[1])),0, 5e-3 * 200, (0,255,0),2)

        for det in detections:
            bbox = det.to_tlbr()
            cv2.rectangle(frame,(int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),(255,0,0), 2)
            
        cv2.imshow('', frame)
        
        if writeVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1
            list_file.write(str(frame_index)+' ')
            if len(boxs) != 0:
                for i in range(0,len(boxs)):
                    list_file.write(str(boxs[i][0]) + ' '+str(boxs[i][1]) + ' '+str(boxs[i][2]) + ' '+str(boxs[i][3]) + ' ')
            list_file.write('\n')
            
        fps  = ( fps + (1./(time.time()-t1)) ) / 2
        print("fps= %f"%(fps))
        
        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    video_capture.release()
    if writeVideo_flag:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()