def main(yolo):
    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # Deep SORT
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    writeVideo_flag = True
    asyncVideo_flag = False

    file_path = 'video.webm'
    if asyncVideo_flag:
        video_capture = VideoCaptureAsync(file_path)
    else:
        video_capture = cv2.VideoCapture(file_path)

    if asyncVideo_flag:
        video_capture.start()

    if writeVideo_flag:
        if asyncVideo_flag:
            w = int(video_capture.cap.get(3))
            h = int(video_capture.cap.get(4))
        else:
            w = int(video_capture.get(3))
            h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter('output_yolov3.avi', fourcc, 30, (w, h))
        frame_index = -1

    fps = 0.0
    fps_imutils = imutils.video.FPS().start()

    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if ret != True:
            break

        t1 = time.time()

        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb
        # Run detection once and unpack boxes and confidences
        # (the original called detect_image twice, doubling inference time).
        boxs, confidence = yolo.detect_image(image)[:2]
        features = encoder(frame, boxs)
        detections = [Detection(bbox, confidence, feature)
                      for bbox, confidence, feature in zip(boxs, confidence, features)]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200, (0, 255, 0), 2)

        for det in detections:
            bbox = det.to_tlbr()
            score = "%.2f" % round(det.confidence * 100, 2)
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)
            cv2.putText(frame, score + '%', (int(bbox[0]), int(bbox[3])), 0, 5e-3 * 130, (0, 255, 0), 2)

        cv2.imshow('', frame)

        if writeVideo_flag:  # and not asyncVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1

        fps_imutils.update()

        fps = (fps + (1. / (time.time() - t1))) / 2
        print("FPS = %f" % (fps))

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    fps_imutils.stop()
    print('imutils FPS: {}'.format(fps_imutils.fps()))

    if asyncVideo_flag:
        video_capture.stop()
    else:
        video_capture.release()

    if writeVideo_flag:
        out.release()

    cv2.destroyAllWindows()
def main(_argv):
    # Definition of the parameters
    max_cosine_distance = 0.4
    nn_budget = None
    nms_max_overlap = 1.0

    # initialize deep sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    # calculate cosine distance metric
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    # initialize tracker
    tracker = Tracker(metric)

    # load configuration for object detector
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    input_size = FLAGS.size
    video_path = FLAGS.video

    # load tflite model if flag is set
    if FLAGS.framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)
    # otherwise load standard tensorflow saved model
    else:
        saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING])
        infer = saved_model_loaded.signatures['serving_default']

    ## Retrieve the object moving sequence from the text file generated by record_positions.py
    object_move_pos_strings = []
    object_move_pos = []

    # Retrieve object positions from the .txt file (the with-block closes the file automatically).
    with open("/home/pierre/MasterThesisObjectTracking/positions.txt", 'r') as object_pos_file:
        for line in object_pos_file:
            pos = str.split(line)
            object_move_pos_strings.append(pos)

    # Convert positions to integers
    for element in object_move_pos_strings:
        ints = [int(item) for item in element]
        object_move_pos.append(ints)

    # Boolean for when the object mover sequence is still running
    still_moving = True
    # Boolean that turns True when the object has finished moving
    end_trial = False
    moving_trigger = 0

    # Dynamixel setup: connecting
    Ax12.open_port()
    Ax12.set_baudrate()

    # Declare servomotors for pan and tilt
    camera_panning_motor = Ax12(1)
    camera_tilt_motor = Ax12(2)

    # Declare servomotors for the object
    object_pan1 = Ax12(3)
    object_tilt1 = Ax12(4)
    object_tilt2 = Ax12(5)
    object_pan2 = Ax12(6)

    start_pan = 500
    start_tilt = 230

    # define video codec
    fourcc = cv2.VideoWriter_fourcc(*'XVID')

    bound_box = [0, 0, 0, 0]
    frames_per_second = 0

    time.sleep(0.2)

    # Set start position
    camera_panning_motor.set_position(start_pan)
    camera_tilt_motor.set_position(start_tilt)

    # Set start position for the object
    move_object_motors(object_move_pos[0], speed=500)

    camera_panning_motor.set_torque_limit(1023)
    camera_tilt_motor.set_torque_limit(1023)
    camera_panning_motor.set_moving_speed(1000)
    camera_tilt_motor.set_moving_speed(1000)

    one_degree = int(camera_panning_motor.get_torque_limit() / 300)

    # Create class instance of second counter
    main_count = timer_sec.SecondCounter()
    # Create class instance of second counter for when the tracker is in the ROI
    roi_count = timer_sec.Counter_tread()

    roi_seconds = 0
    returned_roi_seconds = 0
    in_roi = False
    been_in_roi = False
    first_entry = True
    start_count = True
    left_roi = False
    roi_seconds_start = 0
    roi_seconds_stop = 0
    last_move_time = 0
    roi = 35
    margin = 70
    tracksuccess = False
    track_lost = False
    refound = 0
    first_detection = True

    # Counter for iterating over the object position list
    next_object_pos = 0
    object_speed = 100
    move_trigger = 0

    # Function that returns the center of the ROI
    def goalPosition(bbox):
        x, y = int(bbox[0]), int(bbox[1])
        w = int(bbox[2]) - int(bbox[0])
        h = int(bbox[3]) - int(bbox[1])
        center_x = int(x + w / 2)
        center_y = int(y + h / 2)
        center = [center_x, center_y]
        return center

    # Function that calculates the distance between the center of the frame and the center of the ROI
    def calculateDistance(cam_center, track_center):
        ROI = False
        x_diff = cam_center[0] - track_center[0]
        y_diff = cam_center[1] - track_center[1]
        difference = [x_diff, y_diff]
        if abs(x_diff) <= roi and abs(y_diff) <= roi:
            ROI = True
        return difference, ROI

    # begin video capture
    try:
        vid = cv2.VideoCapture(int(video_path))
    except:
        vid = cv2.VideoCapture(video_path)

    out = None

    # get video ready to save locally if flag is set
    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

    import os
    import shutil
    output_dir = "{}/outputs".format(os.getcwd())
    if os.path.exists(output_dir):
        shutil.rmtree(output_dir)
    os.makedirs(output_dir)
    frames_dir = "{}/frames".format(output_dir)
    text_dir = "{}/txt_files".format(output_dir)
    video_dir = "{}/video".format(output_dir)
    os.makedirs(frames_dir)
    os.makedirs(text_dir)
    os.makedirs(video_dir)

    # Create output
    return_value, frame = vid.read()
    videOutput = cv2.VideoWriter("{}/output.avi".format(video_dir), fourcc, 30,
                                 (frame.shape[1], frame.shape[0]))
    videoOutput_no_box = cv2.VideoWriter("{}/output_no_graphics.avi".format(video_dir), fourcc, 30,
                                         (frame.shape[1], frame.shape[0]))

    frame_num = 0

    # start the main count of seconds
    main_count.start()

    # while video is running
    while True:
        return_value, frame = vid.read()
        if return_value:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image = Image.fromarray(frame)
        else:
            print('Video has ended or failed, try a different video format!')
            break
        frame_num += 1
        print('Frame #: ', frame_num)
        frame_size = frame.shape[:2]
        image_data = cv2.resize(frame, (input_size, input_size))
        image_data = image_data / 255.
        image_data = image_data[np.newaxis, ...].astype(np.float32)
        start_time = time.time()

        no_graphics = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        videoOutput_no_box.write(no_graphics)
        cv2.imwrite("{}/frame_{}.jpg".format(frames_dir, frame_num), no_graphics)

        # Check whether there are still positions left in the object moving sequence
        if next_object_pos < len(object_move_pos):
            print(f'object_move_pos length = {len(object_move_pos)}')
            still_moving = True
        else:
            print("Object moving sequence is finished")
            if still_moving:
                move_finished_time = main_count.peek()
            still_moving = False

        if still_moving:
            move_object_motors(object_move_pos[next_object_pos], speed=object_speed)

        # run detections on tflite if flag is set
        if FLAGS.framework == 'tflite':
            interpreter.set_tensor(input_details[0]['index'], image_data)
            interpreter.invoke()
            pred = [interpreter.get_tensor(output_details[i]['index'])
                    for i in range(len(output_details))]
            # run detections using yolov3 if flag is set
            if FLAGS.model == 'yolov3' and FLAGS.tiny == True:
                boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25,
                                                input_shape=tf.constant([input_size, input_size]))
            else:
                boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25,
                                                input_shape=tf.constant([input_size, input_size]))
        else:
            batch_data = tf.constant(image_data)
            pred_bbox = infer(batch_data)
            for key, value in pred_bbox.items():
                boxes = value[:, :, 0:4]
                pred_conf = value[:, :, 4:]

        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores=tf.reshape(pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
            max_output_size_per_class=50,
            max_total_size=50,
            iou_threshold=FLAGS.iou,
            score_threshold=FLAGS.score
        )

        # convert data to numpy arrays and slice out unused elements
        num_objects = valid_detections.numpy()[0]
        bboxes = boxes.numpy()[0]
        bboxes = bboxes[0:int(num_objects)]
        scores = scores.numpy()[0]
        scores = scores[0:int(num_objects)]
        classes = classes.numpy()[0]
        classes = classes[0:int(num_objects)]

        # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height
        original_h, original_w, _ = frame.shape
        bboxes = utils.format_boxes(bboxes, original_h, original_w)

        # store all predictions in one parameter for simplicity when calling functions
        pred_bbox = [bboxes, scores, classes, num_objects]

        # read in all class names from config
        class_names = utils.read_class_names(cfg.YOLO.CLASSES)

        # by default allow all classes in .names file
        # allowed_classes = list(class_names.values())

        # custom allowed classes (uncomment line below to customize tracker for only people)
        allowed_classes = ['cell phone', 'cup']

        # loop through objects and use class index to get class name, allow only classes in allowed_classes list
        names = []
        deleted_indx = []
        for i in range(num_objects):
            class_indx = int(classes[i])
            class_name = class_names[class_indx]
            if class_name not in allowed_classes:
                deleted_indx.append(i)
            else:
                names.append(class_name)
        names = np.array(names)
        count = len(names)
        if FLAGS.count:
            cv2.putText(frame, "Objects being tracked: {}".format(count), (5, 35),
                        cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (0, 255, 0), 2)
            print("Objects being tracked: {}".format(count))

        # delete detections that are not in allowed_classes
        bboxes = np.delete(bboxes, deleted_indx, axis=0)
        scores = np.delete(scores, deleted_indx, axis=0)

        # encode yolo detections and feed to tracker
        features = encoder(frame, bboxes)
        detections = [Detection(bbox, score, class_name, feature)
                      for bbox, score, class_name, feature in zip(bboxes, scores, names, features)]

        # initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maxima suppression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        # Draw a circle in the center of the frame
        camera_center = [int(frame.shape[1] / 2), int(frame.shape[0] / 2)]
        cv2.circle(frame, (camera_center[0], camera_center[1]), radius=1, color=(0, 255, 0), thickness=10)

        distance = [0, 0]

        # update tracks
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                track_lost = True
                tracksuccess = False
                bound_box = [0, 0, 0, 0]
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()
            tracksuccess = True
            if tracksuccess and track_lost:
                refound += 1
                track_lost = False
            first_detection = False

            # draw bbox on screen
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2)
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1] - 30)),
                          (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])),
                          color, -1)
            cv2.putText(frame, class_name + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)),
                        0, 0.75, (255, 255, 255), 2)

            # Calculate center of bounding box and draw circle
            center = goalPosition(bbox)
            cv2.circle(frame, (center[0], center[1]), radius=0, color=color, thickness=10)
            print("Bbox: {0}".format(bbox))

            # Store value for the output.csv file
            bound_box = bbox

            # difference in x-coordinates respectively y-coordinates
            distance, in_roi = calculateDistance(camera_center, center)
            moveMotors(distance[0], distance[1], camera_panning_motor, camera_tilt_motor, roi, margin)

            # if enable info flag then print details about each track
            if FLAGS.info:
                print("Tracker ID: {}, Class: {}, BBox Coords (xmin, ymin, xmax, ymax): {}".format(
                    str(track.track_id), class_name,
                    (int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]))))

        # calculate frames per second of running detections
        fps = 1.0 / (time.time() - start_time)
        print("FPS: %.2f" % fps)
        frames_per_second = fps

        # FPS text
        cv2.putText(frame, "FPS {0}".format(str(int(fps))), (75, 50),
                    cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 255), 2)

        result = np.asarray(frame)
        result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

        # moving interval in seconds
        moving_interval = 3
        if still_moving:
            if main_count.peek() > 1 and main_count.peek() < 3:
                move_trigger = main_count.peek()
            elif main_count.peek() > move_trigger + 0.02:
                move_trigger += moving_interval
                next_object_pos += 1
        else:
            print("Object move choreography is finished")
            if main_count.peek() > move_finished_time + 2:
                end_trial = True

        ## Start the timer when the ROI is centered
        if in_roi and tracksuccess:
            if start_count:
                roi_count.start()
                start_count = False
            if first_entry:
                roi_seconds = roi_count.peek()
            if left_roi:
                roi_seconds_start = roi_count.peek()
            been_in_roi = True
            left_roi = False

        ## Stop the timer when the ROI is not centered and accumulate time in the counter
        if been_in_roi and tracksuccess and in_roi is False:
            print("roi_stop: {}".format(roi_seconds_stop))
            if first_entry:
                roi_seconds_stop = 0
            else:
                roi_seconds_stop = roi_count.peek() - roi_seconds_start
            roi_seconds += roi_seconds_stop
            been_in_roi = False
            first_entry = False
            left_roi = True

        ## Graphics to show timer counters
        cv2.putText(result, "Sec {0}".format(str(int(main_count.peek()))), (500, 50),
                    cv2.FONT_HERSHEY_COMPLEX, 1, (0, 255, 255), 2)
        cv2.putText(result, "Sec in ROI {0}".format(str(int(roi_seconds))), (400, 85),
                    cv2.FONT_HERSHEY_COMPLEX, 1, (254, 0, 255), 2)

        # cv2.imshow("Frame", frame)
        videOutput.write(result)

        mean_roi_sec = round((roi_seconds / main_count.peek()) * 100, 2)
        camera_pos = [camera_panning_motor.get_position(), camera_tilt_motor.get_position()]
        object_pos = [Ax12(3).get_position(), Ax12(4).get_position(),
                      Ax12(5).get_position(), Ax12(6).get_position()]

        # Create a csv to store results (the with-block closes the file automatically)
        with open('outputs/output.csv', 'a', newline='') as csvfile:
            fieldnames = ['Frame', 'FPS', 'Distance_x', 'Distance_y', 'bbox_xmin', 'bbox_ymin',
                          'bbox_xmax', 'bbox_ymax', 'Prop_roi(%)', 'Time', 'Roi_time', 'In_roi',
                          'Tracking_success', 'Refound_tracking', 'camera_pan', 'camera_tilt',
                          'object_pan1', 'object_tilt1', 'object_tilt2', 'object_pan2',
                          'img_center_x', 'img_center_y']
            csv_writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

            # Fill in values in the csv file
            if frame_num == 1:
                csv_writer.writeheader()
            csv_writer.writerow({'Frame': str(frame_num),
                                 'FPS': str(int(frames_per_second)),
                                 'Distance_x': str(distance[0]),
                                 'Distance_y': str(distance[1]),
                                 'bbox_xmin': str(int(bound_box[0])),
                                 'bbox_ymin': str(int(bound_box[1])),
                                 'bbox_xmax': str(int(bound_box[2])),
                                 'bbox_ymax': str(int(bound_box[3])),
                                 'Prop_roi(%)': str(mean_roi_sec),
                                 'Time': str(round(main_count.peek(), 2)),
                                 'Roi_time': str(round(roi_seconds, 2)),
                                 'Tracking_success': str(tracksuccess),
                                 'Refound_tracking': str(refound - 1),
                                 'camera_pan': str(camera_pos[0]),
                                 'camera_tilt': str(camera_pos[1]),
                                 'object_pan1': str(object_pos[0]),
                                 'object_tilt1': str(object_pos[1]),
                                 'object_tilt2': str(object_pos[2]),
                                 'object_pan2': str(object_pos[3]),
                                 'img_center_x': str(camera_center[0]),
                                 'img_center_y': str(camera_center[1])})

        if not FLAGS.dont_show:
            cv2.imshow("Output Video", result)

        # if output flag is set, save video file
        if FLAGS.output:
            out.write(result)
        if cv2.waitKey(1) & 0xFF == ord('q') or end_trial:
            break

    cv2.destroyAllWindows()
    videOutput.release()
    videoOutput_no_box.release()
    Ax12.close_port()

    # stop the count and get elapsed time
    main_seconds = main_count.finish()
    roi_count.finish()
def main(yolo):
    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 0.7

    # deep_sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    writeVideo_flag = True
    webcam_flag = False

    # some links from earthcam:
    #   https://github.com/Crazycook/Working/blob/master/Webcams.txt
    #   https://www.vlcm3u.com/web-cam-live/
    video_url = 'https://videos3.earthcam.com/fecnetwork/9974.flv/chunklist_w1421640637.m3u8'  # NYC
    # video_url = 'https://videos3.earthcam.com/fecnetwork/hdtimes10.flv/chunklist_w48750913.m3u8'

    if webcam_flag:
        video_capture = cv2.VideoCapture(0)
    else:
        video_capture = cv2.VideoCapture()
        video_capture.set(cv2.CAP_PROP_BUFFERSIZE, 2)
        video_capture.open(video_url)

    if writeVideo_flag:
        # Define the codec and create VideoWriter object
        w = int(video_capture.get(3))
        h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        out = cv2.VideoWriter('output.avi', fourcc, 15, (w, h))
        list_file = open('detection.txt', 'w')
        frame_index = -1

    fps = 0.0
    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if ret != True:
            break
        t1 = time.time()

        # image = Image.fromarray(frame)
        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb
        boxs = yolo.detect_image(image)
        # print("box_num", len(boxs))
        features = encoder(frame, np.array(boxs)[:, 0:4].tolist())

        # score to 1.0 here).
        detections = [Detection(bbox, 1.0, feature) for bbox, feature in zip(boxs, features)]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200, (0, 255, 0), 2)

        for det in detections:
            bbox = det.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)
            # Detection stores its score in .confidence
            # (the original referenced a non-existent .score attribute).
            cv2.putText(frame, str(det.confidence), (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200, (0, 255, 0), 2)

        cv2.imshow('', frame)

        if writeVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(boxs) != 0:
                for i in range(0, len(boxs)):
                    list_file.write(str(boxs[i][0]) + ' ' + str(boxs[i][1]) + ' ' +
                                    str(boxs[i][2]) + ' ' + str(boxs[i][3]) + ' ')
            list_file.write('\n')

        fps = (fps + (1. / (time.time() - t1))) / 2
        print("fps= %f" % (fps))

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    video_capture.release()
    if writeVideo_flag:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
def main(yolo):
    start = time.time()

    # Definition of the parameters
    max_cosine_distance = 0.5  # 0.9, threshold controlling the cosine distance
    nn_budget = None
    nms_max_overlap = 0.3  # non-maximum suppression threshold

    counter = []

    # deep_sort
    model_filename = 'model_data/market1501.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    writeVideo_flag = True
    # video_path = "../../yolo_dataset/t1_video/test_video/det_t1_video_00025_test.avi"
    video_capture = cv2.VideoCapture(args["input"])

    if writeVideo_flag:
        # Define the codec and create VideoWriter object
        w = int(video_capture.get(3))
        h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        out = cv2.VideoWriter('./output/' + args["input"][43:57] + "_" + args["class"] + '_output.avi',
                              fourcc, 15, (w, h))
        list_file = open('detection.txt', 'w')
        frame_index = -1

    fps = 0.0

    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if ret != True:
            break
        t1 = time.time()

        # image = Image.fromarray(frame)
        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb
        boxs, class_names = yolo.detect_image(image)
        features = encoder(frame, boxs)
        # score to 1.0 here).
        detections = [Detection(bbox, 1.0, feature) for bbox, feature in zip(boxs, features)]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        i = int(0)
        indexIDs = []
        c = []
        boxes = []
        for det in detections:
            bbox = det.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            # boxes.append([track[0], track[1], track[2], track[3]])
            indexIDs.append(int(track.track_id))
            counter.append(int(track.track_id))
            bbox = track.to_tlbr()
            color = [int(c) for c in COLORS[indexIDs[i] % len(COLORS)]]

            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (color), 3)
            cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1] - 50)), 0, 5e-3 * 150, (color), 2)
            if len(class_names) > 0:
                class_name = class_names[0]
                cv2.putText(frame, str(class_names[0]), (int(bbox[0]), int(bbox[1] - 20)), 0, 5e-3 * 150, (color), 2)

            i += 1
            # bbox_center_point(x, y)
            center = (int(((bbox[0]) + (bbox[2])) / 2), int(((bbox[1]) + (bbox[3])) / 2))
            # track_id[center]
            pts[track.track_id].append(center)
            thickness = 5
            # center point
            cv2.circle(frame, (center), 1, color, thickness)

            # draw motion path
            for j in range(1, len(pts[track.track_id])):
                if pts[track.track_id][j - 1] is None or pts[track.track_id][j] is None:
                    continue
                thickness = int(np.sqrt(64 / float(j + 1)) * 2)
                cv2.line(frame, (pts[track.track_id][j - 1]), (pts[track.track_id][j]), (color), thickness)
                # cv2.putText(frame, str(class_names[j]), (int(bbox[0]), int(bbox[1] - 20)), 0, 5e-3 * 150, (255, 255, 255), 2)

        count = len(set(counter))
        cv2.putText(frame, "Total Object Counter: " + str(count), (int(20), int(120)), 0, 5e-3 * 200, (0, 255, 0), 2)
        cv2.putText(frame, "Current Object Counter: " + str(i), (int(20), int(80)), 0, 5e-3 * 200, (0, 255, 0), 2)
        cv2.putText(frame, "FPS: %f" % (fps), (int(20), int(40)), 0, 5e-3 * 200, (0, 255, 0), 3)
        cv2.namedWindow("YOLO3_Deep_SORT", 0)
        cv2.resizeWindow('YOLO3_Deep_SORT', 1024, 768)
        cv2.imshow('YOLO3_Deep_SORT', frame)

        if writeVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(boxs) != 0:
                for i in range(0, len(boxs)):
                    list_file.write(str(boxs[i][0]) + ' ' + str(boxs[i][1]) + ' ' +
                                    str(boxs[i][2]) + ' ' + str(boxs[i][3]) + ' ')
            list_file.write('\n')

        fps = (fps + (1. / (time.time() - t1))) / 2
        # print(set(counter))

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    print(" ")
    print("[Finish]")
    end = time.time()

    # Only report a result if at least one object was ever counted
    # (the original tested `len(pts[track.track_id]) != None`, which is always
    # True and crashes when no track was ever created).
    if count != 0:
        print(args["input"][43:57] + ": " + str(count) + " " + str(class_name) + ' Found')
    else:
        print("[No Found]")

    video_capture.release()
    if writeVideo_flag:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
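# Hedged sketch of the two module-level globals the function above assumes
# (`pts` and `COLORS`); the names match the usage above, but the exact sizes
# are assumptions taken from common variants of this script.
from collections import deque

# one bounded history of center points per possible track id, used to draw motion paths
pts = [deque(maxlen=30) for _ in range(9999)]
# a fixed random palette indexed by track id
COLORS = np.random.randint(0, 255, size=(200, 3), dtype="uint8")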
def main(_argv):
    # Definition of the parameters
    rospy.init_node('tracker', anonymous=True)
    rospy.Subscriber("/new_image_raw", Image, callback)

    max_cosine_distance = 0.5
    nn_budget = None
    nms_max_overlap = 1.0

    # initialize deep sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    yolo.load_weights(FLAGS.weights)
    logging.info('weights loaded')

    class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')

    # vid = cv2.VideoCapture(0)
    # vid = cv_image
    # cv_image = cv2.VideoCapture(FLAGS.video)

    out = None

    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = 720
        height = 862
        fps = 2
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))
        list_file = open('detection.txt', 'w')
        frame_index = -1

    fps = 0.0
    count = 0
    while True:
        # _, img = vid.read()
        img = cv_image
        # cv2.imshow("loading image", cv_image)  # image is coming over here fine

        if img is None:
            logging.warning("Empty Frame")
            time.sleep(0.1)
            count += 1
            if count < 3:
                continue
            else:
                break

        print("shape = {} , ".format(img.shape))
        print("dtype = {} , ".format(img.dtype))
        # img = np.array(img, dtype=np.uint16)

        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, FLAGS.size)

        t1 = time.time()
        boxes, scores, classes, nums = yolo.predict(img_in, steps=1)
        classes = classes[0]
        names = []
        for i in range(len(classes)):
            names.append(class_names[int(classes[i])])
        names = np.array(names)
        converted_boxes = convert_boxes(img, boxes[0])
        features = encoder(img, converted_boxes)
        detections = [Detection(bbox, score, class_name, feature)
                      for bbox, score, class_name, feature in zip(converted_boxes, scores[0], names, features)]

        # initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maxima suppression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2)
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1] - 30)),
                          (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])),
                          color, -1)
            cv2.putText(img, class_name + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)),
                        0, 0.75, (255, 255, 255), 2)

        ### UNCOMMENT BELOW IF YOU WANT CONSTANTLY CHANGING YOLO DETECTIONS TO BE SHOWN ON SCREEN
        # for det in detections:
        #     bbox = det.to_tlbr()
        #     cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)

        # print fps on screen
        fps = (fps + (1. / (time.time() - t1))) / 2
        cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
        # cv2.imshow("output", img.astype('float32'))
        cv2.imshow('output', img)

        if FLAGS.output:
            out.write(img)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(converted_boxes) != 0:
                for i in range(0, len(converted_boxes)):
                    list_file.write(str(converted_boxes[i][0]) + ' ' + str(converted_boxes[i][1]) + ' ' +
                                    str(converted_boxes[i][2]) + ' ' + str(converted_boxes[i][3]) + ' ')
            list_file.write('\n')

        # press q to quit
        if cv2.waitKey(1) == ord('q'):
            break

    # vid.release()
    if FLAGS.output:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
## If an error occurs here, an if statement is needed to handle the zero-detection case.
# (Fragment: assumes frame, frameIdx, personDetected_box, det_classes, features,
#  nms_max_overlap, tracker and preprocessing are defined by the surrounding script.)
detections = [Detection(box, det_class, feature)
              for box, det_class, feature in zip(personDetected_box, det_classes, features)]

detections_box = np.array([dlc.tlwh for dlc in detections])
detections_score = np.array([dlc.confidence for dlc in detections])
detections_indices = preprocessing.non_max_suppression(detections_box, nms_max_overlap, detections_score)
detections = [detections[i] for i in detections_indices]

# tracker.predict()
tracker.update(detections)

cv2.putText(frame, str(frameIdx), (0, 30), 0, 5e-3 * 200, (0, 255, 0), 2)  # To check frame number

''' Support Visualization '''
for track in tracker.tracks:
    bbox = [max(0, int(x)) for x in track.to_tlbr()]
    cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (0, 0, 255), 3)
    cv2.putText(frame, str(track.track_id), (bbox[0], bbox[1] + 30), 0, 5e-3 * 200, (0, 255, 0), 2)

# frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
class VideoStructor(object):
    def __init__(self, detector, extractor, cfg):
        self.detector = detector
        self.extractor = extractor
        self.cfg = cfg
        self.tracker = Tracker(self.cfg.tracktor.metric)
        self.results = {}

    def init_tracker(self):
        self.tracker = Tracker(self.cfg.tracktor.metric)
        self.results = {}

    def __call__(self):
        pass

    def step(self, frame):
        img = cv2.imread(frame.filename)
        det_cfg = self.cfg.tracktor.detection
        if det_cfg.default:
            bbox_result = frame.detections[:, 2:7]
            bbox_result[:, 2:4] += bbox_result[:, :2]
        else:
            bbox_result, other_cls_result = self.get_bboxes(img)
        valid_inds = bbox_result[:, 4] > det_cfg.min_conf
        bbox_result = bbox_result[valid_inds]
        _, keep_inds = nms(bbox_result, det_cfg.nms_iou_thr)
        bbox_result = bbox_result[keep_inds]
        features = self.get_features(img, bbox_result)
        bbox_result[:, 2:4] -= bbox_result[:, :2]
        detections = []
        for bbox, feat in zip(bbox_result, features):
            detections.append(Detection(bbox[0:4], bbox[4], feat))
        self.tracker.predict()
        self.tracker.update(detections)
        frame_results = self.tracker.get_results()
        frame_results = frame_results if frame_results else np.zeros((0, 5))
        self.results[frame.frame_idx] = np.array(frame_results)

    def get_bboxes(self, img):
        bbox_result = self.detector(img)
        needed_labels = self.cfg.detection.labels
        dets_for_reid = bbox_result[needed_labels[0] - 1]
        other_dets = []
        for i in needed_labels[1:]:
            other_dets.append(bbox_result[i - 1])
        return dets_for_reid, tuple(other_dets)

    def get_features(self, img, bbox):
        if bbox.shape[1] <= 5:
            features = self.extractor(img, bbox[:, 0:4]).cpu().numpy()
        else:
            features = bbox[:, 5:]
        return features

    def get_results(self):
        return self.results
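# A minimal usage sketch for VideoStructor. It is hedged: the `frames` items are
# assumed to expose .filename, .frame_idx and optional precomputed .detections,
# matching how step() reads them above, and detector/extractor/cfg are whatever
# objects the surrounding pipeline constructs.
def run_sequence(detector, extractor, cfg, frames):
    structor = VideoStructor(detector, extractor, cfg)
    structor.init_tracker()  # reset tracker state before each sequence
    for f in frames:
        structor.step(f)     # detect, extract re-ID features, update the tracker
    # returns {frame_idx: np.ndarray of per-track rows}, as built in step()
    return structor.get_results()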
def main(video_path=0, save_path=None):
    # Definition of the parameters
    max_cosine_distance = 0.5
    nn_budget = None

    # openpose
    model = 'mobilenet_thin'
    e = TfPoseEstimator(get_graph_path(model), target_size=(432, 368))

    # deep_sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)

    video = Video(video_path)
    # video_capture = cv2.VideoCapture()  # 'data/person.mp4'
    w = video.w
    h = video.h
    tracker = Tracker(metric, img_shape=(w, h), max_eu_dis=0.1 * np.sqrt((w ** 2 + h ** 2)))

    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    out = cv2.VideoWriter(save_path, fourcc, 7, (w, h))
    list_file = open(output_dir + 'detection.txt', 'w')
    frame_index = -1

    fps = 0.0
    while True:
        ret, frame = video.video_capture.read()  # frame shape 640*480*3
        if ret is not True or frame is None:
            break
        t1 = time.time()

        image = Image.fromarray(frame)
        # boxes, scores, _ = yolo.detect_image(image)
        humans = get_keypoints(frame, e)
        boxes = []
        for human in humans:
            bbox = human.get_upper_body_box(w, h, 1)
            if bbox is not None:
                bbox['w'] = max(bbox['w'], 6)
                bbox['h'] = max(bbox['h'], 6)
                boxes.append([bbox['x'], bbox['y'], bbox['w'], bbox['h']])

        features = encoder(frame, boxes)  # extract a feature for each box
        # wrap each (box, feature) pair in a Detection object
        detections = [Detection(bbox_and_feature[0], 1, bbox_and_feature[1])
                      for idx, bbox_and_feature in enumerate(zip(boxes, features))]

        # Call the tracker
        tracker.predict()
        tracker.update(detections, np.asarray(image))

        frame = draw_humans(frame, humans, imgcopy=False)

        for index, track in enumerate(tracker.tracks):
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            cv2.putText(frame, "{}".format(track.track_id), (int(bbox[0]), int(bbox[1])), 0,
                        5e-3 * 200, track.color, 2)

        # for det in detections:
        #     bbox = det.to_tlbr()
        #     cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)

        cv2.imshow('', frame)

        if save_path is not None:
            # Define the codec and create VideoWriter object
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(boxes) != 0:
                for i in range(0, len(boxes)):
                    list_file.write(str(boxes[i][0]) + ' ' + str(boxes[i][1]) + ' ' +
                                    str(boxes[i][2]) + ' ' + str(boxes[i][3]) + ' ')
            list_file.write('\n')

        fps = (fps + (1. / (time.time() - t1))) / 2
        # print("fps= %f" % fps)

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    video.video_capture.release()
    if save_path is not None:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
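# Hedged sketch of the two openpose helpers the function above assumes
# (get_keypoints, draw_humans). The wrapper names and the upsample_size default
# are assumptions; the underlying calls (TfPoseEstimator.inference and the
# static TfPoseEstimator.draw_humans) exist in the tf-pose-estimation package.
def get_keypoints(frame, estimator, upsample_size=4.0):
    # run pose inference on a BGR frame and return the detected humans
    return estimator.inference(frame, resize_to_default=True, upsample_size=upsample_size)

def draw_humans(frame, humans, imgcopy=False):
    # overlay the detected skeletons on the frame
    return TfPoseEstimator.draw_humans(frame, humans, imgcopy=imgcopy)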
def main(_argv):
    # Definition of the parameters
    max_cosine_distance = 0.4
    nn_budget = None
    nms_max_overlap = 1.0

    # initialize deep sort
    model_filename = os.getenv('HOME') + '/st_mini/src/scout_mini_ros/scout_bringup/model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    # calculate cosine distance metric
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    # initialize tracker
    tracker = Tracker(metric)

    # load configuration for object detector
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    input_size = FLAGS.size

    if use_webcam:
        video_path = FLAGS.video

    # load tflite model if flag is set
    if FLAGS.framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)
    # otherwise load standard tensorflow saved model
    else:
        saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING])
        infer = saved_model_loaded.signatures['serving_default']

    # begin video capture
    if use_webcam:
        try:
            vid = cv2.VideoCapture(int(video_path))
        except:
            vid = cv2.VideoCapture(video_path)

    # Initial values for robot motor control
    x = 0
    y = 0
    z = 0
    th = 0
    speed = 0.7
    turn = 1

    # Additional variables
    cx, cy, h = 0, 0, 0
    frame_num = 0

    # Load the depth camera class
    if not use_webcam:
        dc = DepthCamera()

    # Get the default values for the obstacle region
    default = Default_dist()

    # ROS class init
    go = scout_pub_basic()
    rate = rospy.Rate(60)

    # Initial target id (the id of the person carrying the sub object is assigned below)
    target_id = False

    # while video is running
    while not rospy.is_shutdown():
        if use_webcam:
            return_value, frame = vid.read()
        else:
            # use the depth camera
            return_value, depth_frame, frame = dc.get_frame()

        if return_value:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image = Image.fromarray(frame)
        else:
            print('Video has ended or failed, try a different video format!')
            break
        frame_num += 1

        if not use_webcam:
            # Set the default ROI for obstacle avoidance (currently only the first 10 frames)
            if frame_num < 11:
                default.default_update(depth_frame)
                continue

        print('Frame #: ', frame_num)
        # frame_size = frame.shape[:2]
        image_data = cv2.resize(frame, (input_size, input_size))
        image_data = image_data / 255.
        image_data = image_data[np.newaxis, ...].astype(np.float32)
        start_time = time.time()

        # run detections on tflite if flag is set
        if FLAGS.framework == 'tflite':
            interpreter.set_tensor(input_details[0]['index'], image_data)
            interpreter.invoke()
            pred = [interpreter.get_tensor(output_details[i]['index'])
                    for i in range(len(output_details))]
            # run detections using yolov3 if flag is set
            if FLAGS.model == 'yolov3' and FLAGS.tiny == True:
                boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25,
                                                input_shape=tf.constant([input_size, input_size]))
            else:
                boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25,
                                                input_shape=tf.constant([input_size, input_size]))
        else:
            batch_data = tf.constant(image_data)
            pred_bbox = infer(batch_data)
            for key, value in pred_bbox.items():
                boxes = value[:, :, 0:4]
                pred_conf = value[:, :, 4:]

        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores=tf.reshape(pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
            max_output_size_per_class=50,
            max_total_size=50,
            iou_threshold=FLAGS.iou,
            score_threshold=FLAGS.score)

        # convert data to numpy arrays and slice out unused elements
        num_objects = valid_detections.numpy()[0]
        bboxes = boxes.numpy()[0]
        bboxes = bboxes[0:int(num_objects)]
        scores = scores.numpy()[0]
        scores = scores[0:int(num_objects)]
        classes = classes.numpy()[0]
        classes = classes[0:int(num_objects)]

        # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height
        original_h, original_w, _ = frame.shape
        bboxes = utils.format_boxes(bboxes, original_h, original_w)

        # store all predictions in one parameter for simplicity when calling functions
        pred_bbox = [bboxes, scores, classes, num_objects]

        # read in all class names from config
        class_names = utils.read_class_names(cfg.YOLO.CLASSES)

        # by default allow all classes in .names file
        # allowed_classes = list(class_names.values())

        # custom allowed classes (uncomment line below to customize tracker for only people)
        allowed_classes = ['person', 'cell phone', 'tie', 'stop sign']

        # loop through objects and use class index to get class name, allow only classes in allowed_classes list
        names = []
        deleted_indx = []
        for i in range(num_objects):
            class_indx = int(classes[i])
            class_name = class_names[class_indx]
            if class_name not in allowed_classes:
                deleted_indx.append(i)
            else:
                names.append(class_name)
        names = np.array(names)
        count = len(names)
        if FLAGS.count:
            cv2.putText(frame, "Objects being tracked: {}".format(count), (5, 35),
                        cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (0, 255, 0), 2)
            print("Objects being tracked: {}".format(count))

        # delete detections that are not in allowed_classes
        bboxes = np.delete(bboxes, deleted_indx, axis=0)
        scores = np.delete(scores, deleted_indx, axis=0)

        # encode yolo detections and feed to tracker
        features = encoder(frame, bboxes)
        detections = [Detection(bbox, score, class_name, feature)
                      for bbox, score, class_name, feature in zip(bboxes, scores, names, features)]

        # initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maxima suppression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        # <Publisher code for controlling the st-mini>
        go.update(x, y, z, th, speed, turn)
        go.sendMsg()

        # Set the left/right turn limits
        left_limit = frame.shape[1] // 2 - 100
        right_limit = frame.shape[1] // 2 + 100

        # # Set the zones where the left/right turn speed increases
        # left_max = frame.shape[1] // 4
        # right_max = (frame.shape[1] // 4) * 3

        # Set the robot's maximum and minimum speeds
        # <!-- linear velocity --!>
        max_speed = 0.8
        min_speed = 0.5
        # <!-- angular velocity --!>
        max_turn = 1.3
        min_turn = 0.8

        # <!-┌---------- assigning a person id using the sub object ----------┐!>
        # build the track id list
        track_id_list = []

        # build the person-id-to-bbox dict
        person_id_to_bbox = {}
        """Data in the following format:
        person_id_to_bbox = {
            1: [100, 200, 300, 400]  # id: bbox
        }
        """

        # build the recognition bbox list
        recognition_bbox_list = []

        # Recognition object (if the recognition object lies inside a person's bbox,
        # that person's id keeps being tracked; if the target is later lost, it can
        # be re-identified the same way)
        recognition_object = 'tie'

        # build the track id list
        for track in tracker.tracks:
            class_name = track.get_class()
            track_id = track.track_id
            bbox = track.to_tlbr()

            # build the track id list
            track_id_list.append(track.track_id)

            if class_name == 'person':
                person_id_to_bbox[track_id] = bbox
            elif class_name == recognition_object:
                recognition_bbox_list.append(bbox)

        print('person_id_to_bbox: {}, recognition_bbox_list: {}'.format(person_id_to_bbox, recognition_bbox_list))

        # If the recognition object's bbox lies inside a person's bbox, assign that
        # person's id as target_id so that person keeps being tracked
        if target_id == False:  # if False, a person id still has to be assigned
            # try:
            for person_id, person_bbox in person_id_to_bbox.items():
                # person_bbox = list(map(int, person_bbox))
                for rec_bbox in recognition_bbox_list:
                    # rec_bbox = list(map(int, rec_bbox))
                    # if person_bbox[0] < 0: person_bbox[0] = 0
                    # elif person_bbox[1] < 0: person_bbox[1] = 0
                    # elif person_bbox[2] > frame.shape[0]: person_bbox[2] = frame.shape[0]
                    # elif person_bbox[3] > frame.shape[1]: person_bbox[3] = frame.shape[1]
                    if rec_bbox[0] >= person_bbox[0] and rec_bbox[1] >= person_bbox[1] \
                            and rec_bbox[2] <= person_bbox[2] and rec_bbox[3] <= person_bbox[3]:
                        target_id = person_id
                        print('target id assignment succeeded')
                    else:
                        target_id = False
                        print('target id assignment failed')
        print('target_id: ', target_id)
        # <!-└---------- assigning a person id using the sub object ----------┘!>

        # Tracking algorithm
        if len(tracker.tracks) == 0:
            # stop if there is nothing to track
            key = 'stop'
            print('There are no objects to track.')
        else:
            # act if there is something to track
            for track in tracker.tracks:
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                track_id = track.track_id
                bbox = track.to_tlbr()
                class_name = track.get_class()

                if target_id == track_id and class_name == 'person':
                    # compute cx, cy
                    w, h = int(bbox[2] - bbox[0]), int(bbox[3] - bbox[1])
                    cx, cy = int(w / 2 + bbox[0]), int(h / 2 + bbox[1])

                    # distance between the person and the robot
                    person_distance = person_dist(depth_frame, cx, cy, h)
                    print('Distance to person: ', person_distance)

                    # max/min of the safe straight-driving range
                    stable_max_dist = 2500
                    stable_min_dist = 2000

                    if person_distance < stable_min_dist:
                        print('Too Close')
                        key = 'stop'
                    else:
                        """
                        Using the depth value:
                        1. Distance to the target (driving forward)
                           1) adequate distance: 1.5m ~ 2.0m --> linear.x = 1
                           2) danger distance:   ~1.5m       --> linear.x = 0
                           3) chasing distance:  2.0m~       --> linear.x += 0.2 (until the distance is adequate)
                        2. Turning left/right using the target's center point
                           1) the center point cx, cy is computed as:
                              width = bbox[2] - bbox[0]
                              height = bbox[3] - bbox[1]
                              cx = int(width / 2 + bbox[0])
                              cy = int(height / 2 + bbox[1])
                              Only cx is used, since this is a left/right decision.
                           2) caution when setting cx:
                              as the target leaves the frame, the bbox's top-left x coordinate can
                              become negative or exceed the frame width (here 640); i.e. when bbox[0]
                              is negative or above 640, a left/right turn must be made immediately.
                        The depth camera first checks for obstacles; if there are none, the distance
                        and direction to the target are measured and the final published values decided.
                        """
                        # Set the zones where the left/right turn speed increases
                        left_max = frame.shape[1] // 4
                        right_max = (frame.shape[1] // 4) * 3

                        max_speed = 1.5
                        min_speed = 1.2

                        """
                        Stop condition (3/3): when there is an obstacle; strictly speaking this is
                        an avoidance maneuver rather than a stop. obstacle_detect only assigns a
                        value to key when an obstacle is present.
                        """
                        # get the 'left'/'right' string from the obstacle-avoidance ROI distances
                        # if person_distance > stable_max_dist:
                        #     key = obstacle_detect(default, depth_frame)

                        cv2.rectangle(frame, (cx + 10, cy - (h // 5) + 10), (cx - 10, cy - (h // 5) - 10),
                                      (255, 0, 0), 5)

                        # follow the person if there is no obstacle
                        # if key == None:
                        key, speed, turn = drive(cx, left_limit, right_limit, turn, speed)
                        # key, speed, turn = drive2(cx, left_limit, right_limit, turn, frame, speed, max_speed, max_turn)
                        # key, speed, turn = drive3(cx, left_limit, right_limit, turn, frame, speed, max_speed, min_speed, max_turn, stable_min_dist, stable_max_dist, person_distance, start_speed_down=300)

                # draw bbox on screen
                color = colors[int(track.track_id) % len(colors)]
                color = [i * 255 for i in color]
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2)
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1] - 30)),
                              (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])),
                              color, -1)
                cv2.putText(frame, class_name + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)),
                            0, 0.75, (255, 255, 255), 2)

        # Pass the speed/direction produced by the driving algorithm (drive) to the robot
        x, y, z, th, speed, turn = key_move(key, x, y, z, th, speed, turn)

        print('key: ', key)
        print('key_type: ', type(key))
        print('x: {}, y: {}, th: {}, speed: {}, turn: {}'.format(x, y, th, speed, turn))

        # mark the center of the frame
        cv2.circle(frame, (320, 240), 10, (255, 255, 255))

        # draw the left/right turn dividing lines
        cv2.line(frame, (left_limit, 0), (left_limit, frame.shape[0]), (255, 0, 0))
        cv2.line(frame, (right_limit, 0), (right_limit, frame.shape[0]), (255, 0, 0))

        # ROS Rate sleep
        rate.sleep()

        print('track id list: ', track_id_list)

        '''
        box_center_roi = np.array((depth_frame[cy-10:cy+10, cx-10:cx+10]), dtype=np.float64)
        cv2.rectangle(frame, (cx-10, cy+10), (cx+10, cy-10), (255, 255, 255), 2)
        '''

        safe_roi = np.array([[400, 400], [240, 400], [160, 480], [480, 480]])
        # safe_roi = np.array([[240, 420], [400, 420], [480, 160], [480, 480]])
        cv2.polylines(frame, [safe_roi], True, (255, 255, 255), 2)
        cv2.rectangle(frame, (205, 445), (195, 435), (255, 0, 0), 5)
        cv2.rectangle(frame, (245, 405), (235, 395), (255, 0, 0), 5)
        cv2.rectangle(frame, (405, 405), (395, 395), (255, 0, 0), 5)
        cv2.rectangle(frame, (445, 445), (435, 435), (255, 0, 0), 5)

        # calculate frames per second of running detections
        fps = 1.0 / (time.time() - start_time)
        print("FPS: %.2f" % fps)
        result = np.asarray(frame)
        result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

        # to view the depth map in color
        # depth_colormap = cv2.applyColorMap(cv2.convertScaleAbs(depth_frame, alpha=0.03), cv2.COLORMAP_JET)

        if not FLAGS.dont_show:
            cv2.imshow("Output Video", result)

        # if output flag is set, save video file
        # NOTE: this snippet never creates `out`; a cv2.VideoWriter must be set up
        # elsewhere before FLAGS.output is used.
        if FLAGS.output:
            out.write(result)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    dc.release()
    cv2.destroyAllWindows()
def main():
    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # Deep SORT
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    writeVideo_flag = True
    asyncVideo_flag = False

    file_path = 'model_data/input_video.avi'
    if asyncVideo_flag:
        video_capture = VideoCaptureAsync(file_path)
    else:
        video_capture = cv2.VideoCapture(file_path)

    if asyncVideo_flag:
        video_capture.start()

    if writeVideo_flag:
        if asyncVideo_flag:
            w = int(video_capture.cap.get(3))
            h = int(video_capture.cap.get(4))
        else:
            w = int(video_capture.get(3))
            h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter('model_data/output_yolov3.mp4', fourcc, 25, (w, h))
        frame_index = -1

    fps = 0.0
    fps_imutils = imutils.video.FPS().start()

    net = load_net(b"model_data/yolov3-tiny.cfg", b"model_data/yolov3-tiny.weights", 0)
    meta = load_meta(b"model_data/coco.data")

    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if ret != True:
            break

        t1 = time.time()

        # frame = cv2.resize(frame, (416, 416))
        boxes, confidences = detect(net, meta, frame)
        print(boxes)
        # print(boxes, confidences)
        boxes = [convertBack(float(box[0]), float(box[1]), float(box[2]), float(box[3])) for box in boxes]

        # boxes, confidences = yolo.detect_image(image)
        # print(boxes)
        # print(confidences)
        # boxes = np.array(boxes)
        # confidences = np.array(confidences)
        #
        # # Run non-maxima suppression.
        # indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, confidences)
        #
        # boxes = [boxes[i] for i in indices]
        # confidences = [confidences[i] for i in indices]

        # time1 = time.time()

        features = encoder(frame, boxes)

        # time2 = time.time()

        detections = [Detection(bbox, confidence, feature)
                      for bbox, confidence, feature in zip(boxes, confidences, features)]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        # time3 = time.time()

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200, (0, 255, 0), 2)

        for det in detections:
            bbox = det.to_tlbr()
            score = "%.2f" % round(det.confidence * 100, 2)
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)
            cv2.putText(frame, score + '%', (int(bbox[0]), int(bbox[3])), 0, 5e-3 * 130, (0, 255, 0), 2)

        cv2.imshow('', frame)

        if writeVideo_flag:  # and not asyncVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1

        fps_imutils.update()

        fps = (fps + (1. / (time.time() - t1))) / 2
        print("FPS = %f" % (fps))

        # time4 = time.time()
        #
        # time_sum = time4 - t1
        # print("time:", (time1-t1)/time_sum, (time2-time1)/time_sum, (time3-time2)/time_sum, (time4-time3)/time_sum)

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    fps_imutils.stop()
    print('imutils FPS: {}'.format(fps_imutils.fps()))

    if asyncVideo_flag:
        video_capture.stop()
    else:
        video_capture.release()

    if writeVideo_flag:
        out.release()

    cv2.destroyAllWindows()
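# Hedged sketch of the convertBack helper the function above assumes. darknet's
# detect() returns boxes as (x_center, y_center, width, height), while the Deep
# SORT encoder and Detection expect top-left (x, y, width, height), so a
# plausible conversion is the one below; the exact original may differ.
def convertBack(x, y, w, h):
    # center-based box -> top-left-based box (tlwh), as Deep SORT expects
    xmin = int(round(x - w / 2))
    ymin = int(round(y - h / 2))
    return xmin, ymin, int(w), int(h)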
def main(_argv):
    # Definition of the parameters
    max_cosine_distance = 0.4
    nn_budget = None
    nms_max_overlap = 1.0

    # initialize deep sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    # load configuration for object detector
    input_size = FLAGS.size
    video_path = FLAGS.video

    saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING])
    infer = saved_model_loaded.signatures['serving_default']

    # begin video capture: try a webcam index first, then fall back to a file path
    try:
        vid = cv2.VideoCapture(int(video_path))
    except ValueError:
        vid = cv2.VideoCapture(video_path)

    out = None
    if FLAGS.output:
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*'XVID')  # fourcc takes four separate chars; the original was missing the `*`
        out = cv2.VideoWriter("demo.mp4", codec, fps, (width, height))
    frame_index = -1

    # swimming re-id
    model = load_model('best_model_triplet.hdf5', compile=False)
    le = pickle.loads(open('le.pickle', "rb").read())
    data = pickle.loads(open('recognizer.pikle', "rb").read())
    img_count = 0

    # while video is running
    while True:
        return_value, frame = vid.read()
        if return_value:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        else:
            print('Video has ended or failed, try a different video format!')
            break

        image_data = cv2.resize(frame, (input_size, input_size))
        image_data = image_data / 255.
        image_data = image_data[np.newaxis, ...].astype(np.float32)
        start_time = time.time()

        batch_data = tf.constant(image_data)
        pred_bbox = infer(batch_data)
        for key, value in pred_bbox.items():
            boxes = value[:, :, 0:4]
            pred_conf = value[:, :, 4:]

        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores=tf.reshape(pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
            max_output_size_per_class=50,
            max_total_size=50,
            iou_threshold=FLAGS.iou,
            score_threshold=FLAGS.score
        )

        # convert data to numpy arrays and slice out unused elements
        num_objects = valid_detections.numpy()[0]
        bboxes = boxes.numpy()[0]
        bboxes = bboxes[0:int(num_objects)]
        scores = scores.numpy()[0]
        scores = scores[0:int(num_objects)]
        classes = classes.numpy()[0]
        classes = classes[0:int(num_objects)]

        # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height
        original_h, original_w, _ = frame.shape
        bboxes = utils.format_boxes(bboxes, original_h, original_w)

        # read in all class names from config
        class_names = utils.read_class_names(cfg.YOLO.CLASSES)

        # by default allow all classes in .names file
        allowed_classes = list(class_names.values())

        # loop through objects and use class index to get class name,
        # allow only classes in allowed_classes list
        names = []
        deleted_indx = []
        for i in range(num_objects):
            class_indx = int(classes[i])
            class_name = class_names[class_indx]
            if class_name not in allowed_classes:
                deleted_indx.append(i)
            else:
                names.append(class_name)
        names = np.array(names)

        # delete detections that are not in allowed_classes
        bboxes = np.delete(bboxes, deleted_indx, axis=0)
        scores = np.delete(scores, deleted_indx, axis=0)

        # encode yolo detections and feed to tracker
        features = encoder(frame, bboxes)
        detections = [Detection(bbox, score, class_name, feature)
                      for bbox, score, class_name, feature in zip(bboxes, scores, names, features)]

        # initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maxima suppression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        flag_ok = 0
        # map_reid = {}
        # refresh the re-identification map during the first 30 frames, then once every 30 frames
        if img_count < 30 or img_count % 30 == 0:
            map_reid = reid(tracker, data, frame, model, le)
        img_count += 1

        # update tracks
        color = (0, 0, 0)  # fallback so the except branch below always has a color to draw with
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()
            try:
                # fixed color per re-identified swimmer ID
                if map_reid[track.track_id] == 0:
                    color = (0, 0, 0)
                elif map_reid[track.track_id] == 1:
                    color = (255, 0, 0)
                elif map_reid[track.track_id] == 2:
                    color = (255, 240, 0)
                elif map_reid[track.track_id] == 3:
                    color = (0, 255, 0)
                elif map_reid[track.track_id] == 4:
                    color = (0, 255, 255)
                elif map_reid[track.track_id] == 5:
                    color = (0, 0, 255)
                elif map_reid[track.track_id] == 6:
                    color = (127, 0, 255)
                elif map_reid[track.track_id] == 7:
                    color = (255, 182, 190)
                else:
                    color = (0, 0, 0)
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1] - 30)), (int(bbox[2]), int(bbox[3] + 50)), color, 2)
                cv2.putText(frame, class_name + " ReID-" + str(map_reid[track.track_id]),
                            (int(bbox[0]), int(bbox[1] - 55)), 0, 0.7, color, 2)
                flag_ok = 1
            except Exception:
                # the track has no entry in the re-id map yet: rebuild the map and redraw the labels
                map_reid = reid(tracker, data, frame, model, le)
                for track in tracker.tracks:
                    if not track.is_confirmed() or track.time_since_update > 1:
                        continue
                    bbox = track.to_tlbr()
                    class_name = track.get_class()
                    cv2.putText(frame, class_name + " ReID-" + str(map_reid[track.track_id]),
                                (int(bbox[0]), int(bbox[1] - 55)), 0, 0.7, color, 2)
                    print("after", map_reid, track.track_id)

        # --------Threading------------#
        if flag_ok == 1:
            thread_num = 32
            threads = [None] * thread_num
            original_frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            f = original_frame
            n_t = len(tracker.tracks)
            chunk = max(n_t // thread_num, 1)
            s = 0
            e = chunk
            for i in range(thread_num):
                if i == thread_num - 1:
                    # the last worker takes the remainder (the original passed tracker.tracks[:-1] here)
                    threads[i] = threading.Thread(target=crop_and_recognize,
                                                  args=(tracker.tracks[s:], original_frame, map_reid,))
                else:
                    threads[i] = threading.Thread(target=crop_and_recognize,
                                                  args=(tracker.tracks[s:e], original_frame, map_reid,))
                s = e
                e += chunk  # slide the window (the original `e += e` doubled it instead)
            for t in threads:
                t.start()
            for t in threads:
                t.join()  # wait for recognition to finish before drawing the style labels below

            for i in range(len(tracker.tracks)):
                track = tracker.tracks[i]
                bbox = track.to_tlbr()
                try:
                    ID = map_reid[track.track_id]
                except Exception:
                    continue  # no re-id for this track yet (the original `pass` silently reused a stale ID)
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                # color = colors[ID % len(colors)]
                # color = [i * 255 for i in color]
                # cv2.rectangle(f, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2)
                if swimmer_style.get(ID) is not None:
                    (sstyle, conf) = swimmer_style[ID]
                    cv2.putText(f, str(sstyle) + ": " + str(int(round(conf, 2) * 100)) + "%",
                                (int(bbox[0]), int(bbox[1] - 35)), 0, 0.7, (255, 255, 255), 2)
            frame = cv2.cvtColor(f, cv2.COLOR_BGR2RGB)
        else:
            print("NOT OKAY", "map_reid, track.track_id", map_reid, track.track_id,
                  "len(tracker.tracks)", len(tracker.tracks))

        # calculate frames per second of running detections
        fps = 1.0 / (time.time() - start_time)
        print("FPS: %.2f" % fps)
        result = np.asarray(frame)
        result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

        if not FLAGS.dont_show:
            cv2.imshow("Output Video", result)

        # if output flag is set, save video file
        if FLAGS.output:
            out.write(result)
            frame_index = frame_index + 1

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cv2.destroyAllWindows()
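# A minimal alternative sketch for the hand-rolled 32-thread fan-out above: chunk the
# track list evenly and let an executor schedule the work. Assumes the same
# crop_and_recognize(tracks, frame, map_reid) signature used above; the worker count
# is illustrative.
from concurrent.futures import ThreadPoolExecutor

def recognize_in_parallel(tracks, frame, map_reid, workers=8):
    # split tracks into contiguous chunks (the last chunk takes the remainder)
    chunk = max(len(tracks) // workers, 1)
    chunks = [tracks[s:s + chunk] for s in range(0, len(tracks), chunk)]
    with ThreadPoolExecutor(max_workers=workers) as pool:
        # submit one job per chunk and wait for all of them before drawing labels
        futures = [pool.submit(crop_and_recognize, c, frame, map_reid) for c in chunks]
        for fut in futures:
            fut.result()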
def main(yolo):
    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # Deep SORT
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    # Calculate cosine distance metric
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    # Flags for the process
    tracking = True          # Set False if you only want to do detection
    writeVideo_flag = True   # Set False if you don't want to write frames locally
    asyncVideo_flag = False  # Asynchronous processing for better FPS. Warning: stuttering problem

    # Video file path
    file_path = './Input/video1.avi'

    # Check if the asyncVideo flag is set to True
    if asyncVideo_flag:
        video_capture = VideoCaptureAsync(file_path)
    else:
        video_capture = cv2.VideoCapture(file_path)

    if asyncVideo_flag:
        video_capture.start()

    if writeVideo_flag:
        if asyncVideo_flag:
            w = int(video_capture.cap.get(3))
            h = int(video_capture.cap.get(4))
        else:
            w = int(video_capture.get(3))
            h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        output_file = os.path.basename(file_path)[:-4]
        out = cv2.VideoWriter('./Output/' + output_file + "_output.mp4", fourcc, 30, (w, h))
        frame_index = -1

    fps = 0.0
    fps_imutils = imutils.video.FPS().start()

    while True:
        ret, frame = video_capture.read()  # Capture frames
        if ret != True:
            break

        t1 = time.time()

        # BGR to RGB frame conversion
        image = Image.fromarray(frame[..., ::-1])

        # YOLOv4 detection
        boxes, confidence, classes = yolo.detect_image(image)

        if tracking:
            # Encode the frame and boxes for Deep SORT
            features = encoder(frame, boxes)
            # Deep SORT detections
            detections = [Detection(bbox, confidence, cls, feature)
                          for bbox, confidence, cls, feature in zip(boxes, confidence, classes, features)]
        else:
            # YOLOv4 detection only
            detections = [Detection_YOLO(bbox, confidence, cls)
                          for bbox, confidence, cls in zip(boxes, confidence, classes)]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        if tracking:
            # Call the tracker
            tracker.predict()
            tracker.update(detections)

            for track in tracker.tracks:
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                bbox = track.to_tlbr()
                # Draw white bbox for Deep SORT
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
                cv2.putText(frame, "ID: " + str(track.track_id), (int(bbox[0]), int(bbox[1])),
                            cv2.FONT_HERSHEY_COMPLEX_SMALL, 1.5, (0, 255, 0), 1)

        for det in detections:
            bbox = det.to_tlbr()
            score = "%.2f" % round(det.confidence * 100, 2)
            # Check the class for a colored bbox
            if len(classes) > 0:
                cls = det.cls
                if str(cls) == 'person':
                    # Draw blue bbox for YOLOv4 person detection
                    cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (219, 152, 52), 2)
                elif str(cls) in ('backpack', 'handbag', 'suitcase'):
                    # Draw orange bbox for YOLOv4 backpack, handbag and suitcase detection
                    # (the original `== 'backpack' or 'handbag' or 'suitcase'` test was always true)
                    cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (65, 176, 245), 2)

        if not asyncVideo_flag:
            fps = (fps + (1. / (time.time() - t1))) / 2
            print("FPS = %f" % (fps))
            cv2.putText(frame, "GPU: NVIDIA GEFORCE GTX 960M", (5, 70),
                        cv2.FONT_HERSHEY_COMPLEX_SMALL, 0.8, (0, 255, 0), 1)
            cv2.putText(frame, "FPS: %.2f" % fps, (5, 50),
                        cv2.FONT_HERSHEY_COMPLEX_SMALL, 0.8, (0, 255, 0), 1)

        # Draw the timestamp on the frame
        timestamp = datetime.datetime.now()
        ts = timestamp.strftime("%d/%m/%Y, %H:%M:%S")
        cv2.putText(frame, ts, (5, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 0.8, (0, 255, 0), 1)

        cv2.imshow('', frame)

        if writeVideo_flag:  # and not asyncVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1

        fps_imutils.update()

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    fps_imutils.stop()
    print('imutils FPS: {}'.format(fps_imutils.fps()))

    if asyncVideo_flag:
        video_capture.stop()
    else:
        video_capture.release()

    if writeVideo_flag:
        out.release()

    cv2.destroyAllWindows()
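# Why the class test above needed fixing: in Python, `x == 'a' or 'b'` parses as
# `(x == 'a') or 'b'`, and a non-empty string literal is always truthy. A quick check:
assert ('person' == 'backpack' or 'handbag')                 # always True: the 'handbag' literal wins
assert 'person' not in ('backpack', 'handbag', 'suitcase')   # the membership test behaves as intended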
class ObjectTracking(object):
    def __init__(self, name):
        self.max_iou_distance = rospy.get_param('max_iou_distance', 0.7)
        self.max_age = rospy.get_param('max_age', 30)
        self.n_init = rospy.get_param('n_init', 3)
        self.metric = nn_matching.NearestNeighborDistanceMetric("cosine", 0.2, None)
        self.tracker = Tracker(self.metric, self.max_iou_distance, self.max_age, self.n_init)
        rospy.Subscriber("image_object", ObjectArray, self.__callback1)
        self.bridge = CvBridge()
        self.image_sub = rospy.Subscriber("image_raw", Image, self.__image_callback)
        self.pub = rospy.Publisher('object_tracked', ObjectArray, queue_size=10)
        self.image_pub = rospy.Publisher("object_tracked_image", Image, queue_size=10)
        self.cv_image = None
        self.visualizer = visualization.Visualization((960, 540), update_ms=100)
        self.mot_tracker = sort.Sort(iou_th=0.1)
        self.seedling_id_list = []
        self.seedling_lifetime = dict()

    # Deep SORT based callback (currently not subscribed; __callback1 is used instead)
    def __callback(self, data):
        def create_detections(objects):
            detection_list = []
            for obj in objects:
                bbox = np.array([obj.x_offset, obj.y_offset, obj.width, obj.height])
                confidence = obj.score
                detection_list.append(Detection(bbox, confidence, []))
            return detection_list

        print("Received detection results")
        # Load image and generate detections.
        detections = create_detections(data.objects)
        # Update tracker.
        self.tracker.predict()
        new_ids = self.tracker.update(detections)
        print(new_ids)
        for i in range(data.size):
            data.objects[i].id = new_ids[i]
        self.pub.publish(data)
        self.visualizer.set_image(self.cv_image)
        self.visualizer.draw_objects(data.objects)
        try:
            self.image_pub.publish(self.bridge.cv2_to_imgmsg(self.cv_image, "bgr8"))
        except CvBridgeError as e:
            print(e)

    def __callback1(self, data):
        def create_detections(objects, width, height):
            dets = []
            for obj in objects:
                det = [
                    obj.x_offset / width,
                    obj.y_offset / height,
                    (obj.x_offset + obj.width) / width,
                    (obj.y_offset + obj.height) / height,
                    obj.score
                ]
                dets.append(det)
            return np.array(dets)

        # width is shape[1], height is shape[0] (the original passed shape[0] for both)
        dets = create_detections(data.objects, self.cv_image.shape[1], self.cv_image.shape[0])

        # update trackers based on the current detection result
        new_ids, trackers = self.mot_tracker.update(dets)
        print("new_ids")
        print(new_ids)

        # update the tracker list
        print(trackers[:, 4])
        for d_index, d in enumerate(trackers[:, 4]):
            if d not in self.seedling_id_list:
                self.seedling_id_list.append(d)
            # max_cls_id = len(seedling_id_list)
            cur_d = self.seedling_id_list.index(d) + 1
            if cur_d not in list(self.seedling_lifetime.keys()):
                self.seedling_lifetime[cur_d] = 0
            self.seedling_lifetime[cur_d] = self.seedling_lifetime[cur_d] + 1

        for i in range(data.size):
            data.objects[i].id = new_ids[i]
        self.pub.publish(data)
        self.visualizer.set_image(self.cv_image)
        self.visualizer.draw_objects(data.objects)
        try:
            self.image_pub.publish(self.bridge.cv2_to_imgmsg(self.cv_image, "bgr8"))
        except CvBridgeError as e:
            print(e)
        print("============================")

    def __image_callback(self, image):
        try:
            self.cv_image = self.bridge.imgmsg_to_cv2(image, image.encoding)
        except CvBridgeError as e:
            print(e)
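# A minimal node bootstrap for the class above, assuming it lives in a ROS1 package;
# the node name is illustrative.
if __name__ == '__main__':
    rospy.init_node('object_tracking', anonymous=True)
    ObjectTracking('object_tracking')
    rospy.spin()  # hand control to the subscriber callbacks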
def YOLO():
    global metaMain, netMain, altNames
    configPath = "./cfg/yolov3-spp.cfg"
    weightPath = "./yolov3-spp.weights"
    metaPath = "./cfg/coco.data"
    if not os.path.exists(configPath):
        raise ValueError("Invalid config path `" + os.path.abspath(configPath) + "`")
    if not os.path.exists(weightPath):
        raise ValueError("Invalid weight path `" + os.path.abspath(weightPath) + "`")
    if not os.path.exists(metaPath):
        raise ValueError("Invalid data file path `" + os.path.abspath(metaPath) + "`")
    if netMain is None:
        netMain = darknet.load_net_custom(configPath.encode("ascii"),
                                          weightPath.encode("ascii"), 0, 1)  # batch size = 1
    if metaMain is None:
        metaMain = darknet.load_meta(metaPath.encode("ascii"))
    if altNames is None:
        try:
            with open(metaPath) as metaFH:
                metaContents = metaFH.read()
                import re
                match = re.search("names *= *(.*)$", metaContents, re.IGNORECASE | re.MULTILINE)
                if match:
                    result = match.group(1)
                else:
                    result = None
                try:
                    if os.path.exists(result):
                        with open(result) as namesFH:
                            namesList = namesFH.read().strip().split("\n")
                            altNames = [x.strip() for x in namesList]
                except TypeError:
                    pass
        except Exception:
            pass

    #cap = cv2.VideoCapture(0)
    # create overlay for computing the heatmap
    overlay_empty = np.zeros((608, 608, 1), np.uint8)

    cap = cv2.VideoCapture("video_test/01.avi")  # IMPORTANT
    cap.set(3, 1280)
    cap.set(4, 720)
    #print("width", cap.get(3))
    #print("height", cap.get(4))
    """
    out = cv2.VideoWriter(
        "video_output/modded_2-5f_124_pytorch_MGN_thres00965_dthres0375_x30y160_age7000_cbox20-10w-5h-160.mp4",
        cv2.VideoWriter_fourcc(*"MP4V"), 20,
        (darknet.network_width(netMain), darknet.network_height(netMain)))
    print("Starting the YOLO loop...")
    """
    out = cv2.VideoWriter(
        "txt_output_n50_0960/01.avi",
        cv2.VideoWriter_fourcc(*"MP4V"), 20,  # IMPORTANT
        (darknet.network_width(netMain), darknet.network_height(netMain)))
    print("Starting the YOLO loop...")

    # Create an image we reuse for each detect
    darknet_image = darknet.make_image(darknet.network_width(netMain),
                                       darknet.network_height(netMain), 3)

    # ================= Definition of the parameters
    max_cosine_distance = 0.096  # IMPORTANT PARAMETER
    maxAge = 100                 # IMPORTANT PARAMETER
    nn_budget = None
    nms_max_overlap = 1.0

    # ================= Models location  # IMPORTANT
    #model_filename = 'model_data/mars-small128.pb'  # Deep Cosine Metric model
    model_filename = 'model_data/model_MGN.pt'       # MGN model

    # ========================= Init MGN feature extractor
    extractor = Extractor(model_filename, use_cuda=True)  # IMPORTANT
    # ========================= Init Deep Cosine feature extractor
    #encoder = gdet.create_box_encoder(model_filename, batch_size=1)  # IMPORTANT

    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric, max_age=maxAge)

    fps = 0.0
    counts = 1
    ids = dict()
    b = set()
    fi = open("txt_output_n50_0960/01.txt", "w")  # must stay open: fi.write() is called below (it was commented out)

    while True:
        prev_time = time.time()
        ret, frame_read = cap.read()
        if ret != True:
            break
        if counts % 1 == 0:
            print("========FRAME========: ", counts)

        frame_rgb = cv2.cvtColor(frame_read, cv2.COLOR_BGR2RGB)
        frame_resized = cv2.resize(frame_rgb,
                                   (darknet.network_width(netMain), darknet.network_height(netMain)),
                                   interpolation=cv2.INTER_LINEAR)
        height, width, channels = frame_resized.shape
        #print("Shape of frame: ", frame_resized.shape)

        darknet.copy_image_from_bytes(darknet_image, frame_resized.tobytes())
        boxs = darknet.detect_image(netMain, metaMain, darknet_image, thresh=0.375)

        boxes = []
        for detection in boxs:
            if detection[0].decode() != "person":
                continue
            x, y, w, h = detection[2][0], detection[2][1], detection[2][2], detection[2][3]
            #xmin = int(round(float(x) - (float(w) / 2)))
            #ymin = int(round(float(y) - (float(h) / 2)))
            xmin, ymin, xmax, ymax = convertBack(int(x), int(y), int(w), int(h))

            # ========================= generate HEATMAP values ====================
            x_heat = int(round((xmin + xmax) / 2))
            overlay_empty[int(ymax) - 10:int(ymax) + 10, x_heat - 10:x_heat + 10] += 5
            # ======================================================================

            cv2.rectangle(frame_resized, (xmin, ymin), (xmax, ymax), (255, 0, 255), 1)
            if (xmin > 100 and ymin > 10) and (xmax < width - 5 and ymax < height - 120):
                if (xmax - xmin) > 30 and (ymax - ymin) > 160:
                    boxes.append([xmin, ymin, w, h])

        pt1 = (100, 10)
        pt2 = (width - 5, height - 120)
        cv2.rectangle(frame_resized, pt1, pt2, (0, 255, 0), 1)

        # ==================== EXTRACT FEATURES ====================  # IMPORTANT
        #features = encoder(frame_resized, boxes)  # Deep Cosine Metric extractor
        features = get_features(boxes, frame_resized, extractor, width, height)  # MGN extractor
        # ============================================================

        detections = [Detection(bbox, 1.0, feature) for bbox, feature in zip(boxes, features)]

        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # update tracker
        tracker.predict()
        tracker.update(detections)

        # ====================== Fix the ID jumping problem ======================
        if counts == 1:
            count = 1
            for track in tracker.tracks:
                ids[track.track_id] = count
                count += 1
        else:
            count = 0
            for track in tracker.tracks:
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                if track.track_id in ids:
                    continue
                if len(list(ids.values())) != 0:
                    count = max(list(ids.values()))
                ids[track.track_id] = count + 1
        #print(ids)
        # =========================================================================

        cv2.putText(frame_resized, "FrameID: " + str(counts), (350, 30), 0, 5e-3 * 150, (255, 255, 0), 2)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            #print("=======ID at the frame=======: ", track.track_id)
            bbox = track.to_tlbr()
            cv2.rectangle(frame_resized, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 255, 0), 2)
            if track.track_id in ids:
                cv2.putText(frame_resized, str(ids[track.track_id]),
                            (int(bbox[2]), int(bbox[3])), 0, 5e-3 * 250, (255, 255, 0), 2)
                if counts % 2 == 0:
                    fi.write(str(counts) + " " + str(int(bbox[2])) + " " + str(int(bbox[3])) +
                             " " + str(ids[track.track_id]) + " " + "\n")
            b.add(track.track_id)
            #if (np.allclose(overlay_empty[int(bbox[3])-5:int(bbox[3])+5, x_heat-5:x_heat+5, 0], 255)): continue
        #print(b)

        if len(list(ids.values())) != 0:
            text = "Num people: {}".format(max(list(ids.values())))
            cv2.putText(frame_resized, text, (1, 30), 0, 5e-3 * 200, (255, 0, 0), 2)

        # clamp saturated heatmap cells so the colormap does not overflow
        overlay_empty[np.where((overlay_empty[:, :, 0] > 200))] = 230
        """
        for det in detections:
            bbox = det.to_tlbr()
            cv2.rectangle(frame_resized, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)
        """

        # ============== BLENDING HEATMAP VALUES TO FRAME ==============
        im_color = cv2.applyColorMap(overlay_empty, cv2.COLORMAP_JET)
        im_color = cv2.blur(im_color, (10, 10))
        #im_color[:,:,0] = 0
        image = cv2.cvtColor(frame_resized, cv2.COLOR_BGR2RGB)
        #image = cvDrawBoxes(detections, frame_resized)
        heat = cv2.addWeighted(image, 0.7, im_color, 0.3, 0)
        # ===============================================================

        out.write(heat)
        print("fps: ", 1 / (time.time() - prev_time))
        print("\n")
        #fps = (fps + (1. / (time.time() - prev_time))) / 2
        #print("fps= %f" % (fps))
        #cv2.imshow('Demo', image)
        #cv2.imshow('', heat)
        #cv2.imshow('', cv2.resize(image, (1024, 600)))
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
        counts = counts + 1
        fps = fps + (1 / (time.time() - prev_time))

    print("average fps: ", fps / counts)
    fi.close()
    cap.release()
    out.release()
    cv2.destroyAllWindows()
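# convertBack() is called above but not defined in this excerpt. A minimal sketch,
# assuming it converts darknet's center-based (x, y, w, h) box to corner coordinates:
def convertBack(x, y, w, h):
    xmin = int(round(x - (w / 2)))
    xmax = int(round(x + (w / 2)))
    ymin = int(round(y - (h / 2)))
    ymax = int(round(y + (h / 2)))
    return xmin, ymin, xmax, ymax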
def main(_argv):
    # Definition of the parameters
    max_cosine_distance = 0.5
    nn_budget = None
    nms_max_overlap = 1.0
    inputIndex = 0

    # initialize deep sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    yolo.load_weights(FLAGS.weights)
    logging.info('weights loaded')

    class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')

    try:
        vid = cv2.VideoCapture(int(FLAGS.video))
    except ValueError:
        vid = cv2.VideoCapture(FLAGS.video)

    out = None
    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))
    list_file = open('detection.txt', 'w')
    frame_index = -1

    fps = 0.0
    count = 0
    while True:
        _, img = vid.read()
        if img is None:
            logging.warning("Empty Frame")
            time.sleep(0.1)
            count += 1
            if count < 3:
                continue
            else:
                break

        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, FLAGS.size)

        t1 = time.time()
        boxes, scores, classes, nums = yolo.predict(img_in)
        classes = classes[0]
        names = []
        for i in range(len(classes)):
            names.append(class_names[int(classes[i])])
        names = np.array(names)
        converted_boxes = convert_boxes(img, boxes[0])
        features = encoder(img, converted_boxes)
        detections = [Detection(bbox, score, class_name, feature)
                      for bbox, score, class_name, feature in zip(converted_boxes, scores[0], names, features)]

        # initialize color map; cmap expects values in [0, 1]
        # (the original np.linspace(0, 20, 30) clamps almost every entry to the same color)
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maxima suppression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()
            if class_name == "Person" or class_name == "person":
                inputIndex += 1
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2)
            # crop of the current person, used by the commented-out histogram/plot code below
            im = img[int(bbox[1]):int(bbox[3]), int(bbox[0]):int(bbox[2])]
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1] - 30)),
                          (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])),
                          color, -1)
            #cv2.imwrite("C:\Yolov3DeepSortPersonID\yolov3_deepsort\data\Cropped" + str(inputIndex) + ".png", im)
            color = ('b', 'g', 'r')
            cv2.putText(img, class_name + "-" + str(track.track_id),
                        (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2)
            ## for channel, col in enumerate(color):  # for histogram
            ##     ax1 = plt.subplot(1, 1, 1)
            ##     #ax2 = plt.subplot(1, 2, 2)
            ##     histr = cv2.calcHist([im], [channel], None, [256], [0, 256])
            ##     #plt.plot(histr, color=col)
            ##     #plt.xlim([0, 256])
            ##     #plt.title('Histogram for color scale picture')
            ##     plt.axis('off')
            ##     ax1.imshow(im)
            ##     #ax2.plot(histr)
            ##     plt.savefig("C:\Yolov3DeepSortPersonID\yolov3_deepsort\data\APlot" + str(inputIndex) + ".png")
            #cv2.imwrite("C:\Yolov3DeepSortPersonID\yolov3_deepsort\data\Final" + str(inputIndex) + ".png", im)

        ### UNCOMMENT BELOW IF YOU WANT CONSTANTLY CHANGING YOLO DETECTIONS TO BE SHOWN ON SCREEN
        #for det in detections:
        #    bbox = det.to_tlbr()
        #    cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)

        # print fps on screen
        fps = (fps + (1. / (time.time() - t1))) / 2
        cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
        cv2.imshow('output', img)

        if FLAGS.output:
            out.write(img)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(converted_boxes) != 0:
                for i in range(0, len(converted_boxes)):
                    list_file.write(str(converted_boxes[i][0]) + ' ' + str(converted_boxes[i][1]) + ' ' +
                                    str(converted_boxes[i][2]) + ' ' + str(converted_boxes[i][3]) + ' ')
            list_file.write('\n')

        # press q to quit
        if cv2.waitKey(1) == ord('q'):
            break

    vid.release()
    if FLAGS.output:  # fixed typo: the original tested FLAGS.ouput, which would raise at shutdown
        out.release()
    list_file.close()
    cv2.destroyAllWindows()
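# convert_boxes() is called above but not defined in this excerpt. A minimal sketch,
# assuming it maps YOLO's normalized (xmin, ymin, xmax, ymax) output to the pixel
# (top-left x, top-left y, width, height) format Deep SORT's Detection expects:
def convert_boxes(image, boxes):
    converted = []
    h, w = image.shape[0], image.shape[1]
    for box in boxes:
        xmin, ymin, xmax, ymax = box[0] * w, box[1] * h, box[2] * w, box[3] * h
        if xmax - xmin <= 0 or ymax - ymin <= 0:
            continue  # skip the zero-padded entries YOLO returns for empty detection slots
        converted.append([int(xmin), int(ymin), int(xmax - xmin), int(ymax - ymin)])
    return converted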
def main(args, hyp):
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0
    counter = []

    # deep_sort
    model_filename = 'saved_model/market1501'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    # select yolo
    YOLO, input_details, output_details, saved_model_loaded = select_yolo(args, hyp)

    video_capture = cv2.VideoCapture(args.video)

    # Define the codec and create VideoWriter object
    if args.write_video:
        w = int(video_capture.get(3))
        h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        out = cv2.VideoWriter('./output/output.avi', fourcc, 15, (w, h))
        list_file = open('detection_rslt.txt', 'w')
        frame_index = -1

    fps = 0.0
    convert_class_name = load_class_name(args.data_root, args.class_file)

    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if not ret:
            break
        t1 = time.time()

        image = np.squeeze(frame)
        img = cv2.cvtColor(image.copy(), cv2.COLOR_BGR2RGB) / 255.0
        h, w, _ = img.shape
        if h != args.img_size or w != args.img_size:
            img = cv2.resize(img, (args.img_size, args.img_size))

        inf_time = time.time()
        boxes, confidence, class_names, valid_detections = model_detection(img, YOLO, args,
                                                                           input_details, output_details)
        print("inf time", time.time() - inf_time)

        tran_time = time.time()
        y_min, x_min, y_max, x_max = convert_to_origin_shape(boxes, None, None, h, w)
        w, h = x_max - x_min, y_max - y_min
        boxes = np.concatenate([x_min, y_min, w, h], -1)
        boxes = tf.squeeze(boxes, 0)  # 100, 4
        class_names = tf.squeeze(class_names, 0)
        print("tran time", time.time() - tran_time)

        st_t = time.time()
        features = encoder(frame, boxes[:valid_detections[0]])
        detections = [Detection(bbox, 1.0, feature) for bbox, feature in zip(boxes, features)]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)
        print("tracker time", time.time() - st_t)

        i = int(0)
        indexIDs = []
        draw_st = time.time()

        for det in detections:
            bbox = det.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            indexIDs.append(int(track.track_id))
            counter.append(int(track.track_id))
            bbox = track.to_tlbr()
            color = [int(c) for c in COLORS[indexIDs[i] % len(COLORS)]]

            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (color), 3)
            cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1] - 50)), 0, 5e-3 * 150, (color), 2)
            if len(class_names) > 0:
                cls = np.int8(class_names[0])
                cv2.putText(frame, str(convert_class_name[cls]), (int(bbox[0]), int(bbox[1] - 20)),
                            0, 5e-3 * 150, (color), 2)
            i += 1

            # center point and motion path (pts is assumed to be a per-track deque list defined at module level)
            center = (int(((bbox[0]) + (bbox[2])) / 2), int(((bbox[1]) + (bbox[3])) / 2))
            pts[track.track_id].append(center)
            thickness = 5
            cv2.circle(frame, (center), 1, color, thickness)

            # draw motion path
            for j in range(1, len(pts[track.track_id])):
                if pts[track.track_id][j - 1] is None or pts[track.track_id][j] is None:
                    continue
                thickness = int(np.sqrt(64 / float(j + 1)) * 2)
                cv2.line(frame, (pts[track.track_id][j - 1]), (pts[track.track_id][j]), (color), thickness)

            if args.write_video:
                list_file.write(str(frame_index) + ',')
                list_file.write(str(track.track_id) + ',')
                b0 = str(bbox[0])
                b1 = str(bbox[1])
                b2 = str(bbox[2] - bbox[0])
                b3 = str(bbox[3] - bbox[1])
                list_file.write(str(b0) + ',' + str(b1) + ',' + str(b2) + ',' + str(b3) + '\n')

        cv2.putText(frame, "Current Box Counter: " + str(i), (int(20), int(80)), 0, 5e-3 * 200, (0, 255, 0), 2)
        cv2.putText(frame, "FPS: %f" % (fps), (int(20), int(40)), 0, 5e-3 * 200, (0, 255, 0), 3)
        cv2.namedWindow("YOLO4_Deep_SORT", 0)
        cv2.resizeWindow('YOLO4_Deep_SORT', 1024, 768)
        cv2.imshow('YOLO4_Deep_SORT', frame)

        if args.write_video:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1

        fps = (fps + (1. / (time.time() - t1))) / 2
        print("draw_time", time.time() - draw_st)

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    video_capture.release()
    cv2.destroyAllWindows()
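# `pts` is used above but defined elsewhere in this project. A minimal sketch of the
# usual setup: one bounded deque of recent centroids per track id, so old path points
# age out automatically as new ones arrive.
from collections import deque
pts = [deque(maxlen=30) for _ in range(9999)]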
def release(stack):  # fixed typo: the original was named `realse`
    # Work around GPU memory over-allocation
    import tensorflow as tf
    os.environ["CUDA_VISIBLE_DEVICES"] = '0'
    config = tf.ConfigProto()
    #config.gpu_options.per_process_gpu_memory_fraction = 0.3
    config.gpu_options.allow_growth = True
    session = tf.Session(config=config)

    print('Begin to get frame......')
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # deep_sort
    model_filename = 'mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    # mtcnn
    detectors = mtcnn('onet')
    mtcnnDetector = MtcnnDetector(detectors=detectors, min_face_size=24, threshold=[0.9, 0.6, 0.7])

    while True:
        if len(stack) >= 20:
            frame = stack.pop()
            #frame = cv2.resize(frame, (int(frame.shape[1]/3), int(frame.shape[0]/3)))
            frame = np.array(frame)

            # `tmp` holds [x, y, w, h]: x, y are the top-left corner of the detection box,
            # w and h its width and height. The raw MTCNN output is a pair of corner points
            # plus a face probability, i.e. [x1, y1, x2, y2, confidence].
            tmp, _ = mtcnnDetector.detect_follow(frame)

            features = encoder(frame, tmp)
            detections = [Detection(bbox, 1.0, feature) for bbox, feature in zip(tmp, features)]

            # Run non-maxima suppression.
            boxes = np.array([d.tlwh for d in detections])
            scores = np.array([d.confidence for d in detections])
            indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
            detections = [detections[i] for i in indices]

            # Call the tracker
            tracker.predict()
            tracker.update(detections)

            for track in tracker.tracks:
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                bbox = track.to_tlbr()
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (0, 255, 0), 1)
                cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 0.5, (0, 255, 0), 2)

            for det in detections:
                bbox = det.to_tlbr()
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (0, 0, 255), 1)

            cv2.imshow("Detected", frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

    cv2.destroyAllWindows()
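# release(stack) above pops frames from a shared container filled elsewhere. A minimal
# producer sketch, assuming `stack` is a plain list shared with this consumer; the
# video source and buffer sizes are illustrative.
def capture(stack, source="video.mp4", max_len=100):
    cap = cv2.VideoCapture(source)
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        stack.append(frame)
        if len(stack) > max_len:
            del stack[:-20]  # keep only the newest frames so the consumer never lags far behind
    cap.release()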
def main(yolo):
    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # deep_sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    writeVideo_flag = True
    video_capture = cv2.VideoCapture("video.mp4")

    if writeVideo_flag:
        # Define the codec and create VideoWriter object
        w = int(video_capture.get(3))
        h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter('output.avi', fourcc, 15, (w, h))
        list_file = open('detection.txt', 'w')
        frame_index = -1

    fps = 0.0
    #polys = np.array([[700, 40], [400, 950], [800, 1080], [1880, 1080], [1300, 0]], dtype=np.int32)
    count = 0

    while True:
        ret, f = video_capture.read()  # frame shape 640*480*3
        if ret != True:  # check before touching the frame (the original read f.shape first and crashed at EOF)
            break

        rows, cols = f.shape[:2]
        M = cv2.getRotationMatrix2D((cols / 2, rows / 2), 0, 1.2)
        #f = f * mask
        frame = cv2.warpAffine(f.astype(np.uint8), M, (cols, rows))
        frame = cv2.resize(frame, (700, 500))
        cv2.line(frame, (0, 250), (700, 250), (255, 255, 0), 3)
        #frame = frame[120:300+300, 60:300+500]

        t1 = time.time()

        image = Image.fromarray(frame)
        boxs = yolo.detect_image(image)
        #print("box_num", len(boxs))
        features = encoder(frame, boxs)

        # score to 1.0 here).
        detections = [Detection(bbox, 1.0, feature) for bbox, feature in zip(boxs, features)]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200, (0, 255, 0), 2)

            # find the object's centroid
            CoordXCentroid = int(bbox[0] + bbox[2]) / int(2)
            CoordYCentroid = int(bbox[1] + bbox[3]) / int(2)
            ObjectCentroid = (int(CoordXCentroid), int(CoordYCentroid))
            cv2.circle(frame, ObjectCentroid, 1, (255, 255, 255), 5)

            # count an entrance when the centroid is within 5 px of the counting line (y = 250)
            distance = abs(250 - CoordYCentroid)
            if (distance <= 5):
                count = count + 1

        cv2.putText(frame, "Entrance: {}".format(count), (400, 50), 0, 5e-3 * 200, (255, 255, 255), 2)

        for det in detections:
            bbox = det.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)

        cv2.imshow('', frame)

        if writeVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(boxs) != 0:
                for i in range(0, len(boxs)):
                    list_file.write(str(boxs[i][0]) + ' ' + str(boxs[i][1]) + ' ' +
                                    str(boxs[i][2]) + ' ' + str(boxs[i][3]) + ' ')
            list_file.write('\n')

        fps = (fps + (1. / (time.time() - t1))) / 2
        print("fps= %f" % (fps))

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    video_capture.release()
    if writeVideo_flag:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
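# The 5 px band above can count the same person several times while they linger on the
# counting line. A minimal de-duplication sketch: remember each track's last side of
# the line and count only on an actual side change. `counted_side` is a new helper dict,
# not part of the original script.
counted_side = {}

def crossed_line(track_id, centroid_y, line_y=250):
    side = 'above' if centroid_y < line_y else 'below'
    crossed = counted_side.get(track_id) not in (None, side)
    counted_side[track_id] = side
    return crossed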
def get_detect(id_video):
    from collections import deque
    dq = deque(maxlen=1)
    t_reader = threading.Thread(target=read_video, args=(dq,))  # renamed: `t1` was reused below as a timer
    t_reader.start()
    # myout = save_video(video_reader, "./video.mp4", sz)

    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0
    counter = []
    my_track_dict = {}       # save the info of each track_id
    track_smooth_dict = {}   # smooth the displayed boxes
    pts = [deque(maxlen=30) for _ in range(9999)]

    # deep_sort
    model_filename = 'model_data/market1501.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    list_file = open('detection_rslt.txt', 'w')
    my_num = 0
    num = 0
    t1 = time.time()
    fps = 0
    fps1 = time.time()

    while True:
        if dq:
            img = dq.pop()
        else:
            key = cv2.waitKey(20)
            continue

        # TEST FPS
        fps2 = time.time()
        fps += 1
        if fps2 - fps1 > 1:
            print(fps)
            fps = 0
            fps1 = time.time()

        start_time = time.time()
        num += 1
        #frame = cv2.imread(filename)
        # 1. read a file path that may contain non-ASCII (e.g. Chinese) characters
        #img = cv2.imdecode(np.fromfile(filename, dtype=np.uint8), cv2.IMREAD_COLOR)
        img_h, img_w, img_ch = img.shape
        print(img.shape)

        # 2. keep clean copies so the drawn boxes are not cropped into later crops or inference inputs
        show_image = img.copy()
        frame = img.copy()

        # person detection
        boxs, confidence, class_names = [], [], []
        out = preson_detect(img)  # note: `preson_detect` is the detector's (misspelled) name elsewhere in this project

        # transform the object detection output into tracker input
        for i in range(len(out)):
            # ======== my_setting ==============
            if out[i, 2] > 0.7:
                # print(out[i])
                left = int(out[i, 3] * img_w)
                top = int(out[i, 4] * img_h)
                p_w = int(out[i, 5] * img_w - out[i, 3] * img_w)
                p_h = int(out[i, 6] * img_h - out[i, 4] * img_h)
                right = left + p_w
                bottom = top + p_h
                # only keep persons whose foot point lies inside the configured area
                point1 = [int((left + right) / 2), bottom]
                my_index = inner_point(point1)
                if my_index:
                    boxs.append([left, top, p_w, p_h])
                    class_names.append("person")
                    confidence.append(out[i, 2])

        # start the tracker
        features = encoder(frame, boxs)
        # score to 1.0 here).
        detections = [Detection(bbox, 1.0, feature) for bbox, feature in zip(boxs, features)]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        i = int(0)
        indexIDs = []

        # attribute re-detection timer
        t2 = time.time()
        detect_time = t2 - t1
        # ======== my_setting ==============
        control = 1  # re-detect attributes at most once per `control` seconds
        if detect_time > control:
            t1 = time.time()

        for det, track in zip(detections, tracker.tracks):
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            #print(track.track_id)
            pbox = det.to_tlbr()
            #cv2.rectangle(frame, (int(pbox[0]), int(pbox[1])), (int(pbox[2]), int(pbox[3])), (255, 255, 255), 2)
            my_key = str(int(track.track_id))

            # ======== my_setting ==============
            # re-run attribute detection for new track ids, or when the timer has expired
            if my_key not in my_track_dict.keys() or detect_time > control:
                print(my_key)
                print(my_track_dict.keys())
                # process the person box
                label_dict = person_detect(img, pbox)
                if not label_dict:
                    continue
                my_track_dict[my_key] = label_dict

            frame = draw_person_attr(frame, my_track_dict[my_key], pbox)

            indexIDs.append(int(track.track_id))
            counter.append(int(track.track_id))
            bbox = track.to_tlbr()
            color = [int(c) for c in COLORS[indexIDs[i] % len(COLORS)]]
            #center_loc = [int((bbox[0]+bbox[2])/2), int((bbox[1]+bbox[3])/2)]

            # smooth the drawn box by averaging it with the previous frame's box
            if my_key not in track_smooth_dict.keys():
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (color), 3)
                track_smooth_dict[my_key] = bbox
            else:
                fbox = track_smooth_dict[my_key]
                a = int((bbox[0] + fbox[0]) / 2)
                b = int((bbox[1] + fbox[1]) / 2)
                c = int((bbox[2] + fbox[2]) / 2)
                d = int((bbox[3] + fbox[3]) / 2)
                cv2.rectangle(frame, (a, b), (c, d), (color), 3)
                track_smooth_dict[my_key] = bbox
            #cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (color), 3)

            cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1] - 50)), 0, 5e-3 * 150, (color), 2)
            if len(class_names) > 0:
                class_name = class_names[0]
                cv2.putText(frame, str(class_names[0]), (int(bbox[0]), int(bbox[1] - 20)), 0, 5e-3 * 150, (color), 2)
            i += 1

        count = len(set(counter))

        # draw the guide lines
        draw_muti(frame)
        cv2.putText(frame, "Total Pedestrian Counter: " + str(count), (int(20), int(120)), 0, 5e-3 * 200, (0, 255, 0), 2)
        cv2.putText(frame, "Current Pedestrian Counter: " + str(i), (int(20), int(80)), 0, 5e-3 * 200, (0, 255, 0), 2)
        #cv2.putText(frame, "FPS: %f" % (fps), (int(20), int(40)), 0, 5e-3 * 200, (0, 255, 0), 3)
        #cv2.namedWindow("YOLO4_Deep_SORT", 0)
        #cv2.resizeWindow('YOLO4_Deep_SORT', 640, 480)
        #cv2.imshow('YOLO4_Deep_SORT', frame)
        #myout.write(frame)

        frame = cv2.resize(frame, (640, 360))
        ret2, jpeg = cv2.imencode('.jpg', frame)
        # multipart MJPEG stream part (the original part header `application/octet-stream: image/jpeg` was malformed)
        yield (b'--frame\r\n'
               b'Content-Type: image/jpeg\r\n\r\n' + jpeg.tobytes() + b'\r\n\r\n')

        end_time = time.time()
        my_one_time = (end_time - start_time) * 1000
        print("====={}=====".format(num), my_one_time)
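# inner_point() is called above but not defined in this excerpt. A minimal sketch,
# assuming it tests whether a foot point lies inside a configured polygon; the polygon
# coordinates here are placeholders.
DETECT_AREA = np.array([[100, 100], [540, 100], [540, 350], [100, 350]], dtype=np.int32).reshape((-1, 1, 2))

def inner_point(point):
    # pointPolygonTest returns +1 inside, 0 on the edge, -1 outside
    return cv2.pointPolygonTest(DETECT_AREA, (float(point[0]), float(point[1])), False) >= 0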
def eval(data_dir, output_dir, e_type='train', show=False, save_video=False):
    # Definition of the parameters
    max_cosine_distance = 0.5
    nn_budget = None

    # deep_sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)

    mot = Mot(data_dir, e_type)
    for index, det_dir in enumerate(mot.map_dir):
        seqs = os.listdir(det_dir + '/img1')
        dets = mot.load_detections(det_dir + '/det/det.txt')
        seqinfo = det_dir + '/seqinfo.ini'
        with open(seqinfo, 'r') as info:
            lines = info.readlines()
            lines = [line.split('=') for line in lines]
            lines = {line[0]: line[1] for line in lines if len(line) > 1}
        w, h, ext, frame_rate = (int(lines['imWidth']), int(lines['imHeight']),
                                 lines['imExt'], int(lines['frameRate']))

        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        out = cv2.VideoWriter(mot.sqm[index] + '.avi', fourcc, 7, (w, h))
        tracker = Tracker(metric, img_shape=(w, h), max_eu_dis=0.1 * np.sqrt((w**2 + h**2)))

        with open(output_dir + mot.sqm[index] + '.txt', 'w') as f:
            for seq in seqs:
                print('track {}'.format(seq))
                frame_id = int(seq.strip(ext))
                img = np.asarray(Image.open(det_dir + '/img1/' + seq))
                bboxes, scores = mot.detection_from_frame_id(dets, frame_id)
                features = encoder(img, bboxes)
                detections = [Detection(bbox_and_feature[0], scores[idx], bbox_and_feature[1])
                              for idx, bbox_and_feature in enumerate(zip(bboxes, features))]

                tracker.predict()
                tracks_dets = tracker.update(detections, np.asarray(img))

                frame = cv2.imread(det_dir + '/img1/' + seq)
                for td in tracks_dets:
                    t = td[0]
                    d = detections[td[1]].tlwh
                    f.write('{},{},{},{},{},{},{},{},{},{}\n'.format(
                        frame_id, t, d[0], d[1], d[2], d[3], -1, -1, -1, -1))
                    if show:
                        bbox = detections[td[1]].to_tlbr()
                        cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                                      (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
                        cv2.putText(frame, "{}".format(t), (int(bbox[0]), int(bbox[1])),
                                    0, 5e-3 * 200, (0, 255, 0), 2)

                if show:
                    cv2.imshow('', frame)
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break
                if save_video:
                    out.write(frame)

        if save_video:
            out.release()
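# Example invocation for the evaluator above, assuming a MOTChallenge-style layout
# (data_dir/<sequence>/img1, det/det.txt, seqinfo.ini); the paths are illustrative.
if __name__ == '__main__':
    eval(data_dir='./MOT16/train/', output_dir='./results/', e_type='train',
         show=False, save_video=True)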
class ObjectTracking():
    def __init__(self, name, metric, yolo_interface, model_filename,
                 nms_max_overlap, image_sub, image_pub, detected_pub):
        # Set the shutdown function (stop the robot)
        # rospy.on_shutdown(self.shutdown)
        self.name = name
        self._cv_bridge = CvBridge()
        self._max_overlap = nms_max_overlap
        # deep_sort
        self._encoder = gdet.create_box_encoder(model_filename, batch_size=1)
        self._tracker = Tracker(metric)
        self._yolo_interface = yolo_interface
        self._fps = 0.0
        self._image_sub = rospy.Subscriber(image_sub, ROSImage, self.image_callback, queue_size=1)
        # self.thermal_sub = rospy.Subscriber(THERMAL_TOPIC, ROSImage, self.thermal_callback, queue_size=1)
        self._image_pub = rospy.Publisher(image_pub, ROSImage, queue_size=1)
        self._detected_pub = rospy.Publisher(detected_pub, DetectedFull, queue_size=1)
        # self._cv_image = None
        self._image_msg = None
        self._image_updated = False
        self.thread = threading.Thread(target=self.process, name=name)
        # self.lock = lock
        rospy.loginfo('##################### ' + name + ' Initialization Finished! #####################')

    def image_callback(self, image_msg):
        # self._cv_image = self._cv_bridge.imgmsg_to_cv2(image_msg, "bgr8")
        self._image_msg = image_msg
        self._image_updated = True
        # rospy.loginfo('call back in ' + self.name)

    def process(self):
        while not rospy.is_shutdown():
            now_time = time.time()
            if not self._image_updated:
                rospy.loginfo('No image in ' + self.name +
                              ' yet! Please check the existence of the subscribed topic.')
                rospy.sleep(2.13)
                continue
            self._image_updated = False
            image_msg = self._image_msg
            cv_image = self._cv_bridge.imgmsg_to_cv2(image_msg, "bgr8")
            image = Image.fromarray(cv_image)
            # self.lock.acquire()
            boxs = self._yolo_interface.detect_image(image)
            # self.lock.release()
            # print("box_num", len(boxs))
            features = self._encoder(cv_image, boxs)

            # score to 1.0 here
            detections = [Detection(bbox, 1.0, feature) for bbox, feature in zip(boxs, features)]

            # Run non-maxima suppression.
            boxes = np.array([d.tlwh for d in detections])
            scores = np.array([d.confidence for d in detections])
            indices = preprocessing.non_max_suppression(boxes, self._max_overlap, scores)
            detections = [detections[i] for i in indices]

            # Call the _tracker
            self._tracker.predict()
            self._tracker.update(detections)

            detected_array = DetectedArray()
            detected_full = DetectedFull()
            detected_full.image = image_msg
            detected_array.size = 0

            def range_check(x, min_v, max_v):
                if x < min_v:
                    x = min_v
                if x > max_v:
                    x = max_v
                return x

            for track in self._tracker.tracks:
                # keep only confirmed, recently updated tracks
                # (the original `if track.is_confirmed() and ...` dropped exactly the wrong set)
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                bbox = track.to_tlbr()
                detected = Detected()
                detected.object_class = 'Person'
                detected.num = np.uint32(track.track_id)
                detected.p = 1.0
                detected.x = np.uint16(range_check(int(bbox[0]), 0, cv_image.shape[1]))
                detected.y = np.uint16(range_check(int(bbox[1]), 0, cv_image.shape[0]))
                detected.width = np.uint16(range_check(int(bbox[2]) - int(bbox[0]), 0, cv_image.shape[1]))
                detected.height = np.uint16(range_check(int(bbox[3]) - int(bbox[1]), 0, cv_image.shape[0]))

                cv2.rectangle(cv_image, (int(detected.x), int(detected.y)),
                              (int(detected.x + detected.width), int(detected.y + detected.height)),
                              (255, 0, 0), 2)
                cv2.putText(cv_image, 'Person:' + str(track.track_id),
                            (int(detected.x), int(detected.y)), 0, 5e-3 * 100, (0, 255, 0), 2)
                detected_array.size = detected_array.size + 1
                detected_array.data.append(detected)

            # for det in detections:
            #     bbox = det.to_tlbr()
            #     cv2.rectangle(cv_image, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 0), 2)
            # cv2.imshow('', cv_image)

            detected_full.header.stamp = image_msg.header.stamp
            detected_full.detections = detected_array
            if self._detected_pub.get_num_connections() > 0:
                self._detected_pub.publish(detected_full)

            ros_image = self._cv_bridge.cv2_to_imgmsg(cv_image)
            if self._image_pub.get_num_connections() > 0:
                self._image_pub.publish(ros_image)

            self._fps = (self._fps + (1. / (time.time() - now_time))) / 2
            rospy.loginfo(self.name + " processing fps = %f" % (self._fps))

        rospy.loginfo("Exiting " + self.name + " object tracking...")
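# Minimal usage sketch for the class above; the topic names, detector object and
# metric setup mirror the constructor arguments but are illustrative.
if __name__ == '__main__':
    rospy.init_node('deep_sort_tracking')
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", 0.2, None)
    node = ObjectTracking('tracker', metric, YOLO(), 'model_data/mars-small128.pb',
                          1.0, 'image_raw', 'image_tracked', 'objects_detected')
    node.thread.start()  # the processing loop runs until rospy shuts down
    rospy.spin()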
def yolo_frames(unique_name):
    device = unique_name[1]

    # Definition of the parameters
    max_cosine_distance = 0.7
    nn_budget = None
    nms_max_overlap = 3

    # Deep SORT
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    yolo = YOLO()

    show_detections = True    # show detection boxes in blue
    writeVideo_flag = False   # record video output
    defaultSkipFrames = 2     # skipped frames between detections

    # set up the door region
    H1 = 210
    W1 = 370
    H2 = 235
    W2 = 470
    H = None
    W = None
    R = 80  # min R is 56

    def solve_quadratic_equation(a, b, c):
        """ax2 + bx + c = 0"""
        delta = b**2 - 4 * a * c
        if delta < 0:
            print("The equation has no real roots!")
        elif delta == 0:
            return -b / (2 * a)
        else:
            print("The equation has two distinct roots!")
            if float((-b - sqrt(delta)) / (2 * a)) > float((-b + sqrt(delta)) / (2 * a)):
                return float((-b - sqrt(delta)) / (2 * a))
            else:
                return float((-b + sqrt(delta)) / (2 * a))

    def setup_door(H1, W1, H2, W2, R):
        # step 1: find the midpoint I of (W1, H1) and (W2, H2)
        I1 = (W1 + W2) / 2
        I2 = (H1 + H2) / 2
        # find the vector AB
        u1 = W2 - W1
        u2 = H2 - H1
        # AB is the normal vector of the bisector d, so the perpendicular
        # bisector of AB is y = -(u1 / u2) * x - c / u2
        c = -u1 * I1 - u2 * I2  # solve for c
        # step 2: find the circle center O
        al = c / u2 + I2
        # compute D: the distance between I and O
        fi = acos(sqrt((I1 - W1)**2 + (I2 - H1)**2) / R)
        D = sqrt((I1 - W1)**2 + (I2 - H1)**2) * tan(fi)
        O1 = solve_quadratic_equation((1 + u1**2 / u2**2),
                                      2 * (-I1 + u1 / u2 * al),
                                      al**2 - D**2 + I1**2)
        O2 = -u1 / u2 * O1 - c / u2
        # of the two roots, keep only the upper one
        # step 3: enumerate the points on the circle arc spanning the door
        door_dict = dict()
        for w in range(W1, W2):
            h = O2 + sqrt(R**2 - (w - O1)**2)
            door_dict[w] = round(h)
        return door_dict

    door_dict = setup_door(H1, W1, H2, W2, R)

    totalFrames = 0
    totalIn = 0

    # create an empty list of centroids to count traffic
    pts = [deque(maxlen=30) for _ in range(9999)]

    fps_imutils = fps_callback().start()
    get_feed_from = ('camera', device)

    while True:
        cam_id, frame = BaseCamera.get_frame(get_feed_from)
        # Resize frame
        # frame = cv2.resize(frame, (736, 480))
        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb

        # if the frame dimensions are empty, set them
        if W is None or H is None:
            (H, W) = frame.shape[:2]

        # Draw the door boundary
        for w in range(W1, W2):
            cv2.circle(frame, (w, door_dict[w]), 1, (0, 255, 255), -1)
        cv2.circle(frame, (W1, H1), 4, (0, 0, 255), -1)
        cv2.circle(frame, (W2, H2), 4, (0, 0, 255), -1)
        cv2.circle(frame, (204, 201), 4, (0, 0, 255), -1)

        if True:  # totalFrames % defaultSkipFrames == 0:
            # t2 = time.time()
            boxes, confidence, classes = yolo.detect_image(image)  # average time: 1.2s
            # print(time.time() - t2)
            features = encoder(frame, boxes)
            detections = [Detection(bbox, confidence, cls, feature)
                          for bbox, confidence, cls, feature in zip(boxes, confidence, classes, features)]

            # Run non-maxima suppression.
            boxes = np.array([d.tlwh for d in detections])
            scores = np.array([d.confidence for d in detections])
            classes = np.array([d.cls for d in detections])
            indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
            detections = [detections[i] for i in indices]

            # Call the tracker
            tracker.predict()
            tracker.update(detections)
        # else:
        #     # Call the tracker
        #     tracker.predict()
        #     tracker.update(detections)

        for det in detections:
            bbox = det.to_tlbr()
            if show_detections and len(classes) > 0:
                det_cls = det.cls
                score = "%.2f" % (det.confidence * 100) + "%"
                cv2.putText(frame, str(det_cls) + " " + score, (int(bbox[0]), int(bbox[3]) - 10),
                            0, 1e-3 * frame.shape[0], (0, 255, 0), 1)
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 1)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 40:
                continue
            bbox = track.to_tlbr()
            # if not_count_staff(frame, int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])):
            #     # adc = "%.2f" % (track.adc * 100) + "%"  # Average detection confidence
            #     cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (0, 255, 255), 1)
            #     cv2.putText(frame, "STAFF", (int(bbox[0]), int(bbox[1]) - 10), 0,
            #                 1e-3 * frame.shape[0], (0, 0, 255), 1)
            #     continue
            # adc = "%.2f" % (track.adc * 100) + "%"  # Average detection confidence
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 1)
            cv2.putText(frame, "ID: " + str(track.track_id), (int(bbox[0]), int(bbox[1])),
                        0, 1e-3 * frame.shape[0], (0, 255, 0), 1)

            x = [c[0] for c in pts[track.track_id]]
            y = [c[1] for c in pts[track.track_id]]
            under_door = True
            centroid_x = int(((bbox[0]) + (bbox[2])) / 2)
            centroid_y = int(((bbox[1]) + (bbox[3])) / 2)

            # checker: count a person entering the store
            if not track.Counted and centroid_x in range(W1, W2):
                # if every recent centroid sits inside the door span but above the arc,
                # the person is already inside the store, so skip the through-door check
                if all(u[0] in range(W1, W2) for u in pts[track.track_id]):
                    if all(u[1] < door_dict[u[0]] for u in pts[track.track_id]):
                        under_door = False
                '''
                check conditions:
                1. the person must move up: centroid_y < np.mean(y)
                2. the person must pass the door circle: door_dict[centroid_x] > centroid_y
                3. the person must move at least 20 px horizontally: np.max(x) - np.min(x) > 20
                4. the person must have at least one centroid under the door
                '''
                if centroid_y < np.mean(y) and door_dict[centroid_x] > centroid_y and np.max(x) - np.min(x) > 20 \
                        and under_door:
                    totalIn += 1
                    track.Counted = True
                    print(track.track_id, track.Counted)

            cv2.circle(frame, (centroid_x, centroid_y), 4, (0, 255, 0), -1)
            pts[track.track_id].append((centroid_x, centroid_y))

        info = [("Time", "{:.4f}".format(fps_imutils.update_time())), ("In", totalIn)]

        # loop over the info tuples and draw them on our frame
        for (i, (k, v)) in enumerate(info):
            text = "{}: {}".format(k, v)
            cv2.putText(frame, text, (W - 150, ((i * 20) + 20)), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)

        yield cam_id, frame
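# A quick check of the quadratic helper above: x^2 - 3x + 2 = 0 has roots 1 and 2, and
# the helper returns the larger one.
assert solve_quadratic_equation(1, -3, 2) == 2.0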
def main(yolo): # Determining the FPS of a video having variable frame rate # cv2.CAP_PROP_FPS is not used since it returns 'infinity' for variable frame rate videos filename = "clip1.mp4" # Determining the total duration of the video clip = VideoFileClip(filename) cap2 = cv2.VideoCapture(filename) co = 0 ret2 = True while ret2: ret2, frame2 = cap2.read() # Determining the total number of frames co += 1 cap2.release() # Computing the average FPS of the video Input_FPS = co / clip.duration # Definition of the parameters max_cosine_distance = 0.3 nn_budget = None nms_max_overlap = 1.0 frame_count = 0 # Implementing Deep Sort algorithm model_filename = 'model_data/mars-small128.pb' encoder = gdet.create_box_encoder(model_filename,batch_size=1) # Cosine distance is used as the metric metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) tracker = Tracker(metric) video_capture = cv2.VideoCapture(filename) # Define the codec and create a VideoWriter object to save the output video out = cv2.VideoWriter('output.mp4', cv2.VideoWriter_fourcc(*'MP4V'), Input_FPS, (int(video_capture.get(3)), int(video_capture.get(4)))) # To calculate the frames processed by the deep sort algorithm per second fps = 0.0 # Initializing empty variables for counting and tracking purpose queue_track_dict = {} # Count time in queue alley_track_dict = {} # Count time in alley store_track_dict = {} # Count total time in store latest_frame = {} # Track the last frame in which a person was identified reidentified = {} # Yes or No : whether the person has been re-identified at a later point in time plot_head_count_store = [] # y-axis for Footfall Analysis plot_head_count_queue = [] # y-axis for Footfall Analysis plot_time = [] # x-axis for Footfall Analysis # Loop to process each frame and track people while True: ret, frame = video_capture.read() if ret != True: break maxIntensity = 255.0 phi = 1 theta = 1 newImage1 = (maxIntensity/phi)*(frame/(maxIntensity/theta))**1.3 frame = array(newImage1,dtype=uint8) cv2.imwrite('testing.jpg', frame) if frame_count == 5000: break head_count_store = 0 head_count_queue = 0 t1 = time.time() image = Image.fromarray(frame[...,::-1]) # BGR to RGB conversion boxs = yolo.detect_image(image) features = encoder(frame,boxs) # Getting the detections having score of 0.0 to 1.0 detections = [Detection(bbox, 1.0, feature) for bbox, feature in zip(boxs, features)] # Run non-maxima suppression on the bounding boxes boxes = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores) detections = [detections[i] for i in indices] # Call the tracker to associate tracking boxes to detection boxes tracker.predict() tracker.update(detections) # Defining the co-ordinates of the area of interest pts2 = np.array([[380, 250], [380, 360], [170, 480], [0, 480], [0, 380]], np.int32) pts2 = pts2.reshape((-1,1,2)) # Queue Area pts = np.array([[0, 380], [0, 0], [640, 0], [640, 480], [170, 480], [380, 360], [380, 250]], np.int32) pts = pts.reshape((-1,1,2)) # Alley Region cv2.polylines(frame, [pts2], True, (0,255,255), thickness=2) cv2.polylines(frame, [pts], True, (255,0,255), thickness=1) # Drawing tracker boxes and frame count for people inside the areas of interest for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue bbox = track.to_tlbr() # Checking if the person is within an area of interest queue_point_test = 
center_point_inside_polygon(bbox, pts2) alley_point_test = center_point_inside_polygon(bbox, pts) # Checking if a person has been reidentified in a later frame if queue_point_test == 'inside' or alley_point_test == 'inside': if track.track_id in latest_frame.keys(): if latest_frame[track.track_id] != frame_count - 1: reidentified[track.track_id] = 1 # Initializing variables incase a new person has been seen by the model if queue_point_test == 'inside' or alley_point_test == 'inside': head_count_store += 1 if track.track_id not in store_track_dict.keys(): store_track_dict[track.track_id] = 0 queue_track_dict[track.track_id] = 0 alley_track_dict[track.track_id] = 0 reidentified[track.track_id] = 0 # Processing for people inside the Queue Area if queue_point_test == 'inside': head_count_queue += 1 queue_track_dict[track.track_id] += 1 latest_frame[track.track_id] = frame_count cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),(255,255,255), 2) wait_time = round((queue_track_dict[track.track_id] / Input_FPS), 2) cv2.putText(frame, str(track.track_id) + ": " + str(wait_time) + "s", (int(bbox[0]), int(bbox[1])), 0, 0.8, (0, 0, 0), 4) cv2.putText(frame, str(track.track_id) + ": " + str(wait_time) + "s", (int(bbox[0]), int(bbox[1])), 0, 0.8, (0, 255, 77), 2) # Processing for people inside the Alley Region if alley_point_test == 'inside': alley_track_dict[track.track_id] += 1 latest_frame[track.track_id] = frame_count cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),(255,255,255), 2) cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 0.8, (0, 0, 0), 4) cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 0.8, (0, 255, 77), 2) # Getting the total Store time for a person if track.track_id in store_track_dict.keys(): store_track_dict[track.track_id] = queue_track_dict[track.track_id] + alley_track_dict[track.track_id] # Drawing bounding box detections for people inside the store for det in detections: bbox = det.to_tlbr() # Checking if the person is within an area of interest queue_point_test = center_point_inside_polygon(bbox, pts) alley_point_test = center_point_inside_polygon(bbox, pts2) # if queue_point_test == 'inside' or alley_point_test == 'inside': # cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255,0,0), 2) # Video Overlay - Head Count Data at that instant cv2.putText(frame, "Count: " + str(head_count_store), ( 30, 610 ), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1.5, (0, 0, 0), 3, cv2.LINE_AA, False) cv2.putText(frame, "Count: " + str(head_count_store), ( 30, 610 ), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1.5, (0, 255, 77), 2, cv2.LINE_AA, False) # Calculating the average wait time in queue total_people = len([v for v in queue_track_dict.values() if v > 0]) total_queue_frames = sum(v for v in queue_track_dict.values() if v > 0) avg_queue_frames = 0 if total_people != 0: avg_queue_frames = total_queue_frames / total_people avg_queue_time = round((avg_queue_frames / Input_FPS), 2) # Video Overlay - Average Wait Time in Queue cv2.putText(frame, "Avg Queue Time: " + str(avg_queue_time) + 's', ( 30, 690 ), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1.5, (0, 0, 0), 3, cv2.LINE_AA, False) cv2.putText(frame, "Avg Queue Time: " + str(avg_queue_time) + 's', ( 30, 690 ), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1.5, (0, 255, 77), 2, cv2.LINE_AA, False) # Calculating the average wait time in the store total_people = len(store_track_dict) total_store_frames = sum(store_track_dict.values()) 
avg_store_frames = 0 if total_people != 0: avg_store_frames = total_store_frames / total_people avg_store_time = round((avg_store_frames / Input_FPS), 2) # Video Overlay - Average Store time cv2.putText(frame, "Avg Store Time: " + str(avg_store_time) + 's', ( 30, 650 ), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1.5, (0, 0, 0), 3, cv2.LINE_AA, False) cv2.putText(frame, "Avg Store Time: " + str(avg_store_time) + 's', ( 30, 650 ), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1.5, (0, 255, 77), 2, cv2.LINE_AA, False) # Write the frame onto the VideoWriter object out.write(frame) # Calculating the frames processed per second by the model fps = ( fps + (1./(time.time()-t1)) ) / 2 frame_count += 1 # Printing processing status to track completion op = "FPS_" + str(frame_count) + "/" + str(co) + ": " + str(round(fps, 2)) print("\r" + op , end = "") # Adding plot values for Footfall Analysis every 2 seconds (hard coded for now) if frame_count % 50 == 0: plot_time.append(round((frame_count / Input_FPS), 2)) plot_head_count_store.append(head_count_store) plot_head_count_queue.append(head_count_queue) # Press Q to stop the video if cv2.waitKey(1) & 0xFF == ord('q'): break # Data Processed as per the video provided print("\n-----------------------------------------------------------------------") print("QUEUE WAIT TIME ( Unique Person ID -> Time spent )\n") for k, v in queue_track_dict.items(): print(k, "->", str(round((v/Input_FPS), 2)) + " seconds") print("\n-----------------------------------------------------------------------") print("ALLEY TIME ( Unique Person ID -> Time spent )\n") for k, v in alley_track_dict.items(): print(k, "->", str(round((v/Input_FPS), 2)) + " seconds") print("\n-----------------------------------------------------------------------") print("STORE TIME ( Unique Person ID -> Time spent )\n") for k, v in store_track_dict.items(): print(k, "->", str(round((v/Input_FPS), 2)) + " seconds") # Defining data to be written into the csv file - Detailed Report csv_columns = ['Unique Person ID', 'Queue Time in AOI', 'Total Store Time', 'Re-Identified'] csv_data = [] csv_row = {} detailed_csv_file = 'Detailed_Store_Report.csv' for k, v in store_track_dict.items(): csv_row = {} if reidentified[k] == 1: reid = 'Yes' else: reid = 'No' csv_row = {csv_columns[0]: k, csv_columns[1]: round((queue_track_dict[k] / Input_FPS), 2), csv_columns[2]: round((v / Input_FPS), 2), csv_columns[3]: reid} csv_data.append(csv_row) # Writing the data into the csv file - Detailed Report with open(detailed_csv_file, 'w') as csvfile: writer = csv.DictWriter(csvfile, fieldnames=csv_columns) writer.writeheader() for data in csv_data: writer.writerow(data) # Defining data to be written into the csv file - Brief Report csv_columns_brief = ['Total Head Count', 'Total Queue Time', 'Average Queue Time', 'Total Store Time', 'Average Store Time'] brief_csv_file = 'Brief_Store_Report.csv' csv_data_brief = {csv_columns_brief[0]: len(store_track_dict), csv_columns_brief[1]: round((sum(queue_track_dict.values()) / Input_FPS), 2), csv_columns_brief[2]: avg_queue_time, csv_columns_brief[3]: round((sum(store_track_dict.values()) / Input_FPS), 2), csv_columns_brief[4]: avg_store_time} # Writing the data into the csv file - Brief Report with open(brief_csv_file, 'w') as csvfile: writer = csv.DictWriter(csvfile, fieldnames=csv_columns_brief) writer.writeheader() writer.writerow(csv_data_brief) # Plotting a time-series line graph for store and queue head count data and saving it as a .png file plt.plot(plot_time, plot_head_count_queue) 
plt.plot(plot_time, plot_head_count_store) plt.legend(['Queue Head Count', 'Store Head Count'], loc='upper left') plt.xlabel('Time Stamp (in seconds)') plt.ylabel('Head Count') plt.xlim(0, round(frame_count / Input_FPS) + 1) plt.ylim(0, max(plot_head_count_store) + 2) plt.title('Footfall Analysis') plt.savefig('Footfall_Analysis.png', bbox_inches='tight') # Printing plot data for i in range(len(plot_time)): print(plot_time[i], plot_head_count_queue[i], plot_head_count_store[i]) # Releasing objects created video_capture.release() out.release() cv2.destroyAllWindows()
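The store-analytics routine above calls a center_point_inside_polygon(bbox, pts) helper that is not shown in this section. A minimal sketch of what such a helper could look like, assuming pts/pts2 hold the polygon vertices as (x, y) pairs and that a point on the edge counts as inside; the 'inside'/'outside' string returns match how the result is compared above:

import cv2
import numpy as np

def center_point_inside_polygon(bbox, polygon_pts):
    """Return 'inside' if the bbox center falls within the polygon, else 'outside'."""
    # bbox comes from det.to_tlbr()/track.to_tlbr(): (min x, min y, max x, max y)
    cx = float((bbox[0] + bbox[2]) / 2)
    cy = float((bbox[1] + bbox[3]) / 2)
    contour = np.array(polygon_pts, dtype=np.int32).reshape((-1, 1, 2))
    # pointPolygonTest returns +1 inside, 0 on the edge, -1 outside
    return 'inside' if cv2.pointPolygonTest(contour, (cx, cy), False) >= 0 else 'outside'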
def main(queue,camID,initial_id,r_id,cam_id,unique_id): ''' Detect, Track and save information of persons Objectives: 1. Use YOLO to detect persons and store coordinates 2. Use DeepSORT to track detected persons throughout video frames 3. Save detected persons for re-identification 4. If re-identified, replace camID with global camID across camera views ''' # Init YOLO model and load it into memory yolo = YOLO() start = time.time() counter = [] #deep_sort metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) tracker = Tracker(metric) model_filename = 'model_data/market1501.pb' encoder = gdet.create_box_encoder(model_filename,batch_size=1) w = int(650) h = int(576) writeVideo_flag = True # Set to False to not save videos and detections if writeVideo_flag: # Define the codec and create VideoWriter object fourcc = cv2.VideoWriter_fourcc(*'MJPG') out = cv2.VideoWriter('./output/'+str(camID)+'_output.avi', fourcc, 10, (w, h)) list_file = open('logs/detection_camera'+str(camID)+'.txt', 'w') frame_index = -1 fps = 0.0 frame_counter = 0 #Directory Creation if not os.path.isdir('./images/frames/'+str(camID)): os.mkdir('./images/frames/'+str(camID)) if not os.path.isdir('./images/detections/'+str(camID)): os.mkdir('./images/detections/'+str(camID)) # Empty folders for file in (sorted(os.listdir('./images/detections/'+str(camID)))): os.remove('./images/detections/'+str(camID)+'/'+file) for file in (sorted(os.listdir('./images/frames/'+str(camID)))): os.remove('./images/frames/'+str(camID)+'/'+file) while not queue.empty(): # Retrieve a frame from the queue frame = queue.get() cv2.imwrite('./images/frames/'+str(camID)+'/'+str(frame_counter)+'.jpg',frame) frame_counter+=1 t1 = time.time() # frame_copy --> to be cropped according to detected person and saved # frame_save --> Frame to be saved with video only showing unique camID frame_copy = frame.copy() frame_save = frame.copy() image = Image.fromarray(frame[...,::-1]) #bgr to rgb # Perform YOLO detection (Objective 1) boxs,class_names = yolo.detect_image(image) backend.clear_session() features = encoder(frame,boxs) # Detection confidence score is set to 1.0 here. detections = [Detection(bbox, 1.0, feature) for bbox, feature in zip(boxs, features)] # Run non-maxima suppression.
boxes = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores) detections = [detections[i] for i in indices] # Call the tracker and update with current detections tracker.predict() tracker.update(detections) i = int(0) indexIDs = [] boxes = [] for det in detections: bbox = det.to_tlbr() #cv2.rectangle(frame,(int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),(255,255,255), 2) for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue #boxes.append([track[0], track[1], track[2], track[3]]) # Store tracking_id as a separate variable for replacement tracking_id = track.track_id indexIDs.append(int(tracking_id)) counter.append(int(track.track_id)) bbox = track.to_tlbr() if int(bbox[0])<0 or int(bbox[1])<0 or int(bbox[2])<0 or int(bbox[3])<0: continue # Change camIDs of re-identified targets # Currently using camID 1 as the base of reference to compare the other cameras against if not camID == 1: for a in range(len(initial_id)): if int(track.track_id) == initial_id[a]: tracking_id = int(r_id[a]) # r_id is for only CAM 1 #Prevent a camID donated from CAM 1 from conflicting with a camID issued by CAM 2 (which may coincidentally be 1 as well) elif int(track.track_id) == r_id[a]: #Prevent identical camIDs on one source tracking_id = int(initial_id[a]) else: tracking_id = track.track_id else: tracking_id = track.track_id #DeepSORT id color = [int(c) for c in COLORS[tracking_id]] cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),(color), 3) #bbox[0] and [1] are the start point, [2] and [3] the end point # Select which camID to be displayed (Local or Global if re-identified) display_id = tracking_id for b in range(len(unique_id)): if tracking_id == r_id[b]: display_id = unique_id[b] cv2.putText(frame,str(display_id),(int(bbox[0]), int(bbox[1] -10)),0, 5e-3 * 150, (color),2) if len(class_names) > 0: class_name = class_names[0] #cv2.putText(frame, str(class_names[0]),(int(bbox[0]), int(bbox[1] -20)),0, 5e-3 * 150, (color),2) # Save bounding box data for re-identification (Objective 3) frame1 = frame_copy[int(bbox[1]):int(bbox[3]),int(bbox[0]):int(bbox[2])] #crop the current frame according to the bounding box coordinates query_path = image_path+'/query' gallery_path = image_path+'/gallery' #Perform resizing to ensure equal size of features extracted from images frame2 = cv2.resize(frame1,(46,133),interpolation = cv2.INTER_AREA) #resize cropped image cv2.imwrite('./images/detections/'+str(camID)+'/frame'+str(frame_counter)+'_'+str(tracking_id)+'.jpg',frame2) if not camID == 1: dst_path = gallery_path #if file does not exist --> save file_path = dst_path+'/'+str(tracking_id)+'_'+str(camID)+'.png' if frame_counter % 10 == 0 or not os.path.isfile(file_path): cv2.imwrite(file_path,frame2) #save cropped frame if camID == 1: dst_path = query_path #if file does not exist --> save file_path = dst_path+'/'+str(tracking_id)+'.png' if frame_counter % 10 == 0 or not os.path.isfile(file_path): cv2.imwrite(file_path,frame2) #save cropped frame # Draw bounding boxes and Unique IDs for video to be saved if tracking_id in initial_id or tracking_id in r_id: if tracking_id in initial_id and not camID == 1: index = initial_id.index(tracking_id) color = [int(c) for c in COLORS[int(unique_id[index].split('P')[1])]] #Determine color of bounding box # Draw box and label with unique ID cv2.rectangle(frame_save, (int(bbox[0]), int(bbox[1])), (int(bbox[2]),
int(bbox[3])),(color), 3) #bbox[0] and [1] are the start point, [2] and [3] the end point cv2.putText(frame_save,str(unique_id[index]),(int(bbox[0]), int(bbox[1] -10)),0, 5e-3 * 150, (color),2) elif tracking_id in r_id and camID == 1: index = r_id.index(tracking_id) color = [int(c) for c in COLORS[int(unique_id[index].split('P')[1])]] cv2.rectangle(frame_save, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),(color), 3) #bbox[0] and [1] are the start point, [2] and [3] the end point cv2.putText(frame_save,str(unique_id[index]),(int(bbox[0]), int(bbox[1] -10)),0, 5e-3 * 150, (color),2) i += 1 #bbox_center_point(x,y) center = (int(((bbox[0])+(bbox[2]))/2),int(((bbox[1])+(bbox[3]))/2)) #track_id[center] pts[track.track_id].append(center) #center point #cv2.circle(frame, (center), 1, color, 5) ''' #draw motion path for j in range(1, len(pts[track.track_id])): if pts[track.track_id][j - 1] is None or pts[track.track_id][j] is None: continue thickness = int(np.sqrt(64 / float(j + 1)) * 2) cv2.line(frame,(pts[track.track_id][j-1]), (pts[track.track_id][j]),(color),thickness) #cv2.putText(frame, str(class_names[j]),(int(bbox[0]), int(bbox[1] -20)),0, 5e-3 * 150, (255,255,255),2) ''' count = len(set(counter)) # Visualize result cv2.putText(frame, "FPS: %f"%(fps),(int(20), int(40)),0, 5e-3 * 200, (0,255,0),3) cv2.namedWindow('Camera '+str(camID), 0) cv2.resizeWindow('Camera '+str(camID), w ,h) cv2.imshow('Camera '+str(camID), frame) if writeVideo_flag: #save a frame frame_save = cv2.resize(frame_save,(w,h)) # Resize frame to fit video out.write(frame_save) frame_index = frame_index + 1 # Write detections onto file list_file.write('./images/frames/'+str(camID)+'/'+str(frame_counter)+'.jpg'+' | ') if len(boxs) != 0: for i in range(0,len(boxs)): list_file.write(str(boxs[i][0]) + ' '+str(boxs[i][1]) + ' '+str(boxs[i][2]) + ' '+str(boxs[i][3]) + ' ' + str(class_names[i][0])+', ') list_file.write('\n') fps = ( fps + (1./(time.time()-t1)) ) / 2 # Press Q to stop! if cv2.waitKey(1) & 0xFF == ord('q'): break print(" ") print("[Finish]") end = time.time() if len(pts[track.track_id]) != 0: print(source_names[camID-1]+": "+ str(count) + " " + str(class_name) +' Found') else: print("[None Found]") if writeVideo_flag: out.release() list_file.close() print('Time taken: '+str(round(end-start))+' seconds') cv2.destroyAllWindows()
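The function above expects each camera to run in its own process, draining frames from a pre-filled queue while sharing the re-identification lists across processes. A minimal launcher sketch under those assumptions; the camera sources are hypothetical, and how initial_id/r_id/unique_id get populated by the re-ID stage is not shown here:

import multiprocessing as mp
import cv2

def fill_queue(source, queue):
    # Pre-load every frame, since main() exits once the queue is empty
    cap = cv2.VideoCapture(source)
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        queue.put(frame)
    cap.release()

if __name__ == '__main__':
    manager = mp.Manager()
    # Shared across camera processes; filled in by the re-identification stage
    initial_id, r_id, unique_id = manager.list(), manager.list(), manager.list()
    workers = []
    for cam_id, source in enumerate(['cam1.avi', 'cam2.avi'], start=1):  # hypothetical sources
        q = mp.Queue()
        fill_queue(source, q)
        p = mp.Process(target=main, args=(q, cam_id, initial_id, r_id, cam_id, unique_id))
        p.start()
        workers.append(p)
    for p in workers:
        p.join()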
def main(yolo): # Definition of the parameters max_cosine_distance = 0.3 nn_budget = None nms_max_overlap = 1.0 # deep_sort model_filename = 'model_data/mars-small128.pb' encoder = gdet.create_box_encoder(model_filename, batch_size=1) metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) tracker = Tracker(metric) writeVideo_flag = True video_capture = cv2.VideoCapture(0) if writeVideo_flag: # Define the codec and create VideoWriter object w = int(video_capture.get(3)) h = int(video_capture.get(4)) fourcc = cv2.VideoWriter_fourcc(*'MJPG') out = cv2.VideoWriter('output.avi', fourcc, 15, (w, h)) list_file = open('detection.txt', 'w') frame_index = -1 fps = 0.0 while True: ret, frame = video_capture.read() # frame shape 640*480*3 if ret != True: break t1 = time.time() image = Image.fromarray(frame) boxs = yolo.detect_image(image) # print("box_num",len(boxs)) features = encoder(frame, boxs) # Detection confidence score is set to 1.0 here. detections = [ Detection(bbox, 1.0, feature) for bbox, feature in zip(boxs, features) ] # Run non-maxima suppression. boxes = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores) detections = [detections[i] for i in indices] # Call the tracker tracker.predict() tracker.update(detections) for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue bbox = track.to_tlbr() cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2) cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200, (0, 255, 0), 2) for det in detections: bbox = det.to_tlbr() cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2) cv2.imshow('', frame) if writeVideo_flag: # save a frame out.write(frame) frame_index = frame_index + 1 list_file.write(str(frame_index) + ' ') if len(boxs) != 0: for i in range(0, len(boxs)): list_file.write( str(boxs[i][0]) + ' ' + str(boxs[i][1]) + ' ' + str(boxs[i][2]) + ' ' + str(boxs[i][3]) + ' ') list_file.write('\n') fps = (fps + (1. / (time.time() - t1))) / 2 print("fps= %f" % (fps)) # Press Q to stop! if cv2.waitKey(1) & 0xFF == ord('q'): break video_capture.release() if writeVideo_flag: out.release() list_file.close() cv2.destroyAllWindows()
def Object_tracking(Yolo, video_path, output_path, input_size=416, show=False, CLASSES=YOLO_COCO_CLASSES, score_threshold=0.3, iou_threshold=0.45, rectangle_colors='', Track_only=[]): # Definition of the parameters max_cosine_distance = 0.7 nn_budget = None #initialize deep sort object model_filename = 'model_data/mars-small128.pb' encoder = gdet.create_box_encoder(model_filename, batch_size=1) metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) tracker = Tracker(metric) times, times_2 = [], [] if video_path: vid = cv2.VideoCapture(video_path) # detect on video else: vid = cv2.VideoCapture(0) # detect from webcam # by default VideoCapture returns float instead of int width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)) height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)) fps = int(vid.get(cv2.CAP_PROP_FPS)) codec = cv2.VideoWriter_fourcc(*'XVID') out = cv2.VideoWriter(output_path, codec, fps, (width, height)) # output_path must be .mp4 NUM_CLASS = read_class_names(CLASSES) key_list = list(NUM_CLASS.keys()) val_list = list(NUM_CLASS.values()) counter = 1 f = open("trackinfo.txt", 'w') while True: _, frame = vid.read() print(counter) counter = counter + 1 try: original_frame = frame.copy() # keep a drawable copy; the frame stays in BGR, which the cv2 drawing calls and VideoWriter below expect except: break image_data = image_preprocess(np.copy(original_frame), [input_size, input_size]) #image_data = tf.expand_dims(image_data, 0) image_data = image_data[np.newaxis, ...].astype(np.float32) t1 = time.time() if YOLO_FRAMEWORK == "tf": pred_bbox = Yolo.predict(image_data) elif YOLO_FRAMEWORK == "trt": batched_input = tf.constant(image_data) result = Yolo(batched_input) pred_bbox = [] for key, value in result.items(): value = value.numpy() pred_bbox.append(value) #t1 = time.time() #pred_bbox = Yolo.predict(image_data) t2 = time.time() pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox] pred_bbox = tf.concat(pred_bbox, axis=0) bboxes = postprocess_boxes(pred_bbox, original_frame, input_size, score_threshold) bboxes = nms(bboxes, iou_threshold, method='nms') # extract bboxes to boxes (x, y, width, height), scores and names boxes, scores, names = [], [], [] for bbox in bboxes: if len(Track_only) != 0 and NUM_CLASS[int(bbox[5])] in Track_only or len(Track_only) == 0: boxes.append([ bbox[0].astype(int), bbox[1].astype(int), bbox[2].astype(int) - bbox[0].astype(int), bbox[3].astype(int) - bbox[1].astype(int) ]) scores.append(bbox[4]) names.append(NUM_CLASS[int(bbox[5])]) # Obtain all the detections for the given frame. boxes = np.array(boxes) names = np.array(names) scores = np.array(scores) features = np.array(encoder(original_frame, boxes)) detections = [ Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in zip(boxes, scores, names, features) ] # Pass detections to the deepsort object and obtain the track information.
tracker.predict() tracker.update(detections) # Obtain info from the tracks tracked_bboxes = [] for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 5: continue bbox = track.to_tlbr() # Get the corrected/predicted bounding box class_name = track.get_class() # Get the class name of the particular object tracking_id = track.track_id # Get the ID for the particular track index = key_list[val_list.index(class_name)] # Get the predicted object index by object name tracked_bboxes.append(bbox.tolist() + [tracking_id, index]) # Structure the data so that it can be used with our draw_bbox function json.dump(tracked_bboxes, f) f.write("\n") # draw detection on frame image = draw_bbox(original_frame, tracked_bboxes, CLASSES=CLASSES, tracking=True) t3 = time.time() times.append(t2 - t1) times_2.append(t3 - t1) times = times[-20:] times_2 = times_2[-20:] ms = sum(times) / len(times) * 1000 fps = 1000 / ms fps2 = 1000 / (sum(times_2) / len(times_2) * 1000) image = cv2.putText(image, "FPS: {:.1f}".format(fps), (0, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2) # draw original yolo detection #image = draw_bbox(image, bboxes, CLASSES=CLASSES, show_label=False, rectangle_colors=rectangle_colors, tracking=True) #print("Time: {:.2f}ms, Detection FPS: {:.1f}, total FPS: {:.1f}".format(ms, fps, fps2)) if output_path != '': out.write(image) if show: cv2.imshow('output', image) if cv2.waitKey(25) & 0xFF == ord("q"): cv2.destroyAllWindows() break out.release() cv2.destroyAllWindows() f.close()
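Object_tracking dumps one JSON list per frame into trackinfo.txt, where each entry is [x1, y1, x2, y2, track_id, class_index]. A small sketch for reading that log back, assuming the file was produced by the loop above:

import json

def load_track_log(path='trackinfo.txt'):
    """Yield, per frame, a list of [x1, y1, x2, y2, track_id, class_index] entries."""
    with open(path) as f:
        for line in f:
            line = line.strip()
            if line:
                yield json.loads(line)

for frame_idx, tracked in enumerate(load_track_log()):
    for x1, y1, x2, y2, track_id, class_idx in tracked:
        print(frame_idx, track_id, (x1, y1, x2, y2))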
class Fence(): def __init__(self): self.yolo = YOLO() self.max_cosine_distance = 0.3 self.nn_budget = None self.nms_max_overlap = 1.0 self.encoder = gdet.create_box_encoder('model_data/mars-small128.pb', batch_size=1) self.tracker = Tracker( nn_matching.NearestNeighborDistanceMetric("cosine", self.max_cosine_distance, self.nn_budget)) self.dqs = [deque() for _ in range(9999)] self.poly = [] self.id_cnt_dict = {} self.queue_dict = {} def initArea(self, image): ''' Initialize the sensitive area: click points on the image and their coordinates are saved :param image: shape w*h*3 :return: the polygon vertices ''' def on_EVENT_LBUTTONDOWN(event, x, y, flags, param): if event == cv2.EVENT_LBUTTONDOWN: xy = "%d,%d" % (x, y) print(xy) cv2.circle(image, (x, y), 1, (255, 0, 0), thickness=-1) cv2.putText(image, xy, (x, y), cv2.FONT_HERSHEY_PLAIN, 1.0, (0, 0, 0), thickness=1) self.poly.append([float(x), float(y)]) cv2.imshow("image", image) cv2.namedWindow("image") cv2.setMouseCallback("image", on_EVENT_LBUTTONDOWN) cv2.imshow("image", image) while len(self.poly) < 4: try: cv2.waitKey(100) except Exception: cv2.destroyWindow("image") break cv2.destroyWindow("image") return self.poly def initPoly(self, poly): ''' Initialize the sensitive area :param poly: vertex coordinates of the area [[float(x), float(y)]...] :return: ''' self.poly = poly def detect(self, image): ''' Use only the detection model to obtain bboxes; the output does not contain target ids :param image: shape w*h*3 :return: bboxes [[min x, min y, max x, max y]...] ''' img = Image.fromarray(image[..., ::-1]) # bgr to rgb boxs, ret_clss = self.yolo.detect_image(img) features = self.encoder(image, boxs) # The image of each frame is encoded to match the boxes # Detection score is set to 1.0 here. Each detection box and feature is encapsulated as an object detections = [ Detection(bbox, 1.0, feature, ret_cls) for bbox, feature, ret_cls in zip(boxs, features, ret_clss) ] boxes = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) indices = preprocessing.non_max_suppression(boxes, self.nms_max_overlap, scores) detections = [detections[i] for i in indices] bboxes = [] for det in detections: bbox = det.to_tlbr() bboxes.append(bbox) return bboxes, ret_clss, detections def trackByDetect(self, image): ''' Detect and track to obtain bboxes :param image: shape w*h*3 :return: b [[min x, min y, max x, max y]...] t [id1,id2,id3...] ''' bbox, ret_clss, detections = self.detect(image) # Call the tracker self.tracker.predict() self.tracker.update(detections) b = [] c = [] t = [] for track in self.tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue b.append(track.to_tlbr()) c.append(track.track_cls) t.append(track.track_id) return b, c, t def entanceAlert(self, bboxes): ''' Determine whether each bbox center lies inside the sensitive area :param bboxes: [[min x, min y, max x, max y]...] :return: isIn: [True,False,True....] ''' assert len(self.poly) == 4, 'The quadrilateral sensitive area has not been initialized' isIn = [ self.winding_number([int((bbox[0] + bbox[2]) / 2), int((bbox[1] + bbox[3]) / 2)]) == 'in' for bbox in bboxes ] return isIn def winding_number(self, point): ''' Determine the geometric relationship between the area polygon and a point :param point: [x,y] :return: "on" on the boundary, "in" inside the area, "out" outside the area ''' poly = self.poly + [self.poly[0]] # close the polygon locally instead of mutating self.poly px = point[0] py = point[1] sum_of_p = 0 length = len(poly) - 1 for index in range(0, length): sx = poly[index][0] sy = poly[index][1] tx = poly[index + 1][0] ty = poly[index + 1][1] # The point coincides with a vertex of the polygon or lies on an edge of the polygon if ((sx - px) * (px - tx) >= 0 and (sy - py) * (py - ty) >= 0 and (px - sx) * (ty - sy) == (py - sy) * (tx - sx)): return "on" # The angle between the point and two adjacent vertices angle = math.atan2(sy - py, sx - px) - math.atan2(ty - py, tx - px) # Make sure the angle is within the range (-π ~ π) if angle >= math.pi: angle = angle - math.pi * 2 elif angle <= -math.pi: angle = angle + math.pi * 2 sum_of_p += angle # Use the number of turns to judge the geometric relationship between the point and the polygon result = 'out' if int(sum_of_p / math.pi) == 0 else 'in' return result def get_poly(self): pts = np.array(self.poly, np.int32) # With 4 vertices the array becomes 4*1*2; the first reshape argument of -1 means that dimension is inferred from the others pts = pts.reshape((-1, 1, 2)) return pts def person_in_area(self, q, flg=False): ''' Detect an enter-the-area action :param q: queue of a person's position coordinates :param flg: starting state, defaults to inside the area :return: ''' while True: if not q: return "non" box1 = q.popleft() if self.winding_number(box1) == "out": flg = True continue elif self.winding_number(box1) == "in" and flg: return "yes"
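A minimal driver sketch for the Fence class above, assuming its YOLO/Deep SORT dependencies are in place; the polygon coordinates and video path are hypothetical:

import cv2

fence = Fence()
# Four corners of the sensitive area (or call fence.initArea(first_frame) to click them)
fence.initPoly([[100.0, 100.0], [500.0, 100.0], [500.0, 400.0], [100.0, 400.0]])

cap = cv2.VideoCapture('video.avi')  # hypothetical input
while True:
    ret, frame = cap.read()
    if not ret:
        break
    boxes, classes, ids = fence.trackByDetect(frame)
    for inside, track_id in zip(fence.entanceAlert(boxes), ids):
        if inside:
            print('track %d entered the sensitive area' % track_id)
    cv2.polylines(frame, [fence.get_poly()], True, (0, 0, 255), 2)
    cv2.imshow('fence', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
cap.release()
cv2.destroyAllWindows()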
def main(_argv): # Definition of the parameters max_cosine_distance = 0.4 nn_budget = None nms_max_overlap = 1.0 time_stamp = [] time_stamp_vid2 = [] time_ev_vid1 = datetime.strptime('00:00:00:00', "%H:%M:%S:%f") time_ev_vid2 = datetime.strptime('00:00:00:00', "%H:%M:%S:%f") flag = 0 time_dict = collections.defaultdict(list) time_ev1 = 0 time_ev2 = 0 time_diff = 0 reader = easyocr.Reader(['en']) print("easy ocr vocab loaded") # initialize deep sort model_filename = 'model_data/mars-small128.pb' encoder = gdet.create_box_encoder(model_filename, batch_size=1) # calculate cosine distance metric metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) # initialize tracker tracker = Tracker(metric) # load configuration for object detector config = ConfigProto() config.gpu_options.allow_growth = True session = InteractiveSession(config=config) STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS) input_size = FLAGS.size video_path = FLAGS.video video_path1 = FLAGS.video1 # load tflite model if flag is set if FLAGS.framework == 'tflite': interpreter = tf.lite.Interpreter(model_path=FLAGS.weights) interpreter.allocate_tensors() input_details = interpreter.get_input_details() output_details = interpreter.get_output_details() print(input_details) print(output_details) # otherwise load standard tensorflow saved model else: saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING]) infer = saved_model_loaded.signatures['serving_default'] # begin video capture try: vid = cv2.VideoCapture(int(video_path)) vid1 = cv2.VideoCapture(int(video_path1)) except: vid = cv2.VideoCapture(video_path) vid1 = cv2.VideoCapture(video_path1) out = None # get video ready to save locally if flag is set if FLAGS.output: # by default VideoCapture returns float instead of int width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)) height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)) fps = int(vid.get(cv2.CAP_PROP_FPS)) codec = cv2.VideoWriter_fourcc(*FLAGS.output_format) out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height)) frame_num = 0 cost = 0 _, frame_prev = vid1.read() frame_prev = cv2.cvtColor(frame_prev, cv2.COLOR_BGR2GRAY) # while video is running while True: return_value, frame = vid.read() return_value1, frame1 = vid1.read() result1 = frame1 if return_value: frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) frame_gray = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY) image = Image.fromarray(frame) else: print('Video has ended or failed, try a different video format!') break if (return_value1 and flag == 0): frame1 = cv2.cvtColor(frame1, cv2.COLOR_BGR2GRAY) cost = MSE(frame_prev, frame1) print("cost between two frames = ", cost) if (cost > 2000): text_in_frame2 = reader.readtext(frame1) time_in_frame2 = text_in_frame2[0][1] print("time in frame 1 ", time_in_frame2) time_frame2 = datetime.strptime(time_in_frame2, "%H:%M:%S:%f") time_stamp_vid2.append(time_frame2) time_ev_vid1 = time_frame2 time_ev1 = time_in_frame2 #time_dict['Event 1'].extend(str(time_in_frame2)) flag = 1 frame_prev = frame1 frame_num += 1 print('Frame #: ', frame_num) frame_size = frame.shape[:2] image_data = cv2.resize(frame, (input_size, input_size)) image_data = image_data / 255. 
image_data = image_data[np.newaxis, ...].astype(np.float32) start_time = time.time() # run detections on tflite if flag is set if FLAGS.framework == 'tflite': interpreter.set_tensor(input_details[0]['index'], image_data) interpreter.invoke() pred = [ interpreter.get_tensor(output_details[i]['index']) for i in range(len(output_details)) ] # run detections using yolov3 if flag is set if FLAGS.model == 'yolov3' and FLAGS.tiny == True: boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25, input_shape=tf.constant([input_size, input_size])) else: boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25, input_shape=tf.constant([input_size, input_size])) else: batch_data = tf.constant(image_data) pred_bbox = infer(batch_data) for key, value in pred_bbox.items(): boxes = value[:, :, 0:4] pred_conf = value[:, :, 4:] boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression( boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)), scores=tf.reshape(pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])), max_output_size_per_class=50, max_total_size=50, iou_threshold=FLAGS.iou, score_threshold=FLAGS.score) # convert data to numpy arrays and slice out unused elements num_objects = valid_detections.numpy()[0] bboxes = boxes.numpy()[0] bboxes = bboxes[0:int(num_objects)] scores = scores.numpy()[0] scores = scores[0:int(num_objects)] classes = classes.numpy()[0] classes = classes[0:int(num_objects)] # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height original_h, original_w, _ = frame.shape bboxes = utils.format_boxes(bboxes, original_h, original_w) # store all predictions in one parameter for simplicity when calling functions pred_bbox = [bboxes, scores, classes, num_objects] # read in all class names from config class_names = utils.read_class_names(cfg.YOLO.CLASSES) # by default allow all classes in .names file allowed_classes = list(class_names.values()) # custom allowed classes (uncomment line below to customize tracker for only people) #allowed_classes = ['person'] # loop through objects and use class index to get class name, allow only classes in allowed_classes list names = [] deleted_indx = [] for i in range(num_objects): class_indx = int(classes[i]) class_name = class_names[class_indx] if class_name not in allowed_classes: deleted_indx.append(i) else: names.append(class_name) names = np.array(names) count = len(names) if FLAGS.count: cv2.putText(frame, "Objects being tracked: {}".format(count), (5, 35), cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (0, 255, 0), 2) print("Objects being tracked: {}".format(count)) # delete detections that are not in allowed_classes bboxes = np.delete(bboxes, deleted_indx, axis=0) scores = np.delete(scores, deleted_indx, axis=0) # encode yolo detections and feed to tracker features = encoder(frame, bboxes) detections = [ Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in zip(bboxes, scores, names, features) ] #initialize color map cmap = plt.get_cmap('tab20b') colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)] # run non-maxima suppression boxs = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) classes = np.array([d.class_name for d in detections]) indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores) detections = [detections[i] for i in indices] # Call the tracker tracker.predict() tracker.update(detections) # update tracks for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue bbox = track.to_tlbr() class_name = track.get_class() # draw bbox on screen color = colors[int(track.track_id) % len(colors)] color = [i * 255 for i in color] cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2) cv2.rectangle(frame, (int(bbox[0]), int(bbox[1] - 30)), (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])), color, -1) cv2.putText(frame, class_name + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2) center_coordinates = (int((bbox[0] + bbox[2]) / 2), int((bbox[1] + bbox[3]) / 2)) print(center_coordinates) if (class_name == 'person' and center_coordinates[0] in range(120, 476) and center_coordinates[1] in range(191, 979)): # adding the ocr text_in_frame = reader.readtext(frame_gray) # if(len(text_in_frame) > 0): print("text in frame", text_in_frame) time_in_frame1 = text_in_frame[0][1] time_ev2 = time_in_frame1 print("time in frame", time_in_frame1) time_frame1 = datetime.strptime(time_in_frame1, "%H:%M:%S:%f") #time_stamp.append(time_frame1) if (flag == 1): time_ev_vid2 = time_frame1 #time_dict['Event two'].extend(str(time_in_frame1)) time_stamp.append(time_frame1) print("the time difference is ", (time_ev_vid2 - time_ev_vid1).seconds) flag = 2 break #print(vid.get(cv2.CAP_PROP_POS_MSEC)/1000) #time_stamp.append(vid.get(cv2.CAP_PROP_POS_MSEC)/1000) # if the info flag is enabled, print details about each track if FLAGS.info: print("Tracker ID: {}, Class: {}, BBox Coords (xmin, ymin, xmax, ymax): {}".format(str(track.track_id), class_name, (int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])))) # calculate frames per second of running detections fps = 1.0 / (time.time() - start_time) print("FPS: %.2f" % fps) result = np.asarray(frame) #print(time_stamp) result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR) #result1 = cv2.cvtColor(frame1, cv2.COLOR_GRAY2BGR) # with open("video.txt", "w") as f: # f.write(str(time_stamp)) # with open("video2.txt", "w") as k: # k.write(str(time_stamp_vid2)) result1 = cv2.resize(result1, (400, 400)) result = cv2.resize(result, (400, 400)) if not FLAGS.dont_show: cv2.imshow("Output Video", np.concatenate((result1, result), axis=1)) if (flag == 2): print("the time difference is ", (time_ev_vid2 - time_ev_vid1).seconds) time_diff = (time_ev_vid2 - time_ev_vid1).seconds #time_dict['Time difference in seconds'].extend(str((time_ev_vid2 - time_ev_vid1).seconds)) break if FLAGS.output: out.write(result) if cv2.waitKey(1) & 0xFF == ord('q'): break time_diction = { "Event 1 Occurred at ": time_ev1, "Event 2 occurred at ": time_ev2, "time difference in seconds ": time_diff } # write the event timing summary to a json file with open('output.json', "w") as json_file: json.dump(time_diction, json_file) if out: out.release() cv2.destroyAllWindows()
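The frame-change test in the function above relies on an MSE helper that never appears in this section. A plausible minimal implementation, assuming both arguments are equally sized grayscale frames (the > 2000 threshold above would then be in squared pixel-intensity units):

import numpy as np

def MSE(frame_a, frame_b):
    """Mean squared error between two equally sized grayscale frames."""
    diff = frame_a.astype(np.float32) - frame_b.astype(np.float32)
    return float(np.mean(diff ** 2))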
def main( root, new_root, config_file ): """ Parameters ---------- root : str The root of the whole file hierarchy. The root will be parsed by glob in "parse_hierachy". new_root : str In which the new hierarchy will be constructed. config_file : str Configurations in ./config. """ cfg.merge_from_file(config_file) cfg.freeze() file_nodes = parse_hierachy(root, '**/*.mp4') # META definitions encoder_factory = EncoderFactory() detector_factory = DetectorFactory() ratio_thres_min = cfg.RATIO_THRESHOLD_MIN ratio_thres_max = cfg.RATIO_THRESHOLD_MAX height_thres = cfg.HEIGHT_THRESHOLD width_thres = cfg.WIDTH_THRESHOLD output_dir = new_root nms_max_overlap = cfg.NMS_MAX_OVERLAP distance_metric = cfg.NND.METRIC # cosine max_cosine_distance = cfg.NND.MAX_COS_DISTANCE # 0.3 nn_budget = cfg.NND.BUGET # None # writeVideo_flag = cfg.OUTPUT_DIR is not False max_iou_distance = cfg.TRACK.MAX_IOU_DISTANCE # 0.7 # deep_sort encoder = encoder_factory.make_encoder(cfg, cfg.ENCODER.NAME) detector = detector_factory.make_detector(cfg, cfg.DETECTOR.NAME) metric = nn_matching.NearestNeighborDistanceMetric(distance_metric, max_cosine_distance, nn_budget) for file_node in tqdm(file_nodes, desc='Files', leave=False): video_capture = cv2.VideoCapture(file_node.path) vfps = float(video_capture.get(CV_CAP_PROP_FPS)) num_frame = video_capture.get(CV_CAP_PROP_FRAME_COUNT) # w = int(video_capture.get(CV_CAP_PROP_FRAME_WIDTH)) # h = int(video_capture.get(CV_CAP_PROP_FRAME_HEIGHT)) max_age = int(cfg.TRACK.MAX_AGE_TIME * vfps) # 30 n_init = int(cfg.TRACK.N_INIT_TIME * vfps) # 3 # A Kalman filter tracker # Predict the next position based on the Kalman filter. # Update with the detections to calibrate the predictions. tracker = Tracker(metric, max_iou_distance, max_age, n_init) # A dictionary to collect tracks # Dict[track_id -> List[(frame_count, image)]] track_collection = {} frame_count = 0 eof_flag = False while not eof_flag: # Get a batch of images frames = [] batch_count = 0 while batch_count < cfg.BATCH: skip_count = 0 while skip_count < cfg.SKIP: ret, frame = video_capture.read() # frame shape 640*480*3 skip_count += 1 ret, frame = video_capture.read() # frame shape 640*480*3 if not ret: eof_flag = True break else: batch_count += 1 frames.append(frame) if eof_flag: break # the last batch cannot satisfy the shape constraint t1 = time.time() # Batch process images = [cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) for frame in frames] images = np.array(images) img_bboxes, img_scores = detector(images) img_features = encoder(images, img_bboxes) # Batch update for i in range(cfg.BATCH): bboxes, scores, features, image = img_bboxes[i], img_scores[i], img_features[i], images[i] detections = [Detection(bbox, score, feature) for bbox, score, feature in zip(bboxes, scores, features)] detections = preprocessing.standardize_detections( detections, ratio_thres_min, ratio_thres_max, height_thres, width_thres) # Run non-maxima suppression.
boxes = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores) detections = [detections[i] for i in indices] # Call the tracker tracker.predict() tracker.update(detections) active_tracks = [track for track in tracker.tracks if track.time_since_update == 0] crops = crop_tracks(image, active_tracks) crops = {track_id: (frame_count, img) for track_id, img in crops.items()} for track_id, img in crops.items(): track_collection.setdefault(track_id, []).append(img) frame_count += 1 # fps = (cfg.BATCH / (time.time() - t1)) # print("Processing fps={:3f}. Video time {:.3f} passed.".format( # fps, frame_count / vfps)) # Close files video_capture.release() # Write the crops out; each track gets its own directory. dir = file_node.echo(output_dir, make_dir=True) for track_id, packs in track_collection.items(): track_dir = os.path.join(dir, str(track_id)) os.mkdir(track_dir) for pack in packs: f_count, img = pack if img.size != 0: cv2.imwrite(os.path.join(track_dir, file_node.name + '_' + str(f_count) + '_' + str(track_id) + '.png'), cv2.cvtColor(img, cv2.COLOR_RGB2BGR))
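crop_tracks(image, active_tracks) is referenced above but not defined in this section. A minimal sketch under the assumption that it crops each active track's box out of the RGB frame and returns a dict keyed by track id (the caller tolerates empty crops via the img.size != 0 check):

def crop_tracks(image, tracks):
    """Crop each track's bounding box from the frame.

    Returns Dict[track_id -> cropped image (possibly empty at the frame border)].
    """
    crops = {}
    h, w = image.shape[:2]
    for track in tracks:
        x1, y1, x2, y2 = (int(v) for v in track.to_tlbr())
        # clamp the box to the frame so negative Kalman predictions don't wrap around
        x1, y1 = max(0, x1), max(0, y1)
        x2, y2 = min(w, x2), min(h, y2)
        crops[track.track_id] = image[y1:y2, x1:x2].copy()
    return crops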
def main(yolo): # Definition of the parameters max_cosine_distance = 0.3 nn_budget = None nms_max_overlap = 1.0 # deep_sort model_filename = 'model_data/mars-small128.pb' encoder = gdet.create_box_encoder(model_filename,batch_size=1) metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) tracker = Tracker(metric) writeVideo_flag = True video_capture = cv2.VideoCapture(0) if writeVideo_flag: # Define the codec and create VideoWriter object w = int(video_capture.get(3)) h = int(video_capture.get(4)) fourcc = cv2.VideoWriter_fourcc(*'MJPG') out = cv2.VideoWriter('output.avi', fourcc, 15, (w, h)) list_file = open('detection.txt', 'w') frame_index = -1 fps = 0.0 while True: ret, frame = video_capture.read() # frame shape 640*480*3 if ret != True: break t1 = time.time() image = Image.fromarray(frame) boxs = yolo.detect_image(image) # print("box_num",len(boxs)) features = encoder(frame,boxs) # Detection confidence score is set to 1.0 here. detections = [Detection(bbox, 1.0, feature) for bbox, feature in zip(boxs, features)] # Run non-maxima suppression. boxes = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores) detections = [detections[i] for i in indices] # Call the tracker tracker.predict() tracker.update(detections) for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue bbox = track.to_tlbr() cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),(255,255,255), 2) cv2.putText(frame, str(track.track_id),(int(bbox[0]), int(bbox[1])),0, 5e-3 * 200, (0,255,0),2) for det in detections: bbox = det.to_tlbr() cv2.rectangle(frame,(int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),(255,0,0), 2) cv2.imshow('', frame) if writeVideo_flag: # save a frame out.write(frame) frame_index = frame_index + 1 list_file.write(str(frame_index)+' ') if len(boxs) != 0: for i in range(0,len(boxs)): list_file.write(str(boxs[i][0]) + ' '+str(boxs[i][1]) + ' '+str(boxs[i][2]) + ' '+str(boxs[i][3]) + ' ') list_file.write('\n') fps = ( fps + (1./(time.time()-t1)) ) / 2 print("fps= %f"%(fps)) # Press Q to stop! if cv2.waitKey(1) & 0xFF == ord('q'): break video_capture.release() if writeVideo_flag: out.release() list_file.close() cv2.destroyAllWindows()
def main(yolo): # Definition of the parameters max_cosine_distance = 0.25 nn_budget = None nms_max_overlap = 0.1 # deep_sort model_filename = 'model_data/veri.pb' encoder = gdet.create_box_encoder(model_filename, batch_size=1) metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) tracker = Tracker(metric) writeVideo_flag = True video_capture = cv2.VideoCapture('test_scene5.mp4') # feed testing video #video_capture = cv2.VideoCapture('round.mp4') video_fps = video_capture.get(cv2.CAP_PROP_FPS) #print("Frames per second: {}".format(video_fps)) if writeVideo_flag: # Define the codec and create VideoWriter object w = int(video_capture.get(3)) h = int(video_capture.get(4)) fourcc = cv2.VideoWriter_fourcc(*'MJPG') # https://docs.opencv.org/2.4/modules/highgui/doc/reading_and_writing_images_and_video.html#cv2.VideoWriter out = cv2.VideoWriter('test_scene5_tar.mp4', fourcc, video_fps, (w, h)) #out = cv2.VideoWriter('output.avi', fourcc, 15, (w, h)) list_file = open('tracking_DJI_0006.txt', 'w') frame_index = -1 fps = 0.0 while True: ret, frame = video_capture.read() # frame shape 640*480*3 if ret != True: break t1 = time.time() # image = Image.fromarray(frame) image = Image.fromarray(frame[..., ::-1]) #bgr to rgb boxs = yolo.detect_image(image) #boxs = darknet.detect_image(image) # print("box_num",len(boxs)) features = encoder(frame, boxs) # Detection confidence score is set to 1.0 here. detections = [ Detection(bbox, 1.0, feature) for bbox, feature in zip(boxs, features) ] # Run non-maxima suppression. boxes = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores) detections = [detections[i] for i in indices] # Call the tracker tracker.predict() tracker.update(detections) bbox_center = [] for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue bbox = track.to_tlbr() # see generate_detections.py; the encoder input is a BGR color image cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (0, 0, 255), 2) # red color cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200, (0, 255, 0), 2) # green color bbox_center = (str(track.track_id), [ int((bbox[0] + bbox[2]) / 2), int((bbox[1] + bbox[3]) / 2) ]) print(bbox_center) if writeVideo_flag: # save a frame out.write(frame) frame_index = frame_index + 1 for track in tracker.tracks: bbox = track.to_tlbr() bbox_center_o = ([ int((bbox[0] + bbox[2]) / 2), int((bbox[1] + bbox[3]) / 2) ]) list_file.write( str(track.track_id) + ',' + str(bbox_center_o) + '; ') list_file.write('\n') """ # don't need detections for det in detections: bbox = det.to_tlbr() cv2.rectangle(frame,(int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),(255,0,0), 2) # blue color cv2.imshow('', frame) """ fps = (fps + (1. / (time.time() - t1))) / 2 print("fps= %f" % (fps)) # Press Q to stop! if cv2.waitKey(1) & 0xFF == ord('q'): break video_capture.release() if writeVideo_flag: out.release() list_file.close() cv2.destroyAllWindows()
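Each line of the tracking_DJI_0006.txt log written above looks like `7,[325, 118]; 9,[40, 77]; `, one `id,[cx, cy];` entry per tracked box. A small parsing sketch, assuming that exact format:

import re

# one "id,[cx, cy];" group per tracked box; tolerates int or float coordinates
ENTRY = re.compile(r'(\d+),\[([\d.+-]+), ([\d.+-]+)\];')

def load_centers(path='tracking_DJI_0006.txt'):
    """Return per-frame lists of (track_id, (cx, cy)) tuples."""
    frames = []
    with open(path) as f:
        for line in f:
            frames.append([(int(i), (float(x), float(y)))
                           for i, x, y in ENTRY.findall(line)])
    return frames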