Code example #1
def run(sequence_dir, detection_file, output_file, min_confidence,
        nms_max_overlap, min_detection_height, max_cosine_distance, nn_budget,
        display):
    """Run multi-target tracker on a particular sequence.

    Parameters
    ----------
    sequence_dir : str
        Path to the MOTChallenge sequence directory.
    detection_file : str
        Path to the detections file.
    output_file : str
        Path to the tracking output file. This file will contain the tracking
        results on completion.
    min_confidence : float
        Detection confidence threshold. Disregard all detections that have
        a confidence lower than this value.
    nms_max_overlap: float
        Maximum detection overlap (non-maxima suppression threshold).
    min_detection_height : int
        Detection height threshold. Disregard all detections that have
        a height lower than this value.
    max_cosine_distance : float
        Gating threshold for cosine distance metric (object appearance).
    nn_budget : Optional[int]
        Maximum size of the appearance descriptor gallery. If None, no budget
        is enforced.
    display : bool
        If True, show visualization of intermediate tracking results.

    """
    seq_info = gather_sequence_info(sequence_dir, detection_file)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)
    results = []

    # Maps track_id -> wall-clock time (HH:MM:SS) when the track first appeared.
    test_dict = {}

    def frame_callback(vis, frame_idx):
        if frame_idx % 100 == 0:
            print("Tracked frame %d" % frame_idx)

        # Load image and generate detections.
        detections = create_detections(seq_info["detections"], frame_idx,
                                       min_detection_height)
        detections = [d for d in detections if d.confidence >= min_confidence]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap,
                                                    scores)
        detections = [detections[i] for i in indices]

        # Update tracker.
        tracker.predict()
        tracker.update(detections)

        # Update visualization.
        if display:
            image = cv2.imread(seq_info["image_filenames"][frame_idx],
                               cv2.IMREAD_COLOR)
            vis.set_image(image.copy())
            vis.draw_detections(detections)
            vis.draw_trackers(tracker.tracks, test_dict)

        new_track_id = []
        # Store results.
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlwh()
            results.append([
                frame_idx, track.track_id, bbox[0], bbox[1], bbox[2], bbox[3]
            ])
            new_track_id.append(track.track_id)

        # Record the first time each track id was seen.
        for i in new_track_id:
            if i not in test_dict:
                test_dict[i] = datetime.now().strftime('%H:%M:%S')

    # Run tracker.
    if display:
        visualizer = visualization.Visualization(seq_info, update_ms=5)
    else:
        visualizer = visualization.NoVisualization(seq_info)
    visualizer.run(frame_callback)

    # Store results in MOTChallenge format: frame, id, x, y, w, h, conf, -1, -1, -1.
    with open(output_file, 'w') as f:
        for row in results:
            print('%d,%d,%.2f,%.2f,%.2f,%.2f,1,-1,-1,-1' %
                  (row[0], row[1], row[2], row[3], row[4], row[5]),
                  file=f)
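
The rows written at the end of run() follow the MOTChallenge text format (frame, id, x, y, w, h, conf, -1, -1, -1). Below is a minimal sketch, not part of the example, for reading such a file back with NumPy; load_mot_results is an illustrative helper name.

import numpy as np

def load_mot_results(path):
    """Group MOTChallenge-style result rows by frame index."""
    data = np.atleast_2d(np.loadtxt(path, delimiter=','))
    # Keys are frame indices; values are the rows belonging to that frame.
    return {int(f): data[data[:, 0] == f] for f in np.unique(data[:, 0])}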
Code example #2
def main(yolo):
    points = []  # user-drawn counting lines (filled via get_lines.run)
    tpro = 0.    # timestamp of the previous loop iteration

    # Definition of the parameters
    max_cosine_distance = 0.9
    nn_budget = None
    nms_max_overlap = 1.0

    # deep_sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    writeVideo_flag = True

    video_capture = cv2.VideoCapture(0)

    if writeVideo_flag:
        # Define the codec and create VideoWriter object
        w = int(video_capture.get(3))
        h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        out = cv2.VideoWriter('output.avi', fourcc, 15, (w, h))
        list_file = open('detection.txt', 'w')
        frame_index = -1

    fps = 0.0
    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if not ret:
            break

        frame = cv2.flip(frame, 1)
        image = Image.fromarray(frame)

        # ___________________________________________________________________________DETECT WITH YOLO
        t1 = time.time()

        boxs = yolo.detect_image(image)
        features = encoder(frame, boxs)

        # The detector score is not available here, so the confidence is set
        # to 1.0.
        detections = [Detection(bbox, 1.0, feature)
                      for bbox, feature in zip(boxs, features)]
        
        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        
        detections = [detections[i] for i in indices]

        

        # ___________________________________________________________________________DRAW DETECT BOX

        to_move = []
        for det in detections:
            bbox = det.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (0, 0, 255), 1)
            to_move.append((int(bbox[0]), int(bbox[1]),
                            int(bbox[2]), int(bbox[3])))

        # now feed tracked box to move

        # ___________________________________________________________________________MOVE

        if to_move:
            # Initial co-ordinates of the objects to be tracked:
            # create one dlib correlation tracker per detected box and
            # provide each tracker the initial position of its object.
            mover = [dlib.correlation_tracker() for _ in range(len(to_move))]
            for i, rect in enumerate(to_move):
                mover[i].start_track(frame, dlib.rectangle(*rect))  # FEED FIRST BOX HERE

            full_frame_mover = []
            for step in range(100):  # START LOOP MOVER
                ret, frame = video_capture.read()  # temporary: re-read frames inside the mover loop
                if not ret:
                    break
                full_frame_mover = []
                frame = cv2.flip(frame, 1)

                # Update each dlib mover with the new frame and draw its box.
                for i in range(len(mover)):
                    # _____________FEED NEW IMAGE
                    mover[i].update(frame)

                    # _________________DRAW
                    rect = mover[i].get_position()
                    pt1 = (int(rect.left()), int(rect.top()))
                    pt2 = (int(rect.right()), int(rect.bottom()))
                    cv2.rectangle(frame, pt1, pt2, (255, 255, 255), 3)
                    full_frame_mover.append((pt1, pt2))

                # ___________________________________________________________________________Call the tracker
                # Predict/update once per frame (not once per mover) so the
                # Kalman filters age correctly.
                tracker.predict()
                tracker.update(detections)

                # ___________________________________________________________________________DRAW TRACK RECTANGLE
                for track in tracker.tracks:
                    if not track.is_confirmed() or track.time_since_update > 1:
                        continue
                    bbox = track.to_tlbr()
                    cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
                    cv2.putText(frame, str(track.track_id),
                                (int(bbox[0]), int(bbox[1]) + 30), 0,
                                5e-3 * 200, (0, 255, 0), 3)

                    dot = (int(int(bbox[0]) + (int(bbox[2]) - int(bbox[0])) / 2),
                           int(bbox[3] - 10))
                    cv2.circle(frame, dot, 10, (0, 0, 255), -1)

                cv2.imshow('', frame)
                # Continue until the user presses ESC key
                if cv2.waitKey(1) == 27:
                    break

            # END LOOP MOVER

        # ___________________________________________________________________________Call the tracker
        # Tracker.update() expects Detection objects, so convert the last set
        # of mover boxes (corner-point pairs) back to tlwh, re-encode their
        # appearance features and wrap them before updating deep SORT.
        if to_move and full_frame_mover:
            mover_boxs = [[p1[0], p1[1], p2[0] - p1[0], p2[1] - p1[1]]
                          for p1, p2 in full_frame_mover]
            mover_features = encoder(frame, mover_boxs)
            mover_detections = [Detection(bbox, 1.0, feature)
                                for bbox, feature in zip(mover_boxs, mover_features)]
            tracker.predict()
            tracker.update(mover_detections)

        # ___________________________________________________________________________DRAW TRACK RECTANGLE
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            cv2.putText(frame, str(track.track_id),
                        (int(bbox[0]), int(bbox[1]) + 30), 0, 5e-3 * 200,
                        (0, 255, 0), 3)

            dot = (int(int(bbox[0]) + (int(bbox[2]) - int(bbox[0])) / 2),
                   int(bbox[3] - 10))
            cv2.circle(frame, dot, 10, (0, 0, 255), -1)

        # ___________________________________________________________________________GET POINTS From click
        if cv2.waitKey(1) == ord('p'):
            points = get_lines.run(frame, multi=True)
            print(points)
        if points:
            for line in points:
                # Each line is (x1, y1, x2, y2); draw it in yellow.
                cv2.line(frame, tuple(line[0:2]), tuple(line[2:4]),
                         (0, 255, 255), 2)

        cv2.imshow('', frame)

        print('process time : ', time.time() - tpro)
        tpro = time.time()

        if writeVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1
            list_file.write(str(frame_index)+' ')
            if len(boxs) != 0:
                for i in range(0,len(boxs)):
                    list_file.write(str(boxs[i][0]) + ' '+str(boxs[i][1]) + ' '+str(boxs[i][2]) + ' '+str(boxs[i][3]) + ' ')
            list_file.write('\n')
            

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    video_capture.release()
    if writeVideo_flag:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
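
The example above mixes deep SORT with per-detection dlib correlation trackers. As a standalone, minimal sketch of the dlib API pattern it relies on (start_track / update / get_position), assuming a placeholder video path and a single (left, top, right, bottom) box:

import cv2
import dlib

def track_single_box(video_path, box):
    """Follow one (left, top, right, bottom) box through a video with dlib."""
    cap = cv2.VideoCapture(video_path)
    ok, frame = cap.read()
    if not ok:
        return
    tracker = dlib.correlation_tracker()
    tracker.start_track(frame, dlib.rectangle(*box))
    while True:
        ok, frame = cap.read()
        if not ok:
            break
        tracker.update(frame)
        pos = tracker.get_position()
        cv2.rectangle(frame, (int(pos.left()), int(pos.top())),
                      (int(pos.right()), int(pos.bottom())), (255, 255, 255), 2)
        cv2.imshow('dlib tracker', frame)
        if cv2.waitKey(1) == 27:  # stop on ESC
            break
    cap.release()
    cv2.destroyAllWindows()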
Code example #3
def main(_argv):
    # Definition of the parameters
    max_cosine_distance = 0.5
    nn_budget = None
    nms_max_overlap = 1.0

    # Initialize Deep SORT; see the deep_sort GitHub repository for more information.
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    """
    A nearest neighbor distance metric that, for each target, returns
    the closest distance to any sample that has been observed so far.
    """
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)

    # multi target tracker
    tracker = Tracker(metric)

    # List the physical GPU devices visible to the host runtime
    physical_devices = tf.config.experimental.list_physical_devices('GPU')

    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)
        # enable memory growth for physical devices

    # select which YOLOv3 variant to use
    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    # load pre-trained weights
    # pre-trained from open sources, many from public repos on github.
    yolo.load_weights(FLAGS.weights)
    logging.info('weights loaded')

    # array contains name of classes (flags)
    class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')

    # Capture video from a camera index or a video file (files are used for
    # our demonstrations).
    try:
        vid = cv2.VideoCapture(int(FLAGS.video))
    except ValueError:
        vid = cv2.VideoCapture(FLAGS.video)

    # output video writer (created only when an output path is given)
    out = None

    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))
        list_file = open('detection.txt', 'w')
        frame_index = -1

    _, img = vid.read()
    h, w, c = img.shape
    h_numStep = 12  # number of boxes in a column
    w_numStep = 20  # number of boxes in a row

    # Matrix M of area categories for each grid cell (1 = food area, etc.)
    M = [[1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 5, 5, 5, 5, 5],
         [1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 5, 5, 5, 5, 5],
         [1, 1, 1, 2, 2, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 8, 8],
         [2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 8, 8, 8, 8],
         [2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8],
         [2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 7, 7],
         [2, 2, 2, 2, 2, 2, 2, 2, 4, 6, 6, 8, 8, 8, 8, 8, 8, 8, 7, 7, 7],
         [2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 8, 8, 8, 8, 8, 7, 7, 7, 7],
         [2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8, 8, 7, 7, 7, 7, 7],
         [2, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8, 7, 7, 7, 7, 7],
         [6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7],
         [6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7]]

    # store the total time that customers stay in box[i][j]
    total_time_engage = [[0 for i in range(w_numStep + 1)]
                         for j in range(h_numStep + 1)]

    # store the time that customer k is stationary in box[i][j]
    stationary_time = [[[0 for i in range(w_numStep + 1)]
                        for j in range(h_numStep + 1)] for k in range(100000)]

    # store the positions of single customer
    x_single_tracking = []
    y_single_tracking = []
    # single customer's trackingID
    single_trackingID = 34

    # store the current position of customer
    max_trackID = 0
    x_trackID = [-1] * 1000000
    y_trackID = [-1] * 1000000

    # file store the total_time_engage
    file = 'total_time_engage.txt'

    fps = 0.0
    count = 0
    while True:

        _, img = vid.read()

        if img is None:
            logging.warning("Empty Frame")
            time.sleep(0.1)
            count += 1
            if count < 3:
                continue
            else:
                break

        # convert an image from one color space to another
        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        # return a tensor with a length 1 axis inserted at index 0
        img_in = tf.expand_dims(img_in, 0)

        # resize the image to 416x416
        # remember resolution has to be able to work with it
        # tensorflow.image.resize: resize image to size
        img_in = transform_images(img_in, FLAGS.size)

        # return the number of seconds passed since epoch
        t1 = time.time()
        time_finish_last_tracking = t1

        boxes, scores, classes, nums = yolo.predict(img_in)
        classes = classes[0]
        names = []
        for i in range(len(classes)):
            names.append(class_names[int(classes[i])])
        names = np.array(names)
        converted_boxes = convert_boxes(img, boxes[0])
        features = encoder(img, converted_boxes)
        # detections
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                converted_boxes, scores[0], names, features)
        ]

        #initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maxima suppression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes,
                                                    nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Pass detections to the deepsort object and obtain the track information
        # predicts and updates via detection
        tracker.predict()
        tracker.update(detections)

        # draw horizontal boxes
        y_step = int(h / h_numStep)
        y_start = 0
        while True:
            y_end = y_start + y_step
            cv2.rectangle(img, (0, y_start), (int(w), y_end), (0, 0, 0), 1)
            y_start = y_end
            if y_start >= int(h):
                break  # finish drawing here

        # draw vertical boxes
        x_step = int(w / w_numStep)
        x_start = 0
        while True:
            x_end = x_start + x_step
            cv2.rectangle(img, (x_start, 0), (x_end, int(h)), (0, 0, 0), 1)
            x_start = x_end
            if x_start >= int(w):
                break  # finish drawing here

        time_step = time.time() - time_finish_last_tracking
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()  # get the corrected/predicted bounding box
            class_name = track.get_class()  # get the class name of this object
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]

            # identify center of a boundary box
            x_cent = int(bbox[0] + (bbox[2] - bbox[0]) / 2)
            y_cent = int(bbox[1] + (bbox[3] - bbox[1]) / 2)

            # draw detection on frame
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), color,
                          2)  # draw rectangle
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1] - 30)),
                          (int(bbox[0]) +
                           (len(class_name) + len(str(track.track_id))) * 17,
                           int(bbox[1])), color, -1)
            cv2.putText(img, class_name + "-" + str(track.track_id),
                        (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75,
                        (255, 255, 255), 2)  # insert objectName and objectID

            # Display the area each person is in and update the
            # stationary_time and total_time_engage arrays.
            if class_name == "person":
                x_pos = int(x_cent / x_step)
                y_pos = int(y_cent / y_step)
                #print(str(track.track_id) + ": [" + str(y_pos) + ", " + str(x_pos) + "]")
                if track.track_id > max_trackID:
                    max_trackID = track.track_id
                x_trackID[track.track_id] = y_pos
                y_trackID[track.track_id] = x_pos
                stationary_time[track.track_id][y_pos][x_pos] += time_step
                total_time_engage[y_pos][x_pos] += time_step

            # track a single person
            if class_name == "person" and track.track_id == single_trackingID:
                x_single_tracking.append(x_pos)
                y_single_tracking.append(y_pos)

        for track_index in range(max_trackID + 1):
            if x_trackID[track_index] != -1:
                print("customerID " + str(track_index) + ": [" +
                      str(x_trackID[track_index]) + "," +
                      str(y_trackID[track_index]) + "] in " + market_section(M[
                          x_trackID[track_index]][y_trackID[track_index]]))

        with open(file, 'w') as filetostore:
            for i in range(h_numStep):
                for j in range(w_numStep):
                    filetostore.write(
                        "{:.2f}".format(total_time_engage[i][j]) + " ")
                filetostore.write("\n")

        ### UNCOMMENT BELOW IF YOU WANT CONSTANTLY CHANGING YOLO DETECTIONS TO BE SHOWN ON SCREEN
        #for det in detections:
        #    bbox = det.to_tlbr()
        #    cv2.rectangle(img,(int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),(255,0,0), 2)
        time_finish_last_tracking = time.time()

        # print fps on screen
        fps = (fps + (1. / (time.time() - t1))) / 2
        cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
        cv2.imshow('output', img)
        if FLAGS.output:
            out.write(img)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(converted_boxes) != 0:
                for i in range(0, len(converted_boxes)):
                    list_file.write(
                        str(converted_boxes[i][0]) + ' ' +
                        str(converted_boxes[i][1]) + ' ' +
                        str(converted_boxes[i][2]) + ' ' +
                        str(converted_boxes[i][3]) + ' ')
            list_file.write('\n')

        # press q to quit
        if cv2.waitKey(1) == ord('q'):
            break

    f = open("total_time_engage.txt", "rt")
    f.close()

    # insert data into the database

    # initialise track arrays
    track_time = [0] * 10000000
    track_customerID = [0] * 10000000
    track_area = ["" for x in range(10000000)]
    x_single = [0] * 10000000
    y_single = [0] * 10000000

    # organise data to be inserted
    track_index = -1
    for k in range(1000):  # only the first 1000 track IDs are exported here
        for h in range(h_numStep):
            for w in range(w_numStep):
                if stationary_time[k][h][w] != 0:
                    track_index += 1
                    track_time[track_index] = stationary_time[k][h][w]
                    track_customerID[track_index] = k
                    track_area[track_index] = str(h) + ', ' + str(w)
    x_tmp = -1
    y_tmp = -1
    single_track_index = -1
    for k in range(len(x_single_tracking)):
        if x_single_tracking[k] != x_tmp and y_single_tracking[k] != y_tmp:
            single_track_index += 1
            x_single[single_track_index] = x_single_tracking[k]
            y_single[single_track_index] = y_single_tracking[k]
            x_tmp = x_single[single_track_index]
            y_tmp = y_single[single_track_index]
    single_tracking_areas = ""
    for k in range(single_track_index):
        single_tracking_areas += '[' + str(x_single[k]) + ',' + str(
            y_single[k]) + '] , '

    # connect and insert the appropriate data in primary_table
    for k in range(track_index + 1):
        try:
            conn = mariadb.connect(user="******",
                                   password="******",
                                   host="localhost",
                                   database="trackingDB")

            cur = conn.cursor()
            mySql_insert_query = """INSERT INTO primary_table(trackID, customerID, area) 
                                    VALUES (%s, %s, %s) """

            recordTuple = (k, track_customerID[k], track_area[k])
            cur.execute(mySql_insert_query, recordTuple)
            conn.commit()

        except mariadb.Error as error:
            print("Failed to insert record into the primary_table {}".format(
                error))
        finally:
            if (conn.is_connected()):
                cur.close()
                conn.close()

    # connect and insert the appropriate data in "engaged" table
    for k in range(track_index + 1):
        try:
            conn = mariadb.connect(user="******",
                                   password="******",
                                   host="localhost",
                                   database="trackingDB")

            cur = conn.cursor()
            mySql_insert_query = """INSERT INTO engaged(trackID, engagement_time) 
                                    VALUES (%s, %s) """

            recordTuple = (k, track_time[k])
            cur.execute(mySql_insert_query, recordTuple)
            conn.commit()

        except mariadb.Error as error:
            print("Failed to insert record into the engaged table {}".format(
                error))
        finally:
            if (conn.is_connected()):
                cur.close()
                conn.close()

    # connect and insert the appropriate data in "total_areas" table
    try:
        conn = mariadb.connect(user="******",
                               password="******",
                               host="localhost",
                               database="trackingDB")

        cur = conn.cursor()
        mySql_insert_query = """INSERT INTO total_areas(customerID, all_areas_visited) 
                                    VALUES (%s, %s) """

        recordTuple = (single_trackingID, single_tracking_areas)
        cur.execute(mySql_insert_query, recordTuple)
        conn.commit()

    except mariadb.Error as error:
        print("Failed to insert record into the total_areas table {}".format(
            error))
    finally:
        if (conn.is_connected()):
            cur.close()
            conn.close()

    # plot the graph
    fig = plt.figure(1)
    fig.suptitle('Engagement time on different areas', fontsize=20)
    ax = plt.axes(projection='3d')

    # Data for a three-dimensional line
    x = np.arange(w_numStep - 1, -1, -1)
    y = np.linspace(0, h_numStep - 1, h_numStep)
    X, Y = np.meshgrid(x, y)
    Z = [[0 for j in range(w_numStep)] for i in range(h_numStep)]
    for i in range(h_numStep):
        for j in range(w_numStep):
            Z[i][j] = total_time_engage[i][j]
    Z = np.array(Z)

    # Plot the surface.
    ax.plot_surface(X,
                    Y,
                    Z,
                    rstride=1,
                    cstride=1,
                    cmap='viridis',
                    edgecolor='none')
    ax.set_xlabel('width')
    ax.set_ylabel('height')
    ax.set_zlabel('time')

    ax.view_init(35, 80)
    # get the current axes and hide the x/y tick marks
    frame = plt.gca()
    frame.axes.get_xaxis().set_ticks([])
    frame.axes.get_yaxis().set_ticks([])

    # second figure: walking pattern of a single customer
    fig2 = plt.figure(2)
    fig2_title = ('Walking pattern of a single customer (trackingID = ' +
                  str(single_trackingID) + ')')
    fig2.suptitle(fig2_title, fontsize=15)
    plt.plot(x_single_tracking, y_single_tracking, 'ro')
    plt.axis([0, w_numStep, h_numStep, 0])

    frame.axes.get_xaxis().set_ticks([])
    frame.axes.get_yaxis().set_ticks([])

    fig.savefig('engage_level.jpg')
    fig2.savefig('single_tracking.jpg')
    plt.show()

    vid.release()
    if FLAGS.output:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
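
The loop above prints market_section(M[...][...]), but that helper is not included in the snippet. A hypothetical sketch consistent with the comment on M (only 1 = food area is stated; the other labels below are placeholders):

def market_section(category):
    """Map a category number from the M matrix to an area name (placeholders)."""
    sections = {
        1: "food area",
        2: "area 2", 3: "area 3", 4: "area 4",
        5: "area 5", 6: "area 6", 7: "area 7", 8: "area 8",
    }
    return sections.get(category, "unknown area")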
Code example #4
File: demo.py  Project: alievilya/deep-sort-yolov4
def main(yolo):
    # Definition of the parameters
    max_cosine_distance = 0.2
    nn_budget = None
    nms_max_overlap = 1.0

    output_format = 'mp4'
    video_name = 'bus4_2in_4out.mp4'
    file_path = join('data_files/videos', video_name)
    output_name = 'save_data/out_' + video_name[0:-3] + output_format
    initialize_door_by_yourself = False
    door_array = None
    # Deep SORT
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    show_detections = True
    writeVideo_flag = True
    asyncVideo_flag = False

    counter = Counter(counter_in=0, counter_out=0, track_id=0)

    if asyncVideo_flag:
        video_capture = VideoCaptureAsync(file_path)
    else:
        video_capture = cv2.VideoCapture(file_path)

    if asyncVideo_flag:
        video_capture.start()

    if writeVideo_flag:
        if asyncVideo_flag:
            w = int(video_capture.cap.get(3))
            h = int(video_capture.cap.get(4))
        else:
            w = int(video_capture.get(3))
            h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter(output_name, fourcc, 15, (w, h))
        frame_index = -1

    fps = 0.0
    fps_imutils = imutils.video.FPS().start()

    ret, first_frame = video_capture.read()

    if door_array is None:
        if initialize_door_by_yourself:
            door_array = select_object(first_frame)[0]
            print(door_array)
        else:
            all_doors = read_door_info('data_files/doors_info.csv')
            door_array = all_doors[video_name]

    border_door = door_array[3]
    error_values = []
    truth = get_truth(video_name)
    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if not ret:
            total_count = counter.return_total_count()
            true_total = truth.inside + truth.outside
            err = abs(total_count - true_total) / true_total
            log_res = "in video: {}\n predicted / true\n counter in: {} / {}\n counter out: {} / {}\n" \
                      " total: {} / {}\n error: {}\n______________\n".format(video_name, counter.counter_in,
                                                                             truth.inside,
                                                                             counter.counter_out, truth.outside,
                                                                             total_count, true_total, err)
            with open('log_results.txt', 'w') as file:
                file.write(log_res)
            print(log_res)
            error_values.append(err)
            break

        t1 = time.time()

        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb
        boxes, confidence, classes = yolo.detect_image(image)

        features = encoder(frame, boxes)
        detections = [
            Detection(bbox, confidence, cls,
                      feature) for bbox, confidence, cls, feature in zip(
                          boxes, confidence, classes, features)
        ]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.cls for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap,
                                                    scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        cv2.rectangle(frame, (int(door_array[0]), int(door_array[1])),
                      (int(door_array[2]), int(door_array[3])), (23, 158, 21),
                      2)

        for det in detections:
            bbox = det.to_tlbr()
            if show_detections and len(classes) > 0:
                score = "%.2f" % (det.confidence * 100) + "%"
                rect_head = Rectangle(bbox[0], bbox[1], bbox[2], bbox[3])
                rect_door = Rectangle(int(door_array[0]), int(door_array[1]),
                                      int(door_array[2]), int(door_array[3]))
                intersection = rect_head & rect_door

                if intersection:
                    squares_coeff = rect_square(*intersection) / rect_square(
                        *rect_head)
                    cv2.putText(
                        frame,
                        score + " inter: " + str(round(squares_coeff, 3)),
                        (int(bbox[0]), int(bbox[3])), 0, 1e-3 * frame.shape[0],
                        (0, 100, 255), 5)
                    cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[2]), int(bbox[3])), (255, 0, 0), 3)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            # first appearance of an object with id = track.track_id

            if track.track_id not in counter.people_init or counter.people_init[
                    track.track_id] == 0:
                counter.obj_initialized(track.track_id)
                rect_head = Rectangle(bbox[0], bbox[1], bbox[2], bbox[3])
                rect_door = Rectangle(door_array[0], door_array[1],
                                      door_array[2], door_array[3])
                res = rect_head & rect_door
                if res:

                    inter_square = rect_square(*res)
                    head_square = rect_square(*rect_head)
                    #     was initialized in door, probably going in
                    if (inter_square / head_square) >= 0.8:
                        counter.people_init[track.track_id] = 2
                        #     initialized in the bus, maybe going out
                    elif (inter_square /
                          head_square) <= 0.4 or bbox[3] > border_door:
                        counter.people_init[track.track_id] = 1
                # res is None, means that object is not in door contour
                else:
                    counter.people_init[track.track_id] = 1

                counter.people_bbox[track.track_id] = bbox
            counter.cur_bbox[track.track_id] = bbox

            adc = "%.2f" % (track.adc *
                            100) + "%"  # Average detection confidence
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            cv2.putText(frame, "ID: " + str(track.track_id),
                        (int(bbox[0]), int(bbox[1])), 0, 1e-3 * frame.shape[0],
                        (0, 255, 0), 5)

            if not show_detections:
                track_cls = track.cls
                cv2.putText(frame, str(track_cls),
                            (int(bbox[0]), int(bbox[3])), 0,
                            1e-3 * frame.shape[0], (0, 255, 0), 1)
                cv2.putText(
                    frame, 'ADC: ' + adc,
                    (int(bbox[0]), int(bbox[3] + 2e-2 * frame.shape[1])), 0,
                    1e-3 * frame.shape[0], (0, 255, 0), 1)

        id_get_lost = [
            track.track_id for track in tracker.tracks
            if track.time_since_update >= 25 and track.age >= 29
        ]
        id_inside_tracked = [
            track.track_id for track in tracker.tracks if track.age > 60
        ]
        for val in counter.people_init.keys():
            # check bbox also
            cur_c = find_centroid(counter.cur_bbox[val])
            init_c = find_centroid(counter.people_bbox[val])
            vector_person = (cur_c[0] - init_c[0], cur_c[1] - init_c[1])

            if val in id_get_lost and counter.people_init[val] != -1:
                # if the y-component of vector_person is negative, the current
                # centroid is above the initial one, i.e. the person is moving
                # in the exit direction
                if vector_person[1] > 70 and counter.people_init[
                        val] == 2:  # and counter.people_bbox[val][3] > border_door \
                    counter.get_in()

                elif vector_person[1] < -70 and counter.people_init[val] == 1:
                    counter.get_out()

                counter.people_init[val] = -1
                print("person left frame")
                print(f"current centroid - init : {cur_c} - {init_c}\n")
                print(f"vector: {vector_person}\n")
            # elif val in id_inside_tracked and val not in id_get_lost and counter.people_init[val] == 1 \
            #         and bb_intersection_over_union(counter.cur_bbox[val], door_array) <= 0.3 \
            #         and vector_person[1] > 0:  # and \
            #     # counter.people_bbox[val][3] > border_door:
            #     counter.get_in()
            #
            #     counter.people_init[val] = -1
            #     print(f"person is tracked for a long time")
            #     print(f"current centroid - init : {cur_c} - {init_c}\n")
            #     print(f"vector: {vector_person}\n")
            #     imaggg = cv2.line(frame, find_centroid(counter.cur_bbox[val]),
            #                       find_centroid(counter.people_bbox[val]),
            #                       (0, 0, 255), 7)

            # cv2.imshow('frame', imaggg)
            # cv2.waitKey(0)

        ins, outs = counter.show_counter()
        cv2.putText(frame, "in: {}, out: {} ".format(ins, outs), (10, 30), 0,
                    1e-3 * frame.shape[0], (255, 0, 0), 5)

        cv2.namedWindow('image', cv2.WINDOW_NORMAL)
        cv2.resizeWindow('image', 1400, 800)
        cv2.imshow('image', frame)

        if writeVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1

        fps_imutils.update()

        if not asyncVideo_flag:
            fps = (fps + (1. / (time.time() - t1))) / 2
            # print("FPS = %f" % (fps))

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    fps_imutils.stop()
    print('imutils FPS: {}'.format(fps_imutils.fps()))

    if asyncVideo_flag:
        video_capture.stop()
    else:
        video_capture.release()

    if writeVideo_flag:
        out.release()

    cv2.destroyAllWindows()

    mean_error = np.mean(error_values)
    print("mean error for {} video: {}".format(video_name, mean_error))
Code example #5
def run(sequence_dir,
        detection_file,
        output_file,
        min_confidence,
        nms_max_overlap,
        min_detection_height,
        max_cosine_distance,
        nn_budget,
        display,
        offset,
        n_frames,
        max_iou_distance,
        max_age,
        n_init,
        alpha_ds=0.0):
    """Run multi-target tracker on a particular sequence.

    Parameters
    ----------
    sequence_dir : str
        Path to the MOTChallenge sequence directory.
    detection_file : str
        Path to the detections file.
    output_file : str
        Path to the tracking output file. This file will contain the tracking
        results on completion.
    min_confidence : float
        Detection confidence threshold. Disregard all detections that have
        a confidence lower than this value.
    nms_max_overlap: float
        Maximum detection overlap (non-maxima suppression threshold).
    min_detection_height : int
        Detection height threshold. Disregard all detections that have
        a height lower than this value.
    max_cosine_distance : float
        Gating threshold for cosine distance metric (object appearance).
    nn_budget : Optional[int]
        Maximum size of the appearance descriptor gallery. If None, no budget
        is enforced.
    display : bool
        If True, show visualization of intermediate tracking results.
    offset : int
        Frame offset passed to gather_sequence_info.
    n_frames : int
        Number of frames to process; also used for progress reporting.
    max_iou_distance : float
        Gating threshold for the tracker's IOU matching stage.
    max_age : int
        Maximum number of missed updates before a track is deleted.
    n_init : int
        Number of consecutive detections before a track is confirmed.
    alpha_ds : float
        Forwarded as the first argument to NearestNeighborDistanceMetric in
        this variant of deep_sort.

    """

    seq_info = gather_sequence_info(sequence_dir, detection_file, offset,
                                    n_frames)
    metric = nn_matching.NearestNeighborDistanceMetric(alpha_ds,
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric,
                      max_iou_distance=max_iou_distance,
                      max_age=max_age,
                      n_init=n_init)
    results = []

    def frame_callback(vis, frame_idx):
        progress_interval = max(1, n_frames // 10)
        if frame_idx % progress_interval == 0:
            print("Processing frame {} / {} ".format(frame_idx, n_frames))

        # Load image and generate detections.
        detections = create_detections(seq_info["detections"], frame_idx,
                                       min_detection_height)
        detections = [d for d in detections if d.confidence >= min_confidence]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap,
                                                    scores)
        detections = [detections[i] for i in indices]

        # Update tracker.
        tracker.predict()
        tracker.update(detections)

        # Update visualization.
        if display:
            image = cv2.imread(seq_info["image_filenames"][frame_idx],
                               cv2.IMREAD_COLOR)
            vis.set_image(image.copy())
            vis.draw_detections(detections)
            vis.draw_trackers(tracker.tracks)

        # Store results.
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlwh()
            results.append([
                frame_idx, track.track_id, bbox[0], bbox[1], bbox[2], bbox[3],
                1, -1, -1, -1
            ] + list(track.last_feature()))

    # Run tracker.
    if display:
        visualizer = visualization.Visualization(seq_info, update_ms=5)
    else:
        visualizer = visualization.NoVisualization(seq_info)
    visualizer.run(frame_callback)

    # Store results.
    np.save(output_file, results)
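
Unlike the other examples, this variant appends the track's appearance vector (track.last_feature()) to every result row and stores everything with np.save. A minimal sketch, assuming a fixed-length feature, for splitting the saved array back into the ten MOT columns and the feature block:

import numpy as np

def load_results_with_features(path, n_mot_cols=10):
    """Split rows saved by run() into MOT columns and appearance features."""
    data = np.asarray(np.load(path, allow_pickle=True), dtype=float)
    data = np.atleast_2d(data)
    return data[:, :n_mot_cols], data[:, n_mot_cols:]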
Code example #6
def human_tracking(frames, yolo):

    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # deep_sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    new_frames = []

    for frame in frames:

        image = Image.fromarray(frame)
        boxs = yolo.detect_image(image)
        # print("box_num",len(boxs))
        features = encoder(frame, boxs)

        # The detector score is not available here, so the confidence is set to 1.0.
        detections = [
            Detection(bbox, 1.0, feature)
            for bbox, feature in zip(boxs, features)
        ]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap,
                                                    scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            frame = cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[2]), int(bbox[3])),
                                  (255, 255, 255), 2)
            frame = cv2.putText(frame, str(track.track_id),
                                (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200,
                                (0, 255, 0), 2)

        for det in detections:
            bbox = det.to_tlbr()
            frame = cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)

        new_frames.append(frame)

    # Note: boxs holds the detections from the final frame only.
    return new_frames, boxs
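
human_tracking() takes a list of frames and returns annotated copies, so it is easy to wrap around a short clip. A minimal usage sketch, assuming a compatible yolo instance; paths and the frame rate are placeholders:

import cv2

def annotate_clip(input_path, output_path, yolo, fps=25):
    """Read a clip, run human_tracking on its frames and save the result."""
    cap = cv2.VideoCapture(input_path)
    frames = []
    while True:
        ok, frame = cap.read()
        if not ok:
            break
        frames.append(frame)
    cap.release()

    new_frames, _ = human_tracking(frames, yolo)

    h, w = new_frames[0].shape[:2]
    out = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'MJPG'),
                          fps, (w, h))
    for frame in new_frames:
        out.write(frame)
    out.release()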
Code example #7
def main(yolo):
    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap1 = 1.0

    # deep_sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    writeVideo_flag = True
    OPTICAL = False

    # video_filename = './dataset/people.mp4'
    # video_filename = 'C:/tensorflow1\models/research\object_detection/videos/IMG_1101.MOV'
    video_filename = 'C:/tensorflow1\models/research\object_detection/videos/IMG_1105-diet.mp4'
    video_capture = cv2.VideoCapture(video_filename)

    if writeVideo_flag:
        # Define the codec and create VideoWriter object
        w = int(video_capture.get(3))
        h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc('M', 'J', 'P', 'G')
        out = cv2.VideoWriter(
            'C:/tensorflow1\models/research\object_detection/videos/output_0419.avi',
            fourcc, 30, (w, h))
        list_file = open('detection.txt', 'w')
        list_file2 = open('tracking.txt', 'w')
        frame_index = -1

    fps = 0.0
    firstflag = 1
    while True:
        ok, frame = video_capture.read()  # frame shape 640*480*3
        if not ok:
            break
        t1 = time.time()

        image = Image.fromarray(frame)
        boxs = yolo.detect_image(image)  # [x,y,w,h]
        # print("box_num",len(boxs))
        features = encoder(frame, boxs)

        # The detector score is not available here, so the confidence is set to 1.0.
        detections = [
            Detection(bbox, 1.0, feature)
            for bbox, feature in zip(boxs, features)
        ]

        # Run non-maxima suppression (NMS)
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap1,
                                                    scores)
        detections = [detections[i] for i in indices]

        ### Call the tracker
        tracker.predict()
        tracker.update(detections)

        ### Add one more step of optical flow
        # convert detections to bboxs for optical flow
        n_object = len(detections)
        bboxs = np.empty((n_object, 4, 2), dtype=float)
        i = 0
        for det in detections:
            bbox = det.to_tlbr()  # (min x, min y, max x, max y)
            (xmin, ymin, boxw, boxh) = (int(bbox[0]), int(bbox[1]),
                                        int(bbox[2]) - int(bbox[0]),
                                        int(bbox[3]) - int(bbox[1]))
            bboxs[i, :, :] = np.array([[xmin, ymin], [xmin + boxw, ymin],
                                       [xmin, ymin + boxh],
                                       [xmin + boxw,
                                        ymin + boxh]]).astype(float)
            i = i + 1

        if firstflag:
            # No previous frame yet; just remember this one for optical flow.
            oldframe = frame
        else:

            startXs, startYs = getFeatures(cv2.cvtColor(
                frame, cv2.COLOR_RGB2GRAY),
                                           bboxs,
                                           use_shi=False)
            newXs, newYs = estimateAllTranslation(startXs, startYs, oldframe,
                                                  frame)
            Xs, Ys, newbboxs = applyGeometricTransformation(
                startXs, startYs, newXs, newYs, bboxs)
            oldframe = frame
            ## generate new detections
            boxs = bbox_transform(newbboxs)
            features = encoder(frame, boxs)
            detections = [
                Detection(bbox, 1.0, feature)
                for bbox, feature in zip(boxs, features)
            ]

            boxes = np.array([d.tlwh for d in detections])
            scores = np.array([d.confidence for d in detections])
            indices = preprocessing.non_max_suppression(
                boxes, nms_max_overlap1, scores)
            detections = [detections[i] for i in indices]

            ## Call the tracker again
            tracker.predict()
            tracker.update(detections)
        origin_frame = frame.copy()
        draw_count_line(frame)
        draw_people_point_line(frame)
        draw_count_text(frame)
        boxes_tracking = np.array(
            [track.to_tlwh() for track in tracker.tracks])
        ### Deep sort tracker visualization
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            central_point = Point(
                int((bbox[0] + bbox[2]) / 2),
                int((bbox[1] + bbox[3]) / 2 + (bbox[3] - bbox[1]) / 3))
            # crop from the original (unannotated) frame
            crop_img = origin_frame[int(bbox[1]):int(bbox[3]),
                                    int(bbox[0]):int(bbox[2])]

            append_point(central_point, track.track_id, crop_img)
            # detect_face_gender(crop_img)
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            cv2.putText(frame, str(track.track_id),
                        (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200,
                        (0, 255, 0), 2)

        ### Start from the first frame, do optical flow for every two consecutive frames.
        if OPTICAL:
            if firstflag:
                n_object = len(detections)
                bboxs = np.empty((n_object, 4, 2), dtype=float)
                i = 0
                for det in detections:
                    bbox = det.to_tlbr()  # (min x, min y, max x, max y)
                    (xmin, ymin, boxw, boxh) = (int(bbox[0]), int(bbox[1]),
                                                int(bbox[2]) - int(bbox[0]),
                                                int(bbox[3]) - int(bbox[1]))
                    bboxs[i, :, :] = np.array([[xmin, ymin],
                                               [xmin + boxw, ymin],
                                               [xmin, ymin + boxh],
                                               [xmin + boxw,
                                                ymin + boxh]]).astype(float)
                    i = i + 1
                startXs, startYs = getFeatures(cv2.cvtColor(
                    frame, cv2.COLOR_RGB2GRAY),
                                               bboxs,
                                               use_shi=False)
                oldframe = frame
                oldbboxs = bboxs
            else:
                ### add new tracking object
                # new_n_object = len(detections)
                # if new_n_object > n_object:
                #     # Run non-maxima suppression (NMS)
                #     tmp_boxes = np.array([d.tlwh for d in detections])
                #     tmp_scores = np.array([d.confidence for d in detections])
                #     tmp_indices = preprocessing.non_max_suppression(tmp_boxes, nms_max_overlap2, tmp_scores)
                #     tmp_detections = [detections[i] for i in indices]
                # if len(tmp_detections)>n_object:

                newXs, newYs = estimateAllTranslation(startXs, startYs,
                                                      oldframe, frame)
                Xs, Ys, newbboxs = applyGeometricTransformation(
                    startXs, startYs, newXs, newYs, oldbboxs)
                # update coordinates
                (startXs, startYs) = (Xs, Ys)

                oldframe = frame
                oldbboxs = newbboxs

                # update feature points as required
                n_features_left = np.sum(Xs != -1)
                print('# of Features: %d' % n_features_left)
                if n_features_left < 15:
                    print('Generate New Features')
                    startXs, startYs = getFeatures(
                        cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY), newbboxs)

                # draw bounding box and visualize feature point for each object
                for j in range(n_object):
                    (xmin, ymin, boxw,
                     boxh) = cv2.boundingRect(newbboxs[j, :, :].astype(int))
                    cv2.rectangle(frame, (xmin, ymin),
                                  (xmin + boxw, ymin + boxh), (255, 255, 255),
                                  2)  # BGR color
                    cv2.putText(frame, str(j), (xmin, ymin), 0, 5e-3 * 200,
                                (0, 255, 0), 2)
                    # red color features
                    # for k in range(startXs.shape[0]):
                    #     cv2.circle(frame, (int(startXs[k,j]),int(startYs[k,j])),3,(0,0,255),thickness=2)

        for det in detections:
            bbox = det.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 0, 0),
                          2)  # BGR color

        # cv2.imshow('', frame)

        if writeVideo_flag:
            # save a frame
            out.write(frame)
            # detection
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(boxs) != 0:
                for i in range(0, len(boxs)):
                    list_file.write(
                        str(boxs[i][0]) + ' ' + str(boxs[i][1]) + ' ' +
                        str(boxs[i][2]) + ' ' + str(boxs[i][3]) + ' ')
            list_file.write('\n')
            # tracking
            list_file2.write(str(frame_index) + ' ')
            if len(boxes_tracking) != 0:
                for i in range(0, len(boxes_tracking)):
                    list_file2.write(
                        str(boxes_tracking[i][0]) + ' ' +
                        str(boxes_tracking[i][1]) + ' ' +
                        str(boxes_tracking[i][2]) + ' ' +
                        str(boxes_tracking[i][3]) + ' ')
            list_file2.write('\n')

        firstflag = 0

        fps = (fps + (1. / (time.time() - t1))) / 2
        print("fps= %f" % (fps))

        # Press Q to stop!
        # if cv2.waitKey(1) & 0xFF == ord('q'):
        #     break

    video_capture.release()
    if writeVideo_flag:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
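
The optical-flow step above depends on a bbox_transform helper to turn the (N, 4, 2) corner arrays returned by applyGeometricTransformation back into [x, y, w, h] boxes for the encoder. A hypothetical sketch of that contract; the real implementation may differ:

import numpy as np

def bbox_transform(corner_bboxs):
    """Convert (N, 4, 2) corner arrays into a list of [x, y, w, h] boxes."""
    boxs = []
    for corners in np.asarray(corner_bboxs):
        xs, ys = corners[:, 0], corners[:, 1]
        xmin, ymin = float(xs.min()), float(ys.min())
        boxs.append([xmin, ymin,
                     float(xs.max() - xmin), float(ys.max() - ymin)])
    return boxs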
Code example #8
def main(yolo):

    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 0.7

    # deep_sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    writeVideo_flag = True
    webcam_flag = False
    resize_flag = True
    resize_size = (800, 450)

    # some links from earthcam https://github.com/Crazycook/Working/blob/master/Webcams.txt    https://www.vlcm3u.com/web-cam-live/
    # video_url = 'https://videos3.earthcam.com/fecnetwork/lacitytours1.flv/chunklist_w683585821.m3u8' # HOLLYWOOD
    # video_url = 'https://videos3.earthcam.com/fecnetwork/9974.flv/chunklist_w1421640637.m3u8' # NYC
    # video_url = 'https://videos3.earthcam.com/fecnetwork/5775.flv/chunklist_w1803081483.m3u8' # NYC 2
    # video_url = 'http://181.1.29.189:60001/cgi-bin/snapshot.cgi?chn=0&u=admin'
    # video_url = 'https://videos-3.earthcam.com/fecnetwork/15559.flv/chunklist_w573709200.m3u8' # NYC 3
    video_url = 'https://hddn01.skylinewebcams.com/live.m3u8?a=97psdt8nv2hsmclta3nuu4di94'

    if webcam_flag:
        video_capture = cv2.VideoCapture(0)
    else:
        video_capture = cv2.VideoCapture()
        video_capture.set(cv2.CAP_PROP_BUFFERSIZE, 2)
        video_capture.open(video_url)

    if writeVideo_flag:
        # Define the codec and create VideoWriter object
        w = int(video_capture.get(3))
        h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        out = cv2.VideoWriter('output.avi', fourcc, 15, (w, h))
        list_file = open('detection.txt', 'w')
        frame_index = -1

    fps = 0.0
    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if ret != True:
            break
        t1 = time.time()

        if resize_flag:
            frame = cv2.resize(frame,
                               resize_size,
                               interpolation=cv2.INTER_AREA)

        # image = Image.fromarray(frame)
        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb
        boxs = yolo.detect_image(image)
        # print("box_num",len(boxs))
        if np.array(boxs).size > 0:
            features = encoder(frame, np.array(boxs)[:, 0:4].tolist())

            class_names = yolo.class_names

            # This YOLO wrapper returns no per-box confidence, so the detection
            # score is set to 1.0 here.
            detections = [
                Detection(bbox, 1.0, feature)
                for bbox, feature in zip(boxs, features)
            ]

            # Run non-maxima suppression.
            boxes = np.array([d.tlwh for d in detections])
            scores = np.array([d.confidence for d in detections])
            indices = preprocessing.non_max_suppression(
                boxes, nms_max_overlap, scores)
            detections = [detections[i] for i in indices]

            #Call the tracker
            tracker.predict()
            tracker.update(detections)

            for track in tracker.tracks:
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                bbox = track.to_tlbr()
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
                cv2.putText(frame, str(track.track_id),
                            (int(bbox[0]), int(bbox[1]) - 10), 0, 5e-3 * 100,
                            (0, 0, 255), 2)

            for det in detections:
                bbox = det.to_tlbr()
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)
                # Detection stores only a confidence here (no class label), so draw that.
                cv2.putText(
                    frame, "(" + str(round(det.confidence, 2)) + ")",
                    (int(bbox[0]), int(bbox[3])), 0, 5e-3 * 90, (255, 0, 0), 2)
                # cv2.putText(frame, str(int(bbox[0])) + "-" + str(int(bbox[3])), (int(bbox[0]), int(bbox[3])), 0, 5e-3 * 90, (0, 0, 255), 2)

        cv2.imshow('', frame)

        if writeVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(boxs) != 0:
                for i in range(0, len(boxs)):
                    list_file.write(
                        str(boxs[i][0]) + ' ' + str(boxs[i][1]) + ' ' +
                        str(boxs[i][2]) + ' ' + str(boxs[i][3]) + ' ')
            list_file.write('\n')

        fps = (fps + (1. / (time.time() - t1))) / 2
        print("fps= %f" % (fps))

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    video_capture.release()
    if writeVideo_flag:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
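
Each of the examples in this listing repeats the same per-frame pattern: detect, encode appearance features, wrap the boxes as Detection objects, run non-maximum suppression, then predict/update the tracker and report the confirmed tracks. A condensed sketch of that loop, for reference only; it assumes the standard deep_sort package used above and placeholder detector/encoder callables that return tlwh boxes and per-box features:

import numpy as np
from deep_sort import preprocessing
from deep_sort.detection import Detection

def track_frame(frame, detector, encoder, tracker, nms_max_overlap=1.0):
    # Detect, then compute one appearance embedding per box.
    boxs = detector(frame)                      # list of [x, y, w, h]
    features = encoder(frame, boxs)
    detections = [Detection(bbox, 1.0, feature)
                  for bbox, feature in zip(boxs, features)]

    # Non-maximum suppression on the detections.
    boxes = np.array([d.tlwh for d in detections])
    scores = np.array([d.confidence for d in detections])
    indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
    detections = [detections[i] for i in indices]

    # Kalman prediction, then measurement update with the new detections.
    tracker.predict()
    tracker.update(detections)

    # Report only confirmed, recently updated tracks.
    return [(track.track_id, track.to_tlbr()) for track in tracker.tracks
            if track.is_confirmed() and track.time_since_update <= 1]
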
コード例 #9
0
def main(yolo):
    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # Deep SORT
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    tracking = True
    writeVideo_flag = True
    asyncVideo_flag = False

    file_path = 'video.webm'
    if asyncVideo_flag:
        video_capture = VideoCaptureAsync(file_path)
    else:
        video_capture = cv2.VideoCapture(file_path)

    if asyncVideo_flag:
        video_capture.start()

    if writeVideo_flag:
        if asyncVideo_flag:
            w = int(video_capture.cap.get(3))
            h = int(video_capture.cap.get(4))
        else:
            w = int(video_capture.get(3))
            h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter('output_yolov4.avi', fourcc, 30, (w, h))
        frame_index = -1

    fps = 0.0
    fps_imutils = imutils.video.FPS().start()
    model_par, valid_transform = model_init_par()
    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if ret != True:
            break

        t1 = time.time()

        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb
        boxes, confidence, classes = yolo.detect_image(image)

        if tracking:
            features = encoder(frame, boxes)

            detections = [
                Detection(bbox, confidence, cls, feature)
                for bbox, confidence, cls, feature in zip(
                    boxes, confidence, classes, features)
            ]
        else:
            detections = [
                Detection_YOLO(bbox, confidence, cls)
                for bbox, confidence, cls in zip(boxes, confidence, classes)
            ]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap,
                                                    scores)
        detections = [detections[i] for i in indices]

        for det in detections:
            bbox = det.to_tlbr()
            score = "%.2f" % round(det.confidence * 100, 2) + "%"
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)
            if len(classes) > 0:
                cls = det.cls
                cv2.putText(frame,
                            str(cls) + " " + score,
                            (int(bbox[0]), int(bbox[3])), 0,
                            1e-3 * frame.shape[0], (0, 255, 0), 1)

        if tracking:
            # Call the tracker
            tracker.predict()
            tracker.update(detections)

            for track in tracker.tracks:
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                bbox = track.to_tlbr()
                #crop_img = frame[int(bbox[1]):int(bbox[3]),int(bbox[0]):int(bbox[2])]
                crop_img = image.crop(
                    [int(bbox[0]),
                     int(bbox[1]),
                     int(bbox[2]),
                     int(bbox[3])])
                #res_txt = demo_par(model_par, valid_transform, crop_img)

                #draw.rectangle(xy=person_bbox[:-1], outline='red', width=1)

                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
                cv2.putText(frame, "ID: " + str(track.track_id),
                            (int(bbox[0]), int(bbox[1])), 0,
                            1e-3 * frame.shape[0], (0, 255, 0), 1)
                font = ImageFont.truetype(
                    '/home/sohaibrabbani/PycharmProjects/Strong_Baseline_of_Pedestrian_Attribute_Recognition/arial.ttf',
                    size=10)
                # positive_cnt = 1
                # for txt in res_txt:
                #     if 'personal' in txt:
                #         #draw.text((x1, y1 + 20 * positive_cnt), txt, (255, 0, 0), font=font)
                #         cv2.putText(frame, txt, (int(bbox[0]), int(bbox[1]) + 20 * positive_cnt), 0,
                #                     1e-3 * frame.shape[0], (0, 255, 0), 1)
                #         positive_cnt += 1

        cv2.imshow('', frame)

        if writeVideo_flag:  # and not asyncVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1

        fps_imutils.update()

        if not asyncVideo_flag:
            fps = (fps + (1. / (time.time() - t1))) / 2
            print("FPS = %f" % (fps))

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    fps_imutils.stop()
    print('imutils FPS: {}'.format(fps_imutils.fps()))

    if asyncVideo_flag:
        video_capture.stop()
    else:
        video_capture.release()

    if writeVideo_flag:
        out.release()

    cv2.destroyAllWindows()
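
Two box layouts alternate throughout these examples: tlwh (top-left corner plus width and height, as returned by Detection.tlwh and Track.to_tlwh()) and tlbr (top-left and bottom-right corners, as returned by to_tlbr()). A minimal sketch of the conversion they imply:

# Box-format helpers matching the tlwh/tlbr convention used above (sketch).
def tlwh_to_tlbr(box):
    x, y, w, h = box
    return [x, y, x + w, y + h]

def tlbr_to_tlwh(box):
    x1, y1, x2, y2 = box
    return [x1, y1, x2 - x1, y2 - y1]

# Example: a 50x100 box with its top-left corner at (10, 20).
assert tlwh_to_tlbr([10, 20, 50, 100]) == [10, 20, 60, 120]
assert tlbr_to_tlwh([10, 20, 60, 120]) == [10, 20, 50, 100]
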
コード例 #10
0
def main():
    min_confidence = 20
    min_detection_height = 0
    nms_max_overlap = 1.0
    max_cosine_distance = 0.2
    nn_budget = 100

    args = parser()
    check_arguments_errors(args)

    encoder = generate_detections.create_box_encoder(
        args.model_file, batch_size=args.batch_size)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)
    results = []

    # yolov4
    # yolo(args=args)
    random.seed(3)  # deterministic bbox colors
    network, class_names, class_colors = darknet.load_network(
        args.config_file,
        args.data_file,
        args.weights,
        batch_size=args.batch_size)

    images = load_images(args.input)

    writeVideo_flag = True
    video_path = "./output/output.mp4"

    if writeVideo_flag:
        # Define the codec and create VideoWriter object
        # w = int(video_capture.get(3))
        # h = int(video_capture.get(4))
        width, height = get_image_size(network)
        first_image = cv2.imread(images[0])
        org_h, org_w = first_image.shape[:2]
        print(width, height, org_w, org_h)
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        out = cv2.VideoWriter(video_path, fourcc, 15, (width, height))
        list_file = open(args.output_file, 'w')

    counter = []

    fps = 0.0

    index = 0
    while True:
        # loop asking for new image paths if no list is given
        if args.input:
            if index >= len(images):
                break
            image_name = images[index]
        else:
            image_name = input("Enter Image Path: ")
        prev_time = time.time()
        # after darknet detection, bbox = (center_x, center_y, w, h)
        image, detections = image_detection(image_name, network, class_names,
                                            class_colors, args.thresh)
        # if args.save_labels:
        #     save_annotations(image_name, image, detections, class_names, output=args.output)

        boxs = []
        predicted_names = []
        for label, confidence, bbox in detections:
            # extract feature, we need bbox = (left, top, w, h)
            center_x, center_y, w, h = bbox
            xmin = int(round(center_x - (w / 2)))
            ymin = int(round(center_y - (h / 2)))
            boxs.append([xmin, ymin, w, h])
            # predict_name = class_names.index(label)
            # predicted_names.append(predict_name)
        features = encoder(image, boxs)

        # Create a Detection object for each box.
        box_idx = 0
        dets = []
        for label, confidence, bbox in detections:
            if bbox[3] < min_detection_height:
                continue
            if bbox[2] > 0.8 * width:
                print(label, ", ", confidence, bbox)
            # create Detection, we need bbox = (left, top, w, h)
            center_x, center_y, w, h = bbox
            xmin = int(round(center_x - (w / 2)))
            ymin = int(round(center_y - (h / 2)))
            dets.append(
                Detection([xmin, ymin, w, h], confidence, features[box_idx]))
            box_idx += 1

        # Filter out boxes whose confidence is below the threshold.
        detections = [d for d in dets if d.confidence >= min_confidence]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap,
                                                    scores)
        # Apply non-maximum suppression to the boxes.
        detections = [detections[i] for i in indices]

        # Update tracker.
        # Kalman-filter state prediction for every existing track
        # (there are no tracks yet on the first frame).
        tracker.predict()
        # Measurement update; unmatched detections initialize new tracks.
        tracker.update(detections)

        i = int(0)
        indexIDs = []
        boxes = []
        for det in detections:
            bbox = det.to_tlbr()
            cv2.rectangle(image, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 255, 255), 1)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            # After darknet detection the image has been resized to (608, 608),
            # so stored results must be rescaled back to the original image (1920x1080).
            bbox = track.to_tlwh()
            x, y, w, h = bbox
            a_x, a_y, a_w, a_h = x / width, y / height, w / width, h / height
            results.append([
                index + 1, track.track_id, org_w * a_x, org_h * a_y,
                org_w * a_w, org_h * a_h
            ])

            indexIDs.append(int(track.track_id))
            counter.append(int(track.track_id))
            bbox = track.to_tlbr()
            color = [int(c) for c in COLORS[indexIDs[i] % len(COLORS)]]

            cv2.rectangle(image, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (color), 1)
            cv2.putText(image, str(track.track_id),
                        (int(bbox[0]), int(bbox[1])), cv2.FONT_HERSHEY_SIMPLEX,
                        0.5, (color), 1)
            # if len(class_names) > 0:
            #    cv2.putText(image, str(class_names[0]),(int(bbox[0]), int(bbox[1] -20)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (color),1)

            # pt1 = int(bbox[0]), int(bbox[1])
            # pt2 = int(bbox[2]), int(bbox[3])
            # cv2.rectangle(image, pt1, pt2, color, 1)
            # if track.track_id is not None:
            #     text_size = cv2.getTextSize(str(track.track_id), cv2.FONT_HERSHEY_PLAIN, 1, 1)

            #     center = pt1[0] + 5, pt1[1] + 5 + text_size[0][1]
            #     pt2 = pt1[0] + 10 + text_size[0][0], pt1[1] + 10 + text_size[0][1]
            #     cv2.rectangle(image, pt1, pt2, color, -1)
            #     cv2.putText(image, str(track.track_id), center, cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1)

            i += 1

            center = (int(
                ((bbox[0]) + (bbox[2])) / 2), int(((bbox[1]) + (bbox[3])) / 2))
            pts[track.track_id].append(center)
            thickness = 1
            #center point
            cv2.circle(image, (center), 1, color, thickness)

            #draw motion path
            for j in range(1, len(pts[track.track_id])):
                if pts[track.track_id][j - 1] is None or pts[
                        track.track_id][j] is None:
                    continue
                thickness = int(np.sqrt(64 / float(j + 1)) * 2)
                cv2.line(image, (pts[track.track_id][j - 1]),
                         (pts[track.track_id][j]), (color), 1)
                #cv2.putText(frame, str(class_names[j]),(int(bbox[0]), int(bbox[1] -20)),cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255,255,255),1)

        # count = len(set(counter))
        # cv2.putText(image, "Total Object Counter: "+str(count),(int(20), int(120)),cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,255,0),1)
        # cv2.putText(image, "Current Object Counter: "+str(i),(int(20), int(80)),cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,255,0),1)
        cv2.putText(image, "FPS: %f" % (fps), (int(20), int(40)),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
        cv2.namedWindow("YOLO_Deep_SORT", 0)
        cv2.resizeWindow('YOLO_Deep_SORT', 1024, 768)
        cv2.imshow('YOLO_Deep_SORT', image)

        if writeVideo_flag:
            out.write(image)
        fps = (fps + (1. / (time.time() - prev_time))) / 2
        #print(set(counter))

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

        # fps = int(1./(time.time() - prev_time))
        print("FPS: {}".format(fps))
        index = index + 1

    if writeVideo_flag:
        for row in results:
            print('%d,%d,%.2f,%.2f,%.2f,%.2f,1,-1,-1,-1' %
                  (row[0], row[1], row[2], row[3], row[4], row[5]),
                  file=list_file)
        list_file.close()
        out.release()
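
This example draws with two module-level names that are not defined in the snippet, pts and COLORS. A plausible definition, offered only as an assumption based on how they are indexed here and on the pts list created in code example #14:

# Hypothetical module-level globals assumed by the drawing code above.
from collections import deque
import numpy as np

# One bounded centroid history per possible track id.
pts = [deque(maxlen=30) for _ in range(9999)]

# A fixed random BGR palette, indexed by track id modulo its length.
np.random.seed(42)
COLORS = np.random.randint(0, 255, size=(200, 3), dtype="uint8")
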
コード例 #11
0
class Client:
    """
    The camera client component of the experiment.
    Responsible for:
    1. run object detection (YoloV4) and tracking (Deep SORT) and send results;
    2. read the images from dataset and convert to h264 video
    3. convert the video to request bitrate and send to the server

    """
    def __init__(self, id, batch_size = 15, dataset_dir='../others/dds/dataset/WildTrack/src/C'):
        # id: string
        self.id = id
        # video batch size integer
        self.batch_size = batch_size

        # displacement
        self.displacement_check = {}

        # Deep SORT encoding. setting is the same for now
        self.max_cosine_distance = 0.3
        self.nn_budget = None
        self.nms_max_overlap = 1.0

        self.temp_dir = 'temp-cropped'
        os.makedirs(self.temp_dir, exist_ok=True)

        self.dataset_dir = dataset_dir + id

        # read the total number of files in the dataset directory
        fnames = sorted(os.listdir(dataset_dir + id))
        self.total_frame = len(fnames)
        print("Total number of frames: ", str(self.total_frame))
        print("Simulating the camera with video batch size 15")
        # initiate the yolo v4 network
        network, class_names, class_colors = darknet.load_network(
            './darknet/cfg/yolov4.cfg',
            './darknet/cfg/coco.data',
            './darknet/yolov4.weights',
            batch_size=1
        )

        self.network = network
        self.class_names = class_names
        # initiate the deep sort network
        # multi-person tracking
        model_filename = 'model_data/mars-small128.pb'
        self.encoder = gdet.create_box_encoder(model_filename, batch_size=1)

        self.metric = nn_matching.NearestNeighborDistanceMetric("cosine", self.max_cosine_distance, self.nn_budget)
        self.tracker = Tracker(self.metric)

        print("Camera initiated")


    def first_phase(self, start_id):
        # read the images batch and run detections
        end_id = min(self.total_frame, int(start_id) + self.batch_size)
        print(end_id)
        total_obj = 0
        unique_obj_bbox = {}
        displacement_check = self.displacement_check

        for i in range(int(start_id), end_id):
            # print(self.dataset_dir + "/" + f"{str(i).zfill(10)}.png")
            image = cv2.imread(self.dataset_dir + "/" + f"{str(i).zfill(10)}.png")
            darknet_image = darknet.make_image(1920, 1080, 3)

            image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            darknet.copy_image_from_bytes(darknet_image, image_rgb.tobytes())

            # detections list of tuple: (class_name, confidence_score, (bbox_info))
            detections = darknet.detect_image(self.network, self.class_names, darknet_image, thresh=0.4)

            total_obj = total_obj + len(detections)
            bboxes = [obj[2] for obj in detections]
            confidence = [obj[1] for obj in detections]
            classes = [obj[0] for obj in detections]

            features = self.encoder(image_rgb, bboxes)

            detections = [Detection(bbox, confidence, cls, feature) for bbox, confidence, cls, feature in
                          zip(bboxes, confidence, classes, features)]

            # Run non-maxima suppression.
            boxes = np.array([d.tlwh for d in detections])
            scores = np.array([d.confidence for d in detections])
            indices = preprocessing.non_max_suppression(boxes, self.nms_max_overlap, scores)
            detections = [detections[i] for i in indices]

            self.tracker.predict()
            self.tracker.update(detections)

            for track in self.tracker.tracks:
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                bbox = track.to_tlbr()

                if track.track_id not in unique_obj_bbox:
                    # unique_obj_bbox.append(track.track_id)
                    unique_obj_bbox[track.track_id] = {'feature': track.features[0].tolist()}
                    unique_obj_bbox[track.track_id]['length'] = 0

                # find the center point of the tracking object
                center_point = track.to_tlwh()
                c_x = center_point[0] + (center_point[2]) / 2

                c_y = center_point[1] + (center_point[3]) / 2

                if track.track_id not in displacement_check:
                    displacement_check[track.track_id] = (c_x, c_y)
                else:
                    disp = math.sqrt((c_x - displacement_check[track.track_id][0]) ** 2 + (c_y - displacement_check[track.track_id][1]) ** 2)
                    # print(unique_obj_bbox[track.track_id])
                    # print('disp for cam: ', str(track.track_id), " ", str(disp))
                    unique_obj_bbox[track.track_id]['length'] = unique_obj_bbox[track.track_id]['length'] + disp

                    # update the center point for next iteration
                    displacement_check[track.track_id] = (c_x, c_y)
                # print(displacement_check)
                # print(unique_obj_bbox[track.track_id]['length'])

        self.displacement_check = displacement_check
        # print(displacement_check)
        return {'total_obj': total_obj, 'unique_obj_bbox': unique_obj_bbox}


    def second_phase(self, bitrate, start_id):
        encoded_vid_path = os.path.join(self.temp_dir, "temp.mp4")
        if not bitrate:
            encoding_result = subprocess.run(["ffmpeg", "-y",
                                              "-loglevel", "error",
                                              "-start_number", str(start_id),
                                              '-i', f"{self.dataset_dir}/%010d.png",
                                              "-vcodec", "libx264", "-g", "15",
                                              "-keyint_min", "15",
                                              "-pix_fmt", "yuv420p",
                                              "-frames:v",
                                              str(self.batch_size),
                                              encoded_vid_path],
                                             stdout=subprocess.PIPE,
                                             stderr=subprocess.PIPE,
                                             universal_newlines=True)
        else:
            rate=str(bitrate)+"k"
            encoding_result = subprocess.run(["ffmpeg", "-y",
                                              "-loglevel", "error",
                                              "-start_number", str(start_id),
                                              '-i', f"{self.dataset_dir}/%010d.png",
                                              "-vcodec", "libx264",
                                              "-g", "15",
                                              "-keyint_min", "15",
                                              "-maxrate", f"{rate}",
                                              "-b", f"{rate}",
                                              "-bufsize", f"{rate}",
                                              "-pix_fmt", "yuv420p",
                                              "-frames:v",
                                              str(self.batch_size),
                                              encoded_vid_path],
                                             stdout=subprocess.PIPE,
                                             stderr=subprocess.PIPE,
                                             universal_newlines=True)

        return "OK"
コード例 #12
0
def run(metadata_fpath, detections_dir, out_fpath, min_confidence,
        nms_max_overlap, min_detection_height, max_cosine_distance, nn_budget):
    """Run multi-target tracker on a particular sequence.

    Parameters
    ----------
    sequence_dir : str
        Path to the MOTChallenge sequence directory.
    detection_file : str
        Path to the detections file.
    output_file : str
        Path to the tracking output file. This file will contain the tracking
        results on completion.
    min_confidence : float
        Detection confidence threshold. Disregard all detections that have
        a confidence lower than this value.
    nms_max_overlap: float
        Maximum detection overlap (non-maxima suppression threshold).
    min_detection_height : int
        Detection height threshold. Disregard all detections that have
        a height lower than this value.
    max_cosine_distance : float
        Gating threshold for cosine distance metric (object appearance).
    nn_budget : Optional[int]
        Maximum size of the appearance descriptor gallery. If None, no budget
        is enforced.
    display : bool
        If True, show visualization of intermediate tracking results.

    """
    metadata = pd.read_csv(metadata_fpath)
    fnames = metadata['filename']
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)
    results = []

    for fname in fnames:
        frame_idx = int(fname.strip('.png').split('_')[-1])
        det_fpath = Path(detections_dir, fname + '.deepsort.npy')
        det = np.load(det_fpath)

        # Load image and generate detections.
        detections = create_detections(det, min_detection_height)
        detections = [d for d in detections if d.confidence >= min_confidence]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap,
                                                    scores)
        detections = [detections[i] for i in indices]

        # Update tracker.
        tracker.predict()
        tracker.update(detections)

        # Store results.
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlwh()
            results.append([
                frame_idx, track.track_id, bbox[0], bbox[1], bbox[2], bbox[3]
            ])

    # Store results.
    with open(out_fpath, 'w') as f:
        for row in results:
            print('%d,%d,%.2f,%.2f,%.2f,%.2f,1,-1,-1,-1' %
                  (row[0], row[1], row[2], row[3], row[4], row[5]),
                  file=f)
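
The create_detections helper called above is not shown. A plausible implementation for these per-frame arrays, assuming each row is laid out as [x, y, w, h, confidence, feature...] in the usual deep_sort style; the actual layout of the .deepsort.npy files is not given here:

from deep_sort.detection import Detection

def create_detections(detection_mat, min_height=0):
    # Hypothetical row layout: [x, y, w, h, confidence, feature...].
    detection_list = []
    for row in detection_mat:
        bbox, confidence, feature = row[0:4], row[4], row[5:]
        if bbox[3] < min_height:  # skip boxes shorter than the height threshold
            continue
        detection_list.append(Detection(bbox, confidence, feature))
    return detection_list
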
コード例 #13
0
def detect_and_track(file_path, save_path, detection_mode="YOLO3"):
	# Definition of the parameters
	max_cosine_distance = 0.3
	nn_budget = None
	nms_max_overlap = 1.0
	# If the video is to be saved, define the output frame size.
	size = (640, 480)
	save_fps = 24

	# use deep_sort tracker
	model_filename = '../deep_sort/model_data/resources/networks/mars-small128.pb'
	encoder = gdet.create_box_encoder(model_filename, batch_size=1)
	metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
	tracker = Tracker(metric)

	write_video_flag = True
	# Assume at most 300 targets in view and generate 300 random colors.
	colours = np.random.rand(300, 3) * 255
	video_capture = cv2.VideoCapture(file_path)

	if write_video_flag:
		output_video = cv2.VideoWriter(save_path + 'output.mp4', cv2.VideoWriter_fourcc('m', 'p', '4', 'v'), save_fps,
		                               size)
		object_list_file = open(save_path + 'detection.txt', 'w')
		frame_index = -1

	if detection_mode == "YOLO3":
		yolo = YOLO()
	elif detection_mode == "SSD":
		ssd = SSD()

	# appear records how many frames each target has appeared in; number counts all distinct targets seen.
	appear = {}
	number = 0

	while True:
		ret, frame = video_capture.read()
		if ret is not True:
			break
		frame = cv2.resize(frame, size)
		# Record the time at which processing of this frame starts.
		start_time = time.time()
		if detection_mode == "YOLO3":
			image = Image.fromarray(frame[..., ::-1])
			# boxes are [x, y, w, h] coordinates, detect_scores are object scores, origin_boxes use top-left + bottom-right corners.
			boxes, detect_scores, origin_boxes = yolo.detect_image(image)
		elif detection_mode == "SSD":
			rclasses, rscores, rbboxes = ssd.process_image(frame)
			height, width = frame.shape[0], frame.shape[1]
			boxes = []
			# Iterate over every detected target in the frame (one per entry of rclasses).
			for i in range(rclasses.shape[0]):
				# rbboxes originally hold top-left and bottom-right coordinates in the 0-1 range.
				xmin, ymin = int(rbboxes[i, 1] * width), int(rbboxes[i, 0] * height)
				xmax, ymax = int(rbboxes[i, 3] * width), int(rbboxes[i, 2] * height)
				# Convert to x, y, w, h coordinates.
				x, y, w, h = int(xmin), int(ymin), int(xmax - xmin), int(ymax - ymin)
				if x < 0:
					w = w + x
					x = 0
				if y < 0:
					h = h + y
					y = 0
				boxes.append([x, y, w, h])
			boxes = np.array(boxes)

		features = encoder(frame, boxes)
		# No per-box confidence is used here, so the detection score is set to 1.0.
		detections = [Detection(bbox, 1.0, feature) for bbox, feature in zip(boxes, features)]
		# Non-maximum suppression.
		boxes = np.array([d.tlwh for d in detections])
		scores = np.array([d.confidence for d in detections])
		indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
		detections = [detections[i] for i in indices]
		# Tracker prediction and update.
		tracker.predict()
		tracker.update(detections)

		for track in tracker.tracks:
			if not track.is_confirmed() or track.time_since_update > 1:
				continue
			bbox = track.to_tlbr()
			color = (int(colours[track.track_id % 300, 0]), int(colours[track.track_id % 300, 1]),
			         int(colours[track.track_id % 300, 2]))
			# (int(bbox[0]), int(bbox[1])) is the top-left corner, (int(bbox[2]), int(bbox[3])) the bottom-right corner.
			cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2)
			cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200, color, 2)
			if track.track_id in appear.keys():
				appear[track.track_id] += 1
			else:
				number += 1
				appear[track.track_id] = 1

		show_fps = 1. / (time.time() - start_time)
		cv2.putText(frame, text="FPS: " + str(int(show_fps)), org=(3, 15), fontFace=cv2.FONT_HERSHEY_SIMPLEX,
		            fontScale=0.50, color=(0, 255, 0), thickness=2)
		cv2.putText(frame, text="number: " + str(number), org=(3, 30), fontFace=cv2.FONT_HERSHEY_SIMPLEX,
		            fontScale=0.50, color=(0, 255, 0), thickness=2)
		cv2.imshow('result', frame)

		if write_video_flag:
			# Save this video frame.
			output_video.write(frame)
			# Update the video frame index.
			frame_index = frame_index + 1
			# Write the index of the next frame to detection.txt.
			object_list_file.write(str(frame_index) + ' ')
			# Write the four box coordinates of every target detected in this frame.
			if len(boxes) != 0:
				for i in range(0, len(boxes)):
					object_list_file.write(
						str(boxes[i][0]) + ' ' + str(boxes[i][1]) + ' ' + str(boxes[i][2]) + ' ' + str(
							boxes[i][3]) + ' ')
			object_list_file.write('\n')

		# Press q to quit.
		if cv2.waitKey(1) & 0xFF == ord('q'):
			break

	video_capture.release()
	if write_video_flag:
		output_video.release()
		object_list_file.close()
	cv2.destroyAllWindows()
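
The SSD branch above converts normalized (ymin, xmin, ymax, xmax) boxes in the 0-1 range into pixel tlwh boxes and clips them so they do not start outside the frame. The same arithmetic as a standalone sketch, restating the inline code rather than introducing a new API:

def ssd_box_to_tlwh(rbbox, width, height):
    # rbbox is (ymin, xmin, ymax, xmax), each in [0, 1].
    ymin, xmin, ymax, xmax = rbbox
    x, y = int(xmin * width), int(ymin * height)
    w, h = int(xmax * width) - x, int(ymax * height) - y
    if x < 0:
        w, x = w + x, 0
    if y < 0:
        h, y = h + y, 0
    return [x, y, w, h]
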
コード例 #14
0
def main(yolo):
    # Definition of the parameters
    max_cosine_distance = 2.0
    nn_budget = None
    nms_max_overlap = 3.0

    # Deep SORT
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    show_detections = True  # draw detection boxes (blue) when detecting
    writeVideo_flag = True  # record video output

    defaultSkipFrames = 10  # skipped frames between detections

    # set up collection of door
    H1 = 245
    W1 = 370
    H2 = 280
    W2 = 480
    H = None
    W = None

    R = 80  # min R is 56

    def solve_quadratic_equation(a, b, c):
        """ax2 + bx + c = 0"""
        delta = b**2 - 4 * a * c
        if delta < 0:
            print("Phương trình vô nghiệm!")
        elif delta == 0:
            return -b / (2 * a)
        else:
            print("Phương trình có 2 nghiệm phân biệt!")
            if float((-b - sqrt(delta)) / (2 * a)) > float(
                (-b + sqrt(delta)) / (2 * a)):
                return float((-b - sqrt(delta)) / (2 * a))
            else:
                return float((-b + sqrt(delta)) / (2 * a))

    def setup_door(H1, W1, H2, W2, R):
        # Step 1: find the midpoint I of (W1, H1) and (W2, H2).
        I1 = (W1 + W2) / 2
        I2 = (H1 + H2) / 2

        # Find the vector AB.
        u1 = W2 - W1
        u2 = H2 - H1

        # AB is the normal vector of the bisector d, so the perpendicular
        # bisector of AB has the equation y = -(u1 / u2) * x - c / u2.
        c = -u1 * I1 - u2 * I2  # solve for c

        # Step 2: find the center O of the circle.
        al = c / u2 + I2
        # Compute D, the distance between I and O.
        fi = acos(sqrt((I1 - W1)**2 + (I2 - H1)**2) / R)
        D = sqrt((I1 - W1)**2 + (I2 - H1)**2) * tan(fi)

        O1 = solve_quadratic_equation((1 + u1**2 / u2**2),
                                      2 * (-I1 + u1 / u2 * al),
                                      al**2 - D**2 + I1**2)
        O2 = -u1 / u2 * O1 - c / u2
        # The quadratic has two roots; only the upper solution is kept.

        # Step 3: find the points on the circle, one height per column across the door span.
        door_dict = dict()
        for w in range(W1, W2):
            h = O2 + sqrt(R**2 - (w - O1)**2)
            door_dict[w] = round(h)
        return door_dict

    door_dict = setup_door(H1, W1, H2, W2, R)

    totalFrames = 0
    totalIn = 0

    # create an empty list of centroid histories to count traffic
    pts = [deque(maxlen=30) for _ in range(9999)]

    file_path = 'D:\\video/[Sala Outside][2020-05-28T16-01-39][2020-05-28T18-02-09].mp4'
    video_capture = cv2.VideoCapture(file_path)

    fps_imutils = imutils.video.FPS().start()

    if writeVideo_flag:
        fourcc = cv2.VideoWriter_fourcc(*'MP4V')
        out = cv2.VideoWriter('output_yolov4.mp4', fourcc, 3, (736, 480))

    while True:
        oke, frame = video_capture.read()  # frame shape 640*480*3
        if not oke:
            break

        frame = cv2.resize(frame, (736, 480))
        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb

        # if the frame dimensions are empty, set them
        if W is None or H is None:
            (H, W) = frame.shape[:2]

        # calculate video time
        videotime = video_capture.get(cv2.CAP_PROP_POS_MSEC) / 1000

        # Draw a door line
        for w in range(W1, W2):
            cv2.circle(frame, (w, door_dict[w]), 1, (0, 255, 255), -1)
        cv2.circle(frame, (W1, H1), 4, (0, 0, 255), -1)
        cv2.circle(frame, (W2, H2), 4, (0, 0, 255), -1)

        if totalFrames % defaultSkipFrames == 0:
            t2 = time.time()
            boxes, confidence, classes = yolo.detect_image(
                image)  # average time: 1.2s
            print(time.time() - t2)

            features = encoder(frame, boxes)
            detections = [
                Detection(bbox, confidence, cls, feature)
                for bbox, confidence, cls, feature in zip(
                    boxes, confidence, classes, features)
            ]

            # Run non-maxima suppression.
            boxes = np.array([d.tlwh for d in detections])
            scores = np.array([d.confidence for d in detections])
            classes = np.array([d.cls for d in detections])
            indices = preprocessing.non_max_suppression(
                boxes, nms_max_overlap, scores)
            detections = [detections[i] for i in indices]

            # Call the tracker
            tracker.predict()
            tracker.update(detections)

            for det in detections:
                bbox = det.to_tlbr()
                if show_detections and len(classes) > 0:
                    det_cls = det.cls
                    score = "%.2f" % (det.confidence * 100) + "%"
                    cv2.putText(frame,
                                str(det_cls) + " " + score,
                                (int(bbox[0]), int(bbox[3]) - 10), 0,
                                1e-3 * frame.shape[0], (0, 255, 0), 1)
                    cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[2]), int(bbox[3])), (255, 0, 0), 1)

            for track in tracker.tracks:
                if not track.is_confirmed():
                    continue
                bbox = track.to_tlbr()

                if not_count_staff(frame, int(bbox[0]), int(bbox[1]),
                                   int(bbox[2]), int(bbox[3])):
                    # adc = "%.2f" % (track.adc * 100) + "%"  # Average detection confidence
                    cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[2]), int(bbox[3])), (0, 255, 255),
                                  2)
                    cv2.putText(frame, "STAFF",
                                (int(bbox[0]), int(bbox[1]) - 10), 0,
                                1e-3 * frame.shape[0], (0, 0, 255), 1)
                    continue
                else:
                    # adc = "%.2f" % (track.adc * 100) + "%"  # Average detection confidence
                    cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[2]), int(bbox[3])),
                                  (255, 255, 255), 2)
                    cv2.putText(frame, "ID: " + str(track.track_id),
                                (int(bbox[0]), int(bbox[1])), 0,
                                1e-3 * frame.shape[0], (0, 255, 0), 1)

                x = [c[0] for c in pts[track.track_id]]
                y = [c[1] for c in pts[track.track_id]]

                centroid_x = int(((bbox[0]) + (bbox[2])) / 2)
                centroid_y = int(((bbox[1]) + (bbox[3])) / 2)

                if not track.Counted and centroid_x in range(W1, W2):
                    if centroid_y < np.mean(y) and door_dict[
                            centroid_x] > centroid_y and np.max(x) - np.min(
                                x) > 20:
                        totalIn += 1
                        track.Counted = True
                        print(track.track_id, track.Counted)

                cv2.circle(frame, (centroid_x, centroid_y), 4, (0, 255, 0), -1)
                pts[track.track_id].append((centroid_x, centroid_y))

            info = [("Time", "{:.4f}".format(videotime)), ("In", totalIn)]

            # loop over the info tuples and draw them on our frame
            for (i, (k, v)) in enumerate(info):
                text = "{}: {}".format(k, v)
                cv2.putText(frame, text, (W - 150, ((i * 20) + 20)),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
            if writeVideo_flag:
                # save a frame
                out.write(frame)

            if show_detections:
                cv2.imshow('People counter', frame)
                # Press Q to stop!
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
        else:
            # Between detection frames, only advance the tracker, reusing the
            # detections from the most recent detection frame.
            tracker.predict()
            tracker.update(detections)

        fps_imutils.update()

        totalFrames += 1

    fps_imutils.stop()
    print('imutils FPS: {}'.format(fps_imutils.fps()))

    if writeVideo_flag:
        out.release()

    video_capture.release()

    cv2.destroyAllWindows()
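
The counting rule buried in the track loop above is easier to read as a standalone predicate. A sketch restating the same condition for tracks that have not yet been counted, where history stands for the centroids stored in pts[track.track_id]:

import numpy as np

def entered_through_door(history, centroid_x, centroid_y, door_dict, W1, W2):
    """Sketch of the entry test above: inside the door span, above the door arc,
    moving upward relative to past centroids, with enough horizontal travel."""
    if not history or centroid_x not in range(W1, W2):
        return False
    xs = [c[0] for c in history]
    ys = [c[1] for c in history]
    return (centroid_y < np.mean(ys)
            and door_dict[centroid_x] > centroid_y
            and np.max(xs) - np.min(xs) > 20)
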
コード例 #15
0
class YoloV4DeepsortServer(BaseDetectionServer):
    def __init__(self, weightPath="/yolov4_sb_best.weights", configPath="/yolov4_sb.cfg", metaPath="/voc_sb.data", deepsort_modelPath="/mars_sb_14.pb", image_size=640, nms_conf_thresh=0.4, nms_iou_thresh=0.5,max_cosine_distance = 0.6, nn_budget = 50, nms_max_overlap = 1.0):
        try:
            import darknet            
            from deep_sort.tracker import Tracker
            from deep_sort import nn_matching
            from tools import generate_detections as gdet
            from deep_sort import preprocessing
            from deep_sort.detection import Detection as deep_detection
            import os
        except ImportError:
            raise
        self.image_size = image_size
        project_dir = os.path.dirname(__file__)
        configPath = os.path.join(project_dir, configPath)
        weightPath = os.path.join(project_dir, weightPath)
        metaPath = os.path.join(project_dir, metaPath)
        deepsort_modelPath = os.path.join(project_dir, deepsort_modelPath)
        self.network, self.class_names, self.class_colors = darknet.load_network(configPath, metaPath, weightPath, batch_size=1)
        self.nms_max_overlap = nms_max_overlap
       
        #DarknetImage
        self.darknet = darknet
        ww = 640
        hh = 480
        self.darknet_image = darknet.make_image(ww, hh, 3)
        
        # deep_sort
        self.preprocessing = preprocessing
        self.deep_detection = deep_detection
        self.encoder = gdet.create_box_encoder(deepsort_modelPath, batch_size=1)
        metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
        self.tracker = Tracker(metric)     
        BaseDetectionServer.__init__(self)

    @staticmethod
    def citation_notice():
        return "YoloV4 Inference and Feature Extractor by Ya Xiong(Bill)\n" \
               "Maintained by Robert Belshaw ([email protected])"

    @function_timer.interval_logger(interval=10)
    def get_detector_results(self, request):
        """
        Args:
            request (GetDetectorResultsRequest):
        Returns:
            GetDetectorResultsResponse
        """
        try:
            import cv2
        except ImportError:
            raise

        if self.currently_busy.is_set():
            return GetDetectorResultsResponse(status=ServiceStatus(BUSY=True))
        self.currently_busy.set()
        detections_msg = Detections()
        try:
            frame = ros_numpy.numpify(request.image)
            original_shape = frame.shape
            frame = cv2.resize(frame, (self.image_size, int(self.image_size*0.75)))
            self.darknet.copy_image_from_bytes(self.darknet_image, frame.tobytes())   
            detections_yolo = self.darknet.detect_image(self.network, self.class_names, self.darknet_image, thresh=0.7)
            boxs = []
            confidences = []
            class_name= []
            for detection in detections_yolo:   
                if detection[0] != "ripe":  # only track ripe berry whose id is 0    
                    x1, y1, x2, y2 = self._convertBack(detection[2][0], \
                    detection[2][1], \
                    detection[2][2], \
                    detection[2][3])
                    x1 = x1*original_shape[1]/frame.shape[1]
                    x2 = x2*original_shape[1]/frame.shape[1]
                    y1 = y1*original_shape[0]/frame.shape[0]
                    y2 = y2*original_shape[0]/frame.shape[0]
                    x1 = max(min(original_shape[1]-1, x1), 1)
                    x2 = max(min(original_shape[1]-1, x2), 1)
                    y1 = max(min(original_shape[0]-1, y1), 1)
                    y2 = max(min(original_shape[0]-1, y2), 1)
                    detections_msg.objects.append(Detection(roi=RegionOfInterest(x1=x1, y1=y1, x2=x2, y2=y2), seg_roi=SegmentOfInterest(x=[], y=[]), id=self._new_id(), track_id=-1, confidence=float(detection[1])/100, class_name="unripe"))                        
                    continue
                confidences.append(float(detection[1])/100)
                class_name.append(detection[0])
                bounds = detection[2]   
                xCoord = int(bounds[0] - bounds[2] / 2)
                yCoord = int(bounds[1] - bounds[3] / 2)
                boxs.append([xCoord, yCoord, int(bounds[2]), int(bounds[3])])
            features = self.encoder(frame, boxs)
            detections = [self.deep_detection(bbox, confidence, feature) for bbox, confidence, feature in
                        zip(boxs, confidences, features)]
            # Run non-maxima suppression.
            boxes = np.array([d.tlwh for d in detections])
            scores = np.array([d.confidence for d in detections])
            indices = self.preprocessing.non_max_suppression(boxes, self.nms_max_overlap, scores)
            detections = [detections[i] for i in indices]
            # Call the tracker
            self.tracker.predict()
            self.tracker.update(detections)
            for track in self.tracker.tracks:
                if not track.is_confirmed() or track.time_since_update > 1:
                    track_id = 0
                    continue
                track_id = (int(track.track_id))
                bbox = track.to_tlbr()
                x1, y1, x2, y2 = bbox[0],bbox[1],bbox[2],bbox[3]   
                x1 = x1*original_shape[1]/frame.shape[1] 
                x2 = x2*original_shape[1]/frame.shape[1]  
                y1 = y1*original_shape[0]/frame.shape[0]  
                y2 = y2*original_shape[0]/frame.shape[0]
                x1 = max(min(original_shape[1]-1, x1), 1)
                x2 = max(min(original_shape[1]-1, x2), 1)
                y1 = max(min(original_shape[0]-1, y1), 1)
                y2 = max(min(original_shape[0]-1, y2), 1)                     
                roi = (RegionOfInterest(x1=x1, y1=y1, x2=x2, y2=y2))
                detections_msg.objects.append(Detection(roi=roi, seg_roi=SegmentOfInterest(x=[], y=[]), id=self._new_id(), track_id=track_id,confidence=0.99, class_name="Ripe Strawberry"))
            self.currently_busy.clear()
        except Exception as e:
            print("FruitCastServer error: ", e)
            return GetDetectorResultsResponse(status=ServiceStatus(ERROR=True), results=detections_msg)
        return GetDetectorResultsResponse(status=ServiceStatus(OKAY=True), results=detections_msg)

    def _convertBack(self, x, y, w, h):
        xmin = int(round(x - (w / 2)))
        xmax = int(round(x + (w / 2)))
        ymin = int(round(y - (h / 2)))
        ymax = int(round(y + (h / 2)))
        return xmin, ymin, xmax, ymax
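
The rescale-and-clamp arithmetic repeated in get_detector_results (once for unripe detections, once for tracked boxes) can be summarized in a small helper; a sketch under the same clamping convention as above:

# Sketch: map a box from the resized detection frame back to the original image,
# clamping every coordinate to [1, dimension - 1] as done above.
def rescale_box(x1, y1, x2, y2, resized_shape, original_shape):
    def clamp(v, upper):
        return max(min(upper - 1, v), 1)
    sx = original_shape[1] / resized_shape[1]  # width ratio
    sy = original_shape[0] / resized_shape[0]  # height ratio
    return (clamp(x1 * sx, original_shape[1]), clamp(y1 * sy, original_shape[0]),
            clamp(x2 * sx, original_shape[1]), clamp(y2 * sy, original_shape[0]))
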
コード例 #16
0
ファイル: developer_kit.py プロジェクト: ZXHSunSalt/my_detect
def detect_video_with_deepsort(yolo,
                               video_path,
                               rot_number,
                               output_path="",
                               deepsort_model_filename=None):

    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # Open the video.
    vid = cv2.VideoCapture(video_path)
    if not vid.isOpened():
        raise IOError("Couldn't open webcam or video")

    # Video-saving setup; keep the same properties as the original video stream.
    video_FourCC = int(vid.get(cv2.CAP_PROP_FOURCC))
    video_fps = vid.get(cv2.CAP_PROP_FPS)
    video_size = (int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)),
                  int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    isOutput = True if output_path != "" else False
    if isOutput:
        print("!!! TYPE:", type(output_path), type(video_FourCC),
              type(video_fps), type(video_size))
        out = cv2.VideoWriter(output_path, video_FourCC, video_fps, video_size)

    accum_time = 0
    curr_fps = 0
    fps = "FPS: ??"
    prev_time = timer()

    # Load the deep_sort components.
    encoder = gdet.create_box_encoder(deepsort_model_filename, batch_size=1)

    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)
    frame_index = 0
    while True:
        return_value, frame_bgr = vid.read()
        frame_index += 1
        if frame_bgr is None:
            break
        # Detection uses the RGB frame because the model was trained on RGB images;
        # deep sort uses the BGR frame, matching the original code.
        # 1. OpenCV loads frames as BGR, so convert to RGB for detection.
        frame_rbg = cv2.cvtColor(frame_bgr.copy(), cv2.COLOR_BGR2RGB)

        # 2. Rotate the image.
        frame_rbg = np.rot90(frame_rbg, rot_number)

        # 3. YOLO detection; the output boxes are tlbr.
        frame_rbg_Image = Image.fromarray(frame_rbg)
        out_boxes_tlbr, out_scores, out_classes = yolo.get_detect_boxes(
            frame_rbg_Image)

        # 4. Convert the detector's tlbr boxes to tlwh boxes.
        out_boxes_tlwh = []
        out_boxes_tlbr_1 = copy.deepcopy(out_boxes_tlbr)  # nested lists require a deep copy
        if len(out_boxes_tlbr_1) != 0:
            for bbox in out_boxes_tlbr_1:
                bbox[2:] -= bbox[:2]
                out_boxes_tlwh.append(bbox)
                # print('out_boxes:',out_boxes[i])
        ###################################################

        features = encoder(frame_bgr, out_boxes_tlwh)
        # No per-box confidence is used here, so the detection score is set to 1.0.
        detections = [
            Detection(bbox, 1.0, feature)
            for bbox, feature in zip(out_boxes_tlwh, features)
        ]
        # 5、Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap,
                                                    scores)
        detections = [detections[i] for i in indices]

        # 6. Deep SORT tracking.
        tracker.predict()
        tracker.update(detections)
        # index +=1

        # track while (in practice this did not help)
        # if index >=20 and len(out_boxes_tlwh) != 0:
        #         # if len(out_boxes_tlwh) != 0:
        #         #     track_while(encoder, tracker, vid, nms_max_overlap, out_boxes_tlwh,detections)
        #             # index = 0

        # 7. Draw boxes for the deep SORT tracks.
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            # Convert tlwh to tlbr.
            bbox = track.to_tlbr()
            # Fetch the track's trajectory.
            track.update_trajectory()
            trajectorys = track.trajectory
            # Draw as points:
            # for trajectory in trajectorys:
            #     cv2.circle(frame_bgr, trajectory, 1, (0, 0, 213), -1)
            # Draw as line segments:
            for i in range(0, len(trajectorys), 2):
                try:
                    start_point = trajectorys[i]
                    end_point = trajectorys[i + 1]
                except Exception:  # the last index may overflow
                    end_point = start_point

                cv2.line(frame_bgr, start_point, end_point, (0, 255, 255),
                         2)  # yellow, 2 pixels wide
            # Draw the bounding box and label text.
            cv2.rectangle(frame_bgr, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (0, 0, 255), 2)
            cv2.putText(frame_bgr, str(track.track_id),
                        (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200,
                        (0, 255, 0), 2)

        # 8. Draw the raw detection boxes.
        detections = out_boxes_tlbr
        for bbox in detections:
            cv2.rectangle(frame_bgr, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (0, 255, 0), 1)

        # Time spent on this frame.
        curr_time = timer()
        exec_time = curr_time - prev_time
        prev_time = curr_time
        accum_time = accum_time + exec_time  # accum_time is the accumulated total time
        curr_fps = curr_fps + 1

        # Display speed information.
        if accum_time > 1:  # once a full second has accumulated, refresh the FPS value
            accum_time = accum_time - 1
            curr_fps = curr_fps + 2
            fps = "FPS: " + str(curr_fps)
            curr_fps = 0
        cv2.putText(frame_bgr,
                    text=fps,
                    org=(3, 15),
                    fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                    fontScale=0.50,
                    color=(0, 255, 0),
                    thickness=2)
        # Save the recording.
        if isOutput:
            out.write(frame_bgr)
        # Show the image.
        cv2.imshow('', frame_bgr)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    yolo.close_session()
コード例 #17
0
def main(yolo):

    start = time.time()
    # Definition of the parameters
    max_cosine_distance = 0.5  # gating threshold for the cosine distance
    nn_budget = None
    nms_max_overlap = 0.3  # non-maximum suppression threshold

    counter = []
    #deep_sort
    model_filename = 'model_data/market1501.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    writeVideo_flag = True
    #video_path = "./output/output.avi"
    video_capture = cv2.VideoCapture(args["input"])

    if writeVideo_flag:
        # Define the codec and create VideoWriter object
        w = int(video_capture.get(3))
        h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        out = cv2.VideoWriter(
            './output/' + args["input"][43:57] + "_" + args["class"] +
            '_output.avi', fourcc, 15, (w, h))
        list_file = open('detection.txt', 'w')
        frame_index = -1

    fps = 0.0

    while True:

        ret, frame = video_capture.read()  # frame shape 640*480*3
        if ret != True:
            break
        t1 = time.time()

        # image = Image.fromarray(frame)
        image = Image.fromarray(frame[..., ::-1])  #bgr to rgb
        boxs, class_names = yolo.detect_image(image)
        features = encoder(frame, boxs)
        # No per-box confidence from this detector, so the score is set to 1.0 here.
        detections = [
            Detection(bbox, 1.0, feature)
            for bbox, feature in zip(boxs, features)
        ]
        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap,
                                                    scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        i = int(0)
        indexIDs = []
        c = []
        boxes = []
        for det in detections:
            bbox = det.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            #boxes.append([track[0], track[1], track[2], track[3]])
            indexIDs.append(int(track.track_id))
            counter.append(int(track.track_id))
            bbox = track.to_tlbr()
            color = [int(c) for c in COLORS[indexIDs[i] % len(COLORS)]]

            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (color), 3)
            cv2.putText(frame, str(track.track_id),
                        (int(bbox[0]), int(bbox[1] - 50)), 0, 5e-3 * 150,
                        (color), 2)
            if len(class_names) > 0:
                class_name = class_names[0]
                cv2.putText(frame, str(class_names[0]),
                            (int(bbox[0]), int(bbox[1] - 20)), 0, 5e-3 * 150,
                            (color), 2)

            i += 1
            #bbox_center_point(x,y)
            center = (int(
                ((bbox[0]) + (bbox[2])) / 2), int(((bbox[1]) + (bbox[3])) / 2))
            #track_id[center]
            pts[track.track_id].append(center)
            thickness = 5
            #center point
            cv2.circle(frame, (center), 1, color, thickness)

            #draw motion path
            for j in range(1, len(pts[track.track_id])):
                if pts[track.track_id][j - 1] is None or pts[
                        track.track_id][j] is None:
                    continue
                thickness = int(np.sqrt(64 / float(j + 1)) * 2)
                cv2.line(frame, (pts[track.track_id][j - 1]),
                         (pts[track.track_id][j]), (color), thickness)
                #cv2.putText(frame, str(class_names[j]),(int(bbox[0]), int(bbox[1] -20)),0, 5e-3 * 150, (255,255,255),2)

        count = len(set(counter))
        cv2.putText(frame, "Total Object Counter: " + str(count),
                    (int(20), int(120)), 0, 5e-3 * 200, (0, 255, 0), 2)
        cv2.putText(frame, "Current Object Counter: " + str(i),
                    (int(20), int(80)), 0, 5e-3 * 200, (0, 255, 0), 2)
        cv2.putText(frame, "FPS: %f" % (fps), (int(20), int(40)), 0,
                    5e-3 * 200, (0, 255, 0), 3)
        cv2.namedWindow("YOLO3_Deep_SORT", 0)
        cv2.resizeWindow('YOLO3_Deep_SORT', 1024, 768)
        cv2.imshow('YOLO3_Deep_SORT', frame)

        if writeVideo_flag:
            #save a frame
            out.write(frame)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(boxs) != 0:
                for i in range(0, len(boxs)):
                    list_file.write(
                        str(boxs[i][0]) + ' ' + str(boxs[i][1]) + ' ' +
                        str(boxs[i][2]) + ' ' + str(boxs[i][3]) + ' ')
            list_file.write('\n')
        fps = (fps + (1. / (time.time() - t1))) / 2
        #print(set(counter))

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    print(" ")
    print("[Finish]")
    end = time.time()

    if len(pts[track.track_id]) != 0:
        print(args["input"][43:57] + ": " + str(count) + " " +
              str(class_name) + ' Found')

    else:
        print("[None Found]")

    video_capture.release()

    if writeVideo_flag:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
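
The counting logic above appends every confirmed track id to `counter` and reports the number of distinct ids as the total. A minimal sketch of that idea, with illustrative names that are not part of the example:

def update_counts(counter, frame_track_ids):
    """Append this frame's confirmed track ids and return (total, current)."""
    counter.extend(frame_track_ids)      # ids of tracks confirmed in this frame
    total = len(set(counter))            # unique objects seen so far
    current = len(frame_track_ids)       # objects visible right now
    return total, current

# e.g. update_counts([], [3, 7]) returns (2, 2); repeated ids do not inflate the total.
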
コード例 #18
0
    def system(self,
               video_path,
               output_path,
               input_size=320,
               show=False,
               CLASSES='tiny_yolo/data/coco.names',
               score_threshold=0.3,
               iou_threshold=0.45,
               rectangle_colors='',
               Track_only=[],
               display_tm=False,
               realTime=True):
        #arducam_utils = ArducamUtils(0)

        # Definition of the deep sort parameters
        max_cosine_distance = 0.7
        nn_budget = None

        #initialize deep sort object
        model_filename = 'model_data/mars-small128.pb'  # deep sort tensorflow pretrained model
        encoder = gdet.create_box_encoder(model_filename, batch_size=1)
        metric = nn_matching.NearestNeighborDistanceMetric(
            "cosine", max_cosine_distance, nn_budget)
        tracker = Tracker(metric)

        times, times_2 = [], []  #parameters for finding fps

        if video_path:
            vid = cv2.VideoCapture(video_path)  # detect on video
        else:
            print("\n\n\nSelected device 0")
            vid = cv2.VideoCapture(0, cv2.CAP_V4L2)  # detect from webcam
            #vid.set(cv2.CAP_PROP_CONVERT_RGB, arducam_utils.convert2rgb)
            vid.set(cv2.CAP_PROP_FPS, 2)

        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        #fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*'MPEG')  # defining video writer
        out = cv2.VideoWriter(output_path, codec, 30,
                              (width, height))  # output_path must be .avi

        NUM_CLASS = self.read_class_names(
            CLASSES)  # reading coco classes in the form of key value
        num_classes = len(NUM_CLASS)
        key_list = list(NUM_CLASS.keys())
        val_list = list(NUM_CLASS.values())

        # calculate parameters for the image processing function
        loop_check, original_frame = vid.read()
        if not loop_check:
            print("\n\nCouldn't read the video")
            return False
        # colors for detection
        hsv_tuples = [(1.0 * x / num_classes, 1., 1.)
                      for x in range(num_classes)]
        detection_colors = list(
            map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
        detection_colors = list(
            map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
                detection_colors))
        # random.seed(0)
        random.shuffle(detection_colors)  # to shuffle shades of same color
        # random.seed(None)
        newTime = 0
        prevTime = 0
        dummy_time = 1
        t3 = 0
        playsound('system_ready.wav')
        # loop for video
        while True:
            # loop_check is True when the frame was read successfully
            loop_check, original_frame = vid.read()
            # cv2.imshow("org", original_frame)
            if not loop_check:
                return True
            prevTime = newTime
            newTime = time.time()
            t1 = time.time()
            bboxes = self.Yolo.predict(original_frame)
            t2 = time.time()
            # extract bboxes to boxes (x, y, width, height), scores and names
            boxes, scores, names = [], [], []
            #tracking
            for bbox in bboxes:  # loop to separate the bounding boxes in the frame
                if len(Track_only) != 0 and NUM_CLASS[int(
                        bbox[5])] in Track_only or len(Track_only) == 0:
                    x1 = int(bbox[0])
                    y1 = int(bbox[1])
                    x2 = int(bbox[2])
                    y2 = int(bbox[3])
                    scoreVal = bbox[4]
                    class_id = int(bbox[5])
                    boxes.append([x1, y1, x2, y2])
                    scores.append(scoreVal)
                    label = NUM_CLASS[class_id]
                    names.append(label)
                    #self.image = cv2.rectangle(original_frame, (x1, y1), (x2, y2), (255, 0, 0), 2)

            # Obtain all the detections for the given frame.
            boxes = np.array(boxes)
            names = np.array(names)
            scores = np.array(scores)
            features = np.array(encoder(original_frame, boxes))
            # create deep sort object for detection
            detections = [
                Detection(bbox, score, class_name, feature)
                for bbox, score, class_name, feature in zip(
                    boxes, scores, names, features)
            ]

            if realTime:
                tracked_bboxes = time_to_contact(original_frame,
                                                 tracker.matchedBoxes,
                                                 newTime,
                                                 prevTime,
                                                 key_list,
                                                 val_list,
                                                 display_tm=display_tm)
            else:
                tracked_bboxes = time_to_contact(original_frame,
                                                 tracker.matchedBoxes,
                                                 dummy_time,
                                                 dummy_time - 0.01666666666,  # fixed 1/60 s frame interval
                                                 key_list,
                                                 val_list,
                                                 display_tm=display_tm)

            # Pass detections to the deepsort object and obtain the track information.
            tracker.predict()
            tracker.update(detections)

            # draw detection on frame
            self.image = self.draw_bbox(original_frame,
                                        tracked_bboxes,
                                        detection_colors,
                                        NUM_CLASS,
                                        tracking=True)

            # calculating fps
            t3 = time.time()
            times.append(t2 - t1)
            times_2.append(t3 - t1)

            times = times[-20:]
            times_2 = times_2[-20:]

            ms = sum(times) / len(times) * 1000
            fps = 1000 / ms
            fps2 = 1000 / (sum(times_2) / len(times_2) * 1000)

            print("Time: {:.2f}ms, Detection FPS: {:.1f}, total FPS: {:.1f}".
                  format(ms, fps, fps2))
            if output_path != '': out.write(self.image)
            if show:
                cv2.imshow('Tracked', self.image)

                if cv2.waitKey(25) & 0xFF == ord("q"):
                    cv2.destroyAllWindows()
                    break

        cv2.destroyAllWindows()
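
The FPS figures above come from a sliding window of the last 20 per-frame latencies. A small self-contained sketch of that calculation (names are illustrative):

def rolling_fps(times, latency, window=20):
    """Keep the last `window` frame latencies and return (mean ms, FPS)."""
    times.append(latency)
    del times[:-window]                  # keep only the most recent samples
    ms = sum(times) / len(times) * 1000.0
    return ms, 1000.0 / ms
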
コード例 #19
0
def main(yolo):

    start = time.time()
    max_cosine_distance = 0.5
    nn_budget = None
    nms_max_overlap = 0.3

    counter = []
    #deep_sort
    model_filename = 'model_data/market1501.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    find_objects = ['person']
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    writeVideo_flag = True
    video_capture = cv2.VideoCapture(args["input"])

    if writeVideo_flag:
        # Define the codec and create VideoWriter object
        w = int(video_capture.get(3))
        h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        out = cv2.VideoWriter('./output/output.avi', fourcc, 15, (w, h))
        list_file = open('detection_rslt.txt', 'w')
        frame_index = -1

    fps = 0.0

    while True:

        ret, frame = video_capture.read()  # frame shape 640*480*3
        if not ret:
            break
        t1 = time.time()

        classIDs = []
        #image = Image.fromarray(frame)
        image = Image.fromarray(frame[..., ::-1])  #bgr to rgb
        boxs, confidence, class_names = yolo.detect_image(image)
        features = encoder(frame, boxs)
        # (Detection confidences from YOLO are not used here, so the score is set to 1.0.)
        detections = [
            Detection(bbox, 1.0, feature)
            for bbox, feature in zip(boxs, features)
        ]
        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap,
                                                    scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        i = 0  # number of confirmed tracks drawn in this frame
        indexIDs = []
        c = []
        boxes = []
        center2 = []
        co_info = []
        x_l = []
        y_l = []
        s_close_pair = []
        for det in detections:
            bbox = det.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            #print(class_names)
            #print(class_names[p])

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            #boxes.append([track[0], track[1], track[2], track[3]])
            indexIDs.append(int(track.track_id))
            counter.append(int(track.track_id))
            bbox = track.to_tlbr()
            color = [int(c) for c in COLORS[indexIDs[i] % len(COLORS)]]
            #print(frame_index)
            list_file.write(str(frame_index) + ',')
            list_file.write(str(track.track_id) + ',')
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (color), 3)
            # bounding box as (x, y, width, height) strings for the results file
            b0 = str(bbox[0])
            b1 = str(bbox[1])
            b2 = str(bbox[2] - bbox[0])
            b3 = str(bbox[3] - bbox[1])

            list_file.write(
                str(b0) + ',' + str(b1) + ',' + str(b2) + ',' + str(b3))
            #print(str(track.track_id))
            list_file.write('\n')
            #list_file.write(str(track.track_id)+',')
            cv2.putText(frame, "ID:" + str(track.track_id),
                        (int(bbox[0]), int(bbox[1] - 50)), 0, 5e-3 * 150,
                        (color), 2)
            if len(class_names) > 0:
                class_name = class_names[0]
                cv2.putText(frame, str(class_names[0]),
                            (int(bbox[0]), int(bbox[1] - 20)), 0, 5e-3 * 150,
                            (color), 2)

            i += 1
            #bbox_center_point(x,y)
            center = (int(
                ((bbox[0]) + (bbox[2])) / 2), int(((bbox[1]) + (bbox[3])) / 2))
            #track_id[center]
            pts[track.track_id].append(center)
            thickness = 5
            # draw distance line
            (w, h) = (bbox[2], bbox[3])
            center2.append(center)
            co_info.append([w, h, center2])
            #print(center2)

            # measure pairwise distances between track centres; separate loop
            # variables are used so the outer counter `i` is not overwritten
            if len(center2) > 2:
                for m in range(len(center2)):
                    for n in range(len(center2)):
                        x1 = center2[m][0]
                        y1 = center2[m][1]
                        x2 = center2[n][0]
                        y2 = center2[n][1]
                        dis = calculateDistance(x1, y1, x2, y2)

                        if dis < 200:
                            cv2.line(frame, (center2[m]), (center2[n]),
                                     (0, 128, 255), 2)

                        if dis < 100:
                            cv2.line(frame, (center2[m]), (center2[n]),
                                     (0, 0, 255), 5)
                            #cv2.putText(frame, "KEEP DISTANCE",(int(960), int(1060)),0, 5e-3 * 200, (0,0,255),2)

            else:
                pass

            #center point
            cv2.circle(frame, (center), 1, color, thickness)

            # draw motion path
            for j in range(1, len(pts[track.track_id])):
                if pts[track.track_id][j - 1] is None or pts[
                        track.track_id][j] is None:
                    continue
                thickness = int(np.sqrt(64 / float(j + 1)) * 2)
                #cv2.line(frame,(pts[track.track_id][j-1]), (pts[track.track_id][j]),(color),thickness)

        count = len(set(counter))
        cv2.putText(frame, "Total Pedestrian Counter: " + str(count),
                    (int(20), int(120)), 0, 5e-3 * 200, (0, 255, 0), 2)
        cv2.putText(frame, "Current Pedestrian Counter: " + str(i),
                    (int(20), int(80)), 0, 5e-3 * 200, (0, 255, 0), 2)
        cv2.putText(frame, "FPS: %f" % (fps * 2), (int(20), int(40)), 0,
                    5e-3 * 200, (0, 255, 0), 3)
        cv2.namedWindow("YOLO3_Deep_SORT", 0)
        cv2.resizeWindow('YOLO3_Deep_SORT', 1024, 768)
        cv2.imshow('YOLO3_Deep_SORT', frame)

        if writeVideo_flag:
            # save the frame once per loop iteration
            out.write(frame)
            frame_index = frame_index + 1

        fps = (fps + (1. / (time.time() - t1))) / 2

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    print(" ")
    print("[Finish]")
    end = time.time()

    if len(pts[track.track_id]) != 0:
        print(args["input"][43:57] + ": " + str(count) + " " +
              str(class_name) + ' Found')

    else:
        print("[None Found]")
    video_capture.release()
    if writeVideo_flag:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
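
`calculateDistance` is referenced above but not defined in this example; a plausible implementation, assumed here, is the plain Euclidean distance in pixels, which the 200 px and 100 px thresholds are then compared against:

import math

def calculateDistance(x1, y1, x2, y2):
    # straight-line distance between two bounding-box centres, in pixels
    return math.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2)
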
コード例 #20
0
def main(_argv):

    cam1 = open('cam1.txt', 'w')
    cam2 = open('cam2.txt', 'w')
    cam3 = open('cam3.txt', 'w')
    cam4 = open('cam4.txt', 'w')

    # Definition of the parameters
    max_cosine_distance = 0.4
    nn_budget = None
    nms_max_overlap = 1.0

    previous = []
    current = []
    # initialize deep sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    # calculate cosine distance metric
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    # initialize tracker
    tracker = Tracker(metric)

    # load configuration for object detector
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    input_size = FLAGS.size
    video_path = FLAGS.video

    # load tflite model if flag is set
    if FLAGS.framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)
    # otherwise load standard tensorflow saved model
    else:
        saved_model_loaded = tf.saved_model.load(FLAGS.weights,
                                                 tags=[tag_constants.SERVING])
        infer = saved_model_loaded.signatures['serving_default']

    # begin video capture
    try:
        vid = cv2.VideoCapture(int(video_path))
    except:
        vid = cv2.VideoCapture(video_path)

    out = None

    # get video ready to save locally if flag is set
    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

    frame_num = 0
    # while video is running
    while True:
        return_value, frame = vid.read()
        if return_value:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image = Image.fromarray(frame)
        else:
            print('Video has ended or failed, try a different video format!')
            break
        frame_num += 1
        # print('Frame #: ', frame_num)
        frame_size = frame.shape[:2]
        image_data = cv2.resize(frame, (input_size, input_size))
        image_data = image_data / 255.
        image_data = image_data[np.newaxis, ...].astype(np.float32)
        start_time = time.time()

        # run detections on tflite if flag is set
        if FLAGS.framework == 'tflite':
            interpreter.set_tensor(input_details[0]['index'], image_data)
            interpreter.invoke()
            pred = [
                interpreter.get_tensor(output_details[i]['index'])
                for i in range(len(output_details))
            ]
            # run detections using yolov3 if flag is set
            if FLAGS.model == 'yolov3' and FLAGS.tiny:
                boxes, pred_conf = filter_boxes(pred[1],
                                                pred[0],
                                                score_threshold=0.25,
                                                input_shape=tf.constant(
                                                    [input_size, input_size]))
            else:
                boxes, pred_conf = filter_boxes(pred[0],
                                                pred[1],
                                                score_threshold=0.25,
                                                input_shape=tf.constant(
                                                    [input_size, input_size]))
        else:
            batch_data = tf.constant(image_data)
            pred_bbox = infer(batch_data)
            for key, value in pred_bbox.items():
                boxes = value[:, :, 0:4]
                pred_conf = value[:, :, 4:]

        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores=tf.reshape(
                pred_conf,
                (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
            max_output_size_per_class=50,
            max_total_size=50,
            iou_threshold=FLAGS.iou,
            score_threshold=FLAGS.score)

        # convert data to numpy arrays and slice out unused elements
        num_objects = valid_detections.numpy()[0]
        bboxes = boxes.numpy()[0]
        bboxes = bboxes[0:int(num_objects)]
        scores = scores.numpy()[0]
        scores = scores[0:int(num_objects)]
        classes = classes.numpy()[0]
        classes = classes[0:int(num_objects)]

        # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height
        original_h, original_w, _ = frame.shape
        bboxes = utils.format_boxes(bboxes, original_h, original_w)

        # store all predictions in one parameter for simplicity when calling functions
        pred_bbox = [bboxes, scores, classes, num_objects]

        # read in all class names from config
        class_names = utils.read_class_names(cfg.YOLO.CLASSES)

        # by default allow all classes in .names file
        allowed_classes = list(class_names.values())

        # custom allowed classes (uncomment line below to customize tracker for only people)
        #allowed_classes = ['person']

        # loop through objects and use class index to get class name, allow only classes in allowed_classes list
        names = []
        deleted_indx = []
        for i in range(num_objects):
            class_indx = int(classes[i])
            class_name = class_names[class_indx]
            if class_name not in allowed_classes:
                deleted_indx.append(i)
            else:
                names.append(class_name)
        names = np.array(names)
        count = len(names)
        if FLAGS.count:
            cv2.putText(frame, "Objects being tracked: {}".format(count),
                        (5, 35), cv2.FONT_HERSHEY_COMPLEX_SMALL, 2,
                        (0, 255, 0), 2)
            print("Objects being tracked: {}".format(count))
        utils.save_files(cam1, cam2, cam3, cam4, count)

        # delete detections that are not in allowed_classes
        bboxes = np.delete(bboxes, deleted_indx, axis=0)
        scores = np.delete(scores, deleted_indx, axis=0)

        # encode yolo detections and feed to tracker
        features = encoder(frame, bboxes)
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                bboxes, scores, names, features)
        ]

        # initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maxima suppression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes,
                                                    nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        # update tracks
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()

            # draw bbox on screen
            current = []
            current.append(class_name + "-" + str(track.track_id))
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), color, 2)
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1] - 30)),
                          (int(bbox[0]) +
                           (len(class_name) + len(str(track.track_id))) * 17,
                           int(bbox[1])), color, -1)
            cv2.putText(frame, class_name + "-" + str(track.track_id),
                        (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75,
                        (255, 255, 255), 2)

            # if enable info flag then print details about each track
            if FLAGS.info:
                print(
                    "Tracker ID: {}, Class: {},  BBox Coords (xmin, ymin, xmax, ymax): {}"
                    .format(str(track.track_id), class_name, (int(
                        bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]))))

        if len(previous) > 0:
            for prev in previous:
                if prev not in current:
                    print(prev)
        previous = current

        # calculate frames per second of running detections
        fps = 1.0 / (time.time() - start_time)
        print("FPS: %.2f" % fps)
        result = np.asarray(frame)
        result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

        if not FLAGS.dont_show:
            cv2.imshow("Output Video", result)

        # if output flag is set, save video file
        if FLAGS.output:
            out.write(result)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cv2.destroyAllWindows()
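
`utils.format_boxes` is not shown in this example; judging from the comment in the code it converts normalized (ymin, xmin, ymax, xmax) boxes into pixel (xmin, ymin, width, height). A sketch under that assumption, with an illustrative name:

import numpy as np

def format_boxes_sketch(bboxes, image_height, image_width):
    formatted = []
    for ymin, xmin, ymax, xmax in bboxes:
        x = int(xmin * image_width)
        y = int(ymin * image_height)
        w = int((xmax - xmin) * image_width)
        h = int((ymax - ymin) * image_height)
        formatted.append([x, y, w, h])
    return np.array(formatted)
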
コード例 #21
0
def Object_tracking(YoloV3,
                    video_path,
                    output_path,
                    input_size=416,
                    show=False,
                    CLASSES=YOLO_COCO_CLASSES,
                    score_threshold=0.3,
                    iou_threshold=0.45,
                    rectangle_colors='',
                    Track_only=[]):
    # Definition of the parameters
    max_cosine_distance = 0.7
    nn_budget = None

    #initialize deep sort object
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    times = []

    if video_path:
        vid = cv2.VideoCapture(video_path)  # detect on video
    else:
        vid = cv2.VideoCapture(0)  # detect from webcam

    # by default VideoCapture returns float instead of int
    width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(vid.get(cv2.CAP_PROP_FPS))
    codec = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter(output_path, codec, fps,
                          (width, height))  # output_path must be .mp4

    NUM_CLASS = read_class_names(CLASSES)
    key_list = list(NUM_CLASS.keys())
    val_list = list(NUM_CLASS.values())
    while True:
        _, img = vid.read()

        try:
            original_image = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
        except:
            break
        image_data = image_preprocess(np.copy(original_image),
                                      [input_size, input_size])
        image_data = tf.expand_dims(image_data, 0)

        t1 = time.time()
        pred_bbox = YoloV3.predict(image_data)
        t2 = time.time()

        times.append(t2 - t1)
        times = times[-20:]

        pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
        pred_bbox = tf.concat(pred_bbox, axis=0)

        bboxes = postprocess_boxes(pred_bbox, original_image, input_size,
                                   score_threshold)
        bboxes = nms(bboxes, iou_threshold, method='nms')

        # extract bboxes to boxes (x, y, width, height), scores and names
        boxes, scores, names = [], [], []
        for bbox in bboxes:
            if len(Track_only) != 0 and NUM_CLASS[int(
                    bbox[5])] in Track_only or len(Track_only) == 0:
                boxes.append([
                    bbox[0].astype(int), bbox[1].astype(int),
                    bbox[2].astype(int) - bbox[0].astype(int),
                    bbox[3].astype(int) - bbox[1].astype(int)
                ])
                scores.append(bbox[4])
                names.append(NUM_CLASS[int(bbox[5])])

        # Obtain all the detections for the given frame.
        boxes = np.array(boxes)
        names = np.array(names)
        scores = np.array(scores)
        features = np.array(encoder(original_image, boxes))
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                boxes, scores, names, features)
        ]

        # Pass detections to the deepsort object and obtain the track information.
        tracker.predict()
        tracker.update(detections)

        # Obtain info from the tracks
        tracked_bboxes = []
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 5:
                continue
            bbox = track.to_tlbr()  # get the corrected/predicted bounding box
            class_name = track.get_class()  # get the class name of this track
            tracking_id = track.track_id  # get the ID of this track
            index = key_list[val_list.index(class_name)]  # class index looked up by class name
            # structure the data so it can be passed to the draw_bbox function
            tracked_bboxes.append(bbox.tolist() + [tracking_id, index])

        ms = sum(times) / len(times) * 1000
        fps = 1000 / ms

        # draw detection on frame
        image = draw_bbox(original_image,
                          tracked_bboxes,
                          CLASSES=CLASSES,
                          tracking=True)
        image = cv2.putText(image, "Time: {:.1f} FPS".format(fps), (0, 30),
                            cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)

        # draw original yolo detection
        #image = draw_bbox(image, bboxes, CLASSES=CLASSES, show_label=False, rectangle_colors=rectangle_colors, tracking=True)

        #print("Time: {:.2f}ms, {:.1f} FPS".format(ms, fps))
        if output_path != '': out.write(image)
        if show:
            cv2.imshow('output', image)

            if cv2.waitKey(25) & 0xFF == ord("q"):
                cv2.destroyAllWindows()
                break

    cv2.destroyAllWindows()
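
A usage sketch for `Object_tracking`: building the YOLO model depends on the surrounding project, so the loader call below is an assumption and the paths are placeholders.

# yolo_model = Create_Yolov3(input_size=416, CLASSES=YOLO_COCO_CLASSES)  # hypothetical loader
# Object_tracking(yolo_model, "input.mp4", "tracked.avi",
#                 input_size=416, show=True, Track_only=["person"])
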
コード例 #22
0
def main():
    global frame, frame_index, out, list_file, track, count

    start = time.time()

    # Parameter definitions
    max_cosine_distance = 0.5  # cosine distance gating threshold (previously 0.9)
    nn_budget = None
    nms_max_overlap = 0.3  # non-maxima suppression threshold
    # whether to save the recognition results as a video
    write_video_flag = True

    counter = []

    # load our serialized model from disk
    # print("[INFO] loading model...")
    net = cv2.dnn.readNetFromCaffe(args["prototxt"], args["model"])

    # deep_sort
    model_filename = 'model_data/market1501.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    video_capture = cv2.VideoCapture(args["input"])
    obj_count_txt_filename = 'counter.txt'
    count_file = open(obj_count_txt_filename, 'a')
    count_file.write('\n')

    if write_video_flag:
        # Define the codec and create VideoWriter object
        w = int(video_capture.get(3))
        h = int(video_capture.get(4))
        # DIVX, XVID, MJPG, X264, WMV1, WMV2 (XVID is preferred; MJPG produces large files, X264 very small ones)
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter(
            os.path.join('video',
                         str(args['input'].split('.')[0][-7:]) + '_out.avi'),
            fourcc, 20, (w, h))
        list_file = open('detection.txt', 'w')
        frame_index = -1
    # frame-rate counter
    fps = 0.0

    while True:

        ret, frame = video_capture.read()  # frame shape 640*480*3
        if not ret:
            print("Can't receive frame (stream end?). Exiting ...")
            break
        time1 = time.time()

        # frame = imutils.resize(frame, width=800)
        (h, w) = frame.shape[:2]
        blob = cv2.dnn.blobFromImage(cv2.resize(frame, (300, 300)), 0.007843,
                                     (300, 300), 127.5)
        # run detection (forward pass)
        time2 = time.time()

        net.setInput(blob)
        detections = net.forward()

        # detections.shape
        # >>> (1, 1, n, 7)
        # eg:(1, 1, 2, 7)
        # [[[[0.          9.          0.42181703  0.4647404   0.610577
        #     0.6360997   0.8479532]
        #    [0.         15.          0.8989926   0.21603307  0.42735672
        #    0.58441484  0.8699994]]]]
        boxs = []
        class_names = []
        for i in np.arange(0, detections.shape[2]):
            confidence = detections[0, 0, i, 2]
            # greater than the minimum confidence
            if confidence > args["confidence"]:
                idx = int(detections[0, 0, i, 1])
                class_name = CLASSES[idx]

                # filter by class
                if class_name in NEED_CLASSES:
                    class_names.append(class_name)
                    box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
                    # convert to integer coordinates
                    (startX, startY, endX, endY) = box.astype("int")
                    startX = 0 if startX < 0 else startX
                    startY = 0 if startY < 0 else startY

                    boxs.append([startX, startY, endX - startX, endY - startY])

        print(boxs, class_names)
        time3 = time.time()
        print('detect cost is', time3 - time2)

        # appearance feature extraction
        features = encoder(frame, boxs)
        # (Detection confidences are not used here, so the score is set to 1.0.)
        detections = [
            Detection(bbox, class_name, 1.0, feature)
            for bbox, class_name, feature in zip(boxs, class_names, features)
        ]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap,
                                                    scores)
        detections = [detections[i] for i in indices]

        time4 = time.time()
        print('features extract is', time4 - time3)

        # Call the tracker
        tracker.predict()
        tracker.update(detections)
        time5 = time.time()
        print('update tracker cost:', time5 - time4)

        i = 0
        # tracker IDs confirmed in this frame
        indexIDs = []

        for track in tracker.tracks:

            # TODO: check whether this condition should use 'and' or 'or'
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            # boxes.append([track[0], track[1], track[2], track[3]])
            indexIDs.append(track.track_id)
            counter.append(track.track_id)
            bbox = track.to_tlbr()
            start_x, start_y, end_x, end_y = bbox.astype('int')
            color = COLORS[indexIDs[i] % len(COLORS)].tolist()

            if not track.flag and track.class_name == 'person':
                track.flag = handle_face_car('person', start_x, start_y, end_x,
                                             end_y)
            else:
                track.flag = handle_face_car(track.class_name, start_x,
                                             start_y, end_x, end_y,
                                             not track.flag)
            # draw the tracking box and ID label
            cv2.rectangle(frame, (start_x, start_y), (end_x, end_y), color, 3)
            cv2.putText(frame, track.class_name + str(track.track_id),
                        (int(bbox[0]), int(bbox[1] - 40)), 0, 0.75, color, 2)

            i += 1
            # draw motion path
            center = int(((bbox[0]) + (bbox[2])) / 2), int(
                ((bbox[1]) + (bbox[3])) / 2)
            pts[track.track_id].append(center)
            thickness = 5
            cv2.circle(frame, center, 1, color, thickness)

            for j in range(1, len(pts[track.track_id])):
                if pts[track.track_id][j - 1] is None or pts[
                        track.track_id][j] is None:
                    continue
                thickness = int(np.sqrt(64 / (j + 1.0)) * 2)
                cv2.line(frame, (pts[track.track_id][j - 1]),
                         (pts[track.track_id][j]), color, thickness)

        time6 = time.time()
        print('handle tracker cost:', time6 - time5)

        # draw white detection boxes (disabled)
        # for det in detections:
        #     bbox = det.to_tlbr()
        #     cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)

        count = len(set(counter))
        cv2.putText(frame, "Total Object Counter: " + str(count), (20, 120), 0,
                    0.75, (0, 255, 0), 2)
        cv2.putText(frame, "Current Object Counter: " + str(i), (20, 80), 0,
                    0.75, (0, 255, 0), 2)
        cv2.putText(frame, "FPS: %f" % fps, (20, 40), 0, 1.0, (0, 255, 0), 2)
        # time7 = time.time()
        # print('Draw Rectangle and Text cost:', time7 - time6)

        cv2.namedWindow("SSD_Deep_SORT", 0)
        cv2.resizeWindow('SSD_Deep_SORT', 1024, 768)
        cv2.imshow('SSD_Deep_SORT', frame)

        if write_video_flag:
            # save a frame
            out.write(frame)
            frame_index += 1
            list_file.write(str(frame_index) + ' ')
            if len(boxs) != 0:
                for i in range(0, len(boxs)):
                    list_file.write(
                        str(boxs[i][0]) + ' ' + str(boxs[i][1]) + ' ' +
                        str(boxs[i][2]) + ' ' + str(boxs[i][3]) + ' ')
            list_file.write('\n')
        fps = (fps + (1. / (time.time() - time1))) / 2
        # print(set(counter))

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    print("[Finish]")
    end = time.time()

    # if len(pts[track.track_id]):
    #     print(str(args["input"]) + ": " + str(count) + 'target Found')
    #     count_file.write(str("[VIDEO]: " + args["input"]) + " " + (
    #         str(count)) + " " + "[MODEL]: MobileNetSSD" + " " + "[TIME]:" + (str('%.2f' % (end - start))))
    # else:
    #     print("[No Found]")

    video_capture.release()
    count_file.write('\n')
    count_file.close()
    if write_video_flag:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
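
One row of the MobileNet-SSD output tensor above has the layout [_, class_id, confidence, x1, y1, x2, y2] with normalized corners. A compact sketch of turning a single row into the (x, y, w, h) box format fed to the encoder (the helper name is illustrative):

import numpy as np

def ssd_row_to_tlwh(row, frame_w, frame_h):
    class_id = int(row[1])
    confidence = float(row[2])
    x1, y1, x2, y2 = (row[3:7] * np.array([frame_w, frame_h, frame_w, frame_h])).astype("int")
    x1, y1 = max(int(x1), 0), max(int(y1), 0)   # clip negative corners, as in the example
    return class_id, confidence, [x1, y1, int(x2) - x1, int(y2) - y1]
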
コード例 #23
0
def main(yolo):

    print('Using {} model'.format(yolo))
       
    # Definition of the parameters
    max_cosine_distance = 0.2
    nn_budget = None
    nms_max_overlap = 0.4

    # deep_sort
    model_filename = 'model_data/models/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)  # used to extract appearance features

    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric, max_age=100)

    output_frames = []
    output_rectanger = []
    output_areas = []
    output_wh_ratio = []

    is_vis = True
    out_dir = 'videos/output/'
    print('The output folder is',out_dir)
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)

    all_frames = []
    for video in args.videos:
        loadvideo = LoadVideo(video)
        video_capture, frame_rate, w, h = loadvideo.get_VideoLabels()
        while True:
            ret, frame = video_capture.read() 
            if not ret:
                video_capture.release()
                break
            all_frames.append(frame)

    frame_nums = len(all_frames)
    tracking_path = out_dir+'tracking'+'.avi'
    combined_path = out_dir+'allVideos'+'.avi'
    if is_vis:
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        out = cv2.VideoWriter(tracking_path, fourcc, frame_rate, (w, h))
        out2 = cv2.VideoWriter(combined_path, fourcc, frame_rate, (w, h))
        #Combine all videos
        for frame in all_frames:
            out2.write(frame)
        out2.release()
        
    #Initialize tracking file
    filename = out_dir + '/tracking.txt'
    open(filename, 'w').close()  # truncate any previous tracking results
    
    fps = 0.0
    frame_cnt = 0
    t1 = time.time()
    
    track_cnt = dict()
    images_by_id = dict()
    ids_per_frame = []
    for frame in all_frames:
        image = Image.fromarray(frame[...,::-1]) #bgr to rgb
        boxs = yolo.detect_image(image) # n * [topleft_x, topleft_y, w, h]
        features = encoder(frame,boxs) # n * 128
        detections = [Detection(bbox, 1.0, feature) for bbox, feature in zip(boxs, features)] # length = n
        text_scale, text_thickness, line_thickness = get_FrameLabels(frame)

        
        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.delete_overlap_box(boxes, nms_max_overlap, scores) #preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices] # length = len(indices)

        # Call the tracker 
        tracker.predict()
        tracker.update(detections)
        tmp_ids = []
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue 
            
            bbox = track.to_tlbr()
            area = (int(bbox[2]) - int(bbox[0])) * (int(bbox[3]) - int(bbox[1]))
            if bbox[0] >= 0 and bbox[1] >= 0 and bbox[3] < h and bbox[2] < w:
                tmp_ids.append(track.track_id)
                if track.track_id not in track_cnt:
                    track_cnt[track.track_id] = [[frame_cnt, int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]), area]]
                    images_by_id[track.track_id] = [frame[int(bbox[1]):int(bbox[3]), int(bbox[0]):int(bbox[2])]]
                else:
                    track_cnt[track.track_id].append([frame_cnt, int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]), area])
                    images_by_id[track.track_id].append(frame[int(bbox[1]):int(bbox[3]), int(bbox[0]):int(bbox[2])])
            cv2_addBox(track.track_id,frame,int(bbox[0]),int(bbox[1]),int(bbox[2]),int(bbox[3]),line_thickness,text_thickness,text_scale)
            write_results(filename,'mot',frame_cnt+1,str(track.track_id),int(bbox[0]),int(bbox[1]),int(bbox[2]),int(bbox[3]),w,h)
        ids_per_frame.append(set(tmp_ids))

        # save a frame               
        if is_vis:
            out.write(frame)
        t2 = time.time()
        
        frame_cnt += 1
        print(frame_cnt, '/', frame_nums)

    if is_vis:
        out.release()
    print('Tracking finished in {} seconds'.format(int(time.time() - t1)))
    print('Tracked video : {}'.format(tracking_path))
    print('Combined video : {}'.format(combined_path))

    os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2,3"
    reid = REID()
    threshold = 320
    exist_ids = set()
    final_fuse_id = dict()

    print('Total IDs = ',len(images_by_id))
    feats = dict()
    for i in images_by_id:
        print('ID number {} -> Number of frames {}'.format(i, len(images_by_id[i])))
        feats[i] = reid._features(images_by_id[i]) #reid._features(images_by_id[i][:min(len(images_by_id[i]),100)])
    
    ids_per_frame2 = copy.deepcopy(ids_per_frame)
    
    for f in ids_per_frame:
        if f:
            if len(exist_ids) == 0:
                for i in f:
                    final_fuse_id[i] = [i]
                exist_ids = exist_ids | f  # set union
            else:
                new_ids = f-exist_ids
                for nid in new_ids:
                    dis = []
                    if len(images_by_id[nid])<10:
                        exist_ids.add(nid)
                        continue
                    unpickable = []
                    for i in f:
                        for key,item in final_fuse_id.items():
                            if i in item:
                                unpickable += final_fuse_id[key]
                    print('exist_ids {} unpickable {}'.format(exist_ids,unpickable))
                    for oid in (exist_ids-set(unpickable))&set(final_fuse_id.keys()):
                        tmp = np.mean(reid.compute_distance(feats[nid],feats[oid]))
                        print('nid {}, oid {}, tmp {}'.format(nid, oid, tmp))
                        dis.append([oid, tmp])
                    exist_ids.add(nid)
                    if not dis:
                        final_fuse_id[nid] = [nid]
                        continue
                    dis.sort(key=operator.itemgetter(1))
                    if dis[0][1] < threshold:
                        combined_id = dis[0][0]
                        images_by_id[combined_id] += images_by_id[nid]
                        final_fuse_id[combined_id].append(nid)
                    else:
                        final_fuse_id[nid] = [nid]
    print('Final ids and their sub-ids:',final_fuse_id)
    print('MOT took {} seconds'.format(int(time.time() - t1)))
    t2 = time.time()

    # To generate a separate MOT video for each person, set 'is_vis' to True
    is_vis = False
    if is_vis:
        print('Writing videos for each ID...')
        output_dir = 'videos/output/tracklets/'
        if not os.path.exists(output_dir):
            os.mkdir(output_dir)
        loadvideo = LoadVideo(combined_path)
        video_capture,frame_rate, w, h = loadvideo.get_VideoLabels()
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        for idx in final_fuse_id:
            tracking_path = os.path.join(output_dir, str(idx)+'.avi')
            out = cv2.VideoWriter(tracking_path, fourcc, frame_rate, (w, h))
            for i in final_fuse_id[idx]:
                for f in track_cnt[i]:
                    video_capture.set(cv2.CAP_PROP_POS_FRAMES, f[0])
                    _, frame = video_capture.read()
                    text_scale, text_thickness, line_thickness = get_FrameLabels(frame)
                    cv2_addBox(idx, frame, f[1], f[2], f[3], f[4], line_thickness, text_thickness, text_scale)
                    out.write(frame)
            out.release()
        video_capture.release()

    # Generate a single video with complete MOT/ReID              
    if args.all:
        loadvideo = LoadVideo(combined_path)
        video_capture, frame_rate, w, h = loadvideo.get_VideoLabels()
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        complete_path = out_dir+'/Complete'+'.avi'
        out = cv2.VideoWriter(complete_path, fourcc, frame_rate, (w, h))
        
        for frame in range(len(all_frames)):
            frame2 = all_frames[frame]
            video_capture.set(cv2.CAP_PROP_POS_FRAMES, frame)
            _, frame2 = video_capture.read()
            for idx in final_fuse_id:
                for i in final_fuse_id[idx]:
                    for f in track_cnt[i]:
                        #print('frame {} f0 {}'.format(frame,f[0]))
                        if frame == f[0]:
                            text_scale, text_thickness, line_thickness = get_FrameLabels(frame2)
                            cv2_addBox(idx, frame2, f[1], f[2], f[3], f[4], line_thickness, text_thickness, text_scale)
            out.write(frame2)
        out.release()
        video_capture.release()

    os.remove(combined_path)
    print('\nWriting videos took {} seconds'.format(int(time.time() - t2)))
    print('Final video at {}'.format(complete_path))
    print('Total: {} seconds'.format(int(time.time() - t1)))
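
The ReID fusion step above merges a new track id into the closest existing identity when the mean appearance distance falls below `threshold`, and otherwise opens a new identity. A reduced sketch of that decision (names mirror the example; the distance list is assumed to be precomputed):

import operator

def fuse_new_id(nid, dis, final_fuse_id, images_by_id, threshold=320):
    # dis: list of [existing_id, mean_feature_distance] pairs for candidate matches
    if not dis:
        final_fuse_id[nid] = [nid]
        return
    dis.sort(key=operator.itemgetter(1))
    best_id, best_dist = dis[0]
    if best_dist < threshold:
        images_by_id[best_id] += images_by_id[nid]   # pool the crops of both ids
        final_fuse_id[best_id].append(nid)
    else:
        final_fuse_id[nid] = [nid]
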
コード例 #24
0
def main():
    start = time.time()
    counter = []
    writeVideo_flag = False
    fps = 0.0
    filename_path = os.path.join(result_path, 'submission.txt')
    list_video, list_ids = load_list_video(list_video_path, id_path)
    result_file = open(filename_path, 'w')

    max_cosine_distance = 0.8
    nn_budget = 100
    nms_max_overlap = 1.0
    display = True
    for video in list_video: 
        path = os.path.join(video_path, video)
        ROI = load_roi(zones_path, video)
        vis = visualization.Visualization(img_shape=(960, 1280, 3), update_ms=2000)

        metric = nn_matching.NearestNeighborDistanceMetric(
            "cosine", max_cosine_distance, nn_budget)
        tracker = Tracker(metric)
        results = []
        print("Processing video: ", video)
        video_capture = cv2.VideoCapture(path)

        pause_display = False
        frame_num = 0
        while True:
            
            start = time.time()
            # print(count)
            video_capture.set(cv2.CAP_PROP_POS_FRAMES, frame_num)
            ret, frame = video_capture.read()  # frame shape 640*480*3
            # gray = cv2.cvtColor(frame1, cv2.COLOR_BGR2GRAY)
            # frame = np.zeros_like(frame1)
            # frame[:,:,0] = gray
            # frame[:,:,1] = gray
            # frame[:,:,2] = gray
            
            if not ret:
                break
            # print(frame.shape)
            w = int(video_capture.get(3))
            h = int(video_capture.get(4))   
            result = []
            t1 = time.time()
            
            img = letterbox(frame, new_shape=img_size)[0]
            img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
            img = np.ascontiguousarray(img)
            dets = run_detect(model,img,device,frame)

            detectionss = []
            for det in dets:
                feature = gdet.HOG_feature(frame, det[:4])
                detectionss.append(Detection(det[:4], det[4], feature, det[-1]))
            img = np.zeros((h, w, 3), np.uint8)
            img = frame.copy()
            min_confidence = 0.4
            detections = [d for d in detectionss if d.confidence >= min_confidence]

            # Run non-maxima suppression.
            boxes = np.array([d.tlwh for d in detections])
            scores = np.array([d.confidence for d in detections])
            indices = preprocessing.non_max_suppression(
                boxes, nms_max_overlap, scores)
            detections = [detections[i] for i in indices]

            # Update tracker.
            tracker.predict()
            tracker.update(detections)

            if display:
                vis.set_image(frame.copy())
                vis.draw_detections(detections)
                vis.draw_trackers(tracker.tracks)
            res = vis.return_img()
            draw_roi(ROI, res)
            cv2.imshow('frame', res)
            print('frame_num', frame_num)
            if not pause_display:
                key = cv2.waitKey(2)
                if key == ord('q'):
                    break
                if key == ord(' '):
                    pause_display = not pause_display
                frame_num += 1
            else:
                key = cv2.waitKey(0)
                if key == ord('q'):
                    break
                if key == ord(' '):
                    pause_display = not pause_display
            
        print(" ")
        print("[Finish]")
        

    video_capture.release()

    if writeVideo_flag:
        out.release()
        #list_file.close()
    result_file.close()
    cv2.destroyAllWindows()
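
`letterbox` is not defined in this example; in YOLO-style pipelines it normally resizes a frame while preserving aspect ratio and pads the borders up to the target square. A simplified sketch under that assumption (the original typically also returns the scale ratio and padding, which are omitted here):

import cv2
import numpy as np

def letterbox_sketch(img, new_shape=640, color=(114, 114, 114)):
    h, w = img.shape[:2]
    r = min(new_shape / h, new_shape / w)              # scale so the image fits the square
    nh, nw = int(round(h * r)), int(round(w * r))
    resized = cv2.resize(img, (nw, nh), interpolation=cv2.INTER_LINEAR)
    canvas = np.full((new_shape, new_shape, 3), color, dtype=np.uint8)
    top, left = (new_shape - nh) // 2, (new_shape - nw) // 2
    canvas[top:top + nh, left:left + nw] = resized
    return canvas
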
コード例 #25
0
def main(_argv):
    # Definition of the parameters
    max_cosine_distance = 0.5
    nn_budget = None
    nms_max_overlap = 1.0

    #initialize deep sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    yolo.load_weights(FLAGS.weights)
    logging.info('weights loaded')

    class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')

    try:
        vid = cv2.VideoCapture(int(FLAGS.video))
    except:
        vid = cv2.VideoCapture(FLAGS.video)

    out = None

    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))
        list_file = open('detection.txt', 'w')
        frame_index = -1

    fps = 0.0
    count = 0
    while True:
        _, img = vid.read()

        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, FLAGS.size)

        t1 = time.time()
        boxes, scores, classes, nums = yolo.predict(img_in)
        classes = classes[0]
        names = []
        for i in range(len(classes)):
            names.append(class_names[int(classes[i])])
        names = np.array(names)
        converted_boxes = convert_boxes(img, boxes[0])
        features = encoder(img, converted_boxes)
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                converted_boxes, scores[0], names, features)
        ]

        #initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maxima suppression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes,
                                                    nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            submit()
            bbox = track.to_tlbr()
            class_name = track.get_class()
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), color, 1)
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1] - 30)),
                          (int(bbox[0]) +
                           (len(class_name) + len(str(track.track_id))) * 17,
                           int(bbox[1])), color, -1)
            # if name_1 == "Select Player" or  name_2 == "Select Player" or name_3 == "Select Player" or name_4 == "Select Player" or name_5 == "Select Player" or height_1 == NameError or  height_2 == NameError or height_3 == NameError or height_4 == NameError or height_5 == NameError:
            #     cv2.putText(img, class_name + "-" + str(track.track_id),(int(bbox[0]), int(bbox[1]-10)),0, 0.75, (255,255,255),2)
            if class_name + str(track.track_id) == "Player1":
                cv2.putText(img, name_1, (int(bbox[0]), int(bbox[1] - 10)), 0,
                            0.75, (255, 255, 255), 2)
                # print("1: ", int(bbox[3]))
                s_height0 = ((int(bbox[3]) - (int(bbox[1]))) / height_1) * 1.15
                new_height_player1 = int(int(bbox[3]) - int(s_height0))
                cv2.line(img, (int(bbox[0]), int(new_height_player1)),
                         (int(bbox[2]), int(new_height_player1)), (0, 255, 0),
                         2)
            if class_name + str(track.track_id) == "Player2":
                cv2.putText(img, name_2, (int(bbox[0]), int(bbox[1])), 0, 0.75,
                            (255, 255, 255), 2)
                # print("2: ", int(bbox[3]))
                s_height1 = ((int(bbox[3]) - (int(bbox[1]))) / height_2) * 1.15
                new_height_player2 = int(int(bbox[3]) - int(s_height1))
                cv2.line(img, (int(bbox[0]), int(new_height_player2)),
                         (int(bbox[2]), int(new_height_player2)), (0, 255, 0),
                         2)
            if class_name + str(track.track_id) == "Player3":
                cv2.putText(img, name_3, (int(bbox[0]), int(bbox[1])), 0, 0.75,
                            (255, 255, 255), 2)
                # print("2: ", int(bbox[3]))
                s_height2 = ((int(bbox[3]) - (int(bbox[1]))) / height_3) * 1.15
                new_height_player3 = int(int(bbox[3]) - int(s_height2))
                cv2.line(img, (int(bbox[0]), int(new_height_player3)),
                         (int(bbox[2]), int(new_height_player3)), (0, 255, 0),
                         2)
            if class_name + str(track.track_id) == "Player4":
                cv2.putText(img, name_4, (int(bbox[0]), int(bbox[1])), 0, 0.75,
                            (255, 255, 255), 2)
                # print("2: ", int(bbox[3]))
                s_height3 = ((int(bbox[3]) - (int(bbox[1]))) / height_4) * 1.15
                new_height_player4 = int(int(bbox[3]) - int(s_height3))
                cv2.line(img, (int(bbox[0]), int(new_height_player4)),
                         (int(bbox[2]), int(new_height_player4)), (0, 255, 0),
                         2)
            if class_name + str(track.track_id) not in {
                    'Player1', 'Player2', 'Player3', 'Player4'
            }:
                label7.configure(text=class_name + str(track.track_id))
                if class_name == "Player":
                    cv2.putText(img, name_5, (int(bbox[0]), int(bbox[1])), 0,
                                0.75, (255, 255, 255), 2)
                    # print("2: ", int(bbox[3]))
                    s_height4 = ((int(bbox[3]) -
                                  (int(bbox[1]))) / height_5) * 1.15
                    new_height_player5 = int(int(bbox[3]) - int(s_height4))
                    cv2.line(img, (int(bbox[0]), int(new_height_player5)),
                             (int(bbox[2]), int(new_height_player5)),
                             (0, 255, 0), 2)

        ### UNCOMMENT BELOW IF YOU WANT CONSTANTLY CHANGING YOLO DETECTIONS TO BE SHOWN ON SCREEN
        #for det in detections:
        #    bbox = det.to_tlbr()
        #    cv2.rectangle(img,(int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),(255,0,0), 2)

        # print fps on screen
        fps = (fps + (1. / (time.time() - t1))) / 2
        cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
        cv2.imshow('output', img)
        if FLAGS.output:
            out.write(img)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(converted_boxes) != 0:
                for i in range(0, len(converted_boxes)):
                    list_file.write(
                        str(converted_boxes[i][0]) + ' ' +
                        str(converted_boxes[i][1]) + ' ' +
                        str(converted_boxes[i][2]) + ' ' +
                        str(converted_boxes[i][3]) + ' ')
            list_file.write('\n')

        # press q to quit
        if cv2.waitKey(1) == ord('q'):
            break
    vid.release()
    if FLAGS.output:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
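The five per-player branches above repeat the same label-and-reference-line drawing. A minimal sketch of how they could be collapsed into a lookup, assuming the GUI-supplied name_1..name_5 and height_1..height_5 values are available (the dicts below are hypothetical, not part of the original code):

# Hypothetical lookup tables keyed by the "<class_name><track_id>" string
# used in the original conditions; the label7 GUI update is omitted here.
player_names = {"Player1": name_1, "Player2": name_2,
                "Player3": name_3, "Player4": name_4}
player_heights = {"Player1": height_1, "Player2": height_2,
                  "Player3": height_3, "Player4": height_4}

key = class_name + str(track.track_id)
label = player_names.get(key, name_5)          # any other track falls back to name_5
height = player_heights.get(key, height_5)

cv2.putText(img, label, (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75,
            (255, 255, 255), 2)
# Reference line scaled by the player's height, mirroring s_height0..s_height4 above.
s_height = ((int(bbox[3]) - int(bbox[1])) / height) * 1.15
line_y = int(bbox[3]) - int(s_height)
cv2.line(img, (int(bbox[0]), line_y), (int(bbox[2]), line_y), (0, 255, 0), 2)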
コード例 #26
        if cv2.__version__.split(".")[0] == "2":
          frame_count = vcap.get(cv2.cv.CV_CAP_PROP_FRAME_COUNT)
        else:
          # opencv 3/4
          frame_count = vcap.get(cv2.CAP_PROP_FRAME_COUNT)

      # initialize tracking module
      if args.get_tracking:
        tracking_objs = args.tracking_objs.split(",")
        tracker_dict = {}
        tracking_results_dict = {}
        tmp_tracking_results_dict = {}
        for tracking_obj in tracking_objs:
          metric = nn_matching.NearestNeighborDistanceMetric(
              "cosine", args.max_cosine_distance, args.nn_budget)
          tracker_dict[tracking_obj] = Tracker(
              metric, max_iou_distance=args.max_iou_distance)
          tracking_results_dict[tracking_obj] = []
          tmp_tracking_results_dict[tracking_obj] = {}

      # videoname = os.path.splitext(os.path.basename(videofile))[0]
      videoname = os.path.basename(videofile)
      if args.out_dir is not None:  # not saving box json to save time
        video_out_path = os.path.join(args.out_dir, videoname)
        if not os.path.exists(video_out_path):
          os.makedirs(video_out_path)

      # for box feature, saving them to disk if needed
      if args.get_box_feat:
        feat_out_path = os.path.join(args.box_feat_path, videoname)
        if not os.path.exists(feat_out_path):
          os.makedirs(feat_out_path)
コード例 #27
File: tracker.py  Project: bdebbabi/SlowFast
 def __init__(self, max_age=120, max_cosine_distance=0.4):
     nn_budget = None
     metric = nn_matching.NearestNeighborDistanceMetric(
         "cosine", max_cosine_distance, nn_budget)
     self.tracker = Tracker(metric, max_age=max_age)
     self.encoder = load_encoder()
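The wrapper above only builds the metric, tracker, and encoder; a per-frame update would typically mirror the standalone demos further down this page. A minimal sketch, assuming the encoder is called as encoder(frame, boxes) with boxes in tlwh format and that Detection comes from deep_sort.detection (both are assumptions, not confirmed by this snippet):

 def update(self, frame, boxes, scores):
     # Assumed: from deep_sort.detection import Detection; boxes are tlwh.
     features = self.encoder(frame, boxes)
     detections = [Detection(bbox, score, feature)
                   for bbox, score, feature in zip(boxes, scores, features)]
     self.tracker.predict()
     self.tracker.update(detections)
     # Keep only confirmed tracks that were updated on this frame.
     return [t for t in self.tracker.tracks
             if t.is_confirmed() and t.time_since_update <= 1]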
コード例 #28
parser.add_argument('--model_feature',
                    type=str,
                    default='model_data/market1501.pb',
                    help='target tracking model file.')
ARGS = parser.parse_args()

box_size = 2  # bounding-box line thickness
font_scale = 0.4  # font scale for labels

if __name__ == '__main__':
    # Deep SORT tracker
    encoder = generate_detections.create_box_encoder(ARGS.model_feature,
                                                     batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       ARGS.min_score, None)
    tracker = Tracker(metric)

    # load the detection model
    mrcnn = MRCNN(ARGS.model_file, ARGS.input_size, ARGS.min_score)

    # open the input video
    video = cv2.VideoCapture(ARGS.video_file)

    # writer for the output video
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    fps = video.get(cv2.CAP_PROP_FPS)
    size = (int(video.get(cv2.CAP_PROP_FRAME_WIDTH)),
            int(video.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    video_out = cv2.VideoWriter(out_path + "/outputVideo.mp4", fourcc, fps,
                                size)
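The snippet ends after the writer is created; the read-process-write loop that would typically follow is sketched below (the MRCNN detection, Deep SORT update, and drawing are left as a placeholder because that part of the project is not shown here):

while True:
    ret, frame = video.read()
    if not ret:
        break
    # ... run mrcnn + encoder + tracker on `frame` and draw the tracks ...
    video_out.write(frame)

video.release()
video_out.release()
cv2.destroyAllWindows()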
コード例 #29
def run(sequence_dir, detection_file, output_dir, min_confidence,
        nms_max_overlap, min_detection_height, max_cosine_distance, nn_budget,
        display):
    """Run multi-target tracker on a particular sequence.

    Parameters
    ----------
    sequence_dir : str
        Path to the MOTChallenge sequence directory.
    detection_file : str
        Path to the detections file.
    output_dir : str
        Path to the output directory. A tracking output file named after the
        sequence is written to this directory on completion.
    min_confidence : float
        Detection confidence threshold. Disregard all detections that have
        a confidence lower than this value.
    nms_max_overlap: float
        Maximum detection overlap (non-maxima suppression threshold).
    min_detection_height : int
        Detection height threshold. Disregard all detections that have
        a height lower than this value.
    max_cosine_distance : float
        Gating threshold for cosine distance metric (object appearance).
    nn_budget : Optional[int]
        Maximum size of the appearance descriptor gallery. If None, no budget
        is enforced.
    display : bool
        If True, show visualization of intermediate tracking results.

    """
    seq_info = gather_sequence_info(sequence_dir, detection_file)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)
    results = []

    def frame_callback(vis, frame_idx):
        print("Processing frame %05d" % frame_idx)

        # Load image and generate detections.
        detections = create_detections(seq_info["detections"], frame_idx,
                                       min_detection_height)
        detections = [d for d in detections if d.confidence >= min_confidence]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap,
                                                    scores)
        detections = [detections[i] for i in indices]

        # Update tracker.
        tracker.predict()
        tracker.update(detections)

        # Update visualization.
        if display:
            image = cv2.imread(seq_info["image_filenames"][frame_idx],
                               cv2.IMREAD_COLOR)
            vis.set_image(image.copy())
            vis.draw_detections(detections)
            vis.draw_trackers(tracker.tracks)

        # Store results.
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlwh()
            boxes = track.tlwh
            #boxes = detection.tlwh
            results.append([
                frame_idx, track.track_id, bbox[0], bbox[1], bbox[2], bbox[3],
                boxes[0], boxes[1], boxes[2], boxes[3]
            ])

    # Run tracker.
    if display:
        visualizer = visualization.Visualization(seq_info, update_ms=5)
    else:
        visualizer = visualization.NoVisualization(seq_info)
    visualizer.run(frame_callback)

    # Store results.
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    output_file = os.path.join(output_dir,
                               os.path.split(sequence_dir)[-1] + '.txt')
    with open(output_file, 'w') as f:
        for row in results:
            print('%d,%d,%.2f,%.2f,%.2f,%.2f,%.2f,%.2f,%.2f,%.2f,1,-1,-1,-1' %
                  (row[0], row[1], row[2], row[3], row[4], row[5], row[6],
                   row[7], row[8], row[9]),
                  file=f)
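For reference, the comma-separated rows written above can be loaded back for later inspection or evaluation; a minimal sketch, assuming the file produced by this function:

import numpy as np

# Each row: frame, track_id, two (x, y, w, h) boxes, then the fixed 1,-1,-1,-1 tail.
rows = np.loadtxt(output_file, delimiter=',')
frame_ids = rows[:, 0].astype(int)
track_ids = rows[:, 1].astype(int)
tlwh_boxes = rows[:, 2:6]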
コード例 #30
def main(yolo,read_type):

   # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0
    
   # deep_sort 
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename,batch_size=1)
    
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    #writeVideo_flag = True

    # generate a video object
    video_dir='./model_data/demo2.wmv'
    video=video_open(read_type,video_dir)
    video_capture = video.generate_video()
    fps=0
    while True:

        ret, frame = video_capture.read()  # frame shape 640*480*3
        if not ret:
            break
        t1 = time.time()

        # 1. Run YOLOv3 detection to get object locations and related info
        # 2. Track how each object moves across the image

        # 1. Run YOLOv3 detection to get object locations and related info
        image = Image.fromarray(frame)
        time3=time.time()
        boxs = yolo.detect_image(image)
        time4=time.time()
        print('detection cost is', time4 - time3)
       # print("box_num",len(boxs))
        time3=time.time()
        features = encoder(frame,boxs)
        
        # The detector provides no confidence score, so it is set to 1.0 here.
        detections = [Detection(bbox, 1.0, feature) for bbox, feature in zip(boxs, features)]
        
        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]
        time4=time.time()
        print('feature extraction cost is', time4 - time3)
        # Call the tracker
        # 2. Track each object's trajectory across frames; Kalman filtering refines the positions
        tracker.predict()
        tracker.update(detections)
        
        for track in tracker.tracks:
            # only draw confirmed tracks that were updated on this frame
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),(255,255,255), 2)
            cv2.putText(frame, str(track.track_id),(int(bbox[0]), int(bbox[1])),0, 5e-3 * 200, (0,255,0),2)

        for det in detections:
            bbox = det.to_tlbr()
            cv2.rectangle(frame,(int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),(255,0,0), 2)
            
        cv2.imshow('', frame)

        fps  = ( fps + (1./(time.time()-t1)) ) / 2
        print("fps= %f"%(fps))
        
        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    video_capture.release()

    cv2.destroyAllWindows()
コード例 #31
File: demo.py  Project: shmilymm/deep_sort_yolov3
def main(yolo):

   # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0
    
   # deep_sort 
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename,batch_size=1)
    
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    writeVideo_flag = True 
    
    video_capture = cv2.VideoCapture(0)

    if writeVideo_flag:
        # Define the codec and create VideoWriter object
        w = int(video_capture.get(3))
        h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        out = cv2.VideoWriter('output.avi', fourcc, 15, (w, h))
        list_file = open('detection.txt', 'w')
        frame_index = -1 
        
    fps = 0.0
    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if not ret:
            break
        t1 = time.time()

        image = Image.fromarray(frame)
        boxs = yolo.detect_image(image)
       # print("box_num",len(boxs))
        features = encoder(frame,boxs)
        
        # The detector provides no confidence score, so it is set to 1.0 here.
        detections = [Detection(bbox, 1.0, feature) for bbox, feature in zip(boxs, features)]
        
        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]
        
        # Call the tracker
        tracker.predict()
        tracker.update(detections)
        
        for track in tracker.tracks:
            # only draw confirmed tracks that were updated on this frame
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),(255,255,255), 2)
            cv2.putText(frame, str(track.track_id),(int(bbox[0]), int(bbox[1])),0, 5e-3 * 200, (0,255,0),2)

        for det in detections:
            bbox = det.to_tlbr()
            cv2.rectangle(frame,(int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),(255,0,0), 2)
            
        cv2.imshow('', frame)
        
        if writeVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1
            list_file.write(str(frame_index)+' ')
            if len(boxs) != 0:
                for i in range(0,len(boxs)):
                    list_file.write(str(boxs[i][0]) + ' '+str(boxs[i][1]) + ' '+str(boxs[i][2]) + ' '+str(boxs[i][3]) + ' ')
            list_file.write('\n')
            
        fps  = ( fps + (1./(time.time()-t1)) ) / 2
        print("fps= %f"%(fps))
        
        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    video_capture.release()
    if writeVideo_flag:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()