Example #1
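# YOLOv3 + DeepSORT vehicle tracking: keeps a rolling buffer of recent frames on
# disk, fits a linear model to each track's recent box coordinates to predict
# whether it is heading into a warning zone, and hands flagged frames to an
# external segment.py call for instance segmentation.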
    def _to_output(self, image, boxes, scores, classes, nums):
        scores = scores[:nums]
        classes = classes[:nums].astype(np.int32)
        boxes = boxes[:nums]
        indexes = scores >= self.score_threshold
        scores = scores[indexes]
        classes = classes[indexes]
        boxes = convert_boxes(image, boxes[indexes])
        features = self.encoder(image, boxes)
        return [
            Detection(bbox, score, self.class_names[clazz], feature)
            for bbox, score, clazz, feature in zip(boxes, scores, classes,
                                                   features)
        ]
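
# convert_boxes() is imported from the surrounding project and not shown in this
# listing. A minimal sketch of one plausible implementation, assuming YOLO emits
# normalised (x1, y1, x2, y2) boxes and DeepSORT expects pixel (x, y, w, h) boxes:
def convert_boxes(image, boxes):
    converted = []
    h, w = image.shape[0], image.shape[1]
    for x1, y1, x2, y2 in np.array(boxes):
        if x1 == 0 and y1 == 0 and x2 == 0 and y2 == 0:
            continue  # skip the padded, empty slots in YOLO's fixed-size output
        converted.append([int(x1 * w), int(y1 * h),
                          int((x2 - x1) * w), int((y2 - y1) * h)])
    return converted
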
def main(_argv):
    # Definition of the parameters
    max_cosine_distance = 0.5
    nn_budget = None
    nms_max_overlap = 1.0

    #initialize deep sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    yolo.load_weights(FLAGS.weights)
    logging.info('weights loaded')

    class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')

    try:
        vid = cv2.VideoCapture(int(FLAGS.video))
    except:
        vid = cv2.VideoCapture(FLAGS.video)

    out = None

    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        print(fps)
        print(width)
        print(height)
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))
        list_file = open('detection.txt', 'w')
        frame_index = -1

    fps = 0.0
    count = 0
    tot = -1

    coord = {}
    pred = {}
    framelists = []
    while True:
        tot = tot + 1
        _, img = vid.read()
        if img is None:
            logging.warning("Empty Frame")
            time.sleep(0.1)
            count += 1
            if count < 3:
                continue
            break
        print(len(framelists))

        temp_img = img
        count = 0
        temp_framelists = []
        if (len(framelists) == 5):
            framelists.remove(framelists[0])
            for i in range(0, 5):
                img_v = cv2.imread("data/video/raw1/frame%d.jpg" % i)
                temp_framelists.append(img_v)
            temp_framelists.remove(temp_framelists[0])
            temp_framelists.append(temp_img)

            for i in range(0, len(temp_framelists)):
                cv2.imwrite("data/video/raw1/frame%d.jpg" % i,
                            temp_framelists[i])

        else:

            if (count < 5):
                if (count == 0):
                    cv2.imwrite("data/video/raw1/frame0.jpg", temp_img)

                else:
                    cv2.imwrite("data/video/raw1/frame%d.jpg" % count,
                                temp_img)

            for i in range(0, count + 1):
                img_v = cv2.imread("data/video/raw1/frame%d.jpg" % i)
                temp_framelists.append(img_v)
        framelists.append(temp_img)

        if (len(framelists) > 0):
            cv2.imwrite("data/video/raw/frame%d.jpg" % tot, framelists[-1])


        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, FLAGS.size)

        t1 = time.time()
        boxes, scores, classes, nums = yolo.predict(img_in)
        classes = classes[0]
        names = []

        #if(90<tot<115):
        #cv2.putText(img,"warning",(10,50),cv2.FONT_HERSHEY_SIMPLEX,fontScale=2.5,thickness=5,color=(255,0,0))

        for i in range(len(classes)):
            names.append(class_names[int(classes[i])])
        names = np.array(names)
        converted_boxes = convert_boxes(img, boxes[0])
        features = encoder(img, converted_boxes)
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                converted_boxes, scores[0], names, features)
        ]
        #print(detections)
        #initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maxima suppression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes,
                                                    nms_max_overlap, scores)
        detections = [detections[i] for i in indices]
        #print(detections)

        # Call the tracker
        tracker.predict()
        tracker.update(detections)
        #####

        ls = []  #to get the track ids of each frame

        a = framelists[-1]

        #cv2.imwrite("data/video/raw1/frame%d.jpg" % tot, temp_framelists[-1])
        for track in tracker.tracks:

            x_min = []
            x_max = []
            y_max = []

            temp_box = track.to_tlbr()
            ls.append(track.track_id)  #add track ids
            if track.track_id not in coord:
                coord[track.track_id] = []
            else:
                leng = len(coord[track.track_id])
                if (leng == 5):
                    coord[track.track_id].remove(coord[track.track_id][0])
                coord[track.track_id].append(list(temp_box))

            for i in range(0, len(coord[track.track_id])):
                if (coord[track.track_id][i][0] > 0
                        and coord[track.track_id][i][2] < 720
                        and coord[track.track_id][i][3] < 720):
                    x_min.append(coord[track.track_id][i][0])
                    x_max.append(coord[track.track_id][i][2])
                    y_max.append(coord[track.track_id][i][3])

            #base - frames series
            base = []

            num_of_frames = len(x_min)
            if (num_of_frames >= 3):
                base = list(range(1, (num_of_frames + 1)))
                #for i in range(0,len(x_min)):
                #base.append(i+1)

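                # fit a first-order polynomial to each coordinate series and
                # extrapolate to frame index 60 to anticipate where the box is heading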
                model1 = polyfit(base, x_min, 1)
                predict_x_min = poly1d(model1)

                model2 = polyfit(base, x_max, 1)
                predict_x_max = poly1d(model2)

                model3 = polyfit(base, y_max, 1)
                predict_y_max = poly1d(model3)

                tempo_framelists = []

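                # warn if the extrapolated box is predicted to enter the
                # lower-centre region of the frame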
                if ((50 < predict_x_min(60) < 650
                     or 50 < predict_x_max(60) < 650)
                        and 500 < predict_y_max(60) < 800):
                    cv2.putText(img,
                                "warning", (10, 50),
                                cv2.FONT_HERSHEY_SIMPLEX,
                                fontScale=2.5,
                                thickness=5,
                                color=(255, 0, 0))
                    for i in range(0, len(framelists)):
                        image_vehicle = cv2.imread(
                            "data/video/raw1/frame%d.jpg" % i)
                        tempo_framelists.append(image_vehicle)

                    for j in range(0, len(coord[track.track_id])):
                        vehicle = coord[track.track_id][-(j + 1)]
                        crop_img = tempo_framelists[-(j + 1)][
                            int(vehicle[1]) - 10:int(vehicle[3]) + 10,
                            int(vehicle[0]) - 10:int(vehicle[2]) + 10]
                        inp_img = tempo_framelists[-(j + 1)]
                        crop_height = crop_img.shape[0]
                        crop_width = crop_img.shape[1]

                        print(crop_height, crop_width)
                        print(
                            int(vehicle[3]) - int(vehicle[1]),
                            int(vehicle[2]) - int(vehicle[0]))

                        input_dir = os.path.join("data/video/cropped/turn%d" %
                                                 tot)
                        output_dir = os.path.join("data/video/segment/turn%d" %
                                                  tot)
                        if not os.path.exists(input_dir):
                            os.mkdir(input_dir)
                            os.mkdir(output_dir)

                        #cv2.imwrite("data/video/cropped/frame%d.jpg" % j,crop_img )

                        #input_image = "data/video/cropped/frame%d.jpg" % j
                        #output_image ="data/video/segment/frame%d.jpg" % j
                        input_folder = "data/video/cropped/turn%d" % tot
                        output_folder = "data/video/segment/turn%d" % tot

                        input_image = input_folder + "/frame%d.jpg" % j
                        output_image = output_folder + "/frame%d.jpg" % j

                        #cv2.imwrite(input_image ,crop_img )
                        cv2.imwrite(input_image, inp_img)
                        xmin = str(int(vehicle[0]) - 10)
                        xmax = str(int(vehicle[2]) + 10)
                        ymin = str(int(vehicle[1]) - 10)
                        ymax = str(int(vehicle[3]) + 10)
                        dim = xmin + "," + xmax + "," + ymin + "," + ymax

                        os.system(
                            "python segment.py --trained_model=weights/yolact_base_54_800000.pth --score_threshold=0.15 --top_k=15 --image="
                            + input_image + ":" + output_image +
                            " --dimension=" + dim)
                        #os.system("python segment.py --trained_model=weights/yolact_base_54_800000.pth --score_threshold=0.15 --top_k=15 --image=car2.jpg")
                        #os.system("python segment.py --trained_model=weights/yolact_base_54_800000.pth --score_threshold=0.15 --top_k=15 --image=car2.jpg:output2.jpg")
                        #crop_img = img[y:y+h, x:x+w]
                        #cv2.imshow("cropped", crop_img)
                        #cv2.waitKey(0)

                        #cv.imwrite("images/frame%d.jpg" % count, frame)

            #if(track.track_id ==13):
            #print("temp  " + str(temp_box ))
            #if not track.is_confirmed() or track.time_since_update > 1:
            #continue

            bbox = track.to_tlbr()

            #if(track.track_id ==13):
            #print("special  " + str(bbox))
            #print(bbox)
            #print(track.track_id , bbox)
            #print(track.track_id)
            class_name = track.get_class()
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), color, 2)

            cv2.rectangle(img, (int(bbox[0]), int(bbox[1] - 30)),
                          (int(bbox[0]) +
                           (len(class_name) + len(str(track.track_id))) * 17,
                           int(bbox[1])), color, -1)
            cv2.putText(img, class_name + "-" + str(track.track_id),
                        (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75,
                        (255, 255, 255), 2)

        #print("track ids : " +str(ls))
        #print("dict :   " + str(coord))
        ### UNCOMMENT BELOW IF YOU WANT CONSTANTLY CHANGING YOLO DETECTIONS TO BE SHOWN ON SCREEN
        #for det in detections:
        #    bbox = det.to_tlbr()
        #    cv2.rectangle(img,(int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),(255,0,0), 2)

        # print fps on screen
        fps = (fps + (1. / (time.time() - t1))) / 2
        cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
        cv2.imshow('output', img)
        #if tot == 6 :

        #cv2.imwrite("data/video/images/000.jpg",framelists[0])
        #cv2.imwrite("data/video/images/001.jpg",framelists[1])
        #cv2.imwrite("data/video/images/002.jpg",framelists[2])
        #cv2.imwrite("data/video/images/003.jpg",framelists[3])
        #cv2.imwrite("data/video/images/004.jpg",framelists[4])
        if FLAGS.output:
            out.write(img)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(converted_boxes) != 0:
                for i in range(0, len(converted_boxes)):
                    list_file.write(
                        str(converted_boxes[i][0]) + ' ' +
                        str(converted_boxes[i][1]) + ' ' +
                        str(converted_boxes[i][2]) + ' ' +
                        str(converted_boxes[i][3]) + ' ')
            list_file.write('\n')

        # press q to quit
        if cv2.waitKey(1) == ord('q'):
            break
    vid.release()
    if FLAGS.output:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
    def track(self, img, yolooutput, recognizeoutput):

        #img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        #img_in = tf.expand_dims(img_in, 0)
        #img_in = transform_images(img_in, 416) #416 =size

        t1 = time.time()

        boxes, detect_scores, classes, nums = yolooutput
        persons, names, scores = recognizeoutput
        self.person_dict = dict(self.person_dict, **persons)
        ######################################################################
        names = np.array(names)
        converted_boxes = convert_boxes(img, boxes[0])
        features = self.encoder(img, converted_boxes)
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                converted_boxes, scores, names, features)
        ]  #if there is an error here try removing "[0]"

        #initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maxima suppression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes,
                                                    self.nms_max_overlap,
                                                    scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        self.tracker.predict()
        self.tracker.update(detections)
        output_data = []
        for track in self.tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()
            if class_name in self.person_dict:
                person = self.person_dict[class_name]
                if person:
                    person_data = {
                        "key": person.id,
                        "name": person.familyName + " " + person.firstName,
                        "age": person.age,
                        "address": person.address,
                    }
                    output_data.append(person_data)
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), color, 2)
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1] - 30)),
                          (int(bbox[0]) +
                           (len(class_name) + len(str(track.track_id))) * 17,
                           int(bbox[1])), color, -1)
            cv2.putText(img, class_name, (int(bbox[0]), int(bbox[1] - 10)), 0,
                        0.75, (255, 255, 255), 2)
        """if self.output_path:
            self.out.write(img)
            frame_index = frame_index + 1
            list_file.write(str(frame_index)+' ')
            if len(converted_boxes) != 0:
                for i in range(0,len(converted_boxes)):
                    list_file.write(str(converted_boxes[i][0]) + ' '+str(converted_boxes[i][1]) + ' '+str(converted_boxes[i][2]) + ' '+str(converted_boxes[i][3]) + ' ')
            list_file.write('\n')"""
        self.output_data = output_data
        return img
Example #4
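# YOLOv3 + DeepSORT vehicle counting: draws a motion trail per track, counts cars,
# trucks and persons whose centre enters a horizontal counting band, and saves a
# crop of each counted object to a dataset directory.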
def main():

    class_names = [c.strip() for c in open('./data/labels/coco.names').readlines()]
    yolo = YoloV3(classes=len(class_names))
    yolo.load_weights('./weights/yolov3.tf')


    max_cosine_distance = 0.5
    nn_budget = None
    nms_max_overlap = 0.8


    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric('cosine', max_cosine_distance, nn_budget)
    tracker = Tracker(metric)


    vid = cv2.VideoCapture("traffic1.mkv")
    #vid = cv2.VideoCapture("video.webm")
    #vid = VideoCaptureAsync("video.webm")
    #vid = vid.start()


    codec = cv2.VideoWriter_fourcc(*'XVID')
    vid_fps =int(vid.get(cv2.CAP_PROP_FPS))
    vid_width,vid_height = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)), int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    out = cv2.VideoWriter('./data/video/results.avi', codec, vid_fps, (vid_width, vid_height))

    from collections import deque
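    # one fixed-length (30-point) history of centre points per possible track id,
    # used below to draw motion trails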
    pts = [deque(maxlen=30) for _ in range(1000)]

    counter = []

    directory1 = "/home/ecl/Downloads/Limon/Object_Tracking/imgzmq/dataset/"

    result = []

    new_cnt = 0

    while True:
        
        _, img = vid.read()
        if img is None:
            print('Completed')
            break

        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, 416)

        t1 = time.time()

        boxes, scores, classes, nums = yolo.predict(img_in)

        classes = classes[0]
        names = []
        
        for i in range(len(classes)):
            names.append(class_names[int(classes[i])])
            
        names = np.array(names)
        converted_boxes = convert_boxes(img, boxes[0])
        features = encoder(img, converted_boxes)

        detections = [Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in
                    zip(converted_boxes, scores[0], names, features)]


        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]


        tracker.predict()
        tracker.update(detections)

        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0,1,20)]

        #current_count = int(0)
        #count = 0

        for track in tracker.tracks:
            
            if not track.is_confirmed() or track.time_since_update >1:
                continue
            
            bbox = track.to_tlbr()
            class_name= track.get_class()
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]
        
            cv2.rectangle(img, (int(bbox[0]),int(bbox[1])), (int(bbox[2]),int(bbox[3])), color, 2)
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1]-30)), (int(bbox[0])+(len(class_name)
                        +len(str(track.track_id)))*17, int(bbox[1])), color, -1)
            
            cv2.putText(img, class_name+"-"+str(track.track_id), (int(bbox[0]), int(bbox[1]-10)), 0, 0.75,
                        (255, 255, 255), 2)

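            # store the box centre for this track and draw a fading trail
            # through its recent positions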
            center = (int(((bbox[0]) + (bbox[2]))/2), int(((bbox[1])+(bbox[3]))/2))
            pts[track.track_id].append(center)

            for j in range(1, len(pts[track.track_id])):
                
                if pts[track.track_id][j-1] is None or pts[track.track_id][j] is None:
                    continue
                thickness = int(np.sqrt(64/float(j+1))*2)
                cv2.line(img, (pts[track.track_id][j-1]), (pts[track.track_id][j]), color, thickness)


            height, width, _ = img.shape
            cv2.line(img, (0, int(3*height/6+height/20)), (width, int(3*height/6+height/20)), (0, 255, 0), thickness=2)
            #cv2.line(img, (0, int(3*height/6-height/20)), (width, int(3*height/6-height/20)), (0, 255, 0), thickness=2)
            cv2.line(img, (220, 460), (1000, 450), (0, 0, 255), 2)
            center_y = int(((bbox[1])+(bbox[3]))/2)

            #count = 0
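            # count the track once its centre falls inside the horizontal band
            # around the counting line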
            if center_y <= int(3*height/6+height/20) and center_y >= int(3*height/6-height/20):
                
                if class_name == 'car' or class_name == 'truck' or class_name == 'person':
                    counter.append(int(track.track_id))  

                    directory = r'/home/ecl/Downloads/Limon/Object_Tracking/imgzmq/dataset'
                    for filename in os.listdir(directory):
                        if filename.endswith(".jpg") or filename.endswith(".png"):
                            a1 = os.path.join(directory, filename)
                            b = int(re.search(r'\d+', a1).group())
                            result.append(b)
                        else:
                            continue
                        
                    
                    b1 = max(result) + 1
                    count = 0

                    while(True):
                        
                        count += 1
                        print(count)
                        #count = b1
                        
                        ## increase image size and resolution
                        #new_img = img[int(bbox[0]):(int(bbox[2])+int(bbox[3])), int(bbox[1]):(int(bbox[2])+int(bbox[3]))]
                        new_img = img[int(bbox[1]):(int(bbox[1])+int(bbox[3])), int(bbox[0]):(int(bbox[0])+int(bbox[2]))]
                        #new_rgb = rgb[int(bbox[1]):(int(bbox[1])+int(bbox[3])), int(bbox[0]):(int(bbox[0])+int(bbox[2]))]
                        #new_img = cv2.resize(new_img, (360, 360), interpolation = cv2.INTER_NEAREST)

                        cv2.imwrite(directory1 + f"image{b1}.jpg", new_img)
                                    
                        if count > 1:
                            print("break the loop..............")
                            break
                    #current_count += 1


        total_count = len(set(counter))
        #cv2.putText(img, "Current Vehicle Count: " + str(current_count), (0, 80), 0, 1, (0, 0, 255), 2)
        cv2.putText(img, "Total Vehicle Count: " + str(total_count), (0,130), 0, 1, (0,0,255), 2)


        fps = 1./(time.time()-t1)
        cv2.putText(img, "FPS: {:.2f}".format(fps), (0,30), 0, 1, (0,0,255), 2)
        #cv2.resizeWindow('output', 1024, 768)
        cv2.imshow('output', img)
        out.write(img)


        if cv2.waitKey(1) == ord('q'):
            break
        
        
    vid.release()
    out.release()
    cv2.destroyAllWindows()
Example #5
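# YOLOv3 + DeepSORT fish counting: tracks Koi and Tilapia, draws each fish's
# movement pattern, and counts left-to-right and right-to-left crossings of a
# vertical midline. The second main() below monitors workers by projecting
# detections onto the ground plane with a homography and flagging people closer
# than a distance threshold.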
def main(_argv):
    # Definition of the parameters
    right2left_koi = 0
    right2left_til = 0
    left2right_koi = 0
    left2right_til = 0
    font = cv2.FONT_HERSHEY_DUPLEX
    max_cosine_distance = 0.5
    nn_budget = None
    nms_max_overlap = 1.0

    #initialize deep sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    yolo.load_weights(FLAGS.weights)
    logging.info('weights loaded')

    class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')

    try:
        vid = cv2.VideoCapture(int(FLAGS.video))
    except:
        vid = cv2.VideoCapture(FLAGS.video)

    out = None

    # by default VideoCapture returns float instead of int; the frame size is also
    # needed below to place the midline, so read it regardless of --output
    width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(vid.get(cv2.CAP_PROP_FPS))

    if FLAGS.output:
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))
        list_file = open('detection.txt', 'w')
        frame_index = -1

    # midline position variables
    midline_pos_x = int(width / 2) - 3
    midline_pos_y = int(height)
    fps = 0.0
    count = 0
    while True:
        _, img = vid.read()

        if img is None:
            logging.warning("Empty Frame")
            time.sleep(0.1)
            count += 1
            if count < 3:
                continue
            else:
                break

        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, FLAGS.size)

        t1 = time.time()
        boxes, scores, classes, nums = yolo.predict(img_in)
        classes = classes[0]
        names = []
        for i in range(len(classes)):
            names.append(class_names[int(classes[i])])
        names = np.array(names)
        converted_boxes = convert_boxes(img, boxes[0])
        features = encoder(img, converted_boxes)
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                converted_boxes, scores[0], names, features)
        ]

        #initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maxima suppression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes,
                                                    nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        #draw midline
        cv2.line(img, (midline_pos_x, 0), (midline_pos_x, midline_pos_y),
                 (0, 0, 0), 3)

        screen1_koi = 0
        screen1_til = 0
        screen2_koi = 0
        screen2_til = 0
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()
            c_curr = (int(bbox[0] + abs(bbox[0] - bbox[2]) / 2),
                      int(bbox[1] + abs(bbox[1] - bbox[3]) / 2))
            center_x = c_curr[0]
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), color, 1)
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1] - 17)),
                          (int(bbox[0]) +
                           (len(class_name) + len(str(track.track_id))) * 14,
                           int(bbox[1])), color, -1)
            cv2.putText(img, class_name + "-" + str(track.track_id),
                        (int(bbox[0]), int(bbox[1] - 1)), font, 0.6, (0, 0, 0),
                        1)

            #store patterns of individual fish
            pattern = get_patterns(c_curr, track.track_id, class_name)
            pre_p = c_curr
            #Draw the patterns on the screen
            for p in pattern[-50::5]:
                cv2.circle(img, p, 3, color, -1)
                if pre_p != c_curr:
                    cv2.line(img, pre_p, p, color, 1)
                pre_p = p

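            # compare the current centre with the previously stored point to infer
            # the swimming direction relative to the midline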
            if len(pattern) >= 2:
                moving2right = center_x > pattern[-2][0]
                on_screen_left = pattern[-2][0] < midline_pos_x
                moving2left = center_x < pattern[-2][0]
                on_screen_right = pattern[-2][0] > midline_pos_x

                if (class_name == 'Koi') and on_screen_left:
                    screen1_koi += 1
                    if moving2right and center_x > midline_pos_x:
                        left2right_koi += 1
                if (class_name == 'Tilapia') and on_screen_left:
                    screen1_til += 1
                    if moving2right and center_x > midline_pos_x:
                        left2right_til += 1
                if (class_name == 'Koi') and on_screen_right:
                    screen2_koi += 1
                    if moving2left and center_x < midline_pos_x:
                        right2left_koi += 1
                if (class_name == 'Tilapia') and on_screen_right:
                    screen2_til += 1
                    if moving2left and center_x < midline_pos_x:
                        right2left_til += 1

        ### UNCOMMENT BELOW IF YOU WANT CONSTANTLY CHANGING YOLO DETECTIONS TO BE SHOWN ON SCREEN
        # for det in detections:
        #     bbox = det.to_tlbr()
        #     cv2.rectangle(img,(int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),(255,0,0), 2)

        # Print the instantaneous numbers detected on the screens
        cv2.putText(img, "Koi : " + str(screen1_koi), (20, 30), font, 0.7,
                    (30, 50, 205), 2)
        cv2.putText(img, "Tla : " + str(screen1_til), (20, 70), font, 0.7,
                    (255, 0, 0), 2)

        cv2.putText(img, "Koi : " + str(screen2_koi), (int(width) - 120, 30),
                    font, 0.7, (30, 50, 205), 2)
        cv2.putText(img, "Tla : " + str(screen2_til), (int(width) - 120, 70),
                    font, 0.7, (255, 0, 0), 2)

        #Print left2right and right2left counts and total of them
        cv2.putText(img,
                    str(right2left_koi) + " <-- Koi",
                    (midline_pos_x - 75, int(height) - 30), font, 0.7,
                    (0, 0, 0), 2)
        cv2.putText(img,
                    str(right2left_til) + " <-- Tla",
                    (midline_pos_x - 75, int(height) - 70), font, 0.7,
                    (0, 0, 0), 2)

        cv2.putText(img, "Koi  --> " + str(left2right_koi),
                    (midline_pos_x - 55, 30), font, 0.7, (0, 0, 0), 2)
        cv2.putText(img, "Tla  --> " + str(left2right_til),
                    (midline_pos_x - 55, 70), font, 0.7, (0, 0, 0), 2)

        cv2.putText(img, "Total L2R : " + str(left2right_koi + left2right_til),
                    (int(width) - 200, int(height) - 30), font, 0.7, (0, 0, 0),
                    2)
        cv2.putText(img, "Total R2L : " + str(right2left_koi + right2left_til),
                    (int(width) - 200, int(height) - 70), font, 0.7, (0, 0, 0),
                    2)

        # print fps on screen
        fps = (fps + (1. / (time.time() - t1))) / 2
        cv2.putText(img, "FPS: {:.2f}".format(fps), (20, int(height) - 30),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (200, 0, 100), 2)

        if FLAGS.output:
            out.write(img)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(converted_boxes) != 0:
                for i in range(0, len(converted_boxes)):
                    list_file.write(
                        str(converted_boxes[i][0]) + ' ' +
                        str(converted_boxes[i][1]) + ' ' +
                        str(converted_boxes[i][2]) + ' ' +
                        str(converted_boxes[i][3]) + ' ')
            list_file.write('\n')

        img = cv2.resize(img, (1200, 720))
        cv2.imshow('output', img)
        # press q to quit
        if cv2.waitKey(1) == ord('q'):
            break
    vid.release()
    if FLAGS.output:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
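
# get_patterns() is used above but not defined in this listing. A minimal sketch of
# one possible implementation, assuming a module-level history keyed by track id
# (the class_name argument is accepted for interface compatibility but unused here):
_track_history = {}

def get_patterns(center, track_id, class_name, max_len=200):
    history = _track_history.setdefault(track_id, [])
    history.append(center)
    if len(history) > max_len:
        del history[:-max_len]  # keep only the most recent points
    return history
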
def main(_argv):
    # PARAMS
    max_cosine_distance = 0.5
    nn_budget = None
    nms_max_overlap = 1.0
    pure_yolo = False

    AVG_PERSON_HEIGHT = 1.7  # meters
    DANGER_THRESHOLD = 3.0  # meters

    #initialize deep sort
    output_name = FLAGS.output
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    yolo.load_weights(FLAGS.weights)
    logging.info('weights loaded')

    class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')

    try:
        vid = cv2.VideoCapture(int(FLAGS.video))
    except:
        vid = cv2.VideoCapture(FLAGS.video)

    out = None

    if output_name:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(output_name, codec, fps,
                              (width + width // 3 + 3, height))
        list_file = open('detection.txt', 'w')
        frame_index = -1

    fps = 0.0
    count = 0
    setup = False

    while True:
        _, img = vid.read()

        if img is None:
            logging.warning("Empty Frame")
            time.sleep(0.1)
            count += 1
            if count < 3:
                continue
            else:
                break

        image = img.copy()
        h_image, w_image = image.shape[:2]

        if not setup:
            mouse_pts = pu.get_reference_pts_by_ui(image, pu.ui_callback)

            cv2.namedWindow("Worker Monitoring", cv2.WINDOW_NORMAL)
            cv2.resizeWindow("Worker Monitoring", 1000, 700)

            # points of reference and ROI chosen by UI
            ref_pts = np.array(mouse_pts[:4])
            ref_len_pts = np.array(mouse_pts[4:6])
            roi = mouse_pts[6:]

            # length between reference points
            w_dst = max(pu.euclidean(ref_pts[0], ref_pts[2]),
                        pu.euclidean(ref_pts[1], ref_pts[3]))
            ref_len = pu.euclidean(ref_len_pts[0], ref_len_pts[1])

            # calculating parallel vectors of lines
            c_1 = pu.get_perpendicular_vector(mouse_pts[0],
                                              mouse_pts[1],
                                              direction='ccw',
                                              magnitude=w_dst)
            c_2 = pu.get_perpendicular_vector(mouse_pts[1],
                                              mouse_pts[0],
                                              direction='cw',
                                              magnitude=w_dst)

            # getting the transformation matrix between the original reference
            # and the perpendicular "corrected" points
            dst = [ref_pts[0], ref_pts[1], c_2, c_1]
            new_M, Ht, borders = pu.get_homography_matrix(ref_pts, dst, roi)

            setup = True

        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, FLAGS.size)

        t1 = time.time()
        boxes, scores, classes, nums = yolo.predict(img_in)
        classes = classes[0]
        names = []
        for i in range(len(classes)):
            names.append(class_names[int(classes[i])])
        names = np.array(names)
        converted_boxes = convert_boxes(img, boxes[0])
        features = encoder(img, converted_boxes)
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                converted_boxes, scores[0], names, features)
        ]

        #initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maxima suppression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes,
                                                    nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        obj_pts = []
        obj_classes = []

        if not pure_yolo:
            for track in tracker.tracks:
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                bbox = track.to_tlbr()
                class_name = track.get_class()
                color = colors[int(track.track_id) % len(colors)]
                color = [i * 255 for i in color]

                cntr_x = (bbox[0] + bbox[2]) / 2
                cntr_y = (bbox[1] + bbox[3]) / 2
                obj_pts.append([cntr_x, cntr_y])
                obj_classes.append(str(class_name))
        else:
            for det in detections:
                bbox = det.to_tlbr()
                class_name = det.get_class()

                cntr_x = (bbox[0] + bbox[2]) / 2
                cntr_y = (bbox[1] + bbox[3]) / 2
                obj_pts.append([cntr_x, cntr_y])
                obj_classes.append(str(class_name))

        if len(obj_pts) == 0:
            temp_canvas = np.zeros((h_image, w_image + w_image // 3 + 3, 3),
                                   dtype='uint8')
            temp_canvas[:, w_image // 3 + 3:, :] = image
            cv2.imshow('Worker Monitoring', temp_canvas)

            if cv2.waitKey(1) == ord('q'):
                break
            continue

        obj_pts = np.array(obj_pts, dtype='float32').reshape(-1, 1, 2)

        # transforming the object coordinates and the reference length points
        transformed_obj_pts = cv2.perspectiveTransform(
            obj_pts, new_M).astype('int').reshape(-1, 2)

        # filtering the points that are not in the ROI
        valid_pts, valid_classes = pu.remove_objects_off_limits(
            transformed_obj_pts, obj_classes)

        px_per_meter = ref_len / AVG_PERSON_HEIGHT

        indices_in_danger, classes_in_danger = pu.detect_in_danger(
            valid_pts, valid_classes, px_per_meter, DANGER_THRESHOLD)

        final_visualization = pu.visualize(image, borders, valid_pts, obj_pts,
                                           indices_in_danger, Ht)

        cv2.namedWindow("Worker Monitoring", cv2.WINDOW_NORMAL)
        cv2.resizeWindow("Worker Monitoring", 1000, 700)
        cv2.imshow("Worker Monitoring", final_visualization)

        if output_name:
            out.write(final_visualization)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(converted_boxes) != 0:
                for i in range(0, len(converted_boxes)):
                    list_file.write(
                        str(converted_boxes[i][0]) + ' ' +
                        str(converted_boxes[i][1]) + ' ' +
                        str(converted_boxes[i][2]) + ' ' +
                        str(converted_boxes[i][3]) + ' ')
            list_file.write('\n')

        # press q to quit
        if cv2.waitKey(1) == ord('q'):
            cv2.destroyAllWindows()
            break

    vid.release()
    if output_name:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
Example #7
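# get_bboxes() runs YOLOv3 + DeepSORT on a single frame and returns the confirmed
# track boxes; main() below estimates a per-person speed from the displacement of
# bounding-box centres between consecutive frames.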
def get_bboxes(img):
    # YOLO START
    img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img_in = tf.expand_dims(img_in, 0)
    img_in = transform_images(img_in, 416)

    # print("-------------------------------")
    # print(type(img_in)) # <class 'tensorflow.python.framework.ops.EagerTensor'>
    # print(img_in.shape) # (1, 416, 416, 3)
    # print(img_in.dtype) # <dtype: 'float32'>
    # print("-------------------------------")

    boxes, scores, classes, nums = yolo.predict(img_in)

    classes = classes[0]
    names = []
    for i in range(len(classes)):
        names.append(class_names[int(classes[i])])

    names = np.array(names)

    converted_boxes = convert_boxes(img, boxes[0])

    features = encoder(img, converted_boxes)

    detections = [
        Detection(bbox, score, class_name,
                  feature) for bbox, score, class_name, feature in zip(
                      converted_boxes, scores[0], names, features)
    ]

    boxs = np.array([d.tlwh for d in detections])
    scores = np.array([d.confidence for d in detections])
    classes = np.array([d.class_name for d in detections])
    indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap,
                                                scores)
    detections = [detections[i] for i in indices]

    # p.print(detections)    # => a list of deep_sort.detection.Detection objects
    # p.type_(detections)    # TYPE => <class 'list'>
    # p.print(detections[1]) # => a single deep_sort.detection.Detection object

    tracker.predict()
    tracker.update(detections)

    # Matplotlib has a number of built-in colormaps accessible via matplotlib.cm.get_cmap.
    cmap = plt.get_cmap('tab20b')
    colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

    # Current vehicle count
    current_count = int(0)

    # loop over all tracker results (runs once per vehicle in a single frame)
    bboxes = []
    for track in tracker.tracks:
        if not track.is_confirmed() or track.time_since_update > 1:
            continue
        bbox = track.to_tlbr()  # [848.78925062 113.98058018 901.1299524  144.32627563]
        # class_name = track.get_class() # car (object name)
        # color = colors[int(track.track_id) % len(colors)] # (0.807843137254902, 0.8588235294117647, 0.611764705882353)
        # color = [i * 255 for i in color] # [231.0, 203.0, 148.0]
        bboxes.append(bbox)

        # img => frame taken from the video (np ndarray)

        # Draw the bounding box
        # cv2.rectangle(img, (int(bbox[0]),int(bbox[1])), (int(bbox[2]),int(bbox[3])), color, 2)
        # #cv2.rectangle(img, (int(bbox[0]), int(bbox[1]-30)), (int(bbox[0])+(len(class_name)
        #             #+len(str(track.track_id)))*17, int(bbox[1])), color, -1)
        # cv2.putText(img, class_name+"-"+str(track.track_id), (int(bbox[0]), int(bbox[1]-10)), 0, 0.75,
        #             (255, 255, 255), 2)
    return bboxes
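
# A minimal usage sketch for get_bboxes() above. It assumes the module-level yolo,
# class_names, encoder, tracker and nms_max_overlap that get_bboxes relies on have
# already been initialised; the video path is a placeholder:
def _demo_draw_tracked_boxes(video_path="traffic.mp4"):
    cap = cv2.VideoCapture(video_path)
    while True:
        ok, frame = cap.read()
        if not ok:
            break
        for x1, y1, x2, y2 in get_bboxes(frame):
            cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)),
                          (0, 255, 0), 2)
        cv2.imshow('tracked', frame)
        if cv2.waitKey(1) == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()
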
def main(_argv):
    # Definition of the parameters
    max_cosine_distance = 0.5
    nn_budget = None
    nms_max_overlap = 1.0

    #initialize deep sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    yolo.load_weights(FLAGS.weights)
    logging.info('weights loaded')

    class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')

    try:
        vid = cv2.VideoCapture(int(FLAGS.video))
    except:
        vid = cv2.VideoCapture(FLAGS.video)

    out = None

    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))
        list_file = open('detection.txt', 'w')
        frame_index = -1

    fps = 0.0
    count = 0
    frame_count = 0
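    # per-track state: current and previous frame boxes, accumulated speeds, and the
    # box height when first seen (used to normalise speed for distance from camera)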
    dict = {}
    previous_dict = {}
    speed_dict = {}
    tracked_objects = []
    first_frame_hgt = {}
    while True:
        _, img = vid.read()
        print("The length of a tracked objects " + str(len(tracked_objects)))
        frame_count = frame_count + 1
        if img is None:
            logging.warning("Empty Frame")
            time.sleep(0.1)
            count += 1
            if count < 3:
                continue
            else:
                break

        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, FLAGS.size)

        t1 = time.time()
        boxes, scores, classes, nums = yolo.predict(img_in)
        #print("Before the length of the classes is "+str(len(classes)))
        classes = classes[0]
        #print("the class name is "+str(classes[0]))
        #print("after the length of the classes is "+str(len(classes)))
        names = []
        for i in range(len(classes)):
            names.append(class_names[int(classes[i])])
        names = np.array(names)
        converted_boxes = convert_boxes(img, boxes[0])
        features = encoder(img, converted_boxes)
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                converted_boxes, scores[0], names, features)
        ]

        #initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maxima suppression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes,
                                                    nms_max_overlap, scores)
        print("printing the detected class name " + str(classes[0]))
        detections = [detections[i] for i in indices if classes[i] == 'person']

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        previous_dict = dict
        dict = {}
        for track in tracker.tracks:
            pt_distance = None
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()
            if class_name != 'person':
                continue
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), color, 2)
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1] - 30)),
                          (int(bbox[0]) +
                           (len(class_name) + len(str(track.track_id))) * 17,
                           int(bbox[1])), color, -1)
            #cv2.putText(img, class_name + "-" + str(track.track_id),(int(bbox[0]), int(bbox[1]-10)),0, 0.75, (255,255,255),2)
            dict[(class_name + str(track.track_id))] = (int(bbox[0]),
                                                        int(bbox[1]),
                                                        int(bbox[2]),
                                                        int(bbox[3]))

            if not (class_name + str(track.track_id)) in tracked_objects:
                tracked_objects.append(class_name + str(track.track_id))
                speed_dict.update({(class_name + str(track.track_id)): []})
                first_frame_hgt[class_name + str(track.track_id)] = int(
                    (bbox[3] - bbox[1]))
                print("the height calculated is " +
                      str(int((bbox[3] - bbox[1]))))
                #class_name+str(track.track_id)+str("_speed") =[]

            if (class_name + str(track.track_id)) in dict.keys():
                if (class_name + str(track.track_id)) in previous_dict.keys():
                    a, b, c, d = dict[(class_name + str(track.track_id))]
                    current_fr_ctr = a + 1 / 2 * (c - a), b + 1 / 2 * (d - b)
                    a1, b1, c1, d1 = previous_dict[(class_name +
                                                    str(track.track_id))]
                    prv_fr_ctr = a1 + 1 / 2 * (c1 - a1), b1 + 1 / 2 * (d1 - b1)
                    #pt_distance = math.sqrt(sum([(a - b) ** 2 for a, b in zip(dict[(class_name+str(track.track_id))], previous_dict[(class_name+str(track.track_id))])]))

                    # calculating the distance between the bounding box centers of two adjacent frames
                    pt_distance = math.sqrt(
                        sum([(a - b)**2
                             for a, b in zip(current_fr_ctr, prv_fr_ctr)]))
                    frame_rate = 12
                    image_aspect_ratio = 18
                    current_frame_height = int((bbox[3] - bbox[1]))

                    #calculating the speed, adjusting for the patron's distance from camera and metre vs pixel aspect ratio
                    cur_fr_spd = pt_distance * frame_rate * (
                        first_frame_hgt[class_name + str(track.track_id)]
                    ) / current_frame_height / image_aspect_ratio
                    print(pt_distance)
                    print("the speed calculated is " + str(cur_fr_spd))
                    speed_dict[(class_name +
                                str(track.track_id))].append(pt_distance)

                    #np.sqrt((a-a1)**2+(b-b1)**2+(c-c1)+(d-d1)
                    #(class_name+str(track.track_id)+str("_speed")).append((a-a1)+(b-b1)+(c-c1)+(d-d1))
            print(class_name + str(track.track_id))
            print(dict[(class_name + str(track.track_id))])
            print("speed dict length is " +
                  str(len(speed_dict[(class_name + str(track.track_id))])))
            if pt_distance is not None:
                cv2.putText(
                    img, class_name + "-" + str(track.track_id) + "-" +
                    '%.2f' % cur_fr_spd, (int(bbox[0]), int(bbox[1] - 10)), 0,
                    0.75, (255, 255, 255), 2)
            else:
                cv2.putText(img, class_name + "-" + str(track.track_id),
                            (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75,
                            (255, 255, 255), 2)
        print("the dict size is " + " for the frame " + str(frame_count) +
              " " + str(len(dict)))
        print("the dict size is " + " for the previous  frame " +
              str(frame_count) + " " + str(len(previous_dict)))

        ### UNCOMMENT BELOW IF YOU WANT CONSTANTLY CHANGING YOLO DETECTIONS TO BE SHOWN ON SCREEN
        #for det in detections:
        #    bbox = det.to_tlbr()
        #    cv2.rectangle(img,(int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),(255,0,0), 2)

        # print fps on screen
        fps = (fps + (1. / (time.time() - t1))) / 2
        cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
        cv2.imshow('output', img)
        if FLAGS.output:
            out.write(img)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(converted_boxes) != 0:
                for i in range(0, len(converted_boxes)):
                    list_file.write(
                        str(converted_boxes[i][0]) + ' ' +
                        str(converted_boxes[i][1]) + ' ' +
                        str(converted_boxes[i][2]) + ' ' +
                        str(converted_boxes[i][3]) + ' ')
            list_file.write('\n')

        # press q to quit
        if cv2.waitKey(1) == ord('q'):
            break
    vid.release()
    if FLAGS.output:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
Example #9
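# YOLOv3 + DeepSORT social-distancing monitoring: for every pair of tracked persons
# it checks whether their personal-space circles intersect and, if so, marks both as
# violators and links them with a red line.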
def main(argv):
    # print("location recieved in main as: ", e)
    ###################################
    global VIOLATION_PERCENTAGE, PROCESSING_STATUS, VIOLATION_FRAME
    violator_count_list = list()
    ###################################
    # Definition of the parameters
    max_cosine_distance = 0.5
    nn_budget = None
    nms_max_overlap = 1.0

    #initialize deep sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    yolo = YoloV3(classes=80)

    yolo.load_weights('./weights/yolov3.tf')
    logging.info('weights loaded')

    class_names = [c.strip() for c in open('./coco.names').readlines()]
    logging.info('classes loaded')
    video_path = 'test.mkv'

    try:
        vid = cv2.VideoCapture(int(FILE_URL))
    except:
        vid = cv2.VideoCapture(FILE_URL)
    time.sleep(1.0)

    out = None

    width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    print("height: ", height)
    print("width: ", width)
    fps = int(vid.get(cv2.CAP_PROP_FPS))
    codec = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter('./result.avi', codec, fps, (width, height))
    frame_index = -1
    fps = 0.0
    count = 0
    PROCESSING_STATUS = True
    while True:
        _, img = vid.read()
        if img is None:
            logging.warning("Empty Frame")
            time.sleep(0.1)
            count += 1
            if count < 3:
                continue
            else:
                break
        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, 416)
        temp_violators = set()
        temp_total_people = set()
        t1 = time.time()
        boxes, scores, classes, nums = yolo.predict(img_in)
        classes = classes[0]
        names = []
        for i in range(len(classes)):
            names.append(class_names[int(classes[i])])
        names = np.array(names)
        converted_boxes = convert_boxes(img, boxes[0])
        features = encoder(img, converted_boxes)
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                converted_boxes, scores[0], names, features)
        ]

        #initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maxima suppression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes,
                                                    nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            class_name1 = track.get_class()
            if class_name1 == "person":
                temp_total_people.add(track.track_id)
                bbox1 = track.to_tlbr()
                x1_c = int(bbox1[0] + (bbox1[2] - bbox1[0]) / 2)
                y1_c = int(bbox1[1] + (bbox1[3] - bbox1[1]) / 2)
                r1 = int(abs(bbox1[3] - bbox1[1]))
                color = (255, 0, 0)
                cv2.line(img, (x1_c, y1_c), (x1_c, y1_c + r1 // 2),
                         (0, 255, 0), 2)
                cv2.circle(img, (x1_c, y1_c), 5, (255, 20, 200), -1)
                scale = (r1) / 100
                transparentOverlay(img,
                                   dst_circle, (x1_c, y1_c - 5),
                                   alphaVal=110,
                                   color=(0, 200, 20),
                                   scale=scale)
                for other in tracker.tracks:
                    if not other.is_confirmed() or other.time_since_update > 1:
                        continue
                    if track.track_id == other.track_id:
                        continue

                    class_name2 = other.get_class()
                    if class_name2 == "person":
                        temp_total_people.add(other.track_id)
                        bbox2 = other.to_tlbr()
                        x2_c = int(bbox2[0] + (bbox2[2] - bbox2[0]) / 2)
                        y2_c = int(bbox2[1] + (bbox2[3] - bbox2[1]) / 2)
                        r2 = int(abs(bbox2[3] - bbox2[1]))
                        if int_circle(x1_c, y1_c, x2_c, y2_c, r1 // 2, r2 //
                                      2) >= 0 and abs(y1_c - y2_c) < r1 // 4:
                            temp_violators.add(track.track_id)
                            temp_violators.add(other.track_id)
                            cv2.line(img, (x1_c, y1_c), (x2_c, y2_c),
                                     (0, 0, 255), 2)
                            scale1 = (r1) / 100
                            transparentOverlay(img,
                                               dst_circle, (x1_c, y1_c - 5),
                                               alphaVal=110,
                                               color=(0, 0, 255),
                                               scale=scale1)
                            scale2 = (r2) / 100
                            transparentOverlay(img,
                                               dst_circle, (x2_c, y2_c - 5),
                                               alphaVal=110,
                                               color=(0, 0, 255),
                                               scale=scale2)

        # print fps on screen (running average of the instantaneous frame rate)
        ### Comment out the 3 lines below to disable the live output window
        fps = (fps + (1. / (time.time() - t1))) / 2
        cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
        cv2.imshow('output', img)

        ### Violators calculation
        violators_for_frame = len(temp_violators)
        VIOLATION_PERCENTAGE = violators_for_frame
        print("Violation percentage: ", violators_for_frame)
        violator_count_list.append(int(violators_for_frame))
        ###
        ### Call to firebase upload function
        # if violators_for_frame > 20:
        #     social_dist_violation_frame_handler(img)
        #     cv2.imwrite("temp.png",img)
        #     firebase_upload("temp.png")
        #     os.remove("temp.png")

        frame_index = frame_index + 1

        # press q to quit
        if cv2.waitKey(1) == ord('q'):
            break
    vid.release()
    if len(violator_count_list) == 0:
        mean_violation = 0
    else:
        mean_violation = sum(violator_count_list) / len(violator_count_list)
    PROCESSING_STATUS = False
    out.release()
    cv2.destroyAllWindows()
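
Beispiel #9 depends on helpers that are not part of the snippet (int_circle, transparentOverlay, dst_circle). A minimal sketch of what int_circle presumably is, judging from the call site above: a circle-intersection test that returns a non-negative value when the two circles overlap. This is an assumption, not the original implementation.

import math

def int_circle(x1, y1, x2, y2, r1, r2):
    # Distance between the two circle centres.
    d = math.hypot(x2 - x1, y2 - y1)
    # >= 0 when the circles of radii r1 and r2 intersect or touch, < 0 otherwise.
    return (r1 + r2) - d
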
Beispiel #10
0
def main(_argv):
    # Definition of the parameters
    max_cosine_distance = 0.5
    nn_budget = None
    nms_max_overlap = 1.0

    #initialize deep sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    yolo.load_weights(FLAGS.weights)
    logging.info('weights loaded')

    class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')

    length = 0
    try:
        vid = cv2.VideoCapture(int(FLAGS.video))
    except:
        vid = cv2.VideoCapture(FLAGS.video)
    print('Opened video ', FLAGS.video, '. W x H ',
          int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)), ' x ',
          int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    length = int(vid.get(cv2.CAP_PROP_FRAME_COUNT))
    out = None

    dir_prefix = os.path.splitext(FLAGS.video)[0]
    vid_name = dir_prefix.replace('static/', '')
    if os.path.exists(dir_prefix):
        shutil.rmtree(dir_prefix)
        os.mkdir(dir_prefix)
    else:
        os.mkdir(dir_prefix)
    dir_prefix += '/'


    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))
        list_file = open('detection.txt', 'w')
        frame_index = -1
        start_time = datetime.min
        bike_set = set()
        bike_list = []

        bike_dict = {
            'frame': 0,
            'id': 0,
            'finish_time': start_time,
            'Recognitions': 'default',
            'Recognised_plate': 'default',
            'plate_number': 'XXX',  # 'appearance_num':0,
            'Bike_image': None,
            'Full_frame': None,
            'Image_name': 'default',
            'Full_image_name': 'default'
        }

        # This is needed to write image fragments on disk
        img_name = 'default'
        full_img_name = 'default'

    fps = 0.0
    count = 0
    frame_num = -1
    while True:
        _, img = vid.read()
        frame_num += 1
        if img is None:
            logging.warning("Empty Frame")
            time.sleep(0.1)
            count += 1
            if count < 3:
                continue
            else:
                break

        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, FLAGS.size)

        t1 = time.time()
        boxes, scores, classes, nums = yolo.predict(img_in)
        classes = classes[0]
        names = []
        for i in range(len(classes)):
            names.append(class_names[int(classes[i])])
        names = np.array(names)
        converted_boxes = convert_boxes(img, boxes[0])
        features = encoder(img, converted_boxes)
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                converted_boxes, scores[0], names, features)
        ]

        #initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maxima suppression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes,
                                                    nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()
            if class_name != 'bicycle':
                continue
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), color, 2)
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1] - 30)),
                          (int(bbox[0]) +
                           (len(class_name) + len(str(track.track_id))) * 17,
                           int(bbox[1])), color, -1)
            cv2.putText(img, class_name + "-" + str(track.track_id),
                        (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75,
                        (255, 255, 255), 2)

            # Routines for bike detection at finish
            if not (track.track_id in bike_set):
                bike_set.add(track.track_id)
                input_fps = int(vid.get(cv2.CAP_PROP_FPS))
                dt = start_time + timedelta(seconds=frame_num / input_fps)
                bike_list.append(
                    dict(bike_dict,
                         frame=frame_num,
                         finish_time=str(dt.time()),
                         id=track.track_id))
            else:
                # width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
                # height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
                for item in bike_list:
                    if item['id'] == track.track_id and 600 < bbox[3] < 800:
                        # Clear previously written images, something like overwrite them
                        previous_image_name = item['Image_name']
                        previous_full_image_name = item['Full_image_name']
                        if os.path.exists(previous_image_name):
                            os.remove(previous_image_name)
                        if os.path.exists(previous_full_image_name):
                            os.remove(previous_full_image_name)
                        # Edit finish_time
                        input_fps = int(vid.get(cv2.CAP_PROP_FPS))
                        dt = start_time + timedelta(seconds=frame_num /
                                                    input_fps)
                        item['finish_time'] = str(dt.time())[:-3]
                        # Save image and put to bike dict
                        img_name = str(frame_num) + '_' + str(
                            item['id']) + '.jpg'
                        full_img_name = str(frame_num) + '_fullframe.jpg'
                        cv2.imwrite(
                            dir_prefix + img_name,
                            img[int(bbox[1]):int(bbox[3]),
                                int(bbox[0]):int(bbox[2])])
                        cv2.imwrite(dir_prefix + full_img_name, img)
                        item[
                            'Bike_image'] = '<img src="../static/' + vid_name + '/' + img_name + '" width="200" >'
                        item[
                            'Full_frame'] = '<img src="../static/' + vid_name + '/' + full_img_name + '" width="500" >'
                        item[
                            'Recognitions'] = '<img src="../static/' + vid_name + '/' + 'res_' + img_name + '" width="200" >'
                        item[
                            'Recognised_plate'] = '<img src="../static/' + vid_name + '/' + 'plate_' + img_name + '" width="200" >'
                        item['Image_name'] = dir_prefix + img_name
                        item['Full_image_name'] = dir_prefix + full_img_name
                        # Update appearance and frame_num
                        # item['appearance_num']+=1
                        item['frame'] = frame_num
                        break

                    ### UNCOMMENT BELOW IF YOU WANT CONSTANTLY CHANGING YOLO DETECTIONS TO BE SHOWN ON SCREEN
                    # for det in detections:
                    #    bbox = det.to_tlbr()
                    #    cv2.rectangle(img,(int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),(255,0,0), 2)

                    # print fps on screen and to commandline
                fps = (fps + (1. / (time.time() - t1))) / 2
                if frame_num % 15 == 0:
                    time_left = int((length - frame_num) / fps)

                # if FLAGS.output:
                # out.write(img)
                # frame_index = frame_index + 1
                # list_file.write(str(frame_index)+' ')
                # if len(converted_boxes) != 0:
                #    for i in range(0,len(converted_boxes)):
                #        list_file.write(str(converted_boxes[i][0]) + ' '+str(converted_boxes[i][1])
                #                        + ' '+str(converted_boxes[i][2]) + ' '+str(converted_boxes[i][3]) + ' ')
                # list_file.write('\n')

        # press q to quit

    vid.release()
    if FLAGS.output:
        out.release()

        df = pd.DataFrame(bike_list)
        df = df.dropna()
        if df.empty:
            print('The resulting dataframe is empty')
        else:
            del df['Image_name']
            del df['Full_image_name']
            df = df.sort_values(by='finish_time', ascending=True)
            df.reset_index(drop=True)

            engine = sqlalchemy.create_engine(database_url)
            table_name = 'table_' + vid_name
            df.to_sql(table_name, engine, method='multi')

        list_file.close()
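
The finish time stored for each bike above is derived purely from the frame counter: frame_num divided by the capture FPS gives the elapsed seconds, which is added to start_time as a timedelta. A standalone illustration of that arithmetic with made-up values:

from datetime import datetime, timedelta

start_time = datetime.min
frame_num = 450           # hypothetical frame number
input_fps = 30            # hypothetical capture frame rate

dt = start_time + timedelta(seconds=frame_num / input_fps)
print(str(dt.time()))     # 00:00:15 for 450 frames at 30 fps
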
Beispiel #11
0
def main(_argv):

    class_names = [c.strip() for c in open('coco.names').readlines()]
    # class_names=['car', 'truck','bus', 'bicycle','motorbike']
    yolo = YoloV3(classes=len(class_names))
    yolo.load_weights('./weights/yolov3.tf')

    max_cosine_distance = 0.5
    nn_budget = None
    nms_max_overlap = 0.8

    model_filename = 'mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric('cosine',
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    try:
        vid = cv2.VideoCapture(int(FLAGS.video))
    except:
        vid = cv2.VideoCapture(FLAGS.video)

    out = None

    if FLAGS.output:
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        vid_fps = int(vid.get(cv2.CAP_PROP_FPS))
        vid_width, vid_height = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)), int(
            vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        out = cv2.VideoWriter(FLAGS.output, codec, vid_fps,
                              (vid_width, vid_height))

    from collections import deque
    pts = [deque(maxlen=30) for _ in range(1000)]

    counter = []

    while True:
        _, img = vid.read()
        if img is None:
            print('Completed')
            break

        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, 416)

        t1 = time.time()

        boxes, scores, classes, nums = yolo.predict(img_in)

        classes = classes[0]
        names = []
        for i in range(len(classes)):

            names.append(class_names[int(classes[i])])
        names = np.array(names)
        converted_boxes = convert_boxes(img, boxes[0])
        features = encoder(img, converted_boxes)

        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                converted_boxes, scores[0], names, features)
        ]

        boxs, scores, classes = [], [], []
        f = ['car', 'truck', 'bus', 'bicycle', 'motorbike']
        for d in detections:
            if d.class_name in f:
                boxs.append(d.tlwh)
                scores.append(d.confidence)
                classes.append(d.class_name)

        boxs = np.array(boxs)
        scores = np.array(scores)
        classes = np.array(classes)
        indices = preprocessing.non_max_suppression(boxs, classes,
                                                    nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        tracker.predict()
        tracker.update(detections)

        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        for track in tracker.tracks:
            if track.class_name in f:
                # print("new track")

                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                bbox = track.to_tlbr()
                class_name = track.get_class()
                color = colors[int(track.track_id) % len(colors)]
                color = [i * 255 for i in color]

                cv2.rectangle(img, (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])), color, 2)

                center = (int(((bbox[0]) + (bbox[2])) / 2),
                          int(((bbox[1]) + (bbox[3])) / 2))
                pts[track.track_id].append(center)

                for j in range(1, len(pts[track.track_id])):
                    if pts[track.track_id][j - 1] is None or pts[
                            track.track_id][j] is None:
                        continue
                    thickness = int(np.sqrt(64 / float(j + 1)) * 2)
                    cv2.line(img, (pts[track.track_id][j - 1]),
                             (pts[track.track_id][j]), color, thickness)

                height, width, _ = img.shape
                # print("p",height,width)
                # print(int(3*height/6+height/20))

                oo = [int(x) for x in FLAGS.line_coordinates]
                print(oo)
                cv2.line(img, (oo[0], oo[1]), (oo[2], oo[3]), (0, 255, 0),
                         thickness=2)

                center_y = int(((bbox[1]) + (bbox[3])) / 2)

                if center_y <= int(3 * height / 6 + height /
                                   20) and center_y >= int(3 * height / 6 -
                                                           height / 20):

                    counter.append(int(track.track_id))

                    print(int(track.track_id))

        total_count = len(set(counter))
        h, w = img.shape[0:2]
        img[0:70, 0:500] = [0, 0, 0]

        cv2.putText(img, "Total Vehicle Count: " + str(total_count), (7, 56),
                    cv2.FONT_HERSHEY_SIMPLEX, 1.25, (255, 255, 255), 2)

        cv2.namedWindow('output', cv2.WINDOW_NORMAL)  # required for resizeWindow to take effect
        cv2.resizeWindow('output', 1024, 768)
        cv2.imshow('output', img)
        if out is not None:
            out.write(img)

        if cv2.waitKey(1) == ord('q'):
            break
    vid.release()
    if out is not None:
        out.release()
    cv2.destroyAllWindows()
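
Beispiel #11 counts a vehicle once its bounding-box centre falls inside a horizontal band centred at 3 * height / 6 (the middle of the frame) with a half-width of height / 20, and de-duplicates track ids with set(counter). A small sketch of that band test in isolation, under the same geometry:

def in_counting_band(center_y, height):
    # Band centred at half the frame height, half-width of height / 20.
    band_center = 3 * height / 6
    half_width = height / 20
    return band_center - half_width <= center_y <= band_center + half_width

counted_ids = set()
# counted_ids.add(track_id) whenever in_counting_band(center_y, height) is True
total_count = len(counted_ids)
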
Beispiel #12
0
    def show_frame(self):
        global running
        _, frame = self.cap.read()
        # frame = cv2.flip(frame, 0)
        self.numframes = self.numframes + 1
        print(self.numframes)
        frame = imutils.resize(frame, width=width_screen - 300)

        print(self.pause)

        #cv2image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        # Image.fromarray( obj , mode = None )
        # obj - Object exposing the array interface
        # mode - Mode to use (determined from the type if None). See:
        # img1=img
        # img1 = imutils.resize(img1, width=900)

        img1 = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        img_in = tf.expand_dims(img1, 0)
        img_in = transform_images(img_in, FLAGS.size)
        # img_in = Image.fromarray(img_in);

        # original: shape=(1, 288, 288, 3)
        # simple_example(MISTURA_COM_CV2).py: shape=(1, 288, 288, 4)
        boxes, scores, classes, nums = self.yolo.predict(img_in)

        classes = classes[0]
        names = []

        for i in range(len(classes)):
            names.append(self.class_names[int(classes[i])])
        names = np.array(names)
        converted_boxes = convert_boxes(frame, boxes[0])
        features = self.encoder(frame, converted_boxes)
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                converted_boxes, scores[0], names, features)
        ]

        # initialize color map
        #cmap = plt.get_cmap('tab20b')
        #colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maxima suppression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes,
                                                    self.nms_max_overlap,
                                                    scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        self.tracker.predict()
        self.tracker.update(detections)

        cont_objects_positions_id = 0
        cont_objects_positions_x_min = 0
        cont_objects_positions_y_min = 0
        cont_objects_positions_x_max = 0
        cont_objects_positions_y_max = 0

        for track in self.tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()
            #color = colors[int(track.track_id) % len(colors)]
            #color = [i * 255 for i in color]

            # If the track id is in the array of selected player ids, the rectangle is drawn in a different colour
            if self.contain(int(track.track_id)):
                #cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 255), 2)
                #cv2.rectangle(frame, (int(bbox[0]), int(bbox[1] - 30)),
                #              (int(bbox[0]) + (len(str(track.track_id))) * 5, int(bbox[1])),
                #              (255, 0, 255), -1)
                cv2.ellipse(frame,
                            (int(bbox[0] +
                                 ((bbox[2] - bbox[0]) / 2)), int(bbox[3])),
                            (25, 4), 0, 0, 360, (255, 0, 255), 2, 15)

                cv2.putText(frame, str(track.track_id),
                            (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75,
                            (255, 255, 255), 1)
            else:
                #cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (0, 0, 255) , 2)
                #cv2.rectangle(frame, (int(bbox[0]), int(bbox[1] - 30)),
                #              (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])),  (0, 0, 255), -1)
                cv2.ellipse(frame,
                            (int(bbox[0] +
                                 ((bbox[2] - bbox[0]) / 2)), int(bbox[3])),
                            (20, 4), 0, 0, 360, (100, 255, 100), 2, 15)
                cv2.putText(frame, str(track.track_id),
                            (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75,
                            (255, 255, 255), 1)

            # each selected player has its id stored in the id array, its x position in the x array and its y position in the y array
            # self.objects_positions_id.insert(cont_objects_positions_id,track.track_id)
            # self.objects_positions_x.insert(cont_objects_positions_x,int(bbox[0]))
            #self.objects_positions_y.insert(cont_objects_positions_y,int(bbox[1]))
            self.objects_positions_id[
                cont_objects_positions_id] = track.track_id
            self.objects_positions_x_min[cont_objects_positions_x_min] = int(
                bbox[0])
            self.objects_positions_y_min[cont_objects_positions_y_min] = int(
                bbox[1])
            self.objects_positions_x_max[cont_objects_positions_x_max] = int(
                bbox[2])
            self.objects_positions_y_max[cont_objects_positions_y_max] = int(
                bbox[3])

            # Increment the array counters so that each array position corresponds to one player
            cont_objects_positions_id = cont_objects_positions_id + 1
            cont_objects_positions_x_min = cont_objects_positions_x_min + 1
            cont_objects_positions_y_min = cont_objects_positions_y_min + 1
            cont_objects_positions_x_max = cont_objects_positions_x_max + 1
            cont_objects_positions_y_max = cont_objects_positions_y_max + 1

        # Creation of the multiple lines
        def arrayLenght(array):
            cont = len(array) - 1
            while array[cont] == 0 and cont >= 0:
                cont = cont - 1
            return cont + 1

        cont_line_player1_id = 0
        cont_line_player2_id = 0
        if arrayLenght(self.line_player1) > 0:
            print(arrayLenght(self.line_player1))
            while cont_line_player1_id < arrayLenght(self.line_player1):
                player1 = 0
                player2 = 0
                cont = 0

                for n in self.objects_positions_id:
                    if int(n) == int(self.line_player1[cont_line_player1_id]
                                     ) and player1 == 0:
                        player1 = cont
                    if int(n) == int(self.line_player2[cont_line_player2_id]
                                     ) and player2 == 0:
                        player2 = cont
                    cont = cont + 1

                if self.line_player1[
                        cont_line_player1_id] == self.line_player2[
                            cont_line_player2_id]:
                    x_new_player1 = self.objects_positions_x_min[player1] + (
                        (self.objects_positions_x_max[player1] -
                         self.objects_positions_x_min[player1]) / 2)
                    x_new_player2 = self.objects_positions_x_min[player2] + (
                        (self.objects_positions_x_max[player2] -
                         self.objects_positions_x_min[player2]) / 2)

                    cv2.line(frame, (int(x_new_player1),
                                     self.objects_positions_y_max[player1]),
                             (int(x_new_player2),
                              self.objects_positions_y_max[player2]),
                             (0, 125, 255), 5)
                else:
                    x_new_player1 = self.objects_positions_x_min[player1] + (
                        (self.objects_positions_x_max[player1] -
                         self.objects_positions_x_min[player1]) / 2)
                    x_new_player2 = self.objects_positions_x_min[player2] + (
                        (self.objects_positions_x_max[player2] -
                         self.objects_positions_x_min[player2]) / 2)

                    cv2.line(frame, (int(x_new_player1),
                                     self.objects_positions_y_max[player1]),
                             (int(x_new_player2),
                              self.objects_positions_y_max[player2]),
                             (255, 255, 255), 5)
                cont_line_player1_id = cont_line_player1_id + 1
                cont_line_player2_id = cont_line_player2_id + 1

        # creation of the arrows
        if self.frame_arrow_create[0] != 0:
            contador_setas = 0
            while contador_setas < arrayLenght(self.frame_arrow_create):
                start_point = (
                    int(self.coordinates_arrow_x_init[contador_setas]),
                    int(self.coordinates_arrow_y_init[contador_setas]))

                end_point = (
                    int(self.coordinates_arrow_x_final[contador_setas]),
                    int(self.coordinates_arrow_y_final[contador_setas]))

                if int(
                        self.coordinates_arrow_x_final[contador_setas]
                ) == 0 and int(
                        self.coordinates_arrow_y_final[contador_setas]) == 0:
                    end_point = (
                        int(self.coordinates_arrow_x_init[contador_setas]),
                        int(self.coordinates_arrow_y_init[contador_setas]))
                color = (0, 255, 0)
                thickness = 2
                if int(self.numframes) - int(
                        self.frame_arrow_create[contador_setas]) < 25:
                    cv2.arrowedLine(frame, start_point, end_point, color,
                                    thickness)
                contador_setas = contador_setas + 1

            # if FLAGS.output:
        # out.write(img)
        # cv2.imshow('output', img)

        # Zoom applied; self.zoom decides the scale applied to the video
        scale_percent = self.zoom  # percent of original size
        width = int(frame.shape[1] * scale_percent / 100)
        height = int(frame.shape[0] * scale_percent / 100)
        dim = (width, height)
        # resize image
        img = cv2.resize(frame, dim, interpolation=cv2.INTER_AREA)

        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = Image.fromarray(
            img
        )  # Creates an image memory from an object exporting the array interface
        imgtk = ImageTk.PhotoImage(
            image=img
        )  # used to display images (grayscale or true colour) in labels, buttons, canvases and text widgets
        self.lmain.imgtk = imgtk
        self.lmain.configure(image=imgtk)

        key = cv2.waitKey(1)

        self.lmain.bind('<Leave>', self.exit_)
        self.lmain.bind(
            '<Button-1>', self.motion
        )  # when someone clicks on the game view, self.motion is triggered immediately

        if not self.pause:
            self.lmain.after(5, self.show_frame)
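
show_frame calls self.contain(track_id) to decide whether a track belongs to the manually selected players, but that method is not part of the snippet. A minimal sketch of what it might look like, assuming the selected ids are kept in a list such as self.selected_players (a hypothetical attribute name):

    def contain(self, track_id):
        # True if this track id was previously selected by the user.
        return track_id in self.selected_players
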
Beispiel #13
0
def nayanam(_argv):
    # Definition of the parameters
    max_cosine_distance = 0.5
    nn_budget = None
    nms_max_overlap = 1.0

    #initialize deep sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    yolo = YoloV3(classes=80)

    yolo.load_weights(PATH_TO_WEIGHTS)
    print('weights loaded')

    class_names = [c.strip() for c in open(PATH_TO_CLASSES).readlines()]
    print('classes loaded')

    out = None
    fps = 0.0
    count = 0

    vid = cv2.VideoCapture(RTSP_URL)
    while (vid.isOpened()):
        _, img = vid.read()
        if img is None:
            # vid.read() does not raise on a missing frame; it returns (False, None)
            print("Empty frame")
            continue
        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, 416)

        t1 = time.time()
        boxes, scores, classes, nums = yolo.predict(img_in)
        classes = classes[0]
        names = []

        for i in range(len(classes)):
            names.append(class_names[int(classes[i])])

        names = np.array(names)
        converted_boxes = convert_boxes(img, boxes[0])
        features = encoder(img, converted_boxes)
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                converted_boxes, scores[0], names, features)
        ]
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]
        # running NMS
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes,
                                                    nms_max_overlap, scores)
        detections = [detections[i] for i in indices]
        # Deepsort tracker called here
        tracker.predict()
        tracker.update(detections)
        #dump file set here
        # file = open(PATH_TO_RESULTS,'a+')
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]
            if VIDEO_DEBUG == 1:
                cv2.rectangle(img, (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])), color, 2)
                cv2.rectangle(
                    img, (int(bbox[0]), int(bbox[1] - 30)),
                    (int(bbox[0]) +
                     (len(class_name) + len(str(track.track_id))) * 17,
                     int(bbox[1])), color, -1)
                cv2.putText(img, class_name + "-" + str(track.track_id),
                            (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75,
                            (255, 255, 255), 2)
            s = str(track.track_id) + ',' + class_name + ',' + str(int(
                bbox[0])) + ',' + str(int(bbox[1])) + '\n'
            # file.write(s)
            print(s)
        fps = (fps + (1. / (time.time() - t1))) / 2
        print("fps = ", fps)
        # file.close()
        if VIDEO_DEBUG == 1:
            cv2.imshow('output', img)
            if cv2.waitKey(1) == 27:
                break
        signal.signal(signal.SIGINT, user_exit)
    vid.release()
    if VIDEO_DEBUG == 1:
        cv2.destroyAllWindows()
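
The RTSP loop above re-registers signal.signal(signal.SIGINT, user_exit) on every frame; the handler itself is not shown. A minimal sketch with the (signum, frame) signature that signal.signal expects, assuming the handler only needs to report and exit (the body is an assumption):

import signal
import sys

def user_exit(signum, frame):
    # Invoked on Ctrl+C: report and leave the process.
    print("Interrupted by user, exiting")
    sys.exit(0)
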
Beispiel #14
0
def main(_argv):
    # Definition of the parameters
    max_cosine_distance = 0.5
    nn_budget = None
    nms_max_overlap = 1.0

    #initialize deep sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    yolo.load_weights(FLAGS.weights)
    logging.info('weights loaded')

    class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')

    try:
        #original code
        #vid = cv2.VideoCapture(int(FLAGS.video))
        #Daum PotPlayer
        #vid = cv2.VideoCapture('rtsp://172.20.10.4:8554/test')
        vid = cv2.VideoCapture('rtsp://192.168.0.28:8554/test')

        #did not connect
        #os.environ['OPENCV_FFMPEG_CAPTURE_OPTIONS'] = 'protocol_whitelist;file,rtp,udp'
        #vid = cv2.VideoCapture('C:/Users/Jiwon/Desktop/yolov3_deepsort-master/stream.sdp')
        #vid = cv2.VideoCapture(
        #'udpsrc port=8400 caps=application/x-rtp,media=(string)video,clock-rate=(int)9000,encoding-name=(string)H264,payload=(int)96!rtph264depay!decodebin!videoconvert!appsink',
        #cv2.CAP_GSTREAMER)
        #vid = cv2.VideoCapture("rtspsrc location=rtsp://192.168.0.25/main latency=30 ! decodebin ! nvvidconv ! appsink")
        #vid = cv2.VideoCapture('udp://@:5000')
        #vid =  cv2.VideoCapture('udpsrc port=5000 ! application/x-rtp, payload=96 ! rtph264depay ! avdec_h264 ! appsink', cv2.CAP_GSTREAMER)
        #vid = cv2.VideoCapture(1)

    except:
        vid = cv2.VideoCapture(FLAGS.video)

    out = None

    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))
        list_file = open('detection.txt', 'w')
        frame_index = -1
    #code for verification
    f_cnt = 0
    redetect = False

    fps = 0.0
    count = 0
    while True:
        _, img = vid.read()

        if img is None:
            logging.warning("Empty Frame")
            time.sleep(0.1)
            count += 1
            if count < 3:
                continue
            else:
                break

        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        #img_in = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, FLAGS.size)

        t1 = time.time()
        boxes, scores, classes, nums = yolo.predict(img_in)
        classes = classes[0]
        names = []
        for i in range(len(classes)):
            names.append(class_names[int(classes[i])])
        names = np.array(names)

        converted_boxes = convert_boxes(img, boxes[0])
        features = encoder(img, converted_boxes)
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                converted_boxes, scores[0], names, features)
        ]

        #initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maxima suppression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes,
                                                    nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()

            if class_name == "person":
                if int(track.track_id) == 1:
                    cv2.rectangle(img, (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[2]), int(bbox[3])), (0, 255, 0), 2)
                    cv2.rectangle(
                        img, (int(bbox[0]), int(bbox[1] - 30)),
                        (int(bbox[0]) +
                         (len(class_name) + len(str(track.track_id))) * 17,
                         int(bbox[1])), (0, 255, 0), -1)
                    cv2.putText(img, class_name + "-" + str(track.track_id),
                                (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75,
                                (0, 0, 0), 2)

                    #if this is the initial MQTT confirmation value, save it - fix this code here
                    #img_user = img[int(bbox[0]):int(bbox[2]), int(bbox[1]):int(bbox[3])]
                    img_user = img[int(bbox[1]):int(bbox[1]) + int(bbox[3]),
                                   int(bbox[0]):int(bbox[0]) + int(bbox[2]) -
                                   10]
                    cv2.imwrite(
                        'C:/Users/Jiwon/Desktop/re/yolov3_deepsort-master/userface/user.png',
                        img_user)

        ### UNCOMMENT BELOW IF YOU WANT CONSTANTLY CHANGING YOLO DETECTIONS TO BE SHOWN ON SCREEN
        #for det in detections:
        #    bbox = det.to_tlbr()
        #    cv2.rectangle(img,(int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),(255,0,0), 2)

        # print fps on screen
        fps = (fps + (1. / (time.time() - t1))) / 2
        cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)

        cv2.imshow('output', img)
        if FLAGS.output:
            out.write(img)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(converted_boxes) != 0:
                for i in range(0, len(converted_boxes)):
                    list_file.write(
                        str(converted_boxes[i][0]) + ' ' +
                        str(converted_boxes[i][1]) + ' ' +
                        str(converted_boxes[i][2]) + ' ' +
                        str(converted_boxes[i][3]) + ' ')
            list_file.write('\n')

        f_cnt += 1
        print("False")
        if f_cnt > 10:
            redetect = True
            f_cnt = 0
        #"""
        if redetect:  # https://opencv-python.readthedocs.io/en/latest/doc/24.imageTemplateMatch/imageTemplateMatch.html
            _, img = vid.read()
            gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            template = cv2.imread(
                'C:/Users/Jiwon/Desktop/re/yolov3_deepsort-master/userface/user.png',
                0)
            w, h = template.shape[::-1]  # width and height of the template image

            res = cv2.matchTemplate(gray, template, cv2.TM_SQDIFF)
            min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)
            top_left = min_loc
            bottom_right = (top_left[0] + w, top_left[1] + h)
            cv2.rectangle(img, top_left, bottom_right, (255, 0, 0), 1)
            print("TRUE")
        #   """
        # press q to quit
        if cv2.waitKey(1) == ord('q'):
            break
    vid.release()
    if FLAGS.output:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
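
The re-detection branch above uses cv2.matchTemplate with cv2.TM_SQDIFF, where a smaller score means a better match, which is why min_loc (not max_loc) from cv2.minMaxLoc marks the best position. A condensed, self-contained sketch of that lookup with stand-in arrays:

import cv2
import numpy as np

gray = np.zeros((480, 640), dtype=np.uint8)      # stand-in grayscale frame
template = np.zeros((60, 40), dtype=np.uint8)    # stand-in grayscale template
h, w = template.shape                            # shape is (rows, cols) = (h, w)

res = cv2.matchTemplate(gray, template, cv2.TM_SQDIFF)
min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)
top_left = min_loc                               # TM_SQDIFF: minimum = best match
bottom_right = (top_left[0] + w, top_left[1] + h)
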
def main(_argv):
    # Definition of the parameters
    max_cosine_distance = 0.5
    nn_budget = None
    nms_max_overlap = 1.0

    #initialize deep sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(
            physical_devices[0], True)  ###################################

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    yolo.load_weights(FLAGS.weights)
    logging.info('weights loaded')

    class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')

    try:
        vid = cv2.VideoCapture(int(FLAGS.video))
    except:
        vid = cv2.VideoCapture(FLAGS.video)

    out = None

    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))
        list_file = open('detection.txt', 'w')
        frame_index = -1

    fps = 0.0
    count = 0
    inDanger = 0
    while True:
        _, img = vid.read()

        if img is None:
            logging.warning("Empty Frame")
            time.sleep(0.1)
            count += 1
            if count < 3:
                continue
            else:
                break

        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, FLAGS.size)

        t1 = time.time()
        boxes, scores, classes, nums = yolo.predict(img_in)
        classes = classes[0]
        names = []
        for i in range(len(classes)):
            names.append(class_names[int(classes[i])])
        names = np.array(names)
        converted_boxes = convert_boxes(img, boxes[0])
        features = encoder(img, converted_boxes)
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                converted_boxes, scores[0], names, features)
        ]

        #initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maxima suppression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes,
                                                    nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        inDanger = 0

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), color, 2)
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1] - 30)),
                          (int(bbox[0]) +
                           (len(class_name) + len(str(track.track_id))) * 17,
                           int(bbox[1])), color, -1)
            cv2.putText(img, class_name + "-" + str(track.track_id),
                        (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75,
                        (255, 255, 255), 2)

            #If a person is found calculate the distance from it to all other objects
            human = "human"
            if class_name == human:
                mainIndex = tracker.tracks.index(track)
                nonhuman_index = []
                mainCenterPoint = (int((int(bbox[2]) - int(bbox[0])) / 2 +
                                       int(bbox[0])),
                                   int((int(bbox[3]) - int(bbox[1])) / 2 +
                                       int(bbox[1])))
                centerPoints = []
                lengths = []
                diagonals = []
                for track in tracker.tracks:
                    class_type = track.get_class()
                    bbox = track.to_tlbr()
                    #diagonal = math.sqrt((int(bbox[0]) - int(bbox[2]))**2 + (int(bbox[1]) - int(bbox[3]))**2)

                    centerPoint = (int((int(bbox[2]) - int(bbox[0])) / 2 +
                                       int(bbox[0])),
                                   int((int(bbox[3]) - int(bbox[1])) / 2 +
                                       int(bbox[1])))
                    length = math.sqrt(
                        ((mainCenterPoint[0] - centerPoint[0]))**2 +
                        (mainCenterPoint[1] - centerPoint[1])**2)
                    diagonal = abs(int(bbox[0]) - int(bbox[2]))
                    diagonals.append(diagonal)
                    centerPoints.append(centerPoint)
                    lengths.append(length)

                    if class_type != human:
                        nonhuman_index.append(tracker.tracks.index(track))

                #Normalizing radii using the diagonal length of each bbox
                if not (diagonals == []):
                    cp_diagonals = list(
                        diagonals
                    )  # make a copy of the diagonals array
                    cp_diagonals.sort()
                    normDiag = cp_diagonals[-1]
                else:
                    normDiag = 0

                if normDiag > 0 and not (diagonals == []):
                    max_radius = normDiag / 2
                    normalizedDiags = [i / normDiag for i in diagonals]
                    radii = [i * max_radius for i in normalizedDiags]
                else:
                    normalizedDiags = [i * 0 for i in diagonals]
                    radii = normalizedDiags

                for track in nonhuman_index:
                    if lengths[track] < 150:
                        inDanger += 1
                        break

                if (radii != [] and centerPoints != [] and lengths != []):
                    #print(not radii == [] and not centerPoints == [] and not lengths == [])
                    #for track in range(0,len(tracker.tracks)):
                    for track in nonhuman_index:
                        try:
                            if lengths[track] <= 150 and track != mainIndex:
                                cv2.line(img, mainCenterPoint,
                                         centerPoints[track], (255, 0, 0), 1)
                                cv2.circle(img, mainCenterPoint,
                                           int(radii[mainIndex]), (0, 0, 255),
                                           2)
                                cv2.circle(img, centerPoints[track],
                                           int(radii[track]), (0, 255, 0), 2)
                        except:
                            continue

        ### UNCOMMENT BELOW IF YOU WANT CONSTANTLY CHANGING YOLO DETECTIONS TO BE SHOWN ON SCREEN
        #for det in detections:
        #    bbox = det.to_tlbr()
        #    cv2.rectangle(img,(int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),(255,0,0), 2)

        # print fps on screen
        fps = (fps + (1. / (time.time() - t1))) / 2
        cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
        cv2.putText(img, "People in DANGER: {}".format(inDanger), (0, 60),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
        cv2.imshow('output', img)
        if FLAGS.output:
            out.write(img)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(converted_boxes) != 0:
                for i in range(0, len(converted_boxes)):
                    list_file.write(
                        str(converted_boxes[i][0]) + ' ' +
                        str(converted_boxes[i][1]) + ' ' +
                        str(converted_boxes[i][2]) + ' ' +
                        str(converted_boxes[i][3]) + ' ')
            list_file.write('\n')

        # press q to quit
        if cv2.waitKey(1) == ord('q'):
            break
    vid.release()
    if FLAGS.output:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
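
The danger check above measures the Euclidean distance between the person's bounding-box centre and every other tracked centre, flagging anything closer than 150 pixels. A standalone illustration of that distance computation with made-up coordinates:

import math

main_center = (320, 240)       # hypothetical person centre
other_center = (400, 330)      # hypothetical object centre

length = math.sqrt((main_center[0] - other_center[0]) ** 2 +
                   (main_center[1] - other_center[1]) ** 2)
in_danger = length < 150       # about 120.4 px here, so this pair would be flagged
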
def main(_argv):
    # Definition of the parameters
    max_cosine_distance = 0.5
    nn_budget = None
    nms_max_overlap = 1.0

    #initialize deep sort see github deep sort for more information
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    """
    A nearest neighbor distance metric that, for each target, returns
    the closest distance to any sample that has been observed so far.
    """
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)

    # multi target tracker
    tracker = Tracker(metric)

    # Return an identifiable list of physical devices visible to the host runtime
    physical_devices = tf.config.experimental.list_physical_devices('GPU')

    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)
        # enable memory growth for physical devices

    # utilised to identify type of YoloV3 used
    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    # load pre-trained weights
    # pre-trained from open sources, many from public repos on github.
    yolo.load_weights(FLAGS.weights)
    logging.info('weights loaded')

    # array contains name of classes (flags)
    class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')

    # capture a video from the camera or a video file, files for our demonstrations.
    try:
        vid = cv2.VideoCapture(int(FLAGS.video))
    except:
        vid = cv2.VideoCapture(FLAGS.video)

    # output video is empty
    out = None

    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))
        list_file = open('detection.txt', 'w')
        frame_index = -1

    _, img = vid.read()
    h, w, c = img.shape
    h_numStep = 12
    # number of boxes in a column
    w_numStep = 20
    # number of boxes in a row

    #make matrix-array M of categories of different areas 1=food area, etc.
    M = [[1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 5, 5, 5, 5, 5],
         [1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 5, 5, 5, 5, 5],
         [1, 1, 1, 2, 2, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 8, 8],
         [2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 8, 8, 8, 8],
         [2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8],
         [2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 7, 7],
         [2, 2, 2, 2, 2, 2, 2, 2, 4, 6, 6, 8, 8, 8, 8, 8, 8, 8, 7, 7, 7],
         [2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 8, 8, 8, 8, 8, 7, 7, 7, 7],
         [2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8, 8, 7, 7, 7, 7, 7],
         [2, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8, 7, 7, 7, 7, 7],
         [6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7],
         [6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7]]

    # store the total time that customers stay in box[i][j]
    total_time_engage = [[0 for i in range(w_numStep + 1)]
                         for j in range(h_numStep + 1)]

    # store the time that customer k is stationary in box[i][j]
    stationary_time = [[[0 for i in range(w_numStep + 1)]
                        for j in range(h_numStep + 1)] for k in range(100000)]
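    # Note on shapes: total_time_engage is (h_numStep + 1) x (w_numStep + 1),
    # one cell per grid box, while stationary_time keeps one such grid per
    # possible track id, so it is indexed as stationary_time[track_id][y_pos][x_pos].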

    # store the positions of a single customer
    x_single_tracking = []
    y_single_tracking = []
    # single customer's trackingID
    single_trackingID = 34

    # store the current position of customer
    max_trackID = 0
    x_trackID = [-1] * 1000000
    y_trackID = [-1] * 1000000

    # file store the total_time_engage
    file = 'total_time_engage.txt'

    fps = 0.0
    count = 0
    while True:

        _, img = vid.read()

        if img is None:
            logging.warning("Empty Frame")
            time.sleep(0.1)
            count += 1
            if count < 3:
                continue
            else:
                break

        # convert an image from one color space to another
        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        # return a tensor with a length 1 axis inserted at index 0
        img_in = tf.expand_dims(img_in, 0)

        # resize the image to 416x416
        # remember resolution has to be able to work with it
        # tensorflow.image.resize: resize image to size
        img_in = transform_images(img_in, FLAGS.size)

        # return the number of seconds passed since epoch
        t1 = time.time()
        time_finish_last_tracking = t1

        boxes, scores, classes, nums = yolo.predict(img_in)
        classes = classes[0]
        names = []
        for i in range(len(classes)):
            names.append(class_names[int(classes[i])])
        names = np.array(names)
        converted_boxes = convert_boxes(img, boxes[0])
        features = encoder(img, converted_boxes)
        # detections
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                converted_boxes, scores[0], names, features)
        ]

        #initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maximum suppression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes,
                                                    nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # pass the detections to DeepSORT: predict the new track states,
        # then update them with the current detections
        tracker.predict()
        tracker.update(detections)

        # draw horizontal boxes
        y_step = int(h / h_numStep)
        y_start = 0
        while True:
            y_end = y_start + y_step
            cv2.rectangle(img, (0, y_start), (int(w), y_end), (0, 0, 0), 1)
            y_start = y_end
            if y_start >= int(h):
                break  # finish drawing here

        # draw vertical boxes
        x_step = int(w / w_numStep)
        x_start = 0
        while True:
            x_end = x_start + x_step
            cv2.rectangle(img, (x_start, 0), (x_end, int(h)), (0, 0, 0), 1)
            x_start = x_end
            if x_start >= int(w):
                break  # finish drawing here

        time_step = time.time() - time_finish_last_tracking
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()  # get the corrected/predicted bounding box
            class_name = track.get_class()  # get the class name of this object
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]

            # identify center of a boundary box
            x_cent = int(bbox[0] + (bbox[2] - bbox[0]) / 2)
            y_cent = int(bbox[1] + (bbox[3] - bbox[1]) / 2)

            # draw detection on frame
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), color,
                          2)  # draw rectangle
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1] - 30)),
                          (int(bbox[0]) +
                           (len(class_name) + len(str(track.track_id))) * 17,
                           int(bbox[1])), color, -1)
            cv2.putText(img, class_name + "-" + str(track.track_id),
                        (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75,
                        (255, 255, 255), 2)  # insert objectName and objectID

            # work out which grid cell each person is in and
            # update the stationary_time and total_time_engage arrays
            if class_name == "person":
                x_pos = int(x_cent / x_step)
                y_pos = int(y_cent / y_step)
                #print(str(track.track_id) + ": [" + str(y_pos) + ", " + str(x_pos) + "]")
                if track.track_id > max_trackID:
                    max_trackID = track.track_id
                x_trackID[track.track_id] = y_pos
                y_trackID[track.track_id] = x_pos
                stationary_time[track.track_id][y_pos][x_pos] += time_step
                total_time_engage[y_pos][x_pos] += time_step

            # track a single person
            if class_name == "person" and track.track_id == single_trackingID:
                x_single_tracking.append(x_pos)
                y_single_tracking.append(y_pos)

        for track_index in range(max_trackID + 1):
            if x_trackID[track_index] != -1:
                print("customerID " + str(track_index) + ": [" +
                      str(x_trackID[track_index]) + "," +
                      str(y_trackID[track_index]) + "] in " + market_section(M[
                          x_trackID[track_index]][y_trackID[track_index]]))

        with open(file, 'w') as filetostore:
            for i in range(h_numStep):
                for j in range(w_numStep):
                    filetostore.write(
                        "{:.2f}".format(total_time_engage[i][j]) + " ")
                filetostore.write("\n")

        ### UNCOMMENT BELOW IF YOU WANT CONSTANTLY CHANGING YOLO DETECTIONS TO BE SHOWN ON SCREEN
        #for det in detections:
        #    bbox = det.to_tlbr()
        #    cv2.rectangle(img,(int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),(255,0,0), 2)
        time_finish_last_tracking = time.time()

        # print fps on screen
        fps = (fps + (1. / (time.time() - t1))) / 2
        cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
        cv2.imshow('output', img)
        if FLAGS.output:
            out.write(img)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(converted_boxes) != 0:
                for i in range(0, len(converted_boxes)):
                    list_file.write(
                        str(converted_boxes[i][0]) + ' ' +
                        str(converted_boxes[i][1]) + ' ' +
                        str(converted_boxes[i][2]) + ' ' +
                        str(converted_boxes[i][3]) + ' ')
            list_file.write('\n')

        # press q to quit
        if cv2.waitKey(1) == ord('q'):
            break

    f = open("total_time_engage.txt", "rt")
    f.close()

    # insert data into the database

    # initialise track arrays
    track_time = [0] * 10000000
    track_customerID = [0] * 10000000
    track_area = ["" for x in range(10000000)]
    x_single = [0] * 10000000
    y_single = [0] * 10000000

    # organise data to be inserted
    track_index = -1
    for k in range(1000):
        for h in range(h_numStep):
            for w in range(w_numStep):
                if stationary_time[k][h][w] != 0:
                    track_index += 1
                    track_time[track_index] = stationary_time[k][h][w]
                    track_customerID[track_index] = k
                    track_area[track_index] = str(h) + ', ' + str(w)
    x_tmp = -1
    y_tmp = -1
    single_track_index = -1
    for k in range(len(x_single_tracking)):
        if x_single_tracking[k] != x_tmp and y_single_tracking[k] != y_tmp:
            single_track_index += 1
            x_single[single_track_index] = x_single_tracking[k]
            y_single[single_track_index] = y_single_tracking[k]
            x_tmp = x_single[single_track_index]
            y_tmp = y_single[single_track_index]
    single_tracking_areas = ""
    for k in range(single_track_index):
        single_tracking_areas += '[' + str(x_single[k]) + ',' + str(
            y_single[k]) + '] , '

    # connect and insert the appropriate data in primary_table
    for k in range(track_index + 1):
        try:
            conn = mariadb.connect(user="******",
                                   password="******",
                                   host="localhost",
                                   database="trackingDB")

            cur = conn.cursor()
            mySql_insert_query = """INSERT INTO primary_table(trackID, customerID, area) 
                                    VALUES (%s, %s, %s) """

            recordTuple = (k, track_customerID[k], track_area[k])
            cur.execute(mySql_insert_query, recordTuple)
            conn.commit()

        except mariadb.Error as error:
            print("Failed to insert record into the primary_table {}".format(
                error))
        finally:
            if (conn.is_connected()):
                cur.close()
                conn.close()

    # connect and insert the appropriate data in "engaged" table
    for k in range(track_index + 1):
        try:
            conn = mariadb.connect(user="******",
                                   password="******",
                                   host="localhost",
                                   database="trackingDB")

            cur = conn.cursor()
            mySql_insert_query = """INSERT INTO engaged(trackID, engagement_time) 
                                    VALUES (%s, %s) """

            recordTuple = (k, track_time[k])
            cur.execute(mySql_insert_query, recordTuple)
            conn.commit()

        except mariadb.Error as error:
            print("Failed to insert record into the engaged table {}".format(
                error))
        finally:
            if (conn.is_connected()):
                cur.close()
                conn.close()

    # connect and insert the appropriate data in "total_areas" table
    try:
        conn = mariadb.connect(user="******",
                               password="******",
                               host="localhost",
                               database="trackingDB")

        cur = conn.cursor()
        mySql_insert_query = """INSERT INTO total_areas(customerID, all_areas_visited) 
                                    VALUES (%s, %s) """

        recordTuple = (single_trackingID, single_tracking_areas)
        cur.execute(mySql_insert_query, recordTuple)
        conn.commit()

    except mariadb.Error as error:
        print("Failed to insert record into the total_areas table {}".format(
            error))
    finally:
        if (conn.is_connected()):
            cur.close()
            conn.close()

    # plot the graph
    fig = plt.figure(1)
    fig.suptitle('Engagement time on different areas', fontsize=20)
    ax = plt.axes(projection='3d')

    # Data for a three-dimensional line
    x = np.arange(w_numStep - 1, -1, -1)
    y = np.linspace(0, h_numStep - 1, h_numStep)
    X, Y = np.meshgrid(x, y)
    Z = [[0 for j in range(w_numStep)] for i in range(h_numStep)]
    for i in range(h_numStep):
        for j in range(w_numStep):
            Z[i][j] = total_time_engage[i][j]
    Z = np.array(Z)

    # Plot the surface.
    ax.plot_surface(X,
                    Y,
                    Z,
                    rstride=1,
                    cstride=1,
                    cmap='viridis',
                    edgecolor='none')
    ax.set_xlabel('width')
    ax.set_ylabel('height')
    ax.set_zlabel('time')

    ax.view_init(35, 80)
    # get the current axes
    frame = plt.gca()
    # hide the x and y tick marks
    frame.axes.get_xaxis().set_ticks([])
    frame.axes.get_yaxis().set_ticks([])
    # plot the walking pattern of the single tracked customer
    fig2 = plt.figure(2)
    fig2_title = 'Walking pattern of a single customer( trackingID = ' + str(
        single_trackingID) + ')'
    fig2.suptitle(fig2_title, fontsize=15)
    plt.plot(x_single_tracking, y_single_tracking, 'ro')
    plt.axis([0, w_numStep, h_numStep, 0])

    frame.axes.get_xaxis().set_ticks([])
    frame.axes.get_yaxis().set_ticks([])

    fig.savefig('engage_level.jpg')
    fig2.savefig('single_tracking.jpg')
    plt.show()

    vid.release()
    if FLAGS.output:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
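
The `market_section` helper called in the per-customer printout above is not part of this snippet. A minimal sketch of what it might look like, assuming the integer codes in `M` simply index human-readable area names (only code 1, "food area", is documented in the comments; the remaining labels below are placeholders):

# Hypothetical lookup from the integer codes used in M to area names.
# Only code 1 ("food area") is documented above; the other labels are
# illustrative placeholders and would need to match the real store layout.
AREA_NAMES = {
    1: "food area",
    2: "area 2",
    3: "area 3",
    4: "area 4",
    5: "area 5",
    6: "area 6",
    7: "area 7",
    8: "area 8",
}

def market_section(code):
    """Return a readable name for an area code from the M matrix."""
    return AREA_NAMES.get(code, "unknown area")
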
def main(_argv):
    region = load_ROI()


    # Definition of the parameters
    max_cosine_distance = 0.3  #Default = 0.5
    nn_budget = None
    nms_max_overlap = 0.8      #Default = 0.5 

    #initialize deep sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    video_name = os.path.splitext(FLAGS.video)[0]  # video path without its extension

    weights = 'weights/yolov3_sang.tf'
    yolo.load_weights(weights)
    logging.info('weights loaded')

    class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')


    #WRITE RESULT
    
    result = "tracking_result/{}_track.txt".format(video_name)
    file_out = open(result,'w')
    path = os.getcwd()
    path = str(os.path.split(os.path.split(path)[0])[0])
    #vid_path = os.path.join(path,"Data/{}/{}.mp4".format(video_name,video_name))
    vid_path = os.path.join(path,"data/test_data/{}.mp4".format(video_name))
    vid = cv2.VideoCapture(vid_path)
    out = None

    
    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))
    
    frame_index = -1 
    
    fps = 0.0
    count = 0 
    while True:
        _, img = vid.read()

        if img is None:
            break

        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, FLAGS.size)

        t1 = time.time()
        boxes, scores, classes, nums = yolo.predict(img_in)
        classes = classes[0]
        names = []
        for i in range(len(classes)):
            names.append(class_names[int(classes[i])])
        names = np.array(names)
        converted_boxes = convert_boxes(img, boxes[0])
        features = encoder(img, converted_boxes)    
        detections = [Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in zip(converted_boxes, scores[0], names, features)]
        
        #initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maximum suppression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]        

        # Call the tracker
        tracker.predict()
        tracker.update(detections)
        frame_index = frame_index + 1
        if frame_index % 100 == 0: 
            print('FRAME: ',frame_index)
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue 
            bbox = track.to_tlbr()
            class_name = track.get_class()
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 1)
            #cv2.rectangle(img, (int(bbox[0]), int(bbox[1]-30)), (int(bbox[0])+(len(class_name)+len(str(track.track_id)))*17, int(bbox[1])), color, -1)
            #cv2.putText(img, class_name + "-" + str(track.track_id),(int(bbox[0]), int(bbox[1]-10)),0, 0.75, (255,255,255),2)
            x_cen = int((int(bbox[2]) + int(bbox[0]))/2)
            y_cen = int((int(bbox[3]) + int(bbox[1]))/2)

            if not is_in_region((int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), region):  # outside the ROI, so drop the track
                track.delete_track()

            cv2.putText(img,"FRAME: "+ str(frame_index),(0,45),cv2.FONT_HERSHEY_COMPLEX_SMALL,1,(0,255,0),2)
            
            # write the tracking result file in the CountMovement format
            bb_width = int(bbox[2]) - int(bbox[0])
            bb_height = int(bbox[3]) - int(bbox[1])
            diagonal = math.sqrt(bb_height**2 + bb_width**2)
            file_out.write("{},{},{},{},{},{},{},{},{}\n".format(frame_index,track.track_id,x_cen,y_cen,diagonal,-1.0,class_to_classNumber(str(class_name)),bb_width,bb_height))

        ### raw YOLO detections are drawn on screen below; comment this loop out to hide them
        for det in detections:
            bbox = det.to_tlbr() 
            cv2.rectangle(img,(int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),(0,255,0), 1)
        
        # print fps on screen 
        fps  = ( fps + (1./(time.time()-t1)) ) / 2
        cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30),
                          cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
        cv2.imshow('output', img)
        if FLAGS.output:
            out.write(img)

        # press q to quit
        if cv2.waitKey(1) == ord('q'):
            break
    vid.release()
    if FLAGS.output:
        out.release()
    cv2.destroyAllWindows()
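
`load_ROI` and `is_in_region` are project-specific helpers that are not included above. A rough sketch under the assumption that the region of interest is a single axis-aligned rectangle; the real implementation may load a polygon or a per-video configuration instead, and may require the whole box (not just its centre) to lie inside:

# Assumed ROI representation: (x_min, y_min, x_max, y_max).
def load_ROI():
    # Placeholder rectangle covering a full-HD frame; the real helper
    # presumably reads this from a file or per-video configuration.
    return (0, 0, 1920, 1080)

def is_in_region(top_left, bottom_right, region):
    """Return True if the box centre falls inside the rectangular ROI."""
    x_min, y_min, x_max, y_max = region
    cx = (top_left[0] + bottom_right[0]) / 2
    cy = (top_left[1] + bottom_right[1]) / 2
    return x_min <= cx <= x_max and y_min <= cy <= y_max
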
Example #18
0
def main(_argv):
    # set present path
    home = os.getcwd()

    # Definition of the parameters
    max_cosine_distance = 0.5
    nn_budget = None
    nms_max_overlap = 1.0

    #initialize deep sort
    # model_filename = 'weights/mars-small128.pb'
    model_filename = os.path.join(home, "weights", "arcface_weights.h5")
    encoder = gdet.create_box_encoder(model_filename, batch_size=128)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for physical_device in physical_devices:
        tf.config.experimental.set_memory_growth(physical_device, True)

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    yolo.load_weights(FLAGS.weights)
    logging.info('weights loaded')

    class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')

    times = []

    # build the face database
    face_db = dict()

    db_path = FLAGS.database
    for name in os.listdir(db_path):
        name_path = os.path.join(db_path, name)
        name_db = []
        for i in os.listdir(name_path):
            if i.split(".")[1] != "jpg": continue
            id_path = os.path.join(name_path, i)
            img = cv2.imread(id_path)
            # img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            # img_in = tf.expand_dims(img_in, 0)
            # img_in = transform_images(img_in, FLAGS.size)
            # boxes, scores, classes, nums = yolo.predict(img_in)
            boxes = np.asarray([[0, 0, img.shape[0], img.shape[1]]])
            scores = np.asarray([[1]])
            converted_boxes = convert_boxes(img, boxes, scores)
            features = encoder(img, converted_boxes)

            if features.shape[0] == 0: continue

            for f in range(features.shape[0]):
                name_db.append(features[f, :])
        name_db = np.asarray(name_db)
        face_db[name] = dict({"used": False, "db": name_db})

    try:
        vid = cv2.VideoCapture(int(FLAGS.video))
    except:
        vid = cv2.VideoCapture(FLAGS.video)

    out = None

    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))
        list_file = open('detection.txt', 'w')
        frame_index = -1

    fps = 0.0
    count = 0

    detection_list = []

    while True:
        _, img = vid.read()

        if img is None:
            logging.warning("Empty Frame")
            time.sleep(0.1)
            count += 1
            if count < 3:
                continue
            else:
                break

        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, FLAGS.size)

        t1 = time.time()
        boxes, scores, classes, nums = yolo.predict(img_in)

        # print(boxes, scores, classes, nums)
        # time.sleep(5)
        t2 = time.time()
        times.append(t2 - t1)
        print(f'yolo predict time : {t2-t1}')
        times = times[-20:]

        t3 = time.time()
        #############
        classes = classes[0]
        names = []
        for i in range(len(classes)):
            names.append(class_names[int(classes[i])])
        names = np.array(names)
        converted_boxes = convert_boxes(img, boxes[0], scores[0])
        features = encoder(img, converted_boxes)
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                converted_boxes, scores[0], names, features)
        ]

        t4 = time.time()
        print(f'feature generation time : {t4-t3}')

        #initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maximum suppression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes,
                                                    nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        t5 = time.time()
        # Call the tracker
        tracker.predict()
        # tracker.update(detections)
        tracker.update(detections, face_db, FLAGS.max_face_threshold)
        t6 = time.time()
        print(f'tracking time : {t6-t5}')

        frame_index = frame_index + 1
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()
            face_name = track.get_face_name()
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), color, 2)
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1] - 30)),
                          (int(bbox[0]) +
                           (len(class_name) + len(str(track.track_id)) +
                            len(str(face_name))) * 23, int(bbox[1])), color,
                          -1)
            # cv2.putText(img, class_name + face_name + "-" + str(track.track_id),(int(bbox[0]), int(bbox[1]-10)),0, 0.75, (255,255,255),2)
            cv2.putText(
                img, class_name + "-" + str(track.track_id) + "-" + face_name,
                (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2)
            # cv2.putText(img, class_name + "-" + str(track.track_id),(int(bbox[0]), int(bbox[1]-10)),0, 0.75, (255,255,255),2)
            # print(class_name + "-" + str(track.track_id))

            # detection_list.append(dict({"frame_no": str(frame_index), "id": str(track.track_id), "x": str(int(bbox[0])), "y": str(int(bbox[1])), "width": str(int(bbox[2])-int(bbox[0])), "height": str(int(bbox[3])-int(bbox[1]))}))
            if face_name != "":
                detection_list.append(
                    dict({
                        "frame_no": str(frame_index),
                        "id": str(face_name),
                        "x": str(int(bbox[0])),
                        "y": str(int(bbox[1])),
                        "width": str(int(bbox[2]) - int(bbox[0])),
                        "height": str(int(bbox[3]) - int(bbox[1]))
                    }))
        #######
        fps = (fps + (1. / (time.time() - t1))) / 2
        # img = draw_outputs(img, (boxes, scores, classes, nums), class_names)
        # img = cv2.putText(img, "Time: {:.2f}ms".format(sum(times)/len(times)*1000), (0, 30),
        #                   cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
        img = cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30),
                          cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (20, 20, 255), 2)
        if FLAGS.output:
            out.write(img)
            # frame_index = frame_index + 1
            # list_file.write(str(frame_index)+' ')
            # if len(converted_boxes) != 0:
            #     for i in range(0,len(converted_boxes)):
            #         list_file.write(str(converted_boxes[i][0]) + ' '+str(converted_boxes[i][1]) + ' '+str(converted_boxes[i][2]) + ' '+str(converted_boxes[i][3]) + ' ')
            # list_file.write('\n')
        cv2.imshow('output', img)
        if cv2.waitKey(1) == ord('q'):
            break

    cv2.destroyAllWindows()

    frame_list = sorted(detection_list,
                        key=lambda x: (int(x["frame_no"]), int(x["id"])))
    # pprint.pprint(frame_list)

    f = open(FLAGS.eval, "w")
    for a in frame_list:
        f.write(a["frame_no"] + " " + a["id"] + " " + a["x"] + " " + a["y"] +
                " " + a["width"] + " " + a["height"] + "\n")
    # close the file
    f.close()
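
Each line written to FLAGS.eval above holds six space-separated fields: frame_no, id (the matched face name), x, y, width and height. A small sketch of reading that file back for later evaluation:

# Parse the space-separated evaluation file produced above.
def load_eval_file(path):
    rows = []
    with open(path) as f:
        for line in f:
            frame_no, face_id, x, y, w, h = line.split()
            rows.append({
                "frame_no": int(frame_no),
                "id": face_id,  # stored as a string; the sort above assumes it parses as an int
                "x": int(x),
                "y": int(y),
                "width": int(w),
                "height": int(h),
            })
    return rows
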
def main(_argv):
    # Definition of the parameters
    max_cosine_distance = 0.5
    nn_budget = None
    nms_max_overlap = 1

    #initialize deep sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric, max_age=40)

    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    yolo.load_weights(FLAGS.weights)
    logging.info('weights loaded')

    class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')

    try:
        vid = cv2.VideoCapture(int(FLAGS.video))
    except:
        vid = cv2.VideoCapture(FLAGS.video)

    out = None

    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))
        list_file = open('detection.txt', 'w')
        frame_index = -1

    fps = 0.0
    count = 0
    while True:
        _, img = vid.read()

        if img is None:
            logging.warning("Empty Frame")
            time.sleep(0.1)
            count += 1
            if count < 3:
                continue
            else:
                break

        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, FLAGS.size)

        t1 = time.time()
        boxes, scores, classes, nums = yolo.predict(img_in)
        classes = classes[0]
        names = []
        for i in range(len(classes)):
            names.append(class_names[int(classes[i])])
        names = np.array(names)
        converted_boxes = convert_boxes(img, boxes[0])
        features = encoder(img, converted_boxes)
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                converted_boxes, scores[0], names, features)
        ]

        #initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maximum suppression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes,
                                                    nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        objects = 0

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue

            bbox = track.to_tlbr()
            class_name = track.get_class()

            if (FLAGS.class_1 == 'all'):
                objects += 1
                color = colors[int(track.track_id) % len(colors)]
                color = [i * 255 for i in color]
                cv2.rectangle(img, (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])), color, 2)
                cv2.rectangle(
                    img, (int(bbox[0]), int(bbox[1] - 30)),
                    (int(bbox[0]) +
                     (len(class_name) + len(str(track.track_id))) * 17,
                     int(bbox[1])), color, -1)
                cv2.putText(img, class_name + "-" + str(track.track_id),
                            (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75,
                            (255, 255, 255), 2)
            else:
                if (class_name == FLAGS.class_1):
                    objects += 1
                    color = colors[int(track.track_id) % len(colors)]
                    color = [i * 255 for i in color]
                    cv2.rectangle(img, (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[2]), int(bbox[3])), color, 2)
                    cv2.rectangle(
                        img, (int(bbox[0]), int(bbox[1] - 30)),
                        (int(bbox[0]) +
                         (len(class_name) + len(str(track.track_id))) * 17,
                         int(bbox[1])), color, -1)
                    cv2.putText(img, class_name + "-" + str(track.track_id),
                                (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75,
                                (255, 255, 255), 2)

    # print("Objetos filtrados:{}".format(objects))
    # print N_objects on screen
        cv2.putText(img, "# Objetos: {}".format(objects), (0, 30),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (255, 0, 0), 2)

        ### raw YOLO detections are drawn on screen below; comment this loop out to hide them
        for det in detections:
            bbox = det.to_tlbr()
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)

        # print fps on screen
        #fps  = ( fps + (1./(time.time()-t1)) ) / 2
        #cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30),
        #                  cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
        cv2.imshow('output', img)
        if FLAGS.output:
            out.write(img)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(converted_boxes) != 0:
                for i in range(0, len(converted_boxes)):
                    list_file.write(
                        str(converted_boxes[i][0]) + ' ' +
                        str(converted_boxes[i][1]) + ' ' +
                        str(converted_boxes[i][2]) + ' ' +
                        str(converted_boxes[i][3]) + ' ')
            list_file.write('\n')

        # press q to quit
        if cv2.waitKey(1) == ord('q'):
            break
    vid.release()
    if FLAGS.output:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
    #    # img_in = cv2.imread(img_filename, cv2.COLOR_BGR2RGB)
    #    img_in = cv2.cvtColor(img_in, cv2.COLOR_BGR2RGB)
    img_in = tf.expand_dims(img_in, 0)
    img_in = transform_images(img_in, 416)

    t1 = time.time()

    boxes, scores, classes, nums = yolo.predict(img_in)

    classes = classes[0]
    names = []
    for i in range(len(classes)):
        names.append(class_names[int(classes[i])])
    names = np.array(names)
    # transformation from relative x1/xsize, y1/ysize, ... to absolute x, y, w, h
    converted_boxes = convert_boxes(img, boxes[0])
    features = encoder(img, converted_boxes)

    detections = [
        Detection(bbox, score, class_name,
                  feature) for bbox, score, class_name, feature in zip(
                      converted_boxes, scores[0], names, features)
    ]

    boxs = np.array([d.tlwh for d in detections])
    scores = np.array([d.confidence for d in detections])
    classes = np.array([d.class_name for d in detections])
    indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap,
                                                scores)
    detections = [detections[i] for i in indices]
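
`convert_boxes` belongs to the surrounding project; going by the comment above, a rough sketch of the transformation it performs, assuming the YOLO boxes arrive as normalised (x1, y1, x2, y2) fractions of the image size and DeepSORT wants absolute top-left (x, y, w, h) boxes (the real helper may also drop zero-area padding boxes):

import numpy as np

def convert_boxes_sketch(image, boxes):
    # normalised (x1, y1, x2, y2) -> absolute top-left (x, y, w, h)
    height, width = image.shape[:2]
    out = []
    for x1, y1, x2, y2 in np.array(boxes):
        out.append([int(x1 * width), int(y1 * height),
                    int((x2 - x1) * width), int((y2 - y1) * height)])
    return out
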
Example #21
0
    def run(self, img):
        if img is None:
            logging.warning("Empty Frame")
            time.sleep(0.1)
            return

        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, FLAGS.size)

        t1 = time.time()
        boxes, scores, classes, nums = self.yolo.predict(img_in)
        classes = classes[0]
        names = []
        for i in range(len(classes)):
            names.append(self.class_names[int(classes[i])])

        names = np.array(names)
        converted_boxes = convert_boxes(img, boxes[0])
        features = self.encoder(img, converted_boxes)
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                converted_boxes, scores[0], names, features)
        ]

        #initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maximum suppression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes,
                                                    self.nms_max_overlap,
                                                    scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        self.tracker.predict()
        self.tracker.update(detections)

        for track in self.tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue  # skip unconfirmed or stale tracks instead of aborting the whole frame
            bbox = track.to_tlbr()
            class_name = track.get_class()
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]
            self.last_tracked.append(
                (track.track_id, track.get_class(), int(bbox[0]), int(bbox[1]),
                 int(bbox[2]), int(bbox[3])))
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), color, 2)
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1] - 30)),
                          (int(bbox[0]) +
                           (len(class_name) + len(str(track.track_id))) * 17,
                           int(bbox[1])), color, -1)
            cv2.putText(img, class_name + "-" + str(track.track_id),
                        (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75,
                        (255, 255, 255), 2)

        ### UNCOMMENT BELOW IF YOU WANT CONSTANTLY CHANGING YOLO DETECTIONS TO BE SHOWN ON SCREEN
        #for det in detections:
        #    bbox = det.to_tlbr()
        #    cv2.rectangle(img,(int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),(255,0,0), 2)

        # print fps on screen
        self.fps = (self.fps + (1. / (time.time() - t1))) / 2
        cv2.putText(img, "FPS: {:.2f}".format(self.fps), (0, 30),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
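
A sketch of how this `run` method could be driven from a capture loop; the `runner` object is whatever instance of the surrounding class owns `run` and `last_tracked` (its name and constructor are not shown above and are therefore assumptions):

import cv2

def track_video(runner, video_path):
    # Feed frames to runner.run(); it draws on each frame and appends the
    # confirmed tracks to runner.last_tracked.
    vid = cv2.VideoCapture(video_path)
    while True:
        ok, img = vid.read()
        if not ok or img is None:
            break
        runner.run(img)
        cv2.imshow('output', img)
        if cv2.waitKey(1) == ord('q'):
            break
    vid.release()
    cv2.destroyAllWindows()
    return runner.last_tracked
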
Example #22
0
def main():

    class_names = [
        c.strip() for c in open('./data/labels/coco.names').readlines()
    ]
    yolo = YoloV3(classes=len(class_names))
    yolo.load_weights('./weights/yolov3.tf')

    imageHub = imagezmq.ImageHub()

    max_cosine_distance = 0.5
    nn_budget = None
    nms_max_overlap = 0.8

    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric('cosine',
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    #vid = cv2.VideoCapture('./data/video/traffic1.mkv')
    #vid = cv2.VideoCapture("video.webm")
    #vid = VideoCaptureAsync("video.webm")
    #vid = vid.start()

    codec = cv2.VideoWriter_fourcc(*'XVID')
    #vid_fps =int(vid.get(cv2.CAP_PROP_FPS))
    #vid_width,vid_height = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)), int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    #out = cv2.VideoWriter('./data/video/results.avi', codec, vid_fps, (vid_width, vid_height))
    out = cv2.VideoWriter('./data/video/results.avi', codec, 20, (480, 480))

    from collections import deque
    pts = [deque(maxlen=30) for _ in range(1000)]

    counter = []

    while True:

        #_, img = vid.read()

        (rpiName, img) = imageHub.recv_image()
        imageHub.send_reply(b'OK')

        if img is None:
            print('Completed')
            break

        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, 416)

        t1 = time.time()

        boxes, scores, classes, nums = yolo.predict(img_in)

        classes = classes[0]
        names = []

        for i in range(len(classes)):
            names.append(class_names[int(classes[i])])

        names = np.array(names)
        converted_boxes = convert_boxes(img, boxes[0])
        features = encoder(img, converted_boxes)

        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                converted_boxes, scores[0], names, features)
        ]

        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes,
                                                    nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        tracker.predict()
        tracker.update(detections)

        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        #current_count = int(0)

        for track in tracker.tracks:

            if not track.is_confirmed() or track.time_since_update > 1:
                continue

            bbox = track.to_tlbr()
            class_name = track.get_class()
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]

            cv2.rectangle(img, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), color, 2)
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1] - 30)),
                          (int(bbox[0]) +
                           (len(class_name) + len(str(track.track_id))) * 17,
                           int(bbox[1])), color, -1)

            cv2.putText(img, class_name + "-" + str(track.track_id),
                        (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75,
                        (255, 255, 255), 2)

            center = (int(
                ((bbox[0]) + (bbox[2])) / 2), int(((bbox[1]) + (bbox[3])) / 2))
            pts[track.track_id].append(center)

            for j in range(1, len(pts[track.track_id])):

                if pts[track.track_id][j - 1] is None or pts[
                        track.track_id][j] is None:
                    continue
                thickness = int(np.sqrt(64 / float(j + 1)) * 2)
                cv2.line(img, (pts[track.track_id][j - 1]),
                         (pts[track.track_id][j]), color, thickness)

            height, width, _ = img.shape
            #cv2.line(img, (0, int(3*height/6+height/20)), (width, int(3*height/6+height/20)), (0, 255, 0), thickness=2)
            #cv2.line(img, (0, int(3*height/6-height/20)), (width, int(3*height/6-height/20)), (0, 255, 0), thickness=2)

            center_y = int(((bbox[1]) + (bbox[3])) / 2)

            if center_y <= int(3 * height / 6 + height /
                               20) and center_y >= int(3 * height / 6 -
                                                       height / 20):
                if class_name == 'car' or class_name == 'truck' or class_name == 'person':
                    counter.append(int(track.track_id))
                    #current_count += 1

        total_count = len(set(counter))
        #cv2.putText(img, "Current Vehicle Count: " + str(current_count), (0, 80), 0, 1, (0, 0, 255), 2)
        cv2.putText(img, "Total Vehicle Count: " + str(total_count), (0, 130),
                    0, 1, (0, 0, 255), 2)

        fps = 1. / (time.time() - t1)
        cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30), 0, 1, (0, 0, 255),
                    2)
        #cv2.resizeWindow('output', 1024, 768)
        cv2.imshow('output', img)
        out.write(img)

        if cv2.waitKey(1) == ord('q'):
            break

    #vid.release()
    out.release()
    cv2.destroyAllWindows()
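
The counting condition in the loop above checks whether a track's centre y lies inside a horizontal band of ±height/20 around the line at 3·height/6. The same test, factored into a small helper (the name is illustrative):

def in_counting_band(center_y, frame_height):
    # band of +/- frame_height/20 around the counting line at 3*frame_height/6
    line_y = 3 * frame_height / 6
    margin = frame_height / 20
    return int(line_y - margin) <= center_y <= int(line_y + margin)
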
	boxes, scores, classes, nums = yolo.predict(img_in)  # run the detector on the batch
	# yolo.predict returns padded numpy arrays for boxes, scores, classes and nums;
	# the number of boxes per image is capped (100 here)

	# boxes, 3D shape (1, 100, 4)
	# scores, 2D shape (1, 100)
	# classes, 2D shape (1, 100)
	# nums, 1D shape (1,)

	classes = classes[0]

	names = []
	for i in range(len(classes)):
		names.append(class_names[int(classes[i])])
	names = np.array(names)
	converted_boxes = convert_boxes(img, boxes[0])
	features = encoder(img, converted_boxes) #generate the features vector
	
	detections = [Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in zip(converted_boxes, scores[0], names, features)]

	cmap =  plt.get_cmap('tab20b')
	colors = [cmap(i)[:3] for i in np.linspace(0,1,20)]

	# run non-maximum suppression
	boxs = np.array([d.tlwh for d in detections])
	scores = np.array([d.confidence for d in detections])
	classes = np.array([d.class_name for d in detections])
	indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)  # which boxes are kept
	detections = [detections[i] for i in indices] # ready for deep_sort
	
	tracker.predict()
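
Given the shapes noted above, the prediction arrays are padded to 100 slots per image; assuming `nums` holds the number of valid detections, only the first `nums[0]` rows are meaningful for this single-image batch. A minimal sketch of trimming the padding:

def trim_predictions(boxes, scores, classes, nums):
    # boxes: (1, 100, 4), scores: (1, 100), classes: (1, 100), nums: (1,)
    n = int(nums[0])
    return boxes[0][:n], scores[0][:n], classes[0][:n]
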
Example #24
0
    def main(self, _argv):
        if FLAGS.tiny:
            yolo = YoloV3Tiny(classes=FLAGS.num_classes)
        else:
            yolo = YoloV3(classes=FLAGS.num_classes)

        yolo.load_weights(FLAGS.weights)
        logging.info('weights loaded')

        class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
        logging.info('classes loaded')

        try:
            vid = cv2.VideoCapture(int(FLAGS.video))
        except:
            vid = cv2.VideoCapture(FLAGS.video)

        out = None

        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        self.y_axis = height + 1
        if FLAGS.output:
            # by default VideoCapture returns float instead of int
            fps = int(vid.get(cv2.CAP_PROP_FPS))
            codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
            out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))
            list_file = open('detection.txt', 'w')
            frame_index = -1

        fps = 0.0
        count = 0

        cv2.namedWindow('HawkEye')
        cv2.setMouseCallback('HawkEye', self.mouse_callback)

        while True:
            _, img = vid.read()

            if img is None:
                logging.warning("Empty Frame")
                time.sleep(0.1)
                count += 1
                if count < 3:
                    continue
                else:
                    break

            if self.y_axis < height:
                cv2.line(img, (0, self.y_axis),
                         (int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)), self.y_axis),
                         (255, 0, 0), 3)

            img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img_in = tf.expand_dims(img_in, 0)
            img_in = transform_images(img_in, FLAGS.size)

            t1 = time.time()
            boxes, scores, classes, nums = yolo.predict(img_in)

            classes = classes[0]
            names = []
            for i in range(len(classes)):
                names.append(class_names[int(classes[i])])
            names = np.array(names)
            converted_boxes = convert_boxes(img, boxes[0])
            features = self.encoder(img, converted_boxes)
            detections = [
                Detection(bbox, score, class_name, feature)
                for bbox, score, class_name, feature in zip(
                    converted_boxes, scores[0], names, features)
            ]

            # initialize color map
            cmap = plt.get_cmap('tab20b')
            colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

            # run non-maximum suppression
            boxs = np.array([d.tlwh for d in detections])
            scores = np.array([d.confidence for d in detections])
            classes = np.array([d.class_name for d in detections])
            indices = preprocessing.non_max_suppression(
                boxs, classes, self.nms_max_overlap, scores)
            detections = [detections[i] for i in indices]

            # Call the tracker
            self.tracker.predict()
            self.tracker.update(detections)
            for track in self.tracker.tracks:
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                bbox = track.to_tlbr()
                class_name = track.get_class()
                color = colors[int(track.track_id) % len(colors)]
                color = [i * 255 for i in color]
                present_x, present_y, w, h = track.to_xywh()
                present_size = int(w * h)

                if self.y_axis <= present_y:
                    if track.size < present_size and track.y_axis < self.y_axis:
                        label = 'coming'
                    else:
                        label = 'warning'
                else:
                    label = ''
                cv2.rectangle(img, (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])), color, 2)
                cv2.putText(img, label, (int(bbox[0]), int(bbox[1] - 10)),
                            cv2.FONT_HERSHEY_PLAIN, 2, (255, 255, 255), 2)

            # print fps on screen
            fps = (fps + (1. / (time.time() - t1))) / 2
            cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30),
                        cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
            cv2.imshow('HawkEye', img)
            if FLAGS.output:
                out.write(img)
                frame_index = frame_index + 1
                list_file.write(str(frame_index) + ' ')
                if len(converted_boxes) != 0:
                    for i in range(0, len(converted_boxes)):
                        list_file.write(
                            str(converted_boxes[i][0]) + ' ' +
                            str(converted_boxes[i][1]) + ' ' +
                            str(converted_boxes[i][2]) + ' ' +
                            str(converted_boxes[i][3]) + ' ')
                list_file.write('\n')

            # press q to quit
            if cv2.waitKey(1) == ord('q'):
                break
        vid.release()
        if FLAGS.output:
            out.release()
            list_file.close()
        cv2.destroyAllWindows()
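
`mouse_callback` is another method of the class above and is not shown. A plausible minimal version, assuming a left click simply moves the `y_axis` threshold line that the main loop draws and compares track positions against (cv2 is already imported by the example):

    def mouse_callback(self, event, x, y, flags, param):
        # Hypothetical handler: clicking the 'HawkEye' window sets the
        # horizontal threshold line used for the 'coming'/'warning' labels.
        if event == cv2.EVENT_LBUTTONDOWN:
            self.y_axis = y
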