Example #1
 def initTrack(self):
     if (self.track):
         self.progressBar.setValue(20)
         if self.options["track"]:
             if self.options["tracker"] == "deep_sort":
                 from deep_sort import generate_detections
                 from deep_sort.deep_sort import nn_matching
                 from deep_sort.deep_sort.tracker import Tracker
                 self.progressBar.setValue(50)
                 metric = nn_matching.NearestNeighborDistanceMetric(
                     "cosine", 0.2, 100)
                 self.tracker = Tracker(metric)
                 self.encoder = generate_detections.create_box_encoder(
                     os.path.abspath(
                         "deep_sort/resources/networks/mars-small128.ckpt-68577"
                     ))
             elif self.options["tracker"] == "sort":
                 from sort.sort import Sort
                 self.encoder = None
                 self.tracker = Sort()
                 self.progressBar.setValue(50)
         if self.options["BK_MOG"] and self.options["track"]:
             fgbg = cv2.bgsegm.createBackgroundSubtractorMOG()
         self.progressBar.setValue(60)
         self.initTFNet()
     else:
         self.initTFNet()
Example #2
File: guiPy.py Project: grh1cob/Deepsense
def gui(self):
    source = self.FLAGS.demo
    SaveVideo = self.FLAGS.saveVideo
    if self.FLAGS.track:
        if self.FLAGS.tracker == "deep_sort":
            from deep_sort import generate_detections
            from deep_sort.deep_sort import nn_matching
            from deep_sort.deep_sort.tracker import Tracker
            metric = nn_matching.NearestNeighborDistanceMetric(
                "cosine", 0.2, 100)
            tracker = Tracker(metric)
            encoder = generate_detections.create_box_encoder(
                os.path.abspath(
                    "deep_sort/resources/networks/mars-small128.ckpt-68577"))
        elif self.FLAGS.tracker == "sort":
            from sort.sort import Sort
            encoder = None
            tracker = Sort()

    if self.FLAGS.BK_MOG and self.FLAGS.track:
        fgbg = cv2.bgsegm.createBackgroundSubtractorMOG()

    if self.FLAGS.csv:
        f = open('{}.csv'.format(source), 'w')
        writer = csv.writer(f, delimiter=',')
        writer.writerow(['frame_id', 'track_id', 'x', 'y', 'w', 'h'])
        f.flush()
    else:
        f = None
        writer = None

    App(tkinter.Tk(), "Tkinter and OpenCV", 0, tracker, encoder)
Example #3
    def __init__(self, classes, tracker='sort'):
        self.ttype = tracker
        self.classes = classes
        if tracker == 'deep_sort':
            from deep_sort import generate_detections
            from deep_sort.deep_sort import nn_matching
            from deep_sort.deep_sort.tracker import Tracker

            metric = nn_matching.NearestNeighborDistanceMetric(
                "cosine", 0.2, 100)  #param
            self.nms_max_overlap = 0.1  #param
            model_path = os.path.join(WORK_DIR, MODEL_DIR,
                                      "mars-small128.ckpt-68577")
            self.encoder = generate_detections.create_box_encoder(model_path)
            self.tracker = Tracker(metric)

            from deep_sort.application_util import preprocessing as prep
            from deep_sort.deep_sort.detection import Detection
            self.prep = prep
            self.Detection = Detection

        elif tracker == 'sort':
            from sort.sort import Sort
            self.tracker = Sort()

        self.trackers = {}
Example #4
def main():
	global tracker
	global encoder
	global msg
	msg = IntList()
	max_cosine_distance = 0.2
	nn_budget = 100
	metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
	tracker = Tracker(metric)
	model_filename = "/home/ilyas/darknetros_sort/src/sort_track/src/deep_sort/model_data/mars-small128.pb" #Change it to your directory
	encoder = gdet.create_box_encoder(model_filename)
	#Initialize ROS node
	rospy.init_node('sort_tracker', anonymous=True)
	rate = rospy.Rate(10)
	# Get the parameters
	(camera_topic, detection_topic, tracker_topic) = get_parameters()
	#Subscribe to image topic
	image_sub = rospy.Subscriber(camera_topic,Image,callback_image)
	#Subscribe to darknet_ros to get BoundingBoxes from YOLOv3
	sub_detection = rospy.Subscriber(detection_topic, BoundingBoxes , callback_det)
	#Publish results of object tracking (create the publisher once, before the loop)
	pub_trackers = rospy.Publisher(tracker_topic, IntList, queue_size=10)
	while not rospy.is_shutdown():
		print(msg)
		pub_trackers.publish(msg)
		rate.sleep()
Example #5
    def __init__(self):

        self.metric = nn_matching.NearestNeighborDistanceMetric(
            "cosine", 0.2, 100)
        self.tracker = Tracker(self.metric)
        self.encoder = generate_detections.create_box_encoder(
            os.path.abspath(
                "deep_sort/resources/networks/mars-small128.ckpt-68577"))
Example #6
 def __init__(self, encoderPath, applyMask=False):
     self.metric = nn_matching.NearestNeighborDistanceMetric(
         "cosine", 0.9, 100)
     self.tracker = Tracker(self.metric,
                            max_iou_distance=0.9,
                            max_age=50,
                            n_init=3,
                            _lambda=0.3)
     self.encoder = generate_detections.create_box_encoder(
         encoderPath, applyMask=applyMask)
     self.applyMask = applyMask
Example #7
    def __init__(self, args):
        self.tfnet = None
        self.image_publisher = rospy.Publisher(args.output, Image, queue_size=1)
        self.subscriber = rospy.Subscriber(args.input, Image, self.callback, queue_size = 1, buff_size=2**24)
        self.subscriber = rospy.Subscriber("/cctv_info", String, self.exit)

        metric = nn_matching.NearestNeighborDistanceMetric(
            "cosine", 0.2, 100)
        self.tracker = Tracker(metric)
        self.encoder = generate_detections.create_box_encoder(
            os.path.abspath("deep_sort/resources/networks/mars-small128.ckpt-68577"))

        options = {"model": "darkflow/cfg/yolo.cfg", "load": "darkflow/bin/yolo.weights", "threshold": 0.1,
                   "track": True, "trackObj": ["person"], "BK_MOG": True, "tracker": "deep_sort", "csv": False}

        self.tfnet = TFNet(options)
Example #8
 def __init__(self,
              model_path,
              matching_threshold=.5,
              max_iou_distance=0.7,
              max_age=60,
              n_init=5):
     print(matching_threshold)
     print(max_iou_distance)
     print(max_age)
     print(n_init)
     self.encoder = generate_detections.create_box_encoder(
         os.path.abspath(model_path))
     self.metric = nn_matching.NearestNeighborDistanceMetric(
         "cosine", matching_threshold)
     self.tracker = Tracker(self.metric, max_iou_distance, max_age, n_init)
     self.detections = []
     self.trackingPerson = None
Example #9
 def __init__(self):
     self.YOLO_Classes = utils.read_class_names(cfg.YOLO.CLASSES)
     self.key_list = list(self.YOLO_Classes.keys())
     self.val_list = list(self.YOLO_Classes.values())
     self.deepSORT_model = CONFIG.deepSORT_model
     self.encoder = gdet.create_box_encoder(self.deepSORT_model,
                                            batch_size=1)
     self.max_cosine_distance = 0.7
     self.nn_budget = None
     self.metric = nn_matching.NearestNeighborDistanceMetric(
         "cosine", self.max_cosine_distance, self.nn_budget)
     self.tracker = Tracker(self.metric)
     self.track_only = [
         "car", "truck", "motorbike", "bus", "bicycle", "person"
     ]
     self.memory = {}
     self.already_counted = deque(maxlen=50)
Example #10
 def __init__(self, id, device, config, log):
     self.id = id
     self.log = log
     self.frame = []
     self.device = device
     self.config = config
     self.cnt = 0
     self.cams = []
     self.img_size = config['img_size']
     self.nms_max_overlap = config['nms_max_overlap']
     self.body_min_w = config['body_min_w']
     self.conf_thres = config['conf_thres']
     self.nms_thres = config['nms_thres']
     self.max_hum_w = int(self.img_size / 2)
     self.detector = Darknet(self.config['model_def'],
                             img_size=self.config['img_size']).to(
                                 self.device)
     self.detector.load_darknet_weights(self.config['weights_path'])
     self.encoder = gdet.create_box_encoder(
         self.config['model_filename'],
         batch_size=self.config['batch_size'])
     self._stopevent = threading.Event()
     print("created ok")
     threading.Thread.__init__(self)
Example #11
def main(_argv):
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for physical_device in physical_devices:
        tf.config.experimental.set_memory_growth(physical_device, True)

    if FLAGS.tiny:
        STRIDES = np.array(cfg.YOLO.STRIDES_TINY)
        XYSCALE = cfg.YOLO.XYSCALE_TINY
        if FLAGS.model == 'yolov4':
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_TINY, FLAGS.tiny)
        else:
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_TINY_V3, FLAGS.tiny)
    else:
        STRIDES = np.array(cfg.YOLO.STRIDES)
        XYSCALE = cfg.YOLO.XYSCALE
        if FLAGS.model == 'yolov4':
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS, FLAGS.tiny)
        else:
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_V3, FLAGS.tiny)

    CLASSES = utils.read_class_names(cfg.YOLO.CLASSES)
    NUM_CLASSES = len(CLASSES)
    input_size = FLAGS.size
    try:
        vid = cv2.VideoCapture(int(FLAGS.video))
    except:
        vid = cv2.VideoCapture(FLAGS.video)

    times = []
    if FLAGS.output:
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

    if FLAGS.framework == 'tf':
        input_layer = tf.keras.layers.Input([input_size, input_size, 3])
        if FLAGS.tiny:
            if FLAGS.model == 'yolov3':
                feature_maps = YOLOv3_tiny(input_layer, NUM_CLASSES)
            else:
                feature_maps = YOLOv4_tiny(input_layer, NUM_CLASSES)
            bbox_tensors = []
            for i, fm in enumerate(feature_maps):
                bbox_tensor = decode(fm, NUM_CLASSES, i)
                bbox_tensors.append(bbox_tensor)
            model = tf.keras.Model(input_layer, bbox_tensors)
            utils.load_weights_tiny(model, FLAGS.weights, FLAGS.model)
        else:
            if FLAGS.model == 'yolov3':
                feature_maps = YOLOv3(input_layer, NUM_CLASSES)
                bbox_tensors = []
                for i, fm in enumerate(feature_maps):
                    bbox_tensor = decode(fm, NUM_CLASSES, i)
                    bbox_tensors.append(bbox_tensor)
                model = tf.keras.Model(input_layer, bbox_tensors)
                utils.load_weights_v3(model, FLAGS.weights)
            elif FLAGS.model == 'yolov4':
                feature_maps = YOLOv4(input_layer, NUM_CLASSES)
                bbox_tensors = []
                for i, fm in enumerate(feature_maps):
                    bbox_tensor = decode(fm, NUM_CLASSES, i)
                    bbox_tensors.append(bbox_tensor)
                model = tf.keras.Model(input_layer, bbox_tensors)
                if FLAGS.weights.split(".")[-1] == "weights":
                    utils.load_weights(model, FLAGS.weights)
                else:
                    model.load_weights(FLAGS.weights).expect_partial()
        model.summary()
    elif FLAGS.framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
    elif FLAGS.framework == 'trt':
        saved_model_loaded = tf.saved_model.load(FLAGS.weights,
                                                 tags=[tag_constants.SERVING])
        infer = saved_model_loaded.signatures['serving_default']

    max_cosine_distance = 0.7  # 0.5 / 0.7
    nn_budget = None
    model_filename = './weights/tracker/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)
    key_list = list(CLASSES.keys())
    val_list = list(CLASSES.values())
    Track_only = []

    logging.info("Models loaded!")
    while True:
        return_value, frame = vid.read()
        if not return_value:
            logging.warning("Empty Frame")
            break

        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame_size = frame.shape[:2]

        image_data = utils.image_preprocess(np.copy(frame),
                                            [input_size, input_size])
        image_data = image_data[np.newaxis, ...].astype(np.float32)

        t1 = time.time()
        if FLAGS.framework == 'tf':
            pred_bbox = model.predict(image_data)
        elif FLAGS.framework == 'tflite':
            interpreter.set_tensor(input_details[0]['index'], image_data)
            interpreter.invoke()
            pred_bbox = [
                interpreter.get_tensor(output_details[i]['index'])
                for i in range(len(output_details))
            ]
        elif FLAGS.framework == 'trt':
            batched_input = tf.constant(image_data)
            pred_bbox = []
            result = infer(batched_input)
            for _, value in result.items():
                value = value.numpy()
                pred_bbox.append(value)
        t2 = time.time()
        times.append(t2 - t1)
        times = times[-20:]
        ms = sum(times) / len(times) * 1000
        fps = 1000 / ms

        if FLAGS.model == 'yolov4':
            pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES,
                                                XYSCALE)
        else:
            pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES)
        bboxes = utils.postprocess_boxes(pred_bbox, frame_size, input_size,
                                         0.5)  # 0.25
        bboxes = utils.nms(bboxes, 0.5, method='nms')  # 0.213

        boxes, scores, names = [], [], []
        for bbox in bboxes:
            if len(Track_only) == 0 or CLASSES[int(bbox[5])] in Track_only:
                boxes.append([
                    bbox[0].astype(int), bbox[1].astype(int),
                    bbox[2].astype(int) - bbox[0].astype(int),
                    bbox[3].astype(int) - bbox[1].astype(int)
                ])
                scores.append(bbox[4])
                names.append(CLASSES[int(bbox[5])])

        boxes = np.array(boxes)
        names = np.array(names)
        scores = np.array(scores)
        features = np.array(encoder(frame, boxes))
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                boxes, scores, names, features)
        ]

        tracker.predict()
        tracker.update(detections)

        tracked_bboxes = []
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:  # 1 / 5
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()
            tracking_id = track.track_id
            index = key_list[val_list.index(class_name)]
            tracked_bboxes.append(bbox.tolist() + [tracking_id, index])

        image = utils.draw_bbox(frame,
                                tracked_bboxes,
                                classes=CLASSES,
                                tracking=True)

        image = cv2.putText(
            image,
            "Time: {:.2f}ms".format(sum(times) / len(times) * 1000),
            (0, 36),  # 24
            cv2.FONT_HERSHEY_SIMPLEX,
            1.5,
            (0, 0, 255),
            2)

        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        cv2.namedWindow("Detections", cv2.WINDOW_AUTOSIZE)
        cv2.imshow("Detections", image)
        if FLAGS.output:
            out.write(image)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    vid.release()
    if FLAGS.output:
        out.release()
    cv2.destroyAllWindows()
Example #12
def main(_argv):
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for physical_device in physical_devices:
        tf.config.experimental.set_memory_growth(physical_device, True)

    ppe_input_size = FLAGS.ppe_size
    helmet_input_size = FLAGS.helmet_size
    try:
        vid = cv2.VideoCapture(int(FLAGS.video))
    except:
        vid = cv2.VideoCapture(FLAGS.video)

    times = []
    if FLAGS.output:
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

    ppe_detector = create_ppe_detector(ppe_input_size)
    helmet_detector = create_helmet_detector(helmet_input_size)

    nacho_image1 = face_recognition.load_image_file("./data/faces/nacho1.jpg")
    nacho_image2 = face_recognition.load_image_file("./data/faces/nacho2.jpg")
    nacho_image3 = face_recognition.load_image_file("./data/faces/nacho3.jpg")

    nacho_face_encoding1 = face_recognition.face_encodings(nacho_image1)[0]
    nacho_face_encoding2 = face_recognition.face_encodings(nacho_image2)[0]
    nacho_face_encoding3 = face_recognition.face_encodings(nacho_image3)[0]

    known_face_encodings = [
        nacho_face_encoding1, nacho_face_encoding2, nacho_face_encoding3
    ]
    known_face_names = ["Nacho", "Nacho", "Nacho"]
    face_locations = []
    face_encodings = []
    face_names = []

    max_cosine_distance = 0.7  # 0.5 / 0.7
    nn_budget = None
    model_filename = './weights/tracker/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)
    Track_only = []

    logging.info("Models loaded!")
    while True:
        return_value, frame = vid.read()
        if not return_value:
            logging.warning("Empty Frame")
            break

        frame_size = frame.shape[:2]
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        # small_frame = cv2.resize(frame, (0, 0), fx=0.25, fy=0.25)

        img_in = tf.expand_dims(frame, 0)
        img_in = transform_images(img_in, helmet_input_size)

        image_data = utils.image_preprocess(np.copy(frame),
                                            [ppe_input_size, ppe_input_size])
        image_data = image_data[np.newaxis, ...].astype(np.float32)

        t1 = time.time()
        if FLAGS.framework == 'tf':
            ppe_pred_bbox = ppe_detector.predict(image_data)
        elif FLAGS.framework == 'trt':
            batched_input = tf.constant(image_data)
            ppe_pred_bbox = []
            result = ppe_detector(batched_input)
            for _, value in result.items():
                value = value.numpy()
                ppe_pred_bbox.append(value)

        helmet_pred_bbox = helmet_detector.predict(img_in)

        # face_locations = face_recognition.face_locations(small_frame)
        face_locations = face_recognition.face_locations(frame)
        face_encodings = face_recognition.face_encodings(frame, face_locations)
        face_names = []
        for face_encoding in face_encodings:
            matches = face_recognition.compare_faces(known_face_encodings,
                                                     face_encoding)
            name = "Unknown"

            # if True in matches:
            #     first_match_index = matches.index(True)
            #     name = known_face_names[first_match_index]
            face_distances = face_recognition.face_distance(
                known_face_encodings, face_encoding)
            best_match_index = np.argmin(face_distances)
            if matches[best_match_index]:
                name = known_face_names[best_match_index]
            face_names.append(name)

        t2 = time.time()
        times.append(t2 - t1)
        times = times[-20:]
        ms = sum(times) / len(times) * 1000
        fps = 1000 / ms

        ppe_bboxes = post_process_boxes(ppe_pred_bbox, 'yolov4', frame_size,
                                        ppe_input_size)
        helmet_bboxes = post_process_boxes(helmet_pred_bbox, 'yolov3',
                                           frame_size, helmet_input_size)

        face_bboxes = []
        for (top, right, bottom, left), name in zip(face_locations,
                                                    face_names):
            # top *= 4
            # left *= 4
            # right *= 4
            # bottom *= 4
            face_bboxes.append([left, top, right, bottom, name])
        bboxes = utils.calculate_status(ppe_bboxes, helmet_bboxes, [])

        boxes, safety_scores, site_roles, face_names = [], [], [], []
        for bbox in bboxes:
            boxes.append([
                bbox[0].astype(int), bbox[1].astype(int),
                bbox[2].astype(int) - bbox[0].astype(int),
                bbox[3].astype(int) - bbox[1].astype(int)
            ])
            safety_scores.append(bbox[4])
            site_roles.append(bbox[5])
            face_names.append("None")
        for bbox in face_bboxes:
            boxes.append(
                [bbox[0], bbox[1], bbox[2] - bbox[0], bbox[3] - bbox[1]])
            safety_scores.append(0)
            site_roles.append(-1)
            face_names.append(bbox[4])

        boxes = np.array(boxes)
        safety_scores = np.array(safety_scores)
        site_roles = np.array(site_roles)
        face_names = np.array(face_names)
        features = np.array(encoder(frame, boxes))
        detections = [
            Detection(bbox, 0.9, 0, feature, safety_score, site_role,
                      face_name)
            for bbox, feature, safety_score, site_role, face_name in zip(
                boxes, features, safety_scores, site_roles, face_names)
        ]

        tracker.predict()
        tracker.update(detections)

        tracked_bboxes = []
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:  # 1 / 5
                continue
            bbox = track.to_tlbr()

            tracking_id = track.track_id
            safety_score = track.get_safety_score()
            site_role = track.get_site_role()
            face_name = track.get_face_name()
            if site_role == -1:
                to_add = [face_name, site_role, tracking_id]
            else:
                to_add = [safety_score, site_role, tracking_id]

            tracked_bboxes.append(bbox.tolist() + to_add)
        image = utils.draw_demo(frame, tracked_bboxes)
        image = cv2.putText(image, "Time: {:.2f} FPS".format(fps), (0, 24),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        cv2.namedWindow("Detections", cv2.WINDOW_AUTOSIZE)
        cv2.imshow("Detections", image)
        if FLAGS.output:
            out.write(image)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    vid.release()
    if FLAGS.output:
        out.release()
    cv2.destroyAllWindows()
Example #13
import os
from time import sleep

from scipy.spatial.distance import cosine

import imutils
from imutils.video import VideoStream

# deep sort
from deep_sort import generate_detections as gd
from deep_sort.detection import Detection
from deep_sort.preprocessing import non_max_suppression

# constants
nms_max_overlap = 1.0

# initialize an instance of the deep-sort tracker
w_path = os.path.join(os.path.dirname(__file__), 'deep_sort/mars-small128.pb')
encoder = gd.create_box_encoder(w_path, batch_size=1)
    

def camera_frame_gen(args):

    # initialize the video stream and allow the camera sensor to warmup
    print("> starting video stream...")
    vs = VideoStream(src=0).start()
    sleep(2.0)

    # loop over the frames from the video stream
    while True:
        # pull frame from video stream
        frame = vs.read()

        # array to PIL image format
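A minimal, hedged sketch of how the pieces imported above (encoder, Detection, non_max_suppression, nms_max_overlap) are typically combined for a single frame; frame, boxes as (x, y, w, h) arrays and scores are assumed to come from a separate detector and are not part of the snippet above:

# Hedged sketch, not part of the original snippet: per-frame detection prep.
# Assumed inputs: frame, boxes as (x, y, w, h), scores from a detector.
import numpy as np

features = encoder(frame, boxes)                         # appearance embedding per box
detections = [Detection(b, s, f)
              for b, s, f in zip(boxes, scores, features)]

# Suppress heavily overlapping boxes before handing them to a tracker.
keep = non_max_suppression(np.array([d.tlwh for d in detections]),
                           nms_max_overlap,
                           np.array([d.confidence for d in detections]))
detections = [detections[i] for i in keep]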
Example #14
def Object_tracking(YoloV3,
                    video_path,
                    output_path,
                    input_size=416,
                    show=False,
                    CLASSES=YOLO_COCO_CLASSES,
                    score_threshold=0.3,
                    iou_threshold=0.45,
                    rectangle_colors='',
                    Track_only=[]):
    # Definition of the parameters
    max_cosine_distance = 0.7
    nn_budget = None

    #initialize deep sort object
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    times = []

    if video_path:
        vid = cv2.VideoCapture(video_path)  # detect on video
    else:
        vid = cv2.VideoCapture(0)  # detect from webcam

    # by default VideoCapture returns float instead of int
    width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(vid.get(cv2.CAP_PROP_FPS))
    codec = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter(output_path, codec, fps,
                          (width, height))  # output_path must be .mp4

    NUM_CLASS = read_class_names(CLASSES)
    key_list = list(NUM_CLASS.keys())
    val_list = list(NUM_CLASS.values())
    while True:
        _, img = vid.read()

        try:
            original_image = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
        except:
            break
        image_data = image_preprocess(np.copy(original_image),
                                      [input_size, input_size])
        image_data = tf.expand_dims(image_data, 0)

        t1 = time.time()
        pred_bbox = YoloV3.predict(image_data)
        t2 = time.time()

        times.append(t2 - t1)
        times = times[-20:]

        pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
        pred_bbox = tf.concat(pred_bbox, axis=0)

        bboxes = postprocess_boxes(pred_bbox, original_image, input_size,
                                   score_threshold)
        bboxes = nms(bboxes, iou_threshold, method='nms')

        # extract bboxes to boxes (x, y, width, height), scores and names
        boxes, scores, names = [], [], []
        for bbox in bboxes:
            if len(Track_only) == 0 or NUM_CLASS[int(bbox[5])] in Track_only:
                boxes.append([
                    bbox[0].astype(int), bbox[1].astype(int),
                    bbox[2].astype(int) - bbox[0].astype(int),
                    bbox[3].astype(int) - bbox[1].astype(int)
                ])
                scores.append(bbox[4])
                names.append(NUM_CLASS[int(bbox[5])])

        # Obtain all the detections for the given frame.
        boxes = np.array(boxes)
        names = np.array(names)
        scores = np.array(scores)
        features = np.array(encoder(original_image, boxes))
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                boxes, scores, names, features)
        ]

        # Pass detections to the deepsort object and obtain the track information.
        tracker.predict()
        tracker.update(detections)

        # Obtain info from the tracks
        tracked_bboxes = []
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 5:
                continue
            bbox = track.to_tlbr()  # Get the corrected/predicted bounding box
            class_name = track.get_class()  # Get the class name of this object
            tracking_id = track.track_id  # Get the ID for this particular track
            index = key_list[val_list.index(class_name)]  # Look up the class index by its name
            tracked_bboxes.append(bbox.tolist() + [tracking_id, index])  # Structure the data so draw_bbox can use it

        ms = sum(times) / len(times) * 1000
        fps = 1000 / ms

        # draw detection on frame
        image = draw_bbox(original_image,
                          tracked_bboxes,
                          CLASSES=CLASSES,
                          tracking=True)
        image = cv2.putText(image, "Time: {:.1f} FPS".format(fps), (0, 30),
                            cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)

        # draw original yolo detection
        #image = draw_bbox(image, bboxes, CLASSES=CLASSES, show_label=False, rectangle_colors=rectangle_colors, tracking=True)

        #print("Time: {:.2f}ms, {:.1f} FPS".format(ms, fps))
        if output_path != '': out.write(image)
        if show:
            cv2.imshow('output', image)

            if cv2.waitKey(25) & 0xFF == ord("q"):
                cv2.destroyAllWindows()
                break

    cv2.destroyAllWindows()
Example #15
    def system(self,
               video_path,
               output_path,
               input_size=320,
               show=False,
               CLASSES='tiny_yolo/data/coco.names',
               score_threshold=0.3,
               iou_threshold=0.45,
               rectangle_colors='',
               Track_only=[],
               display_tm=False,
               realTime=True):
        #arducam_utils = ArducamUtils(0)

        # Definition of the  deep sort parameters
        max_cosine_distance = 0.7
        nn_budget = None

        #initialize deep sort object
        model_filename = 'model_data/mars-small128.pb'  # deep sort tensorflow pretrained model
        encoder = gdet.create_box_encoder(model_filename, batch_size=1)
        metric = nn_matching.NearestNeighborDistanceMetric(
            "cosine", max_cosine_distance, nn_budget)
        tracker = Tracker(metric)

        times, times_2 = [], []  #parameters for finding fps

        if video_path:
            vid = cv2.VideoCapture(video_path)  # detect on video
        else:
            print("\n\n\nSelected device 0")
            vid = cv2.VideoCapture(0, cv2.CAP_V4L2)  # detect from webcam
            #vid.set(cv2.CAP_PROP_CONVERT_RGB, arducam_utils.convert2rgb)
            vid.set(cv2.CAP_PROP_FPS, 2)

        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        #fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*'MPEG')  # defining video writer
        out = cv2.VideoWriter(output_path, codec, 30,
                              (width, height))  # output_path must be .avi

        NUM_CLASS = self.read_class_names(
            CLASSES)  # reading coco classes in the form of key value
        num_classes = len(NUM_CLASS)
        key_list = list(NUM_CLASS.keys())
        val_list = list(NUM_CLASS.values())

        # calculating parameters for the image processing function
        loop_check, original_frame = vid.read()
        if not loop_check:
            print("\n\nCouldn't read the video")
            return False
        # colors for detection
        hsv_tuples = [(1.0 * x / num_classes, 1., 1.)
                      for x in range(num_classes)]
        detection_colors = list(
            map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
        detection_colors = list(
            map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
                detection_colors))
        # random.seed(0)
        random.shuffle(detection_colors)  # to shuffle shades of same color
        # random.seed(None)
        newTime = 0
        prevTime = 0
        dummy_time = 1
        t3 = 0
        playsound('system_ready.wav')
        # loop for video
        while True:
            loop_check, original_frame = vid.read()  # loop_check is True when a frame was read successfully
            # cv2.imshow("org",original_frame)
            if not loop_check:
                return True
            prevTime = newTime
            newTime = time.time()
            t1 = time.time()
            bboxes = self.Yolo.predict(original_frame)
            t2 = time.time()
            # extract bboxes to boxes (x, y, width, height), scores and names
            boxes, scores, names = [], [], []
            #tracking
            for bbox in bboxes:  #loop to separate the bounding boxes in the frame
                if len(Track_only) == 0 or NUM_CLASS[int(bbox[5])] in Track_only:
                    x1 = int(bbox[0])
                    y1 = int(bbox[1])
                    x2 = int(bbox[2])
                    y2 = int(bbox[3])
                    scoreVal = bbox[4]
                    class_id = int(bbox[5])
                    boxes.append([x1, y1, x2, y2])
                    scores.append(scoreVal)
                    label = NUM_CLASS[class_id]
                    names.append(label)
                    #self.image = cv2.rectangle(original_frame, (x1, y1), (x2, y2), (255, 0, 0), 2)

            # Obtain all the detections for the given frame.
            boxes = np.array(boxes)
            names = np.array(names)
            scores = np.array(scores)
            features = np.array(encoder(original_frame, boxes))
            # create deep sort object for detection
            detections = [
                Detection(bbox, score, class_name, feature)
                for bbox, score, class_name, feature in zip(
                    boxes, scores, names, features)
            ]

            if realTime:
                tracked_bboxes = time_to_contact(original_frame,
                                                 tracker.matchedBoxes,
                                                 newTime,
                                                 prevTime,
                                                 key_list,
                                                 val_list,
                                                 display_tm=display_tm)
            else:
                tracked_bboxes = time_to_contact(original_frame,
                                                 tracker.matchedBoxes,
                                                 dummy_time,
                                                 dummy_time - 0.01666666666,
                                                 key_list,
                                                 val_list,
                                                 display_tm=display_tm)

            # Pass detections to the deepsort object and obtain the track information.
            tracker.predict()
            tracker.update(detections)

            # draw detection on frame
            self.image = self.draw_bbox(original_frame,
                                        tracked_bboxes,
                                        detection_colors,
                                        NUM_CLASS,
                                        tracking=True)

            # calculating fps
            t3 = time.time()
            times.append(t2 - t1)
            times_2.append(t3 - t1)

            times = times[-20:]
            times_2 = times_2[-20:]

            ms = sum(times) / len(times) * 1000
            fps = 1000 / ms
            fps2 = 1000 / (sum(times_2) / len(times_2) * 1000)

            print("Time: {:.2f}ms, Detection FPS: {:.1f}, total FPS: {:.1f}".
                  format(ms, fps, fps2))
            if output_path != '': out.write(self.image)
            if False:
                cv2.imshow('Tracked', self.image)

                if cv2.waitKey(25) & 0xFF == ord("q"):
                    cv2.destroyAllWindows()
                    break

        cv2.destroyAllWindows()
Example #16
 def load_tracker(self):
     model_filename = '/Users/ajaymudhai/Desktop/SL/model_data/mars1-small128.pb'
     self.encoder = gdet.create_box_encoder(model_filename, batch_size=1)
     metric = nn_matching.NearestNeighborDistanceMetric(
         "cosine", self.max_cosine_distance, self.nn_budget)
     self.tracker = Tracker(metric)
Example #17
def camera(self):
    file = self.FLAGS.demo
    SaveVideo = self.FLAGS.saveVideo

    if self.FLAGS.track:
        if self.FLAGS.tracker == "deep_sort":
            from deep_sort import generate_detections
            from deep_sort.deep_sort import nn_matching
            from deep_sort.deep_sort.tracker import Tracker
            metric = nn_matching.NearestNeighborDistanceMetric(
                "cosine", 0.2, 100)
            tracker = Tracker(metric)
            encoder = generate_detections.create_box_encoder(
                os.path.abspath(
                    "deep_sort/resources/networks/mars-small128.ckpt-68577"))
        elif self.FLAGS.tracker == "sort":
            from sort.sort import Sort
            encoder = None
            tracker = Sort()
    if self.FLAGS.BK_MOG and self.FLAGS.track:
        fgbg = cv2.bgsegm.createBackgroundSubtractorMOG()

    # if file == 'camera':
    #     file = 0
    # else:
    #     assert os.path.isfile(file), \
    #     'file {} does not exist'.format(file)

    camera1 = cv2.VideoCapture(file[0])
    camera2 = cv2.VideoCapture(file[1])
    camera3 = cv2.VideoCapture(file[2])

    # if file == 0:
    #     self.say('Press [ESC] to quit video')
    #
    # assert camera.isOpened(), \
    # 'Cannot capture source'

    if self.FLAGS.csv:
        f = open('{}.csv'.format(file), 'w')
        writer = csv.writer(f, delimiter=',')
        writer.writerow(['frame_id', 'track_id', 'x', 'y', 'w', 'h'])
        f.flush()
    else:
        f = None
        writer = None

    # if file == 0:#camera window
    #     cv2.namedWindow('', 0)
    #     _, frame = camera.read()
    #     height, width, _ = frame.shape
    #     cv2.resizeWindow('', width, height)
    # else:
    #     _, frame = camera.read()
    #     height, width, _ = frame.shape

    # if SaveVideo:
    #     fourcc = cv2.VideoWriter_fourcc(*'XVID')
    #     if file == 0:#camera window
    #       fps = 1 / self._get_fps(frame)
    #       if fps < 1:
    #         fps = 1
    #     else:
    #         fps = round(camera1.get(cv2.CAP_PROP_FPS))
    #     videoWriter = cv2.VideoWriter(
    #         'output_{}'.format(file), fourcc, fps, (width, height))

    # buffers for demo in batch
    buffer_inp = list()
    buffer_pre = list()

    elapsed = 0
    start = timer()
    self.say('Press [ESC] to quit demo')
    #postprocessed = []
    # Loop through frames
    n = 0
    while (camera1.isOpened() and camera2.isOpened() and camera3.isOpened()):
        elapsed += 1
        ret1, frame1 = camera1.read()
        ret2, frame2 = camera2.read()
        ret3, frame3 = camera3.read()
        #if(ret1 and ret2 and ret3):
        h1, w1 = frame1.shape[:2]
        vis = np.concatenate((frame2, frame1, frame3), axis=1)
        if self.FLAGS.skip != n:
            n += 1
            continue
        n = 0

        # while camera.isOpened():
        #     elapsed += 1
        #     _, frame = camera.read()
        #     if frame is None:
        #         print ('\nEnd of Video')
        #         break
        #     if self.FLAGS.skip != n :
        #         n+=1
        #         continue
        #     n = 0
        if self.FLAGS.BK_MOG and self.FLAGS.track:
            fgmask = fgbg.apply(vis)
        else:
            fgmask = None
        preprocessed = self.framework.preprocess(vis)
        buffer_inp.append(vis)
        buffer_pre.append(preprocessed)
        # Only process and imshow when queue is full
        if elapsed % self.FLAGS.queue == 0:
            feed_dict = {self.inp: buffer_pre}
            net_out = self.sess.run(self.out, feed_dict)
            for img, single_out in zip(buffer_inp, net_out):
                if not self.FLAGS.track:
                    postprocessed = self.framework.postprocess(single_out, img)
                else:
                    #print("else hi")
                    postprocessed = self.framework.postprocess(
                        single_out,
                        img,
                        frame_id=elapsed,
                        csv_file=f,
                        csv=writer,
                        mask=fgmask,
                        encoder=encoder,
                        tracker=tracker)
                if SaveVideo:
                    videoWriter.write(postprocessed)
                if self.FLAGS.display:
                    cv2.imshow('This is postprocessed', postprocessed)
            # Clear Buffers
            buffer_inp = list()
            buffer_pre = list()

        if elapsed % 5 == 0:
            sys.stdout.write('\r')
            sys.stdout.write('{0:3.3f} FPS'.format(elapsed /
                                                   (timer() - start)))
            sys.stdout.flush()
        if self.FLAGS.display:
            choice = cv2.waitKey(1)
            if choice == 27:
                break

    sys.stdout.write('\n')
    if SaveVideo:
        videoWriter.release()
    if self.FLAGS.csv:
        f.close()
    camera1.release()
    camera2.release()
    camera3.release()
    if self.FLAGS.display:
        cv2.destroyAllWindows()
Example #18
def camera(self):
    file = self.FLAGS.demo
    SaveVideo = self.FLAGS.saveVideo

    if self.FLAGS.track:
        if self.FLAGS.tracker == "deep_sort":
            from deep_sort import generate_detections
            from deep_sort.deep_sort import nn_matching
            from deep_sort.deep_sort.tracker import Tracker
            metric = nn_matching.NearestNeighborDistanceMetric(
                "cosine", 0.2, 100)
            tracker = Tracker(metric)
            encoder = generate_detections.create_box_encoder(
                os.path.abspath(
                    "deep_sort/resources/networks/mars-small128.ckpt-68577"))
        elif self.FLAGS.tracker == "sort":
            from sort.sort import Sort
            encoder = None
            tracker = Sort()
    if self.FLAGS.BK_MOG and self.FLAGS.track:
        fgbg = cv2.bgsegm.createBackgroundSubtractorMOG()

    if file == 'camera':
        file = 0
    else:
        assert os.path.isfile(file), \
        'file {} does not exist'.format(file)

    camera = cv2.VideoCapture(file)

    if file == 0:
        self.say('Press [ESC] to quit video')

    assert camera.isOpened(), \
    'Cannot capture source'

    if self.FLAGS.csv:
        f = open('{}.csv'.format(file), 'w')
        writer = csv.writer(f, delimiter=',')
        writer.writerow(['frame_id', 'track_id', 'x', 'y', 'w', 'h'])
        f.flush()
    else:
        f = None
        writer = None
    if file == 0:  #camera window
        cv2.namedWindow(self.FLAGS.object_id, 0)
        _, frame = camera.read()
        height, width, _ = frame.shape
        cv2.resizeWindow(self.FLAGS.object_id, int(width * 0.5), int(height * 0.5))
    else:
        _, frame = camera.read()
        height, width, _ = frame.shape

    if self.FLAGS.push_stream:
        ffmpeg_pipe(self, file, width, height)

    if SaveVideo:
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        if file == 0:  #camera window
            fps = 1 / self._get_fps(frame)
            if fps < 1:
                fps = 1
        else:
            fps = round(camera.get(cv2.CAP_PROP_FPS))
        videoWriter = cv2.VideoWriter('output_{}'.format(file), fourcc, fps,
                                      (width, height))

    # buffers for demo in batch
    buffer_inp = list()
    buffer_pre = list()

    elapsed = 0
    start = timer()
    self.say('Press [ESC] to quit demo')
    #postprocessed = []
    # Loop through frames
    n = 0
    while camera.isOpened():
        if self.FLAGS.process_status == 1:  # esc
            print("gongjia: Stoped! ")
            break
        if self.FLAGS.process_status == 2:
            #print("gongjia: Paused! ")
            continue
        elapsed += 1
        _, frame = camera.read()
        if frame is None:
            print('\nEnd of Video')
            break
        if self.FLAGS.skip != n:
            n += 1
            continue
        n = 0
        if self.FLAGS.BK_MOG and self.FLAGS.track:
            fgmask = fgbg.apply(frame)
        else:
            fgmask = None
        preprocessed = self.framework.preprocess(frame)
        buffer_inp.append(frame)
        buffer_pre.append(preprocessed)
        # Only process and imshow when queue is full
        if elapsed % self.FLAGS.queue == 0:
            feed_dict = {self.inp: buffer_pre}
            net_out = self.sess.run(self.out, feed_dict)
            for img, single_out in zip(buffer_inp, net_out):
                if not self.FLAGS.track:
                    postprocessed = self.framework.postprocess(single_out, img)
                else:
                    postprocessed = self.framework.postprocess(
                        single_out,
                        img,
                        frame_id=elapsed,
                        csv_file=f,
                        csv=writer,
                        mask=fgmask,
                        encoder=encoder,
                        tracker=tracker)
                if SaveVideo:
                    videoWriter.write(postprocessed)
                if self.FLAGS.display:
                    cv2.imshow(self.FLAGS.object_id, postprocessed)
                if self.FLAGS.push_stream:
                    #im = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
                    self.pipe.stdin.write(postprocessed.tobytes())

            # Clear Buffers
            buffer_inp = list()
            buffer_pre = list()

        if elapsed % 5 == 0:
            sys.stdout.write('\r')
            sys.stdout.write(' {0:3.3f} FPS '.format(elapsed /
                                                     (timer() - start)))
            sys.stdout.flush()
        if self.FLAGS.display:
            choice = cv2.waitKey(1)
            if choice == 27:
                break

    cv2.imwrite(
        '{}_{}_counter.jpg'.format(self.FLAGS.demo, self.FLAGS.object_id),
        postprocessed)
    sys.stdout.write('\n')
    if SaveVideo:
        videoWriter.release()
    if self.FLAGS.csv:
        f.close()
    camera.release()
    if self.FLAGS.display:
        cv2.destroyAllWindows()
    if self.FLAGS.push_stream:
        self.pipe.stdin.close()
        self.pipe.wait()
Example #19
def Object_tracking(Yolo, video_path, output_path, input_size=416, show=False, CLASSES=YOLO_COCO_CLASSES, score_threshold=0.3, iou_threshold=0.45, rectangle_colors='', Track_only = []):
    # Definition of the parameters
    max_cosine_distance = 0.7
    nn_budget = None
    
    #initialize deep sort object
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    times, times_2 = [], []

    if video_path:
        vid = cv2.VideoCapture(video_path) # detect on video
    else:
        vid = cv2.VideoCapture(0) # detect from webcam

    # by default VideoCapture returns float instead of int
    length = int(vid.get(cv2.CAP_PROP_FRAME_COUNT))
    width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(vid.get(cv2.CAP_PROP_FPS))
    print("VIDEO PROPERTIES:FrameCount:{}\tWidth:{}\tHeight:{}\tFps:{}\t".format(length,width,height,fps))
    
    codec = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter(output_path, codec, fps, (width, height)) # output_path must be .mp4

    NUM_CLASS = read_class_names(CLASSES)
    key_list = list(NUM_CLASS.keys()) 
    val_list = list(NUM_CLASS.values())
    
    
    #1.BACKGROUND DETECTION
    backSub = cv2.createBackgroundSubtractorMOG2(history = 400, varThreshold = 16, detectShadows = False)
    bgMask=None
    frame_no=0
    while True:
        _, frame = vid.read()
        frame_no=frame_no+1
        try:
            original_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            original_frame = cv2.cvtColor(original_frame, cv2.COLOR_BGR2RGB)
        except:
            break
                
        
        #1.1 BACKGROUND Update
        fgMask = backSub.apply(original_frame)
        bgMask = backSub.getBackgroundImage()
        
        
        if frame_no % 100==0:
            print(frame_no)
        
        
        image_data = image_preprocess(np.copy(original_frame), [input_size, input_size])
        #image_data = tf.expand_dims(image_data, 0)
        image_data = image_data[np.newaxis, ...].astype(np.float32)

        t1 = time.time()
        if YOLO_FRAMEWORK == "tf":
            pred_bbox = Yolo.predict(image_data)
        elif YOLO_FRAMEWORK == "trt":
            batched_input = tf.constant(image_data)
            result = Yolo(batched_input)
            pred_bbox = []
            for key, value in result.items():
                value = value.numpy()
                pred_bbox.append(value)
        
        #t1 = time.time()
        #pred_bbox = Yolo.predict(image_data)
        t2 = time.time()
        
        pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
        pred_bbox = tf.concat(pred_bbox, axis=0)

        bboxes = postprocess_boxes(pred_bbox, original_frame, input_size, score_threshold)
        bboxes = nms(bboxes, iou_threshold, method='nms')

        # extract bboxes to boxes (x, y, width, height), scores and names
        boxes, scores, names = [], [], []
        for bbox in bboxes:
            if len(Track_only) !=0 and NUM_CLASS[int(bbox[5])] in Track_only or len(Track_only) == 0:
                boxes.append([bbox[0].astype(int), bbox[1].astype(int), bbox[2].astype(int)-bbox[0].astype(int), bbox[3].astype(int)-bbox[1].astype(int)])
                scores.append(bbox[4])
                names.append(NUM_CLASS[int(bbox[5])])

        # Obtain all the detections for the given frame.
        boxes = np.array(boxes) 
        names = np.array(names)
        scores = np.array(scores)
        features = np.array(encoder(original_frame, boxes))
        detections = [Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in zip(boxes, scores, names, features)]

        # Pass detections to the deepsort object and obtain the track information.
        tracker.predict()
        tracker.update(detections)

        # Obtain info from the tracks
        tracked_bboxes = []
        
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 5:
                continue 
            bbox = track.to_tlbr() # Get the corrected/predicted bounding box
            class_name = track.get_class() #Get the class name of particular object
            tracking_id = track.track_id # Get the ID for the particular track
            index = key_list[val_list.index(class_name)] # Get predicted object index by object name
            tracked_bboxes.append(bbox.tolist() + [tracking_id, index]) # Structure data, that we could use it with our draw_bbox function
            
            #Save  to File          
            box_item=bbox.tolist() + [tracking_id, index,frame_no]
            ts.save(box_item)
            
        # draw detection on frame
        image = draw_bbox(original_frame, tracked_bboxes, CLASSES=CLASSES, tracking=True)

        t3 = time.time()
        times.append(t2-t1)
        times_2.append(t3-t1)
        
        times = times[-20:]
        times_2 = times_2[-20:]

        ms = sum(times)/len(times)*1000
        fps = 1000 / ms
        fps2 = 1000 / (sum(times_2)/len(times_2)*1000)
        
        image = cv2.putText(image, "Time: {:.1f} FPS".format(fps), (0, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)

        # draw original yolo detection
        #image = draw_bbox(image, bboxes, CLASSES=CLASSES, show_label=False, rectangle_colors=rectangle_colors, tracking=True)

        print("Time: {:.2f}ms, Detection FPS: {:.1f}, total FPS: {:.1f}".format(ms, fps, fps2))
        if output_path != '': out.write(image)
        if show:
            cv2.imshow('output', image)
            
            if cv2.waitKey(25) & 0xFF == ord("q"):
                cv2.destroyAllWindows()
                break
     
    cv2.imwrite(ts.out_bg_img,bgMask)
    
    cv2.destroyAllWindows()
Example #20
def main(yolo, video_path=0, save_path=None):
    # Definition of the parameters
    max_cosine_distance = 0.5
    nn_budget = None
    # openpose
    w, h = model_wh('0x0')
    model = 'mobilenet_thin'
    config = tf.ConfigProto(device_count={'gpu': 0})
    config.gpu_options.per_process_gpu_memory_fraction = 0.3
    e = TfPoseEstimator(get_graph_path(model),
                        target_size=(64, 64),
                        tf_config=config)
    # deep_sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    video = Video(video_path)
    # video_capture = cv2.VideoCapture()  # 'data/person.mp4'
    w = video.w
    h = video.h
    tracker = Tracker(metric,
                      img_shape=(w, h),
                      max_eu_dis=0.1 * np.sqrt((w**2 + h**2)))
    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    out = cv2.VideoWriter(save_path, fourcc, 7, (w, h))
    list_file = open(output_dir + 'detection.txt', 'w')
    frame_index = -1
    fps = 0.0

    while True:
        ret, frame = video.video_capture.read()  # frame shape 640*480*3
        if ret is not True or frame is None:
            break
        t1 = time.time()

        image = Image.fromarray(frame)
        boxes, scores, _ = yolo.detect_image(image)
        # start = time.time()
        features = encoder(frame, boxes)  # extract an appearance feature for each box
        # end = time.time()
        # print(end-start)
        detections = [
            Detection(bbox_and_feature[0], scores[idx], bbox_and_feature[1])
            for idx, bbox_and_feature in enumerate(zip(boxes, features))
        ]  # store each box and its feature in a Detection object

        # Call the tracker
        tracker.predict()
        tracker.update(detections, np.asarray(image))
        humans = get_keypoints(image, e)
        frame = draw_humans(image, humans, imgcopy=False)
        for index, track in enumerate(tracker.tracks):
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            cv2.putText(frame, "{}".format(track.track_id),
                        (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200,
                        (0, 255, 0), 2)

        # for det in detections:
        #     bbox = det.to_tlbr()
        #     cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)

        cv2.imshow('', frame)

        if save_path is not None:
            # Define the codec and create VideoWriter object

            # save a frame
            out.write(frame)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(boxes) != 0:
                for i in range(0, len(boxes)):
                    list_file.write(
                        str(boxes[i][0]) + ' ' + str(boxes[i][1]) + ' ' +
                        str(boxes[i][2]) + ' ' + str(boxes[i][3]) + ' ')
            list_file.write('\n')

        fps = (fps + (1. / (time.time() - t1))) / 2
        # print("fps= %f" % fps)

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    video.video_capture.release()
    if save_path is not None:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
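
The per-frame wiring is the same across these examples: build an appearance encoder, a cosine nearest-neighbor metric and a Tracker once, then detect, encode, predict and update on every frame. A minimal sketch of that loop, assuming the stock deep_sort package layout (module paths, the Detection signature and the mars-small128.pb location vary between the repositories excerpted here):

import numpy as np
from deep_sort import nn_matching
from deep_sort.detection import Detection
from deep_sort.tracker import Tracker
from tools import generate_detections as gdet

encoder = gdet.create_box_encoder('model_data/mars-small128.pb', batch_size=1)
metric = nn_matching.NearestNeighborDistanceMetric("cosine", 0.5, None)
tracker = Tracker(metric)

def track_frame(frame, boxes, scores):
    # boxes are (x, y, w, h); the encoder crops each box and returns a 128-d appearance feature
    features = encoder(frame, np.array(boxes))
    detections = [Detection(b, s, f) for b, s, f in zip(boxes, scores, features)]
    tracker.predict()           # propagate every track with its Kalman filter
    tracker.update(detections)  # associate detections to tracks (appearance metric + motion gating)
    return [(t.track_id, t.to_tlbr()) for t in tracker.tracks
            if t.is_confirmed() and t.time_since_update <= 1]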
예제 #21
0
def Object_tracking(Yolo, video_path, output_path, input_size=416, show=False, CLASSES=YOLO_COCO_CLASSES, score_threshold=0.3, iou_threshold=0.45, rectangle_colors='', Track_only = [], custom_yolo=None, custom_classes=YOLO_CUSTOM_CLASSES, Custom_track_only=[]):
    # Definition of the parameters
    max_cosine_distance = 0.7
    nn_budget = None


    #initialize deep sort object
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    times, times_2 = [], []

    if video_path:
        vid = cv2.VideoCapture(video_path) # detect on video
    else:
        vid = cv2.VideoCapture(0) # detect from webcam

    # by default VideoCapture returns float instead of int
    width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(vid.get(cv2.CAP_PROP_FPS))
    codec = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter(output_path, codec, fps, (width, height)) # output_path must be .mp4

    NUM_CLASS = read_class_names(CLASSES)
    key_list = list(NUM_CLASS.keys())
    val_list = list(NUM_CLASS.values())

    # set a bunch of flags and variables for made baskets and possessions
    possession = None
    possession_list = []
    combined_possession_avg = 0.5

    total_basket_count = 0
    basket_frame_list = []

    baskets_dict = {"Dark": 0, "Light": 0}

    made_basket_first_frame = 0
    made_basket_frames = 0
    basket_marked = False

    if custom_yolo:
      NUM_CUSTOM_CLASS = read_class_names(custom_classes)
      custom_key_list = list(NUM_CUSTOM_CLASS.keys())
      custom_val_list = list(NUM_CUSTOM_CLASS.values())

    frame_counter = 0
    # loop through each frame in video
    while True:
        _, frame = vid.read()

        try:
            first_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            original_frame = cv2.cvtColor(first_frame, cv2.COLOR_BGR2RGB)
            frame_counter += 1
        except:
            break

        image_data = image_preprocess(np.copy(first_frame), [input_size, input_size])
        #image_data = tf.expand_dims(image_data, 0)
        image_data = image_data[np.newaxis, ...].astype(np.float32)

        t1 = time.time()
        # CUSTOM BLOCK FOR BASKETBALL
        if custom_yolo:

          if YOLO_FRAMEWORK == "tf":
            # use yolo model to make prediction on the image data
            custom_pred_bbox = custom_yolo.predict(image_data)

          # reshape our data to be in correct form for processing
          custom_pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in custom_pred_bbox]
          custom_pred_bbox = tf.concat(custom_pred_bbox, axis=0)

          # get boxes based on threshold
          custom_bboxes = postprocess_boxes(custom_pred_bbox, original_frame, input_size, 0.3)
          # custom_bboxes = nms(custom_bboxes, iou_threshold, method='nms')

          # extract bboxes to boxes (x, y, width, height), scores and names
          custom_boxes, custom_scores, custom_names = [], [], []
          for bbox in custom_bboxes:
              if len(Custom_track_only) !=0 and NUM_CUSTOM_CLASS[int(bbox[5])] in Custom_track_only or len(Custom_track_only) == 0:
                  custom_boxes.append([bbox[0].astype(int), bbox[1].astype(int), bbox[2].astype(int)-bbox[0].astype(int), bbox[3].astype(int)-bbox[1].astype(int)])
                  custom_scores.append(bbox[4])
                  custom_names.append(NUM_CUSTOM_CLASS[int(bbox[5])])

          # Obtain all the detections for the given frame.
          custom_boxes = np.array(custom_boxes)
          custom_names = np.array(custom_names)
          custom_scores = np.array(custom_scores)

          # take note of the highest "scoring" made basket and basketball obj in each frame
          highest_scoring_basketball = 0
          basketball_box = None
          basketball_center = None
          highest_scoring_made_basket = 0
          made_basket_box = None
          for i, bbox in enumerate(custom_bboxes):
            # loop through each bounding box to get the "best one" of the frame
            # we do this because sometimes our model will detect two, and we know there can only be one
            name = custom_names[i]
            score = round(custom_scores[i], 3)
            if name == 'basketball':
              if score > highest_scoring_basketball:
                highest_scoring_basketball = score
                basketball_box = bbox
            if name == 'made-basket':
              if score > .85 and score > highest_scoring_made_basket:
                highest_scoring_made_basket = score
                made_basket_box = bbox

          # if it sees a basketball, put a box on it and note the center (for possession)
          if basketball_box is not None:
            cv2.rectangle(original_frame, (int(basketball_box[0]), int(basketball_box[1])), (int(basketball_box[2]), int(basketball_box[3])), (0,0,255), 1)
            cv2.rectangle(original_frame, (int(basketball_box[0]), int(basketball_box[1]-30)), (int(basketball_box[0])+(10)*17, int(basketball_box[1])), (0,0,255), -1)
            cv2.putText(original_frame, "basketball" + "-" + str(highest_scoring_basketball),(int(basketball_box[0]), int(basketball_box[1]-10)),0, 0.5, (255,255,255),1)
            basketball_center = ( (basketball_box[2]+basketball_box[0])/2, (basketball_box[3]+basketball_box[1])/2 )


          if made_basket_box is not None:
            # if theres a made basket put the box on it
            cv2.rectangle(original_frame, (int(made_basket_box[0]), int(made_basket_box[1])), (int(made_basket_box[2]), int(made_basket_box[3])), (0,255,0), 1)
            cv2.rectangle(original_frame, (int(made_basket_box[0]), int(made_basket_box[1]-30)), (int(made_basket_box[0])+(15)*17, int(made_basket_box[1])), (0,255,0), -1)
            cv2.putText(original_frame, "made-basket" + " " + str(highest_scoring_made_basket),(int(made_basket_box[0]), int(made_basket_box[1]-10)),0, 0.6, (0,0,0),1)

            if made_basket_frames == 0:
              # if this is the first frame in the sequence
              made_basket_first_frame = frame_counter

            # increment a counter for made basket frames
            made_basket_frames += 1

            # if there were 3 consecutive frames AND we haven't marked the basket yet then let's count it!
            if made_basket_frames >= 3 and not basket_marked:
              basket_marked = True
              basket_frame_list.append(made_basket_first_frame)
              if possession:
                # record which "team" scored the basket
                baskets_dict[possession] += 1

          # if no made basket make sure the made basket counter is at zero
          else:
            # no made basket
            made_basket_frames = 0

          # 60 frames after a made basket we can reset the "marked basket" flag to False
          # in essence this means we start looking for made baskets again
          if basket_marked and frame_counter > basket_frame_list[-1] + 60:
            basket_marked = False

        # END CUSTOM BLOCK

        # PERSON PREDICTION and TRACKING BLOCK

        if YOLO_FRAMEWORK == "tf":
            pred_bbox = Yolo.predict(image_data)
        elif YOLO_FRAMEWORK == "trt":
            batched_input = tf.constant(image_data)
            result = Yolo(batched_input)
            pred_bbox = []
            for key, value in result.items():
                value = value.numpy()
                pred_bbox.append(value)

        t2 = time.time()

        pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
        pred_bbox = tf.concat(pred_bbox, axis=0)

        bboxes = postprocess_boxes(pred_bbox, original_frame, input_size, score_threshold)
        bboxes = nms(bboxes, iou_threshold, method='nms')



        # extract bboxes to boxes (x, y, width, height), scores and names
        boxes, scores, names = [], [], []
        for bbox in bboxes:
            if len(Track_only) !=0 and NUM_CLASS[int(bbox[5])] in Track_only or len(Track_only) == 0:
                w = bbox[2].astype(int)-bbox[0].astype(int)
                h = bbox[3].astype(int)-bbox[1].astype(int)
                if h < height/3 and w < width/4:
                  if h > 120:
                    boxes.append([bbox[0].astype(int), bbox[1].astype(int), w, h])
                    scores.append(bbox[4])
                    names.append(NUM_CLASS[int(bbox[5])])

        # Obtain all the detections for the given frame.
        boxes = np.array(boxes)
        names = np.array(names)
        scores = np.array(scores)

        # detect jersey color using the tracked person's bounding box
        patches = [gdet.extract_image_patch(frame, box, [box[3], box[2]]) for box in boxes]
        color_ratios = [find_color(patch) for patch in patches]

        features = np.array(encoder(original_frame, boxes))

        # mark the detection
        detections = [Detection(bbox, score, class_name, feature, color_ratio) for bbox, score, class_name, feature, color_ratio in zip(boxes, scores, names, features, color_ratios)]

        # Pass detections to the deepsort object and obtain the track information.
        tracker.predict()
        tracker.update(detections)

        # Obtain info from the tracks
        tracked_bboxes = []
        color_ratio_list = []
        check_possession = False
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 5:
                continue

            color_ratio = track.get_color_ratio()
            color_ratio_list.append(color_ratio)

            bbox = track.to_tlbr() # Get the corrected/predicted bounding box
            class_name = track.get_class() #Get the class name of particular object
            tracking_id = track.track_id # Get the ID for the particular track
            index = key_list[val_list.index(class_name)] # Get predicted object index by object name

            tracked_bboxes.append(bbox.tolist() + [tracking_id, index]) # Structure the data so we can use it with our draw_bbox function

            # if there is a basketball in the frame, and it's "in" a person's bounding box, check which box it is in for possession
            if basketball_center:
              if basketball_center[0] >= bbox[0] and basketball_center[0] <= bbox[2]:
                if basketball_center[1] >= bbox[1] and basketball_center[1] <= bbox[3]:
                  check_possession = True
                  if color_ratio <= .2:
                    # light team
                    possession_list.append(0)
                  else:
                    # dark team
                    possession_list.append(1)

            else:
              # no basketball in frame
              # possession_list.append(-1)
              # test_list.pop(0)
              pass

        # if the ball is in a bounding box, update our possession tracker
        if check_possession:
          if len(possession_list) > 60:
            # this takes an average of the last 60 possessions marked to determine the current possession
            # it weights the most recent detections more
            # this algo is a WIP
            possession_list = possession_list[-60:]
            # full_avg = sum(possession_list)/len(possession)
            last_60_avg = sum(possession_list[-60:])/60
            last_30_avg = sum(possession_list[-30:])/30
            last_15_avg = sum(possession_list[-15:])/15
            last_5_avg = sum(possession_list[-5:])/5

            combined_possession_avg = round((last_60_avg + last_30_avg + last_15_avg + last_5_avg)/4,3)

            #most_common_possession = stats.mode(possession_list)[0]

          else:
            combined_possession_avg = round(sum(possession_list)/len(possession_list),3)

          # use our possession average to determine who has the ball right now
          if combined_possession_avg < 0.5:
            possession = "Light"
          elif combined_possession_avg > 0.5:
            possession = "Dark"


        # draw detection on frame
        image = draw_bbox(original_frame, tracked_bboxes, color_ratios=color_ratio_list, CLASSES=CLASSES, tracking=True)

        t3 = time.time()
        times.append(t2-t1)
        times_2.append(t3-t1)

        times = times[-20:]
        times_2 = times_2[-20:]

        ms = sum(times)/len(times)*1000
        fps = 1000 / ms
        fps2 = 1000 / (sum(times_2)/len(times_2)*1000)

        if possession == "Light":
          image = cv2.putText(image, "Posession: {}".format(possession), (width-400, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (50, 255, 255), 2)
        else:
          image = cv2.putText(image, "Posession: {}".format(possession), (width-400, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)

        # image = cv2.putText(image, "Light: {} Dark: {} None: {}".format(possession_list.count(0), possession_list.count(1), possession_list.count(-1)), (400, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
        image = cv2.putText(image, "Posession Avg: {}".format(combined_possession_avg), (400, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)

        image = cv2.putText(image, "Time: {:.1f} FPS".format(fps), (0, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)

        # draw original yolo detection
        #image = draw_bbox(image, bboxes, CLASSES=CLASSES, show_label=False, rectangle_colors=rectangle_colors, tracking=True)

        print("Time: {:.2f}ms, Detection FPS: {:.1f}, total FPS: {:.1f}".format(ms, fps, fps2))
        if output_path != '': out.write(image)
        if show:
            cv2.imshow('output', image)

            if cv2.waitKey(25) & 0xFF == ord("q"):
                cv2.destroyAllWindows()
                break

    cv2.destroyAllWindows()
    return_data = {"baskets_dict": baskets_dict, "basket_frame_list": basket_frame_list}
    print("video saved to {}".format(output_path))
    return return_data
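
The possession smoothing above can be isolated into a small helper; the sketch below reproduces the same arithmetic (the helper name is invented, the windows and rounding follow the code above):

def combined_possession_average(possession_list):
    # 0 = the Light team had the ball on that frame, 1 = the Dark team
    if len(possession_list) <= 60:
        return round(sum(possession_list) / len(possession_list), 3)
    recent = possession_list[-60:]
    last_60 = sum(recent) / 60
    last_30 = sum(recent[-30:]) / 30
    last_15 = sum(recent[-15:]) / 15
    last_5 = sum(recent[-5:]) / 5
    # the shorter windows are subsets of the longer one, so the most recent
    # frames are counted several times and therefore weighted more heavily
    return round((last_60 + last_30 + last_15 + last_5) / 4, 3)

# e.g. combined_possession_average([0] * 60 + [1] * 5) -> 0.396, well above the
# plain 60-frame mean of about 0.083, so a change of possession shows up quickly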
예제 #22
0
def Object_tracking(Yolo,
                    video_path,
                    output_path,
                    class_names,
                    image_size=416,
                    show=False,
                    rectangle_colors=''):
    # Definition of the parameters
    max_cosine_distance = 0.7
    nn_budget = None

    # initialize deep sort object
    model_filename = 'models/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    if video_path:
        vid = cv2.VideoCapture(video_path)  # detect on video
    else:
        vid = cv2.VideoCapture(0)  # detect from webcam
    width, height, fps = get_video_capture_info(vid)
    codec = cv2.VideoWriter_fourcc(*'XVID')
    # output_path must be .mp4
    out = cv2.VideoWriter(output_path, codec, fps, (width, height))

    key_list = list(class_names.keys())
    val_list = list(class_names.values())

    detection_times, tracking_times = [], []
    _, frame = vid.read()  # BGR

    while frame is not None:
        # create the original_frame for display purposes (draw_bboxes)
        original_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        original_frame = cv2.cvtColor(original_frame, cv2.COLOR_BGR2RGB)
        # preprocessing found in datasets.py
        img = preprocess_image(frame, image_size)

        t1 = time.time()
        boxes, class_inds, scores = yolo_predict(Yolo, img, frame)
        t2 = time.time()
        names = []
        for clss in class_inds:
            names.append(class_names[clss])
        features = np.array(encoder(original_frame, boxes))
        # Pass detections to the deepsort object and obtain the track information.
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                boxes, scores, names, features)
        ]
        tracker.predict()
        tracker.update(detections)

        # Obtain info from the tracks
        tracked_bboxes = get_tracker_info(tracker, val_list, key_list)

        # update the times information
        t3 = time.time()
        detection_times.append(t2 - t1)
        tracking_times.append(t3 - t1)
        detection_times = detection_times[-20:]
        tracking_times = tracking_times[-20:]

        ms, fps, fps2 = efficiency_statistics(detection_times, tracking_times)

        # draw detection on frame
        image = draw_bbox(original_frame,
                          tracked_bboxes,
                          class_names,
                          tracking=True,
                          rectangle_colors=rectangle_colors)
        image = cv2.putText(image, "Time: {:.1f} FPS".format(fps), (0, 30),
                            cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)

        # get next frame
        _, frame = vid.read()  # BGR

        # show and store the results
        print(
            "Time: {:.2f}ms, Detection FPS: {:.1f}, total FPS: {:.1f}".format(
                ms, fps, fps2))
        if output_path != '':
            out.write(image)
        if show:
            cv2.imshow('output', image)
            if cv2.waitKey(25) & 0xFF == ord("q"):
                cv2.destroyAllWindows()
                break

    cv2.destroyAllWindows()
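
예제 #22 moves the timing math into an efficiency_statistics helper that the excerpt does not show; a plausible sketch, mirroring the inline computation in 예제 #21 (assumed, not the project's actual code):

def efficiency_statistics(detection_times, tracking_times):
    # mean detection latency over the recent window, in milliseconds
    ms = sum(detection_times) / len(detection_times) * 1000
    fps = 1000 / ms  # detection-only frame rate
    fps2 = 1000 / (sum(tracking_times) / len(tracking_times) * 1000)  # detection + tracking frame rate
    return ms, fps, fps2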
예제 #23
0
sh.setFormatter(format_str)
log_file_folder = os.path.abspath(os.path.join(
    os.path.dirname(__file__))) + os.sep + "logs" + os.sep
make_dir(log_file_folder)
log_file_str = log_file_folder + os.sep + "text.log"
th = handlers.TimedRotatingFileHandler(filename=log_file_str,
                                       when='H',
                                       encoding='utf-8')
th.setFormatter(format_str)
logger.addHandler(sh)
logger.addHandler(th)

if __name__ == '__main__':

    # Deep SORT tracker
    encoder = generate_detections.create_box_encoder(ARGS.model_feature,
                                                     batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       ARGS.min_score, None)
    tracker = Tracker(metric)

    # load the model
    yolo = YOLO4(ARGS.model_yolo, ARGS.min_score)

    # read live frames from the camera, or from a video file
    try:
        video = cv2.VideoCapture(int(ARGS.video))
    except:
        video = cv2.VideoCapture(ARGS.video)

    # save the output video
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
예제 #24
0
파일: main.py 프로젝트: wss321/mot
def main(yolo, video_path=0, save_path=None):
    # Definition of the parameters
    max_cosine_distance = 0.5
    nn_budget = None

    # deep_sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    video = Video(video_path)
    # video_capture = cv2.VideoCapture()  # 'data/person.mp4'
    w = video.w
    h = video.h
    tracker = Tracker(metric,
                      img_shape=(w, h),
                      max_eu_dis=0.1 * np.sqrt((w**2 + h**2)))
    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    out = cv2.VideoWriter(save_path, fourcc, 15, (w, h))
    list_file = open(output_dir + 'detection.txt', 'w')
    frame_index = -1
    fps = 0.0
    while True:
        ret, frame = video.video_capture.read()  # frame shape 640*480*3
        if ret is not True or frame is None:
            break
        t1 = time.time()

        image = Image.fromarray(frame)
        boxes, scores, _ = yolo.detect_image(image)
        # start = time.time()
        features = encoder(frame, boxes)  # extract an appearance feature for each box
        # end = time.time()
        # print(end-start)
        detections = [
            Detection(bbox_and_feature[0], scores[idx], bbox_and_feature[1])
            for idx, bbox_and_feature in enumerate(zip(boxes, features))
        ]  # wrap each box, score and feature in a Detection object

        # Call the tracker
        tracker.predict()
        tracker.update(detections, np.asarray(image))
        frame_index = frame_index + 1
        # if frame_index == 50:
        #         cv2.imwrite('output/img/org.jpg', frame)
        for index, track in enumerate(tracker.tracks):
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()

            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)

            cv2.putText(frame, "{}".format(track.track_id),
                        (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200,
                        track.color, 2)

        # if frame_index == 50:
        #     cv2.imwrite('output/img/det.jpg', frame)
        # for index, track in enumerate(tracker.tracks):
        #     if not track.is_confirmed() or track.time_since_update > 1:
        #         continue
        #     bbox = track.to_tlbr()
        #     cv2.putText(frame, "{}".format(track.track_id), (int(bbox[0]), int(bbox[1])), 0,
        #                 5e-3 * 200, (0, 255, 0), 2)
        # if frame_index == 50:
        # cv2.imwrite('output/img/track_{}.jpg'.format(frame_index), frame)

        cv2.imshow('', frame)

        if save_path is not None:
            # Define the codec and create VideoWriter object

            # save a frame
            out.write(frame)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(boxes) != 0:
                for i in range(0, len(boxes)):
                    list_file.write(
                        str(boxes[i][0]) + ' ' + str(boxes[i][1]) + ' ' +
                        str(boxes[i][2]) + ' ' + str(boxes[i][3]) + ' ')
            list_file.write('\n')

        fps = (fps + (1. / (time.time() - t1))) / 2
        # print("fps= %f" % fps)

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    video.video_capture.release()
    if save_path is not None:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
예제 #25
0
# output_video_size = 416
border_line = [(0, 400), (1200, 400)]
path_track = 20  # how many frames of the path are saved
detector = YOLOv4(
    input_shape=(HEIGHT, WIDTH, 3),
    anchors=YOLOV4_ANCHORS,
    num_classes=80,
    training=False,
    yolo_max_boxes=64,
    yolo_iou_threshold=0.3,
    yolo_score_threshold=0.4,
)
detector.load_weights("models/yolov4.h5")

max_cosine_distance = 0.2  # 03
encoder = gdet.create_box_encoder("models/mars-small128.pb", batch_size=64)
metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                   max_cosine_distance, None)
tracker = Tracker(metric)
border_lines = {'border1': [[0, 100], [312, 104]]}


def drawBorderLines(frame):
    for b in border_lines:
        a = border_lines[b][0]
        b = border_lines[b][1]
        length = 40
        vX0 = b[0] - a[0]
        vY0 = b[1] - a[1]
        mag = math.sqrt(vX0 * vX0 + vY0 * vY0)
        vX = vX0 / mag
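
예제 #25 is cut off before the counting logic, but a common way to use border_lines with the stored track paths is a signed-area test: a track crosses a border when the sign of the 2-D cross product flips between consecutive path points. A sketch of that test (an assumption, not this project's code):

def side_of_line(a, b, p):
    # > 0 if p lies left of the directed line a -> b, < 0 if right, 0 if on the line
    return (b[0] - a[0]) * (p[1] - a[1]) - (b[1] - a[1]) * (p[0] - a[0])

def crossed_border(a, b, prev_center, center):
    # the track crossed segment a -> b between two frames when the sign flips
    return side_of_line(a, b, prev_center) * side_of_line(a, b, center) < 0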
예제 #26
0
        layerNames[i[0] - 1] for i in nnet.getUnconnectedOutLayers()
    ]

    outputs = nnet.forward(outputNames)

    return findObjects(outputs, img)


#get random colors
colors = np.random.randint(0, 255, size=(200, 3), dtype="uint8")

#set model path
model_filename = 'files/market1501.pb'

#initialize encoder
encoder = gdet.create_box_encoder(model_filename, batch_size=1)

#initializing the metric

#parameters for metric
max_cosine_distance = 0.5
nn_budget = None

metric = NearestNeighborDistanceMetric("cosine", max_cosine_distance,
                                       nn_budget)

#initialising tracker
tracker = Tracker(metric)

#detecting and tracking objects
inp = int(
예제 #27
0
def deepsort(yolo, args):
    #nms_max_overlap = 0.3 #nms threshold

    images_input = True if os.path.isdir(args.input) else False
    if images_input:
        # get images list
        jpeg_files = glob.glob(os.path.join(args.input, '*.jpeg'))
        jpg_files = glob.glob(os.path.join(args.input, '*.jpg'))
        frame_capture = jpeg_files + jpg_files
        frame_capture.sort()
    else:
        # create video capture stream
        frame_capture = cv2.VideoCapture(0 if args.input ==
                                         '0' else args.input)
        if not frame_capture.isOpened():
            raise IOError("Couldn't open webcam or video")

    # create video save stream if needed
    save_output = True if args.output != "" else False
    if save_output:
        if images_input:
            raise ValueError("image folder input could be saved to video file")

        # here we encode the video to MPEG-4 for better compatibility, you can use ffmpeg later
        # to convert it to x264 to reduce file size:
        # ffmpeg -i test.mp4 -vcodec libx264 -f mp4 test_264.mp4
        #
        #video_FourCC    = cv2.VideoWriter_fourcc(*'XVID') if args.input == '0' else int(frame_capture.get(cv2.CAP_PROP_FOURCC))
        video_FourCC = cv2.VideoWriter_fourcc(
            *'XVID') if args.input == '0' else cv2.VideoWriter_fourcc(*"mp4v")
        video_fps = frame_capture.get(cv2.CAP_PROP_FPS)
        video_size = (int(frame_capture.get(cv2.CAP_PROP_FRAME_WIDTH)),
                      int(frame_capture.get(cv2.CAP_PROP_FRAME_HEIGHT)))
        out = cv2.VideoWriter(args.output, video_FourCC,
                              (5. if args.input == '0' else video_fps),
                              video_size)

    if args.tracking_classes_path:
        # load the object classes used in tracking if have, other class
        # from detector will be ignored
        tracking_class_names = get_classes(args.tracking_classes_path)
    else:
        tracking_class_names = None

    #create deep_sort box encoder
    encoder = create_box_encoder(args.deepsort_model_path, batch_size=1)

    #create deep_sort tracker
    max_cosine_distance = 0.5  #threshold for cosine distance
    nn_budget = None
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    # alloc a set of queues to record motion trace
    # for each track id
    motion_traces = [deque(maxlen=30) for _ in range(9999)]
    total_obj_counter = []

    # initialize a list of colors to represent each possible class label
    np.random.seed(100)
    COLORS = np.random.randint(0, 255, size=(200, 3), dtype="uint8")

    i = 0
    fps = 0.0
    while True:
        ret, frame = get_frame(frame_capture, i, images_input)
        if ret != True:
            break
        #time.sleep(0.2)
        i += 1

        start_time = time.time()
        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb

        # detect object from image
        _, out_boxes, out_classnames, out_scores = yolo.detect_image(image)
        # get tracking objects and convert bbox from (xmin,ymin,xmax,ymax) to (x,y,w,h)
        boxes, class_names, scores = get_tracking_object(
            out_boxes, out_classnames, out_scores, tracking_class_names)

        # get encoded features of bbox area image
        features = encoder(frame, boxes)

        # form up detection records
        detections = [
            Detection(bbox, score, feature, class_name)
            for bbox, score, class_name, feature in zip(
                boxes, scores, class_names, features)
        ]

        # Run non-maximum suppression.
        #nms_boxes = np.array([d.tlwh for d in detections])
        #nms_scores = np.array([d.confidence for d in detections])
        #indices = preprocessing.non_max_suppression(nms_boxes, nms_max_overlap, nms_scores)
        #detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        # show all detection result as white box
        for det in detections:
            bbox = det.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            cv2.putText(frame, str(det.class_name),
                        (int(bbox[0]), int(bbox[1] - 20)), 0, 5e-3 * 150,
                        (255, 255, 255), 2)

        track_indexes = []
        track_count = 0
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue

            # record tracking info and get bbox
            track_indexes.append(int(track.track_id))
            total_obj_counter.append(int(track.track_id))
            bbox = track.to_tlbr()

            # show all tracking result as color box
            color = [
                int(c)
                for c in COLORS[track_indexes[track_count] % len(COLORS)]
            ]
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (color), 3)
            cv2.putText(frame, str(track.track_id),
                        (int(bbox[0]), int(bbox[1] - 20)), 0, 5e-3 * 150,
                        (color), 2)
            if track.class_name:
                cv2.putText(frame, str(track.class_name),
                            (int(bbox[0] + 30), int(bbox[1] - 20)), 0,
                            5e-3 * 150, (color), 2)

            track_count += 1

            # get center point (x,y) of current track bbox and record in queue
            center = (int(
                ((bbox[0]) + (bbox[2])) / 2), int(((bbox[1]) + (bbox[3])) / 2))
            motion_traces[track.track_id].append(center)

            # draw current center point
            thickness = 5
            cv2.circle(frame, (center), 1, color, thickness)
            #draw motion trace
            motion_trace = motion_traces[track.track_id]
            for j in range(1, len(motion_trace)):
                if motion_trace[j - 1] is None or motion_trace[j] is None:
                    continue
                thickness = int(np.sqrt(64 / float(j + 1)) * 2)
                cv2.line(frame, (motion_trace[j - 1]), (motion_trace[j]),
                         (color), thickness)

        # show tracking statistics
        total_obj_num = len(set(total_obj_counter))
        cv2.putText(frame, "Total Object Counter: " + str(total_obj_num),
                    (int(20), int(120)), 0, 5e-3 * 200, (0, 255, 0), 2)
        cv2.putText(frame, "Current Object Counter: " + str(track_count),
                    (int(20), int(80)), 0, 5e-3 * 200, (0, 255, 0), 2)
        cv2.putText(frame, "FPS: %f" % (fps), (int(20), int(40)), 0,
                    5e-3 * 200, (0, 255, 0), 3)

        # refresh window
        cv2.namedWindow("DeepSORT", 0)
        cv2.resizeWindow('DeepSORT', 1024, 768)
        cv2.imshow('DeepSORT', frame)

        if save_output:
            #save a frame
            out.write(frame)

        end_time = time.time()
        fps = (fps + (1. / (end_time - start_time))) / 2
        # Press q to stop video
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    # Release everything if job is finished
    if not images_input:
        frame_capture.release()
    if save_output:
        out.release()
    cv2.destroyAllWindows()
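
예제 #27 keeps the optional non-maximum-suppression step commented out; re-enabled, it would look roughly like the sketch below (the exact package path of deep_sort's application_util.preprocessing module is an assumption and differs between these projects):

import numpy as np
from application_util import preprocessing  # package path varies between projects

nms_max_overlap = 0.3  # nms threshold, as in the commented-out block above

def suppress_overlapping_detections(detections):
    # keep only the highest-confidence detection among heavily overlapping boxes
    boxes = np.array([d.tlwh for d in detections])
    scores = np.array([d.confidence for d in detections])
    keep = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
    return [detections[i] for i in keep]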
예제 #28
0
파일: box_rect.py 프로젝트: wss321/mot
def main(yolo):
    # Definition of the parameters
    max_cosine_distance = 0.5
    nn_budget = None

    # deep_sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric, max_iou_distance=0.7, max_age=30, n_init=3)

    writeVideo_flag = False

    video_capture = cv2.VideoCapture('data/person.mp4')  #

    if writeVideo_flag:
        # Define the codec and create VideoWriter object
        w = int(video_capture.get(3))
        h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        out = cv2.VideoWriter('p_output.avi', fourcc, 15, (w, h))
        list_file = open('detection.txt', 'w')
        frame_index = -1

    fps = 0.0
    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if ret is not True or frame is None:
            break
        t1 = time.time()

        image = Image.fromarray(frame)
        boxes, scores, _ = yolo.detect_image(image)  # nms already done.
        features = encoder(frame, boxes)

        detections = [Detection(bbox_and_feature[0], scores[idx], bbox_and_feature[1]) for idx, bbox_and_feature in
                      enumerate(zip(boxes, features))]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        for index, track in enumerate(tracker.tracks):
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            cv2.putText(frame, "{}".format(track.track_id), (int(bbox[0]), int(bbox[1])), 0,
                        5e-3 * 200, (0, 255, 0), 2)

        # for det in detections:
        #     bbox = det.to_tlbr()
        #     cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)

        cv2.imshow('', frame)

        if writeVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(boxes) != 0:
                for i in range(0, len(boxes)):
                    list_file.write(
                        str(boxes[i][0]) + ' ' + str(boxes[i][1]) + ' ' + str(boxes[i][2]) + ' ' + str(
                            boxes[i][3]) + ' ')
            list_file.write('\n')

        fps = (fps + (1. / (time.time() - t1))) / 2
        print("fps= %f" % fps)

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    video_capture.release()
    if writeVideo_flag:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
예제 #29
0
파일: help.py 프로젝트: pribadihcr/DLL-RAPI
def camera(self):
    file = self.FLAGS.demo
    SaveVideo = self.FLAGS.saveVideo

    if self.FLAGS.track:
        if self.FLAGS.tracker == "deep_sort":
            from deep_sort import generate_detections
            from deep_sort.deep_sort import nn_matching
            from deep_sort.deep_sort.tracker import Tracker
            metric = nn_matching.NearestNeighborDistanceMetric(
                "cosine", 0.2, 100)
            tracker = Tracker(metric)
            encoder = generate_detections.create_box_encoder(
                os.path.abspath(
                    "deep_sort/resources/networks/mars-small128.ckpt-68577"))
        elif self.FLAGS.tracker == "sort":
            from sort.sort import Sort
            encoder = None
            tracker = Sort()
    if self.FLAGS.BK_MOG and self.FLAGS.track:
        fgbg = cv2.createBackgroundSubtractorMOG2()

    if file == 'camera':
        file = 0
    else:
        assert os.path.isfile(file), \
        'file {} does not exist'.format(file)

    vid = imageio.get_reader(file, 'ffmpeg')  #cv2.VideoCapture(file)
    if file == 0:
        self.say('Press [ESC] to quit video')

    #assert camera.isOpened(), \
    #'Cannot capture source'

    if self.FLAGS.csv:
        f = open('{}.csv'.format(file), 'w')
        writer = csv.writer(f, delimiter=',')
        writer.writerow(['frame_id', 'track_id', 'x', 'y', 'w', 'h'])
        f.flush()
    else:
        f = None
        writer = None

    # buffers for demo in batch
    buffer_inp = list()
    buffer_pre = list()

    elapsed = 0
    start = timer()
    self.say('Press [ESC] to quit demo')
    #postprocessed = []
    # Loop through frames
    n = 0

    plt.ion()
    fig = plt.figure()
    ax = plt.gca()
    frame = vid.get_data(0)
    img_artist = ax.imshow(frame)
    for num in range(1, 20000):
        try:
            frame = vid.get_data(num)
            print(num)
        except:
            break
        elapsed += 1
        #_, frame = camera.read()
        if frame is None:
            print('\nEnd of Video')
            break
        if self.FLAGS.skip != n:
            n += 1
            continue
        n = 0
        if self.FLAGS.BK_MOG and self.FLAGS.track:
            fgmask = fgbg.apply(frame)
        else:
            fgmask = None
        preprocessed = self.framework.preprocess(frame)
        buffer_inp.append(frame)
        buffer_pre.append(preprocessed)
        # Only process and imshow when queue is full
        if elapsed % self.FLAGS.queue == 0:
            feed_dict = {self.inp: buffer_pre}
            net_out = self.sess.run(self.out, feed_dict)
            for img, single_out in zip(buffer_inp, net_out):
                if not self.FLAGS.track:
                    postprocessed = self.framework.postprocess(single_out, img)
                else:
                    postprocessed = self.framework.postprocess(
                        single_out,
                        img,
                        frame_id=elapsed,
                        csv_file=f,
                        csv=writer,
                        mask=fgmask,
                        encoder=encoder,
                        tracker=tracker)

                if self.FLAGS.display:
                    #cv2.imshow('', postprocessed)
                    img_artist.set_data(postprocessed)
                    plt.show()
                    plt.pause(0.00001)
            # Clear Buffers
            buffer_inp = list()
            buffer_pre = list()

        if elapsed % 5 == 0:
            sys.stdout.write('\r')
            sys.stdout.write('{0:3.3f} FPS'.format(elapsed /
                                                   (timer() - start)))
            sys.stdout.flush()

    sys.stdout.write('\n')

    if self.FLAGS.csv:
        f.close()
예제 #30
0
def camera(self):
    file = self.FLAGS.demo
    SaveVideo = self.FLAGS.saveVideo

    if self.FLAGS.track:
        if self.FLAGS.tracker == "deep_sort":
            from deep_sort import generate_detections
            from deep_sort.deep_sort import nn_matching
            from deep_sort.deep_sort.tracker import Tracker
            metric = nn_matching.NearestNeighborDistanceMetric(
                "cosine", 0.2, 100)
            tracker = Tracker(metric)
            encoder = generate_detections.create_box_encoder(
                os.path.abspath(
                    "deep_sort/resources/networks/mars-small128.ckpt-68577"))
        elif self.FLAGS.tracker == "sort":
            from sort.sort import Sort
            encoder = None
            tracker = Sort()
    if self.FLAGS.BK_MOG and self.FLAGS.track:
        fgbg = cv2.bgsegm.createBackgroundSubtractorMOG()

    if file == 'camera':
        file = 0
    else:
        assert os.path.isfile(file), \
        'file {} does not exist'.format(file)

    camera = skvideo.io.VideoCapture(file)

    if file == 0:
        self.say('Press [ESC] to quit video')

    assert camera.isOpened(), \
    'Cannot capture source'

    if self.FLAGS.csv:
        f = open('{}.csv'.format(file), 'w')
        writer = csv.writer(f, delimiter=',')
        writer.writerow(['frame_id', 'track_id', 'x', 'y', 'w', 'h'])
        f.flush()
    else:
        f = None
        writer = None
    if file == 0:  #camera window
        cv2.namedWindow('', 0)
        _, frame = camera.read()
        height, width, _ = frame.shape
        cv2.resizeWindow('', width, height)
    else:
        _, frame = camera.read()
        height, width, _ = frame.shape

    if SaveVideo:
        if file == 0:  #camera window
            fps = 1 / self._get_fps(frame)
            if fps < 1:
                fps = 1
        else:
            fps = get_fps_rate(file)

        output_file = 'output_{}'.format(file)
        if os.path.exists(output_file):
            os.remove(output_file)

        videoWriter = skvideo.io.VideoWriter(output_file,
                                             fps=fps,
                                             frameSize=(width, height))
        videoWriter.open()

    # buffers for demo in batch
    buffer_inp = list()
    buffer_pre = list()

    elapsed = 0
    start = timer()
    self.say('Press [ESC] to quit demo')
    #postprocessed = []
    # Loop through frames
    n = 0
    while camera.isOpened():
        elapsed += 1
        _, frame = camera.read()
        if frame is None:
            print('\nEnd of Video')
            break
        if self.FLAGS.skip != n:
            n += 1
            continue
        n = 0
        if self.FLAGS.BK_MOG and self.FLAGS.track:
            fgmask = fgbg.apply(frame)
        else:
            fgmask = None
        preprocessed = self.framework.preprocess(frame)
        buffer_inp.append(frame)
        buffer_pre.append(preprocessed)
        # Only process and imshow when queue is full
        if elapsed % self.FLAGS.queue == 0:
            feed_dict = {self.inp: buffer_pre}
            net_out = self.sess.run(self.out, feed_dict)
            for img, single_out in zip(buffer_inp, net_out):
                if not self.FLAGS.track:
                    postprocessed = self.framework.postprocess(single_out,
                                                               img,
                                                               save=False)
                else:
                    postprocessed = self.framework.postprocess(
                        single_out,
                        img,
                        frame_id=elapsed,
                        csv_file=f,
                        csv=writer,
                        mask=fgmask,
                        encoder=encoder,
                        tracker=tracker,
                        save=False)
                if SaveVideo:
                    videoWriter.write(postprocessed)

            # Clear Buffers
            buffer_inp = list()
            buffer_pre = list()

        if elapsed % 5 == 0:
            sys.stdout.write('\r')
            sys.stdout.write('{0:3.3f} FPS'.format(elapsed /
                                                   (timer() - start)))
            sys.stdout.flush()

    sys.stdout.write('\n')
    if SaveVideo:
        videoWriter.release()
    if self.FLAGS.csv:
        f.close()
    camera.release()
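
Several of the demos above log tracks to a CSV with the header frame_id, track_id, x, y, w, h. A small reader for that log, assuming the fields are written as plain numbers (the writer side lives inside framework.postprocess and is not shown in these excerpts):

import csv
from collections import defaultdict

def load_tracks(csv_path):
    # returns {track_id: [(frame_id, x, y, w, h), ...]} for the CSV produced by the demos above
    tracks = defaultdict(list)
    with open(csv_path, newline='') as f:
        for row in csv.DictReader(f):
            tracks[int(row['track_id'])].append(
                (int(row['frame_id']), float(row['x']), float(row['y']),
                 float(row['w']), float(row['h'])))
    return dict(tracks)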