def initTrack(self):
    if self.track:
        self.progressBar.setValue(20)
    if self.options["track"]:
        if self.options["tracker"] == "deep_sort":
            from deep_sort import generate_detections
            from deep_sort.deep_sort import nn_matching
            from deep_sort.deep_sort.tracker import Tracker
            self.progressBar.setValue(50)
            metric = nn_matching.NearestNeighborDistanceMetric("cosine", 0.2, 100)
            self.tracker = Tracker(metric)
            self.encoder = generate_detections.create_box_encoder(
                os.path.abspath(
                    "deep_sort/resources/networks/mars-small128.ckpt-68577"))
        elif self.options["tracker"] == "sort":
            from sort.sort import Sort
            self.encoder = None
            self.tracker = Sort()
            self.progressBar.setValue(50)
    if self.options["BK_MOG"] and self.options["track"]:
        fgbg = cv2.bgsegm.createBackgroundSubtractorMOG()
        self.progressBar.setValue(60)
        self.initTFNet()
    else:
        self.initTFNet()
def gui(self):
    source = self.FLAGS.demo
    SaveVideo = self.FLAGS.saveVideo
    # default so the App(...) call below works when tracking is disabled
    tracker, encoder = None, None
    if self.FLAGS.track:
        if self.FLAGS.tracker == "deep_sort":
            from deep_sort import generate_detections
            from deep_sort.deep_sort import nn_matching
            from deep_sort.deep_sort.tracker import Tracker
            metric = nn_matching.NearestNeighborDistanceMetric("cosine", 0.2, 100)
            tracker = Tracker(metric)
            encoder = generate_detections.create_box_encoder(
                os.path.abspath(
                    "deep_sort/resources/networks/mars-small128.ckpt-68577"))
        elif self.FLAGS.tracker == "sort":
            from sort.sort import Sort
            encoder = None
            tracker = Sort()
    if self.FLAGS.BK_MOG and self.FLAGS.track:
        fgbg = cv2.bgsegm.createBackgroundSubtractorMOG()
    if self.FLAGS.csv:
        # was '{}.csv'.format(file); `file` is undefined here, so use `source`
        f = open('{}.csv'.format(source), 'w')
        writer = csv.writer(f, delimiter=',')
        writer.writerow(['frame_id', 'track_id', 'x', 'y', 'w', 'h'])
        f.flush()
    else:
        f = None
        writer = None
    App(tkinter.Tk(), "Tkinter and OpenCV", 0, tracker, encoder)
def __init__(self, classes, tracker='sort'):
    self.ttype = tracker
    self.classes = classes
    if tracker == 'deep_sort':
        from deep_sort import generate_detections
        from deep_sort.deep_sort import nn_matching
        from deep_sort.deep_sort.tracker import Tracker
        metric = nn_matching.NearestNeighborDistanceMetric("cosine", 0.2, 100)  # param
        self.nms_max_overlap = 0.1  # param
        model_path = os.path.join(WORK_DIR, MODEL_DIR, "mars-small128.ckpt-68577")
        self.encoder = generate_detections.create_box_encoder(model_path)
        self.tracker = Tracker(metric)
        from deep_sort.application_util import preprocessing as prep
        from deep_sort.deep_sort.detection import Detection
        self.prep = prep
        self.Detection = Detection
    elif tracker == 'sort':
        from sort.sort import Sort
        self.tracker = Sort()
    self.trackers = {}
def main():
    global tracker
    global encoder
    global msg
    msg = IntList()
    max_cosine_distance = 0.2
    nn_budget = 100
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)
    model_filename = "/home/ilyas/darknetros_sort/src/sort_track/src/deep_sort/model_data/mars-small128.pb"  # Change it to your directory
    encoder = gdet.create_box_encoder(model_filename)
    # Initialize ROS node
    rospy.init_node('sort_tracker', anonymous=True)
    rate = rospy.Rate(10)
    # Get the parameters
    (camera_topic, detection_topic, tracker_topic) = get_parameters()
    # Subscribe to image topic
    image_sub = rospy.Subscriber(camera_topic, Image, callback_image)
    # Subscribe to darknet_ros to get BoundingBoxes from YOLOv3
    sub_detection = rospy.Subscriber(detection_topic, BoundingBoxes, callback_det)
    # Create the publisher once, outside the loop (the original re-created it on
    # every iteration, which can drop messages)
    pub_trackers = rospy.Publisher(tracker_topic, IntList, queue_size=10)
    while not rospy.is_shutdown():
        # Publish results of object tracking
        print(msg)
        pub_trackers.publish(msg)
        rate.sleep()
def __init__(self):
    self.metric = nn_matching.NearestNeighborDistanceMetric("cosine", 0.2, 100)
    self.tracker = Tracker(self.metric)
    self.encoder = generate_detections.create_box_encoder(
        os.path.abspath(
            "deep_sort/resources/networks/mars-small128.ckpt-68577"))
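# A hedged sketch (not part of the original file): how a tracker/encoder pair
# like the one built above is typically driven once per frame. `frame` is an
# image array and `boxes` are [x, y, w, h] detections from any upstream
# detector; Detection(tlwh, confidence, feature) is the reference deep_sort
# signature (some forks add a class-name argument).
def track_frame(tracker, encoder, frame, boxes, scores):
    from deep_sort.deep_sort.detection import Detection
    features = encoder(frame, boxes)  # appearance embedding per box
    detections = [Detection(bbox, score, feature)
                  for bbox, score, feature in zip(boxes, scores, features)]
    tracker.predict()            # propagate each track's Kalman state one step
    tracker.update(detections)   # associate detections with existing tracks
    results = []
    for track in tracker.tracks:
        # skip tentative tracks and tracks with no detection in this frame
        if not track.is_confirmed() or track.time_since_update > 1:
            continue
        results.append((track.track_id, track.to_tlbr()))
    return results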
def __init__(self, encoderPath, applyMask=False):
    self.metric = nn_matching.NearestNeighborDistanceMetric("cosine", 0.9, 100)
    self.tracker = Tracker(self.metric,
                           max_iou_distance=0.9,
                           max_age=50,
                           n_init=3,
                           _lambda=0.3)
    self.encoder = generate_detections.create_box_encoder(
        encoderPath, applyMask=applyMask)
    self.applyMask = applyMask
def __init__(self, args):
    self.tfnet = None
    self.image_publisher = rospy.Publisher(args.output, Image, queue_size=1)
    self.subscriber = rospy.Subscriber(args.input, Image, self.callback,
                                       queue_size=1, buff_size=2**24)
    # NOTE: this reassignment drops the reference to the image subscriber above
    self.subscriber = rospy.Subscriber("/cctv_info", String, self.exit)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", 0.2, 100)
    self.tracker = Tracker(metric)
    self.encoder = generate_detections.create_box_encoder(
        os.path.abspath("deep_sort/resources/networks/mars-small128.ckpt-68577"))
    options = {"model": "darkflow/cfg/yolo.cfg",
               "load": "darkflow/bin/yolo.weights",
               "threshold": 0.1,
               "track": True,
               "trackObj": ["person"],
               "BK_MOG": True,
               "tracker": "deep_sort",
               "csv": False}
    self.tfnet = TFNet(options)
def __init__(self, model_path, matching_threshold=.5, max_iou_distance=0.7,
             max_age=60, n_init=5):
    print(matching_threshold)
    print(max_iou_distance)
    print(max_age)
    print(n_init)
    self.encoder = generate_detections.create_box_encoder(
        os.path.abspath(model_path))
    self.metric = nn_matching.NearestNeighborDistanceMetric(
        "cosine", matching_threshold)
    self.tracker = Tracker(self.metric, max_iou_distance, max_age, n_init)
    self.detections = []
    self.trackingPerson = None
def __init__(self):
    self.YOLO_Classes = utils.read_class_names(cfg.YOLO.CLASSES)
    self.key_list = list(self.YOLO_Classes.keys())
    self.val_list = list(self.YOLO_Classes.values())
    self.deepSORT_model = CONFIG.deepSORT_model
    self.encoder = gdet.create_box_encoder(self.deepSORT_model, batch_size=1)
    self.max_cosine_distance = 0.7
    self.nn_budget = None
    self.metric = nn_matching.NearestNeighborDistanceMetric(
        "cosine", self.max_cosine_distance, self.nn_budget)
    self.tracker = Tracker(self.metric)
    self.track_only = ["car", "truck", "motorbike", "bus", "bicycle", "person"]
    self.memory = {}
    self.already_counted = deque(maxlen=50)
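# Hedged sketch (not from the original class): how the track_only whitelist
# above is typically applied before tracking. Assumes detector output rows of
# the form [x1, y1, x2, y2, score, class_id], as in the other snippets here.
def filter_detections(self, bboxes):
    boxes, scores, names = [], [], []
    for bbox in bboxes:
        name = self.YOLO_Classes[int(bbox[5])]
        if name not in self.track_only:
            continue  # keep only whitelisted classes
        boxes.append([int(bbox[0]), int(bbox[1]),
                      int(bbox[2]) - int(bbox[0]),   # width
                      int(bbox[3]) - int(bbox[1])])  # height
        scores.append(bbox[4])
        names.append(name)
    return boxes, scores, names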
def __init__(self, id, device, config, log):
    self.id = id
    self.log = log
    self.frame = []
    self.device = device
    self.config = config
    self.cnt = 0
    self.cams = []
    self.img_size = config['img_size']
    self.nms_max_overlap = config['nms_max_overlap']
    self.body_min_w = config['body_min_w']
    self.conf_thres = config['conf_thres']
    self.nms_thres = config['nms_thres']
    self.max_hum_w = int(self.img_size / 2)
    self.detector = Darknet(self.config['model_def'],
                            img_size=self.config['img_size']).to(self.device)
    self.detector.load_darknet_weights(self.config['weights_path'])
    self.encoder = gdet.create_box_encoder(
        self.config['model_filename'], batch_size=self.config['batch_size'])
    self._stopevent = threading.Event()
    print("created ok")
    threading.Thread.__init__(self)
def main(_argv):
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for physical_device in physical_devices:
        tf.config.experimental.set_memory_growth(physical_device, True)

    if FLAGS.tiny:
        STRIDES = np.array(cfg.YOLO.STRIDES_TINY)
        XYSCALE = cfg.YOLO.XYSCALE_TINY
        if FLAGS.model == 'yolov4':
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_TINY, FLAGS.tiny)
        else:
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_TINY_V3, FLAGS.tiny)
    else:
        STRIDES = np.array(cfg.YOLO.STRIDES)
        XYSCALE = cfg.YOLO.XYSCALE
        if FLAGS.model == 'yolov4':
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS, FLAGS.tiny)
        else:
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_V3, FLAGS.tiny)

    CLASSES = utils.read_class_names(cfg.YOLO.CLASSES)
    NUM_CLASSES = len(CLASSES)
    input_size = FLAGS.size

    try:
        vid = cv2.VideoCapture(int(FLAGS.video))  # webcam index
    except ValueError:
        vid = cv2.VideoCapture(FLAGS.video)       # video file path
    times = []

    if FLAGS.output:
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

    if FLAGS.framework == 'tf':
        input_layer = tf.keras.layers.Input([input_size, input_size, 3])
        if FLAGS.tiny:
            if FLAGS.model == 'yolov3':
                feature_maps = YOLOv3_tiny(input_layer, NUM_CLASSES)
            else:
                feature_maps = YOLOv4_tiny(input_layer, NUM_CLASSES)
            bbox_tensors = []
            for i, fm in enumerate(feature_maps):
                bbox_tensor = decode(fm, NUM_CLASSES, i)
                bbox_tensors.append(bbox_tensor)
            model = tf.keras.Model(input_layer, bbox_tensors)
            utils.load_weights_tiny(model, FLAGS.weights, FLAGS.model)
        else:
            if FLAGS.model == 'yolov3':
                feature_maps = YOLOv3(input_layer, NUM_CLASSES)
                bbox_tensors = []
                for i, fm in enumerate(feature_maps):
                    bbox_tensor = decode(fm, NUM_CLASSES, i)
                    bbox_tensors.append(bbox_tensor)
                model = tf.keras.Model(input_layer, bbox_tensors)
                utils.load_weights_v3(model, FLAGS.weights)
            elif FLAGS.model == 'yolov4':
                feature_maps = YOLOv4(input_layer, NUM_CLASSES)
                bbox_tensors = []
                for i, fm in enumerate(feature_maps):
                    bbox_tensor = decode(fm, NUM_CLASSES, i)
                    bbox_tensors.append(bbox_tensor)
                model = tf.keras.Model(input_layer, bbox_tensors)
                if FLAGS.weights.split(".")[-1] == "weights":
                    utils.load_weights(model, FLAGS.weights)
                else:
                    model.load_weights(FLAGS.weights).expect_partial()
        model.summary()
    elif FLAGS.framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
    elif FLAGS.framework == 'trt':
        saved_model_loaded = tf.saved_model.load(FLAGS.weights,
                                                 tags=[tag_constants.SERVING])
        infer = saved_model_loaded.signatures['serving_default']

    # Deep SORT setup
    max_cosine_distance = 0.7  # 0.5 / 0.7
    nn_budget = None
    model_filename = './weights/tracker/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)
    key_list = list(CLASSES.keys())
    val_list = list(CLASSES.values())
    Track_only = []
    logging.info("Models loaded!")

    while True:
        return_value, frame = vid.read()
        if not return_value:
            logging.warning("Empty Frame")
            break
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame_size = frame.shape[:2]
        image_data = utils.image_preprocess(np.copy(frame),
                                            [input_size, input_size])
        image_data = image_data[np.newaxis, ...].astype(np.float32)

        t1 = time.time()
        if FLAGS.framework == 'tf':
            pred_bbox = model.predict(image_data)
        elif FLAGS.framework == 'tflite':
            interpreter.set_tensor(input_details[0]['index'], image_data)
            interpreter.invoke()
            pred_bbox = [interpreter.get_tensor(output_details[i]['index'])
                         for i in range(len(output_details))]
        elif FLAGS.framework == 'trt':
            batched_input = tf.constant(image_data)
            pred_bbox = []
            result = infer(batched_input)
            for _, value in result.items():
                value = value.numpy()
                pred_bbox.append(value)
        t2 = time.time()

        times.append(t2 - t1)
        times = times[-20:]
        ms = sum(times) / len(times) * 1000
        fps = 1000 / ms

        if FLAGS.model == 'yolov4':
            pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES,
                                                XYSCALE)
        else:
            pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES)
        bboxes = utils.postprocess_boxes(pred_bbox, frame_size, input_size, 0.5)  # 0.25
        bboxes = utils.nms(bboxes, 0.5, method='nms')  # 0.213

        # extract bboxes to boxes (x, y, width, height), scores and names
        boxes, scores, names = [], [], []
        for bbox in bboxes:
            if (len(Track_only) != 0 and CLASSES[int(bbox[5])] in Track_only) \
                    or len(Track_only) == 0:
                boxes.append([bbox[0].astype(int), bbox[1].astype(int),
                              bbox[2].astype(int) - bbox[0].astype(int),
                              bbox[3].astype(int) - bbox[1].astype(int)])
                scores.append(bbox[4])
                names.append(CLASSES[int(bbox[5])])
        boxes = np.array(boxes)
        names = np.array(names)
        scores = np.array(scores)
        features = np.array(encoder(frame, boxes))
        detections = [Detection(bbox, score, class_name, feature)
                      for bbox, score, class_name, feature
                      in zip(boxes, scores, names, features)]

        tracker.predict()
        tracker.update(detections)

        tracked_bboxes = []
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:  # 1 / 5
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()
            tracking_id = track.track_id
            index = key_list[val_list.index(class_name)]
            tracked_bboxes.append(bbox.tolist() + [tracking_id, index])

        image = utils.draw_bbox(frame, tracked_bboxes, classes=CLASSES,
                                tracking=True)
        image = cv2.putText(image,
                            "Time: {:.2f}ms".format(sum(times) / len(times) * 1000),
                            (0, 36),  # 24
                            cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0, 0, 255), 2)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        cv2.namedWindow("Detections", cv2.WINDOW_AUTOSIZE)
        cv2.imshow("Detections", image)
        if FLAGS.output:
            out.write(image)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    vid.release()
    if FLAGS.output:
        out.release()
    cv2.destroyAllWindows()
def main(_argv):
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for physical_device in physical_devices:
        tf.config.experimental.set_memory_growth(physical_device, True)

    ppe_input_size = FLAGS.ppe_size
    helmet_input_size = FLAGS.helmet_size

    try:
        vid = cv2.VideoCapture(int(FLAGS.video))  # webcam index
    except ValueError:
        vid = cv2.VideoCapture(FLAGS.video)       # video file path
    times = []

    if FLAGS.output:
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

    ppe_detector = create_ppe_detector(ppe_input_size)
    helmet_detector = create_helmet_detector(helmet_input_size)

    # known-face gallery (three reference photos of the same person)
    nacho_image1 = face_recognition.load_image_file("./data/faces/nacho1.jpg")
    nacho_image2 = face_recognition.load_image_file("./data/faces/nacho2.jpg")
    nacho_image3 = face_recognition.load_image_file("./data/faces/nacho3.jpg")
    nacho_face_encoding1 = face_recognition.face_encodings(nacho_image1)[0]
    nacho_face_encoding2 = face_recognition.face_encodings(nacho_image2)[0]
    nacho_face_encoding3 = face_recognition.face_encodings(nacho_image3)[0]
    known_face_encodings = [nacho_face_encoding1, nacho_face_encoding2,
                            nacho_face_encoding3]
    known_face_names = ["Nacho", "Nacho", "Nacho"]
    face_locations = []
    face_encodings = []
    face_names = []

    # Deep SORT setup
    max_cosine_distance = 0.7  # 0.5 / 0.7
    nn_budget = None
    model_filename = './weights/tracker/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)
    Track_only = []
    logging.info("Models loaded!")

    while True:
        return_value, frame = vid.read()
        if not return_value:
            logging.warning("Empty Frame")
            break
        frame_size = frame.shape[:2]
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        # small_frame = cv2.resize(frame, (0, 0), fx=0.25, fy=0.25)
        img_in = tf.expand_dims(frame, 0)
        img_in = transform_images(img_in, helmet_input_size)
        image_data = utils.image_preprocess(np.copy(frame),
                                            [ppe_input_size, ppe_input_size])
        image_data = image_data[np.newaxis, ...].astype(np.float32)

        t1 = time.time()
        if FLAGS.framework == 'tf':
            ppe_pred_bbox = ppe_detector.predict(image_data)
        elif FLAGS.framework == 'trt':
            batched_input = tf.constant(image_data)
            ppe_pred_bbox = []
            result = ppe_detector(batched_input)
            for _, value in result.items():
                value = value.numpy()
                ppe_pred_bbox.append(value)
        helmet_pred_bbox = helmet_detector.predict(img_in)

        # face_locations = face_recognition.face_locations(small_frame)
        face_locations = face_recognition.face_locations(frame)
        face_encodings = face_recognition.face_encodings(frame, face_locations)
        face_names = []
        for face_encoding in face_encodings:
            matches = face_recognition.compare_faces(known_face_encodings,
                                                     face_encoding)
            name = "Unknown"
            # if True in matches:
            #     first_match_index = matches.index(True)
            #     name = known_face_names[first_match_index]
            face_distances = face_recognition.face_distance(known_face_encodings,
                                                            face_encoding)
            best_match_index = np.argmin(face_distances)
            if matches[best_match_index]:
                name = known_face_names[best_match_index]
            face_names.append(name)
        t2 = time.time()

        times.append(t2 - t1)
        times = times[-20:]
        ms = sum(times) / len(times) * 1000
        fps = 1000 / ms

        ppe_bboxes = post_process_boxes(ppe_pred_bbox, 'yolov4', frame_size,
                                        ppe_input_size)
        helmet_bboxes = post_process_boxes(helmet_pred_bbox, 'yolov3', frame_size,
                                           helmet_input_size)

        face_bboxes = []
        for (top, right, bottom, left), name in zip(face_locations, face_names):
            # top *= 4
            # left *= 4
            # right *= 4
            # bottom *= 4
            face_bboxes.append([left, top, right, bottom, name])

        bboxes = utils.calculate_status(ppe_bboxes, helmet_bboxes, [])

        boxes, safety_scores, site_roles, face_names = [], [], [], []
        for bbox in bboxes:
            boxes.append([bbox[0].astype(int), bbox[1].astype(int),
                          bbox[2].astype(int) - bbox[0].astype(int),
                          bbox[3].astype(int) - bbox[1].astype(int)])
            safety_scores.append(bbox[4])
            site_roles.append(bbox[5])
            face_names.append("None")
        for bbox in face_bboxes:
            boxes.append([bbox[0], bbox[1], bbox[2] - bbox[0], bbox[3] - bbox[1]])
            safety_scores.append(0)
            site_roles.append(-1)
            face_names.append(bbox[4])

        boxes = np.array(boxes)
        safety_scores = np.array(safety_scores)
        site_roles = np.array(site_roles)
        face_names = np.array(face_names)
        features = np.array(encoder(frame, boxes))
        detections = [Detection(bbox, 0.9, 0, feature, safety_score, site_role,
                                face_name)
                      for bbox, feature, safety_score, site_role, face_name
                      in zip(boxes, features, safety_scores, site_roles,
                             face_names)]

        tracker.predict()
        tracker.update(detections)

        tracked_bboxes = []
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:  # 1 / 5
                continue
            bbox = track.to_tlbr()
            tracking_id = track.track_id
            safety_score = track.get_safety_score()
            site_role = track.get_site_role()
            face_name = track.get_face_name()
            if site_role == -1:
                to_add = [face_name, site_role, tracking_id]
            else:
                to_add = [safety_score, site_role, tracking_id]
            tracked_bboxes.append(bbox.tolist() + to_add)

        image = utils.draw_demo(frame, tracked_bboxes)
        image = cv2.putText(image, "Time: {:.2f} FPS".format(fps), (0, 24),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        cv2.namedWindow("Detections", cv2.WINDOW_AUTOSIZE)
        cv2.imshow("Detections", image)
        if FLAGS.output:
            out.write(image)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    vid.release()
    if FLAGS.output:
        out.release()
    cv2.destroyAllWindows()
import os
from time import sleep

import cv2
from PIL import Image
from scipy.spatial.distance import cosine
import imutils
from imutils.video import VideoStream

# deep sort
from deep_sort import generate_detections as gd
from deep_sort.detection import Detection
from deep_sort.preprocessing import non_max_suppression

# constants
nms_max_overlap = 1.0

# initialize an instance of the deep-sort tracker
w_path = os.path.join(os.path.dirname(__file__), 'deep_sort/mars-small128.pb')
encoder = gd.create_box_encoder(w_path, batch_size=1)


def camera_frame_gen(args):
    # initialize the video stream and allow the camera sensor to warm up
    print("> starting video stream...")
    vs = VideoStream(src=0).start()
    sleep(2.0)
    # loop over the frames from the video stream
    while True:
        # pull frame from video stream
        frame = vs.read()
        # array to PIL image format (the original snippet is truncated here;
        # a plausible completion is to convert and yield the frame)
        yield Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
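# Hedged sketch: how the non_max_suppression import and the nms_max_overlap
# constant above are typically combined before tracker.update(); Detection
# exposes .tlwh and .confidence in the reference deep_sort implementation.
def suppress(detections):
    import numpy as np
    boxes = np.array([d.tlwh for d in detections])
    scores = np.array([d.confidence for d in detections])
    keep = non_max_suppression(boxes, nms_max_overlap, scores)
    return [detections[i] for i in keep]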
def Object_tracking(YoloV3, video_path, output_path, input_size=416, show=False,
                    CLASSES=YOLO_COCO_CLASSES, score_threshold=0.3,
                    iou_threshold=0.45, rectangle_colors='', Track_only=[]):
    # Definition of the parameters
    max_cosine_distance = 0.7
    nn_budget = None

    # initialize deep sort object
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    times = []

    if video_path:
        vid = cv2.VideoCapture(video_path)  # detect on video
    else:
        vid = cv2.VideoCapture(0)  # detect from webcam

    # by default VideoCapture returns float instead of int
    width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(vid.get(cv2.CAP_PROP_FPS))
    codec = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter(output_path, codec, fps, (width, height))  # output_path must be .mp4

    NUM_CLASS = read_class_names(CLASSES)
    key_list = list(NUM_CLASS.keys())
    val_list = list(NUM_CLASS.values())

    while True:
        _, img = vid.read()
        if img is None:
            break  # end of stream
        # the original converted BGR->RGB twice, which cancels out; keep the
        # frame in BGR so OpenCV drawing and writing below stay color-correct
        original_image = img.copy()

        image_data = image_preprocess(np.copy(original_image),
                                      [input_size, input_size])
        image_data = tf.expand_dims(image_data, 0)

        t1 = time.time()
        pred_bbox = YoloV3.predict(image_data)
        t2 = time.time()
        times.append(t2 - t1)
        times = times[-20:]

        pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
        pred_bbox = tf.concat(pred_bbox, axis=0)
        bboxes = postprocess_boxes(pred_bbox, original_image, input_size,
                                   score_threshold)
        bboxes = nms(bboxes, iou_threshold, method='nms')

        # extract bboxes to boxes (x, y, width, height), scores and names
        boxes, scores, names = [], [], []
        for bbox in bboxes:
            if (len(Track_only) != 0 and NUM_CLASS[int(bbox[5])] in Track_only) \
                    or len(Track_only) == 0:
                boxes.append([bbox[0].astype(int), bbox[1].astype(int),
                              bbox[2].astype(int) - bbox[0].astype(int),
                              bbox[3].astype(int) - bbox[1].astype(int)])
                scores.append(bbox[4])
                names.append(NUM_CLASS[int(bbox[5])])

        # Obtain all the detections for the given frame.
        boxes = np.array(boxes)
        names = np.array(names)
        scores = np.array(scores)
        features = np.array(encoder(original_image, boxes))
        detections = [Detection(bbox, score, class_name, feature)
                      for bbox, score, class_name, feature
                      in zip(boxes, scores, names, features)]

        # Pass detections to the deepsort object and obtain the track information.
        tracker.predict()
        tracker.update(detections)

        # Obtain info from the tracks
        tracked_bboxes = []
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 5:
                continue
            bbox = track.to_tlbr()  # get the corrected/predicted bounding box
            class_name = track.get_class()  # get the class name of this object
            tracking_id = track.track_id  # get the ID for this particular track
            index = key_list[val_list.index(class_name)]  # class index from name
            # structure the data so it can be used with our draw_bbox function
            tracked_bboxes.append(bbox.tolist() + [tracking_id, index])

        ms = sum(times) / len(times) * 1000
        fps = 1000 / ms

        # draw detection on frame
        image = draw_bbox(original_image, tracked_bboxes, CLASSES=CLASSES,
                          tracking=True)
        image = cv2.putText(image, "Time: {:.1f} FPS".format(fps), (0, 30),
                            cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
        # draw original yolo detection
        # image = draw_bbox(image, bboxes, CLASSES=CLASSES, show_label=False,
        #                   rectangle_colors=rectangle_colors, tracking=True)
        # print("Time: {:.2f}ms, {:.1f} FPS".format(ms, fps))

        if output_path != '':
            out.write(image)
        if show:
            cv2.imshow('output', image)
            if cv2.waitKey(25) & 0xFF == ord("q"):
                cv2.destroyAllWindows()
                break

    cv2.destroyAllWindows()
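# Hedged usage sketch for the function above; Load_Yolo_model and the file
# names are illustrative placeholders, not names confirmed by this code:
# yolo = Load_Yolo_model()
# Object_tracking(yolo, "test.mp4", "tracked.avi", input_size=416, show=True,
#                 Track_only=["person"])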
def system(self, video_path, output_path, input_size=320, show=False,
           CLASSES='tiny_yolo/data/coco.names', score_threshold=0.3,
           iou_threshold=0.45, rectangle_colors='', Track_only=[],
           display_tm=False, realTime=True):
    # arducam_utils = ArducamUtils(0)

    # Definition of the deep sort parameters
    max_cosine_distance = 0.7
    nn_budget = None

    # initialize deep sort object
    model_filename = 'model_data/mars-small128.pb'  # deep sort tensorflow pretrained model
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric(
        "cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    times, times_2 = [], []  # parameters for finding fps

    if video_path:
        vid = cv2.VideoCapture(video_path)  # detect on video
    else:
        print("\n\n\nSelected device 0")
        vid = cv2.VideoCapture(0, cv2.CAP_V4L2)  # detect from webcam
        # vid.set(cv2.CAP_PROP_CONVERT_RGB, arducam_utils.convert2rgb)
        vid.set(cv2.CAP_PROP_FPS, 2)

    # by default VideoCapture returns float instead of int
    width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    # fps = int(vid.get(cv2.CAP_PROP_FPS))
    codec = cv2.VideoWriter_fourcc(*'MPEG')  # defining video writer
    out = cv2.VideoWriter(output_path, codec, 30, (width, height))  # output_path must be .avi

    NUM_CLASS = self.read_class_names(CLASSES)  # reading coco classes as key/value pairs
    num_classes = len(NUM_CLASS)
    key_list = list(NUM_CLASS.keys())
    val_list = list(NUM_CLASS.values())

    # calculating parameters for the img processing function
    loop_check, original_frame = vid.read()
    if not loop_check:
        print("\n\nCouldn't read the video")
        return False

    # colors for detection
    hsv_tuples = [(1.0 * x / num_classes, 1., 1.) for x in range(num_classes)]
    detection_colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
    detection_colors = list(
        map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
            detection_colors))
    # random.seed(0)
    random.shuffle(detection_colors)  # to shuffle shades of same color
    # random.seed(None)

    newTime = 0
    prevTime = 0
    dummy_time = 1
    t3 = 0
    playsound('system_ready.wav')

    # loop for video
    while True:
        # loop_check is a bool flag: did the frame read correctly?
        loop_check, original_frame = vid.read()
        # cv2.imshow("org", original_frame)
        if not loop_check:
            return True

        prevTime = newTime
        newTime = time.time()

        t1 = time.time()
        bboxes = self.Yolo.predict(original_frame)
        t2 = time.time()

        # extract bboxes to boxes (x, y, width, height), scores and names
        boxes, scores, names = [], [], []
        # tracking
        for bbox in bboxes:  # loop to separate the bounding boxes in the frame
            if (len(Track_only) != 0 and NUM_CLASS[int(bbox[5])] in Track_only) \
                    or len(Track_only) == 0:
                x1 = int(bbox[0])
                y1 = int(bbox[1])
                x2 = int(bbox[2])
                y2 = int(bbox[3])
                scoreVal = bbox[4]
                class_id = int(bbox[5])
                boxes.append([x1, y1, x2, y2])
                scores.append(scoreVal)
                label = NUM_CLASS[class_id]
                names.append(label)
                # self.image = cv2.rectangle(original_frame, (x1, y1), (x2, y2),
                #                            (255, 0, 0), 2)

        # Obtain all the detections for the given frame.
        boxes = np.array(boxes)
        names = np.array(names)
        scores = np.array(scores)
        features = np.array(encoder(original_frame, boxes))

        # create deep sort Detection objects
        detections = [Detection(bbox, score, class_name, feature)
                      for bbox, score, class_name, feature
                      in zip(boxes, scores, names, features)]

        if realTime:
            tracked_bboxes = time_to_contact(original_frame, tracker.matchedBoxes,
                                             newTime, prevTime, key_list, val_list,
                                             display_tm=display_tm)
        else:
            tracked_bboxes = time_to_contact(original_frame, tracker.matchedBoxes,
                                             dummy_time,
                                             dummy_time - 0.01666666666,  # ~1/60 s
                                             key_list, val_list,
                                             display_tm=display_tm)

        # Pass detections to the deepsort object and obtain the track information.
        tracker.predict()
        tracker.update(detections)

        # draw detection on frame
        self.image = self.draw_bbox(original_frame, tracked_bboxes,
                                    detection_colors, NUM_CLASS, tracking=True)

        # calculating fps
        t3 = time.time()
        times.append(t2 - t1)
        times_2.append(t3 - t1)
        times = times[-20:]
        times_2 = times_2[-20:]
        ms = sum(times) / len(times) * 1000
        fps = 1000 / ms
        fps2 = 1000 / (sum(times_2) / len(times_2) * 1000)
        print("Time: {:.2f}ms, Detection FPS: {:.1f}, total FPS: {:.1f}".format(
            ms, fps, fps2))

        if output_path != '':
            out.write(self.image)
        if show:  # the original hard-coded `if False:` here, dead-coding display
            cv2.imshow('Tracked', self.image)
            if cv2.waitKey(25) & 0xFF == ord("q"):
                cv2.destroyAllWindows()
                break

    cv2.destroyAllWindows()
def load_tracker(self):
    # NOTE: machine-specific absolute path; adjust for your environment
    model_filename = '/Users/ajaymudhai/Desktop/SL/model_data/mars1-small128.pb'
    self.encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric(
        "cosine", self.max_cosine_distance, self.nn_budget)
    self.tracker = Tracker(metric)
def camera(self):
    file = self.FLAGS.demo
    SaveVideo = self.FLAGS.saveVideo

    if self.FLAGS.track:
        if self.FLAGS.tracker == "deep_sort":
            from deep_sort import generate_detections
            from deep_sort.deep_sort import nn_matching
            from deep_sort.deep_sort.tracker import Tracker
            metric = nn_matching.NearestNeighborDistanceMetric("cosine", 0.2, 100)
            tracker = Tracker(metric)
            encoder = generate_detections.create_box_encoder(
                os.path.abspath(
                    "deep_sort/resources/networks/mars-small128.ckpt-68577"))
        elif self.FLAGS.tracker == "sort":
            from sort.sort import Sort
            encoder = None
            tracker = Sort()
    if self.FLAGS.BK_MOG and self.FLAGS.track:
        fgbg = cv2.bgsegm.createBackgroundSubtractorMOG()

    # this variant reads three sources at once, so the single-camera checks from
    # the original are left out; `file` is expected to hold three paths/indices
    camera1 = cv2.VideoCapture(file[0])
    camera2 = cv2.VideoCapture(file[1])
    camera3 = cv2.VideoCapture(file[2])

    if self.FLAGS.csv:
        f = open('{}.csv'.format(file), 'w')
        writer = csv.writer(f, delimiter=',')
        writer.writerow(['frame_id', 'track_id', 'x', 'y', 'w', 'h'])
        f.flush()
    else:
        f = None
        writer = None

    # the original video-writer setup was commented out, yet videoWriter is still
    # used below when SaveVideo is set; create it lazily on the first frame instead
    videoWriter = None

    # buffers for demo in batch
    buffer_inp = list()
    buffer_pre = list()
    elapsed = 0
    start = timer()
    self.say('Press [ESC] to quit demo')

    # Loop through frames
    n = 0
    while camera1.isOpened() and camera2.isOpened() and camera3.isOpened():
        elapsed += 1
        ret1, frame1 = camera1.read()
        ret2, frame2 = camera2.read()
        ret3, frame3 = camera3.read()
        # if ret1 and ret2 and ret3:
        h1, w1 = frame1.shape[:2]
        vis = np.concatenate((frame2, frame1, frame3), axis=1)
        if self.FLAGS.skip != n:
            n += 1
            continue
        n = 0

        if self.FLAGS.BK_MOG and self.FLAGS.track:
            fgmask = fgbg.apply(vis)
        else:
            fgmask = None

        preprocessed = self.framework.preprocess(vis)
        buffer_inp.append(vis)
        buffer_pre.append(preprocessed)

        # Only process and imshow when queue is full
        if elapsed % self.FLAGS.queue == 0:
            feed_dict = {self.inp: buffer_pre}
            net_out = self.sess.run(self.out, feed_dict)
            for img, single_out in zip(buffer_inp, net_out):
                if not self.FLAGS.track:
                    postprocessed = self.framework.postprocess(single_out, img)
                else:
                    postprocessed = self.framework.postprocess(
                        single_out, img, frame_id=elapsed, csv_file=f, csv=writer,
                        mask=fgmask, encoder=encoder, tracker=tracker)
                if SaveVideo:
                    if videoWriter is None:
                        h, w = postprocessed.shape[:2]
                        fourcc = cv2.VideoWriter_fourcc(*'XVID')
                        fps = round(camera1.get(cv2.CAP_PROP_FPS)) or 1
                        videoWriter = cv2.VideoWriter(
                            'output_{}'.format(file), fourcc, fps, (w, h))
                    videoWriter.write(postprocessed)
                if self.FLAGS.display:
                    cv2.imshow('This is postprocessed', postprocessed)
            # Clear Buffers
            buffer_inp = list()
            buffer_pre = list()

        if elapsed % 5 == 0:
            sys.stdout.write('\r')
            sys.stdout.write('{0:3.3f} FPS'.format(elapsed / (timer() - start)))
            sys.stdout.flush()
        if self.FLAGS.display:
            choice = cv2.waitKey(1)
            if choice == 27:
                break

    sys.stdout.write('\n')
    if SaveVideo and videoWriter is not None:
        videoWriter.release()
    if self.FLAGS.csv:
        f.close()
    camera1.release()
    camera2.release()
    camera3.release()
    if self.FLAGS.display:
        cv2.destroyAllWindows()
def camera(self):
    file = self.FLAGS.demo
    SaveVideo = self.FLAGS.saveVideo

    if self.FLAGS.track:
        if self.FLAGS.tracker == "deep_sort":
            from deep_sort import generate_detections
            from deep_sort.deep_sort import nn_matching
            from deep_sort.deep_sort.tracker import Tracker
            metric = nn_matching.NearestNeighborDistanceMetric("cosine", 0.2, 100)
            tracker = Tracker(metric)
            encoder = generate_detections.create_box_encoder(
                os.path.abspath(
                    "deep_sort/resources/networks/mars-small128.ckpt-68577"))
        elif self.FLAGS.tracker == "sort":
            from sort.sort import Sort
            encoder = None
            tracker = Sort()
    if self.FLAGS.BK_MOG and self.FLAGS.track:
        fgbg = cv2.bgsegm.createBackgroundSubtractorMOG()

    if file == 'camera':
        file = 0
    else:
        assert os.path.isfile(file), \
            'file {} does not exist'.format(file)

    camera = cv2.VideoCapture(file)
    if file == 0:
        self.say('Press [ESC] to quit video')
    assert camera.isOpened(), \
        'Cannot capture source'

    if self.FLAGS.csv:
        f = open('{}.csv'.format(file), 'w')
        writer = csv.writer(f, delimiter=',')
        writer.writerow(['frame_id', 'track_id', 'x', 'y', 'w', 'h'])
        f.flush()
    else:
        f = None
        writer = None

    if file == 0:  # camera window
        cv2.namedWindow(self.FLAGS.object_id, 0)
        _, frame = camera.read()
        height, width, _ = frame.shape
        # resizeWindow expects ints; the original passed floats
        cv2.resizeWindow(self.FLAGS.object_id, int(width * 0.5), int(height * 0.5))
    else:
        _, frame = camera.read()
        height, width, _ = frame.shape

    if self.FLAGS.push_stream:
        ffmpeg_pipe(self, file, width, height)

    if SaveVideo:
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        if file == 0:  # camera window
            fps = 1 / self._get_fps(frame)
            if fps < 1:
                fps = 1
        else:
            fps = round(camera.get(cv2.CAP_PROP_FPS))
        videoWriter = cv2.VideoWriter('output_{}'.format(file), fourcc, fps,
                                      (width, height))

    # buffers for demo in batch
    buffer_inp = list()
    buffer_pre = list()
    elapsed = 0
    start = timer()
    self.say('Press [ESC] to quit demo')

    # Loop through frames
    n = 0
    while camera.isOpened():
        if self.FLAGS.process_status == 1:  # esc
            print("gongjia: Stopped!")
            break
        if self.FLAGS.process_status == 2:
            # print("gongjia: Paused!")
            continue
        elapsed += 1
        _, frame = camera.read()
        if frame is None:
            print('\nEnd of Video')
            break
        if self.FLAGS.skip != n:
            n += 1
            continue
        n = 0

        if self.FLAGS.BK_MOG and self.FLAGS.track:
            fgmask = fgbg.apply(frame)
        else:
            fgmask = None

        preprocessed = self.framework.preprocess(frame)
        buffer_inp.append(frame)
        buffer_pre.append(preprocessed)

        # Only process and imshow when queue is full
        if elapsed % self.FLAGS.queue == 0:
            feed_dict = {self.inp: buffer_pre}
            net_out = self.sess.run(self.out, feed_dict)
            for img, single_out in zip(buffer_inp, net_out):
                if not self.FLAGS.track:
                    postprocessed = self.framework.postprocess(single_out, img)
                else:
                    postprocessed = self.framework.postprocess(
                        single_out, img, frame_id=elapsed, csv_file=f, csv=writer,
                        mask=fgmask, encoder=encoder, tracker=tracker)
                if SaveVideo:
                    videoWriter.write(postprocessed)
                if self.FLAGS.display:
                    cv2.imshow(self.FLAGS.object_id, postprocessed)
                if self.FLAGS.push_stream:
                    # im = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
                    self.pipe.stdin.write(postprocessed.tobytes())
            # Clear Buffers
            buffer_inp = list()
            buffer_pre = list()

        if elapsed % 5 == 0:
            sys.stdout.write('\r')
            sys.stdout.write(' {0:3.3f} FPS '.format(elapsed / (timer() - start)))
            sys.stdout.flush()
        if self.FLAGS.display:
            choice = cv2.waitKey(1)
            if choice == 27:
                break

    # write the final counter image after the loop ends
    cv2.imwrite('{}_{}_counter.jpg'.format(self.FLAGS.demo, self.FLAGS.object_id),
                postprocessed)
    sys.stdout.write('\n')
    if SaveVideo:
        videoWriter.release()
    if self.FLAGS.csv:
        f.close()
    camera.release()
    if self.FLAGS.display:
        cv2.destroyAllWindows()
    if self.FLAGS.push_stream:
        self.pipe.stdin.close()
        self.pipe.wait()
def Object_tracking(Yolo, video_path, output_path, input_size=416, show=False,
                    CLASSES=YOLO_COCO_CLASSES, score_threshold=0.3,
                    iou_threshold=0.45, rectangle_colors='', Track_only=[]):
    # Definition of the parameters
    max_cosine_distance = 0.7
    nn_budget = None

    # initialize deep sort object
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    times, times_2 = [], []

    if video_path:
        vid = cv2.VideoCapture(video_path)  # detect on video
    else:
        vid = cv2.VideoCapture(0)  # detect from webcam

    # by default VideoCapture returns float instead of int
    length = int(vid.get(cv2.CAP_PROP_FRAME_COUNT))
    width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(vid.get(cv2.CAP_PROP_FPS))
    print("VIDEO PROPERTIES: FrameCount:{}\tWidth:{}\tHeight:{}\tFps:{}".format(
        length, width, height, fps))
    codec = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter(output_path, codec, fps, (width, height))  # output_path must be .mp4

    NUM_CLASS = read_class_names(CLASSES)
    key_list = list(NUM_CLASS.keys())
    val_list = list(NUM_CLASS.values())

    # 1. BACKGROUND DETECTION
    backSub = cv2.createBackgroundSubtractorMOG2(history=400, varThreshold=16,
                                                 detectShadows=False)
    bgMask = None
    frame_no = 0

    while True:
        _, frame = vid.read()
        frame_no = frame_no + 1
        if frame is None:
            break  # end of stream
        # the original converted BGR->RGB twice, which cancels out; keep the
        # frame in BGR so OpenCV drawing and writing below stay color-correct
        original_frame = frame.copy()

        # 1.1 BACKGROUND update
        fgMask = backSub.apply(original_frame)
        bgMask = backSub.getBackgroundImage()
        if frame_no % 100 == 0:
            print(frame_no)

        image_data = image_preprocess(np.copy(original_frame),
                                      [input_size, input_size])
        # image_data = tf.expand_dims(image_data, 0)
        image_data = image_data[np.newaxis, ...].astype(np.float32)

        t1 = time.time()
        if YOLO_FRAMEWORK == "tf":
            pred_bbox = Yolo.predict(image_data)
        elif YOLO_FRAMEWORK == "trt":
            batched_input = tf.constant(image_data)
            result = Yolo(batched_input)
            pred_bbox = []
            for key, value in result.items():
                value = value.numpy()
                pred_bbox.append(value)
        t2 = time.time()

        pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
        pred_bbox = tf.concat(pred_bbox, axis=0)
        bboxes = postprocess_boxes(pred_bbox, original_frame, input_size,
                                   score_threshold)
        bboxes = nms(bboxes, iou_threshold, method='nms')

        # extract bboxes to boxes (x, y, width, height), scores and names
        boxes, scores, names = [], [], []
        for bbox in bboxes:
            if (len(Track_only) != 0 and NUM_CLASS[int(bbox[5])] in Track_only) \
                    or len(Track_only) == 0:
                boxes.append([bbox[0].astype(int), bbox[1].astype(int),
                              bbox[2].astype(int) - bbox[0].astype(int),
                              bbox[3].astype(int) - bbox[1].astype(int)])
                scores.append(bbox[4])
                names.append(NUM_CLASS[int(bbox[5])])

        # Obtain all the detections for the given frame.
        boxes = np.array(boxes)
        names = np.array(names)
        scores = np.array(scores)
        features = np.array(encoder(original_frame, boxes))
        detections = [Detection(bbox, score, class_name, feature)
                      for bbox, score, class_name, feature
                      in zip(boxes, scores, names, features)]

        # Pass detections to the deepsort object and obtain the track information.
        tracker.predict()
        tracker.update(detections)

        # Obtain info from the tracks
        tracked_bboxes = []
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 5:
                continue
            bbox = track.to_tlbr()  # get the corrected/predicted bounding box
            class_name = track.get_class()  # get the class name of this object
            tracking_id = track.track_id  # get the ID for this particular track
            index = key_list[val_list.index(class_name)]  # class index from name
            # structure the data so it can be used with our draw_bbox function
            tracked_bboxes.append(bbox.tolist() + [tracking_id, index])
            # save to file
            box_item = bbox.tolist() + [tracking_id, index, frame_no]
            ts.save(box_item)

        # draw detection on frame
        image = draw_bbox(original_frame, tracked_bboxes, CLASSES=CLASSES,
                          tracking=True)

        t3 = time.time()
        times.append(t2 - t1)
        times_2.append(t3 - t1)
        times = times[-20:]
        times_2 = times_2[-20:]
        ms = sum(times) / len(times) * 1000
        fps = 1000 / ms
        fps2 = 1000 / (sum(times_2) / len(times_2) * 1000)

        image = cv2.putText(image, "Time: {:.1f} FPS".format(fps), (0, 30),
                            cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
        # draw original yolo detection
        # image = draw_bbox(image, bboxes, CLASSES=CLASSES, show_label=False,
        #                   rectangle_colors=rectangle_colors, tracking=True)
        print("Time: {:.2f}ms, Detection FPS: {:.1f}, total FPS: {:.1f}".format(
            ms, fps, fps2))

        if output_path != '':
            out.write(image)
        if show:
            cv2.imshow('output', image)
            if cv2.waitKey(25) & 0xFF == ord("q"):
                cv2.destroyAllWindows()
                break

    cv2.imwrite(ts.out_bg_img, bgMask)
    cv2.destroyAllWindows()
def main(yolo, video_path=0, save_path=None):
    # Definition of the parameters
    max_cosine_distance = 0.5
    nn_budget = None

    # openpose
    w, h = model_wh('0x0')
    model = 'mobilenet_thin'
    # device_count keys are case-sensitive; the original used {'gpu': 0}
    config = tf.ConfigProto(device_count={'GPU': 0})
    config.gpu_options.per_process_gpu_memory_fraction = 0.3
    e = TfPoseEstimator(get_graph_path(model), target_size=(64, 64),
                        tf_config=config)

    # deep_sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)

    video = Video(video_path)
    # video_capture = cv2.VideoCapture()  # 'data/person.mp4'
    w = video.w
    h = video.h
    tracker = Tracker(metric, img_shape=(w, h),
                      max_eu_dis=0.1 * np.sqrt(w**2 + h**2))

    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    out = cv2.VideoWriter(save_path, fourcc, 7, (w, h))
    list_file = open(output_dir + 'detection.txt', 'w')
    frame_index = -1

    fps = 0.0
    while True:
        ret, frame = video.video_capture.read()  # frame shape 640*480*3
        if ret is not True or frame is None:
            break
        t1 = time.time()

        image = Image.fromarray(frame)
        boxes, scores, _ = yolo.detect_image(image)
        # start = time.time()
        features = encoder(frame, boxes)  # extract the appearance feature of each box
        # end = time.time()
        # print(end - start)
        detections = [
            Detection(bbox_and_feature[0], scores[idx], bbox_and_feature[1])
            for idx, bbox_and_feature in enumerate(zip(boxes, features))
        ]  # wrap each box into a Detection object

        # Call the tracker
        tracker.predict()
        tracker.update(detections, np.asarray(image))

        humans = get_keypoints(image, e)
        frame = draw_humans(image, humans, imgcopy=False)

        for index, track in enumerate(tracker.tracks):
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            cv2.putText(frame, "{}".format(track.track_id),
                        (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200,
                        (0, 255, 0), 2)

        # for det in detections:
        #     bbox = det.to_tlbr()
        #     cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
        #                   (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)

        cv2.imshow('', frame)

        if save_path is not None:
            # Define the codec and create VideoWriter object
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(boxes) != 0:
                for i in range(0, len(boxes)):
                    list_file.write(str(boxes[i][0]) + ' ' + str(boxes[i][1]) +
                                    ' ' + str(boxes[i][2]) + ' ' +
                                    str(boxes[i][3]) + ' ')
            list_file.write('\n')

        fps = (fps + (1. / (time.time() - t1))) / 2
        # print("fps= %f" % fps)

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    video.video_capture.release()
    if save_path is not None:
        out.release()
    list_file.close()
    cv2.destroyAllWindows()
def Object_tracking(Yolo, video_path, output_path, input_size=416, show=False,
                    CLASSES=YOLO_COCO_CLASSES, score_threshold=0.3,
                    iou_threshold=0.45, rectangle_colors='', Track_only=[],
                    custom_yolo=None, custom_classes=YOLO_CUSTOM_CLASSES,
                    Custom_track_only=[]):
    # Definition of the parameters
    max_cosine_distance = 0.7
    nn_budget = None

    # initialize deep sort object
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    times, times_2 = [], []

    if video_path:
        vid = cv2.VideoCapture(video_path)  # detect on video
    else:
        vid = cv2.VideoCapture(0)  # detect from webcam

    # by default VideoCapture returns float instead of int
    width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(vid.get(cv2.CAP_PROP_FPS))
    codec = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter(output_path, codec, fps, (width, height))  # output_path must be .mp4

    NUM_CLASS = read_class_names(CLASSES)
    key_list = list(NUM_CLASS.keys())
    val_list = list(NUM_CLASS.values())

    # set a bunch of flags and variables for made baskets and possessions
    possession = None
    possession_list = []
    combined_possession_avg = 0.5
    total_basket_count = 0
    basket_frame_list = []
    baskets_dict = {"Dark": 0, "Light": 0}
    made_basket_first_frame = 0
    made_basket_frames = 0
    basket_marked = False

    if custom_yolo:
        NUM_CUSTOM_CLASS = read_class_names(custom_classes)
        custom_key_list = list(NUM_CUSTOM_CLASS.keys())
        custom_val_list = list(NUM_CUSTOM_CLASS.values())

    frame_counter = 0
    # loop through each frame in video
    while True:
        _, frame = vid.read()
        if frame is None:
            break  # end of stream
        # first_frame is RGB for the detectors; converting again restores BGR
        # for the OpenCV drawing below
        first_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        original_frame = cv2.cvtColor(first_frame, cv2.COLOR_BGR2RGB)
        frame_counter += 1
        # ensure basketball_center exists even when no custom model is supplied
        # (the original only set it inside the custom block)
        basketball_center = None

        image_data = image_preprocess(np.copy(first_frame),
                                      [input_size, input_size])
        # image_data = tf.expand_dims(image_data, 0)
        image_data = image_data[np.newaxis, ...].astype(np.float32)

        t1 = time.time()

        # CUSTOM BLOCK FOR BASKETBALL
        if custom_yolo:
            if YOLO_FRAMEWORK == "tf":
                # use yolo model to make prediction on the image data
                custom_pred_bbox = custom_yolo.predict(image_data)
                # reshape our data to be in correct form for processing
                custom_pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1]))
                                    for x in custom_pred_bbox]
                custom_pred_bbox = tf.concat(custom_pred_bbox, axis=0)
                # get boxes based on threshold
                custom_bboxes = postprocess_boxes(custom_pred_bbox,
                                                  original_frame, input_size, 0.3)
                # custom_bboxes = nms(custom_bboxes, iou_threshold, method='nms')

            # extract bboxes to boxes (x, y, width, height), scores and names
            custom_boxes, custom_scores, custom_names = [], [], []
            for bbox in custom_bboxes:
                if (len(Custom_track_only) != 0 and
                        NUM_CUSTOM_CLASS[int(bbox[5])] in Custom_track_only) \
                        or len(Custom_track_only) == 0:
                    custom_boxes.append([bbox[0].astype(int), bbox[1].astype(int),
                                         bbox[2].astype(int) - bbox[0].astype(int),
                                         bbox[3].astype(int) - bbox[1].astype(int)])
                    custom_scores.append(bbox[4])
                    custom_names.append(NUM_CUSTOM_CLASS[int(bbox[5])])

            # Obtain all the detections for the given frame.
            custom_boxes = np.array(custom_boxes)
            custom_names = np.array(custom_names)
            custom_scores = np.array(custom_scores)

            # take note of the highest "scoring" made basket and basketball
            # object in each frame
            highest_scoring_basketball = 0
            basketball_box = None
            highest_scoring_made_basket = 0
            made_basket_box = None
            # loop through each bounding box to get the "best" one of the frame;
            # we do this because sometimes our model will detect two, and we
            # know there can only be one
            for i, bbox in enumerate(custom_bboxes):
                name = custom_names[i]
                score = round(custom_scores[i], 3)
                if name == 'basketball':
                    if score > highest_scoring_basketball:
                        highest_scoring_basketball = score
                        basketball_box = bbox
                if name == 'made-basket':
                    if score > .85 and score > highest_scoring_made_basket:
                        highest_scoring_made_basket = score
                        made_basket_box = bbox

            # if it sees a basketball, put a box on it and note the center
            # (for possession)
            if basketball_box is not None:
                cv2.rectangle(original_frame,
                              (int(basketball_box[0]), int(basketball_box[1])),
                              (int(basketball_box[2]), int(basketball_box[3])),
                              (0, 0, 255), 1)
                cv2.rectangle(original_frame,
                              (int(basketball_box[0]),
                               int(basketball_box[1] - 30)),
                              (int(basketball_box[0]) + 10 * 17,
                               int(basketball_box[1])),
                              (0, 0, 255), -1)
                cv2.putText(original_frame,
                            "basketball" + "-" + str(highest_scoring_basketball),
                            (int(basketball_box[0]),
                             int(basketball_box[1] - 10)),
                            0, 0.5, (255, 255, 255), 1)
                basketball_center = (
                    (basketball_box[2] + basketball_box[0]) / 2,
                    (basketball_box[3] + basketball_box[1]) / 2)

            if made_basket_box is not None:
                # if there's a made basket, put the box on it
                cv2.rectangle(original_frame,
                              (int(made_basket_box[0]), int(made_basket_box[1])),
                              (int(made_basket_box[2]), int(made_basket_box[3])),
                              (0, 255, 0), 1)
                cv2.rectangle(original_frame,
                              (int(made_basket_box[0]),
                               int(made_basket_box[1] - 30)),
                              (int(made_basket_box[0]) + 15 * 17,
                               int(made_basket_box[1])),
                              (0, 255, 0), -1)
                cv2.putText(original_frame,
                            "made-basket" + " " + str(highest_scoring_made_basket),
                            (int(made_basket_box[0]),
                             int(made_basket_box[1] - 10)),
                            0, 0.6, (0, 0, 0), 1)
                if made_basket_frames == 0:
                    # if this is the first frame in the sequence
                    made_basket_first_frame = frame_counter
                # increment a counter for made basket frames
                made_basket_frames += 1
                # if there were 3 consecutive frames AND we haven't marked the
                # basket yet, then let's count it!
                if made_basket_frames >= 3 and not basket_marked:
                    basket_marked = True
                    basket_frame_list.append(made_basket_first_frame)
                    if possession:
                        # record which "team" scored the basket
                        baskets_dict[possession] += 1
            else:
                # no made basket; make sure the made-basket counter is at zero
                made_basket_frames = 0

            # 60 frames after a made basket we can reset the "marked basket"
            # flag to False; in essence this means we start looking for made
            # baskets again
            if basket_marked and frame_counter > basket_frame_list[-1] + 60:
                basket_marked = False
        # END CUSTOM BLOCK

        # PERSON PREDICTION and TRACKING BLOCK
        if YOLO_FRAMEWORK == "tf":
            pred_bbox = Yolo.predict(image_data)
        elif YOLO_FRAMEWORK == "trt":
            batched_input = tf.constant(image_data)
            result = Yolo(batched_input)
            pred_bbox = []
            for key, value in result.items():
                value = value.numpy()
                pred_bbox.append(value)
        t2 = time.time()

        pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
        pred_bbox = tf.concat(pred_bbox, axis=0)
        bboxes = postprocess_boxes(pred_bbox, original_frame, input_size,
                                   score_threshold)
        bboxes = nms(bboxes, iou_threshold, method='nms')

        # extract bboxes to boxes (x, y, width, height), scores and names
        boxes, scores, names = [], [], []
        for bbox in bboxes:
            if (len(Track_only) != 0 and NUM_CLASS[int(bbox[5])] in Track_only) \
                    or len(Track_only) == 0:
                w = bbox[2].astype(int) - bbox[0].astype(int)
                h = bbox[3].astype(int) - bbox[1].astype(int)
                # filter out person boxes that are implausibly large or small
                if h < height / 3 and w < width / 4:
                    if h > 120:
                        boxes.append([bbox[0].astype(int), bbox[1].astype(int),
                                      w, h])
                        scores.append(bbox[4])
                        names.append(NUM_CLASS[int(bbox[5])])

        # Obtain all the detections for the given frame.
        boxes = np.array(boxes)
        names = np.array(names)
        scores = np.array(scores)

        # detect jersey color using the tracked person's bounding box
        patches = [gdet.extract_image_patch(frame, box, [box[3], box[2]])
                   for box in boxes]
        color_ratios = [find_color(patch) for patch in patches]

        features = np.array(encoder(original_frame, boxes))
        # mark the detection
        detections = [Detection(bbox, score, class_name, feature, color_ratio)
                      for bbox, score, class_name, feature, color_ratio
                      in zip(boxes, scores, names, features, color_ratios)]

        # Pass detections to the deepsort object and obtain the track information.
        tracker.predict()
        tracker.update(detections)

        # Obtain info from the tracks
        tracked_bboxes = []
        color_ratio_list = []
        check_possession = False
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 5:
                continue
            color_ratio = track.get_color_ratio()
            color_ratio_list.append(color_ratio)
            bbox = track.to_tlbr()  # get the corrected/predicted bounding box
            class_name = track.get_class()  # get the class name of this object
            tracking_id = track.track_id  # get the ID for this particular track
            index = key_list[val_list.index(class_name)]  # class index from name
            # structure the data so it can be used with our draw_bbox function
            tracked_bboxes.append(bbox.tolist() + [tracking_id, index])

            # if there is a basketball in the frame and it's inside a person's
            # bounding box, note which box it is in for possession
            if basketball_center:
                if bbox[0] <= basketball_center[0] <= bbox[2]:
                    if bbox[1] <= basketball_center[1] <= bbox[3]:
                        check_possession = True
                        if color_ratio <= .2:
                            possession_list.append(0)  # light team
                        else:
                            possession_list.append(1)  # dark team
            else:
                # no basketball in frame
                # possession_list.append(-1)
                # test_list.pop(0)
                pass

        # if the ball is in a bounding box, update our possession tracker
        if check_possession:
            if len(possession_list) > 60:
                # take an average of the last 60 possessions marked to determine
                # the current possession, weighting recent detections more
                # (this algo is a WIP)
                possession_list = possession_list[-60:]
                # full_avg = sum(possession_list) / len(possession)
                last_60_avg = sum(possession_list[-60:]) / 60
                last_30_avg = sum(possession_list[-30:]) / 30
                last_15_avg = sum(possession_list[-15:]) / 15
                last_5_avg = sum(possession_list[-5:]) / 5
                combined_possession_avg = round(
                    (last_60_avg + last_30_avg + last_15_avg + last_5_avg) / 4, 3)
                # most_common_possession = stats.mode(possession_list)[0]
            else:
                combined_possession_avg = round(
                    sum(possession_list) / len(possession_list), 3)

        # use our possession average to determine who has the ball right now
        if combined_possession_avg < 0.5:
            possession = "Light"
        elif combined_possession_avg > 0.5:
            possession = "Dark"

        # draw detection on frame
        image = draw_bbox(original_frame, tracked_bboxes,
                          color_ratios=color_ratio_list, CLASSES=CLASSES,
                          tracking=True)

        t3 = time.time()
        times.append(t2 - t1)
        times_2.append(t3 - t1)
        times = times[-20:]
        times_2 = times_2[-20:]
        ms = sum(times) / len(times) * 1000
        fps = 1000 / ms
        fps2 = 1000 / (sum(times_2) / len(times_2) * 1000)

        if possession == "Light":
            image = cv2.putText(image, "Possession: {}".format(possession),
                                (width - 400, 30),
                                cv2.FONT_HERSHEY_COMPLEX_SMALL, 1,
                                (50, 255, 255), 2)
        else:
            image = cv2.putText(image, "Possession: {}".format(possession),
                                (width - 400, 30),
                                cv2.FONT_HERSHEY_COMPLEX_SMALL, 1,
                                (0, 0, 255), 2)
        # image = cv2.putText(image, "Light: {} Dark: {} None: {}".format(
        #     possession_list.count(0), possession_list.count(1),
        #     possession_list.count(-1)), (400, 30),
        #     cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
        image = cv2.putText(image,
                            "Possession Avg: {}".format(combined_possession_avg),
                            (400, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1,
                            (0, 0, 255), 2)
        image = cv2.putText(image, "Time: {:.1f} FPS".format(fps), (0, 30),
                            cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
        # draw original yolo detection
        # image = draw_bbox(image, bboxes, CLASSES=CLASSES, show_label=False,
        #                   rectangle_colors=rectangle_colors, tracking=True)
        print("Time: {:.2f}ms, Detection FPS: {:.1f}, total FPS: {:.1f}".format(
            ms, fps, fps2))

        if output_path != '':
            out.write(image)
        if show:
            cv2.imshow('output', image)
            if cv2.waitKey(25) & 0xFF == ord("q"):
                cv2.destroyAllWindows()
                break

    cv2.destroyAllWindows()
    return_data = {"baskets_dict": baskets_dict,
                   "basket_frame_list": basket_frame_list}
    print("video saved to {}".format(output_path))
    return return_data
def Object_tracking(Yolo, video_path, output_path, class_names, image_size=416,
                    show=False, rectangle_colors=''):
    # Definition of the parameters
    max_cosine_distance = 0.7
    nn_budget = None

    # initialize deep sort object
    model_filename = 'models/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    if video_path:
        vid = cv2.VideoCapture(video_path)  # detect on video
    else:
        vid = cv2.VideoCapture(0)  # detect from webcam
    width, height, fps = get_video_capture_info(vid)
    codec = cv2.VideoWriter_fourcc(*'XVID')
    # output_path must be .mp4
    out = cv2.VideoWriter(output_path, codec, fps, (width, height))

    key_list = list(class_names.keys())
    val_list = list(class_names.values())
    detection_times, tracking_times = [], []

    _, frame = vid.read()  # BGR
    while frame is not None:
        # create the original_frame for display purposes (draw_bbox); the
        # original converted BGR->RGB twice, which cancels out, so this is
        # simply a copy
        original_frame = frame.copy()

        # preprocessing found in datasets.py
        img = preprocess_image(frame, image_size)

        t1 = time.time()
        # was `yolo_predict(yolo, ...)`; `yolo` is undefined, the parameter is `Yolo`
        boxes, class_inds, scores = yolo_predict(Yolo, img, frame)
        t2 = time.time()

        names = []
        for clss in class_inds:
            names.append(class_names[clss])

        features = np.array(encoder(original_frame, boxes))
        # Pass detections to the deepsort object and obtain the track information.
        detections = [Detection(bbox, score, class_name, feature)
                      for bbox, score, class_name, feature
                      in zip(boxes, scores, names, features)]
        tracker.predict()
        tracker.update(detections)

        # Obtain info from the tracks
        tracked_bboxes = get_tracker_info(tracker, val_list, key_list)

        # update the times information
        t3 = time.time()
        detection_times.append(t2 - t1)
        tracking_times.append(t3 - t1)
        detection_times = detection_times[-20:]
        tracking_times = tracking_times[-20:]
        ms, fps, fps2 = efficiency_statistics(detection_times, tracking_times)

        # draw detection on frame
        image = draw_bbox(original_frame, tracked_bboxes, class_names,
                          tracking=True, rectangle_colors=rectangle_colors)
        image = cv2.putText(image, "Time: {:.1f} FPS".format(fps), (0, 30),
                            cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)

        # get next frame
        _, frame = vid.read()  # BGR

        # show and store the results
        print("Time: {:.2f}ms, Detection FPS: {:.1f}, total FPS: {:.1f}".format(
            ms, fps, fps2))
        if output_path != '':
            out.write(image)
        if show:
            cv2.imshow('output', image)
            if cv2.waitKey(25) & 0xFF == ord("q"):
                cv2.destroyAllWindows()
                break

    cv2.destroyAllWindows()
sh.setFormatter(format_str)
log_file_folder = os.path.abspath(
    os.path.join(os.path.dirname(__file__))) + os.sep + "logs" + os.sep
make_dir(log_file_folder)
# the folder path already ends with os.sep, so don't add another
log_file_str = log_file_folder + "text.log"
th = handlers.TimedRotatingFileHandler(filename=log_file_str, when='H',
                                       encoding='utf-8')
th.setFormatter(format_str)
logger.addHandler(sh)
logger.addHandler(th)

if __name__ == '__main__':
    # Deep SORT tracker
    encoder = generate_detections.create_box_encoder(ARGS.model_feature,
                                                     batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", ARGS.min_score,
                                                       None)
    tracker = Tracker(metric)

    # load the detection model
    yolo = YOLO4(ARGS.model_yolo, ARGS.min_score)

    # read live camera frames, or a video file
    try:
        video = cv2.VideoCapture(int(ARGS.video))
    except ValueError:
        video = cv2.VideoCapture(ARGS.video)

    # writer for the saved output video
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
def main(yolo, video_path=0, save_path=None):
    # Definition of the parameters
    max_cosine_distance = 0.5
    nn_budget = None

    # deep_sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric(
        "cosine", max_cosine_distance, nn_budget)

    video = Video(video_path)
    # video_capture = cv2.VideoCapture()  # 'data/person.mp4'
    w = video.w
    h = video.h
    tracker = Tracker(metric, img_shape=(w, h),
                      max_eu_dis=0.1 * np.sqrt(w ** 2 + h ** 2))
    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    out = cv2.VideoWriter(save_path, fourcc, 15, (w, h))
    # note: output_dir must be defined at module level for this to work
    list_file = open(output_dir + 'detection.txt', 'w')
    frame_index = -1

    fps = 0.0
    while True:
        ret, frame = video.video_capture.read()  # frame shape 640*480*3
        if ret is not True or frame is None:
            break
        t1 = time.time()

        image = Image.fromarray(frame)
        boxes, scores, _ = yolo.detect_image(image)
        features = encoder(frame, boxes)  # extract a feature for each box
        detections = [
            Detection(bbox_and_feature[0], scores[idx], bbox_and_feature[1])
            for idx, bbox_and_feature in enumerate(zip(boxes, features))
        ]  # wrap each box, score and feature in a Detection

        # Call the tracker
        tracker.predict()
        tracker.update(detections, np.asarray(image))
        frame_index = frame_index + 1

        for index, track in enumerate(tracker.tracks):
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            cv2.putText(frame, "{}".format(track.track_id),
                        (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200,
                        track.color, 2)

        cv2.imshow('', frame)

        if save_path is not None:
            # save a frame
            out.write(frame)
            # (the original incremented frame_index a second time here,
            # which advanced the counter twice per saved frame)
            list_file.write(str(frame_index) + ' ')
            if len(boxes) != 0:
                for i in range(0, len(boxes)):
                    list_file.write(str(boxes[i][0]) + ' ' +
                                    str(boxes[i][1]) + ' ' +
                                    str(boxes[i][2]) + ' ' +
                                    str(boxes[i][3]) + ' ')
            list_file.write('\n')

        # running average of the instantaneous frame rate
        fps = (fps + (1. / (time.time() - t1))) / 2
        # print("fps= %f" % fps)

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    video.video_capture.release()
    if save_path is not None:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
# output_video_size = 416
border_line = [(0, 400), (1200, 400)]
path_track = 20  # how many frames of each path are kept

detector = YOLOv4(
    input_shape=(HEIGHT, WIDTH, 3),
    anchors=YOLOV4_ANCHORS,
    num_classes=80,
    training=False,
    yolo_max_boxes=64,
    yolo_iou_threshold=0.3,
    yolo_score_threshold=0.4,
)
detector.load_weights("models/yolov4.h5")

max_cosine_distance = 0.2
encoder = gdet.create_box_encoder("models/mars-small128.pb", batch_size=64)
metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                   max_cosine_distance, None)
tracker = Tracker(metric)

border_lines = {'border1': [[0, 100], [312, 104]]}


def drawBorderLines(frame):
    for name in border_lines:  # loop variable renamed; the original reused `b`
        a = border_lines[name][0]
        b = border_lines[name][1]
        length = 40
        vX0 = b[0] - a[0]
        vY0 = b[1] - a[1]
        mag = math.sqrt(vX0 * vX0 + vY0 * vY0)
        vX = vX0 / mag
        vY = vY0 / mag
        # The rest of this function was cut off in the source; a plausible
        # completion (an assumption, not the original code) draws the border
        # plus short perpendicular ticks of `length` pixels at each end:
        cv2.line(frame, tuple(a), tuple(b), (0, 0, 255), 2)
        cv2.line(frame, tuple(a),
                 (int(a[0] + vY * length), int(a[1] - vX * length)),
                 (0, 0, 255), 2)
        cv2.line(frame, tuple(b),
                 (int(b[0] + vY * length), int(b[1] - vX * length)),
                 (0, 0, 255), 2)
    return frame
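# Hedged sketch of the side-of-line test a border-crossing counter like this
# typically pairs with drawBorderLines. ccw/intersect are the standard
# segment-intersection helpers (not taken from this file); a track would be
# counted when the segment between its previous and current centre point
# intersects a border line.
def ccw(A, B, C):
    # True if the triple (A, B, C) winds counter-clockwise
    return (C[1] - A[1]) * (B[0] - A[0]) > (B[1] - A[1]) * (C[0] - A[0])


def intersect(A, B, C, D):
    # True if segment AB properly intersects segment CD
    return ccw(A, C, D) != ccw(B, C, D) and ccw(A, B, C) != ccw(A, B, D)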
        layerNames[i[0] - 1] for i in nnet.getUnconnectedOutLayers()
    ]
    outputs = nnet.forward(outputNames)
    return findObjects(outputs, img)


# get random colors
colors = np.random.randint(0, 255, size=(200, 3), dtype="uint8")

# set model path (appearance encoder, presumably trained on the
# Market-1501 person re-identification dataset)
model_filename = 'files/market1501.pb'

# initialize encoder
encoder = gdet.create_box_encoder(model_filename, batch_size=1)

# parameters for the metric
max_cosine_distance = 0.5
nn_budget = None

# initializing the metric
metric = NearestNeighborDistanceMetric("cosine", max_cosine_distance,
                                       nn_budget)

# initialising tracker
tracker = Tracker(metric)

# detecting and tracking objects
inp = int(
def deepsort(yolo, args):
    #nms_max_overlap = 0.3  # nms threshold

    images_input = os.path.isdir(args.input)
    if images_input:
        # get a sorted list of images
        jpeg_files = glob.glob(os.path.join(args.input, '*.jpeg'))
        jpg_files = glob.glob(os.path.join(args.input, '*.jpg'))
        frame_capture = jpeg_files + jpg_files
        frame_capture.sort()
    else:
        # create video capture stream
        frame_capture = cv2.VideoCapture(0 if args.input == '0' else args.input)
        if not frame_capture.isOpened():
            raise IOError("Couldn't open webcam or video")

    # create video save stream if needed
    save_output = args.output != ""
    if save_output:
        if images_input:
            raise ValueError("image folder input can't be saved to a video file")
        # here we encode the video to MPEG-4 for better compatibility; you can
        # use ffmpeg later to convert it to x264 to reduce the file size:
        # ffmpeg -i test.mp4 -vcodec libx264 -f mp4 test_264.mp4
        #
        #video_FourCC = cv2.VideoWriter_fourcc(*'XVID') if args.input == '0' else int(frame_capture.get(cv2.CAP_PROP_FOURCC))
        video_FourCC = cv2.VideoWriter_fourcc(
            *'XVID') if args.input == '0' else cv2.VideoWriter_fourcc(*"mp4v")
        video_fps = frame_capture.get(cv2.CAP_PROP_FPS)
        video_size = (int(frame_capture.get(cv2.CAP_PROP_FRAME_WIDTH)),
                      int(frame_capture.get(cv2.CAP_PROP_FRAME_HEIGHT)))
        out = cv2.VideoWriter(args.output, video_FourCC,
                              (5. if args.input == '0' else video_fps),
                              video_size)

    if args.tracking_classes_path:
        # load the object classes used in tracking, if given; other classes
        # from the detector will be ignored
        tracking_class_names = get_classes(args.tracking_classes_path)
    else:
        tracking_class_names = None

    # create deep_sort box encoder
    encoder = create_box_encoder(args.deepsort_model_path, batch_size=1)

    # create deep_sort tracker
    max_cosine_distance = 0.5  # threshold for cosine distance
    nn_budget = None
    metric = nn_matching.NearestNeighborDistanceMetric(
        "cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    # allocate a queue per track id to record its motion trace
    motion_traces = [deque(maxlen=30) for _ in range(9999)]
    total_obj_counter = []

    # initialize a list of colors to represent each possible class label
    np.random.seed(100)
    COLORS = np.random.randint(0, 255, size=(200, 3), dtype="uint8")

    i = 0
    fps = 0.0
    while True:
        ret, frame = get_frame(frame_capture, i, images_input)
        if not ret:
            break
        #time.sleep(0.2)
        i += 1

        start_time = time.time()
        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb

        # detect objects in the image
        _, out_boxes, out_classnames, out_scores = yolo.detect_image(image)
        # get tracking objects and convert bbox from (xmin,ymin,xmax,ymax) to (x,y,w,h)
        boxes, class_names, scores = get_tracking_object(
            out_boxes, out_classnames, out_scores, tracking_class_names)

        # get encoded features of the bbox image areas
        features = encoder(frame, boxes)

        # form up detection records
        detections = [
            Detection(bbox, score, feature, class_name)
            for bbox, score, class_name, feature in zip(
                boxes, scores, class_names, features)
        ]

        # Run non-maximum suppression.
        #nms_boxes = np.array([d.tlwh for d in detections])
        #nms_scores = np.array([d.confidence for d in detections])
        #indices = preprocessing.non_max_suppression(nms_boxes, nms_max_overlap, nms_scores)
        #detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        # show all detection results as white boxes
        for det in detections:
            bbox = det.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            cv2.putText(frame, str(det.class_name),
                        (int(bbox[0]), int(bbox[1] - 20)), 0, 5e-3 * 150,
                        (255, 255, 255), 2)

        track_indexes = []
        track_count = 0
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue

            # record tracking info and get bbox
            track_indexes.append(int(track.track_id))
            total_obj_counter.append(int(track.track_id))
            bbox = track.to_tlbr()

            # show all tracking results as colored boxes
            color = [int(c)
                     for c in COLORS[track_indexes[track_count] % len(COLORS)]]
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), color, 3)
            cv2.putText(frame, str(track.track_id),
                        (int(bbox[0]), int(bbox[1] - 20)), 0, 5e-3 * 150,
                        color, 2)
            if track.class_name:
                cv2.putText(frame, str(track.class_name),
                            (int(bbox[0] + 30), int(bbox[1] - 20)), 0,
                            5e-3 * 150, color, 2)
            track_count += 1

            # get the center point (x,y) of the current track bbox and record it
            center = (int((bbox[0] + bbox[2]) / 2),
                      int((bbox[1] + bbox[3]) / 2))
            motion_traces[track.track_id].append(center)

            # draw the current center point
            thickness = 5
            cv2.circle(frame, center, 1, color, thickness)

            # draw the motion trace
            motion_trace = motion_traces[track.track_id]
            for j in range(1, len(motion_trace)):
                if motion_trace[j - 1] is None or motion_trace[j] is None:
                    continue
                thickness = int(np.sqrt(64 / float(j + 1)) * 2)
                cv2.line(frame, motion_trace[j - 1], motion_trace[j],
                         color, thickness)

        # show tracking statistics
        total_obj_num = len(set(total_obj_counter))
        cv2.putText(frame, "Total Object Counter: " + str(total_obj_num),
                    (20, 120), 0, 5e-3 * 200, (0, 255, 0), 2)
        cv2.putText(frame, "Current Object Counter: " + str(track_count),
                    (20, 80), 0, 5e-3 * 200, (0, 255, 0), 2)
        cv2.putText(frame, "FPS: %f" % fps, (20, 40), 0, 5e-3 * 200,
                    (0, 255, 0), 3)

        # refresh window
        cv2.namedWindow("DeepSORT", 0)
        cv2.resizeWindow('DeepSORT', 1024, 768)
        cv2.imshow('DeepSORT', frame)

        if save_output:
            # save a frame
            out.write(frame)

        end_time = time.time()
        fps = (fps + (1. / (end_time - start_time))) / 2

        # Press q to stop video
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    # Release everything when the job is finished
    if not images_input:
        frame_capture.release()
    if save_output:
        out.release()
    cv2.destroyAllWindows()
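# Minimal driver sketch for deepsort() above. The flag names mirror the
# attributes read from `args` in the function body; the YOLO() constructor is
# an assumption standing in for this project's detector class:
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--input', type=str, default='0',
                        help="video file, image folder, or '0' for webcam")
    parser.add_argument('--output', type=str, default='',
                        help="output video path; empty to skip saving")
    parser.add_argument('--deepsort_model_path', type=str,
                        default='model_data/mars-small128.pb')
    parser.add_argument('--tracking_classes_path', type=str, default=None)
    args = parser.parse_args()
    deepsort(YOLO(), args)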
def main(yolo):
    # Definition of the parameters
    max_cosine_distance = 0.5
    nn_budget = None

    # deep_sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric(
        "cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric, max_iou_distance=0.7, max_age=30, n_init=3)

    writeVideo_flag = False

    video_capture = cv2.VideoCapture('data/person.mp4')

    # if writeVideo_flag:
    # Define the codec and create the VideoWriter object
    # (note: the writer and detection.txt are created unconditionally even
    # though writeVideo_flag gates their use below)
    w = int(video_capture.get(3))
    h = int(video_capture.get(4))
    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    out = cv2.VideoWriter('p_output.avi', fourcc, 15, (w, h))
    list_file = open('detection.txt', 'w')
    frame_index = -1

    fps = 0.0
    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if ret is not True or frame is None:
            break
        t1 = time.time()

        image = Image.fromarray(frame)
        boxes, scores, _ = yolo.detect_image(image)  # nms already done
        features = encoder(frame, boxes)
        detections = [
            Detection(bbox_and_feature[0], scores[idx], bbox_and_feature[1])
            for idx, bbox_and_feature in enumerate(zip(boxes, features))
        ]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        for index, track in enumerate(tracker.tracks):
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            cv2.putText(frame, "{}".format(track.track_id),
                        (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200,
                        (0, 255, 0), 2)

        # for det in detections:
        #     bbox = det.to_tlbr()
        #     cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)

        cv2.imshow('', frame)

        if writeVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(boxes) != 0:
                for i in range(0, len(boxes)):
                    list_file.write(str(boxes[i][0]) + ' ' +
                                    str(boxes[i][1]) + ' ' +
                                    str(boxes[i][2]) + ' ' +
                                    str(boxes[i][3]) + ' ')
            list_file.write('\n')

        # running average of the instantaneous frame rate
        fps = (fps + (1. / (time.time() - t1))) / 2
        print("fps= %f" % fps)

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    video_capture.release()
    if writeVideo_flag:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
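# Usage sketch for main() above, following the usual entry-point pattern of
# these demos (the YOLO class and its default weights are assumptions of this
# sketch, not confirmed by the source):
if __name__ == '__main__':
    main(YOLO())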
def camera(self):
    file = self.FLAGS.demo
    SaveVideo = self.FLAGS.saveVideo

    if self.FLAGS.track:
        if self.FLAGS.tracker == "deep_sort":
            from deep_sort import generate_detections
            from deep_sort.deep_sort import nn_matching
            from deep_sort.deep_sort.tracker import Tracker
            metric = nn_matching.NearestNeighborDistanceMetric(
                "cosine", 0.2, 100)
            tracker = Tracker(metric)
            encoder = generate_detections.create_box_encoder(
                os.path.abspath(
                    "deep_sort/resources/networks/mars-small128.ckpt-68577"))
        elif self.FLAGS.tracker == "sort":
            from sort.sort import Sort
            encoder = None
            tracker = Sort()

    if self.FLAGS.BK_MOG and self.FLAGS.track:
        fgbg = cv2.createBackgroundSubtractorMOG2()

    if file == 'camera':
        file = 0
    else:
        assert os.path.isfile(file), \
            'file {} does not exist'.format(file)

    vid = imageio.get_reader(file, 'ffmpeg')  # cv2.VideoCapture(file)
    if file == 0:
        self.say('Press [ESC] to quit video')
    # assert camera.isOpened(), \
    #     'Cannot capture source'

    if self.FLAGS.csv:
        f = open('{}.csv'.format(file), 'w')
        writer = csv.writer(f, delimiter=',')
        writer.writerow(['frame_id', 'track_id', 'x', 'y', 'w', 'h'])
        f.flush()
    else:
        f = None
        writer = None

    # buffers for demo in batch
    buffer_inp = list()
    buffer_pre = list()

    elapsed = 0
    start = timer()
    self.say('Press [ESC] to quit demo')

    # Loop through frames
    n = 0
    plt.ion()
    fig = plt.figure()
    ax = plt.gca()
    frame = vid.get_data(0)
    img_artist = ax.imshow(frame)

    for num in range(1, 20000):
        try:
            frame = vid.get_data(num)
            print(num)
        except Exception:  # end of stream or read error
            break
        elapsed += 1
        # _, frame = camera.read()
        if frame is None:
            print('\nEnd of Video')
            break
        if self.FLAGS.skip != n:
            n += 1
            continue
        n = 0

        if self.FLAGS.BK_MOG and self.FLAGS.track:
            fgmask = fgbg.apply(frame)
        else:
            fgmask = None

        preprocessed = self.framework.preprocess(frame)
        buffer_inp.append(frame)
        buffer_pre.append(preprocessed)

        # Only process and imshow when the queue is full
        if elapsed % self.FLAGS.queue == 0:
            feed_dict = {self.inp: buffer_pre}
            net_out = self.sess.run(self.out, feed_dict)
            for img, single_out in zip(buffer_inp, net_out):
                if not self.FLAGS.track:
                    postprocessed = self.framework.postprocess(single_out, img)
                else:
                    postprocessed = self.framework.postprocess(
                        single_out, img, frame_id=elapsed,
                        csv_file=f, csv=writer, mask=fgmask,
                        encoder=encoder, tracker=tracker)
                if self.FLAGS.display:
                    # cv2.imshow('', postprocessed)
                    img_artist.set_data(postprocessed)
                    plt.show()
                    plt.pause(0.00001)
            # Clear Buffers
            buffer_inp = list()
            buffer_pre = list()

        if elapsed % 5 == 0:
            sys.stdout.write('\r')
            sys.stdout.write('{0:3.3f} FPS'.format(elapsed / (timer() - start)))
            sys.stdout.flush()

    sys.stdout.write('\n')
    if self.FLAGS.csv:
        f.close()
def camera(self):
    file = self.FLAGS.demo
    SaveVideo = self.FLAGS.saveVideo

    if self.FLAGS.track:
        if self.FLAGS.tracker == "deep_sort":
            from deep_sort import generate_detections
            from deep_sort.deep_sort import nn_matching
            from deep_sort.deep_sort.tracker import Tracker
            metric = nn_matching.NearestNeighborDistanceMetric(
                "cosine", 0.2, 100)
            tracker = Tracker(metric)
            encoder = generate_detections.create_box_encoder(
                os.path.abspath(
                    "deep_sort/resources/networks/mars-small128.ckpt-68577"))
        elif self.FLAGS.tracker == "sort":
            from sort.sort import Sort
            encoder = None
            tracker = Sort()

    if self.FLAGS.BK_MOG and self.FLAGS.track:
        fgbg = cv2.bgsegm.createBackgroundSubtractorMOG()

    if file == 'camera':
        file = 0
    else:
        assert os.path.isfile(file), \
            'file {} does not exist'.format(file)

    # note: skvideo.io.VideoCapture/VideoWriter come from the legacy
    # scikit-video API; current releases expose FFmpegReader/FFmpegWriter
    camera = skvideo.io.VideoCapture(file)
    if file == 0:
        self.say('Press [ESC] to quit video')
    assert camera.isOpened(), \
        'Cannot capture source'

    if self.FLAGS.csv:
        f = open('{}.csv'.format(file), 'w')
        writer = csv.writer(f, delimiter=',')
        writer.writerow(['frame_id', 'track_id', 'x', 'y', 'w', 'h'])
        f.flush()
    else:
        f = None
        writer = None

    if file == 0:  # camera window
        cv2.namedWindow('', 0)
        _, frame = camera.read()
        height, width, _ = frame.shape
        cv2.resizeWindow('', width, height)
    else:
        _, frame = camera.read()
        height, width, _ = frame.shape

    if SaveVideo:
        if file == 0:  # camera window
            fps = 1 / self._get_fps(frame)
            if fps < 1:
                fps = 1
        else:
            fps = get_fps_rate(file)
        output_file = 'output_{}'.format(file)
        if os.path.exists(output_file):
            os.remove(output_file)
        videoWriter = skvideo.io.VideoWriter(output_file, fps=fps,
                                             frameSize=(width, height))
        videoWriter.open()

    # buffers for demo in batch
    buffer_inp = list()
    buffer_pre = list()

    elapsed = 0
    start = timer()
    self.say('Press [ESC] to quit demo')

    # Loop through frames
    n = 0
    while camera.isOpened():
        elapsed += 1
        _, frame = camera.read()
        if frame is None:
            print('\nEnd of Video')
            break
        if self.FLAGS.skip != n:
            n += 1
            continue
        n = 0

        if self.FLAGS.BK_MOG and self.FLAGS.track:
            fgmask = fgbg.apply(frame)
        else:
            fgmask = None

        preprocessed = self.framework.preprocess(frame)
        buffer_inp.append(frame)
        buffer_pre.append(preprocessed)

        # Only process and imshow when the queue is full
        if elapsed % self.FLAGS.queue == 0:
            feed_dict = {self.inp: buffer_pre}
            net_out = self.sess.run(self.out, feed_dict)
            for img, single_out in zip(buffer_inp, net_out):
                if not self.FLAGS.track:
                    postprocessed = self.framework.postprocess(
                        single_out, img, save=False)
                else:
                    postprocessed = self.framework.postprocess(
                        single_out, img, frame_id=elapsed,
                        csv_file=f, csv=writer, mask=fgmask,
                        encoder=encoder, tracker=tracker, save=False)
                if SaveVideo:
                    videoWriter.write(postprocessed)
            # Clear Buffers
            buffer_inp = list()
            buffer_pre = list()

        if elapsed % 5 == 0:
            sys.stdout.write('\r')
            sys.stdout.write('{0:3.3f} FPS'.format(elapsed / (timer() - start)))
            sys.stdout.flush()

    sys.stdout.write('\n')
    if SaveVideo:
        videoWriter.release()
    if self.FLAGS.csv:
        f.close()
    camera.release()
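# Hedged aside: the VideoCapture/VideoWriter classes used above exist only in
# the legacy scikit-video fork. A minimal sketch of the same read/write loop
# against the current scikit-video API (illustrative only; "input.mp4" and
# "output.mp4" are placeholder paths, not from the original):
import skvideo.io

reader = skvideo.io.FFmpegReader("input.mp4")
writer = skvideo.io.FFmpegWriter("output.mp4")
for frame in reader.nextFrame():  # yields RGB ndarrays, one per frame
    writer.writeFrame(frame)      # write each (possibly processed) frame
writer.close()
reader.close()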