def main(args):
    # Read video
    frames, fps = read_video(args.video_path)
    print(f"Read {len(frames)} frames (fps: {fps})")

    # Read bboxes of each frame
    json_files = sorted(os.listdir(args.bbox_path), key=lambda x: int(x.split(".")[0]))
    object_boxes_per_frame = []
    for file in json_files:
        with open(os.path.join(args.bbox_path, file)) as f:
            data = json.load(f)
        bboxes = data['children'].copy()
        object_boxes_per_frame.append(bboxes)
    print(f"Read {len(object_boxes_per_frame)} bbox files")

    # Run object tracking
    centroid_ids_per_frame = []
    if args.method == "centroid":
        ct = CentroidTracker(maxDisappeared=50)
        for ind in range(len(frames)):
            rects = [[obj['x1'], obj['y1'], obj['x2'], obj['y2']]
                     for obj in object_boxes_per_frame[ind]]
            centroid_ids = ct.update(rects)
            centroid_ids_per_frame.append(centroid_ids.copy())
    elif args.method == "kalman":
        tracker = Sort(max_age=50, min_hits=3)
        for ind in range(len(frames)):
            detections = np.array([[obj['x1'], obj['y1'], obj['x2'], obj['y2'], obj['confidence']]
                                   for obj in object_boxes_per_frame[ind]])
            trackers = tracker.update(detections, None)
            centroid_ids = [[((track[0] + track[2]) / 2, (track[1] + track[3]) / 2), int(track[4])]
                            for track in trackers]
            centroid_ids_per_frame.append(centroid_ids)
    else:
        raise NotImplementedError
    print(f"Processed {len(centroid_ids_per_frame)} frames")

    # Create output video
    annotated_frames = annotate_frames(frames, object_boxes_per_frame, centroid_ids_per_frame)
    frames2video(annotated_frames, fps=28, filepath=args.save_path)
    print("Created output video")
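# For reference, a hedged sketch of one per-frame bbox JSON file that main() above appears
# to expect: one file per frame named by frame index, detections stored under a "children"
# key with x1/y1/x2/y2/confidence fields. The concrete values and any extra fields are
# assumptions, not taken from the source.
import json

example = {
    "children": [
        {"x1": 252, "y1": 266, "x2": 427, "y2": 378, "confidence": 0.91},
        {"x1": 112, "y1": 186, "x2": 487, "y2": 307, "confidence": 0.78},
    ]
}
with open("0.json", "w") as f:
    json.dump(example, f)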
def main():
    # Construct the argument parser and parse the arguments
    ap = argparse.ArgumentParser()
    ap.add_argument("-m", "--model", required=True, help="path to model file")
    ap.add_argument("-c", "--confidence", type=float, default=0.95,
                    help="minimum probability to filter weak detections")
    args = vars(ap.parse_args())

    # Initialize our centroid tracker and frame dimensions
    ct = CentroidTracker()

    # Load our serialized model from disk
    print("[INFO] Loading model...")
    model = get_model(args["model"])

    vs = FileVideoStream("/media/ave/ae722e82-1476-4374-acd8-a8a18ef8ae7a/Rana_vids/Nova_1/121/03-2018.05.25_06.24.23-17.mpg").start()
    time.sleep(2.0)

    # Loop over the frames from the video stream
    while vs.more():
        # Read the next frame from the video stream
        frame = vs.read()
        analyze_frame(frame, ct, model, args["confidence"])

    # Cleanup
    cv2.destroyAllWindows()
    vs.stop()
RESIZED_HEIGHT = int(FRAME_HEIGHT * RESIZE_SCALAR)
TRACKER_REACQUISITION_RANGE = int(settings.trackrange * RESIZE_SCALAR)
TRACKER_REACQUISITION_TIME = settings.tracktime
DEBUG_MODE = settings.debug
CAPTURE_FRAMERATE = settings.framerate  # This is the framerate of the .avi recording

# Start the stopwatch / counter
t1_start = process_time()

# Initialize the heads-up display controller which draws the UI on top of the frame
hud = HUDController(success_area_y=settings.success_area_y,
                    success_area_length=settings.success_area_length,
                    frame_width=RESIZED_WIDTH)

# Initialize the centroid tracker which keeps track of the same balls between frames
ct = CentroidTracker(TRACKER_REACQUISITION_RANGE, TRACKER_REACQUISITION_TIME)

# Initialize the height checker and desired starting height boundary
starting_y = FRAME_HEIGHT / 4 * RESIZE_SCALAR
starting_height = FRAME_HEIGHT / 10 * RESIZE_SCALAR

# Load YOLO
net = cv2.dnn.readNet("../yolo/yolov3_training.weights", "../yolo/yolov3_config.cfg")

# Enable GPU processing
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)

# Name the custom object class
classes = ["juggling ball"]
def main():
    ct = CentroidTracker(10)
    pub.subscribe(face_out_of_frame, 'face_out_of_frame')
    pub.subscribe(face_in_frame, 'face_in_frame')
    log.basicConfig(format="[ %(levelname)s ] %(message)s", level=log.INFO, stream=sys.stdout)
    args = build_argparser().parse_args()
    stream = cv2.VideoCapture(0)
    model_bin, model_xml = get_face_detection_model(args)
    model_age_gender_xml, model_age_gender_bin = get_age_gender_detection_model(args)

    # Plugin initialization for specified device and load extensions library if specified
    plugin = get_plugin(args)
    print('running on device', args.device)
    add_extension_to_plugin(args, plugin)
    face_detection_net = IENetwork(model=model_xml, weights=model_bin)
    check_for_unsupported_layers(plugin, face_detection_net)
    age_gender_net = IENetwork(model=model_age_gender_xml, weights=model_age_gender_bin)
    check_for_unsupported_layers(plugin, age_gender_net)

    # /opt/intel/computer_vision_sdk/deployment_tools/intel_models/face-detection-adas-0001/FP32/face-detection-adas-0001.xml
    log.info("Preparing inputs")
    face_net_input_blob = next(iter(face_detection_net.inputs))
    face_net_output_blob = next(iter(face_detection_net.outputs))
    face_detection_net.batch_size = 1

    # Read and pre-process input images
    n, c, h, w = face_detection_net.inputs[face_net_input_blob].shape
    print('number of images:', n, 'number of channels:', c, 'height of image:', h, 'width of image:', w)

    # Loading the face detection model to the plugin
    log.info("Loading model to the plugin")
    exec_net = plugin.load(network=face_detection_net)
    age_gender_input_blob = next(iter(age_gender_net.inputs))
    # print(face_detection_net.inputs, face_detection_net.outputs, age_gender_net.inputs, age_gender_net.outputs)
    # print(age_gender_net.outputs, len(age_gender_net.outputs))
    age_blob = 'age_conv3'
    gender_blob = 'prob'
    age_output = age_gender_net.outputs[age_blob]
    gender_output = age_gender_net.outputs[gender_blob]
    print("age/gender model input specs", age_gender_net.inputs[age_gender_input_blob].shape)
    agen, agec, ageh, agew = age_gender_net.inputs[age_gender_input_blob].shape
    print("loading age/gender model to the plugin")
    exec_age_gender_net = plugin.load(network=age_gender_net)

    while True:
        status, image = stream.read()
        res, initialw, initialh = infer_face(n, c, h, w, image, exec_net, face_net_input_blob)
        out = res[face_net_output_blob]
        count = 0
        tfaces = np.ndarray(shape=(agen, agec, ageh, agew))
        rects = []
        for obj in out[0][0]:
            threshold = obj[2]
            class_id = int(obj[1])
            if threshold > 0.9:
                count = count + 1
                xmin = int(obj[3] * initialw)
                ymin = int(obj[4] * initialh)
                xmax = int(obj[5] * initialw)
                ymax = int(obj[6] * initialh)
                color = (min(class_id * 12.5, 255), min(class_id * 7, 255), min(class_id * 5, 255))
                face = image[ymin:ymax, xmin:xmax]
                (fh, fw) = face.shape[:-1]
                if fh < ageh or fw < agew:
                    continue
                tface = cv2.resize(face, (agew, ageh))
                tface = tface.transpose((2, 0, 1))
                tfaces[0] = tface
                t0 = time()
                out_age_gender = exec_age_gender_net.infer(inputs={face_net_input_blob: tfaces})
                print('inference time age/gender detection', (time() - t0) * 1000)
                age, gender, checkedin = get_age_gender(out_age_gender, age_blob, gender_blob)
                rects.append((xmin, ymin, xmax, ymax, age, gender, checkedin))
                cv2.rectangle(image, (xmin, ymin), (xmax, ymax), color, 2)
        print("number of faces in frame", count)
        if count > 0:
            x = ct.update(rects)
            print(list(x.items()))
            cv2.imshow("face", face)
        cv2.imshow("Display", image)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
                    dest='start_offset', type=int, default=0,
                    help='Start time in seconds.')
args = parser.parse_args()

cap = cv2.VideoCapture(args.video_source)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, args.width)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, args.height)
cap.set(cv2.CAP_PROP_POS_MSEC, args.start_offset * 1000)

folder_output = args.folder_output
if not os.path.exists(folder_output):
    os.makedirs(folder_output)

ct = CentroidTracker(folder_output)

start_time = datetime.datetime.now()
num_frames = 0
im_width, im_height = (cap.get(3), cap.get(4))
print("Image Width: ", im_width)
print("Image Height: ", im_height)

# maximum number of hands we want to detect/track
num_hands_detect = 4

cv2.namedWindow('Single-Threaded Detection', cv2.WINDOW_NORMAL)

while True:
    # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
    ret, image_np = cap.read()
class TrackerSpeedEstimator:
    """TrackerSpeedEstimator manages the tracking and the speed estimation of elements in a video.

    Note: Works with the CentroidTracker and Control_zone classes.

    Args:
        czones (list of Control_zone): list of initialized Control_zone objects
        video_capture (opencv object): opencv video iterator

    Attributes:
        czones (list of Control_zone): list of initialized Control_zone objects
        fps (int): number of frames per second of the video
        cap (opencv object): opencv video iterator
        ct (object): CentroidTracker object
        mapped_centroid_classes (dict): dictionary of tracked elements and their detected classes
            (e.g. {0: 'car', 1: 'car', 2: 'truck'})
        tracked_objects_status (dict): dictionary of tracked elements and their control zone status,
            control zone id and number of displayed frames (e.g. {0: (1, 1, 0), 1: (0, None, 0), 2: (2, 1, 19)})
        frameid_control (dict): dictionary of tracked elements and their frame ids in the control zone
            (e.g. {4: [24, 68, 69], 0: [54, 79, 80, 81, 82], 8: [84, 112]})
        estimated_speed (dict): dictionary of tracked elements and their estimated speed for each control zone
            (e.g. {1: {0: 104.4, 8: 104.4}, 2: {2: 100.8, 6: 127.095}})
    """

    def __init__(self, video_capture, czones):
        self.czones = czones
        self.fps = int(video_capture.get(cv2.CAP_PROP_FPS))
        self.cap = video_capture
        self.ct = CentroidTracker(maxDisappeared=8)
        self.mapped_centroid_classes = {}
        self.tracked_objects_status = {}
        self.frameid_control = {}
        self.estimated_speed = {}

    def track(self, detections):
        """Update the tracker with the centroids of the detected elements.

        Args:
            detections (list of numpy array): list of bounding box coordinates of the detected elements
                (e.g. [np.array([252, 266, 175, 112]), np.array([112, 186, 375, 121])])
        """
        # object tracking
        self.detections = detections
        bboxes = [np.array(i[:4]).astype(int) for i in self.detections]
        self.objects = self.ct.update(bboxes)

    def map_centroid_class(self):
        """Map the tracked object ids to the detected object classes using centroids and bounding boxes.

        Note: the mapping is based on the minimal distance between two centroids.
        """
        centroid_bboxes = np.array([((x1 + x2) / 2, (y1 + y2) / 2)
                                    for x1, y1, x2, y2, conf, cls in self.detections])
        for (objectID, centroid) in self.objects.items():
            distances = dist.cdist(np.expand_dims(centroid, axis=0), centroid_bboxes)
            imaped_bbox = distances.argmin(axis=1)[0]
            self.mapped_centroid_classes[objectID] = self.detections[imaped_bbox][5]

    def _update_status(self, obj_id, centroid, cz):
        """Update the tracked element's information so we know whether it has not yet reached the
        control zone, is currently in the control zone, or has already crossed it.
        Frame ids are saved for each tracked element when it enters and exits the control zone.

        Note: status is defined as:
            0 - the tracked element has not crossed any control zone yet
            1 - the tracked element has crossed the starting zone of a control zone
            2 - the tracked element has crossed the ending zone of the same control zone

        Args:
            obj_id (int): object id of the tracked element
            centroid (tuple of int): x, y coordinates of the tracked centroid
            cz (Control_zone object): control zone
        """
        frameid = int(self.cap.get(cv2.CAP_PROP_POS_FRAMES))
        if obj_id not in self.tracked_objects_status.keys():
            self.tracked_objects_status[obj_id] = (0, None, 0)
        status, idczone, ndisplay = self.tracked_objects_status[obj_id]
        if cz.entering_zone(centroid):  # if status != 1:
            self.tracked_objects_status[obj_id] = (1, cz.idczone, 0)
            self.frameid_control[obj_id] = [frameid]
        if idczone == cz.idczone:
            if cz.exiting_zone(centroid):
                self.tracked_objects_status[obj_id] = (2, idczone, 0)
                self.frameid_control[obj_id].append(frameid)

    def compute_speed(self):
        """Compute the speed of each tracked element crossing each control zone.

        Note: only elements with status = 2 are measured, using the entering and exiting frame ids.
        Knowing the distance between the entering and the exiting zone, the number of frames spent
        in the control zone and the frame rate, we can estimate the speed of the object.
        """
        for czone in self.czones:
            if czone.idczone not in self.estimated_speed.keys():
                self.estimated_speed[czone.idczone] = {}
            for (objectID, centroid) in self.objects.items():
                self._update_status(objectID, centroid, czone)
                status, idczone, ndisplay = self.tracked_objects_status[objectID]
                if status == 2 and (idczone == czone.idczone):
                    n_present_frames = self.frameid_control[objectID][-1] - self.frameid_control[objectID][0]
                    speed = ((czone.ckzn_d / (n_present_frames / self.fps)) * 3600) / 1000  # km/h
                    self.estimated_speed[idczone].update({objectID: speed})

    def display_speed(self, img, ndisplay_frames=20):
        """Display the speed of each measured object and the average speed for each control zone.

        Note: only the speed of status = 2 elements is displayed, for at most ndisplay_frames frames.
        If an element is over the speed limit, its speed is displayed in red.

        Args:
            img (numpy 2D array): input image
            ndisplay_frames (int): maximum number of frames during which each tracked object's speed is displayed
        """
        speedlimits = {}
        for czone in self.czones:
            speedlimits[czone.idczone] = czone.speedlimit
        shape = img.shape[:2]
        for (objectID, centroid) in self.objects.items():
            status, idczone, ndisplay = self.tracked_objects_status[objectID]
            if status == 2:
                if ndisplay <= ndisplay_frames:
                    speed = self.estimated_speed[idczone][objectID]
                    centroid = self.objects[objectID]
                    cv2.putText(img, "{0:.1f} : km/h".format(speed),
                                (centroid[0] - 15, centroid[1] + 40), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                                over_speed_color((0, 255, 0), speed, speedlimits[idczone]), 1)
                    self.tracked_objects_status[objectID] = (status, idczone, ndisplay + 1)
                else:
                    self.tracked_objects_status[objectID] = (0, None, 0)
        i = 0
        for czone in self.czones:
            idczone = czone.idczone
            if len(self.estimated_speed[idczone]) > 0:
                mspeed = np.array(list(self.estimated_speed[idczone].values())).mean()
                offset_r, offset_c = offset_loc(czone.draw_loc)
                x, y = ((shape[1] // 2) + (offset_c * (shape[1] // 4)),
                        -((shape[0] // 2) - 20) * offset_r + (shape[0] // 2))
                cv2.rectangle(img, (x - 10, y + 5), (x + 170, y - 15), czone.col, -1)
                cv2.putText(img, "Avg : {0:.1f} : km/h".format(mspeed), (x, y),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)
            i += 1

    def display_tracking(self, img):
        """Display the centroid and the ID of each tracked object.

        Args:
            img (numpy 2D array): input image
        """
        for (objectID, centroid) in self.objects.items():
            col = (0, 255, 0)
            status, _, _ = self.tracked_objects_status[objectID]
            if status == 1:
                col = (255, 255, 255)
            # draw both the ID of the object and the centroid of the
            # object on the output frame
            text = "ID {}".format(objectID)
            cv2.putText(img, text, (centroid[0] - 10, centroid[1] - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, col, 2)
            cv2.circle(img, (centroid[0], centroid[1]), 4, col, -1)
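# A minimal usage sketch for TrackerSpeedEstimator above, assuming a cv2.VideoCapture
# source, already-configured Control_zone objects in `czones`, and a hypothetical
# detect_objects() returning (x1, y1, x2, y2, conf, cls) arrays; none of these helpers
# come from the source.
import cv2

cap = cv2.VideoCapture("traffic.mp4")
estimator = TrackerSpeedEstimator(cap, czones)

while True:
    ret, frame = cap.read()
    if not ret:
        break
    detections = detect_objects(frame)     # list of (x1, y1, x2, y2, conf, cls) arrays
    if len(detections) > 0:
        estimator.track(detections)        # update the centroid tracker
        estimator.map_centroid_class()     # map track ids to detected classes
        estimator.compute_speed()          # speed = zone length / (frames in zone / fps)
        estimator.display_tracking(frame)
        estimator.display_speed(frame)
    cv2.imshow("speed", frame)
    if cv2.waitKey(1) & 0xFF == ord("q"):
        break
cap.release()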
    return cap.isOpened() if args.video_path != '' else True

def get_frame():
    if args.video_path != '':
        return cap.read()
    else:
        in_Frame = qOut_Frame.get()
        frame = in_Frame.getCvFrame()
        return True, frame

startTime = time.monotonic()
detections = []
frame_count = 0
counter = [0, 0, 0, 0]  # left, right, up, down

ct = CentroidTracker(maxDisappeared=40, maxDistance=50)
trackableObjects = {}

def to_planar(arr: np.ndarray, shape: tuple) -> np.ndarray:
    return cv2.resize(arr, shape).transpose(2, 0, 1).flatten()

while should_run():
    # Get image frames from camera or video file
    read_correctly, frame = get_frame()
    if not read_correctly:
        break

    if args.video_path != '':
        # Prepare image frame from video for sending to device
        img = dai.ImgFrame()
        img.setData(to_planar(frame, (300, 300)))
class Camera:
    def __init__(self, publisher: mqtt_publisher, args):
        # Start arguments
        self.publisher: mqtt_publisher = publisher
        for k, v in args.items():
            if k == "skip_frame":
                if v is not None:
                    self.skip_frame = int(v)
                else:
                    self.skip_frame = 5
            if k == "min_confidence":
                if v is not None:
                    self.min_confidence = float(v)
                else:
                    self.min_confidence = 0.4
            if k == "resolution":
                if v is not None:
                    w, h = v.split(",")
                    self.resolution = (int(w), int(h))
                else:
                    self.resolution = (640, 480)
            if k == "debug":
                if v is not None:
                    self.debug = v
                else:
                    self.debug = False

        # Classes the net model recognises
        self.CLASSES = ["background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus",
                        "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike",
                        "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]

        # centroid tracker and some inits
        self.ct = CentroidTracker(publisher, maxDisappeared=40, maxDistance=50)
        self.targets = []
        self.rois = OrderedDict()
        self.take_snap = False
        self.fps = None
        self.s = sched.scheduler(time.time, time.sleep)

    def read_config(self):
        self.targets = []
        self.rois = OrderedDict()
        with open("config.json") as json_file:
            data = json.load(json_file)
            for t in data["target"]:
                self.targets.append(t)
            for roi in data["ROI"]:
                name = roi["name"]
                (x, y, w, h) = roi["coordinates"]
                self.rois[name] = (int(x), int(y), int(w), int(h))
        self.ct.update_key_centroids(self.rois)

    def reload_config(self):
        self.read_config()

    def snapshot(self):
        self.take_snap = True

    def publish_snap(self, img):
        val, buffer = cv2.imencode(".jpg", img)
        encoded = base64.b64encode(buffer)
        packet_size = 3000
        start = 0
        end = packet_size
        length = len(encoded)
        pic_id = "snapshot_{}".format(length % 100)
        pos = 0
        packet_number = math.ceil(length / packet_size) - 1
        while start <= length:
            data = {
                "data": str(encoded[start:end]),
                "pic_id": pic_id,
                "pos": pos,
                "packet_number": packet_number
            }
            print("sending {}/{}".format(pos, packet_number))
            self.publisher.publish(json.dumps(data), "test/test/snapshot")
            end += packet_size
            start += packet_size
            pos = pos + 1
            time.sleep(0.2)

    def publish_fps(self):
        self.fps.stop()
        if self.fps.elapsed() <= 0:
            pass
        else:
            fps = {
                "time_elapsed": int(self.fps.elapsed()),
                "fps": int(self.fps.fps())
            }
            self.publisher.publish(json.dumps(fps), "test/test/fps")
        self.fps = FPS().start()

    def publish_online(self, sc):
        msg = {
            "online": True
        }
        self.publisher.publish(json.dumps(msg), "test/test/status")
        self.s.enter(60, 1, self.publish_online, (sc,))

    def run_camera(self):
        totalFrames = 0
        trackers = []
        trackableObjects = {}
        W = None
        H = None
        class_list = []
        self.read_config()
        self.s.enter(1, 1, self.publish_online, (self.s,))

        # Video source
        # vid_capture = cv2.VideoCapture(0)
        vid_capture = cv2.VideoCapture("../../img/in/campus4-c1.avi")

        # Load model
        print("[INFO] loading model...")
        net = cv2.dnn.readNetFromCaffe("mobilenet_ssd/MobileNetSSD_deploy.prototxt",
                                       "mobilenet_ssd/MobileNetSSD_deploy.caffemodel")
        print("Done")

        # Start FPS counter
        self.fps = FPS().start()
        print("Running...")
        error = False

        while True:
            _, frame = vid_capture.read()

            # if end of video, restart it
            if frame is None:
                print("no Frame")
                vid_capture = cv2.VideoCapture("../../img/in/campus4-c1.avi")
                _, frame = vid_capture.read()
                # error = True
                # break

            # resize and convert to RGB for dlib
            frame = cv2.resize(frame, self.resolution, interpolation=cv2.INTER_AREA)
            rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # set frame dimensions
            if W is None or H is None:
                (H, W) = frame.shape[:2]

            # snapshot
            if self.take_snap:
                self.publish_snap(frame)
                self.take_snap = False
                continue

            # init bounding box rectangles
            rects = []

            # Only search for objects every skip_frame frames
            if totalFrames % self.skip_frame == 0:
                totalFrames = 0
                # init new set of trackers
                trackers = []
                class_list = []

                # convert the frame to a blob and pass the blob through the
                # network and obtain the detections
                blob = cv2.dnn.blobFromImage(frame, 0.007843, (W, H), 127.5)
                net.setInput(blob)
                detections = net.forward()

                # loop over the detections
                for i in np.arange(0, detections.shape[2]):
                    # extract the confidence (i.e., probability) associated
                    # with the prediction
                    confidence = detections[0, 0, i, 2]
                    if confidence > self.min_confidence:
                        # if the class label is not a target class, ignore it
                        idx = int(detections[0, 0, i, 1])
                        target = self.CLASSES[idx]
                        if target not in self.targets:
                            continue

                        # compute the (x, y)-coordinates of the bounding box
                        box = detections[0, 0, i, 3:7] * np.array([W, H, W, H])
                        (start_x, start_y, end_x, end_y) = box.astype("int")

                        # make a dlib rectangle object and start the dlib tracker
                        tracker = dlib.correlation_tracker()
                        rect = dlib.rectangle(start_x, start_y, end_x, end_y)
                        tracker.start_track(rgb, rect)
                        trackers.append(tracker)
                        class_list.append(target)

            # use trackers during skipped frames, not object recognition
            else:
                for tracker in trackers:
                    # update the tracker and grab the updated position
                    tracker.update(rgb)
                    pos = tracker.get_position()

                    # unpack the position object
                    start_x = int(pos.left())
                    start_y = int(pos.top())
                    end_x = int(pos.right())
                    end_y = int(pos.bottom())

                    # add the box coordinates to the rectangles list
                    rects.append((start_x, start_y, end_x, end_y))

            # use centroids to match old and new centroids and then loop over them
            (objects, class_dict) = self.ct.update(rects, class_list)
            for (object_id, centroid) in objects.items():
                # check if the object is in the object list
                t_object = trackableObjects.get(object_id, None)

                # if there is no existing trackable object, create one
                if t_object is None:
                    t_object = TrackableObject(object_id, centroid, class_dict[object_id])
                else:
                    t_object.centroids.append(centroid)

                # store the trackable object in our dictionary
                trackableObjects[object_id] = t_object

                # draw the centroid
                text = "ID {}".format(object_id)
                cv2.putText(frame, text, (centroid[0] - 10, centroid[1] - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
                cv2.circle(frame, (centroid[0], centroid[1]), 4, (0, 255, 0), -1)

            for roi in self.rois:
                draw_roi(frame, roi, self.rois[roi])

            # Debugging
            if self.debug:
                cv2.imshow("Tracking", frame)

            totalFrames += 1
            self.fps.update()

            k = cv2.waitKey(5) & 0xFF
            if k == 27:  # Esc
                break

        self.publish_fps()
        self.fps.stop()
        cv2.destroyAllWindows()
        vid_capture.release()
        print("Camera stopped")
        return True, error
# dict.get(key, value to return if the specified key doesn't exist)
# which means if "input" is None, it will return False
if not args.get("input", False):
    print("starting video stream")
    vs = VideoStream(src=0).start()
    # webcam warm-up time
    time.sleep(2.0)
else:
    print("opening the specified video file")
    vs = cv2.VideoCapture(args["input"])

writer = None
W = None
H = None

cent_tracker = CentroidTracker(max_frames_to_disappear=40, max_distance=50)
trackers = []
trackable_objs_dict = {}

total_frames = 0
total_downs = 0
total_ups = 0

fps = FPS().start()

while True:
    frame = vs.read()
    frame = frame[1] if args.get("input", False) else frame

    # if the optional input is set and the obtained frame is None,
    # we have reached the end of the video
def playback():
    # handle each group separately, good for making DB
    for cams, group_name, fps in cams_groups:
        # delete everything we know
        tracked_objects.clear()
        tracked_objects_reid.clear()
        if config.keep_track_targeted:
            build_known_people()

        players = [PicturePlayback(camera, fps, not config.playback_realtime) for camera in cams]
        # players = [CameraPlayback()]
        # player = [VideoPlayback('video.avi')]
        # players = [YoutubePlayback('https://www.youtube.com/watch?v=N79f1znMWQ8')]

        # centroid tracker for each camera to keep track of IDs after new detection
        centroid_tracker = [CentroidTracker(0, config.centroid_max_distance) for _ in range(len(cams))]
        correlation_trackers = [[] for _ in range(len(cams))]

        # start playback
        for player in players:
            player.start()

        frame_index = 0
        while any([player.is_playing() for player in players]):
            frames = [player.get_frame() for player in players]
            # make them all have the same length

            # current frame for each camera
            for camera_i, frame in enumerate(frames):
                if frame is None:
                    continue

                # rectangles of detected people in the current frame
                frame_copy = frame.copy()
                bodies = []

                # should we try detecting again?
                should_detect = (frame_index % config.detect_frequency == 0)
                if should_detect:
                    # detect rectangles
                    bodies = detect.detect_people(frame)
                    correlation_trackers[camera_i] = build_trackers(bodies, frame)
                else:
                    # track rectangles
                    for tracker in correlation_trackers[camera_i]:
                        tracker.update(frame)
                        pos = tracker.get_position()
                        bodies.append((int(pos.left()), int(pos.top()),
                                       int(pos.right()), int(pos.bottom())))

                # get rectangles with IDs assigned to them
                detected_objects = centroid_tracker[camera_i].update(bodies, should_detect)

                for (track_id, (x1, y1, x2, y2)) in detected_objects.items():
                    # should face and body samples be saved from the current frame?
                    should_sample = (frame_index % config.detect_frequency == 0)
                    # should we try to find a match for newly detected people among known people?
                    should_reid_known = (frame_index % config.detect_frequency == 0) and len(known_objects) > 0
                    # should we try to find a match for newly detected people?
                    should_reid = (frame_index % config.detect_frequency == 0)

                    # TODO: clear these checks
                    # fix out-of-image coordinates
                    x2, x1, y2, y1 = min(x2, frame.shape[1]), max(x1, 0), min(y2, frame.shape[1]), max(y1, 0)

                    # too small, ignore
                    if x2 - x1 < 10 or y2 - y1 < 20:
                        continue

                    # convert from internal track_id to actual person_id
                    while True:
                        new_id = tracked_objects_reid.get(track_id, None)
                        if new_id is None:
                            break
                        track_id = new_id

                    person_track = tracked_objects.get(track_id, None)
                    if person_track is None:
                        person_track = known_objects.get(track_id, None)

                    cropped_body = frame[y1:y2, x1:x2]
                    if person_track is None:
                        if should_detect:
                            logging.info('PLAYBACK: new person {} detected in camera {}'.format(track_id, camera_i))
                            person_track = PersonTrack(track_id, n_cameras)
                            tracked_objects[track_id] = person_track
                            # sample for re-ID
                            should_sample = True
                            # try to find whether we have seen this person before
                            should_reid = True
                    elif should_detect:
                        # compare to self just in case it's actually a new person
                        if config.reid_same:
                            test_id = centroid_tracker[camera_i].next_id
                            test_track = PersonTrack(test_id, n_cameras)
                            test_track.add_body_sample(cropped_body, frame_index, camera_i)
                            face = detect.get_face(cropped_body)
                            if face is not None:
                                test_track.add_face_sample(face, frame_index, camera_i)
                            self_compare = recognize.compare_to_detected(test_track, {track_id: person_track})
                            # same centroid but the persons don't match
                            if self_compare is None:
                                # re-ID this centroid because it's not the same person
                                centroid_tracker[camera_i].reid(track_id, test_id)
                                CentroidTracker.next_id += 1
                                tracked_objects[test_id] = test_track
                                track_id = test_id
                                person_track = test_track
                                logging.info('PLAYBACK: new person {} detected in camera {}'.format(track_id, camera_i))
                                should_sample = False
                                should_reid = True

                    # TODO: re-IDed person should be re-IDed again, because A1==A2 =never match= B1==B2
                    # don't re-ID people who were re-IDed before, so there are no cycles in detection
                    should_reid = should_reid and not person_track.was_reided()
                    should_reid_known = should_reid_known and not person_track.is_known()

                    if should_sample:
                        person_track.add_body_sample(cropped_body, frame_index, camera_i)
                        # try to find the face of this person
                        face = detect.get_face(cropped_body)
                        if face is not None:
                            person_track.add_face_sample(face, frame_index, camera_i)

                    compare_to_array = []
                    if should_reid_known:
                        compare_to_array.append((known_objects, config.known_required_match_percent))
                    if should_reid:
                        compare_to_array.append((tracked_objects, config.required_match_percent))

                    for compare_to, required_match in compare_to_array:
                        same_person_id = recognize.compare_to_detected(person_track, compare_to, required_match)
                        if same_person_id is not None and same_person_id != track_id:
                            # get the track of the person we matched
                            same_person_track = compare_to.get(same_person_id)
                            # merge information
                            same_person_track.merge(person_track)
                            # we only need one track, the one that doesn't have less information
                            tracked_objects.pop(track_id)
                            # re-ID from the tracker's ID to the person ID
                            tracked_objects_reid[track_id] = same_person_id
                            # update values
                            track_id = same_person_id
                            person_track = same_person_track
                            person_track.reid()
                            break
                        elif same_person_id == track_id:
                            # this is an error and should never happen :)
                            logging.error('PLAYBACK: comparing and matching same person {}, something is wrong'.format(track_id))

                    # we do not keep track of this person, delete him
                    # TODO: creating the instance is quite unnecessary, but not 'that' costly...
                    if not config.keep_track_all and not person_track.is_known():
                        del tracked_objects[track_id]

                    # display information on screen
                    # TODO: maybe add face? but tracking it is unnecessary and we detect irregularly, so probably not
                    cv2.rectangle(frame_copy, (x1, y1), (x2, y2), (255, 0, 0), 1)
                    cv2.putText(frame_copy, person_track.get_name(), (x1, y1),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)

                cv2.imshow('Camera {}'.format(camera_i), frame_copy)
                # TODO: better quitting, but it's OK for now
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break

            frame_index += 1

        cv2.destroyAllWindows()

        # build database from collected information
        if config.build_dataset:
            improve.build_new_dataset(group_name, tracked_objects)

    logging.info('PLAYBACK: Playback finished')
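# The internal-ID to person-ID conversion in playback() above simply follows the
# tracked_objects_reid mapping until no further link exists. A standalone sketch of that
# lookup; the function name is illustrative, not taken from the source.
def resolve_person_id(track_id, tracked_objects_reid):
    # Follow re-ID links (old id -> merged id) until the chain terminates.
    while True:
        new_id = tracked_objects_reid.get(track_id, None)
        if new_id is None:
            return track_id
        track_id = new_id

# e.g. with the mapping {3: 1, 1: 0}, internal track 3 resolves to person 0
assert resolve_person_id(3, {3: 1, 1: 0}) == 0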
import time

from annotation import Annotator
import numpy as np
import picamera
from PIL import Image
from tflite_runtime.interpreter import Interpreter
import dlib

CAMERA_WIDTH = int(640 / 2)
CAMERA_HEIGHT = int(480 / 2)

ct = CentroidTracker()


def load_labels(path):
    """Loads the labels file. Supports files with or without index numbers."""
    with open(path, 'r', encoding='utf-8') as f:
        lines = f.readlines()
        labels = {}
        for row_number, content in enumerate(lines):
            pair = re.split(r'[:\s]+', content.strip(), maxsplit=1)
            if len(pair) == 2 and pair[0].strip().isdigit():
                labels[int(pair[0])] = pair[1].strip()
            else:
                labels[row_number] = pair[0].strip()
    return labels
from imutils.video import VideoStream, FPS
import face_recognition
import imutils
import pickle
import cv2
import time

from centroid_tracker import CentroidTracker

tracker = CentroidTracker()
FRAMES_TO_TRACK = 50

print("Loading encodings")
data = pickle.loads(open("encodings.pkl", "rb").read())
detector = cv2.CascadeClassifier("/usr/local/share/OpenCV/lbpcascades/lbpcascade_frontalface_improved.xml")

print("Starting video stream")
vs = VideoStream(usePiCamera=True).start()
time.sleep(2.0)

counter = 0
names = []
start = time.time()
fps_counter = 0

while True:
    fps_counter += 1
    counter += 1
    frame = vs.read()
    frame = imutils.resize(frame, width=500)
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    rects = detector.detectMultiScale(gray, 1.1, 5)
    objects = tracker.update(rects)
class LiveDetector:
    def __init__(self):
        # Instantiate detector
        self.detector = ObjectDetection()

        # Set and load model
        self.model_path = "D:/Final-Year-Project/Object-tracking/models/yolo.h5"
        self.detector.setModelTypeAsYOLOv3()
        self.detector.setModelPath(self.model_path)
        self.detector.loadModel()

        # Set custom objects
        self.custom_objects = self.detector.CustomObjects(car=True, motorcycle=True, person=True,
                                                          bicycle=True, dog=True)
        self.tracker = CentroidTracker()

    def track_objects(self, frame):
        rects = []
        names = []
        data = {}
        frame = self.pixelate_frontyard((100, 100), frame)
        returned_image, detection = self.detector.detectCustomObjectsFromImage(
            custom_objects=self.custom_objects, input_image=frame,
            output_type="array", input_type="array")

        for eachObject in detection:
            rects.append(eachObject["box_points"])
            names.append(eachObject["name"])
            (startX, startY, endX, endY) = eachObject["box_points"]
            cv2.rectangle(frame, (startX, startY), (endX, endY), (0, 255, 0), 2)
            self.blur_object((startX, startY), (endX, endY), (11, 11), frame)

        objects = self.tracker.update(rects, names)
        if objects is not None:
            for objectID, objectDetails in objects.items():
                # draw both the ID of the object and the centroid of the
                # object on the output frame
                centroid = objectDetails[0]
                name = objectDetails[1]
                if self.tracker.disappeared[objectID] < 1:
                    text = name + " " + str(objectID)
                    cv2.putText(frame, text, (centroid[0] - 30, centroid[1] - 10),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
                    cv2.circle(frame, (centroid[0], centroid[1]), 4, (0, 255, 0), -1)
                    data[name] = objectID
        return frame, data

    def blur_object(self, topLeft, bottomRight, kSize, frame):
        x, y = topLeft[0], topLeft[1]
        w, h = bottomRight[0] - topLeft[0], bottomRight[1] - topLeft[1]
        ROI = frame[y:y + h, x:x + w]
        blur = cv2.GaussianBlur(ROI, kSize, 0)
        frame[y:y + h, x:x + w] = blur

    def blur_frontyard(self, kSize, frame):
        height, width, channel = frame.shape
        ROI_corners = np.array([[(320, 490), (895, 320), (895, height), (320, height)]], dtype=np.int32)
        blurred_frame = cv2.GaussianBlur(frame, kSize, 0)
        mask = np.zeros(frame.shape, dtype=np.uint8)
        ignore_mask_color = (255,) * channel
        cv2.fillPoly(mask, ROI_corners, ignore_mask_color)
        mask_inverse = np.ones(mask.shape).astype(np.uint8) * 255 - mask
        frame = cv2.bitwise_and(blurred_frame, mask) + cv2.bitwise_and(frame, mask_inverse)
        return frame

    def pixelate_frontyard(self, kSize, frame):
        height, width, channel = frame.shape
        w, h = kSize
        ROI_corners = np.array([[(320, 490), (895, 320), (895, height), (320, height)]], dtype=np.int32)
        temp = cv2.resize(frame, (w, h), interpolation=cv2.INTER_LINEAR)
        pixelated_frame = cv2.resize(temp, (width, height), interpolation=cv2.INTER_NEAREST)
        mask = np.zeros(frame.shape, dtype=np.uint8)
        ignore_mask_color = (255,) * channel
        cv2.fillPoly(mask, ROI_corners, ignore_mask_color)
        mask_inverse = np.ones(mask.shape).astype(np.uint8) * 255 - mask
        frame = cv2.bitwise_and(pixelated_frame, mask) + cv2.bitwise_and(frame, mask_inverse)
        return frame
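# A minimal driving loop for the LiveDetector class above, as a hedged sketch: the capture
# source (webcam index 0) and window name are assumptions, not part of the original code.
import cv2

detector = LiveDetector()
cap = cv2.VideoCapture(0)
while True:
    ret, frame = cap.read()
    if not ret:
        break
    frame, data = detector.track_objects(frame)   # annotated frame + {name: objectID}
    cv2.imshow("Live detection", frame)
    if cv2.waitKey(1) & 0xFF == ord("q"):
        break
cap.release()
cv2.destroyAllWindows()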
from imutils.video import VideoStream
import numpy as np
import argparse
import imutils
import time
import cv2

ap = argparse.ArgumentParser()
ap.add_argument("-p", "--prototxt", default="face_model/deploy.prototxt",
                help="path to Caffe prototxt file")
ap.add_argument("-m", "--model", default="face_model/res10_300x300_ssd_iter_140000.caffemodel",
                help="path to Caffe pre-trained model")
ap.add_argument("-c", "--confidence", type=float, default=0.5,
                help="detection threshold")
args = vars(ap.parse_args())

cent_tracker = CentroidTracker()
(H, W) = (None, None)

print("loading the model")
net = cv2.dnn.readNetFromCaffe(args["prototxt"], args["model"])

print("start video streaming")
vs = VideoStream(src=0).start()
# to warm up the camera, wait 2.0 sec
time.sleep(2.0)

while True:
    frame = vs.read()
    frame = imutils.resize(frame, width=400)
    # this statement is used for the initializer step
class Camera: """Camera Class Default values based on Playstation Eye camera""" def __init__(self, camera_idx, camera_view, res_x=640, res_y=480, view_h=450, view_w=800, origin_distance=0, static_background=None): """ camera_idx: refers to the device number camera_view: can either be "TOP-DOWN" or "FRONT-ON" res_x: refers to camera's horisontal resolution res_x: refers to camera's vertical resolution view_h: height of the camera view window view_w: width of the camera view window origin_distance: distance between camera and origin in cm static_background: background for which newly drawn frames are diffed against for detecting objects """ self.cam = cv2.VideoCapture(camera_idx) self.cam.set(cv2.CAP_PROP_FRAME_HEIGHT, view_h) self.cam.set(cv2.CAP_PROP_FRAME_WIDTH, view_w) # where the camera view self.camera_view = camera_view assert (self.camera_view == "TOP-DOWN" or "FRONT-ON") self.res_x = res_x self.res_y = res_y self.origin_distance = origin_distance self.pixel_cm_ratio = 0 self.static_background = static_background self.centroid_tracker = CentroidTracker() def object_vector(self, centroid): """Method for determining the vector between the mid point of the screen""" object_x = centroid[0] object_y = centroid[1] y = object_x - self.res_x // 2 # There may be a bit of confusion around the variable names # the assigned x, y and z refer to vector co-ordinates. Otherwise # I am refering to the x, y position in the camera view if self.camera_view == "TOP-DOWN": x = object_y - self.res_y // 2 z = None else: x = None z = object_y - self.res_y // 2 return (x, y, z) def draw_bounding_boxs(self, frame): """Method for drawing bounding boxes around objects different to the static background """ diff = cv2.absdiff(self.static_background, frame) gray = cv2.cvtColor(diff, cv2.COLOR_BGR2GRAY) blur = cv2.GaussianBlur(src=gray, ksize=(5, 5), sigmaX=0) rects = [] _, thresh = cv2.threshold(src=blur, thresh=70, maxval=255, type=cv2.THRESH_BINARY) dilated = cv2.dilate(src=thresh, kernel=None, iterations=3) contours, _ = cv2.findContours(image=dilated, mode=cv2.RETR_TREE, method=cv2.CHAIN_APPROX_SIMPLE) for i, contour in enumerate(contours): (x, y, w, h) = cv2.boundingRect(contour) # in the case that the area of the difference is to small, i.e. 
# inconsistencies due to lighting, the loop will continue to the # next iteration if cv2.contourArea(contour) < 900: continue # create numpy array out of each corner of the bounding box box_bounds = np.array([x, x + w, y, y + h]) rects.append(box_bounds) # coordinates of the centroid centroid = (x + w // 2, y + h // 2) cv2.rectangle( img=frame, pt1=(x, y), # bottom left coord pt2=(x + w, y + h), # top right coord color=(255, 0, 0), thickness=2) cv2.circle(img=frame, center=(centroid), radius=5, color=(255, 0, 0), thickness=2) # update the create a new instance of the object dictionary object_dict = self.centroid_tracker.update_objects(rects) object_vectors = {} for obj_id, centroid in object_dict.items(): text = "ID %s" % {obj_id} cv2.putText(frame, text, (centroid[0] - 10, centroid[1] - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2) cv2.circle(frame, (centroid[0], centroid[1]), 4, (0, 255, 0), -1) partial_vector = self.object_vector(centroid) # redefine the centroid as a partial vector (a vector that is # missing one of it's value's) object_vectors[obj_id] = partial_vector return frame, object_vectors def __del__(self): self.cam.release() @staticmethod def merge_vectors(vector_a, vector_b): """Static method for merging 2 given vectors inputs come in tuple form with the assumption that both x values will be equal, due to the decided upon camera layout. """ n_x = vector_a[0] n_y = vector_a[1] if vector_a[1] is not None else vector_b[1] n_z = vector_a[2] if vector_a[2] is not None else vector_b[2] return (n_x, n_y, n_z) @staticmethod def vector_distance(self, v): return sqrt([i**2 for i in v])
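# A short sketch of how the two partial vectors produced by object_vector() above combine
# via Camera.merge_vectors: the TOP-DOWN view yields (x, y, None) and the FRONT-ON view
# yields (None, y, z). The concrete numbers are illustrative only.
top_down_vector = (120, -40, None)   # pass the TOP-DOWN vector first, since n_x is taken from vector_a
front_on_vector = (None, -40, 35)
full_vector = Camera.merge_vectors(top_down_vector, front_on_vector)
print(full_vector)  # (120, -40, 35)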
def process_image(img, img_index):
    # get an image from the 'img' directory
    template = cv2.imread("./ball-img/{}".format(imgs[img_index]))

    # original image to draw rectangles on
    og_img = img

    # define region of interest
    roi_xy1 = (450, 219)
    roi_xy2 = (830, 549)
    img = img[roi_xy1[1]:roi_xy2[1], roi_xy1[0]:roi_xy2[0]]

    # make the image easier to work with
    #img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    #img = cv2.GaussianBlur(img, (11, 11), 0)
    #img = getCanny(img)

    """
    # find circles
    circles = cv2.HoughCircles(img, cv2.HOUGH_GRADIENT, 1.2, 10)
    # ensure at least some circles were found
    if circles is not None:
        # convert the (x, y) coordinates and radius of the circles to integers
        circles = np.round(circles[0, :]).astype("int")
        # loop over the (x, y) coordinates and radius of the circles
        for (x, y, r) in circles:
            # draw the circle in the output image, then draw a rectangle
            # corresponding to the center of the circle
            cv2.circle(og_img, (x + roi_xy1[0], y + roi_xy1[1]), r, (0, 255, 0), 4)
            cv2.rectangle(og_img, (x - 5 + roi_xy1[0], y - 5 + roi_xy1[1]),
                          (x + 5 + roi_xy1[0], y + 5 + roi_xy1[1]), (0, 128, 255), -1)
    """

    # find matches
    minimized = False
    try:
        img = cv2.matchTemplate(template, img, cv2.TM_CCOEFF_NORMED)
    except Exception as ex:
        minimized = True
        print("The window cannot be minimized...")

    # wait until the user reopens the window
    while minimized:
        try:
            img = screen_cap(hwnd=hwnd)
            img = cv2.matchTemplate(template, img, cv2.TM_CCOEFF_NORMED)
        except Exception as ex:
            pass
        else:
            print("Resuming...")
            sleep(1)
            break

    # get location and confidence
    min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(img)

    # only draw a rectangle and calculate the location if the confidence is greater than 0.65
    if max_val > 0.65:
        # convert the location to be relative to og_img
        top_left = (max_loc[0] + roi_xy1[0], max_loc[1] + roi_xy1[1])
        bottom_right = (top_left[0] + template.shape[1], top_left[1] + template.shape[0])

        # object tracking with centroid_tracker
        ct = CentroidTracker()
        (H, W) = (None, None)

        cv2.rectangle(og_img, top_left, bottom_right, color=(0, 0, 255), thickness=2, lineType=cv2.LINE_4)
        print("Ball location: ", max_loc)
        print("Confidence: ", max_val)

    return og_img
from centroid_tracker import CentroidTracker
import numpy as np
import imutils
import time
import cv2
import os.path

ct = CentroidTracker()
(H, W) = (None, None)

model = cv2.dnn.readNetFromCaffe("deploy.prototxt", "res10_300x300_ssd_iter_140000_fp16.caffemodel")

vc = cv2.VideoCapture(0)
time.sleep(2.0)

isAvailable, frame = vc.read()
count = 0
while isAvailable:
    isAvailable, frame = vc.read()
    frame = imutils.resize(frame, width=400)
    # cv2.imshow("d", frame[40:320, 132:390])
    if W is None or H is None:
        (H, W) = frame.shape[:2]

    blob = cv2.dnn.blobFromImage(frame, 1.0, (W, H), (104.0, 177.0, 123.0))
    model.setInput(blob)
    detections = model.forward()

    rects = []
    for i in range(0, detections.shape[2]):
        if detections[0, 0, i, 2] > 0.5: