def __init__(self, opt):
    """Create the capture handle, the CenterNet detector and the Deep SORT tracker.

    Args:
        opt: Options object. ``opt.task`` is the key looked up in
            ``detector_factory``; ``opt`` itself is passed to the entry
            that is found.
    """
    self.vdo = cv2.VideoCapture()

    # CenterNet detector
    build_detector = detector_factory[opt.task]
    self.detector = build_detector(opt)

    self.deepsort = DeepSort("deep/checkpoint/ckpt.t7")
    self.write_video = True
def __init__(self, args):
    """Build the YOLOv3 detector and the Deep SORT tracker from CLI arguments.

    Args:
        args: Parsed arguments. Reads ``use_cuda`` (a string parsed with
            ``strtobool``), ``display``, ``display_width``,
            ``display_height``, the ``yolo_*`` paths, ``conf_thresh``,
            ``nms_thresh`` and ``deepsort_checkpoint``.
    """
    self.args = args
    cuda_enabled = bool(strtobool(self.args.use_cuda))

    # The preview window is only created when display was requested.
    if args.display:
        cv2.namedWindow("test", cv2.WINDOW_NORMAL)
        cv2.resizeWindow("test", args.display_width, args.display_height)

    self.vdo = cv2.VideoCapture()
    self.yolo3 = YOLOv3(
        args.yolo_cfg,
        args.yolo_weights,
        args.yolo_names,
        is_xywh=True,
        conf_thresh=args.conf_thresh,
        nms_thresh=args.nms_thresh,
        use_cuda=cuda_enabled,
    )
    self.deepsort = DeepSort(args.deepsort_checkpoint, use_cuda=cuda_enabled)
    self.class_names = self.yolo3.class_names
def __init__(self, args):
    """Build the CenterNet detector and the Deep SORT tracker.

    Args:
        args: Parsed arguments. ``args.display`` is overwritten with
            ``False`` here; ``deepsort_checkpoint`` and ``model_name`` are
            passed to ``DeepSort``.
    """
    self.args = args

    # Display is force-disabled, so the window set-up below never runs.
    # It is kept so that removing the override restores the preview window.
    args.display = False
    if args.display:
        cv2.namedWindow("test", cv2.WINDOW_NORMAL)
        cv2.resizeWindow("test", args.display_width, args.display_height)

    self.vdo = cv2.VideoCapture()

    # NOTE(review): `opt` is not a parameter of this method; it is presumably
    # a module-level CenterNet options object -- confirm it is defined.
    self.centernet = detector_factory[opt.task](opt)

    self.deepsort = DeepSort(args.deepsort_checkpoint, args.model_name)
def __init__(self, opt):
    """Create the CenterNet detector and the Deep SORT tracker.

    No ``cv2.VideoCapture`` is created by this constructor.

    Args:
        opt: Options object. ``opt.task`` selects the entry of
            ``detector_factory`` that is called with ``opt``.
    """
    # CenterNet detector
    make_detector = detector_factory[opt.task]
    self.detector = make_detector(opt)

    checkpoint_path = "deep/checkpoint/ckpt.t7"
    self.deepsort = DeepSort(checkpoint_path)

    self.write_video = True
def __init__(self, args):
    """Build the Detectron2 detector and the Deep SORT tracker.

    Args:
        args: Parsed arguments. Reads ``use_cuda`` (string parsed with
            ``strtobool``), ``display``, ``display_width``,
            ``display_height`` and ``deepsort_checkpoint``.
    """
    self.args = args
    cuda_enabled = bool(strtobool(self.args.use_cuda))

    if args.display:
        window_name = "test"
        cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
        cv2.resizeWindow(window_name, args.display_width, args.display_height)

    self.vdo = cv2.VideoCapture()
    self.detectron2 = Detectron2()
    self.deepsort = DeepSort(args.deepsort_checkpoint, use_cuda=cuda_enabled)
def __init__(self, args):
    """Build Detectron2 and a coordinate-mapper-aware Deep SORT tracker.

    Args:
        args: Parsed arguments. Reads ``use_cuda`` (string parsed with
            ``strtobool``) and ``deepsort_checkpoint``.
    """
    self.args = args
    cuda_enabled = bool(strtobool(self.args.use_cuda))

    self.vdo = cv2.VideoCapture()
    self.detectron2 = Detectron2()

    # Initialize coordinate mapper
    mapper = coord_mapper.CoordMapper(coord_mapper.ISSIA_kozep_elorol)

    # Fixed tracker hyper-parameters for this configuration.
    tracker_settings = dict(
        lambdaParam=1.0,
        coordMapper=mapper,
        max_dist=1.0,
        min_confidence=0.1,
        nms_max_overlap=0.7,
        max_iou_distance=0.7,
        max_age=75,
        n_init=3,
        nn_budget=50,
        use_cuda=cuda_enabled,
    )
    self.deepsort = DeepSort(args.deepsort_checkpoint, **tracker_settings)
def __init__(self,
             detections_file: str,
             resolution: tuple,
             fps: int,
             input_images_dir: str,
             output_video_path: str,
             output_result_path: str,
             use_cuda: bool,
             lambdaParam: float,
             max_dist: float,
             min_confidence: float,
             nms_max_overlap: float,
             max_iou_distance: float,
             max_age: int,
             n_init: int,
             nn_budget: int,
             model_path='deep_sort/deep/checkpoint/ckpt.t7',
             early_stopping=None):
    """Configure an offline tracker fed from a stored detections file.

    Args:
        detections_file: Pickle file that contains all detections.
        resolution: Indexable pair; element 0 is doubled and element 1 is
            passed unchanged to ``DeepSort``.
        fps: Frame rate forwarded to ``DeepSort``.
        input_images_dir: Folder holding the 2.5K images named
            ``{frameNum}.jpg``.
        output_video_path: Where the visualisation video is saved.
        output_result_path: Where the output is saved in CSV format.
        use_cuda: Forwarded to ``DeepSort``.
        lambdaParam, max_dist, min_confidence, nms_max_overlap,
        max_iou_distance, max_age, n_init, nn_budget: Forwarded unchanged
            to ``DeepSort``.
        model_path: Checkpoint path forwarded to ``DeepSort``.
        early_stopping: Stored as ``self.early_stopping``.

    Raises:
        AssertionError: If ``output_result_path`` or ``detections_file`` is
            ``None``.
    """
    self.detections_file = detections_file      # pickle file with every detection
    self.input_images_dir = input_images_dir    # folder of 2.5K frames, {frameNum}.jpg
    self.output_video_path = output_video_path  # destination of the visualisation video
    self.output_result_path = output_result_path  # destination of the CSV result
    self.early_stopping = early_stopping

    assert self.output_result_path is not None
    assert self.detections_file is not None

    self._use_cuda = use_cuda
    self.fps = fps
    self.resolution = resolution

    # Initialize coordinate mapper
    self.myCoordMapper = coord_mapper.CoordMapperCSG(
        match_code='HUN-BEL 1. Half')

    # The tracker receives a resolution whose first component is doubled.
    tracker_resolution = (self.resolution[0] * 2, self.resolution[1])
    self.deepsort = DeepSort(
        model_path=model_path,
        lambdaParam=lambdaParam,
        coordMapper=self.myCoordMapper,
        max_dist=max_dist,
        min_confidence=min_confidence,
        nms_max_overlap=nms_max_overlap,
        max_iou_distance=max_iou_distance,
        max_age=max_age,
        n_init=n_init,
        nn_budget=nn_budget,
        use_cuda=self._use_cuda,
        resolution=tracker_resolution,
        fps=self.fps,
    )
def __init__(self, args):
    """Set up the Deep SORT tracker, class names and submission state.

    Args:
        args: Parsed arguments. Reads ``use_cuda`` (string parsed with
            ``strtobool``), ``project``, ``display`` and
            ``deepsort_checkpoint``.
    """
    self.args = args
    cuda_enabled = bool(strtobool(self.args.use_cuda))

    # The project parameters are loaded but not used further in this
    # constructor.
    project_file = f'projects/{self.args.project}.yml'
    params = Params(project_file)

    self.cam_id = 1

    if args.display:
        # Preview window creation is disabled.
        pass

    self.vdo = cv2.VideoCapture()
    self.deepsort = DeepSort(args.deepsort_checkpoint, use_cuda=cuda_enabled)
    self.class_names = load_class_names('data/coco.names')
    self.submit = True
    self.object_list = []
class Detector(object):
    """YOLOv3 detection plus Deep SORT tracking over a video file.

    Typical use is ``open(path)`` followed by ``detect()``. Annotated frames
    are shown in a window named "test" and, when ``write_video`` is true,
    written to ``demo.avi``.
    """

    def __init__(self):
        """Create the capture handle, the YOLOv3 detector and the tracker."""
        self.vdo = cv2.VideoCapture()
        self.yolo3 = YOLO3("YOLO3/cfg/yolo_v3.cfg", "YOLO3/yolov3.weights",
                           "YOLO3/cfg/coco.names", is_xywh=True)
        self.deepsort = DeepSort("deep/checkpoint/ckpt.t7")
        self.class_names = self.yolo3.class_names
        self.write_video = True

    def open(self, video_path):
        """Open ``video_path`` and, if enabled, the output writer.

        Args:
            video_path: Path of an existing video file.

        Returns:
            The result of ``self.vdo.isOpened()``.

        Raises:
            AssertionError: If ``video_path`` is not an existing file.
        """
        assert os.path.isfile(video_path), "Error: path error"
        self.vdo.open(video_path)
        self.im_width = int(self.vdo.get(cv2.CAP_PROP_FRAME_WIDTH))
        self.im_height = int(self.vdo.get(cv2.CAP_PROP_FRAME_HEIGHT))
        # Region of interest (xmin, ymin, xmax, ymax): the full frame.
        self.area = 0, 0, self.im_width, self.im_height
        if self.write_video:
            fourcc = cv2.VideoWriter_fourcc(*'MJPG')
            self.output = cv2.VideoWriter("demo.avi", fourcc, 20,
                                          (self.im_width, self.im_height))
        return self.vdo.isOpened()

    def detect(self):
        """Run detection and tracking on every frame until the video ends.

        The capture and the writer are released when the loop finishes or
        is interrupted, so the output file is always finalised.
        """
        xmin, ymin, xmax, ymax = self.area
        try:
            while self.vdo.grab():
                start = time.time()
                ok, ori_im = self.vdo.retrieve()
                if not ok:
                    # Decoding failed for this frame; skip it instead of
                    # crashing on a None image.
                    continue

                # Crop to the region of interest and reverse the channel
                # order (OpenCV frames are BGR).
                im = ori_im[ymin:ymax, xmin:xmax, (2, 1, 0)]
                bbox_xywh, cls_conf, cls_ids = self.yolo3(im)

                if bbox_xywh is not None:
                    # Keep class id 0 only (presumably "person" in
                    # coco.names -- confirm against the names file).
                    mask = cls_ids == 0
                    bbox_xywh = bbox_xywh[mask]
                    bbox_xywh[:, 3] *= 1.2  # enlarge the 4th box column by 20%
                    cls_conf = cls_conf[mask]

                    outputs = self.deepsort.update(bbox_xywh, cls_conf, im)
                    if len(outputs) > 0:
                        bbox_xyxy = outputs[:, :4]
                        identities = outputs[:, -1]
                        ori_im = draw_bboxes(ori_im, bbox_xyxy, identities,
                                             offset=(xmin, ymin))

                end = time.time()
                print("time: {}s, fps: {}".format(end - start, 1 / (end - start)))

                cv2.imshow("test", ori_im)
                cv2.waitKey(1)

                if self.write_video:
                    self.output.write(ori_im)
        finally:
            self._release_resources()

    def _release_resources(self):
        """Release the capture and, if one was opened, the video writer."""
        self.vdo.release()
        writer = getattr(self, "output", None)
        if writer is not None:
            writer.release()
def __init__(self, args):
    """Build the InferYOLOv3 detector and the Deep SORT tracker.

    Args:
        args: Parsed arguments. Reads ``display``, ``display_width``,
            ``display_height``, ``yolo_cfg``, ``img_size``,
            ``yolo_weights``, ``data_cfg``, ``conf_thresh``, ``nms_thresh``
            and ``deepsort_checkpoint``.
    """
    self.args = args

    if args.display:
        cv2.namedWindow("test", cv2.WINDOW_NORMAL)
        cv2.resizeWindow("test", args.display_width, args.display_height)

    # Use the GPU when torch reports one, otherwise the CPU.
    if torch.cuda.is_available():
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')

    self.vdo = cv2.VideoCapture()
    self.yolo3 = InferYOLOv3(
        args.yolo_cfg,
        args.img_size,
        args.yolo_weights,
        args.data_cfg,
        device,
        conf_thres=args.conf_thresh,
        nms_thres=args.nms_thresh,
    )
    self.deepsort = DeepSort(args.deepsort_checkpoint)
    self.class_names = self.yolo3.classes
def __init__(self, args):
    """Build the YOLOv3 detector, the Deep SORT tracker and the ResNet model.

    Args:
        args: Parsed arguments. Reads ``use_cuda`` (string parsed with
            ``strtobool``), ``display``, ``display_width``,
            ``display_height``, the ``yolo_*`` paths, ``conf_thresh``,
            ``nms_thresh`` and ``deepsort_checkpoint``.
    """
    self.args = args
    cuda_enabled = bool(strtobool(self.args.use_cuda))

    if args.display:
        cv2.namedWindow("test", cv2.WINDOW_NORMAL)
        cv2.resizeWindow("test", args.display_width, args.display_height)

    self.yolo3 = YOLOv3(
        args.yolo_cfg,
        args.yolo_weights,
        args.yolo_names,
        is_xywh=True,
        conf_thresh=args.conf_thresh,
        nms_thresh=args.nms_thresh,
        use_cuda=cuda_enabled,
    )
    self.deepsort = DeepSort(args.deepsort_checkpoint, use_cuda=cuda_enabled)
    self.class_names = self.yolo3.class_names

    # get_resnet_model() yields the model and its spatial transform as a pair.
    model, spatial_transform = get_resnet_model()
    self.resnet3d_model = model
    self.resnet_spatial_transform = spatial_transform
class Detector(object):
    """RFBNet detection plus Deep SORT tracking over a video file.

    ``detect()`` relies on module-level names (``test_net``, ``net``,
    ``detector``, ``args``, ``rgb_means``, ``top_k``) that are not defined
    in this class.
    """

    def __init__(self):
        """Create the capture handle and the Deep SORT tracker."""
        self.vdo = cv2.VideoCapture()
        self.deepsort = DeepSort("deep/checkpoint/ckpt.t7")
        self.write_video = True

    def open(self, video_path):
        """Open ``video_path`` and, if enabled, the ``demo1.avi`` writer.

        Args:
            video_path: Path of an existing video file.

        Returns:
            The result of ``self.vdo.isOpened()``.

        Raises:
            AssertionError: If ``video_path`` is not an existing file.
        """
        assert os.path.isfile(video_path), "Error: path error"
        self.vdo.open(video_path)
        self.im_width = int(self.vdo.get(cv2.CAP_PROP_FRAME_WIDTH))
        self.im_height = int(self.vdo.get(cv2.CAP_PROP_FRAME_HEIGHT))
        # Region of interest (xmin, ymin, xmax, ymax): the full frame.
        self.area = 0, 0, self.im_width, self.im_height
        if self.write_video:
            fourcc = cv2.VideoWriter_fourcc(*'MJPG')
            self.output = cv2.VideoWriter("demo1.avi", fourcc, 20,
                                          (self.im_width, self.im_height))
        return self.vdo.isOpened()

    def detect(self):
        """Run detection and tracking on every frame until the video ends.

        The capture and the writer are released when the loop finishes or
        is interrupted, so the output file is always finalised.
        """
        xmin, ymin, xmax, ymax = self.area
        try:
            while self.vdo.grab():
                ok, ori_im = self.vdo.retrieve()
                if not ok:
                    # Decoding failed for this frame; skip it instead of
                    # crashing on a None image.
                    continue

                im = ori_im[ymin:ymax, xmin:xmax]

                # RFBNet usage tutorial
                results = test_net(im, net, detector, args.cuda,
                                   BaseTransform(net.size, rgb_means, (2, 0, 1)),
                                   top_k, thresh=0.4)
                bbox_xywh, cls_conf = bbox_to_xywh_cls_conf(results)

                if bbox_xywh is not None:
                    outputs = self.deepsort.update(bbox_xywh, cls_conf, im)
                    if len(outputs) > 0:
                        bbox_xyxy = outputs[:, :4]
                        identities = outputs[:, -1]
                        ori_im = draw_bboxes(ori_im, bbox_xyxy, identities,
                                             offset=(xmin, ymin))

                cv2.imshow("test", ori_im)
                cv2.waitKey(1)

                if self.write_video:
                    self.output.write(ori_im)
        finally:
            self._release_resources()

    def _release_resources(self):
        """Release the capture and, if one was opened, the video writer."""
        self.vdo.release()
        writer = getattr(self, "output", None)
        if writer is not None:
            writer.release()
def __init__(self, args):
    """Build the detectron2 predictor and the Deep SORT tracker.

    Args:
        args: Parsed arguments. Reads ``use_cuda`` (string parsed with
            ``strtobool``), ``display``, ``display_width``,
            ``display_height``, ``image_input``, ``detectron2_weights``,
            ``deepsort_checkpoint``, ``extractor_type``, ``game_id`` and
            ``team_0``.
    """
    self.args = args
    cuda_enabled = bool(strtobool(self.args.use_cuda))

    if args.display:
        cv2.namedWindow("test", cv2.WINDOW_NORMAL)
        cv2.resizeWindow("test", args.display_width, args.display_height)

    # A capture handle is only needed when the input is not a set of images.
    if not args.image_input:
        self.vdo = cv2.VideoCapture()

    # Cascade Mask R-CNN X-152 config; weights come from the command line.
    # (A COCO keypoint R-CNN X-101 config was used previously.)
    cfg = get_cfg()
    cfg.merge_from_file("../detectron2_repo/configs/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml")
    cfg.MODEL.WEIGHTS = args.detectron2_weights
    cfg.MODEL.MASK_ON = False
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1
    cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST = 0.5
    self.predictor = DefaultPredictor(cfg)

    self.deepsort = DeepSort(
        args.deepsort_checkpoint,
        use_cuda=cuda_enabled,
        extractor_type=args.extractor_type,
        game_id=args.game_id,
        team_0=args.team_0,
    )
def __init__(self, args):
    """Build Detectron2 and a Deep SORT tracker fed from an image list.

    Args:
        args: Parsed arguments. Reads ``use_cuda`` (string parsed with
            ``strtobool``), ``imgs_path`` (a glob pattern) and
            ``deepsort_checkpoint``.
    """
    self.args = args
    cuda_enabled = bool(strtobool(self.args.use_cuda))

    # Input frames are image files matched by the glob pattern, in natural
    # sort order; no cv2.VideoCapture is created.
    matched_paths = glob.glob(self.args.imgs_path)
    self.imgList = natsort.natsorted(matched_paths)

    self.detectron2 = Detectron2()

    # Initialize coordinate mapper
    self.myCoordMapper = coord_mapper.CoordMapperCSG(
        match_code='HUN-BEL 2. Half')

    self.fps = 6
    self.deepsort = DeepSort(
        args.deepsort_checkpoint,
        lambdaParam=0.6,
        coordMapper=self.myCoordMapper,
        max_dist=1.0,
        min_confidence=0.1,
        nms_max_overlap=0.7,
        max_iou_distance=0.7,
        max_age=self.fps * 3,  # three seconds' worth of frames at self.fps
        n_init=3,
        nn_budget=50,
        use_cuda=cuda_enabled,
    )
def tracking(queue_items: mp.Queue, area):
    """Consume detection batches from a queue, track them and log the tracks.

    One ``DeepSort`` instance is kept per class id. For every tracked box a
    line ``<frame_id> <class_id> <track_id> <x1> <y1> <x2> <y2>`` is written
    to the module-level ``txt_path``. The function returns once the queue
    has been empty for 3 seconds.

    Args:
        queue_items: Queue of items exposing ``detect_results``, ``imgs``,
            ``ori_imgs`` and ``frame_ids``.
        area: ``(xmin, ymin, xmax, ymax)``; ``(xmin, ymin)`` is added to
            every box before it is written or drawn.
    """
    # `with` guarantees the result file is closed even if tracking raises.
    with open(txt_path, 'wt') as txt_writer:
        # Five trackers so that class ids 1..4 can index the list directly
        # (index 0 is never used below).
        deepsorts = []
        for _ in range(5):
            deepsort = DeepSort("deep/checkpoint/ckpt.t7")
            deepsort.extractor.net.share_memory()
            deepsorts.append(deepsort)

        xmin, ymin, _, _ = area
        offset = (xmin, ymin)

        while True:
            try:
                queue_item = queue_items.get(block=True, timeout=3)
            except queue.Empty:
                print('Empty queue. End?')
                break

            batch_results = queue_item.detect_results
            imgs = queue_item.imgs
            ori_imgs = queue_item.ori_imgs
            frame_ids = queue_item.frame_ids

            # frame by frame
            for batch_idx, results in enumerate(batch_results):
                for class_id in [1, 2, 3, 4]:
                    bbox_xywh, cls_conf = bbox_to_xywh_cls_conf(results, class_id)
                    if bbox_xywh is None or len(bbox_xywh) == 0:
                        continue

                    outputs = deepsorts[class_id].update(
                        bbox_xywh, cls_conf, imgs[batch_idx])
                    if len(outputs) == 0:
                        continue

                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, -1]

                    if is_write:
                        # Called for its drawing side effect on the frame.
                        draw_bboxes(ori_imgs[batch_idx], bbox_xyxy, identities,
                                    class_id, offset=(xmin, ymin))

                    for i, box in enumerate(bbox_xyxy):
                        x1, y1, x2, y2 = [int(coord) for coord in box]
                        x1 += offset[0]
                        x2 += offset[0]
                        y1 += offset[1]
                        y2 += offset[1]
                        idx = int(identities[i]) if identities is not None else 0
                        txt_writer.write(
                            f'{frame_ids[batch_idx]} {class_id} {idx} {x1} {y1} {x2} {y2}\n'
                        )
class Detector(object):
    """CenterNet detection plus Deep SORT tracking, writing an annotated video."""

    def __init__(self, centernet_opt, args):
        """Create the CenterNet detector and the Deep SORT tracker.

        Args:
            centernet_opt: CenterNet options; ``centernet_opt.task`` selects
                the entry of ``detector_factory`` that is called with it.
            args: Arguments providing ``deepsort_checkpoint``,
                ``max_cosine_distance``, ``use_cuda`` and (used by ``run``)
                ``min_confidence``.
        """
        # CenterNet detector
        self.detector = detector_factory[centernet_opt.task](centernet_opt)

        # Deep SORT
        self.deepsort = DeepSort(args.deepsort_checkpoint,
                                 args.max_cosine_distance, args.use_cuda)

        self.args = args

    def run(self, video_path, output_path):
        """Track every frame of ``video_path`` and write the result video.

        Args:
            video_path: Path of an existing input video.
            output_path: Path of the MJPG output video (written at 20 fps).

        Raises:
            AssertionError: If ``video_path`` is not an existing file.
        """
        # open input video
        assert os.path.isfile(video_path), "Error: invalid video path"
        vdo = cv2.VideoCapture()
        vdo.open(video_path)

        # open output video
        im_width = int(vdo.get(cv2.CAP_PROP_FRAME_WIDTH))
        im_height = int(vdo.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fourcc = cv2.VideoWriter_fourcc(*"MJPG")
        output_vdo = cv2.VideoWriter(output_path, fourcc, 20,
                                     (im_width, im_height))

        # track each frame in video
        start_time = time.time()
        frame_cnt = 0
        try:
            while vdo.grab():
                frame_cnt += 1
                ok, ori_im = vdo.retrieve()
                if not ok:
                    # Decoding failed for this frame; skip it instead of
                    # crashing on a None image.
                    continue
                im = ori_im[0:im_height, 0:im_width]

                # Only the results stored under key 1 are tracked.
                detection = self.detector.run(im)["results"][1]
                bbox_xywh, conf = Detector._bbox_to_xywh_cls_conf(
                    detection, self.args.min_confidence)

                outputs = self.deepsort.update(bbox_xywh, conf, im)
                if len(outputs) > 0:
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, -1]
                    ori_im = draw_bboxes(ori_im, bbox_xyxy, identities)

                elapsed_time = time.time() - start_time
                print("Frame {:05d}, Time {:.3f}s, FPS {:.3f}".format(
                    frame_cnt, elapsed_time, frame_cnt / elapsed_time))

                output_vdo.write(ori_im)
        finally:
            # Always release both handles so the output file is finalised
            # even when tracking is interrupted.
            vdo.release()
            output_vdo.release()

    @staticmethod
    def _bbox_to_xywh_cls_conf(bbox, min_confidence):
        """Filter detections by score and convert corners to centre/size.

        Args:
            bbox: Array whose rows are ``(x1, y1, x2, y2, score)``.
            min_confidence: Rows with ``score <= min_confidence`` are dropped.

        Returns:
            ``(boxes, scores)`` where ``boxes`` rows are
            ``(center_x, center_y, width, height)``. The input array is not
            modified (boolean indexing makes a copy first).
        """
        bbox = bbox[bbox[:, 4] > min_confidence, :]
        # Width and height first, then shift the corner to the centre.
        bbox[:, 2] = bbox[:, 2] - bbox[:, 0]
        bbox[:, 3] = bbox[:, 3] - bbox[:, 1]
        bbox[:, 0] = bbox[:, 0] + bbox[:, 2] / 2
        bbox[:, 1] = bbox[:, 1] + bbox[:, 3] / 2
        return bbox[:, :4], bbox[:, 4]
class DeepsortTracker(object):
    """Thin wrapper that feeds corner-format boxes to a Deep SORT tracker."""

    def __init__(self, config=config):
        """Create the tracker.

        Args:
            config: Object providing ``deepsort_checkpoint`` and
                ``use_cuda``; defaults to the module-level ``config``.
        """
        self.config = config
        self.deepsort = DeepSort(config.deepsort_checkpoint,
                                 use_cuda=config.use_cuda)

    def detect(self, img, boxes_x1y1x2y2conf):
        """Update the tracker with the detections of one frame.

        Args:
            img: Frame passed through to ``DeepSort.update``.
            boxes_x1y1x2y2conf: Iterable of ``(x1, y1, x2, y2, conf)`` rows.

        Returns:
            ``(boxes_x1y1x2y2, identities, track_states)``. When the tracker
            reports no tracks, the first two items are empty lists. A
            3-tuple is returned in both cases so callers can always unpack
            three values.
        """
        box_xcycwh = []
        box_conf = []
        for box in boxes_x1y1x2y2conf:
            x1, y1, x2, y2 = box[0], box[1], box[2], box[3]
            # Integer centre plus width/height.
            box_xcycwh.append(
                np.array([(x1 + x2) // 2, (y1 + y2) // 2, x2 - x1, y2 - y1],
                         dtype=np.int32))
            box_conf.append(box[4])
        box_xcycwh = np.array(box_xcycwh)

        outputs, track_states = self.deepsort.update(box_xcycwh, box_conf, img)

        # len() works for both a list and an ndarray, whereas `outputs == []`
        # is an element-wise comparison on arrays.
        if len(outputs) == 0:
            return [], [], track_states

        box_x1y1x2y2 = outputs[:, :4]
        identities = outputs[:, -1]
        return box_x1y1x2y2, identities, track_states
class Detector(object):
    """CenterNet detection plus Deep SORT tracking on webcam, IP camera or file.

    ``open()`` and ``detect()`` read the input configuration from a
    module-level ``opt`` object, not from the instance.
    """

    def __init__(self, opt):
        """Create the capture handle, the CenterNet detector and the tracker.

        Args:
            opt: Options object; ``opt.task`` selects the entry of
                ``detector_factory`` that is called with ``opt``.
        """
        self.vdo = cv2.VideoCapture()
        # CenterNet detector
        self.detector = detector_factory[opt.task](opt)
        self.deepsort = DeepSort("deep/checkpoint/ckpt.t7")
        self.write_video = True

    def open(self, video_path):
        """Open the input selected by ``opt.input_type`` and the output writer.

        Args:
            video_path: NOTE(review): currently ignored; the file branch
                uses ``opt.vid_path`` instead. Confirm whether that is
                intended before changing it.

        Raises:
            AssertionError: In the file branch, if ``opt.vid_path`` is not
                an existing file.
        """
        if opt.input_type == 'webcam':
            self.vdo.open(opt.webcam_ind)
        elif opt.input_type == 'ipcam':
            # load cam key, secret
            with open("cam_secret.txt") as f:
                lines = f.readlines()
                key = lines[0].strip()
                secret = lines[1].strip()
            self.vdo.open(opt.ipcam_url.format(key, secret, opt.ipcam_no))
        else:
            # video
            assert os.path.isfile(opt.vid_path), "Error: path error"
            self.vdo.open(opt.vid_path)

        self.im_width = int(self.vdo.get(cv2.CAP_PROP_FRAME_WIDTH))
        self.im_height = int(self.vdo.get(cv2.CAP_PROP_FRAME_HEIGHT))
        # Region of interest (xmin, ymin, xmax, ymax): the full frame.
        self.area = 0, 0, self.im_width, self.im_height

        if self.write_video:
            fourcc = cv2.VideoWriter_fourcc(*'MJPG')
            self.output = cv2.VideoWriter("demo1.avi", fourcc, 20,
                                          (self.im_width, self.im_height))

    def detect(self):
        """Run detection and tracking until the source stops delivering frames.

        The capture and the writer are released when the loop finishes or
        is interrupted (for example with Ctrl-C on a live camera), so the
        output file is always finalised.
        """
        xmin, ymin, xmax, ymax = self.area
        frame_no = 0
        avg_fps = 0.0
        try:
            while self.vdo.grab():
                start = time.time()
                ok, ori_im = self.vdo.retrieve()
                if not ok:
                    # Decoding failed for this frame; skip it instead of
                    # crashing on a None image.
                    continue
                frame_no += 1

                im = ori_im[ymin:ymax, xmin:xmax]

                results = self.detector.run(im)['results']
                bbox_xywh, cls_conf = bbox_to_xywh_cls_conf(results)

                if bbox_xywh is not None:
                    outputs = self.deepsort.update(bbox_xywh, cls_conf, im)
                    if len(outputs) > 0:
                        bbox_xyxy = outputs[:, :4]
                        identities = outputs[:, -1]
                        ori_im = draw_bboxes(ori_im, bbox_xyxy, identities,
                                             offset=(xmin, ymin))

                end = time.time()
                fps = 1 / (end - start)
                # avg_fps accumulates the per-frame values; the mean is
                # formed at print time.
                avg_fps += fps
                print("centernet time: {}s, fps: {}, avg fps : {}".format(
                    end - start, fps, avg_fps / frame_no))

                cv2.imshow("test", ori_im)
                cv2.waitKey(1)

                if self.write_video:
                    self.output.write(ori_im)
        finally:
            self._release_resources()

    def _release_resources(self):
        """Release the capture and, if one was opened, the video writer."""
        self.vdo.release()
        writer = getattr(self, "output", None)
        if writer is not None:
            writer.release()
def _draw_people_count(img, num_people):
    """Return ``img`` with a semi-transparent "Number of people" label."""
    overlay = img.copy()
    count_str = f'Number of people: {num_people}'
    text_size = cv2.getTextSize(count_str, 0, fontScale=0.5, thickness=1)[0]
    # White box on the overlay, blended at 40% onto the frame.
    cv2.rectangle(overlay, (10, 10 + 10),
                  (15 + text_size[0], 10 + 20 + text_size[1]),
                  (255, 255, 255), -1)
    img = cv2.addWeighted(overlay, 0.4, img, 0.6, 0)
    cv2.putText(img, count_str, (12, 10 + 15 + text_size[1]), 0, 0.5,
                (0, 0, 0), thickness=1, lineType=cv2.LINE_AA)
    return img


def main():
    """Run the live person re-identification demo on camera 0.

    Keys: ``q``/``Esc`` quit, ``r`` resets tracking, ``space`` pauses until
    the next key press. The camera and all windows are released on exit,
    including when the loop ends with an exception.

    Raises:
        AssertionError: If the camera cannot be opened.
    """
    print('Connecting to camera')
    cap = cv2.VideoCapture(0)
    assert cap.isOpened(), 'Unable to connect to camera'

    try:
        device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

        print('Loading models')
        detector = Detector('weights/yolov5s.pt',
                            img_size=(640, 640),
                            conf_thresh=0.4,
                            iou_thresh=0.5,
                            agnostic_nms=False,
                            device=device)
        deepsort = DeepSort('weights/ckpt.t7',
                            max_dist=0.2,
                            min_confidence=0.3,
                            nms_max_overlap=0.5,
                            max_iou_distance=0.7,
                            max_age=70,
                            n_init=3,
                            nn_budget=100,
                            device=device)
        bboxes_visualizer = BoundingBoxesVisualizer()
        fps_estimator = MeanEstimator()
        person_cls_id = detector.names.index('person')  # get id of 'person' class

        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        cam_fps = int(cap.get(cv2.CAP_PROP_FPS))
        print(f'Starting capture, camera_fps={cam_fps}')

        # Start of demo
        win_name = 'MICA ReID Demo'
        cv2.namedWindow(win_name, cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_FREERATIO)
        cv2.resizeWindow(win_name, width, height)

        frame_id = 0
        while True:
            start_it = time.time()
            ret, img = cap.read()
            if not ret:
                print('Unable to read camera')
                break

            detections = detector.detect([img])[0]
            num_people = 0
            if detections is not None:
                # filter person
                detections = detections[detections[:, -1].eq(person_cls_id)]
                xywh, confs = parse_detection(detections)
                outputs = deepsort.update(xywh, confs, img)
                num_people = len(outputs)

                # Forget trails of tracks that are deleted or have not been
                # updated for more than 3 frames.
                bboxes_visualizer.remove([
                    t.track_id for t in deepsort.tracker.tracks
                    if t.time_since_update > 3 or t.is_deleted()
                ])
                bboxes_visualizer.update(outputs)

                # draw detections
                for pid in outputs[:, -1]:
                    bboxes_visualizer.plot(img,
                                           pid,
                                           label=f'Person {pid}',
                                           line_thickness=5,
                                           trail_trajectory=True,
                                           trail_bbox=False)

            # draw counting
            img = _draw_people_count(img, num_people)

            # show
            cv2.imshow(win_name, img)
            key = cv2.waitKey(1)

            elapsed_time = time.time() - start_it
            fps = fps_estimator.update(1 / elapsed_time)
            print(
                f'[{frame_id:06d}] num_detections={num_people} fps={fps:.02f} elapsed_time={elapsed_time:.03f}'
            )

            # check key pressed
            if key == ord('q') or key == 27:  # q or esc to quit
                break
            elif key == ord('r'):  # r to reset tracking
                deepsort.reset()
                bboxes_visualizer.clear()
            elif key == 32:  # space to pause
                key = cv2.waitKey(0)
                if key == ord('q') or key == 27:
                    break

            frame_id += 1
    finally:
        cv2.destroyAllWindows()
        cap.release()
class Detector(object):
    """YOLOv3 detection plus Deep SORT tracking, usable as a context manager.

    Entering the context opens ``args.VIDEO_PATH`` (and the output writer
    when ``args.save_path`` is set); leaving it releases both.
    """

    def __init__(self, args):
        """Build the detector and tracker.

        Args:
            args: Parsed arguments. ``args.display`` is overwritten with
                ``False`` here. Also reads the ``yolo_*`` paths,
                ``conf_thresh``, ``nms_thresh`` and ``deepsort_checkpoint``.
        """
        self.args = args

        # Display is force-disabled, so the window set-up below never runs.
        # It is kept so that removing the override restores the preview.
        args.display = False
        if args.display:
            cv2.namedWindow("test", cv2.WINDOW_NORMAL)
            cv2.resizeWindow("test", args.display_width, args.display_height)

        self.vdo = cv2.VideoCapture()
        self.yolo3 = YOLOv3(args.yolo_cfg, args.yolo_weights, args.yolo_names,
                            is_xywh=True, conf_thresh=args.conf_thresh,
                            nms_thresh=args.nms_thresh)
        self.deepsort = DeepSort(args.deepsort_checkpoint)
        self.class_names = self.yolo3.class_names

    def __enter__(self):
        """Open the input video and, if requested, the output writer.

        Raises:
            AssertionError: If ``args.VIDEO_PATH`` is not an existing file
                or the capture could not be opened.
        """
        assert os.path.isfile(self.args.VIDEO_PATH), "Error: path error"
        self.vdo.open(self.args.VIDEO_PATH)
        self.im_width = int(self.vdo.get(cv2.CAP_PROP_FRAME_WIDTH))
        self.im_height = int(self.vdo.get(cv2.CAP_PROP_FRAME_HEIGHT))

        if self.args.save_path:
            fourcc = cv2.VideoWriter_fourcc(*'MJPG')
            self.output = cv2.VideoWriter(self.args.save_path, fourcc, 20,
                                          (self.im_width, self.im_height))

        assert self.vdo.isOpened()
        return self

    def __exit__(self, exc_type, exc_value, exc_traceback):
        """Release the capture and writer; print any exception.

        Returns ``None``, so an exception raised inside the ``with`` body
        still propagates.
        """
        self.vdo.release()
        writer = getattr(self, "output", None)  # only set when save_path is given
        if writer is not None:
            writer.release()
        if exc_type:
            print(exc_type, exc_value, exc_traceback)

    def detect(self):
        """Run detection and tracking on every frame until the video ends."""
        while self.vdo.grab():
            start = time.time()
            ok, ori_im = self.vdo.retrieve()
            if not ok:
                # Decoding failed for this frame; skip it instead of
                # crashing on a None image.
                continue

            # The detector receives the frame exactly as decoded. An earlier
            # BGR->RGB conversion was computed here and immediately
            # overwritten, so it never reached the detector.
            im = ori_im

            bbox_xcycwh, cls_conf, cls_ids = self.yolo3(im)
            if bbox_xcycwh is not None:
                # select class person
                mask = cls_ids == 0
                bbox_xcycwh = bbox_xcycwh[mask]
                bbox_xcycwh[:, 3:] *= 1.2  # enlarge columns 3 onwards by 20%
                cls_conf = cls_conf[mask]

                outputs = self.deepsort.update(bbox_xcycwh, cls_conf, im)
                if len(outputs) > 0:
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, -1]
                    ori_im = draw_bboxes(ori_im, bbox_xyxy, identities)

            end = time.time()
            print("time: {}s, fps: {}".format(end - start, 1 / (end - start)))

            if self.args.display:
                cv2.imshow("test", ori_im)
                cv2.waitKey(1)

            if self.args.save_path:
                self.output.write(ori_im)
# Detector: EfficientDet detection + Deep SORT tracking over TFRecord data,
# exporting the results as serialized `metrics_pb2.Objects` messages.
#
# NOTE(review): the whitespace of this definition has been collapsed, so the
# statement nesting cannot be read off the layout. Restore the original
# formatting before changing any logic.
#
# __init__(args)
#   Stores `args`, loads `Params` from projects/<args.project>.yml, creates an
#   unopened `cv2.VideoCapture`, an `EfficientDetBackbone` (moved to the GPU
#   with `.cuda()`) and a `DeepSort` tracker, then loads the detector weights
#   from `args.detector_weights_path` with `strict=False`.
#   - `use_cuda` is parsed from `args.use_cuda`, but `DeepSort` is created
#     with a hard-coded `use_cuda=True`.
#   - NOTE(review): `eval()` is applied to `params.anchors_ratios` and
#     `params.anchors_scales`; the project YAML must be trusted input.
# __enter__()
#   Fixes the frame size to 1920x1280 and, when `args.save_path` is set,
#   opens a DIVX `cv2.VideoWriter` at 10 fps. Returns `self`.
# __exit__(exc_type, exc_value, exc_traceback)
#   Only prints the exception triple when there is one; neither `self.vdo`
#   nor `self.output` is released here.
# detect()
#   Iterates `tfrecord_paths[2:]`, resetting `object_list` and
#   `object_list_tracks` for each record. Each batch is run through
#   EfficientDet under `torch.no_grad()` and `postprocess(...)`. Every ROI of
#   `out[0]["rois"]` is rewritten in place from corner form to
#   (center_x, center_y, width, height). Detections with class id <= 4 are
#   kept and passed to `self.deepsort.update(...)`. When `self.submit` is
#   true, each tracked box is copied into a protobuf object (score fixed at
#   0.9, type mapped through `to_waymo_classes`) and appended to
#   `self.object_list`; a second copy carrying the track id goes to
#   `self.object_list_tracks`. Both lists are serialized and appended (mode
#   'ab') to ./output/detection/sub_camid_<cam_id>.bin and
#   ./output/tracking/sub_camid_<cam_id>.bin.
#   - NOTE(review): the bare `except: continue` around
#     `bbox_xcycwh[:, 3:] *= 1` hides every error type, not just the
#     empty/1-D case it presumably targets.
#   - NOTE(review): boxes and confidences are masked before
#     `deepsort.update`, but the class ids passed are the unmasked
#     `out[0]["class_ids"]` -- confirm this is intended.
#   - NOTE(review): `self.args.save_path` is overwritten with
#     "cam_<cam_id>.avi" inside `detect`, after the writer was already opened
#     in `__enter__`.
#   - The two output files are opened with plain `open`/`close`, so they stay
#     open if serialization raises.
class Detector(object): def __init__(self, args): self.args = args use_cuda = bool(strtobool(self.args.use_cuda)) params = Params(f'projects/{self.args.project}.yml') self.submit = True self.cam_id = 1 self.object_list = [] self.object_list_tracks = [] if args.display: pass # cv2.namedWindow("test", cv2.WINDOW_NORMAL) # cv2.resizeWindow("test", args.display_width, args.display_height) self.vdo = cv2.VideoCapture() self.efficientdet = EfficientDetBackbone( num_classes=len(params.obj_list), compound_coef=self.args.compound_coef, ratios=eval(params.anchors_ratios), scales=eval(params.anchors_scales)).cuda() # self.yolo3 = YOLOv3(args.yolo_cfg, args.yolo_weights, args.yolo_names, is_xywh=True, conf_thresh=args.conf_thresh, nms_thresh=args.nms_thresh, use_cuda=use_cuda) self.deepsort = DeepSort(args.deepsort_checkpoint, use_cuda=True) # self.class_names = self.yolo3.class_names self.efficientdet.load_state_dict(torch.load( args.detector_weights_path), strict=False) def __enter__(self): self.im_width = 1920 self.im_height = 1280 if self.args.save_path: fourcc = cv2.VideoWriter_fourcc(*'DIVX') self.output = cv2.VideoWriter(self.args.save_path, fourcc, 10, (self.im_width, self.im_height)) return self def __exit__(self, exc_type, exc_value, exc_traceback): if exc_type: print(exc_type, exc_value, exc_traceback) def detect(self): for tf_idx, tfrecord in enumerate(tqdm(tfrecord_paths[2:])): self.object_list = [] self.object_list_tracks = [] training_set = TUMuchTrackingDataset(tfrecord_path=tfrecord, transform=tfs, cam_id=self.cam_id) training_generator = DataLoader(training_set, **training_params) for it, data in enumerate(training_generator): imgs = data['img'].to(torch.device("cuda:0")) if self.submit: meta = data['meta'] with torch.no_grad(): features, regression, classification, anchors = self.efficientdet( imgs) out = postprocess(imgs, anchors, regression, classification, regressBoxes, clipBoxes, threshold, iou_threshold) # boxes is cx, cy, cw, ch boxes = out[0]["rois"] 
for idx in range(out[0]["rois"].shape[0]): cx, cy, lx, ly = out[0]["rois"][idx] cw, ch = lx - cx, ly - cy boxes[idx][0] = cx + cw / 2 boxes[idx][1] = cy + ch / 2 boxes[idx][2] = cw boxes[idx][3] = ch bbox_xcycwh, cls_conf, cls_ids = boxes, out[0]["scores"], out[ 0]["class_ids"] if bbox_xcycwh is not None: mask = cls_ids <= 4 bbox_xcycwh = bbox_xcycwh[mask] try: bbox_xcycwh[:, 3:] *= 1 except: continue cls_conf = cls_conf[mask] im = imgs.cpu().numpy() im = im[0, :, :, :] im = np.swapaxes(im, 0, 2) im = np.swapaxes(im, 0, 1) im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) im = im * 255 im = im.astype(np.uint8) outputs = self.deepsort.update(bbox_xcycwh, cls_conf, out[0]["class_ids"], im) if len(outputs) > 0: bbox_xyxy = outputs[:, :4] identities = outputs[:, -2] track_class = outputs[:, -1] if self.submit: for box_idx in range(bbox_xyxy.shape[0]): o = meta[:][0] box = label_pb2.Label.Box() box.center_x = (bbox_xyxy[box_idx, 0] + bbox_xyxy[box_idx, 2]) / 2 box.center_y = (bbox_xyxy[box_idx, 1] + bbox_xyxy[box_idx, 3]) / 2 box.length = (bbox_xyxy[box_idx, 2] - bbox_xyxy[box_idx, 0]) box.width = (bbox_xyxy[box_idx, 3] - bbox_xyxy[box_idx, 1]) o.object.box.CopyFrom(box) o.score = 0.9 # CHECK THIS # Use correct type. 
o.object.type = to_waymo_classes[track_class[ box_idx]] # MAP THIS TO CORRECT CLASSES self.object_list.append(copy.deepcopy(o)) o.object.id = str(identities[box_idx]) self.object_list_tracks.append( copy.deepcopy(o)) # import pdb; pdb.set_trace() if self.args.save_path: draw_bboxes(im, bbox_xyxy, identities) if self.args.display: pass self.args.save_path = "cam_{}.avi".format(self.cam_id) if self.args.save_path: self.output.write(im) objects = metrics_pb2.Objects() # write object detection stuff for o in self.object_list: objects.objects.append(o) f = open("./output/detection/sub_camid_{}.bin".format(self.cam_id), 'ab') f.write(objects.SerializeToString()) f.close() objects = metrics_pb2.Objects() # write object detection stuff for o in self.object_list_tracks: objects.objects.append(o) f = open("./output/tracking/sub_camid_{}.bin".format(self.cam_id), 'ab') f.write(objects.SerializeToString()) f.close()
# MOTTracker: multi-object tracker that alternates a detector (face via MTCNN
# or person via RetinaNet) with Kalman-filter prediction and feeds the boxes
# to Deep SORT.
#
# NOTE(review): the whitespace of this definition has been collapsed, so the
# statement nesting cannot be read off the layout. Restore the original
# formatting before changing any logic.
#
# __init__(args)
#   Opens the video through `open_video()`, then picks the detector from
#   `args.mot_type`: 'face' -> `MtcnnDetector` (thresholds 0.7/0.8/0.9, crop
#   size 112x112), 'person' -> `RetinanetDetector`. Also creates `DeepSort`,
#   a `KalmanFilter`, `MoveTrackerRun` and `BlurDetection`; `self.score`
#   (60.0) is the threshold compared against the blur-detection result in
#   `save_track_results`.
# open_video()
#   Raises `Exception` if `args.VIDEO_PATH` is not a file or the capture did
#   not open; stores the frame width/height; creates `args.save_dir` when it
#   is set and missing.
# Box-format helpers (all return an (N, 4) array built with `np.vstack(...).T`)
#   xcycah2xcyc : (xc, yc, aspect, h) -> (xc, yc, w, h) with w = aspect * h
#   xcycah2xyxy : (xc, yc, aspect, h) -> (x1, y1, x2, y2)
#   xyxy2xcyc   : (x1, y1, x2, y2)    -> (xc, yc, w, h)
#   xyxy2xywh   : (x1, y1, x2, y2)    -> (x1, y1, w, h)
#   xywh2xcycwh : (x, y, w, h)        -> (xc, yc, w, h)
#   xywh2xyxy   : (x, y, w, h)        -> (x1, y1, x2, y2)
#   xcyc2xcycah : (xc, yc, w, h)      -> (xc, yc, w / h, h), as float32
# widerbox(boxes)
#   Grows every corner-format box by 30% of its width/height on each side,
#   clamped to [0, im_width] x [0, im_height].
# save_track_results(bbox_xyxy, img, identities, offset=[0, 0])
#   Clamps each box to the image, crops it, and writes the crop to
#   <save_dir>/<id>/<id>_<count>.jpg when `_blurrDetection(crop)` exceeds
#   `self.score`. `offset` is a mutable default but is only read here.
# detect()
#   Loops while the capture is open. On every 5th frame (`cnt % 5 == 0`) or
#   when `detect_fg` is set, the detector runs and the Kalman tracks are
#   initialised, predicted and updated; on other frames only
#   `moveTrack.track_predict()` runs and boxes come from
#   `moveTrack.means_track`. Boxes are then passed to `deepsort.update(...)`
#   together with `update_fg`, drawn, and shown in the "test" window. Esc or
#   `q` stops the loop; finally the capture is released, windows are
#   destroyed and average fps / total time are printed.
#   - NOTE(review): the status returned by `self.vdo.read()` is discarded, so
#     the end of the video presumably surfaces as a `cvtColor` failure on
#     `None` rather than a clean exit -- confirm.
#   - NOTE(review): the `continue` statements on empty detections skip
#     `cnt += 1` and the timing/display code.
class MOTTracker(object): def __init__(self, args): self.args = args # if args.display: # cv2.namedWindow("test", cv2.WINDOW_NORMAL) # cv2.resizeWindow("test", args.display_width, args.display_height) self.open_video() #self.yolo3 = YOLOv3(args.yolo_cfg, args.yolo_weights, args.yolo_names,use_cuda=args.use_cuda, is_xywh=True, conf_thresh=args.conf_thresh, nms_thresh=args.nms_thresh) self.command_type = args.mot_type threshold = np.array([0.7, 0.8, 0.9]) crop_size = [112, 112] if self.command_type == 'face': self.mtcnn = MtcnnDetector(threshold, crop_size, args.detect_model) elif self.command_type == 'person': self.person_detect = RetinanetDetector(args) self.deepsort = DeepSort(args.feature_model, args.face_load_num, use_cuda=args.use_cuda, mot_type=self.command_type) self.kf = KalmanFilter() self.meanes_track = [] self.convariances_track = [] self.id_cnt_dict = dict() self.moveTrack = MoveTrackerRun(self.kf) self.img_clarity = BlurDetection() self.score = 60.0 def open_video(self): if not os.path.isfile(self.args.VIDEO_PATH): raise Exception("Error:input video path is not exist") self.vdo = cv2.VideoCapture(self.args.VIDEO_PATH) self.im_width = int(self.vdo.get(cv2.CAP_PROP_FRAME_WIDTH)) self.im_height = int(self.vdo.get(cv2.CAP_PROP_FRAME_HEIGHT)) if self.args.save_dir: if not os.path.exists(self.args.save_dir): os.makedirs(self.args.save_dir) #fourcc = cv2.VideoWriter_fourcc(*'MJPG') #self.output = cv2.VideoWriter(self.args.save_path, fourcc, 20, (self.im_width,self.im_height)) if not self.vdo.isOpened(): raise Exception('open video failed') def xcycah2xcyc(self, xyah): xyah = np.array(xyah) xyah = xyah[:, :4] w = xyah[:, 2] * xyah[:, 3] h = xyah[:, 3] xc = xyah[:, 0] #+ w/2 yc = xyah[:, 1] #+ h/2 return np.vstack([xc, yc, w, h]).T def xcycah2xyxy(self, xcycah): xcycah = np.array(xcycah) xcycah = xcycah[:, :4] w = xcycah[:, 2] * xcycah[:, 3] h = xcycah[:, 3] x2 = xcycah[:, 0] + w / 2 y2 = xcycah[:, 1] + h / 2 x1 = xcycah[:, 0] - w / 2 y1 = xcycah[:, 1] - h / 2 
return np.vstack([x1, y1, x2, y2]).T def xyxy2xcyc(self, xywh): w = xywh[:, 2] - xywh[:, 0] h = xywh[:, 3] - xywh[:, 1] xc = xywh[:, 0] + w / 2 yc = xywh[:, 1] + h / 2 return np.vstack([xc, yc, w, h]).T def xyxy2xywh(self, xywh): w = xywh[:, 2] - xywh[:, 0] h = xywh[:, 3] - xywh[:, 1] return np.vstack([xywh[:, 0], xywh[:, 1], w, h]).T def xywh2xcycwh(self, xywh): xywh = np.array(xywh) xc = xywh[:, 0] + xywh[:, 2] / 2 yc = xywh[:, 1] + xywh[:, 3] / 2 return np.vstack([xc, yc, xywh[:, 2], xywh[:, 3]]).T def xywh2xyxy(self, xywh): xywh = np.array(xywh) x2 = xywh[:, 0] + xywh[:, 2] y2 = xywh[:, 1] + xywh[:, 3] return np.vstack([xywh[:, 0], xywh[:, 1], x2, y2]).T def xcyc2xcycah(self, bbox_xcycwh): bbox_xcycwh = np.array(bbox_xcycwh, dtype=np.float32) xc = bbox_xcycwh[:, 0] #- bbox_xcycwh[:,2]/2 yc = bbox_xcycwh[:, 1] #- bbox_xcycwh[:,3]/2 a = bbox_xcycwh[:, 2] / bbox_xcycwh[:, 3] return np.vstack([xc, yc, a, bbox_xcycwh[:, 3]]).T def widerbox(self, boxes): x1 = boxes[:, 0] y1 = boxes[:, 1] x2 = boxes[:, 2] y2 = boxes[:, 3] boxw = x2 - x1 boxh = y2 - y1 x1 = np.maximum(0, x1 - 0.3 * boxw) y1 = np.maximum(0, y1 - 0.3 * boxh) x2 = np.minimum(self.im_width, x2 + 0.3 * boxw) y2 = np.minimum(self.im_height, y2 + 0.3 * boxh) return np.vstack([x1, y1, x2, y2]).T def save_track_results(self, bbox_xyxy, img, identities, offset=[0, 0]): for i, box in enumerate(bbox_xyxy): x1, y1, x2, y2 = [int(i) for i in box] x1 += offset[0] x2 += offset[0] y1 += offset[1] y2 += offset[1] x1 = min(max(x1, 0), self.im_width - 1) y1 = min(max(y1, 0), self.im_height - 1) x2 = min(max(x2, 0), self.im_width - 1) y2 = min(max(y2, 0), self.im_height - 1) # box text and bar id = str(identities[i]) if identities is not None else '0' crop_img = img[y1:y2, x1:x2, :] if self.img_clarity._blurrDetection(crop_img) > self.score: tmp_cnt = self.id_cnt_dict.setdefault(id, 0) self.id_cnt_dict[id] = tmp_cnt + 1 save_dir = os.path.join(self.args.save_dir, id) if not os.path.exists(save_dir): os.makedirs(save_dir) 
save_path = os.path.join(save_dir, id + '_' + str(tmp_cnt) + '.jpg') cv2.imwrite(save_path, crop_img) else: continue def detect(self): cnt = 0 update_fg = True detect_fg = True total_time = 0 outputs = [] while self.vdo.isOpened(): start = time.time() _, ori_im = self.vdo.read() im = cv2.cvtColor(ori_im, cv2.COLOR_BGR2RGB) im = np.array([im]) if cnt % 5 == 0 or detect_fg: # bbox_xcycwh, cls_conf, cls_ids = self.yolo3(im) # mask = cls_ids==0 # bbox_xcycwh = bbox_xcycwh[mask] # bbox_xcycwh[:,3:] *= 1.2 # cls_conf = cls_conf[mask] if self.command_type == 'face': rectangles = self.mtcnn.detectFace(im, True) rectangles = rectangles[0] if len(rectangles) < 1: continue bboxes = rectangles[:, :4] bboxes = self.widerbox(bboxes) # bbox_xcycwh = self.xyxy2xcyc(bboxes) cls_conf = rectangles[:, 4] elif self.command_type == 'person': bboxes, cls_conf = self.person_detect.test_img_org(ori_im) if len(bboxes) == 0: continue bbox_xcycwh = self.xywh2xcycwh(bboxes) #outputs = bboxes #self.xywh2xyxy(bboxes) update_fg = True box_xcycah = self.xcyc2xcycah(bbox_xcycwh) self.moveTrack.track_init(box_xcycah) self.moveTrack.track_predict() self.moveTrack.track_update(box_xcycah) # detect_xywh = self.xyxy2xywh(bboxes) if self.command_type=='face' else bboxes # self.tracker_run.init(ori_im,detect_xywh.tolist()) detect_fg = False else: if len(bbox_xcycwh) > 0: start1 = time.time() self.moveTrack.track_predict() bbox_xcycwh = self.xcycah2xcyc(self.moveTrack.means_track) #outputs = self.xcycah2xyxy(self.moveTrack.means_track) # boxes_tmp = self.tracker_run.update(ori_im) # bbox_xcycwh = self.xywh2xcycwh(boxes_tmp) end1 = time.time() print('only tracker time consume:', end1 - start1) #outputs = self.xywh2xyxy(boxes_tmp) update_fg = False detect_fg = False else: detect_fg = True if len(bbox_xcycwh) > 0: outputs = self.deepsort.update(bbox_xcycwh, cls_conf, ori_im, update_fg) end = time.time() consume = end - start if len(outputs) > 0: #outputs = rectangles bbox_xyxy = outputs[:, :4] identities = 
outputs[:, -1] #np.zeros(outputs.shape[0]) ori_im = draw_bboxes(ori_im, bbox_xyxy, identities) #self.save_track_results(bbox_xyxy,ori_im,identities) print("frame: {} time: {}s, fps: {}".format( cnt, consume, 1 / (end - start))) cnt += 1 cv2.imshow("test", ori_im) c = cv2.waitKey(1) & 0xFF if c == 27 or c == ord('q'): break #if self.args.save_path: # self.output.write(ori_im) total_time += consume self.vdo.release() cv2.destroyAllWindows() print("video ave fps and total_time: ", cnt / total_time, total_time)
class Detector(object):
    """CenterNet person detection plus Deep SORT tracking over a video file.

    Annotated frames are shown in a window named "test" and, when
    ``write_video`` is true, written to ``demo1.avi``.
    """

    def __init__(self, opt):
        """Create the capture handle, the CenterNet detector and the tracker.

        Args:
            opt: Options object; ``opt.task`` selects the entry of
                ``detector_factory`` that is called with ``opt``.
        """
        self.vdo = cv2.VideoCapture()
        # CenterNet detector
        self.detector = detector_factory[opt.task](opt)
        self.deepsort = DeepSort("deep/checkpoint/ckpt.t7")
        self.write_video = True

    def open(self, video_path):
        """Open ``video_path`` and, if enabled, the output writer.

        Args:
            video_path: Path of an existing video file.

        Returns:
            The result of ``self.vdo.isOpened()``.

        Raises:
            AssertionError: If ``video_path`` is not an existing file.
        """
        assert os.path.isfile(video_path), "Error: path error"
        self.vdo.open(video_path)
        self.im_width = int(self.vdo.get(cv2.CAP_PROP_FRAME_WIDTH))
        self.im_height = int(self.vdo.get(cv2.CAP_PROP_FRAME_HEIGHT))
        # Region of interest (xmin, ymin, xmax, ymax): the full frame.
        self.area = 0, 0, self.im_width, self.im_height
        if self.write_video:
            fourcc = cv2.VideoWriter_fourcc(*'MJPG')
            self.output = cv2.VideoWriter("demo1.avi", fourcc, 20,
                                          (self.im_width, self.im_height))
        return self.vdo.isOpened()

    def detect(self):
        """Run detection and tracking on every frame until the video ends.

        The capture and the writer are released when the loop finishes or
        is interrupted, so the output file is always finalised.
        """
        xmin, ymin, xmax, ymax = self.area
        person_id = 1     # only person (id == 1)
        confidence = 0.5  # detections scoring at or below this are dropped
        try:
            while self.vdo.grab():
                start = time.time()
                ok, ori_im = self.vdo.retrieve()
                if not ok:
                    # Decoding failed for this frame; skip it instead of
                    # crashing on a None image.
                    continue

                # Crop to the region of interest and reverse the channel
                # order (OpenCV frames are BGR).
                im = ori_im[ymin:ymax, xmin:xmax, (2, 1, 0)]

                bbox = self.detector.run(im)['results'][person_id]
                bbox = bbox[bbox[:, 4] > confidence, :]

                # Columns 2 and 3 become width and height.
                # NOTE(review): columns 0 and 1 are left as the top-left
                # corner, not shifted to the box centre -- confirm this is
                # the format DeepSort.update expects here.
                bbox[:, 2] = bbox[:, 2] - bbox[:, 0]
                bbox[:, 3] = bbox[:, 3] - bbox[:, 1]

                cls_conf = bbox[:, 4]
                outputs = self.deepsort.update(bbox[:, :4], cls_conf, im)
                if len(outputs) > 0:
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, -1]
                    ori_im = draw_bboxes(ori_im, bbox_xyxy, identities,
                                         offset=(xmin, ymin))

                end = time.time()
                print("centernet time: {}s, fps: {}".format(end - start, 1 / (end - start)))

                cv2.imshow("test", ori_im)
                cv2.waitKey(1)

                if self.write_video:
                    self.output.write(ori_im)
        finally:
            self._release_resources()

    def _release_resources(self):
        """Release the capture and, if one was opened, the video writer."""
        self.vdo.release()
        writer = getattr(self, "output", None)
        if writer is not None:
            writer.release()
class Detector(object):
    """YOLOv3 + Deep SORT tracker over a video file, used as a context manager.

    Usage: ``with Detector(args) as det: det.detect()``.
    """

    def __init__(self, args):
        """Build the detector and tracker from parsed command-line ``args``.

        Reads ``display``, ``display_width``, ``display_height``,
        ``yolo_cfg``, ``img_size``, ``yolo_weights``, ``data_cfg``,
        ``conf_thresh``, ``nms_thresh`` and ``deepsort_checkpoint``.
        """
        self.args = args
        if args.display:
            cv2.namedWindow("test", cv2.WINDOW_NORMAL)
            cv2.resizeWindow("test", args.display_width, args.display_height)

        # Use the GPU when one is available, otherwise fall back to the CPU.
        device = torch.device(
            'cuda') if torch.cuda.is_available() else torch.device('cpu')

        self.vdo = cv2.VideoCapture()
        self.yolo3 = InferYOLOv3(args.yolo_cfg,
                                 args.img_size,
                                 args.yolo_weights,
                                 args.data_cfg,
                                 device,
                                 conf_thres=args.conf_thresh,
                                 nms_thres=args.nms_thresh)
        self.deepsort = DeepSort(args.deepsort_checkpoint)
        self.class_names = self.yolo3.classes

    def __enter__(self):
        """Open ``args.VIDEO_PATH`` and, if ``args.save_path`` is set, the writer.

        Raises:
            AssertionError: if the path is not a file or cannot be opened.
        """
        assert os.path.isfile(self.args.VIDEO_PATH), "Error: path error"
        self.vdo.open(self.args.VIDEO_PATH)
        self.im_width = int(self.vdo.get(cv2.CAP_PROP_FRAME_WIDTH))
        self.im_height = int(self.vdo.get(cv2.CAP_PROP_FRAME_HEIGHT))

        if self.args.save_path:
            fourcc = cv2.VideoWriter_fourcc(*'MJPG')
            self.output = cv2.VideoWriter(self.args.save_path, fourcc, 20,
                                          (self.im_width, self.im_height))

        assert self.vdo.isOpened()
        return self

    def __exit__(self, exc_type, exc_value, exc_traceback):
        """Report any exception and release the capture, writer and windows.

        Returns None, so an exception raised inside the ``with`` body still
        propagates to the caller.
        """
        if exc_type:
            print(exc_type, exc_value, exc_traceback)
        self.vdo.release()
        writer = getattr(self, "output", None)
        if writer is not None:
            # Releasing the writer is what finalises the saved video.
            writer.release()
        if self.args.display:
            cv2.destroyAllWindows()

    def detect(self):
        """Detect, track and annotate the frames of the opened video.

        Frames whose index is a multiple of 3 are grabbed but not decoded,
        processed, shown or saved. For every other frame a timing line is
        printed, and the annotated frame is displayed and/or written
        depending on ``args.display`` and ``args.save_path``.
        """
        frame_cnt = -1
        while self.vdo.grab():
            frame_cnt += 1

            # Drop every third frame (indices 0, 3, 6, ...).
            if frame_cnt % 3 == 0:
                continue

            start = time.time()
            _, ori_im = self.vdo.retrieve()
            # The detector is fed the frame exactly as decoded.
            im = ori_im

            t1_begin = time.time()
            bbox_xxyy, cls_conf, cls_ids = self.yolo3.predict(im)
            t1_end = time.time()

            t2_begin = time.time()
            if bbox_xxyy is not None:
                bbox_xcycwh = xyxy2xywh(bbox_xxyy)
                outputs = self.deepsort.update(bbox_xcycwh, cls_conf, im)
                if len(outputs) > 0:
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, -1]
                    ori_im = draw_bboxes(ori_im, bbox_xyxy, identities)
            t2_end = time.time()

            end = time.time()
            det_time = t1_end - t1_begin
            total = end - start
            # Guard the ratios against a zero reading from a coarse clock.
            det_share = det_time * 100 / total if total > 0 else 0.0
            fps = 1 / total if total > 0 else float("inf")
            print(
                "frame:%d|det:%.4f|sort:%.4f|total:%.4f|det p:%.2f%%|fps:%.2f"
                % (frame_cnt, det_time, (t2_end - t2_begin), total,
                   det_share, fps))

            if self.args.display:
                cv2.imshow("test", ori_im)
                cv2.waitKey(1)

            if self.args.save_path:
                self.output.write(ori_im)
class Detector(object):
    """YOLOv3 + Deep SORT tracker that dumps per-frame tracks to disk.

    Usage: ``with Detector(args) as det: det.detect()``.
    """

    def __init__(self, args):
        """Build the detector and tracker from parsed command-line ``args``.

        ``args.use_cuda`` is a string parsed with ``strtobool``.
        """
        self.args = args
        use_cuda = bool(strtobool(self.args.use_cuda))

        self.vdo = cv2.VideoCapture()
        self.yolo3 = YOLOv3(args.yolo_cfg, args.yolo_weights, args.yolo_names,
                            is_xywh=True, conf_thresh=args.conf_thresh,
                            nms_thresh=args.nms_thresh, use_cuda=use_cuda)
        self.deepsort = DeepSort(args.deepsort_checkpoint, use_cuda=use_cuda)
        self.class_names = self.yolo3.class_names

    def __enter__(self):
        """Open ``args.VIDEO_PATH`` and compute the last frame to process.

        Raises:
            AssertionError: if the path is not a file or cannot be opened.
        """
        assert os.path.isfile(self.args.VIDEO_PATH), "Error: path error"
        self.vdo.open(self.args.VIDEO_PATH)
        self.im_width = int(self.vdo.get(cv2.CAP_PROP_FRAME_WIDTH))
        self.im_height = int(self.vdo.get(cv2.CAP_PROP_FRAME_HEIGHT))
        # Never run past the end of the video, whatever the user asked for.
        self.end_frame = min(int(self.vdo.get(cv2.CAP_PROP_FRAME_COUNT)),
                             self.args.end_frame)

        if self.args.save_path:
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            self.output = cv2.VideoWriter(self.args.save_path, fourcc, 30,
                                          (self.im_width, self.im_height))

        assert self.vdo.isOpened()
        return self

    def __exit__(self, exc_type, exc_value, exc_traceback):
        """Report any exception and release the capture and the writer.

        Returns None, so an exception raised inside the ``with`` body still
        propagates to the caller.
        """
        if exc_type:
            print(exc_type, exc_value, exc_traceback)
        self.vdo.release()
        writer = getattr(self, "output", None)
        if writer is not None:
            writer.release()

    def detect(self):
        """Track people frame by frame and save the result as pickle and JSON.

        The result maps frame index -> track id -> ``[x1, y1, x2, y2,
        state letter, time since update, score]``. Frames are processed
        while ``i <= self.end_frame``. Output files are named after
        ``args.VIDEO_PATH`` with ``_original`` removed and the extension
        replaced by ``_track.pkl`` / ``_track.json``.
        """
        import json
        import pickle

        bbox = {}
        i = 0
        while self.vdo.grab() and i <= self.end_frame:
            start = time.time()
            bbox[i] = {}
            _, ori_im = self.vdo.retrieve()
            # The models receive the frame as decoded. An earlier colour
            # conversion whose result was immediately discarded was removed.
            im = ori_im

            bbox_xcycwh, cls_conf, cls_ids = self.yolo3(im)
            if bbox_xcycwh is not None:
                # Keep class 0 only ("person").
                mask = cls_ids == 0
                bbox_xcycwh = bbox_xcycwh[mask]
                # Enlarge the boxes from column 3 onwards by 20 %.
                bbox_xcycwh[:, 3:] *= 1.2
                cls_conf = cls_conf[mask]

                outputs, scores = self.deepsort.update(bbox_xcycwh, cls_conf, im)
                if len(outputs) > 0:
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, -1]
                    states = outputs[:, 4]
                    time_since_updates = outputs[:, 5]
                    for j, box in enumerate(bbox_xyxy):
                        bbox[i][int(identities[j])] = [
                            int(box[0]), int(box[1]), int(box[2]), int(box[3]),
                            StateLetters[states[j]],
                            int(time_since_updates[j]),
                            scores[j],
                        ]

            if i % 10 == 0:
                print(f"processing frame {i}, t/frame={time.time()-start}")
            i += 1

        fileName = self.args.VIDEO_PATH.replace('_original', '').rsplit(".", 1)[0] + "_track"
        # Context managers make sure both files are flushed and closed.
        with open(fileName + '.pkl', "wb") as pkl_file:
            pickle.dump(bbox, pkl_file)
        with open(fileName + '.json', "w") as json_file:
            json.dump(bbox, json_file, sort_keys=True, indent=4,
                      separators=(',', ': '))
# Notebook cell: fetch model weights and a sample video, then build the
# detector and tracker. Lines starting with "!" are shell commands run by the
# notebook, not Python statements.

# Download the YOLOv3 weights unless they are already present.
if not exists('yolov3.weights'):
    !wget -q https://pjreddie.com/media/files/yolov3.weights

# Download the file later passed to DeepSort unless it is already present.
if not exists('ckpt.t7'):
    file_id = '1_qwTWdzT9dWNudpusgKavj_4elGgbkUN'
    !curl -Lb ./cookie "https://drive.google.com/uc?export=download&id={file_id}" -o ckpt.t7

import cv2
import time
from YOLOv3 import YOLOv3
from deep_sort import DeepSort
from util import draw_bboxes

yolo3 = YOLOv3("deep_sort_pytorch/YOLOv3/cfg/yolo_v3.cfg","yolov3.weights","deep_sort_pytorch/YOLOv3/cfg/coco.names", is_xywh=True)
deepsort = DeepSort("ckpt.t7")

VIDEO_URL = 'http://www.robots.ox.ac.uk/ActiveVision/Research/Projects/2009bbenfold_headpose/Datasets/TownCentreXVID.avi'
DURATION_S = 20 # process only the first 20 seconds
video_file_name = 'video.mp4'
# Download and convert the sample video only once.
if not exists(video_file_name):
    !wget -q $VIDEO_URL
    dowloaded_file_name = basename(VIDEO_URL)
    # convert to MP4, because we can show only MP4 videos in the colab notebook
    !ffmpeg -y -loglevel info -t $DURATION_S -i $dowloaded_file_name $video_file_name

# NOTE(review): the body of this function is not part of this snippet.
def show_local_mp4_video(file_name, width=640, height=480):
class Detector(object):
    """Run CenterNet + Deep SORT over an image sequence and save the tracks."""

    def __init__(self, centernet_opt, args):
        """Build the detector and tracker.

        Args:
            centernet_opt: detector options; ``centernet_opt.task`` selects
                the detector class from ``detector_factory``.
            args: run options. Reads ``deepsort_checkpoint``,
                ``max_cosine_distance``, ``use_cuda``, ``use_original_model``,
                ``debug``, ``debug_dir`` and (later) ``min_confidence``.
        """
        # CenterNet detector
        self.detector = detector_factory[centernet_opt.task](centernet_opt)
        # Deep SORT
        self.deepsort = DeepSort(args.deepsort_checkpoint,
                                 args.max_cosine_distance,
                                 args.use_cuda,
                                 args.use_original_model)

        self.debug = args.debug
        if self.debug:
            # makedirs also creates missing parents and tolerates an
            # already existing directory.
            os.makedirs(args.debug_dir, exist_ok=True)

        self.args = args

    def run(self, sequence_dir, output_file):
        """Track every frame of ``sequence_dir`` and write ``output_file``.

        One line is written per tracked box:
        ``frame,track_id,left,top,width,height,1,-1,-1,-1``.
        In debug mode the detections and tracks of each frame are also saved
        as JPEG images in ``args.debug_dir``.

        Raises:
            AssertionError: if ``sequence_dir`` is not a directory.
        """
        assert os.path.isdir(sequence_dir), "Invalid sequence dir: {}".format(sequence_dir)

        seq_info = gather_sequence_info(sequence_dir, None)
        print("Start to handle sequence: {} (image size: {}, frame {} - {})".format(
            seq_info["sequence_name"], seq_info["image_size"],
            seq_info["min_frame_idx"], seq_info["max_frame_idx"]))

        start_time = time.time()
        frame_cnt = 0
        results = []
        for frame in range(seq_info["min_frame_idx"], seq_info["max_frame_idx"] + 1):
            frame_image = seq_info["image_filenames"][frame]
            frame_cnt += 1

            image = cv2.imread(frame_image)
            # Index 1 selects the class kept for tracking.
            detection_result = self.detector.run(frame_image)["results"][1]
            xywh, conf = Detector._bbox_to_xywh_cls_conf(
                detection_result, self.args.min_confidence)
            output = self.deepsort.update(xywh, conf, image)

            for x1, y1, x2, y2, track_id in output:
                results.append((
                    frame, track_id, x1, y1, x2 - x1, y2 - y1  # tlwh
                ))

            elapsed_time = time.time() - start_time
            print("Frame {:05d}, Time {:.3f}s, FPS {:.3f}".format(
                frame_cnt, elapsed_time, frame_cnt / elapsed_time))

            if self.debug:
                detect_xyxy = detection_result[
                    detection_result[:, 4] > self.args.min_confidence, :4]
                detect_image = draw_bboxes(image, detect_xyxy)
                cv2.imwrite(
                    os.path.join(self.args.debug_dir,
                                 "{}-{:05}-detect.jpg".format(seq_info["sequence_name"], frame)),
                    detect_image)

                if len(output) == 0:
                    continue

                # Reload the frame so the track image starts from a clean
                # copy (presumably draw_bboxes draws on its input).
                image = cv2.imread(frame_image)
                track_image = draw_bboxes(image, output[:, :4], output[:, -1])
                cv2.imwrite(
                    os.path.join(self.args.debug_dir,
                                 "{}-{:05}-track.jpg".format(seq_info["sequence_name"], frame)),
                    track_image)

        # Fixed: this used the undefined name `args` instead of the parameter.
        print("Done. Now write output to {}".format(output_file))
        with open(output_file, mode="w") as f:
            for row in results:
                f.write("%d,%d,%.2f,%.2f,%.2f,%.2f,1,-1,-1,-1\n" % (
                    row[0], row[1], row[2], row[3], row[4], row[5]))

    @staticmethod
    def _bbox_to_xywh_cls_conf(bbox, min_confidence):
        """Filter detections by score and convert corners to centre/size.

        Args:
            bbox: 2-D array whose rows are ``x1, y1, x2, y2, score``.
            min_confidence: rows with ``score <= min_confidence`` are dropped.

        Returns:
            ``(xywh, conf)``: centre-x, centre-y, width, height per kept row,
            and the matching scores. The input array is not modified, because
            boolean-mask indexing produces a copy.
        """
        bbox = bbox[bbox[:, 4] > min_confidence, :]
        # Width and height first, then shift the corner to the centre.
        bbox[:, 2] = bbox[:, 2] - bbox[:, 0]
        bbox[:, 3] = bbox[:, 3] - bbox[:, 1]
        bbox[:, 0] = bbox[:, 0] + bbox[:, 2] / 2
        bbox[:, 1] = bbox[:, 1] + bbox[:, 3] / 2
        return bbox[:, :4], bbox[:, 4]
# NOTE(review): the original indentation of this class was lost; the nesting
# below is a reconstruction and should be confirmed against the source repo.
class Detector(object):
    """Person detector on a buffered video source, with optional tracking.

    Uses Detectron2 for detection and, when ``args.deep_sort`` is set, Deep
    SORT for tracking. Results can be previewed, saved as video or frames,
    and sent to a TCP server. Used as a context manager.
    """

    def __init__(self, args):
        # Keep the parsed options; the other methods read them via self.args.
        self.args = args
        use_cuda = bool(strtobool(self.args.use_cuda))

        self.detectron2 = Detectron2(self.args.detectron_cfg, self.args.detectron_ckpt)
        # The tracker is only created when tracking is requested.
        if self.args.deep_sort:
            self.deepsort = DeepSort(args.deepsort_checkpoint, use_cuda=use_cuda)

    def _set_tcp_client(self):
        """Parse ``args.tcp_ip_port`` ("ip:port") and connect the TCP client."""
        ip, port = self.args.tcp_ip_port.strip().split(":")
        port = int(port)
        self.tcp_client = TCPClient(ip, port)
        self.tcp_client.LaunchConnection()

    def _set_video_writer(self, video_path):
        """Create ``self.video_output`` writing MJPG video to ``video_path``."""
        fourcc = cv2.VideoWriter_fourcc(*"MJPG")
        try:
            self.im_width = int(self.vdo.cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            self.im_height = int(self.vdo.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            fps = 20
        except Exception as e:
            # Fall back to fixed dimensions when the capture cannot be queried.
            print(e)
            self.im_width = 1280
            self.im_height = 786
            fps = 20
        self.video_output = cv2.VideoWriter(video_path, fourcc, fps, (self.im_width, self.im_height))

    def __enter__(self):
        """Connect the optional TCP client, open the video and the writer."""
        if self.args.tcp_ip_port is not None:
            self._set_tcp_client()

        self.vdo = VideoCapture(
            self.args.video_path,
            self.args.capture_buffer_length,
            real_time=self.args.real_time,
        )
        assert self.vdo.cap.isOpened()
        self.vd_name = os.path.basename(self.args.video_path)

        if self.args.save_video_to:
            # First segment is named "0_<save_video_freq>_<video name>".
            self._set_video_writer("{}/0_{}_{}".format(
                self.args.save_video_to, self.args.save_video_freq, self.vd_name))
        return self

    def __exit__(self, exc_type, exc_value, exc_traceback):
        # Only reports the exception; nothing is released or closed here.
        if exc_type:
            print(exc_type, exc_value, exc_traceback)

    # NOTE(review): this import binds `math` as a class attribute only; the
    # `math.floor` call below resolves `math` from module globals, so it
    # depends on a module-level `import math` as well -- confirm.
    import math

    @staticmethod
    def filter_imgs_buffer(im_list, max_len=3):
        """Return the first, middle and last items of ``im_list``.

        ``max_len`` is not used. An empty list raises IndexError.
        """
        c = math.floor(len(im_list) / 2)
        filtered_ims = [im_list[0], im_list[c], im_list[-1]]
        return filtered_ims

    def detect_im(self, im, apply_batch_ensemble=False):
        """Detect (and optionally track) persons in one image or a list of images.

        Returns a list with one entry of boxes per image for which boxes were
        appended; images without results contribute no entry.
        """
        # Accept a single image as well as a batch.
        if not isinstance(im, list):
            im = [im]
        batch_outs = self.detectron2.detect_batch(im, apply_batch_ensemble)
        final_bbox_xywh = []
        for idx, each_im_ouputs in enumerate(batch_outs):
            bbox_xcycwh, cls_conf, cls_ids = each_im_ouputs
            # Some objects are found
            bbox_xywh = []
            # NOTE(review): comparing an array with `[]` is fragile; confirm
            # the type returned by detect_batch.
            if bbox_xcycwh is not None and bbox_xcycwh != []:
                # Keep class 0 only.
                mask = cls_ids == 0
                bbox_xcycwh = bbox_xcycwh[mask]
                cls_conf = cls_conf[mask]
                persons_count_before_track = len(bbox_xcycwh)

                if self.args.deep_sort:
                    # Enlarge the boxes from column 3 onwards by 20 %.
                    bbox_xcycwh[:, 3:] *= 1.2
                    outputs = self.deepsort.update_new(bbox_xcycwh, cls_conf, im[idx])
                    persons_count = len(outputs)
                    if persons_count > 0:
                        bbox_xyxy = outputs[:, :4]
                        identities = outputs[:, -1]
                        bbox_xywh = get_bbox_xywh(bbox_xyxy, identities)
                        final_bbox_xywh.append(bbox_xywh)
                    print("# of persons filter from {} -> {} after tracking".
                          format(persons_count_before_track, persons_count))
                else:
                    # No tracking: convert the detections directly.
                    bbox_xywh = bbox_cxywh_xywh(bbox_xcycwh)
                    final_bbox_xywh.append(bbox_xywh)
        return final_bbox_xywh

    def _get_latest_frames(self, filter_policy=None):
        """Read the buffered frames and optionally thin them out.

        Returns ``(flag, images)`` where ``flag`` comes from ``self.vdo.read()``.
        """
        flag, im = self.vdo.read()
        assert isinstance(im, list)
        if not self.args.supress_verbose:
            print("video_frame ids collected for processing: {}".format(
                [x[0] for x in im]))
        # Each buffer entry is indexed as (frame id, image); keep the images.
        im = [x[1] for x in im]
        if len(im) > 1:
            init_l = len(im)
            # Any non-None policy triggers the same first/middle/last filter.
            if filter_policy is not None:
                im = self.filter_imgs_buffer(im)
            if not self.args.supress_verbose:
                print("Only processing {} out of {} captured frames".format(
                    len(im), init_l))
        return flag, im

    def detect_video(self):
        """Main loop: read frames, detect, then display / save / send results.

        Stops when the source reports no more frames or "q" is pressed in the
        preview window.
        """
        self.frame_count = 0
        self.processing_frame_count = 0
        persons_count = 0
        # NOTE(review): model_avg_time is never updated, so it prints as 0.0.
        model_avg_time = 0.0
        proc_avg_time = 0.0
        proc_total_time = 0.0
        init_time = time.time()

        while True:
            frame_start_time = time.time()
            flag, imgs = self._get_latest_frames(
                filter_policy=self.args.buffer_filter_policy)
            if not flag:
                break
            self.frame_count += len(imgs)
            f_h, f_w = imgs[0].shape[:2]

            # Run the model only when the frame counter hits the processing
            # frequency.
            if (self.frame_count - 1) % self.args.proc_freq == 0:
                self.processing_frame_count += len(imgs)
                model_init_time = time.time()
                bbox_xywhs = self.detect_im(imgs, self.args.apply_batch_ensemble)
                model_end_time = time.time()
                persons_count = 0
                if len(bbox_xywhs) > 0:
                    # Count is taken from the last entry of the result list.
                    persons_count = len(bbox_xywhs[-1])

            # Only the newest frame of the batch is drawn, shown and saved.
            im = imgs[-1]
            if persons_count > 0 and (self.args.display or self.args.save_video_to or
                                      (self.args.save_frames_to is not None)):
                im = draw_bboxes_xywh(im, bbox_xywhs, None)

            if self.args.display:
                cv2.imshow("Live preview", im)
                if cv2.waitKey(1) & 0xFF == ord("q"):
                    break

            if self.args.save_video_to:
                self.video_output.write(im)
                # Start a new video segment every save_video_freq frames.
                if self.frame_count % self.args.save_video_freq == 0:
                    self.video_output.release()
                    print("Video saved")
                    self._set_video_writer("{}/{}_{}_{}".format(
                        self.args.save_video_to,
                        self.frame_count,
                        self.frame_count + self.args.save_video_freq,
                        self.vd_name,
                    ))

            if self.args.save_frames_to is not None and persons_count > 0:
                frame_path = "{}/{}.jpg".format(self.args.save_frames_to,
                                                self.frame_count)
                print(frame_path)
                cv2.imwrite(frame_path, im)

            proc_total_time = proc_total_time + (time.time() - frame_start_time)

            if self.args.tcp_ip_port is not None:
                frame_bbox_flat = []
                if persons_count > 0:
                    for bbox_xywh in bbox_xywhs:
                        for bbox in bbox_xywh:
                            print(bbox)
                            # Normalise by frame width / height before sending.
                            bbox_ = [
                                bbox[0] / f_w,
                                bbox[1] / f_h,
                                bbox[2] / f_w,
                                bbox[3] / f_h,
                            ]
                            frame_bbox_flat += bbox_
                try:
                    if len(frame_bbox_flat) > 0:
                        self.tcp_client.SendBoundingBoxes(frame_bbox_flat)
                except Exception as e:
                    # Best effort: a failed send must not stop the loop.
                    print(e)
                    print("Unable to send data to TCP server")

            if not self.args.supress_verbose:
                model_avg_fps = self.frame_count // proc_total_time
                proc_avg_time = proc_total_time / self.frame_count
                frame_time = time.time() - frame_start_time
                # NOTE(review): model_init_time / model_end_time only exist
                # once the detection branch above has run at least once.
                nn_time = model_end_time - model_init_time
                actual_fps = self.frame_count // (time.time() - init_time)
                video_fps = self.vdo.fr_count // (time.time() - init_time)
                cap_f_count = self.vdo.fr_count
                print(
                    "cap_frame:{} p_Frame:{} p_count:{} : M_FPS:{} cap_FPS:{:.4f} Process_FPS:{} nn_time/avg:[{:.4f}/{:.4f}], frame_time/avg:[{:.4f}/{:.4f}]"
                    .format(
                        cap_f_count,
                        self.frame_count,
                        persons_count,
                        model_avg_fps,
                        video_fps,
                        actual_fps,
                        nn_time,
                        model_avg_time,
                        frame_time,
                        proc_avg_time,
                    ))
class Detector(object):
    """Track people on a live RealSense stream and estimate their real size.

    ``detect()`` reads depth and colour frames from the camera, runs YOLOv3
    and Deep SORT on the colour image, and prints distance, width and height
    for every tracked box.
    """

    # Stream geometry requested from the camera.
    FRAME_WIDTH = 640
    FRAME_HEIGHT = 480
    STREAM_FPS = 30
    # Field of view in degrees used to convert pixels to metres (values the
    # original author attributed to the camera).
    HORIZONTAL_FOV_DEG = 91.2
    VERTICAL_FOV_DEG = 65.5

    def __init__(self):
        self.vdo = cv2.VideoCapture()
        self.yolo3 = YOLOv3("YOLOv3/cfg/yolo_v3.cfg", "YOLOv3/yolov3.weights",
                            "YOLOv3/cfg/coco.names", is_xywh=True)
        self.deepsort = DeepSort("deep_sort/deep/checkpoint/ckpt.t7")
        self.class_names = self.yolo3.class_names
        self.write_video = True

    def open(self, video_path):
        """Open ``video_path`` and, if enabled, the ``demo.avi`` writer.

        Returns:
            True when the capture was opened successfully.

        Raises:
            AssertionError: if ``video_path`` is not an existing file.
        """
        assert os.path.isfile(video_path), "Error: path error"
        self.vdo.open(video_path)
        self.im_width = int(self.vdo.get(cv2.CAP_PROP_FRAME_WIDTH))
        self.im_height = int(self.vdo.get(cv2.CAP_PROP_FRAME_HEIGHT))
        self.area = 0, 0, self.im_width, self.im_height
        if self.write_video:
            fourcc = cv2.VideoWriter_fourcc(*'MJPG')
            self.output = cv2.VideoWriter(
                "demo.avi", fourcc, 20, (self.im_width, self.im_height))
        return self.vdo.isOpened()

    @staticmethod
    def _measure_box(depth_image, depth_scale, box):
        """Estimate distance and physical size of one box.

        Args:
            depth_image: 2-D array of raw depth values.
            depth_scale: factor converting raw depth values to metres.
            box: ``(x1, y1, x2, y2)`` pixel corners, top-left and bottom-right.

        Returns:
            ``(distance, width, height)``: the median scaled depth inside the
            box, and the box extent converted with the field-of-view
            constants at that distance.
        """
        x1, y1, x2, y2 = box
        depth = depth_image[y1:y2, x1:x2] * depth_scale
        real_dist = np.median(depth)

        # Extent covered by one pixel at this distance, per axis.
        width_scale = (2 * real_dist * math.tan(
            math.radians(Detector.HORIZONTAL_FOV_DEG / 2))) / Detector.FRAME_WIDTH
        height_scale = (2 * real_dist * math.tan(
            math.radians(Detector.VERTICAL_FOV_DEG / 2))) / Detector.FRAME_HEIGHT
        return real_dist, width_scale * (x2 - x1), height_scale * (y2 - y1)

    def detect(self):
        """Stream from the camera and print measurements for tracked people.

        Runs until an exception (for example KeyboardInterrupt) ends the
        loop; the pipeline is always stopped on the way out.
        """
        # Configure depth and color streams
        pipeline = rs.pipeline()
        config = rs.config()
        config.enable_stream(rs.stream.depth, self.FRAME_WIDTH,
                             self.FRAME_HEIGHT, rs.format.z16, self.STREAM_FPS)
        config.enable_stream(rs.stream.color, self.FRAME_WIDTH,
                             self.FRAME_HEIGHT, rs.format.bgr8, self.STREAM_FPS)

        # Start streaming
        profile = pipeline.start(config)
        xmin, ymin, xmax, ymax = 0, 0, self.FRAME_WIDTH, self.FRAME_HEIGHT
        offset = (xmin, ymin)

        try:
            # Query the depth scale once instead of once per box.
            depth_scale = profile.get_device(
            ).first_depth_sensor().get_depth_scale()

            while True:
                start = time.time()
                # Wait for a coherent pair of frames: depth and color
                frames = pipeline.wait_for_frames()
                depth_frame = frames.get_depth_frame()
                color_frame = frames.get_color_frame()
                if not depth_frame or not color_frame:
                    continue

                # Convert images to numpy arrays
                depth_image = np.asanyarray(depth_frame.get_data())
                ori_im = np.asanyarray(color_frame.get_data())
                # Crop and reverse the channel order of the colour image.
                im = ori_im[ymin:ymax, xmin:xmax, (2, 1, 0)]

                bbox_xywh, cls_conf, cls_ids = self.yolo3(im)
                if bbox_xywh is not None:
                    # Keep class 0 only.
                    mask = cls_ids == 0
                    bbox_xywh = bbox_xywh[mask]
                    # Enlarge column 3 of the boxes by 20 %.
                    bbox_xywh[:, 3] *= 1.2
                    cls_conf = cls_conf[mask]

                    outputs = self.deepsort.update(bbox_xywh, cls_conf, im)
                    if len(outputs) > 0:
                        bbox_xyxy = outputs[:, :4]
                        identities = outputs[:, -1]
                        for idx, box in enumerate(bbox_xyxy):
                            # (0, 0) is the top-left pixel; shift the box back
                            # into full-frame coordinates.
                            x1, y1, x2, y2 = [int(v) for v in box]
                            x1 += offset[0]
                            x2 += offset[0]
                            y1 += offset[1]
                            y2 += offset[1]

                            real_dist, real_width, real_height = \
                                self._measure_box(depth_image, depth_scale,
                                                  (x1, y1, x2, y2))

                            track_id = int(identities[idx])
                            label = '{} {}, d={:.3f} w={:.3f} h={:.3f}'.format(
                                "object", track_id, real_dist, real_width,
                                real_height)
                            print(label)
                            print('pixel of top left and bottom right')
                            print('(', x1, ',', y1, ') (', x2, ',', y2, ')')

                elapsed = time.time() - start
                # A coarse clock can report zero elapsed time.
                fps = 1 / elapsed if elapsed > 0 else float("inf")
                print("time: {}s, fps: {}".format(elapsed, fps))
        finally:
            # Stop streaming
            pipeline.stop()
from yolo3.detect.video_detect import VideoDetector from yolo3.models import Darknet if __name__ == '__main__': LOG_FORMAT = "%(asctime)s - %(levelname)s - %(message)s" logging.basicConfig(level=logging.INFO, format=LOG_FORMAT) model = Darknet("config/yolov4.cfg", img_size=(608, 608)) model.load_darknet_weights("weights/yolov4.weights") model.to("cuda:0") # 跟踪器 tracker = DeepSort("weights/ckpt.t7", min_confidence=1, use_cuda=True, nn_budget=30, n_init=3, max_iou_distance=0.7, max_dist=0.3, max_age=30) # Action Identify # action_id = ActionIdentify(actions=[TakeOff(4, delta=(0, 1)), # Landing(4, delta=(2, 2)), # Glide(4, delta=(1, 2)), # FastCrossing(4, speed=0.2), # BreakInto(0, timeout=2)], # max_age=30, # max_size=8) video_detector = VideoDetector( model,