class Detector(object):
    """Run a YOLOv3 detector and a DeepSort tracker over a video file.

    Call ``open(path)`` first, then ``detect()`` to process every frame.
    """

    def __init__(self):
        """Build the capture handle, the detector and the tracker."""
        self.write_video = True
        self.vdo = cv2.VideoCapture()
        self.yolo3 = YOLO3(
            "YOLO3/cfg/yolo_v3.cfg",
            "YOLO3/yolov3.weights",
            "YOLO3/cfg/coco.names",
            is_xywh=True,
        )
        self.class_names = self.yolo3.class_names
        self.deepsort = DeepSort("deep/checkpoint/ckpt.t7")

    def open(self, video_path):
        """Open ``video_path`` and, if enabled, the ``demo.avi`` writer.

        Returns whatever ``VideoCapture.isOpened()`` reports afterwards.
        Raises ``AssertionError`` when ``video_path`` is not a file.
        """
        assert os.path.isfile(video_path), "Error: path error"
        capture = self.vdo
        capture.open(video_path)

        width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
        self.im_width, self.im_height = width, height
        # Region of interest as (xmin, ymin, xmax, ymax): the full frame.
        self.area = (0, 0, width, height)

        if self.write_video:
            codec = cv2.VideoWriter_fourcc(*'MJPG')
            self.output = cv2.VideoWriter("demo.avi", codec, 20,
                                          (width, height))
        return capture.isOpened()

    def detect(self):
        """Detect, track, draw, show and (optionally) save every frame."""
        left, top, right, bottom = self.area
        while self.vdo.grab():
            tic = time.time()
            _, frame = self.vdo.retrieve()
            # Crop to the region of interest and reverse the channel order.
            crop = frame[top:bottom, left:right, (2, 1, 0)]
            boxes, scores, labels = self.yolo3(crop)

            if boxes is not None:
                # Keep only class id 0 (presumably "person" in coco.names).
                keep = labels == 0
                boxes = boxes[keep]
                boxes[:, 3] *= 1.2  # enlarge the 4th box component by 20%
                scores = scores[keep]
                tracks = self.deepsort.update(boxes, scores, crop)
                if len(tracks) > 0:
                    frame = draw_bboxes(frame, tracks[:, :4], tracks[:, -1],
                                        offset=(left, top))

            toc = time.time()
            print("time: {}s, fps: {}".format(toc - tic, 1 / (toc - tic)))

            cv2.imshow("test", frame)
            cv2.waitKey(1)
            if self.write_video:
                self.output.write(frame)
class Detector(object):
    """Track objects in a video using an RFBNet detector and DeepSort.

    The detector itself is not owned by this class: ``detect`` calls the
    module-level ``test_net`` with module-level ``net``, ``detector``,
    ``args``, ``rgb_means`` and ``top_k``, which must exist in the
    enclosing module.
    """

    def __init__(self):
        """Create the capture handle and the DeepSort tracker."""
        self.vdo = cv2.VideoCapture()
        self.deepsort = DeepSort("deep/checkpoint/ckpt.t7")
        self.write_video = True

    def open(self, video_path):
        """Open ``video_path`` and, if enabled, the ``demo1.avi`` writer.

        Returns the result of ``VideoCapture.isOpened()``.
        Raises ``AssertionError`` when ``video_path`` is not a file.
        """
        assert os.path.isfile(video_path), "Error: path error"
        self.vdo.open(video_path)
        self.im_width = int(self.vdo.get(cv2.CAP_PROP_FRAME_WIDTH))
        self.im_height = int(self.vdo.get(cv2.CAP_PROP_FRAME_HEIGHT))
        # Region of interest as (xmin, ymin, xmax, ymax): the full frame.
        self.area = 0, 0, self.im_width, self.im_height
        if self.write_video:
            fourcc = cv2.VideoWriter_fourcc(*'MJPG')
            self.output = cv2.VideoWriter("demo1.avi", fourcc, 20,
                                          (self.im_width, self.im_height))
        return self.vdo.isOpened()

    def detect(self):
        """Detect, track, draw, show and (optionally) save every frame."""
        xmin, ymin, xmax, ymax = self.area
        while self.vdo.grab():
            ok, ori_im = self.vdo.retrieve()
            if not ok:
                # A grabbed frame can still fail to decode; slicing the
                # resulting None would raise, so skip the frame instead.
                continue
            im = ori_im[ymin:ymax, xmin:xmax]

            # RFBNet usage (see the RFBNet tutorial for the arguments).
            results = test_net(im, net, detector, args.cuda,
                               BaseTransform(net.size, rgb_means, (2, 0, 1)),
                               top_k, thresh=0.4)
            bbox_xywh, cls_conf = bbox_to_xywh_cls_conf(results)

            if bbox_xywh is not None:
                outputs = self.deepsort.update(bbox_xywh, cls_conf, im)
                if len(outputs) > 0:
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, -1]
                    ori_im = draw_bboxes(ori_im, bbox_xyxy, identities,
                                         offset=(xmin, ymin))

            cv2.imshow("test", ori_im)
            cv2.waitKey(1)
            if self.write_video:
                self.output.write(ori_im)
class Detector(object):
    """Track objects in a video with a CenterNet detector and Deep SORT."""

    def __init__(self, centernet_opt, args):
        """Build the detector and tracker.

        Args:
            centernet_opt: CenterNet options; ``centernet_opt.task`` selects
                the entry of ``detector_factory`` to instantiate.
            args: namespace providing ``deepsort_checkpoint``,
                ``max_cosine_distance``, ``use_cuda`` and ``min_confidence``.
        """
        # CenterNet detector
        self.detector = detector_factory[centernet_opt.task](centernet_opt)
        # Deep SORT
        self.deepsort = DeepSort(args.deepsort_checkpoint,
                                 args.max_cosine_distance, args.use_cuda)
        self.args = args

    def run(self, video_path, output_path):
        """Track every frame of ``video_path`` and write it to ``output_path``.

        Raises:
            AssertionError: if ``video_path`` is not an existing file.
        """
        # open input video
        assert os.path.isfile(video_path), "Error: invalid video path"
        vdo = cv2.VideoCapture()
        vdo.open(video_path)

        # open output video
        im_width = int(vdo.get(cv2.CAP_PROP_FRAME_WIDTH))
        im_height = int(vdo.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fourcc = cv2.VideoWriter_fourcc(*"MJPG")
        output_vdo = cv2.VideoWriter(output_path, fourcc, 20,
                                     (im_width, im_height))

        # track each frame in video
        start_time = time.time()
        frame_cnt = 0
        try:
            while vdo.grab():
                frame_cnt += 1
                _, ori_im = vdo.retrieve()
                im = ori_im[0:im_height, 0:im_width]

                # Only the entry under key 1 of the results is tracked.
                detection = self.detector.run(im)["results"][1]
                bbox_xywh, conf = Detector._bbox_to_xywh_cls_conf(
                    detection, self.args.min_confidence)

                outputs = self.deepsort.update(bbox_xywh, conf, im)
                if len(outputs) > 0:
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, -1]
                    ori_im = draw_bboxes(ori_im, bbox_xyxy, identities)

                elapsed_time = time.time() - start_time
                print("Frame {:05d}, Time {:.3f}s, FPS {:.3f}".format(
                    frame_cnt, elapsed_time, frame_cnt / elapsed_time))
                output_vdo.write(ori_im)
        finally:
            # Release even on error so the output container is finalized
            # and the input handle is not leaked.
            vdo.release()
            output_vdo.release()

    @staticmethod
    def _bbox_to_xywh_cls_conf(bbox, min_confidence):
        """Filter detections by score and convert corners to centre/size.

        Args:
            bbox: 2-D array whose columns are ``x1, y1, x2, y2, score``.
            min_confidence: rows with ``score <= min_confidence`` are dropped.

        Returns:
            ``(xywh, conf)``: an ``(N, 4)`` array of
            ``centre_x, centre_y, width, height`` and the ``(N,)`` scores.
            The input array is not modified (boolean indexing copies).
        """
        kept = bbox[bbox[:, 4] > min_confidence, :]
        # Width/height must be computed before the corner is moved to the
        # centre, because the centre is derived from them.
        kept[:, 2] = kept[:, 2] - kept[:, 0]
        kept[:, 3] = kept[:, 3] - kept[:, 1]
        kept[:, 0] = kept[:, 0] + kept[:, 2] / 2
        kept[:, 1] = kept[:, 1] + kept[:, 3] / 2
        return kept[:, :4], kept[:, 4]
class DeepsortTracker(object):
    """Thin adapter that feeds corner-format detections to DeepSort."""

    def __init__(self, config=config):
        """Create the tracker.

        Args:
            config: object providing ``deepsort_checkpoint`` and ``use_cuda``;
                defaults to the module-level ``config``.
        """
        self.config = config
        self.deepsort = DeepSort(config.deepsort_checkpoint,
                                 use_cuda=config.use_cuda)

    def detect(self, img, boxes_x1y1x2y2conf):
        """Update the tracker with one frame of detections.

        Args:
            img: the frame passed through to ``DeepSort.update``.
            boxes_x1y1x2y2conf: iterable of ``[x1, y1, x2, y2, conf, ...]``.

        Returns:
            ``(boxes_x1y1x2y2, identities, track_states)``. When the tracker
            reports no tracks the first two items are empty lists, so callers
            can always unpack three values.
        """
        box_xcycwh = np.array([
            np.array([(box[0] + box[2]) // 2,
                      (box[1] + box[3]) // 2,
                      box[2] - box[0],
                      box[3] - box[1]], dtype=np.int32)
            for box in boxes_x1y1x2y2conf
        ])
        box_conf = [box[4] for box in boxes_x1y1x2y2conf]

        outputs, track_states = self.deepsort.update(box_xcycwh, box_conf, img)

        # len() works for both lists and arrays; `outputs == []` does not
        # give a usable truth value when `outputs` is a numpy array.
        if len(outputs) == 0:
            return [], [], track_states

        box_x1y1x2y2 = outputs[:, :4]
        identities = outputs[:, -1]
        return box_x1y1x2y2, identities, track_states
class DeepSortDetector(object):
    """Detect, track and count catch / by-catch classes in a video.

    Intended to be used as a context manager::

        with DeepSortDetector(cfg, weights, video_path) as det:
            det.detect()
    """

    def __init__(self,
                 cfg,
                 weights,
                 video_path,
                 deep_checkpoint="deep_sort/deep/checkpoint/resnet50_last.pt",
                 output_file=None,
                 img_size=512,
                 display=True,
                 max_dist=0.2,
                 display_width=800,
                 display_height=600,
                 save_path=None,
                 json_path='./data/pascal_voc_classes.json'):
        """Build the detector, the tracker and (optionally) the preview window.

        Args:
            cfg, img_size, weights, json_path: forwarded to ``InferYOLOv3``.
            video_path: input video opened in ``__enter__``.
            deep_checkpoint, max_dist: forwarded to ``DeepSort``.
            output_file: optional text file receiving one line per track
                per frame.
            display: show a live preview window named "Test".
            display_width, display_height: initial preview window size.
            save_path: optional path of the annotated output video.
        """
        device = torch.device(
            'cuda') if torch.cuda.is_available() else torch.device('cpu')

        # init opencv video capturer
        self.vidCap = cv2.VideoCapture()
        # init a detector
        self.yolov3 = InferYOLOv3(cfg, img_size, weights, device, json_path)
        # init a deepsort tracker
        self.deepsort = DeepSort(deep_checkpoint, max_dist)

        # settings
        self.display = display
        self.video_path = video_path
        self.output_file = output_file
        self.save_path = save_path
        # Remembered so detect() labels boxes with the same class file the
        # detector was built with.
        self.json_path = json_path

        if self.display:
            cv2.namedWindow("Test", cv2.WINDOW_NORMAL)
            cv2.resizeWindow("Test", display_width, display_height)

    def __enter__(self):
        """Open the input video and, when ``save_path`` is set, the writer.

        Raises:
            AssertionError: if the video path is not a file or the capture
                could not be opened.
        """
        assert os.path.isfile(self.video_path), "Error: path error"
        self.vidCap.open(self.video_path)
        self.im_width = int(self.vidCap.get(cv2.CAP_PROP_FRAME_WIDTH))
        self.im_height = int(self.vidCap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        if self.save_path is not None:
            fourcc = cv2.VideoWriter_fourcc(*'MJPG')
            self.output = cv2.VideoWriter(self.save_path, fourcc, 15.0,
                                          (self.im_width, self.im_height))

        assert self.vidCap.isOpened()
        return self

    def __exit__(self, exc_type, exc_value, exc_traceback):
        """Release the capture/writer and report any pending exception."""
        self.vidCap.release()
        writer = getattr(self, "output", None)
        if writer is not None:
            # Releasing finalizes the output container.
            writer.release()
        if exc_type:
            print(exc_type, exc_value, exc_traceback)

    def detect(self):
        """Process every frame: detect, track, count, display and save.

        Each track id is assigned the class reported the first time the id
        is seen. Class values 1, 2, 3 and 4 are counted as flat fish, round
        fish, nephrops and other respectively; the first three form the
        catch ratio, the last the by-catch ratio.
        """
        with open(self.json_path, 'r') as json_file:
            class_dict = json.load(json_file)
        category_index = {v: k for k, v in class_dict.items()}

        # track id -> class recorded when the id first appeared
        all_obj_info = {}
        frame_no = -1

        results_file = open(self.output_file, "w") if self.output_file else None
        try:
            while self.vidCap.grab():
                frame_no += 1
                total_begin = time.time()
                _, img = self.vidCap.retrieve()

                # detector: box corners [x1, y1, x2, y2], confidences, classes
                yolo_begin = time.time()
                bbox_xyxy_ori, cls_conf, cls_ids = self.yolov3.predict(img)
                print(cls_ids)
                yolo_end = time.time()

                # tracker
                ds_begin = time.time()
                if bbox_xyxy_ori is not None:
                    bbox_cxcywh = xyxy2xywh(bbox_xyxy_ori)
                    outputs = self.deepsort.update(bbox_cxcywh, cls_conf,
                                                   cls_ids, img)
                    if len(outputs) > 0:
                        # columns: x1, y1, x2, y2, ..., id, class
                        bbox_xyxy = outputs[:, :4]
                        ids = outputs[:, -2]
                        object_class = outputs[:, -1]
                        print(ids)
                        print(object_class)

                        for obj_id, obj_cls in zip(ids, object_class):
                            # first observation wins
                            if obj_id not in all_obj_info:
                                all_obj_info[obj_id] = obj_cls
                        print(all_obj_info)

                        # draw the detector boxes
                        img = draw_box(img, bbox_xyxy_ori, cls_ids, cls_conf,
                                       category_index)

                        # record: frame,id,top,left,w,h,1,-1,-1,-1
                        if results_file is not None:
                            for obj_id, (x1, y1, x2, y2) in zip(ids, bbox_xyxy):
                                # Top-left/width/height is derived straight
                                # from the corners, and the id column (not
                                # the class column) is written.
                                results_file.write(
                                    "%d,%d,%d,%d,%d,%d,1,-1,-1,-1\n" %
                                    (frame_no + 1, obj_id, int(x1), int(y1),
                                     int(x2 - x1), int(y2 - y1)))
                ds_end = time.time()
                total_end = time.time()

                # count the current number of each category
                cur_categories = list(all_obj_info.values())
                flatfish_count = cur_categories.count(1)
                roundfish_count = cur_categories.count(2)
                nephrops_count = cur_categories.count(3)
                other_count = cur_categories.count(4)

                catch_count = flatfish_count + roundfish_count + nephrops_count
                total_count = catch_count + other_count
                # Ratios start from frame 3, and only once something has been
                # counted; otherwise the division would fail.
                if frame_no >= 3 and total_count > 0:
                    catch_ratio = round(catch_count / total_count, 2)
                    bycatch_ratio = round(other_count / total_count, 2)
                else:
                    catch_ratio = None
                    bycatch_ratio = None

                total_time = total_end - total_begin
                print(
                    "frame:%04d|det:%.4f|deep sort:%.4f|total:%.4f|det p:%.2f%%|fps:%.2f"
                    % (frame_no, (yolo_end - yolo_begin), (ds_end - ds_begin),
                       total_time,
                       ((yolo_end - yolo_begin) * 100 / total_time),
                       (1 / total_time)))

                # display all the count info on the screen
                if self.display:
                    img = np.uint8(img)
                    displayNephropsCount(img, nephrops_count)
                    displayFlatfishCount(img, flatfish_count)
                    displayRoundfishCount(img, roundfish_count)
                    displayOtherfishCount(img, other_count)
                    displayCatchRatio(img, catch_ratio)
                    displayByCatchRatio(img, bycatch_ratio)

                    # The 8th positional argument of putText is lineType.
                    cv2.putText(img, 'FPS {:.1f}'.format(1 / total_time),
                                (20, 280), cv2.FONT_HERSHEY_SIMPLEX, 0.8,
                                (255, 255, 255), 2, cv2.LINE_AA)
                    cv2.imshow("Test", img)
                    # A single waitKey call: a second call would swallow the
                    # key press before it could be tested. Press Q to quit.
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break

                # write the annotated frame to the output video
                if self.save_path:
                    self.output.write(img)
        finally:
            if results_file is not None:
                results_file.close()
class Detector(object):
    """Track class-0 detections in a video with Darknet YOLOv3 and DeepSort."""

    def __init__(self):
        """Create the capture handle, the YOLO3 wrapper and the tracker."""
        self.vdo = cv2.VideoCapture()
        self.yolo3 = YOLO3("YOLO3/cfg/yolo_v3.cfg",
                           "/local/b/cam2/data/HumanBehavior/yolov3.weights",
                           "YOLO3/cfg/coco.names", is_xywh=True)
        self.deepsort = DeepSort("/local/b/cam2/data/HumanBehavior/ckpt.t7")
        self.class_names = self.yolo3.class_names
        self.write_video = True

    def open(self, video_path):
        """Open ``video_path`` and, if enabled, the ``demo.avi`` writer.

        Returns the result of ``VideoCapture.isOpened()``.
        Raises ``AssertionError`` when ``video_path`` is not a file.
        """
        assert os.path.isfile(video_path), "Error: path error"
        self.vdo.open(video_path)
        self.im_width = int(self.vdo.get(cv2.CAP_PROP_FRAME_WIDTH))
        self.im_height = int(self.vdo.get(cv2.CAP_PROP_FRAME_HEIGHT))
        # Region of interest as (xmin, ymin, xmax, ymax): the full frame.
        self.area = 0, 0, self.im_width, self.im_height
        if self.write_video:
            fourcc = cv2.VideoWriter_fourcc(*'MJPG')
            self.output = cv2.VideoWriter("demo.avi", fourcc, 30,
                                          (self.im_width, self.im_height))
        return self.vdo.isOpened()

    def detect(self):
        """Run detection and tracking on every frame, then release resources.

        A separate Darknet model is loaded here and used through
        ``detect_frame``; ``self.yolo3`` is not called by this method.
        """
        xmin, ymin, xmax, ymax = self.area

        model = Darknet("./yolov3/cfg/yolov3.cfg")
        model.load_weights("/local/b/cam2/data/HumanBehavior/yolov3.weights")
        model.cuda()
        model.eval()
        print("loaded YOLO")

        try:
            while self.vdo.grab():
                start = time.time()
                _, ori_im = self.vdo.retrieve()
                # Crop to the region of interest and reverse channel order.
                im = ori_im[ymin:ymax, xmin:xmax, (2, 1, 0)]

                bbox_xywh, cls_conf, cls_ids = detect_frame(model, im)

                if bbox_xywh is not None:
                    # Keep only class id 0 (presumably "person").
                    mask = cls_ids == 0
                    bbox_xywh = bbox_xywh[mask]
                    bbox_xywh[:, 3] *= 1.2
                    cls_conf = cls_conf[mask]
                    outputs = self.deepsort.update(bbox_xywh, cls_conf, im)
                    if len(outputs) > 0:
                        bbox_xyxy = outputs[:, :4]
                        identities = outputs[:, -1]
                        ori_im = draw_bboxes(ori_im, bbox_xyxy, identities,
                                             offset=(xmin, ymin))

                end = time.time()
                print("time: {}s, fps: {}".format(end - start,
                                                  1 / (end - start)))

                # The writer only exists when write_video was set in open().
                if self.write_video:
                    self.output.write(ori_im)
        finally:
            # Releasing the writer finalizes demo.avi.
            self.vdo.release()
            if self.write_video and hasattr(self, "output"):
                self.output.release()
        print("done...")
class Detector(object):
    """Track class-0 detections and dump per-frame track data to disk.

    Intended to be used as a context manager; ``detect`` writes a ``.pkl``
    and a ``.json`` file next to the input video.
    """

    def __init__(self, args):
        """Build the detector and tracker from parsed command-line ``args``."""
        self.args = args
        use_cuda = bool(strtobool(self.args.use_cuda))
        self.vdo = cv2.VideoCapture()
        self.yolo3 = YOLOv3(args.yolo_cfg, args.yolo_weights, args.yolo_names,
                            is_xywh=True, conf_thresh=args.conf_thresh,
                            nms_thresh=args.nms_thresh, use_cuda=use_cuda)
        self.deepsort = DeepSort(args.deepsort_checkpoint, use_cuda=use_cuda)
        self.class_names = self.yolo3.class_names

    def __enter__(self):
        """Open the input video and, when ``save_path`` is set, the writer.

        Raises:
            AssertionError: if the video path is not a file or the capture
                could not be opened.
        """
        assert os.path.isfile(self.args.VIDEO_PATH), "Error: path error"
        self.vdo.open(self.args.VIDEO_PATH)
        self.im_width = int(self.vdo.get(cv2.CAP_PROP_FRAME_WIDTH))
        self.im_height = int(self.vdo.get(cv2.CAP_PROP_FRAME_HEIGHT))
        # Never run past the end of the video.
        self.end_frame = min(int(self.vdo.get(cv2.CAP_PROP_FRAME_COUNT)),
                             self.args.end_frame)

        if self.args.save_path:
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            self.output = cv2.VideoWriter(self.args.save_path, fourcc, 30,
                                          (self.im_width, self.im_height))

        assert self.vdo.isOpened()
        return self

    def __exit__(self, exc_type, exc_value, exc_traceback):
        """Release the capture/writer and report any pending exception."""
        self.vdo.release()
        writer = getattr(self, "output", None)
        if writer is not None:
            writer.release()
        if exc_type:
            print(exc_type, exc_value, exc_traceback)

    def detect(self):
        """Track frames ``0..end_frame`` and save the results.

        For each frame index a dict ``{track_id: [x1, y1, x2, y2, state,
        time_since_update, score]}`` is collected. The whole mapping is
        written to ``<video>_track.pkl`` and ``<video>_track.json``, where
        ``<video>`` is ``VIDEO_PATH`` without its extension and without the
        substring ``_original``.
        """
        import json
        import pickle

        bbox = {}
        i = 0
        while self.vdo.grab() and i <= self.end_frame:
            start = time.time()
            bbox[i] = {}
            _, ori_im = self.vdo.retrieve()
            # The frame is passed on in its original channel order; the
            # previous RGB conversion was overwritten before use.
            im = ori_im
            bbox_xcycwh, cls_conf, cls_ids = self.yolo3(im)

            if bbox_xcycwh is not None:
                # select class person
                mask = cls_ids == 0
                bbox_xcycwh = bbox_xcycwh[mask]
                bbox_xcycwh[:, 3:] *= 1.2
                cls_conf = cls_conf[mask]
                outputs, scores = self.deepsort.update(bbox_xcycwh, cls_conf,
                                                       im)
                if len(outputs) > 0:
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, -1]
                    states = outputs[:, 4]
                    time_since_updates = outputs[:, 5]
                    for j in range(len(outputs)):
                        bbox[i][int(identities[j])] = [
                            int(bbox_xyxy[j][0]),
                            int(bbox_xyxy[j][1]),
                            int(bbox_xyxy[j][2]),
                            int(bbox_xyxy[j][3]),
                            StateLetters[states[j]],
                            int(time_since_updates[j]),
                            scores[j],
                        ]

            if i % 10 == 0:
                print(f"processing frame {i}, t/frame={time.time()-start}")
            i += 1

        fileName = self.args.VIDEO_PATH.replace('_original', '').rsplit(
            ".", 1)[0] + "_track"
        # Context managers make sure both files are flushed and closed.
        with open(fileName + '.pkl', "wb") as pkl_file:
            pickle.dump(bbox, pkl_file)
        with open(fileName + '.json', "w") as json_file:
            json.dump(bbox, json_file, sort_keys=True, indent=4,
                      separators=(',', ': '))
class Detector(object):
    """Track people in a video, collect per-person features and ground paths.

    NOTE(review): the statement nesting below was reconstructed from a
    flattened source; confirm block boundaries against the original file.

    ``detect`` relies on names that are not defined in this class and are
    presumably module-level: ``user_entry_dict``, ``people_path``,
    ``direction_start``, ``unseen_frame``, ``area_dic``, ``area_threshold``,
    ``distance_threshold``, ``exix_point``, ``CAMERA_ID``, ``get_bbox_area``,
    ``mask_ouput``, ``merge_color``, ``find_grids`` and ``pu``.
    """

    def __init__(self, args):
        """Create the optional preview window, the detector and the tracker."""
        self.args = args
        if args.display:
            cv2.namedWindow("test", cv2.WINDOW_NORMAL)
            cv2.resizeWindow("test", args.display_width, args.display_height)

        self.vdo = cv2.VideoCapture()
        self.yolo3 = YOLOv3(args.yolo_cfg, args.yolo_weights, args.yolo_names, is_xywh=True, conf_thresh=args.conf_thresh, nms_thresh=args.nms_thresh)
        self.deepsort = DeepSort(args.deepsort_checkpoint)
        self.class_names = self.yolo3.class_names

    def __enter__(self):
        """Open the input video and, when ``save_path`` is set, the writer."""
        assert os.path.isfile(self.args.VIDEO_PATH), "Error: path error"
        self.vdo.open(self.args.VIDEO_PATH)
        self.im_width = int(self.vdo.get(cv2.CAP_PROP_FRAME_WIDTH))
        self.im_height = int(self.vdo.get(cv2.CAP_PROP_FRAME_HEIGHT))

        if self.args.save_path:
            fourcc = cv2.VideoWriter_fourcc(*'MJPG')
            self.output = cv2.VideoWriter(self.args.save_path, fourcc, 30, (self.im_width,self.im_height))

        assert self.vdo.isOpened()
        return self

    def __exit__(self, exc_type, exc_value, exc_traceback):
        """Print exception details, if any; nothing is released here."""
        if exc_type:
            print(exc_type, exc_value, exc_traceback)

    def return_user_dict(self):
        """Return ``self.user_entry_dict``.

        NOTE(review): no visible code assigns ``self.user_entry_dict``;
        ``detect`` updates a non-attribute ``user_entry_dict`` instead.
        """
        return self.user_entry_dict

    def detect(self):
        """Process the video, running detection on every second frame."""
        jump_flag = 1  # frame counter; even values trigger detection
        start = time.time()
        while self.vdo.grab():
            _, ori_im = self.vdo.retrieve()
            im_height, im_width = ori_im.shape[:2]
            # The frame is divided into a 5 x 5 grid; the cell size is
            # passed to find_grids below.
            x_max = 5
            y_max = 5
            x_grid = int(im_width / x_max)
            y_grid = int(im_height / y_max)
            display_im = ori_im  # not used afterwards in the visible code

            if jump_flag%2 ==0 :  # process every second frame only
                # NOTE(review): a new socket is opened on every processed
                # frame and never closed in the visible code.
                clientsocket = socket(AF_INET,SOCK_STREAM)
                clientsocket.connect(('140.114.79.179',10523))
                clientsocket.send(pickle.dumps(user_entry_dict))

                im = cv2.cvtColor(ori_im, cv2.COLOR_BGR2RGB)
                bbox_xcycwh, cls_conf, cls_ids = self.yolo3(im)
                cv2.circle(ori_im, (3900, 2100), 50, (255,0,0),-1)

                if bbox_xcycwh is not None:
                    # select class person
                    mask = cls_ids==0

                    bbox_xcycwh = bbox_xcycwh[mask]
                    bbox_xcycwh[:,3:] *= 1.2

                    cls_conf = cls_conf[mask]
                    outputs = self.deepsort.update(bbox_xcycwh, cls_conf, im)

                    # Each output row is indexed as x1, y1, x2, y2, id;
                    # output[4] and output[-1] are both used as the id.
                    for output in outputs:
                        # Grow the per-id lists so that index (id - 1) exists.
                        if output[4] > len(people_path):
                            for i in range(0, output[4] - len(people_path)):
                                people_path.append([])
                                direction_start.append(0)
                                unseen_frame.append(-1)
                            # NOTE(review): nesting of this append is a
                            # guess; it records the bottom-centre point.
                            people_path[output[4] - 1].append(np.array(([(output[0] + output[2]) / 2, output[3]])))

                        coordinate = output[:4]
                        bbox_area = get_bbox_area(coordinate)
                        features = []
                        if bbox_area > area_threshold :
                            # Features are extracted only when this box is
                            # larger than the largest seen so far for the id
                            # (or the id has not been seen: KeyError path).
                            try :
                                if area_dic[output[-1]] < bbox_area :
                                    area_dic[output[-1]] = bbox_area
                                    roiImg = im[output[:4][1]:output[:4][3],output[:4][0]:output[:4][2]] #img[y, x]
                                    features = mask_ouput(roiImg) #features=[[t-shirt, 0.9, [coordination]],...]
                                    features = merge_color(roiImg, features)
                                    print("re: ---------------",features)
                            except KeyError:
                                area_dic.setdefault(output[-1],bbox_area)
                                roiImg = im[output[:4][1]:output[:4][3],output[:4][0]:output[:4][2]] #img[y, x]
                                features = mask_ouput(roiImg) #features=[[t-shirt, 0.9, [coordination]],...]
                                features = merge_color(roiImg, features)
                                print("wait---------------")

                            if output[-1] not in user_entry_dict:
                                user_entry_dict.setdefault(output[-1],[features,exix_point,CAMERA_ID,[]]) #add entry id
                            else:
                                # Merge new features into the stored list:
                                # keep the larger confidence for known
                                # labels, append unknown ones.
                                for feature in features:
                                    flag = 1
                                    for i in range(len(user_entry_dict[output[-1]][0])):
                                        if feature[0] in user_entry_dict[output[-1]][0][i]:
                                            user_entry_dict[output[-1]][0][i][1] = max(user_entry_dict[output[-1]][0][i][1],feature[1]) # update the confidence
                                            flag = 0
                                    if flag == 1 :
                                        user_entry_dict[output[-1]][0].append(feature)
                            print(user_entry_dict)

                            # call project.py
                            find_grids( output, [x_grid, y_grid], 0.3, user_entry_dict[output[-1]])

                        # Points of the current straight segment of the path.
                        x = []
                        y = []
                        for i in range(direction_start[output[4] - 1], len(people_path[output[4] - 1])):
                            x.append(people_path[output[4] - 1][i][0])
                            y.append(people_path[output[4] - 1][i][1])

                        path_x = (output[0] + output[2]) / 2
                        path_y = output[3]
                        if(len(x) > 1):
                            # a, b, c are used as the line a*x + b*y + c = 0;
                            # the expressions below are point-line distances.
                            a, b, c = pu.cal_simple_linear_regression_coefficients(x, y)
                            # Skip points far (> 200) from the fitted line
                            # unless the id has been unseen for 10+ frames.
                            if abs(a * path_x + b * path_y + c) / math.sqrt(a * a + b * b) > 200 and unseen_frame[output[4] - 1] < 10:
                                continue;
                            if abs(a * path_x + b * path_y + c) / math.sqrt(a * a + b * b) < distance_threshold:
                                # Close to the line: replace the point with
                                # the result of pu.find_projection.
                                path_x, path_y = pu.find_projection(a, b, c, path_x, path_y)
                                if len(people_path[output[4] - 1]) > 0:
                                    prev_x = people_path[output[4] - 1][len(people_path[output[4] - 1]) - 1][0]
                                    prev_y = people_path[output[4] - 1][len(people_path[output[4] - 1]) - 1][1]
                                    # velocity is computed but not used in
                                    # the visible code
                                    velocity = math.sqrt((path_x - prev_x) * (path_x - prev_x) + (path_y - prev_y) * (path_y - prev_y)) * 30 / (unseen_frame[output[4] - 1] + 1)
                            else:
                                # Otherwise start a new segment here ("turn").
                                direction_start[output[4] - 1] = len(people_path[output[4] - 1])

                        people_path[output[4] - 1].append(np.array((path_x, path_y)))
                        unseen_frame[output[4] - 1] = 0

                    if len(outputs) > 0:
                        bbox_xyxy = outputs[:,:4]
                        identities = outputs[:,-1]
                        ori_im = draw_bboxes(ori_im, bbox_xyxy, identities)
                        # Draw each visible id's recorded path as red lines.
                        for id in identities:
                            for i in range(1, len(people_path[id-1])):
                                cv2.line(ori_im, (int(people_path[id-1][i-1][0]), int(people_path[id-1][i-1][1])), (int(people_path[id-1][i][0]), int(people_path[id-1][i][1])), (0, 0, 255), 3)

                print(area_dic)
            jump_flag+=1

            if self.args.display:
                cv2.imshow("test", ori_im)
                cv2.waitKey(1)

            if self.args.save_path:
                self.output.write(ori_im)
        end = time.time()
        print(end-start)
class Detector(object):
    """Track detections over a sorted image sequence and write MOT results.

    NOTE(review): the statement nesting below was reconstructed from a
    flattened source; confirm block boundaries against the original file.
    """

    def __init__(self, args):
        """Collect the image list and build detector, mapper and tracker."""
        self.args = args
        use_cuda = bool(strtobool(self.args.use_cuda))
        # Frames come from a glob pattern, in natural sort order.
        self.imgList = natsort.natsorted(glob.glob(self.args.imgs_path))
        self.detectron2 = Detectron2()

        # Initialize coordinate mapper
        self.myCoordMapper = coord_mapper.CoordMapperCSG(
            match_code='HUN-BEL 2. Half')
        self.fps = 6

        self.deepsort = DeepSort(args.deepsort_checkpoint,
                                 lambdaParam=0.6,
                                 coordMapper=self.myCoordMapper,
                                 max_dist=1.0,
                                 min_confidence=0.1,
                                 nms_max_overlap=0.7,
                                 max_iou_distance=0.7,
                                 max_age=self.fps * 3,
                                 n_init=3,
                                 nn_budget=50,
                                 use_cuda=use_cuda)

    def __enter__(self):
        """Read the frame size from the first image and open the writer."""
        img = cv2.imread(self.imgList[0])
        self.im_height, self.im_width, _ = img.shape

        # The output frame rate is self.fps (set to 6 in __init__).
        if self.args.save_path:
            fourcc = cv2.VideoWriter_fourcc(*'XVID')
            self.output = cv2.VideoWriter(self.args.save_path, fourcc,
                                          self.fps,
                                          (self.im_width, self.im_height))
        return self

    def __exit__(self, exc_type, exc_value, exc_traceback):
        """Print exception details, if any; nothing is released here."""
        if exc_type:
            print(exc_type, exc_value, exc_traceback)

    def detect(self):
        """Detect and track on every image, then write all results."""
        results = []
        allDetection = dict()  # not used afterwards in the visible code
        idx_frame = 0

        while idx_frame < len(self.imgList):
            start = time.time()

            # Retrieve next frame
            im = cv2.imread(self.imgList[idx_frame])

            # Detect object on image
            bbox_xcycwh, cls_conf, cls_ids = self.detectron2.detect(im)

            # Map each box's bottom-centre point through the coordinate
            # mapper and keep only boxes for which it returns non-None.
            detection_mask = [(xc, yc + (h / 2))
                              for xc, yc, w, h in bbox_xcycwh]
            detection_mask = self.myCoordMapper.image2xy(detection_mask)
            detection_mask = [
                False if x is None else True for x in detection_mask
            ]
            bbox_xcycwh, cls_conf, cls_ids = bbox_xcycwh[
                detection_mask], cls_conf[detection_mask], cls_ids[
                    detection_mask]

            # TODO: is a null check needed here?
            # NOTE(review): bbox_xcycwh has already been iterated and
            # indexed above, so it cannot be None at this point.
            if bbox_xcycwh is not None:
                # NOTE: This is double check since all the returned boxes
                # are person objects (asserted in the detect function)
                # select class person
                mask = cls_ids == 0
                cls_conf = cls_conf[mask]

                # ANSWER: bbox dilation just in case bbox too small, delete
                # this line if using a better pedestrian detector
                # TODO: Uncomment 1.1
                bbox_xcycwh = bbox_xcycwh[mask]
                #bbox_xcycwh[:, 3:] *= 1.1

                # NOTE(review): if this increment really sits inside the
                # `if`, a None result would never advance the loop.
                idx_frame += 1

                # Draw all boxes above the tracker's confidence threshold
                bb_xyxy = [[xc - w / 2, yc - h / 2, xc + w / 2, yc + h / 2]
                           for xc, yc, w, h in bbox_xcycwh]
                bb_xyxy = [
                    x for x, conf in zip(bb_xyxy, cls_conf)
                    if conf > self.deepsort.min_confidence
                ]
                all1 = [None] * len(bb_xyxy)
                im = draw_bboxes(im, bb_xyxy, all1)

                # Do tracking
                outputs, deadtracks = self.deepsort.update(
                    bbox_xcycwh, cls_conf, im)
                print('len outputs:{0}, len deadtracks:{1}'.format(
                    len(outputs), len(deadtracks)))

                # Draw boxes for visualization
                if len(outputs) > 0:
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, -1]
                    im = draw_bboxes(im, bbox_xyxy, identities)

                    # Collect for the results file (frame index is 0-based)
                    bbox_tlwh = [
                        self.deepsort._xyxy_to_tlwh(bb) for bb in bbox_xyxy
                    ]
                    results.append((idx_frame - 1, bbox_tlwh, identities))

                im = draw_frameNum(im, (2514, 330), idx_frame - 1)

                # Draw boxes for dead tracks for debugging
                # NOTE(review): guarded by len(outputs), not len(deadtracks)
                # — confirm this is intended.
                if len(outputs) > 0:
                    bbox_xyxy = [x[:4] for x in deadtracks]
                    labels = [x[-1] for x in deadtracks]
                    im = draw_dead_bboxes(im, bbox_xyxy, labels)

            end = time.time()
            print(
                "time: {}s, fps: {}, frame: {}".format(end - start,
                                                       1 / (end - start),
                                                       idx_frame - 1), '\n',
                '-' * 30, '\n')

            if self.args.save_path:
                self.output.write(im)

        # Write all tracked objs to file
        write_results(self.args.result_path, results, 'mot')
class Detector(object):
    """Track class-0 detections in a video with YOLOv3 and DeepSort.

    Intended to be used as a context manager.
    """

    def __init__(self, args):
        """Build the detector and tracker from parsed command-line ``args``.

        Note that ``args.display`` is forced to ``False`` here, so the
        preview window is never created regardless of the caller's value.
        """
        self.args = args
        args.display = False
        if args.display:
            cv2.namedWindow("test", cv2.WINDOW_NORMAL)
            cv2.resizeWindow("test", args.display_width, args.display_height)

        self.vdo = cv2.VideoCapture()
        self.yolo3 = YOLOv3(args.yolo_cfg, args.yolo_weights, args.yolo_names,
                            is_xywh=True, conf_thresh=args.conf_thresh,
                            nms_thresh=args.nms_thresh)
        self.deepsort = DeepSort(args.deepsort_checkpoint)
        self.class_names = self.yolo3.class_names

    def __enter__(self):
        """Open the input video and, when ``save_path`` is set, the writer.

        Raises:
            AssertionError: if the video path is not a file or the capture
                could not be opened.
        """
        assert os.path.isfile(self.args.VIDEO_PATH), "Error: path error"
        self.vdo.open(self.args.VIDEO_PATH)
        self.im_width = int(self.vdo.get(cv2.CAP_PROP_FRAME_WIDTH))
        self.im_height = int(self.vdo.get(cv2.CAP_PROP_FRAME_HEIGHT))

        if self.args.save_path:
            fourcc = cv2.VideoWriter_fourcc(*'MJPG')
            self.output = cv2.VideoWriter(self.args.save_path, fourcc, 20,
                                          (self.im_width, self.im_height))

        assert self.vdo.isOpened()
        return self

    def __exit__(self, exc_type, exc_value, exc_traceback):
        """Release the capture/writer and report any pending exception."""
        self.vdo.release()
        writer = getattr(self, "output", None)
        if writer is not None:
            # Releasing finalizes the output container.
            writer.release()
        if exc_type:
            print(exc_type, exc_value, exc_traceback)

    def detect(self):
        """Detect, track, draw and (optionally) show/save every frame."""
        while self.vdo.grab():
            start = time.time()
            _, ori_im = self.vdo.retrieve()
            # The frame is passed on in its original channel order; the
            # previous RGB conversion was overwritten before use.
            im = ori_im
            bbox_xcycwh, cls_conf, cls_ids = self.yolo3(im)

            if bbox_xcycwh is not None:
                # select class person
                mask = cls_ids == 0
                bbox_xcycwh = bbox_xcycwh[mask]
                bbox_xcycwh[:, 3:] *= 1.2
                cls_conf = cls_conf[mask]

                outputs = self.deepsort.update(bbox_xcycwh, cls_conf, im)
                if len(outputs) > 0:
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, -1]
                    ori_im = draw_bboxes(ori_im, bbox_xyxy, identities)

            end = time.time()
            print("time: {}s, fps: {}".format(end - start, 1 / (end - start)))

            if self.args.display:
                cv2.imshow("test", ori_im)
                cv2.waitKey(1)

            if self.args.save_path:
                self.output.write(ori_im)
class Detector(object):
    """Track objects from a webcam, IP camera or file with CenterNet + DeepSort."""

    def __init__(self, opt):
        """Build the detector and tracker.

        Args:
            opt: options object; ``opt.task`` selects the entry of
                ``detector_factory``. It is stored so ``open`` uses the same
                options instead of depending on a module-level ``opt``.
        """
        self.opt = opt
        self.vdo = cv2.VideoCapture()
        # CenterNet detector
        self.detector = detector_factory[opt.task](opt)
        self.deepsort = DeepSort("deep/checkpoint/ckpt.t7")
        self.write_video = True

    def open(self, video_path):
        """Open the input selected by ``opt.input_type`` and the writer.

        ``video_path`` is accepted for interface compatibility but is not
        used: the source comes from the options (``webcam_ind``,
        ``ipcam_url``/``ipcam_no`` or ``vid_path``).

        Raises:
            AssertionError: for file input when ``opt.vid_path`` is not a file.
        """
        opt = self.opt
        if opt.input_type == 'webcam':
            self.vdo.open(opt.webcam_ind)
        elif opt.input_type == 'ipcam':
            # Camera key and secret are the first two lines of the file.
            with open("cam_secret.txt") as f:
                lines = f.readlines()
            key = lines[0].strip()
            secret = lines[1].strip()
            self.vdo.open(opt.ipcam_url.format(key, secret, opt.ipcam_no))
        else:
            # video file
            assert os.path.isfile(opt.vid_path), "Error: path error"
            self.vdo.open(opt.vid_path)

        self.im_width = int(self.vdo.get(cv2.CAP_PROP_FRAME_WIDTH))
        self.im_height = int(self.vdo.get(cv2.CAP_PROP_FRAME_HEIGHT))
        # Region of interest as (xmin, ymin, xmax, ymax): the full frame.
        self.area = 0, 0, self.im_width, self.im_height

        if self.write_video:
            fourcc = cv2.VideoWriter_fourcc(*'MJPG')
            self.output = cv2.VideoWriter("demo1.avi", fourcc, 20,
                                          (self.im_width, self.im_height))

    def detect(self):
        """Detect, track, draw, show and (optionally) save every frame."""
        xmin, ymin, xmax, ymax = self.area
        frame_no = 0
        fps_sum = 0.0  # running sum of per-frame fps, for the average
        while self.vdo.grab():
            ok, ori_im = self.vdo.retrieve()
            if not ok:
                # Live sources can fail to decode a grabbed frame; slicing
                # the resulting None would raise, so skip the frame instead.
                continue
            frame_no += 1
            start = time.time()
            im = ori_im[ymin:ymax, xmin:xmax]

            results = self.detector.run(im)['results']
            bbox_xywh, cls_conf = bbox_to_xywh_cls_conf(results)

            if bbox_xywh is not None:
                outputs = self.deepsort.update(bbox_xywh, cls_conf, im)
                if len(outputs) > 0:
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, -1]
                    ori_im = draw_bboxes(ori_im, bbox_xyxy, identities,
                                         offset=(xmin, ymin))

            end = time.time()
            fps = 1 / (end - start)
            fps_sum += fps
            print("centernet time: {}s, fps: {}, avg fps : {}".format(
                end - start, fps, fps_sum / frame_no))

            cv2.imshow("test", ori_im)
            cv2.waitKey(1)
            if self.write_video:
                self.output.write(ori_im)
video_writer = cv2.VideoWriter("output.avi", cv2.VideoWriter_fourcc(*'MJPG'), fps, (width, height)) while video_capture.isOpened(): ret, frame = video_capture.read() if not ret: break start = time.time() xmin, ymin, xmax, ymax = 0, 0, width, height im = frame[ymin:ymax, xmin:xmax, (2,1,0)] bbox_xywh, cls_conf, cls_ids = yolo3(im) if bbox_xywh is not None: mask = cls_ids==0 bbox_xywh = bbox_xywh[mask] bbox_xywh[:,3] *= 1.2 cls_conf = cls_conf[mask] outputs = deepsort.update(bbox_xywh, cls_conf, im) if len(outputs) > 0: bbox_xyxy = outputs[:,:4] identities = outputs[:,-1] frame = draw_bboxes(frame, bbox_xyxy, identities, offset=(xmin,ymin)) end = time.time() print("time: {}s, fps: {}".format(end-start, 1/(end-start))) video_writer.write(frame) video_capture.release() video_writer.release() # convert AVI to MP4 !ffmpeg -y -loglevel info -i output.avi output.mp4 else: print("can't open the given input video file!")
class MOTTracker(object):
    """Track faces or persons in a video file.

    A detector (MTCNN for ``'face'``, RetinaNet for ``'person'``) runs on
    every fifth frame; in between, boxes are propagated with the Kalman-based
    ``MoveTrackerRun``. Either way the boxes are passed to Deep SORT, which
    assigns the identities that are drawn on the frame.
    """

    def __init__(self, args):
        self.args = args
        self.open_video()
        self.command_type = args.mot_type
        threshold = np.array([0.7, 0.8, 0.9])
        crop_size = [112, 112]
        if self.command_type == 'face':
            self.mtcnn = MtcnnDetector(threshold, crop_size, args.detect_model)
        elif self.command_type == 'person':
            self.person_detect = RetinanetDetector(args)
        self.deepsort = DeepSort(args.feature_model,
                                 args.face_load_num,
                                 use_cuda=args.use_cuda,
                                 mot_type=self.command_type)
        self.kf = KalmanFilter()
        self.meanes_track = []
        self.convariances_track = []
        # Per-identity count of crops saved by save_track_results().
        self.id_cnt_dict = dict()
        self.moveTrack = MoveTrackerRun(self.kf)
        self.img_clarity = BlurDetection()
        # Minimum blur-detection score for a crop to be saved.
        self.score = 60.0

    def open_video(self):
        """Open ``args.VIDEO_PATH`` and create ``args.save_dir`` if requested.

        Raises:
            Exception: if the path is not a file or the video cannot be opened.
        """
        if not os.path.isfile(self.args.VIDEO_PATH):
            raise Exception("Error:input video path is not exist")
        self.vdo = cv2.VideoCapture(self.args.VIDEO_PATH)
        self.im_width = int(self.vdo.get(cv2.CAP_PROP_FRAME_WIDTH))
        self.im_height = int(self.vdo.get(cv2.CAP_PROP_FRAME_HEIGHT))
        if self.args.save_dir:
            if not os.path.exists(self.args.save_dir):
                os.makedirs(self.args.save_dir)
        if not self.vdo.isOpened():
            raise Exception('open video failed')

    def xcycah2xcyc(self, xyah):
        """Convert rows of (xc, yc, aspect, h, ...) to (xc, yc, w, h)."""
        xyah = np.array(xyah)
        xyah = xyah[:, :4]
        w = xyah[:, 2] * xyah[:, 3]  # aspect = w / h
        h = xyah[:, 3]
        xc = xyah[:, 0]
        yc = xyah[:, 1]
        return np.vstack([xc, yc, w, h]).T

    def xcycah2xyxy(self, xcycah):
        """Convert rows of (xc, yc, aspect, h, ...) to (x1, y1, x2, y2)."""
        xcycah = np.array(xcycah)
        xcycah = xcycah[:, :4]
        w = xcycah[:, 2] * xcycah[:, 3]
        h = xcycah[:, 3]
        x2 = xcycah[:, 0] + w / 2
        y2 = xcycah[:, 1] + h / 2
        x1 = xcycah[:, 0] - w / 2
        y1 = xcycah[:, 1] - h / 2
        return np.vstack([x1, y1, x2, y2]).T

    def xyxy2xcyc(self, xywh):
        """Convert rows of (x1, y1, x2, y2) to (xc, yc, w, h).

        The parameter is named ``xywh`` for historical reasons; it holds
        corner coordinates.
        """
        w = xywh[:, 2] - xywh[:, 0]
        h = xywh[:, 3] - xywh[:, 1]
        xc = xywh[:, 0] + w / 2
        yc = xywh[:, 1] + h / 2
        return np.vstack([xc, yc, w, h]).T

    def xyxy2xywh(self, xywh):
        """Convert rows of (x1, y1, x2, y2) to (x1, y1, w, h)."""
        w = xywh[:, 2] - xywh[:, 0]
        h = xywh[:, 3] - xywh[:, 1]
        return np.vstack([xywh[:, 0], xywh[:, 1], w, h]).T

    def xywh2xcycwh(self, xywh):
        """Convert rows of (x1, y1, w, h) to (xc, yc, w, h)."""
        xywh = np.array(xywh)
        xc = xywh[:, 0] + xywh[:, 2] / 2
        yc = xywh[:, 1] + xywh[:, 3] / 2
        return np.vstack([xc, yc, xywh[:, 2], xywh[:, 3]]).T

    def xywh2xyxy(self, xywh):
        """Convert rows of (x1, y1, w, h) to (x1, y1, x2, y2)."""
        xywh = np.array(xywh)
        x2 = xywh[:, 0] + xywh[:, 2]
        y2 = xywh[:, 1] + xywh[:, 3]
        return np.vstack([xywh[:, 0], xywh[:, 1], x2, y2]).T

    def xcyc2xcycah(self, bbox_xcycwh):
        """Convert rows of (xc, yc, w, h) to float32 (xc, yc, w / h, h)."""
        bbox_xcycwh = np.array(bbox_xcycwh, dtype=np.float32)
        xc = bbox_xcycwh[:, 0]
        yc = bbox_xcycwh[:, 1]
        a = bbox_xcycwh[:, 2] / bbox_xcycwh[:, 3]
        return np.vstack([xc, yc, a, bbox_xcycwh[:, 3]]).T

    def widerbox(self, boxes):
        """Grow (x1, y1, x2, y2) boxes by 30% per side, clipped to the frame."""
        x1 = boxes[:, 0]
        y1 = boxes[:, 1]
        x2 = boxes[:, 2]
        y2 = boxes[:, 3]
        boxw = x2 - x1
        boxh = y2 - y1
        x1 = np.maximum(0, x1 - 0.3 * boxw)
        y1 = np.maximum(0, y1 - 0.3 * boxh)
        x2 = np.minimum(self.im_width, x2 + 0.3 * boxw)
        y2 = np.minimum(self.im_height, y2 + 0.3 * boxh)
        return np.vstack([x1, y1, x2, y2]).T

    def save_track_results(self, bbox_xyxy, img, identities, offset=(0, 0)):
        """Save a crop of every tracked box that passes the blur check.

        Crops are written to ``<save_dir>/<id>/<id>_<n>.jpg`` where ``n`` is
        the number of crops already saved for that identity.

        Args:
            bbox_xyxy: iterable of (x1, y1, x2, y2) boxes.
            img: frame the boxes refer to.
            identities: per-box identities, or None to label every box '0'.
            offset: (dx, dy) added to the box coordinates before cropping.
        """
        for i, box in enumerate(bbox_xyxy):
            x1, y1, x2, y2 = [int(v) for v in box]
            x1 += offset[0]
            x2 += offset[0]
            y1 += offset[1]
            y2 += offset[1]
            # Clamp to valid pixel coordinates.
            x1 = min(max(x1, 0), self.im_width - 1)
            y1 = min(max(y1, 0), self.im_height - 1)
            x2 = min(max(x2, 0), self.im_width - 1)
            y2 = min(max(y2, 0), self.im_height - 1)
            track_id = str(identities[i]) if identities is not None else '0'
            crop_img = img[y1:y2, x1:x2, :]
            if self.img_clarity._blurrDetection(crop_img) <= self.score:
                continue
            tmp_cnt = self.id_cnt_dict.setdefault(track_id, 0)
            self.id_cnt_dict[track_id] = tmp_cnt + 1
            save_dir = os.path.join(self.args.save_dir, track_id)
            if not os.path.exists(save_dir):
                os.makedirs(save_dir)
            save_path = os.path.join(
                save_dir, track_id + '_' + str(tmp_cnt) + '.jpg')
            cv2.imwrite(save_path, crop_img)

    def detect(self):
        """Process the video frame by frame until it ends or the user quits.

        Pressing ``q`` or Esc in the display window stops the loop. The
        capture and the window are released even if processing raises.
        """
        cnt = 0
        update_fg = True
        detect_fg = True
        total_time = 0
        outputs = []
        try:
            while self.vdo.isOpened():
                start = time.time()
                ret, ori_im = self.vdo.read()
                if not ret:
                    # End of stream (or read error): without this check the
                    # colour conversion below fails on a None frame.
                    break
                im = cv2.cvtColor(ori_im, cv2.COLOR_BGR2RGB)
                im = np.array([im])
                if cnt % 5 == 0 or detect_fg:
                    # Detection frame.
                    if self.command_type == 'face':
                        rectangles = self.mtcnn.detectFace(im, True)
                        rectangles = rectangles[0]
                        if len(rectangles) < 1:
                            continue
                        bboxes = rectangles[:, :4]
                        bboxes = self.widerbox(bboxes)
                        cls_conf = rectangles[:, 4]
                    elif self.command_type == 'person':
                        bboxes, cls_conf = self.person_detect.test_img_org(
                            ori_im)
                        if len(bboxes) == 0:
                            continue
                    # NOTE(review): widerbox() returns corner boxes, so in
                    # 'face' mode xyxy2xcyc() looks like the matching
                    # conversion -- confirm against the detector outputs.
                    bbox_xcycwh = self.xywh2xcycwh(bboxes)
                    update_fg = True
                    box_xcycah = self.xcyc2xcycah(bbox_xcycwh)
                    self.moveTrack.track_init(box_xcycah)
                    self.moveTrack.track_predict()
                    self.moveTrack.track_update(box_xcycah)
                    detect_fg = False
                else:
                    # Prediction-only frame.
                    if len(bbox_xcycwh) > 0:
                        start1 = time.time()
                        self.moveTrack.track_predict()
                        bbox_xcycwh = self.xcycah2xcyc(
                            self.moveTrack.means_track)
                        end1 = time.time()
                        print('only tracker time consume:', end1 - start1)
                        update_fg = False
                        detect_fg = False
                    else:
                        # Nothing left to propagate: detect on the next frame.
                        detect_fg = True
                if len(bbox_xcycwh) > 0:
                    outputs = self.deepsort.update(bbox_xcycwh, cls_conf,
                                                   ori_im, update_fg)
                end = time.time()
                consume = end - start
                if len(outputs) > 0:
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, -1]
                    ori_im = draw_bboxes(ori_im, bbox_xyxy, identities)
                print("frame: {} time: {}s, fps: {}".format(
                    cnt, consume, 1 / (end - start)))
                cnt += 1
                cv2.imshow("test", ori_im)
                c = cv2.waitKey(1) & 0xFF
                if c == 27 or c == ord('q'):
                    break
                total_time += consume
        finally:
            self.vdo.release()
            cv2.destroyAllWindows()
        # Guard against a video in which no frame was fully processed.
        ave_fps = cnt / total_time if total_time > 0 else 0.0
        print("video ave fps and total_time: ", ave_fps, total_time)
class Detector(object):
    """Run Deep SORT over pre-computed detections stored in a pickle file.

    Tracks are appended to a CSV file; optionally a visualisation video is
    rendered from the original frames.
    """

    def __init__(self,
                 detections_file: str,
                 resolution: tuple,
                 fps: int,
                 input_images_dir: str,
                 output_video_path: str,
                 output_result_path: str,
                 use_cuda: bool,
                 lambdaParam: float,
                 max_dist: float,
                 min_confidence: float,
                 nms_max_overlap: float,
                 max_iou_distance: float,
                 max_age: int,
                 n_init: int,
                 nn_budget: int,
                 model_path='deep_sort/deep/checkpoint/ckpt.t7',
                 early_stopping=None):
        # Pickle file that contains every detection.
        self.detections_file = detections_file
        # Glob pattern of the 2.5K input frames, named {frameNum}.jpg.
        self.input_images_dir = input_images_dir
        # Where the visualisation video is written.
        self.output_video_path = output_video_path
        # Where the tracking result is written, in CSV format.
        self.output_result_path = output_result_path
        # Frame number at which processing stops, or None for no limit.
        self.early_stopping = early_stopping

        assert self.output_result_path is not None and self.detections_file is not None

        self._use_cuda = use_cuda
        self.fps = fps
        self.resolution = resolution

        # Initialize coordinate mapper
        self.myCoordMapper = coord_mapper.CoordMapperCSG(
            match_code='HUN-BEL 1. Half')

        self.deepsort = DeepSort(model_path=model_path,
                                 lambdaParam=lambdaParam,
                                 coordMapper=self.myCoordMapper,
                                 max_dist=max_dist,
                                 min_confidence=min_confidence,
                                 nms_max_overlap=nms_max_overlap,
                                 max_iou_distance=max_iou_distance,
                                 max_age=max_age,
                                 n_init=n_init,
                                 nn_budget=nn_budget,
                                 use_cuda=self._use_cuda,
                                 resolution=(self.resolution[0] * 2,
                                             self.resolution[1]),
                                 fps=self.fps)

    def initVideoOutput(self):
        """Index the input frames and open the output video writer.

        Does nothing when no input images or no output video path is set.
        """
        if self.input_images_dir is None or self.output_video_path is None:
            return

        # Every image here is 2.5K wide.
        imgList = natsort.natsorted(glob.glob(self.input_images_dir))
        # Frame number = file name up to its first dot. basename() is used
        # instead of splitting on '/' so the platform's separator is honoured.
        self.dict_frame2path = {
            int(os.path.basename(path).split('.')[0]): path
            for path in imgList
        }

        # The output is twice as wide as the configured resolution.
        self.out_vid_height = self.resolution[1]
        self.out_vid_width = self.resolution[0] * 2

        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        self.output = cv2.VideoWriter(
            self.output_video_path, fourcc, self.fps,
            (self.out_vid_width, self.out_vid_height))

    def writeVideoOutput(self,
                         frameNum,
                         list_detections,
                         tracks,
                         deadtracks,
                         draw_detections=True,
                         draw_tracks=True,
                         draw_deadtracks=True):
        """Render one frame with detections, tracks and dead tracks drawn on it.

        Does nothing when no input images or no output video path is set.
        """
        if self.input_images_dir is None or self.output_video_path is None:
            return

        # Read the matching frame and resize it to the output size.
        img = cv2.imread(self.dict_frame2path[frameNum])
        img = cv2.resize(img, (self.out_vid_width, self.out_vid_height),
                         interpolation=cv2.INTER_AREA)

        # Draw the detection boxes (without labels).
        if draw_detections:
            bb_xyxy = [det['box'] for det in list_detections]
            all1 = [None] * len(bb_xyxy)
            img = draw_bboxes(img, bb_xyxy, all1)

        # Track coordinates are scaled from a 2560-wide frame.
        resizeFactor = self.resolution[0] / 2560

        # Draw the live tracks.
        if len(tracks) > 0 and draw_tracks:
            bbox_xyxy = tracks[:, :4] * resizeFactor
            identities = tracks[:, 4]
            img = draw_bboxes(img, bbox_xyxy, identities)

        # Draw boxes for dead tracks for debugging
        if len(deadtracks) > 0 and draw_deadtracks:
            bbox_xyxy = [np.array(x[:4]) * resizeFactor for x in deadtracks]
            labels = [x[4] for x in deadtracks]
            img = draw_dead_bboxes(img, bbox_xyxy, labels)

        # Draw the frame number as well.
        img = draw_frameNum(
            img, (self.out_vid_width // 2, self.out_vid_height // 10),
            frameNum)

        # Write to file
        self.output.write(img)

    def closeVideoOutput(self):
        """Release the video writer, if one was opened."""
        if self.input_images_dir is None or self.output_video_path is None:
            return
        self.output.release()

    def writeResults(self, frameNum, tracks, ts_start, ts_end):
        """Append the tracks of one frame to the CSV result file.

        The header row is written only when the file does not exist yet.

        tracks : np.array = List[ [x1, y1, x2, y2, tID, xWorld, yWorld] ]
        """
        if len(tracks) == 0:
            return

        list_tracks = [{
            'frame': frameNum,
            'ts_start': ts_start,
            'ts_end': ts_end,
            'xTL': xTL,
            'yTL': yTL,
            'xBR': xBR,
            'yBR': yBR,
            'tID': tID,
            'xWorld': xWorld,
            'yWorld': yWorld
        } for xTL, yTL, xBR, yBR, tID, xWorld, yWorld in tracks]

        pd.DataFrame(list_tracks).to_csv(
            self.output_result_path,
            mode='a',
            index=None,
            header=(not os.path.exists(self.output_result_path)))

    def doTrackingOnDetectionFile(self):
        """Track every selected frame of the detections pickle.

        The detections pickle looks like this:
        dict( frameNum : List[dict_detection])
        dict_detection = {'worldXY' : tuple(X, Y),
                          'box' : [xTL, yTL, xBR, yBR],
                          'bigBox' : [xTL, yTL, xBR, yBR],
                          'score' : float,
                          'image' : np.array(NxM),
                          'team' = ['red', 'yellow', 'other', 'more player from different team']}
        """
        # Calc frame skipping
        # NOTE(review): the check uses 30 while the step uses 60 -- confirm
        # which source frame rate is intended.
        assert 30 % self.fps == 0
        stepFrame = 60 // self.fps

        print('Reading detections pickle')
        # Read in detection pickle
        # NOTE: pickle.load executes arbitrary code from the file; only use
        # detection files from a trusted source.
        with open(self.detections_file, 'rb') as handle:
            dict_detections = pickle.load(handle)
        print('Done')

        self.initVideoOutput()
        try:
            for frameNum in sorted(dict_detections.keys()):
                if (frameNum % stepFrame) != 0:
                    continue

                # Keep only the home-team detections.
                list_dets = [
                    x for x in dict_detections[frameNum]
                    if x['team'] in ['red']
                ]
                print('Frame', frameNum)

                # Filtering for red players may leave zero players.
                if len(list_dets) > 0:
                    self.doTrackingForOneFrame(frameNum, list_dets)

                if self.early_stopping is not None and frameNum >= self.early_stopping:
                    break
        finally:
            # Close the video, if there is one, even when tracking failed.
            self.closeVideoOutput()

    def doTrackingForOneFrame(self, frameNum, list_of_detections):
        """Update the tracker with one frame and write video and CSV output.

        list_of_detections : List[ {'worldXY' : tuple(X, Y),
                                    'box' : [xTL, yTL, xBR, yBR],
                                    'bigBox' : [xTL, yTL, xBR, yBR],
                                    'score' : float,
                                    'image' : np.array(NxM)} ]
        """
        ts_start = time.time()

        # Build the boxes, converted to (cX, cY, W, H).
        # NOTE(review): the original remark said the small-image boxes are
        # needed for plotting, yet 'bigBox' is what is read here -- confirm.
        bbox_xcycwh = [[(xBR + xTL) / 2, (yBR + yTL) / 2, (xBR - xTL),
                        (yBR - yTL)]
                       for xTL, yTL, xBR, yBR in (det['bigBox']
                                                  for det in list_of_detections)]
        cls_conf = [det['score'] for det in list_of_detections]
        bbox_imgs = [det['image'] for det in list_of_detections]
        worldCoordXY = [det['worldXY'] for det in list_of_detections]

        outputs, deadtracks = self.deepsort.update(bbox_xcycwh, cls_conf,
                                                   bbox_imgs, worldCoordXY)

        ts_end = time.time()

        self.writeVideoOutput(frameNum, list_of_detections, outputs,
                              deadtracks)
        self.writeResults(frameNum, outputs, ts_start, ts_end)
class Detector(object):
    """YOLOv3 + Deep SORT person tracker on a RealSense depth/colour stream.

    For every tracked box the median depth and an estimate of the real-world
    width and height are printed.
    """

    # Stream geometry and the camera field of view (degrees) used to convert
    # pixel extents into metric extents.
    FRAME_WIDTH = 640
    FRAME_HEIGHT = 480
    FOV_HORIZONTAL_DEG = 91.2
    FOV_VERTICAL_DEG = 65.5

    def __init__(self):
        self.vdo = cv2.VideoCapture()
        self.yolo3 = YOLOv3("YOLOv3/cfg/yolo_v3.cfg",
                            "YOLOv3/yolov3.weights",
                            "YOLOv3/cfg/coco.names",
                            is_xywh=True)
        self.deepsort = DeepSort("deep_sort/deep/checkpoint/ckpt.t7")
        self.class_names = self.yolo3.class_names
        self.write_video = True

    def open(self, video_path):
        """Open a video file and, if enabled, the ``demo.avi`` writer.

        Returns:
            True when the capture was opened successfully.
        """
        assert os.path.isfile(video_path), "Error: path error"
        self.vdo.open(video_path)
        self.im_width = int(self.vdo.get(cv2.CAP_PROP_FRAME_WIDTH))
        self.im_height = int(self.vdo.get(cv2.CAP_PROP_FRAME_HEIGHT))

        self.area = 0, 0, self.im_width, self.im_height
        if self.write_video:
            fourcc = cv2.VideoWriter_fourcc(*'MJPG')
            self.output = cv2.VideoWriter("demo.avi", fourcc, 20,
                                          (self.im_width, self.im_height))
        return self.vdo.isOpened()

    @staticmethod
    def _real_size(real_dist, box_w, box_h):
        """Return the (width, height) a pixel box spans at distance real_dist.

        The frame's field of view at that distance is divided evenly over
        the frame's pixels; the result is in the unit of ``real_dist``.
        """
        width_scale = (2 * real_dist * math.tan(
            math.radians(Detector.FOV_HORIZONTAL_DEG / 2))) / Detector.FRAME_WIDTH
        height_scale = (2 * real_dist * math.tan(
            math.radians(Detector.FOV_VERTICAL_DEG / 2))) / Detector.FRAME_HEIGHT
        return width_scale * box_w, height_scale * box_h

    def detect(self):
        """Stream from the RealSense camera and print per-track measurements.

        Runs until an exception (including KeyboardInterrupt) occurs; the
        pipeline is always stopped on the way out.
        """
        # Configure depth and color streams
        pipeline = rs.pipeline()
        config = rs.config()
        config.enable_stream(rs.stream.depth, self.FRAME_WIDTH,
                             self.FRAME_HEIGHT, rs.format.z16, 30)
        config.enable_stream(rs.stream.color, self.FRAME_WIDTH,
                             self.FRAME_HEIGHT, rs.format.bgr8, 30)

        # Start streaming
        profile = pipeline.start(config)
        # Queried once: converts raw depth values into the distance unit that
        # is printed below; the value does not change between frames.
        depth_scale = profile.get_device().first_depth_sensor(
        ).get_depth_scale()

        xmin, ymin, xmax, ymax = 0, 0, self.FRAME_WIDTH, self.FRAME_HEIGHT
        offset = (xmin, ymin)
        try:
            while True:
                start = time.time()
                # Wait for a coherent pair of frames: depth and color
                frames = pipeline.wait_for_frames()
                depth_frame = frames.get_depth_frame()
                color_frame = frames.get_color_frame()
                if not depth_frame or not color_frame:
                    continue

                # Convert images to numpy arrays
                depth_image = np.asanyarray(depth_frame.get_data())
                color_image = np.asanyarray(color_frame.get_data())

                ori_im = color_image
                # Reverse the channel order (index 0,1,2 -> 2,1,0).
                im = ori_im[ymin:ymax, xmin:xmax, (2, 1, 0)]
                bbox_xywh, cls_conf, cls_ids = self.yolo3(im)

                if bbox_xywh is not None:
                    # Keep class 0 only and stretch the box height by 20%.
                    mask = cls_ids == 0
                    bbox_xywh = bbox_xywh[mask]
                    bbox_xywh[:, 3] *= 1.2
                    cls_conf = cls_conf[mask]
                    outputs = self.deepsort.update(bbox_xywh, cls_conf, im)
                    if len(outputs) > 0:
                        bbox_xyxy = outputs[:, :4]
                        identities = outputs[:, -1]
                        self._report_tracks(depth_image, depth_scale,
                                            bbox_xyxy, identities, offset)

                end = time.time()
                print("time: {}s, fps: {}".format(end - start,
                                                  1 / (end - start)))
        finally:
            # Stop streaming
            pipeline.stop()

    def _report_tracks(self, depth_image, depth_scale, bbox_xyxy, identities,
                       offset):
        """Print distance, real width/height and pixel corners of every box."""
        img_h, img_w = depth_image.shape[:2]
        for i, box in enumerate(bbox_xyxy):
            # (x1, y1) is the top-left and (x2, y2) the bottom-right corner,
            # in pixels, with the image origin at the top-left.
            x1, y1, x2, y2 = [int(v) for v in box]
            x1 += offset[0]
            x2 += offset[0]
            y1 += offset[1]
            y2 += offset[1]

            # Clip only the crop: a negative index would wrap around and an
            # out-of-range box would yield an empty crop.
            boxed_depth = depth_image[max(y1, 0):min(y2, img_h),
                                      max(x1, 0):min(x2, img_w)]
            if boxed_depth.size == 0:
                # No depth pixels to measure; the median would be NaN.
                continue

            depth = boxed_depth * depth_scale
            real_dist = np.median(depth)
            real_width, real_height = self._real_size(real_dist, x2 - x1,
                                                      y2 - y1)

            track_id = int(identities[i]) if identities is not None else 0
            label = '{} {}, d={:.3f} w={:.3f} h={:.3f}'.format(
                "object", track_id, real_dist, real_width, real_height)
            print(label)
            print('pixel of top left and bottom right')
            print('(', x1, ',', y1, ') (', x2, ',', y2, ')')
def main():
    """Run the live person re-identification demo on an RTSP camera.

    Keys: ``q``/Esc quits, ``r`` resets the tracker, space pauses until the
    next key press. The camera, the window and the progress bar are released
    even when the loop raises.
    """
    print('Connecting to camera')
    cap = ThreadedVideoCapture(
        'rtsp://*****:*****@[email protected]:554/Streaming/Channels/101/')
    assert cap.isOpened(), 'Unable to connect to camera'

    pbar = None
    try:
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        cam_fps = int(cap.get(cv2.CAP_PROP_FPS))
        device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

        print('Loading models')
        detector = Detector('weights/yolov5s.pt',
                            img_size=(640, 640),
                            conf_thresh=0.5,
                            iou_thresh=0.5,
                            agnostic_nms=False,
                            device=device)
        deepsort = DeepSort('weights/ckpt.t7',
                            max_dist=0.2,
                            min_confidence=0.3,
                            nms_max_overlap=0.5,
                            max_iou_distance=0.7,
                            max_age=100,
                            lingering_age=5,
                            n_init=5,
                            nn_budget=100,
                            device=device)
        bboxes_visualizer = BBoxVisualizer()
        # Smooth the displayed fps over roughly five seconds of frames.
        fps_estimator = IncrementalMeanTracker(max_count=cam_fps * 5)
        person_cls_id = detector.names.index('person')  # id of 'person' class

        print(f'Starting capture, camera_fps={cam_fps}')
        cap.start()

        win_name = 'MICA ReID Demo'
        cv2.namedWindow(win_name, cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_FREERATIO)
        cv2.resizeWindow(win_name, width, height)
        frame_id = 0
        pbar = tqdm(desc=win_name)
        while True:
            start_it = time.time()
            ret, img = cap.read()
            if not ret:
                print('Unable to read camera')
                break

            detections = detector.detect([img])[0]
            num_people = 0
            if detections is not None:
                # Keep person detections only (class id is the last column).
                detections = detections[detections[:, -1].eq(person_cls_id)]
                xywh, confs = parse_detection(detections)
                outputs = deepsort.update(xywh, confs, img)
                num_people = len(outputs)

                # Forget tracks that were deleted or not updated recently.
                bboxes_visualizer.remove([
                    t.track_id for t in deepsort.tracker.tracks
                    if t.time_since_update > 3 or t.is_deleted()
                ])
                bboxes_visualizer.update(outputs)

                # draw detections
                for pid in outputs[:, -1]:
                    bboxes_visualizer.box(img,
                                          pid,
                                          label=f'Person {pid}',
                                          line_thickness=5,
                                          trail_trajectory=True,
                                          trail_bbox=False)

            # draw counting
            count_str = f'Number of people: {num_people}'
            img = bboxes_visualizer.text(img,
                                         count_str, (960, 25),
                                         fontScale=0.8,
                                         box_alpha=0.4,
                                         color=(255, 255, 255),
                                         box_color=(0, 0, 0))

            # show
            cv2.imshow(win_name, img)
            key = cv2.waitKey(1)

            elapsed_time = time.time() - start_it
            fps = fps_estimator.update(1 / elapsed_time)
            desc = f'[{frame_id:06d}] num_detections={num_people} fps={fps:.02f} elapsed_time={elapsed_time:.03f}'
            pbar.update()
            pbar.set_description(desc)

            # check key pressed
            if key == ord('q') or key == 27:  # q or esc to quit
                break
            elif key == ord('r'):  # r to reset tracking
                deepsort.reset()
                bboxes_visualizer.clear()
            elif key == 32:  # space to pause
                key = cv2.waitKey(0)
                if key == ord('q') or key == 27:
                    break
            frame_id += 1
    finally:
        if pbar is not None:
            pbar.close()
        cv2.destroyAllWindows()
        cap.release()
bbox_xywh[:, 3] = bbox_xywh[:, 3] - bbox_xywh[:, 1] bbox_xywh[:, 0] = bbox_xywh[:, 0] + (bbox_xywh[:, 2]) / 2 bbox_xywh[:, 1] = bbox_xywh[:, 1] + (bbox_xywh[:, 3]) / 2 cls_conf = output[:, 5] cls_ids = output[:, 7] if bbox_xywh is not None: mask = cls_ids == 0.0 bbox_xywh = bbox_xywh[mask] cls_conf = cls_conf[mask] #if bbox_xywh[0]==0 and bbox_xywh[1]==0 and bbox_xywh[2]==0 and bbox_xywh[3]==0:continue #print("***********{}".format(bbox_xywh)) #cv2.imshow("debug",orig_im) #cv2.waitKey(0) outputs = deepsort.update(bbox_xywh, cls_conf, orig_im) #Bbox+ID,naarry 3,5 ####################################################################################### # print('outputs = {}'.format(outputs)) # outputs = np.array(outputs) # print(outputs) # # now_time = time.time() # diff_time = now_time-last_time # last_time = now_time # print('diff_time = {}'.format(diff_time)) # # distance = [] # speed = [] # # a = time.time() # for i in range(outputs.shape[0]): # if last.shape[0] == 0:
# Detect objects in every image of the dataset, track them with Deep SORT and
# hand every track to the evaluator. With INTERACTIVE set, detections (blue)
# and tracks (green) are also drawn on the image.
sort = DeepSort('checkpoint/net', n_init=2)
# paddle.enable_static()
font = cv2.FONT_HERSHEY_SIMPLEX
threshold = 0.1  # minimum detection score that is kept
for i in tqdm(ds.file_list):
    image_name = i[0]
    im = cv2.imread(image_name)
    start = time.time()
    result = model.predict(im)
    # paddle.disable_static()

    result = [det for det in result if det['score'] > threshold]
    bboxes = np.array([np.array(det['bbox']) for det in result])
    confidence = [det['score'] for det in result]

    track = sort.update(bboxes, confidence, im)

    if INTERACTIVE:
        for value in result:
            # The builtin int replaces the np.int alias, which NumPy removed.
            xmin, ymin, w, h = np.array(value['bbox']).astype(int)
            category = value['category']
            score = value['score']
            cv2.rectangle(im, (xmin, ymin), (xmin + w, ymin + h),
                          (255, 0, 0), 4)
            cv2.putText(im,
                        '{:s} {:.3f}'.format(category, score), (xmin, ymin),
                        font,
                        0.5, (0, 225, 0),
                        thickness=1)

    # Each row is unpacked as (x, y, w, h, track id, confidence). The id gets
    # its own name so the sequence being iterated is not rebound mid-loop.
    for value in track:
        x, y, w, h, track_id, conf = value
        if INTERACTIVE:
            cv2.rectangle(im, (x, y), (x + w, y + h), (0, 255, 0), 4)
            cv2.putText(im,
                        '{:d} {:d}'.format(track_id, track_id), (x, y),
                        font,
                        0.5, (255, 0, 0),
                        thickness=2)
        evaluator.write_target(track_id,
                               left=x,
                               top=y,
                               width=w,
                               height=h,
                               conf=1)
# Detect in every frame, track the detections with Deep SORT, label each
# track with its id and refine the box from the predicted landmarks.
detector = YOLOv3(0.5, 0.4)
frame_idx = 0
for im in video.list:
    frame_idx += 1
    start = time.time()
    detections = detector.detect(im)

    # Crop every detection out of the frame; the tracker receives the crops.
    imgs = []
    for d in detections:
        # The builtin int replaces the np.int alias, which NumPy removed.
        d = d[:4].astype(int)
        imgs.append(im[d[1]:d[3], d[0]:d[2], :])

    detections, ids = deepsort.update(detections, imgs)
    for detection, track_id in zip(detections, ids):
        detection = detection.astype(int)
        img = crop_img(im, detection[:4])

        label = "id:{}".format(track_id)
        t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 2, 2)[0]
        # Wrap the id so that ids beyond the palette size still get a colour.
        cv2.putText(im, label,
                    (detection[0], detection[1] + t_size[1] + 4),
                    cv2.FONT_HERSHEY_PLAIN, 2,
                    COLORS_10[track_id % len(COLORS_10)], 2)

        # First pass on the tracker box, then again on the box derived from
        # the predicted landmarks.
        params = predictor.predict(img)
        kpt = predictor.pst68(params, detection)
        new_box = parse_roi_box_from_landmark(kpt)
        detection = new_box.astype(int)
        img = crop_img(im, detection[:4])
        params = predictor.predict(img)
def main(): args = get_parser().parse_args() if args.display: cv2.namedWindow("out_vid", cv2.WINDOW_NORMAL) cv2.resizeWindow("out_vid", 960, 720) sort = Sort() deepsort = DeepSort(args.deepsort_checkpoint, nms_max_overlap=args.nms_max_overlap, use_cuda=bool(strtobool(args.use_cuda))) assert os.path.isfile( os.path.join(args.input, 'via_export_json.json' )), "Error: path error, via_export_json.json not found" ''' if args.out_vid: out_vid = cv2.VideoWriter( filename=args.out_vid, fourcc=cv2.VideoWriter_fourcc(*'MJPG'), fps=args.fps, frameSize=(1920, 1440), ) ''' if args.out_txt: out_txt = open(args.out_txt, "w+") total_counter = [0] * 1000 json_file = os.path.join(args.input, 'via_export_json.json') with open(json_file) as f: imgs_anns = json.load(f) for idx, v in tqdm(enumerate(imgs_anns.values()), total=len(imgs_anns.values())): filename = os.path.join(args.input, v["filename"]) annos = v["regions"] polys = [] dets = [] for anno in annos: region_attributes = anno["region_attributes"] if not region_attributes: break anno = anno["shape_attributes"] if anno["name"] != "polygon": break px = anno["all_points_x"] py = anno["all_points_y"] poly = np.array([[x, y] for x, y in zip(px, py)], np.int32).reshape((-1, 1, 2)) if int(region_attributes["category_id"]): dets.append( [np.min(px), np.min(py), np.max(px), np.max(py), 1]) polys.append(poly) start = time.time() im = cv2.imread(filename) current_counter = [] if args.tracker == 'sort': if len(dets): dets = np.array(dets) else: dets = np.empty((0, 5)) outputs = sort.update(dets) outputs = np.array([element.clip(min=0) for element in outputs]).astype(int) else: if len(dets): ccwh_boxes = [] for det in dets: ccwh_boxes.append([(det[0] + det[2]) / 2, (det[1] + det[3]) / 2, det[2] - det[0], det[3] - det[1]]) ccwh_boxes = np.array(ccwh_boxes) confidences = np.ones(len(dets)) outputs, __ = deepsort.update(ccwh_boxes, confidences, im) else: outputs = [] if len(outputs): tlbr_boxes = outputs[:, :4] identities = current_counter = 
outputs[:, -1] ordered_identities = [] for identity in identities: if not total_counter[identity]: total_counter[identity] = max(total_counter) + 1 ordered_identities.append(total_counter[identity]) im = draw_bboxes(im, tlbr_boxes, ordered_identities, binary_masks=[]) if args.out_txt: for i in range(len(ordered_identities)): tlbr = tlbr_boxes[i] line = [ idx + 1, ordered_identities[i], tlbr[0], tlbr[1], tlbr[2] - tlbr[0], tlbr[3] - tlbr[1], 1, 1, 1 ] out_txt.write(",".join(str(item) for item in line) + "\n") end = time.time() im = draw_polys(im, polys) im = cv2.putText(im, "Frame ID: " + str(idx), (20, 20), 0, 5e-3 * 200, (0, 255, 0), 2) time_fps = "Time: {}s, fps: {}".format(round(end - start, 2), round(1 / (end - start), 2)) im = cv2.putText(im, time_fps, (20, 60), 0, 5e-3 * 200, (0, 255, 0), 3) im = cv2.putText(im, 'Groundtruth2' + args.tracker, (20, 100), 0, 5e-3 * 200, (0, 255, 0), 3) im = cv2.putText(im, "Current Hand Counter: " + str(len(current_counter)), (20, 140), 0, 5e-3 * 200, (0, 255, 0), 2) im = cv2.putText(im, "Total Hand Counter: " + str(max(total_counter)), (20, 180), 0, 5e-3 * 200, (0, 255, 0), 2) if args.display: cv2.imshow("out_vid", im) cv2.waitKey(1) '''
class Detector(object):
    """Run CenterNet + Deep SORT over an image sequence and write MOT results."""

    def __init__(self, centernet_opt, args):
        # CenterNet detector
        self.detector = detector_factory[centernet_opt.task](centernet_opt)
        # Deep SORT
        self.deepsort = DeepSort(args.deepsort_checkpoint,
                                 args.max_cosine_distance, args.use_cuda,
                                 args.use_original_model)
        self.debug = args.debug
        if self.debug:
            # makedirs also creates missing parents and tolerates an
            # already-existing directory.
            os.makedirs(args.debug_dir, exist_ok=True)
        self.args = args

    def run(self, sequence_dir, output_file):
        """Track every frame of ``sequence_dir`` and write ``output_file``.

        Each output line is
        ``frame,id,left,top,width,height,1,-1,-1,-1``.
        With ``args.debug`` set, per-frame detection and track images are
        written to ``args.debug_dir``.
        """
        assert os.path.isdir(sequence_dir), "Invalid sequence dir: {}".format(sequence_dir)

        seq_info = gather_sequence_info(sequence_dir, None)
        print("Start to handle sequence: {} (image size: {}, frame {} - {})".format(
            seq_info["sequence_name"], seq_info["image_size"],
            seq_info["min_frame_idx"], seq_info["max_frame_idx"]))

        start_time = time.time()
        frame_cnt = 0
        results = []
        for frame in range(seq_info["min_frame_idx"], seq_info["max_frame_idx"] + 1):
            frame_image = seq_info["image_filenames"][frame]
            frame_cnt += 1

            image = cv2.imread(frame_image)
            # The detector receives the image path; entry 1 of its results
            # is used (NOTE(review): presumably the class id -- confirm).
            detection_result = self.detector.run(frame_image)["results"][1]
            xywh, conf = Detector._bbox_to_xywh_cls_conf(
                detection_result, self.args.min_confidence)
            output = self.deepsort.update(xywh, conf, image)

            for x1, y1, x2, y2, track_id in output:
                # Stored as top-left + width/height.
                results.append((frame, track_id, x1, y1, x2 - x1, y2 - y1))

            # Cumulative time and average FPS since the start of the run.
            elapsed_time = time.time() - start_time
            print("Frame {:05d}, Time {:.3f}s, FPS {:.3f}".format(
                frame_cnt, elapsed_time, frame_cnt / elapsed_time))

            if self.debug:
                detect_xyxy = detection_result[
                    detection_result[:, 4] > self.args.min_confidence, :4]
                detect_image = draw_bboxes(image, detect_xyxy)
                cv2.imwrite(
                    os.path.join(self.args.debug_dir,
                                 "{}-{:05}-detect.jpg".format(seq_info["sequence_name"], frame)),
                    detect_image)

                if len(output) == 0:
                    continue

                # Re-read the frame so the track image starts from a frame
                # without the detection boxes drawn above.
                image = cv2.imread(frame_image)
                track_image = draw_bboxes(image, output[:, :4], output[:, -1])
                cv2.imwrite(
                    os.path.join(self.args.debug_dir,
                                 "{}-{:05}-track.jpg".format(seq_info["sequence_name"], frame)),
                    track_image)

        # Report the path that is actually written below (the parameter);
        # no other name for it is defined in this method.
        print("Done. Now write output to {}".format(output_file))
        with open(output_file, mode="w") as f:
            for row in results:
                f.write("%d,%d,%.2f,%.2f,%.2f,%.2f,1,-1,-1,-1\n" % (
                    row[0], row[1], row[2], row[3], row[4], row[5]))

    @staticmethod
    def _bbox_to_xywh_cls_conf(bbox, min_confidence):
        """Filter (x1, y1, x2, y2, score) rows and convert them to centre form.

        Returns:
            (xywh, conf): centre-x, centre-y, width, height of every row
            whose score exceeds ``min_confidence``, and those scores.
        """
        # Boolean-mask indexing copies, so the caller's array is untouched.
        bbox = bbox[bbox[:, 4] > min_confidence, :]
        bbox[:, 2] = bbox[:, 2] - bbox[:, 0]  # width
        bbox[:, 3] = bbox[:, 3] - bbox[:, 1]  # height
        bbox[:, 0] = bbox[:, 0] + bbox[:, 2] / 2  # centre x
        bbox[:, 1] = bbox[:, 1] + bbox[:, 3] / 2  # centre y
        return bbox[:, :4], bbox[:, 4]
def main():
    """Detect hands with detectron2 and track them with SORT or Deep SORT.

    Every frame is annotated with boxes, timing and counters; depending on
    the arguments it is displayed, written to a video and/or logged to a
    MOT-style text file.
    """
    args = get_parser().parse_args()
    if args.display:
        cv2.namedWindow("out_vid", cv2.WINDOW_NORMAL)
        cv2.resizeWindow("out_vid", 960, 720)

    sort = Sort()
    deepsort = DeepSort(args.deepsort_checkpoint,
                        nms_max_overlap=args.nms_max_overlap,
                        use_cuda=bool(strtobool(args.use_cuda)))

    assert os.path.isfile(
        args.input), "Error: path error, input file not found"

    out_vid = None
    out_txt = None
    inp_vid = None
    try:
        if args.out_vid:
            # NOTE(review): the frame size is fixed; frames of another size
            # are presumably not written correctly -- confirm.
            out_vid = cv2.VideoWriter(
                filename=args.out_vid,
                fourcc=cv2.VideoWriter_fourcc(*'MJPG'),
                fps=args.fps,
                frameSize=(1920, 1440),
            )

        if args.out_txt:
            out_txt = open(args.out_txt, "w+")

        # Maps a tracker id to a consecutive display id (1, 2, 3, ...) in
        # order of first appearance. A dict has no upper bound on the
        # tracker id, unlike a fixed-size list.
        total_counter = {}

        inp_vid = cv2.VideoCapture(args.input)
        num_frames = int(inp_vid.get(cv2.CAP_PROP_FRAME_COUNT))

        predictor = DefaultPredictor(setup_cfg(args))

        for frameID in tqdm(range(num_frames)):
            ret, im = inp_vid.read()
            if not ret:
                # The reported frame count can exceed the decodable frames.
                break

            start = time.time()

            dets, masks, region = detectron2(im, args, predictor)

            if args.region_based:
                im = region

            if args.tracker == 'sort':
                if len(dets):
                    dets = np.array(dets)
                else:
                    dets = np.empty((0, 5))

                outputs = sort.update(dets)
                outputs = np.array([element.clip(min=0)
                                    for element in outputs]).astype(int)
            else:
                if len(dets):
                    # Corner boxes -> (centre x, centre y, width, height).
                    ccwh_boxes = np.array([[(det[0] + det[2]) / 2,
                                            (det[1] + det[3]) / 2,
                                            det[2] - det[0],
                                            det[3] - det[1]]
                                           for det in dets])
                    confidences = np.ones(len(dets))

                    outputs, __ = deepsort.update(ccwh_boxes, confidences, im)
                else:
                    outputs = []

            current_counter = []
            if len(outputs):
                tlbr_boxes = outputs[:, :4]
                identities = current_counter = outputs[:, -1]
                ordered_identities = []
                for identity in identities:
                    if identity not in total_counter:
                        total_counter[identity] = len(total_counter) + 1
                    ordered_identities.append(total_counter[identity])

                im = draw_bboxes(im,
                                 tlbr_boxes,
                                 ordered_identities,
                                 binary_masks=masks)
                if out_txt is not None:
                    for display_id, tlbr in zip(ordered_identities,
                                                tlbr_boxes):
                        # frame, id, left, top, width, height, 1, 1, 1
                        line = [
                            frameID + 1, display_id, tlbr[0], tlbr[1],
                            tlbr[2] - tlbr[0], tlbr[3] - tlbr[1], 1, 1, 1
                        ]
                        out_txt.write(",".join(str(item) for item in line) +
                                      "\n")

            end = time.time()
            im = cv2.putText(im, "Frame ID: " + str(frameID + 1), (20, 30), 0,
                             5e-3 * 200, (0, 255, 0), 2)
            time_fps = "Time: {}s, fps: {}".format(round(end - start, 2),
                                                   round(1 / (end - start), 2))
            im = cv2.putText(im, time_fps, (20, 60), 0, 5e-3 * 200,
                             (0, 255, 0), 3)
            im = cv2.putText(
                im,
                os.path.basename(args.config_file) + ' ' + args.tracker,
                (20, 90), 0, 5e-3 * 200, (0, 255, 0), 3)
            im = cv2.putText(im,
                             "Current Hand Counter: " +
                             str(len(current_counter)), (20, 120), 0,
                             5e-3 * 200, (0, 255, 0), 2)
            im = cv2.putText(im,
                             "Total Hand Counter: " + str(len(total_counter)),
                             (20, 150), 0, 5e-3 * 200, (0, 255, 0), 2)

            if args.display:
                cv2.imshow("out_vid", im)
                cv2.waitKey(1)

            if out_vid is not None:
                out_vid.write(im)
    finally:
        # Flush and release everything, including on errors.
        if out_txt is not None:
            out_txt.close()
        if out_vid is not None:
            out_vid.release()
        if inp_vid is not None:
            inp_vid.release()
class Detector(object):
    """YOLOv3 + DeepSORT tracking demo over a video file.

    Intended to be used as a context manager: ``__enter__`` opens the video
    (and the optional output writer) and ``__exit__`` releases them again.
    """

    def __init__(self, args):
        """Build the detector and tracker from parsed command-line ``args``."""
        self.args = args
        if args.display:
            cv2.namedWindow("test", cv2.WINDOW_NORMAL)
            cv2.resizeWindow("test", args.display_width, args.display_height)

        device = torch.device(
            'cuda') if torch.cuda.is_available() else torch.device('cpu')
        self.vdo = cv2.VideoCapture()
        self.yolo3 = InferYOLOv3(args.yolo_cfg,
                                 args.img_size,
                                 args.yolo_weights,
                                 args.data_cfg,
                                 device,
                                 conf_thres=args.conf_thresh,
                                 nms_thres=args.nms_thresh)
        self.deepsort = DeepSort(args.deepsort_checkpoint)
        self.class_names = self.yolo3.classes

    def __enter__(self):
        """Open ``args.VIDEO_PATH`` and, if requested, the output writer.

        Raises:
            AssertionError: if the path is not a file or cannot be opened.
        """
        assert os.path.isfile(self.args.VIDEO_PATH), "Error: path error"
        self.vdo.open(self.args.VIDEO_PATH)
        self.im_width = int(self.vdo.get(cv2.CAP_PROP_FRAME_WIDTH))
        self.im_height = int(self.vdo.get(cv2.CAP_PROP_FRAME_HEIGHT))

        if self.args.save_path:
            fourcc = cv2.VideoWriter_fourcc(*'MJPG')
            self.output = cv2.VideoWriter(self.args.save_path, fourcc, 20,
                                          (self.im_width, self.im_height))

        assert self.vdo.isOpened()
        return self

    def __exit__(self, exc_type, exc_value, exc_traceback):
        """Release the capture and writer, then report any exception.

        Returns None, so an exception raised inside the ``with`` body still
        propagates to the caller.
        """
        self.vdo.release()
        # The writer only exists when a save path was given and __enter__
        # got far enough to create it.
        output = getattr(self, "output", None)
        if output is not None:
            output.release()
        if exc_type:
            print(exc_type, exc_value, exc_traceback)

    def detect(self):
        """Detect and track on the video, processing two of every three frames.

        Frames whose index is a multiple of 3 are grabbed but not decoded.
        For every processed frame a timing line is printed; the annotated
        frame is shown and/or written depending on ``args``.
        """
        frame_cnt = -1
        while self.vdo.grab():
            frame_cnt += 1

            # Drop every third frame (indices 0, 3, 6, ...).
            if frame_cnt % 3 == 0:
                continue

            start = time.time()
            _, ori_im = self.vdo.retrieve()
            im = ori_im

            t1_begin = time.time()
            bbox_xxyy, cls_conf, cls_ids = self.yolo3.predict(im)
            t1_end = time.time()

            t2_begin = time.time()
            if bbox_xxyy is not None:
                bbox_xcycwh = xyxy2xywh(bbox_xxyy)
                outputs = self.deepsort.update(bbox_xcycwh, cls_conf, im)
                if len(outputs) > 0:
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, -1]
                    ori_im = draw_bboxes(ori_im, bbox_xyxy, identities)
            t2_end = time.time()

            end = time.time()
            print(
                "frame:%d|det:%.4f|sort:%.4f|total:%.4f|det p:%.2f%%|fps:%.2f"
                % (frame_cnt, (t1_end - t1_begin), (t2_end - t2_begin),
                   (end - start), ((t1_end - t1_begin) * 100 /
                                   ((end - start))), (1 / (end - start))))

            if self.args.display:
                cv2.imshow("test", ori_im)
                cv2.waitKey(1)

            if self.args.save_path:
                self.output.write(ori_im)
# NOTE(review): this class was reformatted from a whitespace-collapsed source.
# The nesting below is a best reconstruction -- in particular the placement of
# the `detections_dir`, `save_txt` and `update_tracks` sections inside
# `detect` -- and should be confirmed against the original file.
class Detector(object):
    """Detectron2 + DeepSORT tracking over a video file or an image folder.

    ``args.image_input`` selects the mode: when false, frames are read from
    the video at ``args.VIDEO_PATH``; when true, ``args.VIDEO_PATH`` is
    globbed as a directory of images.  Intended to be used as a context
    manager.
    """

    def __init__(self, args):
        """Build the Detectron2 predictor and the DeepSort tracker from ``args``."""
        self.args = args
        use_cuda = bool(strtobool(self.args.use_cuda))
        if args.display:
            cv2.namedWindow("test", cv2.WINDOW_NORMAL)
            cv2.resizeWindow("test", args.display_width, args.display_height)

        # The capture object is only created in video mode.
        if not args.image_input:
            self.vdo = cv2.VideoCapture()
        cfg = get_cfg()
        #cfg.merge_from_file("detectron2_repo/configs/COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x.yaml")
        #cfg.MODEL.WEIGHTS = "detectron2://COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x/139686956/model_final_5ad38f.pkl"
        cfg.merge_from_file("../detectron2_repo/configs/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml")
        cfg.MODEL.WEIGHTS = args.detectron2_weights #"detectron2://Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv/18131413/model_0039999_e76410.pkl"
        cfg.MODEL.MASK_ON = False
        cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1
        #cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
        cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST = 0.5
        self.predictor = DefaultPredictor(cfg)

        self.deepsort = DeepSort(args.deepsort_checkpoint, use_cuda=use_cuda, extractor_type=args.extractor_type, game_id=args.game_id, team_0=args.team_0)
        #self.class_names = self.yolo3.class_names

    def __enter__(self):
        """Open the input and create the optional outputs.

        Sets ``im_width``/``im_height`` from the video or from the first
        image, opens the video writer when ``args.save_path`` is set,
        recreates the ``supervisely/img`` directory when ``args.save_frames``
        is set, and opens ``gt.txt`` for writing when ``args.save_txt`` is set.
        """
        # NOTE(review): this method reads the bare name `args`, not
        # `self.args`; it only works if a module-level `args` exists (not
        # visible here) -- confirm.
        if not args.image_input:
            assert os.path.isfile(self.args.VIDEO_PATH), "Error: path error"
            self.vdo.open(self.args.VIDEO_PATH)
            assert self.vdo.isOpened()
            self.im_width = int(self.vdo.get(cv2.CAP_PROP_FRAME_WIDTH))
            self.im_height = int(self.vdo.get(cv2.CAP_PROP_FRAME_HEIGHT))
        else:
            self.img_list = sorted(glob.glob(os.path.join(self.args.VIDEO_PATH, "*")))
            img_test = cv2.imread(self.img_list[0])
            self.im_height, self.im_width = img_test.shape[:2]

        if self.args.save_path:
            fourcc = cv2.VideoWriter_fourcc(*'MJPG')
            self.output = cv2.VideoWriter(self.args.save_path, fourcc, args.save_fps, (self.im_width, self.im_height))

        if self.args.save_frames:
            # Start from an empty output directory.
            if os.path.exists('supervisely'):
                import shutil
                shutil.rmtree('supervisely')
            os.makedirs('supervisely')
            os.makedirs('supervisely/img')

        if self.args.save_txt:
            self.txt = open('gt.txt', "w")

        return self

    def __exit__(self, exc_type, exc_value, exc_traceback):
        """Print exception details, if any; the exception is not suppressed."""
        # NOTE(review): neither the capture, the writer nor `self.txt` is
        # released or closed here.
        if exc_type:
            print(exc_type, exc_value, exc_traceback)

    def detect(self):
        """Run detection and tracking over the input frames.

        Per frame: obtain the image, run the predictor, build centre-format
        boxes (optionally replaced by boxes loaded from annotation JSON when
        ``args.detections_dir`` is non-empty), update DeepSort, draw the
        tracks, and optionally write text lines, updated annotations, the
        display window and the output video.
        """
        # `start` is taken once, before the loop, so the "time"/"fps" printed
        # per frame below are measured from the start of the whole run.
        start = time.time()
        if not args.image_input:
            # Video mode only processes the window [start_second, end_second].
            start_second = 0
            end_second = 8
            fps = self.vdo.get(cv2.CAP_PROP_FPS)

            print('fps: ', fps)
            start_frameid = start_second * fps
            end_frameid = end_second * fps
        else:
            frame_id = 0

        if self.args.update_tracks:
            # NOTE(review): `shutil` is only imported locally inside
            # __enter__ in the visible code; this relies on a module-level
            # import not shown here -- confirm.
            shutil.copytree(self.args.detections_dir, self.args.detections_dir + '_tracked')

        while True:
            # NOTE(review): in video mode `frame_id` has not been assigned
            # before this first use -- looks like a NameError; confirm.
            print(f'FRAME_ID: {frame_id}')
            logging.debug(f'FRAME_ID: {frame_id}')
            new_sequence = False
            if not args.image_input:
                # Property id 1 is presumably CAP_PROP_POS_FRAMES -- confirm.
                frame_id = int(round(self.vdo.get(1)))
                if frame_id < start_frameid:
                    # NOTE(review): no frame is read before `continue`, so
                    # this would spin forever if start_second were > 0.
                    continue
                elif frame_id > end_frameid:
                    break
                _, ori_im = self.vdo.read() # retrieve()
            else:
                if frame_id>=(len(self.img_list)):
                    break
                if frame_id > 1:
                    prev_im = ori_im
                ori_im = cv2.imread(self.img_list[frame_id])
                if frame_id > 1:
                    # Compare consecutive images with ECC; a correlation
                    # below the threshold marks the start of a new sequence.
                    # `warp_matrix`, `warp_mode` and `criteria` are not
                    # defined in this class (presumably module globals).
                    im1_gray = cv2.cvtColor(prev_im, cv2.COLOR_RGB2GRAY)
                    im2_gray = cv2.cvtColor(ori_im, cv2.COLOR_RGB2GRAY)
                    cc, _ = cv2.findTransformECC(im1_gray, im2_gray, warp_matrix, warp_mode, criteria, None, 1)
                    new_sequence = cc < args.ecc_threshold
                    logging.debug(f'ECC: {cc}')
                # From here on `frame_id - 1` is the index of the current image.
                frame_id+=1
            logging.debug(f'NEW_SEQUENCE: {new_sequence}')

            if self.args.save_frames:
                if not args.image_input:
                    cv2.imwrite(f'./supervisely/img/img_{frame_id:05}.jpg', ori_im)
                else:
                    # Reuses the last 13 characters of the source path as the file name.
                    cv2.imwrite(f'./supervisely/img/' + self.img_list[frame_id-1][-13:], ori_im)

            im = ori_im
            predictions = self.predictor(im)
            instances = predictions["instances"]

            if instances.pred_classes.numel() > 0:
                #print(instances.pred_classes)
                mask = instances.pred_classes == 0
                scores = instances.scores[mask]
                pred_boxes = instances.pred_boxes[mask]
                xcyc = pred_boxes.get_centers()
                # Width/height computed as (x2 - x1 + 1, y2 - y1 + 1); the
                # ones tensor is moved to CUDA unconditionally.
                wh = pred_boxes.tensor[:, 2:] - pred_boxes.tensor[:, :2] + torch.ones(pred_boxes.tensor[:, 2:].size()).cuda()
                wh_min, _ = torch.min(wh, 1)
                # if "pred_masks" in instances.keys():
                #     pred_masks = instances["pred_masks"][mask]

                # Keep only boxes whose shorter side is at least 4.
                # NOTE(review): `cls_conf` below is not filtered with the
                # same `wh_min` condition, so the two arrays can differ in
                # length -- confirm this is intended.
                bbox_xcycwh = torch.cat((xcyc, wh), 1)[wh_min >=4].detach().cpu().numpy()
                cls_conf = scores.detach().cpu().numpy()

                if self.args.detections_dir!="":
                    # Replace the detector boxes with boxes read from the
                    # annotation JSON of the current image.
                    # NOTE(review): uses `self.img_list`, which is only set
                    # in image mode.
                    ann_dir = os.path.join(self.args.detections_dir)
                    ann = os.path.basename(self.img_list[frame_id-1]) + ".json"
                    ann_path = os.path.join(ann_dir, 'MOT', 'ann', ann)
                    with open(ann_path) as f:
                        ann_dict = json.load(f)
                    bboxes = []
                    for obj in ann_dict['objects']:
                        # Two exterior points are flattened to four numbers,
                        # normalised to (min x, min y, max x, max y), then
                        # converted to (cx, cy, w, h).
                        bbox = obj["points"]["exterior"]
                        bbox = bbox[0]+bbox[1]
                        bbox = [min(bbox[0], bbox[2]), min(bbox[1], bbox[3]), max(bbox[0], bbox[2]), max(bbox[1], bbox[3])]
                        bboxes.append([(bbox[2]+bbox[0])/2, (bbox[3]+bbox[1])/2, bbox[2]-bbox[0], bbox[3]-bbox[1]])
                    bbox_xcycwh = np.array(bboxes)
                    cls_conf = np.ones(bbox_xcycwh.shape[0])
                #print(bbox_xcycwh, cls_conf)

                #bbox_xcycwh[:, 3:] *= 1.2

                outputs, detections = self.deepsort.update(bbox_xcycwh, cls_conf, im, new_sequence, frame_id-1, self.img_list[frame_id-1])
                self.deepsort.export('/content')
                if len(outputs) > 0:
                    bbox_xyxy = outputs[:, :4]
                    #dh = ((0.1/1.2)*(bbox_xyxy[:,3]-bbox_xyxy[:,1])).astype(int)
                    #bbox_xyxy[:,1] += dh
                    #bbox_xyxy[:,3] -= dh
                    # Column layout of `outputs` as consumed here: 0-3 box,
                    # 4 identity, 5 match method, 6 number, 7-10 number box,
                    # 11 detection id, 12 min cost.
                    identities = outputs[:, 4]
                    match_method = outputs[:, 5]
                    number = outputs[:, 6]
                    number_bbox = outputs[:, 7:11]
                    detection_id = outputs[:, 11]
                    min_cost = outputs[:, 12]
                    draw_im = draw_bboxes(frame_id, new_sequence, ori_im, bbox_xyxy, identities, match_method, number, number_bbox, detection_id, min_cost)

                    if self.args.save_txt:
                        # One line per track: frame, id, left, top, width, height, then fixed fields.
                        for j in range(bbox_xyxy.shape[0]):
                            x1 = bbox_xyxy[j,0]
                            y1 = bbox_xyxy[j,1]
                            x2 = bbox_xyxy[j,2]
                            y2 = bbox_xyxy[j,3]
                            self.txt.write(f'{frame_id},{identities[j]},{x1},{y1},{x2-x1},{y2-y1},1,0,-1,-1\n')

                if self.args.update_tracks:
                    # Tag each annotation object with the track id of the
                    # detection at the same index and write it to the
                    # '_tracked' copy.  `ann` and `ann_dict` are only set
                    # when `detections_dir` is non-empty.
                    ann_path = os.path.join(self.args.detections_dir + '_tracked', 'MOT', 'ann', ann)
                    print(ann_path)
                    for idx, obj in enumerate(ann_dict['objects']):
                        obj["tags"] = [{"name": "track_id", "value": detections[idx].track_id}]
                    with open(ann_path, 'w') as f:
                        json.dump(ann_dict, f)

            end = time.time()
            print("time: {}s, fps: {}".format(end - start, 1 / (end - start)))

            # NOTE(review): `draw_im` is only assigned when the tracker
            # returned at least one track; on earlier frames this looks like
            # a NameError, on later ones the previous drawing is reused.
            if self.args.display:
                cv2.imshow("test", draw_im)
                cv2.waitKey(1)

            if self.args.save_path:
                self.output.write(draw_im)
class Detector(object):
    """CenterNet + DeepSORT person-tracking demo over a video file."""

    def __init__(self, opt):
        """Create the capture, the CenterNet detector and the tracker.

        Args:
            opt: CenterNet options; ``opt.task`` selects the detector class
                from ``detector_factory``.
        """
        self.vdo = cv2.VideoCapture()
        self.detector = detector_factory[opt.task](opt)
        self.deepsort = DeepSort("deep/checkpoint/ckpt.t7")
        self.write_video = True

    def open(self, video_path):
        """Open ``video_path`` and, if enabled, the ``demo1.avi`` writer.

        Returns:
            True when the capture was opened successfully.

        Raises:
            AssertionError: if ``video_path`` is not an existing file.
        """
        assert os.path.isfile(video_path), "Error: path error"
        self.vdo.open(video_path)
        self.im_width = int(self.vdo.get(cv2.CAP_PROP_FRAME_WIDTH))
        self.im_height = int(self.vdo.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # Region of interest (xmin, ymin, xmax, ymax): the whole frame.
        self.area = 0, 0, self.im_width, self.im_height
        if self.write_video:
            fourcc = cv2.VideoWriter_fourcc(*'MJPG')
            self.output = cv2.VideoWriter("demo1.avi", fourcc, 20,
                                          (self.im_width, self.im_height))
        return self.vdo.isOpened()

    def detect(self):
        """Detect persons on every frame, track them and show/record the result."""
        xmin, ymin, xmax, ymax = self.area
        # Loop invariants: CenterNet result key for "person" and the minimum
        # detection score that is kept.
        person_id = 1
        confidence = 0.5

        while self.vdo.grab():
            start = time.time()
            _, ori_im = self.vdo.retrieve()
            # Crop to the region of interest and reverse the channel order.
            im = ori_im[ymin:ymax, xmin:xmax, (2, 1, 0)]

            bbox = self.detector.run(im)['results'][person_id]
            bbox = bbox[bbox[:, 4] > confidence, :]

            # Columns 2 and 3 become width and height.
            # NOTE(review): columns 0 and 1 are left as the corner
            # coordinates rather than box centres -- confirm this is the box
            # format this DeepSort build expects.
            bbox[:, 2] = bbox[:, 2] - bbox[:, 0]
            bbox[:, 3] = bbox[:, 3] - bbox[:, 1]

            cls_conf = bbox[:, 4]

            outputs = self.deepsort.update(bbox[:, :4], cls_conf, im)
            if len(outputs) > 0:
                bbox_xyxy = outputs[:, :4]
                identities = outputs[:, -1]
                ori_im = draw_bboxes(ori_im, bbox_xyxy, identities,
                                     offset=(xmin, ymin))

            end = time.time()
            print("centernet time: {}s, fps: {}".format(
                end - start, 1 / (end - start)))

            cv2.imshow("test", ori_im)
            cv2.waitKey(1)

            if self.write_video:
                self.output.write(ori_im)
# NOTE(review): this class was reformatted from a whitespace-collapsed source.
# The nesting inside `detect` (especially around the path/velocity logic) is a
# best reconstruction and should be confirmed against the original file.
class Detector(object):
    """YOLOv3 + DeepSORT demo that also records per-person ground paths.

    ``detect`` relies on several names that are not defined in this class
    (``unseen_frame``, ``people_path``, ``direction_start``, ``area_dic``,
    ``distance_threshold``, ``subroi``, ``get_bbox_area``, ``pu``, ``mp``);
    they are presumably module-level state and helpers -- confirm.
    """

    def __init__(self, args):
        """Build the YOLOv3 detector and the DeepSort tracker from ``args``."""
        self.args = args
        if args.display:
            cv2.namedWindow("test", cv2.WINDOW_NORMAL)
            cv2.resizeWindow("test", args.display_width, args.display_height)

        self.vdo = cv2.VideoCapture()
        self.yolo3 = YOLOv3(args.yolo_cfg, args.yolo_weights, args.yolo_names, is_xywh=True, conf_thresh=args.conf_thresh, nms_thresh=args.nms_thresh)
        self.deepsort = DeepSort(args.deepsort_checkpoint)
        self.class_names = self.yolo3.class_names

    def __enter__(self):
        """Open ``args.VIDEO_PATH`` and the optional 30 fps MJPG writer."""
        assert os.path.isfile(self.args.VIDEO_PATH), "Error: path error"
        self.vdo.open(self.args.VIDEO_PATH)
        self.im_width = int(self.vdo.get(cv2.CAP_PROP_FRAME_WIDTH))
        self.im_height = int(self.vdo.get(cv2.CAP_PROP_FRAME_HEIGHT))

        if self.args.save_path:
            fourcc = cv2.VideoWriter_fourcc(*'MJPG')
            self.output = cv2.VideoWriter(self.args.save_path, fourcc, 30, (self.im_width, self.im_height))

        assert self.vdo.isOpened()
        return self

    def __exit__(self, exc_type, exc_value, exc_traceback):
        """Print exception details, if any; the exception is not suppressed."""
        if exc_type:
            print(exc_type, exc_value, exc_traceback)

    def detect(self):
        """Track people on every second frame and draw their paths.

        Every frame gets a 10x10 grid overlay.  On frames where
        ``jump_flag`` is even, detection and tracking run, each track's
        foot point (bottom-centre of its box) is appended to its path, and
        the paths are drawn.  Crops for tracks whose box area reaches a new
        maximum are handed to ``subroi`` through a process pool.
        """
        # Worker pool for `subroi`.
        # NOTE(review): the pool is never closed or joined in this method.
        pool = mp.Pool(processes=6) #6-core

        #xmin, ymin, xmax, ymax = self.area
        jump_flag = 1
        while self.vdo.grab():
            _, ori_im = self.vdo.retrieve()
            im_height, im_width = ori_im.shape[:2]
            # Draw a 10x10 grid over the frame.
            x_max = 10
            y_max = 10
            x_grid = int(im_width / x_max)
            y_grid = int(im_height / y_max)
            for i in range(1, x_max + 1):
                cv2.line(ori_im, (x_grid * i, 0), (x_grid * i, im_height), (0, 255, 255), 3)
            for i in range(1, y_max + 1):
                cv2.line(ori_im, (0, y_grid * i), (im_width, y_grid * i), (0, 255, 255), 3)

            # Age every track that has been seen at least once (-1 = never seen).
            for i in range(len(unseen_frame)):
                if unseen_frame[i] > -1:
                    unseen_frame[i] += 1

            if jump_flag % 2 == 0: # only process every second frame
                start = time.time()
                # NOTE(review): the RGB conversion is immediately overwritten
                # by the next line, so the detector receives the BGR frame.
                im = cv2.cvtColor(ori_im, cv2.COLOR_BGR2RGB)
                im = ori_im
                bbox_xcycwh, cls_conf, cls_ids = self.yolo3(im)
                cv2.circle(ori_im, (3900, 2100), 50, (255, 0, 0), -1)

                if bbox_xcycwh is not None:
                    # select class person
                    mask = cls_ids == 0

                    bbox_xcycwh = bbox_xcycwh[mask]
                    # Scales column 3 onward (the last box column) by 1.2.
                    bbox_xcycwh[:, 3:] *= 1.2

                    cls_conf = cls_conf[mask]
                    outputs = self.deepsort.update(bbox_xcycwh, cls_conf, im)

                    for output in outputs:
                        # output[4] is used as a 1-based track id that
                        # indexes the per-track lists; grow them on demand.
                        if output[4] > len(people_path):
                            for i in range(0, output[4] - len(people_path)):
                                people_path.append([])
                                direction_start.append(0)
                                unseen_frame.append(-1)
                            people_path[output[4] - 1].append(
                                np.array(([(output[0] + output[2]) / 2, output[3]])))

                        coordinate = output[:4]
                        bbox_area = get_bbox_area(coordinate)

                        # Dispatch a crop job whenever this track is new or
                        # its box area exceeds the largest seen so far.
                        try:
                            if area_dic[output[-1]] < bbox_area:
                                area_dic[output[-1]] = bbox_area
                                pool.apply_async(subroi, (ori_im, output))
                                print("---------------")
                        except KeyError:
                            area_dic.setdefault(output[-1], bbox_area)
                            pool.apply_async(subroi, (ori_im, output))
                            print("---------------")

                        # Path points since the last recorded direction change.
                        x = []
                        y = []
                        for i in range(direction_start[output[4] - 1], len(people_path[output[4] - 1])):
                            x.append(people_path[output[4] - 1][i][0])
                            y.append(people_path[output[4] - 1][i][1])

                        # Current foot point: bottom-centre of the box.
                        path_x = (output[0] + output[2]) / 2
                        path_y = output[3]

                        if (len(x) > 1):
                            # Coefficients of a line a*x + b*y + c = 0 fitted
                            # to the recent points; the expression below is
                            # the distance of the new point from that line.
                            a, b, c = pu.cal_simple_linear_regression_coefficients(
                                x, y)
                            #print(abs(a * path_x + b * path_y + c) / math.sqrt(a * a + b * b))
                            # Drop points far from the line for tracks seen
                            # within the last 10 frames.
                            if abs(a * path_x + b * path_y + c) / math.sqrt(
                                    a * a + b * b) > 200 and unseen_frame[
                                        output[4] - 1] < 10:
                                continue
                            if abs(a * path_x + b * path_y + c) / math.sqrt(
                                    a * a + b * b) < distance_threshold:
                                #print("projection")
                                path_x, path_y = pu.find_projection(
                                    a, b, c, path_x, path_y)
                                if len(people_path[output[4] - 1]) > 0:
                                    prev_x = people_path[output[4] - 1][
                                        len(people_path[output[4] - 1]) - 1][0]
                                    prev_y = people_path[output[4] - 1][
                                        len(people_path[output[4] - 1]) - 1][1]
                                    # Distance to the previous point scaled
                                    # by 30 and divided by frames since last
                                    # seen (30 is presumably the fps -- confirm).
                                    velocity = math.sqrt(
                                        (path_x - prev_x) * (path_x - prev_x) +
                                        (path_y - prev_y) * (path_y - prev_y)) * 30 / (
                                            unseen_frame[output[4] - 1] + 1)
                                    print("velocity: {}".format(velocity))
                            else:
                                #print("turn")
                                # Start a new straight segment from here.
                                direction_start[output[4] - 1] = len(
                                    people_path[output[4] - 1])

                        people_path[output[4] - 1].append(
                            np.array((path_x, path_y)))
                        unseen_frame[output[4] - 1] = 0

                    if len(outputs) > 0:
                        bbox_xyxy = outputs[:, :4]
                        identities = outputs[:, -1]
                        ori_im = draw_bboxes(ori_im, bbox_xyxy, identities)
                        # Draw the recorded path of every visible track.
                        for id in identities:
                            for i in range(1, len(people_path[id - 1])):
                                cv2.line(ori_im,
                                         (int(people_path[id - 1][i - 1][0]),
                                          int(people_path[id - 1][i - 1][1])),
                                         (int(people_path[id - 1][i][0]),
                                          int(people_path[id - 1][i][1])),
                                         (0, 0, 255), 3)

                end = time.time()
                print("time: {}s, fps: {}".format(end - start, 1 / (end - start)))
                print(area_dic)
            jump_flag += 1

            if self.args.display:
                cv2.imshow("test", ori_im)
                cv2.waitKey(1)

            if self.args.save_path:
                self.output.write(ori_im)
# NOTE(review): this class was reformatted from a whitespace-collapsed source.
# The nesting inside `detect` is a best reconstruction and should be confirmed
# against the original file.
class Detector(object):
    """EfficientDet + DeepSORT over TFRecord sequences, exported as Waymo objects.

    ``detect`` relies on names that are not defined in this class
    (``tfrecord_paths``, ``tfs``, ``training_params``, ``regressBoxes``,
    ``clipBoxes``, ``threshold``, ``iou_threshold``, ``to_waymo_classes``);
    they are presumably module-level configuration -- confirm.
    """

    def __init__(self, args):
        """Build the EfficientDet backbone and the DeepSort tracker from ``args``."""
        self.args = args
        # NOTE(review): `use_cuda` is parsed but not used below; DeepSort is
        # always constructed with use_cuda=True and the model is moved to
        # CUDA unconditionally.
        use_cuda = bool(strtobool(self.args.use_cuda))
        params = Params(f'projects/{self.args.project}.yml')
        self.submit = True
        self.cam_id = 1
        self.object_list = []
        self.object_list_tracks = []
        if args.display:
            pass

        self.vdo = cv2.VideoCapture()
        # SECURITY NOTE: the anchor settings are evaluated with eval(); only
        # load project YAML files from trusted sources.
        self.efficientdet = EfficientDetBackbone(
            num_classes=len(params.obj_list),
            compound_coef=self.args.compound_coef,
            ratios=eval(params.anchors_ratios),
            scales=eval(params.anchors_scales)).cuda()
        self.deepsort = DeepSort(args.deepsort_checkpoint, use_cuda=True)
        self.efficientdet.load_state_dict(torch.load(
            args.detector_weights_path), strict=False)

    def __enter__(self):
        """Set the fixed frame size and open the optional DIVX writer."""
        self.im_width = 1920
        self.im_height = 1280

        if self.args.save_path:
            fourcc = cv2.VideoWriter_fourcc(*'DIVX')
            self.output = cv2.VideoWriter(self.args.save_path, fourcc, 10,
                                          (self.im_width, self.im_height))
        return self

    def __exit__(self, exc_type, exc_value, exc_traceback):
        """Print exception details, if any; the exception is not suppressed."""
        if exc_type:
            print(exc_type, exc_value, exc_traceback)

    def detect(self):
        """Detect and track on every TFRecord and append results to the .bin files.

        For each record the detection and tracking objects are collected and
        then appended, serialized, to
        ``./output/detection/sub_camid_<cam>.bin`` and
        ``./output/tracking/sub_camid_<cam>.bin``.
        """
        for tf_idx, tfrecord in enumerate(tqdm(tfrecord_paths[2:])):
            self.object_list = []
            self.object_list_tracks = []
            training_set = TUMuchTrackingDataset(tfrecord_path=tfrecord,
                                                 transform=tfs,
                                                 cam_id=self.cam_id)
            training_generator = DataLoader(training_set, **training_params)

            for it, data in enumerate(training_generator):
                imgs = data['img'].to(torch.device("cuda:0"))
                if self.submit:
                    meta = data['meta']

                with torch.no_grad():
                    features, regression, classification, anchors = self.efficientdet(
                        imgs)
                    out = postprocess(imgs, anchors, regression,
                                      classification, regressBoxes, clipBoxes,
                                      threshold, iou_threshold)

                # Convert corner boxes (x1, y1, x2, y2) in place to
                # (cx, cy, w, h).
                boxes = out[0]["rois"]
                for idx in range(out[0]["rois"].shape[0]):
                    cx, cy, lx, ly = out[0]["rois"][idx]
                    cw, ch = lx - cx, ly - cy
                    boxes[idx][0] = cx + cw / 2
                    boxes[idx][1] = cy + ch / 2
                    boxes[idx][2] = cw
                    boxes[idx][3] = ch

                bbox_xcycwh, cls_conf, cls_ids = boxes, out[0]["scores"], out[
                    0]["class_ids"]

                if bbox_xcycwh is not None:
                    mask = cls_ids <= 4
                    bbox_xcycwh = bbox_xcycwh[mask]

                    # The 2-D indexing doubles as a shape probe: a batch with
                    # no usable boxes fails here and is skipped.  Catch
                    # Exception rather than everything so Ctrl-C and
                    # SystemExit still stop the run.
                    try:
                        bbox_xcycwh[:, 3:] *= 1
                    except Exception:
                        continue

                    cls_conf = cls_conf[mask]

                    # First image of the batch: CHW tensor -> HWC uint8 image.
                    im = imgs.cpu().numpy()
                    im = im[0, :, :, :]
                    im = np.swapaxes(im, 0, 2)
                    im = np.swapaxes(im, 0, 1)
                    im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
                    im = im * 255
                    im = im.astype(np.uint8)

                    # NOTE(review): the class ids passed here are NOT
                    # filtered with `mask`, unlike the boxes and scores --
                    # confirm the tracker expects the unfiltered array.
                    outputs = self.deepsort.update(bbox_xcycwh, cls_conf,
                                                   out[0]["class_ids"], im)

                    if len(outputs) > 0:
                        bbox_xyxy = outputs[:, :4]
                        identities = outputs[:, -2]
                        track_class = outputs[:, -1]

                        if self.submit:
                            for box_idx in range(bbox_xyxy.shape[0]):
                                o = meta[:][0]

                                box = label_pb2.Label.Box()
                                box.center_x = (bbox_xyxy[box_idx, 0] +
                                                bbox_xyxy[box_idx, 2]) / 2
                                box.center_y = (bbox_xyxy[box_idx, 1] +
                                                bbox_xyxy[box_idx, 3]) / 2
                                box.length = (bbox_xyxy[box_idx, 2] -
                                              bbox_xyxy[box_idx, 0])
                                box.width = (bbox_xyxy[box_idx, 3] -
                                             bbox_xyxy[box_idx, 1])
                                o.object.box.CopyFrom(box)
                                # NOTE(review): fixed placeholder score.
                                o.score = 0.9
                                o.object.type = to_waymo_classes[track_class[
                                    box_idx]]

                                # The detection entry is stored without an
                                # id; the tracking entry carries the track id.
                                self.object_list.append(copy.deepcopy(o))
                                o.object.id = str(identities[box_idx])
                                self.object_list_tracks.append(
                                    copy.deepcopy(o))

                        if self.args.save_path:
                            draw_bboxes(im, bbox_xyxy, identities)

                    if self.args.display:
                        pass

                    # NOTE(review): save_path is overwritten here, after the
                    # writer was (or was not) created in __enter__; the write
                    # below therefore always runs -- confirm intent.
                    self.args.save_path = "cam_{}.avi".format(self.cam_id)
                    if self.args.save_path:
                        self.output.write(im)

            # Append this record's detections to the detection submission.
            objects = metrics_pb2.Objects()
            for o in self.object_list:
                objects.objects.append(o)
            with open("./output/detection/sub_camid_{}.bin".format(self.cam_id),
                      'ab') as f:
                f.write(objects.SerializeToString())

            # Append this record's tracks to the tracking submission.
            objects = metrics_pb2.Objects()
            for o in self.object_list_tracks:
                objects.objects.append(o)
            with open("./output/tracking/sub_camid_{}.bin".format(self.cam_id),
                      'ab') as f:
                f.write(objects.SerializeToString())
class DeepSortDetector(object):
    """Track objects in a video with YOLOv3 detections and DeepSORT.

    Intended to be used as a context manager: ``__enter__`` opens the video
    and the optional output writer, ``__exit__`` releases them.

    Arguments:
        cfg: YOLOv3 config file, e.g. ``yolov3.cfg``.
        weights: YOLOv3 weights, e.g. ``weights/best.pt``.
        video_path: input video, e.g. ``./data/videosample/video1.mp4``.
        deep_checkpoint: DeepSORT checkpoint path.
        data: YOLOv3 data file, e.g. ``coco.data``.
        output_file: optional text file receiving one line per track and
            frame, e.g. ``./data/videoresults/video1.txt``.
        img_size: YOLOv3 input size.
        display: show the annotated frames in a window.
        nms_thres: YOLOv3 NMS threshold.
        conf_thres: YOLOv3 confidence threshold.
        max_dist: passed to DeepSort as its second argument.
        display_width: width of the display window.
        display_height: height of the display window.
        save_path: optional annotated output video, e.g. ``./video1_out.mp4``.
    """

    def __init__(
            self,
            cfg,
            weights,
            video_path,
            deep_checkpoint="deep_sort/deep/checkpoint/ckpt.t7",
            data="dataset1.data",
            output_file=None,
            img_size=416,
            display=False,
            nms_thres=0.4,
            conf_thres=0.5,
            max_dist=0.2,
            display_width=800,
            display_height=600,
            save_path=None):
        device = torch.device(
            'cuda') if torch.cuda.is_available() else torch.device('cpu')
        self.vidCap = cv2.VideoCapture()
        self.yolov3 = InferYOLOv3(cfg, img_size, weights, data, device,
                                  conf_thres, nms_thres)
        self.deepsort = DeepSort(deep_checkpoint, max_dist)
        self.display = display
        self.video_path = video_path
        self.output_file = output_file
        self.save_path = save_path

        if self.display:
            cv2.namedWindow("Test", cv2.WINDOW_NORMAL)
            cv2.resizeWindow("Test", display_width, display_height)

    def __enter__(self):
        """Open the input video and, if requested, the output writer.

        Raises:
            AssertionError: if the path is not a file or cannot be opened.
        """
        assert os.path.isfile(self.video_path), "Error: path error"
        self.vidCap.open(self.video_path)
        self.im_width = int(self.vidCap.get(cv2.CAP_PROP_FRAME_WIDTH))
        self.im_height = int(self.vidCap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        if self.save_path is not None:
            fourcc = cv2.VideoWriter_fourcc(*'MJPG')
            self.output = cv2.VideoWriter(self.save_path, fourcc, 20,
                                          (self.im_width, self.im_height))

        assert self.vidCap.isOpened()
        return self

    def __exit__(self, exc_type, exc_value, exc_traceback):
        """Release the capture and writer, then report any exception.

        Returns None, so an exception raised inside the ``with`` body still
        propagates to the caller.
        """
        self.vidCap.release()
        output = getattr(self, "output", None)
        if output is not None:
            output.release()
        if exc_type:
            print(exc_type, exc_value, exc_traceback)

    def detect(self):
        """Run detection and tracking on every frame of the opened video.

        When ``output_file`` is set, one line per track and frame is written
        as ``frame,id,left,top,width,height,1,-1,-1,-1`` with a 1-based
        frame number.  A timing summary is printed every 500 frames.
        """
        frame_no = -1
        # Use the instance attribute; there is no local `output_file` here.
        f = open(self.output_file, "w") if self.output_file else None

        try:
            while self.vidCap.grab():
                frame_no += 1

                total_begin = time.time()
                _, img = self.vidCap.retrieve()

                # Detection: boxes come back as [x1, y1, x2, y2].
                yolo_begin = time.time()
                bbox_xyxy, cls_conf, cls_ids = self.yolov3.predict(img)
                yolo_end = time.time()

                # Tracking.
                ds_begin = time.time()
                if bbox_xyxy is not None:
                    bbox_cxcywh = xyxy2xywh(bbox_xyxy)
                    outputs = self.deepsort.update(bbox_cxcywh, cls_conf, img)
                    if len(outputs) > 0:
                        # Each output row is [x1, y1, x2, y2, id].
                        bbox_xyxy = outputs[:, :4]
                        ids = outputs[:, -1]
                        img = draw_bboxes(img, bbox_xyxy, ids)

                        if f is not None:
                            # The line layout is top-left + size, so derive
                            # it from the corners directly; xyxy2xywh yields
                            # box centres (it feeds the tracker above).
                            for box, track_id in zip(bbox_xyxy, ids):
                                x1, y1, x2, y2 = (int(v) for v in box)
                                f.write("%d,%d,%d,%d,%d,%d,1,-1,-1,-1\n" %
                                        (frame_no + 1, track_id, x1, y1,
                                         x2 - x1, y2 - y1))
                ds_end = time.time()

                total_end = time.time()

                if frame_no % 500 == 0:
                    total = total_end - total_begin
                    det = yolo_end - yolo_begin
                    # Guard against a zero-length interval on coarse clocks.
                    det_share = det * 100 / total if total > 0 else 0.0
                    fps = 1 / total if total > 0 else float("inf")
                    print("frame:%04d|det:%.4f|deep sort:%.4f|total:%.4f|det p:%.2f%%|fps:%.2f" %
                          (frame_no, det, (ds_end - ds_begin), total,
                           det_share, fps))

                if self.display is True:
                    cv2.imshow("Test", img)
                    cv2.waitKey(1)

                if self.save_path:
                    self.output.write(img)
        finally:
            # Close the results file even if a frame raises.
            if f is not None:
                f.close()
def main():
    """Run the live person re-identification demo on camera 0.

    Each frame is passed through the detector, the detections are filtered
    to the 'person' class and tracked with DeepSORT, and the tracks plus a
    people counter are drawn into the demo window.

    Keys: ``q`` or Esc quits, ``r`` resets the tracker, space pauses until
    the next key press.

    Raises:
        AssertionError: if the camera cannot be opened.
    """
    print('Connecting to camera')
    # Device 0 is the default camera; a stream URL can be used instead.
    cap = cv2.VideoCapture(0)
    assert cap.isOpened(), 'Unable to connect to camera'

    try:
        device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
        print('Loading models')
        detector = Detector('weights/yolov5s.pt',
                            img_size=(640, 640),
                            conf_thresh=0.4,
                            iou_thresh=0.5,
                            agnostic_nms=False,
                            device=device)
        deepsort = DeepSort('weights/ckpt.t7',
                            max_dist=0.2,
                            min_confidence=0.3,
                            nms_max_overlap=0.5,
                            max_iou_distance=0.7,
                            max_age=70,
                            n_init=3,
                            nn_budget=100,
                            device=device)
        bboxes_visualizer = BoundingBoxesVisualizer()
        fps_estimator = MeanEstimator()
        person_cls_id = detector.names.index('person')  # id of 'person' class

        width, height = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(
            cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        cam_fps = int(cap.get(cv2.CAP_PROP_FPS))
        print(f'Starting capture, camera_fps={cam_fps}')

        win_name = 'MICA ReID Demo'
        cv2.namedWindow(win_name,
                        cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_FREERATIO)
        cv2.resizeWindow(win_name, width, height)

        frame_id = 0
        while True:
            start_it = time.time()
            ret, img = cap.read()
            if not ret:
                print('Unable to read camera')
                break

            detections = detector.detect([img])[0]

            num_people = 0
            if detections is not None:
                # Keep only the 'person' detections.
                detections = detections[detections[:, -1].eq(person_cls_id)]
                xywh, confs = parse_detection(detections)
                outputs = deepsort.update(xywh, confs, img)
                num_people = len(outputs)

                # Forget tracks that are deleted or stale for > 3 updates.
                bboxes_visualizer.remove([
                    t.track_id for t in deepsort.tracker.tracks
                    if t.time_since_update > 3 or t.is_deleted()
                ])
                bboxes_visualizer.update(outputs)

                # Only index the id column when there is at least one track;
                # an empty result need not be a 2-D array.
                if num_people:
                    for pid in outputs[:, -1]:
                        bboxes_visualizer.plot(img,
                                               pid,
                                               label=f'Person {pid}',
                                               line_thickness=5,
                                               trail_trajectory=True,
                                               trail_bbox=False)

            # Semi-transparent white box with the people counter.
            overlay = img.copy()
            count_str = f'Number of people: {num_people}'
            text_size = cv2.getTextSize(count_str,
                                        0,
                                        fontScale=0.5,
                                        thickness=1)[0]
            cv2.rectangle(overlay, (10, 10 + 10),
                          (15 + text_size[0], 10 + 20 + text_size[1]),
                          (255, 255, 255), -1)
            img = cv2.addWeighted(overlay, 0.4, img, 0.6, 0)
            cv2.putText(img,
                        count_str, (12, 10 + 15 + text_size[1]),
                        0,
                        0.5, (0, 0, 0),
                        thickness=1,
                        lineType=cv2.LINE_AA)

            cv2.imshow(win_name, img)
            key = cv2.waitKey(1)

            elapsed_time = time.time() - start_it
            fps = fps_estimator.update(1 / elapsed_time)
            print(
                f'[{frame_id:06d}] num_detections={num_people} fps={fps:.02f} elapsed_time={elapsed_time:.03f}'
            )

            if key == ord('q') or key == 27:  # q or esc to quit
                break
            elif key == ord('r'):  # r to reset tracking
                deepsort.reset()
                bboxes_visualizer.clear()
            elif key == 32:  # space to pause
                key = cv2.waitKey(0)
                if key == ord('q') or key == 27:
                    break

            frame_id += 1
    finally:
        # Always give the window and the camera back, even if a frame raises.
        cv2.destroyAllWindows()
        cap.release()
class Detector(object):
    """Detectron2 + DeepSORT demo that counts tracked objects ("hands").

    Tracker ids are remapped to consecutive display ids through
    ``total_counter``: ``total_counter[tracker_id]`` holds the display id,
    or 0 if that tracker id has not been seen yet.  Intended to be used as a
    context manager.
    """

    def __init__(self, args):
        """Build the Detectron2 wrapper and the DeepSort tracker from ``args``."""
        self.args = args
        use_cuda = bool(strtobool(self.args.use_cuda))
        if args.display:
            cv2.namedWindow("test", cv2.WINDOW_NORMAL)
            cv2.resizeWindow("test", args.display_width, args.display_height)

        self.vdo = cv2.VideoCapture()
        self.detectron2 = Detectron2(args)

        self.deepsort = DeepSort(args.deepsort_checkpoint, use_cuda=use_cuda)
        # Grown on demand in detect(); 100 is only the initial capacity.
        self.total_counter = [0] * 100

    def __enter__(self):
        """Open ``args.VIDEO_PATH`` and, if requested, the output writer.

        Raises:
            AssertionError: if the path is not a file or cannot be opened.
        """
        assert os.path.isfile(self.args.VIDEO_PATH), "Error: path error"
        self.vdo.open(self.args.VIDEO_PATH)
        self.im_width = int(self.vdo.get(cv2.CAP_PROP_FRAME_WIDTH))
        self.im_height = int(self.vdo.get(cv2.CAP_PROP_FRAME_HEIGHT))

        if self.args.save_path:
            fourcc = cv2.VideoWriter_fourcc(*'MJPG')
            self.output = cv2.VideoWriter(self.args.save_path, fourcc, 20,
                                          (self.im_width, self.im_height))

        assert self.vdo.isOpened()
        return self

    def __exit__(self, exc_type, exc_value, exc_traceback):
        """Release the capture and writer, then report any exception.

        Returns None, so an exception raised inside the ``with`` body still
        propagates to the caller.
        """
        self.vdo.release()
        output = getattr(self, "output", None)
        if output is not None:
            output.release()
        if exc_type:
            print(exc_type, exc_value, exc_traceback)

    def detect(self):
        """Detect, track and count on every frame of the opened video."""
        while self.vdo.grab():
            start = time.time()
            _, im = self.vdo.retrieve()
            print(
                '----------------------------------------------DEMO started-----------------------------------------------'
            )
            bbox_xcycwh, cls_conf, cls_ids, cls_masks, bbox_xyxy_detectron2 = self.detectron2.detect(
                im)

            current_counter = []
            if len(bbox_xcycwh):
                mask = cls_ids == 0  # select class person

                bbox_xcycwh = bbox_xcycwh[mask]
                cls_conf = cls_conf[mask]
                binary_masks = cls_masks[mask]

                outputs, detections = self.deepsort.update(
                    bbox_xcycwh, cls_conf, im)
                # Draw the detections returned by the tracker update.
                im = draw_detections(detections, im)
                print(
                    '++++++++++++++++++++++++++++++++++++++ outputs of deepsort.update',
                    outputs)
                if len(outputs):
                    bbox_xyxy = outputs[:, :4]
                    print(
                        "+++++++++++++++++++++++++++++++++++++bbox_xyxy, bbox_xyxy_detectron2",
                        bbox_xyxy, bbox_xyxy_detectron2)
                    identities = current_counter = outputs[:, -1]

                    ordered_identities = []
                    for identity in identities:
                        # Tracker ids keep growing over a long video; extend
                        # the table instead of failing once an id reaches
                        # its current length.
                        shortfall = identity + 1 - len(self.total_counter)
                        if shortfall > 0:
                            self.total_counter.extend([0] * int(shortfall))
                        if not self.total_counter[identity]:
                            self.total_counter[identity] = max(
                                self.total_counter) + 1
                        ordered_identities.append(
                            self.total_counter[identity])
                    im = draw_bboxes(im, bbox_xyxy, ordered_identities,
                                     binary_masks)

            end = time.time()
            time_fps = "time: {}s, fps: {}".format(round(end - start, 2),
                                                   round(1 / (end - start), 2))
            im = cv2.putText(
                im, "Total Hand Counter: " + str(max(self.total_counter)),
                (int(20), int(120)), 0, 5e-3 * 200, (0, 255, 0), 2)
            im = cv2.putText(
                im, "Current Hand Counter: " + str(len(current_counter)),
                (int(20), int(80)), 0, 5e-3 * 200, (0, 255, 0), 2)
            im = cv2.putText(im, time_fps, (int(20), int(40)), 0, 5e-3 * 200,
                             (0, 255, 0), 3)

            if self.args.display:
                cv2.imshow("test", im)
                cv2.waitKey(1)

            if self.args.save_path:
                self.output.write(im)