def recognize_from_video():
    results = []
    idx_frame = 0

    # net initialize
    detector = init_detector(args.env_id)
    extractor = ailia.Net(EX_MODEL_PATH, EX_WEIGHT_PATH, env_id=args.env_id)

    # tracker class instance
    metric = NearestNeighborDistanceMetric(
        "cosine", MAX_COSINE_DISTANCE, NN_BUDGET
    )
    tracker = Tracker(
        metric,
        max_iou_distance=0.7,
        max_age=70,
        n_init=3
    )

    capture = webcamera_utils.get_capture(args.video)

    # create video writer
    if args.savepath is not None:
        writer = webcamera_utils.get_writer(
            args.savepath,
            int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT)),
            int(capture.get(cv2.CAP_PROP_FRAME_WIDTH)),
        )
    else:
        writer = None

    print('Start Inference...')
    while True:
        idx_frame += 1
        ret, frame = capture.read()
        if (cv2.waitKey(1) & 0xFF == ord('q')) or not ret:
            break

        # In order to use ailia.Detector, the input should have 4 channels.
        input_img = cv2.cvtColor(frame, cv2.COLOR_BGR2BGRA)
        h, w = frame.shape[0], frame.shape[1]

        # do detection
        detector.compute(input_img, THRESHOLD, IOU)
        bbox_xywh, cls_conf, cls_ids = get_detector_result(detector, h, w)

        # select person class
        mask = cls_ids == 0
        bbox_xywh = bbox_xywh[mask]

        # bbox dilation just in case bbox too small,
        # delete this line if using a better pedestrian detector
        bbox_xywh[:, 3:] *= 1.2

        cls_conf = cls_conf[mask]

        # do tracking
        img_crops = []
        for box in bbox_xywh:
            x1, y1, x2, y2 = xywh_to_xyxy(box, h, w)
            img_crops.append(frame[y1:y2, x1:x2])

        if img_crops:
            # preprocess
            img_batch = np.concatenate([
                normalize_image(resize(img), 'ImageNet')[np.newaxis, :, :, :]
                for img in img_crops
            ], axis=0).transpose(0, 3, 1, 2)

            # TODO better to pass a batch at once
            # features = extractor.predict(img_batch)
            features = []
            for img in img_batch:
                features.append(extractor.predict(img[np.newaxis, :, :, :])[0])
            features = np.array(features)
        else:
            features = np.array([])

        bbox_tlwh = xywh_to_tlwh(bbox_xywh)
        detections = [
            Detection(bbox_tlwh[i], conf, features[i])
            for i, conf in enumerate(cls_conf) if conf > MIN_CONFIDENCE
        ]

        # run non-maximum suppression
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        nms_max_overlap = 1.0
        indices = non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # update tracker
        tracker.predict()
        tracker.update(detections)

        # update bbox identities
        outputs = []
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            box = track.to_tlwh()
            x1, y1, x2, y2 = tlwh_to_xyxy(box, h, w)
            track_id = track.track_id
            outputs.append(np.array([x1, y1, x2, y2, track_id], dtype=int))
        if len(outputs) > 0:
            outputs = np.stack(outputs, axis=0)

        # draw box for visualization
        if len(outputs) > 0:
            bbox_tlwh = []
            bbox_xyxy = outputs[:, :4]
            identities = outputs[:, -1]
            frame = draw_boxes(frame, bbox_xyxy, identities)

            for bb_xyxy in bbox_xyxy:
                bbox_tlwh.append(xyxy_to_tlwh(bb_xyxy))

            results.append((idx_frame - 1, bbox_tlwh, identities))

        cv2.imshow('frame', frame)

        if writer is not None:
            writer.write(frame)

    if args.savepath is not None:
        write_results(args.savepath.split('.')[0] + '.txt', results, 'mot')
    else:
        write_results('result.txt', results, 'mot')

    capture.release()
    cv2.destroyAllWindows()
    if writer is not None:
        writer.release()
    print(f'Save results to {args.savepath}')
    print('Script finished successfully.')
class DeepSort(object):

    def __init__(self, model_path):
        self.min_confidence = 0.3
        self.nms_max_overlap = 1.0

        self.extractor = Extractor(model_path, use_cuda=True)

        max_cosine_distance = 0.2
        nn_budget = 100
        metric = NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
        self.tracker = Tracker(metric)

    def update(self, bbox_xywh, confidences, ori_img):
        self.height, self.width = ori_img.shape[:2]

        # generate detections
        features = self._get_features(bbox_xywh, ori_img)
        detections = [
            Detection(bbox_xywh[i], conf, features[i])
            for i, conf in enumerate(confidences) if conf > self.min_confidence
        ]

        # run non-maximum suppression
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = non_max_suppression(boxes, self.nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # update tracker
        self.tracker.predict()
        self.tracker.update(detections)

        # output bbox identities
        outputs = []
        for track in self.tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            box = track.to_tlwh()
            x1, y1, x2, y2 = self._xywh_to_xyxy(box)
            track_id = track.track_id
            outputs.append(np.array([x1, y1, x2, y2, track_id], dtype=int))
        if len(outputs) > 0:
            outputs = np.stack(outputs, axis=0)
        return outputs

    def _xywh_to_xyxy(self, bbox_xywh):
        x, y, w, h = bbox_xywh
        x1 = max(int(x - w / 2), 0)
        x2 = min(int(x + w / 2), self.width - 1)
        y1 = max(int(y - h / 2), 0)
        y2 = min(int(y + h / 2), self.height - 1)
        return x1, y1, x2, y2

    def _get_features(self, bbox_xywh, ori_img):
        features = []
        for box in bbox_xywh:
            x1, y1, x2, y2 = self._xywh_to_xyxy(box)
            im = ori_img[y1:y2, x1:x2]
            feature = self.extractor(im)[0]
            features.append(feature)
        # guard against an empty detection list; np.stack fails on an empty sequence
        if len(features):
            features = np.stack(features, axis=0)
        else:
            features = np.array([])
        return features
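A minimal usage sketch for the DeepSort class above. The weight path, input video, and the `my_detector` interface (returning center-xywh boxes and confidences) are illustrative assumptions, not part of the original code.

# hedged sketch: my_detector, the checkpoint path and the video file are placeholders
import cv2

deepsort = DeepSort("checkpoint/ckpt.t7")   # hypothetical re-ID weight path
cap = cv2.VideoCapture("input.mp4")         # hypothetical input video
while True:
    ok, frame = cap.read()
    if not ok:
        break
    # assumed detector interface: (N, 4) center-xywh boxes and (N,) confidences
    bbox_xywh, conf = my_detector(frame)
    outputs = deepsort.update(bbox_xywh, conf, frame)
    for x1, y1, x2, y2, track_id in outputs:
        cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)
        cv2.putText(frame, str(track_id), (int(x1), int(y1) - 5),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
cap.release()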
class DeepSort(object):

    def __init__(self, model_path, use_cuda, map_location_flag, bad_time, left_time):
        self.min_confidence = 0.3
        self.nms_max_overlap = 1.0

        self.extractor = Extractor(model_path, use_cuda=use_cuda,
                                   map_location_flag=map_location_flag)
        # violation time: a track is only reported once it has stayed longer than this
        self.bad_time = bad_time

        max_cosine_distance = 0.2
        nn_budget = 100
        metric = NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
        self.tracker = Tracker(metric, left_time=left_time)

    def update(self, bbox_xywh, confidences, ori_img, all_name, start_time):
        self.height, self.width = ori_img.shape[:2]

        # generate detections
        features = self._get_features(bbox_xywh, ori_img)
        detections = [Detection(bbox_xywh[i], conf, features[i], all_name[i], start_time)
                      for i, conf in enumerate(confidences) if conf > self.min_confidence]

        # run non-maximum suppression
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = non_max_suppression(boxes, self.nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # update tracker
        self.tracker.predict()
        self.tracker.update(detections, start_time)

        # output bbox identities
        outputs = []
        return_name = []
        stay_time_all = []
        for track in self.tracker.tracks:
            # skip tracks that are not confirmed or were not updated on this frame
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            # only report violations, i.e. tracks that have exceeded the violation time
            if (start_time - track.start_time) < self.bad_time:
                continue
            box = track.to_tlwh()
            x1, y1, x2, y2 = self._xywh_to_xyxy(box)
            track_id = track.track_id
            class_name = track.class_name
            # print(class_name)
            outputs.append(np.array([x1, y1, x2, y2, track_id], dtype=int))
            return_name.append(class_name)
            part = [class_name + str(track_id), start_time - track.start_time]
            stay_time_all.append(part)
        if len(outputs) > 0:
            outputs = np.stack(outputs, axis=0)

        # both tentative and confirmed tracks could record their stay time:
        # stay_time_all = [[track.class_name, track.track_id, start_time - track.start_time]
        #                  for track in self.tracker.tracks
        #                  if (start_time - track.start_time) > self.bad_time and track.is_confirmed]
        return outputs, return_name, stay_time_all

    def _xywh_to_xyxy(self, bbox_xywh):
        x, y, w, h = bbox_xywh
        x1 = max(int(x - w / 2), 0)
        x2 = min(int(x + w / 2), self.width - 1)
        y1 = max(int(y - h / 2), 0)
        y2 = min(int(y + h / 2), self.height - 1)
        return x1, y1, x2, y2

    def _get_features(self, bbox_xywh, ori_img):
        features = []
        for box in bbox_xywh:
            x1, y1, x2, y2 = self._xywh_to_xyxy(box)
            im = ori_img[y1:y2, x1:x2]
            feature = self.extractor(im)[0]
            features.append(feature)
        if len(features):
            features = np.stack(features, axis=0)
        else:
            features = np.array([])
        return features
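A short sketch of consuming the three return values of this variant's update(): boxes with ids, class names, and the per-track dwell times used by the violation rule. The constructor arguments, the `my_detector` interface, and the wall-clock time source are assumptions for illustration only.

# hedged sketch: my_detector, weight path and parameter values are placeholders
import time
import cv2

deepsort = DeepSort("ckpt.t7", use_cuda=True, map_location_flag=None,
                    bad_time=5.0, left_time=2.0)     # report tracks older than 5 s (assumed units)
cap = cv2.VideoCapture("input.mp4")                  # hypothetical input video
while True:
    ok, frame = cap.read()
    if not ok:
        break
    now = time.time()
    # assumed detector interface: center-xywh boxes, confidences, and class names
    bbox_xywh, conf, names = my_detector(frame)
    outputs, out_names, stay_times = deepsort.update(bbox_xywh, conf, frame, names, now)
    # stay_times holds ["<class><id>", dwell_seconds] pairs for tracks past bad_time
    for label, dwell in stay_times:
        print(f"violation: {label} has stayed for {dwell:.1f} s")
cap.release()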
class DeepSort(object):

    def __init__(self, model_path):
        # detection confidence threshold: detections below this value are ignored,
        # i.e. bboxes with confidence < self.min_confidence are filtered out
        # before detections are built
        self.min_confidence = 0.25
        # NMS threshold (original value 1.0; with 1.0 every detection is kept)
        self.nms_max_overlap = 1.0

        self.extractor = Extractor(model_path, use_cuda=True)

        # cosine-distance gate; tuning this can reduce ID switches
        max_cosine_distance = 0.2
        # nn_budget caps the per-track appearance gallery: each track keeps a list of
        # features from its past appearances, and nn_budget sets the list size.
        # For example, with a budget of 10 only the features from the last
        # 10 appearances of a track are stored.
        nn_budget = 100
        metric = NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
        self.tracker = Tracker(metric)

    def update(self, bbox_xywh, confidences, class_num, ori_img):
        self.height, self.width = ori_img.shape[:2]

        # generate detections
        detections = []
        try:
            features = self._get_features(bbox_xywh, ori_img)
            for i, conf in enumerate(confidences):
                if conf >= self.min_confidence and features.any():
                    # the Detection class is defined in detection.py
                    detections.append(
                        Detection(bbox_xywh[i], conf, class_num[i], features[i]))
        except Exception as ex:
            # TODO Error: OpenCV(4.1.1) .../modules/imgproc/src/resize.cpp:3720:
            # error: (-215:Assertion failed) !ssize.empty() in function 'resize'
            print("{} Error: {}".format(
                time.strftime("%H:%M:%S", time.localtime()), ex))
            # print('Error or video finish')

        # run non-maximum suppression
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = non_max_suppression(boxes, self.nms_max_overlap, scores)
        # indices is e.g. [0] or [0, 1]; keep only the detections selected by NMS
        detections = [detections[i] for i in indices]
        # print(detections[0].confidence)  # e.g. confidence: 0.5057685971260071
        # print(detections)
        # [bbox_xywh: [1508.47619629 483.33926392 34.95910645 77.69906616],
        #  confidence: 0.5140249729156494,
        #  bbox_xywh: [1678.99377441 526.4251709 36.55554199 80.11364746],
        #  confidence: 0.5057685971260071]

        # update tracker with the NMS-filtered detection list
        self.tracker.predict()
        self.tracker.update(detections)
        # print("confidence {}".format(detections[0].confidence))

        # output bbox identities; self.tracker.tracks holds one Track instance per target
        outputs = []
        for track in self.tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            # (top-left x, top-left y, width, height), refreshed every frame
            box = track.to_tlwh()
            # convert tlwh to the two corner points of the rectangle
            x1, y1, x2, y2 = self._xywh_to_xyxy_centernet(box)

            # record the motion trajectory: the trajectory point is the box center
            center = (int((x1 + x2) / 2), int((y1 + y2) / 2))
            # alternative: use the bottom of the box as the trajectory point
            # center = (int((x1 + x2) / 2), int(y2))
            # append the center to a per-track FIFO queue of past positions
            points[track.track_id].append(center)
            # print(points[1][-1])  # inspect the stored centers of track id 1
            # for j in range(1, len(points[track.track_id])):
            #     if points[track.track_id][j - 1] is None or points[track.track_id][j] is None:
            #         continue
            #     # thickness = int(np.sqrt(32 / float(j + 1)) * 2)  # first segment thick, later ones thinner
            #     cv2.line(ori_img, points[track.track_id][j - 1], points[track.track_id][j],
            #              (8, 196, 255), thickness=3, lineType=cv2.LINE_AA)

            track_id = track.track_id
            confidences = track.confidence * 100
            cls_num = track.class_num
            # print("track_id {} confidences {}".format(track_id, confidences))
            outputs.append(
                np.array([x1, y1, x2, y2, track_id, confidences, cls_num], dtype=int))
        if len(outputs) > 0:
            outputs = np.stack(outputs, axis=0)
        return outputs, points

    # for centernet (x1, y1, w, h -> x1, y1, x2, y2)
    def _xywh_to_xyxy_centernet(self, bbox_xywh):
        x1, y1, w, h = bbox_xywh
        x1 = max(x1, 0)
        y1 = max(y1, 0)
        x2 = min(int(x1 + w), self.width - 1)
        y2 = min(int(y1 + h), self.height - 1)
        return int(x1), int(y1), x2, y2

    # for yolo (center x, center y, w, h -> x1, y1, x2, y2)
    def _xywh_to_xyxy_yolo(self, bbox_xywh):
        x, y, w, h = bbox_xywh
        x1 = max(int(x - w / 2), 0)
        x2 = min(int(x + w / 2), self.width - 1)
        y1 = max(int(y - h / 2), 0)
        y2 = min(int(y + h / 2), self.height - 1)
        return x1, y1, x2, y2

    def _get_features(self, bbox_xywh, ori_img):
        # TODO
        features = []
        for box in bbox_xywh:
            x1, y1, x2, y2 = self._xywh_to_xyxy_centernet(box)
            im = ori_img[y1:y2, x1:x2]
            feature = self.extractor(im)[0]
            features.append(feature)
        if len(features):
            features = np.stack(features, axis=0)
        else:
            features = np.array([])
        return features
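The class above appends trajectory centers to a module-level `points` container that is not defined in the snippet. A minimal sketch of one way to provide it, assuming the FIFO behaviour described in the comments; the per-track length of 64 is an illustrative choice, not from the original source.

# hedged sketch: a bounded FIFO trajectory buffer per track id
from collections import defaultdict, deque

# keep at most the last 64 center points per track id; older points drop out automatically
points = defaultdict(lambda: deque(maxlen=64))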
def recognize_from_video():
    try:
        print('[INFO] Webcam mode is activated')
        RECORD_TIME = 80
        capture = cv2.VideoCapture(int(args.video))
        if not capture.isOpened():
            print("[ERROR] webcamera not found")
            sys.exit(1)
    except ValueError:
        if check_file_existance(args.video):
            capture = cv2.VideoCapture(args.video)

    frame_rate = capture.get(cv2.CAP_PROP_FPS)
    if FRAME_SKIP:
        action_recognize_fps = int(args.fps)
    else:
        action_recognize_fps = frame_rate

    if args.savepath != "":
        size = (int(capture.get(cv2.CAP_PROP_FRAME_WIDTH)),
                int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT)))
        fmt = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
        writer = cv2.VideoWriter(args.savepath, fmt, action_recognize_fps, size)
    else:
        writer = None

    # pose estimation
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    if args.arch == "lw_human_pose":
        pose = ailia.PoseEstimator(
            MODEL_PATH, WEIGHT_PATH, env_id=env_id, algorithm=ALGORITHM)
        detector = None
    else:
        detector = ailia.Detector(
            DETECTOR_MODEL_PATH,
            DETECTOR_WEIGHT_PATH,
            len(COCO_CATEGORY),
            format=ailia.NETWORK_IMAGE_FORMAT_RGB,
            channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
            range=ailia.NETWORK_IMAGE_RANGE_U_FP32,
            algorithm=ailia.DETECTOR_ALGORITHM_YOLOV3,
            env_id=env_id)
        pose = ailia.Net(POSE_MODEL_PATH, POSE_WEIGHT_PATH, env_id=env_id)

    # tracker class instance
    extractor = ailia.Net(EX_MODEL_PATH, EX_WEIGHT_PATH, env_id=env_id)
    metric = NearestNeighborDistanceMetric("cosine", MAX_COSINE_DISTANCE, NN_BUDGET)
    tracker = Tracker(metric, max_iou_distance=0.7, max_age=70, n_init=3)

    # action recognition
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    model = ailia.Net(ACTION_MODEL_PATH, ACTION_WEIGHT_PATH, env_id=env_id)

    action_data = {}

    frame_nb = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
    idx_frame = 0

    time_start = time.time()
    while True:
        time_curr = time.time()
        if args.video == '0' and time_curr - time_start > RECORD_TIME:
            break

        ret, frame = capture.read()

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
        if (not ret) or (frame_nb >= 1 and idx_frame >= frame_nb):
            break

        if FRAME_SKIP:
            mod = round(frame_rate / action_recognize_fps)
            if mod >= 1:
                if idx_frame % mod != 0:
                    idx_frame = idx_frame + 1
                    continue

        input_image, input_data = adjust_frame_size(
            frame, frame.shape[0], frame.shape[1],
        )
        input_data = cv2.cvtColor(input_data, cv2.COLOR_BGR2BGRA)

        # inference
        if args.arch == "lw_human_pose":
            _ = pose.compute(input_data)
        else:
            detector.compute(input_data, THRESHOLD, IOU)

        # deepsort format
        h, w = input_image.shape[0], input_image.shape[1]
        if args.arch == "lw_human_pose":
            bbox_xywh, cls_conf, cls_ids = get_detector_result_lw_human_pose(pose, h, w)
        else:
            bbox_xywh, cls_conf, cls_ids = get_detector_result(detector, h, w)
        mask = cls_ids == 0
        bbox_xywh = bbox_xywh[mask]

        # bbox dilation just in case bbox too small,
        # delete this line if using a better pedestrian detector
        if args.arch == "pose_resnet":
            bbox_xywh[:, 3:] *= 1.2  # may need to be removed in the future

        cls_conf = cls_conf[mask]

        # do tracking
        img_crops = []
        for box in bbox_xywh:
            x1, y1, x2, y2 = xywh_to_xyxy(box, h, w)
            img_crops.append(input_image[y1:y2, x1:x2])

        if img_crops:
            # preprocess
            img_batch = np.concatenate([
                normalize_image(resize(img), 'ImageNet')[np.newaxis, :, :, :]
                for img in img_crops
            ], axis=0).transpose(0, 3, 1, 2)

            # TODO better to pass a batch at once
            # features = extractor.predict(img_batch)
            features = []
            for img in img_batch:
                features.append(extractor.predict(img[np.newaxis, :, :, :])[0])
            features = np.array(features)
        else:
            features = np.array([])

        bbox_tlwh = xywh_to_tlwh(bbox_xywh)
        detections = [
            Detection(bbox_tlwh[i], conf, features[i])
            for i, conf in enumerate(cls_conf) if conf > MIN_CONFIDENCE
        ]

        # run non-maximum suppression
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        nms_max_overlap = 1.0
        indices = non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # update tracker
        tracker.predict()
        tracker.update(detections)

        # update bbox identities
        outputs = []
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            box = track.to_tlwh()
            x1, y1, x2, y2 = tlwh_to_xyxy(box, h, w)
            track_id = track.track_id
            outputs.append(np.array([x1, y1, x2, y2, track_id], dtype=int))
        if len(outputs) > 0:
            outputs = np.stack(outputs, axis=0)

        # action detection
        actions = []
        persons = []
        if len(outputs) > 0:
            bbox_xyxy = outputs[:, :4]
            identities = outputs[:, -1]
            for i, box in enumerate(bbox_xyxy):
                id = identities[i]
                if not (id in action_data):
                    action_data[id] = np.zeros(
                        (ailia.POSE_KEYPOINT_CNT - 1, TIME_RANGE, 3))
                # action recognition
                action, person = action_recognition(
                    box, input_image, pose, detector, model, action_data[id])
                actions.append(action)
                persons.append(person)

        # draw box for visualization
        if len(outputs) > 0:
            bbox_tlwh = []
            bbox_xyxy = outputs[:, :4]
            identities = outputs[:, -1]
            frame = draw_boxes(
                input_image, bbox_xyxy, identities, actions, action_data, (0, 0))

            for bb_xyxy in bbox_xyxy:
                bbox_tlwh.append(xyxy_to_tlwh(bb_xyxy))

        # draw skeleton
        for person in persons:
            if person is not None:
                display_result(input_image, person)

        if writer is not None:
            writer.write(input_image)

        # show progress
        if idx_frame == 0:
            print()
        print("\r" + str(idx_frame + 1) + " / " + str(frame_nb), end="")
        if idx_frame == frame_nb - 1:
            print()
        cv2.imshow('frame', input_image)
        idx_frame = idx_frame + 1

    if writer is not None:
        writer.release()
    capture.release()
    cv2.destroyAllWindows()
    print('Script finished successfully.')
class KCTracker(object):

    def __init__(self,
                 model_path='yolov3/of_model/yolov3_model_python/',
                 gpu_ids='0',
                 model_name='resid',
                 confidence_l=0.2,
                 confidence_h=0.4,
                 max_cosine_distance=0.2,
                 max_iou_distance=0.7,
                 save_feature=False,
                 use_filter=False,
                 init_extractor=True,
                 max_age=30,
                 std_Q_w=1e-1,
                 std_Q_wv=1e-3,
                 std_R_w=5e-2,
                 cls_=0):
        self.confidence_l = confidence_l
        self.confidence_h = confidence_h
        self.iou_thresh_l = 0.24
        self.iou_thresh = 0.5
        self.nms_max_overlap = 1.0
        self.extractor = None
        self.height, self.width = None, None
        if init_extractor:
            self.extractor = Extractor(model_name=model_name, load_path=model_path,
                                       gpu_ids=gpu_ids, cls=cls_)
        max_iou = max_iou_distance
        nn_budget = 100
        metric = NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
        self.tracker = Tracker(metric, max_iou_distance=max_iou, max_age=max_age,
                               std_Q_w=std_Q_w, std_Q_wv=std_Q_wv, std_R_w=std_R_w)
        self.all_feature = None
        self.save_feature = save_feature
        self.count = 1
        self.result = []
        self.use_filter = use_filter
        # print('batch mode')

    def saveResult(self, file_name):
        if os.path.exists(file_name):
            os.remove(file_name)
        self.result = np.array(self.result)  # frameid_pid_tlwhc
        if self.use_filter:
            self.result = removeUnMoveLowConfObj(self.result)
        else:
            self.result = removeSmallOrBigBbox(self.result)
        writeResult(self.result, file_name)
        print('save result:', file_name)

    def getFeatureFromImage(self, bbox_tlwhcs, data, input_type, type):
        bbox_tlwhs = bbox_tlwhcs[:, 0:4]
        features = None
        if input_type == 'img':
            self.height, self.width = data.shape[:2]
            try:
                features = self._get_features_batch(bbox_tlwhs, data, type)
            except Exception as e:
                print(e)
        else:  # input_type == 'feature'
            features = data
        return features

    def update(self, frame_id, bbox_tlwhcs, ori_img, input_type='img', type=0):
        # print('ini boxs number:', len(bbox_tlwhcs))
        # print('ini confs number:', len(confidences))
        if len(bbox_tlwhcs) == 0:
            self.count += 1
            return [], []

        confidences = bbox_tlwhcs[:, -1]
        mask_l = (confidences >= self.confidence_l) & (confidences < self.confidence_h)
        mask_h = confidences >= self.confidence_h
        bbox_tlwhcs_low = bbox_tlwhcs[mask_l, :]
        bbox_tlwhcs_true = bbox_tlwhcs[mask_h, :]

        # recover low-confidence boxes that overlap an active track
        bbox_tlwhcs_new = []
        bbox_tlwhcs_temp = bbox_tlwhcs_low.copy()
        for track in self.tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            if len(bbox_tlwhcs_temp) == 0:
                continue
            box_tlwh_temp = track.to_tlwh()
            ious_ = iou(box_tlwh_temp, bbox_tlwhcs_temp[:, 0:4])
            iou_max_ind = np.argmax(ious_)
            if ious_[iou_max_ind] > self.iou_thresh_l:
                bbox_tlwhcs_new.append(bbox_tlwhcs_temp[iou_max_ind])
                # np.delete returns a new array; keep the result so the box is not matched twice
                bbox_tlwhcs_temp = np.delete(bbox_tlwhcs_temp, iou_max_ind, axis=0)

        bbox_tlwhcs_new = np.array(bbox_tlwhcs_true.tolist() + bbox_tlwhcs_new)
        if len(bbox_tlwhcs_new) == 0:
            self.count += 1
            return [], []

        # try:
        #     indices = non_max_suppression(bbox_tlwhcs_new[:, 0:4], 0.6, bbox_tlwhcs_new[:, 4])
        #     bbox_tlwhcs_new = np.array([bbox_tlwhcs_new[i] for i in indices])
        # except Exception as e:
        #     print(e)
        #     return [], []
        if len(bbox_tlwhcs_new) == 0:
            self.count += 1
            return [], []

        bbox_tlwhs_new = bbox_tlwhcs_new[:, 0:4]
        confidences_new = bbox_tlwhcs_new[:, 4]
        features = self.getFeatureFromImage(bbox_tlwhcs_new, ori_img, input_type, type)
        if self.save_feature:
            if self.all_feature is None and len(features):
                self.all_feature = features
            else:
                self.all_feature = np.vstack((self.all_feature, features))

        detections = [
            Detection(bbox_tlwhs_new[i], conf, features[i], i)
            for i, conf in enumerate(confidences_new)
        ]

        # update tracker
        self.tracker.predict()
        self.tracker.update(detections, self.confidence_h)
        self.count += 1

        # output bbox identities
        outputs = []
        for track in self.tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            box_tlwh = track.to_tlwh()  # tlwh
            x1, y1, x2, y2 = self._tlwh_to_xyxy(box_tlwh)
            track_id = track.track_id
            conf = track.confidence
            ori_id = track.ori_id
            outputs.append(np.array([track_id, x1, y1, x2, y2, conf, ori_id]))
            self.result.append(
                np.array([frame_id, track_id, x1, y1, box_tlwh[2], box_tlwh[3], conf]))

        # map the original detections back to track ids (or -1 if unmatched)
        bbox_tlwhcs_results = []
        for i, bbox in enumerate(bbox_tlwhcs):
            track_id_ = -1
            for output in outputs:
                if int(output[6]) == i:
                    track_id_ = output[0]
            # if track_id_ == -1:
            #     continue
            box_tlwh = bbox[0:4]
            conf_ = bbox[4]
            x1, y1, x2, y2 = self._tlwh_to_xyxy(box_tlwh)
            bbox_tlwhcs_results.append(
                np.array([x1, y1, x2, y2, conf_, track_id_]))
        if len(bbox_tlwhcs_results) > 0:
            bbox_tlwhcs_results = np.stack(bbox_tlwhcs_results, axis=0)
        return bbox_tlwhcs_results, features

    # tlwh -> x1, y1, x2, y2
    def _tlwh_to_xyxy(self, bbox_tlwh):
        x1, y1, w, h = bbox_tlwh
        x2 = x1 + w
        y2 = y1 + h
        return x1, y1, x2, y2

    # tlwh -> x1, y1, x2, y2, clipped to the image bounds
    def _tlwh_to_limit_xyxy(self, bbox_tlwh):
        x1, y1, w, h = bbox_tlwh
        x1 = max(x1, 0)
        y1 = max(y1, 0)
        x2 = min(int(x1 + w), self.width - 1)
        y2 = min(int(y1 + h), self.height - 1)
        return int(x1), int(y1), x2, y2

    # for yolo (center x, center y, w, h -> x1, y1, x2, y2)
    def _cxcywh_to_xyxy(self, bbox_xywh):
        x, y, w, h = bbox_xywh
        x1 = max(int(x - w / 2), 0)
        x2 = min(int(x + w / 2), self.width - 1)
        y1 = max(int(y - h / 2), 0)
        y2 = min(int(y + h / 2), self.height - 1)
        return x1, y1, x2, y2

    def _get_features_batch(self, bbox_tlwhs, ori_img, type):
        imgs = []
        if self.width is None:
            self.height, self.width = ori_img.shape[:2]
        for box in bbox_tlwhs:
            x1, y1, x2, y2 = self._tlwh_to_limit_xyxy(box)
            im = ori_img[int(y1):int(y2), int(x1):int(x2)]
            imgs.append(im)
        features = self.extractor(imgs, 20, feature_type=type)
        return features

    def _get_features(self, bbox_tlwh, ori_img):
        features = []
        if self.width is None:
            self.height, self.width = ori_img.shape[:2]
        for box in bbox_tlwh:
            x1, y1, x2, y2 = self._tlwh_to_limit_xyxy(box)
            im = ori_img[int(y1):int(y2), int(x1):int(x2)]
            feature = self.extractor(im)[0]
            features.append(feature)
        if len(features):
            features = np.stack(features, axis=0)
        else:
            features = np.array([])
        return features

    def saveFeature(self, filename=None):
        if filename is not None:
            np.save(filename, self.all_feature)
            print('save feature:', filename)
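A minimal per-frame usage sketch for KCTracker. The detection layout (an (N, 5) array of top-left x, top-left y, w, h, confidence), the `my_detector` interface, and the file names are illustrative assumptions only; internally update() splits detections into high- and low-confidence groups as shown above.

# hedged sketch: my_detector, weight path and video/result file names are placeholders
import cv2
import numpy as np

tracker = KCTracker(model_path='reid_weights/', gpu_ids='0')   # hypothetical weights directory
cap = cv2.VideoCapture('input.mp4')                            # hypothetical input video
frame_id = 0
while True:
    ok, frame = cap.read()
    if not ok:
        break
    frame_id += 1
    dets_tlwhc = my_detector(frame)          # assumed: (N, 5) tlwh + confidence
    boxes, feats = tracker.update(frame_id, np.asarray(dets_tlwhc), frame)
tracker.saveResult('result.txt')             # writes the accumulated frame_id/track_id/tlwh/conf rows
cap.release()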
class DeepSort(object):  # DeepSort(torch.jit.ScriptModule)
    # processes the video frame by frame: one update() call per frame

    def __init__(self, model_path):
        super(DeepSort, self).__init__()
        # filter detection boxes by confidence: boxes (and their features) whose
        # confidence is not high enough are discarded
        self.min_confidence = 0.3
        # non-maximum suppression on the detection boxes, to remove multiple
        # boxes covering the same target
        self.nms_max_overlap = 1.0

        # reads the detection boxes of the current frame and extracts a deep
        # appearance feature for each box crop
        self.extractor = Extractor(model_path, use_cuda=True)

        max_cosine_distance = 0.2
        nn_budget = 100
        metric = NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
        self.tracker = Tracker(metric)

    # @script_method
    def update(self, bbox_xywh, confidences, ori_img):
        self.height, self.width = ori_img.shape[:2]

        # generate detections; features holds one appearance vector per box
        features = self._get_features(bbox_xywh, ori_img)
        # each Detection stores self.tlwh (top-left x, y), self.confidence and self.feature,
        # all as ndarrays; the confidence filter and NMS below could arguably be removed
        detections = [
            Detection(bbox_xywh[i], conf, features[i])
            for i, conf in enumerate(confidences) if conf > self.min_confidence
        ]

        # run non-maximum suppression
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = non_max_suppression(boxes, self.nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # update tracker
        self.tracker.predict()
        self.tracker.update(detections)

        # output bbox identities
        outputs = []
        for track in self.tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            box = track.to_tlwh()
            x1, y1, x2, y2 = self._xywh_to_xyxy(box)
            track_id = track.track_id
            outputs.append(np.array([x1, y1, x2, y2, track_id], dtype=int))
        if len(outputs) > 0:
            outputs = np.stack(outputs, axis=0)
        return outputs

    # @script_method
    def _xywh_to_xyxy(self, bbox_xywh):
        x, y, w, h = bbox_xywh
        x1 = max(int(x - w / 2), 0)
        x2 = min(int(x + w / 2), self.width - 1)
        y1 = max(int(y - h / 2), 0)
        y2 = min(int(y + h / 2), self.height - 1)
        return x1, y1, x2, y2

    # @script_method
    def _get_features(self, bbox_xywh, ori_img):
        features = []
        for box in bbox_xywh:
            x1, y1, x2, y2 = self._xywh_to_xyxy(box)
            im = ori_img[y1:y2, x1:x2]
            feature = self.extractor(im)[0]
            features.append(feature)
        if len(features):
            features = np.stack(features, axis=0)
        else:
            features = np.array([])
        return features