def kalman_predict_out_line(track, line, out_direction, predict_long=None):
    """Count the Kalman steps until the track's box reaches ``out_direction``.

    The track state is rolled forward with a fresh ``KalmanFilter`` until
    ``box_line_relative`` reports ``out_direction`` for the propagated box,
    the step budget is exhausted, or a prediction leaves the state unchanged.

    Args:
        track: object exposing ``tlbr``, ``mean`` and ``covariance``.
        line: line description forwarded to ``box_line_relative``.
        out_direction: value ``box_line_relative`` must return for the box to
            count as being out; ``'up'`` selects the larger default budgets.
        predict_long: optional cap on the number of steps. When ``None`` the
            cap is 50 for ``'up'`` and 4 for any other direction.

    Returns:
        Number of steps performed; 0 when the current box is already on the
        ``out_direction`` side.
    """
    # Nothing to simulate when the box is already where we want it.
    if box_line_relative(track.tlbr, line) == out_direction:
        return 0

    heading_up = out_direction == 'up'
    state_mean, state_cov = track.mean, track.covariance
    kf = KalmanFilter()

    # The first `update_steps` iterations feed the prediction back through
    # `update`; every later iteration keeps the raw prediction.
    update_steps = 15 if heading_up else 0
    if predict_long is None:
        step_limit = 50 if heading_up else 4
    else:
        step_limit = predict_long

    steps = 0
    while box_line_relative(mean_to_tlbr(state_mean), line) != out_direction:
        steps += 1
        mean_before = state_mean  # state at time t
        predicted_mean, predicted_cov = kf.predict(state_mean, state_cov)
        if steps <= update_steps:
            state_mean, state_cov = kf.update(state_mean, state_cov,
                                              predicted_mean[:4])
        else:
            state_mean, state_cov = predicted_mean, predicted_cov
        # Stop on budget exhaustion or when the prediction did not move at all.
        if (steps >= step_limit
                or np.sum(np.abs(mean_before - predicted_mean)) == 0):
            break
    return steps
def kalman_predict_out_line(track, line, out_direction):
    """Count the Kalman prediction steps until the track crosses ``line``.

    The track state is propagated with a fresh ``KalmanFilter`` (prediction
    only) until ``box_line_relative`` reports ``out_direction`` for the
    propagated box, the step budget is used up, or a prediction leaves the
    state unchanged.

    Args:
        track: object exposing ``tlbr``, ``mean``, ``covariance`` and
            ``infer_type()``.
        line: line description forwarded to ``box_line_relative``.
        out_direction: value ``box_line_relative`` must return for the box to
            count as being out.

    Returns:
        Number of prediction steps performed; 0 when the current box is
        already on the ``out_direction`` side.
    """
    if box_line_relative(track.tlbr, line) == out_direction:
        return 0

    kal_man = KalmanFilter()
    prev_mean, prev_cov = track.mean, track.covariance

    # Step budget: 5 when heading 'up'; otherwise 2 for the listed classes
    # and 8 for everything else. The class label used to be misspelled
    # 'biycycle', so bicycles silently received the long budget of 8.
    # `infer_type()` is still only consulted for non-'up' directions.
    if out_direction == 'up':
        max_long_predict = 5
    elif track.infer_type() in ('person', 'motorcycle', 'bicycle'):
        max_long_predict = 2
    else:
        max_long_predict = 8

    # NOTE: the previous revision carried a `predict_thres` that was 0 for
    # every direction, so its Kalman `update` branch could never execute;
    # the loop below is the prediction-only path that actually ran.
    predict_num_out = 0
    while box_line_relative(mean_to_tlbr(prev_mean), line) != out_direction:
        predict_num_out += 1
        cur_mean = prev_mean  # state at time t
        mean, cov = kal_man.predict(prev_mean, prev_cov)
        prev_mean, prev_cov = mean, cov  # state at time t + 1
        # Stop on budget exhaustion or when the prediction did not move.
        if (predict_num_out >= max_long_predict
                or np.sum(np.abs(cur_mean - mean)) == 0):
            break
    return predict_num_out
def __init__(self, opt, frame_rate=30):
    """Build the network and initialise the tracker state.

    Args:
        opt: options object; reads ``gpus``, ``arch``, ``heads``,
            ``head_conv``, ``load_model``, ``conf_thres``, ``track_buffer``,
            ``mean`` and ``std``, and writes ``device`` back onto it.
        frame_rate: video frame rate used to scale the lost-track buffer.
    """
    self.opt = opt
    opt.device = torch.device('cuda' if opt.gpus[0] >= 0 else 'cpu')

    print('Creating model...')
    # Create the network, load its weights, move it to the device.
    self.model = load_model(
        create_model(opt.arch, opt.heads, opt.head_conv),
        opt.load_model).to(opt.device)
    self.model.eval()

    self.tracked_stracks = []  # type: list[STrack]  # tracks currently being tracked
    self.lost_stracks = []  # type: list[STrack]  # tracks that have been lost
    self.removed_stracks = []  # type: list[STrack]  # tracks that have been removed

    self.frame_id = 0
    # Detection-box threshold; set equal to the tracking confidence threshold.
    self.det_thresh = opt.conf_thres
    # Buffer length scaled by the real frame rate of the input video.
    self.buffer_size = int(frame_rate / 30.0 * opt.track_buffer)
    # A track that stays unmatched for this many frames is considered lost.
    self.max_time_lost = self.buffer_size
    self.max_per_image = 128

    self.mean = np.array(opt.mean, dtype=np.float32).reshape((1, 1, 3))
    self.std = np.array(opt.std, dtype=np.float32).reshape((1, 1, 3))

    # Predicts the current-frame position and velocity of a target from its
    # position and velocity in the previous frame.
    self.kalman_filter = KalmanFilter()
def __init__(self, opt, frame_rate=30):
    """Create the model and the empty tracking state.

    Args:
        opt: options object; reads ``gpus``, ``arch``, ``heads``,
            ``head_conv``, ``load_model``, ``conf_thres``, ``track_buffer``,
            ``K``, ``mean`` and ``std``, and writes ``device`` back onto it.
        frame_rate: video frame rate used to scale the lost-track buffer.
    """
    self.opt = opt
    use_gpu = opt.gpus[0] >= 0
    opt.device = torch.device('cuda') if use_gpu else torch.device('cpu')

    print('Creating model...')
    net = create_model(opt.arch, opt.heads, opt.head_conv)
    net = load_model(net, opt.load_model)
    self.model = net.to(opt.device)
    self.model.eval()
    # (A disabled ONNX export experiment -- random 1x3x640x640 input written
    # to "./test.onnx" -- used to sit here as commented-out code.)

    # Per-state track containers.
    self.tracked_stracks = []  # type: list[STrack]
    self.lost_stracks = []  # type: list[STrack]
    self.removed_stracks = []  # type: list[STrack]

    self.frame_id = 0
    self.det_thresh = opt.conf_thres
    self.buffer_size = int(frame_rate / 30.0 * opt.track_buffer)
    self.max_time_lost = self.buffer_size
    self.max_per_image = opt.K

    # Per-channel normalisation constants, shaped (1, 1, 3).
    self.mean = np.array(opt.mean, dtype=np.float32).reshape((1, 1, 3))
    self.std = np.array(opt.std, dtype=np.float32).reshape((1, 1, 3))

    self.kalman_filter = KalmanFilter()
def __init__(self, opt, frame_rate=30):
    """Create the embedding model, tracker state and external detector.

    Args:
        opt: options object; reads ``gpus``, ``arch``, ``heads``,
            ``head_conv``, ``load_model``, ``conf_thres``, ``track_buffer``,
            ``K``, ``mean`` and ``std``, and writes ``device`` back onto it.
        frame_rate: video frame rate used to scale the lost-track buffer.
    """
    self.opt = opt
    opt.device = torch.device('cuda' if opt.gpus[0] >= 0 else 'cpu')

    print('Creating model...')
    network = create_model(opt.arch, opt.heads, opt.head_conv)
    network = load_model(network, opt.load_model)
    self.model = network.to(opt.device)
    self.model.eval()

    self.tracked_stracks = []  # type: list[STrack]
    self.lost_stracks = []  # type: list[STrack]
    self.removed_stracks = []  # type: list[STrack]

    self.frame_id = 0
    self.det_thresh = opt.conf_thres
    self.buffer_size = int(frame_rate / 30.0 * opt.track_buffer)
    self.max_time_lost = self.buffer_size
    self.max_per_image = opt.K
    self.mean = np.array(opt.mean, dtype=np.float32).reshape((1, 1, 3))
    self.std = np.array(opt.std, dtype=np.float32).reshape((1, 1, 3))

    self.kalman_filter = KalmanFilter()
    self.roi_align = RoIAlign(7, 7)

    # Detector configuration: the two YAML files are merged in this order.
    # NOTE(review): both paths are absolute and machine-specific.
    detector_cfg = get_config()
    for cfg_path in (
            "/home/hongwei/track-human/FairMOT/src/lib/tracker/deep_configs/yolov3.yaml",
            "/home/hongwei/track-human/FairMOT/src/lib/tracker/deep_configs/deep_sort.yaml",
    ):
        detector_cfg.merge_from_file(cfg_path)
    self.detector = build_detector(detector_cfg, True)
def __init__(self, opt, frame_rate=30):
    """Set up the model and tracking state (at most 128 detections per image).

    Args:
        opt: options object; reads ``gpus``, ``arch``, ``heads``,
            ``head_conv``, ``load_model``, ``conf_thres``, ``track_buffer``,
            ``mean`` and ``std``, and writes ``device`` back onto it.
        frame_rate: video frame rate used to scale the lost-track buffer.
    """
    self.opt = opt

    # Pick the device from the first configured GPU id.
    device_name = 'cuda' if opt.gpus[0] >= 0 else 'cpu'
    opt.device = torch.device(device_name)

    print('Creating model...')
    model = create_model(opt.arch, opt.heads, opt.head_conv)
    model = load_model(model, opt.load_model)
    model = model.to(opt.device)
    self.model = model
    self.model.eval()

    # Track bookkeeping.
    self.tracked_stracks = []  # type: list[STrack]
    self.lost_stracks = []  # type: list[STrack]
    self.removed_stracks = []  # type: list[STrack]
    self.frame_id = 0

    # Thresholds and limits.
    self.det_thresh = opt.conf_thres
    self.buffer_size = int(frame_rate / 30.0 * opt.track_buffer)
    self.max_time_lost = self.buffer_size
    self.max_per_image = 128

    # Image normalisation constants and motion model.
    self.mean = np.array(opt.mean, dtype=np.float32).reshape((1, 1, 3))
    self.std = np.array(opt.std, dtype=np.float32).reshape((1, 1, 3))
    self.kalman_filter = KalmanFilter()
def __init__(self, opt, frame_rate=30):
    """Load the tracking network and reset all tracker bookkeeping.

    Args:
        opt: options object; reads ``gpus``, ``arch``, ``heads``,
            ``head_conv``, ``load_model``, ``conf_thres``, ``track_buffer``,
            ``K``, ``mean`` and ``std``, and writes ``device`` back onto it.
        frame_rate: video frame rate used to scale the lost-track buffer.
    """
    self.opt = opt
    if opt.gpus[0] < 0:
        opt.device = torch.device('cpu')
    else:
        opt.device = torch.device('cuda')

    print('Creating model...')
    self.model = load_model(
        create_model(opt.arch, opt.heads, opt.head_conv), opt.load_model)
    self.model = self.model.to(opt.device)
    self.model.eval()
    # (A disabled "convert to onnx" snippet -- outputs "hm", "wh", "id",
    # "reg" exported to 'pruned.onnx' -- used to sit here as commented-out
    # code.)

    self.tracked_stracks = []  # type: list[STrack]
    self.lost_stracks = []  # type: list[STrack]
    self.removed_stracks = []  # type: list[STrack]

    self.frame_id = 0
    self.det_thresh = opt.conf_thres
    self.buffer_size = int(frame_rate / 30.0 * opt.track_buffer)
    self.max_time_lost = self.buffer_size
    self.max_per_image = opt.K

    norm_shape = (1, 1, 3)
    self.mean = np.array(opt.mean, dtype=np.float32).reshape(norm_shape)
    self.std = np.array(opt.std, dtype=np.float32).reshape(norm_shape)

    self.kalman_filter = KalmanFilter()
def __init__(self, opt, frame_rate=30):
    """Build the GNN-augmented model and initialise the tracker state.

    Args:
        opt: options object; besides the usual ``gpus``, ``arch``, ``heads``,
            ``head_conv``, ``load_model``, ``track_buffer``, ``K``, ``mean``
            and ``std`` it supplies the GNN-related switches forwarded to
            ``create_model``. ``device`` is written back onto it.
        frame_rate: video frame rate used to scale the lost-track buffer.
    """
    self.opt = opt
    opt.device = torch.device('cuda' if opt.gpus[0] >= 0 else 'cpu')

    print('Creating model...')
    # Model options forwarded verbatim from `opt`.
    model_kwargs = dict(
        num_gnn_layers=opt.num_gnn_layers,
        gnn_type=opt.gnn_type,
        use_residual=opt.use_residual,
        return_pre_gnn_layer_outputs=opt.return_pre_gnn_layer_outputs,
        heads_share_params=opt.heads_share_params,
        omit_gnn=opt.omit_gnn,
        use_roi_align=opt.use_roi_align,
        viz_attention=opt.viz_attention,
    )
    self.model = create_model(opt.arch, opt.heads, opt.head_conv,
                              **model_kwargs)
    self.model = load_model(self.model, opt.load_model,
                            distributed=True, copy_head_weights=False)
    self.model = self.model.to(opt.device)
    self.model.eval()

    self.tracked_stracks = []  # type: list[STrack]
    self.lost_stracks = []  # type: list[STrack]
    self.removed_stracks = []  # type: list[STrack]

    self.frame_id = 0
    # `det_thresh` is intentionally not set in this variant (the assignment
    # from `opt.conf_thres` was commented out in the original).
    self.buffer_size = int(frame_rate / 30.0 * opt.track_buffer)
    self.max_time_lost = self.buffer_size
    self.max_per_image = opt.K
    self.mean = np.array(opt.mean, dtype=np.float32).reshape((1, 1, 3))
    self.std = np.array(opt.std, dtype=np.float32).reshape((1, 1, 3))

    self.kalman_filter = KalmanFilter()
    self.viz_attention = opt.viz_attention
def __init__(self, opt, frame_rate=30):
    """Initialise the tracker, optionally with a HOG re-identification model.

    Args:
        opt: options object; reads ``use_hog_reid`` and ``track_buffer``.
        frame_rate: video frame rate used to scale the lost-track buffer.
    """
    self.opt = opt

    if opt.use_hog_reid:
        print('USE HOG AS FEATURE EXTRACTION !!!!')
        # Crop size fed to the descriptor, as (height, width).
        self.re_im_h, self.re_im_w = 300, 120
        cell_h, cell_w = 50, 24
        block_h, block_w = 6, 5
        nbins = 9
        # Window size is the crop size rounded down to whole cells; all
        # sizes are passed to OpenCV as (width, height).
        win_w = self.re_im_w // cell_w * cell_w
        win_h = self.re_im_h // cell_h * cell_h
        self.reid_model = cv2.HOGDescriptor(
            _winSize=(win_w, win_h),
            _blockSize=(block_w * cell_w, block_h * cell_h),
            _blockStride=(cell_w, cell_h),
            _cellSize=(cell_w, cell_h),
            _nbins=nbins)

    self.tracked_stracks = []  # type: list[STrack]
    self.lost_stracks = []  # type: list[STrack]
    self.removed_stracks = []  # type: list[STrack]

    self.frame_id = 0
    self.buffer_size = int(frame_rate / 30.0 * opt.track_buffer)
    self.max_time_lost = self.buffer_size
    self.prev_img = None

    self.kalman_filter = KalmanFilter()
def reset(self):
    """Clear all tracks, rewind the frame counter and renew the Kalman filter.

    :return: None
    """
    # Fresh per-key track containers; each value is a list[Track].
    self.tracked_tracks_dict = defaultdict(list)
    self.lost_tracks_dict = defaultdict(list)
    self.removed_tracks_dict = defaultdict(list)

    # Start counting frames from zero again.
    self.frame_id = 0

    # New Kalman filter instance (used to stabilize tracking).
    self.kalman_filter = KalmanFilter()
def __init__(self, opt):
    """Build the Darknet tracking model and the empty per-key track tables.

    Args:
        opt: options object; reads ``device``, ``cfg``, ``img_size``,
            ``weights``, ``conf_thres`` and ``track_buffer``.
    """
    self.opt = opt
    device = opt.device

    # ---------- Init model
    # cls_id -> track id number for training
    max_ids_dict = {0: 330, 1: 102, 2: 104, 3: 312, 4: 53}

    # Model in track mode (does detection and reid feature vector extraction).
    self.model = Darknet(opt.cfg, opt.img_size, False, max_ids_dict, 128,
                         'track').to(device)

    # Load checkpoint: '.pt' files are PyTorch checkpoints, anything else is
    # handed to the darknet weight loader.
    if opt.weights.endswith('.pt'):
        ckpt = torch.load(opt.weights, map_location=device)
        self.model.load_state_dict(ckpt['model'])
        if 'epoch' in ckpt:
            print('Checkpoint of epoch {} loaded.'.format(ckpt['epoch']))
    else:
        load_darknet_weights(self.model, opt.weights)

    # Put model on the device and switch to eval mode.
    self.model.to(device).eval()
    # ----------

    # Track tables; each value is a list[Track].
    self.tracked_tracks_dict = defaultdict(list)
    self.lost_tracks_dict = defaultdict(list)
    self.removed_tracks_dict = defaultdict(list)

    # Frame index.
    self.frame_id = 0

    # Hyper-parameters.
    self.det_thresh = opt.conf_thres
    self.buffer_size = int(opt.track_buffer)
    self.max_time_lost = self.buffer_size
    # (mean/std normalisation constants are disabled in this variant.)

    # Kalman filter (used to stabilize tracking).
    self.kalman_filter = KalmanFilter()
def __init__(self, opt, model, frame_rate=30):
    """Initialise the tracker around an already-constructed model.

    Args:
        opt: options object; reads ``conf_thres``, ``track_buffer``, ``K``,
            ``mean`` and ``std``.
        model: ready-to-use network, stored as ``self.model`` unchanged.
        frame_rate: video frame rate used to scale the lost-track buffer.
    """
    self.opt = opt
    print('Creating model...')
    self.model = model

    # Track bookkeeping.
    self.tracked_stracks = []  # type: list[STrack]
    self.lost_stracks = []  # type: list[STrack]
    self.removed_stracks = []  # type: list[STrack]
    self.frame_id = 0

    self.det_thresh = opt.conf_thres
    self.buffer_size = int(frame_rate / 30.0 * opt.track_buffer)
    self.max_time_lost = self.buffer_size
    self.max_per_image = opt.K

    # Per-channel normalisation constants, shaped (1, 1, 3).
    self.mean = np.array(opt.mean, dtype=np.float32).reshape((1, 1, 3))
    self.std = np.array(opt.std, dtype=np.float32).reshape((1, 1, 3))

    self.kalman_filter = KalmanFilter()
def __init__(self, opt, frame_rate=30):
    """Create the ``dla_34`` model and initialise the tracker state.

    Args:
        opt: options object; reads ``heads``, ``head_conv``,
            ``multi_load_model``, ``device``, ``conf_thres`` and ``K``.
        frame_rate: video frame rate; used directly as the lost-track buffer
            length.
    """
    self.opt = opt

    print('Creating model...')
    network = create_model('dla_34', opt.heads, opt.head_conv)
    # Weights are only loaded when a checkpoint path was supplied.
    if opt.multi_load_model != '':
        network = load_model(network, opt.multi_load_model)
    self.model = network.to(opt.device)
    self.model.eval()

    self.tracked_stracks = []  # type: list[STrack]
    self.lost_stracks = []  # type: list[STrack]
    self.removed_stracks = []  # type: list[STrack]

    self.frame_id = 0
    self.det_thresh = opt.conf_thres
    self.buffer_size = int(frame_rate)
    self.max_time_lost = self.buffer_size
    self.max_per_image = opt.K

    self.kalman_filter = KalmanFilter()
def __init__(self, gpus, load_model_name, frame_rate=30):
    """Initialise the tracker from explicit arguments instead of ``opt``.

    Args:
        gpus: sized collection of GPU ids; a non-empty one selects CUDA.
        load_model_name: checkpoint passed to ``load_model``.
        frame_rate: video frame rate used to scale the lost-track buffer.
    """
    # Device: CUDA whenever at least one GPU id was given.
    self.device = torch.device('cuda' if len(gpus) > 0 else 'cpu')

    print('Creating model...')
    heads = {'hm': 1, 'wh': 4, 'id': 128, 'reg': 2}
    head_conv = 256
    network = create_model(heads, head_conv)
    network = load_model(network, load_model_name)
    self.model = network.to(self.device)
    self.model.eval()

    self.tracked_stracks = []  # type: list[STrack]
    self.lost_stracks = []  # type: list[STrack]
    self.removed_stracks = []  # type: list[STrack]

    self.frame_id = 0
    self.det_thresh = 0.4
    self.buffer_size = int(frame_rate / 30.0 * 30)
    self.max_time_lost = self.buffer_size
    self.max_per_image = 500

    # Hard-coded per-channel normalisation constants, shaped (1, 1, 3).
    self.mean = np.array([0.408, 0.447, 0.47],
                         dtype=np.float32).reshape((1, 1, 3))
    self.std = np.array([0.289, 0.274, 0.278],
                        dtype=np.float32).reshape((1, 1, 3))

    self.kalman_filter = KalmanFilter()

    # Fixed settings that the opt-based variants read from the options object.
    self.num_classes = 1
    self.down_ratio = 4
    self.reg_offset = True
    self.ltrb = True
    self.K = 500
    self.conf_thres = 0.4
class STrack(BaseTrack):
    """Single tracked object with a Kalman state and a smoothed appearance feature.

    The Kalman mean stores the box as ``(center x, center y, aspect ratio,
    height)`` in its first four entries (see ``tlwh_to_xyah``); predictions
    can additionally be warped by a camera-motion matrix.
    """

    # Filter shared by `multi_predict` for batched prediction.
    shared_kalman = KalmanFilter()
    out_of_frame_patience = 5
    num_cluster = 5

    def __init__(self, tlwh, score, temp_feat, buffer_size=30):
        """Create a not-yet-activated track.

        Args:
            tlwh: initial box as ``(top left x, top left y, width, height)``.
            score: detection score.
            temp_feat: appearance feature; normalised IN PLACE by
                ``update_features``.
            buffer_size: accepted for interface compatibility; not used here.
        """
        # wait activate
        # `np.float` was only an alias of the builtin `float` and has been
        # removed from NumPy, so the builtin is used directly.
        self._tlwh = np.asarray(tlwh, dtype=float)
        self.kalman_filter = None
        self.mean, self.covariance = None, None
        self.is_activated = False

        self.score = score
        self.tracklet_len = 0

        # Set before the first `update_features` call so that the method
        # never depends on an attribute that does not exist yet.
        self.alpha = 0.9
        self.smooth_feat = None
        self.update_features(temp_feat, None)

        self.occlusion_status = False  # use for bbox only
        self.iou_box = None  # use for bbox only
        self.num_out_frame = 0

    def update_features(self, feat, new_track):
        """Store ``feat`` as current feature and fold it into the smoothed one.

        ``feat`` is L2-normalised in place. ``new_track`` is accepted but not
        used by this implementation.
        """
        feat /= np.linalg.norm(feat)
        self.curr_feat = feat
        if self.smooth_feat is None:
            self.smooth_feat = feat
        else:
            # Exponential moving average weighted by `alpha`.
            self.smooth_feat = self.alpha * self.smooth_feat + (
                1 - self.alpha) * feat
        self.smooth_feat /= np.linalg.norm(self.smooth_feat)

    @staticmethod
    def warp_predict(mean, cov, warp_matrix, warp_mode):
        """Apply a camera-motion warp to the box part of a Kalman mean.

        Returns ``(mean, cov)`` untouched when ``warp_matrix`` is ``None``;
        otherwise the two box corners are transformed (perspective transform
        for ``cv2.MOTION_HOMOGRAPHY``, affine transform otherwise) and a new
        mean is built from the warped box and the original ``mean[4:]``. The
        covariance is returned unchanged.
        """
        if warp_matrix is None:
            return mean, cov
        track_xyah = mean[:4]
        track_tlwh = STrack.xyah_to_tlwh(track_xyah)
        track_tlbr = STrack.tlwh_to_tlbr(track_tlwh)
        x1, y1, x2, y2 = track_tlbr
        corners = np.array([[[x1, y1], [x2, y2]]])
        if warp_mode == cv2.MOTION_HOMOGRAPHY:
            warp_tlbr = cv2.perspectiveTransform(corners,
                                                 warp_matrix)[0].flatten()
        else:
            warp_tlbr = cv2.transform(corners, warp_matrix)[0].flatten()
        warp_tlwh = STrack.tlbr_to_tlwh(warp_tlbr)
        warp_xyah = STrack.tlwh_to_xyah(warp_tlwh)
        track_mean, track_cov = list(warp_xyah) + list(mean[4:]), cov
        return np.array(track_mean), track_cov

    @staticmethod
    def get_camera_intension(warp_matrix, warp_mode):
        """Return ``1 - cosine similarity`` between the warp and the identity.

        The flattened warp matrix is compared with the flattened identity
        transform (3x3 for homography, 2x3 otherwise); 0 is returned when no
        warp matrix is given.
        """
        if warp_matrix is None:
            return 0
        warp_matrix_flattern = warp_matrix.flatten()
        if warp_mode == cv2.MOTION_HOMOGRAPHY:
            non_change_warp = np.array([1, 0, 0, 0, 1, 0, 0, 0, 1])
        else:
            non_change_warp = np.array([1, 0, 0, 0, 1, 0])
        similarity = np.dot(warp_matrix_flattern, non_change_warp) / (
            np.sqrt(np.sum(warp_matrix_flattern**2)) *
            np.sqrt(np.sum(non_change_warp**2)))
        return 1 - similarity

    def predict(self, warp_matrix, warp_mode, smooth=0.0):
        """Run one Kalman prediction for this track, then warp the result."""
        mean_state = self.mean.copy()
        if self.state != TrackState.Tracked:
            # Entry 7 of the state is zeroed for tracks that are not
            # currently tracked (presumably the height velocity -- confirm
            # against the KalmanFilter state layout).
            mean_state[7] = 0
        motion_intensity = STrack.get_camera_intension(warp_matrix,
                                                       warp_mode) * smooth
        self.mean, self.covariance = self.kalman_filter.predict(
            mean_state, self.covariance, motion_intensity=motion_intensity)
        self.mean, self.covariance = STrack.warp_predict(
            self.mean, self.covariance, warp_matrix, warp_mode)

    @staticmethod
    def multi_predict(stracks, warp_matrix, warp_mode, smooth=0.0):
        """Batched version of ``predict`` using the shared Kalman filter."""
        if len(stracks) > 0:
            multi_mean = np.asarray([st.mean.copy() for st in stracks])
            multi_covariance = np.asarray([st.covariance for st in stracks])
            for i, st in enumerate(stracks):
                if st.state != TrackState.Tracked:
                    multi_mean[i][7] = 0
            motion_intensity = STrack.get_camera_intension(
                warp_matrix, warp_mode) * smooth
            multi_mean, multi_covariance = STrack.shared_kalman.multi_predict(
                multi_mean, multi_covariance,
                motion_intensity=motion_intensity)
            for i, (mean, cov) in enumerate(zip(multi_mean,
                                                multi_covariance)):
                stracks[i].mean = mean
                stracks[i].covariance = cov
                stracks[i].mean, stracks[i].covariance = STrack.warp_predict(
                    stracks[i].mean, stracks[i].covariance, warp_matrix,
                    warp_mode)

    def activate(self, kalman_filter, frame_id):
        """Start a new tracklet"""
        self.kalman_filter = kalman_filter
        self.track_id = self.next_id()
        self.mean, self.covariance = self.kalman_filter.initiate(
            self.tlwh_to_xyah(self._tlwh))

        self.tracklet_len = 0
        self.state = TrackState.Tracked
        # NOTE: `is_activated` is deliberately left untouched here (the
        # assignment was commented out in the original).
        self.frame_id = frame_id
        self.start_frame = frame_id

    def re_activate(self, new_track, frame_id, new_id=False):
        """Resume this track from a new detection, optionally with a new id."""
        self.mean, self.covariance = self.kalman_filter.update(
            self.mean, self.covariance, self.tlwh_to_xyah(new_track.tlwh))

        self.update_features(new_track.curr_feat, new_track)
        self.tracklet_len = 0
        self.state = TrackState.Tracked
        self.is_activated = True
        self.frame_id = frame_id
        if new_id:
            self.track_id = self.next_id()

    def update(self, new_track, frame_id, update_feature=True):
        """
        Update a matched track
        :type new_track: STrack
        :type frame_id: int
        :type update_feature: bool
        :return:
        """
        self.frame_id = frame_id
        self.tracklet_len += 1

        new_tlwh = new_track.tlwh
        self.mean, self.covariance = self.kalman_filter.update(
            self.mean, self.covariance, self.tlwh_to_xyah(new_tlwh))
        self.state = TrackState.Tracked
        self.is_activated = True

        self.score = new_track.score
        if update_feature:
            self.update_features(new_track.curr_feat, new_track)

    @property
    def tlwh(self):
        """Get current position in bounding box format `(top left x, top left y,
                width, height)`.
        """
        if self.mean is None:
            return self._tlwh.copy()
        ret = self.mean[:4].copy()
        ret[2] *= ret[3]  # aspect ratio * height -> width
        ret[:2] -= ret[2:] / 2  # center -> top-left corner
        return ret

    @property
    def tlbr(self):
        """Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,
        `(top left, bottom right)`.
        """
        ret = self.tlwh.copy()
        ret[2:] += ret[:2]
        return ret

    @staticmethod
    def tlwh_to_xyah(tlwh):
        """Convert bounding box to format `(center x, center y, aspect ratio,
        height)`, where the aspect ratio is `width / height`.
        """
        ret = np.asarray(tlwh).copy()
        ret[:2] += ret[2:] / 2
        ret[2] /= ret[3]
        return ret

    @staticmethod
    def xyah_to_tlwh(xyah):
        """Inverse of ``tlwh_to_xyah``; returns a plain 4-element list."""
        w, h = xyah[2] * xyah[3], xyah[3]
        x, y = xyah[0], xyah[1]
        t, l = x - w / 2, y - h / 2
        return [t, l, w, h]

    def to_xyah(self):
        """Return the current box as `(center x, center y, aspect ratio, height)`."""
        return self.tlwh_to_xyah(self.tlwh)

    @staticmethod
    def tlbr_to_tlwh(tlbr):
        """Convert `(min x, min y, max x, max y)` to `(x, y, width, height)`."""
        ret = np.asarray(tlbr).copy()
        ret[2:] -= ret[:2]
        return ret

    @staticmethod
    def tlwh_to_tlbr(tlwh):
        """Convert `(x, y, width, height)` to `(min x, min y, max x, max y)`."""
        ret = np.asarray(tlwh).copy()
        ret[2:] += ret[:2]
        return ret

    def __repr__(self):
        return 'OT_{}_({}-{})'.format(self.track_id, self.start_frame,
                                      self.end_frame)
def __init__(self, opt, polygon, paths, polygon2=None, frame_rate=30):
    """Create the detector selected by ``opt.detection_model`` and the tracker state.

    Args:
        opt: options object; reads ``gpus``, ``compound_coef``,
            ``detection_model``, ``min_img_size``, ``conf_thres``,
            ``track_buffer`` and ``K``, and writes ``device`` back onto it.
        polygon: sequence of points; points 0-4 are used to build
            ``line1`` .. ``line4``.
        paths: stored unchanged as ``self.paths``.
        polygon2: optional second polygon, stored unchanged.
        frame_rate: video frame rate used to scale the lost-track buffer.

    Raises:
        ValueError: if ``opt.detection_model`` is neither ``'Efficient'`` nor
            ``'FasterRcnn'``.
    """
    self.opt = opt
    if opt.gpus[0] >= 0:
        opt.device = torch.device('cuda')
    else:
        opt.device = torch.device('cpu')
    print('Creating model...')

    anchor_ratios = [(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)]
    anchor_scales = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)]
    # Network input size, indexed by the EfficientDet compound coefficient.
    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]
    self.input_size = input_sizes[opt.compound_coef]

    if opt.detection_model == 'Efficient':
        # Class names indexed by detector class id; '' marks unused ids.
        self.obj_list = [
            'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
            'train', 'truck', 'boat', 'traffic light', 'fire hydrant', '',
            'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog',
            'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
            '', 'backpack', 'umbrella', '', '', 'handbag', 'tie', 'suitcase',
            'frisbee', 'skis', 'snowboard', 'sports ball', 'kite',
            'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
            'tennis racket', 'bottle', '', 'wine glass', 'cup', 'fork',
            'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich',
            'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut',
            'cake', 'chair', 'couch', 'potted plant', 'bed', '',
            'dining table', '', '', 'toilet', '', 'tv', 'laptop', 'mouse',
            'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
            'toaster', 'sink', 'refrigerator', '', 'book', 'clock', 'vase',
            'scissors', 'teddy bear', 'hair drier', 'toothbrush',
        ]
        self.person_or_motorcycle = ['person']
        self.obj_interest = [
            'motorcycle', 'bicycle', 'bus', 'truck', 'car'
        ] if self.person_or_motorcycle[0] != 'person' else [
            'person', 'bus', 'truck', 'car'
        ]
        self.detetection_model = EfficientDetBackbone(
            compound_coef=opt.compound_coef,
            num_classes=len(self.obj_list),
            ratios=anchor_ratios,
            scales=anchor_scales)
        self.detetection_model.load_state_dict(
            torch.load(
                f'EfficientDet/weights/efficientdet-d{opt.compound_coef}.pth'))
        self.detetection_model.eval()
        # NOTE(review): the detector is always placed on 'cuda:0',
        # independently of `opt.device` chosen above.
        device = torch.device('cuda:0')
        self.detetection_model = self.detetection_model.to(device)
    elif opt.detection_model == 'FasterRcnn':
        config_file = "Drone_FasterRCNN/drone_demo/e2e_faster_rcnn_X_101_32x8d_FPN_1x_visdrone.yaml"
        cfg.merge_from_file(config_file)
        cfg.merge_from_list([
            "MODEL.WEIGHT",
            "Drone_FasterRCNN/drone_demo/visdrone_model_0360000.pth"
        ])
        self.detetection_model = COCODemo(
            cfg,
            min_image_size=opt.min_img_size,
            confidence_threshold=opt.conf_thres,
        )
        self.person_or_motorcycle = ["motor"]  # 'bicycle'
        self.obj_interest = [
            'motor', 'bus', 'truck', 'car', 'van', "tricycle"
        ] if self.person_or_motorcycle[0] != 'person' else [
            'person', 'bus', 'truck', 'car', 'van', "tricycle"
        ]
    else:
        # Raising a bare string is itself a TypeError in Python 3 and hides
        # the intended message, so raise a real exception instead.
        raise ValueError('Not supported detector model')

    self.tracked_stracks = []  # type: list[STrack]
    self.lost_stracks = []  # type: list[STrack]
    self.removed_stracks = []  # type: list[STrack]

    self.frame_id = 0
    self.det_thresh = opt.conf_thres
    self.buffer_size = int(frame_rate / 30.0 * opt.track_buffer)
    self.max_time_lost = self.buffer_size
    self.max_per_image = opt.K

    self.kalman_filter = KalmanFilter()

    self.polygon = polygon
    self.paths = paths
    self.polygon2 = polygon2
    # Border segments built from the polygon points.
    self.line1 = [polygon[0], polygon[1]]
    self.line2 = [polygon[1], polygon[2]]
    self.line3 = [polygon[2], polygon[3]]
    # NOTE(review): uses point index 4, so `polygon` needs >= 5 points.
    self.line4 = [polygon[0], polygon[4]]
    self.two_polygon_system = True
    self.warmup_frame = 0
    self.virtual_polygon = [[0, 680], [0, 149], [1270, 149], [1270, 680]]
class STrack(BaseTrack):
    """Single tracked vehicle with a Kalman state, type votes and trajectory.

    The Kalman mean stores the box as ``(center x, center y, aspect ratio,
    height)`` in its first four entries (see ``tlwh_to_xyah``).
    """

    # Filter shared by `multi_predict` for batched prediction.
    shared_kalman = KalmanFilter()
    out_of_frame_patience = 5
    num_cluster = 5
    # `vehicle_type` is reported once the track has this many frames ...
    type_infer_patience = 4
    # ... or once its score reaches this threshold.
    score_infer_type_thres = 0.6

    def __init__(self, tlwh, score, vehicle_type, buffer_size=30,
                 temp_feat=None, huge_vehicle=False):
        """Create a not-yet-activated track.

        Args:
            tlwh: initial box as ``(top left x, top left y, width, height)``.
            score: detection score.
            vehicle_type: class label of the first detection.
            buffer_size: maximum number of features kept in ``features``.
            temp_feat: accepted for interface compatibility; not used here
                (the initial ``update_features`` call is disabled).
            huge_vehicle: flag stored unchanged on the track.
        """
        # wait activate
        # `np.float` was only an alias of the builtin `float` and has been
        # removed from NumPy, so the builtin is used directly.
        self._tlwh = np.asarray(tlwh, dtype=float)
        self.kalman_filter = None
        self.mean, self.covariance = None, None
        self.is_activated = False

        self.score = score
        self.tracklet_len = 0

        # NOTE(review): `curr_feat` is not set here because the initial
        # feature update is disabled; `update(..., update_feature=True)`
        # reads `new_track.curr_feat` -- confirm callers set it first.
        self.smooth_feat = None
        self.features = deque([], maxlen=buffer_size)
        self.alpha = 0.6
        self.num_out_frame = 0
        self.cluster_features = {'centers': [], 'cluster': []}
        self.track_frames = []
        self.w_hs = []
        self.occlusion_status = False  # use for bbox only
        self.iou_box = None  # use for bbox only
        self.box_hist = []
        self.vehicle_types_list = []
        self.vehicle_types_list.append(vehicle_type)
        self.track_trajectory = []
        self.track_trajectory.append(self.tlwh_to_tlbr(tlwh))
        self.huge_vehicle = huge_vehicle

    @staticmethod
    def _most_frequent(values):
        """Return the most common element of the list ``values``."""
        return max(set(values), key=values.count)

    def update_cluster(self, feat):
        """Assign ``feat`` (normalised IN PLACE) to a feature cluster.

        The first ``num_cluster`` features each start a cluster; afterwards a
        feature joins the cluster whose center is closest in cosine distance
        and that center is recomputed as the cluster mean.
        """
        feat /= np.linalg.norm(feat)
        if len(self.cluster_features['cluster']) < STrack.num_cluster:
            self.cluster_features['cluster'].append([feat])
            self.cluster_features['centers'].append(feat)
        else:
            min_center = np.argmin(
                np.squeeze(
                    cdist(self.cluster_features['centers'], [feat],
                          metric='cosine')))
            self.cluster_features['cluster'][min_center].append(feat)
            self.cluster_features['centers'][min_center] = np.mean(
                self.cluster_features['cluster'][min_center], axis=0)
            # NOTE(review): this divides ALL centers by the norm of the whole
            # center matrix (and turns the list into an ndarray) rather than
            # normalising the updated center only -- confirm the intent
            # before enabling this method.
            self.cluster_features['centers'] /= np.linalg.norm(
                self.cluster_features['centers'])

    def update_features(self, feat, iou_box):
        """Store ``feat`` and blend it into the smoothed feature.

        The blending weight of the previous smoothed feature is
        ``(1 - alpha) * iou_box + alpha``; a missing ``iou_box`` counts as 0.
        Features are not normalised by this variant.
        """
        self.curr_feat = feat
        if self.smooth_feat is None:
            self.smooth_feat = feat
        else:
            # Identity check: `== None` on an array compares elementwise.
            if iou_box is None:
                iou_box = 0
            update_param = (1 - self.alpha) * iou_box + self.alpha
            self.smooth_feat = (update_param * self.smooth_feat +
                                (1 - update_param) * feat)
            self.box_hist.append(update_param)
        self.features.append(feat)

    def predict(self):
        """Run one Kalman prediction step for this track."""
        mean_state = self.mean.copy()
        if self.state != TrackState.Tracked:
            # Entry 7 of the state is zeroed for tracks that are not
            # currently tracked (presumably the height velocity -- confirm
            # against the KalmanFilter state layout).
            mean_state[7] = 0
        self.mean, self.covariance = self.kalman_filter.predict(
            mean_state, self.covariance)

    @staticmethod
    def multi_predict(stracks):
        """Batched version of ``predict`` using the shared Kalman filter."""
        if len(stracks) > 0:
            multi_mean = np.asarray([st.mean.copy() for st in stracks])
            multi_covariance = np.asarray([st.covariance for st in stracks])
            for i, st in enumerate(stracks):
                if st.state != TrackState.Tracked:
                    multi_mean[i][7] = 0
            multi_mean, multi_covariance = STrack.shared_kalman.multi_predict(
                multi_mean, multi_covariance)
            for i, (mean, cov) in enumerate(zip(multi_mean,
                                                multi_covariance)):
                stracks[i].mean = mean
                stracks[i].covariance = cov

    def activate(self, kalman_filter, frame_id):
        """Start a new tracklet"""
        self.kalman_filter = kalman_filter
        self.track_id = self.next_id()
        self.mean, self.covariance = self.kalman_filter.initiate(
            self.tlwh_to_xyah(self._tlwh))

        self.tracklet_len = 0
        self.state = TrackState.Tracked
        # NOTE: `is_activated` is deliberately left untouched here (the
        # assignment was commented out in the original).
        self.frame_id = frame_id
        self.start_frame = frame_id
        self.track_frames.append(frame_id)

    def re_activate(self, new_track, frame_id, new_id=False):
        """Resume this track from a new detection, optionally with a new id."""
        new_tlwh = new_track.tlwh
        self.track_trajectory.append(self.tlwh_to_tlbr(new_tlwh))
        self.mean, self.covariance = self.kalman_filter.update(
            self.mean, self.covariance, self.tlwh_to_xyah(new_track.tlwh))

        # Feature / cluster updates are disabled on re-activation.
        self.tracklet_len = 0
        self.state = TrackState.Tracked
        self.is_activated = True
        self.frame_id = frame_id
        if new_id:
            self.track_id = self.next_id()
        self.track_frames.append(frame_id)

    def update(self, new_track, frame_id, update_feature=False):
        """
        Update a matched track
        :type new_track: STrack
        :type frame_id: int
        :type update_feature: bool
        :return:
        """
        self.frame_id = frame_id
        self.tracklet_len += 1
        # Record the latest class vote of the matched detection.
        self.vehicle_types_list.append(new_track.vehicle_types_list[-1])
        new_tlwh = new_track.tlwh
        self.track_trajectory.append(self.tlwh_to_tlbr(new_tlwh))
        self.mean, self.covariance = self.kalman_filter.update(
            self.mean, self.covariance, self.tlwh_to_xyah(new_tlwh))
        self.state = TrackState.Tracked
        self.is_activated = True

        self.score = new_track.score
        if update_feature:
            self.update_features(new_track.curr_feat, new_track.iou_box)
        self.track_frames.append(frame_id)

    def infer_type(self):
        """Return the most frequent class label seen for this track."""
        return self._most_frequent(self.vehicle_types_list)

    @property
    def vehicle_type(self):
        """Most frequent class label, or ``'Undetermine'`` while undecided.

        A label is reported once the track spans at least
        ``type_infer_patience`` frames or its score reaches
        ``score_infer_type_thres``.
        """
        if (len(self.track_frames) >= self.type_infer_patience
                or self.score >= self.score_infer_type_thres):
            return self._most_frequent(self.vehicle_types_list)
        return 'Undetermine'

    @property
    def tlwh(self):
        """Get current position in bounding box format `(top left x, top left y,
                width, height)`.
        """
        if self.mean is None:
            return self._tlwh.copy()
        ret = self.mean[:4].copy()
        ret[2] *= ret[3]  # aspect ratio * height -> width
        ret[:2] -= ret[2:] / 2  # center -> top-left corner
        return ret

    @property
    def tlbr(self):
        """Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,
        `(top left, bottom right)`.
        """
        ret = self.tlwh.copy()
        ret[2:] += ret[:2]
        return ret

    @staticmethod
    def tlwh_to_xyah(tlwh):
        """Convert bounding box to format `(center x, center y, aspect ratio,
        height)`, where the aspect ratio is `width / height`.
        """
        ret = np.asarray(tlwh).copy()
        ret[:2] += ret[2:] / 2
        ret[2] /= ret[3]
        return ret

    def to_xyah(self):
        """Return the current box as `(center x, center y, aspect ratio, height)`."""
        return self.tlwh_to_xyah(self.tlwh)

    @staticmethod
    def tlbr_to_tlwh(tlbr):
        """Convert `(min x, min y, max x, max y)` to `(x, y, width, height)`."""
        ret = np.asarray(tlbr).copy()
        ret[2:] -= ret[:2]
        return ret

    @staticmethod
    def tlwh_to_tlbr(tlwh):
        """Convert `(x, y, width, height)` to `(min x, min y, max x, max y)`."""
        ret = np.asarray(tlwh).copy()
        ret[2:] += ret[:2]
        return ret

    def __repr__(self):
        return 'OT_{}_({}-{})'.format(self.track_id, self.start_frame,
                                      self.end_frame)
class JDETracker(object):
    """Multi-object tracker fed by an EfficientDet detector.

    Every call to :meth:`update` runs the detector on one frame, associates
    the resulting boxes with the existing tracks (first with the cost matrix
    from ``matching.gate_cost_matrix``, then by IoU), starts new tracks and
    reports the tracks that left ``polygon`` together with the movement id
    computed by ``counting_moi`` from ``paths``.
    """

    def __init__(self, opt, polygon, paths, frame_rate=30):
        """Load the detector and initialise the empty tracker state.

        Args:
            opt: Options object. This class reads ``gpus``, ``compound_coef``,
                ``conf_thres``, ``track_buffer``, ``K``, ``det_thres``,
                ``nms_thres`` and ``num_classes``; ``opt.device`` is written.
            polygon: Region of interest handed to the ``check_bbox_*`` helpers.
            paths: Movement definitions handed to ``counting_moi``.
            frame_rate: Frame rate of the input video; scales the lost-track
                buffer relative to 30 fps.
        """
        self.opt = opt
        if opt.gpus[0] >= 0:
            opt.device = torch.device('cuda')
        else:
            opt.device = torch.device('cpu')
        print('Creating model...')

        anchor_ratios = [(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)]
        anchor_scales = [2**0, 2**(1.0 / 3.0), 2**(2.0 / 3.0)]
        input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]
        self.input_size = input_sizes[opt.compound_coef]

        # Detector class ids index into this list, so the empty placeholders
        # must stay where they are.
        self.obj_list = [
            'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
            'train', 'truck', 'boat', 'traffic light', 'fire hydrant', '',
            'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog',
            'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
            '', 'backpack', 'umbrella', '', '', 'handbag', 'tie', 'suitcase',
            'frisbee', 'skis', 'snowboard', 'sports ball', 'kite',
            'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
            'tennis racket', 'bottle', '', 'wine glass', 'cup', 'fork',
            'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange',
            'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake',
            'chair', 'couch', 'potted plant', 'bed', '', 'dining table', '',
            '', 'toilet', '', 'tv', 'laptop', 'mouse', 'remote', 'keyboard',
            'cell phone', 'microwave', 'oven', 'toaster', 'sink',
            'refrigerator', '', 'book', 'clock', 'vase', 'scissors',
            'teddy bear', 'hair drier', 'toothbrush'
        ]
        self.person_or_motorcycle = ['person']
        self.obj_interest = [
            'motorcycle', 'bicycle', 'bus', 'train', 'truck', 'car'
        ] if self.person_or_motorcycle[0] != 'person' else [
            'person', 'bus', 'train', 'truck', 'car'
        ]
        print(self.obj_interest)

        self.detetection_model = EfficientDetBackbone(
            compound_coef=opt.compound_coef,
            num_classes=len(self.obj_list),
            ratios=anchor_ratios,
            scales=anchor_scales)
        self.detetection_model.load_state_dict(
            torch.load(
                f'EfficientDet/weights/efficientdet-d{opt.compound_coef}.pth'))
        self.detetection_model.eval()
        # NOTE(review): the detector is pinned to 'cuda:0' regardless of the
        # opt.device chosen above -- confirm whether CPU runs are expected.
        device = torch.device('cuda:0')
        self.detetection_model = self.detetection_model.to(device)

        self.tracked_stracks = []  # type: list[STrack]
        self.lost_stracks = []  # type: list[STrack]
        self.removed_stracks = []  # type: list[STrack]

        self.frame_id = 0
        self.det_thresh = opt.conf_thres
        self.buffer_size = int(frame_rate / 30.0 * opt.track_buffer)
        self.max_time_lost = self.buffer_size
        self.max_per_image = opt.K
        self.kalman_filter = KalmanFilter()
        self.polygon = polygon
        self.paths = paths

    def merge_outputs(self, detections):
        """Concatenate per-class detections and keep the best-scoring ones.

        Args:
            detections: Sequence of dicts mapping class id
                (``1..opt.num_classes``) to an array whose column 4 is the
                score.

        Returns:
            Dict mapping class id to a float32 array. When more than
            ``self.max_per_image`` rows exist in total, only rows whose score
            reaches the ``max_per_image``-th best score are kept.
        """
        class_ids = range(1, self.opt.num_classes + 1)
        results = {}
        for j in class_ids:
            results[j] = np.concatenate(
                [detection[j] for detection in detections],
                axis=0).astype(np.float32)

        scores = np.hstack([results[j][:, 4] for j in class_ids])
        if len(scores) > self.max_per_image:
            kth = len(scores) - self.max_per_image
            thresh = np.partition(scores, kth)[kth]
            for j in class_ids:
                keep_inds = (results[j][:, 4] >= thresh)
                results[j] = results[j][keep_inds]
        return results

    def update(self, im_blob, img0):
        """Process one frame and advance every track.

        Args:
            im_blob: Unused; kept for interface compatibility.
            img0: Frame as an array whose ``shape[0]`` is the height and
                ``shape[1]`` the width; it is passed to ``preprocess``.

        Returns:
            Tuple ``(output_stracks, detections_plot, out_of_polygon_tracklet)``:
            the activated tracks currently tracked, a shallow copy of all
            detections of this frame, and a list of
            ``(frame_id, track_id, track_type, movement_id)`` tuples for the
            tracks that left the polygon in this frame.
        """
        self.frame_id += 1
        activated_starcks = []
        refind_stracks = []
        lost_stracks = []
        removed_stracks = []

        width = img0.shape[1]
        height = img0.shape[0]

        ''' Step 1: Network forward, get detections & embeddings'''
        with torch.no_grad():
            ori_imgs, framed_imgs, framed_metas = preprocess(
                [img0], max_size=self.input_size)
            device = torch.device('cuda:0')
            x = torch.stack(
                [torch.from_numpy(fi).to(device) for fi in framed_imgs], 0)
            x = x.to(torch.float32).permute(0, 3, 1, 2)
            features, regression, classification, anchors = \
                self.detetection_model(x)
            regressBoxes = BBoxTransform()
            clipBoxes = ClipBoxes()
            out = postprocess(x, anchors, regression, classification,
                              regressBoxes, clipBoxes, self.opt.det_thres,
                              self.opt.nms_thres)
            out = invert_affine(framed_metas, out)

        bbox = []
        score = []
        types = []
        rois = out[0]['rois']
        for j in range(len(rois)):
            obj = self.obj_list[out[0]['class_ids'][j]]
            if obj not in self.obj_interest:
                continue
            # np.int was removed in NumPy 1.24; it was an alias of the
            # builtin int, so this cast is unchanged.
            x1, y1, x2, y2 = rois[j].astype(int)
            conf = float(out[0]['scores'][j])
            # Weak detections in the lower half of the frame are dropped.
            if (y1 + y2) / 2 > height / 2 and conf < 0.35:
                continue
            # Classes in person_or_motorcycle are kept at any score; every
            # other class needs a score of at least 0.3.
            if obj in self.person_or_motorcycle or conf >= 0.3:
                bbox.append([x1, y1, x2, y2])
                score.append(conf)
                types.append(obj)

        if len(bbox) > 0:
            '''Detections'''
            detections = [
                STrack(STrack.tlbr_to_tlwh(tlbr), sco, clas, 30)
                for (tlbr, sco, clas) in zip(bbox, score, types)
            ]
        else:
            detections = []
        detections_plot = detections.copy()

        ''' Add newly detected tracklets to tracked_stracks'''
        unconfirmed = []
        tracked_stracks = []  # type: list[STrack]
        for track in self.tracked_stracks:
            if not track.is_activated:
                unconfirmed.append(track)
            else:
                tracked_stracks.append(track)

        ''' Step 2: First association, with gating distance'''
        strack_pool, lost_map_tracks = joint_stracks(tracked_stracks,
                                                     self.lost_stracks)
        # Predict the current location with KF
        STrack.multi_predict(strack_pool)
        # The return value is not used here; the call is kept because Step 4
        # reads track.occlusion_status (presumably set by this helper --
        # verify against its definition).
        occlusion_map = heuristic_occlusion_detection(detections)
        match_thres = 850
        dists = np.zeros(shape=(len(strack_pool), len(detections)))
        dists = matching.gate_cost_matrix(self.kalman_filter,
                                          dists,
                                          strack_pool,
                                          detections,
                                          type_diff=True)
        matches, u_track, u_detection = matching.linear_assignment(
            dists, thresh=match_thres)

        for itracked, idet in matches:
            track = strack_pool[itracked]
            det = detections[idet]
            if track.state == TrackState.Tracked:
                track.update(detections[idet], self.frame_id)
                activated_starcks.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_stracks.append(track)

        ''' Step 3: Second association, with IOU'''
        detections = [detections[i] for i in u_detection]
        r_tracked_stracks = [
            strack_pool[i] for i in u_track
            if strack_pool[i].state == TrackState.Tracked
        ]
        dists = matching.iou_distance(r_tracked_stracks, detections)
        matches, u_track, u_detection = matching.linear_assignment(
            dists, thresh=0.5)

        for itracked, idet in matches:
            track = r_tracked_stracks[itracked]
            det = detections[idet]
            if track.state == TrackState.Tracked:
                track.update(det, self.frame_id)
                activated_starcks.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_stracks.append(track)

        for it in u_track:
            track = r_tracked_stracks[it]
            if not track.state == TrackState.Lost:
                track.mark_lost()
                lost_stracks.append(track)

        '''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
        detections = [detections[i] for i in u_detection]
        dists = matching.iou_distance(unconfirmed, detections)
        matches, u_unconfirmed, u_detection = matching.linear_assignment(
            dists, thresh=0.6)
        for itracked, idet in matches:
            unconfirmed[itracked].update(detections[idet], self.frame_id)
            activated_starcks.append(unconfirmed[itracked])
        for it in u_unconfirmed:
            track = unconfirmed[it]
            track.mark_removed()
            removed_stracks.append(track)

        """ Step 4: Init new stracks"""
        for inew in u_detection:
            track = detections[inew]
            if track.score < self.det_thresh or track.occlusion_status == True or check_bbox_outside_polygon(
                    self.polygon, track.tlbr):
                continue
            # From frame 5 on, person_or_motorcycle tracks may only start
            # fully inside the polygon.
            if self.frame_id >= 5 and track.infer_type(
            ) in self.person_or_motorcycle and not check_bbox_inside_polygon(
                    self.polygon, track.tlbr):
                continue
            track.activate(self.kalman_filter, self.frame_id)
            activated_starcks.append(track)

        """ Step 5: Update state and getting out of interest tracklet if have"""
        out_of_polygon_tracklet = []
        refind_stracks_copy = []
        activated_starcks_copy = []
        for idx, current_tracked_tracks in enumerate(
            [refind_stracks, activated_starcks]):
            for track in current_tracked_tracks:
                if check_bbox_outside_polygon(self.polygon, track.tlbr):
                    track.mark_removed()
                    removed_stracks.append(track)
                    # Only activated tracks (idx == 1) that were seen often
                    # enough are reported as counted vehicles.
                    if ((len(track.track_frames) >= 2 and self.frame_id <= 5)
                            or len(track.track_frames) >= 4) and idx == 1:
                        if tlbrs_to_mean_area(track.track_trajectory) <= 800:
                            track_type = self.person_or_motorcycle[0]
                        else:
                            track_type = track.infer_type()
                        track_center = [[(x[0] + x[2]) / 2,
                                         (x[1] + x[3]) / 2]
                                        for x in track.track_trajectory]
                        movement_id = counting_moi(
                            self.paths,
                            [(track_center[0], track_center[-1])])[0]
                        frame_id = self.frame_id + 1 if movement_id == '1' else self.frame_id + 2
                        out_of_polygon_tracklet.append(
                            (frame_id, track.track_id, track_type,
                             movement_id))
                elif idx == 0:
                    refind_stracks_copy.append(track)
                else:
                    activated_starcks_copy.append(track)
        refind_stracks = refind_stracks_copy
        activated_starcks = activated_starcks_copy

        lost_stracks_copy = []
        for track in lost_stracks:
            if check_bbox_intersect_or_outside_polygon(self.polygon,
                                                       track.tlbr):
                track.mark_removed()
                removed_stracks.append(track)
                if ((len(track.track_frames) >= 2 and self.frame_id <= 5)
                        or len(track.track_frames) >= 4):
                    if tlbrs_to_mean_area(track.track_trajectory) <= 800:
                        track_type = self.person_or_motorcycle[0]
                    else:
                        track_type = track.infer_type()
                    track_center = [[(x[0] + x[2]) / 2, (x[1] + x[3]) / 2]
                                    for x in track.track_trajectory]
                    movement_id = counting_moi(
                        self.paths,
                        [(track_center[0], track_center[-1])])[0]
                    # Both arms of the former conditional added 3 frames.
                    frame_id = self.frame_id + 3
                    out_of_polygon_tracklet.append(
                        (frame_id, track.track_id, track_type, movement_id))
            else:
                lost_stracks_copy.append(track)
        lost_stracks = lost_stracks_copy

        for track in self.lost_stracks:
            if self.frame_id - track.end_frame > self.max_time_lost:
                track.mark_removed()
                removed_stracks.append(track)
            # Remove out of screen tracklet
            elif track.tlwh[0] + track.tlwh[2] // 2 > width or track.tlwh[
                    1] + track.tlwh[3] // 2 > height or min(
                        track.tlwh[0] + track.tlwh[2] // 2,
                        track.tlwh[1] + track.tlwh[3] // 2) < 0:
                track.num_out_frame += 1
                if track.num_out_frame > STrack.out_of_frame_patience:
                    track.mark_removed()
                    removed_stracks.append(track)

        self.tracked_stracks = [
            t for t in self.tracked_stracks if t.state == TrackState.Tracked
        ]
        self.tracked_stracks, _ = joint_stracks(self.tracked_stracks,
                                                activated_starcks)
        self.tracked_stracks, _ = joint_stracks(self.tracked_stracks,
                                                refind_stracks)
        self.lost_stracks = sub_stracks(self.lost_stracks,
                                        self.tracked_stracks)
        self.lost_stracks.extend(lost_stracks)
        self.lost_stracks = sub_stracks(self.lost_stracks,
                                        self.removed_stracks)
        self.removed_stracks.extend(removed_stracks)
        self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(
            self.tracked_stracks, self.lost_stracks)

        output_stracks = [
            track for track in self.tracked_stracks if track.is_activated
        ]

        logger.debug('===========Frame {}=========='.format(self.frame_id))
        logger.debug('Activated: {}'.format(
            [track.track_id for track in activated_starcks]))
        logger.debug('Refind: {}'.format(
            [track.track_id for track in refind_stracks]))
        logger.debug('Lost: {}'.format(
            [track.track_id for track in lost_stracks]))
        logger.debug('Removed: {}'.format(
            [track.track_id for track in removed_stracks]))
        return output_stracks, detections_plot, out_of_polygon_tracklet

    # can paralel,current bottleneck of model
    def merge_track(self,
                    min_thres=0.2,
                    distance_thres=15,
                    consitence_thres=10):
        """Merge tracked tracks into lost tracks that look like the same object.

        A pair is a candidate when both tracks have more than
        ``consitence_thres`` frames, their frame ranges do not overlap and the
        Kalman gating distance between the lost track's extrapolated state and
        the tracked track's first box is at most ``distance_thres``. The cost
        of a candidate is the smallest cosine distance between the two tracks'
        cluster centres; pairs are then assigned with
        ``matching.linear_assignment`` using ``min_thres``.

        Args:
            min_thres: Threshold passed to ``matching.linear_assignment``.
            distance_thres: Largest accepted Kalman gating distance.
            consitence_thres: Tracks with this many frames or fewer are never
                merged.
        """

        def is_overlap(lost_track, tracked_track):
            # The previous version returned False or fell through to None and
            # so never reported an overlap; return the real boolean.
            return not (tracked_track.start_frame > lost_track.end_frame
                        or lost_track.start_frame > tracked_track.end_frame)

        def predict_future(lost_track, num_frame):
            mean, cov = lost_track.mean, lost_track.covariance
            for _ in range(num_frame):
                mean, cov = lost_track.kalman_filter.predict(mean, cov)
            return mean, cov

        def cluster_compare(lost_track, tracked_track):
            return np.min(
                cdist(lost_track.cluster_features['centers'],
                      tracked_track.cluster_features['centers'],
                      metric='cosine'))

        def distance(lost_track, tracked_track):
            if is_overlap(lost_track, tracked_track):
                return np.inf
            pred_mean, pred_cov = predict_future(
                lost_track, tracked_track.start_frame - lost_track.end_frame)
            tracked_xyah = STrack.tlwh_to_xyah(tracked_track._tlwh)
            if self.kalman_filter.gating_distance(
                    pred_mean, pred_cov, tracked_xyah) > distance_thres:
                return np.inf
            return cluster_compare(lost_track, tracked_track)

        cost_matrix = np.zeros(shape=(len(self.lost_stracks),
                                      len(self.tracked_stracks)))
        for i in range(cost_matrix.shape[0]):
            for j in range(cost_matrix.shape[1]):
                if min(len(self.lost_stracks[i].track_frames),
                       len(self.tracked_stracks[j].track_frames)
                       ) <= consitence_thres:
                    cost_matrix[i][j] = np.inf
                else:
                    cost_matrix[i][j] = distance(self.lost_stracks[i],
                                                 self.tracked_stracks[j])

        matches, _, _ = matching.linear_assignment(cost_matrix,
                                                   thresh=min_thres)
        # 1 = lost track stays lost, 0 = lost track absorbed a tracked track.
        map_lost_track = np.ones_like(self.lost_stracks, dtype=int)

        for i in range(cost_matrix.shape[0]):
            for j in range(cost_matrix.shape[1]):
                if cost_matrix[i][j] <= 1:
                    print('sim of ' + str(self.lost_stracks[i].track_id) +
                          ' and ' + str(self.tracked_stracks[j].track_id) +
                          ' : ' + str(cost_matrix[i][j]))

        if len(matches) == 0:
            return

        for ilost_track, i_tracked_track in matches:
            lost = self.lost_stracks[ilost_track]
            tracked = self.tracked_stracks[i_tracked_track]
            print('------------------------------------')
            print('merge ' + str(tracked.track_id) + ' to ' +
                  str(lost.track_id))
            map_lost_track[ilost_track] = 0  # pylint: disable=unsupported-assignment-operation
            for cluster in tracked.cluster_features['cluster']:
                for clus in cluster:
                    lost.update_cluster(clus)
            lost.mean, lost.covariance = tracked.mean, tracked.covariance
            lost.track_frames += tracked.track_frames
            lost.frame_id = tracked.frame_id
            # The revived lost track takes the tracked track's slot.
            self.tracked_stracks[i_tracked_track] = lost

        self.lost_stracks = [
            self.lost_stracks[ilost_track]
            for ilost_track in range(len(map_lost_track))
            if map_lost_track[ilost_track] == 1
        ]
class JDETracker(object):
    """Multi-object tracker supporting EfficientDet and Faster R-CNN detectors.

    Every call to :meth:`update` runs the configured detector on one frame,
    associates the boxes with the existing tracks (first with the cost matrix
    from ``matching.gate_cost_matrix3``, then by IoU), starts new tracks and
    reports the tracks that left the region of interest together with a
    movement id chosen by :meth:`heusristic_mov_detection`.
    """

    # Non two-wheel boxes taller or wider than these are flagged as "huge"
    # and initialised against ``virtual_polygon`` instead of the init polygon.
    _HUGE_BOX_HEIGHT = 150
    _HUGE_BOX_WIDTH = 180

    def __init__(self, opt, polygon, paths, polygon2=None, frame_rate=30):
        """Load the detector and initialise the empty tracker state.

        Args:
            opt: Options object. This class reads ``detection_model``
                (``'Efficient'`` or ``'FasterRcnn'``), ``gpus``,
                ``compound_coef``, ``conf_thres``, ``track_buffer``, ``K``,
                ``det_thres``, ``nms_thres``, ``num_classes`` and, for
                Faster R-CNN, ``min_img_size``; ``opt.device`` is written.
            polygon: Region of interest. Vertices 0-4 are indexed to build the
                four exit lines, so at least five vertices are required.
            paths: Mapping from movement id (string) to a path definition
                handed to ``counting_moi``.
            polygon2: Optional polygon used to initialise new tracks.
            frame_rate: Frame rate of the input video; scales the lost-track
                buffer relative to 30 fps.

        Raises:
            ValueError: If ``opt.detection_model`` is not supported.
        """
        self.opt = opt
        # Fail fast with a real exception; the previous ``raise('...')``
        # raised a str, which is itself a TypeError in Python 3.
        if opt.detection_model not in ('Efficient', 'FasterRcnn'):
            raise ValueError('Not supported detector model')

        if opt.gpus[0] >= 0:
            opt.device = torch.device('cuda')
        else:
            opt.device = torch.device('cpu')
        print('Creating model...')

        anchor_ratios = [(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)]
        anchor_scales = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)]
        input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]
        self.input_size = input_sizes[opt.compound_coef]

        if opt.detection_model == 'Efficient':
            # Detector class ids index into this list, so the empty
            # placeholders must stay where they are.
            self.obj_list = [
                'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
                'train', 'truck', 'boat', 'traffic light', 'fire hydrant', '',
                'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog',
                'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra',
                'giraffe', '', 'backpack', 'umbrella', '', '', 'handbag',
                'tie', 'suitcase', 'frisbee', 'skis', 'snowboard',
                'sports ball', 'kite', 'baseball bat', 'baseball glove',
                'skateboard', 'surfboard', 'tennis racket', 'bottle', '',
                'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
                'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
                'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
                'potted plant', 'bed', '', 'dining table', '', '', 'toilet',
                '', 'tv', 'laptop', 'mouse', 'remote', 'keyboard',
                'cell phone', 'microwave', 'oven', 'toaster', 'sink',
                'refrigerator', '', 'book', 'clock', 'vase', 'scissors',
                'teddy bear', 'hair drier', 'toothbrush'
            ]
            self.person_or_motorcycle = ['person']
            self.obj_interest = [
                'motorcycle', 'bicycle', 'bus', 'truck', 'car'
            ] if self.person_or_motorcycle[0] != 'person' else [
                'person', 'bus', 'truck', 'car'
            ]
            self.detetection_model = EfficientDetBackbone(
                compound_coef=opt.compound_coef,
                num_classes=len(self.obj_list),
                ratios=anchor_ratios,
                scales=anchor_scales)
            self.detetection_model.load_state_dict(
                torch.load(
                    f'EfficientDet/weights/efficientdet-d{opt.compound_coef}.pth'))
            self.detetection_model.eval()
            # NOTE(review): pinned to 'cuda:0' regardless of opt.device.
            device = torch.device('cuda:0')
            self.detetection_model = self.detetection_model.to(device)
        elif opt.detection_model == 'FasterRcnn':
            config_file = "Drone_FasterRCNN/drone_demo/e2e_faster_rcnn_X_101_32x8d_FPN_1x_visdrone.yaml"
            cfg.merge_from_file(config_file)
            cfg.merge_from_list([
                "MODEL.WEIGHT",
                "Drone_FasterRCNN/drone_demo/visdrone_model_0360000.pth"
            ])
            self.detetection_model = COCODemo(
                cfg,
                min_image_size=opt.min_img_size,
                confidence_threshold=opt.conf_thres,
            )
            self.person_or_motorcycle = ["motor"]
            self.obj_interest = [
                'motor', 'bus', 'truck', 'car', 'van', "tricycle"
            ] if self.person_or_motorcycle[0] != 'person' else [
                'person', 'bus', 'truck', 'car', 'van', "tricycle"
            ]

        self.tracked_stracks = []  # type: list[STrack]
        self.lost_stracks = []  # type: list[STrack]
        self.removed_stracks = []  # type: list[STrack]

        self.frame_id = 0
        self.det_thresh = opt.conf_thres
        self.buffer_size = int(frame_rate / 30.0 * opt.track_buffer)
        self.max_time_lost = self.buffer_size
        self.max_per_image = opt.K
        self.kalman_filter = KalmanFilter()

        self.polygon = polygon
        self.paths = paths
        self.polygon2 = polygon2
        # Exit lines used by the movement heuristics.
        self.line1 = [polygon[0], polygon[1]]
        self.line2 = [polygon[1], polygon[2]]
        self.line3 = [polygon[2], polygon[3]]
        self.line4 = [polygon[0], polygon[4]]
        self.two_polygon_system = True
        self.warmup_frame = 0
        self.virtual_polygon = [[0, 680], [0, 149], [1270, 149], [1270, 680]]

    def box_cross_line(self, bbox_list, line):
        """Return True if ``box_line_relative`` reports 'cross' for any box."""
        return any(
            box_line_relative(bbox, line) == 'cross' for bbox in bbox_list)

    def heusristic_mov_refinement(self, track, predict_mov):
        """Correct a predicted movement id using the line the track crossed.

        Only movements '5', '2' and '9' are re-examined, based on which of
        ``line1``/``line2`` the last three trajectory boxes cross and on
        coordinates of the first trajectory box. Every other movement id is
        returned unchanged.

        Args:
            track: Track providing ``track_trajectory`` (boxes indexed 0-3).
            predict_mov: Movement id predicted upstream.

        Returns:
            The refined movement id, ``'undetermine'``, or ``predict_mov``.
        """
        last_boxes = track.track_trajectory[-3:]
        first_box = track.track_trajectory[0]
        # 5,2,9 corection
        if str(predict_mov) == '5':
            if self.box_cross_line(last_boxes, self.line1):
                return '5'
            if self.box_cross_line(last_boxes, self.line2):
                if first_box[3] >= 350:
                    return '2'
                if first_box[2] >= 750:
                    return '9'
            return 'undetermine'
        if str(predict_mov) in ['2', '9']:
            if self.box_cross_line(last_boxes, self.line2):
                return predict_mov
            if self.box_cross_line(last_boxes, self.line1):
                return '5' if first_box[3] >= 350 else 'undetermine'
        # Nothing to refine: hand the prediction back instead of falling off
        # the end of the function with an implicit None.
        return predict_mov

    def _select_paths(self, *movement_ids):
        """Return the subset of ``self.paths`` for the given movement ids."""
        return {mov: self.paths[mov] for mov in movement_ids}

    def heusristic_mov_detection(self, track, track_center):
        """Pick a movement id from the exit line crossed by the track.

        The last four trajectory boxes are tested against ``line1`` ..
        ``line4`` in order; the first line crossed restricts the candidate
        movements passed to ``counting_moi``, which is evaluated on the
        first and last track centres.

        Args:
            track: Track providing ``track_trajectory``. The ``line1`` branch
                indexes ``track_trajectory[-4]``, so at least four boxes are
                needed there.
            track_center: List of ``[x, y]`` centres, one per trajectory box.

        Returns:
            A movement id (the integers 3 and 1 for the two hand-tuned
            shortcuts, otherwise whatever ``counting_moi`` yields) or
            ``'undetermine'``.
        """
        trajectory = track.track_trajectory
        last_boxes = trajectory[-4:]
        endpoints = [(track_center[0], track_center[-1])]

        if self.box_cross_line(last_boxes, self.line1) and track_center[-1][1] <= 470:
            paths = self._select_paths('3', '5', '11')
            looks_like_mov3 = (trajectory[-4][1] <= 265
                               and abs(trajectory[0][0] - trajectory[-1][0]) <= 175
                               and trajectory[-1][3] <= 295)
            if looks_like_mov3:
                return 3
            mov_id = counting_moi(paths, endpoints)
            # Movement 3 is only accepted through the shortcut above.
            if str(mov_id[0]) == '3':
                return 'undetermine'
            return mov_id[0]
        elif self.box_cross_line(last_boxes, self.line2) and track_center[-1][0] >= 550:
            paths = self._select_paths('2', '4', '9')
            return counting_moi(paths, endpoints)[0]
        elif self.box_cross_line(last_boxes, self.line3):
            paths = self._select_paths('7', '8', '10')
            return counting_moi(paths, endpoints)[0]
        elif self.box_cross_line(last_boxes, self.line4):
            paths = self._select_paths('6', '1', '12')
            mov_id = counting_moi(paths, endpoints)
            if str(mov_id[0]) == '12' and trajectory[0][0] <= 500:
                return 1 if trajectory[0][3] <= 350 else 'undetermine'
            return mov_id[0]
        else:
            return 'undetermine'

    def merge_outputs(self, detections):
        """Concatenate per-class detections and keep the best-scoring ones.

        Args:
            detections: Sequence of dicts mapping class id
                (``1..opt.num_classes``) to an array whose column 4 is the
                score.

        Returns:
            Dict mapping class id to a float32 array. When more than
            ``self.max_per_image`` rows exist in total, only rows whose score
            reaches the ``max_per_image``-th best score are kept.
        """
        class_ids = range(1, self.opt.num_classes + 1)
        results = {}
        for j in class_ids:
            results[j] = np.concatenate(
                [detection[j] for detection in detections],
                axis=0).astype(np.float32)

        scores = np.hstack([results[j][:, 4] for j in class_ids])
        if len(scores) > self.max_per_image:
            kth = len(scores) - self.max_per_image
            thresh = np.partition(scores, kth)[kth]
            for j in class_ids:
                keep_inds = (results[j][:, 4] >= thresh)
                results[j] = results[j][keep_inds]
        return results

    def _select_detections(self, candidates, height, two_wheel_min_score):
        """Filter raw ``(label, box, score)`` candidates for tracking.

        Args:
            candidates: Iterable of ``(label, (x1, y1, x2, y2), score)``.
            height: Frame height; weak boxes centred in the lower half are
                dropped.
            two_wheel_min_score: Minimum score for ``person_or_motorcycle``
                classes, or ``None`` to accept them at any score.

        Returns:
            Four parallel lists ``(bbox, score, types, huge_vehicles)``.
        """
        bbox, score, types, huge_vehicles = [], [], [], []
        for obj, (x1, y1, x2, y2), conf in candidates:
            if obj not in self.obj_interest:
                continue
            if (y1 + y2) / 2 > height / 2 and conf <= 0.25:
                continue
            # Boxes of at most 200 px^2 are too small to track.
            if (y2 - y1) * (x2 - x1) <= 200:
                continue
            if obj not in self.person_or_motorcycle:
                if not conf >= 0.3:
                    continue
                huge = not ((y2 - y1) <= self._HUGE_BOX_HEIGHT
                            and (x2 - x1) <= self._HUGE_BOX_WIDTH)
            else:
                if two_wheel_min_score is not None and not conf >= two_wheel_min_score:
                    continue
                huge = False
            bbox.append([x1, y1, x2, y2])
            score.append(conf)
            types.append(obj)
            huge_vehicles.append(huge)
        return bbox, score, types, huge_vehicles

    def _track_type(self, track):
        """Return the class label under which ``track`` is counted."""
        if tlbrs_to_mean_area(track.track_trajectory) <= 1000:
            return self.person_or_motorcycle[0]
        return track.infer_type()

    def update(self, im_blob, img0):
        """Process one frame and advance every track.

        Args:
            im_blob: Unused; kept for interface compatibility.
            img0: Frame as an array whose ``shape[0]`` is the height and
                ``shape[1]`` the width; it is passed to the detector.

        Returns:
            Tuple ``(output_stracks, detections_plot, out_of_polygon_tracklet)``:
            the activated tracks currently tracked, a deep copy of all
            detections of this frame, and a list of
            ``(frame_id, track_id, track_type, movement_id)`` tuples for the
            tracks that left the polygon in this frame.
        """
        self.frame_id += 1
        activated_starcks = []
        refind_stracks = []
        lost_stracks = []
        removed_stracks = []

        width = img0.shape[1]
        height = img0.shape[0]

        # Fall back to the main polygon when no second polygon was supplied
        # (polygon2 defaults to None).
        use_second_polygon = (self.two_polygon_system
                              and self.polygon2 is not None
                              and self.frame_id >= self.warmup_frame)
        init_polygon = self.polygon2 if use_second_polygon else self.polygon
        two_wheel_polygon = self.polygon
        four_wheel_polygon = self.polygon
        virtual_polygon = self.virtual_polygon

        bbox = []
        score = []
        types = []
        huge_vehicles = []

        ''' Step 1: Network forward, get detections & embeddings'''
        if self.opt.detection_model == 'Efficient':
            with torch.no_grad():
                ori_imgs, framed_imgs, framed_metas = preprocess(
                    [img0], max_size=self.input_size)
                device = torch.device('cuda:0')
                x = torch.stack(
                    [torch.from_numpy(fi).to(device) for fi in framed_imgs], 0)
                x = x.to(torch.float32).permute(0, 3, 1, 2)
                features, regression, classification, anchors = \
                    self.detetection_model(x)
                regressBoxes = BBoxTransform()
                clipBoxes = ClipBoxes()
                out = postprocess(x, anchors, regression, classification,
                                  regressBoxes, clipBoxes,
                                  self.opt.det_thres, self.opt.nms_thres)
                out = invert_affine(framed_metas, out)
            rois = out[0]['rois']
            # np.int was removed in NumPy 1.24; it was an alias of the
            # builtin int, so this cast is unchanged.
            candidates = ((self.obj_list[out[0]['class_ids'][j]],
                           rois[j].astype(int),
                           float(out[0]['scores'][j]))
                          for j in range(len(rois)))
            bbox, score, types, huge_vehicles = self._select_detections(
                candidates, height, two_wheel_min_score=None)
        elif self.opt.detection_model == 'FasterRcnn':
            predictions = self.detetection_model.compute_prediction(img0)
            top_predictions = self.detetection_model.select_top_predictions(
                predictions)
            scores = top_predictions.get_field("scores").tolist()
            labels = top_predictions.get_field("labels").tolist()
            labels = [self.detetection_model.CATEGORIES[i] for i in labels]
            boxes = top_predictions.bbox.tolist()
            candidates = ((labels[j], boxes[j], float(scores[j]))
                          for j in range(len(labels)))
            bbox, score, types, huge_vehicles = self._select_detections(
                candidates, height, two_wheel_min_score=self.opt.det_thres)

        if len(bbox) > 0:
            '''Detections'''
            detections = [
                STrack(STrack.tlbr_to_tlwh(tlbr), sco, clas, 30,
                       huge_vehicle=hv)
                for (tlbr, sco, clas, hv) in zip(bbox, score, types,
                                                 huge_vehicles)
            ]
        else:
            detections = []
        detections_plot = copy.deepcopy(detections)

        ''' Add newly detected tracklets to tracked_stracks'''
        unconfirmed = []
        tracked_stracks = []  # type: list[STrack]
        for track in self.tracked_stracks:
            if not track.is_activated:
                unconfirmed.append(track)
            else:
                tracked_stracks.append(track)

        ''' Step 2: First association, with gating distance'''
        strack_pool, lost_map_tracks = joint_stracks(tracked_stracks,
                                                     self.lost_stracks)
        # Predict the current location with KF
        STrack.multi_predict(strack_pool)
        detections = heuristic_occlusion_detection(detections)
        match_thres = 100
        dists = np.zeros(shape=(len(strack_pool), len(detections)))
        dists = matching.gate_cost_matrix3(self.kalman_filter, dists,
                                           strack_pool, detections,
                                           type_diff=True)
        matches, u_track, u_detection = matching.linear_assignment(
            dists, thresh=match_thres)

        for itracked, idet in matches:
            track = strack_pool[itracked]
            det = detections[idet]
            if track.state == TrackState.Tracked:
                track.update(detections[idet], self.frame_id)
                activated_starcks.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_stracks.append(track)

        ''' Step 3: Second association, with IOU'''
        detections = [detections[i] for i in u_detection]
        r_tracked_stracks = [
            strack_pool[i] for i in u_track
            if strack_pool[i].state == TrackState.Tracked
        ]
        dists = matching.iou_distance(r_tracked_stracks, detections)
        matches, u_track, u_detection = matching.linear_assignment(
            dists, thresh=0.5)

        for itracked, idet in matches:
            track = r_tracked_stracks[itracked]
            det = detections[idet]
            if track.state == TrackState.Tracked:
                track.update(det, self.frame_id)
                activated_starcks.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_stracks.append(track)

        for it in u_track:
            track = r_tracked_stracks[it]
            if not track.state == TrackState.Lost:
                track.mark_lost()
                lost_stracks.append(track)

        '''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
        detections = [detections[i] for i in u_detection]
        dists = matching.iou_distance(unconfirmed, detections)
        matches, u_unconfirmed, u_detection = matching.linear_assignment(
            dists, thresh=0.6)
        for itracked, idet in matches:
            unconfirmed[itracked].update(detections[idet], self.frame_id)
            activated_starcks.append(unconfirmed[itracked])
        for it in u_unconfirmed:
            track = unconfirmed[it]
            track.mark_removed()
            removed_stracks.append(track)

        """ Step 4: Init new stracks"""
        for inew in u_detection:
            track = detections[inew]
            track_init_polygon = init_polygon if not track.huge_vehicle else virtual_polygon
            if track.score < self.det_thresh or track.occlusion_status == True or check_bbox_outside_polygon(
                    track_init_polygon, track.tlbr):
                continue
            if self.frame_id >= 1 and not check_bbox_inside_polygon(
                    track_init_polygon, track.tlbr):
                continue
            track.activate(self.kalman_filter, self.frame_id)
            activated_starcks.append(track)

        """ Step 5: Update state and getting out of interest tracklet if have"""
        out_of_polygon_tracklet = []
        refind_stracks_copy = []
        activated_starcks_copy = []
        for idx, current_tracked_tracks in enumerate(
            [refind_stracks, activated_starcks]):
            for track in current_tracked_tracks:
                track_type = self._track_type(track)
                if track_type in self.person_or_motorcycle:
                    out_polygon = two_wheel_polygon
                else:
                    out_polygon = four_wheel_polygon

                if check_bbox_outside_polygon(out_polygon, track.tlbr):
                    track.mark_removed()
                    removed_stracks.append(track)
                    # Only activated tracks (idx == 1) that were seen often
                    # enough are reported as counted vehicles.
                    if ((len(track.track_frames) >= 4 and self.frame_id <= 5)
                            or (len(track.track_frames) >= 5
                                and self.frame_id >= self.warmup_frame + 5)
                        ) and idx == 1:
                        track_center = [[(x[0] + x[2]) / 2,
                                         (x[1] + x[3]) / 2]
                                        for x in track.track_trajectory]
                        movement_id = self.heusristic_mov_detection(
                            track, track_center)
                        frame_id = self.frame_id
                        out_of_polygon_tracklet.append(
                            (frame_id, track.track_id, track_type,
                             movement_id))
                elif idx == 0:
                    refind_stracks_copy.append(track)
                else:
                    activated_starcks_copy.append(track)
        refind_stracks = refind_stracks_copy
        activated_starcks = activated_starcks_copy

        lost_stracks_copy = []
        for track in lost_stracks:
            track_type = self._track_type(track)
            if track_type in self.person_or_motorcycle:
                out_polygon = two_wheel_polygon
            else:
                out_polygon = four_wheel_polygon

            if check_bbox_intersect_or_outside_polygon(out_polygon,
                                                       track.tlbr):
                track.mark_removed()
                removed_stracks.append(track)
                if ((len(track.track_frames) >= 4 and self.frame_id <= 5)
                        or (len(track.track_frames) >= 6
                            and self.frame_id >= self.warmup_frame + 5)):
                    track_center = [[(x[0] + x[2]) / 2, (x[1] + x[3]) / 2]
                                    for x in track.track_trajectory]
                    movement_id = self.heusristic_mov_detection(
                        track, track_center)
                    # Lost four-wheel tracks are reported five frames later.
                    frame_id = self.frame_id if track_type in self.person_or_motorcycle else self.frame_id + 5
                    out_of_polygon_tracklet.append(
                        (frame_id, track.track_id, track_type, movement_id))
            else:
                lost_stracks_copy.append(track)
        lost_stracks = lost_stracks_copy

        for track in self.lost_stracks:
            if self.frame_id - track.end_frame > self.max_time_lost:
                track.mark_removed()
                removed_stracks.append(track)
            # Remove out of screen tracklet
            elif track.tlwh[0] + track.tlwh[2] // 2 > width or track.tlwh[
                    1] + track.tlwh[3] // 2 > height or min(
                        track.tlwh[0] + track.tlwh[2] // 2,
                        track.tlwh[1] + track.tlwh[3] // 2) < 0:
                track.num_out_frame += 1
                if track.num_out_frame > STrack.out_of_frame_patience:
                    track.mark_removed()
                    removed_stracks.append(track)

        self.tracked_stracks = [
            t for t in self.tracked_stracks if t.state == TrackState.Tracked
        ]
        self.tracked_stracks, _ = joint_stracks(self.tracked_stracks,
                                                activated_starcks)
        self.tracked_stracks, _ = joint_stracks(self.tracked_stracks,
                                                refind_stracks)
        self.lost_stracks = sub_stracks(self.lost_stracks,
                                        self.tracked_stracks)
        self.lost_stracks.extend(lost_stracks)
        self.lost_stracks = sub_stracks(self.lost_stracks,
                                        self.removed_stracks)
        self.removed_stracks.extend(removed_stracks)
        self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(
            self.tracked_stracks, self.lost_stracks)

        output_stracks = [
            track for track in self.tracked_stracks if track.is_activated
        ]
        return output_stracks, detections_plot, out_of_polygon_tracklet

    # can paralel,current bottleneck of model
    def merge_track(self, min_thres=0.2, distance_thres=15,
                    consitence_thres=10):
        """Merge tracked tracks into lost tracks that look like the same object.

        A pair is a candidate when both tracks have more than
        ``consitence_thres`` frames, their frame ranges do not overlap and the
        Kalman gating distance between the lost track's extrapolated state and
        the tracked track's first box is at most ``distance_thres``. The cost
        of a candidate is the smallest cosine distance between the two tracks'
        cluster centres; pairs are then assigned with
        ``matching.linear_assignment`` using ``min_thres``.

        Args:
            min_thres: Threshold passed to ``matching.linear_assignment``.
            distance_thres: Largest accepted Kalman gating distance.
            consitence_thres: Tracks with this many frames or fewer are never
                merged.
        """

        def is_overlap(lost_track, tracked_track):
            # The previous version returned False or fell through to None and
            # so never reported an overlap; return the real boolean.
            return not (tracked_track.start_frame > lost_track.end_frame
                        or lost_track.start_frame > tracked_track.end_frame)

        def predict_future(lost_track, num_frame):
            mean, cov = lost_track.mean, lost_track.covariance
            for _ in range(num_frame):
                mean, cov = lost_track.kalman_filter.predict(mean, cov)
            return mean, cov

        def cluster_compare(lost_track, tracked_track):
            return np.min(
                cdist(lost_track.cluster_features['centers'],
                      tracked_track.cluster_features['centers'],
                      metric='cosine'))

        def distance(lost_track, tracked_track):
            if is_overlap(lost_track, tracked_track):
                return np.inf
            pred_mean, pred_cov = predict_future(
                lost_track, tracked_track.start_frame - lost_track.end_frame)
            tracked_xyah = STrack.tlwh_to_xyah(tracked_track._tlwh)
            if self.kalman_filter.gating_distance(
                    pred_mean, pred_cov, tracked_xyah) > distance_thres:
                return np.inf
            return cluster_compare(lost_track, tracked_track)

        cost_matrix = np.zeros(shape=(len(self.lost_stracks),
                                      len(self.tracked_stracks)))
        for i in range(cost_matrix.shape[0]):
            for j in range(cost_matrix.shape[1]):
                if min(len(self.lost_stracks[i].track_frames),
                       len(self.tracked_stracks[j].track_frames)
                       ) <= consitence_thres:
                    cost_matrix[i][j] = np.inf
                else:
                    cost_matrix[i][j] = distance(self.lost_stracks[i],
                                                 self.tracked_stracks[j])

        matches, _, _ = matching.linear_assignment(cost_matrix,
                                                   thresh=min_thres)
        # 1 = lost track stays lost, 0 = lost track absorbed a tracked track.
        map_lost_track = np.ones_like(self.lost_stracks, dtype=int)

        for i in range(cost_matrix.shape[0]):
            for j in range(cost_matrix.shape[1]):
                if cost_matrix[i][j] <= 1:
                    print('sim of ' + str(self.lost_stracks[i].track_id) +
                          ' and ' + str(self.tracked_stracks[j].track_id) +
                          ' : ' + str(cost_matrix[i][j]))

        if len(matches) == 0:
            return

        for ilost_track, i_tracked_track in matches:
            lost = self.lost_stracks[ilost_track]
            tracked = self.tracked_stracks[i_tracked_track]
            print('------------------------------------')
            print('merge ' + str(tracked.track_id) + ' to ' +
                  str(lost.track_id))
            map_lost_track[ilost_track] = 0  # pylint: disable=unsupported-assignment-operation
            for cluster in tracked.cluster_features['cluster']:
                for clus in cluster:
                    lost.update_cluster(clus)
            lost.mean, lost.covariance = tracked.mean, tracked.covariance
            lost.track_frames += tracked.track_frames
            lost.frame_id = tracked.frame_id
            # The revived lost track takes the tracked track's slot.
            self.tracked_stracks[i_tracked_track] = lost

        self.lost_stracks = [
            self.lost_stracks[ilost_track]
            for ilost_track in range(len(map_lost_track))
            if map_lost_track[ilost_track] == 1
        ]
class STrack(BaseTrack):
    """One tracked object: a Kalman-filtered box plus a smoothed appearance feature.

    The box is kept in the filter state as ``(center x, center y, aspect
    ratio, height, ...)`` and exposed through the ``tlwh`` / ``tlbr``
    properties. The last (up to) 10 filtered boxes seen by ``update`` are kept
    in ``history``.
    """

    # Filter instance shared by every track; only multi_predict uses it.
    shared_kalman = KalmanFilter()

    def __init__(self, tlwh, score, temp_feat, buffer_size=30):
        """Create a track from a single detection; it is not activated yet.

        :param tlwh: box as ``(top left x, top left y, width, height)``
        :param score: detection score, stored unchanged
        :param temp_feat: appearance feature vector (normalised in place)
        :param buffer_size: maximum number of features kept in ``features``
        """
        # wait activate
        # np.float was removed in NumPy 1.24; it was an alias of builtin float.
        self._tlwh = np.asarray(tlwh, dtype=float)
        self.kalman_filter = None
        self.mean, self.covariance = None, None
        self.is_activated = False

        self.score = score
        self.tracklet_len = 0

        # update_features() appends to self.features and (from the second
        # call on) reads self.alpha, so both must exist before the first call.
        # Creating them afterwards sent the first feature to whatever
        # `features` attribute is inherited instead of this track's buffer.
        self.features = deque([], maxlen=buffer_size)
        self.alpha = 0.9
        self.smooth_feat = None
        self.update_features(temp_feat)

        self.history = []

    def update_features(self, feat):
        """Record ``feat`` as the current feature and blend it into ``smooth_feat``.

        ``feat`` is L2-normalised in place (the caller's array is modified),
        appended to ``features``, and mixed into ``smooth_feat`` as
        ``alpha * smooth_feat + (1 - alpha) * feat``; the result is
        re-normalised.
        """
        feat /= np.linalg.norm(feat)
        self.curr_feat = feat
        if self.smooth_feat is None:
            self.smooth_feat = feat
        else:
            self.smooth_feat = self.alpha * self.smooth_feat + (1 - self.alpha) * feat
        self.features.append(feat)
        self.smooth_feat /= np.linalg.norm(self.smooth_feat)

    def predict(self):
        """Run one prediction step of this track's own Kalman filter."""
        mean_state = self.mean.copy()
        if self.state != TrackState.Tracked:
            # Component 7 is cleared for tracks that are not currently tracked
            # (presumably the height velocity; confirm against KalmanFilter).
            mean_state[7] = 0
        self.mean, self.covariance = self.kalman_filter.predict(mean_state, self.covariance)

    @staticmethod
    def multi_predict(stracks):
        """Run one prediction step for every track in ``stracks`` in a single batch.

        Each track's ``mean`` and ``covariance`` are replaced by the values
        returned from ``shared_kalman.multi_predict``. Does nothing for an
        empty sequence.
        """
        if len(stracks) == 0:
            return
        multi_mean = np.asarray([st.mean.copy() for st in stracks])
        multi_covariance = np.asarray([st.covariance for st in stracks])
        for i, st in enumerate(stracks):
            if st.state != TrackState.Tracked:
                # Same rule as predict(): clear component 7 when not tracked.
                multi_mean[i][7] = 0
        multi_mean, multi_covariance = STrack.shared_kalman.multi_predict(multi_mean, multi_covariance)
        for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)):
            stracks[i].mean = mean
            stracks[i].covariance = cov

    def activate(self, kalman_filter, frame_id):
        """Start a new tracklet

        Takes a fresh id from ``next_id()``, initialises the filter state from
        the detection box and marks the track as ``Tracked``. The track is
        flagged ``is_activated`` immediately only when ``frame_id == 1``.
        """
        self.kalman_filter = kalman_filter
        self.track_id = self.next_id()
        self.mean, self.covariance = self.kalman_filter.initiate(self.tlwh_to_xyah(self._tlwh))

        self.tracklet_len = 0
        self.state = TrackState.Tracked
        if frame_id == 1:
            self.is_activated = True
        self.frame_id = frame_id
        self.start_frame = frame_id

    def re_activate(self, new_track, frame_id, new_id=False):
        """Resume this track from the detection carried by ``new_track``.

        :param new_track: track object wrapping the matched detection
        :param frame_id: frame in which the match happened
        :param new_id: when true, assign a fresh id from ``next_id()``
        """
        self.mean, self.covariance = self.kalman_filter.update(
            self.mean, self.covariance, self.tlwh_to_xyah(new_track.tlwh)
        )

        self.update_features(new_track.curr_feat)
        self.tracklet_len = 0
        self.state = TrackState.Tracked
        self.is_activated = True
        self.frame_id = frame_id
        if new_id:
            self.track_id = self.next_id()

    def update(self, new_track, frame_id, update_feature=True):
        """
        Update a matched track
        :type new_track: STrack
        :type frame_id: int
        :type update_feature: bool
        :return:
        """
        self.frame_id = frame_id
        self.tracklet_len += 1

        new_tlwh = new_track.tlwh
        # log historical track up to previous 10 frames
        if len(self.history) == 10:
            self.history = self.history[1:]
        self.history.append(new_tlwh.copy())

        self.mean, self.covariance = self.kalman_filter.update(
            self.mean, self.covariance, self.tlwh_to_xyah(new_tlwh))
        self.state = TrackState.Tracked
        self.is_activated = True

        self.score = new_track.score
        if update_feature:
            self.update_features(new_track.curr_feat)

    @property
    def tlwh_raw(self):
        """Most recent box stored in ``history``, or a copy of the initial box.

        Note that the history entry itself is returned, not a copy.
        """
        if len(self.history) > 0:
            return self.history[-1]
        return self._tlwh.copy()

    @property
    def tlwh(self):
        """Get current position in bounding box format `(top left x, top left y,
        width, height)`.
        """
        if self.mean is None:
            return self._tlwh.copy()
        ret = self.mean[:4].copy()
        ret[2] *= ret[3]            # aspect ratio * height -> width
        ret[:2] -= ret[2:] / 2      # center -> top-left corner
        return ret

    @property
    def tlbr(self):
        """Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,
        `(top left, bottom right)`.
        """
        ret = self.tlwh.copy()
        ret[2:] += ret[:2]
        return ret

    @staticmethod
    def tlwh_to_xyah(tlwh):
        """Convert bounding box to format `(center x, center y, aspect ratio,
        height)`, where the aspect ratio is `width / height`.
        """
        ret = np.asarray(tlwh).copy()
        ret[:2] += ret[2:] / 2
        ret[2] /= ret[3]
        return ret

    def to_xyah(self):
        """Return the current box as `(center x, center y, aspect ratio, height)`."""
        return self.tlwh_to_xyah(self.tlwh)

    @staticmethod
    def tlbr_to_tlwh(tlbr):
        """Convert `(min x, min y, max x, max y)` to `(top left x, top left y, width, height)`."""
        ret = np.asarray(tlbr).copy()
        ret[2:] -= ret[:2]
        return ret

    @staticmethod
    def tlwh_to_tlbr(tlwh):
        """Convert `(top left x, top left y, width, height)` to `(min x, min y, max x, max y)`."""
        ret = np.asarray(tlwh).copy()
        ret[2:] += ret[:2]
        return ret

    def __repr__(self):
        # end_frame is not defined in this class; presumably inherited from BaseTrack.
        return 'OT_{}_({}-{})'.format(self.track_id, self.start_frame, self.end_frame)
class STrack(BaseTrack):
    """One tracked object: a Kalman-filtered box plus a smoothed appearance feature.

    The box is kept in the filter state as ``(center x, center y, aspect
    ratio, height, ...)`` and exposed through the ``tlwh`` / ``tlbr``
    properties.
    """

    # Class variable: one filter shared by every track, used by multi_predict.
    shared_kalman = KalmanFilter()

    def __init__(self, tlwh, score, temp_feat, buffer_size=30):
        """Create a track from a single detection; it is not activated yet.

        :param tlwh: box as ``(top left x, top left y, width, height)``
        :param score: detection score, stored unchanged
        :param temp_feat: appearance feature vector (normalised in place)
        :param buffer_size: maximum number of features kept in ``features``
        """
        # wait activate
        # np.float was removed in NumPy 1.24; it was an alias of builtin float.
        self._tlwh = np.asarray(tlwh, dtype=float)
        self.kalman_filter = None
        self.mean, self.covariance = None, None
        self.is_activated = False

        self.score = score
        self.tracklet_len = 0

        # update_features() appends to self.features and (from the second
        # call on) reads self.alpha, so both must exist before the first call.
        # Creating them afterwards sent the first feature to whatever
        # `features` attribute is inherited instead of this track's buffer.
        self.features = deque([], maxlen=buffer_size)
        self.alpha = 0.9
        self.smooth_feat = None
        self.update_features(temp_feat)

    def update_features(self, feat):
        """Record ``feat`` in ``curr_feat`` / ``features`` and update ``smooth_feat``.

        ``feat`` is L2-normalised in place (the caller's array is modified).
        """
        feat /= np.linalg.norm(feat)
        self.curr_feat = feat
        if self.smooth_feat is None:
            self.smooth_feat = feat
        else:
            # Exponential moving average: smooth_feat aggregates past features.
            self.smooth_feat = self.alpha * self.smooth_feat + (1 - self.alpha) * feat
        self.features.append(feat)
        # Re-normalise the blended feature.
        self.smooth_feat /= np.linalg.norm(self.smooth_feat)

    def predict(self):
        """Run one prediction step of this track's own Kalman filter."""
        mean_state = self.mean.copy()
        if self.state != TrackState.Tracked:
            # Component 7 is cleared for tracks that are not currently tracked
            # (presumably the height velocity; confirm against KalmanFilter).
            mean_state[7] = 0
        self.mean, self.covariance = self.kalman_filter.predict(mean_state, self.covariance)

    @staticmethod
    def multi_predict(stracks):
        """Run one prediction step for every track in ``stracks`` in a single batch.

        Does nothing for an empty sequence.
        """
        if len(stracks) == 0:
            return
        multi_mean = np.asarray([st.mean.copy() for st in stracks])
        multi_covariance = np.asarray([st.covariance for st in stracks])
        for i, st in enumerate(stracks):
            if st.state != TrackState.Tracked:
                # Same rule as predict(): clear component 7 when not tracked.
                multi_mean[i][7] = 0
        # Predicted mean vectors and covariance matrices for the whole batch.
        multi_mean, multi_covariance = STrack.shared_kalman.multi_predict(multi_mean, multi_covariance)
        # Hand each predicted mean/covariance back to its own track.
        for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)):
            stracks[i].mean = mean
            stracks[i].covariance = cov

    def activate(self, kalman_filter, frame_id):
        """Start a new tracklet

        Takes a fresh id from ``next_id()`` and marks the track as
        ``Tracked``. ``is_activated`` is left untouched here.
        """
        self.kalman_filter = kalman_filter
        self.track_id = self.next_id()
        # Initialise the filter state from the detection's current box.
        self.mean, self.covariance = self.kalman_filter.initiate(self.tlwh_to_xyah(self._tlwh))

        self.tracklet_len = 0
        self.state = TrackState.Tracked
        self.frame_id = frame_id
        self.start_frame = frame_id

    def re_activate(self, new_track, frame_id, new_id=False):
        """Resume this track from the detection carried by ``new_track``.

        :param new_track: track object wrapping the matched detection
        :param frame_id: frame in which the match happened
        :param new_id: when true, assign a fresh id from ``next_id()``
        """
        self.mean, self.covariance = self.kalman_filter.update(
            self.mean, self.covariance, self.tlwh_to_xyah(new_track.tlwh)
        )

        self.update_features(new_track.curr_feat)
        self.tracklet_len = 0
        self.state = TrackState.Tracked
        self.is_activated = True
        self.frame_id = frame_id
        if new_id:
            self.track_id = self.next_id()

    def update(self, new_track, frame_id, update_feature=True):
        """
        Update a matched track
        :type new_track: STrack
        :type frame_id: int
        :type update_feature: bool
        :return:
        """
        self.frame_id = frame_id    # id of the current frame
        self.tracklet_len += 1      # the tracklet grew by one frame

        new_tlwh = new_track.tlwh
        # Update the filter's mean vector and covariance matrix with the new box.
        self.mean, self.covariance = self.kalman_filter.update(
            self.mean, self.covariance, self.tlwh_to_xyah(new_tlwh))
        self.state = TrackState.Tracked
        self.is_activated = True

        self.score = new_track.score
        if update_feature:
            self.update_features(new_track.curr_feat)

    @property
    def tlwh(self):
        """Get current position in bounding box format `(top left x, top left y,
        width, height)`.
        """
        if self.mean is None:
            return self._tlwh.copy()
        ret = self.mean[:4].copy()
        ret[2] *= ret[3]            # aspect ratio * height -> width
        ret[:2] -= ret[2:] / 2      # center -> top-left corner
        return ret

    @property
    def tlbr(self):
        """Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,
        `(top left, bottom right)`.
        """
        ret = self.tlwh.copy()
        ret[2:] += ret[:2]
        return ret

    @staticmethod
    def tlwh_to_xyah(tlwh):
        """Convert bounding box to format `(center x, center y, aspect ratio,
        height)`, where the aspect ratio is `width / height`.
        """
        ret = np.asarray(tlwh).copy()
        ret[:2] += ret[2:] / 2
        ret[2] /= ret[3]
        return ret

    def to_xyah(self):
        """Return the current box as `(center x, center y, aspect ratio, height)`."""
        return self.tlwh_to_xyah(self.tlwh)

    @staticmethod
    def tlbr_to_tlwh(tlbr):
        """Convert `(min x, min y, max x, max y)` to `(top left x, top left y, width, height)`."""
        ret = np.asarray(tlbr).copy()
        ret[2:] -= ret[:2]
        return ret

    @staticmethod
    def tlwh_to_tlbr(tlwh):
        """Convert `(top left x, top left y, width, height)` to `(min x, min y, max x, max y)`."""
        ret = np.asarray(tlwh).copy()
        ret[2:] += ret[:2]
        return ret

    def __repr__(self):
        # end_frame is not defined in this class; presumably inherited from BaseTrack.
        return 'OT_{}_({}-{})'.format(self.track_id, self.start_frame, self.end_frame)
class Track(BaseTrack):
    """One tracked object: a Kalman-filtered box plus a smoothed appearance feature.

    The box is kept in the filter state as ``(center x, center y, aspect
    ratio, height, ...)`` and exposed through the ``tlwh`` / ``tlbr``
    properties.
    """

    # Filter instance shared by every track; only multi_predict uses it.
    shared_kalman = KalmanFilter()

    def __init__(self, tlwh, score, temp_feat, buff_size=30):
        """Create a track from a single detection; it is not activated yet.

        :param tlwh: box as ``(top left x, top left y, width, height)``
        :param score: detection score, stored unchanged
        :param temp_feat: appearance feature vector (normalised in place)
        :param buff_size: maximum number of features kept in ``features``
        """
        # wait activate
        # np.float was removed in NumPy 1.24; it was an alias of builtin float.
        self._tlwh = np.asarray(tlwh, dtype=float)
        self.kalman_filter = None
        self.mean, self.covariance = None, None
        self.is_activated = False

        self.score = score
        self.track_len = 0

        # update_features() appends to self.features and (from the second
        # call on) reads self.alpha, so both must exist before the first call.
        # Creating them afterwards sent the first feature to whatever
        # `features` attribute is inherited instead of this track's buffer.
        self.features = deque([], maxlen=buff_size)  # bounded length
        self.alpha = 0.9
        self.smooth_feat = None
        self.update_features(temp_feat)

    def update_features(self, feat):
        """Record ``feat`` as the current feature and blend it into ``smooth_feat``.

        ``feat`` is normalised in place (the caller's array is modified),
        appended to ``features``, and mixed into ``smooth_feat`` as
        ``alpha * smooth_feat + (1 - alpha) * feat``; the result is
        re-normalised.
        """
        # L2 normalizing
        feat /= np.linalg.norm(feat)
        self.curr_feat = feat
        if self.smooth_feat is None:
            self.smooth_feat = feat
        else:
            self.smooth_feat = self.alpha * self.smooth_feat + (1 - self.alpha) * feat
        self.features.append(feat)
        self.smooth_feat /= np.linalg.norm(self.smooth_feat)

    def predict(self):
        """Run one prediction step of this track's own Kalman filter."""
        mean_state = self.mean.copy()
        if self.state != TrackState.Tracked:
            # Component 7 is cleared for tracks that are not currently tracked
            # (presumably the height velocity; confirm against KalmanFilter).
            mean_state[7] = 0
        self.mean, self.covariance = self.kalman_filter.predict(mean_state, self.covariance)

    @staticmethod
    def multi_predict(tracks):
        """Run one prediction step for every track in ``tracks`` in a single batch.

        Each track's ``mean`` and ``covariance`` are replaced by the values
        returned from ``shared_kalman.multi_predict``. Does nothing for an
        empty sequence.
        """
        if len(tracks) == 0:
            return
        multi_mean = np.asarray([track.mean.copy() for track in tracks])
        multi_covariance = np.asarray([track.covariance for track in tracks])
        for i, st in enumerate(tracks):
            if st.state != TrackState.Tracked:
                # Same rule as predict(): clear component 7 when not tracked.
                multi_mean[i][7] = 0
        multi_mean, multi_covariance = Track.shared_kalman.multi_predict(multi_mean, multi_covariance)
        for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)):
            tracks[i].mean = mean
            tracks[i].covariance = cov

    def reset_track_id(self):
        """Delegate to ``reset_track_count()`` (not defined in this class)."""
        self.reset_track_count()

    def activate(self, kalman_filter, frame_id):
        """Start a new tracklet"""
        self.kalman_filter = kalman_filter  # assign a filter to each tracklet?

        # update the track id
        self.track_id = self.next_id()
        self.mean, self.covariance = self.kalman_filter.initiate(self.tlwh_to_xyah(self._tlwh))

        self.track_len = 0
        self.state = TrackState.Tracked  # set flag 'tracked'

        if frame_id == 1:  # to record the first frame's detection result
            self.is_activated = True

        self.frame_id = frame_id
        self.start_frame = frame_id

    def re_activate(self, new_track, frame_id, new_id=False):
        """Resume this track from the detection carried by ``new_track``.

        :param new_track: track object wrapping the matched detection
        :param frame_id: frame in which the match happened
        :param new_id: when true, assign a fresh id from ``next_id()``
        """
        self.mean, self.covariance = self.kalman_filter.update(self.mean,
                                                               self.covariance,
                                                               self.tlwh_to_xyah(new_track.tlwh))

        self.update_features(new_track.curr_feat)
        self.track_len = 0
        self.state = TrackState.Tracked  # set flag 'tracked'
        self.is_activated = True
        self.frame_id = frame_id

        if new_id:  # update the track id
            self.track_id = self.next_id()

    def update(self, new_track, frame_id, update_feature=True):
        """
        Update a matched track
        :type new_track: Track
        :type frame_id: int
        :type update_feature: bool
        :return:
        """
        self.frame_id = frame_id
        self.track_len += 1

        new_tlwh = new_track.tlwh
        self.mean, self.covariance = self.kalman_filter.update(self.mean,
                                                               self.covariance,
                                                               self.tlwh_to_xyah(new_tlwh))
        self.state = TrackState.Tracked  # set flag 'tracked'
        self.is_activated = True  # set flag 'activated'

        self.score = new_track.score
        if update_feature:
            self.update_features(new_track.curr_feat)

    @property
    def tlwh(self):
        """Get current position in bounding box format `(top left x, top left y,
        width, height)`.
        """
        if self.mean is None:
            return self._tlwh.copy()
        ret = self.mean[:4].copy()
        ret[2] *= ret[3]            # aspect ratio * height -> width
        ret[:2] -= ret[2:] / 2      # center -> top-left corner
        return ret

    @property
    def tlbr(self):
        """Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,
        `(top left, bottom right)`.
        """
        ret = self.tlwh.copy()
        ret[2:] += ret[:2]
        return ret

    @staticmethod
    def tlwh_to_xyah(tlwh):
        """Convert bounding box to format `(center x, center y, aspect ratio,
        height)`, where the aspect ratio is `width / height`.
        """
        ret = np.asarray(tlwh).copy()
        ret[:2] += ret[2:] / 2
        ret[2] /= ret[3]
        return ret

    def to_xyah(self):
        """Return the current box as `(center x, center y, aspect ratio, height)`."""
        return self.tlwh_to_xyah(self.tlwh)

    @staticmethod
    def tlbr_to_tlwh(tlbr):
        """Convert `(min x, min y, max x, max y)` to `(top left x, top left y, width, height)`."""
        # .copy() so the arithmetic below does not modify the caller's array.
        ret = np.asarray(tlbr).copy()
        ret[2:] -= ret[:2]
        return ret

    @staticmethod
    def tlwh_to_tlbr(tlwh):
        """Convert `(top left x, top left y, width, height)` to `(min x, min y, max x, max y)`."""
        ret = np.asarray(tlwh).copy()
        ret[2:] += ret[:2]
        return ret

    def __repr__(self):
        # end_frame is not defined in this class; presumably inherited from BaseTrack.
        return 'OT_{}_({}-{})'.format(self.track_id, self.start_frame, self.end_frame)
def __init__(self, opt, polygon, paths, polygon2=None, frame_rate=30):
    """Load the EfficientDet detector and initialise the tracker state.

    :param opt: options object; reads ``gpus``, ``compound_coef``,
        ``conf_thres``, ``track_buffer`` and ``K``, and writes ``device``
    :param polygon: sequence of vertices; vertices 1-2 form ``line2`` and,
        for 4- or 5-vertex polygons, a second edge forms ``line1``
    :param paths: stored unchanged in ``self.paths``
    :param polygon2: optional second polygon, stored unchanged
    :param frame_rate: video frame rate; scales ``opt.track_buffer``
        relative to 30 fps
    """
    self.opt = opt
    if opt.gpus[0] >= 0:
        opt.device = torch.device('cuda')
    else:
        opt.device = torch.device('cpu')
    print('Creating model...')

    anchor_ratios = [(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)]
    anchor_scales = [2**0, 2**(1.0 / 3.0), 2**(2.0 / 3.0)]
    # Network input resolution, indexed by opt.compound_coef.
    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]
    self.input_size = input_sizes[opt.compound_coef]

    # Class names by detector output index; '' marks unused indices.
    self.obj_list = [
        'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',
        'truck', 'boat', 'traffic light', 'fire hydrant', '', 'stop sign',
        'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep',
        'cow', 'elephant', 'bear', 'zebra', 'giraffe', '', 'backpack',
        'umbrella', '', '', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis',
        'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
        'skateboard', 'surfboard', 'tennis racket', 'bottle', '',
        'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana',
        'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog',
        'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', '',
        'dining table', '', '', 'toilet', '', 'tv', 'laptop', 'mouse',
        'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster',
        'sink', 'refrigerator', '', 'book', 'clock', 'vase', 'scissors',
        'teddy bear', 'hair drier', 'toothbrush'
    ]
    self.person_or_motorcycle = ['motorcycle', 'bicycle']
    # The set of classes of interest depends on the first entry above.
    if self.person_or_motorcycle[0] != 'person':
        self.obj_interest = ['motorcycle', 'bicycle', 'bus', 'truck', 'car']
    else:
        self.obj_interest = ['person', 'bus', 'truck', 'car']
    print(self.obj_interest)

    self.detetection_model = EfficientDetBackbone(
        compound_coef=opt.compound_coef,
        num_classes=len(self.obj_list),
        ratios=anchor_ratios,
        scales=anchor_scales)
    # map_location lets a checkpoint holding CUDA tensors load on a CPU-only host.
    self.detetection_model.load_state_dict(
        torch.load(
            f'EfficientDet/weights/efficientdet-d{opt.compound_coef}.pth',
            map_location=opt.device))
    self.detetection_model.eval()
    # Use the device selected from opt.gpus above instead of a hard-coded
    # 'cuda:0', which failed whenever the CPU branch had been chosen.
    self.detetection_model = self.detetection_model.to(opt.device)

    self.tracked_stracks = []  # type: list[STrack]
    self.lost_stracks = []  # type: list[STrack]
    self.removed_stracks = []  # type: list[STrack]

    self.frame_id = 0
    self.det_thresh = opt.conf_thres
    self.buffer_size = int(frame_rate / 30.0 * opt.track_buffer)
    self.max_time_lost = self.buffer_size
    self.max_per_image = opt.K

    self.kalman_filter = KalmanFilter()

    self.polygon = polygon
    self.paths = paths
    self.polygon2 = polygon2
    self.line2 = [self.polygon[1], self.polygon[2]]
    # line1 is only defined for 5- and 4-vertex polygons.
    num_vertices = len(self.polygon)
    if num_vertices == 5:
        self.line1 = [self.polygon[4], self.polygon[3]]
    elif num_vertices == 4:
        self.line1 = [self.polygon[0], self.polygon[3]]
    else:
        self.line1 = None

    self.two_polygon_system = False
    self.warmup_frame = 6 if self.two_polygon_system else 0
    self.virtual_polygon = [[0, 573], [0, 109], [1270, 109], [1270, 573]]
def __init__(self, opt):
    """Keep the options object and start from an empty tracking state."""
    # Frame counter starts at zero.
    self.frame_id = 0
    # Tracks currently being followed.
    self.tracked_stracks = []  # type: list[STrack]
    # Filter instance owned by this tracker.
    self.kalman_filter = KalmanFilter()
    self.opt = opt