def match(self, detections, strack_pool):
    dists = matching.embedding_distance(strack_pool, detections)
    dists = matching.fuse_motion(self.kalman_filter, dists, strack_pool, detections)
    matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.7)
    return matches, u_detection, u_track
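# The helper above relies on a `matching.linear_assignment` that solves a
# thresholded assignment problem. A minimal sketch of such a function, modeled
# on the JDE/FairMOT-style implementation built on the `lap` package; the
# actual module in this codebase may differ, so treat this as an assumption:
import numpy as np
import lap

def linear_assignment_sketch(cost_matrix, thresh):
    """Return (matches, unmatched_rows, unmatched_cols) for costs <= thresh."""
    if cost_matrix.size == 0:
        return (np.empty((0, 2), dtype=int),
                tuple(range(cost_matrix.shape[0])),
                tuple(range(cost_matrix.shape[1])))
    # lapjv with cost_limit leaves any pair costing more than `thresh` unmatched
    cost, x, y = lap.lapjv(cost_matrix, extend_cost=True, cost_limit=thresh)
    matches = [[ix, mx] for ix, mx in enumerate(x) if mx >= 0]
    unmatched_a = np.where(x < 0)[0]  # row (track) indices with no partner
    unmatched_b = np.where(y < 0)[0]  # column (detection) indices with no partner
    return np.asarray(matches), unmatched_a, unmatched_b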
def update_tracking(self, img, img0):
    """
    Update tracking result of the frame
    :param img:
    :param img0:
    :return:
    """
    # update frame id
    self.frame_id += 1

    # record tracking states
    activated_starcks_dict = defaultdict(list)
    refind_stracks_dict = defaultdict(list)
    lost_stracks_dict = defaultdict(list)
    removed_stracks_dict = defaultdict(list)
    output_stracks_dict = defaultdict(list)

    # ----- do detection and reid feature extraction
    # only get aggregated result, not original YOLO output
    with torch.no_grad():
        pred, pred_orig, reid_feat_map = self.model.forward(img, augment=self.opt.augment)
        pred = pred.float()

        # L2 normalize feature map
        reid_feat_map = F.normalize(reid_feat_map, dim=1)

        b, reid_dim, h_id_map, w_id_map = reid_feat_map.shape
        assert b == 1  # make sure batch size is 1

        # apply NMS
        pred = non_max_suppression(pred,
                                   self.opt.conf_thres,
                                   self.opt.iou_thres,
                                   merge=False,
                                   classes=self.opt.classes,
                                   agnostic=self.opt.agnostic_nms)

        dets = pred[0]  # assume batch_size == 1 here

    # get reid feature for each object class
    if dets is None:
        print('[Warning]: no objects detected.')
        return None

    # Get reid feature vector for each detection
    b, c, h, w = img.shape  # net input img size
    id_vects_dict = defaultdict(list)
    for det in dets:
        x1, y1, x2, y2, conf, cls_id = det

        # map center point from net scale to feature map scale (1/4 of net input size)
        center_x = (x1 + x2) * 0.5
        center_y = (y1 + y2) * 0.5
        center_x *= float(w_id_map) / float(w)
        center_y *= float(h_id_map) / float(h)

        # convert to int64 for indexing
        center_x += 0.5  # round
        center_y += 0.5
        center_x = center_x.long()
        center_y = center_y.long()
        center_x.clamp_(0, w_id_map - 1)  # avoid going out of the reid feature map's range
        center_y.clamp_(0, h_id_map - 1)

        id_feat_vect = reid_feat_map[0, :, center_y, center_x]
        id_feat_vect = id_feat_vect.squeeze()
        id_feat_vect = id_feat_vect.cpu().numpy()
        id_vects_dict[int(cls_id)].append(id_feat_vect)  # put feat vect to dict (key: cls_id)

    # Rescale boxes from img_size to img0 size (from net input size to original size)
    dets[:, :4] = scale_coords(img.shape[2:], dets[:, :4], img0.shape).round()

    # Process each object class
    for cls_id in range(self.opt.num_classes):
        cls_inds = torch.where(dets[:, -1] == cls_id)
        cls_dets = dets[cls_inds]  # n_objs × 6
        cls_id_feature = id_vects_dict[cls_id]  # n_objs × 128

        cls_dets = cls_dets.detach().cpu().numpy()
        cls_id_feature = np.array(cls_id_feature)

        if len(cls_dets) > 0:
            '''Detections, tlbrs: top left bottom right score'''
            cls_detections = [
                STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], feat, buff_size=30)
                for (tlbrs, feat) in zip(cls_dets[:, :5], cls_id_feature)
            ]
        else:
            cls_detections = []

        # reset the track ids for a different object class on the first frame
        # (frame_id was incremented above, so the first frame is 1)
        if self.frame_id == 1:
            for track in cls_detections:
                track.reset_track_id()

        ''' Add newly detected tracklets to tracked_stracks'''
        unconfirmed_dict = defaultdict(list)
        tracked_stracks_dict = defaultdict(list)
        for track in self.tracked_stracks_dict[cls_id]:
            if not track.is_activated:
                unconfirmed_dict[cls_id].append(track)
            else:
                tracked_stracks_dict[cls_id].append(track)

        ''' Step 2: First association, with embedding'''
        strack_pool_dict = defaultdict(list)
        strack_pool_dict[cls_id] = joint_stracks(tracked_stracks_dict[cls_id],
                                                 self.lost_stracks_dict[cls_id])

        # Predict the current location with KF
        # for strack in strack_pool:
        STrack.multi_predict(strack_pool_dict[cls_id])

        dists = matching.embedding_distance(strack_pool_dict[cls_id], cls_detections)
        dists = matching.fuse_motion(self.kalman_filter, dists,
                                     strack_pool_dict[cls_id], cls_detections)
        matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.7)  # thresh=0.7

        for i_tracked, i_det in matches:
            track = strack_pool_dict[cls_id][i_tracked]
            det = cls_detections[i_det]
            if track.state == TrackState.Tracked:
                track.update(cls_detections[i_det], self.frame_id)
                activated_starcks_dict[cls_id].append(track)  # for multi-class
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_stracks_dict[cls_id].append(track)

        ''' Step 3: Second association, with IOU'''
        cls_detections = [cls_detections[i] for i in u_detection]
        r_tracked_stracks = [
            strack_pool_dict[cls_id][i] for i in u_track
            if strack_pool_dict[cls_id][i].state == TrackState.Tracked
        ]
        dists = matching.iou_distance(r_tracked_stracks, cls_detections)
        matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.5)  # thresh=0.5

        for i_tracked, i_det in matches:
            track = r_tracked_stracks[i_tracked]
            det = cls_detections[i_det]
            if track.state == TrackState.Tracked:
                track.update(det, self.frame_id)
                activated_starcks_dict[cls_id].append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_stracks_dict[cls_id].append(track)

        for it in u_track:
            track = r_tracked_stracks[it]
            if not track.state == TrackState.Lost:
                track.mark_lost()
                lost_stracks_dict[cls_id].append(track)

        '''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
        cls_detections = [cls_detections[i] for i in u_detection]
        dists = matching.iou_distance(unconfirmed_dict[cls_id], cls_detections)
        matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=0.7)
        for i_tracked, i_det in matches:
            unconfirmed_dict[cls_id][i_tracked].update(cls_detections[i_det], self.frame_id)
            activated_starcks_dict[cls_id].append(unconfirmed_dict[cls_id][i_tracked])
        for it in u_unconfirmed:
            track = unconfirmed_dict[cls_id][it]
            track.mark_removed()
            removed_stracks_dict[cls_id].append(track)

        """ Step 4: Init new stracks"""
        for i_new in u_detection:
            track = cls_detections[i_new]
            if track.score < self.det_thresh:
                continue
            track.activate(self.kalman_filter, self.frame_id)
            activated_starcks_dict[cls_id].append(track)

        """ Step 5: Update state"""
        for track in self.lost_stracks_dict[cls_id]:
            if self.frame_id - track.end_frame > self.max_time_lost:
                track.mark_removed()
                removed_stracks_dict[cls_id].append(track)

        # print('Remained match {} s'.format(t4-t3))
        self.tracked_stracks_dict[cls_id] = [
            t for t in self.tracked_stracks_dict[cls_id] if t.state == TrackState.Tracked
        ]
        self.tracked_stracks_dict[cls_id] = joint_stracks(self.tracked_stracks_dict[cls_id],
                                                          activated_starcks_dict[cls_id])
        self.tracked_stracks_dict[cls_id] = joint_stracks(self.tracked_stracks_dict[cls_id],
                                                          refind_stracks_dict[cls_id])
        self.lost_stracks_dict[cls_id] = sub_stracks(self.lost_stracks_dict[cls_id],
                                                     self.tracked_stracks_dict[cls_id])
        self.lost_stracks_dict[cls_id].extend(lost_stracks_dict[cls_id])
        self.lost_stracks_dict[cls_id] = sub_stracks(self.lost_stracks_dict[cls_id],
                                                     self.removed_stracks_dict[cls_id])
        self.removed_stracks_dict[cls_id].extend(removed_stracks_dict[cls_id])
        self.tracked_stracks_dict[cls_id], self.lost_stracks_dict[cls_id] = remove_duplicate_stracks(
            self.tracked_stracks_dict[cls_id], self.lost_stracks_dict[cls_id])

        # get scores of lost tracks
        output_stracks_dict[cls_id] = [
            track for track in self.tracked_stracks_dict[cls_id] if track.is_activated
        ]

        # logger.debug('===========Frame {}=========='.format(self.frame_id))
        # logger.debug('Activated: {}'.format(
        #     [track.track_id for track in activated_starcks_dict[cls_id]]))
        # logger.debug('Refind: {}'.format(
        #     [track.track_id for track in refind_stracks_dict[cls_id]]))
        # logger.debug('Lost: {}'.format(
        #     [track.track_id for track in lost_stracks_dict[cls_id]]))
        # logger.debug('Removed: {}'.format(
        #     [track.track_id for track in removed_stracks_dict[cls_id]]))

    return output_stracks_dict
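# A hedged usage sketch for the multi-class API above: update_tracking()
# returns a dict keyed by class id (or None when nothing was detected), so a
# caller iterates classes per frame. `tracker` and `num_classes` are assumed
# from the surrounding codebase; names here are illustrative only.
def consume_multiclass_tracks_sketch(tracker, img, img0, num_classes):
    online_targets_dict = tracker.update_tracking(img, img0)
    if online_targets_dict is None:  # no detections this frame
        return {}
    per_class = {}
    for cls_id in range(num_classes):
        per_class[cls_id] = [(t.track_id, t.tlwh)
                             for t in online_targets_dict[cls_id]]
    return per_class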
def update(self, im_blob, img0):
    self.frame_id += 1
    activated_starcks = []
    refind_stracks = []
    lost_stracks = []
    removed_stracks = []

    width = img0.shape[1]
    height = img0.shape[0]
    inp_height = im_blob.shape[2]
    inp_width = im_blob.shape[3]
    c = np.array([width / 2., height / 2.], dtype=np.float32)
    s = max(float(inp_width) / float(inp_height) * height, width) * 1.0
    meta = {
        'c': c,
        's': s,
        'out_height': inp_height // self.opt.down_ratio,
        'out_width': inp_width // self.opt.down_ratio
    }

    ''' Step 1: Network forward, get detections & embeddings'''
    with torch.no_grad():
        output = self.model(im_blob)[-1]
        hm = output['hm'].sigmoid_()
        wh = output['wh']
        id_feature = output['id']
        id_feature = F.normalize(id_feature, dim=1)

        reg = output['reg'] if self.opt.reg_offset else None
        dets, inds = mot_decode(hm, wh, reg=reg,
                                cat_spec_wh=self.opt.cat_spec_wh, K=self.opt.K)
        id_feature = _tranpose_and_gather_feat(id_feature, inds)
        id_feature = id_feature.squeeze(0)
        id_feature = id_feature.cpu().numpy()

    dets = self.post_process(dets, meta)
    dets = self.merge_outputs([dets])[1]

    remain_inds = dets[:, 4] > self.opt.conf_thres
    dets = dets[remain_inds]
    id_feature = id_feature[remain_inds]

    # vis
    '''
    for i in range(0, dets.shape[0]):
        bbox = dets[i][0:4]
        cv2.rectangle(img0, (bbox[0], bbox[1]),
                      (bbox[2], bbox[3]),
                      (0, 255, 0), 2)
    cv2.imshow('dets', img0)
    cv2.waitKey(0)
    id0 = id0-1
    '''

    if len(dets) > 0:
        '''Detections'''
        detections = [
            STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f, 30)
            for (tlbrs, f) in zip(dets[:, :5], id_feature)
        ]
    else:
        detections = []

    ''' Add newly detected tracklets to tracked_stracks'''
    unconfirmed = []
    tracked_stracks = []  # type: list[STrack]
    for track in self.tracked_stracks:
        if not track.is_activated:
            unconfirmed.append(track)
        else:
            tracked_stracks.append(track)

    ''' Step 2: First association, with embedding'''
    strack_pool = joint_stracks(tracked_stracks, self.lost_stracks)
    # cosine distance between the newly detected objects and the tracked trackers
    dists = matching.embedding_distance(strack_pool, detections)
    STrack.multi_predict(strack_pool)  # Kalman prediction
    # use the Kalman filter to compute the distance cost between detections and the strack pool
    dists = matching.fuse_motion(self.kalman_filter, dists, strack_pool, detections)
    # Hungarian matching between tracked boxes and detection boxes;
    # u_track holds the indices of unmatched trackers
    matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.7)

    # matches is n×2: column 0 is the tracked-tracker index, column 1 the detection index
    for itracked, idet in matches:
        track = strack_pool[itracked]
        det = detections[idet]
        if track.state == TrackState.Tracked:
            # matched pool tracker and detection: update features and Kalman state
            track.update(det, self.frame_id)
            activated_starcks.append(track)
        else:
            # if it was in the lost list, re-activate it
            track.re_activate(det, self.frame_id, new_id=False)
            refind_stracks.append(track)

    ''' Step 3: Second association, with IOU'''
    """ re-match with IOU the detections and trackers left unmatched by the cosine distance """
    detections = [detections[i] for i in u_detection]  # u_detection holds indices of unmatched detections
    r_tracked_stracks = [
        strack_pool[i] for i in u_track
        if strack_pool[i].state == TrackState.Tracked
    ]
    dists = matching.iou_distance(r_tracked_stracks, detections)
    matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.5)

    for itracked, idet in matches:
        track = r_tracked_stracks[itracked]
        det = detections[idet]
        if track.state == TrackState.Tracked:
            track.update(det, self.frame_id)
            activated_starcks.append(track)
        else:
            # already restricted to TrackState.Tracked above, so this branch should not run
            track.re_activate(det, self.frame_id, new_id=False)
            refind_stracks.append(track)

    for it in u_track:
        track = r_tracked_stracks[it]
        if not track.state == TrackState.Lost:
            track.mark_lost()
            # mark trackers that failed the IOU matching against tracked trackers as lost
            lost_stracks.append(track)

    '''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
    # match detections unmatched by cosine/IOU against the unconfirmed trackers
    detections = [detections[i] for i in u_detection]
    dists = matching.iou_distance(unconfirmed, detections)
    matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=0.7)
    for itracked, idet in matches:
        unconfirmed[itracked].update(detections[idet], self.frame_id)
        activated_starcks.append(unconfirmed[itracked])
    for it in u_unconfirmed:
        track = unconfirmed[it]
        track.mark_removed()
        removed_stracks.append(track)

    """ Step 4: Init new stracks"""
    # detections unmatched by cosine/IOU/unconfirmed matching start new unconfirmed trackers
    for inew in u_detection:
        track = detections[inew]
        if track.score < self.det_thresh:
            continue
        # activate the track; is_activated is True only on the first frame, False otherwise
        track.activate(self.kalman_filter, self.frame_id)
        activated_starcks.append(track)

    """ Step 5: Update state"""
    for track in self.lost_stracks:
        # remove after being lost for more than max_time_lost frames
        if self.frame_id - track.end_frame > self.max_time_lost:
            track.mark_removed()
            removed_stracks.append(track)

    # print('Remained match {} s'.format(t4-t3))
    self.tracked_stracks = [t for t in self.tracked_stracks
                            if t.state == TrackState.Tracked]  # keep only Tracked-state trackers
    self.tracked_stracks = joint_stracks(self.tracked_stracks,
                                         activated_starcks)  # add newly activated detections
    self.tracked_stracks = joint_stracks(self.tracked_stracks,
                                         refind_stracks)  # add re-found trackers
    self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks)
    self.lost_stracks.extend(lost_stracks)
    self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks)
    self.removed_stracks.extend(removed_stracks)
    self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(
        self.tracked_stracks, self.lost_stracks)

    # get scores of lost tracks
    output_stracks = [track for track in self.tracked_stracks if track.is_activated]

    logger.debug('===========Frame {}=========='.format(self.frame_id))
    logger.debug('Activated: {}'.format([track.track_id for track in activated_starcks]))
    logger.debug('Refind: {}'.format([track.track_id for track in refind_stracks]))
    logger.debug('Lost: {}'.format([track.track_id for track in lost_stracks]))
    logger.debug('Removed: {}'.format([track.track_id for track in removed_stracks]))

    return output_stracks
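# The update() methods in this section rely on three small pool helpers
# (joint_stracks, sub_stracks, remove_duplicate_stracks) that are not shown
# here. A minimal sketch consistent with how they are used (union by
# track_id, set difference by track_id, IOU-based deduplication that keeps
# the longer-lived track); `matching` and `np` are assumed from the
# surrounding module, and this is an assumption, not the exact code:
def joint_stracks_sketch(tlista, tlistb):
    seen = {t.track_id for t in tlista}
    return tlista + [t for t in tlistb if t.track_id not in seen]

def sub_stracks_sketch(tlista, tlistb):
    removed = {t.track_id for t in tlistb}
    return [t for t in tlista if t.track_id not in removed]

def remove_duplicate_stracks_sketch(stracksa, stracksb, iou_dist_thresh=0.15):
    pdist = matching.iou_distance(stracksa, stracksb)
    pairs = np.where(pdist < iou_dist_thresh)
    dupa, dupb = [], []
    for p, q in zip(*pairs):
        # keep whichever track has been alive longer, drop the other
        timep = stracksa[p].frame_id - stracksa[p].start_frame
        timeq = stracksb[q].frame_id - stracksb[q].start_frame
        if timep > timeq:
            dupb.append(q)
        else:
            dupa.append(p)
    resa = [t for i, t in enumerate(stracksa) if i not in dupa]
    resb = [t for i, t in enumerate(stracksb) if i not in dupb]
    return resa, resb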
def update_tracking(self, img, img0):
    """
    Update tracking result of the frame
    :param img:
    :param img0:
    :return:
    """
    # update frame id
    self.frame_id += 1

    # record tracking states
    activated_tracks_dict = defaultdict(list)
    refined_tracks_dict = defaultdict(list)
    lost_tracks_dict = defaultdict(list)
    removed_tracks_dict = defaultdict(list)
    output_tracks_dict = defaultdict(list)

    # ----- do detection and reid feature extraction
    # only get aggregated result, not original YOLO output
    with torch.no_grad():
        # t1 = torch_utils.time_synchronized()
        pred, pred_orig, reid_feat_out, yolo_ids = self.model.forward(img, augment=self.opt.augment)
        pred = pred.float()

        # L2 normalize feature map
        reid_feat_out[0] = F.normalize(reid_feat_out[0], dim=1)

        # apply NMS
        pred, pred_yolo_ids = non_max_suppression_with_yolo_inds(pred,
                                                                 yolo_ids,
                                                                 self.opt.conf_thres,
                                                                 self.opt.iou_thres,
                                                                 merge=False,
                                                                 classes=self.opt.classes,
                                                                 agnostic=self.opt.agnostic_nms)

        dets = pred[0]  # assume batch_size == 1 here
        dets_yolo_ids = pred_yolo_ids[0].squeeze()
        # t2 = torch_utils.time_synchronized()
        # print('run time (%.3fs)' % (t2 - t1))

    # get reid feature for each object class
    if dets is None:
        print('[Warning]: no objects detected.')
        return None

    # Get reid feature vector for each detection
    b, c, h, w = img.shape  # net input img size
    id_vects_dict = defaultdict(list)
    for det, yolo_id in zip(dets, dets_yolo_ids):
        x1, y1, x2, y2, conf, cls_id = det
        # print('box area {:.3f}, yolo {:d}'.format((y2-y1) * (x2-x1), int(yolo_id)))

        # get reid map for this bbox (corresponding yolo idx)
        reid_feat_map = reid_feat_out[yolo_id]
        b, reid_dim, h_id_map, w_id_map = reid_feat_map.shape
        assert b == 1  # make sure batch size is 1

        # map center point from net scale to feature map scale (1/4 of net input size)
        center_x = (x1 + x2) * 0.5
        center_y = (y1 + y2) * 0.5
        center_x *= float(w_id_map) / float(w)
        center_y *= float(h_id_map) / float(h)

        # convert to int64 for indexing
        center_x += 0.5  # round
        center_y += 0.5
        center_x = center_x.long()
        center_y = center_y.long()
        center_x.clamp_(0, w_id_map - 1)  # avoid going out of the reid feature map's range
        center_y.clamp_(0, h_id_map - 1)

        id_feat_vect = reid_feat_map[0, :, center_y, center_x]
        id_feat_vect = id_feat_vect.squeeze()

        # L2 normalize the feature vector
        id_feat_vect = F.normalize(id_feat_vect, dim=0)

        id_feat_vect = id_feat_vect.cpu().numpy()
        id_vects_dict[int(cls_id)].append(id_feat_vect)  # put feat vect to dict (key: cls_id)

    # Rescale boxes from img_size to img0 size (from net input size to original size)
    dets[:, :4] = scale_coords(img.shape[2:], dets[:, :4], img0.shape).round()

    # Process each object class
    for cls_id in range(self.opt.num_classes):
        cls_inds = torch.where(dets[:, -1] == cls_id)
        cls_dets = dets[cls_inds]  # n_objs × 6
        cls_id_feature = id_vects_dict[cls_id]  # n_objs × 128

        cls_dets = cls_dets.detach().cpu().numpy()
        cls_id_feature = np.array(cls_id_feature)

        if len(cls_dets) > 0:
            '''Detections, tlbrs: top left bottom right score'''
            cls_detections = [
                Track(Track.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], feat, buff_size=30)
                for (tlbrs, feat) in zip(cls_dets[:, :5], cls_id_feature)
            ]  # detections of the current frame
        else:
            cls_detections = []

        # reset the track ids for a different object class in the first frame
        if self.frame_id == 1:
            for track in cls_detections:
                track.reset_track_id()

        ''' Add newly detected tracks (current frame) to tracked_tracks'''
        unconfirmed_dict = defaultdict(list)
        tracked_tracks_dict = defaultdict(list)
        for track in self.tracked_tracks_dict[cls_id]:
            if not track.is_activated:
                unconfirmed_dict[cls_id].append(track)  # record unconfirmed tracks in this frame
            else:
                tracked_tracks_dict[cls_id].append(track)  # record tracked tracks of this frame

        ''' Step 2: First association, with embedding'''
        # build the track pool for the current frame by joining tracked_tracks and lost tracks
        track_pool_dict = defaultdict(list)
        track_pool_dict[cls_id] = join_tracks(tracked_tracks_dict[cls_id],
                                              self.lost_tracks_dict[cls_id])

        # Predict the current location with KF
        # for track in track_pool:
        Track.multi_predict(track_pool_dict[cls_id])  # Kalman predict for the track pool

        dists = matching.embedding_distance(track_pool_dict[cls_id], cls_detections)
        dists = matching.fuse_motion(self.kalman_filter, dists,
                                     track_pool_dict[cls_id], cls_detections)
        matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.7)  # thresh=0.7

        # process matched pairs between the track pool and current frame detections
        for i_tracked, i_det in matches:
            track = track_pool_dict[cls_id][i_tracked]
            det = cls_detections[i_det]
            if track.state == TrackState.Tracked:
                track.update(cls_detections[i_det], self.frame_id)
                activated_tracks_dict[cls_id].append(track)  # for multi-class
            else:
                # re-activate the lost track
                track.re_activate(det, self.frame_id, new_id=False)
                refined_tracks_dict[cls_id].append(track)

        ''' Step 3: Second association, with IOU'''
        # match between the track pool and the unmatched detections of the current frame
        cls_detections = [cls_detections[i] for i in u_detection]  # unmatched detections for the following IOU matching
        r_tracked_tracks = [
            track_pool_dict[cls_id][i] for i in u_track
            if track_pool_dict[cls_id][i].state == TrackState.Tracked
        ]
        dists = matching.iou_distance(r_tracked_tracks, cls_detections)
        matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.5)  # thresh=0.5

        for i_tracked, i_det in matches:  # process matched tracks
            track = r_tracked_tracks[i_tracked]
            det = cls_detections[i_det]
            if track.state == TrackState.Tracked:
                track.update(det, self.frame_id)
                activated_tracks_dict[cls_id].append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refined_tracks_dict[cls_id].append(track)

        for it in u_track:  # process tracks unmatched in both rounds
            track = r_tracked_tracks[it]
            if not track.state == TrackState.Lost:
                track.mark_lost()  # mark the unmatched track as lost
                lost_tracks_dict[cls_id].append(track)

        '''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
        cls_detections = [cls_detections[i] for i in u_detection]  # current frame's unmatched detections
        dists = matching.iou_distance(unconfirmed_dict[cls_id], cls_detections)  # IOU matching
        matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=0.7)
        for i_tracked, i_det in matches:
            unconfirmed_dict[cls_id][i_tracked].update(cls_detections[i_det], self.frame_id)
            activated_tracks_dict[cls_id].append(unconfirmed_dict[cls_id][i_tracked])
        for it in u_unconfirmed:
            track = unconfirmed_dict[cls_id][it]
            track.mark_removed()
            removed_tracks_dict[cls_id].append(track)

        """ Step 4: Init new tracks"""
        for i_new in u_detection:  # current frame's unmatched detections
            track = cls_detections[i_new]
            if track.score < self.det_thresh:
                continue
            # tracked but not activated: activate() does not set 'is_activated' to True,
            # so activated_tracks_dict may contain tracks with 'is_activated' False
            track.activate(self.kalman_filter, self.frame_id)
            activated_tracks_dict[cls_id].append(track)

        """ Step 5: Update state"""
        # update removed tracks
        for track in self.lost_tracks_dict[cls_id]:
            if self.frame_id - track.end_frame > self.max_time_lost:
                track.mark_removed()
                removed_tracks_dict[cls_id].append(track)

        # print('Remained match {} s'.format(t4-t3))
        self.tracked_tracks_dict[cls_id] = [t for t in self.tracked_tracks_dict[cls_id]
                                            if t.state == TrackState.Tracked]
        self.tracked_tracks_dict[cls_id] = join_tracks(self.tracked_tracks_dict[cls_id],
                                                       activated_tracks_dict[cls_id])  # add activated tracks
        self.tracked_tracks_dict[cls_id] = join_tracks(self.tracked_tracks_dict[cls_id],
                                                       refined_tracks_dict[cls_id])  # add refined tracks
        self.lost_tracks_dict[cls_id] = sub_tracks(self.lost_tracks_dict[cls_id],
                                                   self.tracked_tracks_dict[cls_id])  # update lost tracks
        self.lost_tracks_dict[cls_id].extend(lost_tracks_dict[cls_id])
        self.lost_tracks_dict[cls_id] = sub_tracks(self.lost_tracks_dict[cls_id],
                                                   self.removed_tracks_dict[cls_id])
        self.removed_tracks_dict[cls_id].extend(removed_tracks_dict[cls_id])
        self.tracked_tracks_dict[cls_id], self.lost_tracks_dict[cls_id] = remove_duplicate_tracks(
            self.tracked_tracks_dict[cls_id], self.lost_tracks_dict[cls_id])

        # get scores of lost tracks
        output_tracks_dict[cls_id] = [track for track in self.tracked_tracks_dict[cls_id]
                                      if track.is_activated]

        # logger.debug('===========Frame {}=========='.format(self.frame_id))
        # logger.debug('Activated: {}'.format(
        #     [track.track_id for track in activated_tracks_dict[cls_id]]))
        # logger.debug('Refined: {}'.format(
        #     [track.track_id for track in refined_tracks_dict[cls_id]]))
        # logger.debug('Lost: {}'.format(
        #     [track.track_id for track in lost_tracks_dict[cls_id]]))
        # logger.debug('Removed: {}'.format(
        #     [track.track_id for track in removed_tracks_dict[cls_id]]))

    return output_tracks_dict
def update(self, im_blob, img0):
    self.frame_id += 1
    activated_starcks = []
    refind_stracks = []
    lost_stracks = []
    removed_stracks = []

    width = img0.shape[1]
    height = img0.shape[0]
    inp_height = im_blob.shape[2]
    inp_width = im_blob.shape[3]
    c = np.array([width / 2., height / 2.], dtype=np.float32)
    s = max(float(inp_width) / float(inp_height) * height, width) * 1.0
    meta = {'c': c, 's': s,
            'out_height': inp_height // self.opt.down_ratio,
            'out_width': inp_width // self.opt.down_ratio}

    ''' Step 1: Network forward, get detections & embeddings'''
    with torch.no_grad():
        output = self.model(im_blob)[-1]
        hm = output['hm'].sigmoid_()
        wh = output['wh']
        id_feature = output['id']
        id_feature = F.normalize(id_feature, dim=1)

        reg = output['reg'] if self.opt.reg_offset else None
        dets, inds = mot_decode(hm, wh, reg=reg, ltrb=self.opt.ltrb, K=self.opt.K)
        id_feature = _tranpose_and_gather_feat(id_feature, inds)
        id_feature = id_feature.squeeze(0)
        id_feature = id_feature.cpu().numpy()

    dets = self.post_process(dets, meta)
    dets = self.merge_outputs([dets])[1]

    remain_inds = dets[:, 4] > self.opt.conf_thres
    dets = dets[remain_inds]
    id_feature = id_feature[remain_inds]

    # vis
    '''
    for i in range(0, dets.shape[0]):
        bbox = dets[i][0:4]
        cv2.rectangle(img0, (bbox[0], bbox[1]),
                      (bbox[2], bbox[3]),
                      (0, 255, 0), 2)
    cv2.imshow('dets', img0)
    cv2.waitKey(0)
    id0 = id0-1
    '''

    if len(dets) > 0:
        '''Detections'''
        detections = [STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f, 30)
                      for (tlbrs, f) in zip(dets[:, :5], id_feature)]
    else:
        detections = []

    ''' Add newly detected tracklets to tracked_stracks'''
    unconfirmed = []
    tracked_stracks = []  # type: list[STrack]
    for track in self.tracked_stracks:
        if not track.is_activated:
            unconfirmed.append(track)
        else:
            tracked_stracks.append(track)

    ''' Step 2: First association, with embedding'''
    # Join tracked and lost track ids into one pool
    strack_pool = joint_stracks(tracked_stracks, self.lost_stracks)
    # Predict the current location with KF
    # for strack in strack_pool:
    #     strack.predict()
    # Calculate joint mean and covariance for the Kalman tracker
    STrack.multi_predict(strack_pool)
    # Get the cost matrix between tracks and dets
    dists = matching.embedding_distance(strack_pool, detections)
    # dists = matching.iou_distance(strack_pool, detections)
    # If a track-detection assignment is too far from the Kalman filter
    # prediction, assign infinite cost.
    # Update the cost matrix with the Kalman filter
    dists = matching.fuse_motion(self.kalman_filter, dists, strack_pool, detections)
    # Find the optimum assignment using the cost matrix;
    # u_track and u_detection are the unmatched tracks and detections respectively
    matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.4)

    # Update currently tracked tracks with the matches found
    for itracked, idet in matches:
        track = strack_pool[itracked]
        det = detections[idet]
        if track.state == TrackState.Tracked:
            track.update(detections[idet], self.frame_id)
            activated_starcks.append(track)
        else:
            track.re_activate(det, self.frame_id, new_id=False)
            refind_stracks.append(track)

    ''' Step 3: Second association, with IOU'''
    detections = [detections[i] for i in u_detection]
    # Get previously unmatched tracks that are still in the Tracked state
    r_tracked_stracks = [strack_pool[i] for i in u_track
                         if strack_pool[i].state == TrackState.Tracked]
    # Get the cost matrix
    dists = matching.iou_distance(r_tracked_stracks, detections)
    matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.6)  # Default 0.5

    for itracked, idet in matches:
        track = r_tracked_stracks[itracked]
        det = detections[idet]
        if track.state == TrackState.Tracked:
            track.update(det, self.frame_id)
            activated_starcks.append(track)
        else:
            track.re_activate(det, self.frame_id, new_id=False)
            refind_stracks.append(track)

    # For all of the unmatched tracks, mark them as lost
    for it in u_track:
        track = r_tracked_stracks[it]
        if not track.state == TrackState.Lost:
            track.mark_lost()
            lost_stracks.append(track)

    '''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
    # For the unconfirmed tracks (tracks with only one beginning frame),
    # use the remaining detections to try to pair them
    detections = [detections[i] for i in u_detection]
    dists = matching.iou_distance(unconfirmed, detections)
    matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=0.7)
    # Add the matched ones
    for itracked, idet in matches:
        unconfirmed[itracked].update(detections[idet], self.frame_id)
        activated_starcks.append(unconfirmed[itracked])
    # For the ones that couldn't be matched, remove them
    for it in u_unconfirmed:
        track = unconfirmed[it]
        track.mark_removed()
        removed_stracks.append(track)

    """ Step 4: Init new stracks"""
    for inew in u_detection:
        track = detections[inew]
        if track.score < self.det_thresh:
            continue
        track.activate(self.kalman_filter, self.frame_id)
        activated_starcks.append(track)

    """ Step 5: Update state"""
    for track in self.lost_stracks:
        # If disappeared for max_time_lost frames, then remove
        if self.frame_id - track.end_frame > self.max_time_lost:
            track.mark_removed()
            removed_stracks.append(track)

    # print('Remained match {} s'.format(t4-t3))
    self.tracked_stracks = [t for t in self.tracked_stracks if t.state == TrackState.Tracked]
    self.tracked_stracks = joint_stracks(self.tracked_stracks, activated_starcks)
    self.tracked_stracks = joint_stracks(self.tracked_stracks, refind_stracks)
    self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks)
    self.lost_stracks.extend(lost_stracks)
    self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks)
    self.removed_stracks.extend(removed_stracks)
    self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(
        self.tracked_stracks, self.lost_stracks)

    # get scores of lost tracks
    output_stracks = [track for track in self.tracked_stracks if track.is_activated]

    logger.debug('===========Frame {}=========='.format(self.frame_id))
    logger.debug('Activated: {}'.format([track.track_id for track in activated_starcks]))
    logger.debug('Refind: {}'.format([track.track_id for track in refind_stracks]))
    logger.debug('Lost: {}'.format([track.track_id for track in lost_stracks]))
    logger.debug('Removed: {}'.format([track.track_id for track in removed_stracks]))

    return output_stracks
def update(self, im_blob, img0, output):
    time_clean()
    time_sync('prepare')
    self.frame_id += 1
    activated_starcks = []
    refind_stracks = []
    lost_stracks = []
    removed_stracks = []

    width = img0.shape[1]
    height = img0.shape[0]
    inp_height = im_blob.shape[2]
    inp_width = im_blob.shape[3]
    c = np.array([width / 2., height / 2.], dtype=np.float32)
    s = max(float(inp_width) / float(inp_height) * height, width) * 1.0
    meta = {
        'c': c,
        's': s,
        'out_height': inp_height // self.opt.down_ratio,
        'out_width': inp_width // self.opt.down_ratio
    }

    ''' Step 1: Network forward, get detections & embeddings'''
    time_sync('prepare')
    with torch.no_grad():
        hm = output['hm'].sigmoid_()
        wh = output['wh']
        rois = output['rois']
        id_feature = output['id']
        id_feature = F.normalize(id_feature, dim=1)

        reg = output['reg'] if self.opt.reg_offset else None
        time_sync('decode')
        dets, inds = mot_decode2(hm, rois, wh, output['rpn_map'].shape[-2:],
                                 reg=reg, ltrb=self.opt.ltrb, K=self.opt.K,
                                 thr=self.opt.nms_thres)
        time_sync('decode')
        id_feature = id_feature[inds]
        # id_feature = id_feature.squeeze(0)
        id_feature = id_feature.cpu().numpy()

    time_sync('post')
    dets = dets.unsqueeze(0)
    dets = self.post_process(dets, meta)
    dets = self.merge_outputs([dets])[1]
    self.raw_dets.append(dets)

    remain_inds = dets[:, 4] > self.opt.conf_thres
    dets = dets[remain_inds]
    id_feature = id_feature[remain_inds]
    time_sync('post')
    # print(id_feature.shape)

    # vis
    '''
    for i in range(0, dets.shape[0]):
        bbox = dets[i][0:4]
        cv2.rectangle(img0, (bbox[0], bbox[1]),
                      (bbox[2], bbox[3]),
                      (0, 255, 0), 2)
    cv2.imshow('dets', img0)
    cv2.waitKey(0)
    id0 = id0-1
    '''
    # img_to_draw = img0.copy()
    # for i in range(0, dets.shape[0]):
    #     bbox = dets[i][0:4]
    #     cv2.rectangle(img_to_draw, (int(bbox[0]), int(bbox[1])),
    #                   (int(bbox[2]), int(bbox[3])),
    #                   (0, 255, 0), 2)
    #     cv2.putText(img_to_draw, '%.2f' % dets[i][4], (int(bbox[0]), int(bbox[1]) - 2),
    #                 cv2.FONT_HERSHEY_COMPLEX, 1, (10, 200, 200), thickness=2)
    # cv2.imwrite('./test.jpg', img_to_draw)
    # input()

    if len(dets) > 0:
        '''Detections'''
        detections = [
            STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f, 30)
            for (tlbrs, f) in zip(dets[:, :5], id_feature)
        ]
    else:
        detections = []

    ''' Add newly detected tracklets to tracked_stracks'''
    unconfirmed = []
    tracked_stracks = []  # type: list[STrack]
    for track in self.tracked_stracks:
        if not track.is_activated:
            unconfirmed.append(track)
        else:
            tracked_stracks.append(track)

    ''' Step 2: First association, with embedding'''
    strack_pool = joint_stracks(tracked_stracks, self.lost_stracks)
    # Predict the current location with KF
    # for strack in strack_pool:
    #     strack.predict()
    STrack.multi_predict(strack_pool)
    dists = matching.embedding_distance(strack_pool, detections)
    # dists = matching.iou_distance(strack_pool, detections)
    dists = matching.fuse_motion(self.kalman_filter, dists, strack_pool, detections)
    # print(dists.min(axis=1))
    # print(dists)
    # input()
    matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.4)

    for itracked, idet in matches:
        track = strack_pool[itracked]
        det = detections[idet]
        if track.state == TrackState.Tracked:
            track.update(detections[idet], self.frame_id)
            activated_starcks.append(track)
        else:
            track.re_activate(det, self.frame_id, new_id=False)
            refind_stracks.append(track)

    ''' Step 3: Second association, with IOU'''
    detections = [detections[i] for i in u_detection]
    r_tracked_stracks = [
        strack_pool[i] for i in u_track
        if strack_pool[i].state == TrackState.Tracked
    ]
    dists = matching.iou_distance(r_tracked_stracks, detections)
    matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.5)

    for itracked, idet in matches:
        track = r_tracked_stracks[itracked]
        det = detections[idet]
        if track.state == TrackState.Tracked:
            track.update(det, self.frame_id)
            activated_starcks.append(track)
        else:
            track.re_activate(det, self.frame_id, new_id=False)
            refind_stracks.append(track)

    for it in u_track:
        track = r_tracked_stracks[it]
        if not track.state == TrackState.Lost:
            track.mark_lost()
            lost_stracks.append(track)

    '''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
    detections = [detections[i] for i in u_detection]
    dists = matching.iou_distance(unconfirmed, detections)
    matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=0.7)
    for itracked, idet in matches:
        unconfirmed[itracked].update(detections[idet], self.frame_id)
        activated_starcks.append(unconfirmed[itracked])
    for it in u_unconfirmed:
        track = unconfirmed[it]
        track.mark_removed()
        removed_stracks.append(track)

    """ Step 4: Init new stracks"""
    for inew in u_detection:
        track = detections[inew]
        if track.score < self.det_thresh:
            continue
        track.activate(self.kalman_filter, self.frame_id)
        activated_starcks.append(track)

    """ Step 5: Update state"""
    for track in self.lost_stracks:
        if self.frame_id - track.end_frame > self.max_time_lost:
            track.mark_removed()
            removed_stracks.append(track)

    # print('Remained match {} s'.format(t4-t3))
    self.tracked_stracks = [
        t for t in self.tracked_stracks if t.state == TrackState.Tracked
    ]
    self.tracked_stracks = joint_stracks(self.tracked_stracks, activated_starcks)
    self.tracked_stracks = joint_stracks(self.tracked_stracks, refind_stracks)
    self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks)
    self.lost_stracks.extend(lost_stracks)
    self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks)
    self.removed_stracks.extend(removed_stracks)
    self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(
        self.tracked_stracks, self.lost_stracks)

    # get scores of lost tracks
    output_stracks = [
        track for track in self.tracked_stracks if track.is_activated
    ]

    logger.debug('===========Frame {}=========='.format(self.frame_id))
    logger.debug('Activated: {}'.format([track.track_id for track in activated_starcks]))
    logger.debug('Refind: {}'.format([track.track_id for track in refind_stracks]))
    logger.debug('Lost: {}'.format([track.track_id for track in lost_stracks]))
    logger.debug('Removed: {}'.format([track.track_id for track in removed_stracks]))

    return output_stracks
def update(self, im_blob, img0, p_crops, p_crops_lengths, edge_index,
           gnn_output_layer=-1, p_imgs=None, conf_thres=0.3):
    self.frame_id += 1
    activated_starcks = []
    refind_stracks = []
    lost_stracks = []
    removed_stracks = []

    width = img0.shape[1]
    height = img0.shape[0]
    inp_height = im_blob.shape[2]
    inp_width = im_blob.shape[3]
    c = np.array([width / 2., height / 2.], dtype=np.float32)
    s = max(float(inp_width) / float(inp_height) * height, width) * 1.0
    meta = {'c': c, 's': s,
            'out_height': inp_height // self.opt.down_ratio,
            'out_width': inp_width // self.opt.down_ratio}

    ''' Step 1: Network forward, get detections & embeddings'''
    with torch.no_grad():
        output = self.model(im_blob, p_crops, p_crops_lengths, edge_index,
                            p_imgs=p_imgs)[gnn_output_layer]
        if type(output) is list:
            output = output[-1]
        hm = output['hm'].sigmoid_()
        wh = output['wh']
        id_feature = output['id']
        id_feature = F.normalize(id_feature, dim=1)

        reg = output['reg'] if self.opt.reg_offset else None
        dets, inds = mot_decode(hm, wh, reg=reg,
                                cat_spec_wh=self.opt.cat_spec_wh, K=self.opt.K)
        id_feature = _tranpose_and_gather_feat(id_feature, inds)
        id_feature = id_feature.squeeze(0)
        id_feature = id_feature.cpu().numpy()

        if self.viz_attention and self.frame_id == self.opt.vis_attn_frame:
            # vis attention
            attn = output['p']
            node0_neighbor_idx = output['node0_neighbor_idx']
            keep = torch.where(attn > self.opt.vis_attn_thres)[0]
            self.visualize_centers(im_blob, keep, node0_neighbor_idx, attn, output, p_imgs)

    dets = self.post_process(dets, meta)
    dets = self.merge_outputs([dets])[1]

    # remain_inds = dets[:, 4] > self.opt.conf_thres
    remain_inds = dets[:, 4] > conf_thres
    dets = dets[remain_inds]
    id_feature = id_feature[remain_inds]

    # vis
    '''
    for i in range(0, dets.shape[0]):
        bbox = dets[i][0:4]
        cv2.rectangle(img0, (bbox[0], bbox[1]),
                      (bbox[2], bbox[3]),
                      (0, 255, 0), 2)
    cv2.imshow('dets', img0)
    cv2.waitKey(0)
    id0 = id0-1
    '''

    if len(dets) > 0:
        '''Detections'''
        detections = [STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f, 30)
                      for (tlbrs, f) in zip(dets[:, :5], id_feature)]
    else:
        detections = []

    ''' Add newly detected tracklets to tracked_stracks'''
    unconfirmed = []
    tracked_stracks = []  # type: list[STrack]
    for track in self.tracked_stracks:
        if not track.is_activated:
            unconfirmed.append(track)
        else:
            tracked_stracks.append(track)

    ''' Step 2: First association, with embedding'''
    strack_pool = joint_stracks(tracked_stracks, self.lost_stracks)
    # Predict the current location with KF
    # for strack in strack_pool:
    #     strack.predict()
    STrack.multi_predict(strack_pool)
    dists = matching.embedding_distance(strack_pool, detections)
    # dists = matching.gate_cost_matrix(self.kalman_filter, dists, strack_pool, detections)
    dists = matching.fuse_motion(self.kalman_filter, dists, strack_pool, detections)
    matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.7)

    for itracked, idet in matches:
        track = strack_pool[itracked]
        det = detections[idet]
        if track.state == TrackState.Tracked:
            track.update(detections[idet], self.frame_id)
            activated_starcks.append(track)
        else:
            track.re_activate(det, self.frame_id, new_id=False)
            refind_stracks.append(track)

    ''' Step 3: Second association, with IOU'''
    detections = [detections[i] for i in u_detection]
    r_tracked_stracks = [strack_pool[i] for i in u_track
                         if strack_pool[i].state == TrackState.Tracked]
    dists = matching.iou_distance(r_tracked_stracks, detections)
    matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.5)

    for itracked, idet in matches:
        track = r_tracked_stracks[itracked]
        det = detections[idet]
        if track.state == TrackState.Tracked:
            track.update(det, self.frame_id)
            activated_starcks.append(track)
        else:
            track.re_activate(det, self.frame_id, new_id=False)
            refind_stracks.append(track)

    for it in u_track:
        track = r_tracked_stracks[it]
        if not track.state == TrackState.Lost:
            track.mark_lost()
            lost_stracks.append(track)

    '''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
    detections = [detections[i] for i in u_detection]
    dists = matching.iou_distance(unconfirmed, detections)
    matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=0.7)
    for itracked, idet in matches:
        unconfirmed[itracked].update(detections[idet], self.frame_id)
        activated_starcks.append(unconfirmed[itracked])
    for it in u_unconfirmed:
        track = unconfirmed[it]
        track.mark_removed()
        removed_stracks.append(track)

    """ Step 4: Init new stracks"""
    for inew in u_detection:
        track = detections[inew]
        # if track.score < self.det_thresh:
        if track.score < conf_thres:
            continue
        track.activate(self.kalman_filter, self.frame_id)
        activated_starcks.append(track)

    """ Step 5: Update state"""
    for track in self.lost_stracks:
        if self.frame_id - track.end_frame > self.max_time_lost:
            track.mark_removed()
            removed_stracks.append(track)

    # print('Remained match {} s'.format(t4-t3))
    self.tracked_stracks = [t for t in self.tracked_stracks if t.state == TrackState.Tracked]
    self.tracked_stracks = joint_stracks(self.tracked_stracks, activated_starcks)
    self.tracked_stracks = joint_stracks(self.tracked_stracks, refind_stracks)
    self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks)
    self.lost_stracks.extend(lost_stracks)
    self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks)
    self.removed_stracks.extend(removed_stracks)
    self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(
        self.tracked_stracks, self.lost_stracks)

    # get scores of lost tracks
    output_stracks = [track for track in self.tracked_stracks if track.is_activated]

    logger.debug('===========Frame {}=========='.format(self.frame_id))
    logger.debug('Activated: {}'.format([track.track_id for track in activated_starcks]))
    logger.debug('Refind: {}'.format([track.track_id for track in refind_stracks]))
    logger.debug('Lost: {}'.format([track.track_id for track in lost_stracks]))
    logger.debug('Removed: {}'.format([track.track_id for track in removed_stracks]))

    return output_stracks
def update(self, im_blob, img0):
    self.frame_id += 1
    activated_starcks = []
    refind_stracks = []
    lost_stracks = []
    removed_stracks = []

    width = img0.shape[1]
    height = img0.shape[0]
    inp_height = im_blob.shape[2]
    inp_width = im_blob.shape[3]
    c = np.array([width / 2.0, height / 2.0], dtype=np.float32)
    s = max(float(inp_width) / float(inp_height) * height, width) * 1.0
    meta = {
        "c": c,
        "s": s,
        "out_height": inp_height // self.opt.down_ratio,
        "out_width": inp_width // self.opt.down_ratio,
    }

    """ Step 1: Network forward, get detections & embeddings"""
    with torch.no_grad():
        output = self.model(im_blob)[-1]
        hm = output["hm"].sigmoid_()
        wh = output["wh"]
        id_feature = output["id"]
        id_feature = F.normalize(id_feature, dim=1)

        reg = output["reg"] if self.opt.reg_offset else None
        dets, inds = mot_decode(hm, wh, reg=reg,
                                cat_spec_wh=self.opt.cat_spec_wh, K=self.opt.K)
        id_feature = _tranpose_and_gather_feat(id_feature, inds)
        id_feature = id_feature.squeeze(0)
        id_feature = id_feature.cpu().numpy()

    dets = self.post_process(dets, meta)
    dets = self.merge_outputs([dets])[1]

    remain_inds = dets[:, 4] > self.opt.conf_thres
    dets = dets[remain_inds]
    id_feature = id_feature[remain_inds]

    # vis
    """
    for i in range(0, dets.shape[0]):
        bbox = dets[i][0:4]
        cv2.rectangle(img0, (bbox[0], bbox[1]),
                      (bbox[2], bbox[3]),
                      (0, 255, 0), 2)
    cv2.imshow('dets', img0)
    cv2.waitKey(0)
    id0 = id0-1
    """

    if len(dets) > 0:
        """Detections"""
        detections = [
            STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f, 30)
            for (tlbrs, f) in zip(dets[:, :5], id_feature)
        ]
    else:
        detections = []

    """ Add newly detected tracklets to tracked_stracks"""
    unconfirmed = []
    tracked_stracks = []  # type: list[STrack]
    for track in self.tracked_stracks:
        if not track.is_activated:
            unconfirmed.append(track)
        else:
            tracked_stracks.append(track)

    """ Step 2: First association, with embedding"""
    strack_pool = joint_stracks(tracked_stracks, self.lost_stracks)
    # Predict the current location with KF
    # for strack in strack_pool:
    #     strack.predict()
    STrack.multi_predict(strack_pool)
    dists = matching.embedding_distance(strack_pool, detections)
    # dists = matching.gate_cost_matrix(self.kalman_filter, dists, strack_pool, detections)
    dists = matching.fuse_motion(self.kalman_filter, dists, strack_pool, detections)
    matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.7)

    for itracked, idet in matches:
        track = strack_pool[itracked]
        det = detections[idet]
        if track.state == TrackState.Tracked:
            track.update(detections[idet], self.frame_id)
            activated_starcks.append(track)
        else:
            track.re_activate(det, self.frame_id, new_id=False)
            refind_stracks.append(track)

    """ Step 3: Second association, with IOU"""
    detections = [detections[i] for i in u_detection]
    r_tracked_stracks = [
        strack_pool[i] for i in u_track if strack_pool[i].state == TrackState.Tracked
    ]
    dists = matching.iou_distance(r_tracked_stracks, detections)
    matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.5)

    for itracked, idet in matches:
        track = r_tracked_stracks[itracked]
        det = detections[idet]
        if track.state == TrackState.Tracked:
            track.update(det, self.frame_id)
            activated_starcks.append(track)
        else:
            track.re_activate(det, self.frame_id, new_id=False)
            refind_stracks.append(track)

    for it in u_track:
        track = r_tracked_stracks[it]
        if not track.state == TrackState.Lost:
            track.mark_lost()
            lost_stracks.append(track)

    """Deal with unconfirmed tracks, usually tracks with only one beginning frame"""
    detections = [detections[i] for i in u_detection]
    dists = matching.iou_distance(unconfirmed, detections)
    matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=0.7)
    for itracked, idet in matches:
        unconfirmed[itracked].update(detections[idet], self.frame_id)
        activated_starcks.append(unconfirmed[itracked])
    for it in u_unconfirmed:
        track = unconfirmed[it]
        track.mark_removed()
        removed_stracks.append(track)

    """ Step 4: Init new stracks"""
    for inew in u_detection:
        track = detections[inew]
        if track.score < self.det_thresh:
            continue
        track.activate(self.kalman_filter, self.frame_id)
        activated_starcks.append(track)

    """ Step 5: Update state"""
    for track in self.lost_stracks:
        if self.frame_id - track.end_frame > self.max_time_lost:
            track.mark_removed()
            removed_stracks.append(track)

    # print('Remained match {} s'.format(t4-t3))
    self.tracked_stracks = [
        t for t in self.tracked_stracks if t.state == TrackState.Tracked
    ]
    self.tracked_stracks = joint_stracks(self.tracked_stracks, activated_starcks)
    self.tracked_stracks = joint_stracks(self.tracked_stracks, refind_stracks)
    self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks)
    self.lost_stracks.extend(lost_stracks)
    self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks)
    self.removed_stracks.extend(removed_stracks)
    self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(
        self.tracked_stracks, self.lost_stracks)

    # get scores of lost tracks
    output_stracks = [
        track for track in self.tracked_stracks if track.is_activated
    ]

    logger.debug("===========Frame {}==========".format(self.frame_id))
    logger.debug("Activated: {}".format([track.track_id for track in activated_starcks]))
    logger.debug("Refind: {}".format([track.track_id for track in refind_stracks]))
    logger.debug("Lost: {}".format([track.track_id for track in lost_stracks]))
    logger.debug("Removed: {}".format([track.track_id for track in removed_stracks]))

    return output_stracks
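# The first-association step above combines an appearance cost with a motion
# gate. A minimal sketch of what `matching.embedding_distance` and
# `matching.fuse_motion` are assumed to do (cosine distance over reid
# features, then Mahalanobis gating and blending via the Kalman filter); the
# lambda_ weight and the chi-square gate follow the common JDE/FairMOT
# convention and are assumptions, not necessarily this codebase's exact code:
import numpy as np
from scipy.spatial.distance import cdist

CHI2INV95_4D = 9.4877  # 0.95 quantile of chi-square with 4 degrees of freedom

def embedding_distance_sketch(tracks, detections):
    cost = np.zeros((len(tracks), len(detections)), dtype=np.float32)
    if cost.size == 0:
        return cost
    det_feats = np.asarray([d.curr_feat for d in detections])
    trk_feats = np.asarray([t.smooth_feat for t in tracks])
    # cosine distance, clipped at zero
    return np.maximum(0.0, cdist(trk_feats, det_feats, metric='cosine'))

def fuse_motion_sketch(kf, cost, tracks, detections, lambda_=0.98):
    if cost.size == 0:
        return cost
    measurements = np.asarray([d.to_xyah() for d in detections])
    for row, track in enumerate(tracks):
        # squared Mahalanobis distance from the KF-predicted state to each measurement
        gating = kf.gating_distance(track.mean, track.covariance,
                                    measurements, only_position=False,
                                    metric='maha')
        cost[row, gating > CHI2INV95_4D] = np.inf  # gate out implausible pairs
        cost[row] = lambda_ * cost[row] + (1 - lambda_) * gating
    return cost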
def update(self, im_blob, img0):
    # process the detection boxes of the current frame
    self.frame_id += 1
    activated_starcks = []
    refind_stracks = []  # tracks re-found between the previous frame and this one
    lost_stracks = []  # tracks lost between the previous frame and this one
    removed_stracks = []  # tracks to be removed between the previous frame and this one

    width = img0.shape[1]
    height = img0.shape[0]
    inp_height = im_blob.shape[2]
    inp_width = im_blob.shape[3]
    c = np.array([width / 2., height / 2.], dtype=np.float32)
    s = max(float(inp_width) / float(inp_height) * height, width) * 1.0
    meta = {'c': c, 's': s,
            'out_height': inp_height // self.opt.down_ratio,
            'out_width': inp_width // self.opt.down_ratio}

    ''' Step 1: Network forward, get detections & embeddings'''
    with torch.no_grad():
        output = self.model(im_blob)[-1]
        hm = output['hm'].sigmoid_()
        wh = output['wh']
        id_feature = output['id']
        id_feature = F.normalize(id_feature, dim=1)  # torch.Size([1, 512, 152, 272])

        reg = output['reg'] if self.opt.reg_offset else None
        # boxes as top-left/bottom-right coordinates plus score and class;
        # inds indexes into the flattened image: the K=128 highest-confidence
        # positions, i.e. the maximum number of output objects
        dets, inds = mot_decode(hm, wh, reg=reg,
                                cat_spec_wh=self.opt.cat_spec_wh, K=self.opt.K)
        # id_feature torch.Size([1, 512, 152, 272]), inds torch.Size([1, 128])
        id_feature = _tranpose_and_gather_feat(id_feature, inds)  # -> torch.Size([1, 128, 512])
        id_feature = id_feature.squeeze(0)
        id_feature = id_feature.cpu().numpy()

    # map the feature-map predictions back onto the original image, giving the
    # coordinates and confidences of the 128 detection boxes in the original image
    dets = self.post_process(dets, meta)
    dets = self.merge_outputs([dets])[1]  # (128, 5)

    remain_inds = dets[:, 4] > self.opt.conf_thres  # keep only boxes whose confidence exceeds the threshold
    dets = dets[remain_inds]  # e.g. (2, 5): only two boxes survive as the final result
    id_feature = id_feature[remain_inds]  # e.g. (2, 512): the corresponding features

    # vis
    '''
    for i in range(0, dets.shape[0]):
        bbox = dets[i][0:4]
        cv2.rectangle(img0, (bbox[0], bbox[1]),
                      (bbox[2], bbox[3]),
                      (0, 255, 0), 2)
    cv2.imshow('dets', img0)
    cv2.waitKey(0)
    id0 = id0-1
    '''

    if len(dets) > 0:
        '''Detections'''
        # build STracks (tracklets) by calling the class's static conversion method directly
        detections = [STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f, 30)
                      for (tlbrs, f) in zip(dets[:, :5], id_feature)]
    else:
        detections = []

    ''' Add newly detected tracklets to tracked_stracks'''
    unconfirmed = []
    tracked_stracks = []  # type: list[STrack]
    # split the tracks that existed before this frame into unconfirmed and tracked_stracks
    for track in self.tracked_stracks:
        if not track.is_activated:
            unconfirmed.append(track)
        else:
            tracked_stracks.append(track)

    ''' Step 2: First association, with embedding'''
    strack_pool = joint_stracks(tracked_stracks, self.lost_stracks)  # take the union
    # Predict the current location with KF
    # for strack in strack_pool:
    #     strack.predict()
    # use the Kalman filter to predict each target's state in this frame
    # (calls every track's predict method)
    STrack.multi_predict(strack_pool)
    # match via embeddings: distances between each detection and the smooth
    # features of the existing tracks, returned as a cost matrix
    dists = matching.embedding_distance(strack_pool, detections)
    # dists = matching.gate_cost_matrix(self.kalman_filter, dists, strack_pool, detections)
    # for each track, compute the gating distance to every detection in this frame
    dists = matching.fuse_motion(self.kalman_filter, dists, strack_pool, detections)
    # run Hungarian matching on the gated costs, yielding the three match outcomes
    matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.7)

    for itracked, idet in matches:
        track = strack_pool[itracked]
        det = detections[idet]
        if track.state == TrackState.Tracked:  # was in the Tracked state in the previous frame
            # update the track state, including the KF mean vector and covariance matrix
            track.update(detections[idet], self.frame_id)
            activated_starcks.append(track)
        else:  # was in a new/lost state in the previous frame
            track.re_activate(det, self.frame_id, new_id=False)
            refind_stracks.append(track)

    ''' Step 3: Second association, with IOU'''
    # second pass: try to pair the unmatched detections with the unmatched tracks
    detections = [detections[i] for i in u_detection]
    r_tracked_stracks = [strack_pool[i] for i in u_track
                         if strack_pool[i].state == TrackState.Tracked]
    dists = matching.iou_distance(r_tracked_stracks, detections)
    matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.5)

    for itracked, idet in matches:
        track = r_tracked_stracks[itracked]
        det = detections[idet]
        if track.state == TrackState.Tracked:
            track.update(det, self.frame_id)
            activated_starcks.append(track)
        else:
            track.re_activate(det, self.frame_id, new_id=False)
            refind_stracks.append(track)

    for it in u_track:
        track = r_tracked_stracks[it]
        if not track.state == TrackState.Lost:  # if not already lost, mark it lost
            track.mark_lost()
            lost_stracks.append(track)

    '''Third matching pass: deal with unconfirmed tracks, usually tracks with only
    one beginning frame (a track followed for a single frame is an unconfirmed track)'''
    detections = [detections[i] for i in u_detection]
    dists = matching.iou_distance(unconfirmed, detections)
    matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=0.7)
    for itracked, idet in matches:
        unconfirmed[itracked].update(detections[idet], self.frame_id)
        activated_starcks.append(unconfirmed[itracked])
    for it in u_unconfirmed:
        track = unconfirmed[it]
        track.mark_removed()
        removed_stracks.append(track)

    """ Step 4: Init new stracks"""
    for inew in u_detection:
        track = detections[inew]
        if track.score < self.det_thresh:  # compare against the tracking confidence threshold
            continue
        track.activate(self.kalman_filter, self.frame_id)
        activated_starcks.append(track)

    """ Step 5: Update state"""
    for track in self.lost_stracks:
        if self.frame_id - track.end_frame > self.max_time_lost:
            track.mark_removed()  # remove tracks that have been lost for too long
            removed_stracks.append(track)

    # print('Remained match {} s'.format(t4-t3))
    self.tracked_stracks = [t for t in self.tracked_stracks if t.state == TrackState.Tracked]
    self.tracked_stracks = joint_stracks(self.tracked_stracks, activated_starcks)
    self.tracked_stracks = joint_stracks(self.tracked_stracks, refind_stracks)
    self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks)
    self.lost_stracks.extend(lost_stracks)
    self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks)
    self.removed_stracks.extend(removed_stracks)
    self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(
        self.tracked_stracks, self.lost_stracks)

    # get scores of lost tracks
    output_stracks = [track for track in self.tracked_stracks if track.is_activated]

    logger.debug('===========Frame {}=========='.format(self.frame_id))
    logger.debug('Activated: {}'.format([track.track_id for track in activated_starcks]))
    logger.debug('Refind: {}'.format([track.track_id for track in refind_stracks]))
    logger.debug('Lost: {}'.format([track.track_id for track in lost_stracks]))
    logger.debug('Removed: {}'.format([track.track_id for track in removed_stracks]))

    return output_stracks
def update(self, im_blob, img0):
    self.frame_id += 1
    activated_starcks = []
    refind_stracks = []
    lost_stracks = []
    removed_stracks = []

    width = img0.shape[1]
    height = img0.shape[0]
    inp_height = im_blob.shape[2]
    inp_width = im_blob.shape[3]
    c = np.array([width / 2., height / 2.], dtype=np.float32)
    s = max(float(inp_width) / float(inp_height) * height, width) * 1.0
    meta = {
        'c': c,
        's': s,
        'out_height': inp_height // self.opt.down_ratio,
        'out_width': inp_width // self.opt.down_ratio
    }

    ''' Step 1: Network forward, get detections & embeddings'''
    with torch.no_grad():
        output = self.model(im_blob)[-1]
        # hm = output['hm'].sigmoid_()
        # wh = output['wh']
        # reg = output['reg'] if self.opt.reg_offset else None
        # dets, inds = mot_decode(hm, wh, reg=reg, ltrb=self.opt.ltrb, K=self.opt.K)

        bboxes, scores, clses = self.detector(im_blob)
        human_inds = np.where(clses == 0)[0]
        bboxes = bboxes[human_inds]
        scores = np.expand_dims(scores[human_inds], axis=1)
        clses = np.expand_dims(clses[human_inds], axis=1)
        num_boxes = len(bboxes)
        dets = np.concatenate((bboxes, scores, clses), axis=1)

        scale = 1.0 / 4
        bboxes_roi_align = torch.from_numpy(bboxes.copy()).cuda()
        bboxes_roi_align *= scale
        box_indexs = torch.from_numpy(np.arange(num_boxes)).to(torch.int).cuda()

        if num_boxes > 0:
            id_feature = output['id']
            id_feature = F.normalize(id_feature, dim=1)
            id_feature = id_feature.repeat(num_boxes, 1, 1, 1)
            id_feature = self.roi_align(id_feature, bboxes_roi_align, box_indexs)
            id_feature = torch.mean(id_feature, [2, 3]).cpu().numpy()
        else:
            id_feature = np.empty((0, 128), dtype=float)

        # id_feature = _tranpose_and_gather_feat(id_feature, inds)
        # id_feature = id_feature.squeeze(0)
        # id_feature = id_feature.cpu().numpy()

    dets[:, 0] = dets[:, 0] / inp_width * width
    dets[:, 1] = dets[:, 1] / inp_height * height
    dets[:, 2] = dets[:, 2] / inp_width * width
    dets[:, 3] = dets[:, 3] / inp_height * height

    # dets = self.post_process(dets, meta)
    # dets = self.merge_outputs([dets])[1]
    # ipdb.set_trace()
    # remain_inds = dets[:, 4] > self.opt.conf_thres
    # dets = dets[remain_inds]
    # id_feature = id_feature[remain_inds]

    # vis
    '''
    for i in range(0, dets.shape[0]):
        bbox = dets[i][0:4]
        # ipdb.set_trace()
        cv2.rectangle(img0,
                      pt1=(int(bbox[0]), int(bbox[1])),
                      pt2=(int(bbox[2]), int(bbox[3])),
                      color=(0, 255, 0), thickness=2)
    cv2.imshow('dets', img0)
    cv2.waitKey(1)
    '''
    # id0 = id0-1

    if len(dets) > 0:
        '''Detections'''
        detections = [
            STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f, 30)
            for (tlbrs, f) in zip(dets[:, :5], id_feature)
        ]
    else:
        detections = []

    ''' Add newly detected tracklets to tracked_stracks'''
    unconfirmed = []
    tracked_stracks = []  # type: list[STrack]
    for track in self.tracked_stracks:
        if not track.is_activated:
            unconfirmed.append(track)
        else:
            tracked_stracks.append(track)

    ''' Step 2: First association, with embedding'''
    strack_pool = joint_stracks(tracked_stracks, self.lost_stracks)
    # Predict the current location with KF
    # for strack in strack_pool:
    #     strack.predict()
    STrack.multi_predict(strack_pool)
    dists = matching.embedding_distance(strack_pool, detections)
    # dists = matching.iou_distance(strack_pool, detections)
    dists = matching.fuse_motion(self.kalman_filter, dists, strack_pool, detections)
    matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.4)

    for itracked, idet in matches:
        track = strack_pool[itracked]
        det = detections[idet]
        if track.state == TrackState.Tracked:
            track.update(detections[idet], self.frame_id)
            activated_starcks.append(track)
        else:
            track.re_activate(det, self.frame_id, new_id=False)
            refind_stracks.append(track)

    ''' Step 3: Second association, with IOU'''
    detections = [detections[i] for i in u_detection]
    r_tracked_stracks = [
        strack_pool[i] for i in u_track
        if strack_pool[i].state == TrackState.Tracked
    ]
    dists = matching.iou_distance(r_tracked_stracks, detections)
    matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.5)

    for itracked, idet in matches:
        track = r_tracked_stracks[itracked]
        det = detections[idet]
        if track.state == TrackState.Tracked:
            track.update(det, self.frame_id)
            activated_starcks.append(track)
        else:
            track.re_activate(det, self.frame_id, new_id=False)
            refind_stracks.append(track)

    for it in u_track:
        track = r_tracked_stracks[it]
        if not track.state == TrackState.Lost:
            track.mark_lost()
            lost_stracks.append(track)

    '''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
    detections = [detections[i] for i in u_detection]
    dists = matching.iou_distance(unconfirmed, detections)
    matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=0.7)
    for itracked, idet in matches:
        unconfirmed[itracked].update(detections[idet], self.frame_id)
        activated_starcks.append(unconfirmed[itracked])
    for it in u_unconfirmed:
        track = unconfirmed[it]
        track.mark_removed()
        removed_stracks.append(track)

    """ Step 4: Init new stracks"""
    for inew in u_detection:
        track = detections[inew]
        if track.score < self.det_thresh:
            continue
        track.activate(self.kalman_filter, self.frame_id)
        activated_starcks.append(track)

    """ Step 5: Update state"""
    for track in self.lost_stracks:
        if self.frame_id - track.end_frame > self.max_time_lost:
            track.mark_removed()
            removed_stracks.append(track)

    # print('Remained match {} s'.format(t4-t3))
    self.tracked_stracks = [
        t for t in self.tracked_stracks if t.state == TrackState.Tracked
    ]
    self.tracked_stracks = joint_stracks(self.tracked_stracks, activated_starcks)
    self.tracked_stracks = joint_stracks(self.tracked_stracks, refind_stracks)
    self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks)
    self.lost_stracks.extend(lost_stracks)
    self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks)
    self.removed_stracks.extend(removed_stracks)
    self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(
        self.tracked_stracks, self.lost_stracks)

    # get scores of lost tracks
    output_stracks = [
        track for track in self.tracked_stracks if track.is_activated
    ]

    logger.debug('===========Frame {}=========='.format(self.frame_id))
    logger.debug('Activated: {}'.format([track.track_id for track in activated_starcks]))
    logger.debug('Refind: {}'.format([track.track_id for track in refind_stracks]))
    logger.debug('Lost: {}'.format([track.track_id for track in lost_stracks]))
    logger.debug('Removed: {}'.format([track.track_id for track in removed_stracks]))

    return output_stracks
def workOnDetections(opt, pred, results, img0, frame_id, save_dir, show_image, self_dict): output_tracks = [] self_dict['frame_id'] += 1 activated_starcks = [] refind_stracks = [] lost_stracks = [] removed_stracks = [] if len(pred) > 0: dets = non_max_suppression(pred.unsqueeze(0), opt.conf_thres, opt.nms_thres)[0] scale_coords(opt.img_size, dets[:, :4], img0.shape).round() dets, embs = dets[:, :5].cpu().numpy(), dets[:, 6:].cpu().numpy() '''Detections''' detections = [ STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f, 30) for (tlbrs, f) in zip(dets, embs) ] else: detections = [] ''' Add newly detected tracklets to tracked_stracks''' unconfirmed = [] tracked_stracks = [] # type: list[STrack] for track in self_dict['tracked_stracks']: if not track.is_activated: unconfirmed.append(track) else: tracked_stracks.append(track) ''' Step 2: First association, with embedding''' strack_pool = joint_stracks(tracked_stracks, self_dict['lost_stracks']) # Predict the current location with KF STrack.multi_predict(strack_pool) dists = matching.embedding_distance(strack_pool, detections) dists = matching.fuse_motion(self_dict['kalman_filter'], dists, strack_pool, detections) matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.7) for itracked, idet in matches: track = strack_pool[itracked] det = detections[idet] if track.state == TrackState.Tracked: track.update(detections[idet], self_dict['frame_id']) activated_starcks.append(track) else: track.re_activate(det, self_dict['frame_id'], new_id=False) refind_stracks.append(track) ''' Step 3: Second association, with IOU''' detections = [detections[i] for i in u_detection] r_tracked_stracks = [ strack_pool[i] for i in u_track if strack_pool[i].state == TrackState.Tracked ] dists = matching.iou_distance(r_tracked_stracks, detections) matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.5) for itracked, idet in matches: track = r_tracked_stracks[itracked] det = detections[idet] if track.state == TrackState.Tracked: track.update(det, self_dict['frame_id']) activated_starcks.append(track) else: track.re_activate(det, self_dict['frame_id'], new_id=False) refind_stracks.append(track) for it in u_track: track = r_tracked_stracks[it] if not track.state == TrackState.Lost: track.mark_lost() lost_stracks.append(track) '''Deal with unconfirmed tracks, usually tracks with only one beginning frame''' detections = [detections[i] for i in u_detection] dists = matching.iou_distance(unconfirmed, detections) matches, u_unconfirmed, u_detection = matching.linear_assignment( dists, thresh=0.7) for itracked, idet in matches: unconfirmed[itracked].update(detections[idet], self_dict['frame_id']) activated_starcks.append(unconfirmed[itracked]) for it in u_unconfirmed: track = unconfirmed[it] track.mark_removed() removed_stracks.append(track) """ Step 4: Init new stracks""" for inew in u_detection: track = detections[inew] if track.score < self_dict['det_thresh']: continue track.activate(self_dict['kalman_filter'], self_dict['frame_id']) activated_starcks.append(track) """ Step 5: Update state""" for track in self_dict['lost_stracks']: if self_dict['frame_id'] - track.end_frame > self_dict[ 'max_time_lost']: track.mark_removed() removed_stracks.append(track) self_dict['tracked_stracks'] = [ t for t in self_dict['tracked_stracks'] if t.state == TrackState.Tracked ] self_dict['tracked_stracks'] = joint_stracks( self_dict['tracked_stracks'], activated_starcks) self_dict['tracked_stracks'] = joint_stracks( self_dict['tracked_stracks'], 
refind_stracks) self_dict['lost_stracks'] = sub_stracks(self_dict['lost_stracks'], self_dict['tracked_stracks']) self_dict['lost_stracks'].extend(lost_stracks) self_dict['lost_stracks'] = sub_stracks(self_dict['lost_stracks'], self_dict['removed_stracks']) self_dict['removed_stracks'].extend(removed_stracks) self_dict['tracked_stracks'], self_dict[ 'lost_stracks'] = remove_duplicate_stracks( self_dict['tracked_stracks'], self_dict['lost_stracks']) # get scores of lost tracks output_stracks = [ track for track in self_dict['tracked_stracks'] if track.is_activated ] logger.debug('===========Frame {}=========='.format( self_dict['frame_id'])) logger.debug('Activated: {}'.format( [track.track_id for track in activated_starcks])) logger.debug('Refind: {}'.format( [track.track_id for track in refind_stracks])) logger.debug('Lost: {}'.format( [track.track_id for track in lost_stracks])) logger.debug('Removed: {}'.format( [track.track_id for track in removed_stracks])) online_targets = output_stracks online_tlwhs = [] online_ids = [] for t in online_targets: tlwh = t.tlwh tid = t.track_id vertical = tlwh[2] / tlwh[3] > 1.6 if tlwh[2] * tlwh[3] > opt.min_box_area and not vertical: online_tlwhs.append(tlwh) online_ids.append(tid) results.append((frame_id['0'] + 1, online_tlwhs, online_ids)) if show_image or save_dir is not None: online_im = vis.plot_tracking(img0, online_tlwhs, online_ids, frame_id=frame_id['0']) ## if show_image: cv2.imshow('online_im', online_im) if save_dir is not None: cv2.imwrite( os.path.join(save_dir, '{:05d}.jpg'.format(frame_id['0'])), online_im) frame_id['0'] += 1 # print("Processed frame: ", str(frame_id['0'])) # print("End of post-processing") return self_dict, frame_id
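# workOnDetections() reports only boxes that are large enough and plausibly
# pedestrian-shaped (a width/height ratio above 1.6 is rejected). A sketch of
# that output filter; the min_box_area default here is a placeholder for
# opt.min_box_area:
def filter_outputs(tlwhs, track_ids, min_box_area=200.0, max_aspect=1.6):
    """Keep (tlwh, id) pairs with area > min_box_area and w/h <= max_aspect."""
    kept_tlwhs, kept_ids = [], []
    for tlwh, tid in zip(tlwhs, track_ids):
        w, h = tlwh[2], tlwh[3]
        if w * h > min_box_area and w / h <= max_aspect:
            kept_tlwhs.append(tlwh)
            kept_ids.append(tid)
    return kept_tlwhs, kept_ids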
def update(self, im_blob, img0): self.frame_id += 1 activated_starcks = [] refind_stracks = [] lost_stracks = [] removed_stracks = [] width = img0.shape[1] height = img0.shape[0] inp_height = im_blob.shape[2] inp_width = im_blob.shape[3] c = np.array([width / 2., height / 2.], dtype=np.float32) s = max(float(inp_width) / float(inp_height) * height, width) * 1.0 meta = { 'c': c, 's': s, 'out_height': inp_height // self.opt.down_ratio, 'out_width': inp_width // self.opt.down_ratio } ''' Step 1: Network forward, get detections & embeddings''' with torch.no_grad(): output = self.model(im_blob)[-1] hm = output['hm'].sigmoid_() wh = output['wh'] id_feature = output['id'] id_feature = F.normalize(id_feature, dim=1) reg = output['reg'] if self.opt.reg_offset else None dets, inds = mot_decode(hm, wh, reg=reg, cat_spec_wh=self.opt.cat_spec_wh, K=self.opt.K) id_feature = _tranpose_and_gather_feat(id_feature, inds) id_feature = id_feature.squeeze(0) id_feature = id_feature.cpu().numpy() ''' print("==> [multi-tracker.update] dets:", dets) print("==> [multi-tracker.update] dets.size 1:", dets.size()) # [1, 128, 6] ''' dets = self.post_process(dets, meta) dets = self.merge_outputs([dets])[1] ''' print("==> [multi-tracker.update] len(dets):", len(dets)) # 128 print("==> [multi-tracker.update] len(dets[0]):", len(dets[0])) # 5 dets: [[ 761.85 169.75 779.43 210.57 0.76028] [ 746.16 167.86 763.81 209.36 0.70138] [ 520.55 170.32 533.13 198.51 0.44955] [ 678.15 170.84 687.6 190.35 0.42314] [ 706.3 172.26 723 207.56 0.41279] [ 731.59 168.2 742.89 194.59 0.40816] [ 345.91 188.76 369.22 234.64 0.38459] [ 434.66 170.01 448.6 199.26 0.37619] [ 212.57 177.95 231.56 228.84 0.26836] [ 549.7 168.05 560.64 193.19 0.23459] ... ] print("self.opt.conf_thres:", self.opt.conf_thres) # 0.4 ''' remain_inds = dets[:, 4] > self.opt.conf_thres dets = dets[remain_inds] id_feature = id_feature[remain_inds] ''' print("==> [multi-tracker.update] len(dets):", len(dets)) # 6 print("==> [multi-tracker.update] len(id_feature):", len(id_feature)) # 6 print("==> [multi-tracker.update] id_feature[0]:", id_feature.size) # 3072 3072 = 6 * 512 embedding dimension: 512 ''' # vis ''' for i in range(0, dets.shape[0]): bbox = dets[i][0:4] cv2.rectangle(img0, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (0, 255, 0), 2) cv2.imshow('dets', img0) cv2.waitKey(0) id0 = id0-1 ''' ''' print("==> [multi-tracker.update] dets[:, :5]:", dets[:, :5]) print("==> [multi-tracker.update] id_feature:", id_feature) print("==> [multi-tracker.update] len(id_feature)", len(id_feature)) ==> [multi-tracker.update] dets[:, :5]: [[ 761.85 169.75 779.43 210.57 0.76028] [ 746.16 167.86 763.81 209.36 0.70138] [ 520.55 170.32 533.13 198.51 0.44955] [ 678.15 170.84 687.6 190.35 0.42314] [ 706.3 172.26 723 207.56 0.41279] [ 731.59 168.2 742.89 194.59 0.40816]] ==> [multi-tracker.update] id_feature: [[ 0.047802 0.033811 0.0041801 ... -0.018475 -0.014819 0.010965] [ 0.090996 0.015452 0.020774 ... -0.017812 -0.013593 0.016779] [ -0.023971 0.084845 0.10603 ... -0.063187 0.063411 -0.012202] [ 0.050601 0.063119 0.070075 ... -0.063469 0.0026391 0.051197] [ 0.090193 0.036841 0.045577 ... -0.024319 -0.075271 0.017419] [ 0.014926 0.089218 0.07839 ... 
-0.09095 0.0066383 0.076563]] ==> [multi-tracker.update] len(id_feature) 6 ''' if len(dets) > 0: '''Detections''' # put dets and id_feature to STrack # init new STrack detections = [ STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f, 30) for (tlbrs, f) in zip(dets[:, :5], id_feature) ] else: detections = [] ''' Add newly detected tracklets to tracked_stracks''' unconfirmed = [] tracked_stracks = [] # type: list[STrack] for track in self.tracked_stracks: if not track.is_activated: unconfirmed.append(track) else: tracked_stracks.append(track) ''' Step 2: First association, with embedding''' strack_pool = joint_stracks(tracked_stracks, self.lost_stracks) # Predict the current location with KF #for strack in strack_pool: #strack.predict() STrack.multi_predict(strack_pool) dists = matching.embedding_distance(strack_pool, detections) #dists = matching.gate_cost_matrix(self.kalman_filter, dists, strack_pool, detections) dists = matching.fuse_motion(self.kalman_filter, dists, strack_pool, detections) matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.7) for itracked, idet in matches: track = strack_pool[itracked] det = detections[idet] if track.state == TrackState.Tracked: track.update(detections[idet], self.frame_id) activated_starcks.append(track) else: track.re_activate(det, self.frame_id, new_id=False) refind_stracks.append(track) ''' Step 3: Second association, with IOU''' detections = [detections[i] for i in u_detection] r_tracked_stracks = [ strack_pool[i] for i in u_track if strack_pool[i].state == TrackState.Tracked ] dists = matching.iou_distance(r_tracked_stracks, detections) matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.5) for itracked, idet in matches: track = r_tracked_stracks[itracked] det = detections[idet] if track.state == TrackState.Tracked: track.update(det, self.frame_id) activated_starcks.append(track) else: track.re_activate(det, self.frame_id, new_id=False) refind_stracks.append(track) for it in u_track: track = r_tracked_stracks[it] if not track.state == TrackState.Lost: track.mark_lost() lost_stracks.append(track) '''Deal with unconfirmed tracks, usually tracks with only one beginning frame''' detections = [detections[i] for i in u_detection] dists = matching.iou_distance(unconfirmed, detections) matches, u_unconfirmed, u_detection = matching.linear_assignment( dists, thresh=0.7) for itracked, idet in matches: unconfirmed[itracked].update(detections[idet], self.frame_id) activated_starcks.append(unconfirmed[itracked]) for it in u_unconfirmed: track = unconfirmed[it] track.mark_removed() removed_stracks.append(track) """ Step 4: Init new stracks""" for inew in u_detection: track = detections[inew] if track.score < self.det_thresh: continue track.activate(self.kalman_filter, self.frame_id) activated_starcks.append(track) """ Step 5: Update state""" for track in self.lost_stracks: if self.frame_id - track.end_frame > self.max_time_lost: track.mark_removed() removed_stracks.append(track) # print('Ramained match {} s'.format(t4-t3)) self.tracked_stracks = [ t for t in self.tracked_stracks if t.state == TrackState.Tracked ] self.tracked_stracks = joint_stracks(self.tracked_stracks, activated_starcks) self.tracked_stracks = joint_stracks(self.tracked_stracks, refind_stracks) self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks) self.lost_stracks.extend(lost_stracks) self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks) self.removed_stracks.extend(removed_stracks) self.tracked_stracks, 
self.lost_stracks = remove_duplicate_stracks( self.tracked_stracks, self.lost_stracks) # get scores of lost tracks output_stracks = [ track for track in self.tracked_stracks if track.is_activated ] logger.debug('===========Frame {}=========='.format(self.frame_id)) logger.debug('Activated: {}'.format( [track.track_id for track in activated_starcks])) logger.debug('Refind: {}'.format( [track.track_id for track in refind_stracks])) logger.debug('Lost: {}'.format( [track.track_id for track in lost_stracks])) logger.debug('Removed: {}'.format( [track.track_id for track in removed_stracks])) # print("==> [multi-tracker.update] len(output_stracks):", len(output_stracks)) return output_stracks
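# The first association above prices each track/detection pair by appearance.
# A minimal sketch of what matching.embedding_distance computes in JDE-style
# trackers (assumed from the common implementation, not copied from this file):
# the cosine distance between track and detection features, floored at zero.
import numpy as np
from scipy.spatial.distance import cdist

def embedding_distance(track_feats, det_feats):
    """track_feats: M x D, det_feats: N x D -> M x N cost matrix."""
    cost = np.zeros((len(track_feats), len(det_feats)), dtype=np.float32)
    if cost.size == 0:
        return cost
    cost = cdist(np.asarray(track_feats), np.asarray(det_feats), metric='cosine')
    return np.maximum(0.0, cost)  # guard against tiny negative round-off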
def update(self, im_blob, img0): self.frame_id += 1 activated_starcks = [] refind_stracks = [] lost_stracks = [] removed_stracks = [] width = img0.shape[1] height = img0.shape[0] inp_height = im_blob.shape[2] inp_width = im_blob.shape[3] c = np.array([width / 2., height / 2.], dtype=np.float32) s = max(float(inp_width) / float(inp_height) * height, width) * 1.0 meta = { 'c': c, 's': s, 'out_height': inp_height // self.opt.down_ratio, 'out_width': inp_width // self.opt.down_ratio } ''' Step 1: Network forward, get detections & embeddings The backbone first produces the output of every head; the detections are then post-processed and confidence-filtered (NMS), and new targets are added to the tracks. ''' with torch.no_grad(): output = self.model(im_blob)[-1] # raw detector outputs hm = output['hm'].sigmoid_() # detection heatmap wh = output['wh'] # predicted box width/height id_feature = output['id'] # Re-ID feature map id_feature = F.normalize(id_feature, dim=1) reg = output[ 'reg'] if self.opt.reg_offset else None # predicted center offsets # decode detections (bbox, score, class) plus the valid top-K indices into the score map dets, inds = mot_decode(hm, wh, reg=reg, cat_spec_wh=self.opt.cat_spec_wh, K=self.opt.K) # gather the Re-ID features at those indices id_feature = _tranpose_and_gather_feat(id_feature, inds) # drop the singleton batch dimension id_feature = id_feature.squeeze(0) id_feature = id_feature.cpu().numpy() # post-process the raw detections dets = self.post_process(dets, meta) dets = self.merge_outputs([dets])[1] # filter by confidence threshold to keep valid targets and their Re-ID features remain_inds = dets[:, 4] > self.opt.conf_thres dets = dets[remain_inds] id_feature = id_feature[remain_inds] # vis ''' for i in range(0, dets.shape[0]): bbox = dets[i][0:4] cv2.rectangle(img0, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (0, 255, 0), 2) cv2.imshow('dets', img0) cv2.waitKey(0) id0 = id0-1 ''' if len(dets) > 0: '''Detections: wrap every detection as an STrack, binding its box, score and feature''' detections = [ STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f, 30) for (tlbrs, f) in zip(dets[:, :5], id_feature) ] else: detections = [] ''' Add newly detected tracklets to tracked_stracks''' unconfirmed = [] tracked_stracks = [] for track in self.tracked_stracks: if not track.is_activated: unconfirmed.append(track) else: tracked_stracks.append(track) ''' Step 2: First association, with embedding 1. Merge [activated_stracks, lost_stracks] into strack_pool. 2. Build the appearance cost matrix between detections and strack_pool from their features, i.e. the cosine distance of the feats. 3. Predict the new mean and covariance of every track in strack_pool with the Kalman filter. 4. Compute the motion distance between strack_pool and the detections, and set appearance costs beyond the gating threshold to inf. 5. Solve the assignment (not Munkres here, but another efficient optimal assignment method: LAPJV). a. On a match: if the pooled track's state == Tracked, update smooth_feat and the Kalman mean/covariance, and add it to activated_stracks; if its state != Tracked, update it the same way and add it to refind_stracks. b. 
On no match: the leftover detections and r_tracked_stracks are carried forward. ''' strack_pool = joint_stracks(tracked_stracks, self.lost_stracks) STrack.multi_predict(strack_pool) # Kalman prediction dists = matching.embedding_distance( strack_pool, detections) # cosine distance between the newly detected targets and strack_pool #dists = matching.gate_cost_matrix(self.kalman_filter, dists, strack_pool, detections) dists = matching.fuse_motion( self.kalman_filter, dists, strack_pool, detections ) # use the Kalman filter to compute the motion cost between strack_pool and the detections, and set appearance costs beyond the gating threshold to inf (motion constraint) matches, u_track, u_detection = matching.linear_assignment( dists, thresh=0.7 ) # LAPJV assignment of track boxes to detection boxes: matches holds the matched index pairs, u_track the unmatched tracker indices, u_detection the unmatched detection indices for itracked, idet in matches: # matches: n x 2; column 0 is the tracked-tracker index, column 1 the detection index track = strack_pool[itracked] det = detections[idet] if track.state == TrackState.Tracked: track.update(detections[idet], self.frame_id) # matched tracker and detection: update the feature and the Kalman state activated_starcks.append(track) else: track.re_activate(det, self.frame_id, new_id=False) # re-activate the track if it came from the lost list refind_stracks.append(track) ''' Step 3: Second association, with IOU Match the detections left unmatched by the cosine distance against r_tracked_stracks by IoU. 1. Compute the IoU cost matrix between the detections and r_tracked_stracks. 2. Solve the assignment over the IoU cost (again LAPJV rather than Munkres). a. On a match: if the track's state == Tracked, update smooth_feat and the Kalman mean/covariance, and add it to activated_stracks; if its state != Tracked, update it the same way and add it to refind_stracks. b. On no match: tracks in r_tracked_stracks whose state is not Lost are marked Lost; the detections carry over to the next step. ''' detections = [detections[i] for i in u_detection] # u_detection: indices of detections unmatched in the previous step r_tracked_stracks = [ strack_pool[i] for i in u_track if strack_pool[i].state == TrackState.Tracked ] # trackers unmatched in the previous step that are still in the Tracked state dists = matching.iou_distance(r_tracked_stracks, detections) # IoU cost matrix matches, u_track, u_detection = matching.linear_assignment( dists, thresh=0.5) # LAPJV assignment over the IoU cost for itracked, idet in matches: track = r_tracked_stracks[itracked] det = detections[idet] if track.state == TrackState.Tracked: track.update(det, self.frame_id) activated_starcks.append(track) else: track.re_activate(det, self.frame_id, new_id=False) refind_stracks.append(track) for it in u_track: track = r_tracked_stracks[it] if not track.state == TrackState.Lost: track.mark_lost() lost_stracks.append( track) # mark the remaining trackers unmatched by IoU as Lost ''' Deal with unconfirmed tracks, usually tracks with only one beginning frame Match the detections left over from the previous step against unconfirmed_stracks by IoU. 1. Compute the IoU cost matrix. 2. Solve the assignment (LAPJV rather than Munkres). a. On a match: update the unconfirmed track (smooth_feat and the Kalman mean/covariance) and add it to activated_stracks. b. 
On no match: unconfirmed tracks go straight into removed_stracks; detections that could not be matched carry over to the next step. ''' detections = [detections[i] for i in u_detection ] # match the detections unmatched by the cosine/IoU stages against the unconfirmed trackers dists = matching.iou_distance(unconfirmed, detections) matches, u_unconfirmed, u_detection = matching.linear_assignment( dists, thresh=0.7) for itracked, idet in matches: unconfirmed[itracked].update( detections[idet], self.frame_id ) # update the unconfirmed track (smooth_feat and the Kalman mean/covariance) and add it to activated_stracks activated_starcks.append(unconfirmed[itracked]) for it in u_unconfirmed: track = unconfirmed[it] track.mark_removed() removed_stracks.append( track) # unmatched unconfirmed tracks go straight into removed_stracks """ Step 4: Init new stracks Detections left over from the previous step are initialized as new trackers and added to activated_stracks. """ for inew in u_detection: # detections unmatched by the cosine, IoU and unconfirmed stages start new trackers track = detections[inew] if track.score < self.det_thresh: continue track.activate(self.kalman_filter, self.frame_id) # activate the track; activated is True only for the first frame, False otherwise activated_starcks.append(track) """ Step 5: Update state""" for track in self.lost_stracks: if self.frame_id - track.end_frame > self.max_time_lost: # after max_time_lost unmatched frames, the track is removed track.mark_removed() removed_stracks.append(track) # print('Remained match {} s'.format(t4-t3)) self.tracked_stracks = [ t for t in self.tracked_stracks if t.state == TrackState.Tracked ] # keep only trackers in the Tracked state self.tracked_stracks = joint_stracks( self.tracked_stracks, activated_starcks) # add the newly activated detections to self.tracked_stracks self.tracked_stracks = joint_stracks(self.tracked_stracks, refind_stracks) # add the re-matched trackers self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks) self.lost_stracks.extend(lost_stracks) self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks) self.removed_stracks.extend(removed_stracks) self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks( self.tracked_stracks, self.lost_stracks) # get scores of lost tracks output_stracks = [ track for track in self.tracked_stracks if track.is_activated ] logger.debug('===========Frame {}=========='.format(self.frame_id)) logger.debug('Activated: {}'.format( [track.track_id for track in activated_starcks])) logger.debug('Refind: {}'.format( [track.track_id for track in refind_stracks])) logger.debug('Lost: {}'.format( [track.track_id for track in lost_stracks])) logger.debug('Removed: {}'.format( [track.track_id for track in removed_stracks])) return output_stracks
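# The comments above note that the assignment is solved with LAPJV rather than
# Munkres. A functionally equivalent sketch built on SciPy's Hungarian solver,
# with the same (matches, u_track, u_detection) interface and a cost cap:
import numpy as np
from scipy.optimize import linear_sum_assignment

def linear_assignment(cost, thresh):
    """Match rows (tracks) to columns (detections); pairs above thresh stay unmatched."""
    cost = np.asarray(cost)
    if cost.size == 0:
        return (np.empty((0, 2), dtype=int),
                tuple(range(cost.shape[0])), tuple(range(cost.shape[1])))
    finite = np.where(np.isfinite(cost), cost, 1e6)  # inf marks gated-out pairs
    rows, cols = linear_sum_assignment(finite)
    matches = [(r, c) for r, c in zip(rows, cols) if finite[r, c] <= thresh]
    matched_r = {r for r, _ in matches}
    matched_c = {c for _, c in matches}
    u_track = tuple(i for i in range(cost.shape[0]) if i not in matched_r)
    u_detection = tuple(j for j in range(cost.shape[1]) if j not in matched_c)
    return np.asarray(matches, dtype=int).reshape(-1, 2), u_track, u_detection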
def update_sep3(self, im_blob, img0, conf_thres=None): self.frame_id += 1 activated_stracks_dict = defaultdict(list) refind_stracks_dict = defaultdict(list) lost_stracks_dict = defaultdict(list) removed_stracks_dict = defaultdict(list) output_stracks_dict = defaultdict(list) width = img0.shape[1] height = img0.shape[0] inp_height = im_blob.shape[2] inp_width = im_blob.shape[3] c = np.array([width / 2., height / 2.], dtype=np.float32) s = max(float(inp_width) / float(inp_height) * height, width) * 1.0 meta = { 'c': c, 's': s, 'out_height': inp_height // self.opt.down_ratio, 'out_width': inp_width // self.opt.down_ratio } ''' Step 1: Network forward, get detections & embeddings''' cls_id_feats = [] with torch.no_grad(): output = self.model(im_blob)[-1] hm = output['hm'].sigmoid_() wh = output['wh'] id_feature = output['id'] id_feature = F.normalize(id_feature, dim=1) reg = output['reg'] if self.opt.reg_offset else None dets, inds, cls_inds_mask = mot_decode_sp( hm, wh, reg=reg, cat_spec_wh=self.opt.cat_spec_wh, K=self.opt.K) cls_id_feats = [] for cls_id in range(self.opt.num_classes): # cls_inds = inds[:, cls_inds_mask[cls_id]] cls_inds = inds[:, cls_inds_mask[cls_id].squeeze(0)] cls_id_feature = _tranpose_and_gather_feat( id_feature, cls_inds) # inds: 1×128 cls_id_feature = cls_id_feature.squeeze(0) # n × FeatDim cls_id_feature = cls_id_feature.cpu().numpy() cls_id_feats.append(cls_id_feature) dets = self.post_process(dets, meta) dets_classes = self.merge_outputs([dets]) #[1] # dets = np.concatenate( # [dets[i] for i in range(1, self.opt.num_classes+1)], axis=0) output_stracks = [] start_idx = 0 emb_thresh = [0.9, 1.7, 0.7, 0.7, 0.7, 0.7, 1.0, 1.7] for i_class in range(1, self.opt.num_classes + 1): activated_stracks = [] refind_stracks = [] lost_stracks = [] removed_stracks = [] dets = dets_classes[i_class] if conf_thres is None: remain_inds = dets[:, 4] > self.opt.conf_thres else: remain_inds = dets[:, 4] > conf_thres[i_class - 1] dets = dets[remain_inds] id_feature = cls_id_feats[i_class - 1] # vis ''' for i in range(0, dets.shape[0]): bbox = dets[i][0:4] cv2.rectangle(img0, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (0, 255, 0), 2) cv2.imshow('dets', img0) cv2.waitKey(0) id0 = id0-1 ''' if len(dets) > 0: '''Detections''' detections = [ STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f, 30, class_id=i_class - 1) for (tlbrs, f) in zip(dets[:, :5], id_feature) ] else: detections = [] ''' Add newly detected tracklets to tracked_stracks''' unconfirmed = [] tracked_stracks = [] # type: list[STrack] for track in self.tracked_stracks_sp[i_class]: if not track.is_activated: unconfirmed.append(track) else: tracked_stracks.append(track) ''' Step 2: First association, with embedding''' strack_pool = joint_stracks(tracked_stracks, self.lost_stracks_sp[i_class]) # Predict the current location with KF #for strack in strack_pool: #strack.predict() STrack.multi_predict(strack_pool) dists = matching.embedding_distance(strack_pool, detections) # dists = matching.gate_cost_matrix(self.kalman_filter, dists, strack_pool, detections) if i_class not in [4, 5, 7]: # print('not truck bus motorcycle') dists = matching.fuse_motion(self.kalman_filter, dists, strack_pool, detections) matches, u_track, u_detection = matching.linear_assignment( dists, thresh=emb_thresh[i_class - 1]) for itracked, idet in matches: track = strack_pool[itracked] det = detections[idet] if track.state == TrackState.Tracked: track.update(detections[idet], self.frame_id) activated_stracks.append(track) else: track.re_activate(det, 
self.frame_id, new_id=False) refind_stracks.append(track) ''' Step 3: Second association, with IOU''' detections = [detections[i] for i in u_detection] r_tracked_stracks = [ strack_pool[i] for i in u_track if strack_pool[i].state == TrackState.Tracked ] dists = matching.iou_distance(r_tracked_stracks, detections) matches, u_track, u_detection = matching.linear_assignment( dists, thresh=0.5) for itracked, idet in matches: track = r_tracked_stracks[itracked] det = detections[idet] if track.state == TrackState.Tracked: track.update(det, self.frame_id) activated_stracks.append(track) else: track.re_activate(det, self.frame_id, new_id=False) refind_stracks.append(track) for it in u_track: track = r_tracked_stracks[it] if not track.state == TrackState.Lost: track.mark_lost() lost_stracks.append(track) '''Deal with unconfirmed tracks, usually tracks with only one beginning frame''' detections = [detections[i] for i in u_detection] dists = matching.iou_distance(unconfirmed, detections) matches, u_unconfirmed, u_detection = matching.linear_assignment( dists, thresh=0.7) for itracked, idet in matches: unconfirmed[itracked].update(detections[idet], self.frame_id) activated_stracks.append(unconfirmed[itracked]) for it in u_unconfirmed: track = unconfirmed[it] track.mark_removed() removed_stracks.append(track) """ Step 4: Init new stracks""" for inew in u_detection: track = detections[inew] # print(track.score, self.det_thresh) if track.score < self.det_thresh: continue track.activate(self.kalman_filter, self.frame_id) activated_stracks.append(track) """ Step 5: Update state""" for track in self.lost_stracks_sp[i_class]: if self.frame_id - track.end_frame > self.max_time_lost: track.mark_removed() removed_stracks.append(track) # print('Ramained match {} s'.format(t4-t3)) self.tracked_stracks_sp[i_class] = [ t for t in self.tracked_stracks_sp[i_class] if t.state == TrackState.Tracked ] self.tracked_stracks_sp[i_class] = joint_stracks( self.tracked_stracks_sp[i_class], activated_stracks) self.tracked_stracks_sp[i_class] = joint_stracks( self.tracked_stracks_sp[i_class], refind_stracks) self.lost_stracks_sp[i_class] = sub_stracks( self.lost_stracks_sp[i_class], self.tracked_stracks_sp[i_class]) self.lost_stracks_sp[i_class].extend(lost_stracks) self.lost_stracks_sp[i_class] = sub_stracks( self.lost_stracks_sp[i_class], self.removed_stracks_sp[i_class]) self.removed_stracks_sp[i_class].extend(removed_stracks) self.tracked_stracks_sp[i_class], self.lost_stracks_sp[ i_class] = remove_duplicate_stracks( self.tracked_stracks_sp[i_class], self.lost_stracks_sp[i_class]) # get scores of lost tracks # output_stracks = [track for track in self.tracked_stracks if track.is_activated] output_stracks.extend([ track for track in self.tracked_stracks_sp[i_class] if track.is_activated ]) return output_stracks
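# The second association above falls back on spatial overlap. A minimal numpy
# sketch of matching.iou_distance: pairwise IoU over (x1, y1, x2, y2) boxes,
# returned as a cost (1 - IoU):
import numpy as np

def iou_distance(atlbrs, btlbrs):
    a = np.asarray(atlbrs, dtype=np.float64).reshape(-1, 4)
    b = np.asarray(btlbrs, dtype=np.float64).reshape(-1, 4)
    ious = np.zeros((len(a), len(b)))
    for i, (ax1, ay1, ax2, ay2) in enumerate(a):
        for j, (bx1, by1, bx2, by2) in enumerate(b):
            iw = min(ax2, bx2) - max(ax1, bx1)
            ih = min(ay2, by2) - max(ay1, by1)
            if iw <= 0 or ih <= 0:
                continue  # no overlap; cost stays at 1
            inter = iw * ih
            union = (ax2 - ax1) * (ay2 - ay1) + (bx2 - bx1) * (by2 - by1) - inter
            ious[i, j] = inter / union
    return 1.0 - ious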
def update(self, im_blob, img0): self.frame_id += 1 activated_starcks = [] refind_stracks = [] lost_stracks = [] removed_stracks = [] t1 = time.time() ''' Step 1: Network forward, get detections & embeddings''' with torch.no_grad(): pred = self.model(im_blob) pred = pred[pred[:, :, 4] > self.opt.conf_thres] if len(pred) > 0: dets = non_max_suppression(pred.unsqueeze(0), self.opt.conf_thres, self.opt.nms_thres)[0].cpu() scale_coords(self.opt.img_size, dets[:, :4], img0.shape).round() '''Detections''' detections = [STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f.numpy(), 30) for (tlbrs, f) in zip(dets[:, :5], dets[:, -self.model.emb_dim:])] else: detections = [] t2 = time.time() # print('Forward: {} s'.format(t2-t1)) ''' Add newly detected tracklets to tracked_stracks''' unconfirmed = [] tracked_stracks = [] # type: list[STrack] for track in self.tracked_stracks: if not track.is_activated: unconfirmed.append(track) else: tracked_stracks.append(track) ''' Step 2: First association, with embedding''' strack_pool = joint_stracks(tracked_stracks, self.lost_stracks) # Predict the current location with KF for strack in strack_pool: strack.predict() dists = matching.embedding_distance(strack_pool, detections) dists = matching.gate_cost_matrix(self.kalman_filter, dists, strack_pool, detections) matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.7) for itracked, idet in matches: track = strack_pool[itracked] det = detections[idet] if track.state == TrackState.Tracked: track.update(detections[idet], self.frame_id) activated_starcks.append(track) else: track.re_activate(det, self.frame_id, new_id=False) refind_stracks.append(track) ''' Step 3: Second association, with IOU''' detections = [detections[i] for i in u_detection] r_tracked_stracks = [strack_pool[i] for i in u_track if strack_pool[i].state==TrackState.Tracked ] dists = matching.iou_distance(r_tracked_stracks, detections) matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.5) for itracked, idet in matches: track = r_tracked_stracks[itracked] det = detections[idet] if track.state == TrackState.Tracked: track.update(det, self.frame_id) activated_starcks.append(track) else: track.re_activate(det, self.frame_id, new_id=False) refind_stracks.append(track) for it in u_track: track = r_tracked_stracks[it] if not track.state == TrackState.Lost: track.mark_lost() lost_stracks.append(track) '''Deal with unconfirmed tracks, usually tracks with only one beginning frame''' detections = [detections[i] for i in u_detection] dists = matching.iou_distance(unconfirmed, detections) matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=0.7) for itracked, idet in matches: unconfirmed[itracked].update(detections[idet], self.frame_id) activated_starcks.append(unconfirmed[itracked]) for it in u_unconfirmed: track = unconfirmed[it] track.mark_removed() removed_stracks.append(track) """ Step 4: Init new stracks""" for inew in u_detection: track = detections[inew] if track.score < self.det_thresh: continue track.activate(self.kalman_filter, self.frame_id) activated_starcks.append(track) """ Step 5: Update state""" for track in self.lost_stracks: if self.frame_id - track.end_frame > self.max_time_lost: track.mark_removed() removed_stracks.append(track) t4 = time.time() # print('Ramained match {} s'.format(t4-t3)) self.tracked_stracks = [t for t in self.tracked_stracks if t.state == TrackState.Tracked] self.tracked_stracks = joint_stracks(self.tracked_stracks, activated_starcks) 
self.tracked_stracks = joint_stracks(self.tracked_stracks, refind_stracks) # self.lost_stracks = [t for t in self.lost_stracks if t.state == TrackState.Lost] # type: list[STrack] self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks) self.lost_stracks.extend(lost_stracks) self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks) self.removed_stracks.extend(removed_stracks) self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(self.tracked_stracks, self.lost_stracks) # get scores of lost tracks output_stracks = [track for track in self.tracked_stracks if track.is_activated] logger.debug('===========Frame {}=========='.format(self.frame_id)) logger.debug('Activated: {}'.format([track.track_id for track in activated_starcks])) logger.debug('Refind: {}'.format([track.track_id for track in refind_stracks])) logger.debug('Lost: {}'.format([track.track_id for track in lost_stracks])) logger.debug('Removed: {}'.format([track.track_id for track in removed_stracks])) t5 = time.time() # print('Final {} s'.format(t5-t4)) return output_stracks
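# Every update() ends with the same list bookkeeping through joint_stracks and
# sub_stracks. A sketch of those helpers as they commonly appear in JDE-style
# trackers (the camera-motion variant further below also returns a lost map,
# which this sketch omits): union and difference of track lists keyed by track_id.
def joint_stracks(tlista, tlistb):
    """Union of two track lists, keeping the first occurrence of each track_id."""
    seen, merged = set(), []
    for t in list(tlista) + list(tlistb):
        if t.track_id not in seen:
            seen.add(t.track_id)
            merged.append(t)
    return merged

def sub_stracks(tlista, tlistb):
    """Tracks of tlista whose track_id does not occur in tlistb."""
    drop = {t.track_id for t in tlistb}
    return [t for t in tlista if t.track_id not in drop]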
def update(self, im_blob, img0): self.frame_id += 1 activated_starcks = [] refind_stracks = [] lost_stracks = [] removed_stracks = [] width = img0.shape[1] height = img0.shape[0] inp_height = im_blob.shape[2] inp_width = im_blob.shape[3] c = np.array([width / 2., height / 2.], dtype=np.float32) s = max(float(inp_width) / float(inp_height) * height, width) * 1.0 meta = { 'c': c, 's': s, 'out_height': inp_height // self.opt.down_ratio, 'out_width': inp_width // self.opt.down_ratio } ''' Step 1: Network forward, get detections & embeddings''' with torch.no_grad(): if hasattr(self.model, 'relation'): outputs, stuff = self.model(im_blob) det_heads = set(['wh', 'hm', 'reg']) trk_heads = set(['id']) for head in (set(self.model.backend.heads) & det_heads): outputs[head] = getattr(self.model.backend, head)(outputs['raw']) # for head in (set(self.model.heads) & trk_heads): # outputs[head] = getattr(self.model, head)(outputs['raw_trk']) # del outputs['raw_trk'] del outputs['raw'] output = outputs if hasattr(self.model.relation, 'loss'): cur_feats = stuff[-2] self.model.relation.lock.acquire() self.model.relation.feature_bank.append( cur_feats.detach().cpu()) self.model.relation.lock.release() else: output = self.model(im_blob)[-1] hm = output['hm'].sigmoid_() wh = output['wh'] id_feature = output['id'] id_feature = F.normalize(id_feature, dim=1) reg = output['reg'] if self.opt.reg_offset else None dets, inds = mot_decode(hm, wh, reg=reg, ltrb=self.opt.ltrb, K=self.opt.K) id_feature = _tranpose_and_gather_feat(id_feature, inds) id_feature = id_feature.squeeze(0) id_feature = id_feature.cpu().numpy() dets = self.post_process(dets, meta) dets = self.merge_outputs([dets])[1] remain_inds = dets[:, 4] > self.opt.conf_thres dets = dets[remain_inds] id_feature = id_feature[remain_inds] self.inputs_embs.append((dets, id_feature)) # vis ''' for i in range(0, dets.shape[0]): bbox = dets[i][0:4] cv2.rectangle(img0, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (0, 255, 0), 2) cv2.imshow('dets', img0) cv2.waitKey(0) id0 = id0-1 ''' if len(dets) > 0: '''Detections''' detections = [ STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f, 30) for (tlbrs, f) in zip(dets[:, :5], id_feature) ] else: detections = [] ''' Add newly detected tracklets to tracked_stracks''' unconfirmed = [] tracked_stracks = [] # type: list[STrack] for track in self.tracked_stracks: if not track.is_activated: unconfirmed.append(track) else: tracked_stracks.append(track) ''' Step 2: First association, with embedding''' strack_pool = joint_stracks(tracked_stracks, self.lost_stracks) # Predict the current location with KF #for strack in strack_pool: #strack.predict() STrack.multi_predict(strack_pool) dists = matching.embedding_distance(strack_pool, detections) #dists = matching.iou_distance(strack_pool, detections) dists = matching.fuse_motion(self.kalman_filter, dists, strack_pool, detections) matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.4) for itracked, idet in matches: track = strack_pool[itracked] det = detections[idet] if track.state == TrackState.Tracked: track.update(detections[idet], self.frame_id) activated_starcks.append(track) else: track.re_activate(det, self.frame_id, new_id=False) refind_stracks.append(track) ''' Step 3: Second association, with IOU''' detections = [detections[i] for i in u_detection] r_tracked_stracks = [ strack_pool[i] for i in u_track if strack_pool[i].state == TrackState.Tracked ] dists = matching.iou_distance(r_tracked_stracks, detections) matches, u_track, u_detection = 
matching.linear_assignment(dists, thresh=0.5) for itracked, idet in matches: track = r_tracked_stracks[itracked] det = detections[idet] if track.state == TrackState.Tracked: track.update(det, self.frame_id) activated_starcks.append(track) else: track.re_activate(det, self.frame_id, new_id=False) refind_stracks.append(track) for it in u_track: track = r_tracked_stracks[it] if not track.state == TrackState.Lost: track.mark_lost() lost_stracks.append(track) '''Deal with unconfirmed tracks, usually tracks with only one beginning frame''' detections = [detections[i] for i in u_detection] dists = matching.iou_distance(unconfirmed, detections) matches, u_unconfirmed, u_detection = matching.linear_assignment( dists, thresh=0.7) for itracked, idet in matches: unconfirmed[itracked].update(detections[idet], self.frame_id) activated_starcks.append(unconfirmed[itracked]) for it in u_unconfirmed: track = unconfirmed[it] track.mark_removed() removed_stracks.append(track) """ Step 4: Init new stracks""" for inew in u_detection: track = detections[inew] if track.score < self.det_thresh: continue track.activate(self.kalman_filter, self.frame_id) activated_starcks.append(track) """ Step 5: Update state""" for track in self.lost_stracks: if self.frame_id - track.end_frame > self.max_time_lost: track.mark_removed() removed_stracks.append(track) # print('Ramained match {} s'.format(t4-t3)) self.tracked_stracks = [ t for t in self.tracked_stracks if t.state == TrackState.Tracked ] self.tracked_stracks = joint_stracks(self.tracked_stracks, activated_starcks) self.tracked_stracks = joint_stracks(self.tracked_stracks, refind_stracks) self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks) self.lost_stracks.extend(lost_stracks) self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks) self.removed_stracks.extend(removed_stracks) self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks( self.tracked_stracks, self.lost_stracks) # get scores of lost tracks output_stracks = [ track for track in self.tracked_stracks if track.is_activated ] logger.debug('===========Frame {}=========='.format(self.frame_id)) logger.debug('Activated: {}'.format( [track.track_id for track in activated_starcks])) logger.debug('Refind: {}'.format( [track.track_id for track in refind_stracks])) logger.debug('Lost: {}'.format( [track.track_id for track in lost_stracks])) logger.debug('Removed: {}'.format( [track.track_id for track in removed_stracks])) return output_stracks
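# matching.fuse_motion blends the appearance cost with a Kalman gating distance
# and forbids implausible pairs. A simplified sketch of the usual rule
# cost = lambda * cost + (1 - lambda) * gating, assuming detections expose
# to_xyah() and the filter exposes a DeepSORT-style gating_distance():
import numpy as np

CHI2INV95_4DOF = 9.4877  # 95% quantile of the chi-square distribution, 4 dof

def fuse_motion(kf, cost_matrix, tracks, detections, lambda_=0.98):
    if cost_matrix.size == 0:
        return cost_matrix
    measurements = np.asarray([det.to_xyah() for det in detections])
    for row, track in enumerate(tracks):
        gating = kf.gating_distance(track.mean, track.covariance,
                                    measurements, metric='maha')
        cost_matrix[row, gating > CHI2INV95_4DOF] = np.inf  # gate out far pairs
        cost_matrix[row] = lambda_ * cost_matrix[row] + (1 - lambda_) * gating
    return cost_matrix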
def update(self, detection, img0): self.frame_id += 1 activated_starcks = [] refind_stracks = [] lost_stracks = [] removed_stracks = [] width = img0.shape[1] height = img0.shape[0] ''' Step 1: Network forward, get detections & embeddings''' dets = [] for x, y, w, h in detection: t, l, b, r = x - w / 2, y - h / 2, x + w / 2, y + h / 2 dets.append([t * width, l * height, b * width, r * height, 1]) dets = np.array(dets) id_feature = [] if self.opt.use_hog_reid: for box in dets[:, :5]: try: x1, y1, x2, y2, conf = max(int(box[0]), 0), max( int(box[1]), 0), min(int(box[2]), width - 1), min(int(box[3]), height - 1), box[4] id_feature.append( self.reid_model.compute( cv2.resize(img0[y1:y2, x1:x2:, ], (self.re_im_w, self.re_im_h)))[:, 0]) except: id_feature.append(np.zeros_like(id_feature[-1])) else: id_feature = np.zeros((len(dets), 1)) warp_mode = cv2.MOTION_TRANSLATION if self.prev_img is not None and self.opt.use_cam_motion == True: warp_matrix = self.get_warp_matrix(self.prev_img, img0.copy(), warp_mode, resize_factor=4) else: warp_matrix = None if self.opt.use_cam_motion: self.prev_img = img0 if len(dets) > 0: '''Detections''' detections = [ STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f, 30) for (tlbrs, f) in zip(dets[:, :5], id_feature) ] else: detections = [] detections_plot = detections.copy() ''' Add newly detected tracklets to tracked_stracks''' unconfirmed = [] tracked_stracks = [] # type: list[STrack] for track in self.tracked_stracks: if not track.is_activated: unconfirmed.append(track) else: tracked_stracks.append(track) ''' Step 2: First association, with embedding''' strack_pool, lost_map_tracks = joint_stracks(tracked_stracks, self.lost_stracks) # Predict the current location with KF STrack.multi_predict(strack_pool, warp_matrix, warp_mode) if self.opt.use_hog_reid: dists = matching.embedding_distance( strack_pool, detections ) if not self.opt.use_reranking else matching.reranking_embeding_distance( strack_pool, detections) else: dists = np.zeros(shape=(len(strack_pool), len(detections))) if self.opt.use_kalman: dists = matching.fuse_motion(self.opt, self.kalman_filter, dists, strack_pool, detections, lost_map=lost_map_tracks, lambda_=0.99) matches, u_track, u_detection = matching.linear_assignment( dists, thresh=0.7) #0.6 for itracked, idet in matches: track = strack_pool[itracked] det = detections[idet] if track.state == TrackState.Tracked: track.update(detections[idet], self.frame_id) activated_starcks.append(track) else: track.re_activate(det, self.frame_id, new_id=False) refind_stracks.append(track) else: u_detection = range(len(detections)) u_track = range(len(strack_pool)) r_stracks = strack_pool ''' Step 3: Second association, with IOU''' if self.opt.use_iou: detections = [detections[i] for i in u_detection] if self.opt.use_kalman: r_stracks = [ strack_pool[i] for i in u_track if strack_pool[i].state == TrackState.Tracked ] else: r_stracks = [strack_pool[i] for i in u_track] dists = matching.iou_distance(r_stracks, detections) matches, u_track, u_detection = matching.linear_assignment( dists, thresh=0.9) #0.7 for itracked, idet in matches: track = r_stracks[itracked] det = detections[idet] if track.state == TrackState.Tracked: track.update(det, self.frame_id) activated_starcks.append(track) else: track.re_activate(det, self.frame_id, new_id=False) refind_stracks.append(track) ''' ''' for it in u_track: track = r_stracks[it] if not track.state == TrackState.Lost: track.mark_lost() lost_stracks.append(track) '''Deal with unconfirmed tracks, usually tracks with only one 
beginning frame''' detections = [detections[i] for i in u_detection] dists = matching.iou_distance(unconfirmed, detections) matches, u_unconfirmed, u_detection = matching.linear_assignment( dists, thresh=0.7) for itracked, idet in matches: unconfirmed[itracked].update(detections[idet], self.frame_id) activated_starcks.append(unconfirmed[itracked]) for it in u_unconfirmed: track = unconfirmed[it] track.mark_removed() removed_stracks.append(track) """ Step 4: Init new stracks""" for inew in u_detection: track = detections[inew] track.activate(self.kalman_filter, self.frame_id) activated_starcks.append(track) """ Step 5: Update state""" for track in self.lost_stracks: cam_veloc_weight = 0.95 if self.opt.use_dynamic_retrack: cam_motion = STrack.get_camera_intension( warp_matrix, warp_mode) track_vtlwh = np.array(STrack.xyah_to_tlwh(track.mean[4:])) track_vtlbr = STrack.tlwh_to_tlbr(track_vtlwh) veloc_motion = np.sqrt(np.sum(track_vtlbr**2)) max_time_lost = self.max_time_lost * 3.2 * np.exp( -(cam_veloc_weight * cam_motion + (1 - cam_veloc_weight) * veloc_motion)) else: max_time_lost = self.max_time_lost if self.frame_id - track.end_frame > max_time_lost: track.mark_removed() removed_stracks.append(track) #Remove out of screen tracklet elif track.tlwh[0] + track.tlwh[2] // 2 > width or track.tlwh[ 1] + track.tlwh[3] // 2 > height: track.num_out_frame += 1 if track.num_out_frame > STrack.out_of_frame_patience: track.mark_removed() removed_stracks.append(track) # print('Remained match {} s'.format(t4-t3)) self.tracked_stracks = [ t for t in self.tracked_stracks if t.state == TrackState.Tracked ] self.tracked_stracks, _ = joint_stracks(self.tracked_stracks, activated_starcks) self.tracked_stracks, _ = joint_stracks(self.tracked_stracks, refind_stracks) self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks) self.lost_stracks.extend(lost_stracks) self.removed_stracks.extend(removed_stracks) self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks) self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks( self.tracked_stracks, self.lost_stracks) #merge track output_stracks = [ track for track in self.tracked_stracks if track.is_activated ] print('===========Frame {}=========='.format(self.frame_id)) print('Activated: {}'.format( [track.track_id for track in activated_starcks])) print('Refind: {}'.format([track.track_id for track in refind_stracks])) print('Lost: {}'.format( [track.track_id for track in self.lost_stracks])) print('Removed: {}'.format( [track.track_id for track in self.removed_stracks])) return output_stracks, detections_plot
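# The variant above compensates for camera motion by estimating a frame-to-frame
# warp before the Kalman prediction. A minimal sketch of such a get_warp_matrix
# using OpenCV's ECC alignment on downscaled grayscale frames; the iteration
# count and epsilon are assumed defaults:
import cv2
import numpy as np

def get_warp_matrix(prev_img, cur_img, warp_mode=cv2.MOTION_TRANSLATION,
                    resize_factor=4, iters=50, eps=1e-4):
    small = (prev_img.shape[1] // resize_factor, prev_img.shape[0] // resize_factor)
    prev_gray = cv2.cvtColor(cv2.resize(prev_img, small), cv2.COLOR_BGR2GRAY)
    cur_gray = cv2.cvtColor(cv2.resize(cur_img, small), cv2.COLOR_BGR2GRAY)
    warp = np.eye(2, 3, dtype=np.float32)  # 2x3 warp used by translation/affine modes
    criteria = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, iters, eps)
    try:
        _, warp = cv2.findTransformECC(prev_gray, cur_gray, warp, warp_mode, criteria)
    except cv2.error:
        return None  # alignment did not converge; the caller can skip compensation
    warp[:, 2] *= resize_factor  # rescale the translation back to full resolution
    return warp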
def update(self, im_blob, img0): self.frame_id += 1 # print(self.frame_id) activated_starcks = [] refind_stracks = [] lost_stracks = [] removed_stracks = [] width = img0.shape[1] height = img0.shape[0] inp_height = im_blob.shape[2] inp_width = im_blob.shape[3] c = np.array([width / 2., height / 2.], dtype=np.float32) s = max(float(inp_width) / float(inp_height) * height, width) * 1.0 meta = {'c': c, 's': s, 'out_height': inp_height // self.opt.down_ratio, 'out_width': inp_width // self.opt.down_ratio} ''' Step 1: Network forward, get detections & embeddings''' with torch.no_grad(): # output = self.model(im_blob)[-1] output = self.model(im_blob) postprocess = PostProcess() dets = postprocess(output, img0.shape, 'val') # print(dets) # cv2.imwrite('input3.jpg', img0) # img1 = plot_detections(img0, dets[0]['boxes'].cpu()) # cv2.imwrite('out4.jpg', img1) # hm = output['hm'].sigmoid_() # wh = output['wh'] indices = dets[0]['topk_index'] # print(indices) id_feature = output['id'] # for query/ref id_feature=torch.index_select(output['id'], dim=1, index=indices) id_feature = F.normalize(id_feature, dim=1) # for reid # index = indices.reshape(1,len(indices)) # id_feature = _tranpose_and_gather_feat(id_feature, index) id_feature = id_feature.squeeze(0) id_feature = id_feature.cpu().numpy() scores = dets[0]['scores'] bboxes = dets[0]['boxes'] dets = torch.cat([bboxes, scores.resize(scores.shape[0],1)],dim=1).cpu().numpy() # dets = self.post_process(dets, meta) # dets format: [cx, cy, w, h, score] # dets = self.merge_outputs([dets])[1] remain_inds = dets[:, 4] > self.opt.conf_thres dets = dets[remain_inds] id_feature = id_feature[remain_inds] # print(len(id_feature)) # vis ''' for i in range(0, dets.shape[0]): bbox = dets[i][0:4] cv2.rectangle(img0, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (0, 255, 0), 2) cv2.imshow('dets', img0) cv2.waitKey(0) id0 = id0-1 ''' if len(dets) > 0: '''Detections''' detections = [STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f, 30) for (tlbrs, f) in zip(dets[:, :5], id_feature)] else: detections = [] ''' Add newly detected tracklets to tracked_stracks''' unconfirmed = [] tracked_stracks = [] # type: list[STrack] for track in self.tracked_stracks: if not track.is_activated: unconfirmed.append(track) else: tracked_stracks.append(track) ''' Step 2: First association, with embedding''' strack_pool = joint_stracks(tracked_stracks, self.lost_stracks) # Predict the current location with KF #for strack in strack_pool: #strack.predict() STrack.multi_predict(strack_pool) dists = matching.embedding_distance(strack_pool, detections) #dists = matching.iou_distance(strack_pool, detections) dists = matching.fuse_motion(self.kalman_filter, dists, strack_pool, detections) matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.4) for itracked, idet in matches: track = strack_pool[itracked] det = detections[idet] if track.state == TrackState.Tracked: track.update(detections[idet], self.frame_id) activated_starcks.append(track) else: track.re_activate(det, self.frame_id, new_id=False) refind_stracks.append(track) # # cascade matching for the confirmed tracks # matches, u_track, u_detection = \ # matching.matching_cascade( # dists, 0.4, 3, # strack_pool, detections, u_detection) # for itracked, idet in matches: # track = strack_pool[itracked] # det = detections[idet] # if track.state == TrackState.Tracked: # track.update(detections[idet], self.frame_id) # activated_starcks.append(track) # else: # track.re_activate(det, self.frame_id, new_id=False) # refind_stracks.append(track) ''' Step 3: Second association, 
with IOU''' detections = [detections[i] for i in u_detection] r_tracked_stracks = [strack_pool[i] for i in u_track if strack_pool[i].state == TrackState.Tracked] dists = matching.iou_distance(r_tracked_stracks, detections) matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.5) for itracked, idet in matches: track = r_tracked_stracks[itracked] det = detections[idet] if track.state == TrackState.Tracked: track.update(det, self.frame_id) activated_starcks.append(track) else: track.re_activate(det, self.frame_id, new_id=False) refind_stracks.append(track) for it in u_track: track = r_tracked_stracks[it] if not track.state == TrackState.Lost: track.mark_lost() lost_stracks.append(track) '''Deal with unconfirmed tracks, usually tracks with only one beginning frame''' detections = [detections[i] for i in u_detection] dists = matching.iou_distance(unconfirmed, detections) matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=0.7) for itracked, idet in matches: unconfirmed[itracked].update(detections[idet], self.frame_id) activated_starcks.append(unconfirmed[itracked]) for it in u_unconfirmed: track = unconfirmed[it] track.mark_removed() removed_stracks.append(track) """ Step 4: Init new stracks""" for inew in u_detection: track = detections[inew] if track.score < self.det_thresh: continue track.activate(self.kalman_filter, self.frame_id) activated_starcks.append(track) # """ Step 5: Update state""" for track in self.lost_stracks: if self.frame_id - track.end_frame > self.max_time_lost: track.mark_removed() removed_stracks.append(track) # print('Remained match {} s'.format(t4-t3)) # keep only stracks whose state is Tracked self.tracked_stracks = [t for t in self.tracked_stracks if t.state == TrackState.Tracked] self.tracked_stracks = joint_stracks(self.tracked_stracks, activated_starcks) # Tracked stracks plus the newly activated ones self.tracked_stracks = joint_stracks(self.tracked_stracks, refind_stracks) # plus the refound ones self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks) # drop from lost whatever is tracked, activated or refound again self.lost_stracks.extend(lost_stracks) # add this frame's lost tracks to the global lost list self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks) # drop removed tracks from the global lost list self.removed_stracks.extend(removed_stracks) # and extend the global removed list self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(self.tracked_stracks, self.lost_stracks) # deduplicate stracks by IoU # get scores of lost tracks output_stracks = [track for track in self.tracked_stracks if track.is_activated] logger.debug('===========Frame {}=========='.format(self.frame_id)) logger.debug('Activated: {}'.format([track.track_id for track in activated_starcks])) logger.debug('Refind: {}'.format([track.track_id for track in refind_stracks])) logger.debug('Lost: {}'.format([track.track_id for track in lost_stracks])) logger.debug('Removed: {}'.format([track.track_id for track in removed_stracks])) return output_stracks
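# STrack converts between box parameterizations at every hand-off:
# tlbr_to_tlwh before constructing tracks above, and tlwh_to_xyah to feed the
# Kalman filter its (center, aspect, height) state. The conversions, sketched
# for reference:
import numpy as np

def tlbr_to_tlwh(tlbr):
    """[x1, y1, x2, y2] -> [x1, y1, w, h]."""
    ret = np.asarray(tlbr, dtype=np.float64).copy()
    ret[2:] -= ret[:2]
    return ret

def tlwh_to_xyah(tlwh):
    """[x1, y1, w, h] -> [center_x, center_y, w/h, h]."""
    ret = np.asarray(tlwh, dtype=np.float64).copy()
    ret[:2] += ret[2:] / 2
    ret[2] /= ret[3]
    return ret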
def update(self, im_blob, img0): self.frame_id += 1 activated_starcks = [] refind_stracks = [] lost_stracks = [] removed_stracks = [] t1 = time.time() ''' Add newly detected tracklets to tracked_stracks''' unconfirmed = [] tracked_stracks = [] # type: list[STrack] for track in self.tracked_stracks: if not track.is_activated: unconfirmed.append(track) else: tracked_stracks.append(track) ''' Step 2: First association, with embedding''' strack_pool = joint_stracks(tracked_stracks, self.lost_stracks) # tracked plus lost tracks, but not the unconfirmed ones # Predict the current location with KF STrack.multi_predict(strack_pool) # Kalman prediction of mean/std: multi_mean, multi_covariance via STrack.shared_kalman print("# strack_pool", len(strack_pool)) sys.stdout.flush() ''' Step 1: Network forward, get detections & embeddings''' self.opt.conf_thres = 0.3 self.opt.nms_thres = 0.8 with torch.no_grad(): pred = self.model(im_blob) # im_blob: torch.Size([1, 3, 480, 864]), pred: torch.Size([1, 34020, 518]) print("# real dets:", len(pred)) sys.stdout.flush() pred = pred[pred[:, :, 4] > self.opt.conf_thres] # 0.5 # TODO: what is a typical confidence here? very low-confidence proposals still need to be dropped # torch.Size([68, 518]) print("# 1-pass filter dets:", len(pred)) sys.stdout.flush() if len(pred) > 0: # dets = non_max_suppression(pred.unsqueeze(0), 0.3, 0.8)[0] # conf_thres: 0.5->0.3, nms_thres: 0.4->0.8 dets = pred motion_dists = matching.iou_motion(strack_pool, dets) # strack_pool holds the predicted boxes of the existing tracks '''cost_matrix[row] = lambda_ * cost_matrix[row] + (1-lambda_)* gating_distance, lambda_=0.98''' # alpha = 2.0 # motion_dists = torch.squeeze(motion_dists, 0) # argument 'input' (position 1) must be Tensor, not numpy.ndarray # print(torch.from_numpy(motion_dists).dtype) # print(dets[:, 4].dtype) # print("motion_dists", motion_dists.shape) # print("dets", dets.shape) # dets[:, 4] = alpha * dets[:, 4] + (1 - alpha) * torch.from_numpy(motion_dists).float().cuda() # dets[:, 4] = alpha * dets[:, 4] + (1 - alpha) * torch.from_numpy(motion_dists).cuda() # dets[:, 4] = dets[:, 4] + alpha * torch.from_numpy(motion_dists).cuda() dets[:, 4] = dets[:, 4] + 2.0 * torch.from_numpy(motion_dists).cuda() dets = non_max_suppression(dets.unsqueeze(0), self.opt.conf_thres, self.opt.nms_thres)[0] scale_coords(self.opt.img_size, dets[:, :4], img0.shape).round() dets, embs = dets[:, :5].cpu().numpy(), dets[:, 6:].cpu().numpy() '''Detections''' detections = [STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f, 30) for (tlbrs, f) in zip(dets, embs)] else: detections = [] '''cost_matrix[row] = lambda_ * cost_matrix[row] + (1-lambda_)* gating_distance, lambda_=0.98''' dists = matching.embedding_distance(strack_pool, detections) dists = matching.fuse_motion(self.kalman_filter, dists, strack_pool, detections) # dists = matching.iou_distance(strack_pool, detections) matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.7) for itracked, idet in matches: track = strack_pool[itracked] det = detections[idet] if track.state == TrackState.Tracked: track.update(detections[idet], self.frame_id) activated_starcks.append(track) else: track.re_activate(det, self.frame_id, new_id=False) refind_stracks.append(track) ''' Step 3: Second association, with IOU''' ''' For measurements left unmatched by the previous association, the code below is essentially unchanged. ''' detections = [detections[i] for i in u_detection] r_tracked_stracks = [strack_pool[i] for i in u_track if strack_pool[i].state==TrackState.Tracked ] # tracks that were matched before but found no match in this frame dists = matching.iou_distance(r_tracked_stracks, detections) # dists = matching.embedding_distance(r_tracked_stracks, detections) # dists = 
matching.fuse_motion(self.kalman_filter, dists, r_tracked_stracks, detections) matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.5) for itracked, idet in matches: track = r_tracked_stracks[itracked] det = detections[idet] if track.state == TrackState.Tracked: track.update(det, self.frame_id) activated_starcks.append(track) else: track.re_activate(det, self.frame_id, new_id=False) refind_stracks.append(track) for it in u_track: track = r_tracked_stracks[it] if not track.state == TrackState.Lost: track.mark_lost() lost_stracks.append(track) '''Deal with unconfirmed tracks, usually tracks with only one beginning frame''' detections = [detections[i] for i in u_detection] dists = matching.iou_distance(unconfirmed, detections) matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=0.7) for itracked, idet in matches: unconfirmed[itracked].update(detections[idet], self.frame_id) activated_starcks.append(unconfirmed[itracked]) for it in u_unconfirmed: track = unconfirmed[it] track.mark_removed() removed_stracks.append(track) """ Step 4: Init new stracks""" for inew in u_detection: track = detections[inew] if track.score < self.det_thresh: continue track.activate(self.kalman_filter, self.frame_id) activated_starcks.append(track) """ Step 5: Update state""" for track in self.lost_stracks: if self.frame_id - track.end_frame > self.max_time_lost: track.mark_removed() removed_stracks.append(track) self.tracked_stracks = [t for t in self.tracked_stracks if t.state == TrackState.Tracked] self.tracked_stracks = joint_stracks(self.tracked_stracks, activated_starcks) self.tracked_stracks = joint_stracks(self.tracked_stracks, refind_stracks) self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks) self.lost_stracks.extend(lost_stracks) self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks) self.removed_stracks.extend(removed_stracks) self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(self.tracked_stracks, self.lost_stracks) # get scores of lost tracks output_stracks = [track for track in self.tracked_stracks if track.is_activated] logger.debug('===========Frame {}=========='.format(self.frame_id)) logger.debug('Activated: {}'.format([track.track_id for track in activated_starcks])) logger.debug('Refind: {}'.format([track.track_id for track in refind_stracks])) logger.debug('Lost: {}'.format([track.track_id for track in lost_stracks])) logger.debug('Removed: {}'.format([track.track_id for track in removed_stracks])) return output_stracks
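# Before NMS, the update() above boosts each proposal's confidence by how well
# it overlaps the Kalman-predicted track boxes (dets[:, 4] += 2.0 * motion prior).
# matching.iou_motion is not shown in this file; the sketch below assumes it
# returns, per proposal, the best IoU against any predicted track box:
import numpy as np

def pairwise_iou(a, b):
    """a: M x 4, b: N x 4 tlbr boxes -> M x N IoU matrix."""
    a = np.asarray(a, dtype=np.float64).reshape(-1, 4)
    b = np.asarray(b, dtype=np.float64).reshape(-1, 4)
    tl = np.maximum(a[:, None, :2], b[None, :, :2])
    br = np.minimum(a[:, None, 2:], b[None, :, 2:])
    wh = np.clip(br - tl, 0.0, None)
    inter = wh[..., 0] * wh[..., 1]
    area_a = (a[:, 2] - a[:, 0]) * (a[:, 3] - a[:, 1])
    area_b = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1])
    return inter / (area_a[:, None] + area_b[None, :] - inter + 1e-12)

def boost_scores(track_tlbrs, det_tlbrs, det_scores, alpha=2.0):
    """Add alpha * (best overlap with a predicted track) to each proposal score."""
    if len(track_tlbrs) == 0 or len(det_tlbrs) == 0:
        return det_scores
    best_iou = pairwise_iou(track_tlbrs, det_tlbrs).max(axis=0)
    return det_scores + alpha * best_iou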
def update(self, dets, features):
    self.frame_id += 1
    activated_starcks = []  # tracks activated in the current frame
    refind_stracks = []     # lost tracks whose detections are recovered in the current frame
    lost_stracks = []       # tracks missing in the current frame but not yet removed
                            # (lost for less time than the removal threshold)
    removed_stracks = []

    if len(dets) > 0:
        detections = [STrack(STrack.tlbr_to_tlwh(x[:4]), score=x[4], label=x[5],
                             buffer_size=self.buffer_size, feat=feat)
                      for (x, feat) in zip(dets, features)]
    else:
        detections = []

    ''' Add newly detected tracklets to tracked_stracks'''
    unconfirmed = []
    tracked_stracks = []  # type: list[STrack]
    for track in self.tracked_stracks:
        if not track.is_activated:
            # tracks that are not yet activated go to the unconfirmed list
            unconfirmed.append(track)
        else:
            # active tracks go to the local list 'tracked_stracks'
            tracked_stracks.append(track)

    ''' Step 2: First association, with embedding'''
    # Pool the currently tracked and the lost tracks
    strack_pool = joint_stracks(tracked_stracks, self.lost_stracks)
    # Predict the current location with the Kalman filter
    STrack.multi_predict(strack_pool, self.kalman_filter)

    dists = matching.embedding_distance(strack_pool, detections)
    dists = matching.fuse_motion(self.kalman_filter, dists, strack_pool, detections)
    # dists[i, j] is the cost of assigning detection j to track i in strack_pool
    matches, u_track, u_detection = matching.linear_assignment(dists, thresh=self.thresh1)

    # matches pairs a track index (itracked) with a detection index (idet)
    for itracked, idet in matches:
        track = strack_pool[itracked]
        det = detections[idet]
        if track.state == TrackState.Tracked:
            # the track is active: update it with the matched detection
            track.update(det, self.frame_id)
            activated_starcks.append(track)
        else:
            # a lost track has been re-detected: re-activate it and mark it refound
            track.re_activate(det, self.frame_id, new_id=False)
            refind_stracks.append(track)

    ''' Step 3: Second association, with IOU'''
    # keep only the detections that were not matched in the first association
    detections = [detections[i] for i in u_detection]
    # tracks that were tracked up to the previous frame but got no embedding match
    r_tracked_stracks = []
    for i in u_track:
        if strack_pool[i].state == TrackState.Tracked:
            r_tracked_stracks.append(strack_pool[i])

    dists = matching.iou_distance(r_tracked_stracks, detections)
    matches, u_track, u_detection = matching.linear_assignment(dists, thresh=self.thresh2)

    for itracked, idet in matches:
        track = r_tracked_stracks[itracked]
        det = detections[idet]
        if track.state == TrackState.Tracked:
            track.update(det, self.frame_id)
            activated_starcks.append(track)
        else:
            track.re_activate(det, self.frame_id, new_id=False)
            refind_stracks.append(track)

    # tracks that remain unmatched after both associations are marked lost
    for it in u_track:
        track = r_tracked_stracks[it]
        if not track.state == TrackState.Lost:
            track.mark_lost()
            lost_stracks.append(track)

    '''Deal with unconfirmed tracks, usually tracks with only one initial frame'''
    detections = [detections[i] for i in u_detection]
    dists = matching.iou_distance(unconfirmed, detections)
    matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=self.thresh3)
    for itracked, idet in matches:
        unconfirmed[itracked].update(detections[idet], self.frame_id)
        activated_starcks.append(unconfirmed[itracked])
    # unconfirmed tracks that still found no match are removed
    for it in u_unconfirmed:
        track = unconfirmed[it]
        track.mark_removed()
        removed_stracks.append(track)

    """ Step 4: Init new stracks"""
    # any detection that survived every association step starts a new track
    for inew in u_detection:
        track = detections[inew]
        if track.score < self.det_thresh:
            continue
        track.activate(self.kalman_filter, self.frame_id)
        activated_starcks.append(track)

    """ Step 5: Update state"""
    # tracks lost for more frames than the threshold are removed
    for track in self.lost_stracks:
        if self.frame_id - track.end_frame > self.max_time_lost:
            track.mark_removed()
            removed_stracks.append(track)

    # fold this frame's updates back into self.tracked_stracks / self.lost_stracks
    self.tracked_stracks = [t for t in self.tracked_stracks if t.state == TrackState.Tracked]
    self.tracked_stracks = joint_stracks(self.tracked_stracks, activated_starcks)
    self.tracked_stracks = joint_stracks(self.tracked_stracks, refind_stracks)
    self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks)
    self.lost_stracks.extend(lost_stracks)
    self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks)
    self.removed_stracks.extend(removed_stracks)
    self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(
        self.tracked_stracks, self.lost_stracks)

    # only activated tracks are reported for this frame
    output_stracks = [track for track in self.tracked_stracks if track.is_activated]

    return output_stracks
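# ---------------------------------------------------------------------------
# Both association stages call matching.linear_assignment(dists, thresh=...).
# A minimal sketch of that routine, assuming the `lap` package (lap.lapjv
# with extend_cost/cost_limit) as in the reference JDE code; if your matching
# module solves the assignment differently, treat this purely as illustration.
import numpy as np
import lap


def linear_assignment(cost_matrix, thresh):
    # Min-cost bipartite matching; pairs whose cost exceeds `thresh`
    # are left unmatched (cost_limit forbids them).
    if cost_matrix.size == 0:
        return (np.empty((0, 2), dtype=int),
                tuple(range(cost_matrix.shape[0])),
                tuple(range(cost_matrix.shape[1])))
    _, x, y = lap.lapjv(cost_matrix, extend_cost=True, cost_limit=thresh)
    matches = np.asarray([[ix, mx] for ix, mx in enumerate(x) if mx >= 0])
    unmatched_a = np.where(x < 0)[0]  # rows (tracks) left unmatched
    unmatched_b = np.where(y < 0)[0]  # columns (detections) left unmatched
    return matches, unmatched_a, unmatched_b
# ---------------------------------------------------------------------------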
def update(self, im_blob, img0):
    """
    Processes the image frame and finds bounding-box detections.
    Associates the detections with corresponding tracklets and also handles
    lost, removed, refound and active tracklets.

    Parameters
    ----------
    im_blob : torch.float32
        Tensor of shape depending upon the size of image. By default, the shape is [1, 3, 608, 1088]
    img0 : ndarray
        ndarray of shape depending on the input image sequence. By default, the shape is [608, 1080, 3]

    Returns
    -------
    output_stracks : list of STrack instances
        The online tracklets for the received image tensor.
    """
    self.frame_id += 1
    activated_starcks = []  # tracks activated in the current frame
    refind_stracks = []     # lost tracks whose detections are recovered in the current frame
    lost_stracks = []       # tracks missing in the current frame but not yet removed
                            # (lost for less time than the removal threshold)
    removed_stracks = []

    ''' Step 1: Network forward, get detections & embeddings'''
    with torch.no_grad():
        pred = self.model(im_blob)
    # pred holds all proposals (by default 54264); each row carries the
    # bounding box and the embedding
    pred = pred[pred[:, :, 4] > self.opt.conf_thres]
    # proposals below the object-confidence threshold have been rejected
    if len(pred) > 0:
        dets = non_max_suppression(pred.unsqueeze(0), self.opt.conf_thres,
                                   self.opt.nms_thres)[0].cpu()
        # dets holds the final proposals: bounding box, confidence and embedding.
        # Rescale the boxes from network input size to the original image size
        scale_coords(self.opt.img_size, dets[:, :4], img0.shape).round()

        '''Each detection is (x1, y1, x2, y2, object_conf, class_score, embedding...)'''
        detections = [STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f.numpy(), 30)
                      for (tlbrs, f) in zip(dets[:, :5], dets[:, 6:])]
    else:
        detections = []

    ''' Add newly detected tracklets to tracked_stracks'''
    unconfirmed = []
    tracked_stracks = []  # type: list[STrack]
    for track in self.tracked_stracks:
        if not track.is_activated:
            # tracks that are not yet activated go to the unconfirmed list
            unconfirmed.append(track)
        else:
            # active tracks go to the local list 'tracked_stracks'
            tracked_stracks.append(track)

    ''' Step 2: First association, with embedding'''
    # Pool the currently tracked and the lost tracks
    strack_pool = joint_stracks(tracked_stracks, self.lost_stracks)
    # Predict the current location with the Kalman filter
    STrack.multi_predict(strack_pool, self.kalman_filter)

    dists = matching.embedding_distance(strack_pool, detections)
    dists = matching.fuse_motion(self.kalman_filter, dists, strack_pool, detections)
    # dists[i, j] is the cost of assigning detection j to track i in strack_pool
    matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.7)

    # matches pairs a track index (itracked) with a detection index (idet)
    for itracked, idet in matches:
        track = strack_pool[itracked]
        det = detections[idet]
        if track.state == TrackState.Tracked:
            # the track is active: update it with the matched detection
            track.update(det, self.frame_id)
            activated_starcks.append(track)
        else:
            # a lost track has been re-detected: re-activate it and mark it refound
            track.re_activate(det, self.frame_id, new_id=False)
            refind_stracks.append(track)

    ''' Step 3: Second association, with IOU'''
    # keep only the detections that were not matched in the first association
    detections = [detections[i] for i in u_detection]
    # tracks that were tracked up to the previous frame but got no embedding match
    r_tracked_stracks = []
    for i in u_track:
        if strack_pool[i].state == TrackState.Tracked:
            r_tracked_stracks.append(strack_pool[i])

    dists = matching.iou_distance(r_tracked_stracks, detections)
    matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.5)

    for itracked, idet in matches:
        track = r_tracked_stracks[itracked]
        det = detections[idet]
        if track.state == TrackState.Tracked:
            track.update(det, self.frame_id)
            activated_starcks.append(track)
        else:
            track.re_activate(det, self.frame_id, new_id=False)
            refind_stracks.append(track)

    # tracks that remain unmatched after both associations are marked lost
    for it in u_track:
        track = r_tracked_stracks[it]
        if not track.state == TrackState.Lost:
            track.mark_lost()
            lost_stracks.append(track)

    '''Deal with unconfirmed tracks, usually tracks with only one initial frame'''
    detections = [detections[i] for i in u_detection]
    dists = matching.iou_distance(unconfirmed, detections)
    matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=0.7)
    for itracked, idet in matches:
        unconfirmed[itracked].update(detections[idet], self.frame_id)
        activated_starcks.append(unconfirmed[itracked])
    # unconfirmed tracks that still found no match are removed
    for it in u_unconfirmed:
        track = unconfirmed[it]
        track.mark_removed()
        removed_stracks.append(track)

    """ Step 4: Init new stracks"""
    # any detection that survived every association step starts a new track
    for inew in u_detection:
        track = detections[inew]
        if track.score < self.det_thresh:
            continue
        track.activate(self.kalman_filter, self.frame_id)
        activated_starcks.append(track)

    """ Step 5: Update state"""
    # tracks lost for more frames than the threshold are removed
    for track in self.lost_stracks:
        if self.frame_id - track.end_frame > self.max_time_lost:
            track.mark_removed()
            removed_stracks.append(track)

    # fold this frame's updates back into self.tracked_stracks / self.lost_stracks
    self.tracked_stracks = [t for t in self.tracked_stracks if t.state == TrackState.Tracked]
    self.tracked_stracks = joint_stracks(self.tracked_stracks, activated_starcks)
    self.tracked_stracks = joint_stracks(self.tracked_stracks, refind_stracks)
    self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks)
    self.lost_stracks.extend(lost_stracks)
    self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks)
    self.removed_stracks.extend(removed_stracks)
    self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(
        self.tracked_stracks, self.lost_stracks)

    # only activated tracks are reported for this frame
    output_stracks = [track for track in self.tracked_stracks if track.is_activated]

    logger.debug('===========Frame {}=========='.format(self.frame_id))
    logger.debug('Activated: {}'.format([track.track_id for track in activated_starcks]))
    logger.debug('Refind: {}'.format([track.track_id for track in refind_stracks]))
    logger.debug('Lost: {}'.format([track.track_id for track in lost_stracks]))
    logger.debug('Removed: {}'.format([track.track_id for track in removed_stracks]))

    return output_stracks
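# ---------------------------------------------------------------------------
# The second association above scores track/detection pairs by IoU. A
# self-contained sketch of matching.iou_distance, assuming each STrack
# exposes a `tlbr` box as (x1, y1, x2, y2); the real module may vectorize
# this differently (e.g. via cython_bbox).
import numpy as np


def iou_distance(atracks, btracks):
    # Cost matrix where cost[i, j] = 1 - IoU(atracks[i], btracks[j]).
    if len(atracks) == 0 or len(btracks) == 0:
        return np.ones((len(atracks), len(btracks)), dtype=np.float32)
    a = np.asarray([t.tlbr for t in atracks], dtype=np.float32)
    b = np.asarray([t.tlbr for t in btracks], dtype=np.float32)
    cost = np.ones((len(a), len(b)), dtype=np.float32)
    area_b = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1])
    for i, box in enumerate(a):
        # intersection of box `i` with every box in `b` at once
        iw = np.minimum(box[2], b[:, 2]) - np.maximum(box[0], b[:, 0])
        ih = np.minimum(box[3], b[:, 3]) - np.maximum(box[1], b[:, 1])
        inter = np.clip(iw, 0, None) * np.clip(ih, 0, None)
        area_a = (box[2] - box[0]) * (box[3] - box[1])
        union = area_a + area_b - inter
        cost[i] = 1.0 - inter / np.maximum(union, 1e-6)
    return cost
# ---------------------------------------------------------------------------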
def _update(self, detections):
    activated_starcks = []
    refind_stracks = []
    lost_stracks = []
    removed_stracks = []

    ''' Add newly detected tracklets to tracked_stracks'''
    unconfirmed = []
    tracked_stracks = []  # type: list[STrack]
    for track in self.tracked_stracks:
        if not track.is_activated:
            unconfirmed.append(track)
        else:
            tracked_stracks.append(track)

    ''' Step 2: First association, with embedding'''
    strack_pool = joint_stracks(tracked_stracks, self.lost_stracks)
    # Predict the current location with the Kalman filter
    STrack.multi_predict(strack_pool)
    dists = matching.embedding_distance(strack_pool, detections)
    dists = matching.fuse_motion(self.kalman_filter, dists, strack_pool, detections)
    matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.7)

    for itracked, idet in matches:
        track = strack_pool[itracked]
        det = detections[idet]
        if track.state == TrackState.Tracked:
            track.update(det, self.frame_id)
            activated_starcks.append(track)
        else:
            track.re_activate(det, self.frame_id, new_id=False)
            refind_stracks.append(track)

    ''' Step 3: Second association, with IOU'''
    detections = [detections[i] for i in u_detection]
    r_tracked_stracks = [strack_pool[i] for i in u_track
                         if strack_pool[i].state == TrackState.Tracked]
    dists = matching.iou_distance(r_tracked_stracks, detections)
    matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.5)

    for itracked, idet in matches:
        track = r_tracked_stracks[itracked]
        det = detections[idet]
        if track.state == TrackState.Tracked:
            track.update(det, self.frame_id)
            activated_starcks.append(track)
        else:
            track.re_activate(det, self.frame_id, new_id=False)
            refind_stracks.append(track)

    for it in u_track:
        track = r_tracked_stracks[it]
        if not track.state == TrackState.Lost:
            track.mark_lost()
            lost_stracks.append(track)

    '''Deal with unconfirmed tracks, usually tracks with only one initial frame'''
    detections = [detections[i] for i in u_detection]
    dists = matching.iou_distance(unconfirmed, detections)
    matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=0.7)
    for itracked, idet in matches:
        unconfirmed[itracked].update(detections[idet], self.frame_id)
        activated_starcks.append(unconfirmed[itracked])
    for it in u_unconfirmed:
        track = unconfirmed[it]
        track.mark_removed()
        removed_stracks.append(track)

    """ Step 4: Init new stracks"""
    for inew in u_detection:
        track = detections[inew]
        if track.score < self.det_thresh:
            continue
        track.activate(self.kalman_filter, self.frame_id)
        activated_starcks.append(track)

    """ Step 5: Update state"""
    for track in self.lost_stracks:
        if self.frame_id - track.end_frame > self.max_time_lost:
            # try to merge the expiring track into an existing one before removing it
            if not self.merge_tracks(track):
                track.mark_removed()
                removed_stracks.append(track)

    self.tracked_stracks = [t for t in self.tracked_stracks if t.state == TrackState.Tracked]
    self.tracked_stracks = joint_stracks(self.tracked_stracks, activated_starcks)
    self.tracked_stracks = joint_stracks(self.tracked_stracks, refind_stracks)
    self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks)
    self.lost_stracks.extend(lost_stracks)
    self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks)
    self.removed_stracks.extend(removed_stracks)
    self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(
        self.tracked_stracks, self.lost_stracks)

    # only activated tracks are reported for this frame
    output_stracks = [track for track in self.tracked_stracks if track.is_activated]

    logger.debug('===========Frame {}=========='.format(self.frame_id))
    logger.debug('Activated: {}'.format([track.track_id for track in activated_starcks]))
    logger.debug('Refind: {}'.format([track.track_id for track in refind_stracks]))
    logger.debug('Lost: {}'.format([track.track_id for track in lost_stracks]))
    logger.debug('Removed: {}'.format([track.track_id for track in removed_stracks]))

    return output_stracks, activated_starcks, self.lost_stracks
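# ---------------------------------------------------------------------------
# The first association combines appearance and motion. A sketch of
# matching.embedding_distance and matching.fuse_motion in the spirit of the
# reference JDE code: cosine distance on features, then a Mahalanobis gate
# from the Kalman filter. `chi2inv95` (0.95 chi-square quantiles keyed by
# degrees of freedom) and `kf.gating_distance` are assumed from the
# DeepSORT-style KalmanFilter used above.
import numpy as np
from scipy.spatial.distance import cdist

# 0.95 quantile of the chi-square distribution, indexed by dof (1..4 shown)
chi2inv95 = {1: 3.8415, 2: 5.9915, 3: 7.8147, 4: 9.4877}


def embedding_distance(tracks, detections, metric='cosine'):
    # cost[i, j] = cosine distance between track i's smoothed feature
    # and detection j's current feature, clipped at 0.
    cost_matrix = np.zeros((len(tracks), len(detections)), dtype=np.float32)
    if cost_matrix.size == 0:
        return cost_matrix
    det_features = np.asarray([d.curr_feat for d in detections], dtype=np.float32)
    trk_features = np.asarray([t.smooth_feat for t in tracks], dtype=np.float32)
    return np.maximum(0.0, cdist(trk_features, det_features, metric)).astype(np.float32)


def fuse_motion(kf, cost_matrix, tracks, detections, only_position=False, lambda_=0.98):
    # Blend the appearance cost with the Kalman gating distance, and set
    # motion-infeasible pairs to infinity so they can never be matched.
    if cost_matrix.size == 0:
        return cost_matrix
    gating_dim = 2 if only_position else 4
    gating_threshold = chi2inv95[gating_dim]
    measurements = np.asarray([det.to_xyah() for det in detections])
    for row, track in enumerate(tracks):
        gating_distance = kf.gating_distance(track.mean, track.covariance,
                                             measurements, only_position, metric='maha')
        cost_matrix[row, gating_distance > gating_threshold] = np.inf
        cost_matrix[row] = lambda_ * cost_matrix[row] + (1 - lambda_) * gating_distance
    return cost_matrix
# ---------------------------------------------------------------------------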